Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2026 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 : MODULE fftw3_lib
8 : USE ISO_C_BINDING, ONLY: C_ASSOCIATED, &
9 : C_CHAR, &
10 : C_DOUBLE, &
11 : C_DOUBLE_COMPLEX, &
12 : C_INT, &
13 : C_PTR
14 : #if defined(__FFTW3)
15 : USE ISO_C_BINDING, ONLY: &
16 : C_FLOAT, &
17 : C_FLOAT_COMPLEX, &
18 : C_FUNPTR, &
19 : C_INT32_T, &
20 : C_INTPTR_T, &
21 : C_LOC, &
22 : C_NULL_CHAR, &
23 : C_SIZE_T, C_F_POINTER
24 : USE mathconstants, ONLY: z_zero
25 : #endif
26 : USE cp_files, ONLY: get_unit_number
27 : USE fft_kinds, ONLY: dp
28 : USE fft_plan, ONLY: fft_plan_type
29 :
30 : !$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num, omp_get_num_threads
31 :
32 : #include "../../base/base_uses.f90"
33 :
34 : IMPLICIT NONE
35 : PRIVATE
36 :
37 : PUBLIC :: fftw3_do_init, fftw3_do_cleanup, fftw3_get_lengths, fftw33d, fftw31dm
38 : PUBLIC :: fftw3_destroy_plan, fftw3_create_plan_1dm, fftw3_create_plan_3d
39 : PUBLIC :: fftw_alloc, fftw_dealloc
40 :
41 : #if defined(__FFTW3)
42 : #include "fftw3.f03"
43 : #endif
44 :
45 : INTERFACE fftw_alloc
46 : MODULE PROCEDURE :: fftw_alloc_complex_1d
47 : MODULE PROCEDURE :: fftw_alloc_complex_2d
48 : MODULE PROCEDURE :: fftw_alloc_complex_3d
49 : END INTERFACE fftw_alloc
50 :
51 : INTERFACE fftw_dealloc
52 : MODULE PROCEDURE :: fftw_dealloc_complex_1d
53 : MODULE PROCEDURE :: fftw_dealloc_complex_2d
54 : MODULE PROCEDURE :: fftw_dealloc_complex_3d
55 : END INTERFACE fftw_dealloc
56 :
57 : CONTAINS
58 :
59 : #:set maxdim = 3
60 : #:for dim in range(1, maxdim+1)
61 : ! Concatenate the components of the dimensions passed to this function to use it if FFTW3 is not used
62 : #:set dim_extended = ", ".join(["n("+str(i)+")" for i in range(1, dim+1)])
63 110382 : SUBROUTINE fftw_alloc_complex_${dim}$d(array, n)
64 : COMPLEX(C_DOUBLE_COMPLEX), DIMENSION(:${", :"*(dim-1)}$), CONTIGUOUS, POINTER, INTENT(OUT) :: array
65 : INTEGER, DIMENSION(${dim}$), INTENT(IN) :: n
66 :
67 : #if defined(__FFTW3)
68 : TYPE(C_PTR) :: data_ptr
69 386776 : data_ptr = fftw_alloc_complex(INT(PRODUCT(n), KIND=C_SIZE_T))
70 386776 : CALL C_F_POINTER(data_ptr, array, n)
71 : #else
72 : ! Just allocate the array
73 : ALLOCATE (array(${dim_extended}$))
74 : #endif
75 :
76 110382 : END SUBROUTINE fftw_alloc_complex_${dim}$d
77 :
78 110382 : SUBROUTINE fftw_dealloc_complex_${dim}$d(array)
79 : COMPLEX(C_DOUBLE_COMPLEX), DIMENSION(:${", :"*(dim-1)}$), CONTIGUOUS, POINTER, INTENT(INOUT) :: array
80 :
81 : #if defined(__FFTW3)
82 110382 : CALL fftw_free(C_LOC(array))
83 110382 : NULLIFY (array)
84 : #else
85 : ! Just deallocate the array
86 : DEALLOCATE (array)
87 : #endif
88 :
89 110382 : END SUBROUTINE fftw_dealloc_complex_${dim}$d
90 : #:endfor
91 :
92 : #if defined(__FFTW3)
93 : ! **************************************************************************************************
94 : !> \brief A workaround that allows us to compile with -Werror=unused-parameter
95 : ! **************************************************************************************************
96 0 : SUBROUTINE dummy_routine_to_call_mark_used()
97 : MARK_USED(FFTW_R2HC)
98 : MARK_USED(FFTW_HC2R)
99 : MARK_USED(FFTW_DHT)
100 : MARK_USED(FFTW_REDFT00)
101 : MARK_USED(FFTW_REDFT01)
102 : MARK_USED(FFTW_REDFT10)
103 : MARK_USED(FFTW_REDFT11)
104 : MARK_USED(FFTW_RODFT00)
105 : MARK_USED(FFTW_RODFT01)
106 : MARK_USED(FFTW_RODFT10)
107 : MARK_USED(FFTW_RODFT11)
108 : MARK_USED(FFTW_FORWARD)
109 : MARK_USED(FFTW_BACKWARD)
110 : MARK_USED(FFTW_MEASURE)
111 : MARK_USED(FFTW_DESTROY_INPUT)
112 : MARK_USED(FFTW_UNALIGNED)
113 : MARK_USED(FFTW_CONSERVE_MEMORY)
114 : MARK_USED(FFTW_EXHAUSTIVE)
115 : MARK_USED(FFTW_PRESERVE_INPUT)
116 : MARK_USED(FFTW_PATIENT)
117 : MARK_USED(FFTW_ESTIMATE)
118 : MARK_USED(FFTW_WISDOM_ONLY)
119 : MARK_USED(FFTW_ESTIMATE_PATIENT)
120 : MARK_USED(FFTW_BELIEVE_PCOST)
121 : MARK_USED(FFTW_NO_DFT_R2HC)
122 : MARK_USED(FFTW_NO_NONTHREADED)
123 : MARK_USED(FFTW_NO_BUFFERING)
124 : MARK_USED(FFTW_NO_INDIRECT_OP)
125 : MARK_USED(FFTW_ALLOW_LARGE_GENERIC)
126 : MARK_USED(FFTW_NO_RANK_SPLITS)
127 : MARK_USED(FFTW_NO_VRANK_SPLITS)
128 : MARK_USED(FFTW_NO_VRECURSE)
129 : MARK_USED(FFTW_NO_SIMD)
130 : MARK_USED(FFTW_NO_SLOW)
131 : MARK_USED(FFTW_NO_FIXED_RADIX_LARGE_N)
132 : MARK_USED(FFTW_ALLOW_PRUNING)
133 0 : END SUBROUTINE dummy_routine_to_call_mark_used
134 : #endif
135 :
136 : ! **************************************************************************************************
137 : !> \brief ...
138 : !> \param wisdom_file ...
139 : !> \param ionode ...
140 : ! **************************************************************************************************
141 9741 : SUBROUTINE fftw3_do_cleanup(wisdom_file, ionode)
142 :
143 : CHARACTER(LEN=*), INTENT(IN) :: wisdom_file
144 : LOGICAL :: ionode
145 :
146 : #if defined(__FFTW3)
147 9741 : CHARACTER(LEN=1, KIND=C_CHAR), DIMENSION(:), ALLOCATABLE :: wisdom_file_name_c
148 : INTEGER :: file_name_length, i, iunit, istat
149 : INTEGER(KIND=C_INT) :: isuccess
150 : ! Write out FFTW3 wisdom to file (if we can)
151 : ! only the ionode updates the wisdom
152 9741 : IF (ionode) THEN
153 4974 : iunit = get_unit_number()
154 : ! Check whether the file can be opened in the necessary manner
155 4974 : OPEN (UNIT=iunit, FILE=wisdom_file, STATUS="UNKNOWN", FORM="FORMATTED", ACTION="WRITE", IOSTAT=istat)
156 4974 : IF (istat == 0) THEN
157 2 : CLOSE (iunit)
158 2 : file_name_length = LEN_TRIM(wisdom_file)
159 6 : ALLOCATE (wisdom_file_name_c(file_name_length + 1))
160 18 : DO i = 1, file_name_length
161 18 : wisdom_file_name_c(i) = wisdom_file(i:i)
162 : END DO
163 2 : wisdom_file_name_c(file_name_length + 1) = C_NULL_CHAR
164 2 : isuccess = fftw_export_wisdom_to_filename(wisdom_file_name_c)
165 2 : IF (isuccess == 0) &
166 : CALL cp_warn(__LOCATION__, "Error exporting wisdom to file "//TRIM(wisdom_file)//". "// &
167 0 : "Wisdom was not exported.")
168 : END IF
169 : END IF
170 :
171 9741 : CALL fftw_cleanup()
172 : #else
173 : MARK_USED(wisdom_file)
174 : MARK_USED(ionode)
175 : #endif
176 :
177 9741 : END SUBROUTINE fftw3_do_cleanup
178 :
179 : ! **************************************************************************************************
180 : !> \brief ...
181 : !> \param wisdom_file ...
182 : ! **************************************************************************************************
183 9951 : SUBROUTINE fftw3_do_init(wisdom_file)
184 :
185 : CHARACTER(LEN=*), INTENT(IN) :: wisdom_file
186 :
187 : #if defined(__FFTW3)
188 9951 : CHARACTER(LEN=1, KIND=C_CHAR), DIMENSION(:), ALLOCATABLE :: wisdom_file_name_c
189 : INTEGER :: file_name_length, i, istat, iunit
190 : INTEGER(KIND=C_INT) :: isuccess
191 : LOGICAL :: file_exists
192 :
193 9951 : isuccess = fftw_init_threads()
194 9951 : IF (isuccess == 0) &
195 0 : CPABORT("Error initializing FFTW with threads")
196 :
197 : ! Read FFTW wisdom (if available)
198 : ! all nodes are opening the file here...
199 9951 : INQUIRE (FILE=wisdom_file, exist=file_exists)
200 9951 : IF (file_exists) THEN
201 2 : iunit = get_unit_number()
202 2 : file_name_length = LEN_TRIM(wisdom_file)
203 : ! Check whether the file can be opened in the necessary manner
204 : OPEN (UNIT=iunit, FILE=wisdom_file, STATUS="OLD", FORM="FORMATTED", POSITION="REWIND", &
205 2 : ACTION="READ", IOSTAT=istat)
206 2 : IF (istat == 0) THEN
207 2 : CLOSE (iunit)
208 2 : file_name_length = LEN_TRIM(wisdom_file)
209 6 : ALLOCATE (wisdom_file_name_c(file_name_length + 1))
210 18 : DO i = 1, file_name_length
211 18 : wisdom_file_name_c(i) = wisdom_file(i:i)
212 : END DO
213 2 : wisdom_file_name_c(file_name_length + 1) = C_NULL_CHAR
214 2 : isuccess = fftw_import_wisdom_from_filename(wisdom_file_name_c)
215 2 : IF (isuccess == 0) &
216 : CALL cp_warn(__LOCATION__, "Error importing wisdom from file "//TRIM(wisdom_file)//". "// &
217 : "Maybe the file was created with a different configuration than CP2K is run with. "// &
218 0 : "CP2K continues without importing wisdom.")
219 : END IF
220 : END IF
221 : #else
222 : MARK_USED(wisdom_file)
223 : #endif
224 :
225 9951 : END SUBROUTINE fftw3_do_init
226 :
227 : ! **************************************************************************************************
228 : !> \brief ...
229 : !> \param DATA ...
230 : !> \param max_length ...
231 : !> \par History
232 : !> JGH 23-Jan-2006 : initial version
233 : !> Adapted for new interface
234 : !> IAB 09-Jan-2009 : Modified to cache plans in fft_plan_type
235 : !> (c) The Numerical Algorithms Group (NAG) Ltd, 2009 on behalf of the HECToR project
236 : !> IAB 09-Oct-2009 : Added OpenMP directives to 1D FFT, and planning routines
237 : !> (c) The Numerical Algorithms Group (NAG) Ltd, 2009 on behalf of the HECToR project
238 : !> IAB 11-Sep-2012 : OpenMP parallel 3D FFT (Ruyman Reyes, PRACE)
239 : !> \author JGH
240 : ! **************************************************************************************************
241 528 : SUBROUTINE fftw3_get_lengths(DATA, max_length)
242 :
243 : INTEGER, DIMENSION(*) :: DATA
244 : INTEGER, INTENT(INOUT) :: max_length
245 :
246 : INTEGER :: h, i, j, k, m, maxn, maxn_elevens, &
247 : maxn_fives, maxn_sevens, &
248 : maxn_thirteens, maxn_threes, &
249 : maxn_twos, ndata, nmax, number
250 528 : INTEGER, ALLOCATABLE, DIMENSION(:) :: dlocal, idx
251 :
252 : !------------------------------------------------------------------------------
253 : ! compute ndata
254 : !! FFTW can do arbitrary(?) lengths, maybe you want to limit them to some
255 : !! powers of small prime numbers though...
256 :
257 528 : maxn_twos = 15
258 528 : maxn_threes = 3
259 528 : maxn_fives = 2
260 528 : maxn_sevens = 1
261 528 : maxn_elevens = 1
262 528 : maxn_thirteens = 0
263 528 : maxn = 37748736
264 :
265 528 : ndata = 0
266 8976 : DO h = 0, maxn_twos
267 8448 : nmax = HUGE(0)/2**h
268 42768 : DO i = 0, maxn_threes
269 143616 : DO j = 0, maxn_fives
270 337920 : DO k = 0, maxn_sevens
271 709632 : DO m = 0, maxn_elevens
272 405504 : number = (3**i)*(5**j)*(7**k)*(11**m)
273 :
274 405504 : IF (number > nmax) CYCLE
275 :
276 405504 : number = number*2**h
277 405504 : IF (number >= maxn) CYCLE
278 :
279 608256 : ndata = ndata + 1
280 : END DO
281 : END DO
282 : END DO
283 : END DO
284 : END DO
285 :
286 2112 : ALLOCATE (dlocal(ndata), idx(ndata))
287 :
288 528 : ndata = 0
289 389136 : dlocal(:) = 0
290 8976 : DO h = 0, maxn_twos
291 8448 : nmax = HUGE(0)/2**h
292 42768 : DO i = 0, maxn_threes
293 143616 : DO j = 0, maxn_fives
294 337920 : DO k = 0, maxn_sevens
295 709632 : DO m = 0, maxn_elevens
296 405504 : number = (3**i)*(5**j)*(7**k)*(11**m)
297 :
298 405504 : IF (number > nmax) CYCLE
299 :
300 405504 : number = number*2**h
301 405504 : IF (number >= maxn) CYCLE
302 :
303 388608 : ndata = ndata + 1
304 608256 : dlocal(ndata) = number
305 : END DO
306 : END DO
307 : END DO
308 : END DO
309 : END DO
310 :
311 528 : CALL sortint(dlocal, ndata, idx)
312 528 : ndata = MIN(ndata, max_length)
313 389136 : DATA(1:ndata) = dlocal(1:ndata)
314 528 : max_length = ndata
315 :
316 528 : DEALLOCATE (dlocal, idx)
317 :
318 528 : END SUBROUTINE fftw3_get_lengths
319 :
320 : ! **************************************************************************************************
321 : !> \brief ...
322 : !> \param iarr ...
323 : !> \param n ...
324 : !> \param index ...
325 : ! **************************************************************************************************
326 528 : SUBROUTINE sortint(iarr, n, index)
327 :
328 : INTEGER, INTENT(IN) :: n
329 : INTEGER, INTENT(INOUT) :: iarr(1:n)
330 : INTEGER, INTENT(OUT) :: INDEX(1:n)
331 :
332 : INTEGER, PARAMETER :: m = 7, nstack = 50
333 :
334 : INTEGER :: a, i, ib, ir, istack(1:nstack), itemp, &
335 : j, jstack, k, l, temp
336 :
337 : !------------------------------------------------------------------------------
338 :
339 389136 : DO i = 1, n
340 389136 : INDEX(i) = i
341 : END DO
342 : jstack = 0
343 : l = 1
344 : ir = n
345 : DO WHILE (.TRUE.)
346 146256 : IF (ir - l < m) THEN
347 315744 : DO j = l + 1, ir
348 242352 : a = iarr(j)
349 242352 : ib = INDEX(j)
350 575520 : DO i = j - 1, 0, -1
351 575520 : IF (i == 0) EXIT
352 574464 : IF (iarr(i) <= a) EXIT
353 333168 : iarr(i + 1) = iarr(i)
354 575520 : INDEX(i + 1) = INDEX(i)
355 : END DO
356 242352 : iarr(i + 1) = a
357 315744 : INDEX(i + 1) = ib
358 : END DO
359 73392 : IF (jstack == 0) RETURN
360 72864 : ir = istack(jstack)
361 72864 : l = istack(jstack - 1)
362 72864 : jstack = jstack - 2
363 : ELSE
364 72864 : k = (l + ir)/2
365 72864 : temp = iarr(k)
366 72864 : iarr(k) = iarr(l + 1)
367 72864 : iarr(l + 1) = temp
368 72864 : itemp = INDEX(k)
369 72864 : INDEX(k) = INDEX(l + 1)
370 72864 : INDEX(l + 1) = itemp
371 72864 : IF (iarr(l + 1) > iarr(ir)) THEN
372 34320 : temp = iarr(l + 1)
373 34320 : iarr(l + 1) = iarr(ir)
374 34320 : iarr(ir) = temp
375 34320 : itemp = INDEX(l + 1)
376 34320 : INDEX(l + 1) = INDEX(ir)
377 34320 : INDEX(ir) = itemp
378 : END IF
379 72864 : IF (iarr(l) > iarr(ir)) THEN
380 27456 : temp = iarr(l)
381 27456 : iarr(l) = iarr(ir)
382 27456 : iarr(ir) = temp
383 27456 : itemp = INDEX(l)
384 27456 : INDEX(l) = INDEX(ir)
385 27456 : INDEX(ir) = itemp
386 : END IF
387 72864 : IF (iarr(l + 1) > iarr(l)) THEN
388 27456 : temp = iarr(l + 1)
389 27456 : iarr(l + 1) = iarr(l)
390 27456 : iarr(l) = temp
391 27456 : itemp = INDEX(l + 1)
392 27456 : INDEX(l + 1) = INDEX(l)
393 27456 : INDEX(l) = itemp
394 : END IF
395 72864 : i = l + 1
396 72864 : j = ir
397 72864 : a = iarr(l)
398 72864 : ib = INDEX(l)
399 464112 : DO WHILE (.TRUE.)
400 536976 : i = i + 1
401 1742928 : DO WHILE (iarr(i) < a)
402 1205952 : i = i + 1
403 : END DO
404 536976 : j = j - 1
405 1330560 : DO WHILE (iarr(j) > a)
406 793584 : j = j - 1
407 : END DO
408 536976 : IF (j < i) EXIT
409 464112 : temp = iarr(i)
410 464112 : iarr(i) = iarr(j)
411 464112 : iarr(j) = temp
412 464112 : itemp = INDEX(i)
413 464112 : INDEX(i) = INDEX(j)
414 464112 : INDEX(j) = itemp
415 : END DO
416 72864 : iarr(l) = iarr(j)
417 72864 : iarr(j) = a
418 72864 : INDEX(l) = INDEX(j)
419 72864 : INDEX(j) = ib
420 72864 : jstack = jstack + 2
421 72864 : IF (jstack > nstack) CPABORT("Nstack too small in sortint")
422 72864 : IF (ir - i + 1 >= j - l) THEN
423 37488 : istack(jstack) = ir
424 37488 : istack(jstack - 1) = i
425 37488 : ir = j - 1
426 : ELSE
427 35376 : istack(jstack) = j - 1
428 35376 : istack(jstack - 1) = l
429 35376 : l = i
430 : END IF
431 : END IF
432 :
433 : END DO
434 :
435 : END SUBROUTINE sortint
436 :
437 : ! **************************************************************************************************
438 :
439 : ! **************************************************************************************************
440 : !> \brief ...
441 : !> \param plan ...
442 : !> \param fft_rank ...
443 : !> \param dim_n ...
444 : !> \param dim_istride ...
445 : !> \param dim_ostride ...
446 : !> \param hm_rank ...
447 : !> \param hm_n ...
448 : !> \param hm_istride ...
449 : !> \param hm_ostride ...
450 : !> \param zin ...
451 : !> \param zout ...
452 : !> \param fft_direction ...
453 : !> \param fftw_plan_type ...
454 : !> \param valid ...
455 : ! **************************************************************************************************
456 56556 : SUBROUTINE fftw3_create_guru_plan(plan, fft_rank, dim_n, &
457 : dim_istride, dim_ostride, hm_rank, &
458 : hm_n, hm_istride, hm_ostride, &
459 : zin, zout, fft_direction, fftw_plan_type, &
460 : valid)
461 :
462 : TYPE(C_PTR), INTENT(INOUT) :: plan
463 : COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT) :: zin, zout
464 : INTEGER, INTENT(IN) :: dim_n(2), dim_istride(2), dim_ostride(2), &
465 : hm_n(2), hm_istride(2), hm_ostride(2), fft_rank, &
466 : fft_direction, fftw_plan_type, hm_rank
467 : LOGICAL, INTENT(OUT) :: valid
468 :
469 : #if defined(__FFTW3)
470 : TYPE(fftw_iodim) :: dim(2), hm(2)
471 : INTEGER :: i
472 :
473 169668 : DO i = 1, 2
474 113112 : DIM(i) = fftw_iodim(dim_n(i), dim_istride(i), dim_ostride(i))
475 169668 : hm(i) = fftw_iodim(hm_n(i), hm_istride(i), hm_ostride(i))
476 : END DO
477 :
478 : plan = fftw_plan_guru_dft(fft_rank, &
479 : dim, hm_rank, hm, &
480 : zin, zout, &
481 56556 : fft_direction, fftw_plan_type)
482 :
483 56556 : valid = C_ASSOCIATED(plan)
484 :
485 : #else
486 : MARK_USED(plan)
487 : MARK_USED(fft_rank)
488 : MARK_USED(dim_n)
489 : MARK_USED(dim_istride)
490 : MARK_USED(dim_ostride)
491 : MARK_USED(hm_rank)
492 : MARK_USED(hm_n)
493 : MARK_USED(hm_istride)
494 : MARK_USED(hm_ostride)
495 : MARK_USED(fft_direction)
496 : MARK_USED(fftw_plan_type)
497 : !MARK_USED does not work with assumed size arguments
498 : IF (.FALSE.) THEN; DO; IF (ABS(zin(1)) > ABS(zout(1))) EXIT; END DO; END IF
499 : valid = .FALSE.
500 :
501 : #endif
502 :
503 56556 : END SUBROUTINE fftw3_create_guru_plan
504 :
505 : ! **************************************************************************************************
506 :
507 : ! **************************************************************************************************
508 : !> \brief Attempt to create a plan with the guru interface for a 2d sub-space.
509 : !> If this fails, fall back to the FFTW3 threaded 3D transform instead
510 : !> of the hand-optimised version.
511 : !> \return ...
512 : ! **************************************************************************************************
513 56556 : FUNCTION fftw3_is_guru_supported() RESULT(guru_supported)
514 : LOGICAL :: guru_supported
515 : #if defined(__FFTW3)
516 : INTEGER :: dim_n(2), dim_istride(2), dim_ostride(2), &
517 : howmany_n(2), howmany_istride(2), howmany_ostride(2)
518 : TYPE(C_PTR) :: test_plan
519 : COMPLEX(KIND=dp), DIMENSION(1, 1, 1) :: zin
520 :
521 56556 : dim_n(1) = 1
522 56556 : dim_n(2) = 1
523 56556 : dim_istride(1) = 1
524 56556 : dim_istride(2) = 1
525 56556 : dim_ostride(1) = 1
526 56556 : dim_ostride(2) = 1
527 56556 : howmany_n(1) = 1
528 56556 : howmany_n(2) = 1
529 56556 : howmany_istride(1) = 1
530 56556 : howmany_istride(2) = 1
531 56556 : howmany_ostride(1) = 1
532 56556 : howmany_ostride(2) = 1
533 56556 : zin = z_zero
534 : CALL fftw3_create_guru_plan(test_plan, 1, &
535 : dim_n, dim_istride, dim_ostride, &
536 : 2, howmany_n, howmany_istride, howmany_ostride, &
537 : zin, zin, &
538 56556 : FFTW_FORWARD, FFTW_ESTIMATE, guru_supported)
539 56556 : IF (guru_supported) THEN
540 56556 : CALL fftw_destroy_plan(test_plan)
541 : END IF
542 :
543 : #else
544 : guru_supported = .FALSE.
545 : #endif
546 :
547 56556 : END FUNCTION fftw3_is_guru_supported
548 :
549 : ! **************************************************************************************************
550 :
551 : ! **************************************************************************************************
552 : !> \brief ...
553 : !> \param nrows ...
554 : !> \param nt ...
555 : !> \param rows_per_thread ...
556 : !> \param rows_per_thread_r ...
557 : !> \param th_planA ...
558 : !> \param th_planB ...
559 : ! **************************************************************************************************
560 0 : SUBROUTINE fftw3_compute_rows_per_th(nrows, nt, rows_per_thread, rows_per_thread_r, &
561 : th_planA, th_planB)
562 :
563 : INTEGER, INTENT(IN) :: nrows, nt
564 : INTEGER, INTENT(OUT) :: rows_per_thread, rows_per_thread_r, &
565 : th_planA, th_planB
566 :
567 0 : IF (MOD(nrows, nt) == 0) THEN
568 0 : rows_per_thread = nrows/nt
569 0 : rows_per_thread_r = 0
570 0 : th_planA = nt
571 0 : th_planB = 0
572 : ELSE
573 0 : rows_per_thread = nrows/nt + 1
574 0 : rows_per_thread_r = nrows/nt
575 0 : th_planA = MOD(nrows, nt)
576 0 : th_planB = nt - th_planA
577 : END IF
578 :
579 0 : END SUBROUTINE fftw3_compute_rows_per_th
580 :
581 : ! **************************************************************************************************
582 :
583 : ! **************************************************************************************************
584 : !> \brief ...
585 : !> \param plan ...
586 : !> \param plan_r ...
587 : !> \param dim_n ...
588 : !> \param dim_istride ...
589 : !> \param dim_ostride ...
590 : !> \param hm_n ...
591 : !> \param hm_istride ...
592 : !> \param hm_ostride ...
593 : !> \param input ...
594 : !> \param output ...
595 : !> \param fft_direction ...
596 : !> \param fftw_plan_type ...
597 : !> \param rows_per_th ...
598 : !> \param rows_per_th_r ...
599 : ! **************************************************************************************************
600 0 : SUBROUTINE fftw3_create_3d_plans(plan, plan_r, dim_n, dim_istride, dim_ostride, &
601 : hm_n, hm_istride, hm_ostride, &
602 : input, output, &
603 : fft_direction, fftw_plan_type, rows_per_th, &
604 : rows_per_th_r)
605 :
606 : TYPE(C_PTR), INTENT(INOUT) :: plan, plan_r
607 : INTEGER, INTENT(INOUT) :: dim_n(2), dim_istride(2), &
608 : dim_ostride(2), hm_n(2), &
609 : hm_istride(2), hm_ostride(2)
610 : COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT) :: input, output
611 : INTEGER, INTENT(INOUT) :: fft_direction, fftw_plan_type
612 : INTEGER, INTENT(IN) :: rows_per_th, rows_per_th_r
613 :
614 : LOGICAL :: valid
615 :
616 : ! First plans will have an additional row
617 :
618 0 : hm_n(2) = rows_per_th
619 : CALL fftw3_create_guru_plan(plan, 1, &
620 : dim_n, dim_istride, dim_ostride, &
621 : 2, hm_n, hm_istride, hm_ostride, &
622 : input, output, &
623 0 : fft_direction, fftw_plan_type, valid)
624 :
625 0 : IF (.NOT. valid) THEN
626 0 : CPABORT("fftw3_create_plan")
627 : END IF
628 :
629 : !!!! Remainder
630 0 : hm_n(2) = rows_per_th_r
631 : CALL fftw3_create_guru_plan(plan_r, 1, &
632 : dim_n, dim_istride, dim_ostride, &
633 : 2, hm_n, hm_istride, hm_ostride, &
634 : input, output, &
635 0 : fft_direction, fftw_plan_type, valid)
636 0 : IF (.NOT. valid) THEN
637 0 : CPABORT("fftw3_create_plan (remaining)")
638 : END IF
639 :
640 0 : END SUBROUTINE fftw3_create_3d_plans
641 :
642 : ! **************************************************************************************************
643 :
644 : ! **************************************************************************************************
645 : !> \brief ...
646 : !> \param plan ...
647 : !> \param zin ...
648 : !> \param zout ...
649 : !> \param plan_style ...
650 : ! **************************************************************************************************
651 56556 : SUBROUTINE fftw3_create_plan_3d(plan, zin, zout, plan_style)
652 :
653 : TYPE(fft_plan_type), INTENT(INOUT) :: plan
654 : COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT) :: zin
655 : COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT) :: zout
656 : INTEGER :: plan_style
657 : #if defined(__FFTW3)
658 : INTEGER :: n1, n2, n3
659 : INTEGER :: nt
660 : INTEGER :: rows_per_th
661 : INTEGER :: rows_per_th_r
662 : INTEGER :: fft_direction
663 : INTEGER :: th_planA, th_planB
664 56556 : COMPLEX(KIND=dp), ALLOCATABLE :: tmp(:)
665 :
666 : ! GURU Interface
667 : INTEGER :: dim_n(2), dim_istride(2), dim_ostride(2), &
668 : howmany_n(2), howmany_istride(2), howmany_ostride(2)
669 :
670 : INTEGER :: fftw_plan_type
671 113072 : SELECT CASE (plan_style)
672 : CASE (1)
673 56516 : fftw_plan_type = FFTW_ESTIMATE
674 : CASE (2)
675 8 : fftw_plan_type = FFTW_MEASURE
676 : CASE (3)
677 24 : fftw_plan_type = FFTW_PATIENT
678 : CASE (4)
679 8 : fftw_plan_type = FFTW_EXHAUSTIVE
680 : CASE DEFAULT
681 56556 : CPABORT("fftw3_create_plan_3d")
682 : END SELECT
683 :
684 56556 : IF (plan%fsign == +1) THEN
685 28278 : fft_direction = FFTW_FORWARD
686 : ELSE
687 28278 : fft_direction = FFTW_BACKWARD
688 : END IF
689 :
690 56556 : n1 = plan%n_3d(1)
691 56556 : n2 = plan%n_3d(2)
692 56556 : n3 = plan%n_3d(3)
693 :
694 56556 : nt = 1
695 56556 : !$OMP PARALLEL DEFAULT(NONE) SHARED(nt)
696 : !$OMP MASTER
697 : !$ nt = omp_get_num_threads()
698 : !$OMP END MASTER
699 : !$OMP END PARALLEL
700 :
701 : IF ((.NOT. fftw3_is_guru_supported()) .OR. &
702 56556 : (.NOT. plan_style == 1) .OR. &
703 : (n1 < 256 .AND. n2 < 256 .AND. n3 < 256 .AND. nt == 1)) THEN
704 : ! If the plan type is MEASURE, PATIENT and EXHAUSTIVE or
705 : ! the grid size is small (and we are single-threaded) then
706 : ! FFTW3 does a better job than handmade optimization
707 : ! so plan a single 3D FFT which will execute using all the threads
708 :
709 56556 : plan%separated_plans = .FALSE.
710 56556 : !$ CALL fftw_plan_with_nthreads(nt)
711 :
712 56556 : IF (plan%fft_in_place) THEN
713 28278 : plan%fftw_plan = fftw_plan_dft_3d(n3, n2, n1, zin, zin, fft_direction, fftw_plan_type)
714 : ELSE
715 28278 : plan%fftw_plan = fftw_plan_dft_3d(n3, n2, n1, zin, zout, fft_direction, fftw_plan_type)
716 : END IF
717 : ELSE
718 0 : ALLOCATE (tmp(n1*n2*n3))
719 : ! ************************* PLANS WITH TRANSPOSITIONS ****************************
720 : ! In the cases described above, we manually thread each stage of the 3D FFT.
721 : !
722 : ! The following plans replace the 3D FFT call by running 1D FFTW across all
723 : ! 3 directions of the array.
724 : !
725 : ! Output of FFTW is transposed to ensure that the next round of FFTW access
726 : ! contiguous information.
727 : !
728 : ! Assuming the input matrix is M(n3,n2,n1), FFTW/Transp are :
729 : ! M(n3,n2,n1) -> fftw(x) -> M(n3,n1,n2) -> fftw(y) -> M(n1,n2,n3) -> fftw(z) -> M(n1,n2,n3)
730 : ! Notice that last matrix is transposed in the Z axis. A DO-loop in the execute routine
731 : ! will perform the final transposition. Performance evaluation showed that using an external
732 : ! DO loop to do the final transposition performed better than directly transposing the output.
733 : ! However, this might vary depending on the compiler/platform, so a potential tuning spot
734 : ! is to perform the final transposition within the fftw library rather than using the external loop
735 : ! See comments below in Z-FFT for how to transpose the output to avoid the final DO loop.
736 : !
737 : ! Doc. for the Guru interface is in http://www.fftw.org/doc/Guru-Interface.html
738 : !
739 : ! OpenMP : Work is distributed on the Z plane.
740 : ! All transpositions are out-of-place to facilitate multi-threading
741 : !
742 : !!!! Plan for X : M(n3,n2,n1) -> fftw(x) -> M(n3,n1,n2)
743 : CALL fftw3_compute_rows_per_th(n3, nt, rows_per_th, rows_per_th_r, &
744 0 : th_planA, th_planB)
745 :
746 0 : dim_n(1) = n1
747 0 : dim_istride(1) = 1
748 0 : dim_ostride(1) = n2
749 0 : howmany_n(1) = n2
750 0 : howmany_n(2) = rows_per_th
751 0 : howmany_istride(1) = n1
752 0 : howmany_istride(2) = n1*n2
753 0 : howmany_ostride(1) = 1
754 0 : howmany_ostride(2) = n1*n2
755 : CALL fftw3_create_3d_plans(plan%fftw_plan_nx, plan%fftw_plan_nx_r, &
756 : dim_n, dim_istride, dim_ostride, howmany_n, &
757 : howmany_istride, howmany_ostride, &
758 : zin, tmp, &
759 : fft_direction, fftw_plan_type, rows_per_th, &
760 0 : rows_per_th_r)
761 :
762 : !!!! Plan for Y : M(n3,n1,n2) -> fftw(y) -> M(n1,n2,n3)
763 : CALL fftw3_compute_rows_per_th(n3, nt, rows_per_th, rows_per_th_r, &
764 0 : th_planA, th_planB)
765 0 : dim_n(1) = n2
766 : dim_istride(1) = 1
767 0 : dim_ostride(1) = n3
768 0 : howmany_n(1) = n1
769 0 : howmany_n(2) = rows_per_th
770 0 : howmany_istride(1) = n2
771 : howmany_istride(2) = n1*n2
772 : !!! transposed Z axis on output
773 0 : howmany_ostride(1) = n2*n3
774 0 : howmany_ostride(2) = 1
775 :
776 : CALL fftw3_create_3d_plans(plan%fftw_plan_ny, plan%fftw_plan_ny_r, &
777 : dim_n, dim_istride, dim_ostride, &
778 : howmany_n, howmany_istride, howmany_ostride, &
779 : tmp, zin, &
780 : fft_direction, fftw_plan_type, rows_per_th, &
781 0 : rows_per_th_r)
782 :
783 : !!!! Plan for Z : M(n1,n2,n3) -> fftw(z) -> M(n1,n2,n3)
784 : CALL fftw3_compute_rows_per_th(n1, nt, rows_per_th, rows_per_th_r, &
785 0 : th_planA, th_planB)
786 0 : dim_n(1) = n3
787 : dim_istride(1) = 1
788 0 : dim_ostride(1) = 1 ! To transpose: n2*n1
789 0 : howmany_n(1) = n2
790 0 : howmany_n(2) = rows_per_th
791 0 : howmany_istride(1) = n3
792 0 : howmany_istride(2) = n2*n3
793 0 : howmany_ostride(1) = n3 ! To transpose: n1
794 0 : howmany_ostride(2) = n2*n3 ! To transpose: 1
795 :
796 : CALL fftw3_create_3d_plans(plan%fftw_plan_nz, plan%fftw_plan_nz_r, &
797 : dim_n, dim_istride, dim_ostride, &
798 : howmany_n, howmany_istride, howmany_ostride, &
799 : zin, tmp, &
800 : fft_direction, fftw_plan_type, rows_per_th, &
801 0 : rows_per_th_r)
802 :
803 0 : plan%separated_plans = .TRUE.
804 :
805 0 : DEALLOCATE (tmp)
806 : END IF
807 :
808 : #else
809 : MARK_USED(plan)
810 : MARK_USED(plan_style)
811 : !MARK_USED does not work with assumed size arguments
812 : IF (.FALSE.) THEN; DO; IF (ABS(zin(1)) > ABS(zout(1))) EXIT; END DO; END IF
813 : #endif
814 :
815 56556 : END SUBROUTINE fftw3_create_plan_3d
816 :
817 : ! **************************************************************************************************
818 :
819 : ! **************************************************************************************************
820 : !> \brief ...
821 : !> \param plan ...
822 : !> \param plan_r ...
823 : !> \param split_dim ...
824 : !> \param nt ...
825 : !> \param tid ...
826 : !> \param input ...
827 : !> \param istride ...
828 : !> \param output ...
829 : !> \param ostride ...
830 : ! **************************************************************************************************
831 0 : SUBROUTINE fftw3_workshare_execute_dft(plan, plan_r, split_dim, nt, tid, &
832 : input, istride, output, ostride)
833 :
834 : INTEGER, INTENT(IN) :: split_dim, nt, tid
835 : INTEGER, INTENT(IN) :: istride, ostride
836 : COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT) :: input, output
837 : TYPE(C_PTR) :: plan, plan_r
838 : #if defined(__FFTW3)
839 : INTEGER :: i_off, o_off
840 : INTEGER :: th_planA, th_planB
841 : INTEGER :: rows_per_thread, rows_per_thread_r
842 :
843 : CALL fftw3_compute_rows_per_th(split_dim, nt, rows_per_thread, &
844 : rows_per_thread_r, &
845 0 : th_planA, th_planB)
846 :
847 0 : IF (th_planB > 0) THEN
848 0 : IF (tid < th_planA) THEN
849 0 : i_off = (tid)*(istride*(rows_per_thread)) + 1
850 0 : o_off = (tid)*(ostride*(rows_per_thread)) + 1
851 0 : IF (rows_per_thread > 0) THEN
852 : CALL fftw_execute_dft(plan, input(i_off), &
853 0 : output(o_off))
854 : END IF
855 0 : ELSE IF ((tid - th_planA) < th_planB) THEN
856 :
857 : i_off = (th_planA)*istride*(rows_per_thread) + &
858 0 : (tid - th_planA)*istride*(rows_per_thread_r) + 1
859 : o_off = (th_planA)*ostride*(rows_per_thread) + &
860 0 : (tid - th_planA)*ostride*(rows_per_thread_r) + 1
861 :
862 : CALL fftw_execute_dft(plan_r, input(i_off), &
863 0 : output(o_off))
864 : END IF
865 :
866 : ELSE
867 0 : i_off = (tid)*(istride*(rows_per_thread)) + 1
868 0 : o_off = (tid)*(ostride*(rows_per_thread)) + 1
869 :
870 : CALL fftw_execute_dft(plan, input(i_off), &
871 0 : output(o_off))
872 :
873 : END IF
874 : #else
875 : MARK_USED(plan)
876 : MARK_USED(plan_r)
877 : MARK_USED(split_dim)
878 : MARK_USED(nt)
879 : MARK_USED(tid)
880 : MARK_USED(istride)
881 : MARK_USED(ostride)
882 : !MARK_USED does not work with assumed size arguments
883 : IF (.FALSE.) THEN; DO; IF (ABS(input(1)) > ABS(output(1))) EXIT; END DO; END IF
884 : #endif
885 :
886 0 : END SUBROUTINE fftw3_workshare_execute_dft
887 :
888 : ! **************************************************************************************************
889 :
890 : ! **************************************************************************************************
891 : !> \brief ...
892 : !> \param plan ...
893 : !> \param scale ...
894 : !> \param zin ...
895 : !> \param zout ...
896 : !> \param stat ...
897 : ! **************************************************************************************************
898 650290 : SUBROUTINE fftw33d(plan, scale, zin, zout, stat)
899 :
900 : TYPE(fft_plan_type), INTENT(IN) :: plan
901 : REAL(KIND=dp), INTENT(IN) :: scale
902 : COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT), TARGET:: zin
903 : COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT), TARGET:: zout
904 : INTEGER, INTENT(OUT) :: stat
905 : #if defined(__FFTW3)
906 650290 : COMPLEX(KIND=dp), POINTER :: xout(:)
907 650290 : COMPLEX(KIND=dp), ALLOCATABLE :: tmp1(:)
908 : INTEGER :: n1, n2, n3
909 : INTEGER :: tid, nt
910 : INTEGER :: i, j, k
911 :
912 650290 : n1 = plan%n_3d(1)
913 650290 : n2 = plan%n_3d(2)
914 650290 : n3 = plan%n_3d(3)
915 :
916 650290 : stat = 1
917 :
918 : ! We use a POINTER to the output array to avoid duplicating code
919 650290 : IF (plan%fft_in_place) THEN
920 650286 : xout => zin(:n1*n2*n3)
921 : ELSE
922 4 : xout => zout(:n1*n2*n3)
923 : END IF
924 :
925 : ! Either compute the full 3D FFT using a multithreaded plan
926 650290 : IF (.NOT. plan%separated_plans) THEN
927 650290 : CALL fftw_execute_dft(plan%fftw_plan, zin, xout)
928 : ELSE
929 : ! Or use the 3 stage FFT scheme described in fftw3_create_plan_3d
930 0 : ALLOCATE (tmp1(n1*n2*n3)) ! Temporary vector used for transpositions
931 0 : !$OMP PARALLEL DEFAULT(NONE) PRIVATE(tid,nt,i,j,k) SHARED(zin,tmp1,n1,n2,n3,plan,xout)
932 : tid = 0
933 : nt = 1
934 :
935 : !$ tid = omp_get_thread_num()
936 : !$ nt = omp_get_num_threads()
937 : CALL fftw3_workshare_execute_dft(plan%fftw_plan_nx, plan%fftw_plan_nx_r, &
938 : n3, nt, tid, &
939 : zin, n1*n2, tmp1, n1*n2)
940 :
941 : !$OMP BARRIER
942 : CALL fftw3_workshare_execute_dft(plan%fftw_plan_ny, plan%fftw_plan_ny_r, &
943 : n3, nt, tid, &
944 : tmp1, n1*n2, xout, 1)
945 : !$OMP BARRIER
946 : CALL fftw3_workshare_execute_dft(plan%fftw_plan_nz, plan%fftw_plan_nz_r, &
947 : n1, nt, tid, &
948 : xout, n2*n3, tmp1, n2*n3)
949 : !$OMP BARRIER
950 :
951 : !$OMP DO COLLAPSE(3)
952 : DO i = 1, n1
953 : DO j = 1, n2
954 : DO k = 1, n3
955 : xout((i - 1) + (j - 1)*n1 + (k - 1)*n1*n2 + 1) = &
956 : tmp1((k - 1) + (j - 1)*n3 + (i - 1)*n3*n2 + 1)
957 : END DO
958 : END DO
959 : END DO
960 : !$OMP END DO
961 :
962 : !$OMP END PARALLEL
963 : END IF
964 :
965 650290 : IF (scale /= 1.0_dp) THEN
966 308975 : CALL zdscal(n1*n2*n3, scale, xout, 1)
967 : END IF
968 :
969 : #else
970 : MARK_USED(plan)
971 : MARK_USED(scale)
972 : !MARK_USED does not work with assumed size arguments
973 : IF (.FALSE.) THEN; DO; IF (ABS(zin(1)) > ABS(zout(1))) EXIT; END DO; END IF
974 : stat = 0
975 :
976 : #endif
977 :
978 650290 : END SUBROUTINE fftw33d
979 :
980 : ! **************************************************************************************************
981 :
982 : ! **************************************************************************************************
983 : !> \brief ...
984 : !> \param plan ...
985 : !> \param zin ...
986 : !> \param zout ...
987 : !> \param plan_style ...
988 : ! **************************************************************************************************
989 163752 : SUBROUTINE fftw3_create_plan_1dm(plan, zin, zout, plan_style)
990 : TYPE(fft_plan_type), INTENT(INOUT) :: plan
991 : COMPLEX(KIND=dp), DIMENSION(*), INTENT(IN) :: zin
992 : COMPLEX(KIND=dp), DIMENSION(*), INTENT(IN) :: zout
993 : INTEGER, INTENT(IN) :: plan_style
994 : #if defined(__FFTW3)
995 : INTEGER :: istride, idist, ostride, odist, num_threads, num_rows
996 :
997 : INTEGER :: fftw_plan_type
998 327288 : SELECT CASE (plan_style)
999 : CASE (1)
1000 163536 : fftw_plan_type = FFTW_ESTIMATE
1001 : CASE (2)
1002 48 : fftw_plan_type = FFTW_MEASURE
1003 : CASE (3)
1004 156 : fftw_plan_type = FFTW_PATIENT
1005 : CASE (4)
1006 12 : fftw_plan_type = FFTW_EXHAUSTIVE
1007 : CASE DEFAULT
1008 163752 : CPABORT("fftw3_create_plan_1dm")
1009 : END SELECT
1010 :
1011 163752 : num_threads = 1
1012 163752 : plan%separated_plans = .FALSE.
1013 : !$OMP PARALLEL DEFAULT(NONE), &
1014 163752 : !$OMP SHARED(NUM_THREADS)
1015 : !$OMP MASTER
1016 : !$ num_threads = omp_get_num_threads()
1017 : !$OMP END MASTER
1018 : !$OMP END PARALLEL
1019 :
1020 163752 : num_rows = plan%m/num_threads
1021 163752 : !$ plan%num_threads_needed = num_threads
1022 :
1023 : ! Check for number of rows less than num_threads
1024 163752 : !$ IF (plan%m < num_threads) THEN
1025 0 : !$ num_rows = 1
1026 0 : !$ plan%num_threads_needed = plan%m
1027 : !$ END IF
1028 :
1029 : ! Check for total number of rows not divisible by num_threads
1030 163752 : !$ IF (num_rows*plan%num_threads_needed /= plan%m) THEN
1031 0 : !$ plan%need_alt_plan = .TRUE.
1032 : !$ END IF
1033 :
1034 163752 : !$ plan%num_rows = num_rows
1035 163752 : istride = 1
1036 163752 : idist = plan%n
1037 163752 : ostride = 1
1038 163752 : odist = plan%n
1039 163752 : IF (plan%fsign == +1 .AND. plan%trans) THEN
1040 81864 : istride = plan%m
1041 81864 : idist = 1
1042 81888 : ELSEIF (plan%fsign == -1 .AND. plan%trans) THEN
1043 81864 : ostride = plan%m
1044 81864 : odist = 1
1045 : END IF
1046 :
1047 163752 : IF (plan%fsign == +1) THEN
1048 : CALL dfftw_plan_many_dft(plan%fftw_plan, 1, plan%n, num_rows, zin, 0, istride, idist, &
1049 81876 : zout, 0, ostride, odist, FFTW_FORWARD, fftw_plan_type)
1050 : ELSE
1051 : CALL dfftw_plan_many_dft(plan%fftw_plan, 1, plan%n, num_rows, zin, 0, istride, idist, &
1052 81876 : zout, 0, ostride, odist, FFTW_BACKWARD, fftw_plan_type)
1053 : END IF
1054 :
1055 163752 : !$ IF (plan%need_alt_plan) THEN
1056 0 : !$ plan%alt_num_rows = plan%m - (plan%num_threads_needed - 1)*num_rows
1057 0 : !$ IF (plan%fsign == +1) THEN
1058 : !$ CALL dfftw_plan_many_dft(plan%alt_fftw_plan, 1, plan%n, plan%alt_num_rows, zin, 0, istride, idist, &
1059 0 : !$ zout, 0, ostride, odist, FFTW_FORWARD, fftw_plan_type)
1060 : !$ ELSE
1061 : !$ CALL dfftw_plan_many_dft(plan%alt_fftw_plan, 1, plan%n, plan%alt_num_rows, zin, 0, istride, idist, &
1062 0 : !$ zout, 0, ostride, odist, FFTW_BACKWARD, fftw_plan_type)
1063 : !$ END IF
1064 : !$ END IF
1065 :
1066 : #else
1067 : MARK_USED(plan)
1068 : MARK_USED(plan_style)
1069 : !MARK_USED does not work with assumed size arguments
1070 : IF (.FALSE.) THEN; DO; IF (ABS(zin(1)) > ABS(zout(1))) EXIT; END DO; END IF
1071 : #endif
1072 :
1073 163752 : END SUBROUTINE fftw3_create_plan_1dm
1074 :
1075 : ! **************************************************************************************************
1076 : !> \brief ...
1077 : !> \param plan ...
1078 : ! **************************************************************************************************
1079 220308 : SUBROUTINE fftw3_destroy_plan(plan)
1080 :
1081 : TYPE(fft_plan_type), INTENT(INOUT) :: plan
1082 :
1083 : #if defined(__FFTW3)
1084 220308 : !$ IF (plan%need_alt_plan) THEN
1085 0 : !$ CALL fftw_destroy_plan(plan%alt_fftw_plan)
1086 : !$ END IF
1087 :
1088 220308 : IF (.NOT. plan%separated_plans) THEN
1089 220308 : CALL fftw_destroy_plan(plan%fftw_plan)
1090 : ELSE
1091 : ! If it is a separated plan then we have to destroy
1092 : ! each dim plan individually
1093 0 : CALL fftw_destroy_plan(plan%fftw_plan_nx)
1094 0 : CALL fftw_destroy_plan(plan%fftw_plan_ny)
1095 0 : CALL fftw_destroy_plan(plan%fftw_plan_nz)
1096 0 : CALL fftw_destroy_plan(plan%fftw_plan_nx_r)
1097 0 : CALL fftw_destroy_plan(plan%fftw_plan_ny_r)
1098 0 : CALL fftw_destroy_plan(plan%fftw_plan_nz_r)
1099 : END IF
1100 :
1101 : #else
1102 : MARK_USED(plan)
1103 : #endif
1104 :
1105 220308 : END SUBROUTINE fftw3_destroy_plan
1106 :
1107 : ! **************************************************************************************************
1108 : !> \brief ...
1109 : !> \param plan ...
1110 : !> \param zin ...
1111 : !> \param zout ...
1112 : !> \param scale ...
1113 : !> \param stat ...
1114 : ! **************************************************************************************************
1115 8462778 : SUBROUTINE fftw31dm(plan, zin, zout, scale, stat)
1116 : TYPE(fft_plan_type), INTENT(IN) :: plan
1117 : COMPLEX(KIND=dp), DIMENSION(*), INTENT(IN), TARGET :: zin
1118 : COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT), &
1119 : TARGET :: zout
1120 : REAL(KIND=dp), INTENT(IN) :: scale
1121 : INTEGER, INTENT(OUT) :: stat
1122 :
1123 : INTEGER :: in_offset, my_id, num_rows, out_offset, &
1124 : scal_offset
1125 : TYPE(C_PTR) :: fftw_plan
1126 :
1127 : !------------------------------------------------------------------------------
1128 :
1129 8462778 : my_id = 0
1130 8462778 : num_rows = plan%m
1131 :
1132 : !$OMP PARALLEL DEFAULT(NONE), &
1133 : !$OMP PRIVATE(my_id,num_rows,in_offset,out_offset,scal_offset,fftw_plan), &
1134 : !$OMP SHARED(zin,zout), &
1135 8462778 : !$OMP SHARED(plan,scale,stat)
1136 : !$ my_id = omp_get_thread_num()
1137 :
1138 : !$ if (my_id < plan%num_threads_needed) then
1139 :
1140 : fftw_plan = plan%fftw_plan
1141 :
1142 : in_offset = 1
1143 : out_offset = 1
1144 : scal_offset = 1
1145 :
1146 : !$ in_offset = 1 + plan%num_rows*my_id*plan%n
1147 : !$ out_offset = 1 + plan%num_rows*my_id*plan%n
1148 : !$ IF (plan%fsign == +1 .AND. plan%trans) THEN
1149 : !$ in_offset = 1 + plan%num_rows*my_id
1150 : !$ ELSEIF (plan%fsign == -1 .AND. plan%trans) THEN
1151 : !$ out_offset = 1 + plan%num_rows*my_id
1152 : !$ END IF
1153 : !$ scal_offset = 1 + plan%n*plan%num_rows*my_id
1154 : !$ IF (plan%need_alt_plan .AND. my_id == plan%num_threads_needed - 1) THEN
1155 : !$ num_rows = plan%alt_num_rows
1156 : !$ fftw_plan = plan%alt_fftw_plan
1157 : !$ ELSE
1158 : !$ num_rows = plan%num_rows
1159 : !$ END IF
1160 :
1161 : #if defined(__FFTW3)
1162 : !$OMP MASTER
1163 : stat = 1
1164 : !$OMP END MASTER
1165 : CALL dfftw_execute_dft(fftw_plan, zin(in_offset:in_offset), zout(out_offset:out_offset))
1166 : !$ end if
1167 : ! all threads need to meet at this barrier
1168 : !$OMP BARRIER
1169 : !$ if (my_id < plan%num_threads_needed) then
1170 : IF (scale /= 1.0_dp) CALL zdscal(plan%n*num_rows, scale, zout(scal_offset:scal_offset), 1)
1171 : !$ end if
1172 :
1173 : #else
1174 : MARK_USED(plan)
1175 : MARK_USED(scale)
1176 : !MARK_USED does not work with assumed size arguments
1177 : IF (.FALSE.) THEN; DO; IF (ABS(zin(1)) > ABS(zout(1))) EXIT; END DO; END IF
1178 : stat = 0
1179 :
1180 : !$ else
1181 : !$ end if
1182 :
1183 : #endif
1184 :
1185 : !$OMP END PARALLEL
1186 :
1187 8462778 : END SUBROUTINE fftw31dm
1188 :
1189 0 : END MODULE fftw3_lib
|