Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2022 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 : #define XFFTW_PLAN_WITH_NTHREADS dfftw_plan_with_nthreads
8 : #define XFFTW_PLAN_DFT_3D dfftw_plan_dft_3d
9 : #define XFFTW_PLAN_GURU_DFT dfftw_plan_guru_dft
10 : #define XFFTW_EXECUTE_DFT dfftw_execute_dft
11 : #define XFFTW_DESTROY_PLAN dfftw_destroy_plan
12 :
13 : MODULE fftw3_lib
14 :
15 : USE ISO_C_BINDING, ONLY: C_ASSOCIATED,&
16 : C_CHAR,&
17 : C_INT,&
18 : C_PTR
19 : USE cp_files, ONLY: get_unit_number
20 : USE fft_kinds, ONLY: dp
21 : USE fft_plan, ONLY: fft_plan_type
22 :
23 : !$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num, omp_get_num_threads
24 :
25 : #include "../../base/base_uses.f90"
26 :
27 : IMPLICIT NONE
28 : PRIVATE
29 :
30 : PUBLIC :: fftw3_do_init, fftw3_do_cleanup, fftw3_get_lengths, fftw33d, fftw31dm
31 : PUBLIC :: fftw3_destroy_plan, fftw3_create_plan_1dm, fftw3_create_plan_3d
32 :
33 : #if defined ( __FFTW3 )
34 : INTERFACE
35 : SUBROUTINE fftw_cleanup() BIND(C, name="fftw_cleanup")
36 : END SUBROUTINE
37 : END INTERFACE
38 : #endif
39 :
40 : CONTAINS
41 :
42 : ! **************************************************************************************************
43 : !> \brief ...
44 : !> \param wisdom_file ...
45 : !> \param ionode ...
46 : ! **************************************************************************************************
47 7993 : SUBROUTINE fftw3_do_cleanup(wisdom_file, ionode)
48 :
49 : CHARACTER(LEN=*), INTENT(IN) :: wisdom_file
50 : LOGICAL :: ionode
51 :
52 : #if defined ( __FFTW3 )
53 : INTEGER :: iunit, istat
54 : ! Write out FFTW3 wisdom to file (if we can)
55 : ! only the ionode updates the wisdom
56 7993 : IF (ionode) THEN
57 4100 : iunit = get_unit_number()
58 4100 : OPEN (UNIT=iunit, FILE=wisdom_file, STATUS="UNKNOWN", FORM="FORMATTED", ACTION="WRITE", IOSTAT=istat)
59 4100 : IF (istat == 0) THEN
60 0 : CALL fftw_export_wisdom_to_file(iunit)
61 0 : CLOSE (iunit)
62 : END IF
63 : END IF
64 :
65 7993 : CALL fftw_cleanup()
66 : #else
67 : MARK_USED(wisdom_file)
68 : MARK_USED(ionode)
69 : #endif
70 :
71 7993 : END SUBROUTINE
72 :
73 : ! **************************************************************************************************
74 : !> \brief ...
75 : !> \param wisdom_file ...
76 : ! **************************************************************************************************
77 8203 : SUBROUTINE fftw3_do_init(wisdom_file)
78 :
79 : CHARACTER(LEN=*), INTENT(IN) :: wisdom_file
80 :
81 : #if defined ( __FFTW3 )
82 : INTEGER :: istat, isuccess, iunit
83 : LOGICAL :: exist
84 :
85 : !$ LOGICAL :: mkl_is_safe
86 :
87 : ! If using the Intel compiler then we need to declare
88 : ! a C interface to a global variable in MKL that sets
89 : ! the number of threads which can concurrently execute
90 : ! an FFT
91 : ! We need __INTEL_COMPILER so we can be sure that the compiler
92 : ! understands the !DEC$ version definitions
93 : #if defined (__INTEL_COMPILER) && defined (__MKL) && defined (__FFTW3)
94 : !$ include "mkl.fi"
95 : !DEC$ IF DEFINED (INTEL_MKL_VERSION)
96 : !DEC$ IF INTEL_MKL_VERSION .EQ. 110100
97 : !DIR$ ATTRIBUTES ALIGN : 8 :: fftw3_mkl
98 : !$ COMMON/fftw3_mkl/ignore(4), mkl_dft_number_of_user_threads, ignore2(7)
99 : !$ INTEGER*4 :: ignore, mkl_dft_number_of_user_threads, ignore2
100 : !$ BIND(c) :: /fftw3_mkl/
101 : !DEC$ ENDIF
102 : !DEC$ ENDIF
103 : #elif defined (__MKL) && defined (__FFTW3)
104 : ! Preprocessing is enabled by default, and below header is not language specific
105 : #include <mkl_version.h>
106 : #endif
107 :
108 : ! Read FFTW wisdom (if available)
109 : ! all nodes are opening the file here...
110 8203 : INQUIRE (FILE=wisdom_file, exist=exist)
111 8203 : IF (exist) THEN
112 0 : iunit = get_unit_number()
113 : OPEN (UNIT=iunit, FILE=wisdom_file, STATUS="OLD", FORM="FORMATTED", POSITION="REWIND", &
114 0 : ACTION="READ", IOSTAT=istat)
115 0 : IF (istat == 0) THEN
116 0 : CALL fftw_import_wisdom_from_file(isuccess, iunit)
117 : ! write(*,*) "FFTW3 import wisdom from file ....",MERGE((/"OK "/),(/"NOT OK"/),(/isuccess==1/))
118 0 : CLOSE (iunit)
119 : END IF
120 : END IF
121 :
122 : ! Now check if we have a real FFTW3 library, or are using MKL wrappers
123 :
124 8203 : !$ IF (fftw3_is_mkl_wrapper() .and. omp_get_max_threads() .gt. 1) THEN
125 : ! If we are not using the Intel compiler, there is no way to tell which
126 : ! MKL version is in use, so fail safe...
127 0 : !$ mkl_is_safe = .FALSE.
128 : #if defined (__MKL) && defined (__FFTW3) && defined(INTEL_MKL_VERSION) && (110100 < INTEL_MKL_VERSION)
129 : !$ mkl_is_safe = .TRUE.
130 : #elif defined (__INTEL_COMPILER) && defined (__MKL) && defined (__FFTW3)
131 : ! If we have an Intel compiler (__INTEL_COMPILER is defined) then check the
132 : ! MKL version and make the appropriate action
133 : !DEC$ IF DEFINED (INTEL_MKL_VERSION)
134 : !DEC$ IF INTEL_MKL_VERSION .EQ. 110100
135 : !$ mkl_dft_number_of_user_threads = omp_get_max_threads()
136 : !DEC$ ENDIF
137 : !DEC$ IF INTEL_MKL_VERSION .GE. 110100
138 : !$ mkl_is_safe = .TRUE.
139 : !DEC$ ENDIF
140 : !DEC$ ENDIF
141 : #endif
142 : !$ IF (.NOT. mkl_is_safe) THEN
143 : !$ STOP "Intel's FFTW3 interface to MKL is not "// &
144 : !$ "thread-safe prior to MKL 11.1.0! Please "// &
145 : !$ "rebuild CP2K, linking against FFTW 3 from "// &
146 : !$ "www.fftw.org or a newer version of MKL. "// &
147 0 : !$ "Now exiting..."
148 : !$ END IF
149 : !$ END IF
150 : #else
151 : MARK_USED(wisdom_file)
152 : #endif
153 :
154 8203 : END SUBROUTINE
155 :
156 : ! **************************************************************************************************
157 : !> \brief ...
158 : !> \param DATA ...
159 : !> \param max_length ...
160 : !> \par History
161 : !> JGH 23-Jan-2006 : initial version
162 : !> Adapted for new interface
163 : !> IAB 09-Jan-2009 : Modified to cache plans in fft_plan_type
164 : !> (c) The Numerical Algorithms Group (NAG) Ltd, 2009 on behalf of the HECToR project
165 : !> IAB 09-Oct-2009 : Added OpenMP directives to 1D FFT, and planning routines
166 : !> (c) The Numerical Algorithms Group (NAG) Ltd, 2009 on behalf of the HECToR project
167 : !> IAB 11-Sep-2012 : OpenMP parallel 3D FFT (Ruyman Reyes, PRACE)
168 : !> \author JGH
169 : ! **************************************************************************************************
170 504 : SUBROUTINE fftw3_get_lengths(DATA, max_length)
171 :
172 : INTEGER, DIMENSION(*) :: DATA
173 : INTEGER, INTENT(INOUT) :: max_length
174 :
175 : INTEGER :: h, i, j, k, m, maxn, maxn_elevens, &
176 : maxn_fives, maxn_sevens, &
177 : maxn_thirteens, maxn_threes, &
178 : maxn_twos, ndata, nmax, number
179 504 : INTEGER, ALLOCATABLE, DIMENSION(:) :: dlocal, idx
180 :
181 : !------------------------------------------------------------------------------
182 : ! compute ndata
183 : !! FFTW can do arbitrary(?) lengths, maybe you want to limit them to some
184 : !! powers of small prime numbers though...
185 :
186 504 : maxn_twos = 15
187 504 : maxn_threes = 3
188 504 : maxn_fives = 2
189 504 : maxn_sevens = 1
190 504 : maxn_elevens = 1
191 504 : maxn_thirteens = 0
192 504 : maxn = 37748736
193 :
194 504 : ndata = 0
195 8568 : DO h = 0, maxn_twos
196 8064 : nmax = HUGE(0)/2**h
197 40824 : DO i = 0, maxn_threes
198 137088 : DO j = 0, maxn_fives
199 322560 : DO k = 0, maxn_sevens
200 677376 : DO m = 0, maxn_elevens
201 387072 : number = (3**i)*(5**j)*(7**k)*(11**m)
202 :
203 387072 : IF (number > nmax) CYCLE
204 :
205 387072 : number = number*2**h
206 387072 : IF (number >= maxn) CYCLE
207 :
208 580608 : ndata = ndata + 1
209 : END DO
210 : END DO
211 : END DO
212 : END DO
213 : END DO
214 :
215 2520 : ALLOCATE (dlocal(ndata), idx(ndata))
216 :
217 504 : ndata = 0
218 371448 : dlocal(:) = 0
219 8568 : DO h = 0, maxn_twos
220 8064 : nmax = HUGE(0)/2**h
221 40824 : DO i = 0, maxn_threes
222 137088 : DO j = 0, maxn_fives
223 322560 : DO k = 0, maxn_sevens
224 677376 : DO m = 0, maxn_elevens
225 387072 : number = (3**i)*(5**j)*(7**k)*(11**m)
226 :
227 387072 : IF (number > nmax) CYCLE
228 :
229 387072 : number = number*2**h
230 387072 : IF (number >= maxn) CYCLE
231 :
232 370944 : ndata = ndata + 1
233 580608 : dlocal(ndata) = number
234 : END DO
235 : END DO
236 : END DO
237 : END DO
238 : END DO
239 :
240 504 : CALL sortint(dlocal, ndata, idx)
241 504 : ndata = MIN(ndata, max_length)
242 371448 : DATA(1:ndata) = dlocal(1:ndata)
243 504 : max_length = ndata
244 :
245 504 : DEALLOCATE (dlocal, idx)
246 :
247 504 : END SUBROUTINE fftw3_get_lengths
248 :
249 : ! **************************************************************************************************
250 : !> \brief ...
251 : !> \param iarr ...
252 : !> \param n ...
253 : !> \param index ...
254 : ! **************************************************************************************************
255 504 : SUBROUTINE sortint(iarr, n, index)
256 :
257 : INTEGER, INTENT(IN) :: n
258 : INTEGER, INTENT(INOUT) :: iarr(1:n)
259 : INTEGER, INTENT(OUT) :: INDEX(1:n)
260 :
261 : INTEGER, PARAMETER :: m = 7, nstack = 50
262 :
263 : INTEGER :: a, i, ib, ir, istack(1:nstack), itemp, &
264 : j, jstack, k, l, temp
265 :
266 : !------------------------------------------------------------------------------
267 :
268 371448 : DO i = 1, n
269 371448 : INDEX(i) = i
270 : END DO
271 : jstack = 0
272 : l = 1
273 : ir = n
274 : DO WHILE (.TRUE.)
275 139608 : IF (ir - l < m) THEN
276 301392 : DO j = l + 1, ir
277 231336 : a = iarr(j)
278 231336 : ib = INDEX(j)
279 549360 : DO i = j - 1, 0, -1
280 549360 : IF (i == 0) EXIT
281 548352 : IF (iarr(i) <= a) EXIT
282 318024 : iarr(i + 1) = iarr(i)
283 549360 : INDEX(i + 1) = INDEX(i)
284 : END DO
285 231336 : iarr(i + 1) = a
286 301392 : INDEX(i + 1) = ib
287 : END DO
288 70056 : IF (jstack == 0) RETURN
289 69552 : ir = istack(jstack)
290 69552 : l = istack(jstack - 1)
291 69552 : jstack = jstack - 2
292 : ELSE
293 69552 : k = (l + ir)/2
294 69552 : temp = iarr(k)
295 69552 : iarr(k) = iarr(l + 1)
296 69552 : iarr(l + 1) = temp
297 69552 : itemp = INDEX(k)
298 69552 : INDEX(k) = INDEX(l + 1)
299 69552 : INDEX(l + 1) = itemp
300 69552 : IF (iarr(l + 1) > iarr(ir)) THEN
301 32760 : temp = iarr(l + 1)
302 32760 : iarr(l + 1) = iarr(ir)
303 32760 : iarr(ir) = temp
304 32760 : itemp = INDEX(l + 1)
305 32760 : INDEX(l + 1) = INDEX(ir)
306 32760 : INDEX(ir) = itemp
307 : END IF
308 69552 : IF (iarr(l) > iarr(ir)) THEN
309 26208 : temp = iarr(l)
310 26208 : iarr(l) = iarr(ir)
311 26208 : iarr(ir) = temp
312 26208 : itemp = INDEX(l)
313 26208 : INDEX(l) = INDEX(ir)
314 26208 : INDEX(ir) = itemp
315 : END IF
316 69552 : IF (iarr(l + 1) > iarr(l)) THEN
317 26208 : temp = iarr(l + 1)
318 26208 : iarr(l + 1) = iarr(l)
319 26208 : iarr(l) = temp
320 26208 : itemp = INDEX(l + 1)
321 26208 : INDEX(l + 1) = INDEX(l)
322 26208 : INDEX(l) = itemp
323 : END IF
324 69552 : i = l + 1
325 69552 : j = ir
326 69552 : a = iarr(l)
327 69552 : ib = INDEX(l)
328 443016 : DO WHILE (.TRUE.)
329 512568 : i = i + 1
330 1663704 : DO WHILE (iarr(i) < a)
331 1151136 : i = i + 1
332 : END DO
333 512568 : j = j - 1
334 1270080 : DO WHILE (iarr(j) > a)
335 757512 : j = j - 1
336 : END DO
337 512568 : IF (j < i) EXIT
338 443016 : temp = iarr(i)
339 443016 : iarr(i) = iarr(j)
340 443016 : iarr(j) = temp
341 443016 : itemp = INDEX(i)
342 443016 : INDEX(i) = INDEX(j)
343 443016 : INDEX(j) = itemp
344 : END DO
345 69552 : iarr(l) = iarr(j)
346 69552 : iarr(j) = a
347 69552 : INDEX(l) = INDEX(j)
348 69552 : INDEX(j) = ib
349 69552 : jstack = jstack + 2
350 69552 : IF (jstack > nstack) CPABORT(" Nstack too small in sortr")
351 69552 : IF (ir - i + 1 >= j - l) THEN
352 35784 : istack(jstack) = ir
353 35784 : istack(jstack - 1) = i
354 35784 : ir = j - 1
355 : ELSE
356 33768 : istack(jstack) = j - 1
357 33768 : istack(jstack - 1) = l
358 33768 : l = i
359 : END IF
360 : END IF
361 :
362 : END DO
363 :
364 : END SUBROUTINE sortint
365 :
366 : ! **************************************************************************************************
367 :
368 : ! **************************************************************************************************
369 : !> \brief ...
370 : !> \param plan ...
371 : !> \param fft_rank ...
372 : !> \param dim_n ...
373 : !> \param dim_istride ...
374 : !> \param dim_ostride ...
375 : !> \param hm_rank ...
376 : !> \param hm_n ...
377 : !> \param hm_istride ...
378 : !> \param hm_ostride ...
379 : !> \param zin ...
380 : !> \param zout ...
381 : !> \param fft_direction ...
382 : !> \param fftw_plan_type ...
383 : !> \param valid ...
384 : ! **************************************************************************************************
385 45695 : SUBROUTINE fftw3_create_guru_plan(plan, fft_rank, dim_n, &
386 : dim_istride, dim_ostride, hm_rank, &
387 : hm_n, hm_istride, hm_ostride, &
388 : zin, zout, fft_direction, fftw_plan_type, &
389 : valid)
390 :
391 : IMPLICIT NONE
392 :
393 : TYPE(C_PTR), INTENT(INOUT) :: plan
394 : COMPLEX(KIND=dp), DIMENSION(*), INTENT(IN) :: zin
395 : COMPLEX(KIND=dp), DIMENSION(*), INTENT(IN) :: zout
396 : INTEGER, INTENT(IN) :: dim_n(2), dim_istride(2), dim_ostride(2), &
397 : hm_n(2), hm_istride(2), hm_ostride(2), fft_rank, &
398 : fft_direction, fftw_plan_type, hm_rank
399 : LOGICAL, INTENT(OUT) :: valid
400 :
401 : #if defined (__FFTW3)
402 :
403 : CALL XFFTW_PLAN_GURU_DFT(plan, fft_rank, &
404 : dim_n, dim_istride, dim_ostride, &
405 : hm_rank, hm_n, hm_istride, hm_ostride, &
406 : zin, zout, &
407 45695 : fft_direction, fftw_plan_type)
408 :
409 45695 : valid = C_ASSOCIATED(plan)
410 :
411 : #else
412 : MARK_USED(plan)
413 : MARK_USED(fft_rank)
414 : MARK_USED(dim_n)
415 : MARK_USED(dim_istride)
416 : MARK_USED(dim_ostride)
417 : MARK_USED(hm_rank)
418 : MARK_USED(hm_n)
419 : MARK_USED(hm_istride)
420 : MARK_USED(hm_ostride)
421 : MARK_USED(fft_direction)
422 : MARK_USED(fftw_plan_type)
423 : !MARK_USED does not work with assumed size arguments
424 : IF (.FALSE.) THEN; DO; IF (ABS(zin(1)) > ABS(zout(1))) EXIT; END DO; END IF
425 : valid = .FALSE.
426 :
427 : #endif
428 :
429 45695 : END SUBROUTINE
430 :
431 : ! **************************************************************************************************
432 :
433 : ! **************************************************************************************************
434 : !> \brief ...
435 : !> \return ...
436 : ! **************************************************************************************************
437 45695 : FUNCTION fftw3_is_mkl_wrapper() RESULT(is_mkl)
438 :
439 : IMPLICIT NONE
440 :
441 : LOGICAL :: is_mkl
442 : #if defined ( __FFTW3 )
443 : LOGICAL :: guru_supported
444 : INTEGER :: dim_n(2), dim_istride(2), dim_ostride(2), &
445 : howmany_n(2), howmany_istride(2), howmany_ostride(2)
446 : TYPE(C_PTR) :: test_plan
447 : COMPLEX(KIND=dp), DIMENSION(1, 1, 1) :: zin
448 :
449 : #include "fftw3.f"
450 :
451 : ! Attempt to create a plan with the guru interface for a 2d sub-space
452 : ! If this fails (e.g. for MKL's FFTW3 interface), fall back to the
453 : ! FFTW3 threaded 3D transform instead of the hand-optimised version
454 45695 : dim_n(1) = 1
455 45695 : dim_istride(1) = 1
456 45695 : dim_ostride(1) = 1
457 45695 : howmany_n(1) = 1
458 45695 : howmany_n(2) = 1
459 45695 : howmany_istride(1) = 1
460 45695 : howmany_istride(2) = 1
461 45695 : howmany_ostride(1) = 1
462 45695 : howmany_ostride(2) = 1
463 45695 : zin = CMPLX(0.0_dp, 0.0_dp, KIND=dp)
464 : CALL fftw3_create_guru_plan(test_plan, 1, &
465 : dim_n, dim_istride, dim_ostride, &
466 : 2, howmany_n, howmany_istride, howmany_ostride, &
467 : zin, zin, &
468 45695 : FFTW_FORWARD, FFTW_ESTIMATE, guru_supported)
469 45695 : IF (guru_supported) THEN
470 45695 : CALL XFFTW_DESTROY_PLAN(test_plan)
471 45695 : is_mkl = .FALSE.
472 : ELSE
473 : is_mkl = .TRUE.
474 : END IF
475 :
476 : #else
477 : is_mkl = .FALSE.
478 : #endif
479 :
480 45695 : END FUNCTION
481 :
482 : ! **************************************************************************************************
483 :
484 : ! **************************************************************************************************
485 : !> \brief ...
486 : !> \param nrows ...
487 : !> \param nt ...
488 : !> \param rows_per_thread ...
489 : !> \param rows_per_thread_r ...
490 : !> \param th_planA ...
491 : !> \param th_planB ...
492 : ! **************************************************************************************************
493 0 : SUBROUTINE fftw3_compute_rows_per_th(nrows, nt, rows_per_thread, rows_per_thread_r, &
494 : th_planA, th_planB)
495 :
496 : INTEGER, INTENT(IN) :: nrows, nt
497 : INTEGER, INTENT(OUT) :: rows_per_thread, rows_per_thread_r, &
498 : th_planA, th_planB
499 :
500 0 : IF (MOD(nrows, nt) .EQ. 0) THEN
501 0 : rows_per_thread = nrows/nt
502 0 : rows_per_thread_r = 0
503 0 : th_planA = nt
504 0 : th_planB = 0
505 : ELSE
506 0 : rows_per_thread = nrows/nt + 1
507 0 : rows_per_thread_r = nrows/nt
508 0 : th_planA = MOD(nrows, nt)
509 0 : th_planB = nt - th_planA
510 : END IF
511 :
512 0 : END SUBROUTINE
513 :
514 : ! **************************************************************************************************
515 :
516 : ! **************************************************************************************************
517 : !> \brief ...
518 : !> \param plan ...
519 : !> \param plan_r ...
520 : !> \param dim_n ...
521 : !> \param dim_istride ...
522 : !> \param dim_ostride ...
523 : !> \param hm_n ...
524 : !> \param hm_istride ...
525 : !> \param hm_ostride ...
526 : !> \param input ...
527 : !> \param output ...
528 : !> \param fft_direction ...
529 : !> \param fftw_plan_type ...
530 : !> \param rows_per_th ...
531 : !> \param rows_per_th_r ...
532 : ! **************************************************************************************************
533 0 : SUBROUTINE fftw3_create_3d_plans(plan, plan_r, dim_n, dim_istride, dim_ostride, &
534 : hm_n, hm_istride, hm_ostride, &
535 : input, output, &
536 : fft_direction, fftw_plan_type, rows_per_th, &
537 : rows_per_th_r)
538 :
539 : TYPE(C_PTR), INTENT(INOUT) :: plan, plan_r
540 : INTEGER, INTENT(INOUT) :: dim_n(2), dim_istride(2), &
541 : dim_ostride(2), hm_n(2), &
542 : hm_istride(2), hm_ostride(2)
543 : COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT) :: input, output
544 : INTEGER, INTENT(INOUT) :: fft_direction, fftw_plan_type
545 : INTEGER, INTENT(IN) :: rows_per_th, rows_per_th_r
546 :
547 : LOGICAL :: valid
548 :
549 : ! First plans will have an additional row
550 :
551 0 : hm_n(2) = rows_per_th
552 : CALL fftw3_create_guru_plan(plan, 1, &
553 : dim_n, dim_istride, dim_ostride, &
554 : 2, hm_n, hm_istride, hm_ostride, &
555 : input, output, &
556 0 : fft_direction, fftw_plan_type, valid)
557 :
558 0 : IF (.NOT. valid) THEN
559 0 : CPABORT("fftw3_create_plan")
560 : END IF
561 :
562 : !!!! Remainder
563 0 : hm_n(2) = rows_per_th_r
564 : CALL fftw3_create_guru_plan(plan_r, 1, &
565 : dim_n, dim_istride, dim_ostride, &
566 : 2, hm_n, hm_istride, hm_ostride, &
567 : input, output, &
568 0 : fft_direction, fftw_plan_type, valid)
569 0 : IF (.NOT. valid) THEN
570 0 : CPABORT("fftw3_create_plan (remaining)")
571 : END IF
572 :
573 0 : END SUBROUTINE
574 :
575 : ! **************************************************************************************************
576 :
577 : ! **************************************************************************************************
578 : !> \brief ...
579 : !> \param plan ...
580 : !> \param zin ...
581 : !> \param zout ...
582 : !> \param plan_style ...
583 : ! **************************************************************************************************
584 37492 : SUBROUTINE fftw3_create_plan_3d(plan, zin, zout, plan_style)
585 :
586 : TYPE(fft_plan_type), INTENT(INOUT) :: plan
587 : COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT) :: zin
588 : COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT) :: zout
589 : INTEGER :: plan_style
590 : #if defined ( __FFTW3 )
591 : INTEGER :: n1, n2, n3
592 : INTEGER :: nt
593 : INTEGER :: rows_per_th
594 : INTEGER :: rows_per_th_r
595 : INTEGER :: fft_direction
596 : INTEGER :: th_planA, th_planB
597 37492 : COMPLEX(KIND=dp), ALLOCATABLE :: tmp(:)
598 :
599 : ! GURU Interface
600 : INTEGER :: dim_n(2), dim_istride(2), dim_ostride(2), &
601 : howmany_n(2), howmany_istride(2), howmany_ostride(2)
602 :
603 : #include "fftw3.f"
604 : INTEGER :: fftw_plan_type
605 74960 : SELECT CASE (plan_style)
606 : CASE (1)
607 37468 : fftw_plan_type = FFTW_ESTIMATE
608 : CASE (2)
609 8 : fftw_plan_type = FFTW_MEASURE
610 : CASE (3)
611 8 : fftw_plan_type = FFTW_PATIENT
612 : CASE (4)
613 8 : fftw_plan_type = FFTW_EXHAUSTIVE
614 : CASE DEFAULT
615 37492 : CPABORT("fftw3_create_plan_3d")
616 : END SELECT
617 :
618 : #if defined (__FFTW3_UNALIGNED)
619 : fftw_plan_type = fftw_plan_type + FFTW_UNALIGNED
620 : #endif
621 :
622 37492 : IF (plan%fsign == +1) THEN
623 18746 : fft_direction = FFTW_FORWARD
624 : ELSE
625 18746 : fft_direction = FFTW_BACKWARD
626 : END IF
627 :
628 37492 : n1 = plan%n_3d(1)
629 37492 : n2 = plan%n_3d(2)
630 37492 : n3 = plan%n_3d(3)
631 :
632 37492 : nt = 1
633 37492 : !$OMP PARALLEL DEFAULT(NONE) SHARED(nt)
634 : !$OMP MASTER
635 : !$ nt = omp_get_num_threads()
636 : !$OMP END MASTER
637 : !$OMP END PARALLEL
638 :
639 : IF ((fftw3_is_mkl_wrapper()) .OR. &
640 37492 : (.NOT. plan_style == 1) .OR. &
641 : (n1 < 256 .AND. n2 < 256 .AND. n3 < 256 .AND. nt == 1)) THEN
642 : ! If the plan type is MEASURE, PATIENT and EXHAUSTIVE or
643 : ! the grid size is small (and we are single-threaded) then
644 : ! FFTW3 does a better job than handmade optimization
645 : ! so plan a single 3D FFT which will execute using all the threads
646 :
647 37492 : plan%separated_plans = .FALSE.
648 37492 : !$ CALL XFFTW_PLAN_WITH_NTHREADS(nt)
649 :
650 37492 : IF (plan%fft_in_place) THEN
651 18746 : CALL XFFTW_PLAN_DFT_3D(plan%fftw_plan, n1, n2, n3, zin, zin, fft_direction, fftw_plan_type)
652 : ELSE
653 18746 : CALL XFFTW_PLAN_DFT_3D(plan%fftw_plan, n1, n2, n3, zin, zout, fft_direction, fftw_plan_type)
654 : END IF
655 : ELSE
656 0 : ALLOCATE (tmp(n1*n2*n3))
657 : ! ************************* PLANS WITH TRANSPOSITIONS ****************************
658 : ! In the cases described above, we manually thread each stage of the 3D FFT.
659 : !
660 : ! The following plans replace the 3D FFT call by running 1D FFTW across all
661 : ! 3 directions of the array.
662 : !
663 : ! Output of FFTW is transposed to ensure that the next round of FFTW access
664 : ! contiguous information.
665 : !
666 : ! Assuming the input matrix is M(n3,n2,n1), FFTW/Transp are :
667 : ! M(n3,n2,n1) -> fftw(x) -> M(n3,n1,n2) -> fftw(y) -> M(n1,n2,n3) -> fftw(z) -> M(n1,n2,n3)
668 : ! Notice that last matrix is transposed in the Z axis. A DO-loop in the execute routine
669 : ! will perform the final transposition. Performance evaluation showed that using an external
670 : ! DO loop to do the final transposition performed better than directly transposing the output.
671 : ! However, this might vary depending on the compiler/platform, so a potential tuning spot
672 : ! is to perform the final transposition within the fftw library rather than using the external loop
673 : ! See comments below in Z-FFT for how to transpose the output to avoid the final DO loop.
674 : !
675 : ! Doc. for the Guru interface is in http://www.fftw.org/doc/Guru-Interface.html
676 : !
677 : ! OpenMP : Work is distributed on the Z plane.
678 : ! All transpositions are out-of-place to facilitate multi-threading
679 : !
680 : !!!! Plan for X : M(n3,n2,n1) -> fftw(x) -> M(n3,n1,n2)
681 : CALL fftw3_compute_rows_per_th(n3, nt, rows_per_th, rows_per_th_r, &
682 0 : th_planA, th_planB)
683 :
684 0 : dim_n(1) = n1
685 0 : dim_istride(1) = 1
686 0 : dim_ostride(1) = n2
687 0 : howmany_n(1) = n2
688 0 : howmany_n(2) = rows_per_th
689 0 : howmany_istride(1) = n1
690 0 : howmany_istride(2) = n1*n2
691 0 : howmany_ostride(1) = 1
692 0 : howmany_ostride(2) = n1*n2
693 : CALL fftw3_create_3d_plans(plan%fftw_plan_nx, plan%fftw_plan_nx_r, &
694 : dim_n, dim_istride, dim_ostride, howmany_n, &
695 : howmany_istride, howmany_ostride, &
696 : zin, tmp, &
697 : fft_direction, fftw_plan_type, rows_per_th, &
698 0 : rows_per_th_r)
699 :
700 : !!!! Plan for Y : M(n3,n1,n2) -> fftw(y) -> M(n1,n2,n3)
701 : CALL fftw3_compute_rows_per_th(n3, nt, rows_per_th, rows_per_th_r, &
702 0 : th_planA, th_planB)
703 0 : dim_n(1) = n2
704 : dim_istride(1) = 1
705 0 : dim_ostride(1) = n3
706 0 : howmany_n(1) = n1
707 0 : howmany_n(2) = rows_per_th
708 0 : howmany_istride(1) = n2
709 : howmany_istride(2) = n1*n2
710 : !!! transposed Z axis on output
711 0 : howmany_ostride(1) = n2*n3
712 0 : howmany_ostride(2) = 1
713 :
714 : CALL fftw3_create_3d_plans(plan%fftw_plan_ny, plan%fftw_plan_ny_r, &
715 : dim_n, dim_istride, dim_ostride, &
716 : howmany_n, howmany_istride, howmany_ostride, &
717 : tmp, zin, &
718 : fft_direction, fftw_plan_type, rows_per_th, &
719 0 : rows_per_th_r)
720 :
721 : !!!! Plan for Z : M(n1,n2,n3) -> fftw(z) -> M(n1,n2,n3)
722 : CALL fftw3_compute_rows_per_th(n1, nt, rows_per_th, rows_per_th_r, &
723 0 : th_planA, th_planB)
724 0 : dim_n(1) = n3
725 : dim_istride(1) = 1
726 0 : dim_ostride(1) = 1 ! To transpose: n2*n1
727 0 : howmany_n(1) = n2
728 0 : howmany_n(2) = rows_per_th
729 0 : howmany_istride(1) = n3
730 0 : howmany_istride(2) = n2*n3
731 0 : howmany_ostride(1) = n3 ! To transpose: n1
732 0 : howmany_ostride(2) = n2*n3 ! To transpose: 1
733 :
734 : CALL fftw3_create_3d_plans(plan%fftw_plan_nz, plan%fftw_plan_nz_r, &
735 : dim_n, dim_istride, dim_ostride, &
736 : howmany_n, howmany_istride, howmany_ostride, &
737 : zin, tmp, &
738 : fft_direction, fftw_plan_type, rows_per_th, &
739 0 : rows_per_th_r)
740 :
741 0 : plan%separated_plans = .TRUE.
742 :
743 0 : DEALLOCATE (tmp)
744 : END IF
745 :
746 : #else
747 : MARK_USED(plan)
748 : MARK_USED(plan_style)
749 : !MARK_USED does not work with assumed size arguments
750 : IF (.FALSE.) THEN; DO; IF (ABS(zin(1)) > ABS(zout(1))) EXIT; END DO; END IF
751 : #endif
752 :
753 37492 : END SUBROUTINE fftw3_create_plan_3d
754 :
755 : ! **************************************************************************************************
756 :
757 : ! **************************************************************************************************
758 : !> \brief ...
759 : !> \param plan ...
760 : !> \param plan_r ...
761 : !> \param split_dim ...
762 : !> \param nt ...
763 : !> \param tid ...
764 : !> \param input ...
765 : !> \param istride ...
766 : !> \param output ...
767 : !> \param ostride ...
768 : ! **************************************************************************************************
769 0 : SUBROUTINE fftw3_workshare_execute_dft(plan, plan_r, split_dim, nt, tid, &
770 : input, istride, output, ostride)
771 :
772 : INTEGER, INTENT(IN) :: split_dim, nt, tid
773 : INTEGER, INTENT(IN) :: istride, ostride
774 : COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT) :: input, output
775 : TYPE(C_PTR) :: plan, plan_r
776 : #if defined (__FFTW3)
777 : INTEGER :: i_off, o_off
778 : INTEGER :: th_planA, th_planB
779 : INTEGER :: rows_per_thread, rows_per_thread_r
780 :
781 : CALL fftw3_compute_rows_per_th(split_dim, nt, rows_per_thread, &
782 : rows_per_thread_r, &
783 0 : th_planA, th_planB)
784 :
785 0 : IF (th_planB .GT. 0) THEN
786 0 : IF (tid .LT. th_planA) THEN
787 0 : i_off = (tid)*(istride*(rows_per_thread)) + 1
788 0 : o_off = (tid)*(ostride*(rows_per_thread)) + 1
789 0 : IF (rows_per_thread .GT. 0) THEN
790 : CALL XFFTW_EXECUTE_DFT(plan, input(i_off), &
791 0 : output(o_off))
792 : END IF
793 0 : ELSE IF ((tid - th_planA) < th_planB) THEN
794 :
795 : i_off = (th_planA)*istride*(rows_per_thread) + &
796 0 : (tid - th_planA)*istride*(rows_per_thread_r) + 1
797 : o_off = (th_planA)*ostride*(rows_per_thread) + &
798 0 : (tid - th_planA)*ostride*(rows_per_thread_r) + 1
799 :
800 : CALL XFFTW_EXECUTE_DFT(plan_r, input(i_off), &
801 0 : output(o_off))
802 : END IF
803 :
804 : ELSE
805 0 : i_off = (tid)*(istride*(rows_per_thread)) + 1
806 0 : o_off = (tid)*(ostride*(rows_per_thread)) + 1
807 :
808 : CALL XFFTW_EXECUTE_DFT(plan, input(i_off), &
809 0 : output(o_off))
810 :
811 : END IF
812 : #else
813 : MARK_USED(plan)
814 : MARK_USED(plan_r)
815 : MARK_USED(split_dim)
816 : MARK_USED(nt)
817 : MARK_USED(tid)
818 : MARK_USED(istride)
819 : MARK_USED(ostride)
820 : !MARK_USED does not work with assumed size arguments
821 : IF (.FALSE.) THEN; DO; IF (ABS(input(1)) > ABS(output(1))) EXIT; END DO; END IF
822 : #endif
823 :
824 0 : END SUBROUTINE
825 :
826 : ! **************************************************************************************************
827 :
828 : ! **************************************************************************************************
829 : !> \brief ...
830 : !> \param plan ...
831 : !> \param scale ...
832 : !> \param zin ...
833 : !> \param zout ...
834 : !> \param stat ...
835 : ! **************************************************************************************************
836 526776 : SUBROUTINE fftw33d(plan, scale, zin, zout, stat)
837 :
838 : TYPE(fft_plan_type), INTENT(IN) :: plan
839 : REAL(KIND=dp), INTENT(IN) :: scale
840 : COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT), TARGET:: zin
841 : COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT), TARGET:: zout
842 : INTEGER, INTENT(OUT) :: stat
843 : #if defined ( __FFTW3 )
844 526776 : COMPLEX(KIND=dp), POINTER :: xout(:)
845 526776 : COMPLEX(KIND=dp), ALLOCATABLE :: tmp1(:)
846 : INTEGER :: n1, n2, n3
847 : INTEGER :: tid, nt
848 : INTEGER :: i, j, k
849 :
850 : !------------------------------------------------------------------------------
851 :
852 : #include "fftw3.f"
853 :
854 526776 : n1 = plan%n_3d(1)
855 526776 : n2 = plan%n_3d(2)
856 526776 : n3 = plan%n_3d(3)
857 :
858 526776 : stat = 1
859 :
860 : ! We use a POINTER to the output array to avoid duplicating code
861 526776 : IF (plan%fft_in_place) THEN
862 526772 : xout => zin(:n1*n2*n3)
863 : ELSE
864 4 : xout => zout(:n1*n2*n3)
865 : END IF
866 :
867 : ! Either compute the full 3D FFT using a multithreaded plan
868 526776 : IF (.NOT. plan%separated_plans) THEN
869 526776 : CALL XFFTW_EXECUTE_DFT(plan%fftw_plan, zin, xout)
870 : ELSE
871 : ! Or use the 3 stage FFT scheme described in fftw3_create_plan_3d
872 0 : ALLOCATE (tmp1(n1*n2*n3)) ! Temporary vector used for transpositions
873 0 : !$OMP PARALLEL DEFAULT(NONE) PRIVATE(tid,nt,i,j,k) SHARED(zin,tmp1,n1,n2,n3,plan,xout)
874 : tid = 0
875 : nt = 1
876 :
877 : !$ tid = omp_get_thread_num()
878 : !$ nt = omp_get_num_threads()
879 : CALL fftw3_workshare_execute_dft(plan%fftw_plan_nx, plan%fftw_plan_nx_r, &
880 : n3, nt, tid, &
881 : zin, n1*n2, tmp1, n1*n2)
882 :
883 : !$OMP BARRIER
884 : CALL fftw3_workshare_execute_dft(plan%fftw_plan_ny, plan%fftw_plan_ny_r, &
885 : n3, nt, tid, &
886 : tmp1, n1*n2, xout, 1)
887 : !$OMP BARRIER
888 : CALL fftw3_workshare_execute_dft(plan%fftw_plan_nz, plan%fftw_plan_nz_r, &
889 : n1, nt, tid, &
890 : xout, n2*n3, tmp1, n2*n3)
891 : !$OMP BARRIER
892 :
893 : !$OMP DO COLLAPSE(3)
894 : DO i = 1, n1
895 : DO j = 1, n2
896 : DO k = 1, n3
897 : xout((i - 1) + (j - 1)*n1 + (k - 1)*n1*n2 + 1) = &
898 : tmp1((k - 1) + (j - 1)*n3 + (i - 1)*n3*n2 + 1)
899 : END DO
900 : END DO
901 : END DO
902 : !$OMP END DO
903 :
904 : !$OMP END PARALLEL
905 : END IF
906 :
907 526776 : IF (scale /= 1.0_dp) THEN
908 238953 : CALL zdscal(n1*n2*n3, scale, xout, 1)
909 : END IF
910 :
911 : #else
912 : MARK_USED(plan)
913 : MARK_USED(scale)
914 : !MARK_USED does not work with assumed size arguments
915 : IF (.FALSE.) THEN; DO; IF (ABS(zin(1)) > ABS(zout(1))) EXIT; END DO; END IF
916 : stat = 0
917 :
918 : #endif
919 :
920 526776 : END SUBROUTINE fftw33d
921 :
922 : ! **************************************************************************************************
923 :
924 : ! **************************************************************************************************
925 : !> \brief ...
926 : !> \param plan ...
927 : !> \param zin ...
928 : !> \param zout ...
929 : !> \param plan_style ...
930 : ! **************************************************************************************************
931 124984 : SUBROUTINE fftw3_create_plan_1dm(plan, zin, zout, plan_style)
932 :
933 : IMPLICIT NONE
934 :
935 : TYPE(fft_plan_type), INTENT(INOUT) :: plan
936 : COMPLEX(KIND=dp), DIMENSION(*), INTENT(IN) :: zin
937 : COMPLEX(KIND=dp), DIMENSION(*), INTENT(IN) :: zout
938 : INTEGER, INTENT(IN) :: plan_style
939 : #if defined ( __FFTW3 )
940 : INTEGER :: ii, di, io, DO, num_threads, num_rows
941 :
942 : #include "fftw3.f"
943 : INTEGER :: fftw_plan_type
944 249860 : SELECT CASE (plan_style)
945 : CASE (1)
946 124876 : fftw_plan_type = FFTW_ESTIMATE
947 : CASE (2)
948 48 : fftw_plan_type = FFTW_MEASURE
949 : CASE (3)
950 48 : fftw_plan_type = FFTW_PATIENT
951 : CASE (4)
952 12 : fftw_plan_type = FFTW_EXHAUSTIVE
953 : CASE DEFAULT
954 124984 : CPABORT("fftw3_create_plan_1dm")
955 : END SELECT
956 :
957 : #if defined (__FFTW3_UNALIGNED)
958 : fftw_plan_type = fftw_plan_type + FFTW_UNALIGNED
959 : #endif
960 124984 : num_threads = 1
961 124984 : plan%separated_plans = .FALSE.
962 : !$OMP PARALLEL DEFAULT(NONE), &
963 124984 : !$OMP SHARED(NUM_THREADS)
964 : !$OMP MASTER
965 : !$ num_threads = omp_get_num_threads()
966 : !$OMP END MASTER
967 : !$OMP END PARALLEL
968 :
969 124984 : num_rows = plan%m/num_threads
970 124984 : !$ plan%num_threads_needed = num_threads
971 :
972 : ! Check for number of rows less than num_threads
973 124984 : !$ IF (plan%m < num_threads) THEN
974 0 : !$ num_rows = 1
975 0 : !$ plan%num_threads_needed = plan%m
976 : !$ END IF
977 :
978 : ! Check for total number of rows not divisible by num_threads
979 124984 : !$ IF (num_rows*plan%num_threads_needed .NE. plan%m) THEN
980 0 : !$ plan%need_alt_plan = .TRUE.
981 : !$ END IF
982 :
983 124984 : !$ plan%num_rows = num_rows
984 124984 : ii = 1
985 124984 : di = plan%n
986 124984 : io = 1
987 124984 : DO = plan%n
988 124984 : IF (plan%fsign == +1 .AND. plan%trans) THEN
989 62358 : ii = plan%m
990 62358 : di = 1
991 62626 : ELSEIF (plan%fsign == -1 .AND. plan%trans) THEN
992 62358 : io = plan%m
993 62358 : DO = 1
994 : END IF
995 :
996 124984 : IF (plan%fsign == +1) THEN
997 : CALL dfftw_plan_many_dft(plan%fftw_plan, 1, plan%n, num_rows, zin, 0, ii, di, &
998 62626 : zout, 0, io, DO, FFTW_FORWARD, fftw_plan_type)
999 : ELSE
1000 : CALL dfftw_plan_many_dft(plan%fftw_plan, 1, plan%n, num_rows, zin, 0, ii, di, &
1001 62358 : zout, 0, io, DO, FFTW_BACKWARD, fftw_plan_type)
1002 : END IF
1003 :
1004 124984 : !$ IF (plan%need_alt_plan) THEN
1005 0 : !$ plan%alt_num_rows = plan%m - (plan%num_threads_needed - 1)*num_rows
1006 0 : !$ IF (plan%fsign == +1) THEN
1007 : !$ CALL dfftw_plan_many_dft(plan%alt_fftw_plan, 1, plan%n, plan%alt_num_rows, zin, 0, ii, di, &
1008 0 : !$ zout, 0, io, DO, FFTW_FORWARD, fftw_plan_type)
1009 : !$ ELSE
1010 : !$ CALL dfftw_plan_many_dft(plan%alt_fftw_plan, 1, plan%n, plan%alt_num_rows, zin, 0, ii, di, &
1011 0 : !$ zout, 0, io, DO, FFTW_BACKWARD, fftw_plan_type)
1012 : !$ END IF
1013 : !$ END IF
1014 :
1015 : #else
1016 : MARK_USED(plan)
1017 : MARK_USED(plan_style)
1018 : !MARK_USED does not work with assumed size arguments
1019 : IF (.FALSE.) THEN; DO; IF (ABS(zin(1)) > ABS(zout(1))) EXIT; END DO; END IF
1020 : #endif
1021 :
1022 124984 : END SUBROUTINE fftw3_create_plan_1dm
1023 :
1024 : ! **************************************************************************************************
1025 : !> \brief ...
1026 : !> \param plan ...
1027 : ! **************************************************************************************************
1028 162476 : SUBROUTINE fftw3_destroy_plan(plan)
1029 :
1030 : TYPE(fft_plan_type), INTENT(INOUT) :: plan
1031 :
1032 : #if defined ( __FFTW3 )
1033 162476 : !$ IF (plan%need_alt_plan) THEN
1034 0 : !$ CALL XFFTW_DESTROY_PLAN(plan%alt_fftw_plan)
1035 : !$ END IF
1036 :
1037 162476 : IF (.NOT. plan%separated_plans) THEN
1038 162476 : CALL XFFTW_DESTROY_PLAN(plan%fftw_plan)
1039 : ELSE
1040 : ! If it is a separated plan then we have to destroy
1041 : ! each dim plan individually
1042 0 : CALL XFFTW_DESTROY_PLAN(plan%fftw_plan_nx)
1043 0 : CALL XFFTW_DESTROY_PLAN(plan%fftw_plan_ny)
1044 0 : CALL XFFTW_DESTROY_PLAN(plan%fftw_plan_nz)
1045 0 : CALL XFFTW_DESTROY_PLAN(plan%fftw_plan_nx_r)
1046 0 : CALL XFFTW_DESTROY_PLAN(plan%fftw_plan_ny_r)
1047 0 : CALL XFFTW_DESTROY_PLAN(plan%fftw_plan_nz_r)
1048 : END IF
1049 :
1050 : #else
1051 : MARK_USED(plan)
1052 : #endif
1053 :
1054 162476 : END SUBROUTINE fftw3_destroy_plan
1055 :
1056 : ! **************************************************************************************************
1057 : !> \brief ...
1058 : !> \param plan ...
1059 : !> \param zin ...
1060 : !> \param zout ...
1061 : !> \param scale ...
1062 : !> \param stat ...
1063 : ! **************************************************************************************************
1064 5907394 : SUBROUTINE fftw31dm(plan, zin, zout, scale, stat)
1065 : TYPE(fft_plan_type), INTENT(IN) :: plan
1066 : COMPLEX(KIND=dp), DIMENSION(*), INTENT(IN), TARGET :: zin
1067 : COMPLEX(KIND=dp), DIMENSION(*), INTENT(INOUT), &
1068 : TARGET :: zout
1069 : REAL(KIND=dp), INTENT(IN) :: scale
1070 : INTEGER, INTENT(OUT) :: stat
1071 :
1072 : INTEGER :: in_offset, my_id, num_rows, out_offset, &
1073 : scal_offset
1074 : TYPE(C_PTR) :: fftw_plan
1075 :
1076 : !------------------------------------------------------------------------------
1077 :
1078 5907394 : my_id = 0
1079 5907394 : num_rows = plan%m
1080 :
1081 : !$OMP PARALLEL DEFAULT(NONE), &
1082 : !$OMP PRIVATE(my_id,num_rows,in_offset,out_offset,scal_offset,fftw_plan), &
1083 : !$OMP SHARED(zin,zout), &
1084 5907394 : !$OMP SHARED(plan,scale,stat)
1085 : !$ my_id = omp_get_thread_num()
1086 :
1087 : !$ if (my_id < plan%num_threads_needed) then
1088 :
1089 : fftw_plan = plan%fftw_plan
1090 :
1091 : in_offset = 1
1092 : out_offset = 1
1093 : scal_offset = 1
1094 :
1095 : !$ in_offset = 1 + plan%num_rows*my_id*plan%n
1096 : !$ out_offset = 1 + plan%num_rows*my_id*plan%n
1097 : !$ IF (plan%fsign == +1 .AND. plan%trans) THEN
1098 : !$ in_offset = 1 + plan%num_rows*my_id
1099 : !$ ELSEIF (plan%fsign == -1 .AND. plan%trans) THEN
1100 : !$ out_offset = 1 + plan%num_rows*my_id
1101 : !$ END IF
1102 : !$ scal_offset = 1 + plan%n*plan%num_rows*my_id
1103 : !$ IF (plan%need_alt_plan .AND. my_id .EQ. plan%num_threads_needed - 1) THEN
1104 : !$ num_rows = plan%alt_num_rows
1105 : !$ fftw_plan = plan%alt_fftw_plan
1106 : !$ ELSE
1107 : !$ num_rows = plan%num_rows
1108 : !$ END IF
1109 :
1110 : #if defined ( __FFTW3 )
1111 : !$OMP MASTER
1112 : stat = 1
1113 : !$OMP END MASTER
1114 : CALL dfftw_execute_dft(fftw_plan, zin(in_offset:in_offset), zout(out_offset:out_offset))
1115 : !$ end if
1116 : ! all theads need to meet at this barrier
1117 : !$OMP BARRIER
1118 : !$ if (my_id < plan%num_threads_needed) then
1119 : IF (scale /= 1.0_dp) CALL zdscal(plan%n*num_rows, scale, zout(scal_offset:scal_offset), 1)
1120 : !$ end if
1121 :
1122 : #else
1123 : MARK_USED(plan)
1124 : MARK_USED(scale)
1125 : !MARK_USED does not work with assumed size arguments
1126 : IF (.FALSE.) THEN; DO; IF (ABS(zin(1)) > ABS(zout(1))) EXIT; END DO; END IF
1127 : stat = 0
1128 :
1129 : !$ else
1130 : !$ end if
1131 :
1132 : #endif
1133 :
1134 : !$OMP END PARALLEL
1135 :
1136 5907394 : END SUBROUTINE fftw31dm
1137 :
1138 : ! Copyright (c) 2003, 2006 Matteo Frigo
1139 : ! Copyright (c) 2003, 2006 Massachusetts Institute of Technology
1140 : !
1141 : ! This program is free software; you can redistribute it and/or modify
1142 : ! it under the terms of the GNU General Public License as published by
1143 : ! the Free Software Foundation; either version 2 of the License, or
1144 : ! (at your option) any later version.
1145 : !
1146 : ! This program is distributed in the hope that it will be useful,
1147 : ! but WITHOUT ANY WARRANTY; without even the implied warranty of
1148 : ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1149 : ! GNU General Public License for more details.
1150 : !
1151 : ! You should have received a copy of the GNU General Public License
1152 : ! along with this program; if not, write to the Free Software
1153 : ! Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
1154 : !
1155 : !ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
1156 : !
1157 : ! This is an example implementation of Fortran wisdom export/import
1158 : ! to/from a Fortran unit (file), exploiting the generic
1159 : ! dfftw_export_wisdom/dfftw_import_wisdom functions.
1160 : !
1161 : ! We cannot compile this file into the FFTW library itself, lest all
1162 : ! FFTW-calling programs be required to link to the Fortran I/O
1163 : ! libraries.
1164 : !
1165 : ! adapted to become more standard Fortran 90 [2007-10] Joost VandeVondele
1166 : ! and added some namespacing
1167 : !
1168 : !ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
1169 :
1170 : ! **************************************************************************************************
1171 : !> \brief ...
1172 : !> \param c ...
1173 : !> \param iunit ...
1174 : ! **************************************************************************************************
1175 0 : SUBROUTINE fftw_write_char(c, iunit) BIND(C, name="fftw_write_char")
1176 : CHARACTER(KIND=C_CHAR) :: c
1177 : INTEGER(KIND=C_INT) :: iunit
1178 :
1179 0 : WRITE (iunit, '(a)', ADVANCE="NO") c
1180 0 : END SUBROUTINE
1181 :
1182 : ! **************************************************************************************************
1183 : !> \brief ...
1184 : !> \param iunit ...
1185 : ! **************************************************************************************************
1186 0 : SUBROUTINE fftw_export_wisdom_to_file(iunit)
1187 : INTEGER :: iunit
1188 :
1189 : #if defined ( __FFTW3 )
1190 0 : CALL dfftw_export_wisdom(fftw_write_char, iunit)
1191 : #else
1192 : MARK_USED(iunit)
1193 : #endif
1194 0 : END SUBROUTINE
1195 :
1196 : ! Fortran 77 does not have any portable way to read an arbitrary
1197 : ! file one character at a time [needs to wait for stream IO of F2003].
1198 : ! The best alternative seems to be to
1199 : ! read a whole line into a buffer, since for fftw-exported wisdom we
1200 : ! can bound the line length. (If the file contains longer lines,
1201 : ! then the lines will be truncated and the wisdom import should
1202 : ! simply fail.) Ugh (and not thread safe).
1203 :
1204 : ! **************************************************************************************************
1205 : !> \brief ...
1206 : !> \param ic ...
1207 : !> \param iunit ...
1208 : ! **************************************************************************************************
1209 0 : SUBROUTINE fftw_read_char(ic, iunit) BIND(C, name="fftw_read_char")
1210 : INTEGER(KIND=C_INT) :: ic, iunit
1211 :
1212 : CHARACTER(LEN=256) :: buf
1213 :
1214 : SAVE buf
1215 : INTEGER ibuf
1216 : DATA ibuf/257/
1217 : SAVE ibuf
1218 0 : IF (ibuf .LT. 257) THEN
1219 0 : ic = ICHAR(buf(ibuf:ibuf))
1220 0 : ibuf = ibuf + 1
1221 0 : RETURN
1222 : END IF
1223 0 : READ (iunit, 123, END=666) buf
1224 0 : ic = ICHAR(buf(1:1))
1225 0 : ibuf = 2
1226 0 : RETURN
1227 0 : 666 ic = -1
1228 0 : ibuf = 257
1229 : 123 FORMAT(a256)
1230 : END SUBROUTINE
1231 :
1232 : ! **************************************************************************************************
1233 : !> \brief ...
1234 : !> \param isuccess ...
1235 : !> \param iunit ...
1236 : ! **************************************************************************************************
1237 0 : SUBROUTINE fftw_import_wisdom_from_file(isuccess, iunit)
1238 : INTEGER :: isuccess, iunit
1239 :
1240 0 : isuccess = 0
1241 : #if defined ( __FFTW3 )
1242 0 : CALL dfftw_import_wisdom(isuccess, fftw_read_char, iunit)
1243 : #else
1244 : MARK_USED(iunit)
1245 : #endif
1246 0 : END SUBROUTINE
1247 :
1248 : END MODULE
|