Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \par History
10 : !> JGH (30-Nov-2000): ESSL FFT Library added
11 : !> JGH (05-Jan-2001): Added SGI library FFT
12 : !> JGH (14-Jan-2001): Added parallel 3d FFT
13 : !> JGH (10-Feb-2006): New interface type
14 : !> JGH (31-Mar-2008): Remove local allocates and reshapes (performance)
15 : !> Possible problems can be related with setting arrays
16 : !> not to zero
17 : !> Some interfaces could be further simplified by avoiding
18 : !> an initial copy. However, this assumes contiguous arrays
19 : !> IAB (15-Oct-2008): Moved mp_cart_sub calls out of cube_tranpose_* and into
20 : !> fft_scratch type, reducing number of calls dramatically
21 : !> IAB (05-Dec-2008): Moved all other non-essential MPI calls into scratch type
22 : !> IAB (09-Jan-2009): Added fft_plan_type to store FFT data, including cached FFTW plans
23 : !> IAB (13-Feb-2009): Extended plan caching to serial 3D FFT (fft3d_s)
24 : !> IAB (09-Oct-2009): Added OpenMP directives to parallel 3D FFT
25 : !> (c) The Numerical Algorithms Group (NAG) Ltd, 2008-2009 on behalf of the HECToR project
26 : !> \author JGH
27 : ! **************************************************************************************************
28 : MODULE fft_tools
29 : USE ISO_C_BINDING, ONLY: C_F_POINTER,&
30 : C_LOC,&
31 : C_PTR,&
32 : C_SIZE_T
33 : USE cp_log_handling, ONLY: cp_logger_get_default_io_unit
34 : USE fft_lib, ONLY: &
35 : fft_1dm, fft_3d, fft_alloc, fft_create_plan_1dm, fft_create_plan_3d, fft_dealloc, &
36 : fft_destroy_plan, fft_do_cleanup, fft_do_init, fft_get_lengths, fft_library
37 : USE fft_plan, ONLY: fft_plan_type
38 : USE kinds, ONLY: dp,&
39 : dp_size,&
40 : sp
41 : USE mathconstants, ONLY: z_zero
42 : USE message_passing, ONLY: mp_cart_type,&
43 : mp_comm_null,&
44 : mp_comm_type,&
45 : mp_request_type,&
46 : mp_waitall
47 : USE offload_api, ONLY: offload_free_pinned_mem,&
48 : offload_malloc_pinned_mem
49 :
50 : !$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num, omp_get_num_threads
51 :
52 : #include "../base/base_uses.f90"
53 :
54 : IMPLICIT NONE
55 :
56 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'fft_tools'
57 :
58 : ! Types for the pool of scratch data needed in FFT routines
59 : ! keep the subroutine "is_equal" up-to-date
60 : ! needs a default initialization
61 : TYPE fft_scratch_sizes
62 : INTEGER :: nx = 0, ny = 0, nz = 0
63 : INTEGER :: lmax = 0, mmax = 0, nmax = 0
64 : INTEGER :: mx1 = 0, mx2 = 0, mx3 = 0
65 : INTEGER :: my1 = 0, my2 = 0, my3 = 0
66 : INTEGER :: mz1 = 0, mz2 = 0, mz3 = 0
67 : INTEGER :: mcz1 = 0, mcz2 = 0, mcy3 = 0, mcx2 = 0
68 : INTEGER :: lg = 0, mg = 0
69 : INTEGER :: nbx = 0, nbz = 0
70 : INTEGER :: nmray = 0, nyzray = 0
71 : TYPE(mp_comm_type) :: gs_group = mp_comm_type()
72 : TYPE(mp_cart_type) :: rs_group = mp_cart_type()
73 : INTEGER, DIMENSION(2) :: g_pos = 0, r_pos = 0, r_dim = 0
74 : INTEGER :: numtask = 0
75 : END TYPE fft_scratch_sizes
76 :
77 : TYPE fft_scratch_type
78 : INTEGER :: fft_scratch_id = 0
79 : INTEGER :: tf_type = -1
80 : LOGICAL :: in_use = .TRUE.
81 : TYPE(mp_comm_type) :: group = mp_comm_type()
82 : INTEGER, DIMENSION(3) :: nfft = -1
83 : ! to be used in cube_transpose_* routines
84 : TYPE(mp_cart_type), DIMENSION(2) :: cart_sub_comm = mp_cart_type()
85 : INTEGER, DIMENSION(2) :: dim = -1, pos = -1
86 : ! to be used in fft3d_s
87 : COMPLEX(KIND=dp), DIMENSION(:, :, :), POINTER, CONTIGUOUS &
88 : :: ziptr => NULL(), zoptr => NULL()
89 : ! to be used in fft3d_ps : block distribution
90 : COMPLEX(KIND=dp), DIMENSION(:, :), CONTIGUOUS, POINTER &
91 : :: p1buf => NULL(), p2buf => NULL(), p3buf => NULL(), p4buf => NULL(), &
92 : p5buf => NULL(), p6buf => NULL(), p7buf => NULL()
93 : ! to be used in fft3d_ps : plane distribution
94 : COMPLEX(KIND=dp), DIMENSION(:, :), POINTER, CONTIGUOUS &
95 : :: r1buf => NULL(), r2buf => NULL()
96 : COMPLEX(KIND=dp), DIMENSION(:, :, :), POINTER, CONTIGUOUS &
97 : :: tbuf => NULL()
98 : ! to be used in fft3d_pb
99 : COMPLEX(KIND=dp), DIMENSION(:, :), POINTER, CONTIGUOUS &
100 : :: a1buf => NULL(), a2buf => NULL(), a3buf => NULL(), &
101 : a4buf => NULL(), a5buf => NULL(), a6buf => NULL()
102 : ! to be used in communication routines
103 : INTEGER, DIMENSION(:), CONTIGUOUS, POINTER :: scount => NULL(), rcount => NULL(), sdispl => NULL(), rdispl => NULL()
104 : INTEGER, DIMENSION(:, :), CONTIGUOUS, POINTER :: pgcube => NULL()
105 : INTEGER, DIMENSION(:), CONTIGUOUS, POINTER :: xzcount => NULL(), yzcount => NULL(), xzdispl => NULL(), yzdispl => NULL()
106 : INTEGER :: in = 0, mip = -1
107 : REAL(KIND=dp) :: rsratio = 1.0_dp
108 : COMPLEX(KIND=dp), DIMENSION(:), POINTER, CONTIGUOUS &
109 : :: xzbuf => NULL(), yzbuf => NULL()
110 : COMPLEX(KIND=sp), DIMENSION(:), POINTER, CONTIGUOUS &
111 : :: xzbuf_sgl => NULL(), yzbuf_sgl => NULL()
112 : COMPLEX(KIND=dp), DIMENSION(:, :), POINTER, CONTIGUOUS &
113 : :: rbuf1 => NULL(), rbuf2 => NULL(), rbuf3 => NULL(), rbuf4 => NULL(), &
114 : rbuf5 => NULL(), rbuf6 => NULL(), rr => NULL()
115 : COMPLEX(KIND=sp), DIMENSION(:, :), POINTER, CONTIGUOUS &
116 : :: ss => NULL(), tt => NULL()
117 : INTEGER, DIMENSION(:, :), POINTER, CONTIGUOUS :: pgrid => NULL()
118 : INTEGER, DIMENSION(:), POINTER, CONTIGUOUS :: xcor => NULL(), zcor => NULL(), pzcoord => NULL()
119 : TYPE(fft_scratch_sizes) :: sizes = fft_scratch_sizes()
120 : TYPE(fft_plan_type), DIMENSION(6) :: fft_plan = fft_plan_type()
121 : INTEGER :: last_tick = -1
122 : END TYPE fft_scratch_type
123 :
124 : TYPE fft_scratch_pool_type
125 : TYPE(fft_scratch_type), POINTER :: fft_scratch => NULL()
126 : TYPE(fft_scratch_pool_type), POINTER :: fft_scratch_next => NULL()
127 : END TYPE fft_scratch_pool_type
128 :
129 : INTEGER, SAVE :: init_fft_pool = 0
130 : ! the clock for fft pool. Allows to identify the least recently used scratch
131 : INTEGER, SAVE :: tick_fft_pool = 0
132 : ! limit the number of scratch pools to fft_pool_scratch_limit.
133 : INTEGER, SAVE :: fft_pool_scratch_limit = 15
134 : TYPE(fft_scratch_pool_type), POINTER, SAVE:: fft_scratch_first
135 : ! END of types for the pool of scratch data needed in FFT routines
136 :
137 : PRIVATE
138 : PUBLIC :: init_fft, fft3d, finalize_fft
139 : PUBLIC :: fft_alloc, fft_dealloc
140 : PUBLIC :: fft_radix_operations, fft_fw1d
141 : PUBLIC :: FWFFT, BWFFT
142 : PUBLIC :: FFT_RADIX_CLOSEST, FFT_RADIX_NEXT
143 : PUBLIC :: FFT_RADIX_NEXT_ODD
144 :
145 : INTEGER, PARAMETER :: FWFFT = +1, BWFFT = -1
146 : INTEGER, PARAMETER :: FFT_RADIX_CLOSEST = 493, FFT_RADIX_NEXT = 494
147 : INTEGER, PARAMETER :: FFT_RADIX_ALLOWED = 495, FFT_RADIX_DISALLOWED = 496
148 : INTEGER, PARAMETER :: FFT_RADIX_NEXT_ODD = 497
149 :
150 : REAL(KIND=dp), PARAMETER :: ratio_sparse_alltoall = 0.5_dp
151 :
152 : ! these saved variables are FFT globals
153 : INTEGER, SAVE :: fft_type = 0
154 : LOGICAL, SAVE :: alltoall_sgl = .FALSE.
155 : LOGICAL, SAVE :: use_fftsg_sizes = .TRUE.
156 : INTEGER, SAVE :: fft_plan_style = 1
157 :
158 : ! these are only needed for pw_gpu (-D__OFFLOAD)
159 : PUBLIC :: get_fft_scratch, release_fft_scratch
160 : PUBLIC :: cube_transpose_1, cube_transpose_2
161 : PUBLIC :: yz_to_x, x_to_yz, xz_to_yz, yz_to_xz
162 : PUBLIC :: fft_scratch_sizes, fft_scratch_type
163 : PUBLIC :: fft_type, fft_plan_style
164 :
165 : INTERFACE fft3d
166 : MODULE PROCEDURE fft3d_s, fft3d_ps, fft3d_pb
167 : END INTERFACE
168 :
169 : ! **************************************************************************************************
170 :
171 : CONTAINS
172 :
173 : ! **************************************************************************************************
174 : !> \brief ...
175 : !> \param fftlib ...
176 : !> \param alltoall ...
177 : !> \param fftsg_sizes ...
178 : !> \param pool_limit ...
179 : !> \param wisdom_file ...
180 : !> \param plan_style ...
181 : !> \author JGH
182 : ! **************************************************************************************************
183 9005 : SUBROUTINE init_fft(fftlib, alltoall, fftsg_sizes, pool_limit, wisdom_file, &
184 : plan_style)
185 :
186 : CHARACTER(LEN=*), INTENT(IN) :: fftlib
187 : LOGICAL, INTENT(IN) :: alltoall, fftsg_sizes
188 : INTEGER, INTENT(IN) :: pool_limit
189 : CHARACTER(LEN=*), INTENT(IN) :: wisdom_file
190 : INTEGER, INTENT(IN) :: plan_style
191 :
192 9005 : use_fftsg_sizes = fftsg_sizes
193 9005 : alltoall_sgl = alltoall
194 9005 : fft_pool_scratch_limit = pool_limit
195 9005 : fft_type = fft_library(fftlib)
196 9005 : fft_plan_style = plan_style
197 :
198 9005 : IF (fft_type <= 0) CPABORT("Unknown FFT library: "//TRIM(fftlib))
199 :
200 9005 : CALL fft_do_init(fft_type, wisdom_file)
201 :
202 : ! setup the FFT scratch pool, if one is associated, clear first
203 9005 : CALL release_fft_scratch_pool()
204 9005 : CALL init_fft_scratch_pool()
205 :
206 9005 : END SUBROUTINE init_fft
207 :
208 : ! **************************************************************************************************
209 : !> \brief does whatever is needed to finalize the current fft setup
210 : !> \param para_env ...
211 : !> \param wisdom_file ...
212 : !> \par History
213 : !> 10.2007 created [Joost VandeVondele]
214 : ! **************************************************************************************************
215 8795 : SUBROUTINE finalize_fft(para_env, wisdom_file)
216 : CLASS(mp_comm_type) :: para_env
217 : CHARACTER(LEN=*), INTENT(IN) :: wisdom_file
218 :
219 : ! release the FFT scratch pool
220 :
221 8795 : CALL release_fft_scratch_pool()
222 :
223 : ! finalize fft libs
224 :
225 8795 : CALL fft_do_cleanup(fft_type, wisdom_file, para_env%is_source())
226 :
227 8795 : END SUBROUTINE finalize_fft
228 :
229 : ! **************************************************************************************************
230 : !> \brief Determine the allowed lengths of FFT's '''
231 : !> \param radix_in ...
232 : !> \param radix_out ...
233 : !> \param operation ...
234 : !> \par History
235 : !> new library structure (JGH)
236 : !> \author Ari Seitsonen
237 : ! **************************************************************************************************
238 108338 : SUBROUTINE fft_radix_operations(radix_in, radix_out, operation)
239 :
240 : INTEGER, INTENT(IN) :: radix_in
241 : INTEGER, INTENT(OUT) :: radix_out
242 : INTEGER, INTENT(IN) :: operation
243 :
244 : INTEGER, PARAMETER :: fft_type_sg = 1
245 :
246 : INTEGER :: i, iloc, ldata
247 108338 : INTEGER, ALLOCATABLE, DIMENSION(:) :: DATA
248 :
249 108338 : ldata = 1024
250 108338 : ALLOCATE (DATA(ldata))
251 111046450 : DATA = -1
252 :
253 : ! if the user wants to use fftsg sizes go for it
254 108338 : IF (use_fftsg_sizes) THEN
255 107834 : CALL fft_get_lengths(fft_type_sg, DATA, ldata)
256 : ELSE
257 504 : CALL fft_get_lengths(fft_type, DATA, ldata)
258 : END IF
259 :
260 108338 : iloc = 0
261 1199988 : DO i = 1, ldata
262 1199988 : IF (DATA(i) == radix_in) THEN
263 : iloc = i
264 : EXIT
265 : ELSE
266 1170580 : IF (OPERATION == FFT_RADIX_ALLOWED) THEN
267 : CYCLE
268 1170580 : ELSE IF (DATA(i) > radix_in) THEN
269 : iloc = i
270 : EXIT
271 : END IF
272 : END IF
273 : END DO
274 :
275 108338 : IF (iloc == 0) THEN
276 0 : CPABORT("Index to radix array not found.")
277 : END IF
278 :
279 108338 : IF (OPERATION == FFT_RADIX_ALLOWED) THEN
280 0 : IF (DATA(iloc) == radix_in) THEN
281 0 : radix_out = FFT_RADIX_ALLOWED
282 : ELSE
283 0 : radix_out = FFT_RADIX_DISALLOWED
284 : END IF
285 :
286 108338 : ELSE IF (OPERATION == FFT_RADIX_CLOSEST) THEN
287 270 : IF (DATA(iloc) == radix_in) THEN
288 102 : radix_out = DATA(iloc)
289 : ELSE
290 168 : IF (ABS(DATA(iloc - 1) - radix_in) <= &
291 : ABS(DATA(iloc) - radix_in)) THEN
292 162 : radix_out = DATA(iloc - 1)
293 : ELSE
294 6 : radix_out = DATA(iloc)
295 : END IF
296 : END IF
297 :
298 108068 : ELSE IF (OPERATION == FFT_RADIX_NEXT) THEN
299 106700 : radix_out = DATA(iloc)
300 :
301 1368 : ELSE IF (OPERATION == FFT_RADIX_NEXT_ODD) THEN
302 3028 : DO i = iloc, ldata
303 3028 : IF (MOD(DATA(i), 2) == 1) THEN
304 1368 : radix_out = DATA(i)
305 1368 : EXIT
306 : END IF
307 : END DO
308 1368 : IF (MOD(radix_out, 2) == 0) THEN
309 0 : CPABORT("No odd radix found.")
310 : END IF
311 :
312 : ELSE
313 0 : CPABORT("Disallowed radix operation.")
314 : END IF
315 :
316 108338 : DEALLOCATE (DATA)
317 :
318 108338 : END SUBROUTINE fft_radix_operations
319 :
320 : ! **************************************************************************************************
321 : !> \brief Performs m 1-D forward FFT-s of size n.
322 : !> \param n size of the FFT
323 : !> \param m number of FFT-s
324 : !> \param trans shape of input and output arrays: [n x m] if it is FALSE, [m x n] if it is TRUE
325 : !> \param zin input array
326 : !> \param zout output array
327 : !> \param scale scaling factor
328 : !> \param stat status of the operation, non-zero code indicates an error
329 : ! **************************************************************************************************
330 208 : SUBROUTINE fft_fw1d(n, m, trans, zin, zout, scale, stat)
331 : INTEGER, INTENT(in) :: n, m
332 : LOGICAL, INTENT(in) :: trans
333 : COMPLEX(kind=dp), DIMENSION(*), INTENT(inout) :: zin, zout
334 : REAL(kind=dp), INTENT(in) :: scale
335 : INTEGER, INTENT(out) :: stat
336 :
337 : CHARACTER(len=*), PARAMETER :: routineN = 'fft_fw1d'
338 :
339 : INTEGER :: handle
340 : TYPE(fft_plan_type) :: fft_plan
341 :
342 208 : CALL timeset(routineN, handle)
343 :
344 208 : IF (fft_type == 3) THEN
345 208 : CALL fft_create_plan_1dm(fft_plan, fft_type, FWFFT, trans, n, m, zin, zout, fft_plan_style)
346 208 : CALL fft_1dm(fft_plan, zin, zout, scale, stat)
347 208 : CALL fft_destroy_plan(fft_plan)
348 : ELSE
349 : CALL cp_warn(__LOCATION__, &
350 0 : "FFT library in use cannot handle transformation of an arbitrary length.")
351 0 : stat = 1
352 : END IF
353 :
354 208 : CALL timestop(handle)
355 832 : END SUBROUTINE fft_fw1d
356 :
357 : ! **************************************************************************************************
358 : !> \brief Calls the 3D-FFT function from the initialized library
359 : !> \param fsign ...
360 : !> \param n ...
361 : !> \param zin ...
362 : !> \param zout ...
363 : !> \param scale ...
364 : !> \param status ...
365 : !> \param debug ...
366 : !> \par History
367 : !> none
368 : !> \author JGH
369 : ! **************************************************************************************************
370 614828 : SUBROUTINE fft3d_s(fsign, n, zin, zout, scale, status, debug)
371 :
372 : INTEGER, INTENT(IN) :: fsign
373 : INTEGER, DIMENSION(:), INTENT(INOUT) :: n
374 : COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
375 : INTENT(INOUT) :: zin
376 : COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
377 : INTENT(INOUT), OPTIONAL, TARGET :: zout
378 : REAL(KIND=dp), INTENT(IN), OPTIONAL :: scale
379 : INTEGER, INTENT(OUT), OPTIONAL :: status
380 : LOGICAL, INTENT(IN), OPTIONAL :: debug
381 :
382 : CHARACTER(len=*), PARAMETER :: routineN = 'fft3d_s'
383 :
384 : COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
385 614828 : POINTER :: zoptr
386 : COMPLEX(KIND=dp), DIMENSION(1, 1, 1), TARGET :: zdum
387 : INTEGER :: handle, ld(3), lo(3), output_unit, sign, &
388 : stat
389 : LOGICAL :: fft_in_place, test
390 : REAL(KIND=dp) :: in_sum, norm, out_sum
391 : TYPE(fft_scratch_type), POINTER :: fft_scratch
392 :
393 614828 : CALL timeset(routineN, handle)
394 614828 : output_unit = cp_logger_get_default_io_unit()
395 :
396 614828 : IF (PRESENT(scale)) THEN
397 577777 : norm = scale
398 : ELSE
399 37051 : norm = 1.0_dp
400 : END IF
401 :
402 614828 : IF (PRESENT(debug)) THEN
403 576905 : test = debug
404 : ELSE
405 : test = .FALSE.
406 : END IF
407 :
408 614828 : IF (PRESENT(zout)) THEN
409 : fft_in_place = .FALSE.
410 : ELSE
411 614820 : fft_in_place = .TRUE.
412 : END IF
413 :
414 614828 : IF (test) THEN
415 0 : in_sum = SUM(ABS(zin))
416 : END IF
417 :
418 614828 : ld(1) = SIZE(zin, 1)
419 614828 : ld(2) = SIZE(zin, 2)
420 614828 : ld(3) = SIZE(zin, 3)
421 :
422 614828 : IF (n(1) /= ld(1) .OR. n(2) /= ld(2) .OR. n(3) /= ld(3)) THEN
423 0 : CPABORT("Size and dimension (zin) have to be the same.")
424 : END IF
425 :
426 614828 : sign = fsign
427 614828 : CALL get_fft_scratch(fft_scratch, tf_type=400, n=n)
428 :
429 614828 : IF (fft_in_place) THEN
430 614820 : zoptr => zdum
431 614820 : IF (fsign == FWFFT) THEN
432 290340 : CALL fft_3d(fft_scratch%fft_plan(1), norm, zin, zoptr, stat)
433 : ELSE
434 324480 : CALL fft_3d(fft_scratch%fft_plan(2), norm, zin, zoptr, stat)
435 : END IF
436 : ELSE
437 8 : IF (fsign == FWFFT) THEN
438 4 : CALL fft_3d(fft_scratch%fft_plan(3), norm, zin, zout, stat)
439 : ELSE
440 4 : CALL fft_3d(fft_scratch%fft_plan(4), norm, zin, zout, stat)
441 : END IF
442 : END IF
443 :
444 614828 : CALL release_fft_scratch(fft_scratch)
445 :
446 614828 : IF (PRESENT(zout)) THEN
447 8 : lo(1) = SIZE(zout, 1)
448 8 : lo(2) = SIZE(zout, 2)
449 8 : lo(3) = SIZE(zout, 3)
450 8 : IF (n(1) /= lo(1) .OR. n(2) /= lo(2) .OR. n(3) /= lo(3)) THEN
451 0 : CPABORT("Size and dimension (zout) have to be the same.")
452 : END IF
453 : END IF
454 :
455 614828 : IF (PRESENT(status)) THEN
456 8993 : status = stat
457 : END IF
458 :
459 614828 : IF (test .AND. output_unit > 0) THEN
460 0 : IF (PRESENT(zout)) THEN
461 0 : out_sum = SUM(ABS(zout))
462 0 : WRITE (output_unit, '(A)') " Out of place 3D FFT (local) : fft3d_s"
463 0 : WRITE (output_unit, '(A,T60,3I7)') " Transform lengths ", n
464 0 : WRITE (output_unit, '(A,T60,3I7)') " Input array dimensions ", ld
465 0 : WRITE (output_unit, '(A,T60,3I7)') " Output array dimensions ", lo
466 0 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of input data ", in_sum
467 0 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of output data ", out_sum
468 : ELSE
469 0 : out_sum = SUM(ABS(zin))
470 0 : WRITE (output_unit, '(A)') " In place 3D FFT (local) : fft3d_s"
471 0 : WRITE (output_unit, '(A,T60,3I7)') " Transform lengths ", n
472 0 : WRITE (output_unit, '(A,T60,3I7)') " Input/output array dimensions ", ld
473 0 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of input data ", in_sum
474 0 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of output data ", out_sum
475 : END IF
476 : END IF
477 :
478 614828 : CALL timestop(handle)
479 :
480 614828 : END SUBROUTINE fft3d_s
481 :
482 : ! **************************************************************************************************
483 : !> \brief ...
484 : !> \param fsign ...
485 : !> \param n ...
486 : !> \param cin ...
487 : !> \param gin ...
488 : !> \param gs_group ...
489 : !> \param rs_group ...
490 : !> \param yzp ...
491 : !> \param nyzray ...
492 : !> \param bo ...
493 : !> \param scale ...
494 : !> \param status ...
495 : !> \param debug ...
496 : ! **************************************************************************************************
497 2615884 : SUBROUTINE fft3d_ps(fsign, n, cin, gin, gs_group, rs_group, yzp, nyzray, &
498 2615884 : bo, scale, status, debug)
499 :
500 : INTEGER, INTENT(IN) :: fsign
501 : INTEGER, DIMENSION(:), INTENT(IN) :: n
502 : COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
503 : INTENT(INOUT) :: cin
504 : COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
505 : INTENT(INOUT) :: gin
506 : TYPE(mp_comm_type), INTENT(IN) :: gs_group
507 : TYPE(mp_cart_type), INTENT(IN) :: rs_group
508 : INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
509 : INTENT(IN) :: yzp
510 : INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN) :: nyzray
511 : INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:, :), &
512 : INTENT(IN) :: bo
513 : REAL(KIND=dp), INTENT(IN), OPTIONAL :: scale
514 : INTEGER, INTENT(OUT), OPTIONAL :: status
515 : LOGICAL, INTENT(IN), OPTIONAL :: debug
516 :
517 : CHARACTER(len=*), PARAMETER :: routineN = 'fft3d_ps'
518 :
519 : COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
520 2615884 : POINTER :: pbuf, qbuf, rbuf, sbuf
521 : COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
522 2615884 : POINTER :: tbuf
523 : INTEGER :: g_pos, handle, lg, lmax, mcx2, mcz1, mcz2, mg, mmax, mx1, mx2, my1, mz2, n1, n2, &
524 : nmax, numtask, numtask_g, numtask_r, nx, ny, nz, output_unit, r_dim(2), r_pos(2), rp, &
525 : sign, stat
526 2615884 : INTEGER, ALLOCATABLE, DIMENSION(:) :: p2p
527 : LOGICAL :: test
528 : REAL(KIND=dp) :: norm, sum_data
529 18311188 : TYPE(fft_scratch_sizes) :: fft_scratch_size
530 : TYPE(fft_scratch_type), POINTER :: fft_scratch
531 :
532 2615884 : CALL timeset(routineN, handle)
533 2615884 : output_unit = cp_logger_get_default_io_unit()
534 :
535 2615884 : IF (PRESENT(debug)) THEN
536 2615884 : test = debug
537 : ELSE
538 : test = .FALSE.
539 : END IF
540 :
541 2615884 : numtask_g = gs_group%num_pe
542 2615884 : g_pos = gs_group%mepos
543 2615884 : numtask_r = rs_group%num_pe
544 7847652 : r_dim = rs_group%num_pe_cart
545 7847652 : r_pos = rs_group%mepos_cart
546 2615884 : IF (numtask_g /= numtask_r) THEN
547 0 : CPABORT("Real space and G space groups are different.")
548 : END IF
549 2615884 : numtask = numtask_r
550 :
551 2615884 : IF (PRESENT(scale)) THEN
552 2615884 : norm = scale
553 : ELSE
554 0 : norm = 1.0_dp
555 : END IF
556 :
557 2615884 : sign = fsign
558 :
559 2615884 : lg = SIZE(gin, 1)
560 2615884 : mg = SIZE(gin, 2)
561 :
562 2615884 : nx = SIZE(cin, 1)
563 2615884 : ny = SIZE(cin, 2)
564 2615884 : nz = SIZE(cin, 3)
565 :
566 2615884 : IF (mg == 0) THEN
567 : mmax = 1
568 : ELSE
569 2615884 : mmax = mg
570 : END IF
571 2615884 : lmax = MAX(lg, (nx*ny*nz)/mmax + 1)
572 :
573 7847652 : ALLOCATE (p2p(0:numtask - 1))
574 :
575 2615884 : CALL gs_group%rank_compare(rs_group, p2p)
576 :
577 2615884 : rp = p2p(g_pos)
578 2615884 : mx1 = bo(2, 1, rp, 1) - bo(1, 1, rp, 1) + 1
579 2615884 : my1 = bo(2, 2, rp, 1) - bo(1, 2, rp, 1) + 1
580 2615884 : mx2 = bo(2, 1, rp, 2) - bo(1, 1, rp, 2) + 1
581 2615884 : mz2 = bo(2, 3, rp, 2) - bo(1, 3, rp, 2) + 1
582 :
583 7847652 : n1 = MAXVAL(bo(2, 1, :, 1) - bo(1, 1, :, 1) + 1)
584 7847652 : n2 = MAXVAL(bo(2, 2, :, 1) - bo(1, 2, :, 1) + 1)
585 2615884 : nmax = MAX((2*n2)/numtask, 2)*mx2*mz2
586 7847652 : nmax = MAX(nmax, n1*MAXVAL(nyzray))
587 7847652 : n1 = MAXVAL(bo(2, 1, :, 2))
588 7847652 : n2 = MAXVAL(bo(2, 3, :, 2))
589 :
590 2615884 : fft_scratch_size%nx = nx
591 2615884 : fft_scratch_size%ny = ny
592 2615884 : fft_scratch_size%nz = nz
593 2615884 : fft_scratch_size%lmax = lmax
594 2615884 : fft_scratch_size%mmax = mmax
595 2615884 : fft_scratch_size%mx1 = mx1
596 2615884 : fft_scratch_size%mx2 = mx2
597 2615884 : fft_scratch_size%my1 = my1
598 2615884 : fft_scratch_size%mz2 = mz2
599 2615884 : fft_scratch_size%lg = lg
600 2615884 : fft_scratch_size%mg = mg
601 2615884 : fft_scratch_size%nbx = n1
602 2615884 : fft_scratch_size%nbz = n2
603 7847652 : mcz1 = MAXVAL(bo(2, 3, :, 1) - bo(1, 3, :, 1) + 1)
604 7847652 : mcx2 = MAXVAL(bo(2, 1, :, 2) - bo(1, 1, :, 2) + 1)
605 7847652 : mcz2 = MAXVAL(bo(2, 3, :, 2) - bo(1, 3, :, 2) + 1)
606 2615884 : fft_scratch_size%mcz1 = mcz1
607 2615884 : fft_scratch_size%mcx2 = mcx2
608 2615884 : fft_scratch_size%mcz2 = mcz2
609 2615884 : fft_scratch_size%nmax = nmax
610 7847652 : fft_scratch_size%nmray = MAXVAL(nyzray)
611 2615884 : fft_scratch_size%nyzray = nyzray(g_pos)
612 2615884 : fft_scratch_size%gs_group = gs_group
613 2615884 : fft_scratch_size%rs_group = rs_group
614 7847652 : fft_scratch_size%g_pos = g_pos
615 7847652 : fft_scratch_size%r_pos = r_pos
616 7847652 : fft_scratch_size%r_dim = r_dim
617 2615884 : fft_scratch_size%numtask = numtask
618 :
619 2615884 : IF (test) THEN
620 8 : IF (g_pos == 0 .AND. output_unit > 0) THEN
621 4 : WRITE (output_unit, '(A)') " Parallel 3D FFT : fft3d_ps"
622 4 : WRITE (output_unit, '(A,T60,3I7)') " Transform lengths ", n
623 4 : WRITE (output_unit, '(A,T67,2I7)') " Array dimensions (gin) ", lg, mg
624 4 : WRITE (output_unit, '(A,T60,3I7)') " Array dimensions (cin) ", nx, ny, nz
625 : END IF
626 : END IF
627 :
628 2615884 : IF (r_dim(2) > 1) THEN
629 :
630 : !
631 : ! real space is distributed over x and y coordinate
632 : ! we have two stages of communication
633 : !
634 :
635 0 : IF (r_dim(1) == 1) THEN
636 0 : CPABORT("This processor distribution is not supported.")
637 : END IF
638 0 : CALL get_fft_scratch(fft_scratch, tf_type=300, n=n, fft_sizes=fft_scratch_size)
639 :
640 0 : IF (sign == FWFFT) THEN
641 : ! cin -> gin
642 :
643 0 : IF (test) THEN
644 0 : sum_data = ABS(SUM(cin))
645 0 : CALL gs_group%sum(sum_data)
646 0 : IF (g_pos == 0 .AND. output_unit > 0) THEN
647 0 : WRITE (output_unit, '(A)') " Two step communication algorithm "
648 0 : WRITE (output_unit, '(A,T60,3I7)') " Transform Z ", n(3), mx1*my1
649 0 : WRITE (output_unit, '(A,T60,3I7)') " Transform Y ", n(2), mx2*mz2
650 0 : WRITE (output_unit, '(A,T67,2I7)') " Transform X ", n(1), nyzray(g_pos)
651 0 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(1) ", sum_data
652 : END IF
653 : END IF
654 :
655 0 : pbuf => fft_scratch%p1buf
656 0 : qbuf => fft_scratch%p2buf
657 :
658 : ! FFT along z
659 0 : CALL fft_1dm(fft_scratch%fft_plan(1), cin, qbuf, norm, stat)
660 :
661 0 : rbuf => fft_scratch%p3buf
662 :
663 0 : IF (test) THEN
664 0 : sum_data = ABS(SUM(qbuf))
665 0 : CALL gs_group%sum(sum_data)
666 0 : IF (g_pos == 0 .AND. output_unit > 0) THEN
667 0 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(2) T", sum_data
668 : END IF
669 : END IF
670 :
671 : ! Exchange data ( transpose of matrix )
672 0 : CALL cube_transpose_2(qbuf, bo(:, :, :, 1), bo(:, :, :, 2), rbuf, fft_scratch)
673 :
674 0 : IF (test) THEN
675 0 : sum_data = ABS(SUM(rbuf))
676 0 : CALL gs_group%sum(sum_data)
677 0 : IF (g_pos == 0 .AND. output_unit > 0) THEN
678 0 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(3) T", sum_data
679 : END IF
680 : END IF
681 :
682 0 : pbuf => fft_scratch%p4buf
683 :
684 : ! FFT along y
685 0 : CALL fft_1dm(fft_scratch%fft_plan(2), rbuf, pbuf, 1.0_dp, stat)
686 :
687 0 : qbuf => fft_scratch%p5buf
688 :
689 0 : IF (test) THEN
690 0 : sum_data = ABS(SUM(pbuf))
691 0 : CALL gs_group%sum(sum_data)
692 0 : IF (g_pos == 0 .AND. output_unit > 0) THEN
693 0 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(4) TS", sum_data
694 : END IF
695 : END IF
696 :
697 : ! Exchange data ( transpose of matrix ) and sort
698 : CALL xz_to_yz(pbuf, rs_group, r_dim, g_pos, p2p, yzp, nyzray, &
699 0 : bo(:, :, :, 2), qbuf, fft_scratch)
700 :
701 0 : IF (test) THEN
702 0 : sum_data = ABS(SUM(qbuf))
703 0 : CALL gs_group%sum(sum_data)
704 0 : IF (g_pos == 0 .AND. output_unit > 0) THEN
705 0 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(5) TS", sum_data
706 : END IF
707 : END IF
708 :
709 : ! FFT along x
710 0 : CALL fft_1dm(fft_scratch%fft_plan(3), qbuf, gin, 1.0_dp, stat)
711 :
712 0 : IF (test) THEN
713 0 : sum_data = ABS(SUM(gin))
714 0 : CALL gs_group%sum(sum_data)
715 0 : IF (g_pos == 0 .AND. output_unit > 0) THEN
716 0 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(6) ", sum_data
717 : END IF
718 : END IF
719 :
720 0 : ELSE IF (sign == BWFFT) THEN
721 : ! gin -> cin
722 :
723 0 : IF (test) THEN
724 0 : sum_data = ABS(SUM(gin))
725 0 : CALL gs_group%sum(sum_data)
726 0 : IF (g_pos == 0 .AND. output_unit > 0) THEN
727 0 : WRITE (output_unit, '(A)') " Two step communication algorithm "
728 0 : WRITE (output_unit, '(A,T67,2I7)') " Transform X ", n(1), nyzray(g_pos)
729 0 : WRITE (output_unit, '(A,T60,3I7)') " Transform Y ", n(2), mx2*mz2
730 0 : WRITE (output_unit, '(A,T60,3I7)') " Transform Z ", n(3), mx1*my1
731 0 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(1) ", sum_data
732 : END IF
733 : END IF
734 :
735 0 : pbuf => fft_scratch%p7buf
736 :
737 : ! FFT along x
738 0 : CALL fft_1dm(fft_scratch%fft_plan(4), gin, pbuf, norm, stat)
739 :
740 0 : qbuf => fft_scratch%p4buf
741 :
742 0 : IF (test) THEN
743 0 : sum_data = ABS(SUM(pbuf))
744 0 : CALL gs_group%sum(sum_data)
745 0 : IF (g_pos == 0 .AND. output_unit > 0) THEN
746 0 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(2) TS", sum_data
747 : END IF
748 : END IF
749 :
750 : ! Exchange data ( transpose of matrix ) and sort
751 : CALL yz_to_xz(pbuf, rs_group, r_dim, g_pos, p2p, yzp, nyzray, &
752 0 : bo(:, :, :, 2), qbuf, fft_scratch)
753 :
754 0 : IF (test) THEN
755 0 : sum_data = ABS(SUM(qbuf))
756 0 : CALL gs_group%sum(sum_data)
757 0 : IF (g_pos == 0 .AND. output_unit > 0) THEN
758 0 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(3) TS", sum_data
759 : END IF
760 : END IF
761 :
762 0 : rbuf => fft_scratch%p3buf
763 :
764 : ! FFT along y
765 0 : CALL fft_1dm(fft_scratch%fft_plan(5), qbuf, rbuf, 1.0_dp, stat)
766 :
767 0 : pbuf => fft_scratch%p2buf
768 :
769 0 : IF (test) THEN
770 0 : sum_data = ABS(SUM(rbuf))
771 0 : CALL gs_group%sum(sum_data)
772 0 : IF (g_pos == 0 .AND. output_unit > 0) THEN
773 0 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(4) T", sum_data
774 : END IF
775 : END IF
776 :
777 : ! Exchange data ( transpose of matrix )
778 0 : CALL cube_transpose_1(rbuf, bo(:, :, :, 2), bo(:, :, :, 1), pbuf, fft_scratch)
779 :
780 0 : IF (test) THEN
781 0 : sum_data = ABS(SUM(pbuf))
782 0 : CALL gs_group%sum(sum_data)
783 0 : IF (g_pos == 0 .AND. output_unit > 0) THEN
784 0 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(5) T", sum_data
785 : END IF
786 : END IF
787 :
788 0 : qbuf => fft_scratch%p1buf
789 :
790 : ! FFT along z
791 0 : CALL fft_1dm(fft_scratch%fft_plan(6), pbuf, cin, 1.0_dp, stat)
792 :
793 0 : IF (test) THEN
794 0 : sum_data = ABS(SUM(cin))
795 0 : CALL gs_group%sum(sum_data)
796 0 : IF (g_pos == 0 .AND. output_unit > 0) THEN
797 0 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(6) ", sum_data
798 : END IF
799 : END IF
800 :
801 : ELSE
802 :
803 0 : CPABORT("Illegal fsign parameter.")
804 :
805 : END IF
806 :
807 0 : CALL release_fft_scratch(fft_scratch)
808 :
809 : ELSE
810 :
811 : !
812 : ! real space is only distributed over x coordinate
813 : ! we have one stage of communication, after the transform of
814 : ! direction x
815 : !
816 :
817 2615884 : CALL get_fft_scratch(fft_scratch, tf_type=200, n=n, fft_sizes=fft_scratch_size)
818 :
819 2615884 : sbuf => fft_scratch%r1buf
820 2615884 : tbuf => fft_scratch%tbuf
821 :
822 79693857614 : sbuf = z_zero
823 80072547383 : tbuf = z_zero
824 :
825 2615884 : IF (sign == FWFFT) THEN
826 : ! cin -> gin
827 :
828 1291630 : IF (test) THEN
829 9284 : sum_data = ABS(SUM(cin))
830 4 : CALL gs_group%sum(sum_data)
831 4 : IF (g_pos == 0 .AND. output_unit > 0) THEN
832 2 : WRITE (output_unit, '(A)') " One step communication algorithm "
833 2 : WRITE (output_unit, '(A,T60,3I7)') " Transform YZ ", n(2), n(3), nx
834 2 : WRITE (output_unit, '(A,T67,2I7)') " Transform X ", n(1), nyzray(g_pos)
835 2 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(1) ", sum_data
836 : END IF
837 : END IF
838 :
839 : ! FFT along y and z
840 1291630 : CALL fft_1dm(fft_scratch%fft_plan(1), cin, sbuf, 1._dp, stat)
841 1291630 : CALL fft_1dm(fft_scratch%fft_plan(2), sbuf, tbuf, 1._dp, stat)
842 :
843 1291630 : IF (test) THEN
844 8740 : sum_data = ABS(SUM(tbuf))
845 4 : CALL gs_group%sum(sum_data)
846 4 : IF (g_pos == 0 .AND. output_unit > 0) THEN
847 2 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(2) TS", sum_data
848 : END IF
849 : END IF
850 :
851 : ! Exchange data ( transpose of matrix ) and sort
852 : CALL yz_to_x(tbuf, gs_group, g_pos, p2p, yzp, nyzray, &
853 1291630 : bo(:, :, :, 2), sbuf, fft_scratch)
854 :
855 1291630 : IF (test) THEN
856 8776 : sum_data = ABS(SUM(sbuf))
857 4 : CALL gs_group%sum(sum_data)
858 4 : IF (g_pos == 0 .AND. output_unit > 0) THEN
859 2 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(3) TS", sum_data
860 : END IF
861 : END IF
862 : ! FFT along x
863 1291630 : CALL fft_1dm(fft_scratch%fft_plan(3), sbuf, gin, norm, stat)
864 :
865 1291630 : IF (test) THEN
866 8708 : sum_data = ABS(SUM(gin))
867 4 : CALL gs_group%sum(sum_data)
868 4 : IF (g_pos == 0 .AND. output_unit > 0) THEN
869 2 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(4) ", sum_data
870 : END IF
871 : END IF
872 :
873 1324254 : ELSE IF (sign == BWFFT) THEN
874 : ! gin -> cin
875 :
876 1324254 : IF (test) THEN
877 8708 : sum_data = ABS(SUM(gin))
878 4 : CALL gs_group%sum(sum_data)
879 4 : IF (g_pos == 0 .AND. output_unit > 0) THEN
880 2 : WRITE (output_unit, '(A)') " One step communication algorithm "
881 2 : WRITE (output_unit, '(A,T67,2I7)') " Transform X ", n(1), nyzray(g_pos)
882 2 : WRITE (output_unit, '(A,T60,3I7)') " Transform YZ ", n(2), n(3), nx
883 2 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(1) ", sum_data
884 : END IF
885 : END IF
886 :
887 : ! FFT along x
888 1324254 : CALL fft_1dm(fft_scratch%fft_plan(4), gin, sbuf, norm, stat)
889 :
890 1324254 : IF (test) THEN
891 8776 : sum_data = ABS(SUM(sbuf))
892 4 : CALL gs_group%sum(sum_data)
893 4 : IF (g_pos == 0 .AND. output_unit > 0) THEN
894 2 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(2) TS", sum_data
895 : END IF
896 : END IF
897 :
898 : ! Exchange data ( transpose of matrix ) and sort
899 : CALL x_to_yz(sbuf, gs_group, g_pos, p2p, yzp, nyzray, &
900 1324254 : bo(:, :, :, 2), tbuf, fft_scratch)
901 :
902 1324254 : IF (test) THEN
903 8740 : sum_data = ABS(SUM(tbuf))
904 4 : CALL gs_group%sum(sum_data)
905 4 : IF (g_pos == 0 .AND. output_unit > 0) THEN
906 2 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(3) TS", sum_data
907 : END IF
908 : END IF
909 :
910 : ! FFT along y and z
911 1324254 : CALL fft_1dm(fft_scratch%fft_plan(5), tbuf, sbuf, 1._dp, stat)
912 1324254 : CALL fft_1dm(fft_scratch%fft_plan(6), sbuf, cin, 1._dp, stat)
913 :
914 1324254 : IF (test) THEN
915 9284 : sum_data = ABS(SUM(cin))
916 4 : CALL gs_group%sum(sum_data)
917 4 : IF (g_pos == 0 .AND. output_unit > 0) THEN
918 2 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(4) ", sum_data
919 : END IF
920 : END IF
921 : ELSE
922 0 : CPABORT("Illegal fsign parameter.")
923 : END IF
924 :
925 2615884 : CALL release_fft_scratch(fft_scratch)
926 :
927 : END IF
928 :
929 2615884 : DEALLOCATE (p2p)
930 :
931 2615884 : IF (PRESENT(status)) THEN
932 0 : status = stat
933 : END IF
934 2615884 : CALL timestop(handle)
935 :
936 5231768 : END SUBROUTINE fft3d_ps
937 :
938 : ! **************************************************************************************************
939 : !> \brief ...
940 : !> \param fsign ...
941 : !> \param n ...
942 : !> \param zin ...
943 : !> \param gin ...
944 : !> \param group ...
945 : !> \param bo ...
946 : !> \param scale ...
947 : !> \param status ...
948 : !> \param debug ...
949 : ! **************************************************************************************************
950 208 : SUBROUTINE fft3d_pb(fsign, n, zin, gin, group, bo, scale, status, debug)
951 :
952 : INTEGER, INTENT(IN) :: fsign
953 : INTEGER, DIMENSION(3), INTENT(IN) :: n
954 : COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
955 : INTENT(INOUT) :: zin
956 : COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
957 : INTENT(INOUT) :: gin
958 : TYPE(mp_cart_type), INTENT(IN) :: group
959 : INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:, :), &
960 : INTENT(IN) :: bo
961 : REAL(KIND=dp), INTENT(IN), OPTIONAL :: scale
962 : INTEGER, INTENT(OUT), OPTIONAL :: status
963 : LOGICAL, INTENT(IN), OPTIONAL :: debug
964 :
965 : CHARACTER(len=*), PARAMETER :: routineN = 'fft3d_pb'
966 :
967 : COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
968 208 : POINTER :: abuf, bbuf
969 : INTEGER :: handle, lg(2), lz(3), mcx2, mcy3, mcz1, &
970 : mcz2, mx1, mx2, mx3, my1, my2, my3, &
971 : my_pos, mz1, mz2, mz3, output_unit, &
972 : sign, stat
973 : INTEGER, DIMENSION(2) :: dim
974 : LOGICAL :: test
975 : REAL(KIND=dp) :: norm, sum_data
976 1456 : TYPE(fft_scratch_sizes) :: fft_scratch_size
977 : TYPE(fft_scratch_type), POINTER :: fft_scratch
978 :
979 : !------------------------------------------------------------------------------
980 : ! "Real Space" 1) xyZ or 1) xYZ
981 : ! 2) xYz or not used
982 : ! "G Space" 3) Xyz or 3) XYz
983 : !
984 : ! There is one communicator (2-dimensional) for all distributions
985 : ! np = n1 * n2, where np is the total number of processors
986 : ! If n2 = 1, we have the second case and only one transpose step is needed
987 : !
988 : ! Assignment of dimensions to axis for different steps
989 : ! First case: 1) n1=x; n2=y
990 : ! 2) n1=x; n2=z
991 : ! 3) n1=y; n2=z
992 : ! Second case 1) n1=x
993 : ! 3) n1=z
994 : !
995 : ! The more general case with two communicators for the initial and final
996 : ! distribution is not covered.
997 : !------------------------------------------------------------------------------
998 :
999 208 : CALL timeset(routineN, handle)
1000 208 : output_unit = cp_logger_get_default_io_unit()
1001 :
1002 624 : dim = group%num_pe_cart
1003 208 : my_pos = group%mepos
1004 :
1005 208 : IF (PRESENT(debug)) THEN
1006 208 : test = debug
1007 : ELSE
1008 : test = .FALSE.
1009 : END IF
1010 :
1011 208 : IF (PRESENT(scale)) THEN
1012 208 : norm = scale
1013 : ELSE
1014 0 : norm = 1.0_dp
1015 : END IF
1016 :
1017 208 : sign = fsign
1018 :
1019 208 : IF (test) THEN
1020 8 : lg(1) = SIZE(gin, 1)
1021 8 : lg(2) = SIZE(gin, 2)
1022 8 : lz(1) = SIZE(zin, 1)
1023 8 : lz(2) = SIZE(zin, 2)
1024 8 : lz(3) = SIZE(zin, 3)
1025 8 : IF (my_pos == 0 .AND. output_unit > 0) THEN
1026 4 : WRITE (output_unit, '(A)') " Parallel 3D FFT : fft3d_pb"
1027 4 : WRITE (output_unit, '(A,T60,3I7)') " Transform lengths ", n
1028 4 : WRITE (output_unit, '(A,T67,2I7)') " Array dimensions (gin) ", lg
1029 4 : WRITE (output_unit, '(A,T60,3I7)') " Array dimensions (cin) ", lz
1030 : END IF
1031 : END IF
1032 :
1033 208 : mx1 = bo(2, 1, my_pos, 1) - bo(1, 1, my_pos, 1) + 1
1034 208 : my1 = bo(2, 2, my_pos, 1) - bo(1, 2, my_pos, 1) + 1
1035 208 : mz1 = bo(2, 3, my_pos, 1) - bo(1, 3, my_pos, 1) + 1
1036 208 : mx2 = bo(2, 1, my_pos, 2) - bo(1, 1, my_pos, 2) + 1
1037 208 : my2 = bo(2, 2, my_pos, 2) - bo(1, 2, my_pos, 2) + 1
1038 208 : mz2 = bo(2, 3, my_pos, 2) - bo(1, 3, my_pos, 2) + 1
1039 208 : mx3 = bo(2, 1, my_pos, 3) - bo(1, 1, my_pos, 3) + 1
1040 208 : my3 = bo(2, 2, my_pos, 3) - bo(1, 2, my_pos, 3) + 1
1041 208 : mz3 = bo(2, 3, my_pos, 3) - bo(1, 3, my_pos, 3) + 1
1042 208 : fft_scratch_size%mx1 = mx1
1043 208 : fft_scratch_size%mx2 = mx2
1044 208 : fft_scratch_size%mx3 = mx3
1045 208 : fft_scratch_size%my1 = my1
1046 208 : fft_scratch_size%my2 = my2
1047 208 : fft_scratch_size%my3 = my3
1048 208 : fft_scratch_size%mz1 = mz1
1049 208 : fft_scratch_size%mz2 = mz2
1050 208 : fft_scratch_size%mz3 = mz3
1051 624 : mcz1 = MAXVAL(bo(2, 3, :, 1) - bo(1, 3, :, 1) + 1)
1052 624 : mcx2 = MAXVAL(bo(2, 1, :, 2) - bo(1, 1, :, 2) + 1)
1053 624 : mcz2 = MAXVAL(bo(2, 3, :, 2) - bo(1, 3, :, 2) + 1)
1054 624 : mcy3 = MAXVAL(bo(2, 2, :, 3) - bo(1, 2, :, 3) + 1)
1055 208 : fft_scratch_size%mcz1 = mcz1
1056 208 : fft_scratch_size%mcx2 = mcx2
1057 208 : fft_scratch_size%mcz2 = mcz2
1058 208 : fft_scratch_size%mcy3 = mcy3
1059 208 : fft_scratch_size%gs_group = group
1060 208 : fft_scratch_size%rs_group = group
1061 624 : fft_scratch_size%g_pos = my_pos
1062 208 : fft_scratch_size%numtask = DIM(1)*DIM(2)
1063 :
1064 208 : IF (DIM(1) > 1 .AND. DIM(2) > 1) THEN
1065 :
1066 : !
1067 : ! First case; two stages of communication
1068 : !
1069 :
1070 0 : CALL get_fft_scratch(fft_scratch, tf_type=100, n=n, fft_sizes=fft_scratch_size)
1071 :
1072 0 : IF (sign == FWFFT) THEN
1073 : ! Stage 1 -> 3
1074 :
1075 0 : bbuf => fft_scratch%a2buf
1076 :
1077 0 : IF (test) THEN
1078 0 : sum_data = ABS(SUM(zin))
1079 0 : CALL group%sum(sum_data)
1080 0 : IF (my_pos == 0 .AND. output_unit > 0) THEN
1081 0 : WRITE (output_unit, '(A)') " Two step communication algorithm "
1082 0 : WRITE (output_unit, '(A,T67,2I7)') " Transform Z ", n(3), mx1*my1
1083 0 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(1) ", sum_data
1084 : END IF
1085 : END IF
1086 :
1087 : ! FFT along z
1088 0 : CALL fft_1dm(fft_scratch%fft_plan(1), zin, bbuf, norm, stat)
1089 :
1090 0 : abuf => fft_scratch%a3buf
1091 :
1092 0 : IF (test) THEN
1093 0 : sum_data = ABS(SUM(bbuf))
1094 0 : CALL group%sum(sum_data)
1095 0 : IF (my_pos == 0 .AND. output_unit > 0) THEN
1096 0 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(2) T", sum_data
1097 : END IF
1098 : END IF
1099 :
1100 0 : CALL cube_transpose_2(bbuf, bo(:, :, :, 1), bo(:, :, :, 2), abuf, fft_scratch)
1101 :
1102 0 : bbuf => fft_scratch%a4buf
1103 :
1104 0 : IF (test) THEN
1105 0 : sum_data = ABS(SUM(abuf))
1106 0 : CALL group%sum(sum_data)
1107 0 : IF (my_pos == 0 .AND. output_unit > 0) THEN
1108 0 : WRITE (output_unit, '(A,T67,2I7)') " Transform Y ", n(2), mx2*mz2
1109 0 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(3) ", sum_data
1110 : END IF
1111 : END IF
1112 :
1113 : ! FFT along y
1114 0 : CALL fft_1dm(fft_scratch%fft_plan(2), abuf, bbuf, 1.0_dp, stat)
1115 :
1116 0 : abuf => fft_scratch%a5buf
1117 :
1118 0 : IF (test) THEN
1119 0 : sum_data = ABS(SUM(bbuf))
1120 0 : CALL group%sum(sum_data)
1121 0 : IF (my_pos == 0 .AND. output_unit > 0) THEN
1122 0 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(4) T", sum_data
1123 : END IF
1124 : END IF
1125 :
1126 0 : CALL cube_transpose_4(bbuf, bo(:, :, :, 2), bo(:, :, :, 3), abuf, fft_scratch)
1127 :
1128 0 : IF (test) THEN
1129 0 : sum_data = ABS(SUM(abuf))
1130 0 : CALL group%sum(sum_data)
1131 0 : IF (my_pos == 0 .AND. output_unit > 0) THEN
1132 0 : WRITE (output_unit, '(A,T67,2I7)') " Transform X ", n(1), my3*mz3
1133 0 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(5) ", sum_data
1134 : END IF
1135 : END IF
1136 :
1137 : ! FFT along x
1138 0 : CALL fft_1dm(fft_scratch%fft_plan(3), abuf, gin, 1.0_dp, stat)
1139 :
1140 0 : IF (test) THEN
1141 0 : sum_data = ABS(SUM(gin))
1142 0 : CALL group%sum(sum_data)
1143 0 : IF (my_pos == 0 .AND. output_unit > 0) THEN
1144 0 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(6) ", sum_data
1145 : END IF
1146 : END IF
1147 :
1148 0 : ELSEIF (sign == BWFFT) THEN
1149 : ! Stage 3 -> 1
1150 :
1151 0 : bbuf => fft_scratch%a5buf
1152 :
1153 0 : IF (test) THEN
1154 0 : sum_data = ABS(SUM(gin))
1155 0 : CALL group%sum(sum_data)
1156 0 : IF (my_pos == 0 .AND. output_unit > 0) THEN
1157 0 : WRITE (output_unit, '(A)') " Two step communication algorithm "
1158 0 : WRITE (output_unit, '(A,T67,2I7)') " Transform X ", n(1), my3*mz3
1159 0 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(1) ", sum_data
1160 : END IF
1161 : END IF
1162 :
1163 : ! FFT along x
1164 0 : CALL fft_1dm(fft_scratch%fft_plan(4), gin, bbuf, 1.0_dp, stat)
1165 :
1166 0 : abuf => fft_scratch%a4buf
1167 :
1168 0 : IF (test) THEN
1169 0 : sum_data = ABS(SUM(bbuf))
1170 0 : CALL group%sum(sum_data)
1171 0 : IF (my_pos == 0 .AND. output_unit > 0) THEN
1172 0 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(2) T", sum_data
1173 : END IF
1174 : END IF
1175 :
1176 0 : CALL cube_transpose_3(bbuf, bo(:, :, :, 3), bo(:, :, :, 2), abuf, fft_scratch)
1177 :
1178 0 : bbuf => fft_scratch%a3buf
1179 :
1180 0 : IF (test) THEN
1181 0 : sum_data = ABS(SUM(abuf))
1182 0 : CALL group%sum(sum_data)
1183 0 : IF (my_pos == 0 .AND. output_unit > 0) THEN
1184 0 : WRITE (output_unit, '(A,T67,2I7)') " Transform Y ", n(2), mx2*mz2
1185 0 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(3) ", sum_data
1186 : END IF
1187 : END IF
1188 :
1189 : ! FFT along y
1190 0 : CALL fft_1dm(fft_scratch%fft_plan(5), abuf, bbuf, 1.0_dp, stat)
1191 :
1192 0 : abuf => fft_scratch%a2buf
1193 :
1194 0 : IF (test) THEN
1195 0 : sum_data = ABS(SUM(bbuf))
1196 0 : CALL group%sum(sum_data)
1197 0 : IF (my_pos == 0 .AND. output_unit > 0) THEN
1198 0 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(4) T", sum_data
1199 : END IF
1200 : END IF
1201 :
1202 0 : CALL cube_transpose_1(bbuf, bo(:, :, :, 2), bo(:, :, :, 1), abuf, fft_scratch)
1203 :
1204 0 : IF (test) THEN
1205 0 : sum_data = ABS(SUM(abuf))
1206 0 : CALL group%sum(sum_data)
1207 0 : IF (my_pos == 0 .AND. output_unit > 0) THEN
1208 0 : WRITE (output_unit, '(A,T67,2I7)') " Transform Z ", n(3), mx1*my1
1209 0 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(5) ", sum_data
1210 : END IF
1211 : END IF
1212 :
1213 : ! FFT along z
1214 0 : CALL fft_1dm(fft_scratch%fft_plan(6), abuf, zin, norm, stat)
1215 :
1216 0 : IF (test) THEN
1217 0 : sum_data = ABS(SUM(zin))
1218 0 : CALL group%sum(sum_data)
1219 0 : IF (my_pos == 0 .AND. output_unit > 0) THEN
1220 0 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(6) ", sum_data
1221 : END IF
1222 : END IF
1223 :
1224 : ELSE
1225 0 : CPABORT("Illegal fsign parameter.")
1226 : END IF
1227 :
1228 0 : CALL release_fft_scratch(fft_scratch)
1229 :
1230 208 : ELSEIF (DIM(2) == 1) THEN
1231 :
1232 : !
1233 : ! Second case; one stage of communication
1234 : !
1235 :
1236 208 : CALL get_fft_scratch(fft_scratch, tf_type=101, n=n, fft_sizes=fft_scratch_size)
1237 :
1238 208 : IF (sign == FWFFT) THEN
1239 : ! Stage 1 -> 3
1240 :
1241 104 : IF (test) THEN
1242 9284 : sum_data = ABS(SUM(zin))
1243 4 : CALL group%sum(sum_data)
1244 4 : IF (my_pos == 0 .AND. output_unit > 0) THEN
1245 2 : WRITE (output_unit, '(A)') " one step communication algorithm "
1246 2 : WRITE (output_unit, '(A,T67,2I7)') " Transform Z ", n(3), mx1*my1
1247 2 : WRITE (output_unit, '(A,T67,2I7)') " Transform Y ", n(2), mx1*mz1
1248 2 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(1) ", sum_data
1249 : END IF
1250 : END IF
1251 :
1252 104 : abuf => fft_scratch%a3buf
1253 104 : bbuf => fft_scratch%a4buf
1254 : ! FFT along z and y
1255 104 : CALL fft_1dm(fft_scratch%fft_plan(1), zin, abuf, norm, stat)
1256 104 : CALL fft_1dm(fft_scratch%fft_plan(2), abuf, bbuf, 1.0_dp, stat)
1257 :
1258 104 : abuf => fft_scratch%a5buf
1259 :
1260 104 : IF (test) THEN
1261 8708 : sum_data = ABS(SUM(bbuf))
1262 4 : CALL group%sum(sum_data)
1263 4 : IF (my_pos == 0 .AND. output_unit > 0) THEN
1264 2 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(2) T", sum_data
1265 : END IF
1266 : END IF
1267 :
1268 104 : CALL cube_transpose_6(bbuf, group, bo(:, :, :, 1), bo(:, :, :, 3), abuf, fft_scratch)
1269 :
1270 104 : IF (test) THEN
1271 8260 : sum_data = ABS(SUM(abuf))
1272 4 : CALL group%sum(sum_data)
1273 4 : IF (my_pos == 0 .AND. output_unit > 0) THEN
1274 2 : WRITE (output_unit, '(A,T67,2I7)') " Transform X ", n(1), my3*mz3
1275 2 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(3) ", sum_data
1276 : END IF
1277 : END IF
1278 :
1279 : ! FFT along x
1280 104 : CALL fft_1dm(fft_scratch%fft_plan(3), abuf, gin, 1.0_dp, stat)
1281 :
1282 104 : IF (test) THEN
1283 8708 : sum_data = ABS(SUM(gin))
1284 4 : CALL group%sum(sum_data)
1285 4 : IF (my_pos == 0 .AND. output_unit > 0) THEN
1286 2 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(4) ", sum_data
1287 : END IF
1288 : END IF
1289 :
1290 104 : ELSEIF (sign == BWFFT) THEN
1291 : ! Stage 3 -> 1
1292 :
1293 104 : IF (test) THEN
1294 8708 : sum_data = ABS(SUM(gin))
1295 4 : CALL group%sum(sum_data)
1296 4 : IF (my_pos == 0 .AND. output_unit > 0) THEN
1297 2 : WRITE (output_unit, '(A)') " one step communication algorithm "
1298 2 : WRITE (output_unit, '(A,T67,2I7)') " Transform X ", n(1), my3*mz3
1299 2 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(1) ", sum_data
1300 : END IF
1301 : END IF
1302 :
1303 104 : bbuf => fft_scratch%a5buf
1304 :
1305 : ! FFT along x
1306 104 : CALL fft_1dm(fft_scratch%fft_plan(4), gin, bbuf, 1.0_dp, stat)
1307 :
1308 104 : abuf => fft_scratch%a4buf
1309 :
1310 104 : IF (test) THEN
1311 8260 : sum_data = ABS(SUM(bbuf))
1312 4 : CALL group%sum(sum_data)
1313 4 : IF (my_pos == 0 .AND. output_unit > 0) THEN
1314 2 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(2) T", sum_data
1315 : END IF
1316 : END IF
1317 :
1318 104 : CALL cube_transpose_5(bbuf, group, bo(:, :, :, 3), bo(:, :, :, 1), abuf, fft_scratch)
1319 :
1320 104 : bbuf => fft_scratch%a3buf
1321 :
1322 104 : IF (test) THEN
1323 8708 : sum_data = ABS(SUM(abuf))
1324 4 : CALL group%sum(sum_data)
1325 4 : IF (my_pos == 0 .AND. output_unit > 0) THEN
1326 2 : WRITE (output_unit, '(A,T67,2I7)') " Transform Y ", n(2), mx1*mz1
1327 2 : WRITE (output_unit, '(A,T67,2I7)') " Transform Z ", n(3), mx1*my1
1328 2 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(3) ", sum_data
1329 : END IF
1330 : END IF
1331 :
1332 : ! FFT along y
1333 104 : CALL fft_1dm(fft_scratch%fft_plan(5), abuf, bbuf, 1.0_dp, stat)
1334 :
1335 : ! FFT along z
1336 104 : CALL fft_1dm(fft_scratch%fft_plan(6), bbuf, zin, norm, stat)
1337 :
1338 104 : IF (test) THEN
1339 9284 : sum_data = ABS(SUM(zin))
1340 4 : CALL group%sum(sum_data)
1341 4 : IF (my_pos == 0 .AND. output_unit > 0) THEN
1342 2 : WRITE (output_unit, '(A,T61,E20.14)') " Sum of data(4) ", sum_data
1343 : END IF
1344 : END IF
1345 :
1346 : ELSE
1347 0 : CPABORT("Illegal fsign parameter.")
1348 : END IF
1349 :
1350 208 : CALL release_fft_scratch(fft_scratch)
1351 :
1352 : ELSE
1353 :
1354 0 : CPABORT("This partition not implemented.")
1355 :
1356 : END IF
1357 :
1358 208 : IF (PRESENT(status)) THEN
1359 0 : status = stat
1360 : END IF
1361 :
1362 208 : CALL timestop(handle)
1363 :
1364 416 : END SUBROUTINE fft3d_pb
1365 :
1366 : ! **************************************************************************************************
1367 : !> \brief ...
1368 : !> \param sb ...
1369 : !> \param group ...
1370 : !> \param my_pos ...
1371 : !> \param p2p ...
1372 : !> \param yzp ...
1373 : !> \param nray ...
1374 : !> \param bo ...
1375 : !> \param tb ...
1376 : !> \param fft_scratch ...
1377 : !> \par History
1378 : !> 15. Feb. 2006 : single precision all_to_all
1379 : !> \author JGH (14-Jan-2001)
1380 : ! **************************************************************************************************
1381 1324254 : SUBROUTINE x_to_yz(sb, group, my_pos, p2p, yzp, nray, bo, tb, fft_scratch)
1382 :
1383 : COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
1384 : INTENT(IN) :: sb
1385 : TYPE(mp_comm_type), INTENT(IN) :: group
1386 : INTEGER, INTENT(IN) :: my_pos
1387 : INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN) :: p2p
1388 : INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
1389 : INTENT(IN) :: yzp
1390 : INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN) :: nray
1391 : INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
1392 : INTENT(IN) :: bo
1393 : COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
1394 : INTENT(INOUT) :: tb
1395 : TYPE(fft_scratch_type), INTENT(IN) :: fft_scratch
1396 :
1397 : CHARACTER(len=*), PARAMETER :: routineN = 'x_to_yz'
1398 :
1399 : COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
1400 1324254 : POINTER :: rr
1401 : COMPLEX(KIND=sp), CONTIGUOUS, DIMENSION(:, :), &
1402 1324254 : POINTER :: ss, tt
1403 : INTEGER :: handle, ip, ir, ix, ixx, iy, iz, mpr, &
1404 : nm, np, nr, nx
1405 1324254 : INTEGER, CONTIGUOUS, DIMENSION(:), POINTER :: rcount, rdispl, scount, sdispl
1406 :
1407 1324254 : CALL timeset(routineN, handle)
1408 :
1409 1324254 : np = SIZE(p2p)
1410 1324254 : scount => fft_scratch%scount
1411 1324254 : rcount => fft_scratch%rcount
1412 1324254 : sdispl => fft_scratch%sdispl
1413 1324254 : rdispl => fft_scratch%rdispl
1414 :
1415 1324254 : IF (alltoall_sgl) THEN
1416 230 : ss => fft_scratch%ss
1417 230 : tt => fft_scratch%tt
1418 3441099 : ss(:, :) = CMPLX(sb(:, :), KIND=sp)
1419 3353860 : tt(:, :) = 0._sp
1420 : ELSE
1421 1324024 : rr => fft_scratch%rr
1422 : END IF
1423 :
1424 1324254 : mpr = p2p(my_pos)
1425 3972762 : nm = MAXVAL(nray(0:np - 1))
1426 1324254 : nr = nray(my_pos)
1427 : !$OMP PARALLEL DO DEFAULT(NONE), &
1428 : !$OMP PRIVATE(ix,nx), &
1429 1324254 : !$OMP SHARED(np,p2p,bo,nr,scount,sdispl)
1430 : DO ip = 0, np - 1
1431 : ix = p2p(ip)
1432 : nx = bo(2, 1, ix) - bo(1, 1, ix) + 1
1433 : scount(ip) = nr*nx
1434 : sdispl(ip) = nr*(bo(1, 1, ix) - 1)
1435 : END DO
1436 : !$OMP END PARALLEL DO
1437 1324254 : nx = bo(2, 1, mpr) - bo(1, 1, mpr) + 1
1438 : !$OMP PARALLEL DO DEFAULT(NONE), &
1439 : !$OMP PRIVATE(nr), &
1440 1324254 : !$OMP SHARED(np,nray,nx,rcount,rdispl,nm)
1441 : DO ip = 0, np - 1
1442 : nr = nray(ip)
1443 : rcount(ip) = nr*nx
1444 : rdispl(ip) = nm*nx*ip
1445 : END DO
1446 : !$OMP END PARALLEL DO
1447 1324254 : IF (alltoall_sgl) THEN
1448 230 : CALL group%alltoall(ss, scount, sdispl, tt, rcount, rdispl)
1449 : ELSE
1450 1324024 : CALL group%alltoall(sb, scount, sdispl, rr, rcount, rdispl)
1451 : END IF
1452 :
1453 1324254 : nx = bo(2, 1, mpr) - bo(1, 1, mpr) + 1
1454 : !$OMP PARALLEL DO DEFAULT(NONE) COLLAPSE(2) &
1455 : !$OMP PRIVATE(ixx,ir,iy,iz,ix) &
1456 1324254 : !$OMP SHARED(np,nray,nx,alltoall_sgl,yzp,tt,rr,tb)
1457 : DO ip = 0, np - 1
1458 : DO ix = 1, nx
1459 : ixx = nray(ip)*(ix - 1)
1460 : IF (alltoall_sgl) THEN
1461 : DO ir = 1, nray(ip)
1462 : iy = yzp(1, ir, ip)
1463 : iz = yzp(2, ir, ip)
1464 : tb(iy, iz, ix) = tt(ir + ixx, ip)
1465 : END DO
1466 : ELSE
1467 : DO ir = 1, nray(ip)
1468 : iy = yzp(1, ir, ip)
1469 : iz = yzp(2, ir, ip)
1470 : tb(iy, iz, ix) = rr(ir + ixx, ip)
1471 : END DO
1472 : END IF
1473 : END DO
1474 : END DO
1475 : !$OMP END PARALLEL DO
1476 :
1477 1324254 : CALL timestop(handle)
1478 :
1479 1324254 : END SUBROUTINE x_to_yz
1480 :
1481 : ! **************************************************************************************************
1482 : !> \brief ...
1483 : !> \param tb ...
1484 : !> \param group ...
1485 : !> \param my_pos ...
1486 : !> \param p2p ...
1487 : !> \param yzp ...
1488 : !> \param nray ...
1489 : !> \param bo ...
1490 : !> \param sb ...
1491 : !> \param fft_scratch ...
1492 : !> \par History
1493 : !> 15. Feb. 2006 : single precision all_to_all
1494 : !> \author JGH (14-Jan-2001)
1495 : ! **************************************************************************************************
1496 1291630 : SUBROUTINE yz_to_x(tb, group, my_pos, p2p, yzp, nray, bo, sb, fft_scratch)
1497 :
1498 : COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :, :), &
1499 : INTENT(IN) :: tb
1500 : TYPE(mp_comm_type), INTENT(IN) :: group
1501 : INTEGER, INTENT(IN) :: my_pos
1502 : INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN) :: p2p
1503 : INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
1504 : INTENT(IN) :: yzp
1505 : INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN) :: nray
1506 : INTEGER, DIMENSION(:, :, 0:), INTENT(IN) :: bo
1507 : COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
1508 : INTENT(INOUT) :: sb
1509 : TYPE(fft_scratch_type), INTENT(IN) :: fft_scratch
1510 :
1511 : CHARACTER(len=*), PARAMETER :: routineN = 'yz_to_x'
1512 :
1513 : COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
1514 1291630 : POINTER :: rr
1515 : COMPLEX(KIND=sp), CONTIGUOUS, DIMENSION(:, :), &
1516 1291630 : POINTER :: ss, tt
1517 : INTEGER :: handle, ip, ir, ix, ixx, iy, iz, mpr, &
1518 : nm, np, nr, nx
1519 1291630 : INTEGER, CONTIGUOUS, DIMENSION(:), POINTER :: rcount, rdispl, scount, sdispl
1520 :
1521 1291630 : CALL timeset(routineN, handle)
1522 :
1523 1291630 : np = SIZE(p2p)
1524 1291630 : mpr = p2p(my_pos)
1525 1291630 : scount => fft_scratch%scount
1526 1291630 : rcount => fft_scratch%rcount
1527 1291630 : sdispl => fft_scratch%sdispl
1528 1291630 : rdispl => fft_scratch%rdispl
1529 :
1530 1291630 : IF (alltoall_sgl) THEN
1531 238 : ss => fft_scratch%ss
1532 238 : tt => fft_scratch%tt
1533 3703835 : ss = 0._sp
1534 3609884 : tt = 0._sp
1535 : ELSE
1536 1291392 : rr => fft_scratch%rr
1537 : END IF
1538 :
1539 1291630 : nx = bo(2, 1, mpr) - bo(1, 1, mpr) + 1
1540 : !$OMP PARALLEL DO DEFAULT(NONE) COLLAPSE(2) &
1541 : !$OMP PRIVATE(ip, ixx, ir, iy, iz, ix) &
1542 1291630 : !$OMP SHARED(np,nray,nx,alltoall_sgl,yzp,tb,tt,rr)
1543 : DO ip = 0, np - 1
1544 : DO ix = 1, nx
1545 : ixx = nray(ip)*(ix - 1)
1546 : IF (alltoall_sgl) THEN
1547 : DO ir = 1, nray(ip)
1548 : iy = yzp(1, ir, ip)
1549 : iz = yzp(2, ir, ip)
1550 : tt(ir + ixx, ip) = CMPLX(tb(iy, iz, ix), KIND=sp)
1551 : END DO
1552 : ELSE
1553 : DO ir = 1, nray(ip)
1554 : iy = yzp(1, ir, ip)
1555 : iz = yzp(2, ir, ip)
1556 : rr(ir + ixx, ip) = tb(iy, iz, ix)
1557 : END DO
1558 : END IF
1559 : END DO
1560 : END DO
1561 : !$OMP END PARALLEL DO
1562 3874890 : nm = MAXVAL(nray(0:np - 1))
1563 1291630 : nr = nray(my_pos)
1564 : !$OMP PARALLEL DO DEFAULT(NONE), &
1565 : !$OMP PRIVATE(ix,nx), &
1566 1291630 : !$OMP SHARED(np,p2p,bo,rcount,rdispl,nr)
1567 : DO ip = 0, np - 1
1568 : ix = p2p(ip)
1569 : nx = bo(2, 1, ix) - bo(1, 1, ix) + 1
1570 : rcount(ip) = nr*nx
1571 : rdispl(ip) = nr*(bo(1, 1, ix) - 1)
1572 : END DO
1573 : !$OMP END PARALLEL DO
1574 1291630 : nx = bo(2, 1, mpr) - bo(1, 1, mpr) + 1
1575 : !$OMP PARALLEL DO DEFAULT(NONE), &
1576 : !$OMP PRIVATE(nr), &
1577 1291630 : !$OMP SHARED(np,nray,scount,sdispl,nx,nm)
1578 : DO ip = 0, np - 1
1579 : nr = nray(ip)
1580 : scount(ip) = nr*nx
1581 : sdispl(ip) = nm*nx*ip
1582 : END DO
1583 : !$OMP END PARALLEL DO
1584 :
1585 1291630 : IF (alltoall_sgl) THEN
1586 238 : CALL group%alltoall(tt, scount, sdispl, ss, rcount, rdispl)
1587 3703835 : sb = ss
1588 : ELSE
1589 1291392 : CALL group%alltoall(rr, scount, sdispl, sb, rcount, rdispl)
1590 : END IF
1591 :
1592 1291630 : CALL timestop(handle)
1593 :
1594 1291630 : END SUBROUTINE yz_to_x
1595 :
1596 : ! **************************************************************************************************
1597 : !> \brief ...
1598 : !> \param sb ...
1599 : !> \param group ...
1600 : !> \param dims ...
1601 : !> \param my_pos ...
1602 : !> \param p2p ...
1603 : !> \param yzp ...
1604 : !> \param nray ...
1605 : !> \param bo ...
1606 : !> \param tb ...
1607 : !> \param fft_scratch ...
1608 : !> \par History
1609 : !> 15. Feb. 2006 : single precision all_to_all
1610 : !> \author JGH (18-Jan-2001)
1611 : ! **************************************************************************************************
1612 0 : SUBROUTINE yz_to_xz(sb, group, dims, my_pos, p2p, yzp, nray, bo, tb, fft_scratch)
1613 :
1614 : COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
1615 : INTENT(IN) :: sb
1616 :
1617 : CLASS(mp_comm_type), INTENT(IN) :: group
1618 : INTEGER, DIMENSION(2), INTENT(IN) :: dims
1619 : INTEGER, INTENT(IN) :: my_pos
1620 : INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN) :: p2p
1621 : INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), INTENT(IN) :: yzp
1622 : INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN) :: nray
1623 : INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), INTENT(IN) :: bo
1624 : COMPLEX(KIND=dp), DIMENSION(:, :), INTENT(INOUT), CONTIGUOUS :: tb
1625 : TYPE(fft_scratch_type), INTENT(INOUT) :: fft_scratch
1626 :
1627 : CHARACTER(len=*), PARAMETER :: routineN = 'yz_to_xz'
1628 :
1629 0 : COMPLEX(KIND=dp), DIMENSION(:), POINTER, CONTIGUOUS :: xzbuf, yzbuf
1630 0 : COMPLEX(KIND=sp), DIMENSION(:), POINTER, CONTIGUOUS :: xzbuf_sgl, yzbuf_sgl
1631 : INTEGER :: handle, icrs, ip, ipl, ipr, ir, ix, iz, &
1632 : jj, jx, jy, jz, myx, myz, np, npx, &
1633 : npz, nx, nz, rs_pos
1634 0 : INTEGER, DIMENSION(:), POINTER, CONTIGUOUS :: pzcoord, rcount, rdispl, scount, sdispl, &
1635 0 : xcor, zcor
1636 0 : INTEGER, DIMENSION(:, :), CONTIGUOUS, POINTER :: pgrid
1637 :
1638 0 : CALL timeset(routineN, handle)
1639 :
1640 0 : np = SIZE(p2p)
1641 :
1642 0 : rs_pos = p2p(my_pos)
1643 :
1644 0 : IF (alltoall_sgl) THEN
1645 0 : yzbuf_sgl => fft_scratch%yzbuf_sgl
1646 0 : xzbuf_sgl => fft_scratch%xzbuf_sgl
1647 : ELSE
1648 0 : yzbuf => fft_scratch%yzbuf
1649 0 : xzbuf => fft_scratch%xzbuf
1650 : END IF
1651 0 : npx = dims(1)
1652 0 : npz = dims(2)
1653 0 : pgrid => fft_scratch%pgrid
1654 0 : xcor => fft_scratch%xcor
1655 0 : zcor => fft_scratch%zcor
1656 0 : pzcoord => fft_scratch%pzcoord
1657 0 : scount => fft_scratch%scount
1658 0 : rcount => fft_scratch%rcount
1659 0 : sdispl => fft_scratch%sdispl
1660 0 : rdispl => fft_scratch%rdispl
1661 :
1662 0 : nx = SIZE(sb, 2)
1663 :
1664 : ! If the send and recv counts are not already cached, then
1665 : ! calculate and store them
1666 0 : IF (fft_scratch%in == 0) THEN
1667 :
1668 0 : scount = 0
1669 :
1670 0 : DO ix = 0, npx - 1
1671 0 : ip = pgrid(ix, 0)
1672 0 : xcor(bo(1, 1, ip):bo(2, 1, ip)) = ix
1673 : END DO
1674 0 : DO iz = 0, npz - 1
1675 0 : ip = pgrid(0, iz)
1676 0 : zcor(bo(1, 3, ip):bo(2, 3, ip)) = iz
1677 : END DO
1678 0 : DO jx = 1, nx
1679 0 : IF (alltoall_sgl) THEN
1680 0 : DO ir = 1, nray(my_pos)
1681 0 : jy = yzp(1, ir, my_pos)
1682 0 : jz = yzp(2, ir, my_pos)
1683 0 : ip = pgrid(xcor(jx), zcor(jz))
1684 0 : scount(ip) = scount(ip) + 1
1685 : END DO
1686 : ELSE
1687 0 : DO ir = 1, nray(my_pos)
1688 0 : jy = yzp(1, ir, my_pos)
1689 0 : jz = yzp(2, ir, my_pos)
1690 0 : ip = pgrid(xcor(jx), zcor(jz))
1691 0 : scount(ip) = scount(ip) + 1
1692 : END DO
1693 : END IF
1694 : END DO
1695 :
1696 0 : CALL group%alltoall(scount, rcount, 1)
1697 0 : fft_scratch%yzcount = scount
1698 0 : fft_scratch%xzcount = rcount
1699 :
1700 : ! Work out the correct displacements in the buffers
1701 0 : sdispl(0) = 0
1702 0 : rdispl(0) = 0
1703 0 : DO ip = 1, np - 1
1704 0 : sdispl(ip) = sdispl(ip - 1) + scount(ip - 1)
1705 0 : rdispl(ip) = rdispl(ip - 1) + rcount(ip - 1)
1706 : END DO
1707 :
1708 0 : fft_scratch%yzdispl = sdispl
1709 0 : fft_scratch%xzdispl = rdispl
1710 :
1711 0 : icrs = 0
1712 0 : DO ip = 0, np - 1
1713 0 : IF (scount(ip) /= 0) icrs = icrs + 1
1714 0 : IF (rcount(ip) /= 0) icrs = icrs + 1
1715 : END DO
1716 0 : CALL group%sum(icrs)
1717 0 : fft_scratch%rsratio = REAL(icrs, KIND=dp)/(REAL(2*np, KIND=dp)*REAL(np, KIND=dp))
1718 :
1719 0 : fft_scratch%in = 1
1720 : ELSE
1721 0 : scount = fft_scratch%yzcount
1722 0 : rcount = fft_scratch%xzcount
1723 0 : sdispl = fft_scratch%yzdispl
1724 0 : rdispl = fft_scratch%xzdispl
1725 : END IF
1726 :
1727 : ! Do the actual packing
1728 : !$OMP PARALLEL DO DEFAULT(NONE), &
1729 : !$OMP PRIVATE(ipl,jj,nx,ir,jx,jy,jz),&
1730 : !$OMP SHARED(np,p2p,pzcoord,bo,nray,yzp,zcor),&
1731 : !$OMP SHARED(yzbuf,sb,scount,sdispl,my_pos),&
1732 0 : !$OMP SHARED(yzbuf_sgl,alltoall_sgl)
1733 : DO ip = 0, np - 1
1734 : IF (scount(ip) == 0) CYCLE
1735 : ipl = p2p(ip)
1736 : jj = 0
1737 : nx = bo(2, 1, ipl) - bo(1, 1, ipl) + 1
1738 : DO ir = 1, nray(my_pos)
1739 : jz = yzp(2, ir, my_pos)
1740 : IF (zcor(jz) == pzcoord(ipl)) THEN
1741 : jj = jj + 1
1742 : jy = yzp(1, ir, my_pos)
1743 : IF (alltoall_sgl) THEN
1744 : DO jx = 0, nx - 1
1745 : yzbuf_sgl(sdispl(ip) + jj + jx*scount(ip)/nx) = CMPLX(sb(ir, jx + bo(1, 1, ipl)), KIND=sp)
1746 : END DO
1747 : ELSE
1748 : DO jx = 0, nx - 1
1749 : yzbuf(sdispl(ip) + jj + jx*scount(ip)/nx) = sb(ir, jx + bo(1, 1, ipl))
1750 : END DO
1751 : END IF
1752 : END IF
1753 : END DO
1754 : END DO
1755 : !$OMP END PARALLEL DO
1756 :
1757 0 : IF (alltoall_sgl) THEN
1758 0 : CALL group%alltoall(yzbuf_sgl, scount, sdispl, xzbuf_sgl, rcount, rdispl)
1759 : ELSE
1760 0 : IF (fft_scratch%rsratio < ratio_sparse_alltoall) THEN
1761 0 : CALL sparse_alltoall(yzbuf, scount, sdispl, xzbuf, rcount, rdispl, group)
1762 : ELSE
1763 0 : CALL group%alltoall(yzbuf, scount, sdispl, xzbuf, rcount, rdispl)
1764 : END IF
1765 : END IF
1766 :
1767 0 : myx = fft_scratch%sizes%r_pos(1)
1768 0 : myz = fft_scratch%sizes%r_pos(2)
1769 0 : nz = bo(2, 3, rs_pos) - bo(1, 3, rs_pos) + 1
1770 :
1771 : !$OMP PARALLEL DO DEFAULT(NONE), &
1772 : !$OMP PRIVATE(ipr,jj,ir,jx,jy,jz),&
1773 : !$OMP SHARED(tb,np,p2p,bo,rs_pos,nray),&
1774 : !$OMP SHARED(yzp,alltoall_sgl,zcor,myz),&
1775 0 : !$OMP SHARED(xzbuf,xzbuf_sgl,nz,rdispl)
1776 : DO ip = 0, np - 1
1777 : ipr = p2p(ip)
1778 : jj = 0
1779 : DO jx = 0, bo(2, 1, rs_pos) - bo(1, 1, rs_pos)
1780 : DO ir = 1, nray(ip)
1781 : jz = yzp(2, ir, ip)
1782 : IF (alltoall_sgl) THEN
1783 : IF (zcor(jz) == myz) THEN
1784 : jj = jj + 1
1785 : jy = yzp(1, ir, ip)
1786 : jz = jz - bo(1, 3, rs_pos) + 1
1787 : tb(jy, jz + jx*nz) = xzbuf_sgl(jj + rdispl(ipr))
1788 : END IF
1789 : ELSE
1790 : IF (zcor(jz) == myz) THEN
1791 : jj = jj + 1
1792 : jy = yzp(1, ir, ip)
1793 : jz = jz - bo(1, 3, rs_pos) + 1
1794 : tb(jy, jz + jx*nz) = xzbuf(jj + rdispl(ipr))
1795 : END IF
1796 : END IF
1797 : END DO
1798 : END DO
1799 : END DO
1800 : !$OMP END PARALLEL DO
1801 :
1802 0 : CALL timestop(handle)
1803 :
1804 0 : END SUBROUTINE yz_to_xz
1805 :
1806 : ! **************************************************************************************************
1807 : !> \brief ...
1808 : !> \param sb ...
1809 : !> \param group ...
1810 : !> \param dims ...
1811 : !> \param my_pos ...
1812 : !> \param p2p ...
1813 : !> \param yzp ...
1814 : !> \param nray ...
1815 : !> \param bo ...
1816 : !> \param tb ...
1817 : !> \param fft_scratch ...
1818 : !> \par History
1819 : !> 15. Feb. 2006 : single precision all_to_all
1820 : !> \author JGH (19-Jan-2001)
1821 : ! **************************************************************************************************
1822 0 : SUBROUTINE xz_to_yz(sb, group, dims, my_pos, p2p, yzp, nray, bo, tb, fft_scratch)
1823 :
1824 : COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
1825 : INTENT(IN) :: sb
1826 :
1827 : CLASS(mp_comm_type), INTENT(IN) :: group
1828 : INTEGER, DIMENSION(2), INTENT(IN) :: dims
1829 : INTEGER, INTENT(IN) :: my_pos
1830 : INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN) :: p2p
1831 : INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), INTENT(IN) :: yzp
1832 : INTEGER, CONTIGUOUS, DIMENSION(0:), INTENT(IN) :: nray
1833 : INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), INTENT(IN) :: bo
1834 : COMPLEX(KIND=dp), DIMENSION(:, :), INTENT(INOUT), CONTIGUOUS :: tb
1835 : TYPE(fft_scratch_type), INTENT(INOUT) :: fft_scratch
1836 :
1837 : CHARACTER(len=*), PARAMETER :: routineN = 'xz_to_yz'
1838 :
1839 0 : COMPLEX(KIND=dp), DIMENSION(:), POINTER, CONTIGUOUS :: xzbuf, yzbuf
1840 0 : COMPLEX(KIND=sp), DIMENSION(:), POINTER, CONTIGUOUS :: xzbuf_sgl, yzbuf_sgl
1841 : INTEGER :: handle, icrs, ip, ipl, ir, ix, ixx, iz, &
1842 : jj, jx, jy, jz, mp, myx, myz, np, npx, &
1843 : npz, nx, nz
1844 0 : INTEGER, DIMENSION(:), POINTER, CONTIGUOUS :: pzcoord, rcount, rdispl, scount, sdispl, &
1845 0 : xcor, zcor
1846 0 : INTEGER, DIMENSION(:, :), CONTIGUOUS, POINTER :: pgrid
1847 :
1848 0 : CALL timeset(routineN, handle)
1849 :
1850 0 : np = SIZE(p2p)
1851 :
1852 0 : IF (alltoall_sgl) THEN
1853 0 : yzbuf_sgl => fft_scratch%yzbuf_sgl
1854 0 : xzbuf_sgl => fft_scratch%xzbuf_sgl
1855 : ELSE
1856 0 : yzbuf => fft_scratch%yzbuf
1857 0 : xzbuf => fft_scratch%xzbuf
1858 : END IF
1859 0 : npx = dims(1)
1860 0 : npz = dims(2)
1861 0 : pgrid => fft_scratch%pgrid
1862 0 : xcor => fft_scratch%xcor
1863 0 : zcor => fft_scratch%zcor
1864 0 : pzcoord => fft_scratch%pzcoord
1865 0 : scount => fft_scratch%scount
1866 0 : rcount => fft_scratch%rcount
1867 0 : sdispl => fft_scratch%sdispl
1868 0 : rdispl => fft_scratch%rdispl
1869 :
1870 : ! If the send and recv counts are not already cached, then
1871 : ! calculate and store them
1872 0 : IF (fft_scratch%in == 0) THEN
1873 :
1874 0 : rcount = 0
1875 0 : nx = MAXVAL(bo(2, 1, :))
1876 :
1877 0 : DO ix = 0, npx - 1
1878 0 : ip = pgrid(ix, 0)
1879 0 : xcor(bo(1, 1, ip):bo(2, 1, ip)) = ix
1880 : END DO
1881 0 : DO iz = 0, npz - 1
1882 0 : ip = pgrid(0, iz)
1883 0 : zcor(bo(1, 3, ip):bo(2, 3, ip)) = iz
1884 : END DO
1885 0 : DO jx = 1, nx
1886 0 : DO ir = 1, nray(my_pos)
1887 0 : jy = yzp(1, ir, my_pos)
1888 0 : jz = yzp(2, ir, my_pos)
1889 0 : ip = pgrid(xcor(jx), zcor(jz))
1890 0 : rcount(ip) = rcount(ip) + 1
1891 : END DO
1892 : END DO
1893 :
1894 0 : CALL group%alltoall(rcount, scount, 1)
1895 0 : fft_scratch%xzcount = scount
1896 0 : fft_scratch%yzcount = rcount
1897 :
1898 : ! Work out the correct displacements in the buffers
1899 0 : sdispl(0) = 0
1900 0 : rdispl(0) = 0
1901 0 : DO ip = 1, np - 1
1902 0 : sdispl(ip) = sdispl(ip - 1) + scount(ip - 1)
1903 0 : rdispl(ip) = rdispl(ip - 1) + rcount(ip - 1)
1904 : END DO
1905 :
1906 0 : fft_scratch%xzdispl = sdispl
1907 0 : fft_scratch%yzdispl = rdispl
1908 :
1909 0 : icrs = 0
1910 0 : DO ip = 0, np - 1
1911 0 : IF (scount(ip) /= 0) icrs = icrs + 1
1912 0 : IF (rcount(ip) /= 0) icrs = icrs + 1
1913 : END DO
1914 0 : CALL group%sum(icrs)
1915 0 : fft_scratch%rsratio = REAL(icrs, KIND=dp)/(REAL(2*np, KIND=dp)*REAL(np, KIND=dp))
1916 :
1917 0 : fft_scratch%in = 1
1918 : ELSE
1919 0 : scount = fft_scratch%xzcount
1920 0 : rcount = fft_scratch%yzcount
1921 0 : sdispl = fft_scratch%xzdispl
1922 0 : rdispl = fft_scratch%yzdispl
1923 : END IF
1924 :
1925 : ! Now do the actual packing
1926 0 : myx = fft_scratch%sizes%r_pos(1)
1927 0 : myz = fft_scratch%sizes%r_pos(2)
1928 0 : mp = p2p(my_pos)
1929 0 : nz = bo(2, 3, mp) - bo(1, 3, mp) + 1
1930 0 : nx = bo(2, 1, mp) - bo(1, 1, mp) + 1
1931 :
1932 : !$OMP PARALLEL DO DEFAULT(NONE), &
1933 : !$OMP PRIVATE(jj,ipl,ir,jx,jy,jz,ixx),&
1934 : !$OMP SHARED(np,p2p,nray,yzp,zcor,myz,bo,mp),&
1935 : !$OMP SHARED(alltoall_sgl,nx,scount,sdispl),&
1936 0 : !$OMP SHARED(xzbuf,xzbuf_sgl,sb,nz)
1937 : DO ip = 0, np - 1
1938 : jj = 0
1939 : ipl = p2p(ip)
1940 : DO ir = 1, nray(ip)
1941 : jz = yzp(2, ir, ip)
1942 : IF (zcor(jz) == myz) THEN
1943 : jj = jj + 1
1944 : jy = yzp(1, ir, ip)
1945 : jz = yzp(2, ir, ip) - bo(1, 3, mp) + 1
1946 : IF (alltoall_sgl) THEN
1947 : DO jx = 0, nx - 1
1948 : ixx = jj + jx*scount(ipl)/nx
1949 : xzbuf_sgl(ixx + sdispl(ipl)) = CMPLX(sb(jy, jz + jx*nz), KIND=sp)
1950 : END DO
1951 : ELSE
1952 : DO jx = 0, nx - 1
1953 : ixx = jj + jx*scount(ipl)/nx
1954 : xzbuf(ixx + sdispl(ipl)) = sb(jy, jz + jx*nz)
1955 : END DO
1956 : END IF
1957 : END IF
1958 : END DO
1959 : END DO
1960 : !$OMP END PARALLEL DO
1961 :
1962 0 : IF (alltoall_sgl) THEN
1963 0 : CALL group%alltoall(xzbuf_sgl, scount, sdispl, yzbuf_sgl, rcount, rdispl)
1964 : ELSE
1965 0 : IF (fft_scratch%rsratio < ratio_sparse_alltoall) THEN
1966 0 : CALL sparse_alltoall(xzbuf, scount, sdispl, yzbuf, rcount, rdispl, group)
1967 : ELSE
1968 0 : CALL group%alltoall(xzbuf, scount, sdispl, yzbuf, rcount, rdispl)
1969 : END IF
1970 : END IF
1971 :
1972 : !$OMP PARALLEL DO DEFAULT(NONE), &
1973 : !$OMP PRIVATE(ipl,jj,nx,ir,jx,jy,jz),&
1974 : !$OMP SHARED(p2p,pzcoord,bo,nray,my_pos,yzp),&
1975 : !$OMP SHARED(rcount,rdispl,tb,yzbuf,zcor),&
1976 0 : !$OMP SHARED(yzbuf_sgl,alltoall_sgl,np)
1977 : DO ip = 0, np - 1
1978 : IF (rcount(ip) == 0) CYCLE
1979 : ipl = p2p(ip)
1980 : jj = 0
1981 : nx = bo(2, 1, ipl) - bo(1, 1, ipl) + 1
1982 : DO ir = 1, nray(my_pos)
1983 : jz = yzp(2, ir, my_pos)
1984 : IF (zcor(jz) == pzcoord(ipl)) THEN
1985 : jj = jj + 1
1986 : jy = yzp(1, ir, my_pos)
1987 : IF (alltoall_sgl) THEN
1988 : DO jx = 0, nx - 1
1989 : tb(ir, jx + bo(1, 1, ipl)) = yzbuf_sgl(rdispl(ip) + jj + jx*rcount(ip)/nx)
1990 : END DO
1991 : ELSE
1992 : DO jx = 0, nx - 1
1993 : tb(ir, jx + bo(1, 1, ipl)) = yzbuf(rdispl(ip) + jj + jx*rcount(ip)/nx)
1994 : END DO
1995 : END IF
1996 : END IF
1997 : END DO
1998 : END DO
1999 : !$OMP END PARALLEL DO
2000 :
2001 0 : CALL timestop(handle)
2002 :
2003 0 : END SUBROUTINE xz_to_yz
2004 :
2005 : ! **************************************************************************************************
2006 : !> \brief ...
2007 : !> \param cin ...
2008 : !> \param boin ...
2009 : !> \param boout ...
2010 : !> \param sout ...
2011 : !> \param fft_scratch ...
2012 : !> \par History
2013 : !> none
2014 : !> \author JGH (20-Jan-2001)
2015 : ! **************************************************************************************************
2016 0 : SUBROUTINE cube_transpose_1(cin, boin, boout, sout, fft_scratch)
2017 :
2018 : COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2019 : INTENT(IN) :: cin
2020 : INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
2021 : INTENT(IN) :: boin, boout
2022 : COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2023 : INTENT(OUT) :: sout
2024 : TYPE(fft_scratch_type), INTENT(IN) :: fft_scratch
2025 :
2026 : CHARACTER(len=*), PARAMETER :: routineN = 'cube_transpose_1'
2027 :
2028 : COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2029 0 : POINTER :: rbuf
2030 : INTEGER :: handle, ip, ipl, ir, is, ixy, iz, mip, &
2031 : mz, np, nx, ny, nz
2032 0 : INTEGER, CONTIGUOUS, DIMENSION(:), POINTER :: rcount, rdispl, scount, sdispl
2033 0 : INTEGER, CONTIGUOUS, DIMENSION(:, :), POINTER :: pgrid
2034 : INTEGER, DIMENSION(2) :: dim, pos
2035 :
2036 0 : CALL timeset(routineN, handle)
2037 :
2038 0 : mip = fft_scratch%mip
2039 0 : dim = fft_scratch%dim
2040 0 : pos = fft_scratch%pos
2041 0 : scount => fft_scratch%scount
2042 0 : rcount => fft_scratch%rcount
2043 0 : sdispl => fft_scratch%sdispl
2044 0 : rdispl => fft_scratch%rdispl
2045 0 : pgrid => fft_scratch%pgcube
2046 0 : np = DIM(2)
2047 :
2048 0 : nx = boin(2, 1, mip) - boin(1, 1, mip) + 1
2049 0 : nz = boin(2, 3, mip) - boin(1, 3, mip) + 1
2050 :
2051 : !$OMP PARALLEL DO DEFAULT(NONE), &
2052 : !$OMP PRIVATE(ipl,ny), &
2053 0 : !$OMP SHARED(np,pgrid,boout,scount,sdispl,nx,nz)
2054 : DO ip = 0, np - 1
2055 : ipl = pgrid(ip, 2)
2056 : ny = boout(2, 2, ipl) - boout(1, 2, ipl) + 1
2057 : scount(ip) = nx*nz*ny
2058 : sdispl(ip) = nx*nz*(boout(1, 2, ipl) - 1)
2059 : END DO
2060 : !$OMP END PARALLEL DO
2061 0 : ny = boout(2, 2, mip) - boout(1, 2, mip) + 1
2062 0 : mz = MAXVAL(boin(2, 3, :) - boin(1, 3, :) + 1)
2063 : !$OMP PARALLEL DO DEFAULT(NONE), &
2064 : !$OMP PRIVATE(ipl,nz), &
2065 0 : !$OMP SHARED(np,pgrid,boin,nx,ny,rcount,rdispl,mz)
2066 : DO ip = 0, np - 1
2067 : ipl = pgrid(ip, 2)
2068 : nz = boin(2, 3, ipl) - boin(1, 3, ipl) + 1
2069 : rcount(ip) = nx*nz*ny
2070 : rdispl(ip) = nx*ny*mz*ip
2071 : END DO
2072 : !$OMP END PARALLEL DO
2073 :
2074 0 : rbuf => fft_scratch%rbuf1
2075 :
2076 0 : CALL fft_scratch%cart_sub_comm(2)%alltoall(cin, scount, sdispl, rbuf, rcount, rdispl)
2077 :
2078 : !$OMP PARALLEL DO DEFAULT(NONE) COLLAPSE(2) &
2079 : !$OMP PRIVATE(ip,ipl,nz,iz,is,ir) &
2080 0 : !$OMP SHARED(nx,ny,np,pgrid,boin,sout,rbuf)
2081 : DO ixy = 1, nx*ny
2082 : DO ip = 0, np - 1
2083 : ipl = pgrid(ip, 2)
2084 : nz = boin(2, 3, ipl) - boin(1, 3, ipl) + 1
2085 : DO iz = 1, nz
2086 : is = boin(1, 3, ipl) + iz - 1
2087 : ir = iz + nz*(ixy - 1)
2088 : sout(is, ixy) = rbuf(ir, ip)
2089 : END DO
2090 : END DO
2091 : END DO
2092 : !$OMP END PARALLEL DO
2093 :
2094 0 : CALL timestop(handle)
2095 :
2096 0 : END SUBROUTINE cube_transpose_1
2097 :
2098 : ! **************************************************************************************************
2099 : !> \brief ...
2100 : !> \param cin ...
2101 : !> \param boin ...
2102 : !> \param boout ...
2103 : !> \param sout ...
2104 : !> \param fft_scratch ...
2105 : ! **************************************************************************************************
2106 0 : SUBROUTINE cube_transpose_2(cin, boin, boout, sout, fft_scratch)
2107 :
2108 : COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2109 : INTENT(IN) :: cin
2110 : INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
2111 : INTENT(IN) :: boin, boout
2112 : COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2113 : INTENT(OUT) :: sout
2114 : TYPE(fft_scratch_type), INTENT(IN) :: fft_scratch
2115 :
2116 : CHARACTER(len=*), PARAMETER :: routineN = 'cube_transpose_2'
2117 :
2118 : COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2119 0 : POINTER :: rbuf
2120 : INTEGER :: handle, ip, ipl, ir, ixy, iz, mip, mz, &
2121 : np, nx, ny, nz
2122 0 : INTEGER, CONTIGUOUS, DIMENSION(:), POINTER :: rcount, rdispl, scount, sdispl
2123 0 : INTEGER, CONTIGUOUS, DIMENSION(:, :), POINTER :: pgrid
2124 : INTEGER, DIMENSION(2) :: dim, pos
2125 : TYPE(mp_comm_type) :: sub_group
2126 :
2127 0 : CALL timeset(routineN, handle)
2128 :
2129 0 : sub_group = fft_scratch%cart_sub_comm(2)
2130 0 : mip = fft_scratch%mip
2131 0 : dim = fft_scratch%dim
2132 0 : pos = fft_scratch%pos
2133 0 : scount => fft_scratch%scount
2134 0 : rcount => fft_scratch%rcount
2135 0 : sdispl => fft_scratch%sdispl
2136 0 : rdispl => fft_scratch%rdispl
2137 0 : pgrid => fft_scratch%pgcube
2138 0 : np = DIM(2)
2139 :
2140 0 : nx = boin(2, 1, mip) - boin(1, 1, mip) + 1
2141 0 : ny = boin(2, 2, mip) - boin(1, 2, mip) + 1
2142 0 : mz = MAXVAL(boout(2, 3, :) - boout(1, 3, :) + 1)
2143 :
2144 0 : rbuf => fft_scratch%rbuf2
2145 :
2146 : !$OMP PARALLEL DEFAULT(NONE), &
2147 : !$OMP PRIVATE(ip,ipl,nz,iz,ir), &
2148 0 : !$OMP SHARED(nx,ny,np,pgrid,boout,rbuf,cin,scount,sdispl,mz)
2149 : !$OMP DO COLLAPSE(2)
2150 : DO ixy = 1, nx*ny
2151 : DO ip = 0, np - 1
2152 : ipl = pgrid(ip, 2)
2153 : nz = boout(2, 3, ipl) - boout(1, 3, ipl) + 1
2154 : DO iz = boout(1, 3, ipl), boout(2, 3, ipl)
2155 : ir = iz - boout(1, 3, ipl) + 1 + (ixy - 1)*nz
2156 : rbuf(ir, ip) = cin(iz, ixy)
2157 : END DO
2158 : END DO
2159 : END DO
2160 : !$OMP END DO
2161 : !$OMP DO
2162 : DO ip = 0, np - 1
2163 : ipl = pgrid(ip, 2)
2164 : nz = boout(2, 3, ipl) - boout(1, 3, ipl) + 1
2165 : scount(ip) = nx*ny*nz
2166 : sdispl(ip) = nx*ny*mz*ip
2167 : END DO
2168 : !$OMP END DO
2169 : !$OMP END PARALLEL
2170 0 : nz = boout(2, 3, mip) - boout(1, 3, mip) + 1
2171 : !$OMP PARALLEL DO DEFAULT(NONE), &
2172 : !$OMP PRIVATE(ipl,ny), &
2173 0 : !$OMP SHARED(np,pgrid,boin,nx,nz,rcount,rdispl)
2174 : DO ip = 0, np - 1
2175 : ipl = pgrid(ip, 2)
2176 : ny = boin(2, 2, ipl) - boin(1, 2, ipl) + 1
2177 : rcount(ip) = nx*ny*nz
2178 : rdispl(ip) = nx*nz*(boin(1, 2, ipl) - 1)
2179 : END DO
2180 : !$OMP END PARALLEL DO
2181 :
2182 0 : CALL sub_group%alltoall(rbuf, scount, sdispl, sout, rcount, rdispl)
2183 :
2184 0 : CALL timestop(handle)
2185 :
2186 0 : END SUBROUTINE cube_transpose_2
2187 :
2188 : ! **************************************************************************************************
2189 : !> \brief ...
2190 : !> \param cin ...
2191 : !> \param boin ...
2192 : !> \param boout ...
2193 : !> \param sout ...
2194 : !> \param fft_scratch ...
2195 : ! **************************************************************************************************
2196 0 : SUBROUTINE cube_transpose_3(cin, boin, boout, sout, fft_scratch)
2197 :
2198 : COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2199 : INTENT(IN) :: cin
2200 : INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
2201 : INTENT(IN) :: boin, boout
2202 : COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2203 : INTENT(OUT) :: sout
2204 : TYPE(fft_scratch_type), INTENT(IN) :: fft_scratch
2205 :
2206 : CHARACTER(len=*), PARAMETER :: routineN = 'cube_transpose_3'
2207 :
2208 : COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2209 0 : POINTER :: rbuf
2210 : INTEGER :: handle, ip, ipl, ir, is, ixz, iy, lb, &
2211 : mip, my, my_id, np, num_threads, nx, &
2212 : ny, nz, ub
2213 0 : INTEGER, CONTIGUOUS, DIMENSION(:), POINTER :: rcount, rdispl, scount, sdispl
2214 0 : INTEGER, CONTIGUOUS, DIMENSION(:, :), POINTER :: pgrid
2215 : INTEGER, DIMENSION(2) :: dim, pos
2216 : TYPE(mp_comm_type) :: sub_group
2217 :
2218 0 : CALL timeset(routineN, handle)
2219 :
2220 0 : sub_group = fft_scratch%cart_sub_comm(1)
2221 0 : mip = fft_scratch%mip
2222 0 : dim = fft_scratch%dim
2223 0 : pos = fft_scratch%pos
2224 0 : np = DIM(1)
2225 0 : scount => fft_scratch%scount
2226 0 : rcount => fft_scratch%rcount
2227 0 : sdispl => fft_scratch%sdispl
2228 0 : rdispl => fft_scratch%rdispl
2229 0 : pgrid => fft_scratch%pgcube
2230 :
2231 0 : ny = boin(2, 2, mip) - boin(1, 2, mip) + 1
2232 0 : nz = boin(2, 3, mip) - boin(1, 3, mip) + 1
2233 : !$OMP PARALLEL DO DEFAULT(NONE), &
2234 : !$OMP PRIVATE(ipl, nx), &
2235 0 : !$OMP SHARED(np,pgrid,boout,ny,nz,scount,sdispl)
2236 : DO ip = 0, np - 1
2237 : ipl = pgrid(ip, 1)
2238 : nx = boout(2, 1, ipl) - boout(1, 1, ipl) + 1
2239 : scount(ip) = nx*nz*ny
2240 : sdispl(ip) = ny*nz*(boout(1, 1, ipl) - 1)
2241 : END DO
2242 : !$OMP END PARALLEL DO
2243 0 : nx = boout(2, 1, mip) - boout(1, 1, mip) + 1
2244 0 : my = MAXVAL(boin(2, 2, :) - boin(1, 2, :) + 1)
2245 : !$OMP PARALLEL DO DEFAULT(NONE), &
2246 : !$OMP PRIVATE(ipl, ny), &
2247 0 : !$OMP SHARED(np,pgrid,boin,nx,nz,my,rcount,rdispl)
2248 : DO ip = 0, np - 1
2249 : ipl = pgrid(ip, 1)
2250 : ny = boin(2, 2, ipl) - boin(1, 2, ipl) + 1
2251 : rcount(ip) = nx*nz*ny
2252 : rdispl(ip) = nx*my*nz*ip
2253 : END DO
2254 : !$OMP END PARALLEL DO
2255 :
2256 0 : rbuf => fft_scratch%rbuf3
2257 0 : num_threads = 1
2258 0 : my_id = 0
2259 : !$OMP PARALLEL DEFAULT(NONE), &
2260 : !$OMP PRIVATE(NUM_THREADS, my_id, lb, ub) &
2261 0 : !$OMP SHARED(rbuf)
2262 : !$ num_threads = MIN(omp_get_max_threads(), SIZE(rbuf, 2))
2263 : !$ my_id = omp_get_thread_num()
2264 : IF (my_id < num_threads) THEN
2265 : lb = (SIZE(rbuf, 2)*my_id)/num_threads
2266 : ub = (SIZE(rbuf, 2)*(my_id + 1))/num_threads - 1
2267 : rbuf(:, lb:ub) = 0.0_dp
2268 : END IF
2269 : !$OMP END PARALLEL
2270 :
2271 0 : CALL sub_group%alltoall(cin, scount, sdispl, rbuf, rcount, rdispl)
2272 :
2273 : !$OMP PARALLEL DO DEFAULT(NONE) COLLAPSE(2) &
2274 : !$OMP PRIVATE(ip,ipl,ny,iy,is,ir) &
2275 0 : !$OMP SHARED(nx,nz,np,pgrid,boin,rbuf,sout)
2276 : DO ixz = 1, nx*nz
2277 : DO ip = 0, np - 1
2278 : ipl = pgrid(ip, 1)
2279 : ny = boin(2, 2, ipl) - boin(1, 2, ipl) + 1
2280 : DO iy = 1, ny
2281 : is = boin(1, 2, ipl) + iy - 1
2282 : ir = iy + ny*(ixz - 1)
2283 : sout(is, ixz) = rbuf(ir, ip)
2284 : END DO
2285 : END DO
2286 : END DO
2287 : !$OMP END PARALLEL DO
2288 :
2289 0 : CALL timestop(handle)
2290 :
2291 0 : END SUBROUTINE cube_transpose_3
2292 :
2293 : ! **************************************************************************************************
2294 : !> \brief ...
2295 : !> \param cin ...
2296 : !> \param boin ...
2297 : !> \param boout ...
2298 : !> \param sout ...
2299 : !> \param fft_scratch ...
2300 : ! **************************************************************************************************
2301 0 : SUBROUTINE cube_transpose_4(cin, boin, boout, sout, fft_scratch)
2302 :
2303 : COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2304 : INTENT(IN) :: cin
2305 : INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), &
2306 : INTENT(IN) :: boin, boout
2307 : COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2308 : INTENT(OUT) :: sout
2309 : TYPE(fft_scratch_type), INTENT(IN) :: fft_scratch
2310 :
2311 : CHARACTER(len=*), PARAMETER :: routineN = 'cube_transpose_4'
2312 :
2313 : COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2314 0 : POINTER :: rbuf
2315 : INTEGER :: handle, ip, ipl, ir, iy, izx, lb, mip, &
2316 : my, my_id, np, num_threads, nx, ny, &
2317 : nz, ub
2318 0 : INTEGER, CONTIGUOUS, DIMENSION(:), POINTER :: rcount, rdispl, scount, sdispl
2319 0 : INTEGER, CONTIGUOUS, DIMENSION(:, :), POINTER :: pgrid
2320 : INTEGER, DIMENSION(2) :: dim, pos
2321 : TYPE(mp_comm_type) :: sub_group
2322 :
2323 0 : CALL timeset(routineN, handle)
2324 :
2325 0 : sub_group = fft_scratch%cart_sub_comm(1)
2326 0 : mip = fft_scratch%mip
2327 0 : dim = fft_scratch%dim
2328 0 : pos = fft_scratch%pos
2329 0 : np = DIM(1)
2330 0 : scount => fft_scratch%scount
2331 0 : rcount => fft_scratch%rcount
2332 0 : sdispl => fft_scratch%sdispl
2333 0 : rdispl => fft_scratch%rdispl
2334 0 : pgrid => fft_scratch%pgcube
2335 :
2336 0 : nx = boin(2, 1, mip) - boin(1, 1, mip) + 1
2337 0 : nz = boin(2, 3, mip) - boin(1, 3, mip) + 1
2338 0 : my = MAXVAL(boout(2, 2, :) - boout(1, 2, :) + 1)
2339 :
2340 0 : rbuf => fft_scratch%rbuf4
2341 0 : num_threads = 1
2342 0 : my_id = 0
2343 : !$OMP PARALLEL DEFAULT(NONE), &
2344 : !$OMP PRIVATE(NUM_THREADS,my_id,lb,ub,ip,ipl,ny,iy,ir), &
2345 0 : !$OMP SHARED(rbuf,nz,nx,np,pgrid,boout,cin,my,scount,sdispl)
2346 : !$ num_threads = MIN(omp_get_max_threads(), SIZE(rbuf, 2))
2347 : !$ my_id = omp_get_thread_num()
2348 : IF (my_id < num_threads) THEN
2349 : lb = (SIZE(rbuf, 2)*my_id)/num_threads
2350 : ub = (SIZE(rbuf, 2)*(my_id + 1))/num_threads - 1
2351 : rbuf(:, lb:ub) = 0.0_dp
2352 : END IF
2353 : !$OMP BARRIER
2354 :
2355 : !$OMP DO COLLAPSE(2)
2356 : DO izx = 1, nz*nx
2357 : DO ip = 0, np - 1
2358 : ipl = pgrid(ip, 1)
2359 : ny = boout(2, 2, ipl) - boout(1, 2, ipl) + 1
2360 : DO iy = boout(1, 2, ipl), boout(2, 2, ipl)
2361 : ir = iy - boout(1, 2, ipl) + 1 + (izx - 1)*ny
2362 : rbuf(ir, ip) = cin(iy, izx)
2363 : END DO
2364 : END DO
2365 : END DO
2366 : !$OMP END DO
2367 : !$OMP DO
2368 : DO ip = 0, np - 1
2369 : ipl = pgrid(ip, 1)
2370 : ny = boout(2, 2, ipl) - boout(1, 2, ipl) + 1
2371 : scount(ip) = nx*ny*nz
2372 : sdispl(ip) = nx*nz*my*ip
2373 : END DO
2374 : !$OMP END DO
2375 : !$OMP END PARALLEL
2376 0 : ny = boout(2, 2, mip) - boout(1, 2, mip) + 1
2377 : !$OMP PARALLEL DO DEFAULT(NONE), &
2378 : !$OMP PRIVATE(ipl,nx), &
2379 0 : !$OMP SHARED(np,pgrid,boin,rcount,rdispl,ny,nz)
2380 : DO ip = 0, np - 1
2381 : ipl = pgrid(ip, 1)
2382 : nx = boin(2, 1, ipl) - boin(1, 1, ipl) + 1
2383 : rcount(ip) = nx*ny*nz
2384 : rdispl(ip) = ny*nz*(boin(1, 1, ipl) - 1)
2385 : END DO
2386 : !$OMP END PARALLEL DO
2387 :
2388 0 : CALL sub_group%alltoall(rbuf, scount, sdispl, sout, rcount, rdispl)
2389 :
2390 0 : CALL timestop(handle)
2391 :
2392 0 : END SUBROUTINE cube_transpose_4
2393 :
2394 : ! **************************************************************************************************
2395 : !> \brief ...
2396 : !> \param cin ...
2397 : !> \param group ...
2398 : !> \param boin ...
2399 : !> \param boout ...
2400 : !> \param sout ...
2401 : !> \param fft_scratch ...
2402 : ! **************************************************************************************************
2403 104 : SUBROUTINE cube_transpose_5(cin, group, boin, boout, sout, fft_scratch)
2404 :
2405 : COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2406 : INTENT(IN) :: cin
2407 :
2408 : CLASS(mp_comm_type), INTENT(IN) :: group
2409 : INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), INTENT(IN) :: boin, boout
2410 : COMPLEX(KIND=dp), DIMENSION(:, :), INTENT(OUT), CONTIGUOUS :: sout
2411 : TYPE(fft_scratch_type), INTENT(IN) :: fft_scratch
2412 :
2413 : CHARACTER(len=*), PARAMETER :: routineN = 'cube_transpose_5'
2414 :
2415 104 : COMPLEX(KIND=dp), DIMENSION(:, :), POINTER, CONTIGUOUS :: rbuf
2416 : INTEGER :: handle, ip, ir, is, ixz, iy, lb, mip, &
2417 : my, my_id, np, num_threads, nx, ny, &
2418 : nz, ub
2419 104 : INTEGER, DIMENSION(:), POINTER, CONTIGUOUS :: rcount, rdispl, scount, sdispl
2420 :
2421 104 : CALL timeset(routineN, handle)
2422 :
2423 104 : np = fft_scratch%sizes%numtask
2424 104 : mip = fft_scratch%mip
2425 104 : scount => fft_scratch%scount
2426 104 : rcount => fft_scratch%rcount
2427 104 : sdispl => fft_scratch%sdispl
2428 104 : rdispl => fft_scratch%rdispl
2429 :
2430 104 : ny = boin(2, 2, mip) - boin(1, 2, mip) + 1
2431 104 : nz = boin(2, 3, mip) - boin(1, 3, mip) + 1
2432 : !$OMP PARALLEL DO DEFAULT(NONE), &
2433 : !$OMP PRIVATE(nx), &
2434 104 : !$OMP SHARED(np,boout,ny,nz,scount,sdispl)
2435 : DO ip = 0, np - 1
2436 : nx = boout(2, 1, ip) - boout(1, 1, ip) + 1
2437 : scount(ip) = nx*nz*ny
2438 : sdispl(ip) = ny*nz*(boout(1, 1, ip) - 1)
2439 : END DO
2440 : !$OMP END PARALLEL DO
2441 104 : nx = boout(2, 1, mip) - boout(1, 1, mip) + 1
2442 312 : my = MAXVAL(boin(2, 2, :) - boin(1, 2, :) + 1)
2443 : !$OMP PARALLEL DO DEFAULT(NONE), &
2444 : !$OMP PRIVATE(ny), &
2445 104 : !$OMP SHARED(np,boin,nx,nz,rcount,rdispl,my)
2446 : DO ip = 0, np - 1
2447 : ny = boin(2, 2, ip) - boin(1, 2, ip) + 1
2448 : rcount(ip) = nx*nz*ny
2449 : rdispl(ip) = nx*my*nz*ip
2450 : END DO
2451 : !$OMP END PARALLEL DO
2452 :
2453 104 : rbuf => fft_scratch%rbuf5
2454 104 : num_threads = 1
2455 104 : my_id = 0
2456 : !$OMP PARALLEL DEFAULT(NONE), &
2457 : !$OMP PRIVATE(NUM_THREADS, my_id, lb, ub), &
2458 104 : !$OMP SHARED(rbuf)
2459 : !$ num_threads = MIN(omp_get_max_threads(), SIZE(rbuf, 2))
2460 : !$ my_id = omp_get_thread_num()
2461 : IF (my_id < num_threads) THEN
2462 : lb = (SIZE(rbuf, 2)*my_id)/num_threads
2463 : ub = (SIZE(rbuf, 2)*(my_id + 1))/num_threads - 1
2464 : rbuf(:, lb:ub) = 0.0_dp
2465 : END IF
2466 : !$OMP END PARALLEL
2467 :
2468 104 : CALL group%alltoall(cin, scount, sdispl, rbuf, rcount, rdispl)
2469 :
2470 : !$OMP PARALLEL DO DEFAULT(NONE) COLLAPSE(2) &
2471 : !$OMP PRIVATE(ip,ny,iy,is,ir) &
2472 104 : !$OMP SHARED(nx,nz,np,boin,sout,rbuf)
2473 : DO ixz = 1, nx*nz
2474 : DO ip = 0, np - 1
2475 : ny = boin(2, 2, ip) - boin(1, 2, ip) + 1
2476 : DO iy = 1, ny
2477 : is = boin(1, 2, ip) + iy - 1
2478 : ir = iy + ny*(ixz - 1)
2479 : sout(is, ixz) = rbuf(ir, ip)
2480 : END DO
2481 : END DO
2482 : END DO
2483 : !$OMP END PARALLEL DO
2484 :
2485 104 : CALL timestop(handle)
2486 :
2487 104 : END SUBROUTINE cube_transpose_5
2488 :
2489 : ! **************************************************************************************************
2490 : !> \brief ...
2491 : !> \param cin ...
2492 : !> \param group ...
2493 : !> \param boin ...
2494 : !> \param boout ...
2495 : !> \param sout ...
2496 : !> \param fft_scratch ...
2497 : ! **************************************************************************************************
2498 104 : SUBROUTINE cube_transpose_6(cin, group, boin, boout, sout, fft_scratch)
2499 :
2500 : COMPLEX(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
2501 : INTENT(IN) :: cin
2502 :
2503 : CLASS(mp_comm_type), INTENT(IN) :: group
2504 : INTEGER, CONTIGUOUS, DIMENSION(:, :, 0:), INTENT(IN) :: boin, boout
2505 : COMPLEX(KIND=dp), DIMENSION(:, :), INTENT(OUT), CONTIGUOUS :: sout
2506 : TYPE(fft_scratch_type), INTENT(IN) :: fft_scratch
2507 :
2508 : CHARACTER(len=*), PARAMETER :: routineN = 'cube_transpose_6'
2509 :
2510 104 : COMPLEX(KIND=dp), DIMENSION(:, :), POINTER, CONTIGUOUS :: rbuf
2511 : INTEGER :: handle, ip, ir, iy, izx, lb, mip, my, &
2512 : my_id, np, num_threads, nx, ny, nz, ub
2513 104 : INTEGER, DIMENSION(:), POINTER, CONTIGUOUS :: rcount, rdispl, scount, sdispl
2514 :
2515 104 : CALL timeset(routineN, handle)
2516 :
2517 104 : np = fft_scratch%sizes%numtask
2518 104 : mip = fft_scratch%mip
2519 104 : scount => fft_scratch%scount
2520 104 : rcount => fft_scratch%rcount
2521 104 : sdispl => fft_scratch%sdispl
2522 104 : rdispl => fft_scratch%rdispl
2523 :
2524 104 : nx = boin(2, 1, mip) - boin(1, 1, mip) + 1
2525 104 : nz = boin(2, 3, mip) - boin(1, 3, mip) + 1
2526 312 : my = MAXVAL(boout(2, 2, :) - boout(1, 2, :) + 1)
2527 :
2528 104 : rbuf => fft_scratch%rbuf5
2529 104 : num_threads = 1
2530 104 : my_id = 0
2531 : !$OMP PARALLEL DEFAULT(NONE), &
2532 : !$OMP PRIVATE(NUM_THREADS,my_id,lb,ub,ip,ny,iy,ir), &
2533 104 : !$OMP SHARED(rbuf,nx,nz,np,boout,cin,my,scount,sdispl)
2534 : !$ num_threads = MIN(omp_get_max_threads(), SIZE(rbuf, 2))
2535 : !$ my_id = omp_get_thread_num()
2536 : IF (my_id < num_threads) THEN
2537 : lb = (SIZE(rbuf, 2)*my_id)/num_threads
2538 : ub = (SIZE(rbuf, 2)*(my_id + 1))/num_threads - 1
2539 : rbuf(:, lb:ub) = 0.0_dp
2540 : END IF
2541 : !$OMP BARRIER
2542 :
2543 : !$OMP DO COLLAPSE(2)
2544 : DO izx = 1, nz*nx
2545 : DO ip = 0, np - 1
2546 : ny = boout(2, 2, ip) - boout(1, 2, ip) + 1
2547 : DO iy = boout(1, 2, ip), boout(2, 2, ip)
2548 : ir = iy - boout(1, 2, ip) + 1 + (izx - 1)*ny
2549 : rbuf(ir, ip) = cin(iy, izx)
2550 : END DO
2551 : END DO
2552 : END DO
2553 : !$OMP END DO
2554 : !$OMP DO
2555 : DO ip = 0, np - 1
2556 : ny = boout(2, 2, ip) - boout(1, 2, ip) + 1
2557 : scount(ip) = nx*ny*nz
2558 : sdispl(ip) = nx*nz*my*ip
2559 : END DO
2560 : !$OMP END DO
2561 : !$OMP END PARALLEL
2562 104 : ny = boout(2, 2, mip) - boout(1, 2, mip) + 1
2563 : !$OMP PARALLEL DO DEFAULT(NONE), &
2564 : !$OMP PRIVATE(nx), &
2565 104 : !$OMP SHARED(np,boin,rcount,rdispl,nz,ny)
2566 : DO ip = 0, np - 1
2567 : nx = boin(2, 1, ip) - boin(1, 1, ip) + 1
2568 : rcount(ip) = nx*ny*nz
2569 : rdispl(ip) = ny*nz*(boin(1, 1, ip) - 1)
2570 : END DO
2571 : !$OMP END PARALLEL DO
2572 :
2573 104 : CALL group%alltoall(rbuf, scount, sdispl, sout, rcount, rdispl)
2574 :
2575 104 : CALL timestop(handle)
2576 :
2577 104 : END SUBROUTINE cube_transpose_6
2578 :
2579 : ! **************************************************************************************************
2580 : !> \brief ...
2581 : ! **************************************************************************************************
2582 9005 : SUBROUTINE init_fft_scratch_pool()
2583 :
2584 9005 : CALL release_fft_scratch_pool()
2585 :
2586 : ! Allocate first scratch and mark it as used
2587 9005 : ALLOCATE (fft_scratch_first)
2588 261145 : ALLOCATE (fft_scratch_first%fft_scratch)
2589 : ! this is a very special scratch, it seems, we always keep it 'most - recent' so we will never delete it
2590 9005 : fft_scratch_first%fft_scratch%last_tick = HUGE(fft_scratch_first%fft_scratch%last_tick)
2591 :
2592 9005 : init_fft_pool = init_fft_pool + 1
2593 :
2594 9005 : END SUBROUTINE init_fft_scratch_pool
2595 :
2596 : ! **************************************************************************************************
2597 : !> \brief ...
2598 : !> \param fft_scratch ...
2599 : ! **************************************************************************************************
2600 44164 : SUBROUTINE deallocate_fft_scratch_type(fft_scratch)
2601 : TYPE(fft_scratch_type), INTENT(INOUT) :: fft_scratch
2602 :
2603 : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
2604 : INTEGER :: ierr
2605 : COMPLEX(KIND=dp), POINTER :: dummy_ptr_z
2606 : #endif
2607 :
2608 : ! deallocate structures
2609 44164 : IF (ASSOCIATED(fft_scratch%ziptr)) THEN
2610 10233 : CALL fft_dealloc(fft_scratch%ziptr)
2611 : END IF
2612 44164 : IF (ASSOCIATED(fft_scratch%zoptr)) THEN
2613 10233 : CALL fft_dealloc(fft_scratch%zoptr)
2614 : END IF
2615 44164 : IF (ASSOCIATED(fft_scratch%p1buf)) THEN
2616 0 : CALL fft_dealloc(fft_scratch%p1buf)
2617 : END IF
2618 44164 : IF (ASSOCIATED(fft_scratch%p2buf)) THEN
2619 : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
2620 : dummy_ptr_z => fft_scratch%p2buf(1, 1)
2621 : ierr = offload_free_pinned_mem(c_loc(dummy_ptr_z))
2622 : #else
2623 0 : CALL fft_dealloc(fft_scratch%p2buf)
2624 : #endif
2625 : END IF
2626 44164 : IF (ASSOCIATED(fft_scratch%p3buf)) THEN
2627 : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
2628 : dummy_ptr_z => fft_scratch%p3buf(1, 1)
2629 : ierr = offload_free_pinned_mem(c_loc(dummy_ptr_z))
2630 : #else
2631 0 : CALL fft_dealloc(fft_scratch%p3buf)
2632 : #endif
2633 : END IF
2634 44164 : IF (ASSOCIATED(fft_scratch%p4buf)) THEN
2635 : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
2636 : dummy_ptr_z => fft_scratch%p4buf(1, 1)
2637 : ierr = offload_free_pinned_mem(c_loc(dummy_ptr_z))
2638 : #else
2639 0 : CALL fft_dealloc(fft_scratch%p4buf)
2640 : #endif
2641 : END IF
2642 44164 : IF (ASSOCIATED(fft_scratch%p5buf)) THEN
2643 : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
2644 : dummy_ptr_z => fft_scratch%p5buf(1, 1)
2645 : ierr = offload_free_pinned_mem(c_loc(dummy_ptr_z))
2646 : #else
2647 0 : CALL fft_dealloc(fft_scratch%p5buf)
2648 : #endif
2649 : END IF
2650 44164 : IF (ASSOCIATED(fft_scratch%p6buf)) THEN
2651 0 : CALL fft_dealloc(fft_scratch%p6buf)
2652 : END IF
2653 44164 : IF (ASSOCIATED(fft_scratch%p7buf)) THEN
2654 : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
2655 : dummy_ptr_z => fft_scratch%p7buf(1, 1)
2656 : ierr = offload_free_pinned_mem(c_loc(dummy_ptr_z))
2657 : #else
2658 0 : CALL fft_dealloc(fft_scratch%p7buf)
2659 : #endif
2660 : END IF
2661 44164 : IF (ASSOCIATED(fft_scratch%r1buf)) THEN
2662 : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
2663 : dummy_ptr_z => fft_scratch%r1buf(1, 1)
2664 : ierr = offload_free_pinned_mem(c_loc(dummy_ptr_z))
2665 : #else
2666 24918 : CALL fft_dealloc(fft_scratch%r1buf)
2667 : #endif
2668 : END IF
2669 44164 : IF (ASSOCIATED(fft_scratch%r2buf)) THEN
2670 24918 : CALL fft_dealloc(fft_scratch%r2buf)
2671 : END IF
2672 44164 : IF (ASSOCIATED(fft_scratch%tbuf)) THEN
2673 : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
2674 : dummy_ptr_z => fft_scratch%tbuf(1, 1, 1)
2675 : ierr = offload_free_pinned_mem(c_loc(dummy_ptr_z))
2676 : #else
2677 24918 : CALL fft_dealloc(fft_scratch%tbuf)
2678 : #endif
2679 : END IF
2680 44164 : IF (ASSOCIATED(fft_scratch%a1buf)) THEN
2681 8 : CALL fft_dealloc(fft_scratch%a1buf)
2682 : END IF
2683 44164 : IF (ASSOCIATED(fft_scratch%a2buf)) THEN
2684 8 : CALL fft_dealloc(fft_scratch%a2buf)
2685 : END IF
2686 44164 : IF (ASSOCIATED(fft_scratch%a3buf)) THEN
2687 8 : CALL fft_dealloc(fft_scratch%a3buf)
2688 : END IF
2689 44164 : IF (ASSOCIATED(fft_scratch%a4buf)) THEN
2690 8 : CALL fft_dealloc(fft_scratch%a4buf)
2691 : END IF
2692 44164 : IF (ASSOCIATED(fft_scratch%a5buf)) THEN
2693 8 : CALL fft_dealloc(fft_scratch%a5buf)
2694 : END IF
2695 44164 : IF (ASSOCIATED(fft_scratch%a6buf)) THEN
2696 8 : CALL fft_dealloc(fft_scratch%a6buf)
2697 : END IF
2698 44164 : IF (ASSOCIATED(fft_scratch%scount)) THEN
2699 0 : DEALLOCATE (fft_scratch%scount, fft_scratch%rcount, &
2700 24926 : fft_scratch%sdispl, fft_scratch%rdispl)
2701 : END IF
2702 44164 : IF (ASSOCIATED(fft_scratch%rr)) THEN
2703 24910 : DEALLOCATE (fft_scratch%rr)
2704 : END IF
2705 44164 : IF (ASSOCIATED(fft_scratch%xzbuf)) THEN
2706 0 : DEALLOCATE (fft_scratch%xzbuf)
2707 : END IF
2708 44164 : IF (ASSOCIATED(fft_scratch%yzbuf)) THEN
2709 0 : DEALLOCATE (fft_scratch%yzbuf)
2710 : END IF
2711 44164 : IF (ASSOCIATED(fft_scratch%xzbuf_sgl)) THEN
2712 0 : DEALLOCATE (fft_scratch%xzbuf_sgl)
2713 : END IF
2714 44164 : IF (ASSOCIATED(fft_scratch%yzbuf_sgl)) THEN
2715 0 : DEALLOCATE (fft_scratch%yzbuf_sgl)
2716 : END IF
2717 44164 : IF (ASSOCIATED(fft_scratch%ss)) THEN
2718 8 : DEALLOCATE (fft_scratch%ss)
2719 : END IF
2720 44164 : IF (ASSOCIATED(fft_scratch%tt)) THEN
2721 8 : DEALLOCATE (fft_scratch%tt)
2722 : END IF
2723 44164 : IF (ASSOCIATED(fft_scratch%pgrid)) THEN
2724 0 : DEALLOCATE (fft_scratch%pgrid)
2725 : END IF
2726 44164 : IF (ASSOCIATED(fft_scratch%pgcube)) THEN
2727 24926 : DEALLOCATE (fft_scratch%pgcube)
2728 : END IF
2729 44164 : IF (ASSOCIATED(fft_scratch%xcor)) THEN
2730 0 : DEALLOCATE (fft_scratch%xcor, fft_scratch%zcor)
2731 : END IF
2732 44164 : IF (ASSOCIATED(fft_scratch%pzcoord)) THEN
2733 0 : DEALLOCATE (fft_scratch%pzcoord)
2734 : END IF
2735 44164 : IF (ASSOCIATED(fft_scratch%xzcount)) THEN
2736 0 : DEALLOCATE (fft_scratch%xzcount, fft_scratch%yzcount)
2737 0 : DEALLOCATE (fft_scratch%xzdispl, fft_scratch%yzdispl)
2738 0 : fft_scratch%in = 0
2739 0 : fft_scratch%rsratio = 1._dp
2740 : END IF
2741 44164 : IF (ASSOCIATED(fft_scratch%rbuf1)) THEN
2742 0 : DEALLOCATE (fft_scratch%rbuf1)
2743 : END IF
2744 44164 : IF (ASSOCIATED(fft_scratch%rbuf2)) THEN
2745 0 : DEALLOCATE (fft_scratch%rbuf2)
2746 : END IF
2747 44164 : IF (ASSOCIATED(fft_scratch%rbuf3)) THEN
2748 0 : DEALLOCATE (fft_scratch%rbuf3)
2749 : END IF
2750 44164 : IF (ASSOCIATED(fft_scratch%rbuf4)) THEN
2751 0 : DEALLOCATE (fft_scratch%rbuf4)
2752 : END IF
2753 44164 : IF (ASSOCIATED(fft_scratch%rbuf5)) THEN
2754 8 : DEALLOCATE (fft_scratch%rbuf5)
2755 : END IF
2756 44164 : IF (ASSOCIATED(fft_scratch%rbuf6)) THEN
2757 8 : DEALLOCATE (fft_scratch%rbuf6)
2758 : END IF
2759 :
2760 44164 : IF (fft_scratch%cart_sub_comm(1) /= mp_comm_null) THEN
2761 0 : CALL fft_scratch%cart_sub_comm(1)%free()
2762 : END IF
2763 44164 : IF (fft_scratch%cart_sub_comm(2) /= mp_comm_null) THEN
2764 0 : CALL fft_scratch%cart_sub_comm(2)%free()
2765 : END IF
2766 :
2767 44164 : CALL fft_destroy_plan(fft_scratch%fft_plan(1))
2768 44164 : CALL fft_destroy_plan(fft_scratch%fft_plan(2))
2769 44164 : CALL fft_destroy_plan(fft_scratch%fft_plan(3))
2770 44164 : CALL fft_destroy_plan(fft_scratch%fft_plan(4))
2771 44164 : CALL fft_destroy_plan(fft_scratch%fft_plan(5))
2772 44164 : CALL fft_destroy_plan(fft_scratch%fft_plan(6))
2773 :
2774 44164 : END SUBROUTINE deallocate_fft_scratch_type
2775 :
2776 : ! **************************************************************************************************
2777 : !> \brief ...
2778 : ! **************************************************************************************************
2779 26805 : SUBROUTINE release_fft_scratch_pool()
2780 :
2781 : TYPE(fft_scratch_pool_type), POINTER :: fft_scratch, fft_scratch_current
2782 :
2783 26805 : IF (init_fft_pool == 0) NULLIFY (fft_scratch_first)
2784 :
2785 26805 : fft_scratch => fft_scratch_first
2786 42458 : DO
2787 69263 : IF (ASSOCIATED(fft_scratch)) THEN
2788 42458 : fft_scratch_current => fft_scratch
2789 42458 : fft_scratch => fft_scratch_current%fft_scratch_next
2790 42458 : NULLIFY (fft_scratch_current%fft_scratch_next)
2791 :
2792 42458 : CALL deallocate_fft_scratch_type(fft_scratch_current%fft_scratch)
2793 :
2794 169832 : DEALLOCATE (fft_scratch_current%fft_scratch)
2795 42458 : DEALLOCATE (fft_scratch_current)
2796 : ELSE
2797 : EXIT
2798 : END IF
2799 : END DO
2800 :
2801 26805 : init_fft_pool = 0
2802 :
2803 26805 : END SUBROUTINE release_fft_scratch_pool
2804 :
2805 : ! **************************************************************************************************
2806 : !> \brief ...
2807 : ! **************************************************************************************************
2808 3230920 : SUBROUTINE resize_fft_scratch_pool()
2809 :
2810 : INTEGER :: last_tick, nscratch
2811 : TYPE(fft_scratch_pool_type), POINTER :: fft_scratch_current, fft_scratch_old
2812 :
2813 3230920 : nscratch = 0
2814 :
2815 3230920 : last_tick = HUGE(last_tick)
2816 3230920 : NULLIFY (fft_scratch_old)
2817 :
2818 : ! start at the global pool, count, and find a deletion candidate
2819 3230920 : fft_scratch_current => fft_scratch_first
2820 23679378 : DO
2821 26910298 : IF (ASSOCIATED(fft_scratch_current)) THEN
2822 23679378 : nscratch = nscratch + 1
2823 : ! is this a candidate for deletion (i.e. least recently used, and not in use)
2824 23679378 : IF (.NOT. fft_scratch_current%fft_scratch%in_use) THEN
2825 20448458 : IF (fft_scratch_current%fft_scratch%last_tick < last_tick) THEN
2826 4001787 : last_tick = fft_scratch_current%fft_scratch%last_tick
2827 4001787 : fft_scratch_old => fft_scratch_current
2828 : END IF
2829 : END IF
2830 23679378 : fft_scratch_current => fft_scratch_current%fft_scratch_next
2831 : ELSE
2832 : EXIT
2833 : END IF
2834 : END DO
2835 :
2836 : ! we should delete a scratch
2837 3230920 : IF (nscratch > fft_pool_scratch_limit) THEN
2838 : ! note that we never deallocate the first (special) element of the list
2839 1706 : IF (ASSOCIATED(fft_scratch_old)) THEN
2840 : fft_scratch_current => fft_scratch_first
2841 : DO
2842 29002 : IF (ASSOCIATED(fft_scratch_current)) THEN
2843 : ! should we delete the next in the list?
2844 27296 : IF (ASSOCIATED(fft_scratch_current%fft_scratch_next, fft_scratch_old)) THEN
2845 : ! fix the linked list
2846 1706 : fft_scratch_current%fft_scratch_next => fft_scratch_old%fft_scratch_next
2847 :
2848 : ! deallocate the element
2849 1706 : CALL deallocate_fft_scratch_type(fft_scratch_old%fft_scratch)
2850 6824 : DEALLOCATE (fft_scratch_old%fft_scratch)
2851 1706 : DEALLOCATE (fft_scratch_old)
2852 :
2853 : ELSE
2854 : fft_scratch_current => fft_scratch_current%fft_scratch_next
2855 : END IF
2856 : ELSE
2857 : EXIT
2858 : END IF
2859 : END DO
2860 :
2861 : ELSE
2862 0 : CPWARN("The number of the scratches exceeded the limit, but none could be deallocated")
2863 : END IF
2864 : END IF
2865 :
2866 3230920 : END SUBROUTINE resize_fft_scratch_pool
2867 :
2868 : ! **************************************************************************************************
2869 : !> \brief ...
2870 : !> \param fft_scratch ...
2871 : !> \param tf_type ...
2872 : !> \param n ...
2873 : !> \param fft_sizes ...
2874 : ! **************************************************************************************************
2875 3230920 : SUBROUTINE get_fft_scratch(fft_scratch, tf_type, n, fft_sizes)
2876 : TYPE(fft_scratch_type), POINTER :: fft_scratch
2877 : INTEGER, INTENT(IN) :: tf_type
2878 : INTEGER, DIMENSION(:), INTENT(IN) :: n
2879 : TYPE(fft_scratch_sizes), INTENT(IN), &
2880 : OPTIONAL :: fft_sizes
2881 :
2882 : CHARACTER(len=*), PARAMETER :: routineN = 'get_fft_scratch'
2883 :
2884 : INTEGER :: coord(2), DIM(2), handle, i, ix, iz, lg, lmax, m1, m2, &
2885 : mcx2, mcy3, mcz1, mcz2, mg, mmax, mx1, mx2, my1, my3, mz1, mz2, mz3, &
2886 : nbx, nbz, nm, nmax, nmray, np, nx, ny, nyzray, nz, pos(2)
2887 : INTEGER, DIMENSION(3) :: pcoord
2888 : LOGICAL :: equal
2889 : LOGICAL, DIMENSION(2) :: dims
2890 : TYPE(fft_scratch_pool_type), POINTER :: fft_scratch_current, &
2891 : fft_scratch_last, &
2892 : fft_scratch_new
2893 : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
2894 : INTEGER :: ierr
2895 : INTEGER(KIND=C_SIZE_T) :: length
2896 : TYPE(C_PTR) :: cptr_r1buf, cptr_tbuf, &
2897 : cptr_p2buf, cptr_p3buf, cptr_p4buf, cptr_p5buf, cptr_p7buf
2898 : #endif
2899 3230920 : CALL timeset(routineN, handle)
2900 :
2901 : ! this is the place to check that the scratch_pool does not grow without limits
2902 : ! before we add a new scratch check the size of the pool and release some of the list if needed
2903 3230920 : CALL resize_fft_scratch_pool()
2904 :
2905 : ! get the required scratch
2906 3230920 : tick_fft_pool = tick_fft_pool + 1
2907 3230920 : fft_scratch_current => fft_scratch_first
2908 : DO
2909 15698363 : IF (ASSOCIATED(fft_scratch_current)) THEN
2910 15663204 : IF (fft_scratch_current%fft_scratch%in_use) THEN
2911 3230920 : fft_scratch_last => fft_scratch_current
2912 3230920 : fft_scratch_current => fft_scratch_current%fft_scratch_next
2913 3230920 : CYCLE
2914 : END IF
2915 12432284 : IF (tf_type /= fft_scratch_current%fft_scratch%tf_type) THEN
2916 4460914 : fft_scratch_last => fft_scratch_current
2917 4460914 : fft_scratch_current => fft_scratch_current%fft_scratch_next
2918 4460914 : CYCLE
2919 : END IF
2920 18509951 : IF (.NOT. ALL(n == fft_scratch_current%fft_scratch%nfft)) THEN
2921 4458939 : fft_scratch_last => fft_scratch_current
2922 4458939 : fft_scratch_current => fft_scratch_current%fft_scratch_next
2923 4458939 : CYCLE
2924 : END IF
2925 3512431 : IF (PRESENT(fft_sizes)) THEN
2926 2907836 : IF (fft_sizes%gs_group /= fft_scratch_current%fft_scratch%group) THEN
2927 312566 : fft_scratch_last => fft_scratch_current
2928 312566 : fft_scratch_current => fft_scratch_current%fft_scratch_next
2929 312566 : CYCLE
2930 : END IF
2931 2595270 : CALL is_equal(fft_sizes, fft_scratch_current%fft_scratch%sizes, equal)
2932 2595270 : IF (.NOT. equal) THEN
2933 4104 : fft_scratch_last => fft_scratch_current
2934 4104 : fft_scratch_current => fft_scratch_current%fft_scratch_next
2935 4104 : CYCLE
2936 : END IF
2937 : END IF
2938 : ! Success
2939 3195761 : fft_scratch => fft_scratch_current%fft_scratch
2940 3195761 : fft_scratch_current%fft_scratch%in_use = .TRUE.
2941 3195761 : EXIT
2942 : ELSE
2943 : ! We cannot find the scratch type in this pool
2944 : ! Generate a new scratch set
2945 35159 : ALLOCATE (fft_scratch_new)
2946 914134 : ALLOCATE (fft_scratch_new%fft_scratch)
2947 :
2948 35159 : IF (tf_type .NE. 400) THEN
2949 24926 : fft_scratch_new%fft_scratch%sizes = fft_sizes
2950 24926 : np = fft_sizes%numtask
2951 : ALLOCATE (fft_scratch_new%fft_scratch%scount(0:np - 1), fft_scratch_new%fft_scratch%rcount(0:np - 1), &
2952 : fft_scratch_new%fft_scratch%sdispl(0:np - 1), fft_scratch_new%fft_scratch%rdispl(0:np - 1), &
2953 299112 : fft_scratch_new%fft_scratch%pgcube(0:np - 1, 2))
2954 : END IF
2955 :
2956 0 : SELECT CASE (tf_type)
2957 : CASE DEFAULT
2958 0 : CPABORT("")
2959 : CASE (100) ! fft3d_pb: full cube distribution
2960 0 : mx1 = fft_sizes%mx1
2961 0 : my1 = fft_sizes%my1
2962 0 : mx2 = fft_sizes%mx2
2963 0 : mz2 = fft_sizes%mz2
2964 0 : my3 = fft_sizes%my3
2965 0 : mz3 = fft_sizes%mz3
2966 0 : CALL fft_alloc(fft_scratch_new%fft_scratch%a1buf, [mx1*my1, n(3)])
2967 0 : CALL fft_alloc(fft_scratch_new%fft_scratch%a2buf, [n(3), mx1*my1])
2968 0 : CALL fft_alloc(fft_scratch_new%fft_scratch%a3buf, [mx2*mz2, n(2)])
2969 0 : CALL fft_alloc(fft_scratch_new%fft_scratch%a4buf, [n(2), mx2*mz2])
2970 0 : CALL fft_alloc(fft_scratch_new%fft_scratch%a5buf, [my3*mz3, n(1)])
2971 0 : CALL fft_alloc(fft_scratch_new%fft_scratch%a6buf, [n(1), my3*mz3])
2972 0 : fft_scratch_new%fft_scratch%group = fft_sizes%gs_group
2973 :
2974 0 : dim = fft_sizes%rs_group%num_pe_cart
2975 0 : pos = fft_sizes%rs_group%mepos_cart
2976 0 : fft_scratch_new%fft_scratch%mip = fft_sizes%rs_group%mepos
2977 0 : fft_scratch_new%fft_scratch%dim = dim
2978 0 : fft_scratch_new%fft_scratch%pos = pos
2979 0 : mcz1 = fft_sizes%mcz1
2980 0 : mcx2 = fft_sizes%mcx2
2981 0 : mcz2 = fft_sizes%mcz2
2982 0 : mcy3 = fft_sizes%mcy3
2983 0 : ALLOCATE (fft_scratch_new%fft_scratch%rbuf1(mx2*my1*mcz2, 0:DIM(2) - 1))
2984 0 : ALLOCATE (fft_scratch_new%fft_scratch%rbuf2(mx1*my1*mcz2, 0:DIM(2) - 1))
2985 0 : ALLOCATE (fft_scratch_new%fft_scratch%rbuf3(mx2*mz3*mcy3, 0:DIM(1) - 1))
2986 0 : ALLOCATE (fft_scratch_new%fft_scratch%rbuf4(mx2*mz2*mcy3, 0:DIM(1) - 1))
2987 :
2988 0 : dims = (/.TRUE., .FALSE./)
2989 0 : CALL fft_scratch_new%fft_scratch%cart_sub_comm(1)%from_sub(fft_sizes%rs_group, dims)
2990 0 : dims = (/.FALSE., .TRUE./)
2991 0 : CALL fft_scratch_new%fft_scratch%cart_sub_comm(2)%from_sub(fft_sizes%rs_group, dims)
2992 :
2993 : !initialise pgcube
2994 0 : DO i = 0, DIM(1) - 1
2995 0 : coord = (/i, pos(2)/)
2996 0 : CALL fft_sizes%rs_group%rank_cart(coord, fft_scratch_new%fft_scratch%pgcube(i, 1))
2997 : END DO
2998 0 : DO i = 0, DIM(2) - 1
2999 0 : coord = (/pos(1), i/)
3000 0 : CALL fft_sizes%rs_group%rank_cart(coord, fft_scratch_new%fft_scratch%pgcube(i, 2))
3001 : END DO
3002 :
3003 : !set up fft plans
3004 : CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(1), fft_type, FWFFT, .TRUE., n(3), mx1*my1, &
3005 0 : fft_scratch_new%fft_scratch%a1buf, fft_scratch_new%fft_scratch%a2buf, fft_plan_style)
3006 : CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(2), fft_type, FWFFT, .TRUE., n(2), mx2*mz2, &
3007 0 : fft_scratch_new%fft_scratch%a3buf, fft_scratch_new%fft_scratch%a4buf, fft_plan_style)
3008 : CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(3), fft_type, FWFFT, .TRUE., n(1), my3*mz3, &
3009 0 : fft_scratch_new%fft_scratch%a5buf, fft_scratch_new%fft_scratch%a6buf, fft_plan_style)
3010 : CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(4), fft_type, BWFFT, .TRUE., n(1), my3*mz3, &
3011 0 : fft_scratch_new%fft_scratch%a6buf, fft_scratch_new%fft_scratch%a5buf, fft_plan_style)
3012 : CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(5), fft_type, BWFFT, .TRUE., n(2), mx2*mz2, &
3013 0 : fft_scratch_new%fft_scratch%a4buf, fft_scratch_new%fft_scratch%a3buf, fft_plan_style)
3014 : CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(6), fft_type, BWFFT, .TRUE., n(3), mx1*my1, &
3015 0 : fft_scratch_new%fft_scratch%a2buf, fft_scratch_new%fft_scratch%a1buf, fft_plan_style)
3016 :
3017 : CASE (101) ! fft3d_pb: full cube distribution (dim 1)
3018 8 : mx1 = fft_sizes%mx1
3019 8 : my1 = fft_sizes%my1
3020 8 : mz1 = fft_sizes%mz1
3021 8 : my3 = fft_sizes%my3
3022 8 : mz3 = fft_sizes%mz3
3023 24 : CALL fft_alloc(fft_scratch_new%fft_scratch%a1buf, [mx1*my1, n(3)])
3024 24 : CALL fft_alloc(fft_scratch_new%fft_scratch%a2buf, [n(3), mx1*my1])
3025 8 : fft_scratch_new%fft_scratch%group = fft_sizes%gs_group
3026 24 : CALL fft_alloc(fft_scratch_new%fft_scratch%a3buf, [mx1*mz1, n(2)])
3027 24 : CALL fft_alloc(fft_scratch_new%fft_scratch%a4buf, [n(2), mx1*mz1])
3028 24 : CALL fft_alloc(fft_scratch_new%fft_scratch%a5buf, [my3*mz3, n(1)])
3029 24 : CALL fft_alloc(fft_scratch_new%fft_scratch%a6buf, [n(1), my3*mz3])
3030 :
3031 24 : dim = fft_sizes%rs_group%num_pe_cart
3032 24 : pos = fft_sizes%rs_group%mepos_cart
3033 8 : fft_scratch_new%fft_scratch%mip = fft_sizes%rs_group%mepos
3034 24 : fft_scratch_new%fft_scratch%dim = dim
3035 24 : fft_scratch_new%fft_scratch%pos = pos
3036 8 : mcy3 = fft_sizes%mcy3
3037 32 : ALLOCATE (fft_scratch_new%fft_scratch%rbuf5(mx1*mz3*mcy3, 0:DIM(1) - 1))
3038 32 : ALLOCATE (fft_scratch_new%fft_scratch%rbuf6(mx1*mz1*mcy3, 0:DIM(1) - 1))
3039 :
3040 : !set up fft plans
3041 : CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(1), fft_type, FWFFT, .TRUE., n(3), mx1*my1, &
3042 8 : fft_scratch_new%fft_scratch%a1buf, fft_scratch_new%fft_scratch%a3buf, fft_plan_style)
3043 : CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(2), fft_type, FWFFT, .TRUE., n(2), mx1*mz1, &
3044 8 : fft_scratch_new%fft_scratch%a3buf, fft_scratch_new%fft_scratch%a4buf, fft_plan_style)
3045 : CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(3), fft_type, FWFFT, .TRUE., n(1), my3*mz3, &
3046 8 : fft_scratch_new%fft_scratch%a5buf, fft_scratch_new%fft_scratch%a6buf, fft_plan_style)
3047 : CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(4), fft_type, BWFFT, .TRUE., n(1), my3*mz3, &
3048 8 : fft_scratch_new%fft_scratch%a6buf, fft_scratch_new%fft_scratch%a5buf, fft_plan_style)
3049 : CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(5), fft_type, BWFFT, .TRUE., n(2), mx1*mz1, &
3050 8 : fft_scratch_new%fft_scratch%a4buf, fft_scratch_new%fft_scratch%a3buf, fft_plan_style)
3051 : CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(6), fft_type, BWFFT, .TRUE., n(3), mx1*my1, &
3052 8 : fft_scratch_new%fft_scratch%a3buf, fft_scratch_new%fft_scratch%a1buf, fft_plan_style)
3053 :
3054 : CASE (200) ! fft3d_ps: plane distribution
3055 24918 : nx = fft_sizes%nx
3056 24918 : ny = fft_sizes%ny
3057 24918 : nz = fft_sizes%nz
3058 24918 : mx2 = fft_sizes%mx2
3059 24918 : lmax = fft_sizes%lmax
3060 24918 : mmax = fft_sizes%mmax
3061 24918 : lg = fft_sizes%lg
3062 24918 : mg = fft_sizes%mg
3063 24918 : np = fft_sizes%numtask
3064 24918 : nmray = fft_sizes%nmray
3065 24918 : nyzray = fft_sizes%nyzray
3066 : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
3067 : length = INT(2*dp_size*MAX(mmax, 1)*MAX(lmax, 1), KIND=C_SIZE_T)
3068 : ierr = offload_malloc_pinned_mem(cptr_r1buf, length)
3069 : CPASSERT(ierr == 0)
3070 : CALL c_f_pointer(cptr_r1buf, fft_scratch_new%fft_scratch%r1buf, (/MAX(mmax, 1), MAX(lmax, 1)/))
3071 : length = INT(2*dp_size*MAX(ny, 1)*MAX(nz, 1)*MAX(nx, 1), KIND=C_SIZE_T)
3072 : ierr = offload_malloc_pinned_mem(cptr_tbuf, length)
3073 : CPASSERT(ierr == 0)
3074 : CALL c_f_pointer(cptr_tbuf, fft_scratch_new%fft_scratch%tbuf, (/MAX(ny, 1), MAX(nz, 1), MAX(nx, 1)/))
3075 : #else
3076 74754 : CALL fft_alloc(fft_scratch_new%fft_scratch%r1buf, [mmax, lmax])
3077 99672 : CALL fft_alloc(fft_scratch_new%fft_scratch%tbuf, [ny, nz, nx])
3078 : #endif
3079 24918 : fft_scratch_new%fft_scratch%group = fft_sizes%gs_group
3080 74754 : CALL fft_alloc(fft_scratch_new%fft_scratch%r2buf, [lg, mg])
3081 24918 : nm = nmray*mx2
3082 24918 : IF (alltoall_sgl) THEN
3083 32 : ALLOCATE (fft_scratch_new%fft_scratch%ss(mmax, lmax))
3084 32 : ALLOCATE (fft_scratch_new%fft_scratch%tt(nm, 0:np - 1))
3085 : ELSE
3086 99640 : ALLOCATE (fft_scratch_new%fft_scratch%rr(nm, 0:np - 1))
3087 : END IF
3088 :
3089 : !set up fft plans
3090 : CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(1), fft_type, FWFFT, .TRUE., nz, nx*ny, &
3091 24918 : fft_scratch_new%fft_scratch%tbuf, fft_scratch_new%fft_scratch%r1buf, fft_plan_style)
3092 : CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(2), fft_type, FWFFT, .TRUE., ny, nx*nz, &
3093 24918 : fft_scratch_new%fft_scratch%r1buf, fft_scratch_new%fft_scratch%tbuf, fft_plan_style)
3094 : CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(3), fft_type, FWFFT, .TRUE., n(1), nyzray, &
3095 24918 : fft_scratch_new%fft_scratch%r1buf, fft_scratch_new%fft_scratch%r2buf, fft_plan_style)
3096 : CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(4), fft_type, BWFFT, .TRUE., n(1), nyzray, &
3097 24918 : fft_scratch_new%fft_scratch%r2buf, fft_scratch_new%fft_scratch%r1buf, fft_plan_style)
3098 : CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(5), fft_type, BWFFT, .TRUE., ny, nx*nz, &
3099 24918 : fft_scratch_new%fft_scratch%tbuf, fft_scratch_new%fft_scratch%r1buf, fft_plan_style)
3100 : CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(6), fft_type, BWFFT, .TRUE., nz, nx*ny, &
3101 24918 : fft_scratch_new%fft_scratch%r1buf, fft_scratch_new%fft_scratch%tbuf, fft_plan_style)
3102 :
3103 : CASE (300) ! fft3d_ps: block distribution
3104 0 : mx1 = fft_sizes%mx1
3105 0 : mx2 = fft_sizes%mx2
3106 0 : my1 = fft_sizes%my1
3107 0 : mz2 = fft_sizes%mz2
3108 0 : mcx2 = fft_sizes%mcx2
3109 0 : lg = fft_sizes%lg
3110 0 : mg = fft_sizes%mg
3111 0 : nmax = fft_sizes%nmax
3112 0 : nmray = fft_sizes%nmray
3113 0 : nyzray = fft_sizes%nyzray
3114 0 : m1 = fft_sizes%r_dim(1)
3115 0 : m2 = fft_sizes%r_dim(2)
3116 0 : nbx = fft_sizes%nbx
3117 0 : nbz = fft_sizes%nbz
3118 0 : CALL fft_alloc(fft_scratch_new%fft_scratch%p1buf, [mx1*my1, n(3)])
3119 0 : CALL fft_alloc(fft_scratch_new%fft_scratch%p6buf, [lg, mg])
3120 : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
3121 : length = INT(2*dp_size*MAX(n(3), 1)*MAX(mx1*my1, 1), KIND=C_SIZE_T)
3122 : ierr = offload_malloc_pinned_mem(cptr_p2buf, length)
3123 : CPASSERT(ierr == 0)
3124 : CALL c_f_pointer(cptr_p2buf, fft_scratch_new%fft_scratch%p2buf, (/MAX(n(3), 1), MAX(mx1*my1, 1)/))
3125 : length = INT(2*dp_size*MAX(mx2*mz2, 1)*MAX(n(2), 1), KIND=C_SIZE_T)
3126 : ierr = offload_malloc_pinned_mem(cptr_p3buf, length)
3127 : CPASSERT(ierr == 0)
3128 : CALL c_f_pointer(cptr_p3buf, fft_scratch_new%fft_scratch%p3buf, (/MAX(mx2*mz2, 1), MAX(n(2), 1)/))
3129 : length = INT(2*dp_size*MAX(n(2), 1)*MAX(mx2*mz2, 1), KIND=C_SIZE_T)
3130 : ierr = offload_malloc_pinned_mem(cptr_p4buf, length)
3131 : CPASSERT(ierr == 0)
3132 : CALL c_f_pointer(cptr_p4buf, fft_scratch_new%fft_scratch%p4buf, (/MAX(n(2), 1), MAX(mx2*mz2, 1)/))
3133 : length = INT(2*dp_size*MAX(nyzray, 1)*MAX(n(1), 1), KIND=C_SIZE_T)
3134 : ierr = offload_malloc_pinned_mem(cptr_p5buf, length)
3135 : CPASSERT(ierr == 0)
3136 : CALL c_f_pointer(cptr_p5buf, fft_scratch_new%fft_scratch%p5buf, (/MAX(nyzray, 1), MAX(n(1), 1)/))
3137 : length = INT(2*dp_size*MAX(mg, 1)*MAX(lg, 1), KIND=C_SIZE_T)
3138 : ierr = offload_malloc_pinned_mem(cptr_p7buf, length)
3139 : CPASSERT(ierr == 0)
3140 : CALL c_f_pointer(cptr_p7buf, fft_scratch_new%fft_scratch%p7buf, (/MAX(mg, 1), MAX(lg, 1)/))
3141 : #else
3142 0 : CALL fft_alloc(fft_scratch_new%fft_scratch%p2buf, [n(3), mx1*my1])
3143 0 : CALL fft_alloc(fft_scratch_new%fft_scratch%p3buf, [mx2*mz2, n(2)])
3144 0 : CALL fft_alloc(fft_scratch_new%fft_scratch%p4buf, [n(2), mx2*mz2])
3145 0 : CALL fft_alloc(fft_scratch_new%fft_scratch%p5buf, [nyzray, n(1)])
3146 0 : CALL fft_alloc(fft_scratch_new%fft_scratch%p7buf, [mg, lg])
3147 : #endif
3148 0 : IF (alltoall_sgl) THEN
3149 0 : ALLOCATE (fft_scratch_new%fft_scratch%yzbuf_sgl(mg*lg))
3150 0 : ALLOCATE (fft_scratch_new%fft_scratch%xzbuf_sgl(n(2)*mx2*mz2))
3151 : ELSE
3152 0 : ALLOCATE (fft_scratch_new%fft_scratch%yzbuf(mg*lg))
3153 0 : ALLOCATE (fft_scratch_new%fft_scratch%xzbuf(n(2)*mx2*mz2))
3154 : END IF
3155 0 : ALLOCATE (fft_scratch_new%fft_scratch%pgrid(0:m1 - 1, 0:m2 - 1))
3156 0 : ALLOCATE (fft_scratch_new%fft_scratch%xcor(nbx))
3157 0 : ALLOCATE (fft_scratch_new%fft_scratch%zcor(nbz))
3158 0 : ALLOCATE (fft_scratch_new%fft_scratch%pzcoord(0:np - 1))
3159 : ALLOCATE (fft_scratch_new%fft_scratch%xzcount(0:np - 1), &
3160 0 : fft_scratch_new%fft_scratch%yzcount(0:np - 1))
3161 : ALLOCATE (fft_scratch_new%fft_scratch%xzdispl(0:np - 1), &
3162 0 : fft_scratch_new%fft_scratch%yzdispl(0:np - 1))
3163 0 : fft_scratch_new%fft_scratch%group = fft_sizes%gs_group
3164 :
3165 0 : dim = fft_sizes%rs_group%num_pe_cart
3166 0 : pos = fft_sizes%rs_group%mepos_cart
3167 0 : fft_scratch_new%fft_scratch%mip = fft_sizes%rs_group%mepos
3168 0 : fft_scratch_new%fft_scratch%dim = dim
3169 0 : fft_scratch_new%fft_scratch%pos = pos
3170 0 : mcz1 = fft_sizes%mcz1
3171 0 : mcz2 = fft_sizes%mcz2
3172 0 : ALLOCATE (fft_scratch_new%fft_scratch%rbuf1(mx2*my1*mcz2, 0:DIM(2) - 1))
3173 0 : ALLOCATE (fft_scratch_new%fft_scratch%rbuf2(mx1*my1*mcz2, 0:DIM(2) - 1))
3174 :
3175 0 : dims = (/.FALSE., .TRUE./)
3176 0 : CALL fft_scratch_new%fft_scratch%cart_sub_comm(2)%from_sub(fft_sizes%rs_group, dims)
3177 :
3178 : !initialise pgcube
3179 0 : DO i = 0, DIM(2) - 1
3180 0 : coord = (/pos(1), i/)
3181 0 : CALL fft_sizes%rs_group%rank_cart(coord, fft_scratch_new%fft_scratch%pgcube(i, 2))
3182 : END DO
3183 :
3184 : !initialise pgrid
3185 0 : DO ix = 0, m1 - 1
3186 0 : DO iz = 0, m2 - 1
3187 0 : coord = (/ix, iz/)
3188 0 : CALL fft_sizes%rs_group%rank_cart(coord, fft_scratch_new%fft_scratch%pgrid(ix, iz))
3189 : END DO
3190 : END DO
3191 :
3192 : !initialise pzcoord
3193 0 : DO i = 0, np - 1
3194 0 : CALL fft_sizes%rs_group%coords(i, pcoord)
3195 0 : fft_scratch_new%fft_scratch%pzcoord(i) = pcoord(2)
3196 : END DO
3197 :
3198 : !set up fft plans
3199 : CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(1), fft_type, FWFFT, .TRUE., n(3), mx1*my1, &
3200 0 : fft_scratch_new%fft_scratch%p1buf, fft_scratch_new%fft_scratch%p2buf, fft_plan_style)
3201 : CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(2), fft_type, FWFFT, .TRUE., n(2), mx2*mz2, &
3202 0 : fft_scratch_new%fft_scratch%p3buf, fft_scratch_new%fft_scratch%p4buf, fft_plan_style)
3203 : CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(3), fft_type, FWFFT, .TRUE., n(1), nyzray, &
3204 0 : fft_scratch_new%fft_scratch%p5buf, fft_scratch_new%fft_scratch%p6buf, fft_plan_style)
3205 : CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(4), fft_type, BWFFT, .TRUE., n(1), nyzray, &
3206 0 : fft_scratch_new%fft_scratch%p6buf, fft_scratch_new%fft_scratch%p7buf, fft_plan_style)
3207 : CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(5), fft_type, BWFFT, .TRUE., n(2), mx2*mz2, &
3208 0 : fft_scratch_new%fft_scratch%p4buf, fft_scratch_new%fft_scratch%p3buf, fft_plan_style)
3209 : CALL fft_create_plan_1dm(fft_scratch_new%fft_scratch%fft_plan(6), fft_type, BWFFT, .TRUE., n(3), mx1*my1, &
3210 0 : fft_scratch_new%fft_scratch%p3buf, fft_scratch_new%fft_scratch%p1buf, fft_plan_style)
3211 :
3212 : CASE (400) ! serial FFT
3213 10233 : np = 0
3214 10233 : CALL fft_alloc(fft_scratch_new%fft_scratch%ziptr, n)
3215 10233 : CALL fft_alloc(fft_scratch_new%fft_scratch%zoptr, n)
3216 :
3217 : !in place plans
3218 : CALL fft_create_plan_3d(fft_scratch_new%fft_scratch%fft_plan(1), fft_type, .TRUE., FWFFT, n, &
3219 10233 : fft_scratch_new%fft_scratch%ziptr, fft_scratch_new%fft_scratch%ziptr, fft_plan_style)
3220 : CALL fft_create_plan_3d(fft_scratch_new%fft_scratch%fft_plan(2), fft_type, .TRUE., BWFFT, n, &
3221 10233 : fft_scratch_new%fft_scratch%ziptr, fft_scratch_new%fft_scratch%ziptr, fft_plan_style)
3222 : ! out of place plans
3223 : CALL fft_create_plan_3d(fft_scratch_new%fft_scratch%fft_plan(3), fft_type, .FALSE., FWFFT, n, &
3224 10233 : fft_scratch_new%fft_scratch%ziptr, fft_scratch_new%fft_scratch%zoptr, fft_plan_style)
3225 : CALL fft_create_plan_3d(fft_scratch_new%fft_scratch%fft_plan(4), fft_type, .FALSE., BWFFT, n, &
3226 45392 : fft_scratch_new%fft_scratch%ziptr, fft_scratch_new%fft_scratch%zoptr, fft_plan_style)
3227 :
3228 : END SELECT
3229 :
3230 35159 : NULLIFY (fft_scratch_new%fft_scratch_next)
3231 : fft_scratch_new%fft_scratch%fft_scratch_id = &
3232 35159 : fft_scratch_last%fft_scratch%fft_scratch_id + 1
3233 35159 : fft_scratch_new%fft_scratch%in_use = .TRUE.
3234 140636 : fft_scratch_new%fft_scratch%nfft = n
3235 35159 : fft_scratch_last%fft_scratch_next => fft_scratch_new
3236 35159 : fft_scratch_new%fft_scratch%tf_type = tf_type
3237 35159 : fft_scratch => fft_scratch_new%fft_scratch
3238 35159 : EXIT
3239 :
3240 : END IF
3241 : END DO
3242 :
3243 3230920 : fft_scratch%last_tick = tick_fft_pool
3244 :
3245 3230920 : CALL timestop(handle)
3246 :
3247 3230920 : END SUBROUTINE get_fft_scratch
3248 :
3249 : ! **************************************************************************************************
3250 : !> \brief ...
3251 : !> \param fft_scratch ...
3252 : ! **************************************************************************************************
3253 3230920 : SUBROUTINE release_fft_scratch(fft_scratch)
3254 :
3255 : TYPE(fft_scratch_type), POINTER :: fft_scratch
3256 :
3257 : INTEGER :: scratch_id
3258 : TYPE(fft_scratch_pool_type), POINTER :: fft_scratch_current
3259 :
3260 3230920 : scratch_id = fft_scratch%fft_scratch_id
3261 :
3262 3230920 : fft_scratch_current => fft_scratch_first
3263 12467443 : DO
3264 15698363 : IF (ASSOCIATED(fft_scratch_current)) THEN
3265 15698363 : IF (scratch_id == fft_scratch_current%fft_scratch%fft_scratch_id) THEN
3266 3230920 : fft_scratch%in_use = .FALSE.
3267 3230920 : NULLIFY (fft_scratch)
3268 3230920 : EXIT
3269 : END IF
3270 12467443 : fft_scratch_current => fft_scratch_current%fft_scratch_next
3271 : ELSE
3272 : ! We cannot find the scratch type in this pool
3273 0 : CPABORT("")
3274 0 : EXIT
3275 : END IF
3276 : END DO
3277 :
3278 3230920 : END SUBROUTINE release_fft_scratch
3279 :
3280 : ! **************************************************************************************************
3281 : !> \brief ...
3282 : !> \param rs ...
3283 : !> \param scount ...
3284 : !> \param sdispl ...
3285 : !> \param rq ...
3286 : !> \param rcount ...
3287 : !> \param rdispl ...
3288 : !> \param group ...
3289 : ! **************************************************************************************************
3290 0 : SUBROUTINE sparse_alltoall(rs, scount, sdispl, rq, rcount, rdispl, group)
3291 : COMPLEX(KIND=dp), DIMENSION(:), POINTER :: rs
3292 : INTEGER, DIMENSION(:), POINTER :: scount, sdispl
3293 : COMPLEX(KIND=dp), DIMENSION(:), POINTER :: rq
3294 : INTEGER, DIMENSION(:), POINTER :: rcount, rdispl
3295 :
3296 : CLASS(mp_comm_type), INTENT(IN) :: group
3297 :
3298 0 : COMPLEX(KIND=dp), DIMENSION(:), POINTER :: msgin, msgout
3299 : INTEGER :: ip, n, nr, ns, pos
3300 0 : TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:) :: rreq, sreq
3301 :
3302 0 : CALL group%sync()
3303 0 : n = group%num_pe
3304 0 : pos = group%mepos
3305 0 : ALLOCATE (sreq(0:n - 1))
3306 0 : ALLOCATE (rreq(0:n - 1))
3307 0 : nr = 0
3308 0 : DO ip = 0, n - 1
3309 0 : IF (rcount(ip) == 0) CYCLE
3310 0 : IF (ip == pos) CYCLE
3311 0 : msgout => rq(rdispl(ip) + 1:rdispl(ip) + rcount(ip))
3312 0 : CALL group%irecv(msgout, ip, rreq(nr))
3313 0 : nr = nr + 1
3314 : END DO
3315 0 : ns = 0
3316 0 : DO ip = 0, n - 1
3317 0 : IF (scount(ip) == 0) CYCLE
3318 0 : IF (ip == pos) CYCLE
3319 0 : msgin => rs(sdispl(ip) + 1:sdispl(ip) + scount(ip))
3320 0 : CALL group%isend(msgin, ip, sreq(ns))
3321 0 : ns = ns + 1
3322 : END DO
3323 0 : IF (rcount(pos) /= 0) THEN
3324 0 : IF (rcount(pos) /= scount(pos)) CPABORT("")
3325 0 : rq(rdispl(pos) + 1:rdispl(pos) + rcount(pos)) = rs(sdispl(pos) + 1:sdispl(pos) + scount(pos))
3326 : END IF
3327 0 : CALL mp_waitall(sreq(0:ns - 1))
3328 0 : CALL mp_waitall(rreq(0:nr - 1))
3329 0 : DEALLOCATE (sreq)
3330 0 : DEALLOCATE (rreq)
3331 0 : CALL group%sync()
3332 :
3333 0 : END SUBROUTINE sparse_alltoall
3334 :
3335 : ! **************************************************************************************************
3336 : !> \brief test data structures for equality. It is assumed that if they are
3337 : !> different for one mpi task they are different for all (??)
3338 : !> \param fft_size_1 ...
3339 : !> \param fft_size_2 ...
3340 : !> \param equal ...
3341 : ! **************************************************************************************************
3342 2595270 : SUBROUTINE is_equal(fft_size_1, fft_size_2, equal)
3343 : TYPE(fft_scratch_sizes) :: fft_size_1, fft_size_2
3344 : LOGICAL :: equal
3345 :
3346 : equal = .TRUE.
3347 :
3348 2595270 : equal = equal .AND. fft_size_1%nx == fft_size_2%nx
3349 2595270 : equal = equal .AND. fft_size_1%ny == fft_size_2%ny
3350 2595270 : equal = equal .AND. fft_size_1%nz == fft_size_2%nz
3351 :
3352 2595270 : equal = equal .AND. fft_size_1%lmax == fft_size_2%lmax
3353 2595270 : equal = equal .AND. fft_size_1%mmax == fft_size_2%mmax
3354 2595270 : equal = equal .AND. fft_size_1%nmax == fft_size_2%nmax
3355 :
3356 2595270 : equal = equal .AND. fft_size_1%mx1 == fft_size_2%mx1
3357 2595270 : equal = equal .AND. fft_size_1%mx2 == fft_size_2%mx2
3358 2595270 : equal = equal .AND. fft_size_1%mx3 == fft_size_2%mx3
3359 :
3360 2595270 : equal = equal .AND. fft_size_1%my1 == fft_size_2%my1
3361 2595270 : equal = equal .AND. fft_size_1%my2 == fft_size_2%my2
3362 2595270 : equal = equal .AND. fft_size_1%my3 == fft_size_2%my3
3363 :
3364 2595270 : equal = equal .AND. fft_size_1%mcz1 == fft_size_2%mcz1
3365 2595270 : equal = equal .AND. fft_size_1%mcx2 == fft_size_2%mcx2
3366 2595270 : equal = equal .AND. fft_size_1%mcz2 == fft_size_2%mcz2
3367 2595270 : equal = equal .AND. fft_size_1%mcy3 == fft_size_2%mcy3
3368 :
3369 2595270 : equal = equal .AND. fft_size_1%lg == fft_size_2%lg
3370 2595270 : equal = equal .AND. fft_size_1%mg == fft_size_2%mg
3371 :
3372 2595270 : equal = equal .AND. fft_size_1%nbx == fft_size_2%nbx
3373 2595270 : equal = equal .AND. fft_size_1%nbz == fft_size_2%nbz
3374 :
3375 2595270 : equal = equal .AND. fft_size_1%nmray == fft_size_2%nmray
3376 2595270 : equal = equal .AND. fft_size_1%nyzray == fft_size_2%nyzray
3377 :
3378 2595270 : equal = equal .AND. fft_size_1%gs_group == fft_size_2%gs_group
3379 2595270 : equal = equal .AND. fft_size_1%rs_group == fft_size_2%rs_group
3380 :
3381 7785810 : equal = equal .AND. ALL(fft_size_1%g_pos == fft_size_2%g_pos)
3382 7785810 : equal = equal .AND. ALL(fft_size_1%r_pos == fft_size_2%r_pos)
3383 7785810 : equal = equal .AND. ALL(fft_size_1%r_dim == fft_size_2%r_dim)
3384 :
3385 2595270 : equal = equal .AND. fft_size_1%numtask == fft_size_2%numtask
3386 :
3387 2595270 : END SUBROUTINE is_equal
3388 :
3389 0 : END MODULE fft_tools
|