Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2026 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief different utils that are useful to manipulate splines on the regular
10 : !> grid of a pw
11 : !> \par History
12 : !> 05.2003 created [fawzi]
13 : !> 08.2004 removed spline evaluation method using more than 2 read streams
14 : !> (pw_compose_stripe_rs3), added linear solver based spline
15 : !> inversion [fawzi]
16 : !> \author Fawzi Mohamed
17 : ! **************************************************************************************************
18 : MODULE pw_spline_utils
19 :
20 : USE cp_log_handling, ONLY: cp_get_default_logger,&
21 : cp_logger_type,&
22 : cp_to_string
23 : USE kinds, ONLY: dp
24 : USE mathconstants, ONLY: twopi
25 : USE message_passing, ONLY: mp_comm_congruent
26 : USE pw_grid_types, ONLY: FULLSPACE,&
27 : PW_MODE_LOCAL
28 : USE pw_methods, ONLY: pw_axpy,&
29 : pw_copy,&
30 : pw_integral_ab,&
31 : pw_zero
32 : USE pw_pool_types, ONLY: pw_pool_release,&
33 : pw_pool_type
34 : USE pw_types, ONLY: pw_c1d_gs_type,&
35 : pw_r3d_rs_type
36 : #include "../base/base_uses.f90"
37 :
38 : IMPLICIT NONE
39 : PRIVATE
40 :
41 : LOGICAL, PRIVATE, PARAMETER :: debug_this_module = .TRUE.
42 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'pw_spline_utils'
43 :
44 : INTEGER, PARAMETER, PUBLIC :: no_precond = 0, &
45 : precond_spl3_aint = 1, &
46 : precond_spl3_1 = 2, &
47 : precond_spl3_aint2 = 3, &
48 : precond_spl3_2 = 4, &
49 : precond_spl3_3 = 5
50 :
51 : REAL(KIND=dp), PUBLIC, PARAMETER, DIMENSION(4) :: nn10_coeffs = &
52 : [125._dp/216._dp, 25._dp/432._dp, 5._dp/864._dp, 1._dp/1728._dp], &
53 : spline3_coeffs = &
54 : [8._dp/(27._dp), 2._dp/(27._dp), 1._dp/(27._dp*2._dp), &
55 : 1._dp/(27._dp*8._dp)], &
56 : spline2_coeffs = &
57 : [27._dp/(64._dp), 9._dp/(64._dp*2_dp), 3._dp/(64._dp*4._dp), &
58 : 1._dp/(64._dp*8._dp)], &
59 : nn50_coeffs = &
60 : [15625._dp/17576._dp, 625._dp/35152._dp, 25._dp/70304._dp, &
61 : 1._dp/140608._dp], &
62 : spl3_aint_coeff = &
63 : [46._dp/27._dp, -2._dp/(27._dp), -1._dp/(27._dp*2._dp), &
64 : -1._dp/(27._dp*8._dp)], &
65 : spl3_precond1_coeff = &
66 : [64._dp/3._dp, -8._dp/3._dp, -1._dp/3._dp, -1._dp/24._dp], &
67 : spl3_1d_transf_coeffs = &
68 : [2._dp/3._dp, 23._dp/48._dp, 1._dp/6._dp, 1._dp/48._dp]
69 :
70 : REAL(KIND=dp), PUBLIC, PARAMETER, DIMENSION(3) :: spline3_deriv_coeffs = &
71 : [2.0_dp/9.0_dp, 1.0_dp/18.0_dp, 1.0_dp/72.0_dp], &
72 : spline2_deriv_coeffs = &
73 : [9.0_dp/32.0_dp, 3.0_dp/64.0_dp, 1.0_dp/128.0_dp], &
74 : nn10_deriv_coeffs = &
75 : [25._dp/72._dp, 5._dp/144, 1._dp/288._dp], &
76 : nn50_deriv_coeffs = &
77 : [625._dp/1352._dp, 25._dp/2704._dp, 1._dp/5408._dp], &
78 : spl3_1d_coeffs0 = &
79 : [1._dp/6_dp, 2._dp/3._dp, 1._dp/6._dp], &
80 : spl3_1d_transf_border1 = &
81 : [0.517977704_dp, 0.464044595_dp, 0.17977701e-1_dp]
82 :
83 : PUBLIC :: pw_spline3_interpolate_values_g, &
84 : pw_spline3_deriv_g
85 : PUBLIC :: pw_spline_scale_deriv
86 : PUBLIC :: pw_spline2_interpolate_values_g, &
87 : pw_spline2_deriv_g
88 : PUBLIC :: pw_nn_smear_r, pw_nn_deriv_r, &
89 : spl3_nopbc, spl3_pbc, spl3_nopbct
90 : PUBLIC :: add_fine2coarse, add_coarse2fine
91 : PUBLIC :: pw_spline_precond_create, &
92 : pw_spline_do_precond, &
93 : pw_spline_precond_set_kind, &
94 : find_coeffs, &
95 : pw_spline_precond_release, &
96 : pw_spline_precond_type, &
97 : Eval_Interp_Spl3_pbc, &
98 : Eval_d_Interp_Spl3_pbc
99 :
100 : !***
101 :
102 : ! **************************************************************************************************
103 : !> \brief stores information for the preconditioner used to calculate the
104 : !> coeffs of splines
105 : !> \author fawzi
106 : ! **************************************************************************************************
107 : TYPE pw_spline_precond_type
108 : INTEGER :: kind = no_precond
109 : REAL(kind=dp), DIMENSION(4) :: coeffs = 0.0_dp
110 : REAL(kind=dp), DIMENSION(3) :: coeffs_1d = 0.0_dp
111 : LOGICAL :: sharpen = .FALSE., normalize = .FALSE., pbc = .FALSE., transpose = .FALSE.
112 : TYPE(pw_pool_type), POINTER :: pool => NULL()
113 : END TYPE pw_spline_precond_type
114 :
115 : CONTAINS
116 :
117 : ! **************************************************************************************************
118 : !> \brief calculates the FFT of the coefficients of the quadratic spline that
119 : !> interpolates the given values
120 : !> \param spline_g on entry the FFT of the values to interpolate as cc,
121 : !> will contain the FFT of the coefficients of the spline
122 : !> \par History
123 : !> 06.2003 created [fawzi]
124 : !> \author Fawzi Mohamed
125 : !> \note
126 : !> does not work with spherical cutoff
127 : ! **************************************************************************************************
128 40794 : SUBROUTINE pw_spline2_interpolate_values_g(spline_g)
129 : TYPE(pw_c1d_gs_type), INTENT(IN) :: spline_g
130 :
131 : CHARACTER(len=*), PARAMETER :: routineN = 'pw_spline2_interpolate_values_g'
132 :
133 : INTEGER :: handle, i, ii, j, k
134 : INTEGER, DIMENSION(2, 3) :: gbo
135 : INTEGER, DIMENSION(3) :: n_tot
136 : REAL(KIND=dp) :: c23, coeff
137 40794 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: cosIVals, cosJVals, cosKVals
138 :
139 40794 : CALL timeset(routineN, handle)
140 :
141 163176 : n_tot(1:3) = spline_g%pw_grid%npts(1:3)
142 407940 : gbo = spline_g%pw_grid%bounds
143 :
144 40794 : CPASSERT(.NOT. spline_g%pw_grid%spherical)
145 40794 : CPASSERT(spline_g%pw_grid%grid_span == FULLSPACE)
146 :
147 0 : ALLOCATE (cosIVals(gbo(1, 1):gbo(2, 1)), cosJVals(gbo(1, 2):gbo(2, 2)), &
148 285558 : cosKVals(gbo(1, 3):gbo(2, 3)))
149 :
150 40794 : coeff = twopi/n_tot(1)
151 40794 : !$OMP PARALLEL DO PRIVATE(i) DEFAULT(NONE) SHARED(cosIVals,coeff,gbo)
152 : DO i = gbo(1, 1), gbo(2, 1)
153 : cosIVals(i) = COS(coeff*REAL(i, dp))
154 : END DO
155 40794 : coeff = twopi/n_tot(2)
156 40794 : !$OMP PARALLEL DO PRIVATE(j) DEFAULT(NONE) SHARED(cosJVals,coeff,gbo)
157 : DO j = gbo(1, 2), gbo(2, 2)
158 : cosJVals(j) = COS(coeff*REAL(j, dp))
159 : END DO
160 40794 : coeff = twopi/n_tot(3)
161 40794 : !$OMP PARALLEL DO PRIVATE(k) DEFAULT(NONE) SHARED(cosKVals,coeff,gbo)
162 : DO k = gbo(1, 3), gbo(2, 3)
163 : cosKVals(k) = COS(coeff*REAL(k, dp))
164 : END DO
165 :
166 40794 : !$OMP PARALLEL DO PRIVATE(i,j,k,ii,coeff,c23) DEFAULT(NONE) SHARED(spline_g,cosIVals,cosJVals,cosKVals)
167 : DO ii = 1, SIZE(spline_g%array)
168 : i = spline_g%pw_grid%g_hat(1, ii)
169 : j = spline_g%pw_grid%g_hat(2, ii)
170 : k = spline_g%pw_grid%g_hat(3, ii)
171 :
172 : c23 = cosJVals(j)*cosKVals(k)
173 : coeff = 64.0_dp/(cosIVals(i)*c23 + &
174 : (cosIVals(i)*cosJVals(j) + cosIVals(i)*cosKVals(k) + c23)*3.0_dp + &
175 : (cosIVals(i) + cosJVals(j) + cosKVals(k))*9.0_dp + &
176 : 27.0_dp)
177 :
178 : spline_g%array(ii) = spline_g%array(ii)*coeff
179 :
180 : END DO
181 40794 : DEALLOCATE (cosIVals, cosJVals, cosKVals)
182 :
183 40794 : CALL timestop(handle)
184 81588 : END SUBROUTINE pw_spline2_interpolate_values_g
185 :
186 : ! **************************************************************************************************
187 : !> \brief calculates the FFT of the coefficients of the2 cubic spline that
188 : !> interpolates the given values
189 : !> \param spline_g on entry the FFT of the values to interpolate as cc,
190 : !> will contain the FFT of the coefficients of the spline
191 : !> \par History
192 : !> 06.2003 created [fawzi]
193 : !> \author Fawzi Mohamed
194 : !> \note
195 : !> does not work with spherical cutoff
196 : !> stupid distribution for cos calculation, it should calculate only the
197 : !> needed cos, and avoid the mpi_allreduce
198 : ! **************************************************************************************************
199 342 : SUBROUTINE pw_spline3_interpolate_values_g(spline_g)
200 : TYPE(pw_c1d_gs_type), INTENT(IN) :: spline_g
201 :
202 : CHARACTER(len=*), PARAMETER :: routineN = 'pw_spline3_interpolate_values_g'
203 :
204 : INTEGER :: handle, i, ii, j, k
205 : INTEGER, DIMENSION(2, 3) :: gbo
206 : INTEGER, DIMENSION(3) :: n_tot
207 : REAL(KIND=dp) :: c23, coeff
208 342 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: cosIVals, cosJVals, cosKVals
209 :
210 342 : CALL timeset(routineN, handle)
211 :
212 1368 : n_tot(1:3) = spline_g%pw_grid%npts(1:3)
213 3420 : gbo = spline_g%pw_grid%bounds
214 :
215 342 : CPASSERT(.NOT. spline_g%pw_grid%spherical)
216 342 : CPASSERT(spline_g%pw_grid%grid_span == FULLSPACE)
217 :
218 0 : ALLOCATE (cosIVals(gbo(1, 1):gbo(2, 1)), &
219 0 : cosJVals(gbo(1, 2):gbo(2, 2)), &
220 2394 : cosKVals(gbo(1, 3):gbo(2, 3)))
221 :
222 342 : coeff = twopi/n_tot(1)
223 342 : !$OMP PARALLEL DO PRIVATE(i) DEFAULT(NONE) SHARED(cosIVals,coeff,gbo)
224 : DO i = gbo(1, 1), gbo(2, 1)
225 : cosIVals(i) = COS(coeff*REAL(i, dp))
226 : END DO
227 342 : coeff = twopi/n_tot(2)
228 342 : !$OMP PARALLEL DO PRIVATE(j) DEFAULT(NONE) SHARED(cosJVals,coeff,gbo)
229 : DO j = gbo(1, 2), gbo(2, 2)
230 : cosJVals(j) = COS(coeff*REAL(j, dp))
231 : END DO
232 342 : coeff = twopi/n_tot(3)
233 342 : !$OMP PARALLEL DO PRIVATE(k) DEFAULT(NONE) SHARED(cosKVals,coeff,gbo)
234 : DO k = gbo(1, 3), gbo(2, 3)
235 : cosKVals(k) = COS(coeff*REAL(k, dp))
236 : END DO
237 :
238 342 : !$OMP PARALLEL DO DEFAULT(NONE) PRIVATE(i,j,k,ii,coeff,c23) SHARED(spline_g,cosIVals,cosJVals,cosKVals)
239 : DO ii = 1, SIZE(spline_g%array)
240 : i = spline_g%pw_grid%g_hat(1, ii)
241 : j = spline_g%pw_grid%g_hat(2, ii)
242 : k = spline_g%pw_grid%g_hat(3, ii)
243 : ! no opt
244 : !FM coeff=1.0/((cosVal(1)*cosVal(2)*cosVal(3))/27.0_dp+&
245 : !FM (cosVal(1)*cosVal(2)+cosVal(1)*cosVal(3)+&
246 : !FM cosVal(2)*cosVal(3))*2.0_dp/27.0_dp+&
247 : !FM (cosVal(1)+cosVal(2)+cosVal(3))*4.0_dp/27.0_dp+&
248 : !FM 8.0_dp/27.0_dp)
249 : ! opt
250 : c23 = cosJVals(j)*cosKVals(k)
251 : coeff = 27.0_dp/(cosIVals(i)*c23 + &
252 : (cosIVals(i)*cosJVals(j) + cosIVals(i)*cosKVals(k) + c23)*2.0_dp + &
253 : (cosIVals(i) + cosJVals(j) + cosKVals(k))*4.0_dp + &
254 : 8.0_dp)
255 :
256 : spline_g%array(ii) = spline_g%array(ii)*coeff
257 :
258 : END DO
259 342 : DEALLOCATE (cosIVals, cosJVals, cosKVals)
260 :
261 342 : CALL timestop(handle)
262 684 : END SUBROUTINE pw_spline3_interpolate_values_g
263 :
264 : ! **************************************************************************************************
265 : !> \brief rescales the derivatives from gridspacing=1 to the real derivatives
266 : !> \param deriv_vals_r an array of x,y,z derivatives
267 : !> \param transpose if true applies the transpose of the map (defaults to
268 : !> false)
269 : !> \param scale a scaling factor (defaults to 1.0)
270 : !> \par History
271 : !> 06.2003 created [fawzi]
272 : !> \author Fawzi Mohamed
273 : ! **************************************************************************************************
274 18144 : SUBROUTINE pw_spline_scale_deriv(deriv_vals_r, transpose, scale)
275 : TYPE(pw_r3d_rs_type), DIMENSION(3), INTENT(IN) :: deriv_vals_r
276 : LOGICAL, INTENT(in), OPTIONAL :: transpose
277 : REAL(KIND=dp), INTENT(in), OPTIONAL :: scale
278 :
279 : CHARACTER(len=*), PARAMETER :: routineN = 'pw_spline_scale_deriv'
280 :
281 : INTEGER :: handle, i, idir, j, k
282 : INTEGER, DIMENSION(2, 3) :: bo
283 : INTEGER, DIMENSION(3) :: n_tot
284 : LOGICAL :: diag, my_transpose
285 : REAL(KIND=dp) :: dVal1, dVal2, dVal3, my_scale, scalef
286 : REAL(KIND=dp), DIMENSION(3, 3) :: dh_inv, h_grid
287 18144 : REAL(kind=dp), DIMENSION(:, :, :), POINTER :: ddata, ddata2, ddata3
288 :
289 18144 : CALL timeset(routineN, handle)
290 :
291 18144 : my_transpose = .FALSE.
292 18144 : IF (PRESENT(transpose)) my_transpose = transpose
293 18144 : my_scale = 1.0_dp
294 18144 : IF (PRESENT(scale)) my_scale = scale
295 72576 : n_tot(1:3) = deriv_vals_r(1)%pw_grid%npts(1:3)
296 181440 : bo = deriv_vals_r(1)%pw_grid%bounds_local
297 235872 : dh_inv = deriv_vals_r(1)%pw_grid%dh_inv
298 :
299 : ! map grid to real derivative
300 18144 : diag = .TRUE.
301 18144 : IF (my_transpose) THEN
302 32832 : DO j = 1, 3
303 106704 : DO i = 1, 3
304 73872 : h_grid(j, i) = my_scale*dh_inv(i, j) ! REAL(n_tot(i),dp)*cell_h_inv(i,j)
305 98496 : IF (i /= j .AND. h_grid(j, i) /= 0.0_dp) diag = .FALSE.
306 : END DO
307 : END DO
308 : ELSE
309 39744 : DO j = 1, 3
310 129168 : DO i = 1, 3
311 89424 : h_grid(i, j) = my_scale*dh_inv(i, j) ! REAL(n_tot(i),dp)*cell_h_inv(i,j)
312 119232 : IF (i /= j .AND. h_grid(i, j) /= 0.0_dp) diag = .FALSE.
313 : END DO
314 : END DO
315 : END IF
316 :
317 18144 : IF (diag) THEN
318 71856 : DO idir = 1, 3
319 53892 : ddata => deriv_vals_r(idir)%array
320 53892 : scalef = h_grid(idir, idir)
321 : CALL dscal((bo(2, 1) - bo(1, 1) + 1)*(bo(2, 2) - bo(1, 2) + 1)*(bo(2, 3) - bo(1, 3) + 1), &
322 71856 : scalef, ddata, 1)
323 : END DO
324 : ELSE
325 180 : ddata => deriv_vals_r(1)%array
326 180 : ddata2 => deriv_vals_r(2)%array
327 180 : ddata3 => deriv_vals_r(3)%array
328 : !$OMP PARALLEL DO DEFAULT(NONE) PRIVATE(k,j,i,dVal1,dVal2,dVal3) &
329 180 : !$OMP SHARED(ddata,ddata2,ddata3,h_grid,bo)
330 : DO k = bo(1, 3), bo(2, 3)
331 : DO j = bo(1, 2), bo(2, 2)
332 : DO i = bo(1, 1), bo(2, 1)
333 :
334 : dVal1 = ddata(i, j, k)
335 : dVal2 = ddata2(i, j, k)
336 : dVal3 = ddata3(i, j, k)
337 :
338 : ddata(i, j, k) = h_grid(1, 1)*dVal1 + &
339 : h_grid(2, 1)*dVal2 + h_grid(3, 1)*dVal3
340 : ddata2(i, j, k) = h_grid(1, 2)*dVal1 + &
341 : h_grid(2, 2)*dVal2 + h_grid(3, 2)*dVal3
342 : ddata3(i, j, k) = h_grid(1, 3)*dVal1 + &
343 : h_grid(2, 3)*dVal2 + h_grid(3, 3)*dVal3
344 :
345 : END DO
346 : END DO
347 : END DO
348 : END IF
349 :
350 18144 : CALL timestop(handle)
351 18144 : END SUBROUTINE pw_spline_scale_deriv
352 :
353 : ! **************************************************************************************************
354 : !> \brief calculates the FFT of the values of the x,y,z (idir=1,2,3)
355 : !> derivative of the cubic spline
356 : !> \param spline_g on entry the FFT of the coefficients of the spline
357 : !> will contain the FFT of the derivative
358 : !> \param idir direction of the derivative
359 : !> \par History
360 : !> 06.2003 created [fawzi]
361 : !> \author Fawzi Mohamed
362 : !> \note
363 : !> the distance between gridpoints is assumed to be 1
364 : ! **************************************************************************************************
365 342 : SUBROUTINE pw_spline3_deriv_g(spline_g, idir)
366 : TYPE(pw_c1d_gs_type), INTENT(IN) :: spline_g
367 : INTEGER, INTENT(in) :: idir
368 :
369 : CHARACTER(len=*), PARAMETER :: routineN = 'pw_spline3_deriv_g'
370 : REAL(KIND=dp), PARAMETER :: inv9 = 1.0_dp/9.0_dp
371 :
372 : INTEGER :: handle, i, ii, j, k
373 : INTEGER, DIMENSION(2, 3) :: bo, gbo
374 : INTEGER, DIMENSION(3) :: n, n_tot
375 : REAL(KIND=dp) :: coeff, tmp
376 342 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: csIVals, csJVals, csKVals
377 :
378 342 : CALL timeset(routineN, handle)
379 :
380 1368 : n(1:3) = spline_g%pw_grid%npts_local(1:3)
381 1368 : n_tot(1:3) = spline_g%pw_grid%npts(1:3)
382 3420 : bo = spline_g%pw_grid%bounds_local
383 3420 : gbo = spline_g%pw_grid%bounds
384 :
385 342 : CPASSERT(.NOT. spline_g%pw_grid%spherical)
386 342 : CPASSERT(spline_g%pw_grid%grid_span == FULLSPACE)
387 :
388 0 : ALLOCATE (csIVals(gbo(1, 1):gbo(2, 1)), &
389 0 : csJVals(gbo(1, 2):gbo(2, 2)), &
390 2394 : csKVals(gbo(1, 3):gbo(2, 3)))
391 :
392 342 : coeff = twopi/n_tot(1)
393 342 : IF (idir == 1) THEN
394 114 : !$OMP PARALLEL DO PRIVATE(i) DEFAULT(NONE) SHARED(gbo,csIVals,coeff)
395 : DO i = gbo(1, 1), gbo(2, 1)
396 : csIVals(i) = SIN(coeff*REAL(i, dp))
397 : END DO
398 : ELSE
399 228 : !$OMP PARALLEL DO PRIVATE(i) DEFAULT(NONE) SHARED(gbo,csIVals,coeff)
400 : DO i = gbo(1, 1), gbo(2, 1)
401 : csIVals(i) = COS(coeff*REAL(i, dp))
402 : END DO
403 : END IF
404 342 : coeff = twopi/n_tot(2)
405 342 : IF (idir == 2) THEN
406 114 : !$OMP PARALLEL DO PRIVATE(j) DEFAULT(NONE) SHARED(gbo,csJVals,coeff)
407 : DO j = gbo(1, 2), gbo(2, 2)
408 : csJVals(j) = SIN(coeff*REAL(j, dp))
409 : END DO
410 : ELSE
411 228 : !$OMP PARALLEL DO PRIVATE(j) DEFAULT(NONE) SHARED(gbo,csJVals,coeff)
412 : DO j = gbo(1, 2), gbo(2, 2)
413 : csJVals(j) = COS(coeff*REAL(j, dp))
414 : END DO
415 : END IF
416 342 : coeff = twopi/n_tot(3)
417 342 : IF (idir == 3) THEN
418 114 : !$OMP PARALLEL DO PRIVATE(k) DEFAULT(NONE) SHARED(gbo,csKVals,coeff)
419 : DO k = gbo(1, 3), gbo(2, 3)
420 : csKVals(k) = SIN(coeff*REAL(k, dp))
421 : END DO
422 : ELSE
423 228 : !$OMP PARALLEL DO PRIVATE(k) DEFAULT(NONE) SHARED(gbo,csKVals,coeff)
424 : DO k = gbo(1, 3), gbo(2, 3)
425 : csKVals(k) = COS(coeff*REAL(k, dp))
426 : END DO
427 : END IF
428 :
429 114 : SELECT CASE (idir)
430 : CASE (1)
431 : ! x deriv
432 114 : !$OMP PARALLEL DO PRIVATE(ii,k,j,i,coeff,tmp) DEFAULT(NONE) SHARED(spline_g,csIVals,csJVals,csKVals)
433 : DO ii = 1, SIZE(spline_g%array)
434 : i = spline_g%pw_grid%g_hat(1, ii)
435 : j = spline_g%pw_grid%g_hat(2, ii)
436 : k = spline_g%pw_grid%g_hat(3, ii)
437 : !FM ! formula
438 : !FM coeff=(sinVal(1)*cosVal(2)*cosVal(3))/9.0_dp+&
439 : !FM (sinVal(1)*cosVal(2)+sinVal(1)*cosVal(3))*2.0_dp/9.0_dp+&
440 : !FM sinVal(1)*4.0_dp/9.0_dp
441 : tmp = csIVals(i)*csJVals(j)
442 : coeff = (tmp*csKVals(k) + &
443 : (tmp + csIVals(i)*csKVals(k))*2.0_dp + &
444 : csIVals(i)*4.0_dp)*inv9
445 :
446 : spline_g%array(ii) = spline_g%array(ii)* &
447 : CMPLX(0.0_dp, coeff, dp)
448 : END DO
449 : CASE (2)
450 : ! y deriv
451 114 : !$OMP PARALLEL DO PRIVATE(ii,k,j,i,coeff,tmp) DEFAULT(NONE) SHARED(spline_g,csIVals,csJVals,csKVals)
452 : DO ii = 1, SIZE(spline_g%array)
453 : i = spline_g%pw_grid%g_hat(1, ii)
454 : j = spline_g%pw_grid%g_hat(2, ii)
455 : k = spline_g%pw_grid%g_hat(3, ii)
456 :
457 : tmp = csIVals(i)*csJVals(j)
458 : coeff = (tmp*csKVals(k) + &
459 : (tmp + csJVals(j)*csKVals(k))*2.0_dp + &
460 : csJVals(j)*4.0_dp)*inv9
461 :
462 : spline_g%array(ii) = spline_g%array(ii)* &
463 : CMPLX(0.0_dp, coeff, dp)
464 : END DO
465 : CASE (3)
466 : ! z deriv
467 342 : !$OMP PARALLEL DO PRIVATE(ii,k,j,i,coeff,tmp) DEFAULT(NONE) SHARED(spline_g,csIVals,csJVals,csKVals)
468 : DO ii = 1, SIZE(spline_g%array)
469 : i = spline_g%pw_grid%g_hat(1, ii)
470 : j = spline_g%pw_grid%g_hat(2, ii)
471 : k = spline_g%pw_grid%g_hat(3, ii)
472 :
473 : tmp = csIVals(i)*csKVals(k)
474 : coeff = (tmp*csJVals(j) + &
475 : (tmp + csJVals(j)*csKVals(k))*2.0_dp + &
476 : csKVals(k)*4.0_dp)*inv9
477 :
478 : spline_g%array(ii) = spline_g%array(ii)* &
479 : CMPLX(0.0_dp, coeff, dp)
480 : END DO
481 : END SELECT
482 :
483 342 : DEALLOCATE (csIVals, csJVals, csKVals)
484 :
485 342 : CALL timestop(handle)
486 684 : END SUBROUTINE pw_spline3_deriv_g
487 :
488 : ! **************************************************************************************************
489 : !> \brief calculates the FFT of the values of the x,y,z (idir=1,2,3)
490 : !> derivative of the quadratic spline
491 : !> \param spline_g on entry the FFT of the coefficients of the spline
492 : !> will contain the FFT of the derivative
493 : !> \param idir direction of the derivative
494 : !> \par History
495 : !> 06.2003 created [fawzi]
496 : !> \author Fawzi Mohamed
497 : !> \note
498 : !> the distance between gridpoints is assumed to be 1
499 : ! **************************************************************************************************
500 40794 : SUBROUTINE pw_spline2_deriv_g(spline_g, idir)
501 : TYPE(pw_c1d_gs_type), INTENT(IN) :: spline_g
502 : INTEGER, INTENT(in) :: idir
503 :
504 : CHARACTER(len=*), PARAMETER :: routineN = 'pw_spline2_deriv_g'
505 : REAL(KIND=dp), PARAMETER :: inv16 = 1.0_dp/16.0_dp
506 :
507 : INTEGER :: handle, i, ii, j, k
508 : INTEGER, DIMENSION(2, 3) :: bo
509 : INTEGER, DIMENSION(3) :: n, n_tot
510 : REAL(KIND=dp) :: coeff, tmp
511 40794 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: csIVals, csJVals, csKVals
512 :
513 40794 : CALL timeset(routineN, handle)
514 :
515 163176 : n(1:3) = spline_g%pw_grid%npts_local(1:3)
516 163176 : n_tot(1:3) = spline_g%pw_grid%npts(1:3)
517 407940 : bo = spline_g%pw_grid%bounds
518 :
519 40794 : CPASSERT(.NOT. spline_g%pw_grid%spherical)
520 40794 : CPASSERT(spline_g%pw_grid%grid_span == FULLSPACE)
521 :
522 0 : ALLOCATE (csIVals(bo(1, 1):bo(2, 1)), csJVals(bo(1, 2):bo(2, 2)), &
523 285558 : csKVals(bo(1, 3):bo(2, 3)))
524 :
525 40794 : coeff = twopi/n_tot(1)
526 40794 : IF (idir == 1) THEN
527 13598 : !$OMP PARALLEL DO PRIVATE(i) DEFAULT(NONE) SHARED(bo,coeff,csIVals)
528 : DO i = bo(1, 1), bo(2, 1)
529 : csIVals(i) = SIN(coeff*REAL(i, dp))
530 : END DO
531 : ELSE
532 27196 : !$OMP PARALLEL DO PRIVATE(i) DEFAULT(NONE) SHARED(bo,coeff,csIVals)
533 : DO i = bo(1, 1), bo(2, 1)
534 : csIVals(i) = COS(coeff*REAL(i, dp))
535 : END DO
536 : END IF
537 40794 : coeff = twopi/n_tot(2)
538 40794 : IF (idir == 2) THEN
539 13598 : !$OMP PARALLEL DO PRIVATE(j) DEFAULT(NONE) SHARED(bo,coeff,csJVals)
540 : DO j = bo(1, 2), bo(2, 2)
541 : csJVals(j) = SIN(coeff*REAL(j, dp))
542 : END DO
543 : ELSE
544 27196 : !$OMP PARALLEL DO PRIVATE(j) DEFAULT(NONE) SHARED(bo,coeff,csJVals)
545 : DO j = bo(1, 2), bo(2, 2)
546 : csJVals(j) = COS(coeff*REAL(j, dp))
547 : END DO
548 : END IF
549 40794 : coeff = twopi/n_tot(3)
550 40794 : IF (idir == 3) THEN
551 13598 : !$OMP PARALLEL DO PRIVATE(k) DEFAULT(NONE) SHARED(bo,coeff,csKVals)
552 : DO k = bo(1, 3), bo(2, 3)
553 : csKVals(k) = SIN(coeff*REAL(k, dp))
554 : END DO
555 : ELSE
556 27196 : !$OMP PARALLEL DO PRIVATE(k) DEFAULT(NONE) SHARED(bo,coeff,csKVals)
557 : DO k = bo(1, 3), bo(2, 3)
558 : csKVals(k) = COS(coeff*REAL(k, dp))
559 : END DO
560 : END IF
561 :
562 13598 : SELECT CASE (idir)
563 : CASE (1)
564 : ! x deriv
565 13598 : !$OMP PARALLEL DO PRIVATE(ii,k,j,i,coeff,tmp) SHARED(spline_g,csIVals,csJVals,csKVals) DEFAULT(NONE)
566 : DO ii = 1, SIZE(spline_g%array)
567 : i = spline_g%pw_grid%g_hat(1, ii)
568 : j = spline_g%pw_grid%g_hat(2, ii)
569 : k = spline_g%pw_grid%g_hat(3, ii)
570 : !FM ! formula
571 : !FM coeff=(sinVal(1)*cosVal(2)*cosVal(3))/16.0_dp+&
572 : !FM (sinVal(1)*cosVal(2)+sinVal(1)*cosVal(3))*3.0_dp/16.0_dp+&
573 : !FM sinVal(1)*9.0_dp/16.0_dp
574 : tmp = csIVals(i)*csJVals(j)
575 : coeff = (tmp*csKVals(k) + &
576 : (tmp + csIVals(i)*csKVals(k))*3.0_dp + &
577 : csIVals(i)*9.0_dp)*inv16
578 :
579 : spline_g%array(ii) = spline_g%array(ii)* &
580 : CMPLX(0.0_dp, coeff, dp)
581 : END DO
582 : CASE (2)
583 : ! y deriv
584 13598 : !$OMP PARALLEL DO PRIVATE(ii,k,j,i,coeff,tmp) DEFAULT(NONE) SHARED(spline_g,csIVals,csJVals,csKVals)
585 : DO ii = 1, SIZE(spline_g%array)
586 : i = spline_g%pw_grid%g_hat(1, ii)
587 : j = spline_g%pw_grid%g_hat(2, ii)
588 : k = spline_g%pw_grid%g_hat(3, ii)
589 :
590 : tmp = csIVals(i)*csJVals(j)
591 : coeff = (tmp*csKVals(k) + &
592 : (tmp + csJVals(j)*csKVals(k))*3.0_dp + &
593 : csJVals(j)*9.0_dp)*inv16
594 :
595 : spline_g%array(ii) = spline_g%array(ii)* &
596 : CMPLX(0.0_dp, coeff, dp)
597 : END DO
598 : CASE (3)
599 : ! z deriv
600 40794 : !$OMP PARALLEL DO PRIVATE(ii,k,j,i,coeff,tmp) DEFAULT(NONE) SHARED(spline_g,csIVals,csJVals,csKVals)
601 : DO ii = 1, SIZE(spline_g%array)
602 : i = spline_g%pw_grid%g_hat(1, ii)
603 : j = spline_g%pw_grid%g_hat(2, ii)
604 : k = spline_g%pw_grid%g_hat(3, ii)
605 :
606 : tmp = csIVals(i)*csKVals(k)
607 : coeff = (tmp*csJVals(j) + &
608 : (tmp + csJVals(j)*csKVals(k))*3.0_dp + &
609 : csKVals(k)*9.0_dp)*inv16
610 :
611 : spline_g%array(ii) = spline_g%array(ii)* &
612 : CMPLX(0.0_dp, coeff, dp)
613 : END DO
614 : END SELECT
615 :
616 40794 : DEALLOCATE (csIVals, csJVals, csKVals)
617 :
618 40794 : CALL timestop(handle)
619 81588 : END SUBROUTINE pw_spline2_deriv_g
620 :
621 : ! **************************************************************************************************
622 : !> \brief applies a nearest neighbor linear operator to a stripe in x direction:
623 : !> out_val(i)=sum(weight(j)*in_val(i+j-1),j=0..2)
624 : !> \param weights the weights of the linear operator
625 : !> \param in_val the argument to the operator
626 : !> \param in_val_first the first argument (needed to calculate out_val(1))
627 : !> \param in_val_last the last argument (needed to calculate out_val(n_el))
628 : !> \param out_val the place where the result is accumulated
629 : !> \param n_el the number of elements in in_v and out_v
630 : !> \par History
631 : !> 04.2004 created [fawzi]
632 : !> \author fawzi
633 : !> \note
634 : !> uses 2 read streams and 1 write stream
635 : ! **************************************************************************************************
636 659526526 : SUBROUTINE pw_compose_stripe(weights, in_val, in_val_first, in_val_last, &
637 : out_val, n_el)
638 : REAL(kind=dp), DIMENSION(0:2), INTENT(in) :: weights
639 : REAL(kind=dp), DIMENSION(*), INTENT(in) :: in_val
640 : REAL(kind=dp), INTENT(in) :: in_val_first, in_val_last
641 : REAL(kind=dp), DIMENSION(*), INTENT(inout) :: out_val
642 : INTEGER :: n_el
643 :
644 : INTEGER :: i
645 : REAL(kind=dp) :: v0, v1, v2
646 :
647 : !1:n_el), &
648 : !1:n_el), &
649 :
650 659526526 : IF (n_el < 1) RETURN
651 659526526 : v0 = in_val_first
652 659526526 : v1 = in_val(1)
653 659526526 : IF (weights(1) == 0.0_dp) THEN
654 : ! optimized version for x deriv
655 105061374 : DO i = 1, n_el - 3, 3
656 1018220697 : v2 = in_val(i + 1)
657 : out_val(i) = out_val(i) + &
658 : weights(0)*v0 + &
659 1018220697 : weights(2)*v2
660 1018220697 : v0 = in_val(i + 2)
661 : out_val(i + 1) = out_val(i + 1) + &
662 : weights(0)*v1 + &
663 1018220697 : weights(2)*v0
664 1018220697 : v1 = in_val(i + 3)
665 : out_val(i + 2) = out_val(i + 2) + &
666 : weights(0)*v2 + &
667 1018220697 : weights(2)*v1
668 : END DO
669 : ELSE
670 : ! generic version
671 554465152 : DO i = 1, n_el - 3, 3
672 4291790882 : v2 = in_val(i + 1)
673 : out_val(i) = out_val(i) + &
674 : weights(0)*v0 + &
675 : weights(1)*v1 + &
676 4291790882 : weights(2)*v2
677 4291790882 : v0 = in_val(i + 2)
678 : out_val(i + 1) = out_val(i + 1) + &
679 : weights(0)*v1 + &
680 : weights(1)*v2 + &
681 4291790882 : weights(2)*v0
682 4291790882 : v1 = in_val(i + 3)
683 : out_val(i + 2) = out_val(i + 2) + &
684 : weights(0)*v2 + &
685 : weights(1)*v0 + &
686 4296042932 : weights(2)*v1
687 : END DO
688 : END IF
689 852855747 : SELECT CASE (MODULO(n_el - 1, 3))
690 : CASE (0)
691 193329221 : v2 = in_val_last
692 : out_val(n_el) = out_val(n_el) + &
693 : weights(0)*v0 + &
694 : weights(1)*v1 + &
695 193329221 : weights(2)*v2
696 : CASE (1)
697 205584388 : v2 = in_val(n_el)
698 : out_val(n_el - 1) = out_val(n_el - 1) + &
699 : weights(0)*v0 + &
700 : weights(1)*v1 + &
701 205584388 : weights(2)*v2
702 205584388 : v0 = in_val_last
703 : out_val(n_el) = out_val(n_el) + &
704 : weights(0)*v1 + &
705 : weights(1)*v2 + &
706 205584388 : weights(2)*v0
707 : CASE (2)
708 260612917 : v2 = in_val(n_el - 1)
709 : out_val(n_el - 2) = out_val(n_el - 2) + &
710 : weights(0)*v0 + &
711 : weights(1)*v1 + &
712 260612917 : weights(2)*v2
713 260612917 : v0 = in_val(n_el)
714 : out_val(n_el - 1) = out_val(n_el - 1) + &
715 : weights(0)*v1 + &
716 : weights(1)*v2 + &
717 260612917 : weights(2)*v0
718 260612917 : v1 = in_val_last
719 : out_val(n_el) = out_val(n_el) + &
720 : weights(0)*v2 + &
721 : weights(1)*v0 + &
722 659526526 : weights(2)*v1
723 : END SELECT
724 :
725 : END SUBROUTINE pw_compose_stripe
726 :
727 : ! **************************************************************************************************
728 : !> \brief private routine that computes pw_nn_compose_r (it seems that without
729 : !> passing arrays in this way either some compiler do a copyin/out (xlf)
730 : !> or by inlining suboptimal code is produced (nag))
731 : !> \param weights a 3x3x3 array with the linear operator
732 : !> \param in_val the argument for the linear operator
733 : !> \param out_val place where the value of the linear oprator should be added
734 : !> \param pw_in pw to be able to get the needed meta data about in_val and
735 : !> out_val
736 : !> \param bo boundaries of in_val and out_val
737 : !> \author fawzi
738 : ! **************************************************************************************************
739 28070 : SUBROUTINE pw_nn_compose_r_work(weights, in_val, out_val, pw_in, bo)
740 : REAL(kind=dp), DIMENSION(0:2, 0:2, 0:2) :: weights
741 : INTEGER, DIMENSION(2, 3) :: bo
742 : TYPE(pw_r3d_rs_type), INTENT(IN) :: pw_in
743 : REAL(kind=dp), DIMENSION(bo(1, 1):bo(2, 1), bo(1, &
744 : 2):bo(2, 2), bo(1, 3):bo(2, 3)), INTENT(inout) :: out_val
745 : REAL(kind=dp), DIMENSION(bo(1, 1):bo(2, 1), bo(1, &
746 : 2):bo(2, 2), bo(1, 3):bo(2, 3)), INTENT(in) :: in_val
747 :
748 : INTEGER :: i, j, jw, k, kw, myj, myk
749 : INTEGER, DIMENSION(2, 3) :: gbo
750 : INTEGER, DIMENSION(3) :: s
751 : LOGICAL :: has_boundary, yderiv, zderiv
752 : REAL(kind=dp) :: in_val_f, in_val_l
753 28070 : REAL(kind=dp), DIMENSION(:, :), POINTER :: l_boundary, tmp, u_boundary
754 :
755 81254 : zderiv = ALL(weights(:, :, 1) == 0.0_dp)
756 81254 : yderiv = ALL(weights(:, 1, :) == 0.0_dp)
757 280700 : bo = pw_in%pw_grid%bounds_local
758 280700 : gbo = pw_in%pw_grid%bounds
759 112280 : DO i = 1, 3
760 112280 : s(i) = bo(2, i) - bo(1, i) + 1
761 : END DO
762 112280 : IF (ANY(s < 1)) RETURN
763 : has_boundary = ANY(pw_in%pw_grid%bounds_local(:, 1) /= &
764 44247 : pw_in%pw_grid%bounds(:, 1))
765 28070 : IF (has_boundary) THEN
766 : ALLOCATE (l_boundary(bo(1, 2):bo(2, 2), bo(1, 3):bo(2, 3)), &
767 : u_boundary(bo(1, 2):bo(2, 2), bo(1, 3):bo(2, 3)), &
768 213136 : tmp(bo(1, 2):bo(2, 2), bo(1, 3):bo(2, 3)))
769 56427058 : tmp(:, :) = pw_in%array(bo(2, 1), :, :)
770 : CALL pw_in%pw_grid%para%group%sendrecv(tmp, pw_in%pw_grid%para%pos_of_x( &
771 : gbo(1, 1) + MODULO(bo(2, 1) + 1 - gbo(1, 1), gbo(2, 1) - gbo(1, 1) + 1)), &
772 : l_boundary, pw_in%pw_grid%para%pos_of_x( &
773 112827474 : gbo(1, 1) + MODULO(bo(1, 1) - 1 - gbo(1, 1), gbo(2, 1) - gbo(1, 1) + 1)))
774 56427058 : tmp(:, :) = pw_in%array(bo(1, 1), :, :)
775 : CALL pw_in%pw_grid%para%group%sendrecv(tmp, pw_in%pw_grid%para%pos_of_x( &
776 : gbo(1, 1) + MODULO(bo(1, 1) - 1 - gbo(1, 1), gbo(2, 1) - gbo(1, 1) + 1)), &
777 : u_boundary, pw_in%pw_grid%para%pos_of_x( &
778 112827474 : gbo(1, 1) + MODULO(bo(2, 1) + 1 - gbo(1, 1), gbo(2, 1) - gbo(1, 1) + 1)))
779 26642 : DEALLOCATE (tmp)
780 : END IF
781 :
782 : !$OMP PARALLEL DO DEFAULT(NONE) PRIVATE(k,kw,myk,j,jw,myj,in_val_f,&
783 : !$OMP in_val_l) SHARED(zderiv,yderiv,bo,in_val,out_val,s,l_boundary,&
784 28070 : !$OMP u_boundary,weights,has_boundary)
785 : DO k = 0, s(3) - 1
786 : DO kw = 0, 2
787 : myk = bo(1, 3) + MODULO(k + kw - 1, s(3))
788 : IF (zderiv .AND. kw == 1) CYCLE
789 : DO j = 0, s(2) - 1
790 : DO jw = 0, 2
791 : myj = bo(1, 2) + MODULO(j + jw - 1, s(2))
792 : IF (yderiv .AND. jw == 1) CYCLE
793 : IF (has_boundary) THEN
794 : in_val_f = l_boundary(myj, myk)
795 : in_val_l = u_boundary(myj, myk)
796 : ELSE
797 : in_val_f = in_val(bo(2, 1), myj, myk)
798 : in_val_l = in_val(bo(1, 1), myj, myk)
799 : END IF
800 : CALL pw_compose_stripe(weights=weights(:, jw, kw), &
801 : in_val=in_val(:, myj, myk), &
802 : in_val_first=in_val_f, in_val_last=in_val_l, &
803 : out_val=out_val(:, bo(1, 2) + j, bo(1, 3) + k), n_el=s(1))
804 : END DO
805 : END DO
806 : END DO
807 : END DO
808 28070 : IF (has_boundary) THEN
809 26642 : DEALLOCATE (l_boundary, u_boundary)
810 : END IF
811 28070 : END SUBROUTINE pw_nn_compose_r_work
812 :
813 : ! **************************************************************************************************
814 : !> \brief applies a nearest neighbor linear operator to a pw in real space
815 : !> \param weights a 3x3x3 array with the linear operator
816 : !> \param pw_in the argument for the linear operator
817 : !> \param pw_out place where the value of the linear oprator should be added
818 : !> \author fawzi
819 : !> \note
820 : !> has specialized versions for derivative operator (with central values==0)
821 : ! **************************************************************************************************
822 28070 : SUBROUTINE pw_nn_compose_r(weights, pw_in, pw_out)
823 : REAL(kind=dp), DIMENSION(0:2, 0:2, 0:2) :: weights
824 : TYPE(pw_r3d_rs_type), INTENT(IN) :: pw_in, pw_out
825 :
826 : CHARACTER(len=*), PARAMETER :: routineN = 'pw_nn_compose_r'
827 :
828 : INTEGER :: handle
829 :
830 28070 : CALL timeset(routineN, handle)
831 196490 : IF (.NOT. ALL(pw_in%pw_grid%bounds_local(:, 2:3) == pw_in%pw_grid%bounds(:, 2:3))) THEN
832 0 : CPABORT("wrong pw distribution")
833 : END IF
834 : CALL pw_nn_compose_r_work(weights=weights, in_val=pw_in%array, &
835 28070 : out_val=pw_out%array, pw_in=pw_in, bo=pw_in%pw_grid%bounds_local)
836 28070 : CALL timestop(handle)
837 28070 : END SUBROUTINE pw_nn_compose_r
838 :
839 : ! **************************************************************************************************
840 : !> \brief calculates the values of a nearest neighbor smearing
841 : !> \param pw_in the argument for the linear operator
842 : !> \param pw_out place where the smeared values should be added
843 : !> \param coeffs array with the coefficent of the smearing, ordered with
844 : !> the distance from the center: coeffs(1) the coeff of the central
845 : !> element, coeffs(2) the coeff of the 6 element with distance 1,
846 : !> coeff(3) the coeff of the 12 elements at distance sqrt(2),
847 : !> coeff(4) the coeff of the 8 elements at distance sqrt(3).
848 : !> \author Fawzi Mohamed
849 : !> \note
850 : !> does not normalize the smear to 1.
851 : !> with coeff=(/ 8._dp/27._dp, 2._dp/27._dp, 1._dp/54._dp, 1._dp/216._dp /)
852 : !> is equivalent to pw_spline3_evaluate_values_g, with
853 : !> coeff=(/ 27._dp/64._dp, 9._dp/128._dp, 3._dp/256._dp, 1._dp/512._dp /)
854 : ! **************************************************************************************************
855 14774 : SUBROUTINE pw_nn_smear_r(pw_in, pw_out, coeffs)
856 : TYPE(pw_r3d_rs_type), INTENT(IN) :: pw_in, pw_out
857 : REAL(KIND=dp), DIMENSION(4), INTENT(in) :: coeffs
858 :
859 : INTEGER :: i, j, k
860 : REAL(kind=dp), DIMENSION(-1:1, -1:1, -1:1) :: weights
861 :
862 59096 : DO k = -1, 1
863 192062 : DO j = -1, 1
864 576186 : DO i = -1, 1
865 531864 : weights(i, j, k) = coeffs(ABS(i) + ABS(j) + ABS(k) + 1)
866 : END DO
867 : END DO
868 : END DO
869 :
870 14774 : CALL pw_nn_compose_r(weights=weights, pw_in=pw_in, pw_out=pw_out)
871 14774 : END SUBROUTINE pw_nn_smear_r
872 :
873 : ! **************************************************************************************************
874 : !> \brief calculates a nearest neighbor central derivative.
875 : !> for the x dir:
876 : !> pw_out%array(i,j,k)=( pw_in(i+1,j,k)-pw_in(i-1,j,k) )*coeff(1)+
877 : !> ( pw_in(i+1,j(+-)1,k)-pw_in(i-1,j(+-)1,k)+
878 : !> pw_in(i+1,j,k(+-)1)-pw_in(i-1,j,k(+-)1) )*coeff(2)+
879 : !> ( pw_in(i+1,j(+-)1,k(+-)1)-pw_in(i-1,j(+-)1,k(+-)1)+
880 : !> pw_in(i+1,j(+-)1,k(-+)1)-pw_in(i-1,j(+-)1,k(-+)1) )*coeff(3)
881 : !> periodic boundary conditions are applied
882 : !> \param pw_in the argument for the linear operator
883 : !> \param pw_out place where the smeared values should be added
884 : !> \param coeffs array with the coefficent of the front (positive) plane
885 : !> of the central derivative, ordered with
886 : !> the distance from the center: coeffs(1) the coeff of the central
887 : !> element, coeffs(2) the coeff of the 4 element with distance 1,
888 : !> coeff(3) the coeff of the 4 elements at distance sqrt(2)
889 : !> \param idir ...
890 : !> \author Fawzi Mohamed
891 : !> \note
892 : !> with coeff=(/ 2.0_dp/9.0_dp, 1.0_dp/18.0_dp, 1.0_dp/72.0_dp /)
893 : !> is equivalent to pw_spline3_deriv_r, with
894 : !> coeff=(/ 9.0_dp/32.0_dp, 3.0_dp/64.0_dp, 1.0_dp/128.0_dp /)
895 : !> to pw_spline2_deriv_r
896 : !> coeff=(/ 25._dp/72._dp, 5._dp/144, 1._dp/288._dp /)
897 : ! **************************************************************************************************
898 13296 : SUBROUTINE pw_nn_deriv_r(pw_in, pw_out, coeffs, idir)
899 : TYPE(pw_r3d_rs_type), INTENT(IN) :: pw_in, pw_out
900 : REAL(KIND=dp), DIMENSION(3), INTENT(in) :: coeffs
901 : INTEGER :: idir
902 :
903 : INTEGER :: i, idirVal, j, k
904 : REAL(kind=dp), DIMENSION(-1:1, -1:1, -1:1) :: weights
905 :
906 53184 : DO k = -1, 1
907 172848 : DO j = -1, 1
908 518544 : DO i = -1, 1
909 358992 : SELECT CASE (idir)
910 : CASE (1)
911 119664 : idirVal = i
912 : CASE (2)
913 119664 : idirVal = j
914 : CASE (3)
915 119664 : idirVal = k
916 : CASE default
917 358992 : CPABORT("invalid idir ("//TRIM(cp_to_string(idir))//")")
918 : END SELECT
919 478656 : IF (idirVal == 0) THEN
920 119664 : weights(i, j, k) = 0.0_dp
921 : ELSE
922 239328 : weights(i, j, k) = REAL(idirVal, dp)*coeffs(ABS(i) + ABS(j) + ABS(k))
923 : END IF
924 : END DO
925 : END DO
926 : END DO
927 :
928 13296 : CALL pw_nn_compose_r(weights=weights, pw_in=pw_in, pw_out=pw_out)
929 13296 : END SUBROUTINE pw_nn_deriv_r
930 :
931 : ! **************************************************************************************************
932 : !> \brief low level function that adds a coarse grid
933 : !> to a fine grid.
934 : !> If pbc is true periodic boundary conditions are applied
935 : !>
936 : !> It will add to
937 : !>
938 : !> fine_values(2*coarse_bounds(1,1):2*coarse_bounds(2,1),
939 : !> 2*coarse_bounds(1,2):2*coarse_bounds(2,2),
940 : !> 2*coarse_bounds(1,3):2*coarse_bounds(2,3))
941 : !>
942 : !> using
943 : !>
944 : !> coarse_coeffs(coarse_bounds(1,1):coarse_bounds(2,1),
945 : !> coarse_bounds(1,2):coarse_bounds(2,2),
946 : !> coarse_bounds(1,3):coarse_bounds(2,3))
947 : !>
948 : !> composed with the weights obtained by the direct product of the
949 : !> 1d coefficients weights:
950 : !>
951 : !> for i,j,k in -3..3
952 : !> w(i,j,k)=weights_1d(abs(i)+1)*weights_1d(abs(j)+1)*
953 : !> weights_1d(abs(k)+1)
954 : !> \param coarse_coeffs_pw the values of the coefficients
955 : !> \param fine_values_pw where to add the values due to the
956 : !> coarse coeffs
957 : !> \param weights_1d the weights of the 1d smearing
958 : !> \param w_border0 the 1d weight at the border (when pbc is false)
959 : !> \param w_border1 the 1d weights for a point one off the border
960 : !> (w_border1(1) is the weight of the coefficent at the border)
961 : !> (used if pbc is false)
962 : !> \param pbc if periodic boundary conditions should be applied
963 : !> \param safe_computation ...
964 : !> \author fawzi
965 : !> \note
966 : !> coarse looping is continuos, I did not check if keeping the fine looping
967 : !> contiguous is better.
968 : !> And I ask myself yet again why, why we use x-slice distribution,
969 : !> z-slice distribution would be much better performancewise
970 : !> (and would semplify this code enormously).
971 : !> fine2coarse has much more understandable parallel part (build up of
972 : !> send/rcv sizes,... but worse if you have really a lot of processors,
973 : !> probabily irrelevant because it is not critical) [fawzi].
974 : ! **************************************************************************************************
975 2260 : SUBROUTINE add_coarse2fine(coarse_coeffs_pw, fine_values_pw, &
976 : weights_1d, w_border0, w_border1, pbc, safe_computation)
977 : TYPE(pw_r3d_rs_type), INTENT(IN) :: coarse_coeffs_pw, fine_values_pw
978 : REAL(kind=dp), DIMENSION(4), INTENT(in) :: weights_1d
979 : REAL(kind=dp), INTENT(in) :: w_border0
980 : REAL(kind=dp), DIMENSION(3), INTENT(in) :: w_border1
981 : LOGICAL, INTENT(in) :: pbc
982 : LOGICAL, INTENT(in), OPTIONAL :: safe_computation
983 :
984 : CHARACTER(len=*), PARAMETER :: routineN = 'add_coarse2fine'
985 :
986 : INTEGER :: coarse_slice_size, f_shift(3), fi, fi_lb, fi_ub, fj, fk, handle, handle2, i, ii, &
987 : ij, ik, ip, j, k, my_lb, my_ub, n_procs, p, p_lb, p_old, p_ub, rcv_tot_size, rest_b, &
988 : s(3), send_tot_size, sf, shift, ss, x, x_att, xx
989 2260 : INTEGER, ALLOCATABLE, DIMENSION(:) :: rcv_offset, rcv_size, real_rcv_size, &
990 2260 : send_offset, send_size, sent_size
991 : INTEGER, DIMENSION(2, 3) :: coarse_bo, coarse_gbo, fine_bo, &
992 : fine_gbo, my_coarse_bo
993 2260 : INTEGER, DIMENSION(:), POINTER :: pos_of_x
994 : LOGICAL :: has_i_lbound, has_i_ubound, is_split, &
995 : safe_calc
996 : REAL(kind=dp) :: v0, v1, v2, v3, wi, wj, wk
997 2260 : REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: rcv_buf, send_buf
998 : REAL(kind=dp), DIMENSION(3) :: w_0, ww0
999 : REAL(kind=dp), DIMENSION(4) :: w_1, ww1
1000 2260 : REAL(kind=dp), DIMENSION(:, :, :), POINTER :: coarse_coeffs, fine_values
1001 :
1002 2260 : CALL timeset(routineN, handle)
1003 : ! CALL timeset(routineN//"_pre",handle2)
1004 2260 : safe_calc = .FALSE.
1005 2260 : IF (PRESENT(safe_computation)) safe_calc = safe_computation
1006 2260 : ii = coarse_coeffs_pw%pw_grid%para%group%compare(fine_values_pw%pw_grid%para%group)
1007 2260 : CPASSERT(ii <= mp_comm_congruent)
1008 22600 : my_coarse_bo = coarse_coeffs_pw%pw_grid%bounds_local
1009 22600 : coarse_gbo = coarse_coeffs_pw%pw_grid%bounds
1010 22600 : fine_bo = fine_values_pw%pw_grid%bounds_local
1011 22600 : fine_gbo = fine_values_pw%pw_grid%bounds
1012 9040 : f_shift = fine_gbo(1, :) - 2*coarse_gbo(1, :)
1013 6780 : DO j = 2, 3
1014 15820 : DO i = 1, 2
1015 13560 : coarse_bo(i, j) = FLOOR((fine_bo(i, j) - f_shift(j))/2.)
1016 : END DO
1017 : END DO
1018 2260 : IF (fine_bo(1, 1) <= fine_bo(2, 1)) THEN
1019 2260 : coarse_bo(1, 1) = FLOOR((fine_bo(1, 1) - 2 - f_shift(1))/2.)
1020 2260 : coarse_bo(2, 1) = FLOOR((fine_bo(2, 1) + 3 - f_shift(1))/2.)
1021 : ELSE
1022 0 : coarse_bo(1, 1) = coarse_gbo(2, 1)
1023 0 : coarse_bo(2, 1) = coarse_gbo(2, 1) - 1
1024 : END IF
1025 6744 : is_split = ANY(coarse_gbo(:, 1) /= my_coarse_bo(:, 1))
1026 2260 : IF (.NOT. is_split .OR. .NOT. pbc) THEN
1027 2260 : coarse_bo(1, 1) = MAX(coarse_gbo(1, 1), coarse_bo(1, 1))
1028 2260 : coarse_bo(2, 1) = MIN(coarse_gbo(2, 1), coarse_bo(2, 1))
1029 : END IF
1030 2260 : has_i_ubound = (fine_gbo(2, 1) /= fine_bo(2, 1)) .OR. pbc .AND. is_split
1031 2260 : has_i_lbound = (fine_gbo(1, 1) /= fine_bo(1, 1)) .OR. pbc .AND. is_split
1032 :
1033 2260 : IF (pbc) THEN
1034 0 : CPASSERT(ALL(fine_gbo(1, :) == 2*coarse_gbo(1, :) + f_shift))
1035 0 : CPASSERT(ALL(fine_gbo(2, :) == 2*coarse_gbo(2, :) + 1 + f_shift))
1036 : ELSE
1037 9040 : CPASSERT(ALL(fine_gbo(2, :) == 2*coarse_gbo(2, :) + f_shift))
1038 9040 : CPASSERT(ALL(fine_gbo(1, :) == 2*coarse_gbo(1, :) + f_shift))
1039 : END IF
1040 :
1041 2260 : coarse_coeffs => coarse_coeffs_pw%array
1042 9040 : DO i = 1, 3
1043 9040 : s(i) = coarse_gbo(2, i) - coarse_gbo(1, i) + 1
1044 : END DO
1045 : ! CALL timestop(handle2)
1046 : ! *** parallel case
1047 2260 : IF (is_split) THEN
1048 24 : CALL timeset(routineN//"_comm", handle2)
1049 : coarse_slice_size = (coarse_bo(2, 2) - coarse_bo(1, 2) + 1)* &
1050 24 : (coarse_bo(2, 3) - coarse_bo(1, 3) + 1)
1051 24 : n_procs = coarse_coeffs_pw%pw_grid%para%group%num_pe
1052 : ALLOCATE (send_size(0:n_procs - 1), send_offset(0:n_procs - 1), &
1053 : sent_size(0:n_procs - 1), rcv_size(0:n_procs - 1), &
1054 192 : rcv_offset(0:n_procs - 1), real_rcv_size(0:n_procs - 1))
1055 :
1056 : ! ** rcv size count
1057 :
1058 24 : pos_of_x => coarse_coeffs_pw%pw_grid%para%pos_of_x
1059 : p_old = pos_of_x(coarse_gbo(1, 1) &
1060 24 : + MODULO(coarse_bo(1, 1) - coarse_gbo(1, 1), s(1)))
1061 24 : rcv_size = 0
1062 184 : DO x = coarse_bo(1, 1), coarse_bo(2, 1)
1063 160 : p = pos_of_x(coarse_gbo(1, 1) + MODULO(x - coarse_gbo(1, 1), s(1)))
1064 184 : rcv_size(p) = rcv_size(p) + coarse_slice_size
1065 : END DO
1066 :
1067 : ! ** send size count
1068 :
1069 24 : pos_of_x => fine_values_pw%pw_grid%para%pos_of_x
1070 24 : sf = fine_gbo(2, 1) - fine_gbo(1, 1) + 1
1071 24 : fi_lb = 2*my_coarse_bo(1, 1) - 3 + f_shift(1)
1072 24 : fi_ub = 2*my_coarse_bo(2, 1) + 3 + f_shift(1)
1073 24 : IF (.NOT. pbc) THEN
1074 24 : fi_lb = MAX(fi_lb, fine_gbo(1, 1))
1075 24 : fi_ub = MIN(fi_ub, fine_gbo(2, 1))
1076 : ELSE
1077 0 : fi_ub = MIN(fi_ub, fi_lb + sf - 1)
1078 : END IF
1079 24 : p_old = pos_of_x(fine_gbo(1, 1) + MODULO(fi_lb - fine_gbo(1, 1), sf))
1080 24 : p_lb = FLOOR((fi_lb - 2 - f_shift(1))/2.)
1081 24 : send_size = 0
1082 320 : DO x = fi_lb, fi_ub
1083 296 : p = pos_of_x(fine_gbo(1, 1) + MODULO(x - fine_gbo(1, 1), sf))
1084 320 : IF (p /= p_old) THEN
1085 24 : p_ub = FLOOR((x - 1 + 3 - f_shift(1))/2.)
1086 :
1087 : send_size(p_old) = send_size(p_old) + (MIN(p_ub, my_coarse_bo(2, 1)) &
1088 24 : - MAX(p_lb, my_coarse_bo(1, 1)) + 1)*coarse_slice_size
1089 :
1090 24 : IF (pbc) THEN
1091 0 : DO xx = p_lb, coarse_gbo(1, 1) - 1
1092 0 : x_att = coarse_gbo(1, 1) + MODULO(xx - coarse_gbo(1, 1), s(1))
1093 0 : IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
1094 0 : send_size(p_old) = send_size(p_old) + coarse_slice_size
1095 : END IF
1096 : END DO
1097 0 : DO xx = coarse_gbo(2, 1) + 1, p_ub
1098 0 : x_att = coarse_gbo(1, 1) + MODULO(xx - coarse_gbo(1, 1), s(1))
1099 0 : IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
1100 0 : send_size(p_old) = send_size(p_old) + coarse_slice_size
1101 : END IF
1102 : END DO
1103 : END IF
1104 :
1105 24 : p_old = p
1106 24 : p_lb = FLOOR((x - 2 - f_shift(1))/2.)
1107 : END IF
1108 : END DO
1109 24 : p_ub = FLOOR((fi_ub + 3 - f_shift(1))/2.)
1110 :
1111 : send_size(p_old) = send_size(p_old) + (MIN(p_ub, my_coarse_bo(2, 1)) &
1112 24 : - MAX(p_lb, my_coarse_bo(1, 1)) + 1)*coarse_slice_size
1113 :
1114 24 : IF (pbc) THEN
1115 0 : DO xx = p_lb, coarse_gbo(1, 1) - 1
1116 0 : x_att = coarse_gbo(1, 1) + MODULO(xx - coarse_gbo(1, 1), s(1))
1117 0 : IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
1118 0 : send_size(p_old) = send_size(p_old) + coarse_slice_size
1119 : END IF
1120 : END DO
1121 0 : DO xx = coarse_gbo(2, 1) + 1, p_ub
1122 0 : x_att = coarse_gbo(1, 1) + MODULO(xx - coarse_gbo(1, 1), s(1))
1123 0 : IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
1124 0 : send_size(p_old) = send_size(p_old) + coarse_slice_size
1125 : END IF
1126 : END DO
1127 : END IF
1128 : ! ** offsets & alloc send-rcv
1129 :
1130 : send_tot_size = 0
1131 72 : DO ip = 0, n_procs - 1
1132 48 : send_offset(ip) = send_tot_size
1133 72 : send_tot_size = send_tot_size + send_size(ip)
1134 : END DO
1135 72 : ALLOCATE (send_buf(0:send_tot_size - 1))
1136 :
1137 24 : rcv_tot_size = 0
1138 72 : DO ip = 0, n_procs - 1
1139 48 : rcv_offset(ip) = rcv_tot_size
1140 72 : rcv_tot_size = rcv_tot_size + rcv_size(ip)
1141 : END DO
1142 24 : IF (.NOT. rcv_tot_size == (coarse_bo(2, 1) - coarse_bo(1, 1) + 1)*coarse_slice_size) THEN
1143 0 : CPABORT("Error calculating rcv_tot_size ")
1144 : END IF
1145 72 : ALLOCATE (rcv_buf(0:rcv_tot_size - 1))
1146 :
1147 : ! ** fill send buffer
1148 :
1149 24 : p_old = pos_of_x(fine_gbo(1, 1) + MODULO(fi_lb - fine_gbo(1, 1), sf))
1150 24 : p_lb = FLOOR((fi_lb - 2 - f_shift(1))/2.)
1151 72 : sent_size(:) = send_offset
1152 24 : ss = my_coarse_bo(2, 1) - my_coarse_bo(1, 1) + 1
1153 320 : DO x = fi_lb, fi_ub
1154 296 : p = pos_of_x(fine_gbo(1, 1) + MODULO(x - fine_gbo(1, 1), sf))
1155 320 : IF (p /= p_old) THEN
1156 : shift = FLOOR((fine_gbo(1, 1) + MODULO(x - 1 - fine_gbo(1, 1), sf) - f_shift(1))/2._dp) - &
1157 24 : FLOOR((x - 1 - f_shift(1))/2._dp)
1158 24 : p_ub = FLOOR((x - 1 + 3 - f_shift(1))/2._dp)
1159 :
1160 24 : IF (pbc) THEN
1161 0 : DO xx = p_lb + shift, coarse_gbo(1, 1) - 1
1162 0 : x_att = coarse_gbo(1, 1) + MODULO(xx - coarse_gbo(1, 1), sf)
1163 0 : IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
1164 : CALL dcopy(coarse_slice_size, &
1165 : coarse_coeffs(x_att, my_coarse_bo(1, 2), &
1166 0 : my_coarse_bo(1, 3)), ss, send_buf(sent_size(p_old)), 1)
1167 0 : sent_size(p_old) = sent_size(p_old) + coarse_slice_size
1168 : END IF
1169 : END DO
1170 : END IF
1171 :
1172 24 : ii = sent_size(p_old)
1173 272 : DO k = coarse_bo(1, 3), coarse_bo(2, 3)
1174 3432 : DO j = coarse_bo(1, 2), coarse_bo(2, 2)
1175 17312 : DO i = MAX(p_lb + shift, my_coarse_bo(1, 1)), MIN(p_ub + shift, my_coarse_bo(2, 1))
1176 13904 : send_buf(ii) = coarse_coeffs(i, j, k)
1177 17064 : ii = ii + 1
1178 : END DO
1179 : END DO
1180 : END DO
1181 24 : sent_size(p_old) = ii
1182 :
1183 24 : IF (pbc) THEN
1184 0 : DO xx = coarse_gbo(2, 1) + 1, p_ub + shift
1185 0 : x_att = coarse_gbo(1, 1) + MODULO(xx - coarse_gbo(1, 1), s(1))
1186 0 : IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
1187 : CALL dcopy(coarse_slice_size, &
1188 : coarse_coeffs(x_att, my_coarse_bo(1, 2), &
1189 : my_coarse_bo(1, 3)), ss, &
1190 0 : send_buf(sent_size(p_old)), 1)
1191 0 : sent_size(p_old) = sent_size(p_old) + coarse_slice_size
1192 : END IF
1193 : END DO
1194 : END IF
1195 :
1196 24 : p_old = p
1197 24 : p_lb = FLOOR((x - 2 - f_shift(1))/2.)
1198 : END IF
1199 : END DO
1200 : shift = FLOOR((fine_gbo(1, 1) + MODULO(x - 1 - fine_gbo(1, 1), sf) - f_shift(1))/2._dp) - &
1201 24 : FLOOR((x - 1 - f_shift(1))/2._dp)
1202 24 : p_ub = FLOOR((fi_ub + 3 - f_shift(1))/2.)
1203 :
1204 24 : IF (pbc) THEN
1205 0 : DO xx = p_lb + shift, coarse_gbo(1, 1) - 1
1206 0 : x_att = coarse_gbo(1, 1) + MODULO(xx - coarse_gbo(1, 1), s(1))
1207 0 : IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
1208 : CALL dcopy(coarse_slice_size, &
1209 : coarse_coeffs(x_att, my_coarse_bo(1, 2), &
1210 0 : my_coarse_bo(1, 3)), ss, send_buf(sent_size(p_old)), 1)
1211 0 : sent_size(p_old) = sent_size(p_old) + coarse_slice_size
1212 : END IF
1213 : END DO
1214 : END IF
1215 :
1216 24 : ii = sent_size(p_old)
1217 272 : DO k = coarse_bo(1, 3), coarse_bo(2, 3)
1218 3432 : DO j = coarse_bo(1, 2), coarse_bo(2, 2)
1219 17312 : DO i = MAX(p_lb + shift, my_coarse_bo(1, 1)), MIN(p_ub + shift, my_coarse_bo(2, 1))
1220 13904 : send_buf(ii) = coarse_coeffs(i, j, k)
1221 17064 : ii = ii + 1
1222 : END DO
1223 : END DO
1224 : END DO
1225 24 : sent_size(p_old) = ii
1226 :
1227 24 : IF (pbc) THEN
1228 0 : DO xx = coarse_gbo(2, 1) + 1, p_ub + shift
1229 0 : x_att = coarse_gbo(1, 1) + MODULO(xx - coarse_gbo(1, 1), s(1))
1230 0 : IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
1231 : CALL dcopy(coarse_slice_size, &
1232 : coarse_coeffs(x_att, my_coarse_bo(1, 2), &
1233 0 : my_coarse_bo(1, 3)), ss, send_buf(sent_size(p_old)), 1)
1234 0 : sent_size(p_old) = sent_size(p_old) + coarse_slice_size
1235 : END IF
1236 : END DO
1237 : END IF
1238 :
1239 48 : CPASSERT(ALL(sent_size(:n_procs - 2) == send_offset(1:)))
1240 24 : CPASSERT(sent_size(n_procs - 1) == send_tot_size)
1241 : ! test send/rcv sizes
1242 24 : CALL coarse_coeffs_pw%pw_grid%para%group%alltoall(send_size, real_rcv_size, 1)
1243 72 : CPASSERT(ALL(real_rcv_size == rcv_size))
1244 : ! all2all
1245 : CALL coarse_coeffs_pw%pw_grid%para%group%alltoall(sb=send_buf, scount=send_size, sdispl=send_offset, &
1246 24 : rb=rcv_buf, rcount=rcv_size, rdispl=rcv_offset)
1247 :
1248 : ! ** reorder rcv buffer
1249 : ! (actually reordering should be needed only with pbc)
1250 :
1251 : ALLOCATE (coarse_coeffs(coarse_bo(1, 1):coarse_bo(2, 1), &
1252 : coarse_bo(1, 2):coarse_bo(2, 2), &
1253 120 : coarse_bo(1, 3):coarse_bo(2, 3)))
1254 :
1255 24 : my_lb = MAX(coarse_gbo(1, 1), coarse_bo(1, 1))
1256 24 : my_ub = MIN(coarse_gbo(2, 1), coarse_bo(2, 1))
1257 24 : pos_of_x => coarse_coeffs_pw%pw_grid%para%pos_of_x
1258 72 : sent_size(:) = rcv_offset
1259 24 : ss = coarse_bo(2, 1) - coarse_bo(1, 1) + 1
1260 24 : DO x = my_ub + 1, coarse_bo(2, 1)
1261 0 : p_old = pos_of_x(coarse_gbo(1, 1) + MODULO(x - coarse_gbo(1, 1), s(1)))
1262 : CALL dcopy(coarse_slice_size, &
1263 : rcv_buf(sent_size(p_old)), 1, &
1264 : coarse_coeffs(x, coarse_bo(1, 2), &
1265 0 : coarse_bo(1, 3)), ss)
1266 24 : sent_size(p_old) = sent_size(p_old) + coarse_slice_size
1267 : END DO
1268 : p_old = pos_of_x(coarse_gbo(1, 1) &
1269 24 : + MODULO(my_lb - coarse_gbo(1, 1), s(1)))
1270 24 : p_lb = my_lb
1271 184 : DO x = my_lb, my_ub
1272 160 : p = pos_of_x(coarse_gbo(1, 1) + MODULO(x - coarse_gbo(1, 1), s(1)))
1273 160 : IF (p /= p_old) THEN
1274 24 : p_ub = x - 1
1275 :
1276 24 : ii = sent_size(p_old)
1277 272 : DO k = coarse_bo(1, 3), coarse_bo(2, 3)
1278 3432 : DO j = coarse_bo(1, 2), coarse_bo(2, 2)
1279 15732 : DO i = p_lb, p_ub
1280 12324 : coarse_coeffs(i, j, k) = rcv_buf(ii)
1281 15484 : ii = ii + 1
1282 : END DO
1283 : END DO
1284 : END DO
1285 24 : sent_size(p_old) = ii
1286 :
1287 24 : p_lb = x
1288 24 : p_old = p
1289 : END IF
1290 184 : rcv_size(p) = rcv_size(p) + coarse_slice_size
1291 : END DO
1292 24 : p_ub = my_ub
1293 24 : ii = sent_size(p_old)
1294 272 : DO k = coarse_bo(1, 3), coarse_bo(2, 3)
1295 3432 : DO j = coarse_bo(1, 2), coarse_bo(2, 2)
1296 18892 : DO i = p_lb, p_ub
1297 15484 : coarse_coeffs(i, j, k) = rcv_buf(ii)
1298 18644 : ii = ii + 1
1299 : END DO
1300 : END DO
1301 : END DO
1302 24 : sent_size(p_old) = ii
1303 24 : DO x = coarse_bo(1, 1), my_lb - 1
1304 0 : p_old = pos_of_x(coarse_gbo(1, 1) + MODULO(x - coarse_gbo(1, 1), s(1)))
1305 : CALL dcopy(coarse_slice_size, &
1306 : rcv_buf(sent_size(p_old)), 1, &
1307 : coarse_coeffs(x, coarse_bo(1, 2), &
1308 0 : coarse_bo(1, 3)), ss)
1309 24 : sent_size(p_old) = sent_size(p_old) + coarse_slice_size
1310 : END DO
1311 :
1312 48 : CPASSERT(ALL(sent_size(0:n_procs - 2) == rcv_offset(1:)))
1313 24 : CPASSERT(sent_size(n_procs - 1) == rcv_tot_size)
1314 :
1315 : ! dealloc
1316 24 : DEALLOCATE (send_size, send_offset, rcv_size, rcv_offset)
1317 24 : DEALLOCATE (send_buf, rcv_buf, real_rcv_size)
1318 48 : CALL timestop(handle2)
1319 :
1320 : END IF
1321 2260 : fine_values => fine_values_pw%array
1322 9040 : w_0 = [weights_1d(3), weights_1d(1), weights_1d(3)]
1323 11300 : w_1 = [weights_1d(4), weights_1d(2), weights_1d(2), weights_1d(4)]
1324 :
1325 30388 : DO k = coarse_bo(1, 3), coarse_bo(2, 3)
1326 227284 : DO ik = -3, 3
1327 196896 : IF (pbc) THEN
1328 0 : wk = weights_1d(ABS(ik) + 1)
1329 0 : fk = fine_gbo(1, 3) + MODULO(2*k + ik - fine_gbo(1, 3) + f_shift(3), 2*s(3))
1330 : ELSE
1331 196896 : fk = 2*k + ik + f_shift(3)
1332 196896 : IF (fk <= fine_bo(1, 3) + 1 .OR. fk >= fine_bo(2, 3) - 1) THEN
1333 40680 : IF (fk < fine_bo(1, 3) .OR. fk > fine_bo(2, 3)) CYCLE
1334 22600 : IF (fk == fine_bo(1, 3) .OR. fk == fine_bo(2, 3)) THEN
1335 9040 : IF (ik /= 0) CYCLE
1336 4520 : wk = w_border0
1337 13560 : ELSE IF (fk == 2*coarse_bo(1, 3) + 1 + f_shift(3)) THEN
1338 2260 : SELECT CASE (ik)
1339 : CASE (1)
1340 2260 : wk = w_border1(1)
1341 : CASE (-1)
1342 2260 : wk = w_border1(2)
1343 : CASE (-3)
1344 2260 : wk = w_border1(3)
1345 : CASE default
1346 0 : CPABORT("Only 1, -1, -3 are supported as the value of ik")
1347 6780 : CYCLE
1348 : END SELECT
1349 : ELSE
1350 2260 : SELECT CASE (ik)
1351 : CASE (3)
1352 2260 : wk = w_border1(3)
1353 : CASE (1)
1354 2260 : wk = w_border1(2)
1355 : CASE (-1)
1356 2260 : wk = w_border1(1)
1357 : CASE default
1358 0 : CPABORT("Only 3, 1, -1 are supported as the value of ik")
1359 6780 : CYCLE
1360 : END SELECT
1361 : END IF
1362 : ELSE
1363 156216 : wk = weights_1d(ABS(ik) + 1)
1364 : END IF
1365 : END IF
1366 3150076 : DO j = coarse_bo(1, 2), coarse_bo(2, 2)
1367 23778112 : DO ij = -3, 3
1368 20633564 : IF (pbc) THEN
1369 0 : wj = weights_1d(ABS(ij) + 1)*wk
1370 0 : fj = fine_gbo(1, 2) + MODULO(2*j + ij - fine_gbo(1, 2) + f_shift(2), 2*s(2))
1371 : ELSE
1372 20633564 : fj = 2*j + ij + f_shift(2)
1373 20633564 : IF (fj <= fine_bo(1, 2) + 1 .OR. fj >= fine_bo(2, 2) - 1) THEN
1374 3137328 : IF (fj < fine_bo(1, 2) .OR. fj > fine_bo(2, 2)) CYCLE
1375 1742960 : IF (fj == fine_bo(1, 2) .OR. fj == fine_bo(2, 2)) THEN
1376 697184 : IF (ij /= 0) CYCLE
1377 348592 : wj = w_border0*wk
1378 1045776 : ELSE IF (fj == 2*coarse_bo(1, 2) + 1 + f_shift(2)) THEN
1379 174296 : SELECT CASE (ij)
1380 : CASE (1)
1381 174296 : wj = w_border1(1)*wk
1382 : CASE (-1)
1383 174296 : wj = w_border1(2)*wk
1384 : CASE (-3)
1385 174296 : wj = w_border1(3)*wk
1386 : CASE default
1387 522888 : CYCLE
1388 : END SELECT
1389 : ELSE
1390 174296 : SELECT CASE (ij)
1391 : CASE (-1)
1392 174296 : wj = w_border1(1)*wk
1393 : CASE (1)
1394 174296 : wj = w_border1(2)*wk
1395 : CASE (3)
1396 174296 : wj = w_border1(3)*wk
1397 : CASE default
1398 522888 : CYCLE
1399 : END SELECT
1400 : END IF
1401 : ELSE
1402 17496236 : wj = weights_1d(ABS(ij) + 1)*wk
1403 : END IF
1404 : END IF
1405 :
1406 21838256 : IF (fine_bo(2, 1) - fine_bo(1, 1) < 7 .OR. safe_calc) THEN
1407 : ! CALL timeset(routineN//"_safe",handle2)
1408 25000 : DO i = coarse_bo(1, 1), coarse_bo(2, 1)
1409 165000 : DO ii = -3, 3
1410 140000 : IF (pbc .AND. .NOT. is_split) THEN
1411 0 : wi = weights_1d(ABS(ii) + 1)*wj
1412 0 : fi = fine_gbo(1, 1) + MODULO(2*i + ii - fine_gbo(1, 1) + f_shift(1), 2*s(1))
1413 : ELSE
1414 140000 : fi = 2*i + ii + f_shift(1)
1415 140000 : IF (fi < fine_bo(1, 1) .OR. fi > fine_bo(2, 1)) CYCLE
1416 67500 : IF (.NOT. pbc .AND. (fi <= fine_gbo(1, 1) + 1 .OR. &
1417 : fi >= fine_gbo(2, 1) - 1)) THEN
1418 25000 : IF (fi == fine_gbo(1, 1) .OR. fi == fine_gbo(2, 1)) THEN
1419 10000 : IF (ii /= 0) CYCLE
1420 5000 : wi = w_border0*wj
1421 15000 : ELSE IF (fi == fine_gbo(1, 1) + 1) THEN
1422 2500 : SELECT CASE (ii)
1423 : CASE (1)
1424 2500 : wi = w_border1(1)*wj
1425 : CASE (-1)
1426 2500 : wi = w_border1(2)*wj
1427 : CASE (-3)
1428 2500 : wi = w_border1(3)*wj
1429 : CASE default
1430 7500 : CYCLE
1431 : END SELECT
1432 : ELSE
1433 2500 : SELECT CASE (ii)
1434 : CASE (-1)
1435 2500 : wi = w_border1(1)*wj
1436 : CASE (1)
1437 2500 : wi = w_border1(2)*wj
1438 : CASE (3)
1439 2500 : wi = w_border1(3)*wj
1440 : CASE default
1441 7500 : CYCLE
1442 : END SELECT
1443 : END IF
1444 : ELSE
1445 42500 : wi = weights_1d(ABS(ii) + 1)*wj
1446 : END IF
1447 : END IF
1448 : fine_values(fi, fj, fk) = &
1449 : fine_values(fi, fj, fk) + &
1450 160000 : wi*coarse_coeffs(i, j, k)
1451 : END DO
1452 : END DO
1453 : ! CALL timestop(handle2)
1454 : ELSE
1455 : ! CALL timeset(routineN//"_core1",handle2)
1456 75542416 : ww0 = wj*w_0
1457 94428020 : ww1 = wj*w_1
1458 18885604 : IF (pbc .AND. .NOT. is_split) THEN
1459 0 : v3 = coarse_coeffs(coarse_bo(2, 1), j, k)
1460 0 : i = coarse_bo(1, 1)
1461 0 : fi = 2*i + f_shift(1)
1462 0 : v0 = coarse_coeffs(i, j, k)
1463 0 : v1 = coarse_coeffs(i + 1, j, k)
1464 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1465 0 : ww0(1)*v3 + ww0(2)*v0 + ww0(3)*v1
1466 0 : v2 = coarse_coeffs(i + 2, j, k)
1467 0 : fi = fi + 1
1468 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1469 0 : ww1(1)*v3 + ww1(2)*v0 + ww1(3)*v1 + ww1(4)*v2
1470 18885604 : ELSE IF (.NOT. has_i_lbound) THEN
1471 18826844 : i = coarse_bo(1, 1)
1472 18826844 : fi = 2*i + f_shift(1)
1473 18826844 : v0 = coarse_coeffs(i, j, k)
1474 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1475 18826844 : w_border0*wj*v0
1476 18826844 : v1 = coarse_coeffs(i + 1, j, k)
1477 18826844 : v2 = coarse_coeffs(i + 2, j, k)
1478 18826844 : fi = fi + 1
1479 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1480 : wj*(w_border1(1)*v0 + w_border1(2)*v1 + &
1481 18826844 : w_border1(3)*v2)
1482 : ELSE
1483 58760 : i = coarse_bo(1, 1)
1484 58760 : v0 = coarse_coeffs(i, j, k)
1485 58760 : v1 = coarse_coeffs(i + 1, j, k)
1486 58760 : v2 = coarse_coeffs(i + 2, j, k)
1487 58760 : fi = 2*i + f_shift(1) + 1
1488 58760 : IF (.NOT. (fi + 1 == fine_bo(1, 1) .OR. &
1489 : fi + 2 == fine_bo(1, 1))) THEN
1490 : CALL cp_abort(__LOCATION__, &
1491 : "unexpected start index "// &
1492 : TRIM(cp_to_string(coarse_bo(1, 1)))//" "// &
1493 0 : TRIM(cp_to_string(fi)))
1494 : END IF
1495 : END IF
1496 18885604 : fi = fi + 1
1497 18885604 : IF (fi >= fine_bo(1, 1)) THEN
1498 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1499 : ww0(1)*v0 + ww0(2)*v1 + &
1500 18885604 : ww0(3)*v2
1501 : ELSE
1502 0 : CPASSERT(fi + 1 == fine_bo(1, 1))
1503 : END IF
1504 : ! CALL timestop(handle2)
1505 : ! CALL timeset(routineN//"_core",handle2)
1506 18885604 : DO i = coarse_bo(1, 1) + 3, FLOOR((fine_bo(2, 1) - f_shift(1))/2.) - 3, 4
1507 84267688 : v3 = coarse_coeffs(i, j, k)
1508 84267688 : fi = fi + 1
1509 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1510 : (ww1(1)*v0 + ww1(2)*v1 + &
1511 84267688 : ww1(3)*v2 + ww1(4)*v3)
1512 84267688 : fi = fi + 1
1513 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1514 : (ww0(1)*v1 + ww0(2)*v2 + &
1515 84267688 : ww0(3)*v3)
1516 84267688 : v0 = coarse_coeffs(i + 1, j, k)
1517 84267688 : fi = fi + 1
1518 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1519 : (ww1(4)*v0 + ww1(1)*v1 + &
1520 84267688 : ww1(2)*v2 + ww1(3)*v3)
1521 84267688 : fi = fi + 1
1522 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1523 : (ww0(1)*v2 + ww0(2)*v3 + &
1524 84267688 : ww0(3)*v0)
1525 84267688 : v1 = coarse_coeffs(i + 2, j, k)
1526 84267688 : fi = fi + 1
1527 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1528 : (ww1(3)*v0 + ww1(4)*v1 + &
1529 84267688 : ww1(1)*v2 + ww1(2)*v3)
1530 84267688 : fi = fi + 1
1531 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1532 : (ww0(1)*v3 + ww0(2)*v0 + &
1533 84267688 : ww0(3)*v1)
1534 84267688 : v2 = coarse_coeffs(i + 3, j, k)
1535 84267688 : fi = fi + 1
1536 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1537 : (ww1(2)*v0 + ww1(3)*v1 + &
1538 84267688 : ww1(4)*v2 + ww1(1)*v3)
1539 84267688 : fi = fi + 1
1540 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1541 : (ww0(1)*v0 + ww0(2)*v1 + &
1542 84543910 : ww0(3)*v2)
1543 : END DO
1544 : ! CALL timestop(handle2)
1545 : ! CALL timeset(routineN//"_clean",handle2)
1546 18885604 : rest_b = MODULO(FLOOR((fine_bo(2, 1) - f_shift(1))/2.) - coarse_bo(1, 1) - 3 + 1, 4)
1547 18885604 : IF (rest_b > 0) THEN
1548 18508720 : i = FLOOR((fine_bo(2, 1) - f_shift(1))/2.) - rest_b + 1
1549 18508720 : v3 = coarse_coeffs(i, j, k)
1550 18508720 : fi = fi + 1
1551 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1552 : (ww1(1)*v0 + ww1(2)*v1 + &
1553 18508720 : ww1(3)*v2 + ww1(4)*v3)
1554 18508720 : fi = fi + 1
1555 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1556 : (ww0(1)*v1 + ww0(2)*v2 + &
1557 18508720 : ww0(3)*v3)
1558 18508720 : IF (rest_b > 1) THEN
1559 18449960 : v0 = coarse_coeffs(i + 1, j, k)
1560 18449960 : fi = fi + 1
1561 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1562 : (ww1(4)*v0 + ww1(1)*v1 + &
1563 18449960 : ww1(2)*v2 + ww1(3)*v3)
1564 18449960 : fi = fi + 1
1565 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1566 : (ww0(1)*v2 + ww0(2)*v3 + &
1567 18449960 : ww0(3)*v0)
1568 18449960 : IF (rest_b > 2) THEN
1569 73160 : v1 = coarse_coeffs(i + 2, j, k)
1570 73160 : fi = fi + 1
1571 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1572 : (ww1(3)*v0 + ww1(4)*v1 + &
1573 73160 : ww1(1)*v2 + ww1(2)*v3)
1574 73160 : fi = fi + 1
1575 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1576 : (ww0(1)*v3 + ww0(2)*v0 + &
1577 73160 : ww0(3)*v1)
1578 73160 : IF (pbc .AND. .NOT. is_split) THEN
1579 0 : v2 = coarse_coeffs(coarse_bo(1, 1), j, k)
1580 0 : fi = fi + 1
1581 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1582 0 : ww1(1)*v3 + ww1(2)*v0 + ww1(3)*v1 + ww1(4)*v2
1583 0 : fi = fi + 1
1584 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1585 0 : ww0(1)*v0 + ww0(2)*v1 + ww0(3)*v2
1586 0 : v3 = coarse_coeffs(coarse_bo(1, 1) + 1, j, k)
1587 0 : fi = fi + 1
1588 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1589 0 : ww1(1)*v0 + ww1(2)*v1 + ww1(3)*v2 + ww1(4)*v3
1590 73160 : ELSE IF (has_i_ubound) THEN
1591 0 : v2 = coarse_coeffs(i + 3, j, k)
1592 0 : fi = fi + 1
1593 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1594 0 : ww1(1)*v3 + ww1(2)*v0 + ww1(3)*v1 + ww1(4)*v2
1595 0 : fi = fi + 1
1596 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1597 0 : ww0(1)*v0 + ww0(2)*v1 + ww0(3)*v2
1598 0 : IF (fi + 1 == fine_bo(2, 1)) THEN
1599 0 : v3 = coarse_coeffs(i + 4, j, k)
1600 0 : fi = fi + 1
1601 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1602 0 : ww1(1)*v0 + ww1(2)*v1 + ww1(3)*v2 + ww1(4)*v3
1603 : END IF
1604 : ELSE
1605 73160 : fi = fi + 1
1606 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1607 : wj*(w_border1(3)*v3 + w_border1(2)*v0 + &
1608 73160 : w_border1(1)*v1)
1609 73160 : fi = fi + 1
1610 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1611 73160 : w_border0*wj*v1
1612 : END IF
1613 18376800 : ELSE IF (pbc .AND. .NOT. is_split) THEN
1614 0 : v1 = coarse_coeffs(coarse_bo(1, 1), j, k)
1615 0 : fi = fi + 1
1616 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1617 0 : ww1(1)*v2 + ww1(2)*v3 + ww1(3)*v0 + ww1(4)*v1
1618 0 : fi = fi + 1
1619 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1620 0 : ww0(1)*v3 + ww0(2)*v0 + ww0(3)*v1
1621 0 : v2 = coarse_coeffs(coarse_bo(1, 1) + 1, j, k)
1622 0 : fi = fi + 1
1623 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1624 0 : ww1(1)*v3 + ww1(2)*v0 + ww1(3)*v1 + ww1(4)*v2
1625 18376800 : ELSE IF (has_i_ubound) THEN
1626 0 : v1 = coarse_coeffs(i + 2, j, k)
1627 0 : fi = fi + 1
1628 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1629 0 : ww1(1)*v2 + ww1(2)*v3 + ww1(3)*v0 + ww1(4)*v1
1630 0 : fi = fi + 1
1631 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1632 0 : ww0(1)*v3 + ww0(2)*v0 + ww0(3)*v1
1633 0 : IF (fi + 1 == fine_bo(2, 1)) THEN
1634 0 : v2 = coarse_coeffs(i + 3, j, k)
1635 0 : fi = fi + 1
1636 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1637 0 : ww1(1)*v3 + ww1(2)*v0 + ww1(3)*v1 + ww1(4)*v2
1638 : END IF
1639 : ELSE
1640 18376800 : fi = fi + 1
1641 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1642 : wj*(w_border1(3)*v2 + w_border1(2)*v3 + &
1643 18376800 : w_border1(1)*v0)
1644 18376800 : fi = fi + 1
1645 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1646 18376800 : w_border0*wj*v0
1647 : END IF
1648 58760 : ELSE IF (pbc .AND. .NOT. is_split) THEN
1649 0 : v0 = coarse_coeffs(coarse_bo(1, 1), j, k)
1650 0 : fi = fi + 1
1651 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1652 0 : ww1(1)*v1 + ww1(2)*v2 + ww1(3)*v3 + ww1(4)*v0
1653 0 : fi = fi + 1
1654 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1655 0 : ww0(1)*v2 + ww0(2)*v3 + ww0(3)*v0
1656 0 : v1 = coarse_coeffs(coarse_bo(1, 1) + 1, j, k)
1657 0 : fi = fi + 1
1658 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1659 0 : ww1(1)*v2 + ww1(2)*v3 + ww1(3)*v0 + ww1(4)*v1
1660 58760 : ELSE IF (has_i_ubound) THEN
1661 58760 : v0 = coarse_coeffs(i + 1, j, k)
1662 58760 : fi = fi + 1
1663 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1664 58760 : ww1(1)*v1 + ww1(2)*v2 + ww1(3)*v3 + ww1(4)*v0
1665 58760 : fi = fi + 1
1666 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1667 58760 : ww0(1)*v2 + ww0(2)*v3 + ww0(3)*v0
1668 58760 : IF (fi + 1 == fine_bo(2, 1)) THEN
1669 58760 : v1 = coarse_coeffs(i + 2, j, k)
1670 58760 : fi = fi + 1
1671 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1672 58760 : ww1(1)*v2 + ww1(2)*v3 + ww1(3)*v0 + ww1(4)*v1
1673 : END IF
1674 : ELSE
1675 0 : fi = fi + 1
1676 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1677 : wj*(w_border1(3)*v1 + w_border1(2)*v2 + &
1678 0 : w_border1(1)*v3)
1679 0 : fi = fi + 1
1680 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1681 0 : w_border0*wj*v3
1682 : END IF
1683 376884 : ELSE IF (pbc .AND. .NOT. is_split) THEN
1684 0 : v3 = coarse_coeffs(coarse_bo(1, 1), j, k)
1685 0 : fi = fi + 1
1686 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1687 0 : ww1(1)*v0 + ww1(2)*v1 + ww1(3)*v2 + ww1(4)*v3
1688 0 : fi = fi + 1
1689 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1690 0 : ww0(1)*v1 + ww0(2)*v2 + ww0(3)*v3
1691 0 : v0 = coarse_coeffs(coarse_bo(1, 1) + 1, j, k)
1692 0 : fi = fi + 1
1693 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1694 0 : ww1(1)*v1 + ww1(2)*v2 + ww1(3)*v3 + ww1(4)*v0
1695 376884 : ELSE IF (has_i_ubound) THEN
1696 0 : v3 = coarse_coeffs(i, j, k)
1697 0 : fi = fi + 1
1698 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1699 0 : ww1(1)*v0 + ww1(2)*v1 + ww1(3)*v2 + ww1(4)*v3
1700 0 : fi = fi + 1
1701 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1702 0 : ww0(1)*v1 + ww0(2)*v2 + ww0(3)*v3
1703 0 : IF (fi + 1 == fine_bo(2, 1)) THEN
1704 0 : v0 = coarse_coeffs(i + 1, j, k)
1705 0 : fi = fi + 1
1706 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1707 0 : ww1(1)*v1 + ww1(2)*v2 + ww1(3)*v3 + ww1(4)*v0
1708 : END IF
1709 : ELSE
1710 376884 : fi = fi + 1
1711 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1712 : wj*(w_border1(3)*v0 + w_border1(2)*v1 + &
1713 376884 : w_border1(1)*v2)
1714 376884 : fi = fi + 1
1715 : fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1716 376884 : w_border0*wj*v2
1717 : END IF
1718 18885604 : CPASSERT(fi == fine_bo(2, 1))
1719 : END IF
1720 : ! CALL timestop(handle2)
1721 : END DO
1722 : END DO
1723 : END DO
1724 : END DO
1725 :
1726 2260 : IF (is_split) THEN
1727 24 : DEALLOCATE (coarse_coeffs)
1728 : END IF
1729 2260 : CALL timestop(handle)
1730 4520 : END SUBROUTINE add_coarse2fine
1731 :
1732 : ! **************************************************************************************************
1733 : !> \brief low level function that adds a coarse grid (without boundary)
1734 : !> to a fine grid.
1735 : !>
1736 : !> It will add to
1737 : !>
1738 : !> coarse_coeffs(coarse_bounds(1,1):coarse_bounds(2,1),
1739 : !> coarse_bounds(1,2):coarse_bounds(2,2),
1740 : !> coarse_bounds(1,3):coarse_bounds(2,3))
1741 : !>
1742 : !> using
1743 : !>
1744 : !> fine_values(2*coarse_bounds(1,1):2*coarse_bounds(2,1),
1745 : !> 2*coarse_bounds(1,2):2*coarse_bounds(2,2),
1746 : !> 2*coarse_bounds(1,3):2*coarse_bounds(2,3))
1747 : !>
1748 : !> composed with the weights obtained by the direct product of the
1749 : !> 1d coefficients weights:
1750 : !>
1751 : !> for i,j,k in -3..3
1752 : !> w(i,j,k)=weights_1d(abs(i)+1)*weights_1d(abs(j)+1)*
1753 : !> weights_1d(abs(k)+1)
1754 : !> \param fine_values_pw 3d array where to add the values due to the
1755 : !> coarse coeffs
1756 : !> \param coarse_coeffs_pw 3d array with boundary of size 1 with the values of the
1757 : !> coefficients
1758 : !> \param weights_1d the weights of the 1d smearing
1759 : !> \param w_border0 the 1d weight at the border
1760 : !> \param w_border1 the 1d weights for a point one off the border
1761 : !> (w_border1(1) is the weight of the coefficent at the border)
1762 : !> \param pbc ...
1763 : !> \param safe_computation ...
1764 : !> \author fawzi
1765 : !> \note
1766 : !> see coarse2fine for some relevant notes
1767 : ! **************************************************************************************************
1768 1062 : SUBROUTINE add_fine2coarse(fine_values_pw, coarse_coeffs_pw, &
1769 : weights_1d, w_border0, w_border1, pbc, safe_computation)
1770 : TYPE(pw_r3d_rs_type), INTENT(IN) :: fine_values_pw, coarse_coeffs_pw
1771 : REAL(kind=dp), DIMENSION(4), INTENT(in) :: weights_1d
1772 : REAL(kind=dp), INTENT(in) :: w_border0
1773 : REAL(kind=dp), DIMENSION(3), INTENT(in) :: w_border1
1774 : LOGICAL, INTENT(in) :: pbc
1775 : LOGICAL, INTENT(in), OPTIONAL :: safe_computation
1776 :
1777 : CHARACTER(len=*), PARAMETER :: routineN = 'add_fine2coarse'
1778 :
1779 : INTEGER :: coarse_slice_size, f_shift(3), fi, fj, fk, handle, handle2, i, ii, ij, ik, ip, j, &
1780 : k, n_procs, p, p_old, rcv_tot_size, rest_b, s(3), send_tot_size, ss, x, x_att
1781 1062 : INTEGER, ALLOCATABLE, DIMENSION(:) :: pp_lb, pp_ub, rcv_offset, rcv_size, &
1782 1062 : real_rcv_size, send_offset, send_size, &
1783 1062 : sent_size
1784 : INTEGER, DIMENSION(2, 3) :: coarse_bo, coarse_gbo, fine_bo, &
1785 : fine_gbo, my_coarse_bo
1786 1062 : INTEGER, DIMENSION(:), POINTER :: pos_of_x
1787 : LOGICAL :: has_i_lbound, has_i_ubound, is_split, &
1788 : local_data, safe_calc
1789 : REAL(kind=dp) :: vv0, vv1, vv2, vv3, vv4, vv5, vv6, vv7, &
1790 : wi, wj, wk
1791 1062 : REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: rcv_buf, send_buf
1792 : REAL(kind=dp), DIMENSION(3) :: w_0, ww0
1793 : REAL(kind=dp), DIMENSION(4) :: w_1, ww1
1794 1062 : REAL(kind=dp), DIMENSION(:, :, :), POINTER :: coarse_coeffs, fine_values
1795 :
1796 1062 : CALL timeset(routineN, handle)
1797 :
1798 1062 : safe_calc = .FALSE.
1799 1062 : IF (PRESENT(safe_computation)) safe_calc = safe_computation
1800 :
1801 10620 : my_coarse_bo = coarse_coeffs_pw%pw_grid%bounds_local
1802 10620 : coarse_gbo = coarse_coeffs_pw%pw_grid%bounds
1803 10620 : fine_bo = fine_values_pw%pw_grid%bounds_local
1804 10620 : fine_gbo = fine_values_pw%pw_grid%bounds
1805 4248 : f_shift = fine_gbo(1, :) - 2*coarse_gbo(1, :)
1806 3150 : is_split = ANY(coarse_gbo(:, 1) /= my_coarse_bo(:, 1))
1807 1062 : coarse_bo = my_coarse_bo
1808 1062 : IF (fine_bo(1, 1) <= fine_bo(2, 1)) THEN
1809 1062 : coarse_bo(1, 1) = FLOOR(REAL(fine_bo(1, 1) - f_shift(1), dp)/2._dp) - 1
1810 1062 : coarse_bo(2, 1) = FLOOR(REAL(fine_bo(2, 1) + 1 - f_shift(1), dp)/2._dp) + 1
1811 : ELSE
1812 0 : coarse_bo(1, 1) = coarse_gbo(2, 1)
1813 0 : coarse_bo(2, 1) = coarse_gbo(2, 1) - 1
1814 : END IF
1815 1062 : IF (.NOT. is_split .OR. .NOT. pbc) THEN
1816 1062 : coarse_bo(1, 1) = MAX(coarse_gbo(1, 1), coarse_bo(1, 1))
1817 1062 : coarse_bo(2, 1) = MIN(coarse_gbo(2, 1), coarse_bo(2, 1))
1818 : END IF
1819 1062 : has_i_ubound = (fine_gbo(2, 1) /= fine_bo(2, 1)) .OR. pbc .AND. is_split
1820 1062 : has_i_lbound = (fine_gbo(1, 1) /= fine_bo(1, 1)) .OR. pbc .AND. is_split
1821 :
1822 1062 : IF (pbc) THEN
1823 0 : CPASSERT(ALL(fine_gbo(1, :) == 2*coarse_gbo(1, :) + f_shift))
1824 0 : CPASSERT(ALL(fine_gbo(2, :) == 2*coarse_gbo(2, :) + f_shift + 1))
1825 : ELSE
1826 4248 : CPASSERT(ALL(fine_gbo(2, :) == 2*coarse_gbo(2, :) + f_shift))
1827 4248 : CPASSERT(ALL(fine_gbo(1, :) == 2*coarse_gbo(1, :) + f_shift))
1828 : END IF
1829 1062 : CPASSERT(coarse_gbo(2, 1) - coarse_gbo(1, 2) > 1)
1830 1062 : local_data = is_split ! ANY(coarse_bo/=my_coarse_bo)
1831 1062 : IF (local_data) THEN
1832 : ALLOCATE (coarse_coeffs(coarse_bo(1, 1):coarse_bo(2, 1), &
1833 : coarse_bo(1, 2):coarse_bo(2, 2), &
1834 120 : coarse_bo(1, 3):coarse_bo(2, 3)))
1835 31240 : coarse_coeffs = 0._dp
1836 : ELSE
1837 1038 : coarse_coeffs => coarse_coeffs_pw%array
1838 : END IF
1839 :
1840 1062 : fine_values => fine_values_pw%array
1841 4248 : w_0 = [weights_1d(3), weights_1d(1), weights_1d(3)]
1842 5310 : w_1 = [weights_1d(4), weights_1d(2), weights_1d(2), weights_1d(4)]
1843 :
1844 4248 : DO i = 1, 3
1845 4248 : s(i) = coarse_gbo(2, i) - coarse_gbo(1, i) + 1
1846 : END DO
1847 4248 : IF (ANY(s < 1)) RETURN
1848 :
1849 14092 : DO k = coarse_bo(1, 3), coarse_bo(2, 3)
1850 105302 : DO ik = -3, 3
1851 91210 : IF (pbc) THEN
1852 0 : wk = weights_1d(ABS(ik) + 1)
1853 0 : fk = fine_gbo(1, 3) + MODULO(2*k + ik - fine_gbo(1, 3) + f_shift(3), 2*s(3))
1854 : ELSE
1855 91210 : fk = 2*k + ik + f_shift(3)
1856 91210 : IF (fk <= fine_bo(1, 3) + 1 .OR. fk >= fine_bo(2, 3) - 1) THEN
1857 19116 : IF (fk < fine_bo(1, 3) .OR. fk > fine_bo(2, 3)) CYCLE
1858 10620 : IF (fk == fine_bo(1, 3) .OR. fk == fine_bo(2, 3)) THEN
1859 4248 : IF (ik /= 0) CYCLE
1860 2124 : wk = w_border0
1861 6372 : ELSE IF (fk == fine_bo(1, 3) + 1) THEN
1862 1062 : SELECT CASE (ik)
1863 : CASE (1)
1864 1062 : wk = w_border1(1)
1865 : CASE (-1)
1866 1062 : wk = w_border1(2)
1867 : CASE (-3)
1868 1062 : wk = w_border1(3)
1869 : CASE default
1870 0 : CPABORT("Only 1, -1, -3 are supported as the value of ik")
1871 3186 : CYCLE
1872 : END SELECT
1873 : ELSE
1874 1062 : SELECT CASE (ik)
1875 : CASE (3)
1876 1062 : wk = w_border1(3)
1877 : CASE (1)
1878 1062 : wk = w_border1(2)
1879 : CASE (-1)
1880 1062 : wk = w_border1(1)
1881 : CASE default
1882 0 : CPABORT("Only 3, 1, -1 are supported as the value of ik")
1883 3186 : CYCLE
1884 : END SELECT
1885 : END IF
1886 : ELSE
1887 72094 : wk = weights_1d(ABS(ik) + 1)
1888 : END IF
1889 : END IF
1890 1471974 : DO j = coarse_bo(1, 2), coarse_bo(2, 2)
1891 11118042 : DO ij = -3, 3
1892 9648478 : IF (pbc) THEN
1893 : fj = fine_gbo(1, 2) + MODULO(2*j + ij - fine_gbo(1, 2) + f_shift(2), &
1894 0 : 2*s(2))
1895 0 : wj = weights_1d(ABS(ij) + 1)*wk
1896 : ELSE
1897 9648478 : fj = 2*j + ij + f_shift(2)
1898 9648478 : IF (fj <= fine_bo(1, 2) + 1 .OR. fj >= fine_bo(2, 2) - 1) THEN
1899 1450620 : IF (fj < fine_bo(1, 2) .OR. fj > fine_bo(2, 2)) CYCLE
1900 805900 : IF (fj == fine_bo(1, 2) .OR. fj == fine_bo(2, 2)) THEN
1901 322360 : IF (ij /= 0) CYCLE
1902 161180 : wj = w_border0*wk
1903 483540 : ELSE IF (fj == fine_bo(1, 2) + 1) THEN
1904 80590 : SELECT CASE (ij)
1905 : CASE (1)
1906 80590 : wj = w_border1(1)*wk
1907 : CASE (-1)
1908 80590 : wj = w_border1(2)*wk
1909 : CASE (-3)
1910 80590 : wj = w_border1(3)*wk
1911 : CASE default
1912 0 : CPABORT("Only 1, -1, -3 are supported as the value of ij")
1913 241770 : CYCLE
1914 : END SELECT
1915 : ELSE
1916 80590 : SELECT CASE (ij)
1917 : CASE (-1)
1918 80590 : wj = w_border1(1)*wk
1919 : CASE (1)
1920 80590 : wj = w_border1(2)*wk
1921 : CASE (3)
1922 80590 : wj = w_border1(3)*wk
1923 : CASE default
1924 0 : CPABORT("Only -1, 1, 3 are supported as the value of ij")
1925 241770 : CYCLE
1926 : END SELECT
1927 : END IF
1928 : ELSE
1929 8197858 : wj = weights_1d(ABS(ij) + 1)*wk
1930 : END IF
1931 : END IF
1932 :
1933 10220932 : IF (coarse_bo(2, 1) - coarse_bo(1, 1) < 7 .OR. safe_calc) THEN
1934 1559844 : DO i = coarse_bo(1, 1), coarse_bo(2, 1)
1935 10785788 : DO ii = -3, 3
1936 9225944 : IF (pbc .AND. .NOT. is_split) THEN
1937 0 : wi = weights_1d(ABS(ii) + 1)*wj
1938 0 : fi = fine_gbo(1, 1) + MODULO(2*i + ii - fine_gbo(1, 1) + f_shift(1), 2*s(1))
1939 : ELSE
1940 9225944 : fi = 2*i + ii + f_shift(1)
1941 9225944 : IF (fi < fine_bo(1, 1) .OR. fi > fine_bo(2, 1)) CYCLE
1942 7112560 : IF (((.NOT. pbc) .AND. fi <= fine_gbo(1, 1) + 1) .OR. &
1943 : ((.NOT. pbc) .AND. fi >= fine_gbo(2, 1) - 1)) THEN
1944 2281160 : IF (fi == fine_gbo(1, 1) .OR. fi == fine_gbo(2, 1)) THEN
1945 912464 : IF (ii /= 0) CYCLE
1946 456232 : wi = w_border0*wj
1947 1368696 : ELSE IF (fi == fine_gbo(1, 1) + 1) THEN
1948 228116 : SELECT CASE (ii)
1949 : CASE (1)
1950 228116 : wi = w_border1(1)*wj
1951 : CASE (-1)
1952 228116 : wi = w_border1(2)*wj
1953 : CASE (-3)
1954 228116 : wi = w_border1(3)*wj
1955 : CASE default
1956 684348 : CYCLE
1957 : END SELECT
1958 : ELSE
1959 228116 : SELECT CASE (ii)
1960 : CASE (-1)
1961 228116 : wi = w_border1(1)*wj
1962 : CASE (1)
1963 228116 : wi = w_border1(2)*wj
1964 : CASE (3)
1965 228116 : wi = w_border1(3)*wj
1966 : CASE default
1967 684348 : CYCLE
1968 : END SELECT
1969 : END IF
1970 : ELSE
1971 4831400 : wi = weights_1d(ABS(ii) + 1)*wj
1972 : END IF
1973 : END IF
1974 : coarse_coeffs(i, j, k) = &
1975 : coarse_coeffs(i, j, k) + &
1976 10543936 : wi*fine_values(fi, fj, fk)
1977 : END DO
1978 : END DO
1979 : ELSE
1980 34402904 : ww0 = wj*w_0
1981 43003630 : ww1 = wj*w_1
1982 8600726 : IF (pbc .AND. .NOT. is_split) THEN
1983 0 : i = coarse_bo(1, 1) - 1
1984 0 : vv2 = fine_values(fine_bo(2, 1) - 2, fj, fk)
1985 0 : vv3 = fine_values(fine_bo(2, 1) - 1, fj, fk)
1986 0 : vv4 = fine_values(fine_bo(2, 1), fj, fk)
1987 0 : fi = fine_bo(1, 1)
1988 0 : vv5 = fine_values(fi, fj, fk)
1989 0 : fi = fi + 1
1990 0 : vv6 = fine_values(fi, fj, fk)
1991 0 : fi = fi + 1
1992 0 : vv7 = fine_values(fi, fj, fk)
1993 : coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
1994 0 : + ww1(4)*vv2 + ww0(3)*vv3 + ww1(3)*vv4 + ww0(2)*vv5 + ww1(2)*vv6 + ww0(1)*vv7
1995 : coarse_coeffs(i + 2, j, k) = coarse_coeffs(i + 2, j, k) &
1996 0 : + ww1(4)*vv4 + ww0(3)*vv5 + ww1(3)*vv6 + ww0(2)*vv7
1997 : coarse_coeffs(i + 3, j, k) = coarse_coeffs(i + 3, j, k) &
1998 0 : + ww1(4)*vv6 + ww0(3)*vv7
1999 8600726 : ELSE IF (has_i_lbound) THEN
2000 47524 : i = coarse_bo(1, 1)
2001 47524 : fi = fine_bo(1, 1) - 1
2002 47524 : IF (i + 1 == FLOOR((fine_bo(1, 1) + 1 - f_shift(1))/2._dp)) THEN
2003 47524 : fi = fi + 1
2004 47524 : vv0 = fine_values(fi, fj, fk)
2005 : coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) + &
2006 47524 : vv0*ww0(3)
2007 : coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) + &
2008 47524 : vv0*ww0(2)
2009 : coarse_coeffs(i + 2, j, k) = coarse_coeffs(i + 2, j, k) + &
2010 47524 : vv0*ww0(1)
2011 : END IF
2012 : ELSE
2013 8553202 : i = coarse_bo(1, 1)
2014 8553202 : fi = 2*i + f_shift(1)
2015 8553202 : vv0 = fine_values(fi, fj, fk)
2016 8553202 : fi = fi + 1
2017 8553202 : vv1 = fine_values(fi, fj, fk)
2018 8553202 : fi = fi + 1
2019 8553202 : vv2 = fine_values(fi, fj, fk)
2020 : coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) + &
2021 8553202 : (vv0*w_border0 + vv1*w_border1(1))*wj + vv2*ww0(1)
2022 : coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) + &
2023 8553202 : wj*w_border1(2)*vv1 + ww0(2)*vv2
2024 : coarse_coeffs(i + 2, j, k) = coarse_coeffs(i + 2, j, k) + &
2025 8553202 : wj*w_border1(3)*vv1 + ww0(3)*vv2
2026 : END IF
2027 8600726 : DO i = coarse_bo(1, 1) + 3, FLOOR((fine_bo(2, 1) - f_shift(1))/2._dp) - 3, 4
2028 37559568 : fi = fi + 1
2029 37559568 : vv0 = fine_values(fi, fj, fk)
2030 37559568 : fi = fi + 1
2031 37559568 : vv1 = fine_values(fi, fj, fk)
2032 37559568 : fi = fi + 1
2033 37559568 : vv2 = fine_values(fi, fj, fk)
2034 37559568 : fi = fi + 1
2035 37559568 : vv3 = fine_values(fi, fj, fk)
2036 37559568 : fi = fi + 1
2037 37559568 : vv4 = fine_values(fi, fj, fk)
2038 37559568 : fi = fi + 1
2039 37559568 : vv5 = fine_values(fi, fj, fk)
2040 37559568 : fi = fi + 1
2041 37559568 : vv6 = fine_values(fi, fj, fk)
2042 37559568 : fi = fi + 1
2043 37559568 : vv7 = fine_values(fi, fj, fk)
2044 : coarse_coeffs(i - 3, j, k) = coarse_coeffs(i - 3, j, k) &
2045 37559568 : + ww1(1)*vv0
2046 : coarse_coeffs(i - 2, j, k) = coarse_coeffs(i - 2, j, k) &
2047 37559568 : + ww1(2)*vv0 + ww0(1)*vv1 + ww1(1)*vv2
2048 : coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
2049 37559568 : + ww1(3)*vv0 + ww0(2)*vv1 + ww1(2)*vv2 + ww0(1)*vv3 + ww1(1)*vv4
2050 : coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
2051 37559568 : + ww1(4)*vv0 + ww0(3)*vv1 + ww1(3)*vv2 + ww0(2)*vv3 + ww1(2)*vv4 + ww0(1)*vv5 + ww1(1)*vv6
2052 : coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
2053 37559568 : + ww1(4)*vv2 + ww0(3)*vv3 + ww1(3)*vv4 + ww0(2)*vv5 + ww1(2)*vv6 + ww0(1)*vv7
2054 : coarse_coeffs(i + 2, j, k) = coarse_coeffs(i + 2, j, k) &
2055 37559568 : + ww1(4)*vv4 + ww0(3)*vv5 + ww1(3)*vv6 + ww0(2)*vv7
2056 : coarse_coeffs(i + 3, j, k) = coarse_coeffs(i + 3, j, k) &
2057 37559568 : + ww1(4)*vv6 + ww0(3)*vv7
2058 : END DO
2059 8600726 : IF (.NOT. FLOOR((fine_bo(2, 1) - f_shift(1))/2._dp) - coarse_bo(1, 1) >= 4) THEN
2060 0 : CPABORT("FLOOR((fine_bo(2,1)-f_shift(1))/2._dp)-coarse_bo(1,1)>=4")
2061 : END IF
2062 8600726 : rest_b = MODULO(FLOOR((fine_bo(2, 1) - f_shift(1))/2._dp) - coarse_bo(1, 1) - 6, 4)
2063 8600726 : i = FLOOR((fine_bo(2, 1) - f_shift(1))/2._dp) - 3 - rest_b + 4
2064 8600726 : CPASSERT(fi == (i - 2)*2 + f_shift(1))
2065 8600726 : IF (rest_b > 0) THEN
2066 8540210 : fi = fi + 1
2067 8540210 : vv0 = fine_values(fi, fj, fk)
2068 8540210 : fi = fi + 1
2069 8540210 : vv1 = fine_values(fi, fj, fk)
2070 : coarse_coeffs(i - 3, j, k) = coarse_coeffs(i - 3, j, k) &
2071 8540210 : + ww1(1)*vv0
2072 : coarse_coeffs(i - 2, j, k) = coarse_coeffs(i - 2, j, k) &
2073 8540210 : + ww1(2)*vv0 + ww0(1)*vv1
2074 : coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
2075 8540210 : + ww1(3)*vv0 + ww0(2)*vv1
2076 : coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
2077 8540210 : + ww1(4)*vv0 + ww0(3)*vv1
2078 8540210 : IF (rest_b > 1) THEN
2079 8492686 : fi = fi + 1
2080 8492686 : vv2 = fine_values(fi, fj, fk)
2081 8492686 : fi = fi + 1
2082 8492686 : vv3 = fine_values(fi, fj, fk)
2083 : coarse_coeffs(i - 2, j, k) = coarse_coeffs(i - 2, j, k) &
2084 8492686 : + ww1(1)*vv2
2085 : coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
2086 8492686 : + ww1(2)*vv2 + ww0(1)*vv3
2087 : coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
2088 8492686 : + ww1(3)*vv2 + ww0(2)*vv3
2089 : coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
2090 8492686 : + ww1(4)*vv2 + ww0(3)*vv3
2091 8492686 : IF (rest_b > 2) THEN
2092 61924 : fi = fi + 1
2093 61924 : vv4 = fine_values(fi, fj, fk)
2094 61924 : fi = fi + 1
2095 61924 : vv5 = fine_values(fi, fj, fk)
2096 61924 : fi = fi + 1
2097 61924 : vv6 = fine_values(fi, fj, fk)
2098 61924 : fi = fi + 1
2099 61924 : vv7 = fine_values(fi, fj, fk)
2100 : coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
2101 61924 : + ww1(1)*vv4
2102 61924 : IF (has_i_ubound) THEN
2103 0 : IF (coarse_bo(2, 1) - 2 == FLOOR((fine_bo(2, 1) - f_shift(1))/2._dp)) THEN
2104 0 : fi = fi + 1
2105 0 : vv0 = fine_values(fi, fj, fk)
2106 : coarse_coeffs(i + 4, j, k) = coarse_coeffs(i + 4, j, k) &
2107 0 : + vv0*ww1(4)
2108 : ELSE
2109 : vv0 = 0._dp
2110 : END IF
2111 : coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
2112 0 : + ww1(2)*vv4 + ww0(1)*vv5 + ww1(1)*vv6
2113 : coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
2114 0 : + ww1(3)*vv4 + ww0(2)*vv5 + ww1(2)*vv6 + ww0(1)*vv7 + vv0*ww1(1)
2115 : coarse_coeffs(i + 2, j, k) = coarse_coeffs(i + 2, j, k) &
2116 0 : + ww1(4)*vv4 + ww0(3)*vv5 + ww1(3)*vv6 + ww0(2)*vv7 + vv0*ww1(2)
2117 : coarse_coeffs(i + 3, j, k) = coarse_coeffs(i + 3, j, k) &
2118 0 : + ww1(4)*vv6 + ww0(3)*vv7 + vv0*ww1(3)
2119 61924 : ELSEIF (pbc .AND. .NOT. is_split) THEN
2120 0 : fi = fi + 1
2121 0 : vv0 = fine_values(fi, fj, fk)
2122 0 : vv1 = fine_values(fine_bo(1, 1), fj, fk)
2123 0 : vv2 = fine_values(fine_bo(1, 1) + 1, fj, fk)
2124 : coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
2125 0 : + ww1(2)*vv4 + ww0(1)*vv5 + ww1(1)*vv6
2126 : coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
2127 0 : + ww1(3)*vv4 + ww0(2)*vv5 + ww1(2)*vv6 + ww0(1)*vv7 + vv0*ww1(1)
2128 : coarse_coeffs(i + 2, j, k) = coarse_coeffs(i + 2, j, k) &
2129 : + ww1(4)*vv4 + ww0(3)*vv5 + ww1(3)*vv6 + ww0(2)*vv7 + vv0*ww1(2) &
2130 0 : + vv1*ww0(1) + vv2*ww1(1)
2131 : ELSE
2132 : coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
2133 61924 : + ww1(2)*vv4 + ww0(1)*vv5 + wj*w_border1(3)*vv6
2134 : coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
2135 61924 : + ww1(3)*vv4 + ww0(2)*vv5 + wj*w_border1(2)*vv6
2136 : coarse_coeffs(i + 2, j, k) = coarse_coeffs(i + 2, j, k) &
2137 61924 : + ww1(4)*vv4 + ww0(3)*vv5 + wj*w_border1(1)*vv6 + w_border0*wj*vv7
2138 : END IF
2139 : ELSE
2140 8430762 : fi = fi + 1
2141 8430762 : vv4 = fine_values(fi, fj, fk)
2142 8430762 : fi = fi + 1
2143 8430762 : vv5 = fine_values(fi, fj, fk)
2144 8430762 : IF (has_i_ubound) THEN
2145 0 : IF (coarse_bo(2, 1) - 2 == FLOOR((fine_bo(2, 1) - f_shift(1))/2._dp)) THEN
2146 0 : fi = fi + 1
2147 0 : vv6 = fine_values(fi, fj, fk)
2148 : coarse_coeffs(i + 3, j, k) = coarse_coeffs(i + 3, j, k) &
2149 0 : + ww1(4)*vv6
2150 : ELSE
2151 : vv6 = 0._dp
2152 : END IF
2153 : coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
2154 0 : + ww1(1)*vv4
2155 : coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
2156 0 : + ww1(2)*vv4 + ww0(1)*vv5 + ww1(1)*vv6
2157 : coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
2158 0 : + ww1(3)*vv4 + ww0(2)*vv5 + ww1(2)*vv6
2159 : coarse_coeffs(i + 2, j, k) = coarse_coeffs(i + 2, j, k) &
2160 0 : + ww1(4)*vv4 + ww0(3)*vv5 + ww1(3)*vv6
2161 8430762 : ELSEIF (pbc .AND. .NOT. is_split) THEN
2162 0 : fi = fi + 1
2163 0 : vv6 = fine_values(fi, fj, fk)
2164 0 : vv7 = fine_values(fine_bo(1, 1), fj, fk)
2165 0 : vv0 = fine_values(fine_bo(1, 1) + 1, fj, fk)
2166 : coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
2167 0 : + ww1(1)*vv4
2168 : coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
2169 : + ww1(4)*vv0 + ww0(3)*vv1 + ww1(3)*vv2 + ww0(2)*vv3 + &
2170 0 : ww1(2)*vv4 + ww0(1)*vv5 + ww1(1)*vv6
2171 : coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
2172 : + ww1(4)*vv2 + ww0(3)*vv3 + ww1(3)*vv4 + ww0(2)*vv5 + ww1(2)*vv6 &
2173 0 : + ww0(1)*vv7 + ww1(1)*vv0
2174 : ELSE
2175 : coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
2176 8430762 : + wj*w_border1(3)*vv4
2177 : coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
2178 8430762 : + wj*w_border1(2)*vv4
2179 : coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
2180 8430762 : + wj*(w_border1(1)*vv4 + w_border0*vv5)
2181 : END IF
2182 : END IF
2183 : ELSE
2184 47524 : fi = fi + 1
2185 47524 : vv2 = fine_values(fi, fj, fk)
2186 47524 : fi = fi + 1
2187 47524 : vv3 = fine_values(fi, fj, fk)
2188 47524 : IF (has_i_ubound) THEN
2189 47524 : IF (coarse_bo(2, 1) - 2 == FLOOR((fine_bo(2, 1) - f_shift(1))/2._dp)) THEN
2190 47524 : fi = fi + 1
2191 47524 : vv4 = fine_values(fi, fj, fk)
2192 : coarse_coeffs(i + 2, j, k) = coarse_coeffs(i + 2, j, k) &
2193 47524 : + ww1(4)*vv4
2194 : ELSE
2195 : vv4 = 0._dp
2196 : END IF
2197 : coarse_coeffs(i - 2, j, k) = coarse_coeffs(i - 2, j, k) &
2198 47524 : + ww1(1)*vv2
2199 : coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
2200 47524 : + ww1(2)*vv2 + ww0(1)*vv3 + ww1(1)*vv4
2201 : coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
2202 47524 : + ww1(3)*vv2 + ww0(2)*vv3 + ww1(2)*vv4
2203 : coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
2204 47524 : + ww1(4)*vv2 + ww0(3)*vv3 + ww1(3)*vv4
2205 0 : ELSEIF (pbc .AND. .NOT. is_split) THEN
2206 0 : fi = fi + 1
2207 0 : vv4 = fine_values(fi, fj, fk)
2208 0 : vv5 = fine_values(fine_bo(1, 1), fj, fk)
2209 0 : vv6 = fine_values(fine_bo(1, 1) + 1, fj, fk)
2210 : coarse_coeffs(i - 2, j, k) = coarse_coeffs(i - 2, j, k) &
2211 0 : + ww1(1)*vv2
2212 : coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
2213 0 : + ww1(2)*vv2 + ww0(1)*vv3 + ww1(1)*vv4
2214 : coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
2215 0 : + ww1(3)*vv2 + ww0(2)*vv3 + ww1(2)*vv4 + vv5*ww0(1) + ww1(1)*vv6
2216 : ELSE
2217 : coarse_coeffs(i - 2, j, k) = coarse_coeffs(i - 2, j, k) &
2218 0 : + wj*w_border1(3)*vv2
2219 : coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
2220 0 : + wj*w_border1(2)*vv2
2221 : coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
2222 0 : + wj*(w_border1(1)*vv2 + w_border0*vv3)
2223 : END IF
2224 : END IF
2225 : ELSE
2226 60516 : fi = fi + 1
2227 60516 : vv0 = fine_values(fi, fj, fk)
2228 60516 : fi = fi + 1
2229 60516 : vv1 = fine_values(fi, fj, fk)
2230 60516 : IF (has_i_ubound) THEN
2231 0 : IF (coarse_bo(2, 1) - 2 == FLOOR((fine_bo(2, 1) - f_shift(1))/2._dp)) THEN
2232 0 : fi = fi + 1
2233 0 : vv2 = fine_values(fi, fj, fk)
2234 : coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
2235 0 : + ww1(4)*vv2
2236 : ELSE
2237 : vv2 = 0._dp
2238 : END IF
2239 : coarse_coeffs(i - 3, j, k) = coarse_coeffs(i - 3, j, k) &
2240 0 : + ww1(1)*vv0
2241 : coarse_coeffs(i - 2, j, k) = coarse_coeffs(i - 2, j, k) &
2242 0 : + ww1(2)*vv0 + ww0(1)*vv1 + ww1(1)*vv2
2243 : coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
2244 0 : + ww1(3)*vv0 + ww0(2)*vv1 + ww1(2)*vv2
2245 : coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
2246 0 : + ww1(4)*vv0 + ww0(3)*vv1 + ww1(3)*vv2
2247 60516 : ELSEIF (pbc .AND. .NOT. is_split) THEN
2248 0 : fi = fi + 1
2249 0 : vv2 = fine_values(fi, fj, fk)
2250 0 : vv3 = fine_values(fine_bo(1, 1), fk, fk)
2251 0 : vv4 = fine_values(fine_bo(1, 1) + 1, fk, fk)
2252 : coarse_coeffs(i - 3, j, k) = coarse_coeffs(i - 3, j, k) &
2253 0 : + ww1(1)*vv0
2254 : coarse_coeffs(i - 2, j, k) = coarse_coeffs(i - 2, j, k) &
2255 0 : + ww1(2)*vv0 + ww0(1)*vv1 + ww1(1)*vv2
2256 : coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
2257 0 : + ww1(3)*vv0 + ww0(2)*vv1 + ww1(2)*vv2 + ww0(1)*vv3 + ww1(1)*vv4
2258 : ELSE
2259 : coarse_coeffs(i - 3, j, k) = coarse_coeffs(i - 3, j, k) &
2260 60516 : + wj*w_border1(3)*vv0
2261 : coarse_coeffs(i - 2, j, k) = coarse_coeffs(i - 2, j, k) &
2262 60516 : + wj*w_border1(2)*vv0
2263 : coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
2264 60516 : + wj*(w_border1(1)*vv0 + w_border0*vv1)
2265 : END IF
2266 : END IF
2267 8600726 : CPASSERT(fi == fine_bo(2, 1))
2268 : END IF
2269 : END DO
2270 : END DO
2271 : END DO
2272 : END DO
2273 :
2274 : ! *** parallel case
2275 1062 : IF (is_split) THEN
2276 24 : CALL timeset(routineN//"_comm", handle2)
2277 : coarse_slice_size = (coarse_bo(2, 2) - coarse_bo(1, 2) + 1)* &
2278 24 : (coarse_bo(2, 3) - coarse_bo(1, 3) + 1)
2279 24 : n_procs = coarse_coeffs_pw%pw_grid%para%group%num_pe
2280 : ALLOCATE (send_size(0:n_procs - 1), send_offset(0:n_procs - 1), &
2281 : sent_size(0:n_procs - 1), rcv_size(0:n_procs - 1), &
2282 : rcv_offset(0:n_procs - 1), pp_lb(0:n_procs - 1), &
2283 240 : pp_ub(0:n_procs - 1), real_rcv_size(0:n_procs - 1))
2284 :
2285 : ! ** send size count
2286 :
2287 24 : pos_of_x => coarse_coeffs_pw%pw_grid%para%pos_of_x
2288 24 : send_size = 0
2289 184 : DO x = coarse_bo(1, 1), coarse_bo(2, 1)
2290 160 : p = pos_of_x(coarse_gbo(1, 1) + MODULO(x - coarse_gbo(1, 1), s(1)))
2291 184 : send_size(p) = send_size(p) + coarse_slice_size
2292 : END DO
2293 :
2294 : ! ** rcv size count
2295 :
2296 24 : pos_of_x => fine_values_pw%pw_grid%para%pos_of_x
2297 24 : p_old = pos_of_x(fine_gbo(1, 1))
2298 72 : pp_lb = fine_gbo(2, 1)
2299 72 : pp_ub = fine_gbo(2, 1) - 1
2300 24 : pp_lb(p_old) = fine_gbo(1, 1)
2301 496 : DO x = fine_gbo(1, 1), fine_gbo(2, 1)
2302 472 : p = pos_of_x(x)
2303 496 : IF (p /= p_old) THEN
2304 24 : pp_ub(p_old) = x - 1
2305 24 : pp_lb(p) = x
2306 24 : p_old = p
2307 : END IF
2308 : END DO
2309 24 : pp_ub(p_old) = fine_gbo(2, 1)
2310 :
2311 72 : DO ip = 0, n_procs - 1
2312 48 : IF (pp_lb(ip) <= pp_ub(ip)) THEN
2313 48 : pp_lb(ip) = FLOOR(REAL(pp_lb(ip) - f_shift(1), dp)/2._dp) - 1
2314 48 : pp_ub(ip) = FLOOR(REAL(pp_ub(ip) + 1 - f_shift(1), dp)/2._dp) + 1
2315 : ELSE
2316 0 : pp_lb(ip) = coarse_gbo(2, 1)
2317 0 : pp_ub(ip) = coarse_gbo(2, 1) - 1
2318 : END IF
2319 72 : IF (.NOT. is_split .OR. .NOT. pbc) THEN
2320 48 : pp_lb(ip) = MAX(pp_lb(ip), coarse_gbo(1, 1))
2321 48 : pp_ub(ip) = MIN(pp_ub(ip), coarse_gbo(2, 1))
2322 : END IF
2323 : END DO
2324 :
2325 24 : rcv_size = 0
2326 72 : DO ip = 0, n_procs - 1
2327 48 : DO x = pp_lb(ip), coarse_gbo(1, 1) - 1
2328 0 : x_att = coarse_gbo(1, 1) + MODULO(x - coarse_gbo(1, 1), s(1))
2329 48 : IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
2330 0 : rcv_size(ip) = rcv_size(ip) + coarse_slice_size
2331 : END IF
2332 : END DO
2333 : rcv_size(ip) = rcv_size(ip) + coarse_slice_size* &
2334 : MAX(0, &
2335 48 : MIN(pp_ub(ip), my_coarse_bo(2, 1)) - MAX(pp_lb(ip), my_coarse_bo(1, 1)) + 1)
2336 72 : DO x = coarse_gbo(2, 1) + 1, pp_ub(ip)
2337 0 : x_att = coarse_gbo(1, 1) + MODULO(x - coarse_gbo(1, 1), s(1))
2338 48 : IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
2339 0 : rcv_size(ip) = rcv_size(ip) + coarse_slice_size
2340 : END IF
2341 : END DO
2342 : END DO
2343 :
2344 : ! ** offsets & alloc send-rcv
2345 :
2346 : send_tot_size = 0
2347 72 : DO ip = 0, n_procs - 1
2348 48 : send_offset(ip) = send_tot_size
2349 72 : send_tot_size = send_tot_size + send_size(ip)
2350 : END DO
2351 24 : IF (send_tot_size /= (coarse_bo(2, 1) - coarse_bo(1, 1) + 1)*coarse_slice_size) &
2352 0 : CPABORT("Error calculating send_tot_size")
2353 72 : ALLOCATE (send_buf(0:send_tot_size - 1))
2354 :
2355 24 : rcv_tot_size = 0
2356 72 : DO ip = 0, n_procs - 1
2357 48 : rcv_offset(ip) = rcv_tot_size
2358 72 : rcv_tot_size = rcv_tot_size + rcv_size(ip)
2359 : END DO
2360 72 : ALLOCATE (rcv_buf(0:rcv_tot_size - 1))
2361 :
2362 : ! ** fill send buffer
2363 :
2364 24 : pos_of_x => coarse_coeffs_pw%pw_grid%para%pos_of_x
2365 : p_old = pos_of_x(coarse_gbo(1, 1) &
2366 24 : + MODULO(coarse_bo(1, 1) - coarse_gbo(1, 1), s(1)))
2367 72 : sent_size(:) = send_offset
2368 24 : ss = coarse_bo(2, 1) - coarse_bo(1, 1) + 1
2369 184 : DO x = coarse_bo(1, 1), coarse_bo(2, 1)
2370 160 : p = pos_of_x(coarse_gbo(1, 1) + MODULO(x - coarse_gbo(1, 1), s(1)))
2371 : CALL dcopy(coarse_slice_size, &
2372 : coarse_coeffs(x, coarse_bo(1, 2), &
2373 160 : coarse_bo(1, 3)), ss, send_buf(sent_size(p)), 1)
2374 184 : sent_size(p) = sent_size(p) + coarse_slice_size
2375 : END DO
2376 :
2377 48 : IF (ANY(sent_size(0:n_procs - 2) /= send_offset(1:n_procs - 1))) &
2378 0 : CPABORT("error 1 filling send buffer")
2379 24 : IF (sent_size(n_procs - 1) /= send_tot_size) &
2380 0 : CPABORT("error 2 filling send buffer")
2381 :
2382 : IF (local_data) THEN
2383 24 : DEALLOCATE (coarse_coeffs)
2384 : ELSE
2385 : NULLIFY (coarse_coeffs)
2386 : END IF
2387 :
2388 48 : CPASSERT(ALL(sent_size(:n_procs - 2) == send_offset(1:)))
2389 24 : CPASSERT(sent_size(n_procs - 1) == send_tot_size)
2390 : ! test send/rcv sizes
2391 24 : CALL coarse_coeffs_pw%pw_grid%para%group%alltoall(send_size, real_rcv_size, 1)
2392 :
2393 72 : CPASSERT(ALL(real_rcv_size == rcv_size))
2394 : ! all2all
2395 : CALL coarse_coeffs_pw%pw_grid%para%group%alltoall(sb=send_buf, scount=send_size, sdispl=send_offset, &
2396 24 : rb=rcv_buf, rcount=rcv_size, rdispl=rcv_offset)
2397 :
2398 : ! ** sum & reorder rcv buffer
2399 :
2400 72 : sent_size(:) = rcv_offset
2401 72 : DO ip = 0, n_procs - 1
2402 :
2403 48 : DO x = pp_lb(ip), coarse_gbo(1, 1) - 1
2404 0 : x_att = coarse_gbo(1, 1) + MODULO(x - coarse_gbo(1, 1), s(1))
2405 48 : IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
2406 0 : ii = sent_size(ip)
2407 0 : DO k = coarse_bo(1, 3), coarse_bo(2, 3)
2408 0 : DO j = coarse_bo(1, 2), coarse_bo(2, 2)
2409 0 : coarse_coeffs_pw%array(x_att, j, k) = coarse_coeffs_pw%array(x_att, j, k) + rcv_buf(ii)
2410 0 : ii = ii + 1
2411 : END DO
2412 : END DO
2413 0 : sent_size(ip) = ii
2414 : END IF
2415 : END DO
2416 :
2417 48 : ii = sent_size(ip)
2418 208 : DO x_att = MAX(pp_lb(ip), my_coarse_bo(1, 1)), MIN(pp_ub(ip), my_coarse_bo(2, 1))
2419 2160 : DO k = coarse_bo(1, 3), coarse_bo(2, 3)
2420 29920 : DO j = coarse_bo(1, 2), coarse_bo(2, 2)
2421 27808 : coarse_coeffs_pw%array(x_att, j, k) = coarse_coeffs_pw%array(x_att, j, k) + rcv_buf(ii)
2422 29760 : ii = ii + 1
2423 : END DO
2424 : END DO
2425 : END DO
2426 48 : sent_size(ip) = ii
2427 :
2428 72 : DO x = coarse_gbo(2, 1) + 1, pp_ub(ip)
2429 0 : x_att = coarse_gbo(1, 1) + MODULO(x - coarse_gbo(1, 1), s(1))
2430 48 : IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
2431 0 : ii = sent_size(ip)
2432 0 : DO k = coarse_bo(1, 3), coarse_bo(2, 3)
2433 0 : DO j = coarse_bo(1, 2), coarse_bo(2, 2)
2434 0 : coarse_coeffs_pw%array(x_att, j, k) = coarse_coeffs_pw%array(x_att, j, k) + rcv_buf(ii)
2435 0 : ii = ii + 1
2436 : END DO
2437 : END DO
2438 0 : sent_size(ip) = ii
2439 : END IF
2440 : END DO
2441 :
2442 : END DO
2443 :
2444 48 : IF (ANY(sent_size(0:n_procs - 2) /= rcv_offset(1:n_procs - 1))) &
2445 0 : CPABORT("error 1 handling the rcv buffer")
2446 24 : IF (sent_size(n_procs - 1) /= rcv_tot_size) &
2447 0 : CPABORT("error 2 handling the rcv buffer")
2448 :
2449 : ! dealloc
2450 24 : DEALLOCATE (send_size, send_offset, rcv_size, rcv_offset)
2451 24 : DEALLOCATE (send_buf, rcv_buf, real_rcv_size)
2452 24 : DEALLOCATE (pp_ub, pp_lb)
2453 48 : CALL timestop(handle2)
2454 : ELSE
2455 : CPASSERT(.NOT. local_data)
2456 : END IF
2457 :
2458 1062 : CALL timestop(handle)
2459 2124 : END SUBROUTINE add_fine2coarse
2460 :
2461 : ! **************************************************************************************************
2462 : !> \brief ...
2463 : !> \param preconditioner the preconditioner to create
2464 : !> \param precond_kind the kind of preconditioner to use
2465 : !> \param pool a pool with grids of the same type as the elements to
2466 : !> precondition
2467 : !> \param pbc if periodic boundary conditions should be applied
2468 : !> \param transpose ...
2469 : !> \author fawzi
2470 : ! **************************************************************************************************
2471 33948 : SUBROUTINE pw_spline_precond_create(preconditioner, precond_kind, &
2472 : pool, pbc, transpose)
2473 : TYPE(pw_spline_precond_type), INTENT(OUT) :: preconditioner
2474 : INTEGER, INTENT(in) :: precond_kind
2475 : TYPE(pw_pool_type), INTENT(IN), POINTER :: pool
2476 : LOGICAL, INTENT(in) :: pbc, transpose
2477 :
2478 : preconditioner%kind = no_precond
2479 3772 : preconditioner%pool => pool
2480 3772 : preconditioner%pbc = pbc
2481 3772 : preconditioner%transpose = transpose
2482 3772 : CALL pool%retain()
2483 3772 : CALL pw_spline_precond_set_kind(preconditioner, precond_kind)
2484 3772 : END SUBROUTINE pw_spline_precond_create
2485 :
2486 : ! **************************************************************************************************
2487 : !> \brief switches the types of preconditioner to use
2488 : !> \param preconditioner the preconditioner to be changed
2489 : !> \param precond_kind the new kind of preconditioner to use
2490 : !> \param pbc ...
2491 : !> \param transpose ...
2492 : !> \author fawzi
2493 : ! **************************************************************************************************
2494 7544 : SUBROUTINE pw_spline_precond_set_kind(preconditioner, precond_kind, pbc, &
2495 : transpose)
2496 : TYPE(pw_spline_precond_type), INTENT(INOUT) :: preconditioner
2497 : INTEGER, INTENT(in) :: precond_kind
2498 : LOGICAL, INTENT(in), OPTIONAL :: pbc, transpose
2499 :
2500 : LOGICAL :: do_3d_coeff
2501 : REAL(kind=dp) :: s
2502 :
2503 7544 : IF (PRESENT(transpose)) preconditioner%transpose = transpose
2504 7544 : do_3d_coeff = .FALSE.
2505 7544 : preconditioner%kind = precond_kind
2506 7544 : IF (PRESENT(pbc)) preconditioner%pbc = pbc
2507 : SELECT CASE (precond_kind)
2508 : CASE (no_precond)
2509 : CASE (precond_spl3_aint2)
2510 0 : preconditioner%coeffs_1d = [-1.66_dp*0.25_dp, 1.66_dp, -1.66_dp*0.25_dp]
2511 0 : preconditioner%sharpen = .FALSE.
2512 0 : preconditioner%normalize = .FALSE.
2513 3772 : do_3d_coeff = .TRUE.
2514 : CASE (precond_spl3_3)
2515 3772 : preconditioner%coeffs_1d(1) = -0.25_dp*1.6_dp
2516 3772 : preconditioner%coeffs_1d(2) = 1.6_dp
2517 3772 : preconditioner%coeffs_1d(3) = -0.25_dp*1.6_dp
2518 3772 : preconditioner%sharpen = .FALSE.
2519 3772 : preconditioner%normalize = .FALSE.
2520 0 : do_3d_coeff = .TRUE.
2521 : CASE (precond_spl3_2)
2522 0 : preconditioner%coeffs_1d(1) = -0.26_dp*1.76_dp
2523 0 : preconditioner%coeffs_1d(2) = 1.76_dp
2524 0 : preconditioner%coeffs_1d(3) = -0.26_dp*1.76_dp
2525 0 : preconditioner%sharpen = .FALSE.
2526 0 : preconditioner%normalize = .FALSE.
2527 : do_3d_coeff = .TRUE.
2528 : CASE (precond_spl3_aint)
2529 15088 : preconditioner%coeffs_1d = spl3_1d_coeffs0
2530 3772 : preconditioner%sharpen = .TRUE.
2531 3772 : preconditioner%normalize = .TRUE.
2532 0 : do_3d_coeff = .TRUE.
2533 : CASE (precond_spl3_1)
2534 0 : preconditioner%coeffs_1d(1) = 0.5_dp/3._dp**(1._dp/3._dp)
2535 0 : preconditioner%coeffs_1d(2) = 4._dp/3._dp**(1._dp/3._dp)
2536 0 : preconditioner%coeffs_1d(3) = 0.5_dp/3._dp**(1._dp/3._dp)
2537 0 : preconditioner%sharpen = .TRUE.
2538 0 : preconditioner%normalize = .FALSE.
2539 0 : do_3d_coeff = .TRUE.
2540 : CASE default
2541 7544 : CPABORT("Unknown preconditioner kind for pw_spline_precond_set_kind")
2542 : END SELECT
2543 : IF (do_3d_coeff) THEN
2544 7544 : s = 1._dp
2545 7544 : IF (preconditioner%sharpen) s = -1._dp
2546 : preconditioner%coeffs(1) = &
2547 : s*preconditioner%coeffs_1d(2)* &
2548 : preconditioner%coeffs_1d(2)* &
2549 7544 : preconditioner%coeffs_1d(2)
2550 : preconditioner%coeffs(2) = &
2551 : s*preconditioner%coeffs_1d(1)* &
2552 : preconditioner%coeffs_1d(2)* &
2553 7544 : preconditioner%coeffs_1d(2)
2554 : preconditioner%coeffs(3) = &
2555 : s*preconditioner%coeffs_1d(1)* &
2556 : preconditioner%coeffs_1d(1)* &
2557 7544 : preconditioner%coeffs_1d(2)
2558 : preconditioner%coeffs(4) = &
2559 : s*preconditioner%coeffs_1d(1)* &
2560 : preconditioner%coeffs_1d(1)* &
2561 7544 : preconditioner%coeffs_1d(1)
2562 7544 : IF (preconditioner%sharpen) THEN
2563 3772 : IF (preconditioner%normalize) THEN
2564 : preconditioner%coeffs(1) = 2._dp + &
2565 3772 : preconditioner%coeffs(1)
2566 : ELSE
2567 0 : preconditioner%coeffs(1) = -preconditioner%coeffs(1)
2568 : END IF
2569 : END IF
2570 : END IF
2571 7544 : END SUBROUTINE pw_spline_precond_set_kind
2572 :
2573 : ! **************************************************************************************************
2574 : !> \brief releases the preconditioner
2575 : !> \param preconditioner the preconditioner to release
2576 : !> \author fawzi
2577 : ! **************************************************************************************************
2578 3772 : SUBROUTINE pw_spline_precond_release(preconditioner)
2579 : TYPE(pw_spline_precond_type), INTENT(INOUT) :: preconditioner
2580 :
2581 3772 : CALL pw_pool_release(preconditioner%pool)
2582 3772 : END SUBROUTINE pw_spline_precond_release
2583 :
2584 : ! **************************************************************************************************
2585 : !> \brief applies the preconditioner to the system of equations to find the
2586 : !> coefficients of the spline
2587 : !> \param preconditioner the preconditioner to apply
2588 : !> \param in_v the grid on which the preconditioner should be applied
2589 : !> \param out_v place to store the preconditioner applied on v_out
2590 : !> \author fawzi
2591 : ! **************************************************************************************************
2592 76185 : SUBROUTINE pw_spline_do_precond(preconditioner, in_v, out_v)
2593 : TYPE(pw_spline_precond_type), INTENT(IN) :: preconditioner
2594 : TYPE(pw_r3d_rs_type), INTENT(IN) :: in_v
2595 : TYPE(pw_r3d_rs_type), INTENT(INOUT) :: out_v
2596 :
2597 76185 : SELECT CASE (preconditioner%kind)
2598 : CASE (no_precond)
2599 0 : CALL pw_copy(in_v, out_v)
2600 : CASE (precond_spl3_aint, precond_spl3_1)
2601 3772 : CALL pw_zero(out_v)
2602 3772 : IF (preconditioner%pbc) THEN
2603 : CALL pw_nn_smear_r(pw_in=in_v, pw_out=out_v, &
2604 450 : coeffs=preconditioner%coeffs)
2605 : ELSE
2606 : CALL pw_nn_compose_r_no_pbc(weights_1d=preconditioner%coeffs_1d, &
2607 : pw_in=in_v, pw_out=out_v, sharpen=preconditioner%sharpen, &
2608 : normalize=preconditioner%normalize, &
2609 3322 : transpose=preconditioner%transpose)
2610 : END IF
2611 : CASE (precond_spl3_3, precond_spl3_2, precond_spl3_aint2)
2612 72413 : CALL pw_zero(out_v)
2613 72413 : IF (preconditioner%pbc) THEN
2614 : CALL pw_nn_smear_r(pw_in=in_v, pw_out=out_v, &
2615 6382 : coeffs=preconditioner%coeffs)
2616 : ELSE
2617 : CALL pw_nn_compose_r_no_pbc(weights_1d=preconditioner%coeffs_1d, &
2618 : pw_in=in_v, pw_out=out_v, sharpen=preconditioner%sharpen, &
2619 : normalize=preconditioner%normalize, smooth_boundary=.TRUE., &
2620 66031 : transpose=preconditioner%transpose)
2621 : END IF
2622 : CASE default
2623 76185 : CPABORT("Unknown preconditioner kind for pw_spline_do_precond")
2624 : END SELECT
2625 76185 : END SUBROUTINE pw_spline_do_precond
2626 :
2627 : ! **************************************************************************************************
2628 : !> \brief solves iteratively (CG) a systmes of linear equations
2629 : !> linOp(coeffs)=values
2630 : !> (for example those needed to find the coefficients of a spline)
2631 : !> Returns true if the it succeeded to achieve the requested accuracy
2632 : !> \param values the right hand side of the system
2633 : !> \param coeffs will contain the solution of the system (and on entry
2634 : !> it contains the starting point)
2635 : !> \param linOp the linear operator to be inverted
2636 : !> \param preconditioner the preconditioner to apply
2637 : !> \param pool a pool of grids (for the temporary objects)
2638 : !> \param eps_r the requested precision on the residual
2639 : !> \param eps_x the requested precision on the solution
2640 : !> \param max_iter maximum number of iteration allowed
2641 : !> \param sumtype ...
2642 : !> \return ...
2643 : !> \author fawzi
2644 : ! **************************************************************************************************
2645 3772 : FUNCTION find_coeffs(values, coeffs, linOp, preconditioner, pool, &
2646 : eps_r, eps_x, max_iter, sumtype) RESULT(res)
2647 :
2648 : TYPE(pw_r3d_rs_type), INTENT(IN) :: values
2649 : TYPE(pw_r3d_rs_type), INTENT(INOUT) :: coeffs
2650 : INTERFACE
2651 : SUBROUTINE linOp(pw_in, pw_out)
2652 : USE pw_types, ONLY: pw_r3d_rs_type
2653 : TYPE(pw_r3d_rs_type), INTENT(IN) :: pw_in
2654 : TYPE(pw_r3d_rs_type), INTENT(INOUT) :: pw_out
2655 : END SUBROUTINE linOp
2656 : END INTERFACE
2657 : TYPE(pw_spline_precond_type), INTENT(IN) :: preconditioner
2658 : TYPE(pw_pool_type), POINTER :: pool
2659 : REAL(kind=dp), INTENT(in) :: eps_r, eps_x
2660 : INTEGER, INTENT(in) :: max_iter
2661 : INTEGER, INTENT(in), OPTIONAL :: sumtype
2662 : LOGICAL :: res
2663 :
2664 : INTEGER :: i, iiter, iter, j, k
2665 : INTEGER, DIMENSION(2, 3) :: bo
2666 : LOGICAL :: last
2667 : REAL(kind=dp) :: alpha, beta, eps_r_att, eps_x_att, r_z, &
2668 : r_z_new
2669 : TYPE(cp_logger_type), POINTER :: logger
2670 : TYPE(pw_r3d_rs_type) :: Ap, p, r, z
2671 :
2672 3772 : last = .FALSE.
2673 :
2674 3772 : res = .FALSE.
2675 3772 : logger => cp_get_default_logger()
2676 3772 : CALL pool%create_pw(r)
2677 3772 : CALL pool%create_pw(z)
2678 3772 : CALL pool%create_pw(p)
2679 3772 : CALL pool%create_pw(Ap)
2680 :
2681 : !CALL cp_add_iter_level(logger%iter_info,level_name="SPLINE_FIND_COEFFS")
2682 8290 : ext_do: DO iiter = 1, max_iter, 10
2683 8290 : CALL pw_zero(r)
2684 8290 : CALL linOp(pw_in=coeffs, pw_out=r)
2685 63345536 : r%array = -r%array
2686 8290 : CALL pw_axpy(values, r)
2687 8290 : CALL pw_spline_do_precond(preconditioner, in_v=r, out_v=z)
2688 8290 : CALL pw_copy(z, p)
2689 8290 : r_z = pw_integral_ab(r, z, sumtype)
2690 :
2691 72413 : DO iter = iiter, MIN(iiter + 9, max_iter)
2692 67895 : eps_r_att = SQRT(pw_integral_ab(r, r, sumtype))
2693 67895 : IF (eps_r_att == 0._dp) THEN
2694 67895 : eps_x_att = 0._dp
2695 : last = .TRUE.
2696 : ELSE
2697 67889 : CALL pw_zero(Ap)
2698 67889 : CALL linOp(pw_in=p, pw_out=Ap)
2699 67889 : alpha = r_z/pw_integral_ab(Ap, p, sumtype)
2700 :
2701 67889 : CALL pw_axpy(p, coeffs, alpha=alpha)
2702 :
2703 67889 : eps_x_att = alpha*SQRT(pw_integral_ab(p, p, sumtype)) ! try to spare if unneeded?
2704 67889 : IF (eps_r_att < eps_r .AND. eps_x_att < eps_x) last = .TRUE.
2705 : END IF
2706 : !CALL cp_iterate(logger%iter_info,last=last)
2707 : IF (last) THEN
2708 : res = .TRUE.
2709 : EXIT ext_do
2710 : END IF
2711 :
2712 64123 : CALL pw_axpy(Ap, r, alpha=-alpha)
2713 :
2714 64123 : CALL pw_spline_do_precond(preconditioner, in_v=r, out_v=z)
2715 :
2716 64123 : r_z_new = pw_integral_ab(r, z, sumtype)
2717 64123 : beta = r_z_new/r_z
2718 64123 : r_z = r_z_new
2719 :
2720 641230 : bo = p%pw_grid%bounds_local
2721 975144 : DO k = bo(1, 3), bo(2, 3)
2722 19717425 : DO j = bo(1, 2), bo(2, 2)
2723 423564035 : DO i = bo(1, 1), bo(2, 1)
2724 422657532 : p%array(i, j, k) = z%array(i, j, k) + beta*p%array(i, j, k)
2725 : END DO
2726 : END DO
2727 : END DO
2728 :
2729 : END DO
2730 : END DO ext_do
2731 : !CALL cp_rm_iter_level(logger%iter_info,level_name="SPLINE_FIND_COEFFS")
2732 :
2733 3772 : CALL pool%give_back_pw(r)
2734 3772 : CALL pool%give_back_pw(z)
2735 3772 : CALL pool%give_back_pw(p)
2736 3772 : CALL pool%give_back_pw(Ap)
2737 :
2738 3772 : END FUNCTION find_coeffs
2739 :
2740 : ! **************************************************************************************************
2741 : !> \brief adds to pw_out pw_in composed with the weights
2742 : !> pw_out%array(i,j,k)=pw_out%array(i,j,k)+sum(pw_in%array(i+l,j+m,k+n)*
2743 : !> weights_1d(abs(l)+1)*weights_1d(abs(m)+1)*weights_1d(abs(n)+1),
2744 : !> l=-1..1,m=-1..1,n=-1..1)
2745 : !> \param weights_1d ...
2746 : !> \param pw_in ...
2747 : !> \param pw_out ...
2748 : !> \param sharpen ...
2749 : !> \param normalize ...
2750 : !> \param transpose ...
2751 : !> \param smooth_boundary ...
2752 : !> \author fawzi
2753 : ! **************************************************************************************************
2754 138700 : SUBROUTINE pw_nn_compose_r_no_pbc(weights_1d, pw_in, pw_out, &
2755 : sharpen, normalize, transpose, smooth_boundary)
2756 : REAL(kind=dp), DIMENSION(-1:1) :: weights_1d
2757 : TYPE(pw_r3d_rs_type), INTENT(IN) :: pw_in, pw_out
2758 : LOGICAL, INTENT(in), OPTIONAL :: sharpen, normalize, transpose, &
2759 : smooth_boundary
2760 :
2761 : INTEGER :: first_index, i, j, jw, k, kw, &
2762 : last_index, myj, myk, n_els
2763 : INTEGER, DIMENSION(2, 3) :: bo, gbo
2764 : INTEGER, DIMENSION(3) :: s
2765 : LOGICAL :: has_l_boundary, has_u_boundary, &
2766 : is_split, my_normalize, my_sharpen, &
2767 : my_smooth_boundary, my_transpose
2768 : REAL(kind=dp) :: in_val_f, in_val_l, in_val_tmp, w_j, w_k
2769 : REAL(kind=dp), DIMENSION(-1:1) :: w
2770 138700 : REAL(kind=dp), DIMENSION(:, :), POINTER :: l_boundary, tmp, u_boundary
2771 : REAL(kind=dp), DIMENSION(:, :, :), POINTER :: in_val, out_val
2772 :
2773 1387000 : bo = pw_in%pw_grid%bounds_local
2774 1387000 : gbo = pw_in%pw_grid%bounds
2775 138700 : in_val => pw_in%array
2776 138700 : out_val => pw_out%array
2777 138700 : my_sharpen = .FALSE.
2778 138700 : IF (PRESENT(sharpen)) my_sharpen = sharpen
2779 138700 : my_normalize = .FALSE.
2780 138700 : IF (PRESENT(normalize)) my_normalize = normalize
2781 138700 : my_transpose = .FALSE.
2782 138700 : IF (PRESENT(transpose)) my_transpose = transpose
2783 138700 : my_smooth_boundary = .FALSE.
2784 138700 : IF (PRESENT(smooth_boundary)) my_smooth_boundary = smooth_boundary
2785 138700 : CPASSERT(.NOT. my_normalize .OR. my_sharpen)
2786 138700 : CPASSERT(.NOT. my_smooth_boundary .OR. .NOT. my_sharpen)
2787 554800 : DO i = 1, 3
2788 554800 : s(i) = bo(2, i) - bo(1, i) + 1
2789 : END DO
2790 554800 : IF (ANY(s < 1)) RETURN
2791 : is_split = ANY(pw_in%pw_grid%bounds_local(:, 1) /= &
2792 412752 : pw_in%pw_grid%bounds(:, 1))
2793 138700 : has_l_boundary = (gbo(1, 1) == bo(1, 1))
2794 138700 : has_u_boundary = (gbo(2, 1) == bo(2, 1))
2795 138700 : IF (is_split) THEN
2796 : ALLOCATE (l_boundary(bo(1, 2):bo(2, 2), bo(1, 3):bo(2, 3)), &
2797 : u_boundary(bo(1, 2):bo(2, 2), bo(1, 3):bo(2, 3)), &
2798 17856 : tmp(bo(1, 2):bo(2, 2), bo(1, 3):bo(2, 3)))
2799 311256 : tmp(:, :) = pw_in%array(bo(2, 1), :, :)
2800 : CALL pw_in%pw_grid%para%group%sendrecv(tmp, pw_in%pw_grid%para%pos_of_x( &
2801 : gbo(1, 1) + MODULO(bo(2, 1) + 1 - gbo(1, 1), gbo(2, 1) - gbo(1, 1) + 1)), &
2802 : l_boundary, pw_in%pw_grid%para%pos_of_x( &
2803 620280 : gbo(1, 1) + MODULO(bo(1, 1) - 1 - gbo(1, 1), gbo(2, 1) - gbo(1, 1) + 1)))
2804 311256 : tmp(:, :) = pw_in%array(bo(1, 1), :, :)
2805 : CALL pw_in%pw_grid%para%group%sendrecv(tmp, pw_in%pw_grid%para%pos_of_x( &
2806 : gbo(1, 1) + MODULO(bo(1, 1) - 1 - gbo(1, 1), gbo(2, 1) - gbo(1, 1) + 1)), &
2807 : u_boundary, pw_in%pw_grid%para%pos_of_x( &
2808 620280 : gbo(1, 1) + MODULO(bo(2, 1) + 1 - gbo(1, 1), gbo(2, 1) - gbo(1, 1) + 1)))
2809 2232 : DEALLOCATE (tmp)
2810 : END IF
2811 :
2812 138700 : n_els = s(1)
2813 138700 : IF (has_l_boundary) THEN
2814 137584 : n_els = n_els - 1
2815 137584 : first_index = bo(1, 1) + 1
2816 : ELSE
2817 1116 : first_index = bo(1, 1)
2818 : END IF
2819 138700 : IF (has_u_boundary) THEN
2820 137584 : n_els = n_els - 1
2821 137584 : last_index = bo(2, 1) - 1
2822 : ELSE
2823 1116 : last_index = bo(2, 1)
2824 : END IF
2825 : !$OMP PARALLEL DO DEFAULT(NONE) &
2826 : !$OMP PRIVATE(k, kw, myk, j, jw, myj, in_val_f, in_val_l, w_k, w_j, in_val_tmp, w) &
2827 : !$OMP SHARED(bo, in_val, out_val, s, l_boundary, u_boundary, weights_1d, is_split, &
2828 : !$OMP my_transpose, gbo, my_smooth_boundary, has_l_boundary, has_u_boundary, &
2829 138700 : !$OMP my_sharpen, last_index, first_index, my_normalize, n_els)
2830 : DO k = bo(1, 3), bo(2, 3)
2831 : DO kw = -1, 1
2832 : myk = k + kw
2833 : IF (my_transpose) THEN
2834 : IF (k >= gbo(2, 3) - 1 .OR. k <= gbo(1, 3) + 1) THEN
2835 : IF (k == gbo(2, 3) .OR. k == gbo(1, 3)) THEN
2836 : IF (myk < gbo(2, 3) .AND. myk > gbo(1, 3)) THEN
2837 : w_k = weights_1d(kw)
2838 : IF (my_smooth_boundary) THEN
2839 : w_k = weights_1d(kw)/weights_1d(0)
2840 : END IF
2841 : ELSE IF (kw == 0) THEN
2842 : w_k = 1._dp
2843 : ELSE
2844 : CYCLE
2845 : END IF
2846 : ELSE
2847 : IF (myk == gbo(2, 3) .OR. myk == gbo(1, 3)) CYCLE
2848 : w_k = weights_1d(kw)
2849 : END IF
2850 : ELSE
2851 : w_k = weights_1d(kw)
2852 : END IF
2853 : ELSE
2854 : IF (k >= gbo(2, 3) - 1 .OR. k <= gbo(1, 3) + 1) THEN
2855 : IF (k == gbo(2, 3) .OR. k == gbo(1, 3)) THEN
2856 : IF (kw /= 0) CYCLE
2857 : w_k = 1._dp
2858 : ELSE
2859 : IF (my_smooth_boundary .AND. ((k == gbo(1, 3) + 1 .AND. myk == gbo(1, 3)) .OR. &
2860 : (k == gbo(2, 3) - 1 .AND. myk == gbo(2, 3)))) THEN
2861 : w_k = weights_1d(kw)/weights_1d(0)
2862 : ELSE
2863 : w_k = weights_1d(kw)
2864 : END IF
2865 : END IF
2866 : ELSE
2867 : w_k = weights_1d(kw)
2868 : END IF
2869 : END IF
2870 : DO j = bo(1, 2), bo(2, 2)
2871 : DO jw = -1, 1
2872 : myj = j + jw
2873 : IF (j < gbo(2, 2) - 1 .AND. j > gbo(1, 2) + 1) THEN
2874 : w_j = w_k*weights_1d(jw)
2875 : ELSE
2876 : IF (my_transpose) THEN
2877 : IF (j == gbo(2, 2) .OR. j == gbo(1, 2)) THEN
2878 : IF (myj < gbo(2, 2) .AND. myj > gbo(1, 2)) THEN
2879 : w_j = weights_1d(jw)*w_k
2880 : IF (my_smooth_boundary) THEN
2881 : w_j = weights_1d(jw)/weights_1d(0)*w_k
2882 : END IF
2883 : ELSE IF (jw == 0) THEN
2884 : w_j = w_k
2885 : ELSE
2886 : CYCLE
2887 : END IF
2888 : ELSE
2889 : IF (myj == gbo(2, 2) .OR. myj == gbo(1, 2)) CYCLE
2890 : w_j = w_k*weights_1d(jw)
2891 : END IF
2892 : ELSE
2893 : IF (j == gbo(2, 2) .OR. j == gbo(1, 2)) THEN
2894 : IF (jw /= 0) CYCLE
2895 : w_j = w_k
2896 : ELSE IF (my_smooth_boundary .AND. ((j == gbo(1, 2) + 1 .AND. myj == gbo(1, 2)) .OR. &
2897 : (j == gbo(2, 2) - 1 .AND. myj == gbo(2, 2)))) THEN
2898 : w_j = w_k*weights_1d(jw)/weights_1d(0)
2899 : ELSE
2900 : w_j = w_k*weights_1d(jw)
2901 : END IF
2902 : END IF
2903 : END IF
2904 :
2905 : IF (has_l_boundary) THEN
2906 : IF (my_transpose) THEN
2907 : IF (s(1) == 1) THEN
2908 : CPASSERT(.NOT. has_u_boundary)
2909 : in_val_tmp = u_boundary(myj, myk)
2910 : ELSE
2911 : in_val_tmp = in_val(bo(1, 1) + 1, myj, myk)
2912 : END IF
2913 : IF (my_sharpen) THEN
2914 : IF (kw == 0 .AND. jw == 0) THEN
2915 : IF (my_normalize) THEN
2916 : out_val(bo(1, 1), j, k) = out_val(bo(1, 1), j, k) + &
2917 : (2.0_dp - w_j)*in_val(bo(1, 1), myj, myk) - &
2918 : in_val_tmp*weights_1d(1)*w_j
2919 : ELSE
2920 : out_val(bo(1, 1), j, k) = out_val(bo(1, 1), j, k) + &
2921 : in_val(bo(1, 1), myj, myk)*w_j - &
2922 : in_val_tmp*weights_1d(1)*w_j
2923 : END IF
2924 : ELSE
2925 : out_val(bo(1, 1), j, k) = out_val(bo(1, 1), j, k) - &
2926 : in_val(bo(1, 1), myj, myk)*w_j - &
2927 : in_val_tmp*weights_1d(1)*w_j
2928 : END IF
2929 : ELSE IF (my_smooth_boundary) THEN
2930 : out_val(bo(1, 1), j, k) = out_val(bo(1, 1), j, k) + &
2931 : w_j*(in_val(bo(1, 1), myj, myk) + &
2932 : in_val_tmp*weights_1d(1)/weights_1d(0))
2933 : ELSE
2934 : out_val(bo(1, 1), j, k) = out_val(bo(1, 1), j, k) + &
2935 : w_j*(in_val(bo(1, 1), myj, myk) + &
2936 : in_val_tmp*weights_1d(1))
2937 : END IF
2938 : in_val_f = 0.0_dp
2939 : ELSE
2940 : in_val_f = in_val(bo(1, 1), myj, myk)
2941 : IF (my_sharpen) THEN
2942 : IF (kw == 0 .AND. jw == 0) THEN
2943 : IF (my_normalize) THEN
2944 : out_val(bo(1, 1), j, k) = out_val(bo(1, 1), j, k) + &
2945 : (2.0_dp - w_j)*in_val_f
2946 : ELSE
2947 : out_val(bo(1, 1), j, k) = out_val(bo(1, 1), j, k) + &
2948 : in_val_f*w_j
2949 : END IF
2950 : ELSE
2951 : out_val(bo(1, 1), j, k) = out_val(bo(1, 1), j, k) - &
2952 : in_val_f*w_j
2953 : END IF
2954 : ELSE
2955 : out_val(bo(1, 1), j, k) = out_val(bo(1, 1), j, k) + &
2956 : in_val_f*w_j
2957 : END IF
2958 : END IF
2959 : ELSE
2960 : in_val_f = l_boundary(myj, myk)
2961 : END IF
2962 : IF (has_u_boundary) THEN
2963 : IF (my_transpose) THEN
2964 : in_val_l = in_val(bo(2, 1), myj, myk)
2965 : IF (s(1) == 1) THEN
2966 : CPASSERT(.NOT. has_l_boundary)
2967 : in_val_tmp = l_boundary(myj, myk)
2968 : ELSE
2969 : in_val_tmp = in_val(bo(2, 1) - 1, myj, myk)
2970 : END IF
2971 : IF (my_sharpen) THEN
2972 : IF (kw == 0 .AND. jw == 0) THEN
2973 : IF (my_normalize) THEN
2974 : out_val(bo(2, 1), j, k) = out_val(bo(2, 1), j, k) + &
2975 : in_val_l*(2._dp - w_j) - &
2976 : in_val_tmp*weights_1d(1)*w_j
2977 : ELSE
2978 : out_val(bo(2, 1), j, k) = out_val(bo(2, 1), j, k) + &
2979 : in_val_l*w_j - &
2980 : in_val_tmp*weights_1d(1)*w_j
2981 : END IF
2982 : ELSE
2983 : out_val(bo(2, 1), j, k) = out_val(bo(2, 1), j, k) - &
2984 : w_j*in_val_l - &
2985 : in_val_tmp*weights_1d(1)*w_j
2986 : END IF
2987 : ELSE IF (my_smooth_boundary) THEN
2988 : out_val(bo(2, 1), j, k) = out_val(bo(2, 1), j, k) + &
2989 : w_j*(in_val_l + in_val_tmp*weights_1d(1)/weights_1d(0))
2990 : ELSE
2991 : out_val(bo(2, 1), j, k) = out_val(bo(2, 1), j, k) + &
2992 : w_j*(in_val_l + in_val_tmp*weights_1d(1))
2993 : END IF
2994 : in_val_l = 0._dp
2995 : ELSE
2996 : in_val_l = in_val(bo(2, 1), myj, myk)
2997 : IF (my_sharpen) THEN
2998 : IF (kw == 0 .AND. jw == 0) THEN
2999 : IF (my_normalize) THEN
3000 : out_val(bo(2, 1), j, k) = out_val(bo(2, 1), j, k) + &
3001 : in_val_l*(2._dp - w_j)
3002 : ELSE
3003 : out_val(bo(2, 1), j, k) = out_val(bo(2, 1), j, k) + &
3004 : in_val_l*w_j
3005 : END IF
3006 : ELSE
3007 : out_val(bo(2, 1), j, k) = out_val(bo(2, 1), j, k) - &
3008 : w_j*in_val_l
3009 : END IF
3010 : ELSE
3011 : out_val(bo(2, 1), j, k) = out_val(bo(2, 1), j, k) + &
3012 : w_j*in_val_l
3013 : END IF
3014 : END IF
3015 : ELSE
3016 : in_val_l = u_boundary(myj, myk)
3017 : END IF
3018 : IF (last_index >= first_index) THEN
3019 : IF (my_transpose) THEN
3020 : IF (bo(1, 1) - 1 == gbo(1, 1)) THEN
3021 : in_val_f = 0._dp
3022 : ELSE IF (bo(2, 1) + 1 == gbo(2, 1)) THEN
3023 : in_val_l = 0._dp
3024 : END IF
3025 : END IF
3026 : IF (my_sharpen) THEN
3027 : w = -weights_1d*w_j
3028 : IF (kw == 0 .AND. jw == 0) THEN
3029 : IF (my_normalize) THEN
3030 : w(0) = w(0) + 2._dp
3031 : ELSE
3032 : w(0) = -w(0)
3033 : END IF
3034 : END IF
3035 : ELSE
3036 : w = weights_1d*w_j
3037 : END IF
3038 : IF (my_smooth_boundary .AND. (.NOT. my_transpose)) THEN
3039 : IF (gbo(1, 1) + 1 >= bo(1, 1) .AND. &
3040 : gbo(1, 1) + 1 <= bo(2, 1) .AND. gbo(2, 1) - gbo(1, 1) > 2) THEN
3041 : IF (gbo(1, 1) >= bo(1, 1)) THEN
3042 : out_val(gbo(1, 1) + 1, j, k) = out_val(gbo(1, 1) + 1, j, k) + &
3043 : in_val(gbo(1, 1), myj, myk)*w_j*weights_1d(-1)* &
3044 : (1._dp/weights_1d(0) - 1._dp)
3045 : ELSE
3046 : out_val(gbo(1, 1) + 1, j, k) = out_val(gbo(1, 1) + 1, j, k) + &
3047 : l_boundary(myj, myk)*w_j*weights_1d(-1)* &
3048 : (1._dp/weights_1d(0) - 1._dp)
3049 : END IF
3050 : END IF
3051 : END IF
3052 : CALL pw_compose_stripe(weights=w, &
3053 : in_val=in_val(first_index:last_index, myj, myk), &
3054 : in_val_first=in_val_f, in_val_last=in_val_l, &
3055 : out_val=out_val(first_index:last_index, j, k), &
3056 : n_el=n_els)
3057 : !FM call pw_compose_stripe2(weights=w,&
3058 : !FM in_val=in_val,&
3059 : !FM in_val_first=in_val_f,in_val_last=in_val_l,&
3060 : !FM out_val=out_val,&
3061 : !FM first_val=first_index,last_val=last_index,&
3062 : !FM myj=myj,myk=myk,j=j,k=k)
3063 : IF (my_smooth_boundary .AND. (.NOT. my_transpose)) THEN
3064 : IF (gbo(2, 1) - 1 >= bo(1, 1) .AND. &
3065 : gbo(2, 1) - 1 <= bo(2, 1) .AND. gbo(2, 1) - gbo(1, 1) > 2) THEN
3066 : IF (gbo(2, 1) <= bo(2, 1)) THEN
3067 : out_val(gbo(2, 1) - 1, j, k) = out_val(gbo(2, 1) - 1, j, k) + &
3068 : in_val(gbo(2, 1), myj, myk)*w_j*weights_1d(1)* &
3069 : (1._dp/weights_1d(0) - 1._dp)
3070 : ELSE
3071 : out_val(gbo(2, 1) - 1, j, k) = out_val(gbo(2, 1) - 1, j, k) + &
3072 : u_boundary(myj, myk)*w_j*weights_1d(1)* &
3073 : (1._dp/weights_1d(0) - 1._dp)
3074 : END IF
3075 : END IF
3076 : END IF
3077 :
3078 : END IF
3079 : END DO
3080 : END DO
3081 : END DO
3082 : END DO
3083 :
3084 138700 : IF (is_split) THEN
3085 2232 : DEALLOCATE (l_boundary, u_boundary)
3086 : END IF
3087 138700 : END SUBROUTINE pw_nn_compose_r_no_pbc
3088 :
3089 : ! **************************************************************************************************
3090 : !> \brief ...
3091 : !> \param pw_in ...
3092 : !> \param pw_out ...
3093 : ! **************************************************************************************************
3094 41588 : SUBROUTINE spl3_nopbc(pw_in, pw_out)
3095 :
3096 : TYPE(pw_r3d_rs_type), INTENT(IN) :: pw_in
3097 : TYPE(pw_r3d_rs_type), INTENT(INOUT) :: pw_out
3098 :
3099 41588 : CALL pw_zero(pw_out)
3100 : CALL pw_nn_compose_r_no_pbc(weights_1d=spl3_1d_coeffs0, pw_in=pw_in, &
3101 41588 : pw_out=pw_out, sharpen=.FALSE., normalize=.FALSE.)
3102 :
3103 41588 : END SUBROUTINE spl3_nopbc
3104 :
3105 : ! **************************************************************************************************
3106 : !> \brief ...
3107 : !> \param pw_in ...
3108 : !> \param pw_out ...
3109 : ! **************************************************************************************************
3110 27759 : SUBROUTINE spl3_nopbct(pw_in, pw_out)
3111 :
3112 : TYPE(pw_r3d_rs_type), INTENT(IN) :: pw_in
3113 : TYPE(pw_r3d_rs_type), INTENT(INOUT) :: pw_out
3114 :
3115 27759 : CALL pw_zero(pw_out)
3116 : CALL pw_nn_compose_r_no_pbc(weights_1d=spl3_1d_coeffs0, pw_in=pw_in, &
3117 27759 : pw_out=pw_out, sharpen=.FALSE., normalize=.FALSE., transpose=.TRUE.)
3118 :
3119 27759 : END SUBROUTINE spl3_nopbct
3120 :
3121 : ! **************************************************************************************************
3122 : !> \brief ...
3123 : !> \param pw_in ...
3124 : !> \param pw_out ...
3125 : ! **************************************************************************************************
3126 6832 : SUBROUTINE spl3_pbc(pw_in, pw_out)
3127 :
3128 : TYPE(pw_r3d_rs_type), INTENT(IN) :: pw_in
3129 : TYPE(pw_r3d_rs_type), INTENT(INOUT) :: pw_out
3130 :
3131 6832 : CALL pw_zero(pw_out)
3132 6832 : CALL pw_nn_smear_r(pw_in, pw_out, coeffs=spline3_coeffs)
3133 :
3134 6832 : END SUBROUTINE spl3_pbc
3135 :
3136 : ! **************************************************************************************************
3137 : !> \brief Evaluates the PBC interpolated Spline (pw) function on the generic
3138 : !> input vector (vec)
3139 : !> \param vec ...
3140 : !> \param pw ...
3141 : !> \return ...
3142 : !> \par History
3143 : !> 12.2007 Adapted for use with distributed grids [rdeclerck]
3144 : !> \author Teodoro Laino 12/2005 [tlaino]
3145 : !> \note
3146 : !> Requires the Spline coefficients to be computed with PBC
3147 : ! **************************************************************************************************
3148 1821936 : FUNCTION Eval_Interp_Spl3_pbc(vec, pw) RESULT(val)
3149 : REAL(KIND=dp), DIMENSION(3), INTENT(in) :: vec
3150 : TYPE(pw_r3d_rs_type), INTENT(IN) :: pw
3151 : REAL(KIND=dp) :: val
3152 :
3153 : INTEGER :: i, ivec(3), j, k, npts(3)
3154 : INTEGER, DIMENSION(2, 3) :: bo, bo_l
3155 : INTEGER, DIMENSION(4) :: ii, ij, ik
3156 : LOGICAL :: my_mpsum
3157 : REAL(KIND=dp) :: a1, a2, a3, b1, b2, b3, c1, c2, c3, d1, d2, d3, dr1, dr2, dr3, e1, e2, e3, &
3158 : f1, f2, f3, g1, g2, g3, h1, h2, h3, p1, p2, p3, q1, q2, q3, r1, r2, r3, s1, s2, s3, s4, &
3159 : t1, t2, t3, t4, u1, u2, u3, v1, v2, v3, v4, xd1, xd2, xd3
3160 : REAL(KIND=dp), DIMENSION(4, 4, 4) :: box
3161 1821936 : REAL(KIND=dp), DIMENSION(:, :, :), POINTER :: grid
3162 :
3163 1821936 : NULLIFY (grid)
3164 1821936 : my_mpsum = (pw%pw_grid%para%mode /= PW_MODE_LOCAL)
3165 7287744 : npts = pw%pw_grid%npts
3166 7287744 : ivec = FLOOR(vec/pw%pw_grid%dr)
3167 1821936 : dr1 = pw%pw_grid%dr(1)
3168 1821936 : dr2 = pw%pw_grid%dr(2)
3169 1821936 : dr3 = pw%pw_grid%dr(3)
3170 :
3171 1821936 : xd1 = (vec(1)/dr1) - REAL(ivec(1), kind=dp)
3172 1821936 : xd2 = (vec(2)/dr2) - REAL(ivec(2), kind=dp)
3173 1821936 : xd3 = (vec(3)/dr3) - REAL(ivec(3), kind=dp)
3174 1821936 : grid => pw%array(:, :, :)
3175 18219360 : bo = pw%pw_grid%bounds
3176 18219360 : bo_l = pw%pw_grid%bounds_local
3177 :
3178 1821936 : ik(1) = MODULO(ivec(3) - 1, npts(3)) + bo(1, 3)
3179 1821936 : ik(2) = MODULO(ivec(3), npts(3)) + bo(1, 3)
3180 1821936 : ik(3) = MODULO(ivec(3) + 1, npts(3)) + bo(1, 3)
3181 1821936 : ik(4) = MODULO(ivec(3) + 2, npts(3)) + bo(1, 3)
3182 :
3183 1821936 : ij(1) = MODULO(ivec(2) - 1, npts(2)) + bo(1, 2)
3184 1821936 : ij(2) = MODULO(ivec(2), npts(2)) + bo(1, 2)
3185 1821936 : ij(3) = MODULO(ivec(2) + 1, npts(2)) + bo(1, 2)
3186 1821936 : ij(4) = MODULO(ivec(2) + 2, npts(2)) + bo(1, 2)
3187 :
3188 1821936 : ii(1) = MODULO(ivec(1) - 1, npts(1)) + bo(1, 1)
3189 1821936 : ii(2) = MODULO(ivec(1), npts(1)) + bo(1, 1)
3190 1821936 : ii(3) = MODULO(ivec(1) + 1, npts(1)) + bo(1, 1)
3191 1821936 : ii(4) = MODULO(ivec(1) + 2, npts(1)) + bo(1, 1)
3192 :
3193 9109680 : DO k = 1, 4
3194 38260656 : DO j = 1, 4
3195 153042624 : DO i = 1, 4
3196 : IF ( &
3197 : ii(i) >= bo_l(1, 1) .AND. &
3198 : ii(i) <= bo_l(2, 1) .AND. &
3199 : ij(j) >= bo_l(1, 2) .AND. &
3200 : ij(j) <= bo_l(2, 2) .AND. &
3201 116603904 : ik(k) >= bo_l(1, 3) .AND. &
3202 : ik(k) <= bo_l(2, 3) &
3203 29150976 : ) THEN
3204 : box(i, j, k) = grid(ii(i) + 1 - bo_l(1, 1), &
3205 : ij(j) + 1 - bo_l(1, 2), &
3206 58529856 : ik(k) + 1 - bo_l(1, 3))
3207 : ELSE
3208 58074048 : box(i, j, k) = 0.0_dp
3209 : END IF
3210 : END DO
3211 : END DO
3212 : END DO
3213 :
3214 1821936 : a1 = 3.0_dp + xd1
3215 1821936 : a2 = a1*a1
3216 1821936 : a3 = a2*a1
3217 1821936 : b1 = 2.0_dp + xd1
3218 1821936 : b2 = b1*b1
3219 1821936 : b3 = b2*b1
3220 1821936 : c1 = 1.0_dp + xd1
3221 1821936 : c2 = c1*c1
3222 1821936 : c3 = c2*c1
3223 1821936 : d1 = xd1
3224 1821936 : d2 = d1*d1
3225 1821936 : d3 = d2*d1
3226 1821936 : e1 = 3.0_dp + xd2
3227 1821936 : e2 = e1*e1
3228 1821936 : e3 = e2*e1
3229 1821936 : f1 = 2.0_dp + xd2
3230 1821936 : f2 = f1*f1
3231 1821936 : f3 = f2*f1
3232 1821936 : g1 = 1.0_dp + xd2
3233 1821936 : g2 = g1*g1
3234 1821936 : g3 = g2*g1
3235 1821936 : h1 = xd2
3236 1821936 : h2 = h1*h1
3237 1821936 : h3 = h2*h1
3238 1821936 : p1 = 3.0_dp + xd3
3239 1821936 : p2 = p1*p1
3240 1821936 : p3 = p2*p1
3241 1821936 : q1 = 2.0_dp + xd3
3242 1821936 : q2 = q1*q1
3243 1821936 : q3 = q2*q1
3244 1821936 : r1 = 1.0_dp + xd3
3245 1821936 : r2 = r1*r1
3246 1821936 : r3 = r2*r1
3247 1821936 : u1 = xd3
3248 1821936 : u2 = u1*u1
3249 1821936 : u3 = u2*u1
3250 :
3251 1821936 : t1 = 1.0_dp/6.0_dp*(64.0_dp - 48.0_dp*a1 + 12.0_dp*a2 - a3)
3252 1821936 : t2 = -22.0_dp/3.0_dp + 10.0_dp*b1 - 4.0_dp*b2 + 0.5_dp*b3
3253 1821936 : t3 = 2.0_dp/3.0_dp - 2.0_dp*c1 + 2.0_dp*c2 - 0.5_dp*c3
3254 1821936 : t4 = 1.0_dp/6.0_dp*d3
3255 1821936 : s1 = 1.0_dp/6.0_dp*(64.0_dp - 48.0_dp*e1 + 12.0_dp*e2 - e3)
3256 1821936 : s2 = -22.0_dp/3.0_dp + 10.0_dp*f1 - 4.0_dp*f2 + 0.5_dp*f3
3257 1821936 : s3 = 2.0_dp/3.0_dp - 2.0_dp*g1 + 2.0_dp*g2 - 0.5_dp*g3
3258 1821936 : s4 = 1.0_dp/6.0_dp*h3
3259 1821936 : v1 = 1.0_dp/6.0_dp*(64.0_dp - 48.0_dp*p1 + 12.0_dp*p2 - p3)
3260 1821936 : v2 = -22.0_dp/3.0_dp + 10.0_dp*q1 - 4.0_dp*q2 + 0.5_dp*q3
3261 1821936 : v3 = 2.0_dp/3.0_dp - 2.0_dp*r1 + 2.0_dp*r2 - 0.5_dp*r3
3262 1821936 : v4 = 1.0_dp/6.0_dp*u3
3263 :
3264 : val = ((box(1, 1, 1)*t1 + box(2, 1, 1)*t2 + box(3, 1, 1)*t3 + box(4, 1, 1)*t4)*s1 + &
3265 : (box(1, 2, 1)*t1 + box(2, 2, 1)*t2 + box(3, 2, 1)*t3 + box(4, 2, 1)*t4)*s2 + &
3266 : (box(1, 3, 1)*t1 + box(2, 3, 1)*t2 + box(3, 3, 1)*t3 + box(4, 3, 1)*t4)*s3 + &
3267 : (box(1, 4, 1)*t1 + box(2, 4, 1)*t2 + box(3, 4, 1)*t3 + box(4, 4, 1)*t4)*s4)*v1 + &
3268 : ((box(1, 1, 2)*t1 + box(2, 1, 2)*t2 + box(3, 1, 2)*t3 + box(4, 1, 2)*t4)*s1 + &
3269 : (box(1, 2, 2)*t1 + box(2, 2, 2)*t2 + box(3, 2, 2)*t3 + box(4, 2, 2)*t4)*s2 + &
3270 : (box(1, 3, 2)*t1 + box(2, 3, 2)*t2 + box(3, 3, 2)*t3 + box(4, 3, 2)*t4)*s3 + &
3271 : (box(1, 4, 2)*t1 + box(2, 4, 2)*t2 + box(3, 4, 2)*t3 + box(4, 4, 2)*t4)*s4)*v2 + &
3272 : ((box(1, 1, 3)*t1 + box(2, 1, 3)*t2 + box(3, 1, 3)*t3 + box(4, 1, 3)*t4)*s1 + &
3273 : (box(1, 2, 3)*t1 + box(2, 2, 3)*t2 + box(3, 2, 3)*t3 + box(4, 2, 3)*t4)*s2 + &
3274 : (box(1, 3, 3)*t1 + box(2, 3, 3)*t2 + box(3, 3, 3)*t3 + box(4, 3, 3)*t4)*s3 + &
3275 : (box(1, 4, 3)*t1 + box(2, 4, 3)*t2 + box(3, 4, 3)*t3 + box(4, 4, 3)*t4)*s4)*v3 + &
3276 : ((box(1, 1, 4)*t1 + box(2, 1, 4)*t2 + box(3, 1, 4)*t3 + box(4, 1, 4)*t4)*s1 + &
3277 : (box(1, 2, 4)*t1 + box(2, 2, 4)*t2 + box(3, 2, 4)*t3 + box(4, 2, 4)*t4)*s2 + &
3278 : (box(1, 3, 4)*t1 + box(2, 3, 4)*t2 + box(3, 3, 4)*t3 + box(4, 3, 4)*t4)*s3 + &
3279 1821936 : (box(1, 4, 4)*t1 + box(2, 4, 4)*t2 + box(3, 4, 4)*t3 + box(4, 4, 4)*t4)*s4)*v4
3280 :
3281 1821936 : IF (my_mpsum) CALL pw%pw_grid%para%group%sum(val)
3282 :
3283 1821936 : END FUNCTION Eval_Interp_Spl3_pbc
3284 :
3285 : ! **************************************************************************************************
3286 : !> \brief Evaluates the derivatives of the PBC interpolated Spline (pw)
3287 : !> function on the generic input vector (vec)
3288 : !> \param vec ...
3289 : !> \param pw ...
3290 : !> \return ...
3291 : !> \par History
3292 : !> 12.2007 Adapted for use with distributed grids [rdeclerck]
3293 : !> \author Teodoro Laino 12/2005 [tlaino]
3294 : !> \note
3295 : !> Requires the Spline coefficients to be computed with PBC
3296 : ! **************************************************************************************************
3297 10166 : FUNCTION Eval_d_Interp_Spl3_pbc(vec, pw) RESULT(val)
3298 : REAL(KIND=dp), DIMENSION(3), INTENT(in) :: vec
3299 : TYPE(pw_r3d_rs_type), INTENT(IN) :: pw
3300 : REAL(KIND=dp) :: val(3)
3301 :
3302 : INTEGER :: i, ivec(3), j, k, npts(3)
3303 : INTEGER, DIMENSION(2, 3) :: bo, bo_l
3304 : INTEGER, DIMENSION(4) :: ii, ij, ik
3305 : LOGICAL :: my_mpsum
3306 : REAL(KIND=dp) :: a1, a2, a3, b1, b2, b3, c1, c2, c3, d1, d2, d3, dr1, dr1i, dr2, dr2i, dr3, &
3307 : dr3i, e1, e2, e3, f1, f2, f3, g1, g2, g3, h1, h2, h3, p1, p2, p3, q1, q2, q3, r1, r2, r3, &
3308 : s1, s1d, s1o, s2, s2d, s2o, s3, s3d, s3o, s4, s4d, s4o, t1, t1d, t1o, t2, t2d, t2o, t3, &
3309 : t3d, t3o, t4, t4d, t4o, u1, u2, u3, v1, v1d, v1o, v2, v2d, v2o, v3, v3d, v3o, v4, v4d, &
3310 : v4o, xd1, xd2, xd3
3311 : REAL(KIND=dp), DIMENSION(4, 4, 4) :: box
3312 5083 : REAL(KIND=dp), DIMENSION(:, :, :), POINTER :: grid
3313 :
3314 5083 : NULLIFY (grid)
3315 5083 : my_mpsum = (pw%pw_grid%para%mode /= PW_MODE_LOCAL)
3316 20332 : npts = pw%pw_grid%npts
3317 20332 : ivec = FLOOR(vec/pw%pw_grid%dr)
3318 5083 : dr1 = pw%pw_grid%dr(1)
3319 5083 : dr2 = pw%pw_grid%dr(2)
3320 5083 : dr3 = pw%pw_grid%dr(3)
3321 5083 : dr1i = 1.0_dp/dr1
3322 5083 : dr2i = 1.0_dp/dr2
3323 5083 : dr3i = 1.0_dp/dr3
3324 5083 : xd1 = (vec(1)/dr1) - REAL(ivec(1), kind=dp)
3325 5083 : xd2 = (vec(2)/dr2) - REAL(ivec(2), kind=dp)
3326 5083 : xd3 = (vec(3)/dr3) - REAL(ivec(3), kind=dp)
3327 5083 : grid => pw%array(:, :, :)
3328 50830 : bo = pw%pw_grid%bounds
3329 50830 : bo_l = pw%pw_grid%bounds_local
3330 :
3331 5083 : ik(1) = MODULO(ivec(3) - 1, npts(3)) + bo(1, 3)
3332 5083 : ik(2) = MODULO(ivec(3), npts(3)) + bo(1, 3)
3333 5083 : ik(3) = MODULO(ivec(3) + 1, npts(3)) + bo(1, 3)
3334 5083 : ik(4) = MODULO(ivec(3) + 2, npts(3)) + bo(1, 3)
3335 :
3336 5083 : ij(1) = MODULO(ivec(2) - 1, npts(2)) + bo(1, 2)
3337 5083 : ij(2) = MODULO(ivec(2), npts(2)) + bo(1, 2)
3338 5083 : ij(3) = MODULO(ivec(2) + 1, npts(2)) + bo(1, 2)
3339 5083 : ij(4) = MODULO(ivec(2) + 2, npts(2)) + bo(1, 2)
3340 :
3341 5083 : ii(1) = MODULO(ivec(1) - 1, npts(1)) + bo(1, 1)
3342 5083 : ii(2) = MODULO(ivec(1), npts(1)) + bo(1, 1)
3343 5083 : ii(3) = MODULO(ivec(1) + 1, npts(1)) + bo(1, 1)
3344 5083 : ii(4) = MODULO(ivec(1) + 2, npts(1)) + bo(1, 1)
3345 :
3346 25415 : DO k = 1, 4
3347 106743 : DO j = 1, 4
3348 426972 : DO i = 1, 4
3349 : IF ( &
3350 : ii(i) >= bo_l(1, 1) .AND. &
3351 : ii(i) <= bo_l(2, 1) .AND. &
3352 : ij(j) >= bo_l(1, 2) .AND. &
3353 : ij(j) <= bo_l(2, 2) .AND. &
3354 325312 : ik(k) >= bo_l(1, 3) .AND. &
3355 : ik(k) <= bo_l(2, 3) &
3356 81328 : ) THEN
3357 : box(i, j, k) = grid(ii(i) + 1 - bo_l(1, 1), &
3358 : ij(j) + 1 - bo_l(1, 2), &
3359 324160 : ik(k) + 1 - bo_l(1, 3))
3360 : ELSE
3361 1152 : box(i, j, k) = 0.0_dp
3362 : END IF
3363 : END DO
3364 : END DO
3365 : END DO
3366 :
3367 5083 : a1 = 3.0_dp + xd1
3368 5083 : a2 = a1*a1
3369 5083 : a3 = a2*a1
3370 5083 : b1 = 2.0_dp + xd1
3371 5083 : b2 = b1*b1
3372 5083 : b3 = b2*b1
3373 5083 : c1 = 1.0_dp + xd1
3374 5083 : c2 = c1*c1
3375 5083 : c3 = c2*c1
3376 5083 : d1 = xd1
3377 5083 : d2 = d1*d1
3378 5083 : d3 = d2*d1
3379 5083 : e1 = 3.0_dp + xd2
3380 5083 : e2 = e1*e1
3381 5083 : e3 = e2*e1
3382 5083 : f1 = 2.0_dp + xd2
3383 5083 : f2 = f1*f1
3384 5083 : f3 = f2*f1
3385 5083 : g1 = 1.0_dp + xd2
3386 5083 : g2 = g1*g1
3387 5083 : g3 = g2*g1
3388 5083 : h1 = xd2
3389 5083 : h2 = h1*h1
3390 5083 : h3 = h2*h1
3391 5083 : p1 = 3.0_dp + xd3
3392 5083 : p2 = p1*p1
3393 5083 : p3 = p2*p1
3394 5083 : q1 = 2.0_dp + xd3
3395 5083 : q2 = q1*q1
3396 5083 : q3 = q2*q1
3397 5083 : r1 = 1.0_dp + xd3
3398 5083 : r2 = r1*r1
3399 5083 : r3 = r2*r1
3400 5083 : u1 = xd3
3401 5083 : u2 = u1*u1
3402 5083 : u3 = u2*u1
3403 :
3404 5083 : t1o = 1.0_dp/6.0_dp*(64.0_dp - 48.0_dp*a1 + 12.0_dp*a2 - a3)
3405 5083 : t2o = -22.0_dp/3.0_dp + 10.0_dp*b1 - 4.0_dp*b2 + 0.5_dp*b3
3406 5083 : t3o = 2.0_dp/3.0_dp - 2.0_dp*c1 + 2.0_dp*c2 - 0.5_dp*c3
3407 5083 : t4o = 1.0_dp/6.0_dp*d3
3408 5083 : s1o = 1.0_dp/6.0_dp*(64.0_dp - 48.0_dp*e1 + 12.0_dp*e2 - e3)
3409 5083 : s2o = -22.0_dp/3.0_dp + 10.0_dp*f1 - 4.0_dp*f2 + 0.5_dp*f3
3410 5083 : s3o = 2.0_dp/3.0_dp - 2.0_dp*g1 + 2.0_dp*g2 - 0.5_dp*g3
3411 5083 : s4o = 1.0_dp/6.0_dp*h3
3412 5083 : v1o = 1.0_dp/6.0_dp*(64.0_dp - 48.0_dp*p1 + 12.0_dp*p2 - p3)
3413 5083 : v2o = -22.0_dp/3.0_dp + 10.0_dp*q1 - 4.0_dp*q2 + 0.5_dp*q3
3414 5083 : v3o = 2.0_dp/3.0_dp - 2.0_dp*r1 + 2.0_dp*r2 - 0.5_dp*r3
3415 5083 : v4o = 1.0_dp/6.0_dp*u3
3416 :
3417 5083 : t1d = -8.0_dp + 4.0_dp*a1 - 0.5_dp*a2
3418 5083 : t2d = 10.0_dp - 8.0_dp*b1 + 1.5_dp*b2
3419 5083 : t3d = -2.0_dp + 4.0_dp*c1 - 1.5_dp*c2
3420 5083 : t4d = 0.5_dp*d2
3421 5083 : s1d = -8.0_dp + 4.0_dp*e1 - 0.5_dp*e2
3422 5083 : s2d = 10.0_dp - 8.0_dp*f1 + 1.5_dp*f2
3423 5083 : s3d = -2.0_dp + 4.0_dp*g1 - 1.5_dp*g2
3424 5083 : s4d = 0.5_dp*h2
3425 5083 : v1d = -8.0_dp + 4.0_dp*p1 - 0.5_dp*p2
3426 5083 : v2d = 10.0_dp - 8.0_dp*q1 + 1.5_dp*q2
3427 5083 : v3d = -2.0_dp + 4.0_dp*r1 - 1.5_dp*r2
3428 5083 : v4d = 0.5_dp*u2
3429 :
3430 5083 : t1 = t1d*dr1i
3431 5083 : t2 = t2d*dr1i
3432 5083 : t3 = t3d*dr1i
3433 5083 : t4 = t4d*dr1i
3434 5083 : s1 = s1o
3435 5083 : s2 = s2o
3436 5083 : s3 = s3o
3437 5083 : s4 = s4o
3438 5083 : v1 = v1o
3439 5083 : v2 = v2o
3440 5083 : v3 = v3o
3441 5083 : v4 = v4o
3442 : val(1) = ((box(1, 1, 1)*t1 + box(2, 1, 1)*t2 + box(3, 1, 1)*t3 + box(4, 1, 1)*t4)*s1 + &
3443 : (box(1, 2, 1)*t1 + box(2, 2, 1)*t2 + box(3, 2, 1)*t3 + box(4, 2, 1)*t4)*s2 + &
3444 : (box(1, 3, 1)*t1 + box(2, 3, 1)*t2 + box(3, 3, 1)*t3 + box(4, 3, 1)*t4)*s3 + &
3445 : (box(1, 4, 1)*t1 + box(2, 4, 1)*t2 + box(3, 4, 1)*t3 + box(4, 4, 1)*t4)*s4)*v1 + &
3446 : ((box(1, 1, 2)*t1 + box(2, 1, 2)*t2 + box(3, 1, 2)*t3 + box(4, 1, 2)*t4)*s1 + &
3447 : (box(1, 2, 2)*t1 + box(2, 2, 2)*t2 + box(3, 2, 2)*t3 + box(4, 2, 2)*t4)*s2 + &
3448 : (box(1, 3, 2)*t1 + box(2, 3, 2)*t2 + box(3, 3, 2)*t3 + box(4, 3, 2)*t4)*s3 + &
3449 : (box(1, 4, 2)*t1 + box(2, 4, 2)*t2 + box(3, 4, 2)*t3 + box(4, 4, 2)*t4)*s4)*v2 + &
3450 : ((box(1, 1, 3)*t1 + box(2, 1, 3)*t2 + box(3, 1, 3)*t3 + box(4, 1, 3)*t4)*s1 + &
3451 : (box(1, 2, 3)*t1 + box(2, 2, 3)*t2 + box(3, 2, 3)*t3 + box(4, 2, 3)*t4)*s2 + &
3452 : (box(1, 3, 3)*t1 + box(2, 3, 3)*t2 + box(3, 3, 3)*t3 + box(4, 3, 3)*t4)*s3 + &
3453 : (box(1, 4, 3)*t1 + box(2, 4, 3)*t2 + box(3, 4, 3)*t3 + box(4, 4, 3)*t4)*s4)*v3 + &
3454 : ((box(1, 1, 4)*t1 + box(2, 1, 4)*t2 + box(3, 1, 4)*t3 + box(4, 1, 4)*t4)*s1 + &
3455 : (box(1, 2, 4)*t1 + box(2, 2, 4)*t2 + box(3, 2, 4)*t3 + box(4, 2, 4)*t4)*s2 + &
3456 : (box(1, 3, 4)*t1 + box(2, 3, 4)*t2 + box(3, 3, 4)*t3 + box(4, 3, 4)*t4)*s3 + &
3457 5083 : (box(1, 4, 4)*t1 + box(2, 4, 4)*t2 + box(3, 4, 4)*t3 + box(4, 4, 4)*t4)*s4)*v4
3458 :
3459 5083 : t1 = t1o
3460 5083 : t2 = t2o
3461 5083 : t3 = t3o
3462 5083 : t4 = t4o
3463 5083 : s1 = s1d*dr2i
3464 5083 : s2 = s2d*dr2i
3465 5083 : s3 = s3d*dr2i
3466 5083 : s4 = s4d*dr2i
3467 5083 : v1 = v1o
3468 5083 : v2 = v2o
3469 5083 : v3 = v3o
3470 5083 : v4 = v4o
3471 : val(2) = ((box(1, 1, 1)*t1 + box(2, 1, 1)*t2 + box(3, 1, 1)*t3 + box(4, 1, 1)*t4)*s1 + &
3472 : (box(1, 2, 1)*t1 + box(2, 2, 1)*t2 + box(3, 2, 1)*t3 + box(4, 2, 1)*t4)*s2 + &
3473 : (box(1, 3, 1)*t1 + box(2, 3, 1)*t2 + box(3, 3, 1)*t3 + box(4, 3, 1)*t4)*s3 + &
3474 : (box(1, 4, 1)*t1 + box(2, 4, 1)*t2 + box(3, 4, 1)*t3 + box(4, 4, 1)*t4)*s4)*v1 + &
3475 : ((box(1, 1, 2)*t1 + box(2, 1, 2)*t2 + box(3, 1, 2)*t3 + box(4, 1, 2)*t4)*s1 + &
3476 : (box(1, 2, 2)*t1 + box(2, 2, 2)*t2 + box(3, 2, 2)*t3 + box(4, 2, 2)*t4)*s2 + &
3477 : (box(1, 3, 2)*t1 + box(2, 3, 2)*t2 + box(3, 3, 2)*t3 + box(4, 3, 2)*t4)*s3 + &
3478 : (box(1, 4, 2)*t1 + box(2, 4, 2)*t2 + box(3, 4, 2)*t3 + box(4, 4, 2)*t4)*s4)*v2 + &
3479 : ((box(1, 1, 3)*t1 + box(2, 1, 3)*t2 + box(3, 1, 3)*t3 + box(4, 1, 3)*t4)*s1 + &
3480 : (box(1, 2, 3)*t1 + box(2, 2, 3)*t2 + box(3, 2, 3)*t3 + box(4, 2, 3)*t4)*s2 + &
3481 : (box(1, 3, 3)*t1 + box(2, 3, 3)*t2 + box(3, 3, 3)*t3 + box(4, 3, 3)*t4)*s3 + &
3482 : (box(1, 4, 3)*t1 + box(2, 4, 3)*t2 + box(3, 4, 3)*t3 + box(4, 4, 3)*t4)*s4)*v3 + &
3483 : ((box(1, 1, 4)*t1 + box(2, 1, 4)*t2 + box(3, 1, 4)*t3 + box(4, 1, 4)*t4)*s1 + &
3484 : (box(1, 2, 4)*t1 + box(2, 2, 4)*t2 + box(3, 2, 4)*t3 + box(4, 2, 4)*t4)*s2 + &
3485 : (box(1, 3, 4)*t1 + box(2, 3, 4)*t2 + box(3, 3, 4)*t3 + box(4, 3, 4)*t4)*s3 + &
3486 5083 : (box(1, 4, 4)*t1 + box(2, 4, 4)*t2 + box(3, 4, 4)*t3 + box(4, 4, 4)*t4)*s4)*v4
3487 :
3488 5083 : t1 = t1o
3489 5083 : t2 = t2o
3490 5083 : t3 = t3o
3491 5083 : t4 = t4o
3492 5083 : s1 = s1o
3493 5083 : s2 = s2o
3494 5083 : s3 = s3o
3495 5083 : s4 = s4o
3496 5083 : v1 = v1d*dr3i
3497 5083 : v2 = v2d*dr3i
3498 5083 : v3 = v3d*dr3i
3499 5083 : v4 = v4d*dr3i
3500 : val(3) = ((box(1, 1, 1)*t1 + box(2, 1, 1)*t2 + box(3, 1, 1)*t3 + box(4, 1, 1)*t4)*s1 + &
3501 : (box(1, 2, 1)*t1 + box(2, 2, 1)*t2 + box(3, 2, 1)*t3 + box(4, 2, 1)*t4)*s2 + &
3502 : (box(1, 3, 1)*t1 + box(2, 3, 1)*t2 + box(3, 3, 1)*t3 + box(4, 3, 1)*t4)*s3 + &
3503 : (box(1, 4, 1)*t1 + box(2, 4, 1)*t2 + box(3, 4, 1)*t3 + box(4, 4, 1)*t4)*s4)*v1 + &
3504 : ((box(1, 1, 2)*t1 + box(2, 1, 2)*t2 + box(3, 1, 2)*t3 + box(4, 1, 2)*t4)*s1 + &
3505 : (box(1, 2, 2)*t1 + box(2, 2, 2)*t2 + box(3, 2, 2)*t3 + box(4, 2, 2)*t4)*s2 + &
3506 : (box(1, 3, 2)*t1 + box(2, 3, 2)*t2 + box(3, 3, 2)*t3 + box(4, 3, 2)*t4)*s3 + &
3507 : (box(1, 4, 2)*t1 + box(2, 4, 2)*t2 + box(3, 4, 2)*t3 + box(4, 4, 2)*t4)*s4)*v2 + &
3508 : ((box(1, 1, 3)*t1 + box(2, 1, 3)*t2 + box(3, 1, 3)*t3 + box(4, 1, 3)*t4)*s1 + &
3509 : (box(1, 2, 3)*t1 + box(2, 2, 3)*t2 + box(3, 2, 3)*t3 + box(4, 2, 3)*t4)*s2 + &
3510 : (box(1, 3, 3)*t1 + box(2, 3, 3)*t2 + box(3, 3, 3)*t3 + box(4, 3, 3)*t4)*s3 + &
3511 : (box(1, 4, 3)*t1 + box(2, 4, 3)*t2 + box(3, 4, 3)*t3 + box(4, 4, 3)*t4)*s4)*v3 + &
3512 : ((box(1, 1, 4)*t1 + box(2, 1, 4)*t2 + box(3, 1, 4)*t3 + box(4, 1, 4)*t4)*s1 + &
3513 : (box(1, 2, 4)*t1 + box(2, 2, 4)*t2 + box(3, 2, 4)*t3 + box(4, 2, 4)*t4)*s2 + &
3514 : (box(1, 3, 4)*t1 + box(2, 3, 4)*t2 + box(3, 3, 4)*t3 + box(4, 3, 4)*t4)*s3 + &
3515 5083 : (box(1, 4, 4)*t1 + box(2, 4, 4)*t2 + box(3, 4, 4)*t3 + box(4, 4, 4)*t4)*s4)*v4
3516 :
3517 5083 : IF (my_mpsum) CALL pw%pw_grid%para%group%sum(val)
3518 :
3519 5083 : END FUNCTION Eval_d_Interp_Spl3_pbc
3520 :
3521 0 : END MODULE pw_spline_utils
|