LCOV - code coverage report
Current view: top level - src/pw - pw_spline_utils.F (source / functions) Coverage Total Hit
Test: CP2K Regtests (git:c24029e) Lines: 81.9 % 1450 1187
Test Date: 2026-07-04 06:36:57 Functions: 95.8 % 24 23

            Line data    Source code
       1              : !--------------------------------------------------------------------------------------------------!
       2              : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3              : !   Copyright 2000-2026 CP2K developers group <https://cp2k.org>                                   !
       4              : !                                                                                                  !
       5              : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6              : !--------------------------------------------------------------------------------------------------!
       7              : 
       8              : ! **************************************************************************************************
       9              : !> \brief different utils that are useful to manipulate splines on the regular
      10              : !>      grid of a pw
      11              : !> \par History
      12              : !>      05.2003 created [fawzi]
      13              : !>      08.2004 removed spline evaluation method using more than 2 read streams
      14              : !>              (pw_compose_stripe_rs3), added linear solver based spline
      15              : !>              inversion [fawzi]
      16              : !> \author Fawzi Mohamed
      17              : ! **************************************************************************************************
      18              : MODULE pw_spline_utils
      19              : 
      20              :    USE cp_log_handling,                 ONLY: cp_get_default_logger,&
      21              :                                               cp_logger_type,&
      22              :                                               cp_to_string
      23              :    USE kinds,                           ONLY: dp
      24              :    USE mathconstants,                   ONLY: twopi
      25              :    USE message_passing,                 ONLY: mp_comm_congruent
      26              :    USE pw_grid_types,                   ONLY: FULLSPACE,&
      27              :                                               PW_MODE_LOCAL
      28              :    USE pw_methods,                      ONLY: pw_axpy,&
      29              :                                               pw_copy,&
      30              :                                               pw_integral_ab,&
      31              :                                               pw_zero
      32              :    USE pw_pool_types,                   ONLY: pw_pool_release,&
      33              :                                               pw_pool_type
      34              :    USE pw_types,                        ONLY: pw_c1d_gs_type,&
      35              :                                               pw_r3d_rs_type
      36              : #include "../base/base_uses.f90"
      37              : 
      38              :    IMPLICIT NONE
      39              :    PRIVATE
      40              : 
      41              :    LOGICAL, PRIVATE, PARAMETER :: debug_this_module = .TRUE.
      42              :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'pw_spline_utils'
      43              : 
      44              :    INTEGER, PARAMETER, PUBLIC               :: no_precond = 0, &
      45              :                                                precond_spl3_aint = 1, &
      46              :                                                precond_spl3_1 = 2, &
      47              :                                                precond_spl3_aint2 = 3, &
      48              :                                                precond_spl3_2 = 4, &
      49              :                                                precond_spl3_3 = 5
      50              : 
      51              :    REAL(KIND=dp), PUBLIC, PARAMETER, DIMENSION(4) :: nn10_coeffs = &
      52              :                                                      [125._dp/216._dp, 25._dp/432._dp, 5._dp/864._dp, 1._dp/1728._dp], &
      53              :                                                      spline3_coeffs = &
      54              :                                                      [8._dp/(27._dp), 2._dp/(27._dp), 1._dp/(27._dp*2._dp), &
      55              :                                                       1._dp/(27._dp*8._dp)], &
      56              :                                                      spline2_coeffs = &
      57              :                                                      [27._dp/(64._dp), 9._dp/(64._dp*2_dp), 3._dp/(64._dp*4._dp), &
      58              :                                                       1._dp/(64._dp*8._dp)], &
      59              :                                                      nn50_coeffs = &
      60              :                                                      [15625._dp/17576._dp, 625._dp/35152._dp, 25._dp/70304._dp, &
      61              :                                                       1._dp/140608._dp], &
      62              :                                                      spl3_aint_coeff = &
      63              :                                                      [46._dp/27._dp, -2._dp/(27._dp), -1._dp/(27._dp*2._dp), &
      64              :                                                       -1._dp/(27._dp*8._dp)], &
      65              :                                                      spl3_precond1_coeff = &
      66              :                                                      [64._dp/3._dp, -8._dp/3._dp, -1._dp/3._dp, -1._dp/24._dp], &
      67              :                                                      spl3_1d_transf_coeffs = &
      68              :                                                      [2._dp/3._dp, 23._dp/48._dp, 1._dp/6._dp, 1._dp/48._dp]
      69              : 
      70              :    REAL(KIND=dp), PUBLIC, PARAMETER, DIMENSION(3) :: spline3_deriv_coeffs = &
      71              :                                                      [2.0_dp/9.0_dp, 1.0_dp/18.0_dp, 1.0_dp/72.0_dp], &
      72              :                                                      spline2_deriv_coeffs = &
      73              :                                                      [9.0_dp/32.0_dp, 3.0_dp/64.0_dp, 1.0_dp/128.0_dp], &
      74              :                                                      nn10_deriv_coeffs = &
      75              :                                                      [25._dp/72._dp, 5._dp/144, 1._dp/288._dp], &
      76              :                                                      nn50_deriv_coeffs = &
      77              :                                                      [625._dp/1352._dp, 25._dp/2704._dp, 1._dp/5408._dp], &
      78              :                                                      spl3_1d_coeffs0 = &
      79              :                                                      [1._dp/6_dp, 2._dp/3._dp, 1._dp/6._dp], &
      80              :                                                      spl3_1d_transf_border1 = &
      81              :                                                      [0.517977704_dp, 0.464044595_dp, 0.17977701e-1_dp]
      82              : 
      83              :    PUBLIC :: pw_spline3_interpolate_values_g, &
      84              :              pw_spline3_deriv_g
      85              :    PUBLIC :: pw_spline_scale_deriv
      86              :    PUBLIC :: pw_spline2_interpolate_values_g, &
      87              :              pw_spline2_deriv_g
      88              :    PUBLIC :: pw_nn_smear_r, pw_nn_deriv_r, &
      89              :              spl3_nopbc, spl3_pbc, spl3_nopbct
      90              :    PUBLIC :: add_fine2coarse, add_coarse2fine
      91              :    PUBLIC :: pw_spline_precond_create, &
      92              :              pw_spline_do_precond, &
      93              :              pw_spline_precond_set_kind, &
      94              :              find_coeffs, &
      95              :              pw_spline_precond_release, &
      96              :              pw_spline_precond_type, &
      97              :              Eval_Interp_Spl3_pbc, &
      98              :              Eval_d_Interp_Spl3_pbc
      99              : 
     100              : !***
     101              : 
     102              : ! **************************************************************************************************
     103              : !> \brief stores information for the preconditioner used to calculate the
     104              : !>      coeffs of splines
     105              : !> \author fawzi
     106              : ! **************************************************************************************************
     107              :    TYPE pw_spline_precond_type
     108              :       INTEGER :: kind = no_precond
     109              :       REAL(kind=dp), DIMENSION(4) :: coeffs = 0.0_dp
     110              :       REAL(kind=dp), DIMENSION(3) :: coeffs_1d = 0.0_dp
     111              :       LOGICAL :: sharpen = .FALSE., normalize = .FALSE., pbc = .FALSE., transpose = .FALSE.
     112              :       TYPE(pw_pool_type), POINTER :: pool => NULL()
     113              :    END TYPE pw_spline_precond_type
     114              : 
     115              : CONTAINS
     116              : 
     117              : ! **************************************************************************************************
     118              : !> \brief calculates the FFT of the coefficients of the quadratic spline that
     119              : !>      interpolates the given values
     120              : !> \param spline_g on entry the FFT of the values to interpolate as cc,
     121              : !>        will contain the FFT of the coefficients of the spline
     122              : !> \par History
     123              : !>      06.2003 created [fawzi]
     124              : !> \author Fawzi Mohamed
     125              : !> \note
     126              : !>      does not work with spherical cutoff
     127              : ! **************************************************************************************************
     128        40794 :    SUBROUTINE pw_spline2_interpolate_values_g(spline_g)
     129              :       TYPE(pw_c1d_gs_type), INTENT(IN)                   :: spline_g
     130              : 
     131              :       CHARACTER(len=*), PARAMETER :: routineN = 'pw_spline2_interpolate_values_g'
     132              : 
     133              :       INTEGER                                            :: handle, i, ii, j, k
     134              :       INTEGER, DIMENSION(2, 3)                           :: gbo
     135              :       INTEGER, DIMENSION(3)                              :: n_tot
     136              :       REAL(KIND=dp)                                      :: c23, coeff
     137        40794 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: cosIVals, cosJVals, cosKVals
     138              : 
     139        40794 :       CALL timeset(routineN, handle)
     140              : 
     141       163176 :       n_tot(1:3) = spline_g%pw_grid%npts(1:3)
     142       407940 :       gbo = spline_g%pw_grid%bounds
     143              : 
     144        40794 :       CPASSERT(.NOT. spline_g%pw_grid%spherical)
     145        40794 :       CPASSERT(spline_g%pw_grid%grid_span == FULLSPACE)
     146              : 
     147            0 :       ALLOCATE (cosIVals(gbo(1, 1):gbo(2, 1)), cosJVals(gbo(1, 2):gbo(2, 2)), &
     148       285558 :                 cosKVals(gbo(1, 3):gbo(2, 3)))
     149              : 
     150        40794 :       coeff = twopi/n_tot(1)
     151        40794 : !$OMP     PARALLEL DO PRIVATE(i) DEFAULT(NONE) SHARED(cosIVals,coeff,gbo)
     152              :       DO i = gbo(1, 1), gbo(2, 1)
     153              :          cosIVals(i) = COS(coeff*REAL(i, dp))
     154              :       END DO
     155        40794 :       coeff = twopi/n_tot(2)
     156        40794 : !$OMP     PARALLEL DO PRIVATE(j) DEFAULT(NONE) SHARED(cosJVals,coeff,gbo)
     157              :       DO j = gbo(1, 2), gbo(2, 2)
     158              :          cosJVals(j) = COS(coeff*REAL(j, dp))
     159              :       END DO
     160        40794 :       coeff = twopi/n_tot(3)
     161        40794 : !$OMP     PARALLEL DO PRIVATE(k) DEFAULT(NONE) SHARED(cosKVals,coeff,gbo)
     162              :       DO k = gbo(1, 3), gbo(2, 3)
     163              :          cosKVals(k) = COS(coeff*REAL(k, dp))
     164              :       END DO
     165              : 
     166        40794 : !$OMP     PARALLEL DO PRIVATE(i,j,k,ii,coeff,c23) DEFAULT(NONE) SHARED(spline_g,cosIVals,cosJVals,cosKVals)
     167              :       DO ii = 1, SIZE(spline_g%array)
     168              :          i = spline_g%pw_grid%g_hat(1, ii)
     169              :          j = spline_g%pw_grid%g_hat(2, ii)
     170              :          k = spline_g%pw_grid%g_hat(3, ii)
     171              : 
     172              :          c23 = cosJVals(j)*cosKVals(k)
     173              :          coeff = 64.0_dp/(cosIVals(i)*c23 + &
     174              :                           (cosIVals(i)*cosJVals(j) + cosIVals(i)*cosKVals(k) + c23)*3.0_dp + &
     175              :                           (cosIVals(i) + cosJVals(j) + cosKVals(k))*9.0_dp + &
     176              :                           27.0_dp)
     177              : 
     178              :          spline_g%array(ii) = spline_g%array(ii)*coeff
     179              : 
     180              :       END DO
     181        40794 :       DEALLOCATE (cosIVals, cosJVals, cosKVals)
     182              : 
     183        40794 :       CALL timestop(handle)
     184        81588 :    END SUBROUTINE pw_spline2_interpolate_values_g
     185              : 
     186              : ! **************************************************************************************************
     187              : !> \brief calculates the FFT of the coefficients of the2 cubic spline that
     188              : !>      interpolates the given values
     189              : !> \param spline_g on entry the FFT of the values to interpolate as cc,
     190              : !>        will contain the FFT of the coefficients of the spline
     191              : !> \par History
     192              : !>      06.2003 created [fawzi]
     193              : !> \author Fawzi Mohamed
     194              : !> \note
     195              : !>      does not work with spherical cutoff
     196              : !>      stupid distribution for cos calculation, it should calculate only the
     197              : !>      needed cos, and avoid the mpi_allreduce
     198              : ! **************************************************************************************************
     199          342 :    SUBROUTINE pw_spline3_interpolate_values_g(spline_g)
     200              :       TYPE(pw_c1d_gs_type), INTENT(IN)                   :: spline_g
     201              : 
     202              :       CHARACTER(len=*), PARAMETER :: routineN = 'pw_spline3_interpolate_values_g'
     203              : 
     204              :       INTEGER                                            :: handle, i, ii, j, k
     205              :       INTEGER, DIMENSION(2, 3)                           :: gbo
     206              :       INTEGER, DIMENSION(3)                              :: n_tot
     207              :       REAL(KIND=dp)                                      :: c23, coeff
     208          342 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: cosIVals, cosJVals, cosKVals
     209              : 
     210          342 :       CALL timeset(routineN, handle)
     211              : 
     212         1368 :       n_tot(1:3) = spline_g%pw_grid%npts(1:3)
     213         3420 :       gbo = spline_g%pw_grid%bounds
     214              : 
     215          342 :       CPASSERT(.NOT. spline_g%pw_grid%spherical)
     216          342 :       CPASSERT(spline_g%pw_grid%grid_span == FULLSPACE)
     217              : 
     218            0 :       ALLOCATE (cosIVals(gbo(1, 1):gbo(2, 1)), &
     219            0 :                 cosJVals(gbo(1, 2):gbo(2, 2)), &
     220         2394 :                 cosKVals(gbo(1, 3):gbo(2, 3)))
     221              : 
     222          342 :       coeff = twopi/n_tot(1)
     223          342 : !$OMP     PARALLEL DO PRIVATE(i) DEFAULT(NONE) SHARED(cosIVals,coeff,gbo)
     224              :       DO i = gbo(1, 1), gbo(2, 1)
     225              :          cosIVals(i) = COS(coeff*REAL(i, dp))
     226              :       END DO
     227          342 :       coeff = twopi/n_tot(2)
     228          342 : !$OMP     PARALLEL DO PRIVATE(j) DEFAULT(NONE) SHARED(cosJVals,coeff,gbo)
     229              :       DO j = gbo(1, 2), gbo(2, 2)
     230              :          cosJVals(j) = COS(coeff*REAL(j, dp))
     231              :       END DO
     232          342 :       coeff = twopi/n_tot(3)
     233          342 : !$OMP     PARALLEL DO PRIVATE(k) DEFAULT(NONE) SHARED(cosKVals,coeff,gbo)
     234              :       DO k = gbo(1, 3), gbo(2, 3)
     235              :          cosKVals(k) = COS(coeff*REAL(k, dp))
     236              :       END DO
     237              : 
     238          342 : !$OMP     PARALLEL DO DEFAULT(NONE) PRIVATE(i,j,k,ii,coeff,c23) SHARED(spline_g,cosIVals,cosJVals,cosKVals)
     239              :       DO ii = 1, SIZE(spline_g%array)
     240              :          i = spline_g%pw_grid%g_hat(1, ii)
     241              :          j = spline_g%pw_grid%g_hat(2, ii)
     242              :          k = spline_g%pw_grid%g_hat(3, ii)
     243              :          ! no opt
     244              : !FM                coeff=1.0/((cosVal(1)*cosVal(2)*cosVal(3))/27.0_dp+&
     245              : !FM                     (cosVal(1)*cosVal(2)+cosVal(1)*cosVal(3)+&
     246              : !FM                     cosVal(2)*cosVal(3))*2.0_dp/27.0_dp+&
     247              : !FM                     (cosVal(1)+cosVal(2)+cosVal(3))*4.0_dp/27.0_dp+&
     248              : !FM                     8.0_dp/27.0_dp)
     249              :          ! opt
     250              :          c23 = cosJVals(j)*cosKVals(k)
     251              :          coeff = 27.0_dp/(cosIVals(i)*c23 + &
     252              :                           (cosIVals(i)*cosJVals(j) + cosIVals(i)*cosKVals(k) + c23)*2.0_dp + &
     253              :                           (cosIVals(i) + cosJVals(j) + cosKVals(k))*4.0_dp + &
     254              :                           8.0_dp)
     255              : 
     256              :          spline_g%array(ii) = spline_g%array(ii)*coeff
     257              : 
     258              :       END DO
     259          342 :       DEALLOCATE (cosIVals, cosJVals, cosKVals)
     260              : 
     261          342 :       CALL timestop(handle)
     262          684 :    END SUBROUTINE pw_spline3_interpolate_values_g
     263              : 
     264              : ! **************************************************************************************************
     265              : !> \brief rescales the derivatives from gridspacing=1 to the real derivatives
     266              : !> \param deriv_vals_r an array of x,y,z derivatives
     267              : !> \param transpose if true applies the transpose of the map (defaults to
     268              : !>        false)
     269              : !> \param scale a scaling factor (defaults to 1.0)
     270              : !> \par History
     271              : !>      06.2003 created [fawzi]
     272              : !> \author Fawzi Mohamed
     273              : ! **************************************************************************************************
     274        18144 :    SUBROUTINE pw_spline_scale_deriv(deriv_vals_r, transpose, scale)
     275              :       TYPE(pw_r3d_rs_type), DIMENSION(3), INTENT(IN)     :: deriv_vals_r
     276              :       LOGICAL, INTENT(in), OPTIONAL                      :: transpose
     277              :       REAL(KIND=dp), INTENT(in), OPTIONAL                :: scale
     278              : 
     279              :       CHARACTER(len=*), PARAMETER :: routineN = 'pw_spline_scale_deriv'
     280              : 
     281              :       INTEGER                                            :: handle, i, idir, j, k
     282              :       INTEGER, DIMENSION(2, 3)                           :: bo
     283              :       INTEGER, DIMENSION(3)                              :: n_tot
     284              :       LOGICAL                                            :: diag, my_transpose
     285              :       REAL(KIND=dp)                                      :: dVal1, dVal2, dVal3, my_scale, scalef
     286              :       REAL(KIND=dp), DIMENSION(3, 3)                     :: dh_inv, h_grid
     287        18144 :       REAL(kind=dp), DIMENSION(:, :, :), POINTER         :: ddata, ddata2, ddata3
     288              : 
     289        18144 :       CALL timeset(routineN, handle)
     290              : 
     291        18144 :       my_transpose = .FALSE.
     292        18144 :       IF (PRESENT(transpose)) my_transpose = transpose
     293        18144 :       my_scale = 1.0_dp
     294        18144 :       IF (PRESENT(scale)) my_scale = scale
     295        72576 :       n_tot(1:3) = deriv_vals_r(1)%pw_grid%npts(1:3)
     296       181440 :       bo = deriv_vals_r(1)%pw_grid%bounds_local
     297       235872 :       dh_inv = deriv_vals_r(1)%pw_grid%dh_inv
     298              : 
     299              :       ! map grid to real derivative
     300        18144 :       diag = .TRUE.
     301        18144 :       IF (my_transpose) THEN
     302        32832 :          DO j = 1, 3
     303       106704 :             DO i = 1, 3
     304        73872 :                h_grid(j, i) = my_scale*dh_inv(i, j) ! REAL(n_tot(i),dp)*cell_h_inv(i,j)
     305        98496 :                IF (i /= j .AND. h_grid(j, i) /= 0.0_dp) diag = .FALSE.
     306              :             END DO
     307              :          END DO
     308              :       ELSE
     309        39744 :          DO j = 1, 3
     310       129168 :             DO i = 1, 3
     311        89424 :                h_grid(i, j) = my_scale*dh_inv(i, j) ! REAL(n_tot(i),dp)*cell_h_inv(i,j)
     312       119232 :                IF (i /= j .AND. h_grid(i, j) /= 0.0_dp) diag = .FALSE.
     313              :             END DO
     314              :          END DO
     315              :       END IF
     316              : 
     317        18144 :       IF (diag) THEN
     318        71856 :          DO idir = 1, 3
     319        53892 :             ddata => deriv_vals_r(idir)%array
     320        53892 :             scalef = h_grid(idir, idir)
     321              :             CALL dscal((bo(2, 1) - bo(1, 1) + 1)*(bo(2, 2) - bo(1, 2) + 1)*(bo(2, 3) - bo(1, 3) + 1), &
     322        71856 :                        scalef, ddata, 1)
     323              :          END DO
     324              :       ELSE
     325          180 :          ddata => deriv_vals_r(1)%array
     326          180 :          ddata2 => deriv_vals_r(2)%array
     327          180 :          ddata3 => deriv_vals_r(3)%array
     328              : !$OMP        PARALLEL DO DEFAULT(NONE) PRIVATE(k,j,i,dVal1,dVal2,dVal3) &
     329          180 : !$OMP                 SHARED(ddata,ddata2,ddata3,h_grid,bo)
     330              :          DO k = bo(1, 3), bo(2, 3)
     331              :             DO j = bo(1, 2), bo(2, 2)
     332              :                DO i = bo(1, 1), bo(2, 1)
     333              : 
     334              :                   dVal1 = ddata(i, j, k)
     335              :                   dVal2 = ddata2(i, j, k)
     336              :                   dVal3 = ddata3(i, j, k)
     337              : 
     338              :                   ddata(i, j, k) = h_grid(1, 1)*dVal1 + &
     339              :                                    h_grid(2, 1)*dVal2 + h_grid(3, 1)*dVal3
     340              :                   ddata2(i, j, k) = h_grid(1, 2)*dVal1 + &
     341              :                                     h_grid(2, 2)*dVal2 + h_grid(3, 2)*dVal3
     342              :                   ddata3(i, j, k) = h_grid(1, 3)*dVal1 + &
     343              :                                     h_grid(2, 3)*dVal2 + h_grid(3, 3)*dVal3
     344              : 
     345              :                END DO
     346              :             END DO
     347              :          END DO
     348              :       END IF
     349              : 
     350        18144 :       CALL timestop(handle)
     351        18144 :    END SUBROUTINE pw_spline_scale_deriv
     352              : 
     353              : ! **************************************************************************************************
     354              : !> \brief calculates the FFT of the values of the x,y,z (idir=1,2,3)
     355              : !>      derivative of the cubic spline
     356              : !> \param spline_g on entry the FFT of the coefficients of the spline
     357              : !>        will contain the FFT of the derivative
     358              : !> \param idir direction of the derivative
     359              : !> \par History
     360              : !>      06.2003 created [fawzi]
     361              : !> \author Fawzi Mohamed
     362              : !> \note
     363              : !>      the distance between gridpoints is assumed to be 1
     364              : ! **************************************************************************************************
     365          342 :    SUBROUTINE pw_spline3_deriv_g(spline_g, idir)
     366              :       TYPE(pw_c1d_gs_type), INTENT(IN)                   :: spline_g
     367              :       INTEGER, INTENT(in)                                :: idir
     368              : 
     369              :       CHARACTER(len=*), PARAMETER :: routineN = 'pw_spline3_deriv_g'
     370              :       REAL(KIND=dp), PARAMETER                           :: inv9 = 1.0_dp/9.0_dp
     371              : 
     372              :       INTEGER                                            :: handle, i, ii, j, k
     373              :       INTEGER, DIMENSION(2, 3)                           :: bo, gbo
     374              :       INTEGER, DIMENSION(3)                              :: n, n_tot
     375              :       REAL(KIND=dp)                                      :: coeff, tmp
     376          342 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: csIVals, csJVals, csKVals
     377              : 
     378          342 :       CALL timeset(routineN, handle)
     379              : 
     380         1368 :       n(1:3) = spline_g%pw_grid%npts_local(1:3)
     381         1368 :       n_tot(1:3) = spline_g%pw_grid%npts(1:3)
     382         3420 :       bo = spline_g%pw_grid%bounds_local
     383         3420 :       gbo = spline_g%pw_grid%bounds
     384              : 
     385          342 :       CPASSERT(.NOT. spline_g%pw_grid%spherical)
     386          342 :       CPASSERT(spline_g%pw_grid%grid_span == FULLSPACE)
     387              : 
     388            0 :       ALLOCATE (csIVals(gbo(1, 1):gbo(2, 1)), &
     389            0 :                 csJVals(gbo(1, 2):gbo(2, 2)), &
     390         2394 :                 csKVals(gbo(1, 3):gbo(2, 3)))
     391              : 
     392          342 :       coeff = twopi/n_tot(1)
     393          342 :       IF (idir == 1) THEN
     394          114 : !$OMP        PARALLEL DO PRIVATE(i) DEFAULT(NONE) SHARED(gbo,csIVals,coeff)
     395              :          DO i = gbo(1, 1), gbo(2, 1)
     396              :             csIVals(i) = SIN(coeff*REAL(i, dp))
     397              :          END DO
     398              :       ELSE
     399          228 : !$OMP        PARALLEL DO PRIVATE(i) DEFAULT(NONE) SHARED(gbo,csIVals,coeff)
     400              :          DO i = gbo(1, 1), gbo(2, 1)
     401              :             csIVals(i) = COS(coeff*REAL(i, dp))
     402              :          END DO
     403              :       END IF
     404          342 :       coeff = twopi/n_tot(2)
     405          342 :       IF (idir == 2) THEN
     406          114 : !$OMP        PARALLEL DO PRIVATE(j) DEFAULT(NONE) SHARED(gbo,csJVals,coeff)
     407              :          DO j = gbo(1, 2), gbo(2, 2)
     408              :             csJVals(j) = SIN(coeff*REAL(j, dp))
     409              :          END DO
     410              :       ELSE
     411          228 : !$OMP        PARALLEL DO PRIVATE(j) DEFAULT(NONE) SHARED(gbo,csJVals,coeff)
     412              :          DO j = gbo(1, 2), gbo(2, 2)
     413              :             csJVals(j) = COS(coeff*REAL(j, dp))
     414              :          END DO
     415              :       END IF
     416          342 :       coeff = twopi/n_tot(3)
     417          342 :       IF (idir == 3) THEN
     418          114 : !$OMP        PARALLEL DO PRIVATE(k) DEFAULT(NONE) SHARED(gbo,csKVals,coeff)
     419              :          DO k = gbo(1, 3), gbo(2, 3)
     420              :             csKVals(k) = SIN(coeff*REAL(k, dp))
     421              :          END DO
     422              :       ELSE
     423          228 : !$OMP        PARALLEL DO PRIVATE(k) DEFAULT(NONE) SHARED(gbo,csKVals,coeff)
     424              :          DO k = gbo(1, 3), gbo(2, 3)
     425              :             csKVals(k) = COS(coeff*REAL(k, dp))
     426              :          END DO
     427              :       END IF
     428              : 
     429          114 :       SELECT CASE (idir)
     430              :       CASE (1)
     431              :          ! x deriv
     432          114 : !$OMP        PARALLEL DO PRIVATE(ii,k,j,i,coeff,tmp) DEFAULT(NONE) SHARED(spline_g,csIVals,csJVals,csKVals)
     433              :          DO ii = 1, SIZE(spline_g%array)
     434              :             i = spline_g%pw_grid%g_hat(1, ii)
     435              :             j = spline_g%pw_grid%g_hat(2, ii)
     436              :             k = spline_g%pw_grid%g_hat(3, ii)
     437              : !FM                ! formula
     438              : !FM                coeff=(sinVal(1)*cosVal(2)*cosVal(3))/9.0_dp+&
     439              : !FM                     (sinVal(1)*cosVal(2)+sinVal(1)*cosVal(3))*2.0_dp/9.0_dp+&
     440              : !FM                     sinVal(1)*4.0_dp/9.0_dp
     441              :             tmp = csIVals(i)*csJVals(j)
     442              :             coeff = (tmp*csKVals(k) + &
     443              :                      (tmp + csIVals(i)*csKVals(k))*2.0_dp + &
     444              :                      csIVals(i)*4.0_dp)*inv9
     445              : 
     446              :             spline_g%array(ii) = spline_g%array(ii)* &
     447              :                                  CMPLX(0.0_dp, coeff, dp)
     448              :          END DO
     449              :       CASE (2)
     450              :          ! y deriv
     451          114 : !$OMP        PARALLEL DO PRIVATE(ii,k,j,i,coeff,tmp) DEFAULT(NONE) SHARED(spline_g,csIVals,csJVals,csKVals)
     452              :          DO ii = 1, SIZE(spline_g%array)
     453              :             i = spline_g%pw_grid%g_hat(1, ii)
     454              :             j = spline_g%pw_grid%g_hat(2, ii)
     455              :             k = spline_g%pw_grid%g_hat(3, ii)
     456              : 
     457              :             tmp = csIVals(i)*csJVals(j)
     458              :             coeff = (tmp*csKVals(k) + &
     459              :                      (tmp + csJVals(j)*csKVals(k))*2.0_dp + &
     460              :                      csJVals(j)*4.0_dp)*inv9
     461              : 
     462              :             spline_g%array(ii) = spline_g%array(ii)* &
     463              :                                  CMPLX(0.0_dp, coeff, dp)
     464              :          END DO
     465              :       CASE (3)
     466              :          ! z deriv
     467          342 : !$OMP        PARALLEL DO PRIVATE(ii,k,j,i,coeff,tmp) DEFAULT(NONE) SHARED(spline_g,csIVals,csJVals,csKVals)
     468              :          DO ii = 1, SIZE(spline_g%array)
     469              :             i = spline_g%pw_grid%g_hat(1, ii)
     470              :             j = spline_g%pw_grid%g_hat(2, ii)
     471              :             k = spline_g%pw_grid%g_hat(3, ii)
     472              : 
     473              :             tmp = csIVals(i)*csKVals(k)
     474              :             coeff = (tmp*csJVals(j) + &
     475              :                      (tmp + csJVals(j)*csKVals(k))*2.0_dp + &
     476              :                      csKVals(k)*4.0_dp)*inv9
     477              : 
     478              :             spline_g%array(ii) = spline_g%array(ii)* &
     479              :                                  CMPLX(0.0_dp, coeff, dp)
     480              :          END DO
     481              :       END SELECT
     482              : 
     483          342 :       DEALLOCATE (csIVals, csJVals, csKVals)
     484              : 
     485          342 :       CALL timestop(handle)
     486          684 :    END SUBROUTINE pw_spline3_deriv_g
     487              : 
     488              : ! **************************************************************************************************
     489              : !> \brief calculates the FFT of the values of the x,y,z (idir=1,2,3)
     490              : !>      derivative of the quadratic spline
     491              : !> \param spline_g on entry the FFT of the coefficients of the spline
     492              : !>        will contain the FFT of the derivative
     493              : !> \param idir direction of the derivative
     494              : !> \par History
     495              : !>      06.2003 created [fawzi]
     496              : !> \author Fawzi Mohamed
     497              : !> \note
     498              : !>      the distance between gridpoints is assumed to be 1
     499              : ! **************************************************************************************************
     500        40794 :    SUBROUTINE pw_spline2_deriv_g(spline_g, idir)
     501              :       TYPE(pw_c1d_gs_type), INTENT(IN)                   :: spline_g
     502              :       INTEGER, INTENT(in)                                :: idir
     503              : 
     504              :       CHARACTER(len=*), PARAMETER :: routineN = 'pw_spline2_deriv_g'
     505              :       REAL(KIND=dp), PARAMETER                           :: inv16 = 1.0_dp/16.0_dp
     506              : 
     507              :       INTEGER                                            :: handle, i, ii, j, k
     508              :       INTEGER, DIMENSION(2, 3)                           :: bo
     509              :       INTEGER, DIMENSION(3)                              :: n, n_tot
     510              :       REAL(KIND=dp)                                      :: coeff, tmp
     511        40794 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: csIVals, csJVals, csKVals
     512              : 
     513        40794 :       CALL timeset(routineN, handle)
     514              : 
     515       163176 :       n(1:3) = spline_g%pw_grid%npts_local(1:3)
     516       163176 :       n_tot(1:3) = spline_g%pw_grid%npts(1:3)
     517       407940 :       bo = spline_g%pw_grid%bounds
     518              : 
     519        40794 :       CPASSERT(.NOT. spline_g%pw_grid%spherical)
     520        40794 :       CPASSERT(spline_g%pw_grid%grid_span == FULLSPACE)
     521              : 
     522            0 :       ALLOCATE (csIVals(bo(1, 1):bo(2, 1)), csJVals(bo(1, 2):bo(2, 2)), &
     523       285558 :                 csKVals(bo(1, 3):bo(2, 3)))
     524              : 
     525        40794 :       coeff = twopi/n_tot(1)
     526        40794 :       IF (idir == 1) THEN
     527        13598 : !$OMP        PARALLEL DO PRIVATE(i) DEFAULT(NONE) SHARED(bo,coeff,csIVals)
     528              :          DO i = bo(1, 1), bo(2, 1)
     529              :             csIVals(i) = SIN(coeff*REAL(i, dp))
     530              :          END DO
     531              :       ELSE
     532        27196 : !$OMP        PARALLEL DO PRIVATE(i) DEFAULT(NONE) SHARED(bo,coeff,csIVals)
     533              :          DO i = bo(1, 1), bo(2, 1)
     534              :             csIVals(i) = COS(coeff*REAL(i, dp))
     535              :          END DO
     536              :       END IF
     537        40794 :       coeff = twopi/n_tot(2)
     538        40794 :       IF (idir == 2) THEN
     539        13598 : !$OMP        PARALLEL DO PRIVATE(j) DEFAULT(NONE) SHARED(bo,coeff,csJVals)
     540              :          DO j = bo(1, 2), bo(2, 2)
     541              :             csJVals(j) = SIN(coeff*REAL(j, dp))
     542              :          END DO
     543              :       ELSE
     544        27196 : !$OMP        PARALLEL DO PRIVATE(j) DEFAULT(NONE) SHARED(bo,coeff,csJVals)
     545              :          DO j = bo(1, 2), bo(2, 2)
     546              :             csJVals(j) = COS(coeff*REAL(j, dp))
     547              :          END DO
     548              :       END IF
     549        40794 :       coeff = twopi/n_tot(3)
     550        40794 :       IF (idir == 3) THEN
     551        13598 : !$OMP        PARALLEL DO PRIVATE(k) DEFAULT(NONE) SHARED(bo,coeff,csKVals)
     552              :          DO k = bo(1, 3), bo(2, 3)
     553              :             csKVals(k) = SIN(coeff*REAL(k, dp))
     554              :          END DO
     555              :       ELSE
     556        27196 : !$OMP        PARALLEL DO PRIVATE(k) DEFAULT(NONE) SHARED(bo,coeff,csKVals)
     557              :          DO k = bo(1, 3), bo(2, 3)
     558              :             csKVals(k) = COS(coeff*REAL(k, dp))
     559              :          END DO
     560              :       END IF
     561              : 
     562        13598 :       SELECT CASE (idir)
     563              :       CASE (1)
     564              :          ! x deriv
     565        13598 : !$OMP        PARALLEL DO PRIVATE(ii,k,j,i,coeff,tmp) SHARED(spline_g,csIVals,csJVals,csKVals) DEFAULT(NONE)
     566              :          DO ii = 1, SIZE(spline_g%array)
     567              :             i = spline_g%pw_grid%g_hat(1, ii)
     568              :             j = spline_g%pw_grid%g_hat(2, ii)
     569              :             k = spline_g%pw_grid%g_hat(3, ii)
     570              : !FM                ! formula
     571              : !FM                coeff=(sinVal(1)*cosVal(2)*cosVal(3))/16.0_dp+&
     572              : !FM                     (sinVal(1)*cosVal(2)+sinVal(1)*cosVal(3))*3.0_dp/16.0_dp+&
     573              : !FM                     sinVal(1)*9.0_dp/16.0_dp
     574              :             tmp = csIVals(i)*csJVals(j)
     575              :             coeff = (tmp*csKVals(k) + &
     576              :                      (tmp + csIVals(i)*csKVals(k))*3.0_dp + &
     577              :                      csIVals(i)*9.0_dp)*inv16
     578              : 
     579              :             spline_g%array(ii) = spline_g%array(ii)* &
     580              :                                  CMPLX(0.0_dp, coeff, dp)
     581              :          END DO
     582              :       CASE (2)
     583              :          ! y deriv
     584        13598 : !$OMP        PARALLEL DO PRIVATE(ii,k,j,i,coeff,tmp) DEFAULT(NONE) SHARED(spline_g,csIVals,csJVals,csKVals)
     585              :          DO ii = 1, SIZE(spline_g%array)
     586              :             i = spline_g%pw_grid%g_hat(1, ii)
     587              :             j = spline_g%pw_grid%g_hat(2, ii)
     588              :             k = spline_g%pw_grid%g_hat(3, ii)
     589              : 
     590              :             tmp = csIVals(i)*csJVals(j)
     591              :             coeff = (tmp*csKVals(k) + &
     592              :                      (tmp + csJVals(j)*csKVals(k))*3.0_dp + &
     593              :                      csJVals(j)*9.0_dp)*inv16
     594              : 
     595              :             spline_g%array(ii) = spline_g%array(ii)* &
     596              :                                  CMPLX(0.0_dp, coeff, dp)
     597              :          END DO
     598              :       CASE (3)
     599              :          ! z deriv
     600        40794 : !$OMP        PARALLEL DO PRIVATE(ii,k,j,i,coeff,tmp) DEFAULT(NONE) SHARED(spline_g,csIVals,csJVals,csKVals)
     601              :          DO ii = 1, SIZE(spline_g%array)
     602              :             i = spline_g%pw_grid%g_hat(1, ii)
     603              :             j = spline_g%pw_grid%g_hat(2, ii)
     604              :             k = spline_g%pw_grid%g_hat(3, ii)
     605              : 
     606              :             tmp = csIVals(i)*csKVals(k)
     607              :             coeff = (tmp*csJVals(j) + &
     608              :                      (tmp + csJVals(j)*csKVals(k))*3.0_dp + &
     609              :                      csKVals(k)*9.0_dp)*inv16
     610              : 
     611              :             spline_g%array(ii) = spline_g%array(ii)* &
     612              :                                  CMPLX(0.0_dp, coeff, dp)
     613              :          END DO
     614              :       END SELECT
     615              : 
     616        40794 :       DEALLOCATE (csIVals, csJVals, csKVals)
     617              : 
     618        40794 :       CALL timestop(handle)
     619        81588 :    END SUBROUTINE pw_spline2_deriv_g
     620              : 
     621              : ! **************************************************************************************************
     622              : !> \brief applies a nearest neighbor linear operator to a stripe in x direction:
     623              : !>      out_val(i)=sum(weight(j)*in_val(i+j-1),j=0..2)
     624              : !> \param weights the weights of the linear operator
     625              : !> \param in_val the argument to the operator
     626              : !> \param in_val_first the first argument (needed to calculate out_val(1))
     627              : !> \param in_val_last the last argument (needed to calculate out_val(n_el))
     628              : !> \param out_val the place where the result is accumulated
     629              : !> \param n_el the number of elements in in_v and out_v
     630              : !> \par History
     631              : !>      04.2004 created [fawzi]
     632              : !> \author fawzi
     633              : !> \note
     634              : !>      uses 2 read streams and 1 write stream
     635              : ! **************************************************************************************************
     636    659526526 :    SUBROUTINE pw_compose_stripe(weights, in_val, in_val_first, in_val_last, &
     637              :                                 out_val, n_el)
     638              :       REAL(kind=dp), DIMENSION(0:2), INTENT(in)          :: weights
     639              :       REAL(kind=dp), DIMENSION(*), INTENT(in)            :: in_val
     640              :       REAL(kind=dp), INTENT(in)                          :: in_val_first, in_val_last
     641              :       REAL(kind=dp), DIMENSION(*), INTENT(inout)         :: out_val
     642              :       INTEGER                                            :: n_el
     643              : 
     644              :       INTEGER                                            :: i
     645              :       REAL(kind=dp)                                      :: v0, v1, v2
     646              : 
     647              : !1:n_el), &
     648              : !1:n_el), &
     649              : 
     650    659526526 :       IF (n_el < 1) RETURN
     651    659526526 :       v0 = in_val_first
     652    659526526 :       v1 = in_val(1)
     653    659526526 :       IF (weights(1) == 0.0_dp) THEN
     654              :          ! optimized version for x deriv
     655    105061374 :          DO i = 1, n_el - 3, 3
     656   1018220697 :             v2 = in_val(i + 1)
     657              :             out_val(i) = out_val(i) + &
     658              :                          weights(0)*v0 + &
     659   1018220697 :                          weights(2)*v2
     660   1018220697 :             v0 = in_val(i + 2)
     661              :             out_val(i + 1) = out_val(i + 1) + &
     662              :                              weights(0)*v1 + &
     663   1018220697 :                              weights(2)*v0
     664   1018220697 :             v1 = in_val(i + 3)
     665              :             out_val(i + 2) = out_val(i + 2) + &
     666              :                              weights(0)*v2 + &
     667   1018220697 :                              weights(2)*v1
     668              :          END DO
     669              :       ELSE
     670              :          ! generic version
     671    554465152 :          DO i = 1, n_el - 3, 3
     672   4291790882 :             v2 = in_val(i + 1)
     673              :             out_val(i) = out_val(i) + &
     674              :                          weights(0)*v0 + &
     675              :                          weights(1)*v1 + &
     676   4291790882 :                          weights(2)*v2
     677   4291790882 :             v0 = in_val(i + 2)
     678              :             out_val(i + 1) = out_val(i + 1) + &
     679              :                              weights(0)*v1 + &
     680              :                              weights(1)*v2 + &
     681   4291790882 :                              weights(2)*v0
     682   4291790882 :             v1 = in_val(i + 3)
     683              :             out_val(i + 2) = out_val(i + 2) + &
     684              :                              weights(0)*v2 + &
     685              :                              weights(1)*v0 + &
     686   4296042932 :                              weights(2)*v1
     687              :          END DO
     688              :       END IF
     689    852855747 :       SELECT CASE (MODULO(n_el - 1, 3))
     690              :       CASE (0)
     691    193329221 :          v2 = in_val_last
     692              :          out_val(n_el) = out_val(n_el) + &
     693              :                          weights(0)*v0 + &
     694              :                          weights(1)*v1 + &
     695    193329221 :                          weights(2)*v2
     696              :       CASE (1)
     697    205584388 :          v2 = in_val(n_el)
     698              :          out_val(n_el - 1) = out_val(n_el - 1) + &
     699              :                              weights(0)*v0 + &
     700              :                              weights(1)*v1 + &
     701    205584388 :                              weights(2)*v2
     702    205584388 :          v0 = in_val_last
     703              :          out_val(n_el) = out_val(n_el) + &
     704              :                          weights(0)*v1 + &
     705              :                          weights(1)*v2 + &
     706    205584388 :                          weights(2)*v0
     707              :       CASE (2)
     708    260612917 :          v2 = in_val(n_el - 1)
     709              :          out_val(n_el - 2) = out_val(n_el - 2) + &
     710              :                              weights(0)*v0 + &
     711              :                              weights(1)*v1 + &
     712    260612917 :                              weights(2)*v2
     713    260612917 :          v0 = in_val(n_el)
     714              :          out_val(n_el - 1) = out_val(n_el - 1) + &
     715              :                              weights(0)*v1 + &
     716              :                              weights(1)*v2 + &
     717    260612917 :                              weights(2)*v0
     718    260612917 :          v1 = in_val_last
     719              :          out_val(n_el) = out_val(n_el) + &
     720              :                          weights(0)*v2 + &
     721              :                          weights(1)*v0 + &
     722    659526526 :                          weights(2)*v1
     723              :       END SELECT
     724              : 
     725              :    END SUBROUTINE pw_compose_stripe
     726              : 
     727              : ! **************************************************************************************************
     728              : !> \brief private routine that computes pw_nn_compose_r (it seems that without
     729              : !>      passing arrays in this way either some compiler do a copyin/out (xlf)
     730              : !>      or by inlining suboptimal code is produced (nag))
     731              : !> \param weights a 3x3x3 array with the linear operator
     732              : !> \param in_val the argument for the linear operator
     733              : !> \param out_val place where the value of the linear oprator should be added
     734              : !> \param pw_in pw to be able to get the needed meta data about in_val and
     735              : !>        out_val
     736              : !> \param bo boundaries of in_val and out_val
     737              : !> \author fawzi
     738              : ! **************************************************************************************************
     739        28070 :    SUBROUTINE pw_nn_compose_r_work(weights, in_val, out_val, pw_in, bo)
     740              :       REAL(kind=dp), DIMENSION(0:2, 0:2, 0:2)            :: weights
     741              :       INTEGER, DIMENSION(2, 3)                           :: bo
     742              :       TYPE(pw_r3d_rs_type), INTENT(IN)                   :: pw_in
     743              :       REAL(kind=dp), DIMENSION(bo(1, 1):bo(2, 1), bo(1, &
     744              :          2):bo(2, 2), bo(1, 3):bo(2, 3)), INTENT(inout)  :: out_val
     745              :       REAL(kind=dp), DIMENSION(bo(1, 1):bo(2, 1), bo(1, &
     746              :          2):bo(2, 2), bo(1, 3):bo(2, 3)), INTENT(in)     :: in_val
     747              : 
     748              :       INTEGER                                            :: i, j, jw, k, kw, myj, myk
     749              :       INTEGER, DIMENSION(2, 3)                           :: gbo
     750              :       INTEGER, DIMENSION(3)                              :: s
     751              :       LOGICAL                                            :: has_boundary, yderiv, zderiv
     752              :       REAL(kind=dp)                                      :: in_val_f, in_val_l
     753        28070 :       REAL(kind=dp), DIMENSION(:, :), POINTER            :: l_boundary, tmp, u_boundary
     754              : 
     755        81254 :       zderiv = ALL(weights(:, :, 1) == 0.0_dp)
     756        81254 :       yderiv = ALL(weights(:, 1, :) == 0.0_dp)
     757       280700 :       bo = pw_in%pw_grid%bounds_local
     758       280700 :       gbo = pw_in%pw_grid%bounds
     759       112280 :       DO i = 1, 3
     760       112280 :          s(i) = bo(2, i) - bo(1, i) + 1
     761              :       END DO
     762       112280 :       IF (ANY(s < 1)) RETURN
     763              :       has_boundary = ANY(pw_in%pw_grid%bounds_local(:, 1) /= &
     764        44247 :                          pw_in%pw_grid%bounds(:, 1))
     765        28070 :       IF (has_boundary) THEN
     766              :          ALLOCATE (l_boundary(bo(1, 2):bo(2, 2), bo(1, 3):bo(2, 3)), &
     767              :                    u_boundary(bo(1, 2):bo(2, 2), bo(1, 3):bo(2, 3)), &
     768       213136 :                    tmp(bo(1, 2):bo(2, 2), bo(1, 3):bo(2, 3)))
     769     56427058 :          tmp(:, :) = pw_in%array(bo(2, 1), :, :)
     770              :          CALL pw_in%pw_grid%para%group%sendrecv(tmp, pw_in%pw_grid%para%pos_of_x( &
     771              :                                                 gbo(1, 1) + MODULO(bo(2, 1) + 1 - gbo(1, 1), gbo(2, 1) - gbo(1, 1) + 1)), &
     772              :                                                 l_boundary, pw_in%pw_grid%para%pos_of_x( &
     773    112827474 :                                                 gbo(1, 1) + MODULO(bo(1, 1) - 1 - gbo(1, 1), gbo(2, 1) - gbo(1, 1) + 1)))
     774     56427058 :          tmp(:, :) = pw_in%array(bo(1, 1), :, :)
     775              :          CALL pw_in%pw_grid%para%group%sendrecv(tmp, pw_in%pw_grid%para%pos_of_x( &
     776              :                                                 gbo(1, 1) + MODULO(bo(1, 1) - 1 - gbo(1, 1), gbo(2, 1) - gbo(1, 1) + 1)), &
     777              :                                                 u_boundary, pw_in%pw_grid%para%pos_of_x( &
     778    112827474 :                                                 gbo(1, 1) + MODULO(bo(2, 1) + 1 - gbo(1, 1), gbo(2, 1) - gbo(1, 1) + 1)))
     779        26642 :          DEALLOCATE (tmp)
     780              :       END IF
     781              : 
     782              : !$OMP   PARALLEL DO DEFAULT(NONE) PRIVATE(k,kw,myk,j,jw,myj,in_val_f,&
     783              : !$OMP       in_val_l) SHARED(zderiv,yderiv,bo,in_val,out_val,s,l_boundary,&
     784        28070 : !$OMP       u_boundary,weights,has_boundary)
     785              :       DO k = 0, s(3) - 1
     786              :          DO kw = 0, 2
     787              :             myk = bo(1, 3) + MODULO(k + kw - 1, s(3))
     788              :             IF (zderiv .AND. kw == 1) CYCLE
     789              :             DO j = 0, s(2) - 1
     790              :                DO jw = 0, 2
     791              :                   myj = bo(1, 2) + MODULO(j + jw - 1, s(2))
     792              :                   IF (yderiv .AND. jw == 1) CYCLE
     793              :                   IF (has_boundary) THEN
     794              :                      in_val_f = l_boundary(myj, myk)
     795              :                      in_val_l = u_boundary(myj, myk)
     796              :                   ELSE
     797              :                      in_val_f = in_val(bo(2, 1), myj, myk)
     798              :                      in_val_l = in_val(bo(1, 1), myj, myk)
     799              :                   END IF
     800              :                   CALL pw_compose_stripe(weights=weights(:, jw, kw), &
     801              :                                          in_val=in_val(:, myj, myk), &
     802              :                                          in_val_first=in_val_f, in_val_last=in_val_l, &
     803              :                                          out_val=out_val(:, bo(1, 2) + j, bo(1, 3) + k), n_el=s(1))
     804              :                END DO
     805              :             END DO
     806              :          END DO
     807              :       END DO
     808        28070 :       IF (has_boundary) THEN
     809        26642 :          DEALLOCATE (l_boundary, u_boundary)
     810              :       END IF
     811        28070 :    END SUBROUTINE pw_nn_compose_r_work
     812              : 
     813              : ! **************************************************************************************************
     814              : !> \brief applies a nearest neighbor linear operator to a pw in real space
     815              : !> \param weights a 3x3x3 array with the linear operator
     816              : !> \param pw_in the argument for the linear operator
     817              : !> \param pw_out place where the value of the linear oprator should be added
     818              : !> \author fawzi
     819              : !> \note
     820              : !>      has specialized versions for derivative operator (with central values==0)
     821              : ! **************************************************************************************************
     822        28070 :    SUBROUTINE pw_nn_compose_r(weights, pw_in, pw_out)
     823              :       REAL(kind=dp), DIMENSION(0:2, 0:2, 0:2)            :: weights
     824              :       TYPE(pw_r3d_rs_type), INTENT(IN)                   :: pw_in, pw_out
     825              : 
     826              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'pw_nn_compose_r'
     827              : 
     828              :       INTEGER                                            :: handle
     829              : 
     830        28070 :       CALL timeset(routineN, handle)
     831       196490 :       IF (.NOT. ALL(pw_in%pw_grid%bounds_local(:, 2:3) == pw_in%pw_grid%bounds(:, 2:3))) THEN
     832            0 :          CPABORT("wrong pw distribution")
     833              :       END IF
     834              :       CALL pw_nn_compose_r_work(weights=weights, in_val=pw_in%array, &
     835        28070 :                                 out_val=pw_out%array, pw_in=pw_in, bo=pw_in%pw_grid%bounds_local)
     836        28070 :       CALL timestop(handle)
     837        28070 :    END SUBROUTINE pw_nn_compose_r
     838              : 
     839              : ! **************************************************************************************************
     840              : !> \brief calculates the values of a nearest neighbor smearing
     841              : !> \param pw_in the argument for the linear operator
     842              : !> \param pw_out place where the smeared values should be added
     843              : !> \param coeffs array with the coefficent of the smearing, ordered with
     844              : !>        the distance from the center: coeffs(1) the coeff of the central
     845              : !>        element, coeffs(2) the coeff of the 6 element with distance 1,
     846              : !>        coeff(3) the coeff of the 12 elements at distance sqrt(2),
     847              : !>        coeff(4) the coeff of the 8 elements at distance sqrt(3).
     848              : !> \author Fawzi Mohamed
     849              : !> \note
     850              : !>      does not normalize the smear to 1.
     851              : !>      with coeff=(/ 8._dp/27._dp, 2._dp/27._dp, 1._dp/54._dp, 1._dp/216._dp /)
     852              : !>      is equivalent to pw_spline3_evaluate_values_g, with
     853              : !>      coeff=(/ 27._dp/64._dp, 9._dp/128._dp, 3._dp/256._dp, 1._dp/512._dp /)
     854              : ! **************************************************************************************************
     855        14774 :    SUBROUTINE pw_nn_smear_r(pw_in, pw_out, coeffs)
     856              :       TYPE(pw_r3d_rs_type), INTENT(IN)                   :: pw_in, pw_out
     857              :       REAL(KIND=dp), DIMENSION(4), INTENT(in)            :: coeffs
     858              : 
     859              :       INTEGER                                            :: i, j, k
     860              :       REAL(kind=dp), DIMENSION(-1:1, -1:1, -1:1)         :: weights
     861              : 
     862        59096 :       DO k = -1, 1
     863       192062 :          DO j = -1, 1
     864       576186 :             DO i = -1, 1
     865       531864 :                weights(i, j, k) = coeffs(ABS(i) + ABS(j) + ABS(k) + 1)
     866              :             END DO
     867              :          END DO
     868              :       END DO
     869              : 
     870        14774 :       CALL pw_nn_compose_r(weights=weights, pw_in=pw_in, pw_out=pw_out)
     871        14774 :    END SUBROUTINE pw_nn_smear_r
     872              : 
     873              : ! **************************************************************************************************
     874              : !> \brief calculates a nearest neighbor central derivative.
     875              : !>      for the x dir:
     876              : !>      pw_out%array(i,j,k)=( pw_in(i+1,j,k)-pw_in(i-1,j,k) )*coeff(1)+
     877              : !>             ( pw_in(i+1,j(+-)1,k)-pw_in(i-1,j(+-)1,k)+
     878              : !>               pw_in(i+1,j,k(+-)1)-pw_in(i-1,j,k(+-)1) )*coeff(2)+
     879              : !>             ( pw_in(i+1,j(+-)1,k(+-)1)-pw_in(i-1,j(+-)1,k(+-)1)+
     880              : !>               pw_in(i+1,j(+-)1,k(-+)1)-pw_in(i-1,j(+-)1,k(-+)1) )*coeff(3)
     881              : !>      periodic boundary conditions are applied
     882              : !> \param pw_in the argument for the linear operator
     883              : !> \param pw_out place where the smeared values should be added
     884              : !> \param coeffs array with the coefficent of the front (positive) plane
     885              : !>        of the central derivative, ordered with
     886              : !>        the distance from the center: coeffs(1) the coeff of the central
     887              : !>        element, coeffs(2) the coeff of the 4 element with distance 1,
     888              : !>        coeff(3) the coeff of the 4 elements at distance sqrt(2)
     889              : !> \param idir ...
     890              : !> \author Fawzi Mohamed
     891              : !> \note
     892              : !>      with coeff=(/ 2.0_dp/9.0_dp,  1.0_dp/18.0_dp, 1.0_dp/72.0_dp /)
     893              : !>      is equivalent to pw_spline3_deriv_r, with
     894              : !>      coeff=(/ 9.0_dp/32.0_dp, 3.0_dp/64.0_dp, 1.0_dp/128.0_dp /)
     895              : !>      to pw_spline2_deriv_r
     896              : !>      coeff=(/ 25._dp/72._dp, 5._dp/144, 1._dp/288._dp /)
     897              : ! **************************************************************************************************
     898        13296 :    SUBROUTINE pw_nn_deriv_r(pw_in, pw_out, coeffs, idir)
     899              :       TYPE(pw_r3d_rs_type), INTENT(IN)                   :: pw_in, pw_out
     900              :       REAL(KIND=dp), DIMENSION(3), INTENT(in)            :: coeffs
     901              :       INTEGER                                            :: idir
     902              : 
     903              :       INTEGER                                            :: i, idirVal, j, k
     904              :       REAL(kind=dp), DIMENSION(-1:1, -1:1, -1:1)         :: weights
     905              : 
     906        53184 :       DO k = -1, 1
     907       172848 :          DO j = -1, 1
     908       518544 :             DO i = -1, 1
     909       358992 :                SELECT CASE (idir)
     910              :                CASE (1)
     911       119664 :                   idirVal = i
     912              :                CASE (2)
     913       119664 :                   idirVal = j
     914              :                CASE (3)
     915       119664 :                   idirVal = k
     916              :                CASE default
     917       358992 :                   CPABORT("invalid idir ("//TRIM(cp_to_string(idir))//")")
     918              :                END SELECT
     919       478656 :                IF (idirVal == 0) THEN
     920       119664 :                   weights(i, j, k) = 0.0_dp
     921              :                ELSE
     922       239328 :                   weights(i, j, k) = REAL(idirVal, dp)*coeffs(ABS(i) + ABS(j) + ABS(k))
     923              :                END IF
     924              :             END DO
     925              :          END DO
     926              :       END DO
     927              : 
     928        13296 :       CALL pw_nn_compose_r(weights=weights, pw_in=pw_in, pw_out=pw_out)
     929        13296 :    END SUBROUTINE pw_nn_deriv_r
     930              : 
     931              : ! **************************************************************************************************
     932              : !> \brief low level function that adds a coarse grid
     933              : !>      to a fine grid.
     934              : !>      If pbc is true periodic boundary conditions are applied
     935              : !>
     936              : !>      It will add to
     937              : !>
     938              : !>        fine_values(2*coarse_bounds(1,1):2*coarse_bounds(2,1),
     939              : !>                    2*coarse_bounds(1,2):2*coarse_bounds(2,2),
     940              : !>                    2*coarse_bounds(1,3):2*coarse_bounds(2,3))
     941              : !>
     942              : !>      using
     943              : !>
     944              : !>        coarse_coeffs(coarse_bounds(1,1):coarse_bounds(2,1),
     945              : !>                      coarse_bounds(1,2):coarse_bounds(2,2),
     946              : !>                      coarse_bounds(1,3):coarse_bounds(2,3))
     947              : !>
     948              : !>      composed with the weights obtained by the direct product of the
     949              : !>      1d coefficients weights:
     950              : !>
     951              : !>      for i,j,k in -3..3
     952              : !>         w(i,j,k)=weights_1d(abs(i)+1)*weights_1d(abs(j)+1)*
     953              : !>                  weights_1d(abs(k)+1)
     954              : !> \param coarse_coeffs_pw the values of the coefficients
     955              : !> \param fine_values_pw where to add the values due to the
     956              : !>        coarse coeffs
     957              : !> \param weights_1d the weights of the 1d smearing
     958              : !> \param w_border0 the 1d weight at the border (when pbc is false)
     959              : !> \param w_border1 the 1d weights for a point one off the border
     960              : !>        (w_border1(1) is the weight of the coefficent at the border)
     961              : !>        (used if pbc is false)
     962              : !> \param pbc if periodic boundary conditions should be applied
     963              : !> \param safe_computation ...
     964              : !> \author fawzi
     965              : !> \note
     966              : !>      coarse looping is continuos, I did not check if keeping the fine looping
     967              : !>      contiguous is better.
     968              : !>      And I ask myself yet again why, why we use x-slice distribution,
     969              : !>      z-slice distribution would be much better performancewise
     970              : !>      (and would semplify this code enormously).
     971              : !>      fine2coarse has much more understandable parallel part (build up of
     972              : !>      send/rcv sizes,... but worse if you have really a lot of processors,
     973              : !>      probabily irrelevant because it is not critical) [fawzi].
     974              : ! **************************************************************************************************
     975         2260 :    SUBROUTINE add_coarse2fine(coarse_coeffs_pw, fine_values_pw, &
     976              :                               weights_1d, w_border0, w_border1, pbc, safe_computation)
     977              :       TYPE(pw_r3d_rs_type), INTENT(IN)                   :: coarse_coeffs_pw, fine_values_pw
     978              :       REAL(kind=dp), DIMENSION(4), INTENT(in)            :: weights_1d
     979              :       REAL(kind=dp), INTENT(in)                          :: w_border0
     980              :       REAL(kind=dp), DIMENSION(3), INTENT(in)            :: w_border1
     981              :       LOGICAL, INTENT(in)                                :: pbc
     982              :       LOGICAL, INTENT(in), OPTIONAL                      :: safe_computation
     983              : 
     984              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'add_coarse2fine'
     985              : 
     986              :       INTEGER :: coarse_slice_size, f_shift(3), fi, fi_lb, fi_ub, fj, fk, handle, handle2, i, ii, &
     987              :          ij, ik, ip, j, k, my_lb, my_ub, n_procs, p, p_lb, p_old, p_ub, rcv_tot_size, rest_b, &
     988              :          s(3), send_tot_size, sf, shift, ss, x, x_att, xx
     989         2260 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: rcv_offset, rcv_size, real_rcv_size, &
     990         2260 :                                                             send_offset, send_size, sent_size
     991              :       INTEGER, DIMENSION(2, 3)                           :: coarse_bo, coarse_gbo, fine_bo, &
     992              :                                                             fine_gbo, my_coarse_bo
     993         2260 :       INTEGER, DIMENSION(:), POINTER                     :: pos_of_x
     994              :       LOGICAL                                            :: has_i_lbound, has_i_ubound, is_split, &
     995              :                                                             safe_calc
     996              :       REAL(kind=dp)                                      :: v0, v1, v2, v3, wi, wj, wk
     997         2260 :       REAL(kind=dp), ALLOCATABLE, DIMENSION(:)           :: rcv_buf, send_buf
     998              :       REAL(kind=dp), DIMENSION(3)                        :: w_0, ww0
     999              :       REAL(kind=dp), DIMENSION(4)                        :: w_1, ww1
    1000         2260 :       REAL(kind=dp), DIMENSION(:, :, :), POINTER         :: coarse_coeffs, fine_values
    1001              : 
    1002         2260 :       CALL timeset(routineN, handle)
    1003              : !    CALL timeset(routineN//"_pre",handle2)
    1004         2260 :       safe_calc = .FALSE.
    1005         2260 :       IF (PRESENT(safe_computation)) safe_calc = safe_computation
    1006         2260 :       ii = coarse_coeffs_pw%pw_grid%para%group%compare(fine_values_pw%pw_grid%para%group)
    1007         2260 :       CPASSERT(ii <= mp_comm_congruent)
    1008        22600 :       my_coarse_bo = coarse_coeffs_pw%pw_grid%bounds_local
    1009        22600 :       coarse_gbo = coarse_coeffs_pw%pw_grid%bounds
    1010        22600 :       fine_bo = fine_values_pw%pw_grid%bounds_local
    1011        22600 :       fine_gbo = fine_values_pw%pw_grid%bounds
    1012         9040 :       f_shift = fine_gbo(1, :) - 2*coarse_gbo(1, :)
    1013         6780 :       DO j = 2, 3
    1014        15820 :          DO i = 1, 2
    1015        13560 :             coarse_bo(i, j) = FLOOR((fine_bo(i, j) - f_shift(j))/2.)
    1016              :          END DO
    1017              :       END DO
    1018         2260 :       IF (fine_bo(1, 1) <= fine_bo(2, 1)) THEN
    1019         2260 :          coarse_bo(1, 1) = FLOOR((fine_bo(1, 1) - 2 - f_shift(1))/2.)
    1020         2260 :          coarse_bo(2, 1) = FLOOR((fine_bo(2, 1) + 3 - f_shift(1))/2.)
    1021              :       ELSE
    1022            0 :          coarse_bo(1, 1) = coarse_gbo(2, 1)
    1023            0 :          coarse_bo(2, 1) = coarse_gbo(2, 1) - 1
    1024              :       END IF
    1025         6744 :       is_split = ANY(coarse_gbo(:, 1) /= my_coarse_bo(:, 1))
    1026         2260 :       IF (.NOT. is_split .OR. .NOT. pbc) THEN
    1027         2260 :          coarse_bo(1, 1) = MAX(coarse_gbo(1, 1), coarse_bo(1, 1))
    1028         2260 :          coarse_bo(2, 1) = MIN(coarse_gbo(2, 1), coarse_bo(2, 1))
    1029              :       END IF
    1030         2260 :       has_i_ubound = (fine_gbo(2, 1) /= fine_bo(2, 1)) .OR. pbc .AND. is_split
    1031         2260 :       has_i_lbound = (fine_gbo(1, 1) /= fine_bo(1, 1)) .OR. pbc .AND. is_split
    1032              : 
    1033         2260 :       IF (pbc) THEN
    1034            0 :          CPASSERT(ALL(fine_gbo(1, :) == 2*coarse_gbo(1, :) + f_shift))
    1035            0 :          CPASSERT(ALL(fine_gbo(2, :) == 2*coarse_gbo(2, :) + 1 + f_shift))
    1036              :       ELSE
    1037         9040 :          CPASSERT(ALL(fine_gbo(2, :) == 2*coarse_gbo(2, :) + f_shift))
    1038         9040 :          CPASSERT(ALL(fine_gbo(1, :) == 2*coarse_gbo(1, :) + f_shift))
    1039              :       END IF
    1040              : 
    1041         2260 :       coarse_coeffs => coarse_coeffs_pw%array
    1042         9040 :       DO i = 1, 3
    1043         9040 :          s(i) = coarse_gbo(2, i) - coarse_gbo(1, i) + 1
    1044              :       END DO
    1045              : !       CALL timestop(handle2)
    1046              :       ! *** parallel case
    1047         2260 :       IF (is_split) THEN
    1048           24 :          CALL timeset(routineN//"_comm", handle2)
    1049              :          coarse_slice_size = (coarse_bo(2, 2) - coarse_bo(1, 2) + 1)* &
    1050           24 :                              (coarse_bo(2, 3) - coarse_bo(1, 3) + 1)
    1051           24 :          n_procs = coarse_coeffs_pw%pw_grid%para%group%num_pe
    1052              :          ALLOCATE (send_size(0:n_procs - 1), send_offset(0:n_procs - 1), &
    1053              :                    sent_size(0:n_procs - 1), rcv_size(0:n_procs - 1), &
    1054          192 :                    rcv_offset(0:n_procs - 1), real_rcv_size(0:n_procs - 1))
    1055              : 
    1056              :          ! ** rcv size count
    1057              : 
    1058           24 :          pos_of_x => coarse_coeffs_pw%pw_grid%para%pos_of_x
    1059              :          p_old = pos_of_x(coarse_gbo(1, 1) &
    1060           24 :                           + MODULO(coarse_bo(1, 1) - coarse_gbo(1, 1), s(1)))
    1061           24 :          rcv_size = 0
    1062          184 :          DO x = coarse_bo(1, 1), coarse_bo(2, 1)
    1063          160 :             p = pos_of_x(coarse_gbo(1, 1) + MODULO(x - coarse_gbo(1, 1), s(1)))
    1064          184 :             rcv_size(p) = rcv_size(p) + coarse_slice_size
    1065              :          END DO
    1066              : 
    1067              :          ! ** send size count
    1068              : 
    1069           24 :          pos_of_x => fine_values_pw%pw_grid%para%pos_of_x
    1070           24 :          sf = fine_gbo(2, 1) - fine_gbo(1, 1) + 1
    1071           24 :          fi_lb = 2*my_coarse_bo(1, 1) - 3 + f_shift(1)
    1072           24 :          fi_ub = 2*my_coarse_bo(2, 1) + 3 + f_shift(1)
    1073           24 :          IF (.NOT. pbc) THEN
    1074           24 :             fi_lb = MAX(fi_lb, fine_gbo(1, 1))
    1075           24 :             fi_ub = MIN(fi_ub, fine_gbo(2, 1))
    1076              :          ELSE
    1077            0 :             fi_ub = MIN(fi_ub, fi_lb + sf - 1)
    1078              :          END IF
    1079           24 :          p_old = pos_of_x(fine_gbo(1, 1) + MODULO(fi_lb - fine_gbo(1, 1), sf))
    1080           24 :          p_lb = FLOOR((fi_lb - 2 - f_shift(1))/2.)
    1081           24 :          send_size = 0
    1082          320 :          DO x = fi_lb, fi_ub
    1083          296 :             p = pos_of_x(fine_gbo(1, 1) + MODULO(x - fine_gbo(1, 1), sf))
    1084          320 :             IF (p /= p_old) THEN
    1085           24 :                p_ub = FLOOR((x - 1 + 3 - f_shift(1))/2.)
    1086              : 
    1087              :                send_size(p_old) = send_size(p_old) + (MIN(p_ub, my_coarse_bo(2, 1)) &
    1088           24 :                                                       - MAX(p_lb, my_coarse_bo(1, 1)) + 1)*coarse_slice_size
    1089              : 
    1090           24 :                IF (pbc) THEN
    1091            0 :                   DO xx = p_lb, coarse_gbo(1, 1) - 1
    1092            0 :                      x_att = coarse_gbo(1, 1) + MODULO(xx - coarse_gbo(1, 1), s(1))
    1093            0 :                      IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
    1094            0 :                         send_size(p_old) = send_size(p_old) + coarse_slice_size
    1095              :                      END IF
    1096              :                   END DO
    1097            0 :                   DO xx = coarse_gbo(2, 1) + 1, p_ub
    1098            0 :                      x_att = coarse_gbo(1, 1) + MODULO(xx - coarse_gbo(1, 1), s(1))
    1099            0 :                      IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
    1100            0 :                         send_size(p_old) = send_size(p_old) + coarse_slice_size
    1101              :                      END IF
    1102              :                   END DO
    1103              :                END IF
    1104              : 
    1105           24 :                p_old = p
    1106           24 :                p_lb = FLOOR((x - 2 - f_shift(1))/2.)
    1107              :             END IF
    1108              :          END DO
    1109           24 :          p_ub = FLOOR((fi_ub + 3 - f_shift(1))/2.)
    1110              : 
    1111              :          send_size(p_old) = send_size(p_old) + (MIN(p_ub, my_coarse_bo(2, 1)) &
    1112           24 :                                                 - MAX(p_lb, my_coarse_bo(1, 1)) + 1)*coarse_slice_size
    1113              : 
    1114           24 :          IF (pbc) THEN
    1115            0 :             DO xx = p_lb, coarse_gbo(1, 1) - 1
    1116            0 :                x_att = coarse_gbo(1, 1) + MODULO(xx - coarse_gbo(1, 1), s(1))
    1117            0 :                IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
    1118            0 :                   send_size(p_old) = send_size(p_old) + coarse_slice_size
    1119              :                END IF
    1120              :             END DO
    1121            0 :             DO xx = coarse_gbo(2, 1) + 1, p_ub
    1122            0 :                x_att = coarse_gbo(1, 1) + MODULO(xx - coarse_gbo(1, 1), s(1))
    1123            0 :                IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
    1124            0 :                   send_size(p_old) = send_size(p_old) + coarse_slice_size
    1125              :                END IF
    1126              :             END DO
    1127              :          END IF
    1128              :          ! ** offsets & alloc send-rcv
    1129              : 
    1130              :          send_tot_size = 0
    1131           72 :          DO ip = 0, n_procs - 1
    1132           48 :             send_offset(ip) = send_tot_size
    1133           72 :             send_tot_size = send_tot_size + send_size(ip)
    1134              :          END DO
    1135           72 :          ALLOCATE (send_buf(0:send_tot_size - 1))
    1136              : 
    1137           24 :          rcv_tot_size = 0
    1138           72 :          DO ip = 0, n_procs - 1
    1139           48 :             rcv_offset(ip) = rcv_tot_size
    1140           72 :             rcv_tot_size = rcv_tot_size + rcv_size(ip)
    1141              :          END DO
    1142           24 :          IF (.NOT. rcv_tot_size == (coarse_bo(2, 1) - coarse_bo(1, 1) + 1)*coarse_slice_size) THEN
    1143            0 :             CPABORT("Error calculating rcv_tot_size ")
    1144              :          END IF
    1145           72 :          ALLOCATE (rcv_buf(0:rcv_tot_size - 1))
    1146              : 
    1147              :          ! ** fill send buffer
    1148              : 
    1149           24 :          p_old = pos_of_x(fine_gbo(1, 1) + MODULO(fi_lb - fine_gbo(1, 1), sf))
    1150           24 :          p_lb = FLOOR((fi_lb - 2 - f_shift(1))/2.)
    1151           72 :          sent_size(:) = send_offset
    1152           24 :          ss = my_coarse_bo(2, 1) - my_coarse_bo(1, 1) + 1
    1153          320 :          DO x = fi_lb, fi_ub
    1154          296 :             p = pos_of_x(fine_gbo(1, 1) + MODULO(x - fine_gbo(1, 1), sf))
    1155          320 :             IF (p /= p_old) THEN
    1156              :                shift = FLOOR((fine_gbo(1, 1) + MODULO(x - 1 - fine_gbo(1, 1), sf) - f_shift(1))/2._dp) - &
    1157           24 :                        FLOOR((x - 1 - f_shift(1))/2._dp)
    1158           24 :                p_ub = FLOOR((x - 1 + 3 - f_shift(1))/2._dp)
    1159              : 
    1160           24 :                IF (pbc) THEN
    1161            0 :                   DO xx = p_lb + shift, coarse_gbo(1, 1) - 1
    1162            0 :                      x_att = coarse_gbo(1, 1) + MODULO(xx - coarse_gbo(1, 1), sf)
    1163            0 :                      IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
    1164              :                         CALL dcopy(coarse_slice_size, &
    1165              :                                    coarse_coeffs(x_att, my_coarse_bo(1, 2), &
    1166            0 :                                                  my_coarse_bo(1, 3)), ss, send_buf(sent_size(p_old)), 1)
    1167            0 :                         sent_size(p_old) = sent_size(p_old) + coarse_slice_size
    1168              :                      END IF
    1169              :                   END DO
    1170              :                END IF
    1171              : 
    1172           24 :                ii = sent_size(p_old)
    1173          272 :                DO k = coarse_bo(1, 3), coarse_bo(2, 3)
    1174         3432 :                   DO j = coarse_bo(1, 2), coarse_bo(2, 2)
    1175        17312 :                      DO i = MAX(p_lb + shift, my_coarse_bo(1, 1)), MIN(p_ub + shift, my_coarse_bo(2, 1))
    1176        13904 :                         send_buf(ii) = coarse_coeffs(i, j, k)
    1177        17064 :                         ii = ii + 1
    1178              :                      END DO
    1179              :                   END DO
    1180              :                END DO
    1181           24 :                sent_size(p_old) = ii
    1182              : 
    1183           24 :                IF (pbc) THEN
    1184            0 :                   DO xx = coarse_gbo(2, 1) + 1, p_ub + shift
    1185            0 :                      x_att = coarse_gbo(1, 1) + MODULO(xx - coarse_gbo(1, 1), s(1))
    1186            0 :                      IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
    1187              :                         CALL dcopy(coarse_slice_size, &
    1188              :                                    coarse_coeffs(x_att, my_coarse_bo(1, 2), &
    1189              :                                                  my_coarse_bo(1, 3)), ss, &
    1190            0 :                                    send_buf(sent_size(p_old)), 1)
    1191            0 :                         sent_size(p_old) = sent_size(p_old) + coarse_slice_size
    1192              :                      END IF
    1193              :                   END DO
    1194              :                END IF
    1195              : 
    1196           24 :                p_old = p
    1197           24 :                p_lb = FLOOR((x - 2 - f_shift(1))/2.)
    1198              :             END IF
    1199              :          END DO
    1200              :          shift = FLOOR((fine_gbo(1, 1) + MODULO(x - 1 - fine_gbo(1, 1), sf) - f_shift(1))/2._dp) - &
    1201           24 :                  FLOOR((x - 1 - f_shift(1))/2._dp)
    1202           24 :          p_ub = FLOOR((fi_ub + 3 - f_shift(1))/2.)
    1203              : 
    1204           24 :          IF (pbc) THEN
    1205            0 :             DO xx = p_lb + shift, coarse_gbo(1, 1) - 1
    1206            0 :                x_att = coarse_gbo(1, 1) + MODULO(xx - coarse_gbo(1, 1), s(1))
    1207            0 :                IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
    1208              :                   CALL dcopy(coarse_slice_size, &
    1209              :                              coarse_coeffs(x_att, my_coarse_bo(1, 2), &
    1210            0 :                                            my_coarse_bo(1, 3)), ss, send_buf(sent_size(p_old)), 1)
    1211            0 :                   sent_size(p_old) = sent_size(p_old) + coarse_slice_size
    1212              :                END IF
    1213              :             END DO
    1214              :          END IF
    1215              : 
    1216           24 :          ii = sent_size(p_old)
    1217          272 :          DO k = coarse_bo(1, 3), coarse_bo(2, 3)
    1218         3432 :             DO j = coarse_bo(1, 2), coarse_bo(2, 2)
    1219        17312 :                DO i = MAX(p_lb + shift, my_coarse_bo(1, 1)), MIN(p_ub + shift, my_coarse_bo(2, 1))
    1220        13904 :                   send_buf(ii) = coarse_coeffs(i, j, k)
    1221        17064 :                   ii = ii + 1
    1222              :                END DO
    1223              :             END DO
    1224              :          END DO
    1225           24 :          sent_size(p_old) = ii
    1226              : 
    1227           24 :          IF (pbc) THEN
    1228            0 :             DO xx = coarse_gbo(2, 1) + 1, p_ub + shift
    1229            0 :                x_att = coarse_gbo(1, 1) + MODULO(xx - coarse_gbo(1, 1), s(1))
    1230            0 :                IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
    1231              :                   CALL dcopy(coarse_slice_size, &
    1232              :                              coarse_coeffs(x_att, my_coarse_bo(1, 2), &
    1233            0 :                                            my_coarse_bo(1, 3)), ss, send_buf(sent_size(p_old)), 1)
    1234            0 :                   sent_size(p_old) = sent_size(p_old) + coarse_slice_size
    1235              :                END IF
    1236              :             END DO
    1237              :          END IF
    1238              : 
    1239           48 :          CPASSERT(ALL(sent_size(:n_procs - 2) == send_offset(1:)))
    1240           24 :          CPASSERT(sent_size(n_procs - 1) == send_tot_size)
    1241              :          ! test send/rcv sizes
    1242           24 :          CALL coarse_coeffs_pw%pw_grid%para%group%alltoall(send_size, real_rcv_size, 1)
    1243           72 :          CPASSERT(ALL(real_rcv_size == rcv_size))
    1244              :          ! all2all
    1245              :          CALL coarse_coeffs_pw%pw_grid%para%group%alltoall(sb=send_buf, scount=send_size, sdispl=send_offset, &
    1246           24 :                                                            rb=rcv_buf, rcount=rcv_size, rdispl=rcv_offset)
    1247              : 
    1248              :          ! ** reorder rcv buffer
    1249              :          ! (actually reordering should be needed only with pbc)
    1250              : 
    1251              :          ALLOCATE (coarse_coeffs(coarse_bo(1, 1):coarse_bo(2, 1), &
    1252              :                                  coarse_bo(1, 2):coarse_bo(2, 2), &
    1253          120 :                                  coarse_bo(1, 3):coarse_bo(2, 3)))
    1254              : 
    1255           24 :          my_lb = MAX(coarse_gbo(1, 1), coarse_bo(1, 1))
    1256           24 :          my_ub = MIN(coarse_gbo(2, 1), coarse_bo(2, 1))
    1257           24 :          pos_of_x => coarse_coeffs_pw%pw_grid%para%pos_of_x
    1258           72 :          sent_size(:) = rcv_offset
    1259           24 :          ss = coarse_bo(2, 1) - coarse_bo(1, 1) + 1
    1260           24 :          DO x = my_ub + 1, coarse_bo(2, 1)
    1261            0 :             p_old = pos_of_x(coarse_gbo(1, 1) + MODULO(x - coarse_gbo(1, 1), s(1)))
    1262              :             CALL dcopy(coarse_slice_size, &
    1263              :                        rcv_buf(sent_size(p_old)), 1, &
    1264              :                        coarse_coeffs(x, coarse_bo(1, 2), &
    1265            0 :                                      coarse_bo(1, 3)), ss)
    1266           24 :             sent_size(p_old) = sent_size(p_old) + coarse_slice_size
    1267              :          END DO
    1268              :          p_old = pos_of_x(coarse_gbo(1, 1) &
    1269           24 :                           + MODULO(my_lb - coarse_gbo(1, 1), s(1)))
    1270           24 :          p_lb = my_lb
    1271          184 :          DO x = my_lb, my_ub
    1272          160 :             p = pos_of_x(coarse_gbo(1, 1) + MODULO(x - coarse_gbo(1, 1), s(1)))
    1273          160 :             IF (p /= p_old) THEN
    1274           24 :                p_ub = x - 1
    1275              : 
    1276           24 :                ii = sent_size(p_old)
    1277          272 :                DO k = coarse_bo(1, 3), coarse_bo(2, 3)
    1278         3432 :                   DO j = coarse_bo(1, 2), coarse_bo(2, 2)
    1279        15732 :                      DO i = p_lb, p_ub
    1280        12324 :                         coarse_coeffs(i, j, k) = rcv_buf(ii)
    1281        15484 :                         ii = ii + 1
    1282              :                      END DO
    1283              :                   END DO
    1284              :                END DO
    1285           24 :                sent_size(p_old) = ii
    1286              : 
    1287           24 :                p_lb = x
    1288           24 :                p_old = p
    1289              :             END IF
    1290          184 :             rcv_size(p) = rcv_size(p) + coarse_slice_size
    1291              :          END DO
    1292           24 :          p_ub = my_ub
    1293           24 :          ii = sent_size(p_old)
    1294          272 :          DO k = coarse_bo(1, 3), coarse_bo(2, 3)
    1295         3432 :             DO j = coarse_bo(1, 2), coarse_bo(2, 2)
    1296        18892 :                DO i = p_lb, p_ub
    1297        15484 :                   coarse_coeffs(i, j, k) = rcv_buf(ii)
    1298        18644 :                   ii = ii + 1
    1299              :                END DO
    1300              :             END DO
    1301              :          END DO
    1302           24 :          sent_size(p_old) = ii
    1303           24 :          DO x = coarse_bo(1, 1), my_lb - 1
    1304            0 :             p_old = pos_of_x(coarse_gbo(1, 1) + MODULO(x - coarse_gbo(1, 1), s(1)))
    1305              :             CALL dcopy(coarse_slice_size, &
    1306              :                        rcv_buf(sent_size(p_old)), 1, &
    1307              :                        coarse_coeffs(x, coarse_bo(1, 2), &
    1308            0 :                                      coarse_bo(1, 3)), ss)
    1309           24 :             sent_size(p_old) = sent_size(p_old) + coarse_slice_size
    1310              :          END DO
    1311              : 
    1312           48 :          CPASSERT(ALL(sent_size(0:n_procs - 2) == rcv_offset(1:)))
    1313           24 :          CPASSERT(sent_size(n_procs - 1) == rcv_tot_size)
    1314              : 
    1315              :          ! dealloc
    1316           24 :          DEALLOCATE (send_size, send_offset, rcv_size, rcv_offset)
    1317           24 :          DEALLOCATE (send_buf, rcv_buf, real_rcv_size)
    1318           48 :          CALL timestop(handle2)
    1319              : 
    1320              :       END IF
    1321         2260 :       fine_values => fine_values_pw%array
    1322         9040 :       w_0 = [weights_1d(3), weights_1d(1), weights_1d(3)]
    1323        11300 :       w_1 = [weights_1d(4), weights_1d(2), weights_1d(2), weights_1d(4)]
    1324              : 
    1325        30388 :       DO k = coarse_bo(1, 3), coarse_bo(2, 3)
    1326       227284 :          DO ik = -3, 3
    1327       196896 :             IF (pbc) THEN
    1328            0 :                wk = weights_1d(ABS(ik) + 1)
    1329            0 :                fk = fine_gbo(1, 3) + MODULO(2*k + ik - fine_gbo(1, 3) + f_shift(3), 2*s(3))
    1330              :             ELSE
    1331       196896 :                fk = 2*k + ik + f_shift(3)
    1332       196896 :                IF (fk <= fine_bo(1, 3) + 1 .OR. fk >= fine_bo(2, 3) - 1) THEN
    1333        40680 :                   IF (fk < fine_bo(1, 3) .OR. fk > fine_bo(2, 3)) CYCLE
    1334        22600 :                   IF (fk == fine_bo(1, 3) .OR. fk == fine_bo(2, 3)) THEN
    1335         9040 :                      IF (ik /= 0) CYCLE
    1336         4520 :                      wk = w_border0
    1337        13560 :                   ELSE IF (fk == 2*coarse_bo(1, 3) + 1 + f_shift(3)) THEN
    1338         2260 :                      SELECT CASE (ik)
    1339              :                      CASE (1)
    1340         2260 :                         wk = w_border1(1)
    1341              :                      CASE (-1)
    1342         2260 :                         wk = w_border1(2)
    1343              :                      CASE (-3)
    1344         2260 :                         wk = w_border1(3)
    1345              :                      CASE default
    1346            0 :                         CPABORT("Only 1, -1, -3 are supported as the value of ik")
    1347         6780 :                         CYCLE
    1348              :                      END SELECT
    1349              :                   ELSE
    1350         2260 :                      SELECT CASE (ik)
    1351              :                      CASE (3)
    1352         2260 :                         wk = w_border1(3)
    1353              :                      CASE (1)
    1354         2260 :                         wk = w_border1(2)
    1355              :                      CASE (-1)
    1356         2260 :                         wk = w_border1(1)
    1357              :                      CASE default
    1358            0 :                         CPABORT("Only 3, 1, -1 are supported as the value of ik")
    1359         6780 :                         CYCLE
    1360              :                      END SELECT
    1361              :                   END IF
    1362              :                ELSE
    1363       156216 :                   wk = weights_1d(ABS(ik) + 1)
    1364              :                END IF
    1365              :             END IF
    1366      3150076 :             DO j = coarse_bo(1, 2), coarse_bo(2, 2)
    1367     23778112 :                DO ij = -3, 3
    1368     20633564 :                   IF (pbc) THEN
    1369            0 :                      wj = weights_1d(ABS(ij) + 1)*wk
    1370            0 :                      fj = fine_gbo(1, 2) + MODULO(2*j + ij - fine_gbo(1, 2) + f_shift(2), 2*s(2))
    1371              :                   ELSE
    1372     20633564 :                      fj = 2*j + ij + f_shift(2)
    1373     20633564 :                      IF (fj <= fine_bo(1, 2) + 1 .OR. fj >= fine_bo(2, 2) - 1) THEN
    1374      3137328 :                         IF (fj < fine_bo(1, 2) .OR. fj > fine_bo(2, 2)) CYCLE
    1375      1742960 :                         IF (fj == fine_bo(1, 2) .OR. fj == fine_bo(2, 2)) THEN
    1376       697184 :                            IF (ij /= 0) CYCLE
    1377       348592 :                            wj = w_border0*wk
    1378      1045776 :                         ELSE IF (fj == 2*coarse_bo(1, 2) + 1 + f_shift(2)) THEN
    1379       174296 :                            SELECT CASE (ij)
    1380              :                            CASE (1)
    1381       174296 :                               wj = w_border1(1)*wk
    1382              :                            CASE (-1)
    1383       174296 :                               wj = w_border1(2)*wk
    1384              :                            CASE (-3)
    1385       174296 :                               wj = w_border1(3)*wk
    1386              :                            CASE default
    1387       522888 :                               CYCLE
    1388              :                            END SELECT
    1389              :                         ELSE
    1390       174296 :                            SELECT CASE (ij)
    1391              :                            CASE (-1)
    1392       174296 :                               wj = w_border1(1)*wk
    1393              :                            CASE (1)
    1394       174296 :                               wj = w_border1(2)*wk
    1395              :                            CASE (3)
    1396       174296 :                               wj = w_border1(3)*wk
    1397              :                            CASE default
    1398       522888 :                               CYCLE
    1399              :                            END SELECT
    1400              :                         END IF
    1401              :                      ELSE
    1402     17496236 :                         wj = weights_1d(ABS(ij) + 1)*wk
    1403              :                      END IF
    1404              :                   END IF
    1405              : 
    1406     21838256 :                   IF (fine_bo(2, 1) - fine_bo(1, 1) < 7 .OR. safe_calc) THEN
    1407              : !                      CALL timeset(routineN//"_safe",handle2)
    1408        25000 :                      DO i = coarse_bo(1, 1), coarse_bo(2, 1)
    1409       165000 :                         DO ii = -3, 3
    1410       140000 :                            IF (pbc .AND. .NOT. is_split) THEN
    1411            0 :                               wi = weights_1d(ABS(ii) + 1)*wj
    1412            0 :                               fi = fine_gbo(1, 1) + MODULO(2*i + ii - fine_gbo(1, 1) + f_shift(1), 2*s(1))
    1413              :                            ELSE
    1414       140000 :                               fi = 2*i + ii + f_shift(1)
    1415       140000 :                               IF (fi < fine_bo(1, 1) .OR. fi > fine_bo(2, 1)) CYCLE
    1416        67500 :                               IF (.NOT. pbc .AND. (fi <= fine_gbo(1, 1) + 1 .OR. &
    1417              :                                                    fi >= fine_gbo(2, 1) - 1)) THEN
    1418        25000 :                                  IF (fi == fine_gbo(1, 1) .OR. fi == fine_gbo(2, 1)) THEN
    1419        10000 :                                     IF (ii /= 0) CYCLE
    1420         5000 :                                     wi = w_border0*wj
    1421        15000 :                                  ELSE IF (fi == fine_gbo(1, 1) + 1) THEN
    1422         2500 :                                     SELECT CASE (ii)
    1423              :                                     CASE (1)
    1424         2500 :                                        wi = w_border1(1)*wj
    1425              :                                     CASE (-1)
    1426         2500 :                                        wi = w_border1(2)*wj
    1427              :                                     CASE (-3)
    1428         2500 :                                        wi = w_border1(3)*wj
    1429              :                                     CASE default
    1430         7500 :                                        CYCLE
    1431              :                                     END SELECT
    1432              :                                  ELSE
    1433         2500 :                                     SELECT CASE (ii)
    1434              :                                     CASE (-1)
    1435         2500 :                                        wi = w_border1(1)*wj
    1436              :                                     CASE (1)
    1437         2500 :                                        wi = w_border1(2)*wj
    1438              :                                     CASE (3)
    1439         2500 :                                        wi = w_border1(3)*wj
    1440              :                                     CASE default
    1441         7500 :                                        CYCLE
    1442              :                                     END SELECT
    1443              :                                  END IF
    1444              :                               ELSE
    1445        42500 :                                  wi = weights_1d(ABS(ii) + 1)*wj
    1446              :                               END IF
    1447              :                            END IF
    1448              :                            fine_values(fi, fj, fk) = &
    1449              :                               fine_values(fi, fj, fk) + &
    1450       160000 :                               wi*coarse_coeffs(i, j, k)
    1451              :                         END DO
    1452              :                      END DO
    1453              : !                      CALL timestop(handle2)
    1454              :                   ELSE
    1455              : !                      CALL timeset(routineN//"_core1",handle2)
    1456     75542416 :                      ww0 = wj*w_0
    1457     94428020 :                      ww1 = wj*w_1
    1458     18885604 :                      IF (pbc .AND. .NOT. is_split) THEN
    1459            0 :                         v3 = coarse_coeffs(coarse_bo(2, 1), j, k)
    1460            0 :                         i = coarse_bo(1, 1)
    1461            0 :                         fi = 2*i + f_shift(1)
    1462            0 :                         v0 = coarse_coeffs(i, j, k)
    1463            0 :                         v1 = coarse_coeffs(i + 1, j, k)
    1464              :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1465            0 :                                                   ww0(1)*v3 + ww0(2)*v0 + ww0(3)*v1
    1466            0 :                         v2 = coarse_coeffs(i + 2, j, k)
    1467            0 :                         fi = fi + 1
    1468              :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1469            0 :                                                   ww1(1)*v3 + ww1(2)*v0 + ww1(3)*v1 + ww1(4)*v2
    1470     18885604 :                      ELSE IF (.NOT. has_i_lbound) THEN
    1471     18826844 :                         i = coarse_bo(1, 1)
    1472     18826844 :                         fi = 2*i + f_shift(1)
    1473     18826844 :                         v0 = coarse_coeffs(i, j, k)
    1474              :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1475     18826844 :                                                   w_border0*wj*v0
    1476     18826844 :                         v1 = coarse_coeffs(i + 1, j, k)
    1477     18826844 :                         v2 = coarse_coeffs(i + 2, j, k)
    1478     18826844 :                         fi = fi + 1
    1479              :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1480              :                                                   wj*(w_border1(1)*v0 + w_border1(2)*v1 + &
    1481     18826844 :                                                       w_border1(3)*v2)
    1482              :                      ELSE
    1483        58760 :                         i = coarse_bo(1, 1)
    1484        58760 :                         v0 = coarse_coeffs(i, j, k)
    1485        58760 :                         v1 = coarse_coeffs(i + 1, j, k)
    1486        58760 :                         v2 = coarse_coeffs(i + 2, j, k)
    1487        58760 :                         fi = 2*i + f_shift(1) + 1
    1488        58760 :                         IF (.NOT. (fi + 1 == fine_bo(1, 1) .OR. &
    1489              :                                    fi + 2 == fine_bo(1, 1))) THEN
    1490              :                            CALL cp_abort(__LOCATION__, &
    1491              :                                          "unexpected start index "// &
    1492              :                                          TRIM(cp_to_string(coarse_bo(1, 1)))//" "// &
    1493            0 :                                          TRIM(cp_to_string(fi)))
    1494              :                         END IF
    1495              :                      END IF
    1496     18885604 :                      fi = fi + 1
    1497     18885604 :                      IF (fi >= fine_bo(1, 1)) THEN
    1498              :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1499              :                                                   ww0(1)*v0 + ww0(2)*v1 + &
    1500     18885604 :                                                   ww0(3)*v2
    1501              :                      ELSE
    1502            0 :                         CPASSERT(fi + 1 == fine_bo(1, 1))
    1503              :                      END IF
    1504              : !                      CALL timestop(handle2)
    1505              : !                      CALL timeset(routineN//"_core",handle2)
    1506     18885604 :                      DO i = coarse_bo(1, 1) + 3, FLOOR((fine_bo(2, 1) - f_shift(1))/2.) - 3, 4
    1507     84267688 :                         v3 = coarse_coeffs(i, j, k)
    1508     84267688 :                         fi = fi + 1
    1509              :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1510              :                                                   (ww1(1)*v0 + ww1(2)*v1 + &
    1511     84267688 :                                                    ww1(3)*v2 + ww1(4)*v3)
    1512     84267688 :                         fi = fi + 1
    1513              :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1514              :                                                   (ww0(1)*v1 + ww0(2)*v2 + &
    1515     84267688 :                                                    ww0(3)*v3)
    1516     84267688 :                         v0 = coarse_coeffs(i + 1, j, k)
    1517     84267688 :                         fi = fi + 1
    1518              :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1519              :                                                   (ww1(4)*v0 + ww1(1)*v1 + &
    1520     84267688 :                                                    ww1(2)*v2 + ww1(3)*v3)
    1521     84267688 :                         fi = fi + 1
    1522              :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1523              :                                                   (ww0(1)*v2 + ww0(2)*v3 + &
    1524     84267688 :                                                    ww0(3)*v0)
    1525     84267688 :                         v1 = coarse_coeffs(i + 2, j, k)
    1526     84267688 :                         fi = fi + 1
    1527              :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1528              :                                                   (ww1(3)*v0 + ww1(4)*v1 + &
    1529     84267688 :                                                    ww1(1)*v2 + ww1(2)*v3)
    1530     84267688 :                         fi = fi + 1
    1531              :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1532              :                                                   (ww0(1)*v3 + ww0(2)*v0 + &
    1533     84267688 :                                                    ww0(3)*v1)
    1534     84267688 :                         v2 = coarse_coeffs(i + 3, j, k)
    1535     84267688 :                         fi = fi + 1
    1536              :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1537              :                                                   (ww1(2)*v0 + ww1(3)*v1 + &
    1538     84267688 :                                                    ww1(4)*v2 + ww1(1)*v3)
    1539     84267688 :                         fi = fi + 1
    1540              :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1541              :                                                   (ww0(1)*v0 + ww0(2)*v1 + &
    1542     84543910 :                                                    ww0(3)*v2)
    1543              :                      END DO
    1544              : !                      CALL timestop(handle2)
    1545              : !                      CALL timeset(routineN//"_clean",handle2)
    1546     18885604 :                      rest_b = MODULO(FLOOR((fine_bo(2, 1) - f_shift(1))/2.) - coarse_bo(1, 1) - 3 + 1, 4)
    1547     18885604 :                      IF (rest_b > 0) THEN
    1548     18508720 :                         i = FLOOR((fine_bo(2, 1) - f_shift(1))/2.) - rest_b + 1
    1549     18508720 :                         v3 = coarse_coeffs(i, j, k)
    1550     18508720 :                         fi = fi + 1
    1551              :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1552              :                                                   (ww1(1)*v0 + ww1(2)*v1 + &
    1553     18508720 :                                                    ww1(3)*v2 + ww1(4)*v3)
    1554     18508720 :                         fi = fi + 1
    1555              :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1556              :                                                   (ww0(1)*v1 + ww0(2)*v2 + &
    1557     18508720 :                                                    ww0(3)*v3)
    1558     18508720 :                         IF (rest_b > 1) THEN
    1559     18449960 :                            v0 = coarse_coeffs(i + 1, j, k)
    1560     18449960 :                            fi = fi + 1
    1561              :                            fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1562              :                                                      (ww1(4)*v0 + ww1(1)*v1 + &
    1563     18449960 :                                                       ww1(2)*v2 + ww1(3)*v3)
    1564     18449960 :                            fi = fi + 1
    1565              :                            fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1566              :                                                      (ww0(1)*v2 + ww0(2)*v3 + &
    1567     18449960 :                                                       ww0(3)*v0)
    1568     18449960 :                            IF (rest_b > 2) THEN
    1569        73160 :                               v1 = coarse_coeffs(i + 2, j, k)
    1570        73160 :                               fi = fi + 1
    1571              :                               fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1572              :                                                         (ww1(3)*v0 + ww1(4)*v1 + &
    1573        73160 :                                                          ww1(1)*v2 + ww1(2)*v3)
    1574        73160 :                               fi = fi + 1
    1575              :                               fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1576              :                                                         (ww0(1)*v3 + ww0(2)*v0 + &
    1577        73160 :                                                          ww0(3)*v1)
    1578        73160 :                               IF (pbc .AND. .NOT. is_split) THEN
    1579            0 :                                  v2 = coarse_coeffs(coarse_bo(1, 1), j, k)
    1580            0 :                                  fi = fi + 1
    1581              :                                  fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1582            0 :                                                            ww1(1)*v3 + ww1(2)*v0 + ww1(3)*v1 + ww1(4)*v2
    1583            0 :                                  fi = fi + 1
    1584              :                                  fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1585            0 :                                                            ww0(1)*v0 + ww0(2)*v1 + ww0(3)*v2
    1586            0 :                                  v3 = coarse_coeffs(coarse_bo(1, 1) + 1, j, k)
    1587            0 :                                  fi = fi + 1
    1588              :                                  fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1589            0 :                                                            ww1(1)*v0 + ww1(2)*v1 + ww1(3)*v2 + ww1(4)*v3
    1590        73160 :                               ELSE IF (has_i_ubound) THEN
    1591            0 :                                  v2 = coarse_coeffs(i + 3, j, k)
    1592            0 :                                  fi = fi + 1
    1593              :                                  fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1594            0 :                                                            ww1(1)*v3 + ww1(2)*v0 + ww1(3)*v1 + ww1(4)*v2
    1595            0 :                                  fi = fi + 1
    1596              :                                  fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1597            0 :                                                            ww0(1)*v0 + ww0(2)*v1 + ww0(3)*v2
    1598            0 :                                  IF (fi + 1 == fine_bo(2, 1)) THEN
    1599            0 :                                     v3 = coarse_coeffs(i + 4, j, k)
    1600            0 :                                     fi = fi + 1
    1601              :                                     fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1602            0 :                                                               ww1(1)*v0 + ww1(2)*v1 + ww1(3)*v2 + ww1(4)*v3
    1603              :                                  END IF
    1604              :                               ELSE
    1605        73160 :                                  fi = fi + 1
    1606              :                                  fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1607              :                                                            wj*(w_border1(3)*v3 + w_border1(2)*v0 + &
    1608        73160 :                                                                w_border1(1)*v1)
    1609        73160 :                                  fi = fi + 1
    1610              :                                  fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1611        73160 :                                                            w_border0*wj*v1
    1612              :                               END IF
    1613     18376800 :                            ELSE IF (pbc .AND. .NOT. is_split) THEN
    1614            0 :                               v1 = coarse_coeffs(coarse_bo(1, 1), j, k)
    1615            0 :                               fi = fi + 1
    1616              :                               fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1617            0 :                                                         ww1(1)*v2 + ww1(2)*v3 + ww1(3)*v0 + ww1(4)*v1
    1618            0 :                               fi = fi + 1
    1619              :                               fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1620            0 :                                                         ww0(1)*v3 + ww0(2)*v0 + ww0(3)*v1
    1621            0 :                               v2 = coarse_coeffs(coarse_bo(1, 1) + 1, j, k)
    1622            0 :                               fi = fi + 1
    1623              :                               fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1624            0 :                                                         ww1(1)*v3 + ww1(2)*v0 + ww1(3)*v1 + ww1(4)*v2
    1625     18376800 :                            ELSE IF (has_i_ubound) THEN
    1626            0 :                               v1 = coarse_coeffs(i + 2, j, k)
    1627            0 :                               fi = fi + 1
    1628              :                               fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1629            0 :                                                         ww1(1)*v2 + ww1(2)*v3 + ww1(3)*v0 + ww1(4)*v1
    1630            0 :                               fi = fi + 1
    1631              :                               fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1632            0 :                                                         ww0(1)*v3 + ww0(2)*v0 + ww0(3)*v1
    1633            0 :                               IF (fi + 1 == fine_bo(2, 1)) THEN
    1634            0 :                                  v2 = coarse_coeffs(i + 3, j, k)
    1635            0 :                                  fi = fi + 1
    1636              :                                  fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1637            0 :                                                            ww1(1)*v3 + ww1(2)*v0 + ww1(3)*v1 + ww1(4)*v2
    1638              :                               END IF
    1639              :                            ELSE
    1640     18376800 :                               fi = fi + 1
    1641              :                               fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1642              :                                                         wj*(w_border1(3)*v2 + w_border1(2)*v3 + &
    1643     18376800 :                                                             w_border1(1)*v0)
    1644     18376800 :                               fi = fi + 1
    1645              :                               fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1646     18376800 :                                                         w_border0*wj*v0
    1647              :                            END IF
    1648        58760 :                         ELSE IF (pbc .AND. .NOT. is_split) THEN
    1649            0 :                            v0 = coarse_coeffs(coarse_bo(1, 1), j, k)
    1650            0 :                            fi = fi + 1
    1651              :                            fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1652            0 :                                                      ww1(1)*v1 + ww1(2)*v2 + ww1(3)*v3 + ww1(4)*v0
    1653            0 :                            fi = fi + 1
    1654              :                            fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1655            0 :                                                      ww0(1)*v2 + ww0(2)*v3 + ww0(3)*v0
    1656            0 :                            v1 = coarse_coeffs(coarse_bo(1, 1) + 1, j, k)
    1657            0 :                            fi = fi + 1
    1658              :                            fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1659            0 :                                                      ww1(1)*v2 + ww1(2)*v3 + ww1(3)*v0 + ww1(4)*v1
    1660        58760 :                         ELSE IF (has_i_ubound) THEN
    1661        58760 :                            v0 = coarse_coeffs(i + 1, j, k)
    1662        58760 :                            fi = fi + 1
    1663              :                            fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1664        58760 :                                                      ww1(1)*v1 + ww1(2)*v2 + ww1(3)*v3 + ww1(4)*v0
    1665        58760 :                            fi = fi + 1
    1666              :                            fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1667        58760 :                                                      ww0(1)*v2 + ww0(2)*v3 + ww0(3)*v0
    1668        58760 :                            IF (fi + 1 == fine_bo(2, 1)) THEN
    1669        58760 :                               v1 = coarse_coeffs(i + 2, j, k)
    1670        58760 :                               fi = fi + 1
    1671              :                               fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1672        58760 :                                                         ww1(1)*v2 + ww1(2)*v3 + ww1(3)*v0 + ww1(4)*v1
    1673              :                            END IF
    1674              :                         ELSE
    1675            0 :                            fi = fi + 1
    1676              :                            fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1677              :                                                      wj*(w_border1(3)*v1 + w_border1(2)*v2 + &
    1678            0 :                                                          w_border1(1)*v3)
    1679            0 :                            fi = fi + 1
    1680              :                            fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1681            0 :                                                      w_border0*wj*v3
    1682              :                         END IF
    1683       376884 :                      ELSE IF (pbc .AND. .NOT. is_split) THEN
    1684            0 :                         v3 = coarse_coeffs(coarse_bo(1, 1), j, k)
    1685            0 :                         fi = fi + 1
    1686              :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1687            0 :                                                   ww1(1)*v0 + ww1(2)*v1 + ww1(3)*v2 + ww1(4)*v3
    1688            0 :                         fi = fi + 1
    1689              :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1690            0 :                                                   ww0(1)*v1 + ww0(2)*v2 + ww0(3)*v3
    1691            0 :                         v0 = coarse_coeffs(coarse_bo(1, 1) + 1, j, k)
    1692            0 :                         fi = fi + 1
    1693              :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1694            0 :                                                   ww1(1)*v1 + ww1(2)*v2 + ww1(3)*v3 + ww1(4)*v0
    1695       376884 :                      ELSE IF (has_i_ubound) THEN
    1696            0 :                         v3 = coarse_coeffs(i, j, k)
    1697            0 :                         fi = fi + 1
    1698              :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1699            0 :                                                   ww1(1)*v0 + ww1(2)*v1 + ww1(3)*v2 + ww1(4)*v3
    1700            0 :                         fi = fi + 1
    1701              :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1702            0 :                                                   ww0(1)*v1 + ww0(2)*v2 + ww0(3)*v3
    1703            0 :                         IF (fi + 1 == fine_bo(2, 1)) THEN
    1704            0 :                            v0 = coarse_coeffs(i + 1, j, k)
    1705            0 :                            fi = fi + 1
    1706              :                            fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1707            0 :                                                      ww1(1)*v1 + ww1(2)*v2 + ww1(3)*v3 + ww1(4)*v0
    1708              :                         END IF
    1709              :                      ELSE
    1710       376884 :                         fi = fi + 1
    1711              :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1712              :                                                   wj*(w_border1(3)*v0 + w_border1(2)*v1 + &
    1713       376884 :                                                       w_border1(1)*v2)
    1714       376884 :                         fi = fi + 1
    1715              :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1716       376884 :                                                   w_border0*wj*v2
    1717              :                      END IF
    1718     18885604 :                      CPASSERT(fi == fine_bo(2, 1))
    1719              :                   END IF
    1720              : !                   CALL timestop(handle2)
    1721              :                END DO
    1722              :             END DO
    1723              :          END DO
    1724              :       END DO
    1725              : 
    1726         2260 :       IF (is_split) THEN
    1727           24 :          DEALLOCATE (coarse_coeffs)
    1728              :       END IF
    1729         2260 :       CALL timestop(handle)
    1730         4520 :    END SUBROUTINE add_coarse2fine
    1731              : 
    1732              : ! **************************************************************************************************
    1733              : !> \brief low level function that adds a coarse grid (without boundary)
    1734              : !>      to a fine grid.
    1735              : !>
    1736              : !>      It will add to
    1737              : !>
    1738              : !>        coarse_coeffs(coarse_bounds(1,1):coarse_bounds(2,1),
    1739              : !>                      coarse_bounds(1,2):coarse_bounds(2,2),
    1740              : !>                      coarse_bounds(1,3):coarse_bounds(2,3))
    1741              : !>
    1742              : !>      using
    1743              : !>
    1744              : !>        fine_values(2*coarse_bounds(1,1):2*coarse_bounds(2,1),
    1745              : !>                    2*coarse_bounds(1,2):2*coarse_bounds(2,2),
    1746              : !>                    2*coarse_bounds(1,3):2*coarse_bounds(2,3))
    1747              : !>
    1748              : !>      composed with the weights obtained by the direct product of the
    1749              : !>      1d coefficients weights:
    1750              : !>
    1751              : !>      for i,j,k in -3..3
    1752              : !>         w(i,j,k)=weights_1d(abs(i)+1)*weights_1d(abs(j)+1)*
    1753              : !>                  weights_1d(abs(k)+1)
    1754              : !> \param fine_values_pw 3d array where to add the values due to the
    1755              : !>        coarse coeffs
    1756              : !> \param coarse_coeffs_pw 3d array with boundary of size 1 with the values of the
    1757              : !>        coefficients
    1758              : !> \param weights_1d the weights of the 1d smearing
    1759              : !> \param w_border0 the 1d weight at the border
    1760              : !> \param w_border1 the 1d weights for a point one off the border
    1761              : !>        (w_border1(1) is the weight of the coefficent at the border)
    1762              : !> \param pbc ...
    1763              : !> \param safe_computation ...
    1764              : !> \author fawzi
    1765              : !> \note
    1766              : !>      see coarse2fine for some relevant notes
    1767              : ! **************************************************************************************************
    1768         1062 :    SUBROUTINE add_fine2coarse(fine_values_pw, coarse_coeffs_pw, &
    1769              :                               weights_1d, w_border0, w_border1, pbc, safe_computation)
    1770              :       TYPE(pw_r3d_rs_type), INTENT(IN)                   :: fine_values_pw, coarse_coeffs_pw
    1771              :       REAL(kind=dp), DIMENSION(4), INTENT(in)            :: weights_1d
    1772              :       REAL(kind=dp), INTENT(in)                          :: w_border0
    1773              :       REAL(kind=dp), DIMENSION(3), INTENT(in)            :: w_border1
    1774              :       LOGICAL, INTENT(in)                                :: pbc
    1775              :       LOGICAL, INTENT(in), OPTIONAL                      :: safe_computation
    1776              : 
    1777              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'add_fine2coarse'
    1778              : 
    1779              :       INTEGER :: coarse_slice_size, f_shift(3), fi, fj, fk, handle, handle2, i, ii, ij, ik, ip, j, &
    1780              :          k, n_procs, p, p_old, rcv_tot_size, rest_b, s(3), send_tot_size, ss, x, x_att
    1781         1062 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: pp_lb, pp_ub, rcv_offset, rcv_size, &
    1782         1062 :                                                             real_rcv_size, send_offset, send_size, &
    1783         1062 :                                                             sent_size
    1784              :       INTEGER, DIMENSION(2, 3)                           :: coarse_bo, coarse_gbo, fine_bo, &
    1785              :                                                             fine_gbo, my_coarse_bo
    1786         1062 :       INTEGER, DIMENSION(:), POINTER                     :: pos_of_x
    1787              :       LOGICAL                                            :: has_i_lbound, has_i_ubound, is_split, &
    1788              :                                                             local_data, safe_calc
    1789              :       REAL(kind=dp)                                      :: vv0, vv1, vv2, vv3, vv4, vv5, vv6, vv7, &
    1790              :                                                             wi, wj, wk
    1791         1062 :       REAL(kind=dp), ALLOCATABLE, DIMENSION(:)           :: rcv_buf, send_buf
    1792              :       REAL(kind=dp), DIMENSION(3)                        :: w_0, ww0
    1793              :       REAL(kind=dp), DIMENSION(4)                        :: w_1, ww1
    1794         1062 :       REAL(kind=dp), DIMENSION(:, :, :), POINTER         :: coarse_coeffs, fine_values
    1795              : 
    1796         1062 :       CALL timeset(routineN, handle)
    1797              : 
    1798         1062 :       safe_calc = .FALSE.
    1799         1062 :       IF (PRESENT(safe_computation)) safe_calc = safe_computation
    1800              : 
    1801        10620 :       my_coarse_bo = coarse_coeffs_pw%pw_grid%bounds_local
    1802        10620 :       coarse_gbo = coarse_coeffs_pw%pw_grid%bounds
    1803        10620 :       fine_bo = fine_values_pw%pw_grid%bounds_local
    1804        10620 :       fine_gbo = fine_values_pw%pw_grid%bounds
    1805         4248 :       f_shift = fine_gbo(1, :) - 2*coarse_gbo(1, :)
    1806         3150 :       is_split = ANY(coarse_gbo(:, 1) /= my_coarse_bo(:, 1))
    1807         1062 :       coarse_bo = my_coarse_bo
    1808         1062 :       IF (fine_bo(1, 1) <= fine_bo(2, 1)) THEN
    1809         1062 :          coarse_bo(1, 1) = FLOOR(REAL(fine_bo(1, 1) - f_shift(1), dp)/2._dp) - 1
    1810         1062 :          coarse_bo(2, 1) = FLOOR(REAL(fine_bo(2, 1) + 1 - f_shift(1), dp)/2._dp) + 1
    1811              :       ELSE
    1812            0 :          coarse_bo(1, 1) = coarse_gbo(2, 1)
    1813            0 :          coarse_bo(2, 1) = coarse_gbo(2, 1) - 1
    1814              :       END IF
    1815         1062 :       IF (.NOT. is_split .OR. .NOT. pbc) THEN
    1816         1062 :          coarse_bo(1, 1) = MAX(coarse_gbo(1, 1), coarse_bo(1, 1))
    1817         1062 :          coarse_bo(2, 1) = MIN(coarse_gbo(2, 1), coarse_bo(2, 1))
    1818              :       END IF
    1819         1062 :       has_i_ubound = (fine_gbo(2, 1) /= fine_bo(2, 1)) .OR. pbc .AND. is_split
    1820         1062 :       has_i_lbound = (fine_gbo(1, 1) /= fine_bo(1, 1)) .OR. pbc .AND. is_split
    1821              : 
    1822         1062 :       IF (pbc) THEN
    1823            0 :          CPASSERT(ALL(fine_gbo(1, :) == 2*coarse_gbo(1, :) + f_shift))
    1824            0 :          CPASSERT(ALL(fine_gbo(2, :) == 2*coarse_gbo(2, :) + f_shift + 1))
    1825              :       ELSE
    1826         4248 :          CPASSERT(ALL(fine_gbo(2, :) == 2*coarse_gbo(2, :) + f_shift))
    1827         4248 :          CPASSERT(ALL(fine_gbo(1, :) == 2*coarse_gbo(1, :) + f_shift))
    1828              :       END IF
    1829         1062 :       CPASSERT(coarse_gbo(2, 1) - coarse_gbo(1, 2) > 1)
    1830         1062 :       local_data = is_split ! ANY(coarse_bo/=my_coarse_bo)
    1831         1062 :       IF (local_data) THEN
    1832              :          ALLOCATE (coarse_coeffs(coarse_bo(1, 1):coarse_bo(2, 1), &
    1833              :                                  coarse_bo(1, 2):coarse_bo(2, 2), &
    1834          120 :                                  coarse_bo(1, 3):coarse_bo(2, 3)))
    1835        31240 :          coarse_coeffs = 0._dp
    1836              :       ELSE
    1837         1038 :          coarse_coeffs => coarse_coeffs_pw%array
    1838              :       END IF
    1839              : 
    1840         1062 :       fine_values => fine_values_pw%array
    1841         4248 :       w_0 = [weights_1d(3), weights_1d(1), weights_1d(3)]
    1842         5310 :       w_1 = [weights_1d(4), weights_1d(2), weights_1d(2), weights_1d(4)]
    1843              : 
    1844         4248 :       DO i = 1, 3
    1845         4248 :          s(i) = coarse_gbo(2, i) - coarse_gbo(1, i) + 1
    1846              :       END DO
    1847         4248 :       IF (ANY(s < 1)) RETURN
    1848              : 
    1849        14092 :       DO k = coarse_bo(1, 3), coarse_bo(2, 3)
    1850       105302 :          DO ik = -3, 3
    1851        91210 :             IF (pbc) THEN
    1852            0 :                wk = weights_1d(ABS(ik) + 1)
    1853            0 :                fk = fine_gbo(1, 3) + MODULO(2*k + ik - fine_gbo(1, 3) + f_shift(3), 2*s(3))
    1854              :             ELSE
    1855        91210 :                fk = 2*k + ik + f_shift(3)
    1856        91210 :                IF (fk <= fine_bo(1, 3) + 1 .OR. fk >= fine_bo(2, 3) - 1) THEN
    1857        19116 :                   IF (fk < fine_bo(1, 3) .OR. fk > fine_bo(2, 3)) CYCLE
    1858        10620 :                   IF (fk == fine_bo(1, 3) .OR. fk == fine_bo(2, 3)) THEN
    1859         4248 :                      IF (ik /= 0) CYCLE
    1860         2124 :                      wk = w_border0
    1861         6372 :                   ELSE IF (fk == fine_bo(1, 3) + 1) THEN
    1862         1062 :                      SELECT CASE (ik)
    1863              :                      CASE (1)
    1864         1062 :                         wk = w_border1(1)
    1865              :                      CASE (-1)
    1866         1062 :                         wk = w_border1(2)
    1867              :                      CASE (-3)
    1868         1062 :                         wk = w_border1(3)
    1869              :                      CASE default
    1870            0 :                         CPABORT("Only 1, -1, -3 are supported as the value of ik")
    1871         3186 :                         CYCLE
    1872              :                      END SELECT
    1873              :                   ELSE
    1874         1062 :                      SELECT CASE (ik)
    1875              :                      CASE (3)
    1876         1062 :                         wk = w_border1(3)
    1877              :                      CASE (1)
    1878         1062 :                         wk = w_border1(2)
    1879              :                      CASE (-1)
    1880         1062 :                         wk = w_border1(1)
    1881              :                      CASE default
    1882            0 :                         CPABORT("Only 3, 1, -1 are supported as the value of ik")
    1883         3186 :                         CYCLE
    1884              :                      END SELECT
    1885              :                   END IF
    1886              :                ELSE
    1887        72094 :                   wk = weights_1d(ABS(ik) + 1)
    1888              :                END IF
    1889              :             END IF
    1890      1471974 :             DO j = coarse_bo(1, 2), coarse_bo(2, 2)
    1891     11118042 :                DO ij = -3, 3
    1892      9648478 :                   IF (pbc) THEN
    1893              :                      fj = fine_gbo(1, 2) + MODULO(2*j + ij - fine_gbo(1, 2) + f_shift(2), &
    1894            0 :                                                   2*s(2))
    1895            0 :                      wj = weights_1d(ABS(ij) + 1)*wk
    1896              :                   ELSE
    1897      9648478 :                      fj = 2*j + ij + f_shift(2)
    1898      9648478 :                      IF (fj <= fine_bo(1, 2) + 1 .OR. fj >= fine_bo(2, 2) - 1) THEN
    1899      1450620 :                         IF (fj < fine_bo(1, 2) .OR. fj > fine_bo(2, 2)) CYCLE
    1900       805900 :                         IF (fj == fine_bo(1, 2) .OR. fj == fine_bo(2, 2)) THEN
    1901       322360 :                            IF (ij /= 0) CYCLE
    1902       161180 :                            wj = w_border0*wk
    1903       483540 :                         ELSE IF (fj == fine_bo(1, 2) + 1) THEN
    1904        80590 :                            SELECT CASE (ij)
    1905              :                            CASE (1)
    1906        80590 :                               wj = w_border1(1)*wk
    1907              :                            CASE (-1)
    1908        80590 :                               wj = w_border1(2)*wk
    1909              :                            CASE (-3)
    1910        80590 :                               wj = w_border1(3)*wk
    1911              :                            CASE default
    1912            0 :                               CPABORT("Only 1, -1, -3 are supported as the value of ij")
    1913       241770 :                               CYCLE
    1914              :                            END SELECT
    1915              :                         ELSE
    1916        80590 :                            SELECT CASE (ij)
    1917              :                            CASE (-1)
    1918        80590 :                               wj = w_border1(1)*wk
    1919              :                            CASE (1)
    1920        80590 :                               wj = w_border1(2)*wk
    1921              :                            CASE (3)
    1922        80590 :                               wj = w_border1(3)*wk
    1923              :                            CASE default
    1924            0 :                               CPABORT("Only -1, 1, 3 are supported as the value of ij")
    1925       241770 :                               CYCLE
    1926              :                            END SELECT
    1927              :                         END IF
    1928              :                      ELSE
    1929      8197858 :                         wj = weights_1d(ABS(ij) + 1)*wk
    1930              :                      END IF
    1931              :                   END IF
    1932              : 
    1933     10220932 :                   IF (coarse_bo(2, 1) - coarse_bo(1, 1) < 7 .OR. safe_calc) THEN
    1934      1559844 :                      DO i = coarse_bo(1, 1), coarse_bo(2, 1)
    1935     10785788 :                         DO ii = -3, 3
    1936      9225944 :                            IF (pbc .AND. .NOT. is_split) THEN
    1937            0 :                               wi = weights_1d(ABS(ii) + 1)*wj
    1938            0 :                               fi = fine_gbo(1, 1) + MODULO(2*i + ii - fine_gbo(1, 1) + f_shift(1), 2*s(1))
    1939              :                            ELSE
    1940      9225944 :                               fi = 2*i + ii + f_shift(1)
    1941      9225944 :                               IF (fi < fine_bo(1, 1) .OR. fi > fine_bo(2, 1)) CYCLE
    1942      7112560 :                               IF (((.NOT. pbc) .AND. fi <= fine_gbo(1, 1) + 1) .OR. &
    1943              :                                   ((.NOT. pbc) .AND. fi >= fine_gbo(2, 1) - 1)) THEN
    1944      2281160 :                                  IF (fi == fine_gbo(1, 1) .OR. fi == fine_gbo(2, 1)) THEN
    1945       912464 :                                     IF (ii /= 0) CYCLE
    1946       456232 :                                     wi = w_border0*wj
    1947      1368696 :                                  ELSE IF (fi == fine_gbo(1, 1) + 1) THEN
    1948       228116 :                                     SELECT CASE (ii)
    1949              :                                     CASE (1)
    1950       228116 :                                        wi = w_border1(1)*wj
    1951              :                                     CASE (-1)
    1952       228116 :                                        wi = w_border1(2)*wj
    1953              :                                     CASE (-3)
    1954       228116 :                                        wi = w_border1(3)*wj
    1955              :                                     CASE default
    1956       684348 :                                        CYCLE
    1957              :                                     END SELECT
    1958              :                                  ELSE
    1959       228116 :                                     SELECT CASE (ii)
    1960              :                                     CASE (-1)
    1961       228116 :                                        wi = w_border1(1)*wj
    1962              :                                     CASE (1)
    1963       228116 :                                        wi = w_border1(2)*wj
    1964              :                                     CASE (3)
    1965       228116 :                                        wi = w_border1(3)*wj
    1966              :                                     CASE default
    1967       684348 :                                        CYCLE
    1968              :                                     END SELECT
    1969              :                                  END IF
    1970              :                               ELSE
    1971      4831400 :                                  wi = weights_1d(ABS(ii) + 1)*wj
    1972              :                               END IF
    1973              :                            END IF
    1974              :                            coarse_coeffs(i, j, k) = &
    1975              :                               coarse_coeffs(i, j, k) + &
    1976     10543936 :                               wi*fine_values(fi, fj, fk)
    1977              :                         END DO
    1978              :                      END DO
    1979              :                   ELSE
    1980     34402904 :                      ww0 = wj*w_0
    1981     43003630 :                      ww1 = wj*w_1
    1982      8600726 :                      IF (pbc .AND. .NOT. is_split) THEN
    1983            0 :                         i = coarse_bo(1, 1) - 1
    1984            0 :                         vv2 = fine_values(fine_bo(2, 1) - 2, fj, fk)
    1985            0 :                         vv3 = fine_values(fine_bo(2, 1) - 1, fj, fk)
    1986            0 :                         vv4 = fine_values(fine_bo(2, 1), fj, fk)
    1987            0 :                         fi = fine_bo(1, 1)
    1988            0 :                         vv5 = fine_values(fi, fj, fk)
    1989            0 :                         fi = fi + 1
    1990            0 :                         vv6 = fine_values(fi, fj, fk)
    1991            0 :                         fi = fi + 1
    1992            0 :                         vv7 = fine_values(fi, fj, fk)
    1993              :                         coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
    1994            0 :                                                      + ww1(4)*vv2 + ww0(3)*vv3 + ww1(3)*vv4 + ww0(2)*vv5 + ww1(2)*vv6 + ww0(1)*vv7
    1995              :                         coarse_coeffs(i + 2, j, k) = coarse_coeffs(i + 2, j, k) &
    1996            0 :                                                      + ww1(4)*vv4 + ww0(3)*vv5 + ww1(3)*vv6 + ww0(2)*vv7
    1997              :                         coarse_coeffs(i + 3, j, k) = coarse_coeffs(i + 3, j, k) &
    1998            0 :                                                      + ww1(4)*vv6 + ww0(3)*vv7
    1999      8600726 :                      ELSE IF (has_i_lbound) THEN
    2000        47524 :                         i = coarse_bo(1, 1)
    2001        47524 :                         fi = fine_bo(1, 1) - 1
    2002        47524 :                         IF (i + 1 == FLOOR((fine_bo(1, 1) + 1 - f_shift(1))/2._dp)) THEN
    2003        47524 :                            fi = fi + 1
    2004        47524 :                            vv0 = fine_values(fi, fj, fk)
    2005              :                            coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) + &
    2006        47524 :                                                     vv0*ww0(3)
    2007              :                            coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) + &
    2008        47524 :                                                         vv0*ww0(2)
    2009              :                            coarse_coeffs(i + 2, j, k) = coarse_coeffs(i + 2, j, k) + &
    2010        47524 :                                                         vv0*ww0(1)
    2011              :                         END IF
    2012              :                      ELSE
    2013      8553202 :                         i = coarse_bo(1, 1)
    2014      8553202 :                         fi = 2*i + f_shift(1)
    2015      8553202 :                         vv0 = fine_values(fi, fj, fk)
    2016      8553202 :                         fi = fi + 1
    2017      8553202 :                         vv1 = fine_values(fi, fj, fk)
    2018      8553202 :                         fi = fi + 1
    2019      8553202 :                         vv2 = fine_values(fi, fj, fk)
    2020              :                         coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) + &
    2021      8553202 :                                                  (vv0*w_border0 + vv1*w_border1(1))*wj + vv2*ww0(1)
    2022              :                         coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) + &
    2023      8553202 :                                                      wj*w_border1(2)*vv1 + ww0(2)*vv2
    2024              :                         coarse_coeffs(i + 2, j, k) = coarse_coeffs(i + 2, j, k) + &
    2025      8553202 :                                                      wj*w_border1(3)*vv1 + ww0(3)*vv2
    2026              :                      END IF
    2027      8600726 :                      DO i = coarse_bo(1, 1) + 3, FLOOR((fine_bo(2, 1) - f_shift(1))/2._dp) - 3, 4
    2028     37559568 :                         fi = fi + 1
    2029     37559568 :                         vv0 = fine_values(fi, fj, fk)
    2030     37559568 :                         fi = fi + 1
    2031     37559568 :                         vv1 = fine_values(fi, fj, fk)
    2032     37559568 :                         fi = fi + 1
    2033     37559568 :                         vv2 = fine_values(fi, fj, fk)
    2034     37559568 :                         fi = fi + 1
    2035     37559568 :                         vv3 = fine_values(fi, fj, fk)
    2036     37559568 :                         fi = fi + 1
    2037     37559568 :                         vv4 = fine_values(fi, fj, fk)
    2038     37559568 :                         fi = fi + 1
    2039     37559568 :                         vv5 = fine_values(fi, fj, fk)
    2040     37559568 :                         fi = fi + 1
    2041     37559568 :                         vv6 = fine_values(fi, fj, fk)
    2042     37559568 :                         fi = fi + 1
    2043     37559568 :                         vv7 = fine_values(fi, fj, fk)
    2044              :                         coarse_coeffs(i - 3, j, k) = coarse_coeffs(i - 3, j, k) &
    2045     37559568 :                                                      + ww1(1)*vv0
    2046              :                         coarse_coeffs(i - 2, j, k) = coarse_coeffs(i - 2, j, k) &
    2047     37559568 :                                                      + ww1(2)*vv0 + ww0(1)*vv1 + ww1(1)*vv2
    2048              :                         coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
    2049     37559568 :                                                      + ww1(3)*vv0 + ww0(2)*vv1 + ww1(2)*vv2 + ww0(1)*vv3 + ww1(1)*vv4
    2050              :                         coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
    2051     37559568 :                                           + ww1(4)*vv0 + ww0(3)*vv1 + ww1(3)*vv2 + ww0(2)*vv3 + ww1(2)*vv4 + ww0(1)*vv5 + ww1(1)*vv6
    2052              :                         coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
    2053     37559568 :                                                      + ww1(4)*vv2 + ww0(3)*vv3 + ww1(3)*vv4 + ww0(2)*vv5 + ww1(2)*vv6 + ww0(1)*vv7
    2054              :                         coarse_coeffs(i + 2, j, k) = coarse_coeffs(i + 2, j, k) &
    2055     37559568 :                                                      + ww1(4)*vv4 + ww0(3)*vv5 + ww1(3)*vv6 + ww0(2)*vv7
    2056              :                         coarse_coeffs(i + 3, j, k) = coarse_coeffs(i + 3, j, k) &
    2057     37559568 :                                                      + ww1(4)*vv6 + ww0(3)*vv7
    2058              :                      END DO
    2059      8600726 :                      IF (.NOT. FLOOR((fine_bo(2, 1) - f_shift(1))/2._dp) - coarse_bo(1, 1) >= 4) THEN
    2060            0 :                         CPABORT("FLOOR((fine_bo(2,1)-f_shift(1))/2._dp)-coarse_bo(1,1)>=4")
    2061              :                      END IF
    2062      8600726 :                      rest_b = MODULO(FLOOR((fine_bo(2, 1) - f_shift(1))/2._dp) - coarse_bo(1, 1) - 6, 4)
    2063      8600726 :                      i = FLOOR((fine_bo(2, 1) - f_shift(1))/2._dp) - 3 - rest_b + 4
    2064      8600726 :                      CPASSERT(fi == (i - 2)*2 + f_shift(1))
    2065      8600726 :                      IF (rest_b > 0) THEN
    2066      8540210 :                         fi = fi + 1
    2067      8540210 :                         vv0 = fine_values(fi, fj, fk)
    2068      8540210 :                         fi = fi + 1
    2069      8540210 :                         vv1 = fine_values(fi, fj, fk)
    2070              :                         coarse_coeffs(i - 3, j, k) = coarse_coeffs(i - 3, j, k) &
    2071      8540210 :                                                      + ww1(1)*vv0
    2072              :                         coarse_coeffs(i - 2, j, k) = coarse_coeffs(i - 2, j, k) &
    2073      8540210 :                                                      + ww1(2)*vv0 + ww0(1)*vv1
    2074              :                         coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
    2075      8540210 :                                                      + ww1(3)*vv0 + ww0(2)*vv1
    2076              :                         coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
    2077      8540210 :                                                  + ww1(4)*vv0 + ww0(3)*vv1
    2078      8540210 :                         IF (rest_b > 1) THEN
    2079      8492686 :                            fi = fi + 1
    2080      8492686 :                            vv2 = fine_values(fi, fj, fk)
    2081      8492686 :                            fi = fi + 1
    2082      8492686 :                            vv3 = fine_values(fi, fj, fk)
    2083              :                            coarse_coeffs(i - 2, j, k) = coarse_coeffs(i - 2, j, k) &
    2084      8492686 :                                                         + ww1(1)*vv2
    2085              :                            coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
    2086      8492686 :                                                         + ww1(2)*vv2 + ww0(1)*vv3
    2087              :                            coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
    2088      8492686 :                                                     + ww1(3)*vv2 + ww0(2)*vv3
    2089              :                            coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
    2090      8492686 :                                                         + ww1(4)*vv2 + ww0(3)*vv3
    2091      8492686 :                            IF (rest_b > 2) THEN
    2092        61924 :                               fi = fi + 1
    2093        61924 :                               vv4 = fine_values(fi, fj, fk)
    2094        61924 :                               fi = fi + 1
    2095        61924 :                               vv5 = fine_values(fi, fj, fk)
    2096        61924 :                               fi = fi + 1
    2097        61924 :                               vv6 = fine_values(fi, fj, fk)
    2098        61924 :                               fi = fi + 1
    2099        61924 :                               vv7 = fine_values(fi, fj, fk)
    2100              :                               coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
    2101        61924 :                                                            + ww1(1)*vv4
    2102        61924 :                               IF (has_i_ubound) THEN
    2103            0 :                                  IF (coarse_bo(2, 1) - 2 == FLOOR((fine_bo(2, 1) - f_shift(1))/2._dp)) THEN
    2104            0 :                                     fi = fi + 1
    2105            0 :                                     vv0 = fine_values(fi, fj, fk)
    2106              :                                     coarse_coeffs(i + 4, j, k) = coarse_coeffs(i + 4, j, k) &
    2107            0 :                                                                  + vv0*ww1(4)
    2108              :                                  ELSE
    2109              :                                     vv0 = 0._dp
    2110              :                                  END IF
    2111              :                                  coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
    2112            0 :                                                           + ww1(2)*vv4 + ww0(1)*vv5 + ww1(1)*vv6
    2113              :                                  coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
    2114            0 :                                                               + ww1(3)*vv4 + ww0(2)*vv5 + ww1(2)*vv6 + ww0(1)*vv7 + vv0*ww1(1)
    2115              :                                  coarse_coeffs(i + 2, j, k) = coarse_coeffs(i + 2, j, k) &
    2116            0 :                                                               + ww1(4)*vv4 + ww0(3)*vv5 + ww1(3)*vv6 + ww0(2)*vv7 + vv0*ww1(2)
    2117              :                                  coarse_coeffs(i + 3, j, k) = coarse_coeffs(i + 3, j, k) &
    2118            0 :                                                               + ww1(4)*vv6 + ww0(3)*vv7 + vv0*ww1(3)
    2119        61924 :                               ELSEIF (pbc .AND. .NOT. is_split) THEN
    2120            0 :                                  fi = fi + 1
    2121            0 :                                  vv0 = fine_values(fi, fj, fk)
    2122            0 :                                  vv1 = fine_values(fine_bo(1, 1), fj, fk)
    2123            0 :                                  vv2 = fine_values(fine_bo(1, 1) + 1, fj, fk)
    2124              :                                  coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
    2125            0 :                                                           + ww1(2)*vv4 + ww0(1)*vv5 + ww1(1)*vv6
    2126              :                                  coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
    2127            0 :                                                               + ww1(3)*vv4 + ww0(2)*vv5 + ww1(2)*vv6 + ww0(1)*vv7 + vv0*ww1(1)
    2128              :                                  coarse_coeffs(i + 2, j, k) = coarse_coeffs(i + 2, j, k) &
    2129              :                                                               + ww1(4)*vv4 + ww0(3)*vv5 + ww1(3)*vv6 + ww0(2)*vv7 + vv0*ww1(2) &
    2130            0 :                                                               + vv1*ww0(1) + vv2*ww1(1)
    2131              :                               ELSE
    2132              :                                  coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
    2133        61924 :                                                           + ww1(2)*vv4 + ww0(1)*vv5 + wj*w_border1(3)*vv6
    2134              :                                  coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
    2135        61924 :                                                               + ww1(3)*vv4 + ww0(2)*vv5 + wj*w_border1(2)*vv6
    2136              :                                  coarse_coeffs(i + 2, j, k) = coarse_coeffs(i + 2, j, k) &
    2137        61924 :                                                               + ww1(4)*vv4 + ww0(3)*vv5 + wj*w_border1(1)*vv6 + w_border0*wj*vv7
    2138              :                               END IF
    2139              :                            ELSE
    2140      8430762 :                               fi = fi + 1
    2141      8430762 :                               vv4 = fine_values(fi, fj, fk)
    2142      8430762 :                               fi = fi + 1
    2143      8430762 :                               vv5 = fine_values(fi, fj, fk)
    2144      8430762 :                               IF (has_i_ubound) THEN
    2145            0 :                                  IF (coarse_bo(2, 1) - 2 == FLOOR((fine_bo(2, 1) - f_shift(1))/2._dp)) THEN
    2146            0 :                                     fi = fi + 1
    2147            0 :                                     vv6 = fine_values(fi, fj, fk)
    2148              :                                     coarse_coeffs(i + 3, j, k) = coarse_coeffs(i + 3, j, k) &
    2149            0 :                                                                  + ww1(4)*vv6
    2150              :                                  ELSE
    2151              :                                     vv6 = 0._dp
    2152              :                                  END IF
    2153              :                                  coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
    2154            0 :                                                               + ww1(1)*vv4
    2155              :                                  coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
    2156            0 :                                                           + ww1(2)*vv4 + ww0(1)*vv5 + ww1(1)*vv6
    2157              :                                  coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
    2158            0 :                                                               + ww1(3)*vv4 + ww0(2)*vv5 + ww1(2)*vv6
    2159              :                                  coarse_coeffs(i + 2, j, k) = coarse_coeffs(i + 2, j, k) &
    2160            0 :                                                               + ww1(4)*vv4 + ww0(3)*vv5 + ww1(3)*vv6
    2161      8430762 :                               ELSEIF (pbc .AND. .NOT. is_split) THEN
    2162            0 :                                  fi = fi + 1
    2163            0 :                                  vv6 = fine_values(fi, fj, fk)
    2164            0 :                                  vv7 = fine_values(fine_bo(1, 1), fj, fk)
    2165            0 :                                  vv0 = fine_values(fine_bo(1, 1) + 1, fj, fk)
    2166              :                                  coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
    2167            0 :                                                               + ww1(1)*vv4
    2168              :                                  coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
    2169              :                                                           + ww1(4)*vv0 + ww0(3)*vv1 + ww1(3)*vv2 + ww0(2)*vv3 + &
    2170            0 :                                                           ww1(2)*vv4 + ww0(1)*vv5 + ww1(1)*vv6
    2171              :                                  coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
    2172              :                                                               + ww1(4)*vv2 + ww0(3)*vv3 + ww1(3)*vv4 + ww0(2)*vv5 + ww1(2)*vv6 &
    2173            0 :                                                               + ww0(1)*vv7 + ww1(1)*vv0
    2174              :                               ELSE
    2175              :                                  coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
    2176      8430762 :                                                               + wj*w_border1(3)*vv4
    2177              :                                  coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
    2178      8430762 :                                                           + wj*w_border1(2)*vv4
    2179              :                                  coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
    2180      8430762 :                                                               + wj*(w_border1(1)*vv4 + w_border0*vv5)
    2181              :                               END IF
    2182              :                            END IF
    2183              :                         ELSE
    2184        47524 :                            fi = fi + 1
    2185        47524 :                            vv2 = fine_values(fi, fj, fk)
    2186        47524 :                            fi = fi + 1
    2187        47524 :                            vv3 = fine_values(fi, fj, fk)
    2188        47524 :                            IF (has_i_ubound) THEN
    2189        47524 :                               IF (coarse_bo(2, 1) - 2 == FLOOR((fine_bo(2, 1) - f_shift(1))/2._dp)) THEN
    2190        47524 :                                  fi = fi + 1
    2191        47524 :                                  vv4 = fine_values(fi, fj, fk)
    2192              :                                  coarse_coeffs(i + 2, j, k) = coarse_coeffs(i + 2, j, k) &
    2193        47524 :                                                               + ww1(4)*vv4
    2194              :                               ELSE
    2195              :                                  vv4 = 0._dp
    2196              :                               END IF
    2197              :                               coarse_coeffs(i - 2, j, k) = coarse_coeffs(i - 2, j, k) &
    2198        47524 :                                                            + ww1(1)*vv2
    2199              :                               coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
    2200        47524 :                                                            + ww1(2)*vv2 + ww0(1)*vv3 + ww1(1)*vv4
    2201              :                               coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
    2202        47524 :                                                        + ww1(3)*vv2 + ww0(2)*vv3 + ww1(2)*vv4
    2203              :                               coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
    2204        47524 :                                                            + ww1(4)*vv2 + ww0(3)*vv3 + ww1(3)*vv4
    2205            0 :                            ELSEIF (pbc .AND. .NOT. is_split) THEN
    2206            0 :                               fi = fi + 1
    2207            0 :                               vv4 = fine_values(fi, fj, fk)
    2208            0 :                               vv5 = fine_values(fine_bo(1, 1), fj, fk)
    2209            0 :                               vv6 = fine_values(fine_bo(1, 1) + 1, fj, fk)
    2210              :                               coarse_coeffs(i - 2, j, k) = coarse_coeffs(i - 2, j, k) &
    2211            0 :                                                            + ww1(1)*vv2
    2212              :                               coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
    2213            0 :                                                            + ww1(2)*vv2 + ww0(1)*vv3 + ww1(1)*vv4
    2214              :                               coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
    2215            0 :                                                        + ww1(3)*vv2 + ww0(2)*vv3 + ww1(2)*vv4 + vv5*ww0(1) + ww1(1)*vv6
    2216              :                            ELSE
    2217              :                               coarse_coeffs(i - 2, j, k) = coarse_coeffs(i - 2, j, k) &
    2218            0 :                                                            + wj*w_border1(3)*vv2
    2219              :                               coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
    2220            0 :                                                            + wj*w_border1(2)*vv2
    2221              :                               coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
    2222            0 :                                                        + wj*(w_border1(1)*vv2 + w_border0*vv3)
    2223              :                            END IF
    2224              :                         END IF
    2225              :                      ELSE
    2226        60516 :                         fi = fi + 1
    2227        60516 :                         vv0 = fine_values(fi, fj, fk)
    2228        60516 :                         fi = fi + 1
    2229        60516 :                         vv1 = fine_values(fi, fj, fk)
    2230        60516 :                         IF (has_i_ubound) THEN
    2231            0 :                            IF (coarse_bo(2, 1) - 2 == FLOOR((fine_bo(2, 1) - f_shift(1))/2._dp)) THEN
    2232            0 :                               fi = fi + 1
    2233            0 :                               vv2 = fine_values(fi, fj, fk)
    2234              :                               coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
    2235            0 :                                                            + ww1(4)*vv2
    2236              :                            ELSE
    2237              :                               vv2 = 0._dp
    2238              :                            END IF
    2239              :                            coarse_coeffs(i - 3, j, k) = coarse_coeffs(i - 3, j, k) &
    2240            0 :                                                         + ww1(1)*vv0
    2241              :                            coarse_coeffs(i - 2, j, k) = coarse_coeffs(i - 2, j, k) &
    2242            0 :                                                         + ww1(2)*vv0 + ww0(1)*vv1 + ww1(1)*vv2
    2243              :                            coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
    2244            0 :                                                         + ww1(3)*vv0 + ww0(2)*vv1 + ww1(2)*vv2
    2245              :                            coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
    2246            0 :                                                     + ww1(4)*vv0 + ww0(3)*vv1 + ww1(3)*vv2
    2247        60516 :                         ELSEIF (pbc .AND. .NOT. is_split) THEN
    2248            0 :                            fi = fi + 1
    2249            0 :                            vv2 = fine_values(fi, fj, fk)
    2250            0 :                            vv3 = fine_values(fine_bo(1, 1), fk, fk)
    2251            0 :                            vv4 = fine_values(fine_bo(1, 1) + 1, fk, fk)
    2252              :                            coarse_coeffs(i - 3, j, k) = coarse_coeffs(i - 3, j, k) &
    2253            0 :                                                         + ww1(1)*vv0
    2254              :                            coarse_coeffs(i - 2, j, k) = coarse_coeffs(i - 2, j, k) &
    2255            0 :                                                         + ww1(2)*vv0 + ww0(1)*vv1 + ww1(1)*vv2
    2256              :                            coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
    2257            0 :                                                         + ww1(3)*vv0 + ww0(2)*vv1 + ww1(2)*vv2 + ww0(1)*vv3 + ww1(1)*vv4
    2258              :                         ELSE
    2259              :                            coarse_coeffs(i - 3, j, k) = coarse_coeffs(i - 3, j, k) &
    2260        60516 :                                                         + wj*w_border1(3)*vv0
    2261              :                            coarse_coeffs(i - 2, j, k) = coarse_coeffs(i - 2, j, k) &
    2262        60516 :                                                         + wj*w_border1(2)*vv0
    2263              :                            coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
    2264        60516 :                                                         + wj*(w_border1(1)*vv0 + w_border0*vv1)
    2265              :                         END IF
    2266              :                      END IF
    2267      8600726 :                      CPASSERT(fi == fine_bo(2, 1))
    2268              :                   END IF
    2269              :                END DO
    2270              :             END DO
    2271              :          END DO
    2272              :       END DO
    2273              : 
    2274              :       ! *** parallel case
    2275         1062 :       IF (is_split) THEN
    2276           24 :          CALL timeset(routineN//"_comm", handle2)
    2277              :          coarse_slice_size = (coarse_bo(2, 2) - coarse_bo(1, 2) + 1)* &
    2278           24 :                              (coarse_bo(2, 3) - coarse_bo(1, 3) + 1)
    2279           24 :          n_procs = coarse_coeffs_pw%pw_grid%para%group%num_pe
    2280              :          ALLOCATE (send_size(0:n_procs - 1), send_offset(0:n_procs - 1), &
    2281              :                    sent_size(0:n_procs - 1), rcv_size(0:n_procs - 1), &
    2282              :                    rcv_offset(0:n_procs - 1), pp_lb(0:n_procs - 1), &
    2283          240 :                    pp_ub(0:n_procs - 1), real_rcv_size(0:n_procs - 1))
    2284              : 
    2285              :          ! ** send size count
    2286              : 
    2287           24 :          pos_of_x => coarse_coeffs_pw%pw_grid%para%pos_of_x
    2288           24 :          send_size = 0
    2289          184 :          DO x = coarse_bo(1, 1), coarse_bo(2, 1)
    2290          160 :             p = pos_of_x(coarse_gbo(1, 1) + MODULO(x - coarse_gbo(1, 1), s(1)))
    2291          184 :             send_size(p) = send_size(p) + coarse_slice_size
    2292              :          END DO
    2293              : 
    2294              :          ! ** rcv size count
    2295              : 
    2296           24 :          pos_of_x => fine_values_pw%pw_grid%para%pos_of_x
    2297           24 :          p_old = pos_of_x(fine_gbo(1, 1))
    2298           72 :          pp_lb = fine_gbo(2, 1)
    2299           72 :          pp_ub = fine_gbo(2, 1) - 1
    2300           24 :          pp_lb(p_old) = fine_gbo(1, 1)
    2301          496 :          DO x = fine_gbo(1, 1), fine_gbo(2, 1)
    2302          472 :             p = pos_of_x(x)
    2303          496 :             IF (p /= p_old) THEN
    2304           24 :                pp_ub(p_old) = x - 1
    2305           24 :                pp_lb(p) = x
    2306           24 :                p_old = p
    2307              :             END IF
    2308              :          END DO
    2309           24 :          pp_ub(p_old) = fine_gbo(2, 1)
    2310              : 
    2311           72 :          DO ip = 0, n_procs - 1
    2312           48 :             IF (pp_lb(ip) <= pp_ub(ip)) THEN
    2313           48 :                pp_lb(ip) = FLOOR(REAL(pp_lb(ip) - f_shift(1), dp)/2._dp) - 1
    2314           48 :                pp_ub(ip) = FLOOR(REAL(pp_ub(ip) + 1 - f_shift(1), dp)/2._dp) + 1
    2315              :             ELSE
    2316            0 :                pp_lb(ip) = coarse_gbo(2, 1)
    2317            0 :                pp_ub(ip) = coarse_gbo(2, 1) - 1
    2318              :             END IF
    2319           72 :             IF (.NOT. is_split .OR. .NOT. pbc) THEN
    2320           48 :                pp_lb(ip) = MAX(pp_lb(ip), coarse_gbo(1, 1))
    2321           48 :                pp_ub(ip) = MIN(pp_ub(ip), coarse_gbo(2, 1))
    2322              :             END IF
    2323              :          END DO
    2324              : 
    2325           24 :          rcv_size = 0
    2326           72 :          DO ip = 0, n_procs - 1
    2327           48 :             DO x = pp_lb(ip), coarse_gbo(1, 1) - 1
    2328            0 :                x_att = coarse_gbo(1, 1) + MODULO(x - coarse_gbo(1, 1), s(1))
    2329           48 :                IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
    2330            0 :                   rcv_size(ip) = rcv_size(ip) + coarse_slice_size
    2331              :                END IF
    2332              :             END DO
    2333              :             rcv_size(ip) = rcv_size(ip) + coarse_slice_size* &
    2334              :                            MAX(0, &
    2335           48 :                                MIN(pp_ub(ip), my_coarse_bo(2, 1)) - MAX(pp_lb(ip), my_coarse_bo(1, 1)) + 1)
    2336           72 :             DO x = coarse_gbo(2, 1) + 1, pp_ub(ip)
    2337            0 :                x_att = coarse_gbo(1, 1) + MODULO(x - coarse_gbo(1, 1), s(1))
    2338           48 :                IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
    2339            0 :                   rcv_size(ip) = rcv_size(ip) + coarse_slice_size
    2340              :                END IF
    2341              :             END DO
    2342              :          END DO
    2343              : 
    2344              :          ! ** offsets & alloc send-rcv
    2345              : 
    2346              :          send_tot_size = 0
    2347           72 :          DO ip = 0, n_procs - 1
    2348           48 :             send_offset(ip) = send_tot_size
    2349           72 :             send_tot_size = send_tot_size + send_size(ip)
    2350              :          END DO
    2351           24 :          IF (send_tot_size /= (coarse_bo(2, 1) - coarse_bo(1, 1) + 1)*coarse_slice_size) &
    2352            0 :             CPABORT("Error calculating send_tot_size")
    2353           72 :          ALLOCATE (send_buf(0:send_tot_size - 1))
    2354              : 
    2355           24 :          rcv_tot_size = 0
    2356           72 :          DO ip = 0, n_procs - 1
    2357           48 :             rcv_offset(ip) = rcv_tot_size
    2358           72 :             rcv_tot_size = rcv_tot_size + rcv_size(ip)
    2359              :          END DO
    2360           72 :          ALLOCATE (rcv_buf(0:rcv_tot_size - 1))
    2361              : 
    2362              :          ! ** fill send buffer
    2363              : 
    2364           24 :          pos_of_x => coarse_coeffs_pw%pw_grid%para%pos_of_x
    2365              :          p_old = pos_of_x(coarse_gbo(1, 1) &
    2366           24 :                           + MODULO(coarse_bo(1, 1) - coarse_gbo(1, 1), s(1)))
    2367           72 :          sent_size(:) = send_offset
    2368           24 :          ss = coarse_bo(2, 1) - coarse_bo(1, 1) + 1
    2369          184 :          DO x = coarse_bo(1, 1), coarse_bo(2, 1)
    2370          160 :             p = pos_of_x(coarse_gbo(1, 1) + MODULO(x - coarse_gbo(1, 1), s(1)))
    2371              :             CALL dcopy(coarse_slice_size, &
    2372              :                        coarse_coeffs(x, coarse_bo(1, 2), &
    2373          160 :                                      coarse_bo(1, 3)), ss, send_buf(sent_size(p)), 1)
    2374          184 :             sent_size(p) = sent_size(p) + coarse_slice_size
    2375              :          END DO
    2376              : 
    2377           48 :          IF (ANY(sent_size(0:n_procs - 2) /= send_offset(1:n_procs - 1))) &
    2378            0 :             CPABORT("error 1 filling send buffer")
    2379           24 :          IF (sent_size(n_procs - 1) /= send_tot_size) &
    2380            0 :             CPABORT("error 2 filling send buffer")
    2381              : 
    2382              :          IF (local_data) THEN
    2383           24 :             DEALLOCATE (coarse_coeffs)
    2384              :          ELSE
    2385              :             NULLIFY (coarse_coeffs)
    2386              :          END IF
    2387              : 
    2388           48 :          CPASSERT(ALL(sent_size(:n_procs - 2) == send_offset(1:)))
    2389           24 :          CPASSERT(sent_size(n_procs - 1) == send_tot_size)
    2390              :          ! test send/rcv sizes
    2391           24 :          CALL coarse_coeffs_pw%pw_grid%para%group%alltoall(send_size, real_rcv_size, 1)
    2392              : 
    2393           72 :          CPASSERT(ALL(real_rcv_size == rcv_size))
    2394              :          ! all2all
    2395              :          CALL coarse_coeffs_pw%pw_grid%para%group%alltoall(sb=send_buf, scount=send_size, sdispl=send_offset, &
    2396           24 :                                                            rb=rcv_buf, rcount=rcv_size, rdispl=rcv_offset)
    2397              : 
    2398              :          ! ** sum & reorder rcv buffer
    2399              : 
    2400           72 :          sent_size(:) = rcv_offset
    2401           72 :          DO ip = 0, n_procs - 1
    2402              : 
    2403           48 :             DO x = pp_lb(ip), coarse_gbo(1, 1) - 1
    2404            0 :                x_att = coarse_gbo(1, 1) + MODULO(x - coarse_gbo(1, 1), s(1))
    2405           48 :                IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
    2406            0 :                   ii = sent_size(ip)
    2407            0 :                   DO k = coarse_bo(1, 3), coarse_bo(2, 3)
    2408            0 :                      DO j = coarse_bo(1, 2), coarse_bo(2, 2)
    2409            0 :                         coarse_coeffs_pw%array(x_att, j, k) = coarse_coeffs_pw%array(x_att, j, k) + rcv_buf(ii)
    2410            0 :                         ii = ii + 1
    2411              :                      END DO
    2412              :                   END DO
    2413            0 :                   sent_size(ip) = ii
    2414              :                END IF
    2415              :             END DO
    2416              : 
    2417           48 :             ii = sent_size(ip)
    2418          208 :             DO x_att = MAX(pp_lb(ip), my_coarse_bo(1, 1)), MIN(pp_ub(ip), my_coarse_bo(2, 1))
    2419         2160 :                DO k = coarse_bo(1, 3), coarse_bo(2, 3)
    2420        29920 :                   DO j = coarse_bo(1, 2), coarse_bo(2, 2)
    2421        27808 :                      coarse_coeffs_pw%array(x_att, j, k) = coarse_coeffs_pw%array(x_att, j, k) + rcv_buf(ii)
    2422        29760 :                      ii = ii + 1
    2423              :                   END DO
    2424              :                END DO
    2425              :             END DO
    2426           48 :             sent_size(ip) = ii
    2427              : 
    2428           72 :             DO x = coarse_gbo(2, 1) + 1, pp_ub(ip)
    2429            0 :                x_att = coarse_gbo(1, 1) + MODULO(x - coarse_gbo(1, 1), s(1))
    2430           48 :                IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
    2431            0 :                   ii = sent_size(ip)
    2432            0 :                   DO k = coarse_bo(1, 3), coarse_bo(2, 3)
    2433            0 :                      DO j = coarse_bo(1, 2), coarse_bo(2, 2)
    2434            0 :                         coarse_coeffs_pw%array(x_att, j, k) = coarse_coeffs_pw%array(x_att, j, k) + rcv_buf(ii)
    2435            0 :                         ii = ii + 1
    2436              :                      END DO
    2437              :                   END DO
    2438            0 :                   sent_size(ip) = ii
    2439              :                END IF
    2440              :             END DO
    2441              : 
    2442              :          END DO
    2443              : 
    2444           48 :          IF (ANY(sent_size(0:n_procs - 2) /= rcv_offset(1:n_procs - 1))) &
    2445            0 :             CPABORT("error 1 handling the rcv buffer")
    2446           24 :          IF (sent_size(n_procs - 1) /= rcv_tot_size) &
    2447            0 :             CPABORT("error 2 handling the rcv buffer")
    2448              : 
    2449              :          ! dealloc
    2450           24 :          DEALLOCATE (send_size, send_offset, rcv_size, rcv_offset)
    2451           24 :          DEALLOCATE (send_buf, rcv_buf, real_rcv_size)
    2452           24 :          DEALLOCATE (pp_ub, pp_lb)
    2453           48 :          CALL timestop(handle2)
    2454              :       ELSE
    2455              :          CPASSERT(.NOT. local_data)
    2456              :       END IF
    2457              : 
    2458         1062 :       CALL timestop(handle)
    2459         2124 :    END SUBROUTINE add_fine2coarse
    2460              : 
    2461              : ! **************************************************************************************************
    2462              : !> \brief ...
    2463              : !> \param preconditioner the preconditioner to create
    2464              : !> \param precond_kind the kind of preconditioner to use
    2465              : !> \param pool a pool with grids of the same type as the elements to
    2466              : !>        precondition
    2467              : !> \param pbc if periodic boundary conditions should be applied
    2468              : !> \param transpose ...
    2469              : !> \author fawzi
    2470              : ! **************************************************************************************************
    2471        33948 :    SUBROUTINE pw_spline_precond_create(preconditioner, precond_kind, &
    2472              :                                        pool, pbc, transpose)
    2473              :       TYPE(pw_spline_precond_type), INTENT(OUT)          :: preconditioner
    2474              :       INTEGER, INTENT(in)                                :: precond_kind
    2475              :       TYPE(pw_pool_type), INTENT(IN), POINTER            :: pool
    2476              :       LOGICAL, INTENT(in)                                :: pbc, transpose
    2477              : 
    2478              :       preconditioner%kind = no_precond
    2479         3772 :       preconditioner%pool => pool
    2480         3772 :       preconditioner%pbc = pbc
    2481         3772 :       preconditioner%transpose = transpose
    2482         3772 :       CALL pool%retain()
    2483         3772 :       CALL pw_spline_precond_set_kind(preconditioner, precond_kind)
    2484         3772 :    END SUBROUTINE pw_spline_precond_create
    2485              : 
    2486              : ! **************************************************************************************************
    2487              : !> \brief switches the types of preconditioner to use
    2488              : !> \param preconditioner the preconditioner to be changed
    2489              : !> \param precond_kind the new kind of preconditioner to use
    2490              : !> \param pbc ...
    2491              : !> \param transpose ...
    2492              : !> \author fawzi
    2493              : ! **************************************************************************************************
    2494         7544 :    SUBROUTINE pw_spline_precond_set_kind(preconditioner, precond_kind, pbc, &
    2495              :                                          transpose)
    2496              :       TYPE(pw_spline_precond_type), INTENT(INOUT)        :: preconditioner
    2497              :       INTEGER, INTENT(in)                                :: precond_kind
    2498              :       LOGICAL, INTENT(in), OPTIONAL                      :: pbc, transpose
    2499              : 
    2500              :       LOGICAL                                            :: do_3d_coeff
    2501              :       REAL(kind=dp)                                      :: s
    2502              : 
    2503         7544 :       IF (PRESENT(transpose)) preconditioner%transpose = transpose
    2504         7544 :       do_3d_coeff = .FALSE.
    2505         7544 :       preconditioner%kind = precond_kind
    2506         7544 :       IF (PRESENT(pbc)) preconditioner%pbc = pbc
    2507              :       SELECT CASE (precond_kind)
    2508              :       CASE (no_precond)
    2509              :       CASE (precond_spl3_aint2)
    2510            0 :          preconditioner%coeffs_1d = [-1.66_dp*0.25_dp, 1.66_dp, -1.66_dp*0.25_dp]
    2511            0 :          preconditioner%sharpen = .FALSE.
    2512            0 :          preconditioner%normalize = .FALSE.
    2513         3772 :          do_3d_coeff = .TRUE.
    2514              :       CASE (precond_spl3_3)
    2515         3772 :          preconditioner%coeffs_1d(1) = -0.25_dp*1.6_dp
    2516         3772 :          preconditioner%coeffs_1d(2) = 1.6_dp
    2517         3772 :          preconditioner%coeffs_1d(3) = -0.25_dp*1.6_dp
    2518         3772 :          preconditioner%sharpen = .FALSE.
    2519         3772 :          preconditioner%normalize = .FALSE.
    2520            0 :          do_3d_coeff = .TRUE.
    2521              :       CASE (precond_spl3_2)
    2522            0 :          preconditioner%coeffs_1d(1) = -0.26_dp*1.76_dp
    2523            0 :          preconditioner%coeffs_1d(2) = 1.76_dp
    2524            0 :          preconditioner%coeffs_1d(3) = -0.26_dp*1.76_dp
    2525            0 :          preconditioner%sharpen = .FALSE.
    2526            0 :          preconditioner%normalize = .FALSE.
    2527              :          do_3d_coeff = .TRUE.
    2528              :       CASE (precond_spl3_aint)
    2529        15088 :          preconditioner%coeffs_1d = spl3_1d_coeffs0
    2530         3772 :          preconditioner%sharpen = .TRUE.
    2531         3772 :          preconditioner%normalize = .TRUE.
    2532            0 :          do_3d_coeff = .TRUE.
    2533              :       CASE (precond_spl3_1)
    2534            0 :          preconditioner%coeffs_1d(1) = 0.5_dp/3._dp**(1._dp/3._dp)
    2535            0 :          preconditioner%coeffs_1d(2) = 4._dp/3._dp**(1._dp/3._dp)
    2536            0 :          preconditioner%coeffs_1d(3) = 0.5_dp/3._dp**(1._dp/3._dp)
    2537            0 :          preconditioner%sharpen = .TRUE.
    2538            0 :          preconditioner%normalize = .FALSE.
    2539            0 :          do_3d_coeff = .TRUE.
    2540              :       CASE default
    2541         7544 :          CPABORT("Unknown preconditioner kind for pw_spline_precond_set_kind")
    2542              :       END SELECT
    2543              :       IF (do_3d_coeff) THEN
    2544         7544 :          s = 1._dp
    2545         7544 :          IF (preconditioner%sharpen) s = -1._dp
    2546              :          preconditioner%coeffs(1) = &
    2547              :             s*preconditioner%coeffs_1d(2)* &
    2548              :             preconditioner%coeffs_1d(2)* &
    2549         7544 :             preconditioner%coeffs_1d(2)
    2550              :          preconditioner%coeffs(2) = &
    2551              :             s*preconditioner%coeffs_1d(1)* &
    2552              :             preconditioner%coeffs_1d(2)* &
    2553         7544 :             preconditioner%coeffs_1d(2)
    2554              :          preconditioner%coeffs(3) = &
    2555              :             s*preconditioner%coeffs_1d(1)* &
    2556              :             preconditioner%coeffs_1d(1)* &
    2557         7544 :             preconditioner%coeffs_1d(2)
    2558              :          preconditioner%coeffs(4) = &
    2559              :             s*preconditioner%coeffs_1d(1)* &
    2560              :             preconditioner%coeffs_1d(1)* &
    2561         7544 :             preconditioner%coeffs_1d(1)
    2562         7544 :          IF (preconditioner%sharpen) THEN
    2563         3772 :             IF (preconditioner%normalize) THEN
    2564              :                preconditioner%coeffs(1) = 2._dp + &
    2565         3772 :                                           preconditioner%coeffs(1)
    2566              :             ELSE
    2567            0 :                preconditioner%coeffs(1) = -preconditioner%coeffs(1)
    2568              :             END IF
    2569              :          END IF
    2570              :       END IF
    2571         7544 :    END SUBROUTINE pw_spline_precond_set_kind
    2572              : 
    2573              : ! **************************************************************************************************
    2574              : !> \brief releases the preconditioner
    2575              : !> \param preconditioner the preconditioner to release
    2576              : !> \author fawzi
    2577              : ! **************************************************************************************************
    2578         3772 :    SUBROUTINE pw_spline_precond_release(preconditioner)
    2579              :       TYPE(pw_spline_precond_type), INTENT(INOUT)        :: preconditioner
    2580              : 
    2581         3772 :       CALL pw_pool_release(preconditioner%pool)
    2582         3772 :    END SUBROUTINE pw_spline_precond_release
    2583              : 
    2584              : ! **************************************************************************************************
    2585              : !> \brief applies the preconditioner to the system of equations to find the
    2586              : !>      coefficients of the spline
    2587              : !> \param preconditioner the preconditioner to apply
    2588              : !> \param in_v the grid on which the preconditioner should be applied
    2589              : !> \param out_v place to store the preconditioner applied on v_out
    2590              : !> \author fawzi
    2591              : ! **************************************************************************************************
    2592        76185 :    SUBROUTINE pw_spline_do_precond(preconditioner, in_v, out_v)
    2593              :       TYPE(pw_spline_precond_type), INTENT(IN)           :: preconditioner
    2594              :       TYPE(pw_r3d_rs_type), INTENT(IN)                   :: in_v
    2595              :       TYPE(pw_r3d_rs_type), INTENT(INOUT)                :: out_v
    2596              : 
    2597        76185 :       SELECT CASE (preconditioner%kind)
    2598              :       CASE (no_precond)
    2599            0 :          CALL pw_copy(in_v, out_v)
    2600              :       CASE (precond_spl3_aint, precond_spl3_1)
    2601         3772 :          CALL pw_zero(out_v)
    2602         3772 :          IF (preconditioner%pbc) THEN
    2603              :             CALL pw_nn_smear_r(pw_in=in_v, pw_out=out_v, &
    2604          450 :                                coeffs=preconditioner%coeffs)
    2605              :          ELSE
    2606              :             CALL pw_nn_compose_r_no_pbc(weights_1d=preconditioner%coeffs_1d, &
    2607              :                                         pw_in=in_v, pw_out=out_v, sharpen=preconditioner%sharpen, &
    2608              :                                         normalize=preconditioner%normalize, &
    2609         3322 :                                         transpose=preconditioner%transpose)
    2610              :          END IF
    2611              :       CASE (precond_spl3_3, precond_spl3_2, precond_spl3_aint2)
    2612        72413 :          CALL pw_zero(out_v)
    2613        72413 :          IF (preconditioner%pbc) THEN
    2614              :             CALL pw_nn_smear_r(pw_in=in_v, pw_out=out_v, &
    2615         6382 :                                coeffs=preconditioner%coeffs)
    2616              :          ELSE
    2617              :             CALL pw_nn_compose_r_no_pbc(weights_1d=preconditioner%coeffs_1d, &
    2618              :                                         pw_in=in_v, pw_out=out_v, sharpen=preconditioner%sharpen, &
    2619              :                                         normalize=preconditioner%normalize, smooth_boundary=.TRUE., &
    2620        66031 :                                         transpose=preconditioner%transpose)
    2621              :          END IF
    2622              :       CASE default
    2623        76185 :          CPABORT("Unknown preconditioner kind for pw_spline_do_precond")
    2624              :       END SELECT
    2625        76185 :    END SUBROUTINE pw_spline_do_precond
    2626              : 
    2627              : ! **************************************************************************************************
    2628              : !> \brief solves iteratively (CG) a systmes of linear equations
    2629              : !>           linOp(coeffs)=values
    2630              : !>      (for example those needed to find the coefficients of a spline)
    2631              : !>      Returns true if the it succeeded to achieve the requested accuracy
    2632              : !> \param values the right hand side of the system
    2633              : !> \param coeffs will contain the solution of the system (and on entry
    2634              : !>        it contains the starting point)
    2635              : !> \param linOp the linear operator to be inverted
    2636              : !> \param preconditioner the preconditioner to apply
    2637              : !> \param pool a pool of grids (for the temporary objects)
    2638              : !> \param eps_r the requested precision on the residual
    2639              : !> \param eps_x the requested precision on the solution
    2640              : !> \param max_iter maximum number of iteration allowed
    2641              : !> \param sumtype ...
    2642              : !> \return ...
    2643              : !> \author fawzi
    2644              : ! **************************************************************************************************
    2645         3772 :    FUNCTION find_coeffs(values, coeffs, linOp, preconditioner, pool, &
    2646              :                         eps_r, eps_x, max_iter, sumtype) RESULT(res)
    2647              : 
    2648              :       TYPE(pw_r3d_rs_type), INTENT(IN)                   :: values
    2649              :       TYPE(pw_r3d_rs_type), INTENT(INOUT)                :: coeffs
    2650              :       INTERFACE
    2651              :          SUBROUTINE linOp(pw_in, pw_out)
    2652              :             USE pw_types, ONLY: pw_r3d_rs_type
    2653              :             TYPE(pw_r3d_rs_type), INTENT(IN)    :: pw_in
    2654              :             TYPE(pw_r3d_rs_type), INTENT(INOUT) :: pw_out
    2655              :          END SUBROUTINE linOp
    2656              :       END INTERFACE
    2657              :       TYPE(pw_spline_precond_type), INTENT(IN)           :: preconditioner
    2658              :       TYPE(pw_pool_type), POINTER                        :: pool
    2659              :       REAL(kind=dp), INTENT(in)                          :: eps_r, eps_x
    2660              :       INTEGER, INTENT(in)                                :: max_iter
    2661              :       INTEGER, INTENT(in), OPTIONAL                      :: sumtype
    2662              :       LOGICAL                                            :: res
    2663              : 
    2664              :       INTEGER                                            :: i, iiter, iter, j, k
    2665              :       INTEGER, DIMENSION(2, 3)                           :: bo
    2666              :       LOGICAL                                            :: last
    2667              :       REAL(kind=dp)                                      :: alpha, beta, eps_r_att, eps_x_att, r_z, &
    2668              :                                                             r_z_new
    2669              :       TYPE(cp_logger_type), POINTER                      :: logger
    2670              :       TYPE(pw_r3d_rs_type)                               :: Ap, p, r, z
    2671              : 
    2672         3772 :       last = .FALSE.
    2673              : 
    2674         3772 :       res = .FALSE.
    2675         3772 :       logger => cp_get_default_logger()
    2676         3772 :       CALL pool%create_pw(r)
    2677         3772 :       CALL pool%create_pw(z)
    2678         3772 :       CALL pool%create_pw(p)
    2679         3772 :       CALL pool%create_pw(Ap)
    2680              : 
    2681              :       !CALL cp_add_iter_level(logger%iter_info,level_name="SPLINE_FIND_COEFFS")
    2682         8290 :       ext_do: DO iiter = 1, max_iter, 10
    2683         8290 :          CALL pw_zero(r)
    2684         8290 :          CALL linOp(pw_in=coeffs, pw_out=r)
    2685     63345536 :          r%array = -r%array
    2686         8290 :          CALL pw_axpy(values, r)
    2687         8290 :          CALL pw_spline_do_precond(preconditioner, in_v=r, out_v=z)
    2688         8290 :          CALL pw_copy(z, p)
    2689         8290 :          r_z = pw_integral_ab(r, z, sumtype)
    2690              : 
    2691        72413 :          DO iter = iiter, MIN(iiter + 9, max_iter)
    2692        67895 :             eps_r_att = SQRT(pw_integral_ab(r, r, sumtype))
    2693        67895 :             IF (eps_r_att == 0._dp) THEN
    2694        67895 :                eps_x_att = 0._dp
    2695              :                last = .TRUE.
    2696              :             ELSE
    2697        67889 :                CALL pw_zero(Ap)
    2698        67889 :                CALL linOp(pw_in=p, pw_out=Ap)
    2699        67889 :                alpha = r_z/pw_integral_ab(Ap, p, sumtype)
    2700              : 
    2701        67889 :                CALL pw_axpy(p, coeffs, alpha=alpha)
    2702              : 
    2703        67889 :                eps_x_att = alpha*SQRT(pw_integral_ab(p, p, sumtype)) ! try to spare if unneeded?
    2704        67889 :                IF (eps_r_att < eps_r .AND. eps_x_att < eps_x) last = .TRUE.
    2705              :             END IF
    2706              :             !CALL cp_iterate(logger%iter_info,last=last)
    2707              :             IF (last) THEN
    2708              :                res = .TRUE.
    2709              :                EXIT ext_do
    2710              :             END IF
    2711              : 
    2712        64123 :             CALL pw_axpy(Ap, r, alpha=-alpha)
    2713              : 
    2714        64123 :             CALL pw_spline_do_precond(preconditioner, in_v=r, out_v=z)
    2715              : 
    2716        64123 :             r_z_new = pw_integral_ab(r, z, sumtype)
    2717        64123 :             beta = r_z_new/r_z
    2718        64123 :             r_z = r_z_new
    2719              : 
    2720       641230 :             bo = p%pw_grid%bounds_local
    2721       975144 :             DO k = bo(1, 3), bo(2, 3)
    2722     19717425 :                DO j = bo(1, 2), bo(2, 2)
    2723    423564035 :                   DO i = bo(1, 1), bo(2, 1)
    2724    422657532 :                      p%array(i, j, k) = z%array(i, j, k) + beta*p%array(i, j, k)
    2725              :                   END DO
    2726              :                END DO
    2727              :             END DO
    2728              : 
    2729              :          END DO
    2730              :       END DO ext_do
    2731              :       !CALL cp_rm_iter_level(logger%iter_info,level_name="SPLINE_FIND_COEFFS")
    2732              : 
    2733         3772 :       CALL pool%give_back_pw(r)
    2734         3772 :       CALL pool%give_back_pw(z)
    2735         3772 :       CALL pool%give_back_pw(p)
    2736         3772 :       CALL pool%give_back_pw(Ap)
    2737              : 
    2738         3772 :    END FUNCTION find_coeffs
    2739              : 
    2740              : ! **************************************************************************************************
    2741              : !> \brief adds to pw_out pw_in composed with the weights
    2742              : !>      pw_out%array(i,j,k)=pw_out%array(i,j,k)+sum(pw_in%array(i+l,j+m,k+n)*
    2743              : !>         weights_1d(abs(l)+1)*weights_1d(abs(m)+1)*weights_1d(abs(n)+1),
    2744              : !>         l=-1..1,m=-1..1,n=-1..1)
    2745              : !> \param weights_1d ...
    2746              : !> \param pw_in ...
    2747              : !> \param pw_out ...
    2748              : !> \param sharpen ...
    2749              : !> \param normalize ...
    2750              : !> \param transpose ...
    2751              : !> \param smooth_boundary ...
    2752              : !> \author fawzi
    2753              : ! **************************************************************************************************
    2754       138700 :    SUBROUTINE pw_nn_compose_r_no_pbc(weights_1d, pw_in, pw_out, &
    2755              :                                      sharpen, normalize, transpose, smooth_boundary)
    2756              :       REAL(kind=dp), DIMENSION(-1:1)                     :: weights_1d
    2757              :       TYPE(pw_r3d_rs_type), INTENT(IN)                   :: pw_in, pw_out
    2758              :       LOGICAL, INTENT(in), OPTIONAL                      :: sharpen, normalize, transpose, &
    2759              :                                                             smooth_boundary
    2760              : 
    2761              :       INTEGER                                            :: first_index, i, j, jw, k, kw, &
    2762              :                                                             last_index, myj, myk, n_els
    2763              :       INTEGER, DIMENSION(2, 3)                           :: bo, gbo
    2764              :       INTEGER, DIMENSION(3)                              :: s
    2765              :       LOGICAL                                            :: has_l_boundary, has_u_boundary, &
    2766              :                                                             is_split, my_normalize, my_sharpen, &
    2767              :                                                             my_smooth_boundary, my_transpose
    2768              :       REAL(kind=dp)                                      :: in_val_f, in_val_l, in_val_tmp, w_j, w_k
    2769              :       REAL(kind=dp), DIMENSION(-1:1)                     :: w
    2770       138700 :       REAL(kind=dp), DIMENSION(:, :), POINTER            :: l_boundary, tmp, u_boundary
    2771              :       REAL(kind=dp), DIMENSION(:, :, :), POINTER         :: in_val, out_val
    2772              : 
    2773      1387000 :       bo = pw_in%pw_grid%bounds_local
    2774      1387000 :       gbo = pw_in%pw_grid%bounds
    2775       138700 :       in_val => pw_in%array
    2776       138700 :       out_val => pw_out%array
    2777       138700 :       my_sharpen = .FALSE.
    2778       138700 :       IF (PRESENT(sharpen)) my_sharpen = sharpen
    2779       138700 :       my_normalize = .FALSE.
    2780       138700 :       IF (PRESENT(normalize)) my_normalize = normalize
    2781       138700 :       my_transpose = .FALSE.
    2782       138700 :       IF (PRESENT(transpose)) my_transpose = transpose
    2783       138700 :       my_smooth_boundary = .FALSE.
    2784       138700 :       IF (PRESENT(smooth_boundary)) my_smooth_boundary = smooth_boundary
    2785       138700 :       CPASSERT(.NOT. my_normalize .OR. my_sharpen)
    2786       138700 :       CPASSERT(.NOT. my_smooth_boundary .OR. .NOT. my_sharpen)
    2787       554800 :       DO i = 1, 3
    2788       554800 :          s(i) = bo(2, i) - bo(1, i) + 1
    2789              :       END DO
    2790       554800 :       IF (ANY(s < 1)) RETURN
    2791              :       is_split = ANY(pw_in%pw_grid%bounds_local(:, 1) /= &
    2792       412752 :                      pw_in%pw_grid%bounds(:, 1))
    2793       138700 :       has_l_boundary = (gbo(1, 1) == bo(1, 1))
    2794       138700 :       has_u_boundary = (gbo(2, 1) == bo(2, 1))
    2795       138700 :       IF (is_split) THEN
    2796              :          ALLOCATE (l_boundary(bo(1, 2):bo(2, 2), bo(1, 3):bo(2, 3)), &
    2797              :                    u_boundary(bo(1, 2):bo(2, 2), bo(1, 3):bo(2, 3)), &
    2798        17856 :                    tmp(bo(1, 2):bo(2, 2), bo(1, 3):bo(2, 3)))
    2799       311256 :          tmp(:, :) = pw_in%array(bo(2, 1), :, :)
    2800              :          CALL pw_in%pw_grid%para%group%sendrecv(tmp, pw_in%pw_grid%para%pos_of_x( &
    2801              :                                                 gbo(1, 1) + MODULO(bo(2, 1) + 1 - gbo(1, 1), gbo(2, 1) - gbo(1, 1) + 1)), &
    2802              :                                                 l_boundary, pw_in%pw_grid%para%pos_of_x( &
    2803       620280 :                                                 gbo(1, 1) + MODULO(bo(1, 1) - 1 - gbo(1, 1), gbo(2, 1) - gbo(1, 1) + 1)))
    2804       311256 :          tmp(:, :) = pw_in%array(bo(1, 1), :, :)
    2805              :          CALL pw_in%pw_grid%para%group%sendrecv(tmp, pw_in%pw_grid%para%pos_of_x( &
    2806              :                                                 gbo(1, 1) + MODULO(bo(1, 1) - 1 - gbo(1, 1), gbo(2, 1) - gbo(1, 1) + 1)), &
    2807              :                                                 u_boundary, pw_in%pw_grid%para%pos_of_x( &
    2808       620280 :                                                 gbo(1, 1) + MODULO(bo(2, 1) + 1 - gbo(1, 1), gbo(2, 1) - gbo(1, 1) + 1)))
    2809         2232 :          DEALLOCATE (tmp)
    2810              :       END IF
    2811              : 
    2812       138700 :       n_els = s(1)
    2813       138700 :       IF (has_l_boundary) THEN
    2814       137584 :          n_els = n_els - 1
    2815       137584 :          first_index = bo(1, 1) + 1
    2816              :       ELSE
    2817         1116 :          first_index = bo(1, 1)
    2818              :       END IF
    2819       138700 :       IF (has_u_boundary) THEN
    2820       137584 :          n_els = n_els - 1
    2821       137584 :          last_index = bo(2, 1) - 1
    2822              :       ELSE
    2823         1116 :          last_index = bo(2, 1)
    2824              :       END IF
    2825              : !$OMP PARALLEL DO DEFAULT(NONE) &
    2826              : !$OMP PRIVATE(k, kw, myk, j, jw, myj, in_val_f, in_val_l, w_k, w_j, in_val_tmp, w) &
    2827              : !$OMP SHARED(bo, in_val, out_val, s, l_boundary, u_boundary, weights_1d, is_split, &
    2828              : !$OMP        my_transpose, gbo, my_smooth_boundary, has_l_boundary, has_u_boundary, &
    2829       138700 : !$OMP        my_sharpen, last_index, first_index, my_normalize, n_els)
    2830              :       DO k = bo(1, 3), bo(2, 3)
    2831              :          DO kw = -1, 1
    2832              :             myk = k + kw
    2833              :             IF (my_transpose) THEN
    2834              :                IF (k >= gbo(2, 3) - 1 .OR. k <= gbo(1, 3) + 1) THEN
    2835              :                   IF (k == gbo(2, 3) .OR. k == gbo(1, 3)) THEN
    2836              :                      IF (myk < gbo(2, 3) .AND. myk > gbo(1, 3)) THEN
    2837              :                         w_k = weights_1d(kw)
    2838              :                         IF (my_smooth_boundary) THEN
    2839              :                            w_k = weights_1d(kw)/weights_1d(0)
    2840              :                         END IF
    2841              :                      ELSE IF (kw == 0) THEN
    2842              :                         w_k = 1._dp
    2843              :                      ELSE
    2844              :                         CYCLE
    2845              :                      END IF
    2846              :                   ELSE
    2847              :                      IF (myk == gbo(2, 3) .OR. myk == gbo(1, 3)) CYCLE
    2848              :                      w_k = weights_1d(kw)
    2849              :                   END IF
    2850              :                ELSE
    2851              :                   w_k = weights_1d(kw)
    2852              :                END IF
    2853              :             ELSE
    2854              :                IF (k >= gbo(2, 3) - 1 .OR. k <= gbo(1, 3) + 1) THEN
    2855              :                   IF (k == gbo(2, 3) .OR. k == gbo(1, 3)) THEN
    2856              :                      IF (kw /= 0) CYCLE
    2857              :                      w_k = 1._dp
    2858              :                   ELSE
    2859              :                      IF (my_smooth_boundary .AND. ((k == gbo(1, 3) + 1 .AND. myk == gbo(1, 3)) .OR. &
    2860              :                                                    (k == gbo(2, 3) - 1 .AND. myk == gbo(2, 3)))) THEN
    2861              :                         w_k = weights_1d(kw)/weights_1d(0)
    2862              :                      ELSE
    2863              :                         w_k = weights_1d(kw)
    2864              :                      END IF
    2865              :                   END IF
    2866              :                ELSE
    2867              :                   w_k = weights_1d(kw)
    2868              :                END IF
    2869              :             END IF
    2870              :             DO j = bo(1, 2), bo(2, 2)
    2871              :                DO jw = -1, 1
    2872              :                   myj = j + jw
    2873              :                   IF (j < gbo(2, 2) - 1 .AND. j > gbo(1, 2) + 1) THEN
    2874              :                      w_j = w_k*weights_1d(jw)
    2875              :                   ELSE
    2876              :                      IF (my_transpose) THEN
    2877              :                         IF (j == gbo(2, 2) .OR. j == gbo(1, 2)) THEN
    2878              :                            IF (myj < gbo(2, 2) .AND. myj > gbo(1, 2)) THEN
    2879              :                               w_j = weights_1d(jw)*w_k
    2880              :                               IF (my_smooth_boundary) THEN
    2881              :                                  w_j = weights_1d(jw)/weights_1d(0)*w_k
    2882              :                               END IF
    2883              :                            ELSE IF (jw == 0) THEN
    2884              :                               w_j = w_k
    2885              :                            ELSE
    2886              :                               CYCLE
    2887              :                            END IF
    2888              :                         ELSE
    2889              :                            IF (myj == gbo(2, 2) .OR. myj == gbo(1, 2)) CYCLE
    2890              :                            w_j = w_k*weights_1d(jw)
    2891              :                         END IF
    2892              :                      ELSE
    2893              :                         IF (j == gbo(2, 2) .OR. j == gbo(1, 2)) THEN
    2894              :                            IF (jw /= 0) CYCLE
    2895              :                            w_j = w_k
    2896              :                         ELSE IF (my_smooth_boundary .AND. ((j == gbo(1, 2) + 1 .AND. myj == gbo(1, 2)) .OR. &
    2897              :                                                            (j == gbo(2, 2) - 1 .AND. myj == gbo(2, 2)))) THEN
    2898              :                            w_j = w_k*weights_1d(jw)/weights_1d(0)
    2899              :                         ELSE
    2900              :                            w_j = w_k*weights_1d(jw)
    2901              :                         END IF
    2902              :                      END IF
    2903              :                   END IF
    2904              : 
    2905              :                   IF (has_l_boundary) THEN
    2906              :                      IF (my_transpose) THEN
    2907              :                         IF (s(1) == 1) THEN
    2908              :                            CPASSERT(.NOT. has_u_boundary)
    2909              :                            in_val_tmp = u_boundary(myj, myk)
    2910              :                         ELSE
    2911              :                            in_val_tmp = in_val(bo(1, 1) + 1, myj, myk)
    2912              :                         END IF
    2913              :                         IF (my_sharpen) THEN
    2914              :                            IF (kw == 0 .AND. jw == 0) THEN
    2915              :                               IF (my_normalize) THEN
    2916              :                                  out_val(bo(1, 1), j, k) = out_val(bo(1, 1), j, k) + &
    2917              :                                                            (2.0_dp - w_j)*in_val(bo(1, 1), myj, myk) - &
    2918              :                                                            in_val_tmp*weights_1d(1)*w_j
    2919              :                               ELSE
    2920              :                                  out_val(bo(1, 1), j, k) = out_val(bo(1, 1), j, k) + &
    2921              :                                                            in_val(bo(1, 1), myj, myk)*w_j - &
    2922              :                                                            in_val_tmp*weights_1d(1)*w_j
    2923              :                               END IF
    2924              :                            ELSE
    2925              :                               out_val(bo(1, 1), j, k) = out_val(bo(1, 1), j, k) - &
    2926              :                                                         in_val(bo(1, 1), myj, myk)*w_j - &
    2927              :                                                         in_val_tmp*weights_1d(1)*w_j
    2928              :                            END IF
    2929              :                         ELSE IF (my_smooth_boundary) THEN
    2930              :                            out_val(bo(1, 1), j, k) = out_val(bo(1, 1), j, k) + &
    2931              :                                                      w_j*(in_val(bo(1, 1), myj, myk) + &
    2932              :                                                           in_val_tmp*weights_1d(1)/weights_1d(0))
    2933              :                         ELSE
    2934              :                            out_val(bo(1, 1), j, k) = out_val(bo(1, 1), j, k) + &
    2935              :                                                      w_j*(in_val(bo(1, 1), myj, myk) + &
    2936              :                                                           in_val_tmp*weights_1d(1))
    2937              :                         END IF
    2938              :                         in_val_f = 0.0_dp
    2939              :                      ELSE
    2940              :                         in_val_f = in_val(bo(1, 1), myj, myk)
    2941              :                         IF (my_sharpen) THEN
    2942              :                            IF (kw == 0 .AND. jw == 0) THEN
    2943              :                               IF (my_normalize) THEN
    2944              :                                  out_val(bo(1, 1), j, k) = out_val(bo(1, 1), j, k) + &
    2945              :                                                            (2.0_dp - w_j)*in_val_f
    2946              :                               ELSE
    2947              :                                  out_val(bo(1, 1), j, k) = out_val(bo(1, 1), j, k) + &
    2948              :                                                            in_val_f*w_j
    2949              :                               END IF
    2950              :                            ELSE
    2951              :                               out_val(bo(1, 1), j, k) = out_val(bo(1, 1), j, k) - &
    2952              :                                                         in_val_f*w_j
    2953              :                            END IF
    2954              :                         ELSE
    2955              :                            out_val(bo(1, 1), j, k) = out_val(bo(1, 1), j, k) + &
    2956              :                                                      in_val_f*w_j
    2957              :                         END IF
    2958              :                      END IF
    2959              :                   ELSE
    2960              :                      in_val_f = l_boundary(myj, myk)
    2961              :                   END IF
    2962              :                   IF (has_u_boundary) THEN
    2963              :                      IF (my_transpose) THEN
    2964              :                         in_val_l = in_val(bo(2, 1), myj, myk)
    2965              :                         IF (s(1) == 1) THEN
    2966              :                            CPASSERT(.NOT. has_l_boundary)
    2967              :                            in_val_tmp = l_boundary(myj, myk)
    2968              :                         ELSE
    2969              :                            in_val_tmp = in_val(bo(2, 1) - 1, myj, myk)
    2970              :                         END IF
    2971              :                         IF (my_sharpen) THEN
    2972              :                            IF (kw == 0 .AND. jw == 0) THEN
    2973              :                               IF (my_normalize) THEN
    2974              :                                  out_val(bo(2, 1), j, k) = out_val(bo(2, 1), j, k) + &
    2975              :                                                            in_val_l*(2._dp - w_j) - &
    2976              :                                                            in_val_tmp*weights_1d(1)*w_j
    2977              :                               ELSE
    2978              :                                  out_val(bo(2, 1), j, k) = out_val(bo(2, 1), j, k) + &
    2979              :                                                            in_val_l*w_j - &
    2980              :                                                            in_val_tmp*weights_1d(1)*w_j
    2981              :                               END IF
    2982              :                            ELSE
    2983              :                               out_val(bo(2, 1), j, k) = out_val(bo(2, 1), j, k) - &
    2984              :                                                         w_j*in_val_l - &
    2985              :                                                         in_val_tmp*weights_1d(1)*w_j
    2986              :                            END IF
    2987              :                         ELSE IF (my_smooth_boundary) THEN
    2988              :                            out_val(bo(2, 1), j, k) = out_val(bo(2, 1), j, k) + &
    2989              :                                                      w_j*(in_val_l + in_val_tmp*weights_1d(1)/weights_1d(0))
    2990              :                         ELSE
    2991              :                            out_val(bo(2, 1), j, k) = out_val(bo(2, 1), j, k) + &
    2992              :                                                      w_j*(in_val_l + in_val_tmp*weights_1d(1))
    2993              :                         END IF
    2994              :                         in_val_l = 0._dp
    2995              :                      ELSE
    2996              :                         in_val_l = in_val(bo(2, 1), myj, myk)
    2997              :                         IF (my_sharpen) THEN
    2998              :                            IF (kw == 0 .AND. jw == 0) THEN
    2999              :                               IF (my_normalize) THEN
    3000              :                                  out_val(bo(2, 1), j, k) = out_val(bo(2, 1), j, k) + &
    3001              :                                                            in_val_l*(2._dp - w_j)
    3002              :                               ELSE
    3003              :                                  out_val(bo(2, 1), j, k) = out_val(bo(2, 1), j, k) + &
    3004              :                                                            in_val_l*w_j
    3005              :                               END IF
    3006              :                            ELSE
    3007              :                               out_val(bo(2, 1), j, k) = out_val(bo(2, 1), j, k) - &
    3008              :                                                         w_j*in_val_l
    3009              :                            END IF
    3010              :                         ELSE
    3011              :                            out_val(bo(2, 1), j, k) = out_val(bo(2, 1), j, k) + &
    3012              :                                                      w_j*in_val_l
    3013              :                         END IF
    3014              :                      END IF
    3015              :                   ELSE
    3016              :                      in_val_l = u_boundary(myj, myk)
    3017              :                   END IF
    3018              :                   IF (last_index >= first_index) THEN
    3019              :                      IF (my_transpose) THEN
    3020              :                         IF (bo(1, 1) - 1 == gbo(1, 1)) THEN
    3021              :                            in_val_f = 0._dp
    3022              :                         ELSE IF (bo(2, 1) + 1 == gbo(2, 1)) THEN
    3023              :                            in_val_l = 0._dp
    3024              :                         END IF
    3025              :                      END IF
    3026              :                      IF (my_sharpen) THEN
    3027              :                         w = -weights_1d*w_j
    3028              :                         IF (kw == 0 .AND. jw == 0) THEN
    3029              :                            IF (my_normalize) THEN
    3030              :                               w(0) = w(0) + 2._dp
    3031              :                            ELSE
    3032              :                               w(0) = -w(0)
    3033              :                            END IF
    3034              :                         END IF
    3035              :                      ELSE
    3036              :                         w = weights_1d*w_j
    3037              :                      END IF
    3038              :                      IF (my_smooth_boundary .AND. (.NOT. my_transpose)) THEN
    3039              :                         IF (gbo(1, 1) + 1 >= bo(1, 1) .AND. &
    3040              :                             gbo(1, 1) + 1 <= bo(2, 1) .AND. gbo(2, 1) - gbo(1, 1) > 2) THEN
    3041              :                            IF (gbo(1, 1) >= bo(1, 1)) THEN
    3042              :                               out_val(gbo(1, 1) + 1, j, k) = out_val(gbo(1, 1) + 1, j, k) + &
    3043              :                                                              in_val(gbo(1, 1), myj, myk)*w_j*weights_1d(-1)* &
    3044              :                                                              (1._dp/weights_1d(0) - 1._dp)
    3045              :                            ELSE
    3046              :                               out_val(gbo(1, 1) + 1, j, k) = out_val(gbo(1, 1) + 1, j, k) + &
    3047              :                                                              l_boundary(myj, myk)*w_j*weights_1d(-1)* &
    3048              :                                                              (1._dp/weights_1d(0) - 1._dp)
    3049              :                            END IF
    3050              :                         END IF
    3051              :                      END IF
    3052              :                      CALL pw_compose_stripe(weights=w, &
    3053              :                                             in_val=in_val(first_index:last_index, myj, myk), &
    3054              :                                             in_val_first=in_val_f, in_val_last=in_val_l, &
    3055              :                                             out_val=out_val(first_index:last_index, j, k), &
    3056              :                                             n_el=n_els)
    3057              : !FM                   call pw_compose_stripe2(weights=w,&
    3058              : !FM                        in_val=in_val,&
    3059              : !FM                        in_val_first=in_val_f,in_val_last=in_val_l,&
    3060              : !FM                        out_val=out_val,&
    3061              : !FM                        first_val=first_index,last_val=last_index,&
    3062              : !FM                        myj=myj,myk=myk,j=j,k=k)
    3063              :                      IF (my_smooth_boundary .AND. (.NOT. my_transpose)) THEN
    3064              :                         IF (gbo(2, 1) - 1 >= bo(1, 1) .AND. &
    3065              :                             gbo(2, 1) - 1 <= bo(2, 1) .AND. gbo(2, 1) - gbo(1, 1) > 2) THEN
    3066              :                            IF (gbo(2, 1) <= bo(2, 1)) THEN
    3067              :                               out_val(gbo(2, 1) - 1, j, k) = out_val(gbo(2, 1) - 1, j, k) + &
    3068              :                                                              in_val(gbo(2, 1), myj, myk)*w_j*weights_1d(1)* &
    3069              :                                                              (1._dp/weights_1d(0) - 1._dp)
    3070              :                            ELSE
    3071              :                               out_val(gbo(2, 1) - 1, j, k) = out_val(gbo(2, 1) - 1, j, k) + &
    3072              :                                                              u_boundary(myj, myk)*w_j*weights_1d(1)* &
    3073              :                                                              (1._dp/weights_1d(0) - 1._dp)
    3074              :                            END IF
    3075              :                         END IF
    3076              :                      END IF
    3077              : 
    3078              :                   END IF
    3079              :                END DO
    3080              :             END DO
    3081              :          END DO
    3082              :       END DO
    3083              : 
    3084       138700 :       IF (is_split) THEN
    3085         2232 :          DEALLOCATE (l_boundary, u_boundary)
    3086              :       END IF
    3087       138700 :    END SUBROUTINE pw_nn_compose_r_no_pbc
    3088              : 
    3089              : ! **************************************************************************************************
    3090              : !> \brief ...
    3091              : !> \param pw_in ...
    3092              : !> \param pw_out ...
    3093              : ! **************************************************************************************************
    3094        41588 :    SUBROUTINE spl3_nopbc(pw_in, pw_out)
    3095              : 
    3096              :       TYPE(pw_r3d_rs_type), INTENT(IN)                   :: pw_in
    3097              :       TYPE(pw_r3d_rs_type), INTENT(INOUT)                :: pw_out
    3098              : 
    3099        41588 :       CALL pw_zero(pw_out)
    3100              :       CALL pw_nn_compose_r_no_pbc(weights_1d=spl3_1d_coeffs0, pw_in=pw_in, &
    3101        41588 :                                   pw_out=pw_out, sharpen=.FALSE., normalize=.FALSE.)
    3102              : 
    3103        41588 :    END SUBROUTINE spl3_nopbc
    3104              : 
    3105              : ! **************************************************************************************************
    3106              : !> \brief ...
    3107              : !> \param pw_in ...
    3108              : !> \param pw_out ...
    3109              : ! **************************************************************************************************
    3110        27759 :    SUBROUTINE spl3_nopbct(pw_in, pw_out)
    3111              : 
    3112              :       TYPE(pw_r3d_rs_type), INTENT(IN)                   :: pw_in
    3113              :       TYPE(pw_r3d_rs_type), INTENT(INOUT)                :: pw_out
    3114              : 
    3115        27759 :       CALL pw_zero(pw_out)
    3116              :       CALL pw_nn_compose_r_no_pbc(weights_1d=spl3_1d_coeffs0, pw_in=pw_in, &
    3117        27759 :                                   pw_out=pw_out, sharpen=.FALSE., normalize=.FALSE., transpose=.TRUE.)
    3118              : 
    3119        27759 :    END SUBROUTINE spl3_nopbct
    3120              : 
    3121              : ! **************************************************************************************************
    3122              : !> \brief ...
    3123              : !> \param pw_in ...
    3124              : !> \param pw_out ...
    3125              : ! **************************************************************************************************
    3126         6832 :    SUBROUTINE spl3_pbc(pw_in, pw_out)
    3127              : 
    3128              :       TYPE(pw_r3d_rs_type), INTENT(IN)                   :: pw_in
    3129              :       TYPE(pw_r3d_rs_type), INTENT(INOUT)                :: pw_out
    3130              : 
    3131         6832 :       CALL pw_zero(pw_out)
    3132         6832 :       CALL pw_nn_smear_r(pw_in, pw_out, coeffs=spline3_coeffs)
    3133              : 
    3134         6832 :    END SUBROUTINE spl3_pbc
    3135              : 
    3136              : ! **************************************************************************************************
    3137              : !> \brief Evaluates the PBC interpolated Spline (pw) function on the generic
    3138              : !>      input vector (vec)
    3139              : !> \param vec ...
    3140              : !> \param pw ...
    3141              : !> \return ...
    3142              : !> \par History
    3143              : !>      12.2007 Adapted for use with distributed grids [rdeclerck]
    3144              : !> \author Teodoro Laino 12/2005 [tlaino]
    3145              : !> \note
    3146              : !>      Requires the Spline coefficients to be computed with PBC
    3147              : ! **************************************************************************************************
    3148      1821936 :    FUNCTION Eval_Interp_Spl3_pbc(vec, pw) RESULT(val)
    3149              :       REAL(KIND=dp), DIMENSION(3), INTENT(in)            :: vec
    3150              :       TYPE(pw_r3d_rs_type), INTENT(IN)                   :: pw
    3151              :       REAL(KIND=dp)                                      :: val
    3152              : 
    3153              :       INTEGER                                            :: i, ivec(3), j, k, npts(3)
    3154              :       INTEGER, DIMENSION(2, 3)                           :: bo, bo_l
    3155              :       INTEGER, DIMENSION(4)                              :: ii, ij, ik
    3156              :       LOGICAL                                            :: my_mpsum
    3157              :       REAL(KIND=dp) :: a1, a2, a3, b1, b2, b3, c1, c2, c3, d1, d2, d3, dr1, dr2, dr3, e1, e2, e3, &
    3158              :          f1, f2, f3, g1, g2, g3, h1, h2, h3, p1, p2, p3, q1, q2, q3, r1, r2, r3, s1, s2, s3, s4, &
    3159              :          t1, t2, t3, t4, u1, u2, u3, v1, v2, v3, v4, xd1, xd2, xd3
    3160              :       REAL(KIND=dp), DIMENSION(4, 4, 4)                  :: box
    3161      1821936 :       REAL(KIND=dp), DIMENSION(:, :, :), POINTER         :: grid
    3162              : 
    3163      1821936 :       NULLIFY (grid)
    3164      1821936 :       my_mpsum = (pw%pw_grid%para%mode /= PW_MODE_LOCAL)
    3165      7287744 :       npts = pw%pw_grid%npts
    3166      7287744 :       ivec = FLOOR(vec/pw%pw_grid%dr)
    3167      1821936 :       dr1 = pw%pw_grid%dr(1)
    3168      1821936 :       dr2 = pw%pw_grid%dr(2)
    3169      1821936 :       dr3 = pw%pw_grid%dr(3)
    3170              : 
    3171      1821936 :       xd1 = (vec(1)/dr1) - REAL(ivec(1), kind=dp)
    3172      1821936 :       xd2 = (vec(2)/dr2) - REAL(ivec(2), kind=dp)
    3173      1821936 :       xd3 = (vec(3)/dr3) - REAL(ivec(3), kind=dp)
    3174      1821936 :       grid => pw%array(:, :, :)
    3175     18219360 :       bo = pw%pw_grid%bounds
    3176     18219360 :       bo_l = pw%pw_grid%bounds_local
    3177              : 
    3178      1821936 :       ik(1) = MODULO(ivec(3) - 1, npts(3)) + bo(1, 3)
    3179      1821936 :       ik(2) = MODULO(ivec(3), npts(3)) + bo(1, 3)
    3180      1821936 :       ik(3) = MODULO(ivec(3) + 1, npts(3)) + bo(1, 3)
    3181      1821936 :       ik(4) = MODULO(ivec(3) + 2, npts(3)) + bo(1, 3)
    3182              : 
    3183      1821936 :       ij(1) = MODULO(ivec(2) - 1, npts(2)) + bo(1, 2)
    3184      1821936 :       ij(2) = MODULO(ivec(2), npts(2)) + bo(1, 2)
    3185      1821936 :       ij(3) = MODULO(ivec(2) + 1, npts(2)) + bo(1, 2)
    3186      1821936 :       ij(4) = MODULO(ivec(2) + 2, npts(2)) + bo(1, 2)
    3187              : 
    3188      1821936 :       ii(1) = MODULO(ivec(1) - 1, npts(1)) + bo(1, 1)
    3189      1821936 :       ii(2) = MODULO(ivec(1), npts(1)) + bo(1, 1)
    3190      1821936 :       ii(3) = MODULO(ivec(1) + 1, npts(1)) + bo(1, 1)
    3191      1821936 :       ii(4) = MODULO(ivec(1) + 2, npts(1)) + bo(1, 1)
    3192              : 
    3193      9109680 :       DO k = 1, 4
    3194     38260656 :          DO j = 1, 4
    3195    153042624 :             DO i = 1, 4
    3196              :                IF ( &
    3197              :                   ii(i) >= bo_l(1, 1) .AND. &
    3198              :                   ii(i) <= bo_l(2, 1) .AND. &
    3199              :                   ij(j) >= bo_l(1, 2) .AND. &
    3200              :                   ij(j) <= bo_l(2, 2) .AND. &
    3201    116603904 :                   ik(k) >= bo_l(1, 3) .AND. &
    3202              :                   ik(k) <= bo_l(2, 3) &
    3203     29150976 :                   ) THEN
    3204              :                   box(i, j, k) = grid(ii(i) + 1 - bo_l(1, 1), &
    3205              :                                       ij(j) + 1 - bo_l(1, 2), &
    3206     58529856 :                                       ik(k) + 1 - bo_l(1, 3))
    3207              :                ELSE
    3208     58074048 :                   box(i, j, k) = 0.0_dp
    3209              :                END IF
    3210              :             END DO
    3211              :          END DO
    3212              :       END DO
    3213              : 
    3214      1821936 :       a1 = 3.0_dp + xd1
    3215      1821936 :       a2 = a1*a1
    3216      1821936 :       a3 = a2*a1
    3217      1821936 :       b1 = 2.0_dp + xd1
    3218      1821936 :       b2 = b1*b1
    3219      1821936 :       b3 = b2*b1
    3220      1821936 :       c1 = 1.0_dp + xd1
    3221      1821936 :       c2 = c1*c1
    3222      1821936 :       c3 = c2*c1
    3223      1821936 :       d1 = xd1
    3224      1821936 :       d2 = d1*d1
    3225      1821936 :       d3 = d2*d1
    3226      1821936 :       e1 = 3.0_dp + xd2
    3227      1821936 :       e2 = e1*e1
    3228      1821936 :       e3 = e2*e1
    3229      1821936 :       f1 = 2.0_dp + xd2
    3230      1821936 :       f2 = f1*f1
    3231      1821936 :       f3 = f2*f1
    3232      1821936 :       g1 = 1.0_dp + xd2
    3233      1821936 :       g2 = g1*g1
    3234      1821936 :       g3 = g2*g1
    3235      1821936 :       h1 = xd2
    3236      1821936 :       h2 = h1*h1
    3237      1821936 :       h3 = h2*h1
    3238      1821936 :       p1 = 3.0_dp + xd3
    3239      1821936 :       p2 = p1*p1
    3240      1821936 :       p3 = p2*p1
    3241      1821936 :       q1 = 2.0_dp + xd3
    3242      1821936 :       q2 = q1*q1
    3243      1821936 :       q3 = q2*q1
    3244      1821936 :       r1 = 1.0_dp + xd3
    3245      1821936 :       r2 = r1*r1
    3246      1821936 :       r3 = r2*r1
    3247      1821936 :       u1 = xd3
    3248      1821936 :       u2 = u1*u1
    3249      1821936 :       u3 = u2*u1
    3250              : 
    3251      1821936 :       t1 = 1.0_dp/6.0_dp*(64.0_dp - 48.0_dp*a1 + 12.0_dp*a2 - a3)
    3252      1821936 :       t2 = -22.0_dp/3.0_dp + 10.0_dp*b1 - 4.0_dp*b2 + 0.5_dp*b3
    3253      1821936 :       t3 = 2.0_dp/3.0_dp - 2.0_dp*c1 + 2.0_dp*c2 - 0.5_dp*c3
    3254      1821936 :       t4 = 1.0_dp/6.0_dp*d3
    3255      1821936 :       s1 = 1.0_dp/6.0_dp*(64.0_dp - 48.0_dp*e1 + 12.0_dp*e2 - e3)
    3256      1821936 :       s2 = -22.0_dp/3.0_dp + 10.0_dp*f1 - 4.0_dp*f2 + 0.5_dp*f3
    3257      1821936 :       s3 = 2.0_dp/3.0_dp - 2.0_dp*g1 + 2.0_dp*g2 - 0.5_dp*g3
    3258      1821936 :       s4 = 1.0_dp/6.0_dp*h3
    3259      1821936 :       v1 = 1.0_dp/6.0_dp*(64.0_dp - 48.0_dp*p1 + 12.0_dp*p2 - p3)
    3260      1821936 :       v2 = -22.0_dp/3.0_dp + 10.0_dp*q1 - 4.0_dp*q2 + 0.5_dp*q3
    3261      1821936 :       v3 = 2.0_dp/3.0_dp - 2.0_dp*r1 + 2.0_dp*r2 - 0.5_dp*r3
    3262      1821936 :       v4 = 1.0_dp/6.0_dp*u3
    3263              : 
    3264              :       val = ((box(1, 1, 1)*t1 + box(2, 1, 1)*t2 + box(3, 1, 1)*t3 + box(4, 1, 1)*t4)*s1 + &
    3265              :              (box(1, 2, 1)*t1 + box(2, 2, 1)*t2 + box(3, 2, 1)*t3 + box(4, 2, 1)*t4)*s2 + &
    3266              :              (box(1, 3, 1)*t1 + box(2, 3, 1)*t2 + box(3, 3, 1)*t3 + box(4, 3, 1)*t4)*s3 + &
    3267              :              (box(1, 4, 1)*t1 + box(2, 4, 1)*t2 + box(3, 4, 1)*t3 + box(4, 4, 1)*t4)*s4)*v1 + &
    3268              :             ((box(1, 1, 2)*t1 + box(2, 1, 2)*t2 + box(3, 1, 2)*t3 + box(4, 1, 2)*t4)*s1 + &
    3269              :              (box(1, 2, 2)*t1 + box(2, 2, 2)*t2 + box(3, 2, 2)*t3 + box(4, 2, 2)*t4)*s2 + &
    3270              :              (box(1, 3, 2)*t1 + box(2, 3, 2)*t2 + box(3, 3, 2)*t3 + box(4, 3, 2)*t4)*s3 + &
    3271              :              (box(1, 4, 2)*t1 + box(2, 4, 2)*t2 + box(3, 4, 2)*t3 + box(4, 4, 2)*t4)*s4)*v2 + &
    3272              :             ((box(1, 1, 3)*t1 + box(2, 1, 3)*t2 + box(3, 1, 3)*t3 + box(4, 1, 3)*t4)*s1 + &
    3273              :              (box(1, 2, 3)*t1 + box(2, 2, 3)*t2 + box(3, 2, 3)*t3 + box(4, 2, 3)*t4)*s2 + &
    3274              :              (box(1, 3, 3)*t1 + box(2, 3, 3)*t2 + box(3, 3, 3)*t3 + box(4, 3, 3)*t4)*s3 + &
    3275              :              (box(1, 4, 3)*t1 + box(2, 4, 3)*t2 + box(3, 4, 3)*t3 + box(4, 4, 3)*t4)*s4)*v3 + &
    3276              :             ((box(1, 1, 4)*t1 + box(2, 1, 4)*t2 + box(3, 1, 4)*t3 + box(4, 1, 4)*t4)*s1 + &
    3277              :              (box(1, 2, 4)*t1 + box(2, 2, 4)*t2 + box(3, 2, 4)*t3 + box(4, 2, 4)*t4)*s2 + &
    3278              :              (box(1, 3, 4)*t1 + box(2, 3, 4)*t2 + box(3, 3, 4)*t3 + box(4, 3, 4)*t4)*s3 + &
    3279      1821936 :              (box(1, 4, 4)*t1 + box(2, 4, 4)*t2 + box(3, 4, 4)*t3 + box(4, 4, 4)*t4)*s4)*v4
    3280              : 
    3281      1821936 :       IF (my_mpsum) CALL pw%pw_grid%para%group%sum(val)
    3282              : 
    3283      1821936 :    END FUNCTION Eval_Interp_Spl3_pbc
    3284              : 
    3285              : ! **************************************************************************************************
    3286              : !> \brief Evaluates the derivatives of the PBC interpolated Spline (pw)
    3287              : !>      function on the generic input vector (vec)
    3288              : !> \param vec ...
    3289              : !> \param pw ...
    3290              : !> \return ...
    3291              : !> \par History
    3292              : !>      12.2007 Adapted for use with distributed grids [rdeclerck]
    3293              : !> \author Teodoro Laino 12/2005 [tlaino]
    3294              : !> \note
    3295              : !>      Requires the Spline coefficients to be computed with PBC
    3296              : ! **************************************************************************************************
    3297        10166 :    FUNCTION Eval_d_Interp_Spl3_pbc(vec, pw) RESULT(val)
    3298              :       REAL(KIND=dp), DIMENSION(3), INTENT(in)            :: vec
    3299              :       TYPE(pw_r3d_rs_type), INTENT(IN)                   :: pw
    3300              :       REAL(KIND=dp)                                      :: val(3)
    3301              : 
    3302              :       INTEGER                                            :: i, ivec(3), j, k, npts(3)
    3303              :       INTEGER, DIMENSION(2, 3)                           :: bo, bo_l
    3304              :       INTEGER, DIMENSION(4)                              :: ii, ij, ik
    3305              :       LOGICAL                                            :: my_mpsum
    3306              :       REAL(KIND=dp) :: a1, a2, a3, b1, b2, b3, c1, c2, c3, d1, d2, d3, dr1, dr1i, dr2, dr2i, dr3, &
    3307              :          dr3i, e1, e2, e3, f1, f2, f3, g1, g2, g3, h1, h2, h3, p1, p2, p3, q1, q2, q3, r1, r2, r3, &
    3308              :          s1, s1d, s1o, s2, s2d, s2o, s3, s3d, s3o, s4, s4d, s4o, t1, t1d, t1o, t2, t2d, t2o, t3, &
    3309              :          t3d, t3o, t4, t4d, t4o, u1, u2, u3, v1, v1d, v1o, v2, v2d, v2o, v3, v3d, v3o, v4, v4d, &
    3310              :          v4o, xd1, xd2, xd3
    3311              :       REAL(KIND=dp), DIMENSION(4, 4, 4)                  :: box
    3312         5083 :       REAL(KIND=dp), DIMENSION(:, :, :), POINTER         :: grid
    3313              : 
    3314         5083 :       NULLIFY (grid)
    3315         5083 :       my_mpsum = (pw%pw_grid%para%mode /= PW_MODE_LOCAL)
    3316        20332 :       npts = pw%pw_grid%npts
    3317        20332 :       ivec = FLOOR(vec/pw%pw_grid%dr)
    3318         5083 :       dr1 = pw%pw_grid%dr(1)
    3319         5083 :       dr2 = pw%pw_grid%dr(2)
    3320         5083 :       dr3 = pw%pw_grid%dr(3)
    3321         5083 :       dr1i = 1.0_dp/dr1
    3322         5083 :       dr2i = 1.0_dp/dr2
    3323         5083 :       dr3i = 1.0_dp/dr3
    3324         5083 :       xd1 = (vec(1)/dr1) - REAL(ivec(1), kind=dp)
    3325         5083 :       xd2 = (vec(2)/dr2) - REAL(ivec(2), kind=dp)
    3326         5083 :       xd3 = (vec(3)/dr3) - REAL(ivec(3), kind=dp)
    3327         5083 :       grid => pw%array(:, :, :)
    3328        50830 :       bo = pw%pw_grid%bounds
    3329        50830 :       bo_l = pw%pw_grid%bounds_local
    3330              : 
    3331         5083 :       ik(1) = MODULO(ivec(3) - 1, npts(3)) + bo(1, 3)
    3332         5083 :       ik(2) = MODULO(ivec(3), npts(3)) + bo(1, 3)
    3333         5083 :       ik(3) = MODULO(ivec(3) + 1, npts(3)) + bo(1, 3)
    3334         5083 :       ik(4) = MODULO(ivec(3) + 2, npts(3)) + bo(1, 3)
    3335              : 
    3336         5083 :       ij(1) = MODULO(ivec(2) - 1, npts(2)) + bo(1, 2)
    3337         5083 :       ij(2) = MODULO(ivec(2), npts(2)) + bo(1, 2)
    3338         5083 :       ij(3) = MODULO(ivec(2) + 1, npts(2)) + bo(1, 2)
    3339         5083 :       ij(4) = MODULO(ivec(2) + 2, npts(2)) + bo(1, 2)
    3340              : 
    3341         5083 :       ii(1) = MODULO(ivec(1) - 1, npts(1)) + bo(1, 1)
    3342         5083 :       ii(2) = MODULO(ivec(1), npts(1)) + bo(1, 1)
    3343         5083 :       ii(3) = MODULO(ivec(1) + 1, npts(1)) + bo(1, 1)
    3344         5083 :       ii(4) = MODULO(ivec(1) + 2, npts(1)) + bo(1, 1)
    3345              : 
    3346        25415 :       DO k = 1, 4
    3347       106743 :          DO j = 1, 4
    3348       426972 :             DO i = 1, 4
    3349              :                IF ( &
    3350              :                   ii(i) >= bo_l(1, 1) .AND. &
    3351              :                   ii(i) <= bo_l(2, 1) .AND. &
    3352              :                   ij(j) >= bo_l(1, 2) .AND. &
    3353              :                   ij(j) <= bo_l(2, 2) .AND. &
    3354       325312 :                   ik(k) >= bo_l(1, 3) .AND. &
    3355              :                   ik(k) <= bo_l(2, 3) &
    3356        81328 :                   ) THEN
    3357              :                   box(i, j, k) = grid(ii(i) + 1 - bo_l(1, 1), &
    3358              :                                       ij(j) + 1 - bo_l(1, 2), &
    3359       324160 :                                       ik(k) + 1 - bo_l(1, 3))
    3360              :                ELSE
    3361         1152 :                   box(i, j, k) = 0.0_dp
    3362              :                END IF
    3363              :             END DO
    3364              :          END DO
    3365              :       END DO
    3366              : 
    3367         5083 :       a1 = 3.0_dp + xd1
    3368         5083 :       a2 = a1*a1
    3369         5083 :       a3 = a2*a1
    3370         5083 :       b1 = 2.0_dp + xd1
    3371         5083 :       b2 = b1*b1
    3372         5083 :       b3 = b2*b1
    3373         5083 :       c1 = 1.0_dp + xd1
    3374         5083 :       c2 = c1*c1
    3375         5083 :       c3 = c2*c1
    3376         5083 :       d1 = xd1
    3377         5083 :       d2 = d1*d1
    3378         5083 :       d3 = d2*d1
    3379         5083 :       e1 = 3.0_dp + xd2
    3380         5083 :       e2 = e1*e1
    3381         5083 :       e3 = e2*e1
    3382         5083 :       f1 = 2.0_dp + xd2
    3383         5083 :       f2 = f1*f1
    3384         5083 :       f3 = f2*f1
    3385         5083 :       g1 = 1.0_dp + xd2
    3386         5083 :       g2 = g1*g1
    3387         5083 :       g3 = g2*g1
    3388         5083 :       h1 = xd2
    3389         5083 :       h2 = h1*h1
    3390         5083 :       h3 = h2*h1
    3391         5083 :       p1 = 3.0_dp + xd3
    3392         5083 :       p2 = p1*p1
    3393         5083 :       p3 = p2*p1
    3394         5083 :       q1 = 2.0_dp + xd3
    3395         5083 :       q2 = q1*q1
    3396         5083 :       q3 = q2*q1
    3397         5083 :       r1 = 1.0_dp + xd3
    3398         5083 :       r2 = r1*r1
    3399         5083 :       r3 = r2*r1
    3400         5083 :       u1 = xd3
    3401         5083 :       u2 = u1*u1
    3402         5083 :       u3 = u2*u1
    3403              : 
    3404         5083 :       t1o = 1.0_dp/6.0_dp*(64.0_dp - 48.0_dp*a1 + 12.0_dp*a2 - a3)
    3405         5083 :       t2o = -22.0_dp/3.0_dp + 10.0_dp*b1 - 4.0_dp*b2 + 0.5_dp*b3
    3406         5083 :       t3o = 2.0_dp/3.0_dp - 2.0_dp*c1 + 2.0_dp*c2 - 0.5_dp*c3
    3407         5083 :       t4o = 1.0_dp/6.0_dp*d3
    3408         5083 :       s1o = 1.0_dp/6.0_dp*(64.0_dp - 48.0_dp*e1 + 12.0_dp*e2 - e3)
    3409         5083 :       s2o = -22.0_dp/3.0_dp + 10.0_dp*f1 - 4.0_dp*f2 + 0.5_dp*f3
    3410         5083 :       s3o = 2.0_dp/3.0_dp - 2.0_dp*g1 + 2.0_dp*g2 - 0.5_dp*g3
    3411         5083 :       s4o = 1.0_dp/6.0_dp*h3
    3412         5083 :       v1o = 1.0_dp/6.0_dp*(64.0_dp - 48.0_dp*p1 + 12.0_dp*p2 - p3)
    3413         5083 :       v2o = -22.0_dp/3.0_dp + 10.0_dp*q1 - 4.0_dp*q2 + 0.5_dp*q3
    3414         5083 :       v3o = 2.0_dp/3.0_dp - 2.0_dp*r1 + 2.0_dp*r2 - 0.5_dp*r3
    3415         5083 :       v4o = 1.0_dp/6.0_dp*u3
    3416              : 
    3417         5083 :       t1d = -8.0_dp + 4.0_dp*a1 - 0.5_dp*a2
    3418         5083 :       t2d = 10.0_dp - 8.0_dp*b1 + 1.5_dp*b2
    3419         5083 :       t3d = -2.0_dp + 4.0_dp*c1 - 1.5_dp*c2
    3420         5083 :       t4d = 0.5_dp*d2
    3421         5083 :       s1d = -8.0_dp + 4.0_dp*e1 - 0.5_dp*e2
    3422         5083 :       s2d = 10.0_dp - 8.0_dp*f1 + 1.5_dp*f2
    3423         5083 :       s3d = -2.0_dp + 4.0_dp*g1 - 1.5_dp*g2
    3424         5083 :       s4d = 0.5_dp*h2
    3425         5083 :       v1d = -8.0_dp + 4.0_dp*p1 - 0.5_dp*p2
    3426         5083 :       v2d = 10.0_dp - 8.0_dp*q1 + 1.5_dp*q2
    3427         5083 :       v3d = -2.0_dp + 4.0_dp*r1 - 1.5_dp*r2
    3428         5083 :       v4d = 0.5_dp*u2
    3429              : 
    3430         5083 :       t1 = t1d*dr1i
    3431         5083 :       t2 = t2d*dr1i
    3432         5083 :       t3 = t3d*dr1i
    3433         5083 :       t4 = t4d*dr1i
    3434         5083 :       s1 = s1o
    3435         5083 :       s2 = s2o
    3436         5083 :       s3 = s3o
    3437         5083 :       s4 = s4o
    3438         5083 :       v1 = v1o
    3439         5083 :       v2 = v2o
    3440         5083 :       v3 = v3o
    3441         5083 :       v4 = v4o
    3442              :       val(1) = ((box(1, 1, 1)*t1 + box(2, 1, 1)*t2 + box(3, 1, 1)*t3 + box(4, 1, 1)*t4)*s1 + &
    3443              :                 (box(1, 2, 1)*t1 + box(2, 2, 1)*t2 + box(3, 2, 1)*t3 + box(4, 2, 1)*t4)*s2 + &
    3444              :                 (box(1, 3, 1)*t1 + box(2, 3, 1)*t2 + box(3, 3, 1)*t3 + box(4, 3, 1)*t4)*s3 + &
    3445              :                 (box(1, 4, 1)*t1 + box(2, 4, 1)*t2 + box(3, 4, 1)*t3 + box(4, 4, 1)*t4)*s4)*v1 + &
    3446              :                ((box(1, 1, 2)*t1 + box(2, 1, 2)*t2 + box(3, 1, 2)*t3 + box(4, 1, 2)*t4)*s1 + &
    3447              :                 (box(1, 2, 2)*t1 + box(2, 2, 2)*t2 + box(3, 2, 2)*t3 + box(4, 2, 2)*t4)*s2 + &
    3448              :                 (box(1, 3, 2)*t1 + box(2, 3, 2)*t2 + box(3, 3, 2)*t3 + box(4, 3, 2)*t4)*s3 + &
    3449              :                 (box(1, 4, 2)*t1 + box(2, 4, 2)*t2 + box(3, 4, 2)*t3 + box(4, 4, 2)*t4)*s4)*v2 + &
    3450              :                ((box(1, 1, 3)*t1 + box(2, 1, 3)*t2 + box(3, 1, 3)*t3 + box(4, 1, 3)*t4)*s1 + &
    3451              :                 (box(1, 2, 3)*t1 + box(2, 2, 3)*t2 + box(3, 2, 3)*t3 + box(4, 2, 3)*t4)*s2 + &
    3452              :                 (box(1, 3, 3)*t1 + box(2, 3, 3)*t2 + box(3, 3, 3)*t3 + box(4, 3, 3)*t4)*s3 + &
    3453              :                 (box(1, 4, 3)*t1 + box(2, 4, 3)*t2 + box(3, 4, 3)*t3 + box(4, 4, 3)*t4)*s4)*v3 + &
    3454              :                ((box(1, 1, 4)*t1 + box(2, 1, 4)*t2 + box(3, 1, 4)*t3 + box(4, 1, 4)*t4)*s1 + &
    3455              :                 (box(1, 2, 4)*t1 + box(2, 2, 4)*t2 + box(3, 2, 4)*t3 + box(4, 2, 4)*t4)*s2 + &
    3456              :                 (box(1, 3, 4)*t1 + box(2, 3, 4)*t2 + box(3, 3, 4)*t3 + box(4, 3, 4)*t4)*s3 + &
    3457         5083 :                 (box(1, 4, 4)*t1 + box(2, 4, 4)*t2 + box(3, 4, 4)*t3 + box(4, 4, 4)*t4)*s4)*v4
    3458              : 
    3459         5083 :       t1 = t1o
    3460         5083 :       t2 = t2o
    3461         5083 :       t3 = t3o
    3462         5083 :       t4 = t4o
    3463         5083 :       s1 = s1d*dr2i
    3464         5083 :       s2 = s2d*dr2i
    3465         5083 :       s3 = s3d*dr2i
    3466         5083 :       s4 = s4d*dr2i
    3467         5083 :       v1 = v1o
    3468         5083 :       v2 = v2o
    3469         5083 :       v3 = v3o
    3470         5083 :       v4 = v4o
    3471              :       val(2) = ((box(1, 1, 1)*t1 + box(2, 1, 1)*t2 + box(3, 1, 1)*t3 + box(4, 1, 1)*t4)*s1 + &
    3472              :                 (box(1, 2, 1)*t1 + box(2, 2, 1)*t2 + box(3, 2, 1)*t3 + box(4, 2, 1)*t4)*s2 + &
    3473              :                 (box(1, 3, 1)*t1 + box(2, 3, 1)*t2 + box(3, 3, 1)*t3 + box(4, 3, 1)*t4)*s3 + &
    3474              :                 (box(1, 4, 1)*t1 + box(2, 4, 1)*t2 + box(3, 4, 1)*t3 + box(4, 4, 1)*t4)*s4)*v1 + &
    3475              :                ((box(1, 1, 2)*t1 + box(2, 1, 2)*t2 + box(3, 1, 2)*t3 + box(4, 1, 2)*t4)*s1 + &
    3476              :                 (box(1, 2, 2)*t1 + box(2, 2, 2)*t2 + box(3, 2, 2)*t3 + box(4, 2, 2)*t4)*s2 + &
    3477              :                 (box(1, 3, 2)*t1 + box(2, 3, 2)*t2 + box(3, 3, 2)*t3 + box(4, 3, 2)*t4)*s3 + &
    3478              :                 (box(1, 4, 2)*t1 + box(2, 4, 2)*t2 + box(3, 4, 2)*t3 + box(4, 4, 2)*t4)*s4)*v2 + &
    3479              :                ((box(1, 1, 3)*t1 + box(2, 1, 3)*t2 + box(3, 1, 3)*t3 + box(4, 1, 3)*t4)*s1 + &
    3480              :                 (box(1, 2, 3)*t1 + box(2, 2, 3)*t2 + box(3, 2, 3)*t3 + box(4, 2, 3)*t4)*s2 + &
    3481              :                 (box(1, 3, 3)*t1 + box(2, 3, 3)*t2 + box(3, 3, 3)*t3 + box(4, 3, 3)*t4)*s3 + &
    3482              :                 (box(1, 4, 3)*t1 + box(2, 4, 3)*t2 + box(3, 4, 3)*t3 + box(4, 4, 3)*t4)*s4)*v3 + &
    3483              :                ((box(1, 1, 4)*t1 + box(2, 1, 4)*t2 + box(3, 1, 4)*t3 + box(4, 1, 4)*t4)*s1 + &
    3484              :                 (box(1, 2, 4)*t1 + box(2, 2, 4)*t2 + box(3, 2, 4)*t3 + box(4, 2, 4)*t4)*s2 + &
    3485              :                 (box(1, 3, 4)*t1 + box(2, 3, 4)*t2 + box(3, 3, 4)*t3 + box(4, 3, 4)*t4)*s3 + &
    3486         5083 :                 (box(1, 4, 4)*t1 + box(2, 4, 4)*t2 + box(3, 4, 4)*t3 + box(4, 4, 4)*t4)*s4)*v4
    3487              : 
    3488         5083 :       t1 = t1o
    3489         5083 :       t2 = t2o
    3490         5083 :       t3 = t3o
    3491         5083 :       t4 = t4o
    3492         5083 :       s1 = s1o
    3493         5083 :       s2 = s2o
    3494         5083 :       s3 = s3o
    3495         5083 :       s4 = s4o
    3496         5083 :       v1 = v1d*dr3i
    3497         5083 :       v2 = v2d*dr3i
    3498         5083 :       v3 = v3d*dr3i
    3499         5083 :       v4 = v4d*dr3i
    3500              :       val(3) = ((box(1, 1, 1)*t1 + box(2, 1, 1)*t2 + box(3, 1, 1)*t3 + box(4, 1, 1)*t4)*s1 + &
    3501              :                 (box(1, 2, 1)*t1 + box(2, 2, 1)*t2 + box(3, 2, 1)*t3 + box(4, 2, 1)*t4)*s2 + &
    3502              :                 (box(1, 3, 1)*t1 + box(2, 3, 1)*t2 + box(3, 3, 1)*t3 + box(4, 3, 1)*t4)*s3 + &
    3503              :                 (box(1, 4, 1)*t1 + box(2, 4, 1)*t2 + box(3, 4, 1)*t3 + box(4, 4, 1)*t4)*s4)*v1 + &
    3504              :                ((box(1, 1, 2)*t1 + box(2, 1, 2)*t2 + box(3, 1, 2)*t3 + box(4, 1, 2)*t4)*s1 + &
    3505              :                 (box(1, 2, 2)*t1 + box(2, 2, 2)*t2 + box(3, 2, 2)*t3 + box(4, 2, 2)*t4)*s2 + &
    3506              :                 (box(1, 3, 2)*t1 + box(2, 3, 2)*t2 + box(3, 3, 2)*t3 + box(4, 3, 2)*t4)*s3 + &
    3507              :                 (box(1, 4, 2)*t1 + box(2, 4, 2)*t2 + box(3, 4, 2)*t3 + box(4, 4, 2)*t4)*s4)*v2 + &
    3508              :                ((box(1, 1, 3)*t1 + box(2, 1, 3)*t2 + box(3, 1, 3)*t3 + box(4, 1, 3)*t4)*s1 + &
    3509              :                 (box(1, 2, 3)*t1 + box(2, 2, 3)*t2 + box(3, 2, 3)*t3 + box(4, 2, 3)*t4)*s2 + &
    3510              :                 (box(1, 3, 3)*t1 + box(2, 3, 3)*t2 + box(3, 3, 3)*t3 + box(4, 3, 3)*t4)*s3 + &
    3511              :                 (box(1, 4, 3)*t1 + box(2, 4, 3)*t2 + box(3, 4, 3)*t3 + box(4, 4, 3)*t4)*s4)*v3 + &
    3512              :                ((box(1, 1, 4)*t1 + box(2, 1, 4)*t2 + box(3, 1, 4)*t3 + box(4, 1, 4)*t4)*s1 + &
    3513              :                 (box(1, 2, 4)*t1 + box(2, 2, 4)*t2 + box(3, 2, 4)*t3 + box(4, 2, 4)*t4)*s2 + &
    3514              :                 (box(1, 3, 4)*t1 + box(2, 3, 4)*t2 + box(3, 3, 4)*t3 + box(4, 3, 4)*t4)*s3 + &
    3515         5083 :                 (box(1, 4, 4)*t1 + box(2, 4, 4)*t2 + box(3, 4, 4)*t3 + box(4, 4, 4)*t4)*s4)*v4
    3516              : 
    3517         5083 :       IF (my_mpsum) CALL pw%pw_grid%para%group%sum(val)
    3518              : 
    3519         5083 :    END FUNCTION Eval_d_Interp_Spl3_pbc
    3520              : 
    3521            0 : END MODULE pw_spline_utils
        

Generated by: LCOV version 2.0-1