LCOV - code coverage report
Current view: top level - src/pw - pw_spline_utils.F (source / functions) Coverage Total Hit
Test: CP2K Regtests (git:936074a) Lines: 81.8 % 1453 1189
Test Date: 2025-12-04 06:27:48 Functions: 95.8 % 24 23

            Line data    Source code
       1              : !--------------------------------------------------------------------------------------------------!
       2              : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3              : !   Copyright 2000-2025 CP2K developers group <https://cp2k.org>                                   !
       4              : !                                                                                                  !
       5              : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6              : !--------------------------------------------------------------------------------------------------!
       7              : 
       8              : ! **************************************************************************************************
       9              : !> \brief different utils that are useful to manipulate splines on the regular
      10              : !>      grid of a pw
      11              : !> \par History
      12              : !>      05.2003 created [fawzi]
      13              : !>      08.2004 removed spline evaluation method using more than 2 read streams
      14              : !>              (pw_compose_stripe_rs3), added linear solver based spline
      15              : !>              inversion [fawzi]
      16              : !> \author Fawzi Mohamed
      17              : ! **************************************************************************************************
      18              : MODULE pw_spline_utils
      19              : 
      20              :    USE cp_log_handling,                 ONLY: cp_get_default_logger,&
      21              :                                               cp_logger_type,&
      22              :                                               cp_to_string
      23              :    USE kinds,                           ONLY: dp
      24              :    USE mathconstants,                   ONLY: twopi
      25              :    USE message_passing,                 ONLY: mp_comm_congruent
      26              :    USE pw_grid_types,                   ONLY: FULLSPACE,&
      27              :                                               PW_MODE_LOCAL
      28              :    USE pw_methods,                      ONLY: pw_axpy,&
      29              :                                               pw_copy,&
      30              :                                               pw_integral_ab,&
      31              :                                               pw_zero
      32              :    USE pw_pool_types,                   ONLY: pw_pool_release,&
      33              :                                               pw_pool_type
      34              :    USE pw_types,                        ONLY: pw_c1d_gs_type,&
      35              :                                               pw_r3d_rs_type
      36              : #include "../base/base_uses.f90"
      37              : 
      38              :    IMPLICIT NONE
      39              :    PRIVATE
      40              : 
      41              :    LOGICAL, PRIVATE, PARAMETER :: debug_this_module = .TRUE.
      42              :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'pw_spline_utils'
      43              : 
      44              :    INTEGER, PARAMETER, PUBLIC               :: no_precond = 0, &
      45              :                                                precond_spl3_aint = 1, &
      46              :                                                precond_spl3_1 = 2, &
      47              :                                                precond_spl3_aint2 = 3, &
      48              :                                                precond_spl3_2 = 4, &
      49              :                                                precond_spl3_3 = 5
      50              : 
      51              :    REAL(KIND=dp), PUBLIC, PARAMETER, DIMENSION(4) :: nn10_coeffs = &
      52              :                                                      [125._dp/216._dp, 25._dp/432._dp, 5._dp/864._dp, 1._dp/1728._dp], &
      53              :                                                      spline3_coeffs = &
      54              :                                                      [8._dp/(27._dp), 2._dp/(27._dp), 1._dp/(27._dp*2._dp), &
      55              :                                                       1._dp/(27._dp*8._dp)], &
      56              :                                                      spline2_coeffs = &
      57              :                                                      [27._dp/(64._dp), 9._dp/(64._dp*2_dp), 3._dp/(64._dp*4._dp), &
      58              :                                                       1._dp/(64._dp*8._dp)], &
      59              :                                                      nn50_coeffs = &
      60              :                                                      [15625._dp/17576._dp, 625._dp/35152._dp, 25._dp/70304._dp, &
      61              :                                                       1._dp/140608._dp], &
      62              :                                                      spl3_aint_coeff = &
      63              :                                                      [46._dp/27._dp, -2._dp/(27._dp), -1._dp/(27._dp*2._dp), &
      64              :                                                       -1._dp/(27._dp*8._dp)], &
      65              :                                                      spl3_precond1_coeff = &
      66              :                                                      [64._dp/3._dp, -8._dp/3._dp, -1._dp/3._dp, -1._dp/24._dp], &
      67              :                                                      spl3_1d_transf_coeffs = &
      68              :                                                      [2._dp/3._dp, 23._dp/48._dp, 1._dp/6._dp, 1._dp/48._dp]
      69              : 
      70              :    REAL(KIND=dp), PUBLIC, PARAMETER, DIMENSION(3) :: spline3_deriv_coeffs = &
      71              :                                                      [2.0_dp/9.0_dp, 1.0_dp/18.0_dp, 1.0_dp/72.0_dp], &
      72              :                                                      spline2_deriv_coeffs = &
      73              :                                                      [9.0_dp/32.0_dp, 3.0_dp/64.0_dp, 1.0_dp/128.0_dp], &
      74              :                                                      nn10_deriv_coeffs = &
      75              :                                                      [25._dp/72._dp, 5._dp/144, 1._dp/288._dp], &
      76              :                                                      nn50_deriv_coeffs = &
      77              :                                                      [625._dp/1352._dp, 25._dp/2704._dp, 1._dp/5408._dp], &
      78              :                                                      spl3_1d_coeffs0 = &
      79              :                                                      [1._dp/6_dp, 2._dp/3._dp, 1._dp/6._dp], &
      80              :                                                      spl3_1d_transf_border1 = &
      81              :                                                      [0.517977704_dp, 0.464044595_dp, 0.17977701e-1_dp]
      82              : 
      83              :    PUBLIC :: pw_spline3_interpolate_values_g, &
      84              :              pw_spline3_deriv_g
      85              :    PUBLIC :: pw_spline_scale_deriv
      86              :    PUBLIC :: pw_spline2_interpolate_values_g, &
      87              :              pw_spline2_deriv_g
      88              :    PUBLIC :: pw_nn_smear_r, pw_nn_deriv_r, &
      89              :              spl3_nopbc, spl3_pbc, spl3_nopbct
      90              :    PUBLIC :: add_fine2coarse, add_coarse2fine
      91              :    PUBLIC :: pw_spline_precond_create, &
      92              :              pw_spline_do_precond, &
      93              :              pw_spline_precond_set_kind, &
      94              :              find_coeffs, &
      95              :              pw_spline_precond_release, &
      96              :              pw_spline_precond_type, &
      97              :              Eval_Interp_Spl3_pbc, &
      98              :              Eval_d_Interp_Spl3_pbc
      99              : 
     100              : !***
     101              : 
     102              : ! **************************************************************************************************
     103              : !> \brief stores information for the preconditioner used to calculate the
     104              : !>      coeffs of splines
     105              : !> \author fawzi
     106              : ! **************************************************************************************************
     107              :    TYPE pw_spline_precond_type
     108              :       INTEGER :: kind = no_precond
     109              :       REAL(kind=dp), DIMENSION(4) :: coeffs = 0.0_dp
     110              :       REAL(kind=dp), DIMENSION(3) :: coeffs_1d = 0.0_dp
     111              :       LOGICAL :: sharpen = .FALSE., normalize = .FALSE., pbc = .FALSE., transpose = .FALSE.
     112              :       TYPE(pw_pool_type), POINTER :: pool => NULL()
     113              :    END TYPE pw_spline_precond_type
     114              : 
     115              : CONTAINS
     116              : 
     117              : ! **************************************************************************************************
     118              : !> \brief calculates the FFT of the coefficients of the quadratic spline that
     119              : !>      interpolates the given values
     120              : !> \param spline_g on entry the FFT of the values to interpolate as cc,
     121              : !>        will contain the FFT of the coefficients of the spline
     122              : !> \par History
     123              : !>      06.2003 created [fawzi]
     124              : !> \author Fawzi Mohamed
     125              : !> \note
     126              : !>      does not work with spherical cutoff
     127              : ! **************************************************************************************************
     128        38550 :    SUBROUTINE pw_spline2_interpolate_values_g(spline_g)
     129              :       TYPE(pw_c1d_gs_type), INTENT(IN)                   :: spline_g
     130              : 
     131              :       CHARACTER(len=*), PARAMETER :: routineN = 'pw_spline2_interpolate_values_g'
     132              : 
     133              :       INTEGER                                            :: handle, i, ii, j, k
     134              :       INTEGER, DIMENSION(2, 3)                           :: gbo
     135              :       INTEGER, DIMENSION(3)                              :: n_tot
     136              :       REAL(KIND=dp)                                      :: c23, coeff
     137        38550 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: cosIVals, cosJVals, cosKVals
     138              : 
     139        38550 :       CALL timeset(routineN, handle)
     140              : 
     141       154200 :       n_tot(1:3) = spline_g%pw_grid%npts(1:3)
     142       385500 :       gbo = spline_g%pw_grid%bounds
     143              : 
     144        38550 :       CPASSERT(.NOT. spline_g%pw_grid%spherical)
     145        38550 :       CPASSERT(spline_g%pw_grid%grid_span == FULLSPACE)
     146              : 
     147            0 :       ALLOCATE (cosIVals(gbo(1, 1):gbo(2, 1)), cosJVals(gbo(1, 2):gbo(2, 2)), &
     148       269850 :                 cosKVals(gbo(1, 3):gbo(2, 3)))
     149              : 
     150        38550 :       coeff = twopi/n_tot(1)
     151        38550 : !$OMP     PARALLEL DO PRIVATE(i) DEFAULT(NONE) SHARED(cosIVals,coeff,gbo)
     152              :       DO i = gbo(1, 1), gbo(2, 1)
     153              :          cosIVals(i) = COS(coeff*REAL(i, dp))
     154              :       END DO
     155        38550 :       coeff = twopi/n_tot(2)
     156        38550 : !$OMP     PARALLEL DO PRIVATE(j) DEFAULT(NONE) SHARED(cosJVals,coeff,gbo)
     157              :       DO j = gbo(1, 2), gbo(2, 2)
     158              :          cosJVals(j) = COS(coeff*REAL(j, dp))
     159              :       END DO
     160        38550 :       coeff = twopi/n_tot(3)
     161        38550 : !$OMP     PARALLEL DO PRIVATE(k) DEFAULT(NONE) SHARED(cosKVals,coeff,gbo)
     162              :       DO k = gbo(1, 3), gbo(2, 3)
     163              :          cosKVals(k) = COS(coeff*REAL(k, dp))
     164              :       END DO
     165              : 
     166        38550 : !$OMP     PARALLEL DO PRIVATE(i,j,k,ii,coeff,c23) DEFAULT(NONE) SHARED(spline_g,cosIVals,cosJVals,cosKVals)
     167              :       DO ii = 1, SIZE(spline_g%array)
     168              :          i = spline_g%pw_grid%g_hat(1, ii)
     169              :          j = spline_g%pw_grid%g_hat(2, ii)
     170              :          k = spline_g%pw_grid%g_hat(3, ii)
     171              : 
     172              :          c23 = cosJVals(j)*cosKVals(k)
     173              :          coeff = 64.0_dp/(cosIVals(i)*c23 + &
     174              :                           (cosIVals(i)*cosJVals(j) + cosIVals(i)*cosKVals(k) + c23)*3.0_dp + &
     175              :                           (cosIVals(i) + cosJVals(j) + cosKVals(k))*9.0_dp + &
     176              :                           27.0_dp)
     177              : 
     178              :          spline_g%array(ii) = spline_g%array(ii)*coeff
     179              : 
     180              :       END DO
     181        38550 :       DEALLOCATE (cosIVals, cosJVals, cosKVals)
     182              : 
     183        38550 :       CALL timestop(handle)
     184        77100 :    END SUBROUTINE pw_spline2_interpolate_values_g
     185              : 
     186              : ! **************************************************************************************************
     187              : !> \brief calculates the FFT of the coefficients of the2 cubic spline that
     188              : !>      interpolates the given values
     189              : !> \param spline_g on entry the FFT of the values to interpolate as cc,
     190              : !>        will contain the FFT of the coefficients of the spline
     191              : !> \par History
     192              : !>      06.2003 created [fawzi]
     193              : !> \author Fawzi Mohamed
     194              : !> \note
     195              : !>      does not work with spherical cutoff
     196              : !>      stupid distribution for cos calculation, it should calculate only the
     197              : !>      needed cos, and avoid the mpi_allreduce
     198              : ! **************************************************************************************************
     199          342 :    SUBROUTINE pw_spline3_interpolate_values_g(spline_g)
     200              :       TYPE(pw_c1d_gs_type), INTENT(IN)                   :: spline_g
     201              : 
     202              :       CHARACTER(len=*), PARAMETER :: routineN = 'pw_spline3_interpolate_values_g'
     203              : 
     204              :       INTEGER                                            :: handle, i, ii, j, k
     205              :       INTEGER, DIMENSION(2, 3)                           :: gbo
     206              :       INTEGER, DIMENSION(3)                              :: n_tot
     207              :       REAL(KIND=dp)                                      :: c23, coeff
     208          342 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: cosIVals, cosJVals, cosKVals
     209              : 
     210          342 :       CALL timeset(routineN, handle)
     211              : 
     212         1368 :       n_tot(1:3) = spline_g%pw_grid%npts(1:3)
     213         3420 :       gbo = spline_g%pw_grid%bounds
     214              : 
     215          342 :       CPASSERT(.NOT. spline_g%pw_grid%spherical)
     216          342 :       CPASSERT(spline_g%pw_grid%grid_span == FULLSPACE)
     217              : 
     218            0 :       ALLOCATE (cosIVals(gbo(1, 1):gbo(2, 1)), &
     219            0 :                 cosJVals(gbo(1, 2):gbo(2, 2)), &
     220         2394 :                 cosKVals(gbo(1, 3):gbo(2, 3)))
     221              : 
     222          342 :       coeff = twopi/n_tot(1)
     223          342 : !$OMP     PARALLEL DO PRIVATE(i) DEFAULT(NONE) SHARED(cosIVals,coeff,gbo)
     224              :       DO i = gbo(1, 1), gbo(2, 1)
     225              :          cosIVals(i) = COS(coeff*REAL(i, dp))
     226              :       END DO
     227          342 :       coeff = twopi/n_tot(2)
     228          342 : !$OMP     PARALLEL DO PRIVATE(j) DEFAULT(NONE) SHARED(cosJVals,coeff,gbo)
     229              :       DO j = gbo(1, 2), gbo(2, 2)
     230              :          cosJVals(j) = COS(coeff*REAL(j, dp))
     231              :       END DO
     232          342 :       coeff = twopi/n_tot(3)
     233          342 : !$OMP     PARALLEL DO PRIVATE(k) DEFAULT(NONE) SHARED(cosKVals,coeff,gbo)
     234              :       DO k = gbo(1, 3), gbo(2, 3)
     235              :          cosKVals(k) = COS(coeff*REAL(k, dp))
     236              :       END DO
     237              : 
     238          342 : !$OMP     PARALLEL DO DEFAULT(NONE) PRIVATE(i,j,k,ii,coeff,c23) SHARED(spline_g,cosIVals,cosJVals,cosKVals)
     239              :       DO ii = 1, SIZE(spline_g%array)
     240              :          i = spline_g%pw_grid%g_hat(1, ii)
     241              :          j = spline_g%pw_grid%g_hat(2, ii)
     242              :          k = spline_g%pw_grid%g_hat(3, ii)
     243              :          ! no opt
     244              : !FM                coeff=1.0/((cosVal(1)*cosVal(2)*cosVal(3))/27.0_dp+&
     245              : !FM                     (cosVal(1)*cosVal(2)+cosVal(1)*cosVal(3)+&
     246              : !FM                     cosVal(2)*cosVal(3))*2.0_dp/27.0_dp+&
     247              : !FM                     (cosVal(1)+cosVal(2)+cosVal(3))*4.0_dp/27.0_dp+&
     248              : !FM                     8.0_dp/27.0_dp)
     249              :          ! opt
     250              :          c23 = cosJVals(j)*cosKVals(k)
     251              :          coeff = 27.0_dp/(cosIVals(i)*c23 + &
     252              :                           (cosIVals(i)*cosJVals(j) + cosIVals(i)*cosKVals(k) + c23)*2.0_dp + &
     253              :                           (cosIVals(i) + cosJVals(j) + cosKVals(k))*4.0_dp + &
     254              :                           8.0_dp)
     255              : 
     256              :          spline_g%array(ii) = spline_g%array(ii)*coeff
     257              : 
     258              :       END DO
     259          342 :       DEALLOCATE (cosIVals, cosJVals, cosKVals)
     260              : 
     261          342 :       CALL timestop(handle)
     262          684 :    END SUBROUTINE pw_spline3_interpolate_values_g
     263              : 
     264              : ! **************************************************************************************************
     265              : !> \brief rescales the derivatives from gridspacing=1 to the real derivatives
     266              : !> \param deriv_vals_r an array of x,y,z derivatives
     267              : !> \param transpose if true applies the transpose of the map (defaults to
     268              : !>        false)
     269              : !> \param scale a scaling factor (defaults to 1.0)
     270              : !> \par History
     271              : !>      06.2003 created [fawzi]
     272              : !> \author Fawzi Mohamed
     273              : ! **************************************************************************************************
     274        16846 :    SUBROUTINE pw_spline_scale_deriv(deriv_vals_r, transpose, scale)
     275              :       TYPE(pw_r3d_rs_type), DIMENSION(3), INTENT(IN)     :: deriv_vals_r
     276              :       LOGICAL, INTENT(in), OPTIONAL                      :: transpose
     277              :       REAL(KIND=dp), INTENT(in), OPTIONAL                :: scale
     278              : 
     279              :       CHARACTER(len=*), PARAMETER :: routineN = 'pw_spline_scale_deriv'
     280              : 
     281              :       INTEGER                                            :: handle, i, idir, j, k
     282              :       INTEGER, DIMENSION(2, 3)                           :: bo
     283              :       INTEGER, DIMENSION(3)                              :: n_tot
     284              :       LOGICAL                                            :: diag, my_transpose
     285              :       REAL(KIND=dp)                                      :: dVal1, dVal2, dVal3, my_scale, scalef
     286              :       REAL(KIND=dp), DIMENSION(3, 3)                     :: dh_inv, h_grid
     287        16846 :       REAL(kind=dp), DIMENSION(:, :, :), POINTER         :: ddata, ddata2, ddata3
     288              : 
     289        16846 :       CALL timeset(routineN, handle)
     290              : 
     291        16846 :       my_transpose = .FALSE.
     292        16846 :       IF (PRESENT(transpose)) my_transpose = transpose
     293        16846 :       my_scale = 1.0_dp
     294        16846 :       IF (PRESENT(scale)) my_scale = scale
     295        67384 :       n_tot(1:3) = deriv_vals_r(1)%pw_grid%npts(1:3)
     296       168460 :       bo = deriv_vals_r(1)%pw_grid%bounds_local
     297       218998 :       dh_inv = deriv_vals_r(1)%pw_grid%dh_inv
     298              : 
     299              :       ! map grid to real derivative
     300        16846 :       diag = .TRUE.
     301        16846 :       IF (my_transpose) THEN
     302        30224 :          DO j = 1, 3
     303        98228 :             DO i = 1, 3
     304        68004 :                h_grid(j, i) = my_scale*dh_inv(i, j) ! REAL(n_tot(i),dp)*cell_h_inv(i,j)
     305        90672 :                IF (i /= j .AND. h_grid(j, i) /= 0.0_dp) diag = .FALSE.
     306              :             END DO
     307              :          END DO
     308              :       ELSE
     309        37160 :          DO j = 1, 3
     310       120770 :             DO i = 1, 3
     311        83610 :                h_grid(i, j) = my_scale*dh_inv(i, j) ! REAL(n_tot(i),dp)*cell_h_inv(i,j)
     312       111480 :                IF (i /= j .AND. h_grid(i, j) /= 0.0_dp) diag = .FALSE.
     313              :             END DO
     314              :          END DO
     315              :       END IF
     316              : 
     317        16846 :       IF (diag) THEN
     318        66664 :          DO idir = 1, 3
     319        49998 :             ddata => deriv_vals_r(idir)%array
     320        49998 :             scalef = h_grid(idir, idir)
     321              :             CALL dscal((bo(2, 1) - bo(1, 1) + 1)*(bo(2, 2) - bo(1, 2) + 1)*(bo(2, 3) - bo(1, 3) + 1), &
     322        66664 :                        scalef, ddata, 1)
     323              :          END DO
     324              :       ELSE
     325          180 :          ddata => deriv_vals_r(1)%array
     326          180 :          ddata2 => deriv_vals_r(2)%array
     327          180 :          ddata3 => deriv_vals_r(3)%array
     328              : !$OMP        PARALLEL DO DEFAULT(NONE) PRIVATE(k,j,i,dVal1,dVal2,dVal3) &
     329          180 : !$OMP                 SHARED(ddata,ddata2,ddata3,h_grid,bo)
     330              :          DO k = bo(1, 3), bo(2, 3)
     331              :             DO j = bo(1, 2), bo(2, 2)
     332              :                DO i = bo(1, 1), bo(2, 1)
     333              : 
     334              :                   dVal1 = ddata(i, j, k)
     335              :                   dVal2 = ddata2(i, j, k)
     336              :                   dVal3 = ddata3(i, j, k)
     337              : 
     338              :                   ddata(i, j, k) = h_grid(1, 1)*dVal1 + &
     339              :                                    h_grid(2, 1)*dVal2 + h_grid(3, 1)*dVal3
     340              :                   ddata2(i, j, k) = h_grid(1, 2)*dVal1 + &
     341              :                                     h_grid(2, 2)*dVal2 + h_grid(3, 2)*dVal3
     342              :                   ddata3(i, j, k) = h_grid(1, 3)*dVal1 + &
     343              :                                     h_grid(2, 3)*dVal2 + h_grid(3, 3)*dVal3
     344              : 
     345              :                END DO
     346              :             END DO
     347              :          END DO
     348              :       END IF
     349              : 
     350        16846 :       CALL timestop(handle)
     351        16846 :    END SUBROUTINE pw_spline_scale_deriv
     352              : 
     353              : ! **************************************************************************************************
     354              : !> \brief calculates the FFT of the values of the x,y,z (idir=1,2,3)
     355              : !>      derivative of the cubic spline
     356              : !> \param spline_g on entry the FFT of the coefficients of the spline
     357              : !>        will contain the FFT of the derivative
     358              : !> \param idir direction of the derivative
     359              : !> \par History
     360              : !>      06.2003 created [fawzi]
     361              : !> \author Fawzi Mohamed
     362              : !> \note
     363              : !>      the distance between gridpoints is assumed to be 1
     364              : ! **************************************************************************************************
     365          342 :    SUBROUTINE pw_spline3_deriv_g(spline_g, idir)
     366              :       TYPE(pw_c1d_gs_type), INTENT(IN)                   :: spline_g
     367              :       INTEGER, INTENT(in)                                :: idir
     368              : 
     369              :       CHARACTER(len=*), PARAMETER :: routineN = 'pw_spline3_deriv_g'
     370              :       REAL(KIND=dp), PARAMETER                           :: inv9 = 1.0_dp/9.0_dp
     371              : 
     372              :       INTEGER                                            :: handle, i, ii, j, k
     373              :       INTEGER, DIMENSION(2, 3)                           :: bo, gbo
     374              :       INTEGER, DIMENSION(3)                              :: n, n_tot
     375              :       REAL(KIND=dp)                                      :: coeff, tmp
     376          342 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: csIVals, csJVals, csKVals
     377              : 
     378          342 :       CALL timeset(routineN, handle)
     379              : 
     380         1368 :       n(1:3) = spline_g%pw_grid%npts_local(1:3)
     381         1368 :       n_tot(1:3) = spline_g%pw_grid%npts(1:3)
     382         3420 :       bo = spline_g%pw_grid%bounds_local
     383         3420 :       gbo = spline_g%pw_grid%bounds
     384              : 
     385          342 :       CPASSERT(.NOT. spline_g%pw_grid%spherical)
     386          342 :       CPASSERT(spline_g%pw_grid%grid_span == FULLSPACE)
     387              : 
     388            0 :       ALLOCATE (csIVals(gbo(1, 1):gbo(2, 1)), &
     389            0 :                 csJVals(gbo(1, 2):gbo(2, 2)), &
     390         2394 :                 csKVals(gbo(1, 3):gbo(2, 3)))
     391              : 
     392          342 :       coeff = twopi/n_tot(1)
     393          342 :       IF (idir == 1) THEN
     394          114 : !$OMP        PARALLEL DO PRIVATE(i) DEFAULT(NONE) SHARED(gbo,csIVals,coeff)
     395              :          DO i = gbo(1, 1), gbo(2, 1)
     396              :             csIVals(i) = SIN(coeff*REAL(i, dp))
     397              :          END DO
     398              :       ELSE
     399          228 : !$OMP        PARALLEL DO PRIVATE(i) DEFAULT(NONE) SHARED(gbo,csIVals,coeff)
     400              :          DO i = gbo(1, 1), gbo(2, 1)
     401              :             csIVals(i) = COS(coeff*REAL(i, dp))
     402              :          END DO
     403              :       END IF
     404          342 :       coeff = twopi/n_tot(2)
     405          342 :       IF (idir == 2) THEN
     406          114 : !$OMP        PARALLEL DO PRIVATE(j) DEFAULT(NONE) SHARED(gbo,csJVals,coeff)
     407              :          DO j = gbo(1, 2), gbo(2, 2)
     408              :             csJVals(j) = SIN(coeff*REAL(j, dp))
     409              :          END DO
     410              :       ELSE
     411          228 : !$OMP        PARALLEL DO PRIVATE(j) DEFAULT(NONE) SHARED(gbo,csJVals,coeff)
     412              :          DO j = gbo(1, 2), gbo(2, 2)
     413              :             csJVals(j) = COS(coeff*REAL(j, dp))
     414              :          END DO
     415              :       END IF
     416          342 :       coeff = twopi/n_tot(3)
     417          342 :       IF (idir == 3) THEN
     418          114 : !$OMP        PARALLEL DO PRIVATE(k) DEFAULT(NONE) SHARED(gbo,csKVals,coeff)
     419              :          DO k = gbo(1, 3), gbo(2, 3)
     420              :             csKVals(k) = SIN(coeff*REAL(k, dp))
     421              :          END DO
     422              :       ELSE
     423          228 : !$OMP        PARALLEL DO PRIVATE(k) DEFAULT(NONE) SHARED(gbo,csKVals,coeff)
     424              :          DO k = gbo(1, 3), gbo(2, 3)
     425              :             csKVals(k) = COS(coeff*REAL(k, dp))
     426              :          END DO
     427              :       END IF
     428              : 
     429          114 :       SELECT CASE (idir)
     430              :       CASE (1)
     431              :          ! x deriv
     432          114 : !$OMP        PARALLEL DO PRIVATE(ii,k,j,i,coeff,tmp) DEFAULT(NONE) SHARED(spline_g,csIVals,csJVals,csKVals)
     433              :          DO ii = 1, SIZE(spline_g%array)
     434              :             i = spline_g%pw_grid%g_hat(1, ii)
     435              :             j = spline_g%pw_grid%g_hat(2, ii)
     436              :             k = spline_g%pw_grid%g_hat(3, ii)
     437              : !FM                ! formula
     438              : !FM                coeff=(sinVal(1)*cosVal(2)*cosVal(3))/9.0_dp+&
     439              : !FM                     (sinVal(1)*cosVal(2)+sinVal(1)*cosVal(3))*2.0_dp/9.0_dp+&
     440              : !FM                     sinVal(1)*4.0_dp/9.0_dp
     441              :             tmp = csIVals(i)*csJVals(j)
     442              :             coeff = (tmp*csKVals(k) + &
     443              :                      (tmp + csIVals(i)*csKVals(k))*2.0_dp + &
     444              :                      csIVals(i)*4.0_dp)*inv9
     445              : 
     446              :             spline_g%array(ii) = spline_g%array(ii)* &
     447              :                                  CMPLX(0.0_dp, coeff, dp)
     448              :          END DO
     449              :       CASE (2)
     450              :          ! y deriv
     451          114 : !$OMP        PARALLEL DO PRIVATE(ii,k,j,i,coeff,tmp) DEFAULT(NONE) SHARED(spline_g,csIVals,csJVals,csKVals)
     452              :          DO ii = 1, SIZE(spline_g%array)
     453              :             i = spline_g%pw_grid%g_hat(1, ii)
     454              :             j = spline_g%pw_grid%g_hat(2, ii)
     455              :             k = spline_g%pw_grid%g_hat(3, ii)
     456              : 
     457              :             tmp = csIVals(i)*csJVals(j)
     458              :             coeff = (tmp*csKVals(k) + &
     459              :                      (tmp + csJVals(j)*csKVals(k))*2.0_dp + &
     460              :                      csJVals(j)*4.0_dp)*inv9
     461              : 
     462              :             spline_g%array(ii) = spline_g%array(ii)* &
     463              :                                  CMPLX(0.0_dp, coeff, dp)
     464              :          END DO
     465              :       CASE (3)
     466              :          ! z deriv
     467          342 : !$OMP        PARALLEL DO PRIVATE(ii,k,j,i,coeff,tmp) DEFAULT(NONE) SHARED(spline_g,csIVals,csJVals,csKVals)
     468              :          DO ii = 1, SIZE(spline_g%array)
     469              :             i = spline_g%pw_grid%g_hat(1, ii)
     470              :             j = spline_g%pw_grid%g_hat(2, ii)
     471              :             k = spline_g%pw_grid%g_hat(3, ii)
     472              : 
     473              :             tmp = csIVals(i)*csKVals(k)
     474              :             coeff = (tmp*csJVals(j) + &
     475              :                      (tmp + csJVals(j)*csKVals(k))*2.0_dp + &
     476              :                      csKVals(k)*4.0_dp)*inv9
     477              : 
     478              :             spline_g%array(ii) = spline_g%array(ii)* &
     479              :                                  CMPLX(0.0_dp, coeff, dp)
     480              :          END DO
     481              :       END SELECT
     482              : 
     483          342 :       DEALLOCATE (csIVals, csJVals, csKVals)
     484              : 
     485          342 :       CALL timestop(handle)
     486          684 :    END SUBROUTINE pw_spline3_deriv_g
     487              : 
     488              : ! **************************************************************************************************
     489              : !> \brief calculates the FFT of the values of the x,y,z (idir=1,2,3)
     490              : !>      derivative of the quadratic spline
     491              : !> \param spline_g on entry the FFT of the coefficients of the spline
     492              : !>        will contain the FFT of the derivative
     493              : !> \param idir direction of the derivative
     494              : !> \par History
     495              : !>      06.2003 created [fawzi]
     496              : !> \author Fawzi Mohamed
     497              : !> \note
     498              : !>      the distance between gridpoints is assumed to be 1
     499              : ! **************************************************************************************************
     500        38550 :    SUBROUTINE pw_spline2_deriv_g(spline_g, idir)
     501              :       TYPE(pw_c1d_gs_type), INTENT(IN)                   :: spline_g
     502              :       INTEGER, INTENT(in)                                :: idir
     503              : 
     504              :       CHARACTER(len=*), PARAMETER :: routineN = 'pw_spline2_deriv_g'
     505              :       REAL(KIND=dp), PARAMETER                           :: inv16 = 1.0_dp/16.0_dp
     506              : 
     507              :       INTEGER                                            :: handle, i, ii, j, k
     508              :       INTEGER, DIMENSION(2, 3)                           :: bo
     509              :       INTEGER, DIMENSION(3)                              :: n, n_tot
     510              :       REAL(KIND=dp)                                      :: coeff, tmp
     511        38550 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: csIVals, csJVals, csKVals
     512              : 
     513        38550 :       CALL timeset(routineN, handle)
     514              : 
     515       154200 :       n(1:3) = spline_g%pw_grid%npts_local(1:3)
     516       154200 :       n_tot(1:3) = spline_g%pw_grid%npts(1:3)
     517       385500 :       bo = spline_g%pw_grid%bounds
     518              : 
     519        38550 :       CPASSERT(.NOT. spline_g%pw_grid%spherical)
     520        38550 :       CPASSERT(spline_g%pw_grid%grid_span == FULLSPACE)
     521              : 
     522            0 :       ALLOCATE (csIVals(bo(1, 1):bo(2, 1)), csJVals(bo(1, 2):bo(2, 2)), &
     523       269850 :                 csKVals(bo(1, 3):bo(2, 3)))
     524              : 
     525        38550 :       coeff = twopi/n_tot(1)
     526        38550 :       IF (idir == 1) THEN
     527        12850 : !$OMP        PARALLEL DO PRIVATE(i) DEFAULT(NONE) SHARED(bo,coeff,csIVals)
     528              :          DO i = bo(1, 1), bo(2, 1)
     529              :             csIVals(i) = SIN(coeff*REAL(i, dp))
     530              :          END DO
     531              :       ELSE
     532        25700 : !$OMP        PARALLEL DO PRIVATE(i) DEFAULT(NONE) SHARED(bo,coeff,csIVals)
     533              :          DO i = bo(1, 1), bo(2, 1)
     534              :             csIVals(i) = COS(coeff*REAL(i, dp))
     535              :          END DO
     536              :       END IF
     537        38550 :       coeff = twopi/n_tot(2)
     538        38550 :       IF (idir == 2) THEN
     539        12850 : !$OMP        PARALLEL DO PRIVATE(j) DEFAULT(NONE) SHARED(bo,coeff,csJVals)
     540              :          DO j = bo(1, 2), bo(2, 2)
     541              :             csJVals(j) = SIN(coeff*REAL(j, dp))
     542              :          END DO
     543              :       ELSE
     544        25700 : !$OMP        PARALLEL DO PRIVATE(j) DEFAULT(NONE) SHARED(bo,coeff,csJVals)
     545              :          DO j = bo(1, 2), bo(2, 2)
     546              :             csJVals(j) = COS(coeff*REAL(j, dp))
     547              :          END DO
     548              :       END IF
     549        38550 :       coeff = twopi/n_tot(3)
     550        38550 :       IF (idir == 3) THEN
     551        12850 : !$OMP        PARALLEL DO PRIVATE(k) DEFAULT(NONE) SHARED(bo,coeff,csKVals)
     552              :          DO k = bo(1, 3), bo(2, 3)
     553              :             csKVals(k) = SIN(coeff*REAL(k, dp))
     554              :          END DO
     555              :       ELSE
     556        25700 : !$OMP        PARALLEL DO PRIVATE(k) DEFAULT(NONE) SHARED(bo,coeff,csKVals)
     557              :          DO k = bo(1, 3), bo(2, 3)
     558              :             csKVals(k) = COS(coeff*REAL(k, dp))
     559              :          END DO
     560              :       END IF
     561              : 
     562        12850 :       SELECT CASE (idir)
     563              :       CASE (1)
     564              :          ! x deriv
     565        12850 : !$OMP        PARALLEL DO PRIVATE(ii,k,j,i,coeff,tmp) SHARED(spline_g,csIVals,csJVals,csKVals) DEFAULT(NONE)
     566              :          DO ii = 1, SIZE(spline_g%array)
     567              :             i = spline_g%pw_grid%g_hat(1, ii)
     568              :             j = spline_g%pw_grid%g_hat(2, ii)
     569              :             k = spline_g%pw_grid%g_hat(3, ii)
     570              : !FM                ! formula
     571              : !FM                coeff=(sinVal(1)*cosVal(2)*cosVal(3))/16.0_dp+&
     572              : !FM                     (sinVal(1)*cosVal(2)+sinVal(1)*cosVal(3))*3.0_dp/16.0_dp+&
     573              : !FM                     sinVal(1)*9.0_dp/16.0_dp
     574              :             tmp = csIVals(i)*csJVals(j)
     575              :             coeff = (tmp*csKVals(k) + &
     576              :                      (tmp + csIVals(i)*csKVals(k))*3.0_dp + &
     577              :                      csIVals(i)*9.0_dp)*inv16
     578              : 
     579              :             spline_g%array(ii) = spline_g%array(ii)* &
     580              :                                  CMPLX(0.0_dp, coeff, dp)
     581              :          END DO
     582              :       CASE (2)
     583              :          ! y deriv
     584        12850 : !$OMP        PARALLEL DO PRIVATE(ii,k,j,i,coeff,tmp) DEFAULT(NONE) SHARED(spline_g,csIVals,csJVals,csKVals)
     585              :          DO ii = 1, SIZE(spline_g%array)
     586              :             i = spline_g%pw_grid%g_hat(1, ii)
     587              :             j = spline_g%pw_grid%g_hat(2, ii)
     588              :             k = spline_g%pw_grid%g_hat(3, ii)
     589              : 
     590              :             tmp = csIVals(i)*csJVals(j)
     591              :             coeff = (tmp*csKVals(k) + &
     592              :                      (tmp + csJVals(j)*csKVals(k))*3.0_dp + &
     593              :                      csJVals(j)*9.0_dp)*inv16
     594              : 
     595              :             spline_g%array(ii) = spline_g%array(ii)* &
     596              :                                  CMPLX(0.0_dp, coeff, dp)
     597              :          END DO
     598              :       CASE (3)
     599              :          ! z deriv
     600        38550 : !$OMP        PARALLEL DO PRIVATE(ii,k,j,i,coeff,tmp) DEFAULT(NONE) SHARED(spline_g,csIVals,csJVals,csKVals)
     601              :          DO ii = 1, SIZE(spline_g%array)
     602              :             i = spline_g%pw_grid%g_hat(1, ii)
     603              :             j = spline_g%pw_grid%g_hat(2, ii)
     604              :             k = spline_g%pw_grid%g_hat(3, ii)
     605              : 
     606              :             tmp = csIVals(i)*csKVals(k)
     607              :             coeff = (tmp*csJVals(j) + &
     608              :                      (tmp + csJVals(j)*csKVals(k))*3.0_dp + &
     609              :                      csKVals(k)*9.0_dp)*inv16
     610              : 
     611              :             spline_g%array(ii) = spline_g%array(ii)* &
     612              :                                  CMPLX(0.0_dp, coeff, dp)
     613              :          END DO
     614              :       END SELECT
     615              : 
     616        38550 :       DEALLOCATE (csIVals, csJVals, csKVals)
     617              : 
     618        38550 :       CALL timestop(handle)
     619        77100 :    END SUBROUTINE pw_spline2_deriv_g
     620              : 
     621              : ! **************************************************************************************************
     622              : !> \brief applies a nearest neighbor linear operator to a stripe in x direction:
     623              : !>      out_val(i)=sum(weight(j)*in_val(i+j-1),j=0..2)
     624              : !> \param weights the weights of the linear operator
     625              : !> \param in_val the argument to the operator
     626              : !> \param in_val_first the first argument (needed to calculate out_val(1))
     627              : !> \param in_val_last the last argument (needed to calculate out_val(n_el))
     628              : !> \param out_val the place where the result is accumulated
     629              : !> \param n_el the number of elements in in_v and out_v
     630              : !> \par History
     631              : !>      04.2004 created [fawzi]
     632              : !> \author fawzi
     633              : !> \note
     634              : !>      uses 2 read streams and 1 write stream
     635              : ! **************************************************************************************************
     636    616922146 :    SUBROUTINE pw_compose_stripe(weights, in_val, in_val_first, in_val_last, &
     637              :                                 out_val, n_el)
     638              :       REAL(kind=dp), DIMENSION(0:2), INTENT(in)          :: weights
     639              :       REAL(kind=dp), DIMENSION(*), INTENT(in)            :: in_val
     640              :       REAL(kind=dp), INTENT(in)                          :: in_val_first, in_val_last
     641              :       REAL(kind=dp), DIMENSION(*), INTENT(inout)         :: out_val
     642              :       INTEGER                                            :: n_el
     643              : 
     644              :       INTEGER                                            :: i
     645              :       REAL(kind=dp)                                      :: v0, v1, v2
     646              : 
     647              : !1:n_el), &
     648              : !1:n_el), &
     649              : 
     650    616922146 :       IF (n_el < 1) RETURN
     651    616922146 :       v0 = in_val_first
     652    616922146 :       v1 = in_val(1)
     653    616922146 :       IF (weights(1) == 0.0_dp) THEN
     654              :          ! optimized version for x deriv
     655     90263970 :          DO i = 1, n_el - 3, 3
     656    872499429 :             v2 = in_val(i + 1)
     657              :             out_val(i) = out_val(i) + &
     658              :                          weights(0)*v0 + &
     659    872499429 :                          weights(2)*v2
     660    872499429 :             v0 = in_val(i + 2)
     661              :             out_val(i + 1) = out_val(i + 1) + &
     662              :                              weights(0)*v1 + &
     663    872499429 :                              weights(2)*v0
     664    872499429 :             v1 = in_val(i + 3)
     665              :             out_val(i + 2) = out_val(i + 2) + &
     666              :                              weights(0)*v2 + &
     667    872499429 :                              weights(2)*v1
     668              :          END DO
     669              :       ELSE
     670              :          ! generic version
     671    526658176 :          DO i = 1, n_el - 3, 3
     672   3977756690 :             v2 = in_val(i + 1)
     673              :             out_val(i) = out_val(i) + &
     674              :                          weights(0)*v0 + &
     675              :                          weights(1)*v1 + &
     676   3977756690 :                          weights(2)*v2
     677   3977756690 :             v0 = in_val(i + 2)
     678              :             out_val(i + 1) = out_val(i + 1) + &
     679              :                              weights(0)*v1 + &
     680              :                              weights(1)*v2 + &
     681   3977756690 :                              weights(2)*v0
     682   3977756690 :             v1 = in_val(i + 3)
     683              :             out_val(i + 2) = out_val(i + 2) + &
     684              :                              weights(0)*v2 + &
     685              :                              weights(1)*v0 + &
     686   3982008740 :                              weights(2)*v1
     687              :          END DO
     688              :       END IF
     689    801151017 :       SELECT CASE (MODULO(n_el - 1, 3))
     690              :       CASE (0)
     691    184228871 :          v2 = in_val_last
     692              :          out_val(n_el) = out_val(n_el) + &
     693              :                          weights(0)*v0 + &
     694              :                          weights(1)*v1 + &
     695    184228871 :                          weights(2)*v2
     696              :       CASE (1)
     697    193784038 :          v2 = in_val(n_el)
     698              :          out_val(n_el - 1) = out_val(n_el - 1) + &
     699              :                              weights(0)*v0 + &
     700              :                              weights(1)*v1 + &
     701    193784038 :                              weights(2)*v2
     702    193784038 :          v0 = in_val_last
     703              :          out_val(n_el) = out_val(n_el) + &
     704              :                          weights(0)*v1 + &
     705              :                          weights(1)*v2 + &
     706    193784038 :                          weights(2)*v0
     707              :       CASE (2)
     708    238909237 :          v2 = in_val(n_el - 1)
     709              :          out_val(n_el - 2) = out_val(n_el - 2) + &
     710              :                              weights(0)*v0 + &
     711              :                              weights(1)*v1 + &
     712    238909237 :                              weights(2)*v2
     713    238909237 :          v0 = in_val(n_el)
     714              :          out_val(n_el - 1) = out_val(n_el - 1) + &
     715              :                              weights(0)*v1 + &
     716              :                              weights(1)*v2 + &
     717    238909237 :                              weights(2)*v0
     718    238909237 :          v1 = in_val_last
     719              :          out_val(n_el) = out_val(n_el) + &
     720              :                          weights(0)*v2 + &
     721              :                          weights(1)*v0 + &
     722    616922146 :                          weights(2)*v1
     723              :       END SELECT
     724              : 
     725              :    END SUBROUTINE pw_compose_stripe
     726              : 
     727              : ! **************************************************************************************************
     728              : !> \brief private routine that computes pw_nn_compose_r (it seems that without
     729              : !>      passing arrays in this way either some compiler do a copyin/out (xlf)
     730              : !>      or by inlining suboptimal code is produced (nag))
     731              : !> \param weights a 3x3x3 array with the linear operator
     732              : !> \param in_val the argument for the linear operator
     733              : !> \param out_val place where the value of the linear oprator should be added
     734              : !> \param pw_in pw to be able to get the needed meta data about in_val and
     735              : !>        out_val
     736              : !> \param bo boundaries of in_val and out_val
     737              : !> \author fawzi
     738              : ! **************************************************************************************************
     739        26228 :    SUBROUTINE pw_nn_compose_r_work(weights, in_val, out_val, pw_in, bo)
     740              :       REAL(kind=dp), DIMENSION(0:2, 0:2, 0:2)            :: weights
     741              :       INTEGER, DIMENSION(2, 3)                           :: bo
     742              :       TYPE(pw_r3d_rs_type), INTENT(IN)                   :: pw_in
     743              :       REAL(kind=dp), DIMENSION(bo(1, 1):bo(2, 1), bo(1, &
     744              :          2):bo(2, 2), bo(1, 3):bo(2, 3)), INTENT(inout)  :: out_val
     745              :       REAL(kind=dp), DIMENSION(bo(1, 1):bo(2, 1), bo(1, &
     746              :          2):bo(2, 2), bo(1, 3):bo(2, 3)), INTENT(in)     :: in_val
     747              : 
     748              :       INTEGER                                            :: i, j, jw, k, kw, myj, myk
     749              :       INTEGER, DIMENSION(2, 3)                           :: gbo
     750              :       INTEGER, DIMENSION(3)                              :: s
     751              :       LOGICAL                                            :: has_boundary, yderiv, zderiv
     752              :       REAL(kind=dp)                                      :: in_val_f, in_val_l
     753        26228 :       REAL(kind=dp), DIMENSION(:, :), POINTER            :: l_boundary, tmp, u_boundary
     754              : 
     755        72812 :       zderiv = ALL(weights(:, :, 1) == 0.0_dp)
     756        72812 :       yderiv = ALL(weights(:, 1, :) == 0.0_dp)
     757       262280 :       bo = pw_in%pw_grid%bounds_local
     758       262280 :       gbo = pw_in%pw_grid%bounds
     759       104912 :       DO i = 1, 3
     760       104912 :          s(i) = bo(2, i) - bo(1, i) + 1
     761              :       END DO
     762       104912 :       IF (ANY(s < 1)) RETURN
     763              :       has_boundary = ANY(pw_in%pw_grid%bounds_local(:, 1) /= &
     764        41304 :                          pw_in%pw_grid%bounds(:, 1))
     765        26228 :       IF (has_boundary) THEN
     766              :          ALLOCATE (l_boundary(bo(1, 2):bo(2, 2), bo(1, 3):bo(2, 3)), &
     767              :                    u_boundary(bo(1, 2):bo(2, 2), bo(1, 3):bo(2, 3)), &
     768       199360 :                    tmp(bo(1, 2):bo(2, 2), bo(1, 3):bo(2, 3)))
     769     50802424 :          tmp(:, :) = pw_in%array(bo(2, 1), :, :)
     770              :          CALL pw_in%pw_grid%para%group%sendrecv(tmp, pw_in%pw_grid%para%pos_of_x( &
     771              :                                                 gbo(1, 1) + MODULO(bo(2, 1) + 1 - gbo(1, 1), gbo(2, 1) - gbo(1, 1) + 1)), &
     772              :                                                 l_boundary, pw_in%pw_grid%para%pos_of_x( &
     773    101579928 :                                                 gbo(1, 1) + MODULO(bo(1, 1) - 1 - gbo(1, 1), gbo(2, 1) - gbo(1, 1) + 1)))
     774     50802424 :          tmp(:, :) = pw_in%array(bo(1, 1), :, :)
     775              :          CALL pw_in%pw_grid%para%group%sendrecv(tmp, pw_in%pw_grid%para%pos_of_x( &
     776              :                                                 gbo(1, 1) + MODULO(bo(1, 1) - 1 - gbo(1, 1), gbo(2, 1) - gbo(1, 1) + 1)), &
     777              :                                                 u_boundary, pw_in%pw_grid%para%pos_of_x( &
     778    101579928 :                                                 gbo(1, 1) + MODULO(bo(2, 1) + 1 - gbo(1, 1), gbo(2, 1) - gbo(1, 1) + 1)))
     779        24920 :          DEALLOCATE (tmp)
     780              :       END IF
     781              : 
     782              : !$OMP   PARALLEL DO DEFAULT(NONE) PRIVATE(k,kw,myk,j,jw,myj,in_val_f,&
     783              : !$OMP       in_val_l) SHARED(zderiv,yderiv,bo,in_val,out_val,s,l_boundary,&
     784        26228 : !$OMP       u_boundary,weights,has_boundary)
     785              :       DO k = 0, s(3) - 1
     786              :          DO kw = 0, 2
     787              :             myk = bo(1, 3) + MODULO(k + kw - 1, s(3))
     788              :             IF (zderiv .AND. kw == 1) CYCLE
     789              :             DO j = 0, s(2) - 1
     790              :                DO jw = 0, 2
     791              :                   myj = bo(1, 2) + MODULO(j + jw - 1, s(2))
     792              :                   IF (yderiv .AND. jw == 1) CYCLE
     793              :                   IF (has_boundary) THEN
     794              :                      in_val_f = l_boundary(myj, myk)
     795              :                      in_val_l = u_boundary(myj, myk)
     796              :                   ELSE
     797              :                      in_val_f = in_val(bo(2, 1), myj, myk)
     798              :                      in_val_l = in_val(bo(1, 1), myj, myk)
     799              :                   END IF
     800              :                   CALL pw_compose_stripe(weights=weights(:, jw, kw), &
     801              :                                          in_val=in_val(:, myj, myk), &
     802              :                                          in_val_first=in_val_f, in_val_last=in_val_l, &
     803              :                                          out_val=out_val(:, bo(1, 2) + j, bo(1, 3) + k), n_el=s(1))
     804              :                END DO
     805              :             END DO
     806              :          END DO
     807              :       END DO
     808        26228 :       IF (has_boundary) THEN
     809        24920 :          DEALLOCATE (l_boundary, u_boundary)
     810              :       END IF
     811        26228 :    END SUBROUTINE pw_nn_compose_r_work
     812              : 
     813              : ! **************************************************************************************************
     814              : !> \brief applies a nearest neighbor linear operator to a pw in real space
     815              : !> \param weights a 3x3x3 array with the linear operator
     816              : !> \param pw_in the argument for the linear operator
     817              : !> \param pw_out place where the value of the linear oprator should be added
     818              : !> \author fawzi
     819              : !> \note
     820              : !>      has specialized versions for derivative operator (with central values==0)
     821              : ! **************************************************************************************************
     822        26228 :    SUBROUTINE pw_nn_compose_r(weights, pw_in, pw_out)
     823              :       REAL(kind=dp), DIMENSION(0:2, 0:2, 0:2)            :: weights
     824              :       TYPE(pw_r3d_rs_type), INTENT(IN)                   :: pw_in, pw_out
     825              : 
     826              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'pw_nn_compose_r'
     827              : 
     828              :       INTEGER                                            :: handle
     829              : 
     830        26228 :       CALL timeset(routineN, handle)
     831       183596 :       IF (.NOT. ALL(pw_in%pw_grid%bounds_local(:, 2:3) == pw_in%pw_grid%bounds(:, 2:3))) THEN
     832            0 :          CPABORT("wrong pw distribution")
     833              :       END IF
     834              :       CALL pw_nn_compose_r_work(weights=weights, in_val=pw_in%array, &
     835        26228 :                                 out_val=pw_out%array, pw_in=pw_in, bo=pw_in%pw_grid%bounds_local)
     836        26228 :       CALL timestop(handle)
     837        26228 :    END SUBROUTINE pw_nn_compose_r
     838              : 
     839              : ! **************************************************************************************************
     840              : !> \brief calculates the values of a nearest neighbor smearing
     841              : !> \param pw_in the argument for the linear operator
     842              : !> \param pw_out place where the smeared values should be added
     843              : !> \param coeffs array with the coefficent of the smearing, ordered with
     844              : !>        the distance from the center: coeffs(1) the coeff of the central
     845              : !>        element, coeffs(2) the coeff of the 6 element with distance 1,
     846              : !>        coeff(3) the coeff of the 12 elements at distance sqrt(2),
     847              : !>        coeff(4) the coeff of the 8 elements at distance sqrt(3).
     848              : !> \author Fawzi Mohamed
     849              : !> \note
     850              : !>      does not normalize the smear to 1.
     851              : !>      with coeff=(/ 8._dp/27._dp, 2._dp/27._dp, 1._dp/54._dp, 1._dp/216._dp /)
     852              : !>      is equivalent to pw_spline3_evaluate_values_g, with
     853              : !>      coeff=(/ 27._dp/64._dp, 9._dp/128._dp, 3._dp/256._dp, 1._dp/512._dp /)
     854              : ! **************************************************************************************************
     855        14582 :    SUBROUTINE pw_nn_smear_r(pw_in, pw_out, coeffs)
     856              :       TYPE(pw_r3d_rs_type), INTENT(IN)                   :: pw_in, pw_out
     857              :       REAL(KIND=dp), DIMENSION(4), INTENT(in)            :: coeffs
     858              : 
     859              :       INTEGER                                            :: i, j, k
     860              :       REAL(kind=dp), DIMENSION(-1:1, -1:1, -1:1)         :: weights
     861              : 
     862        58328 :       DO k = -1, 1
     863       189566 :          DO j = -1, 1
     864       568698 :             DO i = -1, 1
     865       524952 :                weights(i, j, k) = coeffs(ABS(i) + ABS(j) + ABS(k) + 1)
     866              :             END DO
     867              :          END DO
     868              :       END DO
     869              : 
     870        14582 :       CALL pw_nn_compose_r(weights=weights, pw_in=pw_in, pw_out=pw_out)
     871        14582 :    END SUBROUTINE pw_nn_smear_r
     872              : 
     873              : ! **************************************************************************************************
     874              : !> \brief calculates a nearest neighbor central derivative.
     875              : !>      for the x dir:
     876              : !>      pw_out%array(i,j,k)=( pw_in(i+1,j,k)-pw_in(i-1,j,k) )*coeff(1)+
     877              : !>             ( pw_in(i+1,j(+-)1,k)-pw_in(i-1,j(+-)1,k)+
     878              : !>               pw_in(i+1,j,k(+-)1)-pw_in(i-1,j,k(+-)1) )*coeff(2)+
     879              : !>             ( pw_in(i+1,j(+-)1,k(+-)1)-pw_in(i-1,j(+-)1,k(+-)1)+
     880              : !>               pw_in(i+1,j(+-)1,k(-+)1)-pw_in(i-1,j(+-)1,k(-+)1) )*coeff(3)
     881              : !>      periodic boundary conditions are applied
     882              : !> \param pw_in the argument for the linear operator
     883              : !> \param pw_out place where the smeared values should be added
     884              : !> \param coeffs array with the coefficent of the front (positive) plane
     885              : !>        of the central derivative, ordered with
     886              : !>        the distance from the center: coeffs(1) the coeff of the central
     887              : !>        element, coeffs(2) the coeff of the 4 element with distance 1,
     888              : !>        coeff(3) the coeff of the 4 elements at distance sqrt(2)
     889              : !> \param idir ...
     890              : !> \author Fawzi Mohamed
     891              : !> \note
     892              : !>      with coeff=(/ 2.0_dp/9.0_dp,  1.0_dp/18.0_dp, 1.0_dp/72.0_dp /)
     893              : !>      is equivalent to pw_spline3_deriv_r, with
     894              : !>      coeff=(/ 9.0_dp/32.0_dp, 3.0_dp/64.0_dp, 1.0_dp/128.0_dp /)
     895              : !>      to pw_spline2_deriv_r
     896              : !>      coeff=(/ 25._dp/72._dp, 5._dp/144, 1._dp/288._dp /)
     897              : ! **************************************************************************************************
     898        11646 :    SUBROUTINE pw_nn_deriv_r(pw_in, pw_out, coeffs, idir)
     899              :       TYPE(pw_r3d_rs_type), INTENT(IN)                   :: pw_in, pw_out
     900              :       REAL(KIND=dp), DIMENSION(3), INTENT(in)            :: coeffs
     901              :       INTEGER                                            :: idir
     902              : 
     903              :       INTEGER                                            :: i, idirVal, j, k
     904              :       REAL(kind=dp), DIMENSION(-1:1, -1:1, -1:1)         :: weights
     905              : 
     906        46584 :       DO k = -1, 1
     907       151398 :          DO j = -1, 1
     908       454194 :             DO i = -1, 1
     909       314442 :                SELECT CASE (idir)
     910              :                CASE (1)
     911       104814 :                   idirVal = i
     912              :                CASE (2)
     913       104814 :                   idirVal = j
     914              :                CASE (3)
     915       104814 :                   idirVal = k
     916              :                CASE default
     917       314442 :                   CPABORT("invalid idir ("//TRIM(cp_to_string(idir))//")")
     918              :                END SELECT
     919       419256 :                IF (idirVal == 0) THEN
     920       104814 :                   weights(i, j, k) = 0.0_dp
     921              :                ELSE
     922       209628 :                   weights(i, j, k) = REAL(idirVal, dp)*coeffs(ABS(i) + ABS(j) + ABS(k))
     923              :                END IF
     924              :             END DO
     925              :          END DO
     926              :       END DO
     927              : 
     928        11646 :       CALL pw_nn_compose_r(weights=weights, pw_in=pw_in, pw_out=pw_out)
     929        11646 :    END SUBROUTINE pw_nn_deriv_r
     930              : 
     931              : ! **************************************************************************************************
     932              : !> \brief low level function that adds a coarse grid
     933              : !>      to a fine grid.
     934              : !>      If pbc is true periodic boundary conditions are applied
     935              : !>
     936              : !>      It will add to
     937              : !>
     938              : !>        fine_values(2*coarse_bounds(1,1):2*coarse_bounds(2,1),
     939              : !>                    2*coarse_bounds(1,2):2*coarse_bounds(2,2),
     940              : !>                    2*coarse_bounds(1,3):2*coarse_bounds(2,3))
     941              : !>
     942              : !>      using
     943              : !>
     944              : !>        coarse_coeffs(coarse_bounds(1,1):coarse_bounds(2,1),
     945              : !>                      coarse_bounds(1,2):coarse_bounds(2,2),
     946              : !>                      coarse_bounds(1,3):coarse_bounds(2,3))
     947              : !>
     948              : !>      composed with the weights obtained by the direct product of the
     949              : !>      1d coefficients weights:
     950              : !>
     951              : !>      for i,j,k in -3..3
     952              : !>         w(i,j,k)=weights_1d(abs(i)+1)*weights_1d(abs(j)+1)*
     953              : !>                  weights_1d(abs(k)+1)
     954              : !> \param coarse_coeffs_pw the values of the coefficients
     955              : !> \param fine_values_pw where to add the values due to the
     956              : !>        coarse coeffs
     957              : !> \param weights_1d the weights of the 1d smearing
     958              : !> \param w_border0 the 1d weight at the border (when pbc is false)
     959              : !> \param w_border1 the 1d weights for a point one off the border
     960              : !>        (w_border1(1) is the weight of the coefficent at the border)
     961              : !>        (used if pbc is false)
     962              : !> \param pbc if periodic boundary conditions should be applied
     963              : !> \param safe_computation ...
     964              : !> \author fawzi
     965              : !> \note
     966              : !>      coarse looping is continuos, I did not check if keeping the fine looping
     967              : !>      contiguous is better.
     968              : !>      And I ask myself yet again why, why we use x-slice distribution,
     969              : !>      z-slice distribution would be much better performancewise
     970              : !>      (and would semplify this code enormously).
     971              : !>      fine2coarse has much more understandable parallel part (build up of
     972              : !>      send/rcv sizes,... but worse if you have really a lot of processors,
     973              : !>      probabily irrelevant because it is not critical) [fawzi].
     974              : ! **************************************************************************************************
     975         2260 :    SUBROUTINE add_coarse2fine(coarse_coeffs_pw, fine_values_pw, &
     976              :                               weights_1d, w_border0, w_border1, pbc, safe_computation)
     977              :       TYPE(pw_r3d_rs_type), INTENT(IN)                   :: coarse_coeffs_pw, fine_values_pw
     978              :       REAL(kind=dp), DIMENSION(4), INTENT(in)            :: weights_1d
     979              :       REAL(kind=dp), INTENT(in)                          :: w_border0
     980              :       REAL(kind=dp), DIMENSION(3), INTENT(in)            :: w_border1
     981              :       LOGICAL, INTENT(in)                                :: pbc
     982              :       LOGICAL, INTENT(in), OPTIONAL                      :: safe_computation
     983              : 
     984              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'add_coarse2fine'
     985              : 
     986              :       INTEGER :: coarse_slice_size, f_shift(3), fi, fi_lb, fi_ub, fj, fk, handle, handle2, i, ii, &
     987              :          ij, ik, ip, j, k, my_lb, my_ub, n_procs, p, p_lb, p_old, p_ub, rcv_tot_size, rest_b, &
     988              :          s(3), send_tot_size, sf, shift, ss, x, x_att, xx
     989         2260 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: rcv_offset, rcv_size, real_rcv_size, &
     990         2260 :                                                             send_offset, send_size, sent_size
     991              :       INTEGER, DIMENSION(2, 3)                           :: coarse_bo, coarse_gbo, fine_bo, &
     992              :                                                             fine_gbo, my_coarse_bo
     993         2260 :       INTEGER, DIMENSION(:), POINTER                     :: pos_of_x
     994              :       LOGICAL                                            :: has_i_lbound, has_i_ubound, is_split, &
     995              :                                                             safe_calc
     996              :       REAL(kind=dp)                                      :: v0, v1, v2, v3, wi, wj, wk
     997         2260 :       REAL(kind=dp), ALLOCATABLE, DIMENSION(:)           :: rcv_buf, send_buf
     998              :       REAL(kind=dp), DIMENSION(3)                        :: w_0, ww0
     999              :       REAL(kind=dp), DIMENSION(4)                        :: w_1, ww1
    1000         2260 :       REAL(kind=dp), DIMENSION(:, :, :), POINTER         :: coarse_coeffs, fine_values
    1001              : 
    1002         2260 :       CALL timeset(routineN, handle)
    1003              : !    CALL timeset(routineN//"_pre",handle2)
    1004         2260 :       safe_calc = .FALSE.
    1005         2260 :       IF (PRESENT(safe_computation)) safe_calc = safe_computation
    1006         2260 :       ii = coarse_coeffs_pw%pw_grid%para%group%compare(fine_values_pw%pw_grid%para%group)
    1007         2260 :       IF (ii > mp_comm_congruent) THEN
    1008            0 :          CPABORT("")
    1009              :       END IF
    1010        22600 :       my_coarse_bo = coarse_coeffs_pw%pw_grid%bounds_local
    1011        22600 :       coarse_gbo = coarse_coeffs_pw%pw_grid%bounds
    1012        22600 :       fine_bo = fine_values_pw%pw_grid%bounds_local
    1013        22600 :       fine_gbo = fine_values_pw%pw_grid%bounds
    1014         9040 :       f_shift = fine_gbo(1, :) - 2*coarse_gbo(1, :)
    1015         6780 :       DO j = 2, 3
    1016        15820 :          DO i = 1, 2
    1017        13560 :             coarse_bo(i, j) = FLOOR((fine_bo(i, j) - f_shift(j))/2.)
    1018              :          END DO
    1019              :       END DO
    1020         2260 :       IF (fine_bo(1, 1) <= fine_bo(2, 1)) THEN
    1021         2260 :          coarse_bo(1, 1) = FLOOR((fine_bo(1, 1) - 2 - f_shift(1))/2.)
    1022         2260 :          coarse_bo(2, 1) = FLOOR((fine_bo(2, 1) + 3 - f_shift(1))/2.)
    1023              :       ELSE
    1024            0 :          coarse_bo(1, 1) = coarse_gbo(2, 1)
    1025            0 :          coarse_bo(2, 1) = coarse_gbo(2, 1) - 1
    1026              :       END IF
    1027         6744 :       is_split = ANY(coarse_gbo(:, 1) /= my_coarse_bo(:, 1))
    1028         2260 :       IF (.NOT. is_split .OR. .NOT. pbc) THEN
    1029         2260 :          coarse_bo(1, 1) = MAX(coarse_gbo(1, 1), coarse_bo(1, 1))
    1030         2260 :          coarse_bo(2, 1) = MIN(coarse_gbo(2, 1), coarse_bo(2, 1))
    1031              :       END IF
    1032         2260 :       has_i_ubound = (fine_gbo(2, 1) /= fine_bo(2, 1)) .OR. pbc .AND. is_split
    1033         2260 :       has_i_lbound = (fine_gbo(1, 1) /= fine_bo(1, 1)) .OR. pbc .AND. is_split
    1034              : 
    1035         2260 :       IF (pbc) THEN
    1036            0 :          CPASSERT(ALL(fine_gbo(1, :) == 2*coarse_gbo(1, :) + f_shift))
    1037            0 :          CPASSERT(ALL(fine_gbo(2, :) == 2*coarse_gbo(2, :) + 1 + f_shift))
    1038              :       ELSE
    1039         9040 :          CPASSERT(ALL(fine_gbo(2, :) == 2*coarse_gbo(2, :) + f_shift))
    1040         9040 :          CPASSERT(ALL(fine_gbo(1, :) == 2*coarse_gbo(1, :) + f_shift))
    1041              :       END IF
    1042              : 
    1043         2260 :       coarse_coeffs => coarse_coeffs_pw%array
    1044         9040 :       DO i = 1, 3
    1045         9040 :          s(i) = coarse_gbo(2, i) - coarse_gbo(1, i) + 1
    1046              :       END DO
    1047              : !       CALL timestop(handle2)
    1048              :       ! *** parallel case
    1049         2260 :       IF (is_split) THEN
    1050           24 :          CALL timeset(routineN//"_comm", handle2)
    1051              :          coarse_slice_size = (coarse_bo(2, 2) - coarse_bo(1, 2) + 1)* &
    1052           24 :                              (coarse_bo(2, 3) - coarse_bo(1, 3) + 1)
    1053           24 :          n_procs = coarse_coeffs_pw%pw_grid%para%group%num_pe
    1054              :          ALLOCATE (send_size(0:n_procs - 1), send_offset(0:n_procs - 1), &
    1055              :                    sent_size(0:n_procs - 1), rcv_size(0:n_procs - 1), &
    1056          312 :                    rcv_offset(0:n_procs - 1), real_rcv_size(0:n_procs - 1))
    1057              : 
    1058              :          ! ** rcv size count
    1059              : 
    1060           24 :          pos_of_x => coarse_coeffs_pw%pw_grid%para%pos_of_x
    1061              :          p_old = pos_of_x(coarse_gbo(1, 1) &
    1062           24 :                           + MODULO(coarse_bo(1, 1) - coarse_gbo(1, 1), s(1)))
    1063           72 :          rcv_size = 0
    1064          184 :          DO x = coarse_bo(1, 1), coarse_bo(2, 1)
    1065          160 :             p = pos_of_x(coarse_gbo(1, 1) + MODULO(x - coarse_gbo(1, 1), s(1)))
    1066          184 :             rcv_size(p) = rcv_size(p) + coarse_slice_size
    1067              :          END DO
    1068              : 
    1069              :          ! ** send size count
    1070              : 
    1071           24 :          pos_of_x => fine_values_pw%pw_grid%para%pos_of_x
    1072           24 :          sf = fine_gbo(2, 1) - fine_gbo(1, 1) + 1
    1073           24 :          fi_lb = 2*my_coarse_bo(1, 1) - 3 + f_shift(1)
    1074           24 :          fi_ub = 2*my_coarse_bo(2, 1) + 3 + f_shift(1)
    1075           24 :          IF (.NOT. pbc) THEN
    1076           24 :             fi_lb = MAX(fi_lb, fine_gbo(1, 1))
    1077           24 :             fi_ub = MIN(fi_ub, fine_gbo(2, 1))
    1078              :          ELSE
    1079            0 :             fi_ub = MIN(fi_ub, fi_lb + sf - 1)
    1080              :          END IF
    1081           24 :          p_old = pos_of_x(fine_gbo(1, 1) + MODULO(fi_lb - fine_gbo(1, 1), sf))
    1082           24 :          p_lb = FLOOR((fi_lb - 2 - f_shift(1))/2.)
    1083           72 :          send_size = 0
    1084          320 :          DO x = fi_lb, fi_ub
    1085          296 :             p = pos_of_x(fine_gbo(1, 1) + MODULO(x - fine_gbo(1, 1), sf))
    1086          320 :             IF (p /= p_old) THEN
    1087           24 :                p_ub = FLOOR((x - 1 + 3 - f_shift(1))/2.)
    1088              : 
    1089              :                send_size(p_old) = send_size(p_old) + (MIN(p_ub, my_coarse_bo(2, 1)) &
    1090           24 :                                                       - MAX(p_lb, my_coarse_bo(1, 1)) + 1)*coarse_slice_size
    1091              : 
    1092           24 :                IF (pbc) THEN
    1093            0 :                   DO xx = p_lb, coarse_gbo(1, 1) - 1
    1094            0 :                      x_att = coarse_gbo(1, 1) + MODULO(xx - coarse_gbo(1, 1), s(1))
    1095            0 :                      IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
    1096            0 :                         send_size(p_old) = send_size(p_old) + coarse_slice_size
    1097              :                      END IF
    1098              :                   END DO
    1099            0 :                   DO xx = coarse_gbo(2, 1) + 1, p_ub
    1100            0 :                      x_att = coarse_gbo(1, 1) + MODULO(xx - coarse_gbo(1, 1), s(1))
    1101            0 :                      IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
    1102            0 :                         send_size(p_old) = send_size(p_old) + coarse_slice_size
    1103              :                      END IF
    1104              :                   END DO
    1105              :                END IF
    1106              : 
    1107           24 :                p_old = p
    1108           24 :                p_lb = FLOOR((x - 2 - f_shift(1))/2.)
    1109              :             END IF
    1110              :          END DO
    1111           24 :          p_ub = FLOOR((fi_ub + 3 - f_shift(1))/2.)
    1112              : 
    1113              :          send_size(p_old) = send_size(p_old) + (MIN(p_ub, my_coarse_bo(2, 1)) &
    1114           24 :                                                 - MAX(p_lb, my_coarse_bo(1, 1)) + 1)*coarse_slice_size
    1115              : 
    1116           24 :          IF (pbc) THEN
    1117            0 :             DO xx = p_lb, coarse_gbo(1, 1) - 1
    1118            0 :                x_att = coarse_gbo(1, 1) + MODULO(xx - coarse_gbo(1, 1), s(1))
    1119            0 :                IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
    1120            0 :                   send_size(p_old) = send_size(p_old) + coarse_slice_size
    1121              :                END IF
    1122              :             END DO
    1123            0 :             DO xx = coarse_gbo(2, 1) + 1, p_ub
    1124            0 :                x_att = coarse_gbo(1, 1) + MODULO(xx - coarse_gbo(1, 1), s(1))
    1125            0 :                IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
    1126            0 :                   send_size(p_old) = send_size(p_old) + coarse_slice_size
    1127              :                END IF
    1128              :             END DO
    1129              :          END IF
    1130              :          ! ** offsets & alloc send-rcv
    1131              : 
    1132              :          send_tot_size = 0
    1133           72 :          DO ip = 0, n_procs - 1
    1134           48 :             send_offset(ip) = send_tot_size
    1135           72 :             send_tot_size = send_tot_size + send_size(ip)
    1136              :          END DO
    1137           72 :          ALLOCATE (send_buf(0:send_tot_size - 1))
    1138              : 
    1139           24 :          rcv_tot_size = 0
    1140           72 :          DO ip = 0, n_procs - 1
    1141           48 :             rcv_offset(ip) = rcv_tot_size
    1142           72 :             rcv_tot_size = rcv_tot_size + rcv_size(ip)
    1143              :          END DO
    1144           24 :          IF (.NOT. rcv_tot_size == (coarse_bo(2, 1) - coarse_bo(1, 1) + 1)*coarse_slice_size) THEN
    1145            0 :             CPABORT("Error calculating rcv_tot_size ")
    1146              :          END IF
    1147           72 :          ALLOCATE (rcv_buf(0:rcv_tot_size - 1))
    1148              : 
    1149              :          ! ** fill send buffer
    1150              : 
    1151           24 :          p_old = pos_of_x(fine_gbo(1, 1) + MODULO(fi_lb - fine_gbo(1, 1), sf))
    1152           24 :          p_lb = FLOOR((fi_lb - 2 - f_shift(1))/2.)
    1153           72 :          sent_size(:) = send_offset
    1154           24 :          ss = my_coarse_bo(2, 1) - my_coarse_bo(1, 1) + 1
    1155          320 :          DO x = fi_lb, fi_ub
    1156          296 :             p = pos_of_x(fine_gbo(1, 1) + MODULO(x - fine_gbo(1, 1), sf))
    1157          320 :             IF (p /= p_old) THEN
    1158              :                shift = FLOOR((fine_gbo(1, 1) + MODULO(x - 1 - fine_gbo(1, 1), sf) - f_shift(1))/2._dp) - &
    1159           24 :                        FLOOR((x - 1 - f_shift(1))/2._dp)
    1160           24 :                p_ub = FLOOR((x - 1 + 3 - f_shift(1))/2._dp)
    1161              : 
    1162           24 :                IF (pbc) THEN
    1163            0 :                   DO xx = p_lb + shift, coarse_gbo(1, 1) - 1
    1164            0 :                      x_att = coarse_gbo(1, 1) + MODULO(xx - coarse_gbo(1, 1), sf)
    1165            0 :                      IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
    1166              :                         CALL dcopy(coarse_slice_size, &
    1167              :                                    coarse_coeffs(x_att, my_coarse_bo(1, 2), &
    1168            0 :                                                  my_coarse_bo(1, 3)), ss, send_buf(sent_size(p_old)), 1)
    1169            0 :                         sent_size(p_old) = sent_size(p_old) + coarse_slice_size
    1170              :                      END IF
    1171              :                   END DO
    1172              :                END IF
    1173              : 
    1174           24 :                ii = sent_size(p_old)
    1175          272 :                DO k = coarse_bo(1, 3), coarse_bo(2, 3)
    1176         3432 :                   DO j = coarse_bo(1, 2), coarse_bo(2, 2)
    1177        17312 :                      DO i = MAX(p_lb + shift, my_coarse_bo(1, 1)), MIN(p_ub + shift, my_coarse_bo(2, 1))
    1178        13904 :                         send_buf(ii) = coarse_coeffs(i, j, k)
    1179        17064 :                         ii = ii + 1
    1180              :                      END DO
    1181              :                   END DO
    1182              :                END DO
    1183           24 :                sent_size(p_old) = ii
    1184              : 
    1185           24 :                IF (pbc) THEN
    1186            0 :                   DO xx = coarse_gbo(2, 1) + 1, p_ub + shift
    1187            0 :                      x_att = coarse_gbo(1, 1) + MODULO(xx - coarse_gbo(1, 1), s(1))
    1188            0 :                      IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
    1189              :                         CALL dcopy(coarse_slice_size, &
    1190              :                                    coarse_coeffs(x_att, my_coarse_bo(1, 2), &
    1191              :                                                  my_coarse_bo(1, 3)), ss, &
    1192            0 :                                    send_buf(sent_size(p_old)), 1)
    1193            0 :                         sent_size(p_old) = sent_size(p_old) + coarse_slice_size
    1194              :                      END IF
    1195              :                   END DO
    1196              :                END IF
    1197              : 
    1198           24 :                p_old = p
    1199           24 :                p_lb = FLOOR((x - 2 - f_shift(1))/2.)
    1200              :             END IF
    1201              :          END DO
    1202              :          shift = FLOOR((fine_gbo(1, 1) + MODULO(x - 1 - fine_gbo(1, 1), sf) - f_shift(1))/2._dp) - &
    1203           24 :                  FLOOR((x - 1 - f_shift(1))/2._dp)
    1204           24 :          p_ub = FLOOR((fi_ub + 3 - f_shift(1))/2.)
    1205              : 
    1206           24 :          IF (pbc) THEN
    1207            0 :             DO xx = p_lb + shift, coarse_gbo(1, 1) - 1
    1208            0 :                x_att = coarse_gbo(1, 1) + MODULO(xx - coarse_gbo(1, 1), s(1))
    1209            0 :                IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
    1210              :                   CALL dcopy(coarse_slice_size, &
    1211              :                              coarse_coeffs(x_att, my_coarse_bo(1, 2), &
    1212            0 :                                            my_coarse_bo(1, 3)), ss, send_buf(sent_size(p_old)), 1)
    1213            0 :                   sent_size(p_old) = sent_size(p_old) + coarse_slice_size
    1214              :                END IF
    1215              :             END DO
    1216              :          END IF
    1217              : 
    1218           24 :          ii = sent_size(p_old)
    1219          272 :          DO k = coarse_bo(1, 3), coarse_bo(2, 3)
    1220         3432 :             DO j = coarse_bo(1, 2), coarse_bo(2, 2)
    1221        17312 :                DO i = MAX(p_lb + shift, my_coarse_bo(1, 1)), MIN(p_ub + shift, my_coarse_bo(2, 1))
    1222        13904 :                   send_buf(ii) = coarse_coeffs(i, j, k)
    1223        17064 :                   ii = ii + 1
    1224              :                END DO
    1225              :             END DO
    1226              :          END DO
    1227           24 :          sent_size(p_old) = ii
    1228              : 
    1229           24 :          IF (pbc) THEN
    1230            0 :             DO xx = coarse_gbo(2, 1) + 1, p_ub + shift
    1231            0 :                x_att = coarse_gbo(1, 1) + MODULO(xx - coarse_gbo(1, 1), s(1))
    1232            0 :                IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
    1233              :                   CALL dcopy(coarse_slice_size, &
    1234              :                              coarse_coeffs(x_att, my_coarse_bo(1, 2), &
    1235            0 :                                            my_coarse_bo(1, 3)), ss, send_buf(sent_size(p_old)), 1)
    1236            0 :                   sent_size(p_old) = sent_size(p_old) + coarse_slice_size
    1237              :                END IF
    1238              :             END DO
    1239              :          END IF
    1240              : 
    1241           48 :          CPASSERT(ALL(sent_size(:n_procs - 2) == send_offset(1:)))
    1242           24 :          CPASSERT(sent_size(n_procs - 1) == send_tot_size)
    1243              :          ! test send/rcv sizes
    1244           24 :          CALL coarse_coeffs_pw%pw_grid%para%group%alltoall(send_size, real_rcv_size, 1)
    1245           72 :          CPASSERT(ALL(real_rcv_size == rcv_size))
    1246              :          ! all2all
    1247              :          CALL coarse_coeffs_pw%pw_grid%para%group%alltoall(sb=send_buf, scount=send_size, sdispl=send_offset, &
    1248           24 :                                                            rb=rcv_buf, rcount=rcv_size, rdispl=rcv_offset)
    1249              : 
    1250              :          ! ** reorder rcv buffer
    1251              :          ! (actually reordering should be needed only with pbc)
    1252              : 
    1253              :          ALLOCATE (coarse_coeffs(coarse_bo(1, 1):coarse_bo(2, 1), &
    1254              :                                  coarse_bo(1, 2):coarse_bo(2, 2), &
    1255          120 :                                  coarse_bo(1, 3):coarse_bo(2, 3)))
    1256              : 
    1257           24 :          my_lb = MAX(coarse_gbo(1, 1), coarse_bo(1, 1))
    1258           24 :          my_ub = MIN(coarse_gbo(2, 1), coarse_bo(2, 1))
    1259           24 :          pos_of_x => coarse_coeffs_pw%pw_grid%para%pos_of_x
    1260           72 :          sent_size(:) = rcv_offset
    1261           24 :          ss = coarse_bo(2, 1) - coarse_bo(1, 1) + 1
    1262           24 :          DO x = my_ub + 1, coarse_bo(2, 1)
    1263            0 :             p_old = pos_of_x(coarse_gbo(1, 1) + MODULO(x - coarse_gbo(1, 1), s(1)))
    1264              :             CALL dcopy(coarse_slice_size, &
    1265              :                        rcv_buf(sent_size(p_old)), 1, &
    1266              :                        coarse_coeffs(x, coarse_bo(1, 2), &
    1267            0 :                                      coarse_bo(1, 3)), ss)
    1268           24 :             sent_size(p_old) = sent_size(p_old) + coarse_slice_size
    1269              :          END DO
    1270              :          p_old = pos_of_x(coarse_gbo(1, 1) &
    1271           24 :                           + MODULO(my_lb - coarse_gbo(1, 1), s(1)))
    1272           24 :          p_lb = my_lb
    1273          184 :          DO x = my_lb, my_ub
    1274          160 :             p = pos_of_x(coarse_gbo(1, 1) + MODULO(x - coarse_gbo(1, 1), s(1)))
    1275          160 :             IF (p /= p_old) THEN
    1276           24 :                p_ub = x - 1
    1277              : 
    1278           24 :                ii = sent_size(p_old)
    1279          272 :                DO k = coarse_bo(1, 3), coarse_bo(2, 3)
    1280         3432 :                   DO j = coarse_bo(1, 2), coarse_bo(2, 2)
    1281        15732 :                      DO i = p_lb, p_ub
    1282        12324 :                         coarse_coeffs(i, j, k) = rcv_buf(ii)
    1283        15484 :                         ii = ii + 1
    1284              :                      END DO
    1285              :                   END DO
    1286              :                END DO
    1287           24 :                sent_size(p_old) = ii
    1288              : 
    1289           24 :                p_lb = x
    1290           24 :                p_old = p
    1291              :             END IF
    1292          184 :             rcv_size(p) = rcv_size(p) + coarse_slice_size
    1293              :          END DO
    1294           24 :          p_ub = my_ub
    1295           24 :          ii = sent_size(p_old)
    1296          272 :          DO k = coarse_bo(1, 3), coarse_bo(2, 3)
    1297         3432 :             DO j = coarse_bo(1, 2), coarse_bo(2, 2)
    1298        18892 :                DO i = p_lb, p_ub
    1299        15484 :                   coarse_coeffs(i, j, k) = rcv_buf(ii)
    1300        18644 :                   ii = ii + 1
    1301              :                END DO
    1302              :             END DO
    1303              :          END DO
    1304           24 :          sent_size(p_old) = ii
    1305           24 :          DO x = coarse_bo(1, 1), my_lb - 1
    1306            0 :             p_old = pos_of_x(coarse_gbo(1, 1) + MODULO(x - coarse_gbo(1, 1), s(1)))
    1307              :             CALL dcopy(coarse_slice_size, &
    1308              :                        rcv_buf(sent_size(p_old)), 1, &
    1309              :                        coarse_coeffs(x, coarse_bo(1, 2), &
    1310            0 :                                      coarse_bo(1, 3)), ss)
    1311           24 :             sent_size(p_old) = sent_size(p_old) + coarse_slice_size
    1312              :          END DO
    1313              : 
    1314           48 :          CPASSERT(ALL(sent_size(0:n_procs - 2) == rcv_offset(1:)))
    1315           24 :          CPASSERT(sent_size(n_procs - 1) == rcv_tot_size)
    1316              : 
    1317              :          ! dealloc
    1318           24 :          DEALLOCATE (send_size, send_offset, rcv_size, rcv_offset)
    1319           24 :          DEALLOCATE (send_buf, rcv_buf, real_rcv_size)
    1320           48 :          CALL timestop(handle2)
    1321              : 
    1322              :       END IF
    1323         2260 :       fine_values => fine_values_pw%array
    1324         9040 :       w_0 = [weights_1d(3), weights_1d(1), weights_1d(3)]
    1325        11300 :       w_1 = [weights_1d(4), weights_1d(2), weights_1d(2), weights_1d(4)]
    1326              : 
    1327        30388 :       DO k = coarse_bo(1, 3), coarse_bo(2, 3)
    1328       227284 :          DO ik = -3, 3
    1329       196896 :             IF (pbc) THEN
    1330            0 :                wk = weights_1d(ABS(ik) + 1)
    1331            0 :                fk = fine_gbo(1, 3) + MODULO(2*k + ik - fine_gbo(1, 3) + f_shift(3), 2*s(3))
    1332              :             ELSE
    1333       196896 :                fk = 2*k + ik + f_shift(3)
    1334       196896 :                IF (fk <= fine_bo(1, 3) + 1 .OR. fk >= fine_bo(2, 3) - 1) THEN
    1335        40680 :                   IF (fk < fine_bo(1, 3) .OR. fk > fine_bo(2, 3)) CYCLE
    1336        22600 :                   IF (fk == fine_bo(1, 3) .OR. fk == fine_bo(2, 3)) THEN
    1337         9040 :                      IF (ik /= 0) CYCLE
    1338         4520 :                      wk = w_border0
    1339        13560 :                   ELSE IF (fk == 2*coarse_bo(1, 3) + 1 + f_shift(3)) THEN
    1340         2260 :                      SELECT CASE (ik)
    1341              :                      CASE (1)
    1342         2260 :                         wk = w_border1(1)
    1343              :                      CASE (-1)
    1344         2260 :                         wk = w_border1(2)
    1345              :                      CASE (-3)
    1346         2260 :                         wk = w_border1(3)
    1347              :                      CASE default
    1348            0 :                         CPABORT("")
    1349         6780 :                         CYCLE
    1350              :                      END SELECT
    1351              :                   ELSE
    1352         2260 :                      SELECT CASE (ik)
    1353              :                      CASE (3)
    1354         2260 :                         wk = w_border1(3)
    1355              :                      CASE (1)
    1356         2260 :                         wk = w_border1(2)
    1357              :                      CASE (-1)
    1358         2260 :                         wk = w_border1(1)
    1359              :                      CASE default
    1360            0 :                         CPABORT("")
    1361         6780 :                         CYCLE
    1362              :                      END SELECT
    1363              :                   END IF
    1364              :                ELSE
    1365       156216 :                   wk = weights_1d(ABS(ik) + 1)
    1366              :                END IF
    1367              :             END IF
    1368      3150076 :             DO j = coarse_bo(1, 2), coarse_bo(2, 2)
    1369     23778112 :                DO ij = -3, 3
    1370     20633564 :                   IF (pbc) THEN
    1371            0 :                      wj = weights_1d(ABS(ij) + 1)*wk
    1372            0 :                      fj = fine_gbo(1, 2) + MODULO(2*j + ij - fine_gbo(1, 2) + f_shift(2), 2*s(2))
    1373              :                   ELSE
    1374     20633564 :                      fj = 2*j + ij + f_shift(2)
    1375     20633564 :                      IF (fj <= fine_bo(1, 2) + 1 .OR. fj >= fine_bo(2, 2) - 1) THEN
    1376      3137328 :                         IF (fj < fine_bo(1, 2) .OR. fj > fine_bo(2, 2)) CYCLE
    1377      1742960 :                         IF (fj == fine_bo(1, 2) .OR. fj == fine_bo(2, 2)) THEN
    1378       697184 :                            IF (ij /= 0) CYCLE
    1379       348592 :                            wj = w_border0*wk
    1380      1045776 :                         ELSE IF (fj == 2*coarse_bo(1, 2) + 1 + f_shift(2)) THEN
    1381       174296 :                            SELECT CASE (ij)
    1382              :                            CASE (1)
    1383       174296 :                               wj = w_border1(1)*wk
    1384              :                            CASE (-1)
    1385       174296 :                               wj = w_border1(2)*wk
    1386              :                            CASE (-3)
    1387       174296 :                               wj = w_border1(3)*wk
    1388              :                            CASE default
    1389       522888 :                               CYCLE
    1390              :                            END SELECT
    1391              :                         ELSE
    1392       174296 :                            SELECT CASE (ij)
    1393              :                            CASE (-1)
    1394       174296 :                               wj = w_border1(1)*wk
    1395              :                            CASE (1)
    1396       174296 :                               wj = w_border1(2)*wk
    1397              :                            CASE (3)
    1398       174296 :                               wj = w_border1(3)*wk
    1399              :                            CASE default
    1400       522888 :                               CYCLE
    1401              :                            END SELECT
    1402              :                         END IF
    1403              :                      ELSE
    1404     17496236 :                         wj = weights_1d(ABS(ij) + 1)*wk
    1405              :                      END IF
    1406              :                   END IF
    1407              : 
    1408     21838256 :                   IF (fine_bo(2, 1) - fine_bo(1, 1) < 7 .OR. safe_calc) THEN
    1409              : !                      CALL timeset(routineN//"_safe",handle2)
    1410        25000 :                      DO i = coarse_bo(1, 1), coarse_bo(2, 1)
    1411       165000 :                         DO ii = -3, 3
    1412       140000 :                            IF (pbc .AND. .NOT. is_split) THEN
    1413            0 :                               wi = weights_1d(ABS(ii) + 1)*wj
    1414            0 :                               fi = fine_gbo(1, 1) + MODULO(2*i + ii - fine_gbo(1, 1) + f_shift(1), 2*s(1))
    1415              :                            ELSE
    1416       140000 :                               fi = 2*i + ii + f_shift(1)
    1417       140000 :                               IF (fi < fine_bo(1, 1) .OR. fi > fine_bo(2, 1)) CYCLE
    1418        67500 :                               IF (.NOT. pbc .AND. (fi <= fine_gbo(1, 1) + 1 .OR. &
    1419              :                                                    fi >= fine_gbo(2, 1) - 1)) THEN
    1420        25000 :                                  IF (fi == fine_gbo(1, 1) .OR. fi == fine_gbo(2, 1)) THEN
    1421        10000 :                                     IF (ii /= 0) CYCLE
    1422         5000 :                                     wi = w_border0*wj
    1423        15000 :                                  ELSE IF (fi == fine_gbo(1, 1) + 1) THEN
    1424         2500 :                                     SELECT CASE (ii)
    1425              :                                     CASE (1)
    1426         2500 :                                        wi = w_border1(1)*wj
    1427              :                                     CASE (-1)
    1428         2500 :                                        wi = w_border1(2)*wj
    1429              :                                     CASE (-3)
    1430         2500 :                                        wi = w_border1(3)*wj
    1431              :                                     CASE default
    1432         7500 :                                        CYCLE
    1433              :                                     END SELECT
    1434              :                                  ELSE
    1435         2500 :                                     SELECT CASE (ii)
    1436              :                                     CASE (-1)
    1437         2500 :                                        wi = w_border1(1)*wj
    1438              :                                     CASE (1)
    1439         2500 :                                        wi = w_border1(2)*wj
    1440              :                                     CASE (3)
    1441         2500 :                                        wi = w_border1(3)*wj
    1442              :                                     CASE default
    1443         7500 :                                        CYCLE
    1444              :                                     END SELECT
    1445              :                                  END IF
    1446              :                               ELSE
    1447        42500 :                                  wi = weights_1d(ABS(ii) + 1)*wj
    1448              :                               END IF
    1449              :                            END IF
    1450              :                            fine_values(fi, fj, fk) = &
    1451              :                               fine_values(fi, fj, fk) + &
    1452       160000 :                               wi*coarse_coeffs(i, j, k)
    1453              :                         END DO
    1454              :                      END DO
    1455              : !                      CALL timestop(handle2)
    1456              :                   ELSE
    1457              : !                      CALL timeset(routineN//"_core1",handle2)
    1458     75542416 :                      ww0 = wj*w_0
    1459     94428020 :                      ww1 = wj*w_1
    1460     18885604 :                      IF (pbc .AND. .NOT. is_split) THEN
    1461            0 :                         v3 = coarse_coeffs(coarse_bo(2, 1), j, k)
    1462            0 :                         i = coarse_bo(1, 1)
    1463            0 :                         fi = 2*i + f_shift(1)
    1464            0 :                         v0 = coarse_coeffs(i, j, k)
    1465            0 :                         v1 = coarse_coeffs(i + 1, j, k)
    1466              :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1467            0 :                                                   ww0(1)*v3 + ww0(2)*v0 + ww0(3)*v1
    1468            0 :                         v2 = coarse_coeffs(i + 2, j, k)
    1469            0 :                         fi = fi + 1
    1470              :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1471            0 :                                                   ww1(1)*v3 + ww1(2)*v0 + ww1(3)*v1 + ww1(4)*v2
    1472     18885604 :                      ELSE IF (.NOT. has_i_lbound) THEN
    1473     18826844 :                         i = coarse_bo(1, 1)
    1474     18826844 :                         fi = 2*i + f_shift(1)
    1475     18826844 :                         v0 = coarse_coeffs(i, j, k)
    1476              :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1477     18826844 :                                                   w_border0*wj*v0
    1478     18826844 :                         v1 = coarse_coeffs(i + 1, j, k)
    1479     18826844 :                         v2 = coarse_coeffs(i + 2, j, k)
    1480     18826844 :                         fi = fi + 1
    1481              :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1482              :                                                   wj*(w_border1(1)*v0 + w_border1(2)*v1 + &
    1483     18826844 :                                                       w_border1(3)*v2)
    1484              :                      ELSE
    1485        58760 :                         i = coarse_bo(1, 1)
    1486        58760 :                         v0 = coarse_coeffs(i, j, k)
    1487        58760 :                         v1 = coarse_coeffs(i + 1, j, k)
    1488        58760 :                         v2 = coarse_coeffs(i + 2, j, k)
    1489        58760 :                         fi = 2*i + f_shift(1) + 1
    1490        58760 :                         IF (.NOT. (fi + 1 == fine_bo(1, 1) .OR. &
    1491              :                                    fi + 2 == fine_bo(1, 1))) THEN
    1492              :                            CALL cp_abort(__LOCATION__, &
    1493              :                                          "unexpected start index "// &
    1494              :                                          TRIM(cp_to_string(coarse_bo(1, 1)))//" "// &
    1495            0 :                                          TRIM(cp_to_string(fi)))
    1496              :                         END IF
    1497              :                      END IF
    1498     18885604 :                      fi = fi + 1
    1499     18885604 :                      IF (fi >= fine_bo(1, 1)) THEN
    1500              :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1501              :                                                   ww0(1)*v0 + ww0(2)*v1 + &
    1502     18885604 :                                                   ww0(3)*v2
    1503              :                      ELSE
    1504            0 :                         CPASSERT(fi + 1 == fine_bo(1, 1))
    1505              :                      END IF
    1506              : !                      CALL timestop(handle2)
    1507              : !                      CALL timeset(routineN//"_core",handle2)
    1508     18885604 :                      DO i = coarse_bo(1, 1) + 3, FLOOR((fine_bo(2, 1) - f_shift(1))/2.) - 3, 4
    1509     84267688 :                         v3 = coarse_coeffs(i, j, k)
    1510     84267688 :                         fi = fi + 1
    1511              :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1512              :                                                   (ww1(1)*v0 + ww1(2)*v1 + &
    1513     84267688 :                                                    ww1(3)*v2 + ww1(4)*v3)
    1514     84267688 :                         fi = fi + 1
    1515              :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1516              :                                                   (ww0(1)*v1 + ww0(2)*v2 + &
    1517     84267688 :                                                    ww0(3)*v3)
    1518     84267688 :                         v0 = coarse_coeffs(i + 1, j, k)
    1519     84267688 :                         fi = fi + 1
    1520              :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1521              :                                                   (ww1(4)*v0 + ww1(1)*v1 + &
    1522     84267688 :                                                    ww1(2)*v2 + ww1(3)*v3)
    1523     84267688 :                         fi = fi + 1
    1524              :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1525              :                                                   (ww0(1)*v2 + ww0(2)*v3 + &
    1526     84267688 :                                                    ww0(3)*v0)
    1527     84267688 :                         v1 = coarse_coeffs(i + 2, j, k)
    1528     84267688 :                         fi = fi + 1
    1529              :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1530              :                                                   (ww1(3)*v0 + ww1(4)*v1 + &
    1531     84267688 :                                                    ww1(1)*v2 + ww1(2)*v3)
    1532     84267688 :                         fi = fi + 1
    1533              :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1534              :                                                   (ww0(1)*v3 + ww0(2)*v0 + &
    1535     84267688 :                                                    ww0(3)*v1)
    1536     84267688 :                         v2 = coarse_coeffs(i + 3, j, k)
    1537     84267688 :                         fi = fi + 1
    1538              :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1539              :                                                   (ww1(2)*v0 + ww1(3)*v1 + &
    1540     84267688 :                                                    ww1(4)*v2 + ww1(1)*v3)
    1541     84267688 :                         fi = fi + 1
    1542              :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1543              :                                                   (ww0(1)*v0 + ww0(2)*v1 + &
    1544     84543910 :                                                    ww0(3)*v2)
    1545              :                      END DO
    1546              : !                      CALL timestop(handle2)
    1547              : !                      CALL timeset(routineN//"_clean",handle2)
    1548     18885604 :                      rest_b = MODULO(FLOOR((fine_bo(2, 1) - f_shift(1))/2.) - coarse_bo(1, 1) - 3 + 1, 4)
    1549     18885604 :                      IF (rest_b > 0) THEN
    1550     18508720 :                         i = FLOOR((fine_bo(2, 1) - f_shift(1))/2.) - rest_b + 1
    1551     18508720 :                         v3 = coarse_coeffs(i, j, k)
    1552     18508720 :                         fi = fi + 1
    1553              :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1554              :                                                   (ww1(1)*v0 + ww1(2)*v1 + &
    1555     18508720 :                                                    ww1(3)*v2 + ww1(4)*v3)
    1556     18508720 :                         fi = fi + 1
    1557              :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1558              :                                                   (ww0(1)*v1 + ww0(2)*v2 + &
    1559     18508720 :                                                    ww0(3)*v3)
    1560     18508720 :                         IF (rest_b > 1) THEN
    1561     18449960 :                            v0 = coarse_coeffs(i + 1, j, k)
    1562     18449960 :                            fi = fi + 1
    1563              :                            fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1564              :                                                      (ww1(4)*v0 + ww1(1)*v1 + &
    1565     18449960 :                                                       ww1(2)*v2 + ww1(3)*v3)
    1566     18449960 :                            fi = fi + 1
    1567              :                            fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1568              :                                                      (ww0(1)*v2 + ww0(2)*v3 + &
    1569     18449960 :                                                       ww0(3)*v0)
    1570     18449960 :                            IF (rest_b > 2) THEN
    1571        73160 :                               v1 = coarse_coeffs(i + 2, j, k)
    1572        73160 :                               fi = fi + 1
    1573              :                               fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1574              :                                                         (ww1(3)*v0 + ww1(4)*v1 + &
    1575        73160 :                                                          ww1(1)*v2 + ww1(2)*v3)
    1576        73160 :                               fi = fi + 1
    1577              :                               fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1578              :                                                         (ww0(1)*v3 + ww0(2)*v0 + &
    1579        73160 :                                                          ww0(3)*v1)
    1580        73160 :                               IF (pbc .AND. .NOT. is_split) THEN
    1581            0 :                                  v2 = coarse_coeffs(coarse_bo(1, 1), j, k)
    1582            0 :                                  fi = fi + 1
    1583              :                                  fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1584            0 :                                                            ww1(1)*v3 + ww1(2)*v0 + ww1(3)*v1 + ww1(4)*v2
    1585            0 :                                  fi = fi + 1
    1586              :                                  fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1587            0 :                                                            ww0(1)*v0 + ww0(2)*v1 + ww0(3)*v2
    1588            0 :                                  v3 = coarse_coeffs(coarse_bo(1, 1) + 1, j, k)
    1589            0 :                                  fi = fi + 1
    1590              :                                  fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1591            0 :                                                            ww1(1)*v0 + ww1(2)*v1 + ww1(3)*v2 + ww1(4)*v3
    1592        73160 :                               ELSE IF (has_i_ubound) THEN
    1593            0 :                                  v2 = coarse_coeffs(i + 3, j, k)
    1594            0 :                                  fi = fi + 1
    1595              :                                  fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1596            0 :                                                            ww1(1)*v3 + ww1(2)*v0 + ww1(3)*v1 + ww1(4)*v2
    1597            0 :                                  fi = fi + 1
    1598              :                                  fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1599            0 :                                                            ww0(1)*v0 + ww0(2)*v1 + ww0(3)*v2
    1600            0 :                                  IF (fi + 1 == fine_bo(2, 1)) THEN
    1601            0 :                                     v3 = coarse_coeffs(i + 4, j, k)
    1602            0 :                                     fi = fi + 1
    1603              :                                     fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1604            0 :                                                               ww1(1)*v0 + ww1(2)*v1 + ww1(3)*v2 + ww1(4)*v3
    1605              :                                  END IF
    1606              :                               ELSE
    1607        73160 :                                  fi = fi + 1
    1608              :                                  fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1609              :                                                            wj*(w_border1(3)*v3 + w_border1(2)*v0 + &
    1610        73160 :                                                                w_border1(1)*v1)
    1611        73160 :                                  fi = fi + 1
    1612              :                                  fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1613        73160 :                                                            w_border0*wj*v1
    1614              :                               END IF
    1615     18376800 :                            ELSE IF (pbc .AND. .NOT. is_split) THEN
    1616            0 :                               v1 = coarse_coeffs(coarse_bo(1, 1), j, k)
    1617            0 :                               fi = fi + 1
    1618              :                               fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1619            0 :                                                         ww1(1)*v2 + ww1(2)*v3 + ww1(3)*v0 + ww1(4)*v1
    1620            0 :                               fi = fi + 1
    1621              :                               fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1622            0 :                                                         ww0(1)*v3 + ww0(2)*v0 + ww0(3)*v1
    1623            0 :                               v2 = coarse_coeffs(coarse_bo(1, 1) + 1, j, k)
    1624            0 :                               fi = fi + 1
    1625              :                               fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1626            0 :                                                         ww1(1)*v3 + ww1(2)*v0 + ww1(3)*v1 + ww1(4)*v2
    1627     18376800 :                            ELSE IF (has_i_ubound) THEN
    1628            0 :                               v1 = coarse_coeffs(i + 2, j, k)
    1629            0 :                               fi = fi + 1
    1630              :                               fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1631            0 :                                                         ww1(1)*v2 + ww1(2)*v3 + ww1(3)*v0 + ww1(4)*v1
    1632            0 :                               fi = fi + 1
    1633              :                               fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1634            0 :                                                         ww0(1)*v3 + ww0(2)*v0 + ww0(3)*v1
    1635            0 :                               IF (fi + 1 == fine_bo(2, 1)) THEN
    1636            0 :                                  v2 = coarse_coeffs(i + 3, j, k)
    1637            0 :                                  fi = fi + 1
    1638              :                                  fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1639            0 :                                                            ww1(1)*v3 + ww1(2)*v0 + ww1(3)*v1 + ww1(4)*v2
    1640              :                               END IF
    1641              :                            ELSE
    1642     18376800 :                               fi = fi + 1
    1643              :                               fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1644              :                                                         wj*(w_border1(3)*v2 + w_border1(2)*v3 + &
    1645     18376800 :                                                             w_border1(1)*v0)
    1646     18376800 :                               fi = fi + 1
    1647              :                               fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1648     18376800 :                                                         w_border0*wj*v0
    1649              :                            END IF
    1650        58760 :                         ELSE IF (pbc .AND. .NOT. is_split) THEN
    1651            0 :                            v0 = coarse_coeffs(coarse_bo(1, 1), j, k)
    1652            0 :                            fi = fi + 1
    1653              :                            fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1654            0 :                                                      ww1(1)*v1 + ww1(2)*v2 + ww1(3)*v3 + ww1(4)*v0
    1655            0 :                            fi = fi + 1
    1656              :                            fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1657            0 :                                                      ww0(1)*v2 + ww0(2)*v3 + ww0(3)*v0
    1658            0 :                            v1 = coarse_coeffs(coarse_bo(1, 1) + 1, j, k)
    1659            0 :                            fi = fi + 1
    1660              :                            fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1661            0 :                                                      ww1(1)*v2 + ww1(2)*v3 + ww1(3)*v0 + ww1(4)*v1
    1662        58760 :                         ELSE IF (has_i_ubound) THEN
    1663        58760 :                            v0 = coarse_coeffs(i + 1, j, k)
    1664        58760 :                            fi = fi + 1
    1665              :                            fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1666        58760 :                                                      ww1(1)*v1 + ww1(2)*v2 + ww1(3)*v3 + ww1(4)*v0
    1667        58760 :                            fi = fi + 1
    1668              :                            fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1669        58760 :                                                      ww0(1)*v2 + ww0(2)*v3 + ww0(3)*v0
    1670        58760 :                            IF (fi + 1 == fine_bo(2, 1)) THEN
    1671        58760 :                               v1 = coarse_coeffs(i + 2, j, k)
    1672        58760 :                               fi = fi + 1
    1673              :                               fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1674        58760 :                                                         ww1(1)*v2 + ww1(2)*v3 + ww1(3)*v0 + ww1(4)*v1
    1675              :                            END IF
    1676              :                         ELSE
    1677            0 :                            fi = fi + 1
    1678              :                            fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1679              :                                                      wj*(w_border1(3)*v1 + w_border1(2)*v2 + &
    1680            0 :                                                          w_border1(1)*v3)
    1681            0 :                            fi = fi + 1
    1682              :                            fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1683            0 :                                                      w_border0*wj*v3
    1684              :                         END IF
    1685       376884 :                      ELSE IF (pbc .AND. .NOT. is_split) THEN
    1686            0 :                         v3 = coarse_coeffs(coarse_bo(1, 1), j, k)
    1687            0 :                         fi = fi + 1
    1688              :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1689            0 :                                                   ww1(1)*v0 + ww1(2)*v1 + ww1(3)*v2 + ww1(4)*v3
    1690            0 :                         fi = fi + 1
    1691              :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1692            0 :                                                   ww0(1)*v1 + ww0(2)*v2 + ww0(3)*v3
    1693            0 :                         v0 = coarse_coeffs(coarse_bo(1, 1) + 1, j, k)
    1694            0 :                         fi = fi + 1
    1695              :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1696            0 :                                                   ww1(1)*v1 + ww1(2)*v2 + ww1(3)*v3 + ww1(4)*v0
    1697       376884 :                      ELSE IF (has_i_ubound) THEN
    1698            0 :                         v3 = coarse_coeffs(i, j, k)
    1699            0 :                         fi = fi + 1
    1700              :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1701            0 :                                                   ww1(1)*v0 + ww1(2)*v1 + ww1(3)*v2 + ww1(4)*v3
    1702            0 :                         fi = fi + 1
    1703              :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1704            0 :                                                   ww0(1)*v1 + ww0(2)*v2 + ww0(3)*v3
    1705            0 :                         IF (fi + 1 == fine_bo(2, 1)) THEN
    1706            0 :                            v0 = coarse_coeffs(i + 1, j, k)
    1707            0 :                            fi = fi + 1
    1708              :                            fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1709            0 :                                                      ww1(1)*v1 + ww1(2)*v2 + ww1(3)*v3 + ww1(4)*v0
    1710              :                         END IF
    1711              :                      ELSE
    1712       376884 :                         fi = fi + 1
    1713              :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1714              :                                                   wj*(w_border1(3)*v0 + w_border1(2)*v1 + &
    1715       376884 :                                                       w_border1(1)*v2)
    1716       376884 :                         fi = fi + 1
    1717              :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1718       376884 :                                                   w_border0*wj*v2
    1719              :                      END IF
    1720     18885604 :                      CPASSERT(fi == fine_bo(2, 1))
    1721              :                   END IF
    1722              : !                   CALL timestop(handle2)
    1723              :                END DO
    1724              :             END DO
    1725              :          END DO
    1726              :       END DO
    1727              : 
    1728         2260 :       IF (is_split) THEN
    1729           24 :          DEALLOCATE (coarse_coeffs)
    1730              :       END IF
    1731         2260 :       CALL timestop(handle)
    1732         4520 :    END SUBROUTINE add_coarse2fine
    1733              : 
    1734              : ! **************************************************************************************************
    1735              : !> \brief low level function that adds a coarse grid (without boundary)
    1736              : !>      to a fine grid.
    1737              : !>
    1738              : !>      It will add to
    1739              : !>
    1740              : !>        coarse_coeffs(coarse_bounds(1,1):coarse_bounds(2,1),
    1741              : !>                      coarse_bounds(1,2):coarse_bounds(2,2),
    1742              : !>                      coarse_bounds(1,3):coarse_bounds(2,3))
    1743              : !>
    1744              : !>      using
    1745              : !>
    1746              : !>        fine_values(2*coarse_bounds(1,1):2*coarse_bounds(2,1),
    1747              : !>                    2*coarse_bounds(1,2):2*coarse_bounds(2,2),
    1748              : !>                    2*coarse_bounds(1,3):2*coarse_bounds(2,3))
    1749              : !>
    1750              : !>      composed with the weights obtained by the direct product of the
    1751              : !>      1d coefficients weights:
    1752              : !>
    1753              : !>      for i,j,k in -3..3
    1754              : !>         w(i,j,k)=weights_1d(abs(i)+1)*weights_1d(abs(j)+1)*
    1755              : !>                  weights_1d(abs(k)+1)
    1756              : !> \param fine_values_pw 3d array where to add the values due to the
    1757              : !>        coarse coeffs
    1758              : !> \param coarse_coeffs_pw 3d array with boundary of size 1 with the values of the
    1759              : !>        coefficients
    1760              : !> \param weights_1d the weights of the 1d smearing
    1761              : !> \param w_border0 the 1d weight at the border
    1762              : !> \param w_border1 the 1d weights for a point one off the border
    1763              : !>        (w_border1(1) is the weight of the coefficent at the border)
    1764              : !> \param pbc ...
    1765              : !> \param safe_computation ...
    1766              : !> \author fawzi
    1767              : !> \note
    1768              : !>      see coarse2fine for some relevant notes
    1769              : ! **************************************************************************************************
    1770         1062 :    SUBROUTINE add_fine2coarse(fine_values_pw, coarse_coeffs_pw, &
    1771              :                               weights_1d, w_border0, w_border1, pbc, safe_computation)
    1772              :       TYPE(pw_r3d_rs_type), INTENT(IN)                   :: fine_values_pw, coarse_coeffs_pw
    1773              :       REAL(kind=dp), DIMENSION(4), INTENT(in)            :: weights_1d
    1774              :       REAL(kind=dp), INTENT(in)                          :: w_border0
    1775              :       REAL(kind=dp), DIMENSION(3), INTENT(in)            :: w_border1
    1776              :       LOGICAL, INTENT(in)                                :: pbc
    1777              :       LOGICAL, INTENT(in), OPTIONAL                      :: safe_computation
    1778              : 
    1779              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'add_fine2coarse'
    1780              : 
    1781              :       INTEGER :: coarse_slice_size, f_shift(3), fi, fj, fk, handle, handle2, i, ii, ij, ik, ip, j, &
    1782              :          k, n_procs, p, p_old, rcv_tot_size, rest_b, s(3), send_tot_size, ss, x, x_att
    1783         1062 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: pp_lb, pp_ub, rcv_offset, rcv_size, &
    1784         1062 :                                                             real_rcv_size, send_offset, send_size, &
    1785         1062 :                                                             sent_size
    1786              :       INTEGER, DIMENSION(2, 3)                           :: coarse_bo, coarse_gbo, fine_bo, &
    1787              :                                                             fine_gbo, my_coarse_bo
    1788         1062 :       INTEGER, DIMENSION(:), POINTER                     :: pos_of_x
    1789              :       LOGICAL                                            :: has_i_lbound, has_i_ubound, is_split, &
    1790              :                                                             local_data, safe_calc
    1791              :       REAL(kind=dp)                                      :: vv0, vv1, vv2, vv3, vv4, vv5, vv6, vv7, &
    1792              :                                                             wi, wj, wk
    1793         1062 :       REAL(kind=dp), ALLOCATABLE, DIMENSION(:)           :: rcv_buf, send_buf
    1794              :       REAL(kind=dp), DIMENSION(3)                        :: w_0, ww0
    1795              :       REAL(kind=dp), DIMENSION(4)                        :: w_1, ww1
    1796         1062 :       REAL(kind=dp), DIMENSION(:, :, :), POINTER         :: coarse_coeffs, fine_values
    1797              : 
    1798         1062 :       CALL timeset(routineN, handle)
    1799              : 
    1800         1062 :       safe_calc = .FALSE.
    1801         1062 :       IF (PRESENT(safe_computation)) safe_calc = safe_computation
    1802              : 
    1803        10620 :       my_coarse_bo = coarse_coeffs_pw%pw_grid%bounds_local
    1804        10620 :       coarse_gbo = coarse_coeffs_pw%pw_grid%bounds
    1805        10620 :       fine_bo = fine_values_pw%pw_grid%bounds_local
    1806        10620 :       fine_gbo = fine_values_pw%pw_grid%bounds
    1807         4248 :       f_shift = fine_gbo(1, :) - 2*coarse_gbo(1, :)
    1808         3150 :       is_split = ANY(coarse_gbo(:, 1) /= my_coarse_bo(:, 1))
    1809         1062 :       coarse_bo = my_coarse_bo
    1810         1062 :       IF (fine_bo(1, 1) <= fine_bo(2, 1)) THEN
    1811         1062 :          coarse_bo(1, 1) = FLOOR(REAL(fine_bo(1, 1) - f_shift(1), dp)/2._dp) - 1
    1812         1062 :          coarse_bo(2, 1) = FLOOR(REAL(fine_bo(2, 1) + 1 - f_shift(1), dp)/2._dp) + 1
    1813              :       ELSE
    1814            0 :          coarse_bo(1, 1) = coarse_gbo(2, 1)
    1815            0 :          coarse_bo(2, 1) = coarse_gbo(2, 1) - 1
    1816              :       END IF
    1817         1062 :       IF (.NOT. is_split .OR. .NOT. pbc) THEN
    1818         1062 :          coarse_bo(1, 1) = MAX(coarse_gbo(1, 1), coarse_bo(1, 1))
    1819         1062 :          coarse_bo(2, 1) = MIN(coarse_gbo(2, 1), coarse_bo(2, 1))
    1820              :       END IF
    1821         1062 :       has_i_ubound = (fine_gbo(2, 1) /= fine_bo(2, 1)) .OR. pbc .AND. is_split
    1822         1062 :       has_i_lbound = (fine_gbo(1, 1) /= fine_bo(1, 1)) .OR. pbc .AND. is_split
    1823              : 
    1824         1062 :       IF (pbc) THEN
    1825            0 :          CPASSERT(ALL(fine_gbo(1, :) == 2*coarse_gbo(1, :) + f_shift))
    1826            0 :          CPASSERT(ALL(fine_gbo(2, :) == 2*coarse_gbo(2, :) + f_shift + 1))
    1827              :       ELSE
    1828         4248 :          CPASSERT(ALL(fine_gbo(2, :) == 2*coarse_gbo(2, :) + f_shift))
    1829         4248 :          CPASSERT(ALL(fine_gbo(1, :) == 2*coarse_gbo(1, :) + f_shift))
    1830              :       END IF
    1831         1062 :       CPASSERT(coarse_gbo(2, 1) - coarse_gbo(1, 2) > 1)
    1832         1062 :       local_data = is_split ! ANY(coarse_bo/=my_coarse_bo)
    1833         1062 :       IF (local_data) THEN
    1834              :          ALLOCATE (coarse_coeffs(coarse_bo(1, 1):coarse_bo(2, 1), &
    1835              :                                  coarse_bo(1, 2):coarse_bo(2, 2), &
    1836          120 :                                  coarse_bo(1, 3):coarse_bo(2, 3)))
    1837        31240 :          coarse_coeffs = 0._dp
    1838              :       ELSE
    1839         1038 :          coarse_coeffs => coarse_coeffs_pw%array
    1840              :       END IF
    1841              : 
    1842         1062 :       fine_values => fine_values_pw%array
    1843         4248 :       w_0 = [weights_1d(3), weights_1d(1), weights_1d(3)]
    1844         5310 :       w_1 = [weights_1d(4), weights_1d(2), weights_1d(2), weights_1d(4)]
    1845              : 
    1846         4248 :       DO i = 1, 3
    1847         4248 :          s(i) = coarse_gbo(2, i) - coarse_gbo(1, i) + 1
    1848              :       END DO
    1849         4248 :       IF (ANY(s < 1)) RETURN
    1850              : 
    1851        14092 :       DO k = coarse_bo(1, 3), coarse_bo(2, 3)
    1852       105302 :          DO ik = -3, 3
    1853        91210 :             IF (pbc) THEN
    1854            0 :                wk = weights_1d(ABS(ik) + 1)
    1855            0 :                fk = fine_gbo(1, 3) + MODULO(2*k + ik - fine_gbo(1, 3) + f_shift(3), 2*s(3))
    1856              :             ELSE
    1857        91210 :                fk = 2*k + ik + f_shift(3)
    1858        91210 :                IF (fk <= fine_bo(1, 3) + 1 .OR. fk >= fine_bo(2, 3) - 1) THEN
    1859        19116 :                   IF (fk < fine_bo(1, 3) .OR. fk > fine_bo(2, 3)) CYCLE
    1860        10620 :                   IF (fk == fine_bo(1, 3) .OR. fk == fine_bo(2, 3)) THEN
    1861         4248 :                      IF (ik /= 0) CYCLE
    1862         2124 :                      wk = w_border0
    1863         6372 :                   ELSE IF (fk == fine_bo(1, 3) + 1) THEN
    1864         1062 :                      SELECT CASE (ik)
    1865              :                      CASE (1)
    1866         1062 :                         wk = w_border1(1)
    1867              :                      CASE (-1)
    1868         1062 :                         wk = w_border1(2)
    1869              :                      CASE (-3)
    1870         1062 :                         wk = w_border1(3)
    1871              :                      CASE default
    1872            0 :                         CPABORT("")
    1873         3186 :                         CYCLE
    1874              :                      END SELECT
    1875              :                   ELSE
    1876         1062 :                      SELECT CASE (ik)
    1877              :                      CASE (3)
    1878         1062 :                         wk = w_border1(3)
    1879              :                      CASE (1)
    1880         1062 :                         wk = w_border1(2)
    1881              :                      CASE (-1)
    1882         1062 :                         wk = w_border1(1)
    1883              :                      CASE default
    1884            0 :                         CPABORT("")
    1885         3186 :                         CYCLE
    1886              :                      END SELECT
    1887              :                   END IF
    1888              :                ELSE
    1889        72094 :                   wk = weights_1d(ABS(ik) + 1)
    1890              :                END IF
    1891              :             END IF
    1892      1471974 :             DO j = coarse_bo(1, 2), coarse_bo(2, 2)
    1893     11118042 :                DO ij = -3, 3
    1894      9648478 :                   IF (pbc) THEN
    1895              :                      fj = fine_gbo(1, 2) + MODULO(2*j + ij - fine_gbo(1, 2) + f_shift(2), &
    1896            0 :                                                   2*s(2))
    1897            0 :                      wj = weights_1d(ABS(ij) + 1)*wk
    1898              :                   ELSE
    1899      9648478 :                      fj = 2*j + ij + f_shift(2)
    1900      9648478 :                      IF (fj <= fine_bo(1, 2) + 1 .OR. fj >= fine_bo(2, 2) - 1) THEN
    1901      1450620 :                         IF (fj < fine_bo(1, 2) .OR. fj > fine_bo(2, 2)) CYCLE
    1902       805900 :                         IF (fj == fine_bo(1, 2) .OR. fj == fine_bo(2, 2)) THEN
    1903       322360 :                            IF (ij /= 0) CYCLE
    1904       161180 :                            wj = w_border0*wk
    1905       483540 :                         ELSE IF (fj == fine_bo(1, 2) + 1) THEN
    1906        80590 :                            SELECT CASE (ij)
    1907              :                            CASE (1)
    1908        80590 :                               wj = w_border1(1)*wk
    1909              :                            CASE (-1)
    1910        80590 :                               wj = w_border1(2)*wk
    1911              :                            CASE (-3)
    1912        80590 :                               wj = w_border1(3)*wk
    1913              :                            CASE default
    1914            0 :                               CPABORT("")
    1915       241770 :                               CYCLE
    1916              :                            END SELECT
    1917              :                         ELSE
    1918        80590 :                            SELECT CASE (ij)
    1919              :                            CASE (-1)
    1920        80590 :                               wj = w_border1(1)*wk
    1921              :                            CASE (1)
    1922        80590 :                               wj = w_border1(2)*wk
    1923              :                            CASE (3)
    1924        80590 :                               wj = w_border1(3)*wk
    1925              :                            CASE default
    1926            0 :                               CPABORT("")
    1927       241770 :                               CYCLE
    1928              :                            END SELECT
    1929              :                         END IF
    1930              :                      ELSE
    1931      8197858 :                         wj = weights_1d(ABS(ij) + 1)*wk
    1932              :                      END IF
    1933              :                   END IF
    1934              : 
    1935     10220932 :                   IF (coarse_bo(2, 1) - coarse_bo(1, 1) < 7 .OR. safe_calc) THEN
    1936      1559844 :                      DO i = coarse_bo(1, 1), coarse_bo(2, 1)
    1937     10785788 :                         DO ii = -3, 3
    1938      9225944 :                            IF (pbc .AND. .NOT. is_split) THEN
    1939            0 :                               wi = weights_1d(ABS(ii) + 1)*wj
    1940            0 :                               fi = fine_gbo(1, 1) + MODULO(2*i + ii - fine_gbo(1, 1) + f_shift(1), 2*s(1))
    1941              :                            ELSE
    1942      9225944 :                               fi = 2*i + ii + f_shift(1)
    1943      9225944 :                               IF (fi < fine_bo(1, 1) .OR. fi > fine_bo(2, 1)) CYCLE
    1944      7112560 :                               IF (((.NOT. pbc) .AND. fi <= fine_gbo(1, 1) + 1) .OR. &
    1945              :                                   ((.NOT. pbc) .AND. fi >= fine_gbo(2, 1) - 1)) THEN
    1946      2281160 :                                  IF (fi == fine_gbo(1, 1) .OR. fi == fine_gbo(2, 1)) THEN
    1947       912464 :                                     IF (ii /= 0) CYCLE
    1948       456232 :                                     wi = w_border0*wj
    1949      1368696 :                                  ELSE IF (fi == fine_gbo(1, 1) + 1) THEN
    1950       228116 :                                     SELECT CASE (ii)
    1951              :                                     CASE (1)
    1952       228116 :                                        wi = w_border1(1)*wj
    1953              :                                     CASE (-1)
    1954       228116 :                                        wi = w_border1(2)*wj
    1955              :                                     CASE (-3)
    1956       228116 :                                        wi = w_border1(3)*wj
    1957              :                                     CASE default
    1958       684348 :                                        CYCLE
    1959              :                                     END SELECT
    1960              :                                  ELSE
    1961       228116 :                                     SELECT CASE (ii)
    1962              :                                     CASE (-1)
    1963       228116 :                                        wi = w_border1(1)*wj
    1964              :                                     CASE (1)
    1965       228116 :                                        wi = w_border1(2)*wj
    1966              :                                     CASE (3)
    1967       228116 :                                        wi = w_border1(3)*wj
    1968              :                                     CASE default
    1969       684348 :                                        CYCLE
    1970              :                                     END SELECT
    1971              :                                  END IF
    1972              :                               ELSE
    1973      4831400 :                                  wi = weights_1d(ABS(ii) + 1)*wj
    1974              :                               END IF
    1975              :                            END IF
    1976              :                            coarse_coeffs(i, j, k) = &
    1977              :                               coarse_coeffs(i, j, k) + &
    1978     10543936 :                               wi*fine_values(fi, fj, fk)
    1979              :                         END DO
    1980              :                      END DO
    1981              :                   ELSE
    1982     34402904 :                      ww0 = wj*w_0
    1983     43003630 :                      ww1 = wj*w_1
    1984      8600726 :                      IF (pbc .AND. .NOT. is_split) THEN
    1985            0 :                         i = coarse_bo(1, 1) - 1
    1986            0 :                         vv2 = fine_values(fine_bo(2, 1) - 2, fj, fk)
    1987            0 :                         vv3 = fine_values(fine_bo(2, 1) - 1, fj, fk)
    1988            0 :                         vv4 = fine_values(fine_bo(2, 1), fj, fk)
    1989            0 :                         fi = fine_bo(1, 1)
    1990            0 :                         vv5 = fine_values(fi, fj, fk)
    1991            0 :                         fi = fi + 1
    1992            0 :                         vv6 = fine_values(fi, fj, fk)
    1993            0 :                         fi = fi + 1
    1994            0 :                         vv7 = fine_values(fi, fj, fk)
    1995              :                         coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
    1996            0 :                                                      + ww1(4)*vv2 + ww0(3)*vv3 + ww1(3)*vv4 + ww0(2)*vv5 + ww1(2)*vv6 + ww0(1)*vv7
    1997              :                         coarse_coeffs(i + 2, j, k) = coarse_coeffs(i + 2, j, k) &
    1998            0 :                                                      + ww1(4)*vv4 + ww0(3)*vv5 + ww1(3)*vv6 + ww0(2)*vv7
    1999              :                         coarse_coeffs(i + 3, j, k) = coarse_coeffs(i + 3, j, k) &
    2000            0 :                                                      + ww1(4)*vv6 + ww0(3)*vv7
    2001      8600726 :                      ELSE IF (has_i_lbound) THEN
    2002        47524 :                         i = coarse_bo(1, 1)
    2003        47524 :                         fi = fine_bo(1, 1) - 1
    2004        47524 :                         IF (i + 1 == FLOOR((fine_bo(1, 1) + 1 - f_shift(1))/2._dp)) THEN
    2005        47524 :                            fi = fi + 1
    2006        47524 :                            vv0 = fine_values(fi, fj, fk)
    2007              :                            coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) + &
    2008        47524 :                                                     vv0*ww0(3)
    2009              :                            coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) + &
    2010        47524 :                                                         vv0*ww0(2)
    2011              :                            coarse_coeffs(i + 2, j, k) = coarse_coeffs(i + 2, j, k) + &
    2012        47524 :                                                         vv0*ww0(1)
    2013              :                         END IF
    2014              :                      ELSE
    2015      8553202 :                         i = coarse_bo(1, 1)
    2016      8553202 :                         fi = 2*i + f_shift(1)
    2017      8553202 :                         vv0 = fine_values(fi, fj, fk)
    2018      8553202 :                         fi = fi + 1
    2019      8553202 :                         vv1 = fine_values(fi, fj, fk)
    2020      8553202 :                         fi = fi + 1
    2021      8553202 :                         vv2 = fine_values(fi, fj, fk)
    2022              :                         coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) + &
    2023      8553202 :                                                  (vv0*w_border0 + vv1*w_border1(1))*wj + vv2*ww0(1)
    2024              :                         coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) + &
    2025      8553202 :                                                      wj*w_border1(2)*vv1 + ww0(2)*vv2
    2026              :                         coarse_coeffs(i + 2, j, k) = coarse_coeffs(i + 2, j, k) + &
    2027      8553202 :                                                      wj*w_border1(3)*vv1 + ww0(3)*vv2
    2028              :                      END IF
    2029      8600726 :                      DO i = coarse_bo(1, 1) + 3, FLOOR((fine_bo(2, 1) - f_shift(1))/2._dp) - 3, 4
    2030     37559568 :                         fi = fi + 1
    2031     37559568 :                         vv0 = fine_values(fi, fj, fk)
    2032     37559568 :                         fi = fi + 1
    2033     37559568 :                         vv1 = fine_values(fi, fj, fk)
    2034     37559568 :                         fi = fi + 1
    2035     37559568 :                         vv2 = fine_values(fi, fj, fk)
    2036     37559568 :                         fi = fi + 1
    2037     37559568 :                         vv3 = fine_values(fi, fj, fk)
    2038     37559568 :                         fi = fi + 1
    2039     37559568 :                         vv4 = fine_values(fi, fj, fk)
    2040     37559568 :                         fi = fi + 1
    2041     37559568 :                         vv5 = fine_values(fi, fj, fk)
    2042     37559568 :                         fi = fi + 1
    2043     37559568 :                         vv6 = fine_values(fi, fj, fk)
    2044     37559568 :                         fi = fi + 1
    2045     37559568 :                         vv7 = fine_values(fi, fj, fk)
    2046              :                         coarse_coeffs(i - 3, j, k) = coarse_coeffs(i - 3, j, k) &
    2047     37559568 :                                                      + ww1(1)*vv0
    2048              :                         coarse_coeffs(i - 2, j, k) = coarse_coeffs(i - 2, j, k) &
    2049     37559568 :                                                      + ww1(2)*vv0 + ww0(1)*vv1 + ww1(1)*vv2
    2050              :                         coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
    2051     37559568 :                                                      + ww1(3)*vv0 + ww0(2)*vv1 + ww1(2)*vv2 + ww0(1)*vv3 + ww1(1)*vv4
    2052              :                         coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
    2053     37559568 :                                           + ww1(4)*vv0 + ww0(3)*vv1 + ww1(3)*vv2 + ww0(2)*vv3 + ww1(2)*vv4 + ww0(1)*vv5 + ww1(1)*vv6
    2054              :                         coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
    2055     37559568 :                                                      + ww1(4)*vv2 + ww0(3)*vv3 + ww1(3)*vv4 + ww0(2)*vv5 + ww1(2)*vv6 + ww0(1)*vv7
    2056              :                         coarse_coeffs(i + 2, j, k) = coarse_coeffs(i + 2, j, k) &
    2057     37559568 :                                                      + ww1(4)*vv4 + ww0(3)*vv5 + ww1(3)*vv6 + ww0(2)*vv7
    2058              :                         coarse_coeffs(i + 3, j, k) = coarse_coeffs(i + 3, j, k) &
    2059     37559568 :                                                      + ww1(4)*vv6 + ww0(3)*vv7
    2060              :                      END DO
    2061      8600726 :                      IF (.NOT. FLOOR((fine_bo(2, 1) - f_shift(1))/2._dp) - coarse_bo(1, 1) >= 4) THEN
    2062            0 :                         CPABORT("FLOOR((fine_bo(2,1)-f_shift(1))/2._dp)-coarse_bo(1,1)>=4")
    2063              :                      END IF
    2064      8600726 :                      rest_b = MODULO(FLOOR((fine_bo(2, 1) - f_shift(1))/2._dp) - coarse_bo(1, 1) - 6, 4)
    2065      8600726 :                      i = FLOOR((fine_bo(2, 1) - f_shift(1))/2._dp) - 3 - rest_b + 4
    2066      8600726 :                      CPASSERT(fi == (i - 2)*2 + f_shift(1))
    2067      8600726 :                      IF (rest_b > 0) THEN
    2068      8540210 :                         fi = fi + 1
    2069      8540210 :                         vv0 = fine_values(fi, fj, fk)
    2070      8540210 :                         fi = fi + 1
    2071      8540210 :                         vv1 = fine_values(fi, fj, fk)
    2072              :                         coarse_coeffs(i - 3, j, k) = coarse_coeffs(i - 3, j, k) &
    2073      8540210 :                                                      + ww1(1)*vv0
    2074              :                         coarse_coeffs(i - 2, j, k) = coarse_coeffs(i - 2, j, k) &
    2075      8540210 :                                                      + ww1(2)*vv0 + ww0(1)*vv1
    2076              :                         coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
    2077      8540210 :                                                      + ww1(3)*vv0 + ww0(2)*vv1
    2078              :                         coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
    2079      8540210 :                                                  + ww1(4)*vv0 + ww0(3)*vv1
    2080      8540210 :                         IF (rest_b > 1) THEN
    2081      8492686 :                            fi = fi + 1
    2082      8492686 :                            vv2 = fine_values(fi, fj, fk)
    2083      8492686 :                            fi = fi + 1
    2084      8492686 :                            vv3 = fine_values(fi, fj, fk)
    2085              :                            coarse_coeffs(i - 2, j, k) = coarse_coeffs(i - 2, j, k) &
    2086      8492686 :                                                         + ww1(1)*vv2
    2087              :                            coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
    2088      8492686 :                                                         + ww1(2)*vv2 + ww0(1)*vv3
    2089              :                            coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
    2090      8492686 :                                                     + ww1(3)*vv2 + ww0(2)*vv3
    2091              :                            coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
    2092      8492686 :                                                         + ww1(4)*vv2 + ww0(3)*vv3
    2093      8492686 :                            IF (rest_b > 2) THEN
    2094        61924 :                               fi = fi + 1
    2095        61924 :                               vv4 = fine_values(fi, fj, fk)
    2096        61924 :                               fi = fi + 1
    2097        61924 :                               vv5 = fine_values(fi, fj, fk)
    2098        61924 :                               fi = fi + 1
    2099        61924 :                               vv6 = fine_values(fi, fj, fk)
    2100        61924 :                               fi = fi + 1
    2101        61924 :                               vv7 = fine_values(fi, fj, fk)
    2102              :                               coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
    2103        61924 :                                                            + ww1(1)*vv4
    2104        61924 :                               IF (has_i_ubound) THEN
    2105            0 :                                  IF (coarse_bo(2, 1) - 2 == FLOOR((fine_bo(2, 1) - f_shift(1))/2._dp)) THEN
    2106            0 :                                     fi = fi + 1
    2107            0 :                                     vv0 = fine_values(fi, fj, fk)
    2108              :                                     coarse_coeffs(i + 4, j, k) = coarse_coeffs(i + 4, j, k) &
    2109            0 :                                                                  + vv0*ww1(4)
    2110              :                                  ELSE
    2111              :                                     vv0 = 0._dp
    2112              :                                  END IF
    2113              :                                  coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
    2114            0 :                                                           + ww1(2)*vv4 + ww0(1)*vv5 + ww1(1)*vv6
    2115              :                                  coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
    2116            0 :                                                               + ww1(3)*vv4 + ww0(2)*vv5 + ww1(2)*vv6 + ww0(1)*vv7 + vv0*ww1(1)
    2117              :                                  coarse_coeffs(i + 2, j, k) = coarse_coeffs(i + 2, j, k) &
    2118            0 :                                                               + ww1(4)*vv4 + ww0(3)*vv5 + ww1(3)*vv6 + ww0(2)*vv7 + vv0*ww1(2)
    2119              :                                  coarse_coeffs(i + 3, j, k) = coarse_coeffs(i + 3, j, k) &
    2120            0 :                                                               + ww1(4)*vv6 + ww0(3)*vv7 + vv0*ww1(3)
    2121        61924 :                               ELSEIF (pbc .AND. .NOT. is_split) THEN
    2122            0 :                                  fi = fi + 1
    2123            0 :                                  vv0 = fine_values(fi, fj, fk)
    2124            0 :                                  vv1 = fine_values(fine_bo(1, 1), fj, fk)
    2125            0 :                                  vv2 = fine_values(fine_bo(1, 1) + 1, fj, fk)
    2126              :                                  coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
    2127            0 :                                                           + ww1(2)*vv4 + ww0(1)*vv5 + ww1(1)*vv6
    2128              :                                  coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
    2129            0 :                                                               + ww1(3)*vv4 + ww0(2)*vv5 + ww1(2)*vv6 + ww0(1)*vv7 + vv0*ww1(1)
    2130              :                                  coarse_coeffs(i + 2, j, k) = coarse_coeffs(i + 2, j, k) &
    2131              :                                                               + ww1(4)*vv4 + ww0(3)*vv5 + ww1(3)*vv6 + ww0(2)*vv7 + vv0*ww1(2) &
    2132            0 :                                                               + vv1*ww0(1) + vv2*ww1(1)
    2133              :                               ELSE
    2134              :                                  coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
    2135        61924 :                                                           + ww1(2)*vv4 + ww0(1)*vv5 + wj*w_border1(3)*vv6
    2136              :                                  coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
    2137        61924 :                                                               + ww1(3)*vv4 + ww0(2)*vv5 + wj*w_border1(2)*vv6
    2138              :                                  coarse_coeffs(i + 2, j, k) = coarse_coeffs(i + 2, j, k) &
    2139        61924 :                                                               + ww1(4)*vv4 + ww0(3)*vv5 + wj*w_border1(1)*vv6 + w_border0*wj*vv7
    2140              :                               END IF
    2141              :                            ELSE
    2142      8430762 :                               fi = fi + 1
    2143      8430762 :                               vv4 = fine_values(fi, fj, fk)
    2144      8430762 :                               fi = fi + 1
    2145      8430762 :                               vv5 = fine_values(fi, fj, fk)
    2146      8430762 :                               IF (has_i_ubound) THEN
    2147            0 :                                  IF (coarse_bo(2, 1) - 2 == FLOOR((fine_bo(2, 1) - f_shift(1))/2._dp)) THEN
    2148            0 :                                     fi = fi + 1
    2149            0 :                                     vv6 = fine_values(fi, fj, fk)
    2150              :                                     coarse_coeffs(i + 3, j, k) = coarse_coeffs(i + 3, j, k) &
    2151            0 :                                                                  + ww1(4)*vv6
    2152              :                                  ELSE
    2153              :                                     vv6 = 0._dp
    2154              :                                  END IF
    2155              :                                  coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
    2156            0 :                                                               + ww1(1)*vv4
    2157              :                                  coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
    2158            0 :                                                           + ww1(2)*vv4 + ww0(1)*vv5 + ww1(1)*vv6
    2159              :                                  coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
    2160            0 :                                                               + ww1(3)*vv4 + ww0(2)*vv5 + ww1(2)*vv6
    2161              :                                  coarse_coeffs(i + 2, j, k) = coarse_coeffs(i + 2, j, k) &
    2162            0 :                                                               + ww1(4)*vv4 + ww0(3)*vv5 + ww1(3)*vv6
    2163      8430762 :                               ELSEIF (pbc .AND. .NOT. is_split) THEN
    2164            0 :                                  fi = fi + 1
    2165            0 :                                  vv6 = fine_values(fi, fj, fk)
    2166            0 :                                  vv7 = fine_values(fine_bo(1, 1), fj, fk)
    2167            0 :                                  vv0 = fine_values(fine_bo(1, 1) + 1, fj, fk)
    2168              :                                  coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
    2169            0 :                                                               + ww1(1)*vv4
    2170              :                                  coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
    2171              :                                                           + ww1(4)*vv0 + ww0(3)*vv1 + ww1(3)*vv2 + ww0(2)*vv3 + &
    2172            0 :                                                           ww1(2)*vv4 + ww0(1)*vv5 + ww1(1)*vv6
    2173              :                                  coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
    2174              :                                                               + ww1(4)*vv2 + ww0(3)*vv3 + ww1(3)*vv4 + ww0(2)*vv5 + ww1(2)*vv6 &
    2175            0 :                                                               + ww0(1)*vv7 + ww1(1)*vv0
    2176              :                               ELSE
    2177              :                                  coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
    2178      8430762 :                                                               + wj*w_border1(3)*vv4
    2179              :                                  coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
    2180      8430762 :                                                           + wj*w_border1(2)*vv4
    2181              :                                  coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
    2182      8430762 :                                                               + wj*(w_border1(1)*vv4 + w_border0*vv5)
    2183              :                               END IF
    2184              :                            END IF
    2185              :                         ELSE
    2186        47524 :                            fi = fi + 1
    2187        47524 :                            vv2 = fine_values(fi, fj, fk)
    2188        47524 :                            fi = fi + 1
    2189        47524 :                            vv3 = fine_values(fi, fj, fk)
    2190        47524 :                            IF (has_i_ubound) THEN
    2191        47524 :                               IF (coarse_bo(2, 1) - 2 == FLOOR((fine_bo(2, 1) - f_shift(1))/2._dp)) THEN
    2192        47524 :                                  fi = fi + 1
    2193        47524 :                                  vv4 = fine_values(fi, fj, fk)
    2194              :                                  coarse_coeffs(i + 2, j, k) = coarse_coeffs(i + 2, j, k) &
    2195        47524 :                                                               + ww1(4)*vv4
    2196              :                               ELSE
    2197              :                                  vv4 = 0._dp
    2198              :                               END IF
    2199              :                               coarse_coeffs(i - 2, j, k) = coarse_coeffs(i - 2, j, k) &
    2200        47524 :                                                            + ww1(1)*vv2
    2201              :                               coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
    2202        47524 :                                                            + ww1(2)*vv2 + ww0(1)*vv3 + ww1(1)*vv4
    2203              :                               coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
    2204        47524 :                                                        + ww1(3)*vv2 + ww0(2)*vv3 + ww1(2)*vv4
    2205              :                               coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
    2206        47524 :                                                            + ww1(4)*vv2 + ww0(3)*vv3 + ww1(3)*vv4
    2207            0 :                            ELSEIF (pbc .AND. .NOT. is_split) THEN
    2208            0 :                               fi = fi + 1
    2209            0 :                               vv4 = fine_values(fi, fj, fk)
    2210            0 :                               vv5 = fine_values(fine_bo(1, 1), fj, fk)
    2211            0 :                               vv6 = fine_values(fine_bo(1, 1) + 1, fj, fk)
    2212              :                               coarse_coeffs(i - 2, j, k) = coarse_coeffs(i - 2, j, k) &
    2213            0 :                                                            + ww1(1)*vv2
    2214              :                               coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
    2215            0 :                                                            + ww1(2)*vv2 + ww0(1)*vv3 + ww1(1)*vv4
    2216              :                               coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
    2217            0 :                                                        + ww1(3)*vv2 + ww0(2)*vv3 + ww1(2)*vv4 + vv5*ww0(1) + ww1(1)*vv6
    2218              :                            ELSE
    2219              :                               coarse_coeffs(i - 2, j, k) = coarse_coeffs(i - 2, j, k) &
    2220            0 :                                                            + wj*w_border1(3)*vv2
    2221              :                               coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
    2222            0 :                                                            + wj*w_border1(2)*vv2
    2223              :                               coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
    2224            0 :                                                        + wj*(w_border1(1)*vv2 + w_border0*vv3)
    2225              :                            END IF
    2226              :                         END IF
    2227              :                      ELSE
    2228        60516 :                         fi = fi + 1
    2229        60516 :                         vv0 = fine_values(fi, fj, fk)
    2230        60516 :                         fi = fi + 1
    2231        60516 :                         vv1 = fine_values(fi, fj, fk)
    2232        60516 :                         IF (has_i_ubound) THEN
    2233            0 :                            IF (coarse_bo(2, 1) - 2 == FLOOR((fine_bo(2, 1) - f_shift(1))/2._dp)) THEN
    2234            0 :                               fi = fi + 1
    2235            0 :                               vv2 = fine_values(fi, fj, fk)
    2236              :                               coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
    2237            0 :                                                            + ww1(4)*vv2
    2238              :                            ELSE
    2239              :                               vv2 = 0._dp
    2240              :                            END IF
    2241              :                            coarse_coeffs(i - 3, j, k) = coarse_coeffs(i - 3, j, k) &
    2242            0 :                                                         + ww1(1)*vv0
    2243              :                            coarse_coeffs(i - 2, j, k) = coarse_coeffs(i - 2, j, k) &
    2244            0 :                                                         + ww1(2)*vv0 + ww0(1)*vv1 + ww1(1)*vv2
    2245              :                            coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
    2246            0 :                                                         + ww1(3)*vv0 + ww0(2)*vv1 + ww1(2)*vv2
    2247              :                            coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
    2248            0 :                                                     + ww1(4)*vv0 + ww0(3)*vv1 + ww1(3)*vv2
    2249        60516 :                         ELSEIF (pbc .AND. .NOT. is_split) THEN
    2250            0 :                            fi = fi + 1
    2251            0 :                            vv2 = fine_values(fi, fj, fk)
    2252            0 :                            vv3 = fine_values(fine_bo(1, 1), fk, fk)
    2253            0 :                            vv4 = fine_values(fine_bo(1, 1) + 1, fk, fk)
    2254              :                            coarse_coeffs(i - 3, j, k) = coarse_coeffs(i - 3, j, k) &
    2255            0 :                                                         + ww1(1)*vv0
    2256              :                            coarse_coeffs(i - 2, j, k) = coarse_coeffs(i - 2, j, k) &
    2257            0 :                                                         + ww1(2)*vv0 + ww0(1)*vv1 + ww1(1)*vv2
    2258              :                            coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
    2259            0 :                                                         + ww1(3)*vv0 + ww0(2)*vv1 + ww1(2)*vv2 + ww0(1)*vv3 + ww1(1)*vv4
    2260              :                         ELSE
    2261              :                            coarse_coeffs(i - 3, j, k) = coarse_coeffs(i - 3, j, k) &
    2262        60516 :                                                         + wj*w_border1(3)*vv0
    2263              :                            coarse_coeffs(i - 2, j, k) = coarse_coeffs(i - 2, j, k) &
    2264        60516 :                                                         + wj*w_border1(2)*vv0
    2265              :                            coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
    2266        60516 :                                                         + wj*(w_border1(1)*vv0 + w_border0*vv1)
    2267              :                         END IF
    2268              :                      END IF
    2269      8600726 :                      CPASSERT(fi == fine_bo(2, 1))
    2270              :                   END IF
    2271              :                END DO
    2272              :             END DO
    2273              :          END DO
    2274              :       END DO
    2275              : 
    2276              :       ! *** parallel case
    2277         1062 :       IF (is_split) THEN
    2278           24 :          CALL timeset(routineN//"_comm", handle2)
    2279              :          coarse_slice_size = (coarse_bo(2, 2) - coarse_bo(1, 2) + 1)* &
    2280           24 :                              (coarse_bo(2, 3) - coarse_bo(1, 3) + 1)
    2281           24 :          n_procs = coarse_coeffs_pw%pw_grid%para%group%num_pe
    2282              :          ALLOCATE (send_size(0:n_procs - 1), send_offset(0:n_procs - 1), &
    2283              :                    sent_size(0:n_procs - 1), rcv_size(0:n_procs - 1), &
    2284              :                    rcv_offset(0:n_procs - 1), pp_lb(0:n_procs - 1), &
    2285          240 :                    pp_ub(0:n_procs - 1), real_rcv_size(0:n_procs - 1))
    2286              : 
    2287              :          ! ** send size count
    2288              : 
    2289           24 :          pos_of_x => coarse_coeffs_pw%pw_grid%para%pos_of_x
    2290           72 :          send_size = 0
    2291          184 :          DO x = coarse_bo(1, 1), coarse_bo(2, 1)
    2292          160 :             p = pos_of_x(coarse_gbo(1, 1) + MODULO(x - coarse_gbo(1, 1), s(1)))
    2293          184 :             send_size(p) = send_size(p) + coarse_slice_size
    2294              :          END DO
    2295              : 
    2296              :          ! ** rcv size count
    2297              : 
    2298           24 :          pos_of_x => fine_values_pw%pw_grid%para%pos_of_x
    2299           24 :          p_old = pos_of_x(fine_gbo(1, 1))
    2300           72 :          pp_lb = fine_gbo(2, 1)
    2301           72 :          pp_ub = fine_gbo(2, 1) - 1
    2302           24 :          pp_lb(p_old) = fine_gbo(1, 1)
    2303          496 :          DO x = fine_gbo(1, 1), fine_gbo(2, 1)
    2304          472 :             p = pos_of_x(x)
    2305          496 :             IF (p /= p_old) THEN
    2306           24 :                pp_ub(p_old) = x - 1
    2307           24 :                pp_lb(p) = x
    2308           24 :                p_old = p
    2309              :             END IF
    2310              :          END DO
    2311           24 :          pp_ub(p_old) = fine_gbo(2, 1)
    2312              : 
    2313           72 :          DO ip = 0, n_procs - 1
    2314           48 :             IF (pp_lb(ip) <= pp_ub(ip)) THEN
    2315           48 :                pp_lb(ip) = FLOOR(REAL(pp_lb(ip) - f_shift(1), dp)/2._dp) - 1
    2316           48 :                pp_ub(ip) = FLOOR(REAL(pp_ub(ip) + 1 - f_shift(1), dp)/2._dp) + 1
    2317              :             ELSE
    2318            0 :                pp_lb(ip) = coarse_gbo(2, 1)
    2319            0 :                pp_ub(ip) = coarse_gbo(2, 1) - 1
    2320              :             END IF
    2321           72 :             IF (.NOT. is_split .OR. .NOT. pbc) THEN
    2322           48 :                pp_lb(ip) = MAX(pp_lb(ip), coarse_gbo(1, 1))
    2323           48 :                pp_ub(ip) = MIN(pp_ub(ip), coarse_gbo(2, 1))
    2324              :             END IF
    2325              :          END DO
    2326              : 
    2327           72 :          rcv_size = 0
    2328           72 :          DO ip = 0, n_procs - 1
    2329           48 :             DO x = pp_lb(ip), coarse_gbo(1, 1) - 1
    2330            0 :                x_att = coarse_gbo(1, 1) + MODULO(x - coarse_gbo(1, 1), s(1))
    2331           48 :                IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
    2332            0 :                   rcv_size(ip) = rcv_size(ip) + coarse_slice_size
    2333              :                END IF
    2334              :             END DO
    2335              :             rcv_size(ip) = rcv_size(ip) + coarse_slice_size* &
    2336              :                            MAX(0, &
    2337           48 :                                MIN(pp_ub(ip), my_coarse_bo(2, 1)) - MAX(pp_lb(ip), my_coarse_bo(1, 1)) + 1)
    2338           72 :             DO x = coarse_gbo(2, 1) + 1, pp_ub(ip)
    2339            0 :                x_att = coarse_gbo(1, 1) + MODULO(x - coarse_gbo(1, 1), s(1))
    2340           48 :                IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
    2341            0 :                   rcv_size(ip) = rcv_size(ip) + coarse_slice_size
    2342              :                END IF
    2343              :             END DO
    2344              :          END DO
    2345              : 
    2346              :          ! ** offsets & alloc send-rcv
    2347              : 
    2348              :          send_tot_size = 0
    2349           72 :          DO ip = 0, n_procs - 1
    2350           48 :             send_offset(ip) = send_tot_size
    2351           72 :             send_tot_size = send_tot_size + send_size(ip)
    2352              :          END DO
    2353           24 :          IF (send_tot_size /= (coarse_bo(2, 1) - coarse_bo(1, 1) + 1)*coarse_slice_size) &
    2354            0 :             CPABORT("Error calculating send_tot_size")
    2355           72 :          ALLOCATE (send_buf(0:send_tot_size - 1))
    2356              : 
    2357           24 :          rcv_tot_size = 0
    2358           72 :          DO ip = 0, n_procs - 1
    2359           48 :             rcv_offset(ip) = rcv_tot_size
    2360           72 :             rcv_tot_size = rcv_tot_size + rcv_size(ip)
    2361              :          END DO
    2362           72 :          ALLOCATE (rcv_buf(0:rcv_tot_size - 1))
    2363              : 
    2364              :          ! ** fill send buffer
    2365              : 
    2366           24 :          pos_of_x => coarse_coeffs_pw%pw_grid%para%pos_of_x
    2367              :          p_old = pos_of_x(coarse_gbo(1, 1) &
    2368           24 :                           + MODULO(coarse_bo(1, 1) - coarse_gbo(1, 1), s(1)))
    2369           72 :          sent_size(:) = send_offset
    2370           24 :          ss = coarse_bo(2, 1) - coarse_bo(1, 1) + 1
    2371          184 :          DO x = coarse_bo(1, 1), coarse_bo(2, 1)
    2372          160 :             p = pos_of_x(coarse_gbo(1, 1) + MODULO(x - coarse_gbo(1, 1), s(1)))
    2373              :             CALL dcopy(coarse_slice_size, &
    2374              :                        coarse_coeffs(x, coarse_bo(1, 2), &
    2375          160 :                                      coarse_bo(1, 3)), ss, send_buf(sent_size(p)), 1)
    2376          184 :             sent_size(p) = sent_size(p) + coarse_slice_size
    2377              :          END DO
    2378              : 
    2379           48 :          IF (ANY(sent_size(0:n_procs - 2) /= send_offset(1:n_procs - 1))) &
    2380            0 :             CPABORT("error 1 filling send buffer")
    2381           24 :          IF (sent_size(n_procs - 1) /= send_tot_size) &
    2382            0 :             CPABORT("error 2 filling send buffer")
    2383              : 
    2384              :          IF (local_data) THEN
    2385           24 :             DEALLOCATE (coarse_coeffs)
    2386              :          ELSE
    2387              :             NULLIFY (coarse_coeffs)
    2388              :          END IF
    2389              : 
    2390           48 :          CPASSERT(ALL(sent_size(:n_procs - 2) == send_offset(1:)))
    2391           24 :          CPASSERT(sent_size(n_procs - 1) == send_tot_size)
    2392              :          ! test send/rcv sizes
    2393           24 :          CALL coarse_coeffs_pw%pw_grid%para%group%alltoall(send_size, real_rcv_size, 1)
    2394              : 
    2395           72 :          CPASSERT(ALL(real_rcv_size == rcv_size))
    2396              :          ! all2all
    2397              :          CALL coarse_coeffs_pw%pw_grid%para%group%alltoall(sb=send_buf, scount=send_size, sdispl=send_offset, &
    2398           24 :                                                            rb=rcv_buf, rcount=rcv_size, rdispl=rcv_offset)
    2399              : 
    2400              :          ! ** sum & reorder rcv buffer
    2401              : 
    2402           72 :          sent_size(:) = rcv_offset
    2403           72 :          DO ip = 0, n_procs - 1
    2404              : 
    2405           48 :             DO x = pp_lb(ip), coarse_gbo(1, 1) - 1
    2406            0 :                x_att = coarse_gbo(1, 1) + MODULO(x - coarse_gbo(1, 1), s(1))
    2407           48 :                IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
    2408            0 :                   ii = sent_size(ip)
    2409            0 :                   DO k = coarse_bo(1, 3), coarse_bo(2, 3)
    2410            0 :                      DO j = coarse_bo(1, 2), coarse_bo(2, 2)
    2411            0 :                         coarse_coeffs_pw%array(x_att, j, k) = coarse_coeffs_pw%array(x_att, j, k) + rcv_buf(ii)
    2412            0 :                         ii = ii + 1
    2413              :                      END DO
    2414              :                   END DO
    2415            0 :                   sent_size(ip) = ii
    2416              :                END IF
    2417              :             END DO
    2418              : 
    2419           48 :             ii = sent_size(ip)
    2420          208 :             DO x_att = MAX(pp_lb(ip), my_coarse_bo(1, 1)), MIN(pp_ub(ip), my_coarse_bo(2, 1))
    2421         2160 :                DO k = coarse_bo(1, 3), coarse_bo(2, 3)
    2422        29920 :                   DO j = coarse_bo(1, 2), coarse_bo(2, 2)
    2423        27808 :                      coarse_coeffs_pw%array(x_att, j, k) = coarse_coeffs_pw%array(x_att, j, k) + rcv_buf(ii)
    2424        29760 :                      ii = ii + 1
    2425              :                   END DO
    2426              :                END DO
    2427              :             END DO
    2428           48 :             sent_size(ip) = ii
    2429              : 
    2430           72 :             DO x = coarse_gbo(2, 1) + 1, pp_ub(ip)
    2431            0 :                x_att = coarse_gbo(1, 1) + MODULO(x - coarse_gbo(1, 1), s(1))
    2432           48 :                IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
    2433            0 :                   ii = sent_size(ip)
    2434            0 :                   DO k = coarse_bo(1, 3), coarse_bo(2, 3)
    2435            0 :                      DO j = coarse_bo(1, 2), coarse_bo(2, 2)
    2436            0 :                         coarse_coeffs_pw%array(x_att, j, k) = coarse_coeffs_pw%array(x_att, j, k) + rcv_buf(ii)
    2437            0 :                         ii = ii + 1
    2438              :                      END DO
    2439              :                   END DO
    2440            0 :                   sent_size(ip) = ii
    2441              :                END IF
    2442              :             END DO
    2443              : 
    2444              :          END DO
    2445              : 
    2446           48 :          IF (ANY(sent_size(0:n_procs - 2) /= rcv_offset(1:n_procs - 1))) &
    2447            0 :             CPABORT("error 1 handling the rcv buffer")
    2448           24 :          IF (sent_size(n_procs - 1) /= rcv_tot_size) &
    2449            0 :             CPABORT("error 2 handling the rcv buffer")
    2450              : 
    2451              :          ! dealloc
    2452           24 :          DEALLOCATE (send_size, send_offset, rcv_size, rcv_offset)
    2453           24 :          DEALLOCATE (send_buf, rcv_buf, real_rcv_size)
    2454           24 :          DEALLOCATE (pp_ub, pp_lb)
    2455           48 :          CALL timestop(handle2)
    2456              :       ELSE
    2457              :          CPASSERT(.NOT. local_data)
    2458              :       END IF
    2459              : 
    2460         1062 :       CALL timestop(handle)
    2461         2124 :    END SUBROUTINE add_fine2coarse
    2462              : 
    2463              : ! **************************************************************************************************
    2464              : !> \brief ...
    2465              : !> \param preconditioner the preconditioner to create
    2466              : !> \param precond_kind the kind of preconditioner to use
    2467              : !> \param pool a pool with grids of the same type as the elements to
    2468              : !>        precondition
    2469              : !> \param pbc if periodic boundary conditions should be applied
    2470              : !> \param transpose ...
    2471              : !> \author fawzi
    2472              : ! **************************************************************************************************
    2473        33858 :    SUBROUTINE pw_spline_precond_create(preconditioner, precond_kind, &
    2474              :                                        pool, pbc, transpose)
    2475              :       TYPE(pw_spline_precond_type), INTENT(OUT)          :: preconditioner
    2476              :       INTEGER, INTENT(in)                                :: precond_kind
    2477              :       TYPE(pw_pool_type), INTENT(IN), POINTER            :: pool
    2478              :       LOGICAL, INTENT(in)                                :: pbc, transpose
    2479              : 
    2480              :       preconditioner%kind = no_precond
    2481         3762 :       preconditioner%pool => pool
    2482         3762 :       preconditioner%pbc = pbc
    2483         3762 :       preconditioner%transpose = transpose
    2484         3762 :       CALL pool%retain()
    2485         3762 :       CALL pw_spline_precond_set_kind(preconditioner, precond_kind)
    2486         3762 :    END SUBROUTINE pw_spline_precond_create
    2487              : 
    2488              : ! **************************************************************************************************
    2489              : !> \brief switches the types of precoditioner to use
    2490              : !> \param preconditioner the preconditioner to be changed
    2491              : !> \param precond_kind the new kind of preconditioner to use
    2492              : !> \param pbc ...
    2493              : !> \param transpose ...
    2494              : !> \author fawzi
    2495              : ! **************************************************************************************************
    2496         7524 :    SUBROUTINE pw_spline_precond_set_kind(preconditioner, precond_kind, pbc, &
    2497              :                                          transpose)
    2498              :       TYPE(pw_spline_precond_type), INTENT(INOUT)        :: preconditioner
    2499              :       INTEGER, INTENT(in)                                :: precond_kind
    2500              :       LOGICAL, INTENT(in), OPTIONAL                      :: pbc, transpose
    2501              : 
    2502              :       LOGICAL                                            :: do_3d_coeff
    2503              :       REAL(kind=dp)                                      :: s
    2504              : 
    2505         7524 :       IF (PRESENT(transpose)) preconditioner%transpose = transpose
    2506         7524 :       do_3d_coeff = .FALSE.
    2507         7524 :       preconditioner%kind = precond_kind
    2508         7524 :       IF (PRESENT(pbc)) preconditioner%pbc = pbc
    2509              :       SELECT CASE (precond_kind)
    2510              :       CASE (no_precond)
    2511              :       CASE (precond_spl3_aint2)
    2512            0 :          preconditioner%coeffs_1d = [-1.66_dp*0.25_dp, 1.66_dp, -1.66_dp*0.25_dp]
    2513            0 :          preconditioner%sharpen = .FALSE.
    2514            0 :          preconditioner%normalize = .FALSE.
    2515         3762 :          do_3d_coeff = .TRUE.
    2516              :       CASE (precond_spl3_3)
    2517         3762 :          preconditioner%coeffs_1d(1) = -0.25_dp*1.6_dp
    2518         3762 :          preconditioner%coeffs_1d(2) = 1.6_dp
    2519         3762 :          preconditioner%coeffs_1d(3) = -0.25_dp*1.6_dp
    2520         3762 :          preconditioner%sharpen = .FALSE.
    2521         3762 :          preconditioner%normalize = .FALSE.
    2522            0 :          do_3d_coeff = .TRUE.
    2523              :       CASE (precond_spl3_2)
    2524            0 :          preconditioner%coeffs_1d(1) = -0.26_dp*1.76_dp
    2525            0 :          preconditioner%coeffs_1d(2) = 1.76_dp
    2526            0 :          preconditioner%coeffs_1d(3) = -0.26_dp*1.76_dp
    2527            0 :          preconditioner%sharpen = .FALSE.
    2528            0 :          preconditioner%normalize = .FALSE.
    2529              :          do_3d_coeff = .TRUE.
    2530              :       CASE (precond_spl3_aint)
    2531        15048 :          preconditioner%coeffs_1d = spl3_1d_coeffs0
    2532         3762 :          preconditioner%sharpen = .TRUE.
    2533         3762 :          preconditioner%normalize = .TRUE.
    2534            0 :          do_3d_coeff = .TRUE.
    2535              :       CASE (precond_spl3_1)
    2536            0 :          preconditioner%coeffs_1d(1) = 0.5_dp/3._dp**(1._dp/3._dp)
    2537            0 :          preconditioner%coeffs_1d(2) = 4._dp/3._dp**(1._dp/3._dp)
    2538            0 :          preconditioner%coeffs_1d(3) = 0.5_dp/3._dp**(1._dp/3._dp)
    2539            0 :          preconditioner%sharpen = .TRUE.
    2540            0 :          preconditioner%normalize = .FALSE.
    2541            0 :          do_3d_coeff = .TRUE.
    2542              :       CASE default
    2543         7524 :          CPABORT("")
    2544              :       END SELECT
    2545              :       IF (do_3d_coeff) THEN
    2546         7524 :          s = 1._dp
    2547         7524 :          IF (preconditioner%sharpen) s = -1._dp
    2548              :          preconditioner%coeffs(1) = &
    2549              :             s*preconditioner%coeffs_1d(2)* &
    2550              :             preconditioner%coeffs_1d(2)* &
    2551         7524 :             preconditioner%coeffs_1d(2)
    2552              :          preconditioner%coeffs(2) = &
    2553              :             s*preconditioner%coeffs_1d(1)* &
    2554              :             preconditioner%coeffs_1d(2)* &
    2555         7524 :             preconditioner%coeffs_1d(2)
    2556              :          preconditioner%coeffs(3) = &
    2557              :             s*preconditioner%coeffs_1d(1)* &
    2558              :             preconditioner%coeffs_1d(1)* &
    2559         7524 :             preconditioner%coeffs_1d(2)
    2560              :          preconditioner%coeffs(4) = &
    2561              :             s*preconditioner%coeffs_1d(1)* &
    2562              :             preconditioner%coeffs_1d(1)* &
    2563         7524 :             preconditioner%coeffs_1d(1)
    2564         7524 :          IF (preconditioner%sharpen) THEN
    2565         3762 :             IF (preconditioner%normalize) THEN
    2566              :                preconditioner%coeffs(1) = 2._dp + &
    2567         3762 :                                           preconditioner%coeffs(1)
    2568              :             ELSE
    2569            0 :                preconditioner%coeffs(1) = -preconditioner%coeffs(1)
    2570              :             END IF
    2571              :          END IF
    2572              :       END IF
    2573         7524 :    END SUBROUTINE pw_spline_precond_set_kind
    2574              : 
    2575              : ! **************************************************************************************************
    2576              : !> \brief releases the preconditioner
    2577              : !> \param preconditioner the preconditioner to release
    2578              : !> \author fawzi
    2579              : ! **************************************************************************************************
    2580         3762 :    SUBROUTINE pw_spline_precond_release(preconditioner)
    2581              :       TYPE(pw_spline_precond_type), INTENT(INOUT)        :: preconditioner
    2582              : 
    2583         3762 :       CALL pw_pool_release(preconditioner%pool)
    2584         3762 :    END SUBROUTINE pw_spline_precond_release
    2585              : 
    2586              : ! **************************************************************************************************
    2587              : !> \brief applies the preconditioner to the system of equations to find the
    2588              : !>      coefficients of the spline
    2589              : !> \param preconditioner the preconditioner to apply
    2590              : !> \param in_v the grid on which the preconditioner should be applied
    2591              : !> \param out_v place to store the preconditioner applied on v_out
    2592              : !> \author fawzi
    2593              : ! **************************************************************************************************
    2594        76125 :    SUBROUTINE pw_spline_do_precond(preconditioner, in_v, out_v)
    2595              :       TYPE(pw_spline_precond_type), INTENT(IN)           :: preconditioner
    2596              :       TYPE(pw_r3d_rs_type), INTENT(IN)                   :: in_v
    2597              :       TYPE(pw_r3d_rs_type), INTENT(INOUT)                :: out_v
    2598              : 
    2599        76125 :       SELECT CASE (preconditioner%kind)
    2600              :       CASE (no_precond)
    2601            0 :          CALL pw_copy(in_v, out_v)
    2602              :       CASE (precond_spl3_aint, precond_spl3_1)
    2603         3762 :          CALL pw_zero(out_v)
    2604         3762 :          IF (preconditioner%pbc) THEN
    2605              :             CALL pw_nn_smear_r(pw_in=in_v, pw_out=out_v, &
    2606          440 :                                coeffs=preconditioner%coeffs)
    2607              :          ELSE
    2608              :             CALL pw_nn_compose_r_no_pbc(weights_1d=preconditioner%coeffs_1d, &
    2609              :                                         pw_in=in_v, pw_out=out_v, sharpen=preconditioner%sharpen, &
    2610              :                                         normalize=preconditioner%normalize, &
    2611         3322 :                                         transpose=preconditioner%transpose)
    2612              :          END IF
    2613              :       CASE (precond_spl3_3, precond_spl3_2, precond_spl3_aint2)
    2614        72363 :          CALL pw_zero(out_v)
    2615        72363 :          IF (preconditioner%pbc) THEN
    2616              :             CALL pw_nn_smear_r(pw_in=in_v, pw_out=out_v, &
    2617         6332 :                                coeffs=preconditioner%coeffs)
    2618              :          ELSE
    2619              :             CALL pw_nn_compose_r_no_pbc(weights_1d=preconditioner%coeffs_1d, &
    2620              :                                         pw_in=in_v, pw_out=out_v, sharpen=preconditioner%sharpen, &
    2621              :                                         normalize=preconditioner%normalize, smooth_boundary=.TRUE., &
    2622        66031 :                                         transpose=preconditioner%transpose)
    2623              :          END IF
    2624              :       CASE default
    2625        76125 :          CPABORT("")
    2626              :       END SELECT
    2627        76125 :    END SUBROUTINE pw_spline_do_precond
    2628              : 
    2629              : ! **************************************************************************************************
    2630              : !> \brief solves iteratively (CG) a systmes of linear equations
    2631              : !>           linOp(coeffs)=values
    2632              : !>      (for example those needed to find the coefficients of a spline)
    2633              : !>      Returns true if the it succeeded to achieve the requested accuracy
    2634              : !> \param values the right hand side of the system
    2635              : !> \param coeffs will contain the solution of the system (and on entry
    2636              : !>        it contains the starting point)
    2637              : !> \param linOp the linear operator to be inverted
    2638              : !> \param preconditioner the preconditioner to apply
    2639              : !> \param pool a pool of grids (for the temporary objects)
    2640              : !> \param eps_r the requested precision on the residual
    2641              : !> \param eps_x the requested precision on the solution
    2642              : !> \param max_iter maximum number of iteration allowed
    2643              : !> \param sumtype ...
    2644              : !> \return ...
    2645              : !> \author fawzi
    2646              : ! **************************************************************************************************
    2647         3762 :    FUNCTION find_coeffs(values, coeffs, linOp, preconditioner, pool, &
    2648              :                         eps_r, eps_x, max_iter, sumtype) RESULT(res)
    2649              : 
    2650              :       TYPE(pw_r3d_rs_type), INTENT(IN)                   :: values
    2651              :       TYPE(pw_r3d_rs_type), INTENT(INOUT)                :: coeffs
    2652              :       INTERFACE
    2653              :          SUBROUTINE linOp(pw_in, pw_out)
    2654              :             USE pw_types, ONLY: pw_r3d_rs_type
    2655              :             TYPE(pw_r3d_rs_type), INTENT(IN)    :: pw_in
    2656              :             TYPE(pw_r3d_rs_type), INTENT(INOUT) :: pw_out
    2657              :          END SUBROUTINE linOp
    2658              :       END INTERFACE
    2659              :       TYPE(pw_spline_precond_type), INTENT(IN)           :: preconditioner
    2660              :       TYPE(pw_pool_type), POINTER                        :: pool
    2661              :       REAL(kind=dp), INTENT(in)                          :: eps_r, eps_x
    2662              :       INTEGER, INTENT(in)                                :: max_iter
    2663              :       INTEGER, INTENT(in), OPTIONAL                      :: sumtype
    2664              :       LOGICAL                                            :: res
    2665              : 
    2666              :       INTEGER                                            :: i, iiter, iter, j, k
    2667              :       INTEGER, DIMENSION(2, 3)                           :: bo
    2668              :       LOGICAL                                            :: last
    2669              :       REAL(kind=dp)                                      :: alpha, beta, eps_r_att, eps_x_att, r_z, &
    2670              :                                                             r_z_new
    2671              :       TYPE(cp_logger_type), POINTER                      :: logger
    2672              :       TYPE(pw_r3d_rs_type)                               :: Ap, p, r, z
    2673              : 
    2674         3762 :       last = .FALSE.
    2675              : 
    2676         3762 :       res = .FALSE.
    2677         3762 :       logger => cp_get_default_logger()
    2678         3762 :       CALL pool%create_pw(r)
    2679         3762 :       CALL pool%create_pw(z)
    2680         3762 :       CALL pool%create_pw(p)
    2681         3762 :       CALL pool%create_pw(Ap)
    2682              : 
    2683              :       !CALL cp_add_iter_level(logger%iter_info,level_name="SPLINE_FIND_COEFFS")
    2684         8280 :       ext_do: DO iiter = 1, max_iter, 10
    2685         8280 :          CALL pw_zero(r)
    2686         8280 :          CALL linOp(pw_in=coeffs, pw_out=r)
    2687     62070026 :          r%array = -r%array
    2688         8280 :          CALL pw_axpy(values, r)
    2689         8280 :          CALL pw_spline_do_precond(preconditioner, in_v=r, out_v=z)
    2690         8280 :          CALL pw_copy(z, p)
    2691         8280 :          r_z = pw_integral_ab(r, z, sumtype)
    2692              : 
    2693        72363 :          DO iter = iiter, MIN(iiter + 9, max_iter)
    2694        67845 :             eps_r_att = SQRT(pw_integral_ab(r, r, sumtype))
    2695        67845 :             IF (eps_r_att == 0._dp) THEN
    2696        67845 :                eps_x_att = 0._dp
    2697              :                last = .TRUE.
    2698              :             ELSE
    2699        67839 :                CALL pw_zero(Ap)
    2700        67839 :                CALL linOp(pw_in=p, pw_out=Ap)
    2701        67839 :                alpha = r_z/pw_integral_ab(Ap, p, sumtype)
    2702              : 
    2703        67839 :                CALL pw_axpy(p, coeffs, alpha=alpha)
    2704              : 
    2705        67839 :                eps_x_att = alpha*SQRT(pw_integral_ab(p, p, sumtype)) ! try to spare if unneeded?
    2706        67839 :                IF (eps_r_att < eps_r .AND. eps_x_att < eps_x) last = .TRUE.
    2707              :             END IF
    2708              :             !CALL cp_iterate(logger%iter_info,last=last)
    2709        64083 :             IF (last) THEN
    2710              :                res = .TRUE.
    2711              :                EXIT ext_do
    2712              :             END IF
    2713              : 
    2714        64083 :             CALL pw_axpy(Ap, r, alpha=-alpha)
    2715              : 
    2716        64083 :             CALL pw_spline_do_precond(preconditioner, in_v=r, out_v=z)
    2717              : 
    2718        64083 :             r_z_new = pw_integral_ab(r, z, sumtype)
    2719        64083 :             beta = r_z_new/r_z
    2720        64083 :             r_z = r_z_new
    2721              : 
    2722       640830 :             bo = p%pw_grid%bounds_local
    2723       973104 :             DO k = bo(1, 3), bo(2, 3)
    2724     19615385 :                DO j = bo(1, 2), bo(2, 2)
    2725    418462035 :                   DO i = bo(1, 1), bo(2, 1)
    2726    417557532 :                      p%array(i, j, k) = z%array(i, j, k) + beta*p%array(i, j, k)
    2727              :                   END DO
    2728              :                END DO
    2729              :             END DO
    2730              : 
    2731              :          END DO
    2732              :       END DO ext_do
    2733              :       !CALL cp_rm_iter_level(logger%iter_info,level_name="SPLINE_FIND_COEFFS")
    2734              : 
    2735         3762 :       CALL pool%give_back_pw(r)
    2736         3762 :       CALL pool%give_back_pw(z)
    2737         3762 :       CALL pool%give_back_pw(p)
    2738         3762 :       CALL pool%give_back_pw(Ap)
    2739              : 
    2740         3762 :    END FUNCTION find_coeffs
    2741              : 
    2742              : ! **************************************************************************************************
    2743              : !> \brief adds to pw_out pw_in composed with the weights
    2744              : !>      pw_out%array(i,j,k)=pw_out%array(i,j,k)+sum(pw_in%array(i+l,j+m,k+n)*
    2745              : !>         weights_1d(abs(l)+1)*weights_1d(abs(m)+1)*weights_1d(abs(n)+1),
    2746              : !>         l=-1..1,m=-1..1,n=-1..1)
    2747              : !> \param weights_1d ...
    2748              : !> \param pw_in ...
    2749              : !> \param pw_out ...
    2750              : !> \param sharpen ...
    2751              : !> \param normalize ...
    2752              : !> \param transpose ...
    2753              : !> \param smooth_boundary ...
    2754              : !> \author fawzi
    2755              : ! **************************************************************************************************
    2756       138700 :    SUBROUTINE pw_nn_compose_r_no_pbc(weights_1d, pw_in, pw_out, &
    2757              :                                      sharpen, normalize, transpose, smooth_boundary)
    2758              :       REAL(kind=dp), DIMENSION(-1:1)                     :: weights_1d
    2759              :       TYPE(pw_r3d_rs_type), INTENT(IN)                   :: pw_in, pw_out
    2760              :       LOGICAL, INTENT(in), OPTIONAL                      :: sharpen, normalize, transpose, &
    2761              :                                                             smooth_boundary
    2762              : 
    2763              :       INTEGER                                            :: first_index, i, j, jw, k, kw, &
    2764              :                                                             last_index, myj, myk, n_els
    2765              :       INTEGER, DIMENSION(2, 3)                           :: bo, gbo
    2766              :       INTEGER, DIMENSION(3)                              :: s
    2767              :       LOGICAL                                            :: has_l_boundary, has_u_boundary, &
    2768              :                                                             is_split, my_normalize, my_sharpen, &
    2769              :                                                             my_smooth_boundary, my_transpose
    2770              :       REAL(kind=dp)                                      :: in_val_f, in_val_l, in_val_tmp, w_j, w_k
    2771              :       REAL(kind=dp), DIMENSION(-1:1)                     :: w
    2772       138700 :       REAL(kind=dp), DIMENSION(:, :), POINTER            :: l_boundary, tmp, u_boundary
    2773       138700 :       REAL(kind=dp), DIMENSION(:, :, :), POINTER         :: in_val, out_val
    2774              : 
    2775      1387000 :       bo = pw_in%pw_grid%bounds_local
    2776      1387000 :       gbo = pw_in%pw_grid%bounds
    2777       138700 :       in_val => pw_in%array
    2778       138700 :       out_val => pw_out%array
    2779       138700 :       my_sharpen = .FALSE.
    2780       138700 :       IF (PRESENT(sharpen)) my_sharpen = sharpen
    2781       138700 :       my_normalize = .FALSE.
    2782       138700 :       IF (PRESENT(normalize)) my_normalize = normalize
    2783       138700 :       my_transpose = .FALSE.
    2784       138700 :       IF (PRESENT(transpose)) my_transpose = transpose
    2785       138700 :       my_smooth_boundary = .FALSE.
    2786       138700 :       IF (PRESENT(smooth_boundary)) my_smooth_boundary = smooth_boundary
    2787       138700 :       CPASSERT(.NOT. my_normalize .OR. my_sharpen)
    2788       138700 :       CPASSERT(.NOT. my_smooth_boundary .OR. .NOT. my_sharpen)
    2789       554800 :       DO i = 1, 3
    2790       554800 :          s(i) = bo(2, i) - bo(1, i) + 1
    2791              :       END DO
    2792       554800 :       IF (ANY(s < 1)) RETURN
    2793              :       is_split = ANY(pw_in%pw_grid%bounds_local(:, 1) /= &
    2794       412752 :                      pw_in%pw_grid%bounds(:, 1))
    2795       138700 :       has_l_boundary = (gbo(1, 1) == bo(1, 1))
    2796       138700 :       has_u_boundary = (gbo(2, 1) == bo(2, 1))
    2797       138700 :       IF (is_split) THEN
    2798              :          ALLOCATE (l_boundary(bo(1, 2):bo(2, 2), bo(1, 3):bo(2, 3)), &
    2799              :                    u_boundary(bo(1, 2):bo(2, 2), bo(1, 3):bo(2, 3)), &
    2800        17856 :                    tmp(bo(1, 2):bo(2, 2), bo(1, 3):bo(2, 3)))
    2801       311256 :          tmp(:, :) = pw_in%array(bo(2, 1), :, :)
    2802              :          CALL pw_in%pw_grid%para%group%sendrecv(tmp, pw_in%pw_grid%para%pos_of_x( &
    2803              :                                                 gbo(1, 1) + MODULO(bo(2, 1) + 1 - gbo(1, 1), gbo(2, 1) - gbo(1, 1) + 1)), &
    2804              :                                                 l_boundary, pw_in%pw_grid%para%pos_of_x( &
    2805       620280 :                                                 gbo(1, 1) + MODULO(bo(1, 1) - 1 - gbo(1, 1), gbo(2, 1) - gbo(1, 1) + 1)))
    2806       311256 :          tmp(:, :) = pw_in%array(bo(1, 1), :, :)
    2807              :          CALL pw_in%pw_grid%para%group%sendrecv(tmp, pw_in%pw_grid%para%pos_of_x( &
    2808              :                                                 gbo(1, 1) + MODULO(bo(1, 1) - 1 - gbo(1, 1), gbo(2, 1) - gbo(1, 1) + 1)), &
    2809              :                                                 u_boundary, pw_in%pw_grid%para%pos_of_x( &
    2810       620280 :                                                 gbo(1, 1) + MODULO(bo(2, 1) + 1 - gbo(1, 1), gbo(2, 1) - gbo(1, 1) + 1)))
    2811         2232 :          DEALLOCATE (tmp)
    2812              :       END IF
    2813              : 
    2814       138700 :       n_els = s(1)
    2815       138700 :       IF (has_l_boundary) THEN
    2816       137584 :          n_els = n_els - 1
    2817       137584 :          first_index = bo(1, 1) + 1
    2818              :       ELSE
    2819         1116 :          first_index = bo(1, 1)
    2820              :       END IF
    2821       138700 :       IF (has_u_boundary) THEN
    2822       137584 :          n_els = n_els - 1
    2823       137584 :          last_index = bo(2, 1) - 1
    2824              :       ELSE
    2825         1116 :          last_index = bo(2, 1)
    2826              :       END IF
    2827              : !$OMP PARALLEL DO DEFAULT(NONE) &
    2828              : !$OMP PRIVATE(k, kw, myk, j, jw, myj, in_val_f, in_val_l, w_k, w_j, in_val_tmp, w) &
    2829              : !$OMP SHARED(bo, in_val, out_val, s, l_boundary, u_boundary, weights_1d, is_split, &
    2830              : !$OMP        my_transpose, gbo, my_smooth_boundary, has_l_boundary, has_u_boundary, &
    2831       138700 : !$OMP        my_sharpen, last_index, first_index, my_normalize, n_els)
    2832              :       DO k = bo(1, 3), bo(2, 3)
    2833              :          DO kw = -1, 1
    2834              :             myk = k + kw
    2835              :             IF (my_transpose) THEN
    2836              :                IF (k >= gbo(2, 3) - 1 .OR. k <= gbo(1, 3) + 1) THEN
    2837              :                   IF (k == gbo(2, 3) .OR. k == gbo(1, 3)) THEN
    2838              :                      IF (myk < gbo(2, 3) .AND. myk > gbo(1, 3)) THEN
    2839              :                         w_k = weights_1d(kw)
    2840              :                         IF (my_smooth_boundary) THEN
    2841              :                            w_k = weights_1d(kw)/weights_1d(0)
    2842              :                         END IF
    2843              :                      ELSE IF (kw == 0) THEN
    2844              :                         w_k = 1._dp
    2845              :                      ELSE
    2846              :                         CYCLE
    2847              :                      END IF
    2848              :                   ELSE
    2849              :                      IF (myk == gbo(2, 3) .OR. myk == gbo(1, 3)) CYCLE
    2850              :                      w_k = weights_1d(kw)
    2851              :                   END IF
    2852              :                ELSE
    2853              :                   w_k = weights_1d(kw)
    2854              :                END IF
    2855              :             ELSE
    2856              :                IF (k >= gbo(2, 3) - 1 .OR. k <= gbo(1, 3) + 1) THEN
    2857              :                   IF (k == gbo(2, 3) .OR. k == gbo(1, 3)) THEN
    2858              :                      IF (kw /= 0) CYCLE
    2859              :                      w_k = 1._dp
    2860              :                   ELSE
    2861              :                      IF (my_smooth_boundary .AND. ((k == gbo(1, 3) + 1 .AND. myk == gbo(1, 3)) .OR. &
    2862              :                                                    (k == gbo(2, 3) - 1 .AND. myk == gbo(2, 3)))) THEN
    2863              :                         w_k = weights_1d(kw)/weights_1d(0)
    2864              :                      ELSE
    2865              :                         w_k = weights_1d(kw)
    2866              :                      END IF
    2867              :                   END IF
    2868              :                ELSE
    2869              :                   w_k = weights_1d(kw)
    2870              :                END IF
    2871              :             END IF
    2872              :             DO j = bo(1, 2), bo(2, 2)
    2873              :                DO jw = -1, 1
    2874              :                   myj = j + jw
    2875              :                   IF (j < gbo(2, 2) - 1 .AND. j > gbo(1, 2) + 1) THEN
    2876              :                      w_j = w_k*weights_1d(jw)
    2877              :                   ELSE
    2878              :                      IF (my_transpose) THEN
    2879              :                         IF (j == gbo(2, 2) .OR. j == gbo(1, 2)) THEN
    2880              :                            IF (myj < gbo(2, 2) .AND. myj > gbo(1, 2)) THEN
    2881              :                               w_j = weights_1d(jw)*w_k
    2882              :                               IF (my_smooth_boundary) THEN
    2883              :                                  w_j = weights_1d(jw)/weights_1d(0)*w_k
    2884              :                               END IF
    2885              :                            ELSE IF (jw == 0) THEN
    2886              :                               w_j = w_k
    2887              :                            ELSE
    2888              :                               CYCLE
    2889              :                            END IF
    2890              :                         ELSE
    2891              :                            IF (myj == gbo(2, 2) .OR. myj == gbo(1, 2)) CYCLE
    2892              :                            w_j = w_k*weights_1d(jw)
    2893              :                         END IF
    2894              :                      ELSE
    2895              :                         IF (j == gbo(2, 2) .OR. j == gbo(1, 2)) THEN
    2896              :                            IF (jw /= 0) CYCLE
    2897              :                            w_j = w_k
    2898              :                         ELSE IF (my_smooth_boundary .AND. ((j == gbo(1, 2) + 1 .AND. myj == gbo(1, 2)) .OR. &
    2899              :                                                            (j == gbo(2, 2) - 1 .AND. myj == gbo(2, 2)))) THEN
    2900              :                            w_j = w_k*weights_1d(jw)/weights_1d(0)
    2901              :                         ELSE
    2902              :                            w_j = w_k*weights_1d(jw)
    2903              :                         END IF
    2904              :                      END IF
    2905              :                   END IF
    2906              : 
    2907              :                   IF (has_l_boundary) THEN
    2908              :                      IF (my_transpose) THEN
    2909              :                         IF (s(1) == 1) THEN
    2910              :                            CPASSERT(.NOT. has_u_boundary)
    2911              :                            in_val_tmp = u_boundary(myj, myk)
    2912              :                         ELSE
    2913              :                            in_val_tmp = in_val(bo(1, 1) + 1, myj, myk)
    2914              :                         END IF
    2915              :                         IF (my_sharpen) THEN
    2916              :                            IF (kw == 0 .AND. jw == 0) THEN
    2917              :                               IF (my_normalize) THEN
    2918              :                                  out_val(bo(1, 1), j, k) = out_val(bo(1, 1), j, k) + &
    2919              :                                                            (2.0_dp - w_j)*in_val(bo(1, 1), myj, myk) - &
    2920              :                                                            in_val_tmp*weights_1d(1)*w_j
    2921              :                               ELSE
    2922              :                                  out_val(bo(1, 1), j, k) = out_val(bo(1, 1), j, k) + &
    2923              :                                                            in_val(bo(1, 1), myj, myk)*w_j - &
    2924              :                                                            in_val_tmp*weights_1d(1)*w_j
    2925              :                               END IF
    2926              :                            ELSE
    2927              :                               out_val(bo(1, 1), j, k) = out_val(bo(1, 1), j, k) - &
    2928              :                                                         in_val(bo(1, 1), myj, myk)*w_j - &
    2929              :                                                         in_val_tmp*weights_1d(1)*w_j
    2930              :                            END IF
    2931              :                         ELSE IF (my_smooth_boundary) THEN
    2932              :                            out_val(bo(1, 1), j, k) = out_val(bo(1, 1), j, k) + &
    2933              :                                                      w_j*(in_val(bo(1, 1), myj, myk) + &
    2934              :                                                           in_val_tmp*weights_1d(1)/weights_1d(0))
    2935              :                         ELSE
    2936              :                            out_val(bo(1, 1), j, k) = out_val(bo(1, 1), j, k) + &
    2937              :                                                      w_j*(in_val(bo(1, 1), myj, myk) + &
    2938              :                                                           in_val_tmp*weights_1d(1))
    2939              :                         END IF
    2940              :                         in_val_f = 0.0_dp
    2941              :                      ELSE
    2942              :                         in_val_f = in_val(bo(1, 1), myj, myk)
    2943              :                         IF (my_sharpen) THEN
    2944              :                            IF (kw == 0 .AND. jw == 0) THEN
    2945              :                               IF (my_normalize) THEN
    2946              :                                  out_val(bo(1, 1), j, k) = out_val(bo(1, 1), j, k) + &
    2947              :                                                            (2.0_dp - w_j)*in_val_f
    2948              :                               ELSE
    2949              :                                  out_val(bo(1, 1), j, k) = out_val(bo(1, 1), j, k) + &
    2950              :                                                            in_val_f*w_j
    2951              :                               END IF
    2952              :                            ELSE
    2953              :                               out_val(bo(1, 1), j, k) = out_val(bo(1, 1), j, k) - &
    2954              :                                                         in_val_f*w_j
    2955              :                            END IF
    2956              :                         ELSE
    2957              :                            out_val(bo(1, 1), j, k) = out_val(bo(1, 1), j, k) + &
    2958              :                                                      in_val_f*w_j
    2959              :                         END IF
    2960              :                      END IF
    2961              :                   ELSE
    2962              :                      in_val_f = l_boundary(myj, myk)
    2963              :                   END IF
    2964              :                   IF (has_u_boundary) THEN
    2965              :                      IF (my_transpose) THEN
    2966              :                         in_val_l = in_val(bo(2, 1), myj, myk)
    2967              :                         IF (s(1) == 1) THEN
    2968              :                            CPASSERT(.NOT. has_l_boundary)
    2969              :                            in_val_tmp = l_boundary(myj, myk)
    2970              :                         ELSE
    2971              :                            in_val_tmp = in_val(bo(2, 1) - 1, myj, myk)
    2972              :                         END IF
    2973              :                         IF (my_sharpen) THEN
    2974              :                            IF (kw == 0 .AND. jw == 0) THEN
    2975              :                               IF (my_normalize) THEN
    2976              :                                  out_val(bo(2, 1), j, k) = out_val(bo(2, 1), j, k) + &
    2977              :                                                            in_val_l*(2._dp - w_j) - &
    2978              :                                                            in_val_tmp*weights_1d(1)*w_j
    2979              :                               ELSE
    2980              :                                  out_val(bo(2, 1), j, k) = out_val(bo(2, 1), j, k) + &
    2981              :                                                            in_val_l*w_j - &
    2982              :                                                            in_val_tmp*weights_1d(1)*w_j
    2983              :                               END IF
    2984              :                            ELSE
    2985              :                               out_val(bo(2, 1), j, k) = out_val(bo(2, 1), j, k) - &
    2986              :                                                         w_j*in_val_l - &
    2987              :                                                         in_val_tmp*weights_1d(1)*w_j
    2988              :                            END IF
    2989              :                         ELSE IF (my_smooth_boundary) THEN
    2990              :                            out_val(bo(2, 1), j, k) = out_val(bo(2, 1), j, k) + &
    2991              :                                                      w_j*(in_val_l + in_val_tmp*weights_1d(1)/weights_1d(0))
    2992              :                         ELSE
    2993              :                            out_val(bo(2, 1), j, k) = out_val(bo(2, 1), j, k) + &
    2994              :                                                      w_j*(in_val_l + in_val_tmp*weights_1d(1))
    2995              :                         END IF
    2996              :                         in_val_l = 0._dp
    2997              :                      ELSE
    2998              :                         in_val_l = in_val(bo(2, 1), myj, myk)
    2999              :                         IF (my_sharpen) THEN
    3000              :                            IF (kw == 0 .AND. jw == 0) THEN
    3001              :                               IF (my_normalize) THEN
    3002              :                                  out_val(bo(2, 1), j, k) = out_val(bo(2, 1), j, k) + &
    3003              :                                                            in_val_l*(2._dp - w_j)
    3004              :                               ELSE
    3005              :                                  out_val(bo(2, 1), j, k) = out_val(bo(2, 1), j, k) + &
    3006              :                                                            in_val_l*w_j
    3007              :                               END IF
    3008              :                            ELSE
    3009              :                               out_val(bo(2, 1), j, k) = out_val(bo(2, 1), j, k) - &
    3010              :                                                         w_j*in_val_l
    3011              :                            END IF
    3012              :                         ELSE
    3013              :                            out_val(bo(2, 1), j, k) = out_val(bo(2, 1), j, k) + &
    3014              :                                                      w_j*in_val_l
    3015              :                         END IF
    3016              :                      END IF
    3017              :                   ELSE
    3018              :                      in_val_l = u_boundary(myj, myk)
    3019              :                   END IF
    3020              :                   IF (last_index >= first_index) THEN
    3021              :                      IF (my_transpose) THEN
    3022              :                         IF (bo(1, 1) - 1 == gbo(1, 1)) THEN
    3023              :                            in_val_f = 0._dp
    3024              :                         ELSE IF (bo(2, 1) + 1 == gbo(2, 1)) THEN
    3025              :                            in_val_l = 0._dp
    3026              :                         END IF
    3027              :                      END IF
    3028              :                      IF (my_sharpen) THEN
    3029              :                         w = -weights_1d*w_j
    3030              :                         IF (kw == 0 .AND. jw == 0) THEN
    3031              :                            IF (my_normalize) THEN
    3032              :                               w(0) = w(0) + 2._dp
    3033              :                            ELSE
    3034              :                               w(0) = -w(0)
    3035              :                            END IF
    3036              :                         END IF
    3037              :                      ELSE
    3038              :                         w = weights_1d*w_j
    3039              :                      END IF
    3040              :                      IF (my_smooth_boundary .AND. (.NOT. my_transpose)) THEN
    3041              :                         IF (gbo(1, 1) + 1 >= bo(1, 1) .AND. &
    3042              :                             gbo(1, 1) + 1 <= bo(2, 1) .AND. gbo(2, 1) - gbo(1, 1) > 2) THEN
    3043              :                            IF (gbo(1, 1) >= bo(1, 1)) THEN
    3044              :                               out_val(gbo(1, 1) + 1, j, k) = out_val(gbo(1, 1) + 1, j, k) + &
    3045              :                                                              in_val(gbo(1, 1), myj, myk)*w_j*weights_1d(-1)* &
    3046              :                                                              (1._dp/weights_1d(0) - 1._dp)
    3047              :                            ELSE
    3048              :                               out_val(gbo(1, 1) + 1, j, k) = out_val(gbo(1, 1) + 1, j, k) + &
    3049              :                                                              l_boundary(myj, myk)*w_j*weights_1d(-1)* &
    3050              :                                                              (1._dp/weights_1d(0) - 1._dp)
    3051              :                            END IF
    3052              :                         END IF
    3053              :                      END IF
    3054              :                      CALL pw_compose_stripe(weights=w, &
    3055              :                                             in_val=in_val(first_index:last_index, myj, myk), &
    3056              :                                             in_val_first=in_val_f, in_val_last=in_val_l, &
    3057              :                                             out_val=out_val(first_index:last_index, j, k), &
    3058              :                                             n_el=n_els)
    3059              : !FM                   call pw_compose_stripe2(weights=w,&
    3060              : !FM                        in_val=in_val,&
    3061              : !FM                        in_val_first=in_val_f,in_val_last=in_val_l,&
    3062              : !FM                        out_val=out_val,&
    3063              : !FM                        first_val=first_index,last_val=last_index,&
    3064              : !FM                        myj=myj,myk=myk,j=j,k=k)
    3065              :                      IF (my_smooth_boundary .AND. (.NOT. my_transpose)) THEN
    3066              :                         IF (gbo(2, 1) - 1 >= bo(1, 1) .AND. &
    3067              :                             gbo(2, 1) - 1 <= bo(2, 1) .AND. gbo(2, 1) - gbo(1, 1) > 2) THEN
    3068              :                            IF (gbo(2, 1) <= bo(2, 1)) THEN
    3069              :                               out_val(gbo(2, 1) - 1, j, k) = out_val(gbo(2, 1) - 1, j, k) + &
    3070              :                                                              in_val(gbo(2, 1), myj, myk)*w_j*weights_1d(1)* &
    3071              :                                                              (1._dp/weights_1d(0) - 1._dp)
    3072              :                            ELSE
    3073              :                               out_val(gbo(2, 1) - 1, j, k) = out_val(gbo(2, 1) - 1, j, k) + &
    3074              :                                                              u_boundary(myj, myk)*w_j*weights_1d(1)* &
    3075              :                                                              (1._dp/weights_1d(0) - 1._dp)
    3076              :                            END IF
    3077              :                         END IF
    3078              :                      END IF
    3079              : 
    3080              :                   END IF
    3081              :                END DO
    3082              :             END DO
    3083              :          END DO
    3084              :       END DO
    3085              : 
    3086       138700 :       IF (is_split) THEN
    3087         2232 :          DEALLOCATE (l_boundary, u_boundary)
    3088              :       END IF
    3089       138700 :    END SUBROUTINE pw_nn_compose_r_no_pbc
    3090              : 
    3091              : ! **************************************************************************************************
    3092              : !> \brief ...
    3093              : !> \param pw_in ...
    3094              : !> \param pw_out ...
    3095              : ! **************************************************************************************************
    3096        41588 :    SUBROUTINE spl3_nopbc(pw_in, pw_out)
    3097              : 
    3098              :       TYPE(pw_r3d_rs_type), INTENT(IN)                   :: pw_in
    3099              :       TYPE(pw_r3d_rs_type), INTENT(INOUT)                :: pw_out
    3100              : 
    3101        41588 :       CALL pw_zero(pw_out)
    3102              :       CALL pw_nn_compose_r_no_pbc(weights_1d=spl3_1d_coeffs0, pw_in=pw_in, &
    3103        41588 :                                   pw_out=pw_out, sharpen=.FALSE., normalize=.FALSE.)
    3104              : 
    3105        41588 :    END SUBROUTINE spl3_nopbc
    3106              : 
    3107              : ! **************************************************************************************************
    3108              : !> \brief ...
    3109              : !> \param pw_in ...
    3110              : !> \param pw_out ...
    3111              : ! **************************************************************************************************
    3112        27759 :    SUBROUTINE spl3_nopbct(pw_in, pw_out)
    3113              : 
    3114              :       TYPE(pw_r3d_rs_type), INTENT(IN)                   :: pw_in
    3115              :       TYPE(pw_r3d_rs_type), INTENT(INOUT)                :: pw_out
    3116              : 
    3117        27759 :       CALL pw_zero(pw_out)
    3118              :       CALL pw_nn_compose_r_no_pbc(weights_1d=spl3_1d_coeffs0, pw_in=pw_in, &
    3119        27759 :                                   pw_out=pw_out, sharpen=.FALSE., normalize=.FALSE., transpose=.TRUE.)
    3120              : 
    3121        27759 :    END SUBROUTINE spl3_nopbct
    3122              : 
    3123              : ! **************************************************************************************************
    3124              : !> \brief ...
    3125              : !> \param pw_in ...
    3126              : !> \param pw_out ...
    3127              : ! **************************************************************************************************
    3128         6772 :    SUBROUTINE spl3_pbc(pw_in, pw_out)
    3129              : 
    3130              :       TYPE(pw_r3d_rs_type), INTENT(IN)                   :: pw_in
    3131              :       TYPE(pw_r3d_rs_type), INTENT(INOUT)                :: pw_out
    3132              : 
    3133         6772 :       CALL pw_zero(pw_out)
    3134         6772 :       CALL pw_nn_smear_r(pw_in, pw_out, coeffs=spline3_coeffs)
    3135              : 
    3136         6772 :    END SUBROUTINE spl3_pbc
    3137              : 
    3138              : ! **************************************************************************************************
    3139              : !> \brief Evaluates the PBC interpolated Spline (pw) function on the generic
    3140              : !>      input vector (vec)
    3141              : !> \param vec ...
    3142              : !> \param pw ...
    3143              : !> \return ...
    3144              : !> \par History
    3145              : !>      12.2007 Adapted for use with distributed grids [rdeclerck]
    3146              : !> \author Teodoro Laino 12/2005 [tlaino]
    3147              : !> \note
    3148              : !>      Requires the Spline coefficients to be computed with PBC
    3149              : ! **************************************************************************************************
    3150      1821909 :    FUNCTION Eval_Interp_Spl3_pbc(vec, pw) RESULT(val)
    3151              :       REAL(KIND=dp), DIMENSION(3), INTENT(in)            :: vec
    3152              :       TYPE(pw_r3d_rs_type), INTENT(IN)                   :: pw
    3153              :       REAL(KIND=dp)                                      :: val
    3154              : 
    3155              :       INTEGER                                            :: i, ivec(3), j, k, npts(3)
    3156              :       INTEGER, DIMENSION(2, 3)                           :: bo, bo_l
    3157              :       INTEGER, DIMENSION(4)                              :: ii, ij, ik
    3158              :       LOGICAL                                            :: my_mpsum
    3159              :       REAL(KIND=dp) :: a1, a2, a3, b1, b2, b3, c1, c2, c3, d1, d2, d3, dr1, dr2, dr3, e1, e2, e3, &
    3160              :          f1, f2, f3, g1, g2, g3, h1, h2, h3, p1, p2, p3, q1, q2, q3, r1, r2, r3, s1, s2, s3, s4, &
    3161              :          t1, t2, t3, t4, u1, u2, u3, v1, v2, v3, v4, xd1, xd2, xd3
    3162              :       REAL(KIND=dp), DIMENSION(4, 4, 4)                  :: box
    3163      1821909 :       REAL(KIND=dp), DIMENSION(:, :, :), POINTER         :: grid
    3164              : 
    3165      1821909 :       NULLIFY (grid)
    3166      1821909 :       my_mpsum = (pw%pw_grid%para%mode /= PW_MODE_LOCAL)
    3167      7287636 :       npts = pw%pw_grid%npts
    3168      7287636 :       ivec = FLOOR(vec/pw%pw_grid%dr)
    3169      1821909 :       dr1 = pw%pw_grid%dr(1)
    3170      1821909 :       dr2 = pw%pw_grid%dr(2)
    3171      1821909 :       dr3 = pw%pw_grid%dr(3)
    3172              : 
    3173      1821909 :       xd1 = (vec(1)/dr1) - REAL(ivec(1), kind=dp)
    3174      1821909 :       xd2 = (vec(2)/dr2) - REAL(ivec(2), kind=dp)
    3175      1821909 :       xd3 = (vec(3)/dr3) - REAL(ivec(3), kind=dp)
    3176      1821909 :       grid => pw%array(:, :, :)
    3177     18219090 :       bo = pw%pw_grid%bounds
    3178     18219090 :       bo_l = pw%pw_grid%bounds_local
    3179              : 
    3180      1821909 :       ik(1) = MODULO(ivec(3) - 1, npts(3)) + bo(1, 3)
    3181      1821909 :       ik(2) = MODULO(ivec(3), npts(3)) + bo(1, 3)
    3182      1821909 :       ik(3) = MODULO(ivec(3) + 1, npts(3)) + bo(1, 3)
    3183      1821909 :       ik(4) = MODULO(ivec(3) + 2, npts(3)) + bo(1, 3)
    3184              : 
    3185      1821909 :       ij(1) = MODULO(ivec(2) - 1, npts(2)) + bo(1, 2)
    3186      1821909 :       ij(2) = MODULO(ivec(2), npts(2)) + bo(1, 2)
    3187      1821909 :       ij(3) = MODULO(ivec(2) + 1, npts(2)) + bo(1, 2)
    3188      1821909 :       ij(4) = MODULO(ivec(2) + 2, npts(2)) + bo(1, 2)
    3189              : 
    3190      1821909 :       ii(1) = MODULO(ivec(1) - 1, npts(1)) + bo(1, 1)
    3191      1821909 :       ii(2) = MODULO(ivec(1), npts(1)) + bo(1, 1)
    3192      1821909 :       ii(3) = MODULO(ivec(1) + 1, npts(1)) + bo(1, 1)
    3193      1821909 :       ii(4) = MODULO(ivec(1) + 2, npts(1)) + bo(1, 1)
    3194              : 
    3195      9109545 :       DO k = 1, 4
    3196     38260089 :          DO j = 1, 4
    3197    153040356 :             DO i = 1, 4
    3198              :                IF ( &
    3199              :                   ii(i) >= bo_l(1, 1) .AND. &
    3200              :                   ii(i) <= bo_l(2, 1) .AND. &
    3201              :                   ij(j) >= bo_l(1, 2) .AND. &
    3202              :                   ij(j) <= bo_l(2, 2) .AND. &
    3203    116602176 :                   ik(k) >= bo_l(1, 3) .AND. &
    3204              :                   ik(k) <= bo_l(2, 3) &
    3205     29150544 :                   ) THEN
    3206              :                   box(i, j, k) = grid(ii(i) + 1 - bo_l(1, 1), &
    3207              :                                       ij(j) + 1 - bo_l(1, 2), &
    3208     58528128 :                                       ik(k) + 1 - bo_l(1, 3))
    3209              :                ELSE
    3210     58074048 :                   box(i, j, k) = 0.0_dp
    3211              :                END IF
    3212              :             END DO
    3213              :          END DO
    3214              :       END DO
    3215              : 
    3216      1821909 :       a1 = 3.0_dp + xd1
    3217      1821909 :       a2 = a1*a1
    3218      1821909 :       a3 = a2*a1
    3219      1821909 :       b1 = 2.0_dp + xd1
    3220      1821909 :       b2 = b1*b1
    3221      1821909 :       b3 = b2*b1
    3222      1821909 :       c1 = 1.0_dp + xd1
    3223      1821909 :       c2 = c1*c1
    3224      1821909 :       c3 = c2*c1
    3225      1821909 :       d1 = xd1
    3226      1821909 :       d2 = d1*d1
    3227      1821909 :       d3 = d2*d1
    3228      1821909 :       e1 = 3.0_dp + xd2
    3229      1821909 :       e2 = e1*e1
    3230      1821909 :       e3 = e2*e1
    3231      1821909 :       f1 = 2.0_dp + xd2
    3232      1821909 :       f2 = f1*f1
    3233      1821909 :       f3 = f2*f1
    3234      1821909 :       g1 = 1.0_dp + xd2
    3235      1821909 :       g2 = g1*g1
    3236      1821909 :       g3 = g2*g1
    3237      1821909 :       h1 = xd2
    3238      1821909 :       h2 = h1*h1
    3239      1821909 :       h3 = h2*h1
    3240      1821909 :       p1 = 3.0_dp + xd3
    3241      1821909 :       p2 = p1*p1
    3242      1821909 :       p3 = p2*p1
    3243      1821909 :       q1 = 2.0_dp + xd3
    3244      1821909 :       q2 = q1*q1
    3245      1821909 :       q3 = q2*q1
    3246      1821909 :       r1 = 1.0_dp + xd3
    3247      1821909 :       r2 = r1*r1
    3248      1821909 :       r3 = r2*r1
    3249      1821909 :       u1 = xd3
    3250      1821909 :       u2 = u1*u1
    3251      1821909 :       u3 = u2*u1
    3252              : 
    3253      1821909 :       t1 = 1.0_dp/6.0_dp*(64.0_dp - 48.0_dp*a1 + 12.0_dp*a2 - a3)
    3254      1821909 :       t2 = -22.0_dp/3.0_dp + 10.0_dp*b1 - 4.0_dp*b2 + 0.5_dp*b3
    3255      1821909 :       t3 = 2.0_dp/3.0_dp - 2.0_dp*c1 + 2.0_dp*c2 - 0.5_dp*c3
    3256      1821909 :       t4 = 1.0_dp/6.0_dp*d3
    3257      1821909 :       s1 = 1.0_dp/6.0_dp*(64.0_dp - 48.0_dp*e1 + 12.0_dp*e2 - e3)
    3258      1821909 :       s2 = -22.0_dp/3.0_dp + 10.0_dp*f1 - 4.0_dp*f2 + 0.5_dp*f3
    3259      1821909 :       s3 = 2.0_dp/3.0_dp - 2.0_dp*g1 + 2.0_dp*g2 - 0.5_dp*g3
    3260      1821909 :       s4 = 1.0_dp/6.0_dp*h3
    3261      1821909 :       v1 = 1.0_dp/6.0_dp*(64.0_dp - 48.0_dp*p1 + 12.0_dp*p2 - p3)
    3262      1821909 :       v2 = -22.0_dp/3.0_dp + 10.0_dp*q1 - 4.0_dp*q2 + 0.5_dp*q3
    3263      1821909 :       v3 = 2.0_dp/3.0_dp - 2.0_dp*r1 + 2.0_dp*r2 - 0.5_dp*r3
    3264      1821909 :       v4 = 1.0_dp/6.0_dp*u3
    3265              : 
    3266              :       val = ((box(1, 1, 1)*t1 + box(2, 1, 1)*t2 + box(3, 1, 1)*t3 + box(4, 1, 1)*t4)*s1 + &
    3267              :              (box(1, 2, 1)*t1 + box(2, 2, 1)*t2 + box(3, 2, 1)*t3 + box(4, 2, 1)*t4)*s2 + &
    3268              :              (box(1, 3, 1)*t1 + box(2, 3, 1)*t2 + box(3, 3, 1)*t3 + box(4, 3, 1)*t4)*s3 + &
    3269              :              (box(1, 4, 1)*t1 + box(2, 4, 1)*t2 + box(3, 4, 1)*t3 + box(4, 4, 1)*t4)*s4)*v1 + &
    3270              :             ((box(1, 1, 2)*t1 + box(2, 1, 2)*t2 + box(3, 1, 2)*t3 + box(4, 1, 2)*t4)*s1 + &
    3271              :              (box(1, 2, 2)*t1 + box(2, 2, 2)*t2 + box(3, 2, 2)*t3 + box(4, 2, 2)*t4)*s2 + &
    3272              :              (box(1, 3, 2)*t1 + box(2, 3, 2)*t2 + box(3, 3, 2)*t3 + box(4, 3, 2)*t4)*s3 + &
    3273              :              (box(1, 4, 2)*t1 + box(2, 4, 2)*t2 + box(3, 4, 2)*t3 + box(4, 4, 2)*t4)*s4)*v2 + &
    3274              :             ((box(1, 1, 3)*t1 + box(2, 1, 3)*t2 + box(3, 1, 3)*t3 + box(4, 1, 3)*t4)*s1 + &
    3275              :              (box(1, 2, 3)*t1 + box(2, 2, 3)*t2 + box(3, 2, 3)*t3 + box(4, 2, 3)*t4)*s2 + &
    3276              :              (box(1, 3, 3)*t1 + box(2, 3, 3)*t2 + box(3, 3, 3)*t3 + box(4, 3, 3)*t4)*s3 + &
    3277              :              (box(1, 4, 3)*t1 + box(2, 4, 3)*t2 + box(3, 4, 3)*t3 + box(4, 4, 3)*t4)*s4)*v3 + &
    3278              :             ((box(1, 1, 4)*t1 + box(2, 1, 4)*t2 + box(3, 1, 4)*t3 + box(4, 1, 4)*t4)*s1 + &
    3279              :              (box(1, 2, 4)*t1 + box(2, 2, 4)*t2 + box(3, 2, 4)*t3 + box(4, 2, 4)*t4)*s2 + &
    3280              :              (box(1, 3, 4)*t1 + box(2, 3, 4)*t2 + box(3, 3, 4)*t3 + box(4, 3, 4)*t4)*s3 + &
    3281      1821909 :              (box(1, 4, 4)*t1 + box(2, 4, 4)*t2 + box(3, 4, 4)*t3 + box(4, 4, 4)*t4)*s4)*v4
    3282              : 
    3283      1821909 :       IF (my_mpsum) CALL pw%pw_grid%para%group%sum(val)
    3284              : 
    3285      1821909 :    END FUNCTION Eval_Interp_Spl3_pbc
    3286              : 
    3287              : ! **************************************************************************************************
    3288              : !> \brief Evaluates the derivatives of the PBC interpolated Spline (pw)
    3289              : !>      function on the generic input vector (vec)
    3290              : !> \param vec ...
    3291              : !> \param pw ...
    3292              : !> \return ...
    3293              : !> \par History
    3294              : !>      12.2007 Adapted for use with distributed grids [rdeclerck]
    3295              : !> \author Teodoro Laino 12/2005 [tlaino]
    3296              : !> \note
    3297              : !>      Requires the Spline coefficients to be computed with PBC
    3298              : ! **************************************************************************************************
    3299        10166 :    FUNCTION Eval_d_Interp_Spl3_pbc(vec, pw) RESULT(val)
    3300              :       REAL(KIND=dp), DIMENSION(3), INTENT(in)            :: vec
    3301              :       TYPE(pw_r3d_rs_type), INTENT(IN)                   :: pw
    3302              :       REAL(KIND=dp)                                      :: val(3)
    3303              : 
    3304              :       INTEGER                                            :: i, ivec(3), j, k, npts(3)
    3305              :       INTEGER, DIMENSION(2, 3)                           :: bo, bo_l
    3306              :       INTEGER, DIMENSION(4)                              :: ii, ij, ik
    3307              :       LOGICAL                                            :: my_mpsum
    3308              :       REAL(KIND=dp) :: a1, a2, a3, b1, b2, b3, c1, c2, c3, d1, d2, d3, dr1, dr1i, dr2, dr2i, dr3, &
    3309              :          dr3i, e1, e2, e3, f1, f2, f3, g1, g2, g3, h1, h2, h3, p1, p2, p3, q1, q2, q3, r1, r2, r3, &
    3310              :          s1, s1d, s1o, s2, s2d, s2o, s3, s3d, s3o, s4, s4d, s4o, t1, t1d, t1o, t2, t2d, t2o, t3, &
    3311              :          t3d, t3o, t4, t4d, t4o, u1, u2, u3, v1, v1d, v1o, v2, v2d, v2o, v3, v3d, v3o, v4, v4d, &
    3312              :          v4o, xd1, xd2, xd3
    3313              :       REAL(KIND=dp), DIMENSION(4, 4, 4)                  :: box
    3314         5083 :       REAL(KIND=dp), DIMENSION(:, :, :), POINTER         :: grid
    3315              : 
    3316         5083 :       NULLIFY (grid)
    3317         5083 :       my_mpsum = (pw%pw_grid%para%mode /= PW_MODE_LOCAL)
    3318        20332 :       npts = pw%pw_grid%npts
    3319        20332 :       ivec = FLOOR(vec/pw%pw_grid%dr)
    3320         5083 :       dr1 = pw%pw_grid%dr(1)
    3321         5083 :       dr2 = pw%pw_grid%dr(2)
    3322         5083 :       dr3 = pw%pw_grid%dr(3)
    3323         5083 :       dr1i = 1.0_dp/dr1
    3324         5083 :       dr2i = 1.0_dp/dr2
    3325         5083 :       dr3i = 1.0_dp/dr3
    3326         5083 :       xd1 = (vec(1)/dr1) - REAL(ivec(1), kind=dp)
    3327         5083 :       xd2 = (vec(2)/dr2) - REAL(ivec(2), kind=dp)
    3328         5083 :       xd3 = (vec(3)/dr3) - REAL(ivec(3), kind=dp)
    3329         5083 :       grid => pw%array(:, :, :)
    3330        50830 :       bo = pw%pw_grid%bounds
    3331        50830 :       bo_l = pw%pw_grid%bounds_local
    3332              : 
    3333         5083 :       ik(1) = MODULO(ivec(3) - 1, npts(3)) + bo(1, 3)
    3334         5083 :       ik(2) = MODULO(ivec(3), npts(3)) + bo(1, 3)
    3335         5083 :       ik(3) = MODULO(ivec(3) + 1, npts(3)) + bo(1, 3)
    3336         5083 :       ik(4) = MODULO(ivec(3) + 2, npts(3)) + bo(1, 3)
    3337              : 
    3338         5083 :       ij(1) = MODULO(ivec(2) - 1, npts(2)) + bo(1, 2)
    3339         5083 :       ij(2) = MODULO(ivec(2), npts(2)) + bo(1, 2)
    3340         5083 :       ij(3) = MODULO(ivec(2) + 1, npts(2)) + bo(1, 2)
    3341         5083 :       ij(4) = MODULO(ivec(2) + 2, npts(2)) + bo(1, 2)
    3342              : 
    3343         5083 :       ii(1) = MODULO(ivec(1) - 1, npts(1)) + bo(1, 1)
    3344         5083 :       ii(2) = MODULO(ivec(1), npts(1)) + bo(1, 1)
    3345         5083 :       ii(3) = MODULO(ivec(1) + 1, npts(1)) + bo(1, 1)
    3346         5083 :       ii(4) = MODULO(ivec(1) + 2, npts(1)) + bo(1, 1)
    3347              : 
    3348        25415 :       DO k = 1, 4
    3349       106743 :          DO j = 1, 4
    3350       426972 :             DO i = 1, 4
    3351              :                IF ( &
    3352              :                   ii(i) >= bo_l(1, 1) .AND. &
    3353              :                   ii(i) <= bo_l(2, 1) .AND. &
    3354              :                   ij(j) >= bo_l(1, 2) .AND. &
    3355              :                   ij(j) <= bo_l(2, 2) .AND. &
    3356       325312 :                   ik(k) >= bo_l(1, 3) .AND. &
    3357              :                   ik(k) <= bo_l(2, 3) &
    3358        81328 :                   ) THEN
    3359              :                   box(i, j, k) = grid(ii(i) + 1 - bo_l(1, 1), &
    3360              :                                       ij(j) + 1 - bo_l(1, 2), &
    3361       324160 :                                       ik(k) + 1 - bo_l(1, 3))
    3362              :                ELSE
    3363         1152 :                   box(i, j, k) = 0.0_dp
    3364              :                END IF
    3365              :             END DO
    3366              :          END DO
    3367              :       END DO
    3368              : 
    3369         5083 :       a1 = 3.0_dp + xd1
    3370         5083 :       a2 = a1*a1
    3371         5083 :       a3 = a2*a1
    3372         5083 :       b1 = 2.0_dp + xd1
    3373         5083 :       b2 = b1*b1
    3374         5083 :       b3 = b2*b1
    3375         5083 :       c1 = 1.0_dp + xd1
    3376         5083 :       c2 = c1*c1
    3377         5083 :       c3 = c2*c1
    3378         5083 :       d1 = xd1
    3379         5083 :       d2 = d1*d1
    3380         5083 :       d3 = d2*d1
    3381         5083 :       e1 = 3.0_dp + xd2
    3382         5083 :       e2 = e1*e1
    3383         5083 :       e3 = e2*e1
    3384         5083 :       f1 = 2.0_dp + xd2
    3385         5083 :       f2 = f1*f1
    3386         5083 :       f3 = f2*f1
    3387         5083 :       g1 = 1.0_dp + xd2
    3388         5083 :       g2 = g1*g1
    3389         5083 :       g3 = g2*g1
    3390         5083 :       h1 = xd2
    3391         5083 :       h2 = h1*h1
    3392         5083 :       h3 = h2*h1
    3393         5083 :       p1 = 3.0_dp + xd3
    3394         5083 :       p2 = p1*p1
    3395         5083 :       p3 = p2*p1
    3396         5083 :       q1 = 2.0_dp + xd3
    3397         5083 :       q2 = q1*q1
    3398         5083 :       q3 = q2*q1
    3399         5083 :       r1 = 1.0_dp + xd3
    3400         5083 :       r2 = r1*r1
    3401         5083 :       r3 = r2*r1
    3402         5083 :       u1 = xd3
    3403         5083 :       u2 = u1*u1
    3404         5083 :       u3 = u2*u1
    3405              : 
    3406         5083 :       t1o = 1.0_dp/6.0_dp*(64.0_dp - 48.0_dp*a1 + 12.0_dp*a2 - a3)
    3407         5083 :       t2o = -22.0_dp/3.0_dp + 10.0_dp*b1 - 4.0_dp*b2 + 0.5_dp*b3
    3408         5083 :       t3o = 2.0_dp/3.0_dp - 2.0_dp*c1 + 2.0_dp*c2 - 0.5_dp*c3
    3409         5083 :       t4o = 1.0_dp/6.0_dp*d3
    3410         5083 :       s1o = 1.0_dp/6.0_dp*(64.0_dp - 48.0_dp*e1 + 12.0_dp*e2 - e3)
    3411         5083 :       s2o = -22.0_dp/3.0_dp + 10.0_dp*f1 - 4.0_dp*f2 + 0.5_dp*f3
    3412         5083 :       s3o = 2.0_dp/3.0_dp - 2.0_dp*g1 + 2.0_dp*g2 - 0.5_dp*g3
    3413         5083 :       s4o = 1.0_dp/6.0_dp*h3
    3414         5083 :       v1o = 1.0_dp/6.0_dp*(64.0_dp - 48.0_dp*p1 + 12.0_dp*p2 - p3)
    3415         5083 :       v2o = -22.0_dp/3.0_dp + 10.0_dp*q1 - 4.0_dp*q2 + 0.5_dp*q3
    3416         5083 :       v3o = 2.0_dp/3.0_dp - 2.0_dp*r1 + 2.0_dp*r2 - 0.5_dp*r3
    3417         5083 :       v4o = 1.0_dp/6.0_dp*u3
    3418              : 
    3419         5083 :       t1d = -8.0_dp + 4.0_dp*a1 - 0.5_dp*a2
    3420         5083 :       t2d = 10.0_dp - 8.0_dp*b1 + 1.5_dp*b2
    3421         5083 :       t3d = -2.0_dp + 4.0_dp*c1 - 1.5_dp*c2
    3422         5083 :       t4d = 0.5_dp*d2
    3423         5083 :       s1d = -8.0_dp + 4.0_dp*e1 - 0.5_dp*e2
    3424         5083 :       s2d = 10.0_dp - 8.0_dp*f1 + 1.5_dp*f2
    3425         5083 :       s3d = -2.0_dp + 4.0_dp*g1 - 1.5_dp*g2
    3426         5083 :       s4d = 0.5_dp*h2
    3427         5083 :       v1d = -8.0_dp + 4.0_dp*p1 - 0.5_dp*p2
    3428         5083 :       v2d = 10.0_dp - 8.0_dp*q1 + 1.5_dp*q2
    3429         5083 :       v3d = -2.0_dp + 4.0_dp*r1 - 1.5_dp*r2
    3430         5083 :       v4d = 0.5_dp*u2
    3431              : 
    3432         5083 :       t1 = t1d*dr1i
    3433         5083 :       t2 = t2d*dr1i
    3434         5083 :       t3 = t3d*dr1i
    3435         5083 :       t4 = t4d*dr1i
    3436         5083 :       s1 = s1o
    3437         5083 :       s2 = s2o
    3438         5083 :       s3 = s3o
    3439         5083 :       s4 = s4o
    3440         5083 :       v1 = v1o
    3441         5083 :       v2 = v2o
    3442         5083 :       v3 = v3o
    3443         5083 :       v4 = v4o
    3444              :       val(1) = ((box(1, 1, 1)*t1 + box(2, 1, 1)*t2 + box(3, 1, 1)*t3 + box(4, 1, 1)*t4)*s1 + &
    3445              :                 (box(1, 2, 1)*t1 + box(2, 2, 1)*t2 + box(3, 2, 1)*t3 + box(4, 2, 1)*t4)*s2 + &
    3446              :                 (box(1, 3, 1)*t1 + box(2, 3, 1)*t2 + box(3, 3, 1)*t3 + box(4, 3, 1)*t4)*s3 + &
    3447              :                 (box(1, 4, 1)*t1 + box(2, 4, 1)*t2 + box(3, 4, 1)*t3 + box(4, 4, 1)*t4)*s4)*v1 + &
    3448              :                ((box(1, 1, 2)*t1 + box(2, 1, 2)*t2 + box(3, 1, 2)*t3 + box(4, 1, 2)*t4)*s1 + &
    3449              :                 (box(1, 2, 2)*t1 + box(2, 2, 2)*t2 + box(3, 2, 2)*t3 + box(4, 2, 2)*t4)*s2 + &
    3450              :                 (box(1, 3, 2)*t1 + box(2, 3, 2)*t2 + box(3, 3, 2)*t3 + box(4, 3, 2)*t4)*s3 + &
    3451              :                 (box(1, 4, 2)*t1 + box(2, 4, 2)*t2 + box(3, 4, 2)*t3 + box(4, 4, 2)*t4)*s4)*v2 + &
    3452              :                ((box(1, 1, 3)*t1 + box(2, 1, 3)*t2 + box(3, 1, 3)*t3 + box(4, 1, 3)*t4)*s1 + &
    3453              :                 (box(1, 2, 3)*t1 + box(2, 2, 3)*t2 + box(3, 2, 3)*t3 + box(4, 2, 3)*t4)*s2 + &
    3454              :                 (box(1, 3, 3)*t1 + box(2, 3, 3)*t2 + box(3, 3, 3)*t3 + box(4, 3, 3)*t4)*s3 + &
    3455              :                 (box(1, 4, 3)*t1 + box(2, 4, 3)*t2 + box(3, 4, 3)*t3 + box(4, 4, 3)*t4)*s4)*v3 + &
    3456              :                ((box(1, 1, 4)*t1 + box(2, 1, 4)*t2 + box(3, 1, 4)*t3 + box(4, 1, 4)*t4)*s1 + &
    3457              :                 (box(1, 2, 4)*t1 + box(2, 2, 4)*t2 + box(3, 2, 4)*t3 + box(4, 2, 4)*t4)*s2 + &
    3458              :                 (box(1, 3, 4)*t1 + box(2, 3, 4)*t2 + box(3, 3, 4)*t3 + box(4, 3, 4)*t4)*s3 + &
    3459         5083 :                 (box(1, 4, 4)*t1 + box(2, 4, 4)*t2 + box(3, 4, 4)*t3 + box(4, 4, 4)*t4)*s4)*v4
    3460              : 
    3461         5083 :       t1 = t1o
    3462         5083 :       t2 = t2o
    3463         5083 :       t3 = t3o
    3464         5083 :       t4 = t4o
    3465         5083 :       s1 = s1d*dr2i
    3466         5083 :       s2 = s2d*dr2i
    3467         5083 :       s3 = s3d*dr2i
    3468         5083 :       s4 = s4d*dr2i
    3469         5083 :       v1 = v1o
    3470         5083 :       v2 = v2o
    3471         5083 :       v3 = v3o
    3472         5083 :       v4 = v4o
    3473              :       val(2) = ((box(1, 1, 1)*t1 + box(2, 1, 1)*t2 + box(3, 1, 1)*t3 + box(4, 1, 1)*t4)*s1 + &
    3474              :                 (box(1, 2, 1)*t1 + box(2, 2, 1)*t2 + box(3, 2, 1)*t3 + box(4, 2, 1)*t4)*s2 + &
    3475              :                 (box(1, 3, 1)*t1 + box(2, 3, 1)*t2 + box(3, 3, 1)*t3 + box(4, 3, 1)*t4)*s3 + &
    3476              :                 (box(1, 4, 1)*t1 + box(2, 4, 1)*t2 + box(3, 4, 1)*t3 + box(4, 4, 1)*t4)*s4)*v1 + &
    3477              :                ((box(1, 1, 2)*t1 + box(2, 1, 2)*t2 + box(3, 1, 2)*t3 + box(4, 1, 2)*t4)*s1 + &
    3478              :                 (box(1, 2, 2)*t1 + box(2, 2, 2)*t2 + box(3, 2, 2)*t3 + box(4, 2, 2)*t4)*s2 + &
    3479              :                 (box(1, 3, 2)*t1 + box(2, 3, 2)*t2 + box(3, 3, 2)*t3 + box(4, 3, 2)*t4)*s3 + &
    3480              :                 (box(1, 4, 2)*t1 + box(2, 4, 2)*t2 + box(3, 4, 2)*t3 + box(4, 4, 2)*t4)*s4)*v2 + &
    3481              :                ((box(1, 1, 3)*t1 + box(2, 1, 3)*t2 + box(3, 1, 3)*t3 + box(4, 1, 3)*t4)*s1 + &
    3482              :                 (box(1, 2, 3)*t1 + box(2, 2, 3)*t2 + box(3, 2, 3)*t3 + box(4, 2, 3)*t4)*s2 + &
    3483              :                 (box(1, 3, 3)*t1 + box(2, 3, 3)*t2 + box(3, 3, 3)*t3 + box(4, 3, 3)*t4)*s3 + &
    3484              :                 (box(1, 4, 3)*t1 + box(2, 4, 3)*t2 + box(3, 4, 3)*t3 + box(4, 4, 3)*t4)*s4)*v3 + &
    3485              :                ((box(1, 1, 4)*t1 + box(2, 1, 4)*t2 + box(3, 1, 4)*t3 + box(4, 1, 4)*t4)*s1 + &
    3486              :                 (box(1, 2, 4)*t1 + box(2, 2, 4)*t2 + box(3, 2, 4)*t3 + box(4, 2, 4)*t4)*s2 + &
    3487              :                 (box(1, 3, 4)*t1 + box(2, 3, 4)*t2 + box(3, 3, 4)*t3 + box(4, 3, 4)*t4)*s3 + &
    3488         5083 :                 (box(1, 4, 4)*t1 + box(2, 4, 4)*t2 + box(3, 4, 4)*t3 + box(4, 4, 4)*t4)*s4)*v4
    3489              : 
    3490         5083 :       t1 = t1o
    3491         5083 :       t2 = t2o
    3492         5083 :       t3 = t3o
    3493         5083 :       t4 = t4o
    3494         5083 :       s1 = s1o
    3495         5083 :       s2 = s2o
    3496         5083 :       s3 = s3o
    3497         5083 :       s4 = s4o
    3498         5083 :       v1 = v1d*dr3i
    3499         5083 :       v2 = v2d*dr3i
    3500         5083 :       v3 = v3d*dr3i
    3501         5083 :       v4 = v4d*dr3i
    3502              :       val(3) = ((box(1, 1, 1)*t1 + box(2, 1, 1)*t2 + box(3, 1, 1)*t3 + box(4, 1, 1)*t4)*s1 + &
    3503              :                 (box(1, 2, 1)*t1 + box(2, 2, 1)*t2 + box(3, 2, 1)*t3 + box(4, 2, 1)*t4)*s2 + &
    3504              :                 (box(1, 3, 1)*t1 + box(2, 3, 1)*t2 + box(3, 3, 1)*t3 + box(4, 3, 1)*t4)*s3 + &
    3505              :                 (box(1, 4, 1)*t1 + box(2, 4, 1)*t2 + box(3, 4, 1)*t3 + box(4, 4, 1)*t4)*s4)*v1 + &
    3506              :                ((box(1, 1, 2)*t1 + box(2, 1, 2)*t2 + box(3, 1, 2)*t3 + box(4, 1, 2)*t4)*s1 + &
    3507              :                 (box(1, 2, 2)*t1 + box(2, 2, 2)*t2 + box(3, 2, 2)*t3 + box(4, 2, 2)*t4)*s2 + &
    3508              :                 (box(1, 3, 2)*t1 + box(2, 3, 2)*t2 + box(3, 3, 2)*t3 + box(4, 3, 2)*t4)*s3 + &
    3509              :                 (box(1, 4, 2)*t1 + box(2, 4, 2)*t2 + box(3, 4, 2)*t3 + box(4, 4, 2)*t4)*s4)*v2 + &
    3510              :                ((box(1, 1, 3)*t1 + box(2, 1, 3)*t2 + box(3, 1, 3)*t3 + box(4, 1, 3)*t4)*s1 + &
    3511              :                 (box(1, 2, 3)*t1 + box(2, 2, 3)*t2 + box(3, 2, 3)*t3 + box(4, 2, 3)*t4)*s2 + &
    3512              :                 (box(1, 3, 3)*t1 + box(2, 3, 3)*t2 + box(3, 3, 3)*t3 + box(4, 3, 3)*t4)*s3 + &
    3513              :                 (box(1, 4, 3)*t1 + box(2, 4, 3)*t2 + box(3, 4, 3)*t3 + box(4, 4, 3)*t4)*s4)*v3 + &
    3514              :                ((box(1, 1, 4)*t1 + box(2, 1, 4)*t2 + box(3, 1, 4)*t3 + box(4, 1, 4)*t4)*s1 + &
    3515              :                 (box(1, 2, 4)*t1 + box(2, 2, 4)*t2 + box(3, 2, 4)*t3 + box(4, 2, 4)*t4)*s2 + &
    3516              :                 (box(1, 3, 4)*t1 + box(2, 3, 4)*t2 + box(3, 3, 4)*t3 + box(4, 3, 4)*t4)*s3 + &
    3517         5083 :                 (box(1, 4, 4)*t1 + box(2, 4, 4)*t2 + box(3, 4, 4)*t3 + box(4, 4, 4)*t4)*s4)*v4
    3518              : 
    3519         5083 :       IF (my_mpsum) CALL pw%pw_grid%para%group%sum(val)
    3520              : 
    3521         5083 :    END FUNCTION Eval_d_Interp_Spl3_pbc
    3522              : 
    3523            0 : END MODULE pw_spline_utils
        

Generated by: LCOV version 2.0-1