LCOV - code coverage report
Current view: top level - src/pw - pw_spline_utils.F (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:b195825) Lines: 1189 1453 81.8 %
Date: 2024-04-20 06:29:22 Functions: 23 24 95.8 %

          Line data    Source code
       1             : !--------------------------------------------------------------------------------------------------!
       2             : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3             : !   Copyright 2000-2024 CP2K developers group <https://cp2k.org>                                   !
       4             : !                                                                                                  !
       5             : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6             : !--------------------------------------------------------------------------------------------------!
       7             : 
       8             : ! **************************************************************************************************
       9             : !> \brief different utils that are useful to manipulate splines on the regular
      10             : !>      grid of a pw
      11             : !> \par History
      12             : !>      05.2003 created [fawzi]
      13             : !>      08.2004 removed spline evaluation method using more than 2 read streams
      14             : !>              (pw_compose_stripe_rs3), added linear solver based spline
      15             : !>              inversion [fawzi]
      16             : !> \author Fawzi Mohamed
      17             : ! **************************************************************************************************
      18             : MODULE pw_spline_utils
      19             : 
      20             :    USE cp_log_handling,                 ONLY: cp_get_default_logger,&
      21             :                                               cp_logger_type,&
      22             :                                               cp_to_string
      23             :    USE kinds,                           ONLY: dp
      24             :    USE mathconstants,                   ONLY: twopi
      25             :    USE message_passing,                 ONLY: mp_comm_congruent
      26             :    USE pw_grid_types,                   ONLY: FULLSPACE,&
      27             :                                               PW_MODE_LOCAL
      28             :    USE pw_methods,                      ONLY: pw_axpy,&
      29             :                                               pw_copy,&
      30             :                                               pw_integral_ab,&
      31             :                                               pw_zero
      32             :    USE pw_pool_types,                   ONLY: pw_pool_release,&
      33             :                                               pw_pool_type
      34             :    USE pw_types,                        ONLY: pw_c1d_gs_type,&
      35             :                                               pw_r3d_rs_type
      36             : #include "../base/base_uses.f90"
      37             : 
      38             :    IMPLICIT NONE
      39             :    PRIVATE
      40             : 
      41             :    LOGICAL, PRIVATE, PARAMETER :: debug_this_module = .TRUE.
      42             :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'pw_spline_utils'
      43             : 
      44             :    INTEGER, PARAMETER, PUBLIC               :: no_precond = 0, &
      45             :                                                precond_spl3_aint = 1, &
      46             :                                                precond_spl3_1 = 2, &
      47             :                                                precond_spl3_aint2 = 3, &
      48             :                                                precond_spl3_2 = 4, &
      49             :                                                precond_spl3_3 = 5
      50             : 
      51             :    REAL(KIND=dp), PUBLIC, PARAMETER, DIMENSION(4) :: nn10_coeffs = &
      52             :                                                      (/125._dp/216._dp, 25._dp/432._dp, 5._dp/864._dp, 1._dp/1728._dp/), &
      53             :                                                      spline3_coeffs = &
      54             :                                                      (/8._dp/(27._dp), 2._dp/(27._dp), 1._dp/(27._dp*2._dp), &
      55             :                                                        1._dp/(27._dp*8._dp)/), &
      56             :                                                      spline2_coeffs = &
      57             :                                                      (/27._dp/(64._dp), 9._dp/(64._dp*2_dp), 3._dp/(64._dp*4._dp), &
      58             :                                                        1._dp/(64._dp*8._dp)/), &
      59             :                                                      nn50_coeffs = &
      60             :                                                      (/15625._dp/17576._dp, 625._dp/35152._dp, 25._dp/70304._dp, &
      61             :                                                        1._dp/140608._dp/), &
      62             :                                                      spl3_aint_coeff = &
      63             :                                                      (/46._dp/27._dp, -2._dp/(27._dp), -1._dp/(27._dp*2._dp), &
      64             :                                                        -1._dp/(27._dp*8._dp)/), &
      65             :                                                      spl3_precond1_coeff = &
      66             :                                                      (/64._dp/3._dp, -8._dp/3._dp, -1._dp/3._dp, -1._dp/24._dp/), &
      67             :                                                      spl3_1d_transf_coeffs = &
      68             :                                                      (/2._dp/3._dp, 23._dp/48._dp, 1._dp/6._dp, 1._dp/48._dp/)
      69             : 
      70             :    REAL(KIND=dp), PUBLIC, PARAMETER, DIMENSION(3) :: spline3_deriv_coeffs = &
      71             :                                                      (/2.0_dp/9.0_dp, 1.0_dp/18.0_dp, 1.0_dp/72.0_dp/), &
      72             :                                                      spline2_deriv_coeffs = &
      73             :                                                      (/9.0_dp/32.0_dp, 3.0_dp/64.0_dp, 1.0_dp/128.0_dp/), &
      74             :                                                      nn10_deriv_coeffs = &
      75             :                                                      (/25._dp/72._dp, 5._dp/144, 1._dp/288._dp/), &
      76             :                                                      nn50_deriv_coeffs = &
      77             :                                                      (/625._dp/1352._dp, 25._dp/2704._dp, 1._dp/5408._dp/), &
      78             :                                                      spl3_1d_coeffs0 = &
      79             :                                                      (/1._dp/6_dp, 2._dp/3._dp, 1._dp/6._dp/), &
      80             :                                                      spl3_1d_transf_border1 = &
      81             :                                                      (/0.517977704_dp, 0.464044595_dp, 0.17977701e-1_dp/)
      82             : 
      83             :    PUBLIC :: pw_spline3_interpolate_values_g, &
      84             :              pw_spline3_deriv_g
      85             :    PUBLIC :: pw_spline_scale_deriv
      86             :    PUBLIC :: pw_spline2_interpolate_values_g, &
      87             :              pw_spline2_deriv_g
      88             :    PUBLIC :: pw_nn_smear_r, pw_nn_deriv_r, &
      89             :              spl3_nopbc, spl3_pbc, spl3_nopbct
      90             :    PUBLIC :: add_fine2coarse, add_coarse2fine
      91             :    PUBLIC :: pw_spline_precond_create, &
      92             :              pw_spline_do_precond, &
      93             :              pw_spline_precond_set_kind, &
      94             :              find_coeffs, &
      95             :              pw_spline_precond_release, &
      96             :              pw_spline_precond_type, &
      97             :              Eval_Interp_Spl3_pbc, &
      98             :              Eval_d_Interp_Spl3_pbc
      99             : 
     100             : !***
     101             : 
     102             : ! **************************************************************************************************
     103             : !> \brief stores information for the preconditioner used to calculate the
     104             : !>      coeffs of splines
     105             : !> \author fawzi
     106             : ! **************************************************************************************************
     107             :    TYPE pw_spline_precond_type
     108             :       INTEGER :: kind = no_precond
     109             :       REAL(kind=dp), DIMENSION(4) :: coeffs = 0.0_dp
     110             :       REAL(kind=dp), DIMENSION(3) :: coeffs_1d = 0.0_dp
     111             :       LOGICAL :: sharpen = .FALSE., normalize = .FALSE., pbc = .FALSE., transpose = .FALSE.
     112             :       TYPE(pw_pool_type), POINTER :: pool => NULL()
     113             :    END TYPE pw_spline_precond_type
     114             : 
     115             : CONTAINS
     116             : 
     117             : ! **************************************************************************************************
     118             : !> \brief calculates the FFT of the coefficients of the quadratic spline that
     119             : !>      interpolates the given values
     120             : !> \param spline_g on entry the FFT of the values to interpolate as cc,
     121             : !>        will contain the FFT of the coefficients of the spline
     122             : !> \par History
     123             : !>      06.2003 created [fawzi]
     124             : !> \author Fawzi Mohamed
     125             : !> \note
     126             : !>      does not work with spherical cutoff
     127             : ! **************************************************************************************************
     128       38532 :    SUBROUTINE pw_spline2_interpolate_values_g(spline_g)
     129             :       TYPE(pw_c1d_gs_type), INTENT(IN)                   :: spline_g
     130             : 
     131             :       CHARACTER(len=*), PARAMETER :: routineN = 'pw_spline2_interpolate_values_g'
     132             : 
     133             :       INTEGER                                            :: handle, i, ii, j, k
     134             :       INTEGER, DIMENSION(2, 3)                           :: gbo
     135             :       INTEGER, DIMENSION(3)                              :: n_tot
     136             :       REAL(KIND=dp)                                      :: c23, coeff
     137       38532 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: cosIVals, cosJVals, cosKVals
     138             : 
     139       38532 :       CALL timeset(routineN, handle)
     140             : 
     141      154128 :       n_tot(1:3) = spline_g%pw_grid%npts(1:3)
     142      385320 :       gbo = spline_g%pw_grid%bounds
     143             : 
     144       38532 :       CPASSERT(.NOT. spline_g%pw_grid%spherical)
     145       38532 :       CPASSERT(spline_g%pw_grid%grid_span == FULLSPACE)
     146             : 
     147           0 :       ALLOCATE (cosIVals(gbo(1, 1):gbo(2, 1)), cosJVals(gbo(1, 2):gbo(2, 2)), &
     148      269724 :                 cosKVals(gbo(1, 3):gbo(2, 3)))
     149             : 
     150       38532 :       coeff = twopi/n_tot(1)
     151       38532 : !$OMP     PARALLEL DO PRIVATE(i) DEFAULT(NONE) SHARED(cosIVals,coeff,gbo)
     152             :       DO i = gbo(1, 1), gbo(2, 1)
     153             :          cosIVals(i) = COS(coeff*REAL(i, dp))
     154             :       END DO
     155       38532 :       coeff = twopi/n_tot(2)
     156       38532 : !$OMP     PARALLEL DO PRIVATE(j) DEFAULT(NONE) SHARED(cosJVals,coeff,gbo)
     157             :       DO j = gbo(1, 2), gbo(2, 2)
     158             :          cosJVals(j) = COS(coeff*REAL(j, dp))
     159             :       END DO
     160       38532 :       coeff = twopi/n_tot(3)
     161       38532 : !$OMP     PARALLEL DO PRIVATE(k) DEFAULT(NONE) SHARED(cosKVals,coeff,gbo)
     162             :       DO k = gbo(1, 3), gbo(2, 3)
     163             :          cosKVals(k) = COS(coeff*REAL(k, dp))
     164             :       END DO
     165             : 
     166       38532 : !$OMP     PARALLEL DO PRIVATE(i,j,k,ii,coeff,c23) DEFAULT(NONE) SHARED(spline_g,cosIVals,cosJVals,cosKVals)
     167             :       DO ii = 1, SIZE(spline_g%array)
     168             :          i = spline_g%pw_grid%g_hat(1, ii)
     169             :          j = spline_g%pw_grid%g_hat(2, ii)
     170             :          k = spline_g%pw_grid%g_hat(3, ii)
     171             : 
     172             :          c23 = cosJVals(j)*cosKVals(k)
     173             :          coeff = 64.0_dp/(cosIVals(i)*c23 + &
     174             :                           (cosIVals(i)*cosJVals(j) + cosIVals(i)*cosKVals(k) + c23)*3.0_dp + &
     175             :                           (cosIVals(i) + cosJVals(j) + cosKVals(k))*9.0_dp + &
     176             :                           27.0_dp)
     177             : 
     178             :          spline_g%array(ii) = spline_g%array(ii)*coeff
     179             : 
     180             :       END DO
     181       38532 :       DEALLOCATE (cosIVals, cosJVals, cosKVals)
     182             : 
     183       38532 :       CALL timestop(handle)
     184       77064 :    END SUBROUTINE pw_spline2_interpolate_values_g
     185             : 
     186             : ! **************************************************************************************************
     187             : !> \brief calculates the FFT of the coefficients of the2 cubic spline that
     188             : !>      interpolates the given values
     189             : !> \param spline_g on entry the FFT of the values to interpolate as cc,
     190             : !>        will contain the FFT of the coefficients of the spline
     191             : !> \par History
     192             : !>      06.2003 created [fawzi]
     193             : !> \author Fawzi Mohamed
     194             : !> \note
     195             : !>      does not work with spherical cutoff
     196             : !>      stupid distribution for cos calculation, it should calculate only the
     197             : !>      needed cos, and avoid the mpi_allreduce
     198             : ! **************************************************************************************************
     199         396 :    SUBROUTINE pw_spline3_interpolate_values_g(spline_g)
     200             :       TYPE(pw_c1d_gs_type), INTENT(IN)                   :: spline_g
     201             : 
     202             :       CHARACTER(len=*), PARAMETER :: routineN = 'pw_spline3_interpolate_values_g'
     203             : 
     204             :       INTEGER                                            :: handle, i, ii, j, k
     205             :       INTEGER, DIMENSION(2, 3)                           :: gbo
     206             :       INTEGER, DIMENSION(3)                              :: n_tot
     207             :       REAL(KIND=dp)                                      :: c23, coeff
     208         396 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: cosIVals, cosJVals, cosKVals
     209             : 
     210         396 :       CALL timeset(routineN, handle)
     211             : 
     212        1584 :       n_tot(1:3) = spline_g%pw_grid%npts(1:3)
     213        3960 :       gbo = spline_g%pw_grid%bounds
     214             : 
     215         396 :       CPASSERT(.NOT. spline_g%pw_grid%spherical)
     216         396 :       CPASSERT(spline_g%pw_grid%grid_span == FULLSPACE)
     217             : 
     218           0 :       ALLOCATE (cosIVals(gbo(1, 1):gbo(2, 1)), &
     219           0 :                 cosJVals(gbo(1, 2):gbo(2, 2)), &
     220        2772 :                 cosKVals(gbo(1, 3):gbo(2, 3)))
     221             : 
     222         396 :       coeff = twopi/n_tot(1)
     223         396 : !$OMP     PARALLEL DO PRIVATE(i) DEFAULT(NONE) SHARED(cosIVals,coeff,gbo)
     224             :       DO i = gbo(1, 1), gbo(2, 1)
     225             :          cosIVals(i) = COS(coeff*REAL(i, dp))
     226             :       END DO
     227         396 :       coeff = twopi/n_tot(2)
     228         396 : !$OMP     PARALLEL DO PRIVATE(j) DEFAULT(NONE) SHARED(cosJVals,coeff,gbo)
     229             :       DO j = gbo(1, 2), gbo(2, 2)
     230             :          cosJVals(j) = COS(coeff*REAL(j, dp))
     231             :       END DO
     232         396 :       coeff = twopi/n_tot(3)
     233         396 : !$OMP     PARALLEL DO PRIVATE(k) DEFAULT(NONE) SHARED(cosKVals,coeff,gbo)
     234             :       DO k = gbo(1, 3), gbo(2, 3)
     235             :          cosKVals(k) = COS(coeff*REAL(k, dp))
     236             :       END DO
     237             : 
     238         396 : !$OMP     PARALLEL DO DEFAULT(NONE) PRIVATE(i,j,k,ii,coeff,c23) SHARED(spline_g,cosIVals,cosJVals,cosKVals)
     239             :       DO ii = 1, SIZE(spline_g%array)
     240             :          i = spline_g%pw_grid%g_hat(1, ii)
     241             :          j = spline_g%pw_grid%g_hat(2, ii)
     242             :          k = spline_g%pw_grid%g_hat(3, ii)
     243             :          ! no opt
     244             : !FM                coeff=1.0/((cosVal(1)*cosVal(2)*cosVal(3))/27.0_dp+&
     245             : !FM                     (cosVal(1)*cosVal(2)+cosVal(1)*cosVal(3)+&
     246             : !FM                     cosVal(2)*cosVal(3))*2.0_dp/27.0_dp+&
     247             : !FM                     (cosVal(1)+cosVal(2)+cosVal(3))*4.0_dp/27.0_dp+&
     248             : !FM                     8.0_dp/27.0_dp)
     249             :          ! opt
     250             :          c23 = cosJVals(j)*cosKVals(k)
     251             :          coeff = 27.0_dp/(cosIVals(i)*c23 + &
     252             :                           (cosIVals(i)*cosJVals(j) + cosIVals(i)*cosKVals(k) + c23)*2.0_dp + &
     253             :                           (cosIVals(i) + cosJVals(j) + cosKVals(k))*4.0_dp + &
     254             :                           8.0_dp)
     255             : 
     256             :          spline_g%array(ii) = spline_g%array(ii)*coeff
     257             : 
     258             :       END DO
     259         396 :       DEALLOCATE (cosIVals, cosJVals, cosKVals)
     260             : 
     261         396 :       CALL timestop(handle)
     262         792 :    END SUBROUTINE pw_spline3_interpolate_values_g
     263             : 
     264             : ! **************************************************************************************************
     265             : !> \brief rescales the derivatives from gridspacing=1 to the real derivatives
     266             : !> \param deriv_vals_r an array of x,y,z derivatives
     267             : !> \param transpose if true applies the transpose of the map (defaults to
     268             : !>        false)
     269             : !> \param scale a scaling factor (defaults to 1.0)
     270             : !> \par History
     271             : !>      06.2003 created [fawzi]
     272             : !> \author Fawzi Mohamed
     273             : ! **************************************************************************************************
     274       18110 :    SUBROUTINE pw_spline_scale_deriv(deriv_vals_r, transpose, scale)
     275             :       TYPE(pw_r3d_rs_type), DIMENSION(3), INTENT(IN)     :: deriv_vals_r
     276             :       LOGICAL, INTENT(in), OPTIONAL                      :: transpose
     277             :       REAL(KIND=dp), INTENT(in), OPTIONAL                :: scale
     278             : 
     279             :       CHARACTER(len=*), PARAMETER :: routineN = 'pw_spline_scale_deriv'
     280             : 
     281             :       INTEGER                                            :: handle, i, idir, j, k
     282             :       INTEGER, DIMENSION(2, 3)                           :: bo
     283             :       INTEGER, DIMENSION(3)                              :: n_tot
     284             :       LOGICAL                                            :: diag, my_transpose
     285             :       REAL(KIND=dp)                                      :: dVal1, dVal2, dVal3, my_scale, scalef
     286             :       REAL(KIND=dp), DIMENSION(3, 3)                     :: dh_inv, h_grid
     287       18110 :       REAL(kind=dp), DIMENSION(:, :, :), POINTER         :: ddata, ddata2, ddata3
     288             : 
     289       18110 :       CALL timeset(routineN, handle)
     290             : 
     291       18110 :       my_transpose = .FALSE.
     292       18110 :       IF (PRESENT(transpose)) my_transpose = transpose
     293       18110 :       my_scale = 1.0_dp
     294       18110 :       IF (PRESENT(scale)) my_scale = scale
     295       72440 :       n_tot(1:3) = deriv_vals_r(1)%pw_grid%npts(1:3)
     296      181100 :       bo = deriv_vals_r(1)%pw_grid%bounds_local
     297      235430 :       dh_inv = deriv_vals_r(1)%pw_grid%dh_inv
     298             : 
     299             :       ! map grid to real derivative
     300       18110 :       diag = .TRUE.
     301       18110 :       IF (my_transpose) THEN
     302       32848 :          DO j = 1, 3
     303      106756 :             DO i = 1, 3
     304       73908 :                h_grid(j, i) = my_scale*dh_inv(i, j) ! REAL(n_tot(i),dp)*cell_h_inv(i,j)
     305       98544 :                IF (i /= j .AND. h_grid(j, i) /= 0.0_dp) diag = .FALSE.
     306             :             END DO
     307             :          END DO
     308             :       ELSE
     309       39592 :          DO j = 1, 3
     310      128674 :             DO i = 1, 3
     311       89082 :                h_grid(i, j) = my_scale*dh_inv(i, j) ! REAL(n_tot(i),dp)*cell_h_inv(i,j)
     312      118776 :                IF (i /= j .AND. h_grid(i, j) /= 0.0_dp) diag = .FALSE.
     313             :             END DO
     314             :          END DO
     315             :       END IF
     316             : 
     317       18110 :       IF (diag) THEN
     318       71920 :          DO idir = 1, 3
     319       53940 :             ddata => deriv_vals_r(idir)%array
     320       53940 :             scalef = h_grid(idir, idir)
     321             :             CALL dscal((bo(2, 1) - bo(1, 1) + 1)*(bo(2, 2) - bo(1, 2) + 1)*(bo(2, 3) - bo(1, 3) + 1), &
     322       71920 :                        scalef, ddata, 1)
     323             :          END DO
     324             :       ELSE
     325         130 :          ddata => deriv_vals_r(1)%array
     326         130 :          ddata2 => deriv_vals_r(2)%array
     327         130 :          ddata3 => deriv_vals_r(3)%array
     328             : !$OMP        PARALLEL DO DEFAULT(NONE) PRIVATE(k,j,i,dVal1,dVal2,dVal3) &
     329         130 : !$OMP                 SHARED(ddata,ddata2,ddata3,h_grid,bo)
     330             :          DO k = bo(1, 3), bo(2, 3)
     331             :             DO j = bo(1, 2), bo(2, 2)
     332             :                DO i = bo(1, 1), bo(2, 1)
     333             : 
     334             :                   dVal1 = ddata(i, j, k)
     335             :                   dVal2 = ddata2(i, j, k)
     336             :                   dVal3 = ddata3(i, j, k)
     337             : 
     338             :                   ddata(i, j, k) = h_grid(1, 1)*dVal1 + &
     339             :                                    h_grid(2, 1)*dVal2 + h_grid(3, 1)*dVal3
     340             :                   ddata2(i, j, k) = h_grid(1, 2)*dVal1 + &
     341             :                                     h_grid(2, 2)*dVal2 + h_grid(3, 2)*dVal3
     342             :                   ddata3(i, j, k) = h_grid(1, 3)*dVal1 + &
     343             :                                     h_grid(2, 3)*dVal2 + h_grid(3, 3)*dVal3
     344             : 
     345             :                END DO
     346             :             END DO
     347             :          END DO
     348             :       END IF
     349             : 
     350       18110 :       CALL timestop(handle)
     351       18110 :    END SUBROUTINE pw_spline_scale_deriv
     352             : 
     353             : ! **************************************************************************************************
     354             : !> \brief calculates the FFT of the values of the x,y,z (idir=1,2,3)
     355             : !>      derivative of the cubic spline
     356             : !> \param spline_g on entry the FFT of the coefficients of the spline
     357             : !>        will contain the FFT of the derivative
     358             : !> \param idir direction of the derivative
     359             : !> \par History
     360             : !>      06.2003 created [fawzi]
     361             : !> \author Fawzi Mohamed
     362             : !> \note
     363             : !>      the distance between gridpoints is assumed to be 1
     364             : ! **************************************************************************************************
     365         396 :    SUBROUTINE pw_spline3_deriv_g(spline_g, idir)
     366             :       TYPE(pw_c1d_gs_type), INTENT(IN)                   :: spline_g
     367             :       INTEGER, INTENT(in)                                :: idir
     368             : 
     369             :       CHARACTER(len=*), PARAMETER :: routineN = 'pw_spline3_deriv_g'
     370             :       REAL(KIND=dp), PARAMETER                           :: inv9 = 1.0_dp/9.0_dp
     371             : 
     372             :       INTEGER                                            :: handle, i, ii, j, k
     373             :       INTEGER, DIMENSION(2, 3)                           :: bo, gbo
     374             :       INTEGER, DIMENSION(3)                              :: n, n_tot
     375             :       REAL(KIND=dp)                                      :: coeff, tmp
     376         396 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: csIVals, csJVals, csKVals
     377             : 
     378         396 :       CALL timeset(routineN, handle)
     379             : 
     380        1584 :       n(1:3) = spline_g%pw_grid%npts_local(1:3)
     381        1584 :       n_tot(1:3) = spline_g%pw_grid%npts(1:3)
     382        3960 :       bo = spline_g%pw_grid%bounds_local
     383        3960 :       gbo = spline_g%pw_grid%bounds
     384             : 
     385         396 :       CPASSERT(.NOT. spline_g%pw_grid%spherical)
     386         396 :       CPASSERT(spline_g%pw_grid%grid_span == FULLSPACE)
     387             : 
     388           0 :       ALLOCATE (csIVals(gbo(1, 1):gbo(2, 1)), &
     389           0 :                 csJVals(gbo(1, 2):gbo(2, 2)), &
     390        2772 :                 csKVals(gbo(1, 3):gbo(2, 3)))
     391             : 
     392         396 :       coeff = twopi/n_tot(1)
     393         396 :       IF (idir == 1) THEN
     394         132 : !$OMP        PARALLEL DO PRIVATE(i) DEFAULT(NONE) SHARED(gbo,csIVals,coeff)
     395             :          DO i = gbo(1, 1), gbo(2, 1)
     396             :             csIVals(i) = SIN(coeff*REAL(i, dp))
     397             :          END DO
     398             :       ELSE
     399         264 : !$OMP        PARALLEL DO PRIVATE(i) DEFAULT(NONE) SHARED(gbo,csIVals,coeff)
     400             :          DO i = gbo(1, 1), gbo(2, 1)
     401             :             csIVals(i) = COS(coeff*REAL(i, dp))
     402             :          END DO
     403             :       END IF
     404         396 :       coeff = twopi/n_tot(2)
     405         396 :       IF (idir == 2) THEN
     406         132 : !$OMP        PARALLEL DO PRIVATE(j) DEFAULT(NONE) SHARED(gbo,csJVals,coeff)
     407             :          DO j = gbo(1, 2), gbo(2, 2)
     408             :             csJVals(j) = SIN(coeff*REAL(j, dp))
     409             :          END DO
     410             :       ELSE
     411         264 : !$OMP        PARALLEL DO PRIVATE(j) DEFAULT(NONE) SHARED(gbo,csJVals,coeff)
     412             :          DO j = gbo(1, 2), gbo(2, 2)
     413             :             csJVals(j) = COS(coeff*REAL(j, dp))
     414             :          END DO
     415             :       END IF
     416         396 :       coeff = twopi/n_tot(3)
     417         396 :       IF (idir == 3) THEN
     418         132 : !$OMP        PARALLEL DO PRIVATE(k) DEFAULT(NONE) SHARED(gbo,csKVals,coeff)
     419             :          DO k = gbo(1, 3), gbo(2, 3)
     420             :             csKVals(k) = SIN(coeff*REAL(k, dp))
     421             :          END DO
     422             :       ELSE
     423         264 : !$OMP        PARALLEL DO PRIVATE(k) DEFAULT(NONE) SHARED(gbo,csKVals,coeff)
     424             :          DO k = gbo(1, 3), gbo(2, 3)
     425             :             csKVals(k) = COS(coeff*REAL(k, dp))
     426             :          END DO
     427             :       END IF
     428             : 
     429         132 :       SELECT CASE (idir)
     430             :       CASE (1)
     431             :          ! x deriv
     432         132 : !$OMP        PARALLEL DO PRIVATE(ii,k,j,i,coeff,tmp) DEFAULT(NONE) SHARED(spline_g,csIVals,csJVals,csKVals)
     433             :          DO ii = 1, SIZE(spline_g%array)
     434             :             i = spline_g%pw_grid%g_hat(1, ii)
     435             :             j = spline_g%pw_grid%g_hat(2, ii)
     436             :             k = spline_g%pw_grid%g_hat(3, ii)
     437             : !FM                ! formula
     438             : !FM                coeff=(sinVal(1)*cosVal(2)*cosVal(3))/9.0_dp+&
     439             : !FM                     (sinVal(1)*cosVal(2)+sinVal(1)*cosVal(3))*2.0_dp/9.0_dp+&
     440             : !FM                     sinVal(1)*4.0_dp/9.0_dp
     441             :             tmp = csIVals(i)*csJVals(j)
     442             :             coeff = (tmp*csKVals(k) + &
     443             :                      (tmp + csIVals(i)*csKVals(k))*2.0_dp + &
     444             :                      csIVals(i)*4.0_dp)*inv9
     445             : 
     446             :             spline_g%array(ii) = spline_g%array(ii)* &
     447             :                                  CMPLX(0.0_dp, coeff, dp)
     448             :          END DO
     449             :       CASE (2)
     450             :          ! y deriv
     451         132 : !$OMP        PARALLEL DO PRIVATE(ii,k,j,i,coeff,tmp) DEFAULT(NONE) SHARED(spline_g,csIVals,csJVals,csKVals)
     452             :          DO ii = 1, SIZE(spline_g%array)
     453             :             i = spline_g%pw_grid%g_hat(1, ii)
     454             :             j = spline_g%pw_grid%g_hat(2, ii)
     455             :             k = spline_g%pw_grid%g_hat(3, ii)
     456             : 
     457             :             tmp = csIVals(i)*csJVals(j)
     458             :             coeff = (tmp*csKVals(k) + &
     459             :                      (tmp + csJVals(j)*csKVals(k))*2.0_dp + &
     460             :                      csJVals(j)*4.0_dp)*inv9
     461             : 
     462             :             spline_g%array(ii) = spline_g%array(ii)* &
     463             :                                  CMPLX(0.0_dp, coeff, dp)
     464             :          END DO
     465             :       CASE (3)
     466             :          ! z deriv
     467         396 : !$OMP        PARALLEL DO PRIVATE(ii,k,j,i,coeff,tmp) DEFAULT(NONE) SHARED(spline_g,csIVals,csJVals,csKVals)
     468             :          DO ii = 1, SIZE(spline_g%array)
     469             :             i = spline_g%pw_grid%g_hat(1, ii)
     470             :             j = spline_g%pw_grid%g_hat(2, ii)
     471             :             k = spline_g%pw_grid%g_hat(3, ii)
     472             : 
     473             :             tmp = csIVals(i)*csKVals(k)
     474             :             coeff = (tmp*csJVals(j) + &
     475             :                      (tmp + csJVals(j)*csKVals(k))*2.0_dp + &
     476             :                      csKVals(k)*4.0_dp)*inv9
     477             : 
     478             :             spline_g%array(ii) = spline_g%array(ii)* &
     479             :                                  CMPLX(0.0_dp, coeff, dp)
     480             :          END DO
     481             :       END SELECT
     482             : 
     483         396 :       DEALLOCATE (csIVals, csJVals, csKVals)
     484             : 
     485         396 :       CALL timestop(handle)
     486         792 :    END SUBROUTINE pw_spline3_deriv_g
     487             : 
     488             : ! **************************************************************************************************
     489             : !> \brief calculates the FFT of the values of the x,y,z (idir=1,2,3)
     490             : !>      derivative of the quadratic spline
     491             : !> \param spline_g on entry the FFT of the coefficients of the spline
     492             : !>        will contain the FFT of the derivative
     493             : !> \param idir direction of the derivative
     494             : !> \par History
     495             : !>      06.2003 created [fawzi]
     496             : !> \author Fawzi Mohamed
     497             : !> \note
     498             : !>      the distance between gridpoints is assumed to be 1
     499             : ! **************************************************************************************************
     500       38532 :    SUBROUTINE pw_spline2_deriv_g(spline_g, idir)
     501             :       TYPE(pw_c1d_gs_type), INTENT(IN)                   :: spline_g
     502             :       INTEGER, INTENT(in)                                :: idir
     503             : 
     504             :       CHARACTER(len=*), PARAMETER :: routineN = 'pw_spline2_deriv_g'
     505             :       REAL(KIND=dp), PARAMETER                           :: inv16 = 1.0_dp/16.0_dp
     506             : 
     507             :       INTEGER                                            :: handle, i, ii, j, k
     508             :       INTEGER, DIMENSION(2, 3)                           :: bo
     509             :       INTEGER, DIMENSION(3)                              :: n, n_tot
     510             :       REAL(KIND=dp)                                      :: coeff, tmp
     511       38532 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: csIVals, csJVals, csKVals
     512             : 
     513       38532 :       CALL timeset(routineN, handle)
     514             : 
     515      154128 :       n(1:3) = spline_g%pw_grid%npts_local(1:3)
     516      154128 :       n_tot(1:3) = spline_g%pw_grid%npts(1:3)
     517      385320 :       bo = spline_g%pw_grid%bounds
     518             : 
     519       38532 :       CPASSERT(.NOT. spline_g%pw_grid%spherical)
     520       38532 :       CPASSERT(spline_g%pw_grid%grid_span == FULLSPACE)
     521             : 
     522           0 :       ALLOCATE (csIVals(bo(1, 1):bo(2, 1)), csJVals(bo(1, 2):bo(2, 2)), &
     523      269724 :                 csKVals(bo(1, 3):bo(2, 3)))
     524             : 
     525       38532 :       coeff = twopi/n_tot(1)
     526       38532 :       IF (idir == 1) THEN
     527       12844 : !$OMP        PARALLEL DO PRIVATE(i) DEFAULT(NONE) SHARED(bo,coeff,csIVals)
     528             :          DO i = bo(1, 1), bo(2, 1)
     529             :             csIVals(i) = SIN(coeff*REAL(i, dp))
     530             :          END DO
     531             :       ELSE
     532       25688 : !$OMP        PARALLEL DO PRIVATE(i) DEFAULT(NONE) SHARED(bo,coeff,csIVals)
     533             :          DO i = bo(1, 1), bo(2, 1)
     534             :             csIVals(i) = COS(coeff*REAL(i, dp))
     535             :          END DO
     536             :       END IF
     537       38532 :       coeff = twopi/n_tot(2)
     538       38532 :       IF (idir == 2) THEN
     539       12844 : !$OMP        PARALLEL DO PRIVATE(j) DEFAULT(NONE) SHARED(bo,coeff,csJVals)
     540             :          DO j = bo(1, 2), bo(2, 2)
     541             :             csJVals(j) = SIN(coeff*REAL(j, dp))
     542             :          END DO
     543             :       ELSE
     544       25688 : !$OMP        PARALLEL DO PRIVATE(j) DEFAULT(NONE) SHARED(bo,coeff,csJVals)
     545             :          DO j = bo(1, 2), bo(2, 2)
     546             :             csJVals(j) = COS(coeff*REAL(j, dp))
     547             :          END DO
     548             :       END IF
     549       38532 :       coeff = twopi/n_tot(3)
     550       38532 :       IF (idir == 3) THEN
     551       12844 : !$OMP        PARALLEL DO PRIVATE(k) DEFAULT(NONE) SHARED(bo,coeff,csKVals)
     552             :          DO k = bo(1, 3), bo(2, 3)
     553             :             csKVals(k) = SIN(coeff*REAL(k, dp))
     554             :          END DO
     555             :       ELSE
     556       25688 : !$OMP        PARALLEL DO PRIVATE(k) DEFAULT(NONE) SHARED(bo,coeff,csKVals)
     557             :          DO k = bo(1, 3), bo(2, 3)
     558             :             csKVals(k) = COS(coeff*REAL(k, dp))
     559             :          END DO
     560             :       END IF
     561             : 
     562       12844 :       SELECT CASE (idir)
     563             :       CASE (1)
     564             :          ! x deriv
     565       12844 : !$OMP        PARALLEL DO PRIVATE(ii,k,j,i,coeff,tmp) SHARED(spline_g,csIVals,csJVals,csKVals) DEFAULT(NONE)
     566             :          DO ii = 1, SIZE(spline_g%array)
     567             :             i = spline_g%pw_grid%g_hat(1, ii)
     568             :             j = spline_g%pw_grid%g_hat(2, ii)
     569             :             k = spline_g%pw_grid%g_hat(3, ii)
     570             : !FM                ! formula
     571             : !FM                coeff=(sinVal(1)*cosVal(2)*cosVal(3))/16.0_dp+&
     572             : !FM                     (sinVal(1)*cosVal(2)+sinVal(1)*cosVal(3))*3.0_dp/16.0_dp+&
     573             : !FM                     sinVal(1)*9.0_dp/16.0_dp
     574             :             tmp = csIVals(i)*csJVals(j)
     575             :             coeff = (tmp*csKVals(k) + &
     576             :                      (tmp + csIVals(i)*csKVals(k))*3.0_dp + &
     577             :                      csIVals(i)*9.0_dp)*inv16
     578             : 
     579             :             spline_g%array(ii) = spline_g%array(ii)* &
     580             :                                  CMPLX(0.0_dp, coeff, dp)
     581             :          END DO
     582             :       CASE (2)
     583             :          ! y deriv
     584       12844 : !$OMP        PARALLEL DO PRIVATE(ii,k,j,i,coeff,tmp) DEFAULT(NONE) SHARED(spline_g,csIVals,csJVals,csKVals)
     585             :          DO ii = 1, SIZE(spline_g%array)
     586             :             i = spline_g%pw_grid%g_hat(1, ii)
     587             :             j = spline_g%pw_grid%g_hat(2, ii)
     588             :             k = spline_g%pw_grid%g_hat(3, ii)
     589             : 
     590             :             tmp = csIVals(i)*csJVals(j)
     591             :             coeff = (tmp*csKVals(k) + &
     592             :                      (tmp + csJVals(j)*csKVals(k))*3.0_dp + &
     593             :                      csJVals(j)*9.0_dp)*inv16
     594             : 
     595             :             spline_g%array(ii) = spline_g%array(ii)* &
     596             :                                  CMPLX(0.0_dp, coeff, dp)
     597             :          END DO
     598             :       CASE (3)
     599             :          ! z deriv
     600       38532 : !$OMP        PARALLEL DO PRIVATE(ii,k,j,i,coeff,tmp) DEFAULT(NONE) SHARED(spline_g,csIVals,csJVals,csKVals)
     601             :          DO ii = 1, SIZE(spline_g%array)
     602             :             i = spline_g%pw_grid%g_hat(1, ii)
     603             :             j = spline_g%pw_grid%g_hat(2, ii)
     604             :             k = spline_g%pw_grid%g_hat(3, ii)
     605             : 
     606             :             tmp = csIVals(i)*csKVals(k)
     607             :             coeff = (tmp*csJVals(j) + &
     608             :                      (tmp + csJVals(j)*csKVals(k))*3.0_dp + &
     609             :                      csKVals(k)*9.0_dp)*inv16
     610             : 
     611             :             spline_g%array(ii) = spline_g%array(ii)* &
     612             :                                  CMPLX(0.0_dp, coeff, dp)
     613             :          END DO
     614             :       END SELECT
     615             : 
     616       38532 :       DEALLOCATE (csIVals, csJVals, csKVals)
     617             : 
     618       38532 :       CALL timestop(handle)
     619       77064 :    END SUBROUTINE pw_spline2_deriv_g
     620             : 
     621             : ! **************************************************************************************************
     622             : !> \brief applies a nearest neighbor linear operator to a stripe in x direction:
     623             : !>      out_val(i)=sum(weight(j)*in_val(i+j-1),j=0..2)
     624             : !> \param weights the weights of the linear operator
     625             : !> \param in_val the argument to the operator
     626             : !> \param in_val_first the first argument (needed to calculate out_val(1))
     627             : !> \param in_val_last the last argument (needed to calculate out_val(n_el))
     628             : !> \param out_val the place where the result is accumulated
     629             : !> \param n_el the number of elements in in_v and out_v
     630             : !> \par History
     631             : !>      04.2004 created [fawzi]
     632             : !> \author fawzi
     633             : !> \note
     634             : !>      uses 2 read streams and 1 write stream
     635             : ! **************************************************************************************************
     636   637289732 :    SUBROUTINE pw_compose_stripe(weights, in_val, in_val_first, in_val_last, &
     637             :                                 out_val, n_el)
     638             :       REAL(kind=dp), DIMENSION(0:2), INTENT(in)          :: weights
     639             :       REAL(kind=dp), DIMENSION(*), INTENT(in)            :: in_val
     640             :       REAL(kind=dp), INTENT(in)                          :: in_val_first, in_val_last
     641             :       REAL(kind=dp), DIMENSION(*), INTENT(inout)         :: out_val
     642             :       INTEGER                                            :: n_el
     643             : 
     644             :       INTEGER                                            :: i
     645             :       REAL(kind=dp)                                      :: v0, v1, v2
     646             : 
     647             : !1:n_el), &
     648             : !1:n_el), &
     649             : 
     650   637289732 :       IF (n_el < 1) RETURN
     651   637289732 :       v0 = in_val_first
     652   637289732 :       v1 = in_val(1)
     653   637289732 :       IF (weights(1) == 0.0_dp) THEN
     654             :          ! optimized version for x deriv
     655   100241316 :          DO i = 1, n_el - 3, 3
     656   914503932 :             v2 = in_val(i + 1)
     657             :             out_val(i) = out_val(i) + &
     658             :                          weights(0)*v0 + &
     659   914503932 :                          weights(2)*v2
     660   914503932 :             v0 = in_val(i + 2)
     661             :             out_val(i + 1) = out_val(i + 1) + &
     662             :                              weights(0)*v1 + &
     663   914503932 :                              weights(2)*v0
     664   914503932 :             v1 = in_val(i + 3)
     665             :             out_val(i + 2) = out_val(i + 2) + &
     666             :                              weights(0)*v2 + &
     667   914503932 :                              weights(2)*v1
     668             :          END DO
     669             :       ELSE
     670             :          ! generic version
     671   537048416 :          DO i = 1, n_el - 3, 3
     672  4001796166 :             v2 = in_val(i + 1)
     673             :             out_val(i) = out_val(i) + &
     674             :                          weights(0)*v0 + &
     675             :                          weights(1)*v1 + &
     676  4001796166 :                          weights(2)*v2
     677  4001796166 :             v0 = in_val(i + 2)
     678             :             out_val(i + 1) = out_val(i + 1) + &
     679             :                              weights(0)*v1 + &
     680             :                              weights(1)*v2 + &
     681  4001796166 :                              weights(2)*v0
     682  4001796166 :             v1 = in_val(i + 3)
     683             :             out_val(i + 2) = out_val(i + 2) + &
     684             :                              weights(0)*v2 + &
     685             :                              weights(1)*v0 + &
     686  4006048216 :                              weights(2)*v1
     687             :          END DO
     688             :       END IF
     689   850092404 :       SELECT CASE (MODULO(n_el - 1, 3))
     690             :       CASE (0)
     691   212802672 :          v2 = in_val_last
     692             :          out_val(n_el) = out_val(n_el) + &
     693             :                          weights(0)*v0 + &
     694             :                          weights(1)*v1 + &
     695   212802672 :                          weights(2)*v2
     696             :       CASE (1)
     697   190169698 :          v2 = in_val(n_el)
     698             :          out_val(n_el - 1) = out_val(n_el - 1) + &
     699             :                              weights(0)*v0 + &
     700             :                              weights(1)*v1 + &
     701   190169698 :                              weights(2)*v2
     702   190169698 :          v0 = in_val_last
     703             :          out_val(n_el) = out_val(n_el) + &
     704             :                          weights(0)*v1 + &
     705             :                          weights(1)*v2 + &
     706   190169698 :                          weights(2)*v0
     707             :       CASE (2)
     708   234317362 :          v2 = in_val(n_el - 1)
     709             :          out_val(n_el - 2) = out_val(n_el - 2) + &
     710             :                              weights(0)*v0 + &
     711             :                              weights(1)*v1 + &
     712   234317362 :                              weights(2)*v2
     713   234317362 :          v0 = in_val(n_el)
     714             :          out_val(n_el - 1) = out_val(n_el - 1) + &
     715             :                              weights(0)*v1 + &
     716             :                              weights(1)*v2 + &
     717   234317362 :                              weights(2)*v0
     718   234317362 :          v1 = in_val_last
     719             :          out_val(n_el) = out_val(n_el) + &
     720             :                          weights(0)*v2 + &
     721             :                          weights(1)*v0 + &
     722   637289732 :                          weights(2)*v1
     723             :       END SELECT
     724             : 
     725             :    END SUBROUTINE pw_compose_stripe
     726             : 
     727             : ! **************************************************************************************************
     728             : !> \brief private routine that computes pw_nn_compose_r (it seems that without
     729             : !>      passing arrays in this way either some compiler do a copyin/out (xlf)
     730             : !>      or by inlining suboptimal code is produced (nag))
     731             : !> \param weights a 3x3x3 array with the linear operator
     732             : !> \param in_val the argument for the linear operator
     733             : !> \param out_val place where the value of the linear oprator should be added
     734             : !> \param pw_in pw to be able to get the needed meta data about in_val and
     735             : !>        out_val
     736             : !> \param bo boundaries of in_val and out_val
     737             : !> \author fawzi
     738             : ! **************************************************************************************************
     739       29892 :    SUBROUTINE pw_nn_compose_r_work(weights, in_val, out_val, pw_in, bo)
     740             :       REAL(kind=dp), DIMENSION(0:2, 0:2, 0:2)            :: weights
     741             :       INTEGER, DIMENSION(2, 3)                           :: bo
     742             :       TYPE(pw_r3d_rs_type), INTENT(IN)                   :: pw_in
     743             :       REAL(kind=dp), DIMENSION(bo(1, 1):bo(2, 1), bo(1, &
     744             :          2):bo(2, 2), bo(1, 3):bo(2, 3)), INTENT(inout)  :: out_val
     745             :       REAL(kind=dp), DIMENSION(bo(1, 1):bo(2, 1), bo(1, &
     746             :          2):bo(2, 2), bo(1, 3):bo(2, 3)), INTENT(in)     :: in_val
     747             : 
     748             :       INTEGER                                            :: i, j, jw, k, kw, myj, myk
     749             :       INTEGER, DIMENSION(2, 3)                           :: gbo
     750             :       INTEGER, DIMENSION(3)                              :: s
     751             :       LOGICAL                                            :: has_boundary, yderiv, zderiv
     752             :       REAL(kind=dp)                                      :: in_val_f, in_val_l
     753       29892 :       REAL(kind=dp), DIMENSION(:, :), POINTER            :: l_boundary, tmp, u_boundary
     754             : 
     755       91500 :       zderiv = ALL(weights(:, :, 1) == 0.0_dp)
     756       91500 :       yderiv = ALL(weights(:, 1, :) == 0.0_dp)
     757      298920 :       bo = pw_in%pw_grid%bounds_local
     758      298920 :       gbo = pw_in%pw_grid%bounds
     759      119568 :       DO i = 1, 3
     760      119568 :          s(i) = bo(2, i) - bo(1, i) + 1
     761             :       END DO
     762      119568 :       IF (ANY(s < 1)) RETURN
     763             :       has_boundary = ANY(pw_in%pw_grid%bounds_local(:, 1) /= &
     764       46800 :                          pw_in%pw_grid%bounds(:, 1))
     765       29892 :       IF (has_boundary) THEN
     766             :          ALLOCATE (l_boundary(bo(1, 2):bo(2, 2), bo(1, 3):bo(2, 3)), &
     767             :                    u_boundary(bo(1, 2):bo(2, 2), bo(1, 3):bo(2, 3)), &
     768      285840 :                    tmp(bo(1, 2):bo(2, 2), bo(1, 3):bo(2, 3)))
     769    53905904 :          tmp(:, :) = pw_in%array(bo(2, 1), :, :)
     770             :          CALL pw_in%pw_grid%para%group%sendrecv(tmp, pw_in%pw_grid%para%pos_of_x( &
     771             :                                                 gbo(1, 1) + MODULO(bo(2, 1) + 1 - gbo(1, 1), gbo(2, 1) - gbo(1, 1) + 1)), &
     772             :                                                 l_boundary, pw_in%pw_grid%para%pos_of_x( &
     773   107783224 :                                                 gbo(1, 1) + MODULO(bo(1, 1) - 1 - gbo(1, 1), gbo(2, 1) - gbo(1, 1) + 1)))
     774    53905904 :          tmp(:, :) = pw_in%array(bo(1, 1), :, :)
     775             :          CALL pw_in%pw_grid%para%group%sendrecv(tmp, pw_in%pw_grid%para%pos_of_x( &
     776             :                                                 gbo(1, 1) + MODULO(bo(1, 1) - 1 - gbo(1, 1), gbo(2, 1) - gbo(1, 1) + 1)), &
     777             :                                                 u_boundary, pw_in%pw_grid%para%pos_of_x( &
     778   107783224 :                                                 gbo(1, 1) + MODULO(bo(2, 1) + 1 - gbo(1, 1), gbo(2, 1) - gbo(1, 1) + 1)))
     779       28584 :          DEALLOCATE (tmp)
     780             :       END IF
     781             : 
     782             : !$OMP   PARALLEL DO DEFAULT(NONE) PRIVATE(k,kw,myk,j,jw,myj,in_val_f,&
     783             : !$OMP       in_val_l) SHARED(zderiv,yderiv,bo,in_val,out_val,s,l_boundary,&
     784       29892 : !$OMP       u_boundary,weights,has_boundary)
     785             :       DO k = 0, s(3) - 1
     786             :          DO kw = 0, 2
     787             :             myk = bo(1, 3) + MODULO(k + kw - 1, s(3))
     788             :             IF (zderiv .AND. kw == 1) CYCLE
     789             :             DO j = 0, s(2) - 1
     790             :                DO jw = 0, 2
     791             :                   myj = bo(1, 2) + MODULO(j + jw - 1, s(2))
     792             :                   IF (yderiv .AND. jw == 1) CYCLE
     793             :                   IF (has_boundary) THEN
     794             :                      in_val_f = l_boundary(myj, myk)
     795             :                      in_val_l = u_boundary(myj, myk)
     796             :                   ELSE
     797             :                      in_val_f = in_val(bo(2, 1), myj, myk)
     798             :                      in_val_l = in_val(bo(1, 1), myj, myk)
     799             :                   END IF
     800             :                   CALL pw_compose_stripe(weights=weights(:, jw, kw), &
     801             :                                          in_val=in_val(:, myj, myk), &
     802             :                                          in_val_first=in_val_f, in_val_last=in_val_l, &
     803             :                                          out_val=out_val(:, bo(1, 2) + j, bo(1, 3) + k), n_el=s(1))
     804             :                END DO
     805             :             END DO
     806             :          END DO
     807             :       END DO
     808       29892 :       IF (has_boundary) THEN
     809       28584 :          DEALLOCATE (l_boundary, u_boundary)
     810             :       END IF
     811       29892 :    END SUBROUTINE pw_nn_compose_r_work
     812             : 
     813             : ! **************************************************************************************************
     814             : !> \brief applies a nearest neighbor linear operator to a pw in real space
     815             : !> \param weights a 3x3x3 array with the linear operator
     816             : !> \param pw_in the argument for the linear operator
     817             : !> \param pw_out place where the value of the linear oprator should be added
     818             : !> \author fawzi
     819             : !> \note
     820             : !>      has specialized versions for derivative operator (with central values==0)
     821             : ! **************************************************************************************************
     822       29892 :    SUBROUTINE pw_nn_compose_r(weights, pw_in, pw_out)
     823             :       REAL(kind=dp), DIMENSION(0:2, 0:2, 0:2)            :: weights
     824             :       TYPE(pw_r3d_rs_type), INTENT(IN)                   :: pw_in, pw_out
     825             : 
     826             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'pw_nn_compose_r'
     827             : 
     828             :       INTEGER                                            :: handle
     829             : 
     830       29892 :       CALL timeset(routineN, handle)
     831      209244 :       IF (.NOT. ALL(pw_in%pw_grid%bounds_local(:, 2:3) == pw_in%pw_grid%bounds(:, 2:3))) THEN
     832           0 :          CPABORT("wrong pw distribution")
     833             :       END IF
     834             :       CALL pw_nn_compose_r_work(weights=weights, in_val=pw_in%array, &
     835       29892 :                                 out_val=pw_out%array, pw_in=pw_in, bo=pw_in%pw_grid%bounds_local)
     836       29892 :       CALL timestop(handle)
     837       29892 :    END SUBROUTINE pw_nn_compose_r
     838             : 
     839             : ! **************************************************************************************************
     840             : !> \brief calculates the values of a nearest neighbor smearing
     841             : !> \param pw_in the argument for the linear operator
     842             : !> \param pw_out place where the smeared values should be added
     843             : !> \param coeffs array with the coefficent of the smearing, ordered with
     844             : !>        the distance from the center: coeffs(1) the coeff of the central
     845             : !>        element, coeffs(2) the coeff of the 6 element with distance 1,
     846             : !>        coeff(3) the coeff of the 12 elements at distance sqrt(2),
     847             : !>        coeff(4) the coeff of the 8 elements at distance sqrt(3).
     848             : !> \author Fawzi Mohamed
     849             : !> \note
     850             : !>      does not normalize the smear to 1.
     851             : !>      with coeff=(/ 8._dp/27._dp, 2._dp/27._dp, 1._dp/54._dp, 1._dp/216._dp /)
     852             : !>      is equivalent to pw_spline3_evaluate_values_g, with
     853             : !>      coeff=(/ 27._dp/64._dp, 9._dp/128._dp, 3._dp/256._dp, 1._dp/512._dp /)
     854             : ! **************************************************************************************************
     855       14490 :    SUBROUTINE pw_nn_smear_r(pw_in, pw_out, coeffs)
     856             :       TYPE(pw_r3d_rs_type), INTENT(IN)                   :: pw_in, pw_out
     857             :       REAL(KIND=dp), DIMENSION(4), INTENT(in)            :: coeffs
     858             : 
     859             :       INTEGER                                            :: i, j, k
     860             :       REAL(kind=dp), DIMENSION(-1:1, -1:1, -1:1)         :: weights
     861             : 
     862       57960 :       DO k = -1, 1
     863      188370 :          DO j = -1, 1
     864      565110 :             DO i = -1, 1
     865      521640 :                weights(i, j, k) = coeffs(ABS(i) + ABS(j) + ABS(k) + 1)
     866             :             END DO
     867             :          END DO
     868             :       END DO
     869             : 
     870       14490 :       CALL pw_nn_compose_r(weights=weights, pw_in=pw_in, pw_out=pw_out)
     871       14490 :    END SUBROUTINE pw_nn_smear_r
     872             : 
     873             : ! **************************************************************************************************
     874             : !> \brief calculates a nearest neighbor central derivative.
     875             : !>      for the x dir:
     876             : !>      pw_out%array(i,j,k)=( pw_in(i+1,j,k)-pw_in(i-1,j,k) )*coeff(1)+
     877             : !>             ( pw_in(i+1,j(+-)1,k)-pw_in(i-1,j(+-)1,k)+
     878             : !>               pw_in(i+1,j,k(+-)1)-pw_in(i-1,j,k(+-)1) )*coeff(2)+
     879             : !>             ( pw_in(i+1,j(+-)1,k(+-)1)-pw_in(i-1,j(+-)1,k(+-)1)+
     880             : !>               pw_in(i+1,j(+-)1,k(-+)1)-pw_in(i-1,j(+-)1,k(-+)1) )*coeff(3)
     881             : !>      periodic boundary conditions are applied
     882             : !> \param pw_in the argument for the linear operator
     883             : !> \param pw_out place where the smeared values should be added
     884             : !> \param coeffs array with the coefficent of the front (positive) plane
     885             : !>        of the central derivative, ordered with
     886             : !>        the distance from the center: coeffs(1) the coeff of the central
     887             : !>        element, coeffs(2) the coeff of the 4 element with distance 1,
     888             : !>        coeff(3) the coeff of the 4 elements at distance sqrt(2)
     889             : !> \param idir ...
     890             : !> \author Fawzi Mohamed
     891             : !> \note
     892             : !>      with coeff=(/ 2.0_dp/9.0_dp,  1.0_dp/18.0_dp, 1.0_dp/72.0_dp /)
     893             : !>      is equivalent to pw_spline3_deriv_r, with
     894             : !>      coeff=(/ 9.0_dp/32.0_dp, 3.0_dp/64.0_dp, 1.0_dp/128.0_dp /)
     895             : !>      to pw_spline2_deriv_r
     896             : !>      coeff=(/ 25._dp/72._dp, 5._dp/144, 1._dp/288._dp /)
     897             : ! **************************************************************************************************
     898       15402 :    SUBROUTINE pw_nn_deriv_r(pw_in, pw_out, coeffs, idir)
     899             :       TYPE(pw_r3d_rs_type), INTENT(IN)                   :: pw_in, pw_out
     900             :       REAL(KIND=dp), DIMENSION(3), INTENT(in)            :: coeffs
     901             :       INTEGER                                            :: idir
     902             : 
     903             :       INTEGER                                            :: i, idirVal, j, k
     904             :       REAL(kind=dp), DIMENSION(-1:1, -1:1, -1:1)         :: weights
     905             : 
     906       61608 :       DO k = -1, 1
     907      200226 :          DO j = -1, 1
     908      600678 :             DO i = -1, 1
     909      415854 :                SELECT CASE (idir)
     910             :                CASE (1)
     911      138618 :                   idirVal = i
     912             :                CASE (2)
     913      138618 :                   idirVal = j
     914             :                CASE (3)
     915      138618 :                   idirVal = k
     916             :                CASE default
     917      415854 :                   CPABORT("invalid idir ("//TRIM(cp_to_string(idir))//")")
     918             :                END SELECT
     919      554472 :                IF (idirVal == 0) THEN
     920      138618 :                   weights(i, j, k) = 0.0_dp
     921             :                ELSE
     922      277236 :                   weights(i, j, k) = REAL(idirVal, dp)*coeffs(ABS(i) + ABS(j) + ABS(k))
     923             :                END IF
     924             :             END DO
     925             :          END DO
     926             :       END DO
     927             : 
     928       15402 :       CALL pw_nn_compose_r(weights=weights, pw_in=pw_in, pw_out=pw_out)
     929       15402 :    END SUBROUTINE pw_nn_deriv_r
     930             : 
     931             : ! **************************************************************************************************
     932             : !> \brief low level function that adds a coarse grid
     933             : !>      to a fine grid.
     934             : !>      If pbc is true periodic boundary conditions are applied
     935             : !>
     936             : !>      It will add to
     937             : !>
     938             : !>        fine_values(2*coarse_bounds(1,1):2*coarse_bounds(2,1),
     939             : !>                    2*coarse_bounds(1,2):2*coarse_bounds(2,2),
     940             : !>                    2*coarse_bounds(1,3):2*coarse_bounds(2,3))
     941             : !>
     942             : !>      using
     943             : !>
     944             : !>        coarse_coeffs(coarse_bounds(1,1):coarse_bounds(2,1),
     945             : !>                      coarse_bounds(1,2):coarse_bounds(2,2),
     946             : !>                      coarse_bounds(1,3):coarse_bounds(2,3))
     947             : !>
     948             : !>      composed with the weights obtained by the direct product of the
     949             : !>      1d coefficients weights:
     950             : !>
     951             : !>      for i,j,k in -3..3
     952             : !>         w(i,j,k)=weights_1d(abs(i)+1)*weights_1d(abs(j)+1)*
     953             : !>                  weights_1d(abs(k)+1)
     954             : !> \param coarse_coeffs_pw the values of the coefficients
     955             : !> \param fine_values_pw where to add the values due to the
     956             : !>        coarse coeffs
     957             : !> \param weights_1d the weights of the 1d smearing
     958             : !> \param w_border0 the 1d weight at the border (when pbc is false)
     959             : !> \param w_border1 the 1d weights for a point one off the border
     960             : !>        (w_border1(1) is the weight of the coefficent at the border)
     961             : !>        (used if pbc is false)
     962             : !> \param pbc if periodic boundary conditions should be applied
     963             : !> \param safe_computation ...
     964             : !> \author fawzi
     965             : !> \note
     966             : !>      coarse looping is continuos, I did not check if keeping the fine looping
     967             : !>      contiguous is better.
     968             : !>      And I ask myself yet again why, why we use x-slice distribution,
     969             : !>      z-slice distribution would be much better performancewise
     970             : !>      (and would semplify this code enormously).
     971             : !>      fine2coarse has much more understandable parallel part (build up of
     972             : !>      send/rcv sizes,... but worse if you have really a lot of processors,
     973             : !>      probabily irrelevant because it is not critical) [fawzi].
     974             : ! **************************************************************************************************
     975        2260 :    SUBROUTINE add_coarse2fine(coarse_coeffs_pw, fine_values_pw, &
     976             :                               weights_1d, w_border0, w_border1, pbc, safe_computation)
     977             :       TYPE(pw_r3d_rs_type), INTENT(IN)                   :: coarse_coeffs_pw, fine_values_pw
     978             :       REAL(kind=dp), DIMENSION(4), INTENT(in)            :: weights_1d
     979             :       REAL(kind=dp), INTENT(in)                          :: w_border0
     980             :       REAL(kind=dp), DIMENSION(3), INTENT(in)            :: w_border1
     981             :       LOGICAL, INTENT(in)                                :: pbc
     982             :       LOGICAL, INTENT(in), OPTIONAL                      :: safe_computation
     983             : 
     984             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'add_coarse2fine'
     985             : 
     986             :       INTEGER :: coarse_slice_size, f_shift(3), fi, fi_lb, fi_ub, fj, fk, handle, handle2, i, ii, &
     987             :          ij, ik, ip, j, k, my_lb, my_ub, n_procs, p, p_lb, p_old, p_ub, rcv_tot_size, rest_b, &
     988             :          s(3), send_tot_size, sf, shift, ss, x, x_att, xx
     989        2260 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: rcv_offset, rcv_size, real_rcv_size, &
     990        2260 :                                                             send_offset, send_size, sent_size
     991             :       INTEGER, DIMENSION(2, 3)                           :: coarse_bo, coarse_gbo, fine_bo, &
     992             :                                                             fine_gbo, my_coarse_bo
     993        2260 :       INTEGER, DIMENSION(:), POINTER                     :: pos_of_x
     994             :       LOGICAL                                            :: has_i_lbound, has_i_ubound, is_split, &
     995             :                                                             safe_calc
     996             :       REAL(kind=dp)                                      :: v0, v1, v2, v3, wi, wj, wk
     997        2260 :       REAL(kind=dp), ALLOCATABLE, DIMENSION(:)           :: rcv_buf, send_buf
     998             :       REAL(kind=dp), DIMENSION(3)                        :: w_0, ww0
     999             :       REAL(kind=dp), DIMENSION(4)                        :: w_1, ww1
    1000        2260 :       REAL(kind=dp), DIMENSION(:, :, :), POINTER         :: coarse_coeffs, fine_values
    1001             : 
    1002        2260 :       CALL timeset(routineN, handle)
    1003             : !    CALL timeset(routineN//"_pre",handle2)
    1004        2260 :       safe_calc = .FALSE.
    1005        2260 :       IF (PRESENT(safe_computation)) safe_calc = safe_computation
    1006        2260 :       ii = coarse_coeffs_pw%pw_grid%para%group%compare(fine_values_pw%pw_grid%para%group)
    1007        2260 :       IF (ii > mp_comm_congruent) THEN
    1008           0 :          CPABORT("")
    1009             :       END IF
    1010       22600 :       my_coarse_bo = coarse_coeffs_pw%pw_grid%bounds_local
    1011       22600 :       coarse_gbo = coarse_coeffs_pw%pw_grid%bounds
    1012       22600 :       fine_bo = fine_values_pw%pw_grid%bounds_local
    1013       22600 :       fine_gbo = fine_values_pw%pw_grid%bounds
    1014        9040 :       f_shift = fine_gbo(1, :) - 2*coarse_gbo(1, :)
    1015        6780 :       DO j = 2, 3
    1016       15820 :          DO i = 1, 2
    1017       13560 :             coarse_bo(i, j) = FLOOR((fine_bo(i, j) - f_shift(j))/2.)
    1018             :          END DO
    1019             :       END DO
    1020        2260 :       IF (fine_bo(1, 1) <= fine_bo(2, 1)) THEN
    1021        2260 :          coarse_bo(1, 1) = FLOOR((fine_bo(1, 1) - 2 - f_shift(1))/2.)
    1022        2260 :          coarse_bo(2, 1) = FLOOR((fine_bo(2, 1) + 3 - f_shift(1))/2.)
    1023             :       ELSE
    1024           0 :          coarse_bo(1, 1) = coarse_gbo(2, 1)
    1025           0 :          coarse_bo(2, 1) = coarse_gbo(2, 1) - 1
    1026             :       END IF
    1027        6744 :       is_split = ANY(coarse_gbo(:, 1) /= my_coarse_bo(:, 1))
    1028        2260 :       IF (.NOT. is_split .OR. .NOT. pbc) THEN
    1029        2260 :          coarse_bo(1, 1) = MAX(coarse_gbo(1, 1), coarse_bo(1, 1))
    1030        2260 :          coarse_bo(2, 1) = MIN(coarse_gbo(2, 1), coarse_bo(2, 1))
    1031             :       END IF
    1032        2260 :       has_i_ubound = (fine_gbo(2, 1) /= fine_bo(2, 1)) .OR. pbc .AND. is_split
    1033        2260 :       has_i_lbound = (fine_gbo(1, 1) /= fine_bo(1, 1)) .OR. pbc .AND. is_split
    1034             : 
    1035        2260 :       IF (pbc) THEN
    1036           0 :          CPASSERT(ALL(fine_gbo(1, :) == 2*coarse_gbo(1, :) + f_shift))
    1037           0 :          CPASSERT(ALL(fine_gbo(2, :) == 2*coarse_gbo(2, :) + 1 + f_shift))
    1038             :       ELSE
    1039        9040 :          CPASSERT(ALL(fine_gbo(2, :) == 2*coarse_gbo(2, :) + f_shift))
    1040        9040 :          CPASSERT(ALL(fine_gbo(1, :) == 2*coarse_gbo(1, :) + f_shift))
    1041             :       END IF
    1042             : 
    1043        2260 :       coarse_coeffs => coarse_coeffs_pw%array
    1044        9040 :       DO i = 1, 3
    1045        9040 :          s(i) = coarse_gbo(2, i) - coarse_gbo(1, i) + 1
    1046             :       END DO
    1047             : !       CALL timestop(handle2)
    1048             :       ! *** parallel case
    1049        2260 :       IF (is_split) THEN
    1050          24 :          CALL timeset(routineN//"_comm", handle2)
    1051             :          coarse_slice_size = (coarse_bo(2, 2) - coarse_bo(1, 2) + 1)* &
    1052          24 :                              (coarse_bo(2, 3) - coarse_bo(1, 3) + 1)
    1053          24 :          n_procs = coarse_coeffs_pw%pw_grid%para%group_size
    1054             :          ALLOCATE (send_size(0:n_procs - 1), send_offset(0:n_procs - 1), &
    1055             :                    sent_size(0:n_procs - 1), rcv_size(0:n_procs - 1), &
    1056         312 :                    rcv_offset(0:n_procs - 1), real_rcv_size(0:n_procs - 1))
    1057             : 
    1058             :          ! ** rcv size count
    1059             : 
    1060          24 :          pos_of_x => coarse_coeffs_pw%pw_grid%para%pos_of_x
    1061             :          p_old = pos_of_x(coarse_gbo(1, 1) &
    1062          24 :                           + MODULO(coarse_bo(1, 1) - coarse_gbo(1, 1), s(1)))
    1063          72 :          rcv_size = 0
    1064         184 :          DO x = coarse_bo(1, 1), coarse_bo(2, 1)
    1065         160 :             p = pos_of_x(coarse_gbo(1, 1) + MODULO(x - coarse_gbo(1, 1), s(1)))
    1066         184 :             rcv_size(p) = rcv_size(p) + coarse_slice_size
    1067             :          END DO
    1068             : 
    1069             :          ! ** send size count
    1070             : 
    1071          24 :          pos_of_x => fine_values_pw%pw_grid%para%pos_of_x
    1072          24 :          sf = fine_gbo(2, 1) - fine_gbo(1, 1) + 1
    1073          24 :          fi_lb = 2*my_coarse_bo(1, 1) - 3 + f_shift(1)
    1074          24 :          fi_ub = 2*my_coarse_bo(2, 1) + 3 + f_shift(1)
    1075          24 :          IF (.NOT. pbc) THEN
    1076          24 :             fi_lb = MAX(fi_lb, fine_gbo(1, 1))
    1077          24 :             fi_ub = MIN(fi_ub, fine_gbo(2, 1))
    1078             :          ELSE
    1079           0 :             fi_ub = MIN(fi_ub, fi_lb + sf - 1)
    1080             :          END IF
    1081          24 :          p_old = pos_of_x(fine_gbo(1, 1) + MODULO(fi_lb - fine_gbo(1, 1), sf))
    1082          24 :          p_lb = FLOOR((fi_lb - 2 - f_shift(1))/2.)
    1083          72 :          send_size = 0
    1084         320 :          DO x = fi_lb, fi_ub
    1085         296 :             p = pos_of_x(fine_gbo(1, 1) + MODULO(x - fine_gbo(1, 1), sf))
    1086         320 :             IF (p /= p_old) THEN
    1087          24 :                p_ub = FLOOR((x - 1 + 3 - f_shift(1))/2.)
    1088             : 
    1089             :                send_size(p_old) = send_size(p_old) + (MIN(p_ub, my_coarse_bo(2, 1)) &
    1090          24 :                                                       - MAX(p_lb, my_coarse_bo(1, 1)) + 1)*coarse_slice_size
    1091             : 
    1092          24 :                IF (pbc) THEN
    1093           0 :                   DO xx = p_lb, coarse_gbo(1, 1) - 1
    1094           0 :                      x_att = coarse_gbo(1, 1) + MODULO(xx - coarse_gbo(1, 1), s(1))
    1095           0 :                      IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
    1096           0 :                         send_size(p_old) = send_size(p_old) + coarse_slice_size
    1097             :                      END IF
    1098             :                   END DO
    1099           0 :                   DO xx = coarse_gbo(2, 1) + 1, p_ub
    1100           0 :                      x_att = coarse_gbo(1, 1) + MODULO(xx - coarse_gbo(1, 1), s(1))
    1101           0 :                      IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
    1102           0 :                         send_size(p_old) = send_size(p_old) + coarse_slice_size
    1103             :                      END IF
    1104             :                   END DO
    1105             :                END IF
    1106             : 
    1107          24 :                p_old = p
    1108          24 :                p_lb = FLOOR((x - 2 - f_shift(1))/2.)
    1109             :             END IF
    1110             :          END DO
    1111          24 :          p_ub = FLOOR((fi_ub + 3 - f_shift(1))/2.)
    1112             : 
    1113             :          send_size(p_old) = send_size(p_old) + (MIN(p_ub, my_coarse_bo(2, 1)) &
    1114          24 :                                                 - MAX(p_lb, my_coarse_bo(1, 1)) + 1)*coarse_slice_size
    1115             : 
    1116          24 :          IF (pbc) THEN
    1117           0 :             DO xx = p_lb, coarse_gbo(1, 1) - 1
    1118           0 :                x_att = coarse_gbo(1, 1) + MODULO(xx - coarse_gbo(1, 1), s(1))
    1119           0 :                IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
    1120           0 :                   send_size(p_old) = send_size(p_old) + coarse_slice_size
    1121             :                END IF
    1122             :             END DO
    1123           0 :             DO xx = coarse_gbo(2, 1) + 1, p_ub
    1124           0 :                x_att = coarse_gbo(1, 1) + MODULO(xx - coarse_gbo(1, 1), s(1))
    1125           0 :                IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
    1126           0 :                   send_size(p_old) = send_size(p_old) + coarse_slice_size
    1127             :                END IF
    1128             :             END DO
    1129             :          END IF
    1130             :          ! ** offsets & alloc send-rcv
    1131             : 
    1132             :          send_tot_size = 0
    1133          72 :          DO ip = 0, n_procs - 1
    1134          48 :             send_offset(ip) = send_tot_size
    1135          72 :             send_tot_size = send_tot_size + send_size(ip)
    1136             :          END DO
    1137          72 :          ALLOCATE (send_buf(0:send_tot_size - 1))
    1138             : 
    1139          24 :          rcv_tot_size = 0
    1140          72 :          DO ip = 0, n_procs - 1
    1141          48 :             rcv_offset(ip) = rcv_tot_size
    1142          72 :             rcv_tot_size = rcv_tot_size + rcv_size(ip)
    1143             :          END DO
    1144          24 :          IF (.NOT. rcv_tot_size == (coarse_bo(2, 1) - coarse_bo(1, 1) + 1)*coarse_slice_size) THEN
    1145           0 :             CPABORT("Error calculating rcv_tot_size ")
    1146             :          END IF
    1147          72 :          ALLOCATE (rcv_buf(0:rcv_tot_size - 1))
    1148             : 
    1149             :          ! ** fill send buffer
    1150             : 
    1151          24 :          p_old = pos_of_x(fine_gbo(1, 1) + MODULO(fi_lb - fine_gbo(1, 1), sf))
    1152          72 :          p_lb = FLOOR((fi_lb - 2 - f_shift(1))/2.)
    1153          72 :          sent_size(:) = send_offset
    1154          24 :          ss = my_coarse_bo(2, 1) - my_coarse_bo(1, 1) + 1
    1155         320 :          DO x = fi_lb, fi_ub
    1156         296 :             p = pos_of_x(fine_gbo(1, 1) + MODULO(x - fine_gbo(1, 1), sf))
    1157         320 :             IF (p /= p_old) THEN
    1158             :                shift = FLOOR((fine_gbo(1, 1) + MODULO(x - 1 - fine_gbo(1, 1), sf) - f_shift(1))/2._dp) - &
    1159          24 :                        FLOOR((x - 1 - f_shift(1))/2._dp)
    1160          24 :                p_ub = FLOOR((x - 1 + 3 - f_shift(1))/2._dp)
    1161             : 
    1162          24 :                IF (pbc) THEN
    1163           0 :                   DO xx = p_lb + shift, coarse_gbo(1, 1) - 1
    1164           0 :                      x_att = coarse_gbo(1, 1) + MODULO(xx - coarse_gbo(1, 1), sf)
    1165           0 :                      IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
    1166             :                         CALL dcopy(coarse_slice_size, &
    1167             :                                    coarse_coeffs(x_att, my_coarse_bo(1, 2), &
    1168           0 :                                                  my_coarse_bo(1, 3)), ss, send_buf(sent_size(p_old)), 1)
    1169           0 :                         sent_size(p_old) = sent_size(p_old) + coarse_slice_size
    1170             :                      END IF
    1171             :                   END DO
    1172             :                END IF
    1173             : 
    1174          24 :                ii = sent_size(p_old)
    1175         272 :                DO k = coarse_bo(1, 3), coarse_bo(2, 3)
    1176        3432 :                   DO j = coarse_bo(1, 2), coarse_bo(2, 2)
    1177       17312 :                      DO i = MAX(p_lb + shift, my_coarse_bo(1, 1)), MIN(p_ub + shift, my_coarse_bo(2, 1))
    1178       13904 :                         send_buf(ii) = coarse_coeffs(i, j, k)
    1179       17064 :                         ii = ii + 1
    1180             :                      END DO
    1181             :                   END DO
    1182             :                END DO
    1183          24 :                sent_size(p_old) = ii
    1184             : 
    1185          24 :                IF (pbc) THEN
    1186           0 :                   DO xx = coarse_gbo(2, 1) + 1, p_ub + shift
    1187           0 :                      x_att = coarse_gbo(1, 1) + MODULO(xx - coarse_gbo(1, 1), s(1))
    1188           0 :                      IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
    1189             :                         CALL dcopy(coarse_slice_size, &
    1190             :                                    coarse_coeffs(x_att, my_coarse_bo(1, 2), &
    1191             :                                                  my_coarse_bo(1, 3)), ss, &
    1192           0 :                                    send_buf(sent_size(p_old)), 1)
    1193           0 :                         sent_size(p_old) = sent_size(p_old) + coarse_slice_size
    1194             :                      END IF
    1195             :                   END DO
    1196             :                END IF
    1197             : 
    1198          24 :                p_old = p
    1199          24 :                p_lb = FLOOR((x - 2 - f_shift(1))/2.)
    1200             :             END IF
    1201             :          END DO
    1202             :          shift = FLOOR((fine_gbo(1, 1) + MODULO(x - 1 - fine_gbo(1, 1), sf) - f_shift(1))/2._dp) - &
    1203          24 :                  FLOOR((x - 1 - f_shift(1))/2._dp)
    1204          24 :          p_ub = FLOOR((fi_ub + 3 - f_shift(1))/2.)
    1205             : 
    1206          24 :          IF (pbc) THEN
    1207           0 :             DO xx = p_lb + shift, coarse_gbo(1, 1) - 1
    1208           0 :                x_att = coarse_gbo(1, 1) + MODULO(xx - coarse_gbo(1, 1), s(1))
    1209           0 :                IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
    1210             :                   CALL dcopy(coarse_slice_size, &
    1211             :                              coarse_coeffs(x_att, my_coarse_bo(1, 2), &
    1212           0 :                                            my_coarse_bo(1, 3)), ss, send_buf(sent_size(p_old)), 1)
    1213           0 :                   sent_size(p_old) = sent_size(p_old) + coarse_slice_size
    1214             :                END IF
    1215             :             END DO
    1216             :          END IF
    1217             : 
    1218          24 :          ii = sent_size(p_old)
    1219         272 :          DO k = coarse_bo(1, 3), coarse_bo(2, 3)
    1220        3432 :             DO j = coarse_bo(1, 2), coarse_bo(2, 2)
    1221       17312 :                DO i = MAX(p_lb + shift, my_coarse_bo(1, 1)), MIN(p_ub + shift, my_coarse_bo(2, 1))
    1222       13904 :                   send_buf(ii) = coarse_coeffs(i, j, k)
    1223       17064 :                   ii = ii + 1
    1224             :                END DO
    1225             :             END DO
    1226             :          END DO
    1227          24 :          sent_size(p_old) = ii
    1228             : 
    1229          24 :          IF (pbc) THEN
    1230           0 :             DO xx = coarse_gbo(2, 1) + 1, p_ub + shift
    1231           0 :                x_att = coarse_gbo(1, 1) + MODULO(xx - coarse_gbo(1, 1), s(1))
    1232           0 :                IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
    1233             :                   CALL dcopy(coarse_slice_size, &
    1234             :                              coarse_coeffs(x_att, my_coarse_bo(1, 2), &
    1235           0 :                                            my_coarse_bo(1, 3)), ss, send_buf(sent_size(p_old)), 1)
    1236           0 :                   sent_size(p_old) = sent_size(p_old) + coarse_slice_size
    1237             :                END IF
    1238             :             END DO
    1239             :          END IF
    1240             : 
    1241          48 :          CPASSERT(ALL(sent_size(:n_procs - 2) == send_offset(1:)))
    1242          24 :          CPASSERT(sent_size(n_procs - 1) == send_tot_size)
    1243             :          ! test send/rcv sizes
    1244          24 :          CALL coarse_coeffs_pw%pw_grid%para%group%alltoall(send_size, real_rcv_size, 1)
    1245          72 :          CPASSERT(ALL(real_rcv_size == rcv_size))
    1246             :          ! all2all
    1247             :          CALL coarse_coeffs_pw%pw_grid%para%group%alltoall(sb=send_buf, scount=send_size, sdispl=send_offset, &
    1248          24 :                                                            rb=rcv_buf, rcount=rcv_size, rdispl=rcv_offset)
    1249             : 
    1250             :          ! ** reorder rcv buffer
    1251             :          ! (actually reordering should be needed only with pbc)
    1252             : 
    1253             :          ALLOCATE (coarse_coeffs(coarse_bo(1, 1):coarse_bo(2, 1), &
    1254             :                                  coarse_bo(1, 2):coarse_bo(2, 2), &
    1255         120 :                                  coarse_bo(1, 3):coarse_bo(2, 3)))
    1256             : 
    1257          24 :          my_lb = MAX(coarse_gbo(1, 1), coarse_bo(1, 1))
    1258          24 :          my_ub = MIN(coarse_gbo(2, 1), coarse_bo(2, 1))
    1259          24 :          pos_of_x => coarse_coeffs_pw%pw_grid%para%pos_of_x
    1260          72 :          sent_size(:) = rcv_offset
    1261          24 :          ss = coarse_bo(2, 1) - coarse_bo(1, 1) + 1
    1262          24 :          DO x = my_ub + 1, coarse_bo(2, 1)
    1263           0 :             p_old = pos_of_x(coarse_gbo(1, 1) + MODULO(x - coarse_gbo(1, 1), s(1)))
    1264             :             CALL dcopy(coarse_slice_size, &
    1265             :                        rcv_buf(sent_size(p_old)), 1, &
    1266             :                        coarse_coeffs(x, coarse_bo(1, 2), &
    1267           0 :                                      coarse_bo(1, 3)), ss)
    1268          24 :             sent_size(p_old) = sent_size(p_old) + coarse_slice_size
    1269             :          END DO
    1270             :          p_old = pos_of_x(coarse_gbo(1, 1) &
    1271          24 :                           + MODULO(my_lb - coarse_gbo(1, 1), s(1)))
    1272          24 :          p_lb = my_lb
    1273         184 :          DO x = my_lb, my_ub
    1274         160 :             p = pos_of_x(coarse_gbo(1, 1) + MODULO(x - coarse_gbo(1, 1), s(1)))
    1275         160 :             IF (p /= p_old) THEN
    1276          24 :                p_ub = x - 1
    1277             : 
    1278          24 :                ii = sent_size(p_old)
    1279         272 :                DO k = coarse_bo(1, 3), coarse_bo(2, 3)
    1280        3432 :                   DO j = coarse_bo(1, 2), coarse_bo(2, 2)
    1281       15732 :                      DO i = p_lb, p_ub
    1282       12324 :                         coarse_coeffs(i, j, k) = rcv_buf(ii)
    1283       15484 :                         ii = ii + 1
    1284             :                      END DO
    1285             :                   END DO
    1286             :                END DO
    1287          24 :                sent_size(p_old) = ii
    1288             : 
    1289          24 :                p_lb = x
    1290          24 :                p_old = p
    1291             :             END IF
    1292         184 :             rcv_size(p) = rcv_size(p) + coarse_slice_size
    1293             :          END DO
    1294          24 :          p_ub = my_ub
    1295          24 :          ii = sent_size(p_old)
    1296         272 :          DO k = coarse_bo(1, 3), coarse_bo(2, 3)
    1297        3432 :             DO j = coarse_bo(1, 2), coarse_bo(2, 2)
    1298       18892 :                DO i = p_lb, p_ub
    1299       15484 :                   coarse_coeffs(i, j, k) = rcv_buf(ii)
    1300       18644 :                   ii = ii + 1
    1301             :                END DO
    1302             :             END DO
    1303             :          END DO
    1304          24 :          sent_size(p_old) = ii
    1305          24 :          DO x = coarse_bo(1, 1), my_lb - 1
    1306           0 :             p_old = pos_of_x(coarse_gbo(1, 1) + MODULO(x - coarse_gbo(1, 1), s(1)))
    1307             :             CALL dcopy(coarse_slice_size, &
    1308             :                        rcv_buf(sent_size(p_old)), 1, &
    1309             :                        coarse_coeffs(x, coarse_bo(1, 2), &
    1310           0 :                                      coarse_bo(1, 3)), ss)
    1311          24 :             sent_size(p_old) = sent_size(p_old) + coarse_slice_size
    1312             :          END DO
    1313             : 
    1314          48 :          CPASSERT(ALL(sent_size(0:n_procs - 2) == rcv_offset(1:)))
    1315          24 :          CPASSERT(sent_size(n_procs - 1) == rcv_tot_size)
    1316             : 
    1317             :          ! dealloc
    1318          24 :          DEALLOCATE (send_size, send_offset, rcv_size, rcv_offset)
    1319          24 :          DEALLOCATE (send_buf, rcv_buf, real_rcv_size)
    1320          48 :          CALL timestop(handle2)
    1321             : 
    1322             :       END IF
    1323        2260 :       fine_values => fine_values_pw%array
    1324        9040 :       w_0 = (/weights_1d(3), weights_1d(1), weights_1d(3)/)
    1325       11300 :       w_1 = (/weights_1d(4), weights_1d(2), weights_1d(2), weights_1d(4)/)
    1326             : 
    1327       30388 :       DO k = coarse_bo(1, 3), coarse_bo(2, 3)
    1328      227284 :          DO ik = -3, 3
    1329      196896 :             IF (pbc) THEN
    1330           0 :                wk = weights_1d(ABS(ik) + 1)
    1331           0 :                fk = fine_gbo(1, 3) + MODULO(2*k + ik - fine_gbo(1, 3) + f_shift(3), 2*s(3))
    1332             :             ELSE
    1333      196896 :                fk = 2*k + ik + f_shift(3)
    1334      196896 :                IF (fk <= fine_bo(1, 3) + 1 .OR. fk >= fine_bo(2, 3) - 1) THEN
    1335       40680 :                   IF (fk < fine_bo(1, 3) .OR. fk > fine_bo(2, 3)) CYCLE
    1336       22600 :                   IF (fk == fine_bo(1, 3) .OR. fk == fine_bo(2, 3)) THEN
    1337        9040 :                      IF (ik /= 0) CYCLE
    1338        4520 :                      wk = w_border0
    1339       13560 :                   ELSE IF (fk == 2*coarse_bo(1, 3) + 1 + f_shift(3)) THEN
    1340        2260 :                      SELECT CASE (ik)
    1341             :                      CASE (1)
    1342        2260 :                         wk = w_border1(1)
    1343             :                      CASE (-1)
    1344        2260 :                         wk = w_border1(2)
    1345             :                      CASE (-3)
    1346        2260 :                         wk = w_border1(3)
    1347             :                      CASE default
    1348           0 :                         CPABORT("")
    1349        6780 :                         CYCLE
    1350             :                      END SELECT
    1351             :                   ELSE
    1352        2260 :                      SELECT CASE (ik)
    1353             :                      CASE (3)
    1354        2260 :                         wk = w_border1(3)
    1355             :                      CASE (1)
    1356        2260 :                         wk = w_border1(2)
    1357             :                      CASE (-1)
    1358        2260 :                         wk = w_border1(1)
    1359             :                      CASE default
    1360           0 :                         CPABORT("")
    1361        6780 :                         CYCLE
    1362             :                      END SELECT
    1363             :                   END IF
    1364             :                ELSE
    1365      156216 :                   wk = weights_1d(ABS(ik) + 1)
    1366             :                END IF
    1367             :             END IF
    1368     3150076 :             DO j = coarse_bo(1, 2), coarse_bo(2, 2)
    1369    23778112 :                DO ij = -3, 3
    1370    20633564 :                   IF (pbc) THEN
    1371           0 :                      wj = weights_1d(ABS(ij) + 1)*wk
    1372           0 :                      fj = fine_gbo(1, 2) + MODULO(2*j + ij - fine_gbo(1, 2) + f_shift(2), 2*s(2))
    1373             :                   ELSE
    1374    20633564 :                      fj = 2*j + ij + f_shift(2)
    1375    20633564 :                      IF (fj <= fine_bo(1, 2) + 1 .OR. fj >= fine_bo(2, 2) - 1) THEN
    1376     3137328 :                         IF (fj < fine_bo(1, 2) .OR. fj > fine_bo(2, 2)) CYCLE
    1377     1742960 :                         IF (fj == fine_bo(1, 2) .OR. fj == fine_bo(2, 2)) THEN
    1378      697184 :                            IF (ij /= 0) CYCLE
    1379      348592 :                            wj = w_border0*wk
    1380     1045776 :                         ELSE IF (fj == 2*coarse_bo(1, 2) + 1 + f_shift(2)) THEN
    1381      174296 :                            SELECT CASE (ij)
    1382             :                            CASE (1)
    1383      174296 :                               wj = w_border1(1)*wk
    1384             :                            CASE (-1)
    1385      174296 :                               wj = w_border1(2)*wk
    1386             :                            CASE (-3)
    1387      174296 :                               wj = w_border1(3)*wk
    1388             :                            CASE default
    1389      522888 :                               CYCLE
    1390             :                            END SELECT
    1391             :                         ELSE
    1392      174296 :                            SELECT CASE (ij)
    1393             :                            CASE (-1)
    1394      174296 :                               wj = w_border1(1)*wk
    1395             :                            CASE (1)
    1396      174296 :                               wj = w_border1(2)*wk
    1397             :                            CASE (3)
    1398      174296 :                               wj = w_border1(3)*wk
    1399             :                            CASE default
    1400      522888 :                               CYCLE
    1401             :                            END SELECT
    1402             :                         END IF
    1403             :                      ELSE
    1404    17496236 :                         wj = weights_1d(ABS(ij) + 1)*wk
    1405             :                      END IF
    1406             :                   END IF
    1407             : 
    1408    21838256 :                   IF (fine_bo(2, 1) - fine_bo(1, 1) < 7 .OR. safe_calc) THEN
    1409             : !                      CALL timeset(routineN//"_safe",handle2)
    1410       25000 :                      DO i = coarse_bo(1, 1), coarse_bo(2, 1)
    1411      165000 :                         DO ii = -3, 3
    1412      140000 :                            IF (pbc .AND. .NOT. is_split) THEN
    1413           0 :                               wi = weights_1d(ABS(ii) + 1)*wj
    1414           0 :                               fi = fine_gbo(1, 1) + MODULO(2*i + ii - fine_gbo(1, 1) + f_shift(1), 2*s(1))
    1415             :                            ELSE
    1416      140000 :                               fi = 2*i + ii + f_shift(1)
    1417      140000 :                               IF (fi < fine_bo(1, 1) .OR. fi > fine_bo(2, 1)) CYCLE
    1418       67500 :                               IF (.NOT. pbc .AND. (fi <= fine_gbo(1, 1) + 1 .OR. &
    1419             :                                                    fi >= fine_gbo(2, 1) - 1)) THEN
    1420       25000 :                                  IF (fi == fine_gbo(1, 1) .OR. fi == fine_gbo(2, 1)) THEN
    1421       10000 :                                     IF (ii /= 0) CYCLE
    1422        5000 :                                     wi = w_border0*wj
    1423       15000 :                                  ELSE IF (fi == fine_gbo(1, 1) + 1) THEN
    1424        2500 :                                     SELECT CASE (ii)
    1425             :                                     CASE (1)
    1426        2500 :                                        wi = w_border1(1)*wj
    1427             :                                     CASE (-1)
    1428        2500 :                                        wi = w_border1(2)*wj
    1429             :                                     CASE (-3)
    1430        2500 :                                        wi = w_border1(3)*wj
    1431             :                                     CASE default
    1432        7500 :                                        CYCLE
    1433             :                                     END SELECT
    1434             :                                  ELSE
    1435        2500 :                                     SELECT CASE (ii)
    1436             :                                     CASE (-1)
    1437        2500 :                                        wi = w_border1(1)*wj
    1438             :                                     CASE (1)
    1439        2500 :                                        wi = w_border1(2)*wj
    1440             :                                     CASE (3)
    1441        2500 :                                        wi = w_border1(3)*wj
    1442             :                                     CASE default
    1443        7500 :                                        CYCLE
    1444             :                                     END SELECT
    1445             :                                  END IF
    1446             :                               ELSE
    1447       42500 :                                  wi = weights_1d(ABS(ii) + 1)*wj
    1448             :                               END IF
    1449             :                            END IF
    1450             :                            fine_values(fi, fj, fk) = &
    1451             :                               fine_values(fi, fj, fk) + &
    1452      160000 :                               wi*coarse_coeffs(i, j, k)
    1453             :                         END DO
    1454             :                      END DO
    1455             : !                      CALL timestop(handle2)
    1456             :                   ELSE
    1457             : !                      CALL timeset(routineN//"_core1",handle2)
    1458    75542416 :                      ww0 = wj*w_0
    1459    94428020 :                      ww1 = wj*w_1
    1460    18885604 :                      IF (pbc .AND. .NOT. is_split) THEN
    1461           0 :                         v3 = coarse_coeffs(coarse_bo(2, 1), j, k)
    1462           0 :                         i = coarse_bo(1, 1)
    1463           0 :                         fi = 2*i + f_shift(1)
    1464           0 :                         v0 = coarse_coeffs(i, j, k)
    1465           0 :                         v1 = coarse_coeffs(i + 1, j, k)
    1466             :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1467           0 :                                                   ww0(1)*v3 + ww0(2)*v0 + ww0(3)*v1
    1468           0 :                         v2 = coarse_coeffs(i + 2, j, k)
    1469           0 :                         fi = fi + 1
    1470             :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1471           0 :                                                   ww1(1)*v3 + ww1(2)*v0 + ww1(3)*v1 + ww1(4)*v2
    1472    18885604 :                      ELSE IF (.NOT. has_i_lbound) THEN
    1473    18826844 :                         i = coarse_bo(1, 1)
    1474    18826844 :                         fi = 2*i + f_shift(1)
    1475    18826844 :                         v0 = coarse_coeffs(i, j, k)
    1476             :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1477    18826844 :                                                   w_border0*wj*v0
    1478    18826844 :                         v1 = coarse_coeffs(i + 1, j, k)
    1479    18826844 :                         v2 = coarse_coeffs(i + 2, j, k)
    1480    18826844 :                         fi = fi + 1
    1481             :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1482             :                                                   wj*(w_border1(1)*v0 + w_border1(2)*v1 + &
    1483    18826844 :                                                       w_border1(3)*v2)
    1484             :                      ELSE
    1485       58760 :                         i = coarse_bo(1, 1)
    1486       58760 :                         v0 = coarse_coeffs(i, j, k)
    1487       58760 :                         v1 = coarse_coeffs(i + 1, j, k)
    1488       58760 :                         v2 = coarse_coeffs(i + 2, j, k)
    1489       58760 :                         fi = 2*i + f_shift(1) + 1
    1490       58760 :                         IF (.NOT. (fi + 1 == fine_bo(1, 1) .OR. &
    1491             :                                    fi + 2 == fine_bo(1, 1))) THEN
    1492             :                            CALL cp_abort(__LOCATION__, &
    1493             :                                          "unexpected start index "// &
    1494             :                                          TRIM(cp_to_string(coarse_bo(1, 1)))//" "// &
    1495           0 :                                          TRIM(cp_to_string(fi)))
    1496             :                         END IF
    1497             :                      END IF
    1498    18885604 :                      fi = fi + 1
    1499    18885604 :                      IF (fi >= fine_bo(1, 1)) THEN
    1500             :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1501             :                                                   ww0(1)*v0 + ww0(2)*v1 + &
    1502    18885604 :                                                   ww0(3)*v2
    1503             :                      ELSE
    1504           0 :                         CPASSERT(fi + 1 == fine_bo(1, 1))
    1505             :                      END IF
    1506             : !                      CALL timestop(handle2)
    1507             : !                      CALL timeset(routineN//"_core",handle2)
    1508    18885604 :                      DO i = coarse_bo(1, 1) + 3, FLOOR((fine_bo(2, 1) - f_shift(1))/2.) - 3, 4
    1509    84267688 :                         v3 = coarse_coeffs(i, j, k)
    1510    84267688 :                         fi = fi + 1
    1511             :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1512             :                                                   (ww1(1)*v0 + ww1(2)*v1 + &
    1513    84267688 :                                                    ww1(3)*v2 + ww1(4)*v3)
    1514    84267688 :                         fi = fi + 1
    1515             :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1516             :                                                   (ww0(1)*v1 + ww0(2)*v2 + &
    1517    84267688 :                                                    ww0(3)*v3)
    1518    84267688 :                         v0 = coarse_coeffs(i + 1, j, k)
    1519    84267688 :                         fi = fi + 1
    1520             :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1521             :                                                   (ww1(4)*v0 + ww1(1)*v1 + &
    1522    84267688 :                                                    ww1(2)*v2 + ww1(3)*v3)
    1523    84267688 :                         fi = fi + 1
    1524             :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1525             :                                                   (ww0(1)*v2 + ww0(2)*v3 + &
    1526    84267688 :                                                    ww0(3)*v0)
    1527    84267688 :                         v1 = coarse_coeffs(i + 2, j, k)
    1528    84267688 :                         fi = fi + 1
    1529             :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1530             :                                                   (ww1(3)*v0 + ww1(4)*v1 + &
    1531    84267688 :                                                    ww1(1)*v2 + ww1(2)*v3)
    1532    84267688 :                         fi = fi + 1
    1533             :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1534             :                                                   (ww0(1)*v3 + ww0(2)*v0 + &
    1535    84267688 :                                                    ww0(3)*v1)
    1536    84267688 :                         v2 = coarse_coeffs(i + 3, j, k)
    1537    84267688 :                         fi = fi + 1
    1538             :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1539             :                                                   (ww1(2)*v0 + ww1(3)*v1 + &
    1540    84267688 :                                                    ww1(4)*v2 + ww1(1)*v3)
    1541    84267688 :                         fi = fi + 1
    1542             :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1543             :                                                   (ww0(1)*v0 + ww0(2)*v1 + &
    1544    84543910 :                                                    ww0(3)*v2)
    1545             :                      END DO
    1546             : !                      CALL timestop(handle2)
    1547             : !                      CALL timeset(routineN//"_clean",handle2)
    1548    18885604 :                      rest_b = MODULO(FLOOR((fine_bo(2, 1) - f_shift(1))/2.) - coarse_bo(1, 1) - 3 + 1, 4)
    1549    18885604 :                      IF (rest_b > 0) THEN
    1550    18508720 :                         i = FLOOR((fine_bo(2, 1) - f_shift(1))/2.) - rest_b + 1
    1551    18508720 :                         v3 = coarse_coeffs(i, j, k)
    1552    18508720 :                         fi = fi + 1
    1553             :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1554             :                                                   (ww1(1)*v0 + ww1(2)*v1 + &
    1555    18508720 :                                                    ww1(3)*v2 + ww1(4)*v3)
    1556    18508720 :                         fi = fi + 1
    1557             :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1558             :                                                   (ww0(1)*v1 + ww0(2)*v2 + &
    1559    18508720 :                                                    ww0(3)*v3)
    1560    18508720 :                         IF (rest_b > 1) THEN
    1561    18449960 :                            v0 = coarse_coeffs(i + 1, j, k)
    1562    18449960 :                            fi = fi + 1
    1563             :                            fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1564             :                                                      (ww1(4)*v0 + ww1(1)*v1 + &
    1565    18449960 :                                                       ww1(2)*v2 + ww1(3)*v3)
    1566    18449960 :                            fi = fi + 1
    1567             :                            fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1568             :                                                      (ww0(1)*v2 + ww0(2)*v3 + &
    1569    18449960 :                                                       ww0(3)*v0)
    1570    18449960 :                            IF (rest_b > 2) THEN
    1571       73160 :                               v1 = coarse_coeffs(i + 2, j, k)
    1572       73160 :                               fi = fi + 1
    1573             :                               fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1574             :                                                         (ww1(3)*v0 + ww1(4)*v1 + &
    1575       73160 :                                                          ww1(1)*v2 + ww1(2)*v3)
    1576       73160 :                               fi = fi + 1
    1577             :                               fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1578             :                                                         (ww0(1)*v3 + ww0(2)*v0 + &
    1579       73160 :                                                          ww0(3)*v1)
    1580       73160 :                               IF (pbc .AND. .NOT. is_split) THEN
    1581           0 :                                  v2 = coarse_coeffs(coarse_bo(1, 1), j, k)
    1582           0 :                                  fi = fi + 1
    1583             :                                  fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1584           0 :                                                            ww1(1)*v3 + ww1(2)*v0 + ww1(3)*v1 + ww1(4)*v2
    1585           0 :                                  fi = fi + 1
    1586             :                                  fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1587           0 :                                                            ww0(1)*v0 + ww0(2)*v1 + ww0(3)*v2
    1588           0 :                                  v3 = coarse_coeffs(coarse_bo(1, 1) + 1, j, k)
    1589           0 :                                  fi = fi + 1
    1590             :                                  fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1591           0 :                                                            ww1(1)*v0 + ww1(2)*v1 + ww1(3)*v2 + ww1(4)*v3
    1592       73160 :                               ELSE IF (has_i_ubound) THEN
    1593           0 :                                  v2 = coarse_coeffs(i + 3, j, k)
    1594           0 :                                  fi = fi + 1
    1595             :                                  fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1596           0 :                                                            ww1(1)*v3 + ww1(2)*v0 + ww1(3)*v1 + ww1(4)*v2
    1597           0 :                                  fi = fi + 1
    1598             :                                  fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1599           0 :                                                            ww0(1)*v0 + ww0(2)*v1 + ww0(3)*v2
    1600           0 :                                  IF (fi + 1 == fine_bo(2, 1)) THEN
    1601           0 :                                     v3 = coarse_coeffs(i + 4, j, k)
    1602           0 :                                     fi = fi + 1
    1603             :                                     fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1604           0 :                                                               ww1(1)*v0 + ww1(2)*v1 + ww1(3)*v2 + ww1(4)*v3
    1605             :                                  END IF
    1606             :                               ELSE
    1607       73160 :                                  fi = fi + 1
    1608             :                                  fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1609             :                                                            wj*(w_border1(3)*v3 + w_border1(2)*v0 + &
    1610       73160 :                                                                w_border1(1)*v1)
    1611       73160 :                                  fi = fi + 1
    1612             :                                  fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1613       73160 :                                                            w_border0*wj*v1
    1614             :                               END IF
    1615    18376800 :                            ELSE IF (pbc .AND. .NOT. is_split) THEN
    1616           0 :                               v1 = coarse_coeffs(coarse_bo(1, 1), j, k)
    1617           0 :                               fi = fi + 1
    1618             :                               fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1619           0 :                                                         ww1(1)*v2 + ww1(2)*v3 + ww1(3)*v0 + ww1(4)*v1
    1620           0 :                               fi = fi + 1
    1621             :                               fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1622           0 :                                                         ww0(1)*v3 + ww0(2)*v0 + ww0(3)*v1
    1623           0 :                               v2 = coarse_coeffs(coarse_bo(1, 1) + 1, j, k)
    1624           0 :                               fi = fi + 1
    1625             :                               fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1626           0 :                                                         ww1(1)*v3 + ww1(2)*v0 + ww1(3)*v1 + ww1(4)*v2
    1627    18376800 :                            ELSE IF (has_i_ubound) THEN
    1628           0 :                               v1 = coarse_coeffs(i + 2, j, k)
    1629           0 :                               fi = fi + 1
    1630             :                               fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1631           0 :                                                         ww1(1)*v2 + ww1(2)*v3 + ww1(3)*v0 + ww1(4)*v1
    1632           0 :                               fi = fi + 1
    1633             :                               fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1634           0 :                                                         ww0(1)*v3 + ww0(2)*v0 + ww0(3)*v1
    1635           0 :                               IF (fi + 1 == fine_bo(2, 1)) THEN
    1636           0 :                                  v2 = coarse_coeffs(i + 3, j, k)
    1637           0 :                                  fi = fi + 1
    1638             :                                  fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1639           0 :                                                            ww1(1)*v3 + ww1(2)*v0 + ww1(3)*v1 + ww1(4)*v2
    1640             :                               END IF
    1641             :                            ELSE
    1642    18376800 :                               fi = fi + 1
    1643             :                               fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1644             :                                                         wj*(w_border1(3)*v2 + w_border1(2)*v3 + &
    1645    18376800 :                                                             w_border1(1)*v0)
    1646    18376800 :                               fi = fi + 1
    1647             :                               fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1648    18376800 :                                                         w_border0*wj*v0
    1649             :                            END IF
    1650       58760 :                         ELSE IF (pbc .AND. .NOT. is_split) THEN
    1651           0 :                            v0 = coarse_coeffs(coarse_bo(1, 1), j, k)
    1652           0 :                            fi = fi + 1
    1653             :                            fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1654           0 :                                                      ww1(1)*v1 + ww1(2)*v2 + ww1(3)*v3 + ww1(4)*v0
    1655           0 :                            fi = fi + 1
    1656             :                            fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1657           0 :                                                      ww0(1)*v2 + ww0(2)*v3 + ww0(3)*v0
    1658           0 :                            v1 = coarse_coeffs(coarse_bo(1, 1) + 1, j, k)
    1659           0 :                            fi = fi + 1
    1660             :                            fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1661           0 :                                                      ww1(1)*v2 + ww1(2)*v3 + ww1(3)*v0 + ww1(4)*v1
    1662       58760 :                         ELSE IF (has_i_ubound) THEN
    1663       58760 :                            v0 = coarse_coeffs(i + 1, j, k)
    1664       58760 :                            fi = fi + 1
    1665             :                            fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1666       58760 :                                                      ww1(1)*v1 + ww1(2)*v2 + ww1(3)*v3 + ww1(4)*v0
    1667       58760 :                            fi = fi + 1
    1668             :                            fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1669       58760 :                                                      ww0(1)*v2 + ww0(2)*v3 + ww0(3)*v0
    1670       58760 :                            IF (fi + 1 == fine_bo(2, 1)) THEN
    1671       58760 :                               v1 = coarse_coeffs(i + 2, j, k)
    1672       58760 :                               fi = fi + 1
    1673             :                               fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1674       58760 :                                                         ww1(1)*v2 + ww1(2)*v3 + ww1(3)*v0 + ww1(4)*v1
    1675             :                            END IF
    1676             :                         ELSE
    1677           0 :                            fi = fi + 1
    1678             :                            fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1679             :                                                      wj*(w_border1(3)*v1 + w_border1(2)*v2 + &
    1680           0 :                                                          w_border1(1)*v3)
    1681           0 :                            fi = fi + 1
    1682             :                            fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1683           0 :                                                      w_border0*wj*v3
    1684             :                         END IF
    1685      376884 :                      ELSE IF (pbc .AND. .NOT. is_split) THEN
    1686           0 :                         v3 = coarse_coeffs(coarse_bo(1, 1), j, k)
    1687           0 :                         fi = fi + 1
    1688             :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1689           0 :                                                   ww1(1)*v0 + ww1(2)*v1 + ww1(3)*v2 + ww1(4)*v3
    1690           0 :                         fi = fi + 1
    1691             :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1692           0 :                                                   ww0(1)*v1 + ww0(2)*v2 + ww0(3)*v3
    1693           0 :                         v0 = coarse_coeffs(coarse_bo(1, 1) + 1, j, k)
    1694           0 :                         fi = fi + 1
    1695             :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1696           0 :                                                   ww1(1)*v1 + ww1(2)*v2 + ww1(3)*v3 + ww1(4)*v0
    1697      376884 :                      ELSE IF (has_i_ubound) THEN
    1698           0 :                         v3 = coarse_coeffs(i, j, k)
    1699           0 :                         fi = fi + 1
    1700             :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1701           0 :                                                   ww1(1)*v0 + ww1(2)*v1 + ww1(3)*v2 + ww1(4)*v3
    1702           0 :                         fi = fi + 1
    1703             :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1704           0 :                                                   ww0(1)*v1 + ww0(2)*v2 + ww0(3)*v3
    1705           0 :                         IF (fi + 1 == fine_bo(2, 1)) THEN
    1706           0 :                            v0 = coarse_coeffs(i + 1, j, k)
    1707           0 :                            fi = fi + 1
    1708             :                            fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1709           0 :                                                      ww1(1)*v1 + ww1(2)*v2 + ww1(3)*v3 + ww1(4)*v0
    1710             :                         END IF
    1711             :                      ELSE
    1712      376884 :                         fi = fi + 1
    1713             :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1714             :                                                   wj*(w_border1(3)*v0 + w_border1(2)*v1 + &
    1715      376884 :                                                       w_border1(1)*v2)
    1716      376884 :                         fi = fi + 1
    1717             :                         fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
    1718      376884 :                                                   w_border0*wj*v2
    1719             :                      END IF
    1720    18885604 :                      CPASSERT(fi == fine_bo(2, 1))
    1721             :                   END IF
    1722             : !                   CALL timestop(handle2)
    1723             :                END DO
    1724             :             END DO
    1725             :          END DO
    1726             :       END DO
    1727             : 
    1728        2260 :       IF (is_split) THEN
    1729          24 :          DEALLOCATE (coarse_coeffs)
    1730             :       END IF
    1731        2260 :       CALL timestop(handle)
    1732        4520 :    END SUBROUTINE add_coarse2fine
    1733             : 
    1734             : ! **************************************************************************************************
    1735             : !> \brief low level function that adds a coarse grid (without boundary)
    1736             : !>      to a fine grid.
    1737             : !>
    1738             : !>      It will add to
    1739             : !>
    1740             : !>        coarse_coeffs(coarse_bounds(1,1):coarse_bounds(2,1),
    1741             : !>                      coarse_bounds(1,2):coarse_bounds(2,2),
    1742             : !>                      coarse_bounds(1,3):coarse_bounds(2,3))
    1743             : !>
    1744             : !>      using
    1745             : !>
    1746             : !>        fine_values(2*coarse_bounds(1,1):2*coarse_bounds(2,1),
    1747             : !>                    2*coarse_bounds(1,2):2*coarse_bounds(2,2),
    1748             : !>                    2*coarse_bounds(1,3):2*coarse_bounds(2,3))
    1749             : !>
    1750             : !>      composed with the weights obtained by the direct product of the
    1751             : !>      1d coefficients weights:
    1752             : !>
    1753             : !>      for i,j,k in -3..3
    1754             : !>         w(i,j,k)=weights_1d(abs(i)+1)*weights_1d(abs(j)+1)*
    1755             : !>                  weights_1d(abs(k)+1)
    1756             : !> \param fine_values_pw 3d array where to add the values due to the
    1757             : !>        coarse coeffs
    1758             : !> \param coarse_coeffs_pw 3d array with boundary of size 1 with the values of the
    1759             : !>        coefficients
    1760             : !> \param weights_1d the weights of the 1d smearing
    1761             : !> \param w_border0 the 1d weight at the border
    1762             : !> \param w_border1 the 1d weights for a point one off the border
    1763             : !>        (w_border1(1) is the weight of the coefficent at the border)
    1764             : !> \param pbc ...
    1765             : !> \param safe_computation ...
    1766             : !> \author fawzi
    1767             : !> \note
    1768             : !>      see coarse2fine for some relevant notes
    1769             : ! **************************************************************************************************
    1770        1062 :    SUBROUTINE add_fine2coarse(fine_values_pw, coarse_coeffs_pw, &
    1771             :                               weights_1d, w_border0, w_border1, pbc, safe_computation)
    1772             :       TYPE(pw_r3d_rs_type), INTENT(IN)                   :: fine_values_pw, coarse_coeffs_pw
    1773             :       REAL(kind=dp), DIMENSION(4), INTENT(in)            :: weights_1d
    1774             :       REAL(kind=dp), INTENT(in)                          :: w_border0
    1775             :       REAL(kind=dp), DIMENSION(3), INTENT(in)            :: w_border1
    1776             :       LOGICAL, INTENT(in)                                :: pbc
    1777             :       LOGICAL, INTENT(in), OPTIONAL                      :: safe_computation
    1778             : 
    1779             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'add_fine2coarse'
    1780             : 
    1781             :       INTEGER :: coarse_slice_size, f_shift(3), fi, fj, fk, handle, handle2, i, ii, ij, ik, ip, j, &
    1782             :          k, n_procs, p, p_old, rcv_tot_size, rest_b, s(3), send_tot_size, ss, x, x_att
    1783        1062 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: pp_lb, pp_ub, rcv_offset, rcv_size, &
    1784        1062 :                                                             real_rcv_size, send_offset, send_size, &
    1785        1062 :                                                             sent_size
    1786             :       INTEGER, DIMENSION(2, 3)                           :: coarse_bo, coarse_gbo, fine_bo, &
    1787             :                                                             fine_gbo, my_coarse_bo
    1788        1062 :       INTEGER, DIMENSION(:), POINTER                     :: pos_of_x
    1789             :       LOGICAL                                            :: has_i_lbound, has_i_ubound, is_split, &
    1790             :                                                             local_data, safe_calc
    1791             :       REAL(kind=dp)                                      :: vv0, vv1, vv2, vv3, vv4, vv5, vv6, vv7, &
    1792             :                                                             wi, wj, wk
    1793        1062 :       REAL(kind=dp), ALLOCATABLE, DIMENSION(:)           :: rcv_buf, send_buf
    1794             :       REAL(kind=dp), DIMENSION(3)                        :: w_0, ww0
    1795             :       REAL(kind=dp), DIMENSION(4)                        :: w_1, ww1
    1796        1062 :       REAL(kind=dp), DIMENSION(:, :, :), POINTER         :: coarse_coeffs, fine_values
    1797             : 
    1798        1062 :       CALL timeset(routineN, handle)
    1799             : 
    1800        1062 :       safe_calc = .FALSE.
    1801        1062 :       IF (PRESENT(safe_computation)) safe_calc = safe_computation
    1802             : 
    1803       10620 :       my_coarse_bo = coarse_coeffs_pw%pw_grid%bounds_local
    1804       10620 :       coarse_gbo = coarse_coeffs_pw%pw_grid%bounds
    1805       10620 :       fine_bo = fine_values_pw%pw_grid%bounds_local
    1806       10620 :       fine_gbo = fine_values_pw%pw_grid%bounds
    1807        4248 :       f_shift = fine_gbo(1, :) - 2*coarse_gbo(1, :)
    1808        3150 :       is_split = ANY(coarse_gbo(:, 1) /= my_coarse_bo(:, 1))
    1809        1062 :       coarse_bo = my_coarse_bo
    1810        1062 :       IF (fine_bo(1, 1) <= fine_bo(2, 1)) THEN
    1811        1062 :          coarse_bo(1, 1) = FLOOR(REAL(fine_bo(1, 1) - f_shift(1), dp)/2._dp) - 1
    1812        1062 :          coarse_bo(2, 1) = FLOOR(REAL(fine_bo(2, 1) + 1 - f_shift(1), dp)/2._dp) + 1
    1813             :       ELSE
    1814           0 :          coarse_bo(1, 1) = coarse_gbo(2, 1)
    1815           0 :          coarse_bo(2, 1) = coarse_gbo(2, 1) - 1
    1816             :       END IF
    1817        1062 :       IF (.NOT. is_split .OR. .NOT. pbc) THEN
    1818        1062 :          coarse_bo(1, 1) = MAX(coarse_gbo(1, 1), coarse_bo(1, 1))
    1819        1062 :          coarse_bo(2, 1) = MIN(coarse_gbo(2, 1), coarse_bo(2, 1))
    1820             :       END IF
    1821        1062 :       has_i_ubound = (fine_gbo(2, 1) /= fine_bo(2, 1)) .OR. pbc .AND. is_split
    1822        1062 :       has_i_lbound = (fine_gbo(1, 1) /= fine_bo(1, 1)) .OR. pbc .AND. is_split
    1823             : 
    1824        1062 :       IF (pbc) THEN
    1825           0 :          CPASSERT(ALL(fine_gbo(1, :) == 2*coarse_gbo(1, :) + f_shift))
    1826           0 :          CPASSERT(ALL(fine_gbo(2, :) == 2*coarse_gbo(2, :) + f_shift + 1))
    1827             :       ELSE
    1828        4248 :          CPASSERT(ALL(fine_gbo(2, :) == 2*coarse_gbo(2, :) + f_shift))
    1829        4248 :          CPASSERT(ALL(fine_gbo(1, :) == 2*coarse_gbo(1, :) + f_shift))
    1830             :       END IF
    1831        1062 :       CPASSERT(coarse_gbo(2, 1) - coarse_gbo(1, 2) > 1)
    1832        1062 :       local_data = is_split ! ANY(coarse_bo/=my_coarse_bo)
    1833        1062 :       IF (local_data) THEN
    1834             :          ALLOCATE (coarse_coeffs(coarse_bo(1, 1):coarse_bo(2, 1), &
    1835             :                                  coarse_bo(1, 2):coarse_bo(2, 2), &
    1836         120 :                                  coarse_bo(1, 3):coarse_bo(2, 3)))
    1837       31240 :          coarse_coeffs = 0._dp
    1838             :       ELSE
    1839        1038 :          coarse_coeffs => coarse_coeffs_pw%array
    1840             :       END IF
    1841             : 
    1842        1062 :       fine_values => fine_values_pw%array
    1843        4248 :       w_0 = (/weights_1d(3), weights_1d(1), weights_1d(3)/)
    1844        5310 :       w_1 = (/weights_1d(4), weights_1d(2), weights_1d(2), weights_1d(4)/)
    1845             : 
    1846        4248 :       DO i = 1, 3
    1847        4248 :          s(i) = coarse_gbo(2, i) - coarse_gbo(1, i) + 1
    1848             :       END DO
    1849        4248 :       IF (ANY(s < 1)) RETURN
    1850             : 
    1851       14092 :       DO k = coarse_bo(1, 3), coarse_bo(2, 3)
    1852      105302 :          DO ik = -3, 3
    1853       91210 :             IF (pbc) THEN
    1854           0 :                wk = weights_1d(ABS(ik) + 1)
    1855           0 :                fk = fine_gbo(1, 3) + MODULO(2*k + ik - fine_gbo(1, 3) + f_shift(3), 2*s(3))
    1856             :             ELSE
    1857       91210 :                fk = 2*k + ik + f_shift(3)
    1858       91210 :                IF (fk <= fine_bo(1, 3) + 1 .OR. fk >= fine_bo(2, 3) - 1) THEN
    1859       19116 :                   IF (fk < fine_bo(1, 3) .OR. fk > fine_bo(2, 3)) CYCLE
    1860       10620 :                   IF (fk == fine_bo(1, 3) .OR. fk == fine_bo(2, 3)) THEN
    1861        4248 :                      IF (ik /= 0) CYCLE
    1862        2124 :                      wk = w_border0
    1863        6372 :                   ELSE IF (fk == fine_bo(1, 3) + 1) THEN
    1864        1062 :                      SELECT CASE (ik)
    1865             :                      CASE (1)
    1866        1062 :                         wk = w_border1(1)
    1867             :                      CASE (-1)
    1868        1062 :                         wk = w_border1(2)
    1869             :                      CASE (-3)
    1870        1062 :                         wk = w_border1(3)
    1871             :                      CASE default
    1872           0 :                         CPABORT("")
    1873        3186 :                         CYCLE
    1874             :                      END SELECT
    1875             :                   ELSE
    1876        1062 :                      SELECT CASE (ik)
    1877             :                      CASE (3)
    1878        1062 :                         wk = w_border1(3)
    1879             :                      CASE (1)
    1880        1062 :                         wk = w_border1(2)
    1881             :                      CASE (-1)
    1882        1062 :                         wk = w_border1(1)
    1883             :                      CASE default
    1884           0 :                         CPABORT("")
    1885        3186 :                         CYCLE
    1886             :                      END SELECT
    1887             :                   END IF
    1888             :                ELSE
    1889       72094 :                   wk = weights_1d(ABS(ik) + 1)
    1890             :                END IF
    1891             :             END IF
    1892     1471974 :             DO j = coarse_bo(1, 2), coarse_bo(2, 2)
    1893    11118042 :                DO ij = -3, 3
    1894     9648478 :                   IF (pbc) THEN
    1895             :                      fj = fine_gbo(1, 2) + MODULO(2*j + ij - fine_gbo(1, 2) + f_shift(2), &
    1896           0 :                                                   2*s(2))
    1897           0 :                      wj = weights_1d(ABS(ij) + 1)*wk
    1898             :                   ELSE
    1899     9648478 :                      fj = 2*j + ij + f_shift(2)
    1900     9648478 :                      IF (fj <= fine_bo(1, 2) + 1 .OR. fj >= fine_bo(2, 2) - 1) THEN
    1901     1450620 :                         IF (fj < fine_bo(1, 2) .OR. fj > fine_bo(2, 2)) CYCLE
    1902      805900 :                         IF (fj == fine_bo(1, 2) .OR. fj == fine_bo(2, 2)) THEN
    1903      322360 :                            IF (ij /= 0) CYCLE
    1904      161180 :                            wj = w_border0*wk
    1905      483540 :                         ELSE IF (fj == fine_bo(1, 2) + 1) THEN
    1906       80590 :                            SELECT CASE (ij)
    1907             :                            CASE (1)
    1908       80590 :                               wj = w_border1(1)*wk
    1909             :                            CASE (-1)
    1910       80590 :                               wj = w_border1(2)*wk
    1911             :                            CASE (-3)
    1912       80590 :                               wj = w_border1(3)*wk
    1913             :                            CASE default
    1914           0 :                               CPABORT("")
    1915      241770 :                               CYCLE
    1916             :                            END SELECT
    1917             :                         ELSE
    1918       80590 :                            SELECT CASE (ij)
    1919             :                            CASE (-1)
    1920       80590 :                               wj = w_border1(1)*wk
    1921             :                            CASE (1)
    1922       80590 :                               wj = w_border1(2)*wk
    1923             :                            CASE (3)
    1924       80590 :                               wj = w_border1(3)*wk
    1925             :                            CASE default
    1926           0 :                               CPABORT("")
    1927      241770 :                               CYCLE
    1928             :                            END SELECT
    1929             :                         END IF
    1930             :                      ELSE
    1931     8197858 :                         wj = weights_1d(ABS(ij) + 1)*wk
    1932             :                      END IF
    1933             :                   END IF
    1934             : 
    1935    10220932 :                   IF (coarse_bo(2, 1) - coarse_bo(1, 1) < 7 .OR. safe_calc) THEN
    1936     1559844 :                      DO i = coarse_bo(1, 1), coarse_bo(2, 1)
    1937    10785788 :                         DO ii = -3, 3
    1938     9225944 :                            IF (pbc .AND. .NOT. is_split) THEN
    1939           0 :                               wi = weights_1d(ABS(ii) + 1)*wj
    1940           0 :                               fi = fine_gbo(1, 1) + MODULO(2*i + ii - fine_gbo(1, 1) + f_shift(1), 2*s(1))
    1941             :                            ELSE
    1942     9225944 :                               fi = 2*i + ii + f_shift(1)
    1943     9225944 :                               IF (fi < fine_bo(1, 1) .OR. fi > fine_bo(2, 1)) CYCLE
    1944     7112560 :                               IF (((.NOT. pbc) .AND. fi <= fine_gbo(1, 1) + 1) .OR. &
    1945             :                                   ((.NOT. pbc) .AND. fi >= fine_gbo(2, 1) - 1)) THEN
    1946     2281160 :                                  IF (fi == fine_gbo(1, 1) .OR. fi == fine_gbo(2, 1)) THEN
    1947      912464 :                                     IF (ii /= 0) CYCLE
    1948      456232 :                                     wi = w_border0*wj
    1949     1368696 :                                  ELSE IF (fi == fine_gbo(1, 1) + 1) THEN
    1950      228116 :                                     SELECT CASE (ii)
    1951             :                                     CASE (1)
    1952      228116 :                                        wi = w_border1(1)*wj
    1953             :                                     CASE (-1)
    1954      228116 :                                        wi = w_border1(2)*wj
    1955             :                                     CASE (-3)
    1956      228116 :                                        wi = w_border1(3)*wj
    1957             :                                     CASE default
    1958      684348 :                                        CYCLE
    1959             :                                     END SELECT
    1960             :                                  ELSE
    1961      228116 :                                     SELECT CASE (ii)
    1962             :                                     CASE (-1)
    1963      228116 :                                        wi = w_border1(1)*wj
    1964             :                                     CASE (1)
    1965      228116 :                                        wi = w_border1(2)*wj
    1966             :                                     CASE (3)
    1967      228116 :                                        wi = w_border1(3)*wj
    1968             :                                     CASE default
    1969      684348 :                                        CYCLE
    1970             :                                     END SELECT
    1971             :                                  END IF
    1972             :                               ELSE
    1973     4831400 :                                  wi = weights_1d(ABS(ii) + 1)*wj
    1974             :                               END IF
    1975             :                            END IF
    1976             :                            coarse_coeffs(i, j, k) = &
    1977             :                               coarse_coeffs(i, j, k) + &
    1978    10543936 :                               wi*fine_values(fi, fj, fk)
    1979             :                         END DO
    1980             :                      END DO
    1981             :                   ELSE
    1982    34402904 :                      ww0 = wj*w_0
    1983    43003630 :                      ww1 = wj*w_1
    1984     8600726 :                      IF (pbc .AND. .NOT. is_split) THEN
    1985           0 :                         i = coarse_bo(1, 1) - 1
    1986           0 :                         vv2 = fine_values(fine_bo(2, 1) - 2, fj, fk)
    1987           0 :                         vv3 = fine_values(fine_bo(2, 1) - 1, fj, fk)
    1988           0 :                         vv4 = fine_values(fine_bo(2, 1), fj, fk)
    1989           0 :                         fi = fine_bo(1, 1)
    1990           0 :                         vv5 = fine_values(fi, fj, fk)
    1991           0 :                         fi = fi + 1
    1992           0 :                         vv6 = fine_values(fi, fj, fk)
    1993           0 :                         fi = fi + 1
    1994           0 :                         vv7 = fine_values(fi, fj, fk)
    1995             :                         coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
    1996           0 :                                                      + ww1(4)*vv2 + ww0(3)*vv3 + ww1(3)*vv4 + ww0(2)*vv5 + ww1(2)*vv6 + ww0(1)*vv7
    1997             :                         coarse_coeffs(i + 2, j, k) = coarse_coeffs(i + 2, j, k) &
    1998           0 :                                                      + ww1(4)*vv4 + ww0(3)*vv5 + ww1(3)*vv6 + ww0(2)*vv7
    1999             :                         coarse_coeffs(i + 3, j, k) = coarse_coeffs(i + 3, j, k) &
    2000           0 :                                                      + ww1(4)*vv6 + ww0(3)*vv7
    2001     8600726 :                      ELSE IF (has_i_lbound) THEN
    2002       47524 :                         i = coarse_bo(1, 1)
    2003       47524 :                         fi = fine_bo(1, 1) - 1
    2004       47524 :                         IF (i + 1 == FLOOR((fine_bo(1, 1) + 1 - f_shift(1))/2._dp)) THEN
    2005       47524 :                            fi = fi + 1
    2006       47524 :                            vv0 = fine_values(fi, fj, fk)
    2007             :                            coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) + &
    2008       47524 :                                                     vv0*ww0(3)
    2009             :                            coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) + &
    2010       47524 :                                                         vv0*ww0(2)
    2011             :                            coarse_coeffs(i + 2, j, k) = coarse_coeffs(i + 2, j, k) + &
    2012       47524 :                                                         vv0*ww0(1)
    2013             :                         END IF
    2014             :                      ELSE
    2015     8553202 :                         i = coarse_bo(1, 1)
    2016     8553202 :                         fi = 2*i + f_shift(1)
    2017     8553202 :                         vv0 = fine_values(fi, fj, fk)
    2018     8553202 :                         fi = fi + 1
    2019     8553202 :                         vv1 = fine_values(fi, fj, fk)
    2020     8553202 :                         fi = fi + 1
    2021     8553202 :                         vv2 = fine_values(fi, fj, fk)
    2022             :                         coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) + &
    2023     8553202 :                                                  (vv0*w_border0 + vv1*w_border1(1))*wj + vv2*ww0(1)
    2024             :                         coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) + &
    2025     8553202 :                                                      wj*w_border1(2)*vv1 + ww0(2)*vv2
    2026             :                         coarse_coeffs(i + 2, j, k) = coarse_coeffs(i + 2, j, k) + &
    2027     8553202 :                                                      wj*w_border1(3)*vv1 + ww0(3)*vv2
    2028             :                      END IF
    2029     8600726 :                      DO i = coarse_bo(1, 1) + 3, FLOOR((fine_bo(2, 1) - f_shift(1))/2._dp) - 3, 4
    2030    37559568 :                         fi = fi + 1
    2031    37559568 :                         vv0 = fine_values(fi, fj, fk)
    2032    37559568 :                         fi = fi + 1
    2033    37559568 :                         vv1 = fine_values(fi, fj, fk)
    2034    37559568 :                         fi = fi + 1
    2035    37559568 :                         vv2 = fine_values(fi, fj, fk)
    2036    37559568 :                         fi = fi + 1
    2037    37559568 :                         vv3 = fine_values(fi, fj, fk)
    2038    37559568 :                         fi = fi + 1
    2039    37559568 :                         vv4 = fine_values(fi, fj, fk)
    2040    37559568 :                         fi = fi + 1
    2041    37559568 :                         vv5 = fine_values(fi, fj, fk)
    2042    37559568 :                         fi = fi + 1
    2043    37559568 :                         vv6 = fine_values(fi, fj, fk)
    2044    37559568 :                         fi = fi + 1
    2045    37559568 :                         vv7 = fine_values(fi, fj, fk)
    2046             :                         coarse_coeffs(i - 3, j, k) = coarse_coeffs(i - 3, j, k) &
    2047    37559568 :                                                      + ww1(1)*vv0
    2048             :                         coarse_coeffs(i - 2, j, k) = coarse_coeffs(i - 2, j, k) &
    2049    37559568 :                                                      + ww1(2)*vv0 + ww0(1)*vv1 + ww1(1)*vv2
    2050             :                         coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
    2051    37559568 :                                                      + ww1(3)*vv0 + ww0(2)*vv1 + ww1(2)*vv2 + ww0(1)*vv3 + ww1(1)*vv4
    2052             :                         coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
    2053    37559568 :                                           + ww1(4)*vv0 + ww0(3)*vv1 + ww1(3)*vv2 + ww0(2)*vv3 + ww1(2)*vv4 + ww0(1)*vv5 + ww1(1)*vv6
    2054             :                         coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
    2055    37559568 :                                                      + ww1(4)*vv2 + ww0(3)*vv3 + ww1(3)*vv4 + ww0(2)*vv5 + ww1(2)*vv6 + ww0(1)*vv7
    2056             :                         coarse_coeffs(i + 2, j, k) = coarse_coeffs(i + 2, j, k) &
    2057    37559568 :                                                      + ww1(4)*vv4 + ww0(3)*vv5 + ww1(3)*vv6 + ww0(2)*vv7
    2058             :                         coarse_coeffs(i + 3, j, k) = coarse_coeffs(i + 3, j, k) &
    2059    37559568 :                                                      + ww1(4)*vv6 + ww0(3)*vv7
    2060             :                      END DO
    2061     8600726 :                      IF (.NOT. FLOOR((fine_bo(2, 1) - f_shift(1))/2._dp) - coarse_bo(1, 1) >= 4) THEN
    2062           0 :                         CPABORT("FLOOR((fine_bo(2,1)-f_shift(1))/2._dp)-coarse_bo(1,1)>=4")
    2063             :                      END IF
    2064     8600726 :                      rest_b = MODULO(FLOOR((fine_bo(2, 1) - f_shift(1))/2._dp) - coarse_bo(1, 1) - 6, 4)
    2065     8600726 :                      i = FLOOR((fine_bo(2, 1) - f_shift(1))/2._dp) - 3 - rest_b + 4
    2066     8600726 :                      CPASSERT(fi == (i - 2)*2 + f_shift(1))
    2067     8600726 :                      IF (rest_b > 0) THEN
    2068     8540210 :                         fi = fi + 1
    2069     8540210 :                         vv0 = fine_values(fi, fj, fk)
    2070     8540210 :                         fi = fi + 1
    2071     8540210 :                         vv1 = fine_values(fi, fj, fk)
    2072             :                         coarse_coeffs(i - 3, j, k) = coarse_coeffs(i - 3, j, k) &
    2073     8540210 :                                                      + ww1(1)*vv0
    2074             :                         coarse_coeffs(i - 2, j, k) = coarse_coeffs(i - 2, j, k) &
    2075     8540210 :                                                      + ww1(2)*vv0 + ww0(1)*vv1
    2076             :                         coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
    2077     8540210 :                                                      + ww1(3)*vv0 + ww0(2)*vv1
    2078             :                         coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
    2079     8540210 :                                                  + ww1(4)*vv0 + ww0(3)*vv1
    2080     8540210 :                         IF (rest_b > 1) THEN
    2081     8492686 :                            fi = fi + 1
    2082     8492686 :                            vv2 = fine_values(fi, fj, fk)
    2083     8492686 :                            fi = fi + 1
    2084     8492686 :                            vv3 = fine_values(fi, fj, fk)
    2085             :                            coarse_coeffs(i - 2, j, k) = coarse_coeffs(i - 2, j, k) &
    2086     8492686 :                                                         + ww1(1)*vv2
    2087             :                            coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
    2088     8492686 :                                                         + ww1(2)*vv2 + ww0(1)*vv3
    2089             :                            coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
    2090     8492686 :                                                     + ww1(3)*vv2 + ww0(2)*vv3
    2091             :                            coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
    2092     8492686 :                                                         + ww1(4)*vv2 + ww0(3)*vv3
    2093     8492686 :                            IF (rest_b > 2) THEN
    2094       61924 :                               fi = fi + 1
    2095       61924 :                               vv4 = fine_values(fi, fj, fk)
    2096       61924 :                               fi = fi + 1
    2097       61924 :                               vv5 = fine_values(fi, fj, fk)
    2098       61924 :                               fi = fi + 1
    2099       61924 :                               vv6 = fine_values(fi, fj, fk)
    2100       61924 :                               fi = fi + 1
    2101       61924 :                               vv7 = fine_values(fi, fj, fk)
    2102             :                               coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
    2103       61924 :                                                            + ww1(1)*vv4
    2104       61924 :                               IF (has_i_ubound) THEN
    2105           0 :                                  IF (coarse_bo(2, 1) - 2 == FLOOR((fine_bo(2, 1) - f_shift(1))/2._dp)) THEN
    2106           0 :                                     fi = fi + 1
    2107           0 :                                     vv0 = fine_values(fi, fj, fk)
    2108             :                                     coarse_coeffs(i + 4, j, k) = coarse_coeffs(i + 4, j, k) &
    2109           0 :                                                                  + vv0*ww1(4)
    2110             :                                  ELSE
    2111             :                                     vv0 = 0._dp
    2112             :                                  END IF
    2113             :                                  coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
    2114           0 :                                                           + ww1(2)*vv4 + ww0(1)*vv5 + ww1(1)*vv6
    2115             :                                  coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
    2116           0 :                                                               + ww1(3)*vv4 + ww0(2)*vv5 + ww1(2)*vv6 + ww0(1)*vv7 + vv0*ww1(1)
    2117             :                                  coarse_coeffs(i + 2, j, k) = coarse_coeffs(i + 2, j, k) &
    2118           0 :                                                               + ww1(4)*vv4 + ww0(3)*vv5 + ww1(3)*vv6 + ww0(2)*vv7 + vv0*ww1(2)
    2119             :                                  coarse_coeffs(i + 3, j, k) = coarse_coeffs(i + 3, j, k) &
    2120           0 :                                                               + ww1(4)*vv6 + ww0(3)*vv7 + vv0*ww1(3)
    2121       61924 :                               ELSEIF (pbc .AND. .NOT. is_split) THEN
    2122           0 :                                  fi = fi + 1
    2123           0 :                                  vv0 = fine_values(fi, fj, fk)
    2124           0 :                                  vv1 = fine_values(fine_bo(1, 1), fj, fk)
    2125           0 :                                  vv2 = fine_values(fine_bo(1, 1) + 1, fj, fk)
    2126             :                                  coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
    2127           0 :                                                           + ww1(2)*vv4 + ww0(1)*vv5 + ww1(1)*vv6
    2128             :                                  coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
    2129           0 :                                                               + ww1(3)*vv4 + ww0(2)*vv5 + ww1(2)*vv6 + ww0(1)*vv7 + vv0*ww1(1)
    2130             :                                  coarse_coeffs(i + 2, j, k) = coarse_coeffs(i + 2, j, k) &
    2131             :                                                               + ww1(4)*vv4 + ww0(3)*vv5 + ww1(3)*vv6 + ww0(2)*vv7 + vv0*ww1(2) &
    2132           0 :                                                               + vv1*ww0(1) + vv2*ww1(1)
    2133             :                               ELSE
    2134             :                                  coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
    2135       61924 :                                                           + ww1(2)*vv4 + ww0(1)*vv5 + wj*w_border1(3)*vv6
    2136             :                                  coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
    2137       61924 :                                                               + ww1(3)*vv4 + ww0(2)*vv5 + wj*w_border1(2)*vv6
    2138             :                                  coarse_coeffs(i + 2, j, k) = coarse_coeffs(i + 2, j, k) &
    2139       61924 :                                                               + ww1(4)*vv4 + ww0(3)*vv5 + wj*w_border1(1)*vv6 + w_border0*wj*vv7
    2140             :                               END IF
    2141             :                            ELSE
    2142     8430762 :                               fi = fi + 1
    2143     8430762 :                               vv4 = fine_values(fi, fj, fk)
    2144     8430762 :                               fi = fi + 1
    2145     8430762 :                               vv5 = fine_values(fi, fj, fk)
    2146     8430762 :                               IF (has_i_ubound) THEN
    2147           0 :                                  IF (coarse_bo(2, 1) - 2 == FLOOR((fine_bo(2, 1) - f_shift(1))/2._dp)) THEN
    2148           0 :                                     fi = fi + 1
    2149           0 :                                     vv6 = fine_values(fi, fj, fk)
    2150             :                                     coarse_coeffs(i + 3, j, k) = coarse_coeffs(i + 3, j, k) &
    2151           0 :                                                                  + ww1(4)*vv6
    2152             :                                  ELSE
    2153             :                                     vv6 = 0._dp
    2154             :                                  END IF
    2155             :                                  coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
    2156           0 :                                                               + ww1(1)*vv4
    2157             :                                  coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
    2158           0 :                                                           + ww1(2)*vv4 + ww0(1)*vv5 + ww1(1)*vv6
    2159             :                                  coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
    2160           0 :                                                               + ww1(3)*vv4 + ww0(2)*vv5 + ww1(2)*vv6
    2161             :                                  coarse_coeffs(i + 2, j, k) = coarse_coeffs(i + 2, j, k) &
    2162           0 :                                                               + ww1(4)*vv4 + ww0(3)*vv5 + ww1(3)*vv6
    2163     8430762 :                               ELSEIF (pbc .AND. .NOT. is_split) THEN
    2164           0 :                                  fi = fi + 1
    2165           0 :                                  vv6 = fine_values(fi, fj, fk)
    2166           0 :                                  vv7 = fine_values(fine_bo(1, 1), fj, fk)
    2167           0 :                                  vv0 = fine_values(fine_bo(1, 1) + 1, fj, fk)
    2168             :                                  coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
    2169           0 :                                                               + ww1(1)*vv4
    2170             :                                  coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
    2171             :                                                           + ww1(4)*vv0 + ww0(3)*vv1 + ww1(3)*vv2 + ww0(2)*vv3 + &
    2172           0 :                                                           ww1(2)*vv4 + ww0(1)*vv5 + ww1(1)*vv6
    2173             :                                  coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
    2174             :                                                               + ww1(4)*vv2 + ww0(3)*vv3 + ww1(3)*vv4 + ww0(2)*vv5 + ww1(2)*vv6 &
    2175           0 :                                                               + ww0(1)*vv7 + ww1(1)*vv0
    2176             :                               ELSE
    2177             :                                  coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
    2178     8430762 :                                                               + wj*w_border1(3)*vv4
    2179             :                                  coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
    2180     8430762 :                                                           + wj*w_border1(2)*vv4
    2181             :                                  coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
    2182     8430762 :                                                               + wj*(w_border1(1)*vv4 + w_border0*vv5)
    2183             :                               END IF
    2184             :                            END IF
    2185             :                         ELSE
    2186       47524 :                            fi = fi + 1
    2187       47524 :                            vv2 = fine_values(fi, fj, fk)
    2188       47524 :                            fi = fi + 1
    2189       47524 :                            vv3 = fine_values(fi, fj, fk)
    2190       47524 :                            IF (has_i_ubound) THEN
    2191       47524 :                               IF (coarse_bo(2, 1) - 2 == FLOOR((fine_bo(2, 1) - f_shift(1))/2._dp)) THEN
    2192       47524 :                                  fi = fi + 1
    2193       47524 :                                  vv4 = fine_values(fi, fj, fk)
    2194             :                                  coarse_coeffs(i + 2, j, k) = coarse_coeffs(i + 2, j, k) &
    2195       47524 :                                                               + ww1(4)*vv4
    2196             :                               ELSE
    2197             :                                  vv4 = 0._dp
    2198             :                               END IF
    2199             :                               coarse_coeffs(i - 2, j, k) = coarse_coeffs(i - 2, j, k) &
    2200       47524 :                                                            + ww1(1)*vv2
    2201             :                               coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
    2202       47524 :                                                            + ww1(2)*vv2 + ww0(1)*vv3 + ww1(1)*vv4
    2203             :                               coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
    2204       47524 :                                                        + ww1(3)*vv2 + ww0(2)*vv3 + ww1(2)*vv4
    2205             :                               coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
    2206       47524 :                                                            + ww1(4)*vv2 + ww0(3)*vv3 + ww1(3)*vv4
    2207           0 :                            ELSEIF (pbc .AND. .NOT. is_split) THEN
    2208           0 :                               fi = fi + 1
    2209           0 :                               vv4 = fine_values(fi, fj, fk)
    2210           0 :                               vv5 = fine_values(fine_bo(1, 1), fj, fk)
    2211           0 :                               vv6 = fine_values(fine_bo(1, 1) + 1, fj, fk)
    2212             :                               coarse_coeffs(i - 2, j, k) = coarse_coeffs(i - 2, j, k) &
    2213           0 :                                                            + ww1(1)*vv2
    2214             :                               coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
    2215           0 :                                                            + ww1(2)*vv2 + ww0(1)*vv3 + ww1(1)*vv4
    2216             :                               coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
    2217           0 :                                                        + ww1(3)*vv2 + ww0(2)*vv3 + ww1(2)*vv4 + vv5*ww0(1) + ww1(1)*vv6
    2218             :                            ELSE
    2219             :                               coarse_coeffs(i - 2, j, k) = coarse_coeffs(i - 2, j, k) &
    2220           0 :                                                            + wj*w_border1(3)*vv2
    2221             :                               coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
    2222           0 :                                                            + wj*w_border1(2)*vv2
    2223             :                               coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
    2224           0 :                                                        + wj*(w_border1(1)*vv2 + w_border0*vv3)
    2225             :                            END IF
    2226             :                         END IF
    2227             :                      ELSE
    2228       60516 :                         fi = fi + 1
    2229       60516 :                         vv0 = fine_values(fi, fj, fk)
    2230       60516 :                         fi = fi + 1
    2231       60516 :                         vv1 = fine_values(fi, fj, fk)
    2232       60516 :                         IF (has_i_ubound) THEN
    2233           0 :                            IF (coarse_bo(2, 1) - 2 == FLOOR((fine_bo(2, 1) - f_shift(1))/2._dp)) THEN
    2234           0 :                               fi = fi + 1
    2235           0 :                               vv2 = fine_values(fi, fj, fk)
    2236             :                               coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
    2237           0 :                                                            + ww1(4)*vv2
    2238             :                            ELSE
    2239             :                               vv2 = 0._dp
    2240             :                            END IF
    2241             :                            coarse_coeffs(i - 3, j, k) = coarse_coeffs(i - 3, j, k) &
    2242           0 :                                                         + ww1(1)*vv0
    2243             :                            coarse_coeffs(i - 2, j, k) = coarse_coeffs(i - 2, j, k) &
    2244           0 :                                                         + ww1(2)*vv0 + ww0(1)*vv1 + ww1(1)*vv2
    2245             :                            coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
    2246           0 :                                                         + ww1(3)*vv0 + ww0(2)*vv1 + ww1(2)*vv2
    2247             :                            coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
    2248           0 :                                                     + ww1(4)*vv0 + ww0(3)*vv1 + ww1(3)*vv2
    2249       60516 :                         ELSEIF (pbc .AND. .NOT. is_split) THEN
    2250           0 :                            fi = fi + 1
    2251           0 :                            vv2 = fine_values(fi, fj, fk)
    2252           0 :                            vv3 = fine_values(fine_bo(1, 1), fk, fk)
    2253           0 :                            vv4 = fine_values(fine_bo(1, 1) + 1, fk, fk)
    2254             :                            coarse_coeffs(i - 3, j, k) = coarse_coeffs(i - 3, j, k) &
    2255           0 :                                                         + ww1(1)*vv0
    2256             :                            coarse_coeffs(i - 2, j, k) = coarse_coeffs(i - 2, j, k) &
    2257           0 :                                                         + ww1(2)*vv0 + ww0(1)*vv1 + ww1(1)*vv2
    2258             :                            coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
    2259           0 :                                                         + ww1(3)*vv0 + ww0(2)*vv1 + ww1(2)*vv2 + ww0(1)*vv3 + ww1(1)*vv4
    2260             :                         ELSE
    2261             :                            coarse_coeffs(i - 3, j, k) = coarse_coeffs(i - 3, j, k) &
    2262       60516 :                                                         + wj*w_border1(3)*vv0
    2263             :                            coarse_coeffs(i - 2, j, k) = coarse_coeffs(i - 2, j, k) &
    2264       60516 :                                                         + wj*w_border1(2)*vv0
    2265             :                            coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
    2266       60516 :                                                         + wj*(w_border1(1)*vv0 + w_border0*vv1)
    2267             :                         END IF
    2268             :                      END IF
    2269     8600726 :                      CPASSERT(fi == fine_bo(2, 1))
    2270             :                   END IF
    2271             :                END DO
    2272             :             END DO
    2273             :          END DO
    2274             :       END DO
    2275             : 
    2276             :       ! *** parallel case
    2277        1062 :       IF (is_split) THEN
    2278          24 :          CALL timeset(routineN//"_comm", handle2)
    2279             :          coarse_slice_size = (coarse_bo(2, 2) - coarse_bo(1, 2) + 1)* &
    2280          24 :                              (coarse_bo(2, 3) - coarse_bo(1, 3) + 1)
    2281          24 :          n_procs = coarse_coeffs_pw%pw_grid%para%group_size
    2282             :          ALLOCATE (send_size(0:n_procs - 1), send_offset(0:n_procs - 1), &
    2283             :                    sent_size(0:n_procs - 1), rcv_size(0:n_procs - 1), &
    2284             :                    rcv_offset(0:n_procs - 1), pp_lb(0:n_procs - 1), &
    2285         408 :                    pp_ub(0:n_procs - 1), real_rcv_size(0:n_procs - 1))
    2286             : 
    2287             :          ! ** send size count
    2288             : 
    2289          24 :          pos_of_x => coarse_coeffs_pw%pw_grid%para%pos_of_x
    2290          72 :          send_size = 0
    2291         184 :          DO x = coarse_bo(1, 1), coarse_bo(2, 1)
    2292         160 :             p = pos_of_x(coarse_gbo(1, 1) + MODULO(x - coarse_gbo(1, 1), s(1)))
    2293         184 :             send_size(p) = send_size(p) + coarse_slice_size
    2294             :          END DO
    2295             : 
    2296             :          ! ** rcv size count
    2297             : 
    2298          24 :          pos_of_x => fine_values_pw%pw_grid%para%pos_of_x
    2299          24 :          p_old = pos_of_x(fine_gbo(1, 1))
    2300          72 :          pp_lb = fine_gbo(2, 1)
    2301          72 :          pp_ub = fine_gbo(2, 1) - 1
    2302          24 :          pp_lb(p_old) = fine_gbo(1, 1)
    2303         496 :          DO x = fine_gbo(1, 1), fine_gbo(2, 1)
    2304         472 :             p = pos_of_x(x)
    2305         496 :             IF (p /= p_old) THEN
    2306          24 :                pp_ub(p_old) = x - 1
    2307          24 :                pp_lb(p) = x
    2308          24 :                p_old = p
    2309             :             END IF
    2310             :          END DO
    2311          24 :          pp_ub(p_old) = fine_gbo(2, 1)
    2312             : 
    2313          72 :          DO ip = 0, n_procs - 1
    2314          48 :             IF (pp_lb(ip) <= pp_ub(ip)) THEN
    2315          48 :                pp_lb(ip) = FLOOR(REAL(pp_lb(ip) - f_shift(1), dp)/2._dp) - 1
    2316          48 :                pp_ub(ip) = FLOOR(REAL(pp_ub(ip) + 1 - f_shift(1), dp)/2._dp) + 1
    2317             :             ELSE
    2318           0 :                pp_lb(ip) = coarse_gbo(2, 1)
    2319           0 :                pp_ub(ip) = coarse_gbo(2, 1) - 1
    2320             :             END IF
    2321          72 :             IF (.NOT. is_split .OR. .NOT. pbc) THEN
    2322          48 :                pp_lb(ip) = MAX(pp_lb(ip), coarse_gbo(1, 1))
    2323          48 :                pp_ub(ip) = MIN(pp_ub(ip), coarse_gbo(2, 1))
    2324             :             END IF
    2325             :          END DO
    2326             : 
    2327          72 :          rcv_size = 0
    2328          72 :          DO ip = 0, n_procs - 1
    2329          48 :             DO x = pp_lb(ip), coarse_gbo(1, 1) - 1
    2330           0 :                x_att = coarse_gbo(1, 1) + MODULO(x - coarse_gbo(1, 1), s(1))
    2331          48 :                IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
    2332           0 :                   rcv_size(ip) = rcv_size(ip) + coarse_slice_size
    2333             :                END IF
    2334             :             END DO
    2335             :             rcv_size(ip) = rcv_size(ip) + coarse_slice_size* &
    2336             :                            MAX(0, &
    2337          48 :                                MIN(pp_ub(ip), my_coarse_bo(2, 1)) - MAX(pp_lb(ip), my_coarse_bo(1, 1)) + 1)
    2338          72 :             DO x = coarse_gbo(2, 1) + 1, pp_ub(ip)
    2339           0 :                x_att = coarse_gbo(1, 1) + MODULO(x - coarse_gbo(1, 1), s(1))
    2340          48 :                IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
    2341           0 :                   rcv_size(ip) = rcv_size(ip) + coarse_slice_size
    2342             :                END IF
    2343             :             END DO
    2344             :          END DO
    2345             : 
    2346             :          ! ** offsets & alloc send-rcv
    2347             : 
    2348             :          send_tot_size = 0
    2349          72 :          DO ip = 0, n_procs - 1
    2350          48 :             send_offset(ip) = send_tot_size
    2351          72 :             send_tot_size = send_tot_size + send_size(ip)
    2352             :          END DO
    2353          24 :          IF (send_tot_size /= (coarse_bo(2, 1) - coarse_bo(1, 1) + 1)*coarse_slice_size) &
    2354           0 :             CPABORT("Error calculating send_tot_size")
    2355          72 :          ALLOCATE (send_buf(0:send_tot_size - 1))
    2356             : 
    2357          24 :          rcv_tot_size = 0
    2358          72 :          DO ip = 0, n_procs - 1
    2359          48 :             rcv_offset(ip) = rcv_tot_size
    2360          72 :             rcv_tot_size = rcv_tot_size + rcv_size(ip)
    2361             :          END DO
    2362          72 :          ALLOCATE (rcv_buf(0:rcv_tot_size - 1))
    2363             : 
    2364             :          ! ** fill send buffer
    2365             : 
    2366          24 :          pos_of_x => coarse_coeffs_pw%pw_grid%para%pos_of_x
    2367             :          p_old = pos_of_x(coarse_gbo(1, 1) &
    2368          24 :                           + MODULO(coarse_bo(1, 1) - coarse_gbo(1, 1), s(1)))
    2369          72 :          sent_size(:) = send_offset
    2370          24 :          ss = coarse_bo(2, 1) - coarse_bo(1, 1) + 1
    2371         184 :          DO x = coarse_bo(1, 1), coarse_bo(2, 1)
    2372         160 :             p = pos_of_x(coarse_gbo(1, 1) + MODULO(x - coarse_gbo(1, 1), s(1)))
    2373             :             CALL dcopy(coarse_slice_size, &
    2374             :                        coarse_coeffs(x, coarse_bo(1, 2), &
    2375         160 :                                      coarse_bo(1, 3)), ss, send_buf(sent_size(p)), 1)
    2376         184 :             sent_size(p) = sent_size(p) + coarse_slice_size
    2377             :          END DO
    2378             : 
    2379          48 :          IF (ANY(sent_size(0:n_procs - 2) /= send_offset(1:n_procs - 1))) &
    2380           0 :             CPABORT("error 1 filling send buffer")
    2381          24 :          IF (sent_size(n_procs - 1) /= send_tot_size) &
    2382           0 :             CPABORT("error 2 filling send buffer")
    2383             : 
    2384             :          IF (local_data) THEN
    2385          24 :             DEALLOCATE (coarse_coeffs)
    2386             :          ELSE
    2387             :             NULLIFY (coarse_coeffs)
    2388             :          END IF
    2389             : 
    2390          48 :          CPASSERT(ALL(sent_size(:n_procs - 2) == send_offset(1:)))
    2391          24 :          CPASSERT(sent_size(n_procs - 1) == send_tot_size)
    2392             :          ! test send/rcv sizes
    2393          24 :          CALL coarse_coeffs_pw%pw_grid%para%group%alltoall(send_size, real_rcv_size, 1)
    2394             : 
    2395          72 :          CPASSERT(ALL(real_rcv_size == rcv_size))
    2396             :          ! all2all
    2397             :          CALL coarse_coeffs_pw%pw_grid%para%group%alltoall(sb=send_buf, scount=send_size, sdispl=send_offset, &
    2398          24 :                                                            rb=rcv_buf, rcount=rcv_size, rdispl=rcv_offset)
    2399             : 
    2400             :          ! ** sum & reorder rcv buffer
    2401             : 
    2402          72 :          sent_size(:) = rcv_offset
    2403          72 :          DO ip = 0, n_procs - 1
    2404             : 
    2405          48 :             DO x = pp_lb(ip), coarse_gbo(1, 1) - 1
    2406           0 :                x_att = coarse_gbo(1, 1) + MODULO(x - coarse_gbo(1, 1), s(1))
    2407          48 :                IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
    2408           0 :                   ii = sent_size(ip)
    2409           0 :                   DO k = coarse_bo(1, 3), coarse_bo(2, 3)
    2410           0 :                      DO j = coarse_bo(1, 2), coarse_bo(2, 2)
    2411           0 :                         coarse_coeffs_pw%array(x_att, j, k) = coarse_coeffs_pw%array(x_att, j, k) + rcv_buf(ii)
    2412           0 :                         ii = ii + 1
    2413             :                      END DO
    2414             :                   END DO
    2415           0 :                   sent_size(ip) = ii
    2416             :                END IF
    2417             :             END DO
    2418             : 
    2419          48 :             ii = sent_size(ip)
    2420         208 :             DO x_att = MAX(pp_lb(ip), my_coarse_bo(1, 1)), MIN(pp_ub(ip), my_coarse_bo(2, 1))
    2421        2160 :                DO k = coarse_bo(1, 3), coarse_bo(2, 3)
    2422       29920 :                   DO j = coarse_bo(1, 2), coarse_bo(2, 2)
    2423       27808 :                      coarse_coeffs_pw%array(x_att, j, k) = coarse_coeffs_pw%array(x_att, j, k) + rcv_buf(ii)
    2424       29760 :                      ii = ii + 1
    2425             :                   END DO
    2426             :                END DO
    2427             :             END DO
    2428          48 :             sent_size(ip) = ii
    2429             : 
    2430          72 :             DO x = coarse_gbo(2, 1) + 1, pp_ub(ip)
    2431           0 :                x_att = coarse_gbo(1, 1) + MODULO(x - coarse_gbo(1, 1), s(1))
    2432          48 :                IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
    2433           0 :                   ii = sent_size(ip)
    2434           0 :                   DO k = coarse_bo(1, 3), coarse_bo(2, 3)
    2435           0 :                      DO j = coarse_bo(1, 2), coarse_bo(2, 2)
    2436           0 :                         coarse_coeffs_pw%array(x_att, j, k) = coarse_coeffs_pw%array(x_att, j, k) + rcv_buf(ii)
    2437           0 :                         ii = ii + 1
    2438             :                      END DO
    2439             :                   END DO
    2440           0 :                   sent_size(ip) = ii
    2441             :                END IF
    2442             :             END DO
    2443             : 
    2444             :          END DO
    2445             : 
    2446          48 :          IF (ANY(sent_size(0:n_procs - 2) /= rcv_offset(1:n_procs - 1))) &
    2447           0 :             CPABORT("error 1 handling the rcv buffer")
    2448          24 :          IF (sent_size(n_procs - 1) /= rcv_tot_size) &
    2449           0 :             CPABORT("error 2 handling the rcv buffer")
    2450             : 
    2451             :          ! dealloc
    2452          24 :          DEALLOCATE (send_size, send_offset, rcv_size, rcv_offset)
    2453          24 :          DEALLOCATE (send_buf, rcv_buf, real_rcv_size)
    2454          24 :          DEALLOCATE (pp_ub, pp_lb)
    2455          48 :          CALL timestop(handle2)
    2456             :       ELSE
    2457             :          CPASSERT(.NOT. local_data)
    2458             :       END IF
    2459             : 
    2460        1062 :       CALL timestop(handle)
    2461        2124 :    END SUBROUTINE add_fine2coarse
    2462             : 
    2463             : ! **************************************************************************************************
    2464             : !> \brief ...
    2465             : !> \param preconditioner the preconditioner to create
    2466             : !> \param precond_kind the kind of preconditioner to use
    2467             : !> \param pool a pool with grids of the same type as the elements to
    2468             : !>        precondition
    2469             : !> \param pbc if periodic boundary conditions should be applied
    2470             : !> \param transpose ...
    2471             : !> \author fawzi
    2472             : ! **************************************************************************************************
    2473       33840 :    SUBROUTINE pw_spline_precond_create(preconditioner, precond_kind, &
    2474             :                                        pool, pbc, transpose)
    2475             :       TYPE(pw_spline_precond_type), INTENT(OUT)          :: preconditioner
    2476             :       INTEGER, INTENT(in)                                :: precond_kind
    2477             :       TYPE(pw_pool_type), INTENT(IN), POINTER            :: pool
    2478             :       LOGICAL, INTENT(in)                                :: pbc, transpose
    2479             : 
    2480             :       preconditioner%kind = no_precond
    2481        3760 :       preconditioner%pool => pool
    2482        3760 :       preconditioner%pbc = pbc
    2483        3760 :       preconditioner%transpose = transpose
    2484        3760 :       CALL pool%retain()
    2485        3760 :       CALL pw_spline_precond_set_kind(preconditioner, precond_kind)
    2486        3760 :    END SUBROUTINE pw_spline_precond_create
    2487             : 
    2488             : ! **************************************************************************************************
    2489             : !> \brief switches the types of precoditioner to use
    2490             : !> \param preconditioner the preconditioner to be changed
    2491             : !> \param precond_kind the new kind of preconditioner to use
    2492             : !> \param pbc ...
    2493             : !> \param transpose ...
    2494             : !> \author fawzi
    2495             : ! **************************************************************************************************
    2496        7520 :    SUBROUTINE pw_spline_precond_set_kind(preconditioner, precond_kind, pbc, &
    2497             :                                          transpose)
    2498             :       TYPE(pw_spline_precond_type), INTENT(INOUT)        :: preconditioner
    2499             :       INTEGER, INTENT(in)                                :: precond_kind
    2500             :       LOGICAL, INTENT(in), OPTIONAL                      :: pbc, transpose
    2501             : 
    2502             :       LOGICAL                                            :: do_3d_coeff
    2503             :       REAL(kind=dp)                                      :: s
    2504             : 
    2505        7520 :       IF (PRESENT(transpose)) preconditioner%transpose = transpose
    2506        7520 :       do_3d_coeff = .FALSE.
    2507        7520 :       preconditioner%kind = precond_kind
    2508        7520 :       IF (PRESENT(pbc)) preconditioner%pbc = pbc
    2509             :       SELECT CASE (precond_kind)
    2510             :       CASE (no_precond)
    2511             :       CASE (precond_spl3_aint2)
    2512           0 :          preconditioner%coeffs_1d = (/-1.66_dp*0.25_dp, 1.66_dp, -1.66_dp*0.25_dp/)
    2513           0 :          preconditioner%sharpen = .FALSE.
    2514           0 :          preconditioner%normalize = .FALSE.
    2515        3760 :          do_3d_coeff = .TRUE.
    2516             :       CASE (precond_spl3_3)
    2517        3760 :          preconditioner%coeffs_1d(1) = -0.25_dp*1.6_dp
    2518        3760 :          preconditioner%coeffs_1d(2) = 1.6_dp
    2519        3760 :          preconditioner%coeffs_1d(3) = -0.25_dp*1.6_dp
    2520        3760 :          preconditioner%sharpen = .FALSE.
    2521        3760 :          preconditioner%normalize = .FALSE.
    2522           0 :          do_3d_coeff = .TRUE.
    2523             :       CASE (precond_spl3_2)
    2524           0 :          preconditioner%coeffs_1d(1) = -0.26_dp*1.76_dp
    2525           0 :          preconditioner%coeffs_1d(2) = 1.76_dp
    2526           0 :          preconditioner%coeffs_1d(3) = -0.26_dp*1.76_dp
    2527           0 :          preconditioner%sharpen = .FALSE.
    2528           0 :          preconditioner%normalize = .FALSE.
    2529             :          do_3d_coeff = .TRUE.
    2530             :       CASE (precond_spl3_aint)
    2531       15040 :          preconditioner%coeffs_1d = spl3_1d_coeffs0
    2532        3760 :          preconditioner%sharpen = .TRUE.
    2533        3760 :          preconditioner%normalize = .TRUE.
    2534           0 :          do_3d_coeff = .TRUE.
    2535             :       CASE (precond_spl3_1)
    2536           0 :          preconditioner%coeffs_1d(1) = 0.5_dp/3._dp**(1._dp/3._dp)
    2537           0 :          preconditioner%coeffs_1d(2) = 4._dp/3._dp**(1._dp/3._dp)
    2538           0 :          preconditioner%coeffs_1d(3) = 0.5_dp/3._dp**(1._dp/3._dp)
    2539           0 :          preconditioner%sharpen = .TRUE.
    2540           0 :          preconditioner%normalize = .FALSE.
    2541           0 :          do_3d_coeff = .TRUE.
    2542             :       CASE default
    2543        7520 :          CPABORT("")
    2544             :       END SELECT
    2545             :       IF (do_3d_coeff) THEN
    2546        7520 :          s = 1._dp
    2547        7520 :          IF (preconditioner%sharpen) s = -1._dp
    2548             :          preconditioner%coeffs(1) = &
    2549             :             s*preconditioner%coeffs_1d(2)* &
    2550             :             preconditioner%coeffs_1d(2)* &
    2551        7520 :             preconditioner%coeffs_1d(2)
    2552             :          preconditioner%coeffs(2) = &
    2553             :             s*preconditioner%coeffs_1d(1)* &
    2554             :             preconditioner%coeffs_1d(2)* &
    2555        7520 :             preconditioner%coeffs_1d(2)
    2556             :          preconditioner%coeffs(3) = &
    2557             :             s*preconditioner%coeffs_1d(1)* &
    2558             :             preconditioner%coeffs_1d(1)* &
    2559        7520 :             preconditioner%coeffs_1d(2)
    2560             :          preconditioner%coeffs(4) = &
    2561             :             s*preconditioner%coeffs_1d(1)* &
    2562             :             preconditioner%coeffs_1d(1)* &
    2563        7520 :             preconditioner%coeffs_1d(1)
    2564        7520 :          IF (preconditioner%sharpen) THEN
    2565        3760 :             IF (preconditioner%normalize) THEN
    2566             :                preconditioner%coeffs(1) = 2._dp + &
    2567        3760 :                                           preconditioner%coeffs(1)
    2568             :             ELSE
    2569           0 :                preconditioner%coeffs(1) = -preconditioner%coeffs(1)
    2570             :             END IF
    2571             :          END IF
    2572             :       END IF
    2573        7520 :    END SUBROUTINE pw_spline_precond_set_kind
    2574             : 
    2575             : ! **************************************************************************************************
    2576             : !> \brief releases the preconditioner
    2577             : !> \param preconditioner the preconditioner to release
    2578             : !> \author fawzi
    2579             : ! **************************************************************************************************
    2580        3760 :    SUBROUTINE pw_spline_precond_release(preconditioner)
    2581             :       TYPE(pw_spline_precond_type), INTENT(INOUT)        :: preconditioner
    2582             : 
    2583        3760 :       CALL pw_pool_release(preconditioner%pool)
    2584        3760 :    END SUBROUTINE pw_spline_precond_release
    2585             : 
    2586             : ! **************************************************************************************************
    2587             : !> \brief applies the preconditioner to the system of equations to find the
    2588             : !>      coefficients of the spline
    2589             : !> \param preconditioner the preconditioner to apply
    2590             : !> \param in_v the grid on which the preconditioner should be applied
    2591             : !> \param out_v place to store the preconditioner applied on v_out
    2592             : !> \author fawzi
    2593             : ! **************************************************************************************************
    2594       76109 :    SUBROUTINE pw_spline_do_precond(preconditioner, in_v, out_v)
    2595             :       TYPE(pw_spline_precond_type), INTENT(IN)           :: preconditioner
    2596             :       TYPE(pw_r3d_rs_type), INTENT(IN)                   :: in_v
    2597             :       TYPE(pw_r3d_rs_type), INTENT(INOUT)                :: out_v
    2598             : 
    2599       76109 :       SELECT CASE (preconditioner%kind)
    2600             :       CASE (no_precond)
    2601           0 :          CALL pw_copy(in_v, out_v)
    2602             :       CASE (precond_spl3_aint, precond_spl3_1)
    2603        3760 :          CALL pw_zero(out_v)
    2604        3760 :          IF (preconditioner%pbc) THEN
    2605             :             CALL pw_nn_smear_r(pw_in=in_v, pw_out=out_v, &
    2606         438 :                                coeffs=preconditioner%coeffs)
    2607             :          ELSE
    2608             :             CALL pw_nn_compose_r_no_pbc(weights_1d=preconditioner%coeffs_1d, &
    2609             :                                         pw_in=in_v, pw_out=out_v, sharpen=preconditioner%sharpen, &
    2610             :                                         normalize=preconditioner%normalize, &
    2611        3322 :                                         transpose=preconditioner%transpose)
    2612             :          END IF
    2613             :       CASE (precond_spl3_3, precond_spl3_2, precond_spl3_aint2)
    2614       72349 :          CALL pw_zero(out_v)
    2615       72349 :          IF (preconditioner%pbc) THEN
    2616             :             CALL pw_nn_smear_r(pw_in=in_v, pw_out=out_v, &
    2617        6322 :                                coeffs=preconditioner%coeffs)
    2618             :          ELSE
    2619             :             CALL pw_nn_compose_r_no_pbc(weights_1d=preconditioner%coeffs_1d, &
    2620             :                                         pw_in=in_v, pw_out=out_v, sharpen=preconditioner%sharpen, &
    2621             :                                         normalize=preconditioner%normalize, smooth_boundary=.TRUE., &
    2622       66027 :                                         transpose=preconditioner%transpose)
    2623             :          END IF
    2624             :       CASE default
    2625       76109 :          CPABORT("")
    2626             :       END SELECT
    2627       76109 :    END SUBROUTINE pw_spline_do_precond
    2628             : 
    2629             : ! **************************************************************************************************
    2630             : !> \brief solves iteratively (CG) a systmes of linear equations
    2631             : !>           linOp(coeffs)=values
    2632             : !>      (for example those needed to find the coefficients of a spline)
    2633             : !>      Returns true if the it succeeded to achieve the requested accuracy
    2634             : !> \param values the right hand side of the system
    2635             : !> \param coeffs will contain the solution of the system (and on entry
    2636             : !>        it contains the starting point)
    2637             : !> \param linOp the linear operator to be inverted
    2638             : !> \param preconditioner the preconditioner to apply
    2639             : !> \param pool a pool of grids (for the temporary objects)
    2640             : !> \param eps_r the requested precision on the residual
    2641             : !> \param eps_x the requested precision on the solution
    2642             : !> \param max_iter maximum number of iteration allowed
    2643             : !> \param sumtype ...
    2644             : !> \return ...
    2645             : !> \author fawzi
    2646             : ! **************************************************************************************************
    2647        3760 :    FUNCTION find_coeffs(values, coeffs, linOp, preconditioner, pool, &
    2648             :                         eps_r, eps_x, max_iter, sumtype) RESULT(res)
    2649             : 
    2650             :       TYPE(pw_r3d_rs_type), INTENT(IN)                   :: values
    2651             :       TYPE(pw_r3d_rs_type), INTENT(INOUT)                :: coeffs
    2652             :       INTERFACE
    2653             :          SUBROUTINE linOp(pw_in, pw_out)
    2654             :             USE pw_types, ONLY: pw_r3d_rs_type
    2655             :             TYPE(pw_r3d_rs_type), INTENT(IN)    :: pw_in
    2656             :             TYPE(pw_r3d_rs_type), INTENT(INOUT) :: pw_out
    2657             :          END SUBROUTINE linOp
    2658             :       END INTERFACE
    2659             :       TYPE(pw_spline_precond_type), INTENT(IN)           :: preconditioner
    2660             :       TYPE(pw_pool_type), POINTER                        :: pool
    2661             :       REAL(kind=dp), INTENT(in)                          :: eps_r, eps_x
    2662             :       INTEGER, INTENT(in)                                :: max_iter
    2663             :       INTEGER, INTENT(in), OPTIONAL                      :: sumtype
    2664             :       LOGICAL                                            :: res
    2665             : 
    2666             :       INTEGER                                            :: i, iiter, iter, j, k
    2667             :       INTEGER, DIMENSION(2, 3)                           :: bo
    2668             :       LOGICAL                                            :: last
    2669             :       REAL(kind=dp)                                      :: alpha, beta, eps_r_att, eps_x_att, r_z, &
    2670             :                                                             r_z_new
    2671             :       TYPE(cp_logger_type), POINTER                      :: logger
    2672             :       TYPE(pw_r3d_rs_type)                               :: Ap, p, r, z
    2673             : 
    2674        3760 :       last = .FALSE.
    2675             : 
    2676        3760 :       res = .FALSE.
    2677        3760 :       logger => cp_get_default_logger()
    2678        3760 :       CALL pool%create_pw(r)
    2679        3760 :       CALL pool%create_pw(z)
    2680        3760 :       CALL pool%create_pw(p)
    2681        3760 :       CALL pool%create_pw(Ap)
    2682             : 
    2683             :       !CALL cp_add_iter_level(logger%iter_info,level_name="SPLINE_FIND_COEFFS")
    2684        8278 :       ext_do: DO iiter = 1, max_iter, 10
    2685        8278 :          CALL pw_zero(r)
    2686        8278 :          CALL linOp(pw_in=coeffs, pw_out=r)
    2687    61814924 :          r%array = -r%array
    2688        8278 :          CALL pw_axpy(values, r)
    2689        8278 :          CALL pw_spline_do_precond(preconditioner, in_v=r, out_v=z)
    2690        8278 :          CALL pw_copy(z, p)
    2691        8278 :          r_z = pw_integral_ab(r, z, sumtype)
    2692             : 
    2693       72349 :          DO iter = iiter, MIN(iiter + 9, max_iter)
    2694       67831 :             eps_r_att = SQRT(pw_integral_ab(r, r, sumtype))
    2695       67831 :             IF (eps_r_att == 0._dp) THEN
    2696       67831 :                eps_x_att = 0._dp
    2697             :                last = .TRUE.
    2698             :             ELSE
    2699       67825 :                CALL pw_zero(Ap)
    2700       67825 :                CALL linOp(pw_in=p, pw_out=Ap)
    2701       67825 :                alpha = r_z/pw_integral_ab(Ap, p, sumtype)
    2702             : 
    2703       67825 :                CALL pw_axpy(p, coeffs, alpha=alpha)
    2704             : 
    2705       67825 :                eps_x_att = alpha*SQRT(pw_integral_ab(p, p, sumtype)) ! try to spare if unneeded?
    2706       67825 :                IF (eps_r_att < eps_r .AND. eps_x_att < eps_x) last = .TRUE.
    2707             :             END IF
    2708             :             !CALL cp_iterate(logger%iter_info,last=last)
    2709       64071 :             IF (last) THEN
    2710             :                res = .TRUE.
    2711             :                EXIT ext_do
    2712             :             END IF
    2713             : 
    2714       64071 :             CALL pw_axpy(Ap, r, alpha=-alpha)
    2715             : 
    2716       64071 :             CALL pw_spline_do_precond(preconditioner, in_v=r, out_v=z)
    2717             : 
    2718       64071 :             r_z_new = pw_integral_ab(r, z, sumtype)
    2719       64071 :             beta = r_z_new/r_z
    2720       64071 :             r_z = r_z_new
    2721             : 
    2722      640710 :             bo = p%pw_grid%bounds_local
    2723      972656 :             DO k = bo(1, 3), bo(2, 3)
    2724    19594613 :                DO j = bo(1, 2), bo(2, 2)
    2725   417438359 :                   DO i = bo(1, 1), bo(2, 1)
    2726   416534292 :                      p%array(i, j, k) = z%array(i, j, k) + beta*p%array(i, j, k)
    2727             :                   END DO
    2728             :                END DO
    2729             :             END DO
    2730             : 
    2731             :          END DO
    2732             :       END DO ext_do
    2733             :       !CALL cp_rm_iter_level(logger%iter_info,level_name="SPLINE_FIND_COEFFS")
    2734             : 
    2735        3760 :       CALL pool%give_back_pw(r)
    2736        3760 :       CALL pool%give_back_pw(z)
    2737        3760 :       CALL pool%give_back_pw(p)
    2738        3760 :       CALL pool%give_back_pw(Ap)
    2739             : 
    2740        3760 :    END FUNCTION find_coeffs
    2741             : 
    2742             : ! **************************************************************************************************
    2743             : !> \brief adds to pw_out pw_in composed with the weights
    2744             : !>      pw_out%array(i,j,k)=pw_out%array(i,j,k)+sum(pw_in%array(i+l,j+m,k+n)*
    2745             : !>         weights_1d(abs(l)+1)*weights_1d(abs(m)+1)*weights_1d(abs(n)+1),
    2746             : !>         l=-1..1,m=-1..1,n=-1..1)
    2747             : !> \param weights_1d ...
    2748             : !> \param pw_in ...
    2749             : !> \param pw_out ...
    2750             : !> \param sharpen ...
    2751             : !> \param normalize ...
    2752             : !> \param transpose ...
    2753             : !> \param smooth_boundary ...
    2754             : !> \author fawzi
    2755             : ! **************************************************************************************************
    2756      138692 :    SUBROUTINE pw_nn_compose_r_no_pbc(weights_1d, pw_in, pw_out, &
    2757             :                                      sharpen, normalize, transpose, smooth_boundary)
    2758             :       REAL(kind=dp), DIMENSION(-1:1)                     :: weights_1d
    2759             :       TYPE(pw_r3d_rs_type), INTENT(IN)                   :: pw_in, pw_out
    2760             :       LOGICAL, INTENT(in), OPTIONAL                      :: sharpen, normalize, transpose, &
    2761             :                                                             smooth_boundary
    2762             : 
    2763             :       INTEGER                                            :: first_index, i, j, jw, k, kw, &
    2764             :                                                             last_index, myj, myk, n_els
    2765             :       INTEGER, DIMENSION(2, 3)                           :: bo, gbo
    2766             :       INTEGER, DIMENSION(3)                              :: s
    2767             :       LOGICAL                                            :: has_l_boundary, has_u_boundary, &
    2768             :                                                             is_split, my_normalize, my_sharpen, &
    2769             :                                                             my_smooth_boundary, my_transpose
    2770             :       REAL(kind=dp)                                      :: in_val_f, in_val_l, in_val_tmp, w_j, w_k
    2771             :       REAL(kind=dp), DIMENSION(-1:1)                     :: w
    2772      138692 :       REAL(kind=dp), DIMENSION(:, :), POINTER            :: l_boundary, tmp, u_boundary
    2773      138692 :       REAL(kind=dp), DIMENSION(:, :, :), POINTER         :: in_val, out_val
    2774             : 
    2775     1386920 :       bo = pw_in%pw_grid%bounds_local
    2776     1386920 :       gbo = pw_in%pw_grid%bounds
    2777      138692 :       in_val => pw_in%array
    2778      138692 :       out_val => pw_out%array
    2779      138692 :       my_sharpen = .FALSE.
    2780      138692 :       IF (PRESENT(sharpen)) my_sharpen = sharpen
    2781      138692 :       my_normalize = .FALSE.
    2782      138692 :       IF (PRESENT(normalize)) my_normalize = normalize
    2783      138692 :       my_transpose = .FALSE.
    2784      138692 :       IF (PRESENT(transpose)) my_transpose = transpose
    2785      138692 :       my_smooth_boundary = .FALSE.
    2786      138692 :       IF (PRESENT(smooth_boundary)) my_smooth_boundary = smooth_boundary
    2787      138692 :       CPASSERT(.NOT. my_normalize .OR. my_sharpen)
    2788      138692 :       CPASSERT(.NOT. my_smooth_boundary .OR. .NOT. my_sharpen)
    2789      554768 :       DO i = 1, 3
    2790      554768 :          s(i) = bo(2, i) - bo(1, i) + 1
    2791             :       END DO
    2792      554768 :       IF (ANY(s < 1)) RETURN
    2793             :       is_split = ANY(pw_in%pw_grid%bounds_local(:, 1) /= &
    2794      412728 :                      pw_in%pw_grid%bounds(:, 1))
    2795      138692 :       has_l_boundary = (gbo(1, 1) == bo(1, 1))
    2796      138692 :       has_u_boundary = (gbo(2, 1) == bo(2, 1))
    2797      138692 :       IF (is_split) THEN
    2798             :          ALLOCATE (l_boundary(bo(1, 2):bo(2, 2), bo(1, 3):bo(2, 3)), &
    2799             :                    u_boundary(bo(1, 2):bo(2, 2), bo(1, 3):bo(2, 3)), &
    2800       22320 :                    tmp(bo(1, 2):bo(2, 2), bo(1, 3):bo(2, 3)))
    2801      311256 :          tmp(:, :) = pw_in%array(bo(2, 1), :, :)
    2802             :          CALL pw_in%pw_grid%para%group%sendrecv(tmp, pw_in%pw_grid%para%pos_of_x( &
    2803             :                                                 gbo(1, 1) + MODULO(bo(2, 1) + 1 - gbo(1, 1), gbo(2, 1) - gbo(1, 1) + 1)), &
    2804             :                                                 l_boundary, pw_in%pw_grid%para%pos_of_x( &
    2805      620280 :                                                 gbo(1, 1) + MODULO(bo(1, 1) - 1 - gbo(1, 1), gbo(2, 1) - gbo(1, 1) + 1)))
    2806      311256 :          tmp(:, :) = pw_in%array(bo(1, 1), :, :)
    2807             :          CALL pw_in%pw_grid%para%group%sendrecv(tmp, pw_in%pw_grid%para%pos_of_x( &
    2808             :                                                 gbo(1, 1) + MODULO(bo(1, 1) - 1 - gbo(1, 1), gbo(2, 1) - gbo(1, 1) + 1)), &
    2809             :                                                 u_boundary, pw_in%pw_grid%para%pos_of_x( &
    2810      620280 :                                                 gbo(1, 1) + MODULO(bo(2, 1) + 1 - gbo(1, 1), gbo(2, 1) - gbo(1, 1) + 1)))
    2811        2232 :          DEALLOCATE (tmp)
    2812             :       END IF
    2813             : 
    2814      138692 :       n_els = s(1)
    2815      138692 :       IF (has_l_boundary) THEN
    2816      137576 :          n_els = n_els - 1
    2817      137576 :          first_index = bo(1, 1) + 1
    2818             :       ELSE
    2819        1116 :          first_index = bo(1, 1)
    2820             :       END IF
    2821      138692 :       IF (has_u_boundary) THEN
    2822      137576 :          n_els = n_els - 1
    2823      137576 :          last_index = bo(2, 1) - 1
    2824             :       ELSE
    2825        1116 :          last_index = bo(2, 1)
    2826             :       END IF
    2827             : !$OMP PARALLEL DO DEFAULT(NONE) &
    2828             : !$OMP PRIVATE(k, kw, myk, j, jw, myj, in_val_f, in_val_l, w_k, w_j, in_val_tmp, w) &
    2829             : !$OMP SHARED(bo, in_val, out_val, s, l_boundary, u_boundary, weights_1d, is_split, &
    2830             : !$OMP        my_transpose, gbo, my_smooth_boundary, has_l_boundary, has_u_boundary, &
    2831      138692 : !$OMP        my_sharpen, last_index, first_index, my_normalize, n_els)
    2832             :       DO k = bo(1, 3), bo(2, 3)
    2833             :          DO kw = -1, 1
    2834             :             myk = k + kw
    2835             :             IF (my_transpose) THEN
    2836             :                IF (k >= gbo(2, 3) - 1 .OR. k <= gbo(1, 3) + 1) THEN
    2837             :                   IF (k == gbo(2, 3) .OR. k == gbo(1, 3)) THEN
    2838             :                      IF (myk < gbo(2, 3) .AND. myk > gbo(1, 3)) THEN
    2839             :                         w_k = weights_1d(kw)
    2840             :                         IF (my_smooth_boundary) THEN
    2841             :                            w_k = weights_1d(kw)/weights_1d(0)
    2842             :                         END IF
    2843             :                      ELSE IF (kw == 0) THEN
    2844             :                         w_k = 1._dp
    2845             :                      ELSE
    2846             :                         CYCLE
    2847             :                      END IF
    2848             :                   ELSE
    2849             :                      IF (myk == gbo(2, 3) .OR. myk == gbo(1, 3)) CYCLE
    2850             :                      w_k = weights_1d(kw)
    2851             :                   END IF
    2852             :                ELSE
    2853             :                   w_k = weights_1d(kw)
    2854             :                END IF
    2855             :             ELSE
    2856             :                IF (k >= gbo(2, 3) - 1 .OR. k <= gbo(1, 3) + 1) THEN
    2857             :                   IF (k == gbo(2, 3) .OR. k == gbo(1, 3)) THEN
    2858             :                      IF (kw /= 0) CYCLE
    2859             :                      w_k = 1._dp
    2860             :                   ELSE
    2861             :                      IF (my_smooth_boundary .AND. ((k == gbo(1, 3) + 1 .AND. myk == gbo(1, 3)) .OR. &
    2862             :                                                    (k == gbo(2, 3) - 1 .AND. myk == gbo(2, 3)))) THEN
    2863             :                         w_k = weights_1d(kw)/weights_1d(0)
    2864             :                      ELSE
    2865             :                         w_k = weights_1d(kw)
    2866             :                      END IF
    2867             :                   END IF
    2868             :                ELSE
    2869             :                   w_k = weights_1d(kw)
    2870             :                END IF
    2871             :             END IF
    2872             :             DO j = bo(1, 2), bo(2, 2)
    2873             :                DO jw = -1, 1
    2874             :                   myj = j + jw
    2875             :                   IF (j < gbo(2, 2) - 1 .AND. j > gbo(1, 2) + 1) THEN
    2876             :                      w_j = w_k*weights_1d(jw)
    2877             :                   ELSE
    2878             :                      IF (my_transpose) THEN
    2879             :                         IF (j == gbo(2, 2) .OR. j == gbo(1, 2)) THEN
    2880             :                            IF (myj < gbo(2, 2) .AND. myj > gbo(1, 2)) THEN
    2881             :                               w_j = weights_1d(jw)*w_k
    2882             :                               IF (my_smooth_boundary) THEN
    2883             :                                  w_j = weights_1d(jw)/weights_1d(0)*w_k
    2884             :                               END IF
    2885             :                            ELSE IF (jw == 0) THEN
    2886             :                               w_j = w_k
    2887             :                            ELSE
    2888             :                               CYCLE
    2889             :                            END IF
    2890             :                         ELSE
    2891             :                            IF (myj == gbo(2, 2) .OR. myj == gbo(1, 2)) CYCLE
    2892             :                            w_j = w_k*weights_1d(jw)
    2893             :                         END IF
    2894             :                      ELSE
    2895             :                         IF (j == gbo(2, 2) .OR. j == gbo(1, 2)) THEN
    2896             :                            IF (jw /= 0) CYCLE
    2897             :                            w_j = w_k
    2898             :                         ELSE IF (my_smooth_boundary .AND. ((j == gbo(1, 2) + 1 .AND. myj == gbo(1, 2)) .OR. &
    2899             :                                                            (j == gbo(2, 2) - 1 .AND. myj == gbo(2, 2)))) THEN
    2900             :                            w_j = w_k*weights_1d(jw)/weights_1d(0)
    2901             :                         ELSE
    2902             :                            w_j = w_k*weights_1d(jw)
    2903             :                         END IF
    2904             :                      END IF
    2905             :                   END IF
    2906             : 
    2907             :                   IF (has_l_boundary) THEN
    2908             :                      IF (my_transpose) THEN
    2909             :                         IF (s(1) == 1) THEN
    2910             :                            CPASSERT(.NOT. has_u_boundary)
    2911             :                            in_val_tmp = u_boundary(myj, myk)
    2912             :                         ELSE
    2913             :                            in_val_tmp = in_val(bo(1, 1) + 1, myj, myk)
    2914             :                         END IF
    2915             :                         IF (my_sharpen) THEN
    2916             :                            IF (kw == 0 .AND. jw == 0) THEN
    2917             :                               IF (my_normalize) THEN
    2918             :                                  out_val(bo(1, 1), j, k) = out_val(bo(1, 1), j, k) + &
    2919             :                                                            (2.0_dp - w_j)*in_val(bo(1, 1), myj, myk) - &
    2920             :                                                            in_val_tmp*weights_1d(1)*w_j
    2921             :                               ELSE
    2922             :                                  out_val(bo(1, 1), j, k) = out_val(bo(1, 1), j, k) + &
    2923             :                                                            in_val(bo(1, 1), myj, myk)*w_j - &
    2924             :                                                            in_val_tmp*weights_1d(1)*w_j
    2925             :                               END IF
    2926             :                            ELSE
    2927             :                               out_val(bo(1, 1), j, k) = out_val(bo(1, 1), j, k) - &
    2928             :                                                         in_val(bo(1, 1), myj, myk)*w_j - &
    2929             :                                                         in_val_tmp*weights_1d(1)*w_j
    2930             :                            END IF
    2931             :                         ELSE IF (my_smooth_boundary) THEN
    2932             :                            out_val(bo(1, 1), j, k) = out_val(bo(1, 1), j, k) + &
    2933             :                                                      w_j*(in_val(bo(1, 1), myj, myk) + &
    2934             :                                                           in_val_tmp*weights_1d(1)/weights_1d(0))
    2935             :                         ELSE
    2936             :                            out_val(bo(1, 1), j, k) = out_val(bo(1, 1), j, k) + &
    2937             :                                                      w_j*(in_val(bo(1, 1), myj, myk) + &
    2938             :                                                           in_val_tmp*weights_1d(1))
    2939             :                         END IF
    2940             :                         in_val_f = 0.0_dp
    2941             :                      ELSE
    2942             :                         in_val_f = in_val(bo(1, 1), myj, myk)
    2943             :                         IF (my_sharpen) THEN
    2944             :                            IF (kw == 0 .AND. jw == 0) THEN
    2945             :                               IF (my_normalize) THEN
    2946             :                                  out_val(bo(1, 1), j, k) = out_val(bo(1, 1), j, k) + &
    2947             :                                                            (2.0_dp - w_j)*in_val_f
    2948             :                               ELSE
    2949             :                                  out_val(bo(1, 1), j, k) = out_val(bo(1, 1), j, k) + &
    2950             :                                                            in_val_f*w_j
    2951             :                               END IF
    2952             :                            ELSE
    2953             :                               out_val(bo(1, 1), j, k) = out_val(bo(1, 1), j, k) - &
    2954             :                                                         in_val_f*w_j
    2955             :                            END IF
    2956             :                         ELSE
    2957             :                            out_val(bo(1, 1), j, k) = out_val(bo(1, 1), j, k) + &
    2958             :                                                      in_val_f*w_j
    2959             :                         END IF
    2960             :                      END IF
    2961             :                   ELSE
    2962             :                      in_val_f = l_boundary(myj, myk)
    2963             :                   END IF
    2964             :                   IF (has_u_boundary) THEN
    2965             :                      IF (my_transpose) THEN
    2966             :                         in_val_l = in_val(bo(2, 1), myj, myk)
    2967             :                         IF (s(1) == 1) THEN
    2968             :                            CPASSERT(.NOT. has_l_boundary)
    2969             :                            in_val_tmp = l_boundary(myj, myk)
    2970             :                         ELSE
    2971             :                            in_val_tmp = in_val(bo(2, 1) - 1, myj, myk)
    2972             :                         END IF
    2973             :                         IF (my_sharpen) THEN
    2974             :                            IF (kw == 0 .AND. jw == 0) THEN
    2975             :                               IF (my_normalize) THEN
    2976             :                                  out_val(bo(2, 1), j, k) = out_val(bo(2, 1), j, k) + &
    2977             :                                                            in_val_l*(2._dp - w_j) - &
    2978             :                                                            in_val_tmp*weights_1d(1)*w_j
    2979             :                               ELSE
    2980             :                                  out_val(bo(2, 1), j, k) = out_val(bo(2, 1), j, k) + &
    2981             :                                                            in_val_l*w_j - &
    2982             :                                                            in_val_tmp*weights_1d(1)*w_j
    2983             :                               END IF
    2984             :                            ELSE
    2985             :                               out_val(bo(2, 1), j, k) = out_val(bo(2, 1), j, k) - &
    2986             :                                                         w_j*in_val_l - &
    2987             :                                                         in_val_tmp*weights_1d(1)*w_j
    2988             :                            END IF
    2989             :                         ELSE IF (my_smooth_boundary) THEN
    2990             :                            out_val(bo(2, 1), j, k) = out_val(bo(2, 1), j, k) + &
    2991             :                                                      w_j*(in_val_l + in_val_tmp*weights_1d(1)/weights_1d(0))
    2992             :                         ELSE
    2993             :                            out_val(bo(2, 1), j, k) = out_val(bo(2, 1), j, k) + &
    2994             :                                                      w_j*(in_val_l + in_val_tmp*weights_1d(1))
    2995             :                         END IF
    2996             :                         in_val_l = 0._dp
    2997             :                      ELSE
    2998             :                         in_val_l = in_val(bo(2, 1), myj, myk)
    2999             :                         IF (my_sharpen) THEN
    3000             :                            IF (kw == 0 .AND. jw == 0) THEN
    3001             :                               IF (my_normalize) THEN
    3002             :                                  out_val(bo(2, 1), j, k) = out_val(bo(2, 1), j, k) + &
    3003             :                                                            in_val_l*(2._dp - w_j)
    3004             :                               ELSE
    3005             :                                  out_val(bo(2, 1), j, k) = out_val(bo(2, 1), j, k) + &
    3006             :                                                            in_val_l*w_j
    3007             :                               END IF
    3008             :                            ELSE
    3009             :                               out_val(bo(2, 1), j, k) = out_val(bo(2, 1), j, k) - &
    3010             :                                                         w_j*in_val_l
    3011             :                            END IF
    3012             :                         ELSE
    3013             :                            out_val(bo(2, 1), j, k) = out_val(bo(2, 1), j, k) + &
    3014             :                                                      w_j*in_val_l
    3015             :                         END IF
    3016             :                      END IF
    3017             :                   ELSE
    3018             :                      in_val_l = u_boundary(myj, myk)
    3019             :                   END IF
    3020             :                   IF (last_index >= first_index) THEN
    3021             :                      IF (my_transpose) THEN
    3022             :                         IF (bo(1, 1) - 1 == gbo(1, 1)) THEN
    3023             :                            in_val_f = 0._dp
    3024             :                         ELSE IF (bo(2, 1) + 1 == gbo(2, 1)) THEN
    3025             :                            in_val_l = 0._dp
    3026             :                         END IF
    3027             :                      END IF
    3028             :                      IF (my_sharpen) THEN
    3029             :                         w = -weights_1d*w_j
    3030             :                         IF (kw == 0 .AND. jw == 0) THEN
    3031             :                            IF (my_normalize) THEN
    3032             :                               w(0) = w(0) + 2._dp
    3033             :                            ELSE
    3034             :                               w(0) = -w(0)
    3035             :                            END IF
    3036             :                         END IF
    3037             :                      ELSE
    3038             :                         w = weights_1d*w_j
    3039             :                      END IF
    3040             :                      IF (my_smooth_boundary .AND. (.NOT. my_transpose)) THEN
    3041             :                         IF (gbo(1, 1) + 1 >= bo(1, 1) .AND. &
    3042             :                             gbo(1, 1) + 1 <= bo(2, 1) .AND. gbo(2, 1) - gbo(1, 1) > 2) THEN
    3043             :                            IF (gbo(1, 1) >= bo(1, 1)) THEN
    3044             :                               out_val(gbo(1, 1) + 1, j, k) = out_val(gbo(1, 1) + 1, j, k) + &
    3045             :                                                              in_val(gbo(1, 1), myj, myk)*w_j*weights_1d(-1)* &
    3046             :                                                              (1._dp/weights_1d(0) - 1._dp)
    3047             :                            ELSE
    3048             :                               out_val(gbo(1, 1) + 1, j, k) = out_val(gbo(1, 1) + 1, j, k) + &
    3049             :                                                              l_boundary(myj, myk)*w_j*weights_1d(-1)* &
    3050             :                                                              (1._dp/weights_1d(0) - 1._dp)
    3051             :                            END IF
    3052             :                         END IF
    3053             :                      END IF
    3054             :                      CALL pw_compose_stripe(weights=w, &
    3055             :                                             in_val=in_val(first_index:last_index, myj, myk), &
    3056             :                                             in_val_first=in_val_f, in_val_last=in_val_l, &
    3057             :                                             out_val=out_val(first_index:last_index, j, k), &
    3058             :                                             n_el=n_els)
    3059             : !FM                   call pw_compose_stripe2(weights=w,&
    3060             : !FM                        in_val=in_val,&
    3061             : !FM                        in_val_first=in_val_f,in_val_last=in_val_l,&
    3062             : !FM                        out_val=out_val,&
    3063             : !FM                        first_val=first_index,last_val=last_index,&
    3064             : !FM                        myj=myj,myk=myk,j=j,k=k)
    3065             :                      IF (my_smooth_boundary .AND. (.NOT. my_transpose)) THEN
    3066             :                         IF (gbo(2, 1) - 1 >= bo(1, 1) .AND. &
    3067             :                             gbo(2, 1) - 1 <= bo(2, 1) .AND. gbo(2, 1) - gbo(1, 1) > 2) THEN
    3068             :                            IF (gbo(2, 1) <= bo(2, 1)) THEN
    3069             :                               out_val(gbo(2, 1) - 1, j, k) = out_val(gbo(2, 1) - 1, j, k) + &
    3070             :                                                              in_val(gbo(2, 1), myj, myk)*w_j*weights_1d(1)* &
    3071             :                                                              (1._dp/weights_1d(0) - 1._dp)
    3072             :                            ELSE
    3073             :                               out_val(gbo(2, 1) - 1, j, k) = out_val(gbo(2, 1) - 1, j, k) + &
    3074             :                                                              u_boundary(myj, myk)*w_j*weights_1d(1)* &
    3075             :                                                              (1._dp/weights_1d(0) - 1._dp)
    3076             :                            END IF
    3077             :                         END IF
    3078             :                      END IF
    3079             : 
    3080             :                   END IF
    3081             :                END DO
    3082             :             END DO
    3083             :          END DO
    3084             :       END DO
    3085             : 
    3086      138692 :       IF (is_split) THEN
    3087        2232 :          DEALLOCATE (l_boundary, u_boundary)
    3088             :       END IF
    3089      138692 :    END SUBROUTINE pw_nn_compose_r_no_pbc
    3090             : 
    3091             : ! **************************************************************************************************
    3092             : !> \brief ...
    3093             : !> \param pw_in ...
    3094             : !> \param pw_out ...
    3095             : ! **************************************************************************************************
    3096       41588 :    SUBROUTINE spl3_nopbc(pw_in, pw_out)
    3097             : 
    3098             :       TYPE(pw_r3d_rs_type), INTENT(IN)                   :: pw_in
    3099             :       TYPE(pw_r3d_rs_type), INTENT(INOUT)                :: pw_out
    3100             : 
    3101       41588 :       CALL pw_zero(pw_out)
    3102             :       CALL pw_nn_compose_r_no_pbc(weights_1d=spl3_1d_coeffs0, pw_in=pw_in, &
    3103       41588 :                                   pw_out=pw_out, sharpen=.FALSE., normalize=.FALSE.)
    3104             : 
    3105       41588 :    END SUBROUTINE spl3_nopbc
    3106             : 
    3107             : ! **************************************************************************************************
    3108             : !> \brief ...
    3109             : !> \param pw_in ...
    3110             : !> \param pw_out ...
    3111             : ! **************************************************************************************************
    3112       27755 :    SUBROUTINE spl3_nopbct(pw_in, pw_out)
    3113             : 
    3114             :       TYPE(pw_r3d_rs_type), INTENT(IN)                   :: pw_in
    3115             :       TYPE(pw_r3d_rs_type), INTENT(INOUT)                :: pw_out
    3116             : 
    3117       27755 :       CALL pw_zero(pw_out)
    3118             :       CALL pw_nn_compose_r_no_pbc(weights_1d=spl3_1d_coeffs0, pw_in=pw_in, &
    3119       27755 :                                   pw_out=pw_out, sharpen=.FALSE., normalize=.FALSE., transpose=.TRUE.)
    3120             : 
    3121       27755 :    END SUBROUTINE spl3_nopbct
    3122             : 
    3123             : ! **************************************************************************************************
    3124             : !> \brief ...
    3125             : !> \param pw_in ...
    3126             : !> \param pw_out ...
    3127             : ! **************************************************************************************************
    3128        6760 :    SUBROUTINE spl3_pbc(pw_in, pw_out)
    3129             : 
    3130             :       TYPE(pw_r3d_rs_type), INTENT(IN)                   :: pw_in
    3131             :       TYPE(pw_r3d_rs_type), INTENT(INOUT)                :: pw_out
    3132             : 
    3133        6760 :       CALL pw_zero(pw_out)
    3134        6760 :       CALL pw_nn_smear_r(pw_in, pw_out, coeffs=spline3_coeffs)
    3135             : 
    3136        6760 :    END SUBROUTINE spl3_pbc
    3137             : 
    3138             : ! **************************************************************************************************
    3139             : !> \brief Evaluates the PBC interpolated Spline (pw) function on the generic
    3140             : !>      input vector (vec)
    3141             : !> \param vec ...
    3142             : !> \param pw ...
    3143             : !> \return ...
    3144             : !> \par History
    3145             : !>      12.2007 Adapted for use with distributed grids [rdeclerck]
    3146             : !> \author Teodoro Laino 12/2005 [tlaino]
    3147             : !> \note
    3148             : !>      Requires the Spline coefficients to be computed with PBC
    3149             : ! **************************************************************************************************
    3150     1821805 :    FUNCTION Eval_Interp_Spl3_pbc(vec, pw) RESULT(val)
    3151             :       REAL(KIND=dp), DIMENSION(3), INTENT(in)            :: vec
    3152             :       TYPE(pw_r3d_rs_type), INTENT(IN)                   :: pw
    3153             :       REAL(KIND=dp)                                      :: val
    3154             : 
    3155             :       INTEGER                                            :: i, ivec(3), j, k, npts(3)
    3156             :       INTEGER, DIMENSION(2, 3)                           :: bo, bo_l
    3157             :       INTEGER, DIMENSION(4)                              :: ii, ij, ik
    3158             :       LOGICAL                                            :: my_mpsum
    3159             :       REAL(KIND=dp) :: a1, a2, a3, b1, b2, b3, c1, c2, c3, d1, d2, d3, dr1, dr2, dr3, e1, e2, e3, &
    3160             :          f1, f2, f3, g1, g2, g3, h1, h2, h3, p1, p2, p3, q1, q2, q3, r1, r2, r3, s1, s2, s3, s4, &
    3161             :          t1, t2, t3, t4, u1, u2, u3, v1, v2, v3, v4, xd1, xd2, xd3
    3162             :       REAL(KIND=dp), DIMENSION(4, 4, 4)                  :: box
    3163     1821805 :       REAL(KIND=dp), DIMENSION(:, :, :), POINTER         :: grid
    3164             : 
    3165     1821805 :       NULLIFY (grid)
    3166     1821805 :       my_mpsum = (pw%pw_grid%para%mode /= PW_MODE_LOCAL)
    3167     7287220 :       npts = pw%pw_grid%npts
    3168     7287220 :       ivec = FLOOR(vec/pw%pw_grid%dr)
    3169     1821805 :       dr1 = pw%pw_grid%dr(1)
    3170     1821805 :       dr2 = pw%pw_grid%dr(2)
    3171     1821805 :       dr3 = pw%pw_grid%dr(3)
    3172             : 
    3173     1821805 :       xd1 = (vec(1)/dr1) - REAL(ivec(1), kind=dp)
    3174     1821805 :       xd2 = (vec(2)/dr2) - REAL(ivec(2), kind=dp)
    3175     1821805 :       xd3 = (vec(3)/dr3) - REAL(ivec(3), kind=dp)
    3176     1821805 :       grid => pw%array(:, :, :)
    3177    18218050 :       bo = pw%pw_grid%bounds
    3178    18218050 :       bo_l = pw%pw_grid%bounds_local
    3179             : 
    3180     1821805 :       ik(1) = MODULO(ivec(3) - 1, npts(3)) + bo(1, 3)
    3181     1821805 :       ik(2) = MODULO(ivec(3), npts(3)) + bo(1, 3)
    3182     1821805 :       ik(3) = MODULO(ivec(3) + 1, npts(3)) + bo(1, 3)
    3183     1821805 :       ik(4) = MODULO(ivec(3) + 2, npts(3)) + bo(1, 3)
    3184             : 
    3185     1821805 :       ij(1) = MODULO(ivec(2) - 1, npts(2)) + bo(1, 2)
    3186     1821805 :       ij(2) = MODULO(ivec(2), npts(2)) + bo(1, 2)
    3187     1821805 :       ij(3) = MODULO(ivec(2) + 1, npts(2)) + bo(1, 2)
    3188     1821805 :       ij(4) = MODULO(ivec(2) + 2, npts(2)) + bo(1, 2)
    3189             : 
    3190     1821805 :       ii(1) = MODULO(ivec(1) - 1, npts(1)) + bo(1, 1)
    3191     1821805 :       ii(2) = MODULO(ivec(1), npts(1)) + bo(1, 1)
    3192     1821805 :       ii(3) = MODULO(ivec(1) + 1, npts(1)) + bo(1, 1)
    3193     1821805 :       ii(4) = MODULO(ivec(1) + 2, npts(1)) + bo(1, 1)
    3194             : 
    3195     9109025 :       DO k = 1, 4
    3196    38257905 :          DO j = 1, 4
    3197   153031620 :             DO i = 1, 4
    3198             :                IF ( &
    3199             :                   ii(i) >= bo_l(1, 1) .AND. &
    3200             :                   ii(i) <= bo_l(2, 1) .AND. &
    3201             :                   ij(j) >= bo_l(1, 2) .AND. &
    3202             :                   ij(j) <= bo_l(2, 2) .AND. &
    3203   116595520 :                   ik(k) >= bo_l(1, 3) .AND. &
    3204             :                   ik(k) <= bo_l(2, 3) &
    3205    29148880 :                   ) THEN
    3206             :                   box(i, j, k) = grid(ii(i) + 1 - bo_l(1, 1), &
    3207             :                                       ij(j) + 1 - bo_l(1, 2), &
    3208    58521472 :                                       ik(k) + 1 - bo_l(1, 3))
    3209             :                ELSE
    3210    58074048 :                   box(i, j, k) = 0.0_dp
    3211             :                END IF
    3212             :             END DO
    3213             :          END DO
    3214             :       END DO
    3215             : 
    3216     1821805 :       a1 = 3.0_dp + xd1
    3217     1821805 :       a2 = a1*a1
    3218     1821805 :       a3 = a2*a1
    3219     1821805 :       b1 = 2.0_dp + xd1
    3220     1821805 :       b2 = b1*b1
    3221     1821805 :       b3 = b2*b1
    3222     1821805 :       c1 = 1.0_dp + xd1
    3223     1821805 :       c2 = c1*c1
    3224     1821805 :       c3 = c2*c1
    3225     1821805 :       d1 = xd1
    3226     1821805 :       d2 = d1*d1
    3227     1821805 :       d3 = d2*d1
    3228     1821805 :       e1 = 3.0_dp + xd2
    3229     1821805 :       e2 = e1*e1
    3230     1821805 :       e3 = e2*e1
    3231     1821805 :       f1 = 2.0_dp + xd2
    3232     1821805 :       f2 = f1*f1
    3233     1821805 :       f3 = f2*f1
    3234     1821805 :       g1 = 1.0_dp + xd2
    3235     1821805 :       g2 = g1*g1
    3236     1821805 :       g3 = g2*g1
    3237     1821805 :       h1 = xd2
    3238     1821805 :       h2 = h1*h1
    3239     1821805 :       h3 = h2*h1
    3240     1821805 :       p1 = 3.0_dp + xd3
    3241     1821805 :       p2 = p1*p1
    3242     1821805 :       p3 = p2*p1
    3243     1821805 :       q1 = 2.0_dp + xd3
    3244     1821805 :       q2 = q1*q1
    3245     1821805 :       q3 = q2*q1
    3246     1821805 :       r1 = 1.0_dp + xd3
    3247     1821805 :       r2 = r1*r1
    3248     1821805 :       r3 = r2*r1
    3249     1821805 :       u1 = xd3
    3250     1821805 :       u2 = u1*u1
    3251     1821805 :       u3 = u2*u1
    3252             : 
    3253     1821805 :       t1 = 1.0_dp/6.0_dp*(64.0_dp - 48.0_dp*a1 + 12.0_dp*a2 - a3)
    3254     1821805 :       t2 = -22.0_dp/3.0_dp + 10.0_dp*b1 - 4.0_dp*b2 + 0.5_dp*b3
    3255     1821805 :       t3 = 2.0_dp/3.0_dp - 2.0_dp*c1 + 2.0_dp*c2 - 0.5_dp*c3
    3256     1821805 :       t4 = 1.0_dp/6.0_dp*d3
    3257     1821805 :       s1 = 1.0_dp/6.0_dp*(64.0_dp - 48.0_dp*e1 + 12.0_dp*e2 - e3)
    3258     1821805 :       s2 = -22.0_dp/3.0_dp + 10.0_dp*f1 - 4.0_dp*f2 + 0.5_dp*f3
    3259     1821805 :       s3 = 2.0_dp/3.0_dp - 2.0_dp*g1 + 2.0_dp*g2 - 0.5_dp*g3
    3260     1821805 :       s4 = 1.0_dp/6.0_dp*h3
    3261     1821805 :       v1 = 1.0_dp/6.0_dp*(64.0_dp - 48.0_dp*p1 + 12.0_dp*p2 - p3)
    3262     1821805 :       v2 = -22.0_dp/3.0_dp + 10.0_dp*q1 - 4.0_dp*q2 + 0.5_dp*q3
    3263     1821805 :       v3 = 2.0_dp/3.0_dp - 2.0_dp*r1 + 2.0_dp*r2 - 0.5_dp*r3
    3264     1821805 :       v4 = 1.0_dp/6.0_dp*u3
    3265             : 
    3266             :       val = ((box(1, 1, 1)*t1 + box(2, 1, 1)*t2 + box(3, 1, 1)*t3 + box(4, 1, 1)*t4)*s1 + &
    3267             :              (box(1, 2, 1)*t1 + box(2, 2, 1)*t2 + box(3, 2, 1)*t3 + box(4, 2, 1)*t4)*s2 + &
    3268             :              (box(1, 3, 1)*t1 + box(2, 3, 1)*t2 + box(3, 3, 1)*t3 + box(4, 3, 1)*t4)*s3 + &
    3269             :              (box(1, 4, 1)*t1 + box(2, 4, 1)*t2 + box(3, 4, 1)*t3 + box(4, 4, 1)*t4)*s4)*v1 + &
    3270             :             ((box(1, 1, 2)*t1 + box(2, 1, 2)*t2 + box(3, 1, 2)*t3 + box(4, 1, 2)*t4)*s1 + &
    3271             :              (box(1, 2, 2)*t1 + box(2, 2, 2)*t2 + box(3, 2, 2)*t3 + box(4, 2, 2)*t4)*s2 + &
    3272             :              (box(1, 3, 2)*t1 + box(2, 3, 2)*t2 + box(3, 3, 2)*t3 + box(4, 3, 2)*t4)*s3 + &
    3273             :              (box(1, 4, 2)*t1 + box(2, 4, 2)*t2 + box(3, 4, 2)*t3 + box(4, 4, 2)*t4)*s4)*v2 + &
    3274             :             ((box(1, 1, 3)*t1 + box(2, 1, 3)*t2 + box(3, 1, 3)*t3 + box(4, 1, 3)*t4)*s1 + &
    3275             :              (box(1, 2, 3)*t1 + box(2, 2, 3)*t2 + box(3, 2, 3)*t3 + box(4, 2, 3)*t4)*s2 + &
    3276             :              (box(1, 3, 3)*t1 + box(2, 3, 3)*t2 + box(3, 3, 3)*t3 + box(4, 3, 3)*t4)*s3 + &
    3277             :              (box(1, 4, 3)*t1 + box(2, 4, 3)*t2 + box(3, 4, 3)*t3 + box(4, 4, 3)*t4)*s4)*v3 + &
    3278             :             ((box(1, 1, 4)*t1 + box(2, 1, 4)*t2 + box(3, 1, 4)*t3 + box(4, 1, 4)*t4)*s1 + &
    3279             :              (box(1, 2, 4)*t1 + box(2, 2, 4)*t2 + box(3, 2, 4)*t3 + box(4, 2, 4)*t4)*s2 + &
    3280             :              (box(1, 3, 4)*t1 + box(2, 3, 4)*t2 + box(3, 3, 4)*t3 + box(4, 3, 4)*t4)*s3 + &
    3281     1821805 :              (box(1, 4, 4)*t1 + box(2, 4, 4)*t2 + box(3, 4, 4)*t3 + box(4, 4, 4)*t4)*s4)*v4
    3282             : 
    3283     1821805 :       IF (my_mpsum) CALL pw%pw_grid%para%group%sum(val)
    3284             : 
    3285     1821805 :    END FUNCTION Eval_Interp_Spl3_pbc
    3286             : 
    3287             : ! **************************************************************************************************
    3288             : !> \brief Evaluates the derivatives of the PBC interpolated Spline (pw)
    3289             : !>      function on the generic input vector (vec)
    3290             : !> \param vec ...
    3291             : !> \param pw ...
    3292             : !> \return ...
    3293             : !> \par History
    3294             : !>      12.2007 Adapted for use with distributed grids [rdeclerck]
    3295             : !> \author Teodoro Laino 12/2005 [tlaino]
    3296             : !> \note
    3297             : !>      Requires the Spline coefficients to be computed with PBC
    3298             : ! **************************************************************************************************
    3299        9964 :    FUNCTION Eval_d_Interp_Spl3_pbc(vec, pw) RESULT(val)
    3300             :       REAL(KIND=dp), DIMENSION(3), INTENT(in)            :: vec
    3301             :       TYPE(pw_r3d_rs_type), INTENT(IN)                   :: pw
    3302             :       REAL(KIND=dp)                                      :: val(3)
    3303             : 
    3304             :       INTEGER                                            :: i, ivec(3), j, k, npts(3)
    3305             :       INTEGER, DIMENSION(2, 3)                           :: bo, bo_l
    3306             :       INTEGER, DIMENSION(4)                              :: ii, ij, ik
    3307             :       LOGICAL                                            :: my_mpsum
    3308             :       REAL(KIND=dp) :: a1, a2, a3, b1, b2, b3, c1, c2, c3, d1, d2, d3, dr1, dr1i, dr2, dr2i, dr3, &
    3309             :          dr3i, e1, e2, e3, f1, f2, f3, g1, g2, g3, h1, h2, h3, p1, p2, p3, q1, q2, q3, r1, r2, r3, &
    3310             :          s1, s1d, s1o, s2, s2d, s2o, s3, s3d, s3o, s4, s4d, s4o, t1, t1d, t1o, t2, t2d, t2o, t3, &
    3311             :          t3d, t3o, t4, t4d, t4o, u1, u2, u3, v1, v1d, v1o, v2, v2d, v2o, v3, v3d, v3o, v4, v4d, &
    3312             :          v4o, xd1, xd2, xd3
    3313             :       REAL(KIND=dp), DIMENSION(4, 4, 4)                  :: box
    3314        4982 :       REAL(KIND=dp), DIMENSION(:, :, :), POINTER         :: grid
    3315             : 
    3316        4982 :       NULLIFY (grid)
    3317        4982 :       my_mpsum = (pw%pw_grid%para%mode /= PW_MODE_LOCAL)
    3318       19928 :       npts = pw%pw_grid%npts
    3319       19928 :       ivec = FLOOR(vec/pw%pw_grid%dr)
    3320        4982 :       dr1 = pw%pw_grid%dr(1)
    3321        4982 :       dr2 = pw%pw_grid%dr(2)
    3322        4982 :       dr3 = pw%pw_grid%dr(3)
    3323        4982 :       dr1i = 1.0_dp/dr1
    3324        4982 :       dr2i = 1.0_dp/dr2
    3325        4982 :       dr3i = 1.0_dp/dr3
    3326        4982 :       xd1 = (vec(1)/dr1) - REAL(ivec(1), kind=dp)
    3327        4982 :       xd2 = (vec(2)/dr2) - REAL(ivec(2), kind=dp)
    3328        4982 :       xd3 = (vec(3)/dr3) - REAL(ivec(3), kind=dp)
    3329        4982 :       grid => pw%array(:, :, :)
    3330       49820 :       bo = pw%pw_grid%bounds
    3331       49820 :       bo_l = pw%pw_grid%bounds_local
    3332             : 
    3333        4982 :       ik(1) = MODULO(ivec(3) - 1, npts(3)) + bo(1, 3)
    3334        4982 :       ik(2) = MODULO(ivec(3), npts(3)) + bo(1, 3)
    3335        4982 :       ik(3) = MODULO(ivec(3) + 1, npts(3)) + bo(1, 3)
    3336        4982 :       ik(4) = MODULO(ivec(3) + 2, npts(3)) + bo(1, 3)
    3337             : 
    3338        4982 :       ij(1) = MODULO(ivec(2) - 1, npts(2)) + bo(1, 2)
    3339        4982 :       ij(2) = MODULO(ivec(2), npts(2)) + bo(1, 2)
    3340        4982 :       ij(3) = MODULO(ivec(2) + 1, npts(2)) + bo(1, 2)
    3341        4982 :       ij(4) = MODULO(ivec(2) + 2, npts(2)) + bo(1, 2)
    3342             : 
    3343        4982 :       ii(1) = MODULO(ivec(1) - 1, npts(1)) + bo(1, 1)
    3344        4982 :       ii(2) = MODULO(ivec(1), npts(1)) + bo(1, 1)
    3345        4982 :       ii(3) = MODULO(ivec(1) + 1, npts(1)) + bo(1, 1)
    3346        4982 :       ii(4) = MODULO(ivec(1) + 2, npts(1)) + bo(1, 1)
    3347             : 
    3348       24910 :       DO k = 1, 4
    3349      104622 :          DO j = 1, 4
    3350      418488 :             DO i = 1, 4
    3351             :                IF ( &
    3352             :                   ii(i) >= bo_l(1, 1) .AND. &
    3353             :                   ii(i) <= bo_l(2, 1) .AND. &
    3354             :                   ij(j) >= bo_l(1, 2) .AND. &
    3355             :                   ij(j) <= bo_l(2, 2) .AND. &
    3356      318848 :                   ik(k) >= bo_l(1, 3) .AND. &
    3357             :                   ik(k) <= bo_l(2, 3) &
    3358       79712 :                   ) THEN
    3359             :                   box(i, j, k) = grid(ii(i) + 1 - bo_l(1, 1), &
    3360             :                                       ij(j) + 1 - bo_l(1, 2), &
    3361      317696 :                                       ik(k) + 1 - bo_l(1, 3))
    3362             :                ELSE
    3363        1152 :                   box(i, j, k) = 0.0_dp
    3364             :                END IF
    3365             :             END DO
    3366             :          END DO
    3367             :       END DO
    3368             : 
    3369        4982 :       a1 = 3.0_dp + xd1
    3370        4982 :       a2 = a1*a1
    3371        4982 :       a3 = a2*a1
    3372        4982 :       b1 = 2.0_dp + xd1
    3373        4982 :       b2 = b1*b1
    3374        4982 :       b3 = b2*b1
    3375        4982 :       c1 = 1.0_dp + xd1
    3376        4982 :       c2 = c1*c1
    3377        4982 :       c3 = c2*c1
    3378        4982 :       d1 = xd1
    3379        4982 :       d2 = d1*d1
    3380        4982 :       d3 = d2*d1
    3381        4982 :       e1 = 3.0_dp + xd2
    3382        4982 :       e2 = e1*e1
    3383        4982 :       e3 = e2*e1
    3384        4982 :       f1 = 2.0_dp + xd2
    3385        4982 :       f2 = f1*f1
    3386        4982 :       f3 = f2*f1
    3387        4982 :       g1 = 1.0_dp + xd2
    3388        4982 :       g2 = g1*g1
    3389        4982 :       g3 = g2*g1
    3390        4982 :       h1 = xd2
    3391        4982 :       h2 = h1*h1
    3392        4982 :       h3 = h2*h1
    3393        4982 :       p1 = 3.0_dp + xd3
    3394        4982 :       p2 = p1*p1
    3395        4982 :       p3 = p2*p1
    3396        4982 :       q1 = 2.0_dp + xd3
    3397        4982 :       q2 = q1*q1
    3398        4982 :       q3 = q2*q1
    3399        4982 :       r1 = 1.0_dp + xd3
    3400        4982 :       r2 = r1*r1
    3401        4982 :       r3 = r2*r1
    3402        4982 :       u1 = xd3
    3403        4982 :       u2 = u1*u1
    3404        4982 :       u3 = u2*u1
    3405             : 
    3406        4982 :       t1o = 1.0_dp/6.0_dp*(64.0_dp - 48.0_dp*a1 + 12.0_dp*a2 - a3)
    3407        4982 :       t2o = -22.0_dp/3.0_dp + 10.0_dp*b1 - 4.0_dp*b2 + 0.5_dp*b3
    3408        4982 :       t3o = 2.0_dp/3.0_dp - 2.0_dp*c1 + 2.0_dp*c2 - 0.5_dp*c3
    3409        4982 :       t4o = 1.0_dp/6.0_dp*d3
    3410        4982 :       s1o = 1.0_dp/6.0_dp*(64.0_dp - 48.0_dp*e1 + 12.0_dp*e2 - e3)
    3411        4982 :       s2o = -22.0_dp/3.0_dp + 10.0_dp*f1 - 4.0_dp*f2 + 0.5_dp*f3
    3412        4982 :       s3o = 2.0_dp/3.0_dp - 2.0_dp*g1 + 2.0_dp*g2 - 0.5_dp*g3
    3413        4982 :       s4o = 1.0_dp/6.0_dp*h3
    3414        4982 :       v1o = 1.0_dp/6.0_dp*(64.0_dp - 48.0_dp*p1 + 12.0_dp*p2 - p3)
    3415        4982 :       v2o = -22.0_dp/3.0_dp + 10.0_dp*q1 - 4.0_dp*q2 + 0.5_dp*q3
    3416        4982 :       v3o = 2.0_dp/3.0_dp - 2.0_dp*r1 + 2.0_dp*r2 - 0.5_dp*r3
    3417        4982 :       v4o = 1.0_dp/6.0_dp*u3
    3418             : 
    3419        4982 :       t1d = -8.0_dp + 4.0_dp*a1 - 0.5_dp*a2
    3420        4982 :       t2d = 10.0_dp - 8.0_dp*b1 + 1.5_dp*b2
    3421        4982 :       t3d = -2.0_dp + 4.0_dp*c1 - 1.5_dp*c2
    3422        4982 :       t4d = 0.5_dp*d2
    3423        4982 :       s1d = -8.0_dp + 4.0_dp*e1 - 0.5_dp*e2
    3424        4982 :       s2d = 10.0_dp - 8.0_dp*f1 + 1.5_dp*f2
    3425        4982 :       s3d = -2.0_dp + 4.0_dp*g1 - 1.5_dp*g2
    3426        4982 :       s4d = 0.5_dp*h2
    3427        4982 :       v1d = -8.0_dp + 4.0_dp*p1 - 0.5_dp*p2
    3428        4982 :       v2d = 10.0_dp - 8.0_dp*q1 + 1.5_dp*q2
    3429        4982 :       v3d = -2.0_dp + 4.0_dp*r1 - 1.5_dp*r2
    3430        4982 :       v4d = 0.5_dp*u2
    3431             : 
    3432        4982 :       t1 = t1d*dr1i
    3433        4982 :       t2 = t2d*dr1i
    3434        4982 :       t3 = t3d*dr1i
    3435        4982 :       t4 = t4d*dr1i
    3436        4982 :       s1 = s1o
    3437        4982 :       s2 = s2o
    3438        4982 :       s3 = s3o
    3439        4982 :       s4 = s4o
    3440        4982 :       v1 = v1o
    3441        4982 :       v2 = v2o
    3442        4982 :       v3 = v3o
    3443        4982 :       v4 = v4o
    3444             :       val(1) = ((box(1, 1, 1)*t1 + box(2, 1, 1)*t2 + box(3, 1, 1)*t3 + box(4, 1, 1)*t4)*s1 + &
    3445             :                 (box(1, 2, 1)*t1 + box(2, 2, 1)*t2 + box(3, 2, 1)*t3 + box(4, 2, 1)*t4)*s2 + &
    3446             :                 (box(1, 3, 1)*t1 + box(2, 3, 1)*t2 + box(3, 3, 1)*t3 + box(4, 3, 1)*t4)*s3 + &
    3447             :                 (box(1, 4, 1)*t1 + box(2, 4, 1)*t2 + box(3, 4, 1)*t3 + box(4, 4, 1)*t4)*s4)*v1 + &
    3448             :                ((box(1, 1, 2)*t1 + box(2, 1, 2)*t2 + box(3, 1, 2)*t3 + box(4, 1, 2)*t4)*s1 + &
    3449             :                 (box(1, 2, 2)*t1 + box(2, 2, 2)*t2 + box(3, 2, 2)*t3 + box(4, 2, 2)*t4)*s2 + &
    3450             :                 (box(1, 3, 2)*t1 + box(2, 3, 2)*t2 + box(3, 3, 2)*t3 + box(4, 3, 2)*t4)*s3 + &
    3451             :                 (box(1, 4, 2)*t1 + box(2, 4, 2)*t2 + box(3, 4, 2)*t3 + box(4, 4, 2)*t4)*s4)*v2 + &
    3452             :                ((box(1, 1, 3)*t1 + box(2, 1, 3)*t2 + box(3, 1, 3)*t3 + box(4, 1, 3)*t4)*s1 + &
    3453             :                 (box(1, 2, 3)*t1 + box(2, 2, 3)*t2 + box(3, 2, 3)*t3 + box(4, 2, 3)*t4)*s2 + &
    3454             :                 (box(1, 3, 3)*t1 + box(2, 3, 3)*t2 + box(3, 3, 3)*t3 + box(4, 3, 3)*t4)*s3 + &
    3455             :                 (box(1, 4, 3)*t1 + box(2, 4, 3)*t2 + box(3, 4, 3)*t3 + box(4, 4, 3)*t4)*s4)*v3 + &
    3456             :                ((box(1, 1, 4)*t1 + box(2, 1, 4)*t2 + box(3, 1, 4)*t3 + box(4, 1, 4)*t4)*s1 + &
    3457             :                 (box(1, 2, 4)*t1 + box(2, 2, 4)*t2 + box(3, 2, 4)*t3 + box(4, 2, 4)*t4)*s2 + &
    3458             :                 (box(1, 3, 4)*t1 + box(2, 3, 4)*t2 + box(3, 3, 4)*t3 + box(4, 3, 4)*t4)*s3 + &
    3459        4982 :                 (box(1, 4, 4)*t1 + box(2, 4, 4)*t2 + box(3, 4, 4)*t3 + box(4, 4, 4)*t4)*s4)*v4
    3460             : 
    3461        4982 :       t1 = t1o
    3462        4982 :       t2 = t2o
    3463        4982 :       t3 = t3o
    3464        4982 :       t4 = t4o
    3465        4982 :       s1 = s1d*dr2i
    3466        4982 :       s2 = s2d*dr2i
    3467        4982 :       s3 = s3d*dr2i
    3468        4982 :       s4 = s4d*dr2i
    3469        4982 :       v1 = v1o
    3470        4982 :       v2 = v2o
    3471        4982 :       v3 = v3o
    3472        4982 :       v4 = v4o
    3473             :       val(2) = ((box(1, 1, 1)*t1 + box(2, 1, 1)*t2 + box(3, 1, 1)*t3 + box(4, 1, 1)*t4)*s1 + &
    3474             :                 (box(1, 2, 1)*t1 + box(2, 2, 1)*t2 + box(3, 2, 1)*t3 + box(4, 2, 1)*t4)*s2 + &
    3475             :                 (box(1, 3, 1)*t1 + box(2, 3, 1)*t2 + box(3, 3, 1)*t3 + box(4, 3, 1)*t4)*s3 + &
    3476             :                 (box(1, 4, 1)*t1 + box(2, 4, 1)*t2 + box(3, 4, 1)*t3 + box(4, 4, 1)*t4)*s4)*v1 + &
    3477             :                ((box(1, 1, 2)*t1 + box(2, 1, 2)*t2 + box(3, 1, 2)*t3 + box(4, 1, 2)*t4)*s1 + &
    3478             :                 (box(1, 2, 2)*t1 + box(2, 2, 2)*t2 + box(3, 2, 2)*t3 + box(4, 2, 2)*t4)*s2 + &
    3479             :                 (box(1, 3, 2)*t1 + box(2, 3, 2)*t2 + box(3, 3, 2)*t3 + box(4, 3, 2)*t4)*s3 + &
    3480             :                 (box(1, 4, 2)*t1 + box(2, 4, 2)*t2 + box(3, 4, 2)*t3 + box(4, 4, 2)*t4)*s4)*v2 + &
    3481             :                ((box(1, 1, 3)*t1 + box(2, 1, 3)*t2 + box(3, 1, 3)*t3 + box(4, 1, 3)*t4)*s1 + &
    3482             :                 (box(1, 2, 3)*t1 + box(2, 2, 3)*t2 + box(3, 2, 3)*t3 + box(4, 2, 3)*t4)*s2 + &
    3483             :                 (box(1, 3, 3)*t1 + box(2, 3, 3)*t2 + box(3, 3, 3)*t3 + box(4, 3, 3)*t4)*s3 + &
    3484             :                 (box(1, 4, 3)*t1 + box(2, 4, 3)*t2 + box(3, 4, 3)*t3 + box(4, 4, 3)*t4)*s4)*v3 + &
    3485             :                ((box(1, 1, 4)*t1 + box(2, 1, 4)*t2 + box(3, 1, 4)*t3 + box(4, 1, 4)*t4)*s1 + &
    3486             :                 (box(1, 2, 4)*t1 + box(2, 2, 4)*t2 + box(3, 2, 4)*t3 + box(4, 2, 4)*t4)*s2 + &
    3487             :                 (box(1, 3, 4)*t1 + box(2, 3, 4)*t2 + box(3, 3, 4)*t3 + box(4, 3, 4)*t4)*s3 + &
    3488        4982 :                 (box(1, 4, 4)*t1 + box(2, 4, 4)*t2 + box(3, 4, 4)*t3 + box(4, 4, 4)*t4)*s4)*v4
    3489             : 
    3490        4982 :       t1 = t1o
    3491        4982 :       t2 = t2o
    3492        4982 :       t3 = t3o
    3493        4982 :       t4 = t4o
    3494        4982 :       s1 = s1o
    3495        4982 :       s2 = s2o
    3496        4982 :       s3 = s3o
    3497        4982 :       s4 = s4o
    3498        4982 :       v1 = v1d*dr3i
    3499        4982 :       v2 = v2d*dr3i
    3500        4982 :       v3 = v3d*dr3i
    3501        4982 :       v4 = v4d*dr3i
    3502             :       val(3) = ((box(1, 1, 1)*t1 + box(2, 1, 1)*t2 + box(3, 1, 1)*t3 + box(4, 1, 1)*t4)*s1 + &
    3503             :                 (box(1, 2, 1)*t1 + box(2, 2, 1)*t2 + box(3, 2, 1)*t3 + box(4, 2, 1)*t4)*s2 + &
    3504             :                 (box(1, 3, 1)*t1 + box(2, 3, 1)*t2 + box(3, 3, 1)*t3 + box(4, 3, 1)*t4)*s3 + &
    3505             :                 (box(1, 4, 1)*t1 + box(2, 4, 1)*t2 + box(3, 4, 1)*t3 + box(4, 4, 1)*t4)*s4)*v1 + &
    3506             :                ((box(1, 1, 2)*t1 + box(2, 1, 2)*t2 + box(3, 1, 2)*t3 + box(4, 1, 2)*t4)*s1 + &
    3507             :                 (box(1, 2, 2)*t1 + box(2, 2, 2)*t2 + box(3, 2, 2)*t3 + box(4, 2, 2)*t4)*s2 + &
    3508             :                 (box(1, 3, 2)*t1 + box(2, 3, 2)*t2 + box(3, 3, 2)*t3 + box(4, 3, 2)*t4)*s3 + &
    3509             :                 (box(1, 4, 2)*t1 + box(2, 4, 2)*t2 + box(3, 4, 2)*t3 + box(4, 4, 2)*t4)*s4)*v2 + &
    3510             :                ((box(1, 1, 3)*t1 + box(2, 1, 3)*t2 + box(3, 1, 3)*t3 + box(4, 1, 3)*t4)*s1 + &
    3511             :                 (box(1, 2, 3)*t1 + box(2, 2, 3)*t2 + box(3, 2, 3)*t3 + box(4, 2, 3)*t4)*s2 + &
    3512             :                 (box(1, 3, 3)*t1 + box(2, 3, 3)*t2 + box(3, 3, 3)*t3 + box(4, 3, 3)*t4)*s3 + &
    3513             :                 (box(1, 4, 3)*t1 + box(2, 4, 3)*t2 + box(3, 4, 3)*t3 + box(4, 4, 3)*t4)*s4)*v3 + &
    3514             :                ((box(1, 1, 4)*t1 + box(2, 1, 4)*t2 + box(3, 1, 4)*t3 + box(4, 1, 4)*t4)*s1 + &
    3515             :                 (box(1, 2, 4)*t1 + box(2, 2, 4)*t2 + box(3, 2, 4)*t3 + box(4, 2, 4)*t4)*s2 + &
    3516             :                 (box(1, 3, 4)*t1 + box(2, 3, 4)*t2 + box(3, 3, 4)*t3 + box(4, 3, 4)*t4)*s3 + &
    3517        4982 :                 (box(1, 4, 4)*t1 + box(2, 4, 4)*t2 + box(3, 4, 4)*t3 + box(4, 4, 4)*t4)*s4)*v4
    3518             : 
    3519        4982 :       IF (my_mpsum) CALL pw%pw_grid%para%group%sum(val)
    3520             : 
    3521        4982 :    END FUNCTION Eval_d_Interp_Spl3_pbc
    3522             : 
    3523           0 : END MODULE pw_spline_utils

Generated by: LCOV version 1.15