LCOV - code coverage report
Current view: top level - src/pw - ps_wavelet_kernel.F (source / functions) Coverage Total Hit
Test: CP2K Regtests (git:936074a) Lines: 98.9 % 733 725
Test Date: 2025-12-04 06:27:48 Functions: 100.0 % 12 12

            Line data    Source code
       1              : !--------------------------------------------------------------------------------------------------!
       2              : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3              : !   Copyright 2000-2025 CP2K developers group <https://cp2k.org>                                   !
       4              : !                                                                                                  !
       5              : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6              : !--------------------------------------------------------------------------------------------------!
       7              : 
       8              : ! **************************************************************************************************
       9              : !> \brief Creates the wavelet kernel for the wavelet based poisson solver.
      10              : !> \author Florian Schiffmann (09.2007,fschiff)
      11              : ! **************************************************************************************************
      12              : MODULE ps_wavelet_kernel
      13              : 
      14              :    USE kinds,                           ONLY: dp
      15              :    USE message_passing,                 ONLY: mp_comm_type
      16              :    USE ps_wavelet_base,                 ONLY: scramble_unpack
      17              :    USE ps_wavelet_fft3d,                ONLY: ctrig,&
      18              :                                               ctrig_length,&
      19              :                                               fftstp
      20              :    USE ps_wavelet_scaling_function,     ONLY: scaling_function,&
      21              :                                               scf_recursion
      22              :    USE ps_wavelet_util,                 ONLY: F_FFT_dimensions,&
      23              :                                               S_FFT_dimensions
      24              : #include "../base/base_uses.f90"
      25              : 
      26              :    IMPLICIT NONE
      27              : 
      28              :    PRIVATE
      29              : 
      30              :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'ps_wavelet_kernel'
      31              : 
      32              : ! *** Public data types ***
      33              : 
      34              :    PUBLIC :: createKernel
      35              : 
      36              : CONTAINS
      37              : 
      38              : ! **************************************************************************************************
      39              : !> \brief Allocate a pointer which corresponds to the zero-padded FFT slice needed for
      40              : !>     calculating the convolution with the kernel expressed in the interpolating scaling
      41              : !>     function basis. The kernel pointer is unallocated on input, allocated on output.
      42              : !> \param geocode Indicates the boundary conditions (BC) of the problem:
      43              : !>             'F' free BC, isolated systems.
      44              : !>                 The program calculates the solution as if the given density is
      45              : !>                 "alone" in R^3 space.
      46              : !>             'S' surface BC, isolated in y direction, periodic in xz plane
      47              : !>                 The given density is supposed to be periodic in the xz plane,
      48              : !>                 so the dimensions in these direction mus be compatible with the FFT
      49              : !>                 Beware of the fact that the isolated direction is y!
      50              : !>             'P' periodic BC.
      51              : !>                 The density is supposed to be periodic in all the three directions,
      52              : !>                 then all the dimensions must be compatible with the FFT.
      53              : !>                 No need for setting up the kernel.
      54              : !> \param n01 dimensions of the real space grid to be hit with the Poisson Solver
      55              : !> \param n02 dimensions of the real space grid to be hit with the Poisson Solver
      56              : !> \param n03 dimensions of the real space grid to be hit with the Poisson Solver
      57              : !> \param hx  grid spacings. For the isolated BC case for the moment they are supposed to
      58              : !>                 be equal in the three directions
      59              : !> \param hy grid spacings. For the isolated BC case for the moment they are supposed to
      60              : !>                 be equal in the three directions
      61              : !> \param hz grid spacings. For the isolated BC case for the moment they are supposed to
      62              : !>                 be equal in the three directions
      63              : !> \param itype_scf order of the interpolating scaling functions used in the decomposition
      64              : !> \param iproc ,nproc: number of process, number of processes
      65              : !> \param nproc ...
      66              : !> \param kernel pointer for the kernel FFT. Unallocated on input, allocated on output.
      67              : !>                 Its dimensions are equivalent to the region of the FFT space for which the
      68              : !>                 kernel is injective. This will divide by two each direction,
      69              : !>                 since the kernel for the zero-padded convolution is real and symmetric.
      70              : !> \param mpi_group ...
      71              : !> \date February 2007
      72              : !> \author Luigi Genovese
      73              : !> \note Due to the fact that the kernel dimensions are unknown before the calling, the kernel
      74              : !>     must be declared as pointer in input of this routine.
      75              : !>     To avoid that, one can properly define the kernel dimensions by adding
      76              : !>     the nd1,nd2,nd3 arguments to the PS_dim4allocation routine, then eliminating the pointer
      77              : !>     declaration.
      78              : ! **************************************************************************************************
      79          836 :    SUBROUTINE createKernel(geocode, n01, n02, n03, hx, hy, hz, itype_scf, iproc, nproc, kernel, mpi_group)
      80              : 
      81              :       CHARACTER(len=1), INTENT(in)                       :: geocode
      82              :       INTEGER, INTENT(in)                                :: n01, n02, n03
      83              :       REAL(KIND=dp), INTENT(in)                          :: hx, hy, hz
      84              :       INTEGER, INTENT(in)                                :: itype_scf, iproc, nproc
      85              :       REAL(KIND=dp), POINTER                             :: kernel(:)
      86              : 
      87              :       CLASS(mp_comm_type), INTENT(in)                     :: mpi_group
      88              : 
      89              :       INTEGER                                            :: m1, m2, m3, md1, md2, md3, n1, n2, n3, &
      90              :                                                             nd1, nd2, nd3, nlimd, nlimk
      91              :       REAL(KIND=dp)                                      :: hgrid
      92              : 
      93          836 :       hgrid = MAX(hx, hy, hz)
      94              : 
      95          836 :       IF (geocode == 'P') THEN
      96              : 
      97              :          CALL F_FFT_dimensions(n01, n02, n03, m1, m2, m3, n1, n2, n3, &
      98          326 :                                md1, md2, md3, nd1, nd2, nd3, nproc)
      99              : 
     100          326 :          ALLOCATE (kernel(1))
     101          326 :          nlimd = n2
     102          652 :          nlimk = 0
     103              : 
     104          510 :       ELSE IF (geocode == 'S') THEN
     105              : 
     106              :          CALL S_FFT_dimensions(n01, n02, n03, m1, m2, m3, n1, n2, n3, &
     107            2 :                                md1, md2, md3, nd1, nd2, nd3, nproc)
     108              : 
     109            6 :          ALLOCATE (kernel(nd1*nd2*nd3/nproc))
     110              : 
     111              :          !the kernel must be built and scattered to all the processes
     112              : 
     113              :          CALL Surfaces_Kernel(n1, n2, n3, m3, nd1, nd2, nd3, hx, hz, hy, &
     114            2 :                               itype_scf, kernel, iproc, nproc, mpi_group)
     115              :          !last plane calculated for the density and the kernel
     116              : 
     117            2 :          nlimd = n2
     118            4 :          nlimk = n3/2 + 1
     119          508 :       ELSE IF (geocode == 'F') THEN
     120              : 
     121              :          !Build the Kernel
     122              : 
     123              :          CALL F_FFT_dimensions(n01, n02, n03, m1, m2, m3, n1, n2, n3, &
     124          508 :                                md1, md2, md3, nd1, nd2, nd3, nproc)
     125         1524 :          ALLOCATE (kernel(nd1*nd2*nd3/nproc))
     126              : 
     127              :          !the kernel must be built and scattered to all the processes
     128              :          CALL Free_Kernel(n01, n02, n03, n1, n2, n3, nd1, nd2, nd3, hgrid, &
     129          508 :                           itype_scf, iproc, nproc, kernel, mpi_group)
     130              : 
     131              :          !last plane calculated for the density and the kernel
     132          508 :          nlimd = n2/2
     133         1016 :          nlimk = n3/2 + 1
     134              : 
     135              :       ELSE
     136              : 
     137            0 :          CPABORT("No wavelet based poisson solver for given geometry")
     138              : 
     139              :       END IF
     140              : !!!  IF (iproc==0) THEN
     141              : !!!     write(*,*)'done.'
     142              : !!!     write(*,'(1x,a,i0)') 'Allocate words for kernel ',nd1*nd2*nd3/nproc
     143              : !!!     !print the load balancing of the different dimensions on screen
     144              : !!!     write(*,'(1x,a,3(i5))')'Grid Dimensions:',n01,n02,n03
     145              : !!!     if (nproc > 1) then
     146              : !!!        write(*,'(1x,a,3(i5),a,3(i5),a,3(i5))')&
     147              : !!!             'Memory occ. per proc.  Density',md1,md3,md2/nproc,'   Kernel',nd1,nd2,nd3/nproc
     148              : !!!        write(*,'(1x,a)')'Load Balancing--------------------------------------------'
     149              : !!!        jhd=1000
     150              : !!!        jzd=1000
     151              : !!!        npd=0
     152              : !!!        load_balancing: do jproc=0,nproc-1
     153              : !!!           !print *,'jproc,jfull=',jproc,jproc*md2/nproc,(jproc+1)*md2/nproc
     154              : !!!           if ((jproc+1)*md2/nproc <= nlimd) then
     155              : !!!              jfd=jproc
     156              : !!!           else if (jproc*md2/nproc <= nlimd) then
     157              : !!!              jhd=jproc
     158              : !!!              npd=nint(real(nlimd-(jproc)*md2/nproc,KIND=dp)/real(md2/nproc,KIND=dp)*100._dp)
     159              : !!!           else
     160              : !!!              jzd=jproc
     161              : !!!              exit load_balancing
     162              : !!!           end if
     163              : !!!        end do load_balancing
     164              : !!!        write(*,'(1x,a,i3,a)')'LB_den        : processors   0  -',jfd,' work at 100%'
     165              : !!!        if (jfd < nproc-1) write(*,'(1x,a,i3,a,i3,1a)')'                processor     ',jhd,&
     166              : !!!             '    work at ',npd,'%'
     167              : !!!        if (jhd < nproc-1) write(*,'(1x,a,i3,1a,i3,a)')'                processors ',&
     168              : !!!             jzd,'  -',nproc-1,' work at   0%'
     169              : !!!        jhk=1000
     170              : !!!        jzk=1000
     171              : !!!        npk=0
     172              : !!!        if (geocode /= 'P') then
     173              : !!!           load_balancingk: do jproc=0,nproc-1
     174              : !!!              !print *,'jproc,jfull=',jproc,jproc*nd3/nproc,(jproc+1)*nd3/nproc
     175              : !!!              if ((jproc+1)*nd3/nproc <= nlimk) then
     176              : !!!                 jfk=jproc
     177              : !!!              else if (jproc*nd3/nproc <= nlimk) then
     178              : !!!                 jhk=jproc
     179              : !!!                 npk=nint(real(nlimk-(jproc)*nd3/nproc,KIND=dp)/real(nd3/nproc,KIND=dp)*100._dp)
     180              : !!!              else
     181              : !!!                 jzk=jproc
     182              : !!!                 exit load_balancingk
     183              : !!!              end if
     184              : !!!           end do load_balancingk
     185              : !!!           write(*,'(1x,a,i3,a)')'LB_ker        : processors   0  -',jfk,' work at 100%'
     186              : !!!           if (jfk < nproc-1) write(*,'(1x,a,i3,a,i3,1a)')'                processor     ',jhk,&
     187              : !!!                '    work at ',npk,'%'
     188              : !!!           if (jhk < nproc-1) write(*,'(1x,a,i3,1a,i3,a)')'                processors ',jzk,'  -',nproc-1,&
     189              : !!!                ' work at   0%'
     190              : !!!        end if
     191              : !!!        write(*,'(1x,a)')'The LB per processor is 1/3 LB_den + 2/3 LB_ker-----------'
     192              : !!!     end if
     193              : !!!
     194              : !!!  END IF
     195          836 :    END SUBROUTINE createKernel
     196              : 
     197              : ! **************************************************************************************************
     198              : !> \brief Build the kernel of the Poisson operator with
     199              : !>     surfaces Boundary conditions
     200              : !>     in an interpolating scaling functions basis.
     201              : !>     Beware of the fact that the nonperiodic direction is y!
     202              : !> \param n1 Dimensions for the FFT
     203              : !> \param n2 Dimensions for the FFT
     204              : !> \param n3 Dimensions for the FFT
     205              : !> \param m3 Actual dimension in non-periodic direction
     206              : !> \param nker1 Dimensions of the kernel (nker3=n3/2+1) nker(1,2)=n(1,2)/2+1
     207              : !> \param nker2 Dimensions of the kernel (nker3=n3/2+1) nker(1,2)=n(1,2)/2+1
     208              : !> \param nker3 Dimensions of the kernel (nker3=n3/2+1) nker(1,2)=n(1,2)/2+1
     209              : !> \param h1 Mesh steps in the three dimensions
     210              : !> \param h2 Mesh steps in the three dimensions
     211              : !> \param h3 Mesh steps in the three dimensions
     212              : !> \param itype_scf Order of the scaling function
     213              : !> \param karray output array
     214              : !> \param iproc Number of process
     215              : !> \param nproc number of processes
     216              : !> \param mpi_group ...
     217              : !> \date October 2006
     218              : !> \author L. Genovese
     219              : ! **************************************************************************************************
     220            2 :    SUBROUTINE Surfaces_Kernel(n1, n2, n3, m3, nker1, nker2, nker3, h1, h2, h3, &
     221            2 :                               itype_scf, karray, iproc, nproc, mpi_group)
     222              : 
     223              :       INTEGER, INTENT(in)                                :: n1, n2, n3, m3, nker1, nker2, nker3
     224              :       REAL(KIND=dp), INTENT(in)                          :: h1, h2, h3
     225              :       INTEGER, INTENT(in)                                :: itype_scf, nproc, iproc
     226              :       REAL(KIND=dp), &
     227              :          DIMENSION(nker1, nker2, nker3/nproc), &
     228              :          INTENT(out)                                     :: karray
     229              :       TYPE(mp_comm_type), INTENT(in)                     :: mpi_group
     230              : 
     231              :       INTEGER, PARAMETER                                 :: n_points = 2**6, ncache_optimal = 8*1024
     232              : 
     233              :       INTEGER :: i, i1, i2, i3, ic, iend, imu, ind1, ind2, inzee, ipolyord, ireim, istart, j2, &
     234              :          j2nd, j2st, jnd1, jp2, jreim, n_cell, n_range, n_scf, nact2, ncache, nfft, num_of_mus, &
     235              :          shift
     236            2 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: after, before, now
     237              :       REAL(kind=dp)                                      :: a, b, c, cp, d, diff, dx, feI, feR, foI, &
     238              :                                                             foR, fR, mu1, pi, pion, ponx, pony, &
     239              :                                                             sp, value, x
     240            2 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: kernel_scf, x_scf, y_scf
     241            2 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: btrig, cossinarr
     242            2 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :)     :: halfft_cache, kernel
     243            2 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :, :)  :: kernel_mpi
     244              :       REAL(KIND=dp), DIMENSION(9, 8)                     :: cpol
     245              : 
     246              : !Better if higher (1024 points are enough 10^{-14}: 2*itype_scf*n_points)
     247              : !  include "perfdata.inc"
     248              : !FFT arrays
     249              : !coefficients for the polynomial interpolation
     250              : !assign the values of the coefficients
     251              : 
     252      1116350 :       karray = 0.0_dp
     253          162 :       cpol(:, :) = 1._dp
     254              : 
     255            2 :       cpol(1, 2) = .25_dp
     256              : 
     257            2 :       cpol(1, 3) = 1._dp/3._dp
     258              : 
     259            2 :       cpol(1, 4) = 7._dp/12._dp
     260            2 :       cpol(2, 4) = 8._dp/3._dp
     261              : 
     262            2 :       cpol(1, 5) = 19._dp/50._dp
     263            2 :       cpol(2, 5) = 3._dp/2._dp
     264              : 
     265            2 :       cpol(1, 6) = 41._dp/272._dp
     266            2 :       cpol(2, 6) = 27._dp/34._dp
     267            2 :       cpol(3, 6) = 27._dp/272._dp
     268              : 
     269            2 :       cpol(1, 7) = 751._dp/2989._dp
     270            2 :       cpol(2, 7) = 73._dp/61._dp
     271            2 :       cpol(3, 7) = 27._dp/61._dp
     272              : 
     273            2 :       cpol(1, 8) = -989._dp/4540._dp
     274            2 :       cpol(2, 8) = -1472._dp/1135._dp
     275            2 :       cpol(3, 8) = 232._dp/1135._dp
     276            2 :       cpol(4, 8) = -2624._dp/1135._dp
     277              : 
     278              :       !renormalize values
     279            2 :       cpol(1, 1) = .5_dp*cpol(1, 1)
     280            6 :       cpol(1:2, 2) = 2._dp/3._dp*cpol(1:2, 2)
     281            6 :       cpol(1:2, 3) = 3._dp/8._dp*cpol(1:2, 3)
     282            8 :       cpol(1:3, 4) = 2._dp/15._dp*cpol(1:3, 4)
     283            8 :       cpol(1:3, 5) = 25._dp/144._dp*cpol(1:3, 5)
     284           10 :       cpol(1:4, 6) = 34._dp/105._dp*cpol(1:4, 6)
     285           10 :       cpol(1:4, 7) = 2989._dp/17280._dp*cpol(1:4, 7)
     286           12 :       cpol(1:5, 8) = -454._dp/2835._dp*cpol(1:5, 8)
     287              : 
     288              :       !assign the complete values
     289            2 :       cpol(2, 1) = cpol(1, 1)
     290              : 
     291            2 :       cpol(3, 2) = cpol(1, 2)
     292              : 
     293            2 :       cpol(3, 3) = cpol(2, 3)
     294            2 :       cpol(4, 3) = cpol(1, 3)
     295              : 
     296            2 :       cpol(4, 4) = cpol(2, 4)
     297            2 :       cpol(5, 4) = cpol(1, 4)
     298              : 
     299            2 :       cpol(4, 5) = cpol(3, 5)
     300            2 :       cpol(5, 5) = cpol(2, 5)
     301            2 :       cpol(6, 5) = cpol(1, 5)
     302              : 
     303            2 :       cpol(5, 6) = cpol(3, 6)
     304            2 :       cpol(6, 6) = cpol(2, 6)
     305            2 :       cpol(7, 6) = cpol(1, 6)
     306              : 
     307            2 :       cpol(5, 7) = cpol(4, 7)
     308            2 :       cpol(6, 7) = cpol(3, 7)
     309            2 :       cpol(7, 7) = cpol(2, 7)
     310            2 :       cpol(8, 7) = cpol(1, 7)
     311              : 
     312            2 :       cpol(6, 8) = cpol(4, 8)
     313            2 :       cpol(7, 8) = cpol(3, 8)
     314            2 :       cpol(8, 8) = cpol(2, 8)
     315            2 :       cpol(9, 8) = cpol(1, 8)
     316              : 
     317              :       !Number of integration points : 2*itype_scf*n_points
     318            2 :       n_scf = 2*itype_scf*n_points
     319              :       !Allocations
     320            6 :       ALLOCATE (x_scf(0:n_scf))
     321            4 :       ALLOCATE (y_scf(0:n_scf))
     322              : 
     323              :       !Build the scaling function
     324            2 :       CALL scaling_function(itype_scf, n_scf, n_range, x_scf, y_scf)
     325              :       !Step grid for the integration
     326            2 :       dx = REAL(n_range, KIND=dp)/REAL(n_scf, KIND=dp)
     327              :       !Extend the range (no more calculations because fill in by 0._dp)
     328            2 :       n_cell = m3
     329            2 :       n_range = MAX(n_cell, n_range)
     330              : 
     331              :       !Allocations
     332            2 :       ncache = ncache_optimal
     333              :       !the HalFFT must be performed only in the third dimension,
     334              :       !and nker3=n3/2+1, hence
     335            2 :       IF (ncache <= (nker3 - 1)*4) ncache = nker3 - 1*4
     336              : 
     337              :       !enlarge the second dimension of the kernel to be compatible with nproc
     338            2 :       nact2 = nker2
     339            0 :       enlarge_ydim: DO
     340            2 :          IF (nproc*(nact2/nproc) /= nact2) THEN
     341            0 :             nact2 = nact2 + 1
     342              :          ELSE
     343              :             EXIT enlarge_ydim
     344              :          END IF
     345              :       END DO enlarge_ydim
     346              : 
     347              :       !array for the MPI procedure
     348           10 :       ALLOCATE (kernel(nker1, nact2/nproc, nker3))
     349           12 :       ALLOCATE (kernel_mpi(nker1, nact2/nproc, nker3/nproc, nproc))
     350            6 :       ALLOCATE (kernel_scf(n_range))
     351            8 :       ALLOCATE (halfft_cache(2, ncache/4, 2))
     352            6 :       ALLOCATE (cossinarr(2, n3/2 - 1))
     353            2 :       ALLOCATE (btrig(2, ctrig_length))
     354            2 :       ALLOCATE (after(7))
     355            2 :       ALLOCATE (now(7))
     356            2 :       ALLOCATE (before(7))
     357              : 
     358              :       !constants
     359            2 :       pi = 4._dp*ATAN(1._dp)
     360              : 
     361              :       !arrays for the halFFT
     362            2 :       CALL ctrig(n3/2, btrig, after, before, now, 1, ic)
     363              : 
     364              :       !build the phases for the HalFFT reconstruction
     365            2 :       pion = 2._dp*pi/REAL(n3, KIND=dp)
     366          324 :       DO i3 = 2, n3/2
     367          322 :          x = REAL(i3 - 1, KIND=dp)*pion
     368          322 :          cossinarr(1, i3 - 1) = COS(x)
     369          324 :          cossinarr(2, i3 - 1) = -SIN(x)
     370              :       END DO
     371              : 
     372              :       ! satisfy valgrind, init arrays to large value, even if the offending bit is (likely?) padding
     373      1116514 :       kernel = HUGE(0._dp)
     374      1116518 :       kernel_mpi = HUGE(0._dp)
     375              : 
     376              :       !calculate the limits of the FFT calculations
     377              :       !that can be performed in a row remaining inside the cache
     378            2 :       num_of_mus = ncache/(2*n3)
     379              : 
     380            2 :       diff = 0._dp
     381              :       !order of the polynomial to be used for integration (must be a power of two)
     382            2 :       ipolyord = 8 !this part should be incorporated inside the numerical integration
     383              :       !here we have to choice the piece of the x-y grid to cover
     384              : 
     385              :       !let us now calculate the fraction of mu that will be considered
     386            2 :       j2st = iproc*(nact2/nproc)
     387            2 :       j2nd = MIN((iproc + 1)*(nact2/nproc), n2/2 + 1)
     388              : 
     389          564 :       DO ind2 = (n1/2 + 1)*j2st + 1, (n1/2 + 1)*j2nd, num_of_mus
     390          562 :          istart = ind2
     391          562 :          iend = MIN(ind2 + (num_of_mus - 1), (n1/2 + 1)*j2nd)
     392          562 :          nfft = iend - istart + 1
     393          562 :          shift = 0
     394              : 
     395              :          !initialization of the interesting part of the cache array
     396      6907542 :          halfft_cache(:, :, :) = 0._dp
     397              : 
     398          562 :          IF (istart == 1) THEN
     399              :             !i2=1
     400            1 :             shift = 1
     401              : 
     402              :             CALL calculates_green_opt_muzero(n_range, n_scf, ipolyord, x_scf, y_scf, &
     403            1 :                                              cpol(1, ipolyord), dx, kernel_scf)
     404              : 
     405              :             !copy of the first zero value
     406            1 :             halfft_cache(1, 1, 1) = 0._dp
     407              : 
     408          163 :             DO i3 = 1, m3
     409              : 
     410          162 :                value = 0.5_dp*h3*kernel_scf(i3)
     411              :                !index in where to copy the value of the kernel
     412          162 :                CALL indices(ireim, num_of_mus, n3/2 + i3, 1, ind1)
     413              :                !index in where to copy the symmetric value
     414          162 :                CALL indices(jreim, num_of_mus, n3/2 + 2 - i3, 1, jnd1)
     415          162 :                halfft_cache(ireim, ind1, 1) = value
     416          163 :                halfft_cache(jreim, jnd1, 1) = value
     417              : 
     418              :             END DO
     419              : 
     420              :          END IF
     421              : 
     422         7285 :          loopimpulses: DO imu = istart + shift, iend
     423              : 
     424              :             !here there is the value of mu associated to hgrid
     425              :             !note that we have multiplicated mu for hgrid to be comparable
     426              :             !with mu0ref
     427              : 
     428              :             !calculate the proper value of mu taking into account the periodic dimensions
     429              :             !corresponding value of i1 and i2
     430         6723 :             i1 = MOD(imu, n1/2 + 1)
     431         6723 :             IF (i1 == 0) i1 = n1/2 + 1
     432         6723 :             i2 = (imu - i1)/(n1/2 + 1) + 1
     433         6723 :             ponx = REAL(i1 - 1, KIND=dp)/REAL(n1, KIND=dp)
     434         6723 :             pony = REAL(i2 - 1, KIND=dp)/REAL(n2, KIND=dp)
     435              : 
     436         6723 :             mu1 = 2._dp*pi*SQRT((ponx/h1)**2 + (pony/h2)**2)*h3
     437              : 
     438              :             CALL calculates_green_opt(n_range, n_scf, itype_scf, ipolyord, x_scf, y_scf, &
     439         6723 :                                       cpol(1, ipolyord), mu1, dx, kernel_scf)
     440              : 
     441              :             !readjust the coefficient and define the final kernel
     442              : 
     443              :             !copy of the first zero value
     444         6723 :             halfft_cache(1, imu - istart + 1, 1) = 0._dp
     445      1096411 :             DO i3 = 1, m3
     446      1089126 :                value = -0.5_dp*h3/mu1*kernel_scf(i3)
     447              :                !write(80,*)mu1,i3,kernel_scf(i03)
     448              :                !index in where to copy the value of the kernel
     449      1089126 :                CALL indices(ireim, num_of_mus, n3/2 + i3, imu - istart + 1, ind1)
     450              :                !index in where to copy the symmetric value
     451      1089126 :                CALL indices(jreim, num_of_mus, n3/2 + 2 - i3, imu - istart + 1, jnd1)
     452      1089126 :                halfft_cache(ireim, ind1, 1) = value
     453      1095849 :                halfft_cache(jreim, jnd1, 1) = value
     454              :             END DO
     455              : 
     456              :          END DO loopimpulses
     457              : 
     458              :          !now perform the FFT of the array in cache
     459          562 :          inzee = 1
     460         2810 :          DO i = 1, ic
     461              :             CALL fftstp(num_of_mus, nfft, n3/2, num_of_mus, n3/2, &
     462              :                         halfft_cache(1, 1, inzee), halfft_cache(1, 1, 3 - inzee), &
     463         2248 :                         btrig, after(i), now(i), before(i), 1)
     464         2810 :             inzee = 3 - inzee
     465              :          END DO
     466              :          !assign the values of the FFT array
     467              :          !and compare with the good results
     468         7288 :          DO imu = istart, iend
     469              : 
     470              :             !corresponding value of i1 and i2
     471         6724 :             i1 = MOD(imu, n1/2 + 1)
     472         6724 :             IF (i1 == 0) i1 = n1/2 + 1
     473         6724 :             i2 = (imu - i1)/(n1/2 + 1) + 1
     474              : 
     475         6724 :             j2 = i2 - j2st
     476              : 
     477         6724 :             a = halfft_cache(1, imu - istart + 1, inzee)
     478         6724 :             b = halfft_cache(2, imu - istart + 1, inzee)
     479         6724 :             kernel(i1, j2, 1) = a + b
     480         6724 :             kernel(i1, j2, n3/2 + 1) = a - b
     481              : 
     482      1089850 :             DO i3 = 2, n3/2
     483      1082564 :                ind1 = imu - istart + 1 + num_of_mus*(i3 - 1)
     484      1082564 :                jnd1 = imu - istart + 1 + num_of_mus*(n3/2 + 2 - i3 - 1)
     485      1082564 :                cp = cossinarr(1, i3 - 1)
     486      1082564 :                sp = cossinarr(2, i3 - 1)
     487      1082564 :                a = halfft_cache(1, ind1, inzee)
     488      1082564 :                b = halfft_cache(2, ind1, inzee)
     489      1082564 :                c = halfft_cache(1, jnd1, inzee)
     490      1082564 :                d = halfft_cache(2, jnd1, inzee)
     491      1082564 :                feR = .5_dp*(a + c)
     492      1082564 :                feI = .5_dp*(b - d)
     493      1082564 :                foR = .5_dp*(a - c)
     494      1082564 :                foI = .5_dp*(b + d)
     495      1082564 :                fR = feR + cp*foI - sp*foR
     496      1089288 :                kernel(i1, j2, i3) = fR
     497              :             END DO
     498              :          END DO
     499              : 
     500              :       END DO
     501              : 
     502              :       !give to each processor a slice of the third dimension
     503            2 :       IF (nproc > 1) THEN
     504              :          CALL mpi_group%alltoall(kernel, &!nker1*(nact2/nproc)*(nker3/nproc), &
     505            2 :                                  kernel_mpi, nker1*(nact2/nproc)*(nker3/nproc))
     506              : 
     507            6 :          DO jp2 = 1, nproc
     508          334 :             DO i3 = 1, nker3/nproc
     509        13780 :                DO i2 = 1, nact2/nproc
     510        13448 :                   j2 = i2 + (jp2 - 1)*(nact2/nproc)
     511        13776 :                   IF (j2 <= nker2) THEN
     512      1116184 :                      DO i1 = 1, nker1
     513              :                         karray(i1, j2, i3) = &
     514      1116184 :                            kernel_mpi(i1, i2, i3, jp2)
     515              :                      END DO
     516              :                   END IF
     517              :                END DO
     518              :             END DO
     519              :          END DO
     520              : 
     521              :       ELSE
     522            0 :          karray(1:nker1, 1:nker2, 1:nker3) = kernel(1:nker1, 1:nker2, 1:nker3)
     523              :       END IF
     524              : 
     525              :       !De-allocations
     526            2 :       DEALLOCATE (kernel)
     527            2 :       DEALLOCATE (kernel_mpi)
     528            2 :       DEALLOCATE (btrig)
     529            2 :       DEALLOCATE (after)
     530            2 :       DEALLOCATE (now)
     531            2 :       DEALLOCATE (before)
     532            2 :       DEALLOCATE (halfft_cache)
     533            2 :       DEALLOCATE (kernel_scf)
     534            2 :       DEALLOCATE (x_scf)
     535            2 :       DEALLOCATE (y_scf)
     536              : 
     537            4 :    END SUBROUTINE Surfaces_Kernel
     538              : 
     539              : ! **************************************************************************************************
     540              : !> \brief ...
     541              : !> \param n ...
     542              : !> \param n_scf ...
     543              : !> \param itype_scf ...
     544              : !> \param intorder ...
     545              : !> \param xval ...
     546              : !> \param yval ...
     547              : !> \param c ...
     548              : !> \param mu ...
     549              : !> \param hres ...
     550              : !> \param g_mu ...
     551              : ! **************************************************************************************************
     552         6723 :    SUBROUTINE calculates_green_opt(n, n_scf, itype_scf, intorder, xval, yval, c, mu, hres, g_mu)
     553              :       INTEGER, INTENT(in)                                :: n, n_scf, itype_scf, intorder
     554              :       REAL(KIND=dp), DIMENSION(0:n_scf), INTENT(in)      :: xval, yval
     555              :       REAL(KIND=dp), DIMENSION(intorder+1), INTENT(in)   :: c
     556              :       REAL(KIND=dp), INTENT(in)                          :: mu, hres
     557              :       REAL(KIND=dp), DIMENSION(n), INTENT(out)           :: g_mu
     558              : 
     559              :       REAL(KIND=dp), PARAMETER                           :: mu_max = 0.2_dp
     560              : 
     561              :       INTEGER                                            :: i, iend, ikern, ivalue, izero, n_iter, &
     562              :                                                             nrec
     563              :       REAL(KIND=dp)                                      :: f, filter, fl, fr, gleft, gltmp, gright, &
     564              :                                                             grtmp, mu0, ratio, x, x0, x1
     565         6723 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: green, green1
     566              : 
     567      1095849 :       g_mu = 0.0_dp
     568              :       !We calculate the number of iterations to go from mu0 to mu0_ref
     569         6723 :       IF (mu <= mu_max) THEN
     570           27 :          n_iter = 0
     571           27 :          mu0 = mu
     572              :       ELSE
     573         6696 :          n_iter = 1
     574        19644 :          loop_iter: DO
     575        26340 :             ratio = REAL(2**n_iter, KIND=dp)
     576        26340 :             mu0 = mu/ratio
     577        26340 :             IF (mu0 <= mu_max) THEN
     578              :                EXIT loop_iter
     579              :             END IF
     580        19644 :             n_iter = n_iter + 1
     581              :          END DO loop_iter
     582              :       END IF
     583              : 
     584              :       !dimension needed for the correct calculation of the recursion
     585         6723 :       nrec = 2**n_iter*n
     586              : 
     587        20169 :       ALLOCATE (green(-nrec:nrec))
     588              : 
     589              :       !initialization of the branching value
     590      5600259 :       ikern = 0
     591      5600259 :       izero = 0
     592      5593536 :       initialization: DO
     593      5600259 :          IF (xval(izero) >= REAL(ikern, KIND=dp) .OR. izero == n_scf) EXIT initialization
     594      5593536 :          izero = izero + 1
     595              :       END DO initialization
     596     37542690 :       green = 0._dp
     597              :       !now perform the interpolation in right direction
     598              :       ivalue = izero
     599              :       gright = 0._dp
     600       800037 :       loop_right: DO
     601       806760 :          IF (ivalue >= n_scf - intorder - 1) EXIT loop_right
     602      8000370 :          DO i = 1, intorder + 1
     603      7200333 :             x = xval(ivalue) - REAL(ikern, KIND=dp)
     604      7200333 :             f = yval(ivalue)*EXP(-mu0*x)
     605      7200333 :             filter = intorder*c(i)
     606      7200333 :             gright = gright + filter*f
     607      8000370 :             ivalue = ivalue + 1
     608              :          END DO
     609       800037 :          ivalue = ivalue - 1
     610              :       END DO loop_right
     611         6723 :       iend = n_scf - ivalue
     612        60507 :       DO i = 1, iend
     613        53784 :          x = xval(ivalue) - REAL(ikern, KIND=dp)
     614        53784 :          f = yval(ivalue)*EXP(-mu0*x)
     615        53784 :          filter = intorder*c(i)
     616        53784 :          gright = gright + filter*f
     617        60507 :          ivalue = ivalue + 1
     618              :       END DO
     619         6723 :       gright = hres*gright
     620              : 
     621              :       !the scaling function is symmetric, so the same for the other direction
     622         6723 :       gleft = gright
     623              : 
     624         6723 :       green(ikern) = gleft + gright
     625              : 
     626              :       !now the loop until the last value
     627      2200414 :       DO ikern = 1, nrec
     628      2906278 :          gltmp = 0._dp
     629      2906278 :          grtmp = 0._dp
     630      2906278 :          ivalue = izero
     631      2906278 :          x0 = xval(izero)
     632       705915 :          loop_integration: DO
     633      2906278 :             IF (izero == n_scf) EXIT loop_integration
     634      7966755 :             DO i = 1, intorder + 1
     635      7260840 :                x = xval(ivalue)
     636      7260840 :                fl = yval(ivalue)*EXP(mu0*x)
     637      7260840 :                fr = yval(ivalue)*EXP(-mu0*x)
     638      7260840 :                filter = intorder*c(i)
     639      7260840 :                gltmp = gltmp + filter*fl
     640      7260840 :                grtmp = grtmp + filter*fr
     641      7260840 :                ivalue = ivalue + 1
     642      7260840 :                IF (xval(izero) >= REAL(ikern, KIND=dp) .OR. izero == n_scf) THEN
     643              :                   x1 = xval(izero)
     644              :                   EXIT loop_integration
     645              :                END IF
     646      7865910 :                izero = izero + 1
     647              :             END DO
     648       705915 :             ivalue = ivalue - 1
     649       705915 :             izero = izero - 1
     650              :          END DO loop_integration
     651      2200363 :          gleft = EXP(-mu0)*(gleft + hres*EXP(-mu0*REAL(ikern - 1, KIND=dp))*gltmp)
     652      2200363 :          IF (izero == n_scf) THEN
     653              :             gright = 0._dp
     654              :          ELSE
     655        94122 :             gright = EXP(mu0)*(gright - hres*EXP(mu0*REAL(ikern - 1, KIND=dp))*grtmp)
     656              :          END IF
     657      2200363 :          green(ikern) = gleft + gright
     658      2200363 :          green(-ikern) = gleft + gright
     659      2200414 :          IF (ABS(green(ikern)) <= 1.e-20_dp) THEN
     660         6672 :             nrec = ikern
     661         6672 :             EXIT
     662              :          END IF
     663              :          !print *,ikern,izero,n_scf,gltmp,grtmp,gleft,gright,x0,x1,green(ikern)
     664              :       END DO
     665              :       !now we must calculate the recursion
     666        20169 :       ALLOCATE (green1(-nrec:nrec))
     667              : 
     668              :       !Start the iteration to go from mu0 to mu
     669         6723 :       CALL scf_recursion(itype_scf, n_iter, nrec, green(-nrec), green1(-nrec))
     670              : 
     671      1095849 :       DO i = 1, MIN(n, nrec)
     672      1095849 :          g_mu(i) = green(i - 1)
     673              :       END DO
     674         6723 :       DO i = MIN(n, nrec) + 1, n
     675         6723 :          g_mu(i) = 0._dp
     676              :       END DO
     677              : 
     678         6723 :       DEALLOCATE (green, green1)
     679              : 
     680         6723 :    END SUBROUTINE calculates_green_opt
     681              : 
     682              : ! **************************************************************************************************
     683              : !> \brief ...
     684              : !> \param n ...
     685              : !> \param n_scf ...
     686              : !> \param intorder ...
     687              : !> \param xval ...
     688              : !> \param yval ...
     689              : !> \param c ...
     690              : !> \param hres ...
     691              : !> \param green ...
     692              : ! **************************************************************************************************
     693            1 :    SUBROUTINE calculates_green_opt_muzero(n, n_scf, intorder, xval, yval, c, hres, green)
     694              :       INTEGER, INTENT(in)                                :: n, n_scf, intorder
     695              :       REAL(KIND=dp), DIMENSION(0:n_scf), INTENT(in)      :: xval, yval
     696              :       REAL(KIND=dp), DIMENSION(intorder+1), INTENT(in)   :: c
     697              :       REAL(KIND=dp), INTENT(in)                          :: hres
     698              :       REAL(KIND=dp), DIMENSION(n), INTENT(out)           :: green
     699              : 
     700              :       INTEGER                                            :: i, iend, ikern, ivalue, izero
     701              :       REAL(KIND=dp)                                      :: c0, c1, filter, gl0, gl1, gr0, gr1, x, y
     702              : 
     703              : !initialization of the branching value
     704              : 
     705            1 :       ikern = 0
     706            1 :       izero = 0
     707          832 :       initialization: DO
     708          833 :          IF (xval(izero) >= REAL(ikern, KIND=dp) .OR. izero == n_scf) EXIT initialization
     709          833 :          izero = izero + 1
     710              :       END DO initialization
     711          163 :       green = 0._dp
     712              :       !first case, ikern=0
     713              :       !now perform the interpolation in right direction
     714              :       ivalue = izero
     715              :       gr1 = 0._dp
     716          119 :       loop_right: DO
     717          120 :          IF (ivalue >= n_scf - intorder - 1) EXIT loop_right
     718         1190 :          DO i = 1, intorder + 1
     719         1071 :             x = xval(ivalue)
     720         1071 :             y = yval(ivalue)
     721         1071 :             filter = intorder*c(i)
     722         1071 :             gr1 = gr1 + filter*x*y
     723         1190 :             ivalue = ivalue + 1
     724              :          END DO
     725          119 :          ivalue = ivalue - 1
     726              :       END DO loop_right
     727            1 :       iend = n_scf - ivalue
     728            9 :       DO i = 1, iend
     729            8 :          x = xval(ivalue)
     730            8 :          y = yval(ivalue)
     731            8 :          filter = intorder*c(i)
     732            8 :          gr1 = gr1 + filter*x*y
     733            9 :          ivalue = ivalue + 1
     734              :       END DO
     735            1 :       gr1 = hres*gr1
     736              :       !the scaling function is symmetric
     737            1 :       gl1 = -gr1
     738            1 :       gl0 = 0.5_dp
     739            1 :       gr0 = 0.5_dp
     740              : 
     741            1 :       green(1) = 2._dp*gr1
     742              : 
     743              :       !now the loop until the last value
     744          162 :       DO ikern = 1, n - 1
     745              :          c0 = 0._dp
     746              :          c1 = 0._dp
     747              :          ivalue = izero
     748          105 :          loop_integration: DO
     749          266 :             IF (izero == n_scf) EXIT loop_integration
     750         1185 :             DO i = 1, intorder + 1
     751         1080 :                x = xval(ivalue)
     752         1080 :                y = yval(ivalue)
     753         1080 :                filter = intorder*c(i)
     754         1080 :                c0 = c0 + filter*y
     755         1080 :                c1 = c1 + filter*y*x
     756         1080 :                ivalue = ivalue + 1
     757         1080 :                IF (xval(izero) >= REAL(ikern, KIND=dp) .OR. izero == n_scf) THEN
     758              :                   EXIT loop_integration
     759              :                END IF
     760         1170 :                izero = izero + 1
     761              :             END DO
     762          105 :             ivalue = ivalue - 1
     763          120 :             izero = izero - 1
     764              :          END DO loop_integration
     765          161 :          c0 = hres*c0
     766          161 :          c1 = hres*c1
     767              : 
     768          161 :          gl0 = gl0 + c0
     769          161 :          gl1 = gl1 + c1
     770          161 :          gr0 = gr0 - c0
     771          161 :          gr1 = gr1 - c1
     772              :          !general case
     773          162 :          green(ikern + 1) = REAL(ikern, KIND=dp)*(gl0 - gr0) + gr1 - gl1
     774              :          !print *,ikern,izero,n_scf,gltmp,grtmp,gleft,gright,x0,x1,green(ikern)
     775              :       END DO
     776              : 
     777            1 :    END SUBROUTINE calculates_green_opt_muzero
     778              : 
     779              : ! **************************************************************************************************
     780              : !> \brief ...
     781              : !> \param var_realimag ...
     782              : !> \param nelem ...
     783              : !> \param intrn ...
     784              : !> \param extrn ...
     785              : !> \param index ...
     786              : ! **************************************************************************************************
     787      2178576 :    SUBROUTINE indices(var_realimag, nelem, intrn, extrn, index)
     788              : 
     789              :       INTEGER, INTENT(out)                               :: var_realimag
     790              :       INTEGER, INTENT(in)                                :: nelem, intrn, extrn
     791              :       INTEGER, INTENT(out)                               :: index
     792              : 
     793              :       INTEGER                                            :: i
     794              : 
     795              : !real or imaginary part
     796              : 
     797      2178576 :       var_realimag = 2 - MOD(intrn, 2)
     798              : !actual index over half the length
     799              : 
     800      2178576 :       i = (intrn + 1)/2
     801              :       !check
     802      2178576 :       IF (2*(i - 1) + var_realimag /= intrn) THEN
     803            0 :          PRINT *, 'error, index=', intrn, 'var_realimag=', var_realimag, 'i=', i
     804              :       END IF
     805              :       !complete index to be assigned
     806      2178576 :       index = extrn + nelem*(i - 1)
     807              : 
     808      2178576 :    END SUBROUTINE indices
     809              : 
     810              : ! **************************************************************************************************
     811              : !> \brief Build the kernel of a gaussian function
     812              : !>     for interpolating scaling functions.
     813              : !>     Do the parallel HalFFT of the symmetrized function and stores into
     814              : !>     memory only 1/8 of the grid divided by the number of processes nproc
     815              : !>
     816              : !>     Build the kernel (karray) of a gaussian function
     817              : !>     for interpolating scaling functions
     818              : !>     $$ K(j) = \sum_k \omega_k \int \int \phi(x) g_k(x'-x) \delta(x'- j) dx dx' $$
     819              : !> \param n01 Mesh dimensions of the density
     820              : !> \param n02 Mesh dimensions of the density
     821              : !> \param n03 Mesh dimensions of the density
     822              : !> \param nfft1 Dimensions of the FFT grid (HalFFT in the third direction)
     823              : !> \param nfft2 Dimensions of the FFT grid (HalFFT in the third direction)
     824              : !> \param nfft3 Dimensions of the FFT grid (HalFFT in the third direction)
     825              : !> \param n1k Dimensions of the kernel FFT
     826              : !> \param n2k Dimensions of the kernel FFT
     827              : !> \param n3k Dimensions of the kernel FFT
     828              : !> \param hgrid Mesh step
     829              : !> \param itype_scf Order of the scaling function (8,14,16)
     830              : !> \param iproc ...
     831              : !> \param nproc ...
     832              : !> \param karray ...
     833              : !> \param mpi_group ...
     834              : !> \date February 2006
     835              : !> \author T. Deutsch, L. Genovese
     836              : ! **************************************************************************************************
     837          508 :    SUBROUTINE Free_Kernel(n01, n02, n03, nfft1, nfft2, nfft3, n1k, n2k, n3k, &
     838          508 :                           hgrid, itype_scf, iproc, nproc, karray, mpi_group)
     839              : 
     840              :       INTEGER, INTENT(in)                                :: n01, n02, n03, nfft1, nfft2, nfft3, n1k, &
     841              :                                                             n2k, n3k
     842              :       REAL(KIND=dp), INTENT(in)                          :: hgrid
     843              :       INTEGER, INTENT(in)                                :: itype_scf, iproc, nproc
     844              :       REAL(KIND=dp), DIMENSION(n1k, n2k, n3k/nproc), &
     845              :          INTENT(out)                                     :: karray
     846              :       TYPE(mp_comm_type), INTENT(in)                     :: mpi_group
     847              : 
     848              :       INTEGER, PARAMETER                                 :: n_gauss = 89, n_points = 2**6
     849              :       REAL(KIND=dp), PARAMETER                           :: p0_ref = 1._dp
     850              : 
     851              :       INTEGER                                            :: i, i01, i02, i03, i1, i2, i3, i_gauss, &
     852              :                                                             i_kern, iend, istart, istart1, n1h, &
     853              :                                                             n2h, n3h, n_cell, n_iter, n_range, &
     854              :                                                             n_scf, nker1, nker2, nker3
     855              :       REAL(KIND=dp)                                      :: a1, a2, a3, a_range, absci, acc_gauss, &
     856              :                                                             dr_gauss, dx, factor, factor2, kern, &
     857              :                                                             p0_cell, p0gauss, pgauss, ur_gauss
     858          508 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: kern_1_scf, kernel_scf, x_scf, y_scf
     859          508 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :)     :: kp
     860              :       REAL(KIND=dp), DIMENSION(n_gauss)                  :: p_gauss, w_gauss
     861              : 
     862              : !Do not touch !!!!
     863              : !Better if higher (1024 points are enough 10^{-14}: 2*itype_scf*n_points)
     864              : !Better p_gauss for calculation
     865              : !(the support of the exponential should be inside [-n_range/2,n_range/2])
     866              : !Number of integration points : 2*itype_scf*n_points
     867              : 
     868          508 :       n_scf = 2*itype_scf*n_points
     869              :       !Set karray
     870     33689422 :       karray = 0.0_dp
     871              :       !here we must set the dimensions for the fft part, starting from the nfft
     872              :       !remember that actually nfft2 is associated to n03 and viceversa
     873              : 
     874              :       !dimensions that define the center of symmetry
     875          508 :       n1h = nfft1/2
     876          508 :       n2h = nfft2/2
     877          508 :       n3h = nfft3/2
     878              : 
     879              :       !Auxiliary dimensions only for building the FFT part
     880          508 :       nker1 = nfft1
     881          508 :       nker2 = nfft2
     882          508 :       nker3 = nfft3/2 + 1
     883              : 
     884              :       !adjusting the last two dimensions to be multiples of nproc
     885            0 :       DO
     886          508 :          IF (MODULO(nker2, nproc) == 0) EXIT
     887            0 :          nker2 = nker2 + 1
     888              :       END DO
     889          344 :       DO
     890          852 :          IF (MODULO(nker3, nproc) == 0) EXIT
     891          344 :          nker3 = nker3 + 1
     892              :       END DO
     893              : 
     894              :       !this will be the array of the kernel in the real space
     895         3048 :       ALLOCATE (kp(n1h + 1, n3h + 1, nker2/nproc))
     896              : 
     897              :       !defining proper extremes for the calculation of the
     898              :       !local part of the kernel
     899              : 
     900          508 :       istart = iproc*nker2/nproc + 1
     901          508 :       iend = MIN((iproc + 1)*nker2/nproc, n2h + n03)
     902              : 
     903          508 :       istart1 = istart
     904          508 :       IF (iproc == 0) istart1 = n2h - n03 + 2
     905              : 
     906              :       !Allocations
     907         1524 :       ALLOCATE (x_scf(0:n_scf))
     908         1016 :       ALLOCATE (y_scf(0:n_scf))
     909              : 
     910              :       !Build the scaling function
     911          508 :       CALL scaling_function(itype_scf, n_scf, n_range, x_scf, y_scf)
     912              :       !Step grid for the integration
     913          508 :       dx = REAL(n_range, KIND=dp)/REAL(n_scf, KIND=dp)
     914              :       !Extend the range (no more calculations because fill in by 0._dp)
     915          508 :       n_cell = MAX(n01, n02, n03)
     916          508 :       n_range = MAX(n_cell, n_range)
     917              : 
     918              :       !Allocations
     919         1524 :       ALLOCATE (kernel_scf(-n_range:n_range))
     920         1016 :       ALLOCATE (kern_1_scf(-n_range:n_range))
     921              : 
     922              :       !Lengthes of the box (use FFT dimension)
     923          508 :       a1 = hgrid*REAL(n01, KIND=dp)
     924          508 :       a2 = hgrid*REAL(n02, KIND=dp)
     925          508 :       a3 = hgrid*REAL(n03, KIND=dp)
     926              : 
     927      2588664 :       x_scf(:) = hgrid*x_scf(:)
     928      2588664 :       y_scf(:) = 1._dp/hgrid*y_scf(:)
     929          508 :       dx = hgrid*dx
     930              :       !To have a correct integration
     931          508 :       p0_cell = p0_ref/(hgrid*hgrid)
     932              : 
     933              :       !Initialization of the gaussian (Beylkin)
     934          508 :       CALL gequad(p_gauss, w_gauss, ur_gauss, dr_gauss, acc_gauss)
     935              :       !In order to have a range from a_range=sqrt(a1*a1+a2*a2+a3*a3)
     936              :       !(biggest length in the cube)
     937              :       !We divide the p_gauss by a_range**2 and a_gauss by a_range
     938          508 :       a_range = SQRT(a1*a1 + a2*a2 + a3*a3)
     939          508 :       factor = 1._dp/a_range
     940              :       !factor2 = factor*factor
     941          508 :       factor2 = 1._dp/(a1*a1 + a2*a2 + a3*a3)
     942        45720 :       DO i_gauss = 1, n_gauss
     943        45720 :          p_gauss(i_gauss) = factor2*p_gauss(i_gauss)
     944              :       END DO
     945        45720 :       DO i_gauss = 1, n_gauss
     946        45720 :          w_gauss(i_gauss) = factor*w_gauss(i_gauss)
     947              :       END DO
     948              : 
     949     65217592 :       kp(:, :, :) = 0._dp
     950              :       !Use in this order (better for accuracy).
     951        45720 :       loop_gauss: DO i_gauss = n_gauss, 1, -1
     952              :          !Gaussian
     953        45212 :          pgauss = p_gauss(i_gauss)
     954              : 
     955              :          !We calculate the number of iterations to go from pgauss to p0_ref
     956        45212 :          n_iter = NINT((LOG(pgauss) - LOG(p0_cell))/LOG(4._dp))
     957        45212 :          IF (n_iter <= 0) THEN
     958         9612 :             n_iter = 0
     959         9612 :             p0gauss = pgauss
     960              :          ELSE
     961        35600 :             p0gauss = pgauss/4._dp**n_iter
     962              :          END IF
     963              : 
     964              :          !Stupid integration
     965              :          !Do the integration with the exponential centered in i_kern
     966      7327904 :          kernel_scf(:) = 0._dp
     967      1424452 :          DO i_kern = 0, n_range
     968              :             kern = 0._dp
     969   7250359688 :             DO i = 0, n_scf
     970   7248939204 :                absci = x_scf(i) - REAL(i_kern, KIND=dp)*hgrid
     971   7248939204 :                absci = absci*absci
     972   7250359688 :                kern = kern + y_scf(i)*EXP(-p0gauss*absci)*dx
     973              :             END DO
     974      1420484 :             kernel_scf(i_kern) = kern
     975      1420484 :             kernel_scf(-i_kern) = kern
     976      1424452 :             IF (ABS(kern) < 1.e-18_dp) THEN
     977              :                !Too small not useful to calculate
     978              :                EXIT
     979              :             END IF
     980              :          END DO
     981              : 
     982              :          !Start the iteration to go from p0gauss to pgauss
     983        45212 :          CALL scf_recursion(itype_scf, n_iter, n_range, kernel_scf, kern_1_scf)
     984              : 
     985              :          !Add to the kernel (only the local part)
     986              : 
     987      2376630 :          DO i3 = istart1, iend
     988      2330910 :             i03 = i3 - n2h - 1
     989              :             ! Crash if index out of range
     990              :             ! Without compiler bounds checking, the calculation might run successfully but
     991              :             ! it is also possible that the Hartree energy will blow up
     992              :             ! This seems to happen with large MPI processor counts if the size of the
     993              :             ! RS grid is not directly compatible with the allowed FFT dimensions in
     994              :             ! subroutine fourier_dim (ps_wavelet_fft3d.F)
     995      2330910 :             IF (i03 < -n_range .OR. i03 > n_range) THEN
     996              :                CALL cp_abort(__LOCATION__, "Index out of range in wavelet solver. "// &
     997              :                              "Try decreasing the number of MPI processors, or adjust the PW_CUTOFF or cell size "// &
     998              :                              "so that 2*(number of RS grid points) matches the allowed FFT dimensions "// &
     999            0 :                              "(see ps_wavelet_fft3d.F) exactly.")
    1000              :             END IF
    1001    107446585 :             DO i2 = 1, n02
    1002    105070463 :                i02 = i2 - 1
    1003   5433108952 :                DO i1 = 1, n01
    1004   5325707579 :                   i01 = i1 - 1
    1005              :                   kp(i1, i2, i3 - istart + 1) = kp(i1, i2, i3 - istart + 1) + w_gauss(i_gauss)* &
    1006   5430778042 :                                                 kernel_scf(i01)*kernel_scf(i02)*kernel_scf(i03)
    1007              :                END DO
    1008              :             END DO
    1009              :          END DO
    1010              : 
    1011              :       END DO loop_gauss
    1012              : 
    1013              :       !De-allocations
    1014          508 :       DEALLOCATE (kernel_scf)
    1015          508 :       DEALLOCATE (kern_1_scf)
    1016          508 :       DEALLOCATE (x_scf)
    1017          508 :       DEALLOCATE (y_scf)
    1018              : 
    1019              : !!!!END KERNEL CONSTRUCTION
    1020              : 
    1021              : !!$ if(iproc .eq. 0) print *,"Do a 3D PHalFFT for the kernel"
    1022              : 
    1023              :       CALL kernelfft(nfft1, nfft2, nfft3, nker1, nker2, nker3, n1k, n2k, n3k, nproc, iproc, &
    1024          508 :                      kp, karray, mpi_group)
    1025              : 
    1026              :       !De-allocations
    1027          508 :       DEALLOCATE (kp)
    1028              : 
    1029          508 :    END SUBROUTINE Free_Kernel
    1030              : 
    1031              : ! **************************************************************************************************
    1032              : !> \brief ...
    1033              : !> \param n1 ...
    1034              : !> \param n3 ...
    1035              : !> \param lot ...
    1036              : !> \param nfft ...
    1037              : !> \param i1 ...
    1038              : !> \param zf ...
    1039              : !> \param zw ...
    1040              : ! **************************************************************************************************
    1041        71230 :    SUBROUTINE inserthalf(n1, n3, lot, nfft, i1, zf, zw)
    1042              :       INTEGER, INTENT(in)                                :: n1, n3, lot, nfft, i1
    1043              :       REAL(KIND=dp), DIMENSION(n1/2+1, n3/2+1), &
    1044              :          INTENT(in)                                      :: zf
    1045              :       REAL(KIND=dp), DIMENSION(2, lot, n3/2), &
    1046              :          INTENT(out)                                     :: zw
    1047              : 
    1048              :       INTEGER                                            :: i01, i03i, i03r, i3, l1, l3
    1049              : 
    1050    436423182 :       zw = 0.0_dp
    1051        71230 :       i3 = 0
    1052      4066734 :       DO l3 = 1, n3, 2
    1053      3995504 :          i3 = i3 + 1
    1054      3995504 :          i03r = ABS(l3 - n3/2 - 1) + 1
    1055      3995504 :          i03i = ABS(l3 - n3/2) + 1
    1056    127074870 :          DO l1 = 1, nfft
    1057    123008136 :             i01 = ABS(l1 - 1 + i1 - n1/2 - 1) + 1
    1058    123008136 :             zw(1, l1, i3) = zf(i01, i03r)
    1059    127003640 :             zw(2, l1, i3) = zf(i01, i03i)
    1060              :          END DO
    1061              :       END DO
    1062              : 
    1063        71230 :    END SUBROUTINE inserthalf
    1064              : 
    1065              : ! **************************************************************************************************
    1066              : !> \brief (Based on suitable modifications of S.Goedecker routines)
    1067              : !>      Calculates the FFT of the distributed kernel
    1068              : !> \param n1 logical dimension of the transform.
    1069              : !> \param n2 logical dimension of the transform.
    1070              : !> \param n3 logical dimension of the transform.
    1071              : !> \param nd1 Dimensions of work arrays
    1072              : !> \param nd2 Dimensions of work arrays
    1073              : !> \param nd3 Dimensions of work arrays
    1074              : !> \param nk1 ...
    1075              : !> \param nk2 ...
    1076              : !> \param nk3 ...
    1077              : !> \param nproc number of processors used as returned by MPI_COMM_SIZE
    1078              : !> \param iproc [0:nproc-1] number of processor as returned by MPI_COMM_RANK
    1079              : !> \param zf Real kernel (input)
    1080              : !>                   zf(i1,i2,i3)
    1081              : !> \param zr Distributed Kernel FFT
    1082              : !>                   zr(2,i1,i2,i3)
    1083              : !> \param mpi_group ...
    1084              : !> \date February 2006
    1085              : !> \par Restrictions
    1086              : !>      Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
    1087              : !>      Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1999
    1088              : !>      Copyright (C) 2002 Stefan Goedecker, CEA Grenoble
    1089              : !>      This file is distributed under the terms of the
    1090              : !>      GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
    1091              : !> \author S. Goedecker, L. Genovese
    1092              : !> \note As transform lengths
    1093              : !>                  most products of the prime factors 2,3,5 are allowed.
    1094              : !>                   The detailed table with allowed transform lengths can
    1095              : !>                   be found in subroutine CTRIG
    1096              : ! **************************************************************************************************
    1097          508 :    SUBROUTINE kernelfft(n1, n2, n3, nd1, nd2, nd3, nk1, nk2, nk3, nproc, iproc, zf, zr, mpi_group)
    1098              : 
    1099              :       INTEGER, INTENT(in)                                :: n1, n2, n3, nd1, nd2, nd3, nk1, nk2, &
    1100              :                                                             nk3, nproc, iproc
    1101              :       REAL(KIND=dp), &
    1102              :          DIMENSION(n1/2+1, n3/2+1, nd2/nproc), &
    1103              :          INTENT(in)                                      :: zf
    1104              :       REAL(KIND=dp), DIMENSION(nk1, nk2, nk3/nproc), &
    1105              :          INTENT(inout)                                   :: zr
    1106              :       TYPE(mp_comm_type), INTENT(in)                     :: mpi_group
    1107              : 
    1108              :       INTEGER, PARAMETER                                 :: ncache_optimal = 8*1024
    1109              : 
    1110              :       INTEGER                                            :: i, i1, i3, ic1, ic2, ic3, inzee, j, j2, &
    1111              :                                                             J2st, j3, Jp2st, lot, lzt, ma, mb, &
    1112              :                                                             ncache, nfft
    1113          508 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: after1, after2, after3, before1, &
    1114          508 :                                                             before2, before3, now1, now2, now3
    1115              :       REAL(kind=dp)                                      :: twopion
    1116          508 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: cosinarr, trig1, trig2, trig3
    1117          508 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :)     :: zt, zw
    1118              :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :, :)  :: zmpi2
    1119              :       REAL(KIND=dp), ALLOCATABLE, &
    1120          508 :          DIMENSION(:, :, :, :, :)                        :: zmpi1
    1121              : 
    1122              : !  include "perfdata.inc"
    1123              : !work arrays for transpositions
    1124              : !work arrays for MPI
    1125              : !cache work array
    1126              : !FFT work arrays
    1127              : !Body
    1128              : !check input
    1129              : 
    1130          508 :       CPASSERT(nd1 >= n1)
    1131          508 :       CPASSERT(nd2 >= n2)
    1132          508 :       CPASSERT(nd3 >= n3/2 + 1)
    1133          508 :       CPASSERT(MOD(nd3, nproc) == 0)
    1134          508 :       CPASSERT(MOD(nd2, nproc) == 0)
    1135              :       MARK_USED(nd1)
    1136              : 
    1137              :       !defining work arrays dimensions
    1138          508 :       ncache = ncache_optimal
    1139          508 :       IF (ncache <= MAX(n1, n2, n3/2)*4) ncache = MAX(n1, n2, n3/2)*4
    1140          508 :       lzt = n2
    1141          508 :       IF (MOD(n2, 2) == 0) lzt = lzt + 1
    1142          508 :       IF (MOD(n2, 4) == 0) lzt = lzt + 1
    1143              : 
    1144              :       !Allocations
    1145          508 :       ALLOCATE (trig1(2, ctrig_length))
    1146          508 :       ALLOCATE (after1(7))
    1147          508 :       ALLOCATE (now1(7))
    1148          508 :       ALLOCATE (before1(7))
    1149          508 :       ALLOCATE (trig2(2, ctrig_length))
    1150          508 :       ALLOCATE (after2(7))
    1151          508 :       ALLOCATE (now2(7))
    1152          508 :       ALLOCATE (before2(7))
    1153          508 :       ALLOCATE (trig3(2, ctrig_length))
    1154          508 :       ALLOCATE (after3(7))
    1155          508 :       ALLOCATE (now3(7))
    1156          508 :       ALLOCATE (before3(7))
    1157         2032 :       ALLOCATE (zw(2, ncache/4, 2))
    1158         2032 :       ALLOCATE (zt(2, lzt, n1))
    1159         2540 :       ALLOCATE (zmpi2(2, n1, nd2/nproc, nd3))
    1160         2032 :       ALLOCATE (cosinarr(2, n3/2))
    1161          508 :       IF (nproc > 1) THEN
    1162         2064 :          ALLOCATE (zmpi1(2, n1, nd2/nproc, nd3/nproc, nproc))
    1163    284303352 :          zmpi1 = 0.0_dp
    1164              :       END IF
    1165              : 
    1166    382402076 :       zmpi2 = 0.0_dp
    1167              :       !calculating the FFT work arrays (beware on the HalFFT in n3 dimension)
    1168          508 :       CALL ctrig(n3/2, trig3, after3, before3, now3, 1, ic3)
    1169          508 :       CALL ctrig(n1, trig1, after1, before1, now1, 1, ic1)
    1170          508 :       CALL ctrig(n2, trig2, after2, before2, now2, 1, ic2)
    1171              : 
    1172              :       !Calculating array of phases for HalFFT decoding
    1173          508 :       twopion = 8._dp*ATAN(1._dp)/REAL(n3, KIND=dp)
    1174        22132 :       DO i3 = 1, n3/2
    1175        21624 :          cosinarr(1, i3) = COS(twopion*(i3 - 1))
    1176        22132 :          cosinarr(2, i3) = -SIN(twopion*(i3 - 1))
    1177              :       END DO
    1178              : 
    1179              :       !transform along z axis
    1180              : 
    1181          508 :       lot = ncache/(2*n3)
    1182          508 :       CPASSERT(lot >= 1)
    1183              : 
    1184        27184 :       DO j2 = 1, nd2/nproc
    1185              :          !this condition ensures that we manage only the interesting part for the FFT
    1186        27184 :          IF (iproc*(nd2/nproc) + j2 <= n2) THEN
    1187        97906 :             DO i1 = 1, n1, lot
    1188        71230 :                ma = i1
    1189        71230 :                mb = MIN(i1 + (lot - 1), n1)
    1190        71230 :                nfft = mb - ma + 1
    1191              : 
    1192              :                !inserting real data into complex array of half length
    1193              :                !input: I1,I3,J2,(Jp2)
    1194              : 
    1195        71230 :                CALL inserthalf(n1, n3, lot, nfft, i1, zf(1, 1, j2), zw(1, 1, 1))
    1196              : 
    1197              :                !performing FFT
    1198        71230 :                inzee = 1
    1199       258424 :                DO i = 1, ic3
    1200              :                   CALL fftstp(lot, nfft, n3/2, lot, n3/2, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
    1201       187194 :                               trig3, after3(i), now3(i), before3(i), 1)
    1202       258424 :                   inzee = 3 - inzee
    1203              :                END DO
    1204              :                !output: I1,i3,J2,(Jp2)
    1205              : 
    1206              :                !unpacking FFT in order to restore correct result,
    1207              :                !while exchanging components
    1208              :                !input: I1,i3,J2,(Jp2)
    1209        97906 :                CALL scramble_unpack(i1, j2, lot, nfft, n1, n3, nd2, nproc, nd3, zw(1, 1, inzee), zmpi2, cosinarr)
    1210              :                !output: I1,J2,i3,(Jp2)
    1211              :             END DO
    1212              :          END IF
    1213              :       END DO
    1214              : 
    1215              :       !Interprocessor data transposition
    1216              :       !input: I1,J2,j3,jp3,(Jp2)
    1217          508 :       IF (nproc > 1) THEN
    1218              :          !communication scheduling
    1219              :          CALL mpi_group%alltoall(zmpi2, &!2*n1*(nd2/nproc)*(nd3/nproc), &
    1220          344 :                                  zmpi1, 2*n1*(nd2/nproc)*(nd3/nproc))
    1221              :          ! output: I1,J2,j3,Jp2,(jp3)
    1222              :       END IF
    1223              : 
    1224        14534 :       DO j3 = 1, nd3/nproc
    1225              :          !this condition ensures that we manage only the interesting part for the FFT
    1226        14534 :          IF (iproc*(nd3/nproc) + j3 <= n3/2 + 1) THEN
    1227        13854 :             Jp2st = 1
    1228        13854 :             J2st = 1
    1229              : 
    1230              :             !transform along x axis
    1231        13854 :             lot = ncache/(4*n1)
    1232        13854 :             CPASSERT(lot >= 1)
    1233              : 
    1234        84028 :             DO j = 1, n2, lot
    1235        70174 :                ma = j
    1236        70174 :                mb = MIN(j + (lot - 1), n2)
    1237        70174 :                nfft = mb - ma + 1
    1238              : 
    1239              :                !reverse ordering
    1240              :                !input: I1,J2,j3,Jp2,(jp3)
    1241        70174 :                IF (nproc == 1) THEN
    1242        19380 :                   CALL mpiswitch(j3, nfft, Jp2st, J2st, lot, n1, nd2, nd3, nproc, zmpi2, zw(1, 1, 1))
    1243              :                ELSE
    1244        50794 :                   CALL mpiswitch(j3, nfft, Jp2st, J2st, lot, n1, nd2, nd3, nproc, zmpi1, zw(1, 1, 1))
    1245              :                END IF
    1246              :                !output: J2,Jp2,I1,j3,(jp3)
    1247              : 
    1248              :                !performing FFT
    1249              :                !input: I2,I1,j3,(jp3)
    1250        70174 :                inzee = 1
    1251       229280 :                DO i = 1, ic1 - 1
    1252              :                   CALL fftstp(lot, nfft, n1, lot, n1, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
    1253       159106 :                               trig1, after1(i), now1(i), before1(i), 1)
    1254       229280 :                   inzee = 3 - inzee
    1255              :                END DO
    1256              :                !storing the last step into zt
    1257        70174 :                i = ic1
    1258              :                CALL fftstp(lot, nfft, n1, lzt, n1, zw(1, 1, inzee), zt(1, j, 1), &
    1259        84028 :                            trig1, after1(i), now1(i), before1(i), 1)
    1260              :                !output: I2,i1,j3,(jp3)
    1261              :             END DO
    1262              : 
    1263              :             !transform along y axis, and taking only the first half
    1264        13854 :             lot = ncache/(4*n2)
    1265        13854 :             CPASSERT(lot >= 1)
    1266              : 
    1267        54393 :             DO j = 1, nk1, lot
    1268        40539 :                ma = j
    1269        40539 :                mb = MIN(j + (lot - 1), nk1)
    1270        40539 :                nfft = mb - ma + 1
    1271              : 
    1272              :                !reverse ordering
    1273              :                !input: I2,i1,j3,(jp3)
    1274        40539 :                CALL switch(nfft, n2, lot, n1, lzt, zt(1, 1, j), zw(1, 1, 1))
    1275              :                !output: i1,I2,j3,(jp3)
    1276              : 
    1277              :                !performing FFT
    1278              :                !input: i1,I2,j3,(jp3)
    1279        40539 :                inzee = 1
    1280       174117 :                DO i = 1, ic2
    1281              :                   CALL fftstp(lot, nfft, n2, lot, n2, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
    1282       133578 :                               trig2, after2(i), now2(i), before2(i), 1)
    1283       174117 :                   inzee = 3 - inzee
    1284              :                END DO
    1285              : 
    1286        54393 :                CALL realcopy(lot, nfft, n2, nk1, nk2, zw(1, 1, inzee), zr(j, 1, j3))
    1287              : 
    1288              :             END DO
    1289              :             !output: i1,i2,j3,(jp3)
    1290              :          END IF
    1291              :       END DO
    1292              : 
    1293              :       !De-allocations
    1294          508 :       DEALLOCATE (trig1)
    1295          508 :       DEALLOCATE (after1)
    1296          508 :       DEALLOCATE (now1)
    1297          508 :       DEALLOCATE (before1)
    1298          508 :       DEALLOCATE (trig2)
    1299          508 :       DEALLOCATE (after2)
    1300          508 :       DEALLOCATE (now2)
    1301          508 :       DEALLOCATE (before2)
    1302          508 :       DEALLOCATE (trig3)
    1303          508 :       DEALLOCATE (after3)
    1304          508 :       DEALLOCATE (now3)
    1305          508 :       DEALLOCATE (before3)
    1306          508 :       DEALLOCATE (zmpi2)
    1307          508 :       DEALLOCATE (zw)
    1308          508 :       DEALLOCATE (zt)
    1309          508 :       DEALLOCATE (cosinarr)
    1310          508 :       IF (nproc > 1) DEALLOCATE (zmpi1)
    1311              : 
    1312          508 :    END SUBROUTINE kernelfft
    1313              : 
    1314              : ! **************************************************************************************************
    1315              : !> \brief ...
    1316              : !> \param lot ...
    1317              : !> \param nfft ...
    1318              : !> \param n2 ...
    1319              : !> \param nk1 ...
    1320              : !> \param nk2 ...
    1321              : !> \param zin ...
    1322              : !> \param zout ...
    1323              : ! **************************************************************************************************
    1324        40539 :    SUBROUTINE realcopy(lot, nfft, n2, nk1, nk2, zin, zout)
    1325              :       INTEGER, INTENT(in)                                :: lot, nfft, n2, nk1, nk2
    1326              :       REAL(KIND=dp), DIMENSION(2, lot, n2), INTENT(in)   :: zin
    1327              :       REAL(KIND=dp), DIMENSION(nk1, nk2), INTENT(inout)  :: zout
    1328              : 
    1329              :       INTEGER                                            :: i, j
    1330              : 
    1331      2323378 :       DO i = 1, nk2
    1332     34932436 :          DO j = 1, nfft
    1333     34891897 :             zout(j, i) = zin(1, j, i)
    1334              :          END DO
    1335              :       END DO
    1336              : 
    1337        40539 :    END SUBROUTINE realcopy
    1338              : 
    1339              : ! **************************************************************************************************
    1340              : !> \brief ...
    1341              : !> \param nfft ...
    1342              : !> \param n2 ...
    1343              : !> \param lot ...
    1344              : !> \param n1 ...
    1345              : !> \param lzt ...
    1346              : !> \param zt ...
    1347              : !> \param zw ...
    1348              : ! **************************************************************************************************
    1349        40539 :    SUBROUTINE switch(nfft, n2, lot, n1, lzt, zt, zw)
    1350              :       INTEGER                                            :: nfft, n2, lot, n1, lzt
    1351              :       REAL(KIND=dp)                                      :: zt(2, lzt, n1), zw(2, lot, n2)
    1352              : 
    1353              :       INTEGER                                            :: i, j
    1354              : 
    1355       675269 :       DO 200, j = 1, nfft
    1356     64583386 :          DO 100, i = 1, n2
    1357     63948656 :             zw(1, j, i) = zt(1, i, j)
    1358     63948656 :             zw(2, j, i) = zt(2, i, j)
    1359       634730 : 100         CONTINUE
    1360        40539 : 200         CONTINUE
    1361        40539 :             RETURN
    1362              :             END SUBROUTINE switch
    1363              : 
    1364              : ! **************************************************************************************************
    1365              : !> \brief ...
    1366              : !> \param j3 ...
    1367              : !> \param nfft ...
    1368              : !> \param Jp2st ...
    1369              : !> \param J2st ...
    1370              : !> \param lot ...
    1371              : !> \param n1 ...
    1372              : !> \param nd2 ...
    1373              : !> \param nd3 ...
    1374              : !> \param nproc ...
    1375              : !> \param zmpi1 ...
    1376              : !> \param zw ...
    1377              : ! **************************************************************************************************
    1378        70174 :             SUBROUTINE mpiswitch(j3, nfft, Jp2st, J2st, lot, n1, nd2, nd3, nproc, zmpi1, zw)
    1379              :       INTEGER                                            :: j3, nfft, Jp2st, J2st, lot, n1, nd2, &
    1380              :                                                             nd3, nproc
    1381              :       REAL(KIND=dp) :: zmpi1(2, n1, nd2/nproc, nd3/nproc, nproc), zw(2, lot, n1)
    1382              : 
    1383              :       INTEGER                                            :: I1, J2, JP2, mfft
    1384              : 
    1385        70174 :                mfft = 0
    1386        92306 :                DO 300, Jp2 = Jp2st, nproc
    1387      1320204 :                   DO 200, J2 = J2st, nd2/nproc
    1388      1298072 :                      mfft = mfft + 1
    1389      1298072 :                      IF (mfft > nfft) THEN
    1390        56320 :                         Jp2st = Jp2
    1391        56320 :                         J2st = J2
    1392        56320 :                         RETURN
    1393              :                      END IF
    1394    126655560 :                      DO 100, I1 = 1, n1
    1395    125413808 :                         zw(1, mfft, I1) = zmpi1(1, I1, J2, j3, Jp2)
    1396    125413808 :                         zw(2, mfft, I1) = zmpi1(2, I1, J2, j3, Jp2)
    1397      1241752 : 100                     CONTINUE
    1398        22132 : 200                     CONTINUE
    1399        22132 :                         J2st = 1
    1400        13854 : 300                     CONTINUE
    1401              :                         END SUBROUTINE mpiswitch
    1402              : 
    1403              : ! **************************************************************************************************
    1404              : !> \brief ...
    1405              : !> \param p ...
    1406              : !> \param w ...
    1407              : !> \param urange ...
    1408              : !> \param drange ...
    1409              : !> \param acc ...
    1410              : ! **************************************************************************************************
    1411          508 :                         SUBROUTINE gequad(p, w, urange, drange, acc)
    1412              : !
    1413              :       REAL(KIND=dp)                                      :: p(*), w(*), urange, drange, acc
    1414              : 
    1415              : !
    1416              : !
    1417              : !       range [10^(-9),1] and accuracy ~10^(-8);
    1418              : !
    1419              : !
    1420              : 
    1421          508 :                            p(1) = 4.96142640560223544e19_dp
    1422          508 :                            p(2) = 1.37454269147978052e19_dp
    1423          508 :                            p(3) = 7.58610013441204679e18_dp
    1424          508 :                            p(4) = 4.42040691347806996e18_dp
    1425          508 :                            p(5) = 2.61986077948367892e18_dp
    1426          508 :                            p(6) = 1.56320138155496681e18_dp
    1427          508 :                            p(7) = 9.35645215863028402e17_dp
    1428          508 :                            p(8) = 5.60962910452691703e17_dp
    1429          508 :                            p(9) = 3.3666225119686761e17_dp
    1430          508 :                            p(10) = 2.0218253197947866e17_dp
    1431          508 :                            p(11) = 1.21477756091902017e17_dp
    1432          508 :                            p(12) = 7.3012982513608503e16_dp
    1433          508 :                            p(13) = 4.38951893556421099e16_dp
    1434          508 :                            p(14) = 2.63949482512262325e16_dp
    1435          508 :                            p(15) = 1.58742054072786174e16_dp
    1436          508 :                            p(16) = 9.54806587737665531e15_dp
    1437          508 :                            p(17) = 5.74353712364571709e15_dp
    1438          508 :                            p(18) = 3.455214877389445e15_dp
    1439          508 :                            p(19) = 2.07871658520326804e15_dp
    1440          508 :                            p(20) = 1.25064667315629928e15_dp
    1441          508 :                            p(21) = 7.52469429541933745e14_dp
    1442          508 :                            p(22) = 4.5274603337253175e14_dp
    1443          508 :                            p(23) = 2.72414006900059548e14_dp
    1444          508 :                            p(24) = 1.63912168349216752e14_dp
    1445          508 :                            p(25) = 9.86275802590865738e13_dp
    1446          508 :                            p(26) = 5.93457701624974985e13_dp
    1447          508 :                            p(27) = 3.5709554322296296e13_dp
    1448          508 :                            p(28) = 2.14872890367310454e13_dp
    1449          508 :                            p(29) = 1.29294719957726902e13_dp
    1450          508 :                            p(30) = 7.78003375426361016e12_dp
    1451          508 :                            p(31) = 4.68148199759876704e12_dp
    1452          508 :                            p(32) = 2.8169955024829868e12_dp
    1453          508 :                            p(33) = 1.69507790481958464e12_dp
    1454          508 :                            p(34) = 1.01998486064607581e12_dp
    1455          508 :                            p(35) = 6.13759486539856459e11_dp
    1456          508 :                            p(36) = 3.69320183828682544e11_dp
    1457          508 :                            p(37) = 2.22232783898905102e11_dp
    1458          508 :                            p(38) = 1.33725247623668682e11_dp
    1459          508 :                            p(39) = 8.0467192739036288e10_dp
    1460          508 :                            p(40) = 4.84199582415144143e10_dp
    1461          508 :                            p(41) = 2.91360091170559564e10_dp
    1462          508 :                            p(42) = 1.75321747475309216e10_dp
    1463          508 :                            p(43) = 1.0549735552210995e10_dp
    1464          508 :                            p(44) = 6.34815321079006586e9_dp
    1465          508 :                            p(45) = 3.81991113733594231e9_dp
    1466          508 :                            p(46) = 2.29857747533101109e9_dp
    1467          508 :                            p(47) = 1.38313653595483694e9_dp
    1468          508 :                            p(48) = 8.32282908580025358e8_dp
    1469          508 :                            p(49) = 5.00814519374587467e8_dp
    1470          508 :                            p(50) = 3.01358090773319025e8_dp
    1471          508 :                            p(51) = 1.81337994217503535e8_dp
    1472          508 :                            p(52) = 1.09117589961086823e8_dp
    1473          508 :                            p(53) = 6.56599771718640323e7_dp
    1474          508 :                            p(54) = 3.95099693638497164e7_dp
    1475          508 :                            p(55) = 2.37745694710665991e7_dp
    1476          508 :                            p(56) = 1.43060135285912813e7_dp
    1477          508 :                            p(57) = 8.60844290313506695e6_dp
    1478          508 :                            p(58) = 5.18000974075383424e6_dp
    1479          508 :                            p(59) = 3.116998193057466e6_dp
    1480          508 :                            p(60) = 1.87560993870024029e6_dp
    1481          508 :                            p(61) = 1.12862197183979562e6_dp
    1482          508 :                            p(62) = 679132.441326077231_dp
    1483          508 :                            p(63) = 408658.421279877969_dp
    1484          508 :                            p(64) = 245904.473450669789_dp
    1485          508 :                            p(65) = 147969.568088321005_dp
    1486          508 :                            p(66) = 89038.612357311147_dp
    1487          508 :                            p(67) = 53577.7362552358895_dp
    1488          508 :                            p(68) = 32239.6513926914668_dp
    1489          508 :                            p(69) = 19399.7580852362791_dp
    1490          508 :                            p(70) = 11673.5323603058634_dp
    1491          508 :                            p(71) = 7024.38438577707758_dp
    1492          508 :                            p(72) = 4226.82479307685999_dp
    1493          508 :                            p(73) = 2543.43254175354295_dp
    1494          508 :                            p(74) = 1530.47486269122675_dp
    1495          508 :                            p(75) = 920.941785160749482_dp
    1496          508 :                            p(76) = 554.163803906291646_dp
    1497          508 :                            p(77) = 333.46029740785694_dp
    1498          508 :                            p(78) = 200.6550575335041_dp
    1499          508 :                            p(79) = 120.741366914147284_dp
    1500          508 :                            p(80) = 72.6544243200329916_dp
    1501          508 :                            p(81) = 43.7187810415471025_dp
    1502          508 :                            p(82) = 26.3071631447061043_dp
    1503          508 :                            p(83) = 15.8299486353816329_dp
    1504          508 :                            p(84) = 9.52493152341244004_dp
    1505          508 :                            p(85) = 5.72200417067776041_dp
    1506          508 :                            p(86) = 3.36242234070940928_dp
    1507          508 :                            p(87) = 1.75371394604499472_dp
    1508          508 :                            p(88) = 0.64705932650658966_dp
    1509          508 :                            p(89) = 0.072765905943708247_dp
    1510              :                            !
    1511          508 :                            w(1) = 47.67445484528304247e10_dp
    1512          508 :                            w(2) = 11.37485774750442175e9_dp
    1513          508 :                            w(3) = 78.64340976880190239e8_dp
    1514          508 :                            w(4) = 46.27335788759590498e8_dp
    1515          508 :                            w(5) = 24.7380464827152951e8_dp
    1516          508 :                            w(6) = 13.62904116438987719e8_dp
    1517          508 :                            w(7) = 92.79560029045882433e8_dp
    1518          508 :                            w(8) = 52.15931216254660251e8_dp
    1519          508 :                            w(9) = 31.67018011061666244e8_dp
    1520          508 :                            w(10) = 1.29291036801493046e8_dp
    1521          508 :                            w(11) = 1.00139319988015862e8_dp
    1522          508 :                            w(12) = 7.75892350510188341e7_dp
    1523          508 :                            w(13) = 6.01333567950731271e7_dp
    1524          508 :                            w(14) = 4.66141178654796875e7_dp
    1525          508 :                            w(15) = 3.61398903394911448e7_dp
    1526          508 :                            w(16) = 2.80225846672956389e7_dp
    1527          508 :                            w(17) = 2.1730509180930247e7_dp
    1528          508 :                            w(18) = 1.68524482625876965e7_dp
    1529          508 :                            w(19) = 1.30701489345870338e7_dp
    1530          508 :                            w(20) = 1.01371784832269282e7_dp
    1531          508 :                            w(21) = 7.86264116300379329e6_dp
    1532          508 :                            w(22) = 6.09861667912273717e6_dp
    1533          508 :                            w(23) = 4.73045784039455683e6_dp
    1534          508 :                            w(24) = 3.66928949951594161e6_dp
    1535          508 :                            w(25) = 2.8462050836230259e6_dp
    1536          508 :                            w(26) = 2.20777394798527011e6_dp
    1537          508 :                            w(27) = 1.71256191589205524e6_dp
    1538          508 :                            w(28) = 1.32843556197737076e6_dp
    1539          508 :                            w(29) = 1.0304731275955989e6_dp
    1540          508 :                            w(30) = 799345.206572271448_dp
    1541          508 :                            w(31) = 620059.354143595343_dp
    1542          508 :                            w(32) = 480986.704107449333_dp
    1543          508 :                            w(33) = 373107.167700228515_dp
    1544          508 :                            w(34) = 289424.08337412132_dp
    1545          508 :                            w(35) = 224510.248231581788_dp
    1546          508 :                            w(36) = 174155.825690028966_dp
    1547          508 :                            w(37) = 135095.256919654065_dp
    1548          508 :                            w(38) = 104795.442776800312_dp
    1549          508 :                            w(39) = 81291.4458222430418_dp
    1550          508 :                            w(40) = 63059.0493649328682_dp
    1551          508 :                            w(41) = 48915.9040455329689_dp
    1552          508 :                            w(42) = 37944.8484018048756_dp
    1553          508 :                            w(43) = 29434.4290473253969_dp
    1554          508 :                            w(44) = 22832.7622054490044_dp
    1555          508 :                            w(45) = 17711.743950151233_dp
    1556          508 :                            w(46) = 13739.287867104177_dp
    1557          508 :                            w(47) = 10657.7895710752585_dp
    1558          508 :                            w(48) = 8267.42141053961834_dp
    1559          508 :                            w(49) = 6413.17397520136448_dp
    1560          508 :                            w(50) = 4974.80402838654277_dp
    1561          508 :                            w(51) = 3859.03698188553047_dp
    1562          508 :                            w(52) = 2993.51824493299154_dp
    1563          508 :                            w(53) = 2322.1211966811754_dp
    1564          508 :                            w(54) = 1801.30750964719641_dp
    1565          508 :                            w(55) = 1397.30379659817038_dp
    1566          508 :                            w(56) = 1083.91149143250697_dp
    1567          508 :                            w(57) = 840.807939169209188_dp
    1568          508 :                            w(58) = 652.228524366749422_dp
    1569          508 :                            w(59) = 505.944376983506128_dp
    1570          508 :                            w(60) = 392.469362317941064_dp
    1571          508 :                            w(61) = 304.444930257324312_dp
    1572          508 :                            w(62) = 236.162932842453601_dp
    1573          508 :                            w(63) = 183.195466078603525_dp
    1574          508 :                            w(64) = 142.107732186551471_dp
    1575          508 :                            w(65) = 110.23530215723992_dp
    1576          508 :                            w(66) = 85.5113346705382257_dp
    1577          508 :                            w(67) = 66.3325469806696621_dp
    1578          508 :                            w(68) = 51.4552463353841373_dp
    1579          508 :                            w(69) = 39.9146798429449273_dp
    1580          508 :                            w(70) = 30.9624728409162095_dp
    1581          508 :                            w(71) = 24.018098812215013_dp
    1582          508 :                            w(72) = 18.6312338024296588_dp
    1583          508 :                            w(73) = 14.4525541233150501_dp
    1584          508 :                            w(74) = 11.2110836519105938_dp
    1585          508 :                            w(75) = 8.69662175848497178_dp
    1586          508 :                            w(76) = 6.74611236165731961_dp
    1587          508 :                            w(77) = 5.23307018057529994_dp
    1588          508 :                            w(78) = 4.05937850501539556_dp
    1589          508 :                            w(79) = 3.14892659076635714_dp
    1590          508 :                            w(80) = 2.44267408211071604_dp
    1591          508 :                            w(81) = 1.89482240522855261_dp
    1592          508 :                            w(82) = 1.46984505907050079_dp
    1593          508 :                            w(83) = 1.14019261330527007_dp
    1594          508 :                            w(84) = 0.884791217422925293_dp
    1595          508 :                            w(85) = 0.692686387080616483_dp
    1596          508 :                            w(86) = 0.585244576897023282_dp
    1597          508 :                            w(87) = 0.576182522545327589_dp
    1598          508 :                            w(88) = 0.596688817388997178_dp
    1599          508 :                            w(89) = 0.607879901151108771_dp
    1600              :                            !
    1601              :                            !
    1602          508 :                            urange = 1._dp
    1603          508 :                            drange = 1e-08_dp
    1604          508 :                            acc = 1e-08_dp
    1605              :                            !
    1606          508 :                            RETURN
    1607              :                         END SUBROUTINE gequad
    1608              : 
    1609              :                         END MODULE ps_wavelet_kernel
        

Generated by: LCOV version 2.0-1