LCOV - code coverage report
Current view: top level - src/pw - ps_wavelet_base.F (source / functions) Coverage Total Hit
Test: CP2K Regtests (git:42dac4a) Lines: 96.0 % 772 741
Test Date: 2025-07-25 12:55:17 Functions: 96.2 % 26 25

            Line data    Source code
       1              : !--------------------------------------------------------------------------------------------------!
       2              : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3              : !   Copyright 2000-2025 CP2K developers group <https://cp2k.org>                                   !
       4              : !                                                                                                  !
       5              : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6              : !--------------------------------------------------------------------------------------------------!
       7              : 
       8              : ! **************************************************************************************************
       9              : !> \brief Creates the wavelet kernel for the wavelet based poisson solver.
      10              : !> \author Florian Schiffmann (09.2007,fschiff)
      11              : ! **************************************************************************************************
      12              : MODULE ps_wavelet_base
      13              : 
      14              :    USE kinds,                           ONLY: dp
      15              :    USE message_passing,                 ONLY: mp_comm_type
      16              :    USE ps_wavelet_fft3d,                ONLY: ctrig,&
      17              :                                               ctrig_length,&
      18              :                                               fftstp
      19              : #include "../base/base_uses.f90"
      20              : 
      21              :    IMPLICIT NONE
      22              : 
      23              :    PRIVATE
      24              : 
      25              :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'ps_wavelet_base'
      26              : 
      27              :    PUBLIC :: scramble_unpack, p_poissonsolver, s_poissonsolver, f_poissonsolver
      28              : 
      29              : CONTAINS
      30              : 
      31              : ! **************************************************************************************************
      32              : !> \brief ...
      33              : !> \param n1 ...
      34              : !> \param n2 ...
      35              : !> \param n3 ...
      36              : !> \param nd1 ...
      37              : !> \param nd2 ...
      38              : !> \param nd3 ...
      39              : !> \param md1 ...
      40              : !> \param md2 ...
      41              : !> \param md3 ...
      42              : !> \param nproc ...
      43              : !> \param iproc ...
      44              : !> \param zf ...
      45              : !> \param scal ...
      46              : !> \param hx ...
      47              : !> \param hy ...
      48              : !> \param hz ...
      49              : !> \param mpi_group ...
      50              : ! **************************************************************************************************
      51        17323 :    SUBROUTINE P_PoissonSolver(n1, n2, n3, nd1, nd2, nd3, md1, md2, md3, nproc, iproc, zf &
      52              :                               , scal, hx, hy, hz, mpi_group)
      53              :       INTEGER, INTENT(in)                                :: n1, n2, n3, nd1, nd2, nd3, md1, md2, &
      54              :                                                             md3, nproc, iproc
      55              :       REAL(KIND=dp), DIMENSION(md1, md3, md2/nproc), &
      56              :          INTENT(inout)                                   :: zf
      57              :       REAL(KIND=dp), INTENT(in)                          :: scal, hx, hy, hz
      58              : 
      59              :       CLASS(mp_comm_type), INTENT(in)                     :: mpi_group
      60              : 
      61              :       INTEGER, PARAMETER                                 :: ncache_optimal = 8*1024
      62              : 
      63              :       INTEGER                                            :: i, i1, i3, ic1, ic2, ic3, inzee, j, j2, &
      64              :                                                             J2stb, J2stf, j3, Jp2stb, Jp2stf, lot, &
      65              :                                                             lzt, ma, mb, ncache, nfft
      66        17323 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: after1, after2, after3, before1, &
      67        17323 :                                                             before2, before3, now1, now2, now3
      68        17323 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: btrig1, btrig2, btrig3, ftrig1, ftrig2, &
      69        17323 :                                                             ftrig3
      70        17323 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :)     :: zt, zw
      71              :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :, :)  :: zmpi2
      72              :       REAL(KIND=dp), ALLOCATABLE, &
      73        17323 :          DIMENSION(:, :, :, :, :)                        :: zmpi1
      74              : 
      75        17323 :       IF (nd1 .LT. n1/2 + 1) CPABORT("Parallel convolution:ERROR:nd1")
      76        17323 :       IF (nd2 .LT. n2/2 + 1) CPABORT("Parallel convolution:ERROR:nd2")
      77        17323 :       IF (nd3 .LT. n3/2 + 1) CPABORT("Parallel convolution:ERROR:nd3")
      78        17323 :       IF (md1 .LT. n1) CPABORT("Parallel convolution:ERROR:md1")
      79        17323 :       IF (md2 .LT. n2) CPABORT("Parallel convolution:ERROR:md2")
      80        17323 :       IF (md3 .LT. n3) CPABORT("Parallel convolution:ERROR:md3")
      81        17323 :       IF (MOD(nd3, nproc) .NE. 0) CPABORT("Parallel convolution:ERROR:nd3")
      82        17323 :       IF (MOD(md2, nproc) .NE. 0) CPABORT("Parallel convolution:ERROR:md2")
      83              : 
      84              :       !defining work arrays dimensions
      85        17323 :       ncache = ncache_optimal
      86        17323 :       IF (ncache <= MAX(n1, n2, n3)*4) ncache = MAX(n1, n2, n3)*4
      87              : 
      88        17323 :       lzt = n2
      89        17323 :       IF (MOD(n2, 2) .EQ. 0) lzt = lzt + 1
      90        17323 :       IF (MOD(n2, 4) .EQ. 0) lzt = lzt + 1 !maybe this is useless
      91              : 
      92              :       !Allocations
      93        17323 :       ALLOCATE (btrig1(2, ctrig_length))
      94        17323 :       ALLOCATE (ftrig1(2, ctrig_length))
      95        17323 :       ALLOCATE (after1(7))
      96        17323 :       ALLOCATE (now1(7))
      97        17323 :       ALLOCATE (before1(7))
      98        17323 :       ALLOCATE (btrig2(2, ctrig_length))
      99        17323 :       ALLOCATE (ftrig2(2, ctrig_length))
     100        17323 :       ALLOCATE (after2(7))
     101        17323 :       ALLOCATE (now2(7))
     102        17323 :       ALLOCATE (before2(7))
     103        17323 :       ALLOCATE (btrig3(2, ctrig_length))
     104        17323 :       ALLOCATE (ftrig3(2, ctrig_length))
     105        17323 :       ALLOCATE (after3(7))
     106        17323 :       ALLOCATE (now3(7))
     107        17323 :       ALLOCATE (before3(7))
     108        69292 :       ALLOCATE (zw(2, ncache/4, 2))
     109        69292 :       ALLOCATE (zt(2, lzt, n1))
     110        86615 :       ALLOCATE (zmpi2(2, n1, md2/nproc, nd3))
     111        41353 :       IF (nproc .GT. 1) ALLOCATE (zmpi1(2, n1, md2/nproc, nd3/nproc, nproc))
     112              : 
     113              :       !calculating the FFT work arrays (beware on the HalFFT in n3 dimension)
     114        17323 :       CALL ctrig(n3, btrig3, after3, before3, now3, 1, ic3)
     115        17323 :       CALL ctrig(n1, btrig1, after1, before1, now1, 1, ic1)
     116        17323 :       CALL ctrig(n2, btrig2, after2, before2, now2, 1, ic2)
     117       375192 :       DO j = 1, n1
     118       357869 :          ftrig1(1, j) = btrig1(1, j)
     119       375192 :          ftrig1(2, j) = -btrig1(2, j)
     120              :       END DO
     121       375192 :       DO j = 1, n2
     122       357869 :          ftrig2(1, j) = btrig2(1, j)
     123       375192 :          ftrig2(2, j) = -btrig2(2, j)
     124              :       END DO
     125       375192 :       DO j = 1, n3
     126       357869 :          ftrig3(1, j) = btrig3(1, j)
     127       375192 :          ftrig3(2, j) = -btrig3(2, j)
     128              :       END DO
     129              : 
     130              :       ! transform along z axis
     131        17323 :       lot = ncache/(4*n3)
     132        17323 :       IF (lot .LT. 1) THEN
     133              :          WRITE (*, *) &
     134              :             'convolxc_off:ncache has to be enlarged to be able to hold at'// &
     135              :             'least one 1-d FFT of this size even though this will'// &
     136            0 :             'reduce the performance for shorter transform lengths'
     137            0 :          CPABORT("")
     138              :       END IF
     139       320326 :       DO j2 = 1, md2/nproc
     140              :          !this condition ensures that we manage only the interesting part for the FFT
     141       320326 :          IF (iproc*(md2/nproc) + j2 .LE. n2) THEN
     142       613678 :             DO i1 = 1, n1, lot
     143       311348 :                ma = i1
     144       311348 :                mb = MIN(i1 + (lot - 1), n1)
     145       311348 :                nfft = mb - ma + 1
     146              :                !inserting real data into complex array of half length
     147       311348 :                CALL P_fill_upcorn(md1, md3, lot, nfft, n3, zf(i1, 1, j2), zw(1, 1, 1))
     148              : 
     149              :                !performing FFT
     150              :                !input: I1,I3,J2,(Jp2)
     151       311348 :                inzee = 1
     152       971439 :                DO i = 1, ic3
     153              :                   CALL fftstp(lot, nfft, n3, lot, n3, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
     154       660091 :                               btrig3, after3(i), now3(i), before3(i), 1)
     155       971439 :                   inzee = 3 - inzee
     156              :                END DO
     157              : 
     158              :                !output: I1,i3,J2,(Jp2)
     159              :                !exchanging components
     160              :                !input: I1,i3,J2,(Jp2)
     161       613678 :                CALL scramble_P(i1, j2, lot, nfft, n1, n3, md2, nproc, nd3, zw(1, 1, inzee), zmpi2)
     162              :                !output: I1,J2,i3,(Jp2)
     163              :             END DO
     164              :          END IF
     165              :       END DO
     166              : 
     167              :       !Interprocessor data transposition
     168              :       !input: I1,J2,j3,jp3,(Jp2)
     169        17323 :       IF (nproc .GT. 1) THEN
     170              :          !communication scheduling
     171         4806 :          CALL mpi_group%alltoall(zmpi2, zmpi1, 2*n1*(md2/nproc)*(nd3/nproc))
     172              :       END IF
     173              :       !output: I1,J2,j3,Jp2,(jp3)
     174              : 
     175              :       !now each process perform complete convolution of its planes
     176       182731 :       DO j3 = 1, nd3/nproc
     177              :          !this condition ensures that we manage only the interesting part for the FFT
     178       182731 :          IF (iproc*(nd3/nproc) + j3 .LE. n3/2 + 1) THEN
     179       164470 :             Jp2stb = 1
     180       164470 :             J2stb = 1
     181       164470 :             Jp2stf = 1
     182       164470 :             J2stf = 1
     183              : 
     184              :             ! transform along x axis
     185       164470 :             lot = ncache/(4*n1)
     186       164470 :             IF (lot .LT. 1) THEN
     187              :                WRITE (*, *) &
     188              :                   'convolxc_off:ncache has to be enlarged to be able to hold at'// &
     189              :                   'least one 1-d FFT of this size even though this will'// &
     190            0 :                   'reduce the performance for shorter transform lengths'
     191            0 :                CPABORT("")
     192              :             END IF
     193              : 
     194       333616 :             DO j = 1, n2, lot
     195       169146 :                ma = j
     196       169146 :                mb = MIN(j + (lot - 1), n2)
     197       169146 :                nfft = mb - ma + 1
     198              : 
     199              :                !reverse index ordering, leaving the planes to be transformed at the end
     200              :                !input: I1,J2,j3,Jp2,(jp3)
     201       169146 :                IF (nproc .EQ. 1) THEN
     202       136958 :                   CALL P_mpiswitch_upcorn(j3, nfft, Jp2stb, J2stb, lot, n1, md2, nd3, nproc, zmpi2, zw(1, 1, 1))
     203              :                ELSE
     204        32188 :                   CALL P_mpiswitch_upcorn(j3, nfft, Jp2stb, J2stb, lot, n1, md2, nd3, nproc, zmpi1, zw(1, 1, 1))
     205              :                END IF
     206              :                !output: J2,Jp2,I1,j3,(jp3)
     207              : 
     208              :                !performing FFT
     209              :                !input: I2,I1,j3,(jp3)
     210       169146 :                inzee = 1
     211       357848 :                DO i = 1, ic1 - 1
     212              :                   CALL fftstp(lot, nfft, n1, lot, n1, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
     213       188702 :                               btrig1, after1(i), now1(i), before1(i), 1)
     214       357848 :                   inzee = 3 - inzee
     215              :                END DO
     216              : 
     217              :                !storing the last step into zt array
     218       169146 :                i = ic1
     219              :                CALL fftstp(lot, nfft, n1, lzt, n1, zw(1, 1, inzee), zt(1, j, 1), &
     220       333616 :                            btrig1, after1(i), now1(i), before1(i), 1)
     221              :                !output: I2,i1,j3,(jp3)
     222              :             END DO
     223              : 
     224              :             !transform along y axis
     225       164470 :             lot = ncache/(4*n2)
     226       164470 :             IF (lot .LT. 1) THEN
     227              :                WRITE (*, *) &
     228              :                   'convolxc_off:ncache has to be enlarged to be able to hold at'// &
     229              :                   'least one 1-d FFT of this size even though this will'// &
     230            0 :                   'reduce the performance for shorter transform lengths'
     231            0 :                CPABORT("")
     232              :             END IF
     233              : 
     234       333616 :             DO j = 1, n1, lot
     235       169146 :                ma = j
     236       169146 :                mb = MIN(j + (lot - 1), n1)
     237       169146 :                nfft = mb - ma + 1
     238              : 
     239              :                !reverse ordering
     240              :                !input: I2,i1,j3,(jp3)
     241       169146 :                CALL P_switch_upcorn(nfft, n2, lot, n1, lzt, zt(1, 1, j), zw(1, 1, 1))
     242              :                !output: i1,I2,j3,(jp3)
     243              : 
     244              :                !performing FFT
     245              :                !input: i1,I2,j3,(jp3)
     246       169146 :                inzee = 1
     247       526994 :                DO i = 1, ic2
     248              :                   CALL fftstp(lot, nfft, n2, lot, n2, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
     249       357848 :                               btrig2, after2(i), now2(i), before2(i), 1)
     250       526994 :                   inzee = 3 - inzee
     251              :                END DO
     252              :                !output: i1,i2,j3,(jp3)
     253              : 
     254              :                !Multiply with kernel in fourier space
     255       169146 :                i3 = iproc*(nd3/nproc) + j3
     256       169146 :                CALL P_multkernel(n1, n2, n3, lot, nfft, j, i3, zw(1, 1, inzee), hx, hy, hz)
     257              : 
     258              :                !TRANSFORM BACK IN REAL SPACE
     259              : 
     260              :                !transform along y axis
     261              :                !input: i1,i2,j3,(jp3)
     262       526994 :                DO i = 1, ic2
     263              :                   CALL fftstp(lot, nfft, n2, lot, n2, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
     264       357848 :                               ftrig2, after2(i), now2(i), before2(i), -1)
     265       526994 :                   inzee = 3 - inzee
     266              :                END DO
     267              : 
     268              :                !reverse ordering
     269              :                !input: i1,I2,j3,(jp3)
     270       333616 :                CALL P_unswitch_downcorn(nfft, n2, lot, n1, lzt, zw(1, 1, inzee), zt(1, 1, j))
     271              :                !output: I2,i1,j3,(jp3)
     272              :             END DO
     273              : 
     274              :             !transform along x axis
     275              :             !input: I2,i1,j3,(jp3)
     276       164470 :             lot = ncache/(4*n1)
     277       333616 :             DO j = 1, n2, lot
     278       169146 :                ma = j
     279       169146 :                mb = MIN(j + (lot - 1), n2)
     280       169146 :                nfft = mb - ma + 1
     281              : 
     282              :                !performing FFT
     283       169146 :                i = 1
     284              :                CALL fftstp(lzt, nfft, n1, lot, n1, zt(1, j, 1), zw(1, 1, 1), &
     285       169146 :                            ftrig1, after1(i), now1(i), before1(i), -1)
     286              : 
     287       169146 :                inzee = 1
     288       357848 :                DO i = 2, ic1
     289              :                   CALL fftstp(lot, nfft, n1, lot, n1, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
     290       188702 :                               ftrig1, after1(i), now1(i), before1(i), -1)
     291       357848 :                   inzee = 3 - inzee
     292              :                END DO
     293              :                !output: I2,I1,j3,(jp3)
     294              : 
     295              :                !reverse ordering
     296              :                !input: J2,Jp2,I1,j3,(jp3)
     297       333616 :                IF (nproc .EQ. 1) THEN
     298       136958 :                   CALL P_unmpiswitch_downcorn(j3, nfft, Jp2stf, J2stf, lot, n1, md2, nd3, nproc, zw(1, 1, inzee), zmpi2)
     299              :                ELSE
     300        32188 :                   CALL P_unmpiswitch_downcorn(j3, nfft, Jp2stf, J2stf, lot, n1, md2, nd3, nproc, zw(1, 1, inzee), zmpi1)
     301              :                END IF
     302              :                ! output: I1,J2,j3,Jp2,(jp3)
     303              :             END DO
     304              :          END IF
     305              :       END DO
     306              : 
     307              :       !Interprocessor data transposition
     308              :       !input: I1,J2,j3,Jp2,(jp3)
     309        17323 :       IF (nproc .GT. 1) THEN
     310              :          !communication scheduling
     311         4806 :          CALL mpi_group%alltoall(zmpi1, zmpi2, 2*n1*(md2/nproc)*(nd3/nproc))
     312              :       END IF
     313              :       !output: I1,J2,j3,jp3,(Jp2)
     314              :       !transform along z axis
     315              :       !input: I1,J2,i3,(Jp2)
     316        17323 :       lot = ncache/(4*n3)
     317       320326 :       DO j2 = 1, md2/nproc
     318              :          !this condition ensures that we manage only the interesting part for the FFT
     319       320326 :          IF (iproc*(md2/nproc) + j2 .LE. n2) THEN
     320       613678 :             DO i1 = 1, n1, lot
     321       311348 :                ma = i1
     322       311348 :                mb = MIN(i1 + (lot - 1), n1)
     323       311348 :                nfft = mb - ma + 1
     324              : 
     325              :                !reverse ordering
     326              :                !input: I1,J2,i3,(Jp2)
     327       311348 :                CALL unscramble_P(i1, j2, lot, nfft, n1, n3, md2, nproc, nd3, zmpi2, zw(1, 1, 1))
     328              :                !output: I1,i3,J2,(Jp2)
     329              : 
     330              :                !performing FFT
     331              :                !input: I1,i3,J2,(Jp2)
     332       311348 :                inzee = 1
     333       971439 :                DO i = 1, ic3
     334              :                   CALL fftstp(lot, nfft, n3, lot, n3, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
     335       660091 :                               ftrig3, after3(i), now3(i), before3(i), -1)
     336       971439 :                   inzee = 3 - inzee
     337              :                END DO
     338              :                !output: I1,I3,J2,(Jp2)
     339              : 
     340              :                !rebuild the output array
     341       613678 :                CALL P_unfill_downcorn(md1, md3, lot, nfft, n3, zw(1, 1, inzee), zf(i1, 1, j2), scal)
     342              : 
     343              :             END DO
     344              :          END IF
     345              :       END DO
     346              : 
     347              :       !De-allocations
     348        17323 :       DEALLOCATE (btrig1)
     349        17323 :       DEALLOCATE (ftrig1)
     350        17323 :       DEALLOCATE (after1)
     351        17323 :       DEALLOCATE (now1)
     352        17323 :       DEALLOCATE (before1)
     353        17323 :       DEALLOCATE (btrig2)
     354        17323 :       DEALLOCATE (ftrig2)
     355        17323 :       DEALLOCATE (after2)
     356        17323 :       DEALLOCATE (now2)
     357        17323 :       DEALLOCATE (before2)
     358        17323 :       DEALLOCATE (btrig3)
     359        17323 :       DEALLOCATE (ftrig3)
     360        17323 :       DEALLOCATE (after3)
     361        17323 :       DEALLOCATE (now3)
     362        17323 :       DEALLOCATE (before3)
     363        17323 :       DEALLOCATE (zmpi2)
     364        17323 :       DEALLOCATE (zw)
     365        17323 :       DEALLOCATE (zt)
     366        17323 :       IF (nproc .GT. 1) DEALLOCATE (zmpi1)
     367              :       !  call timing(iproc,'PSolv_comput  ','OF')
     368        17323 :    END SUBROUTINE P_PoissonSolver
     369              : 
     370              : ! **************************************************************************************************
     371              : !> \brief ...
     372              : !> \param j3 ...
     373              : !> \param nfft ...
     374              : !> \param Jp2stb ...
     375              : !> \param J2stb ...
     376              : !> \param lot ...
     377              : !> \param n1 ...
     378              : !> \param md2 ...
     379              : !> \param nd3 ...
     380              : !> \param nproc ...
     381              : !> \param zmpi1 ...
     382              : !> \param zw ...
     383              : ! **************************************************************************************************
     384       169146 :    SUBROUTINE P_mpiswitch_upcorn(j3, nfft, Jp2stb, J2stb, lot, n1, md2, nd3, nproc, zmpi1, zw)
     385              :       INTEGER, INTENT(in)                                :: j3, nfft
     386              :       INTEGER, INTENT(inout)                             :: Jp2stb, J2stb
     387              :       INTEGER, INTENT(in)                                :: lot, n1, md2, nd3, nproc
     388              :       REAL(KIND=dp), &
     389              :          DIMENSION(2, n1, md2/nproc, nd3/nproc, nproc), &
     390              :          INTENT(in)                                      :: zmpi1
     391              :       REAL(KIND=dp), DIMENSION(2, lot, n1), &
     392              :          INTENT(inout)                                   :: zw
     393              : 
     394              :       INTEGER                                            :: I1, J2, Jp2, mfft
     395              : 
     396       169146 :       mfft = 0
     397       354650 :       DO Jp2 = Jp2stb, nproc
     398      3840810 :          DO J2 = J2stb, md2/nproc
     399      3655306 :             mfft = mfft + 1
     400      3655306 :             IF (mfft .GT. nfft) THEN
     401        13478 :                Jp2stb = Jp2
     402        13478 :                J2stb = J2
     403        13478 :                RETURN
     404              :             END IF
     405     95925808 :             DO I1 = 1, n1
     406     92098476 :                zw(1, mfft, I1) = zmpi1(1, I1, J2, j3, Jp2)
     407     95740304 :                zw(2, mfft, I1) = zmpi1(2, I1, J2, j3, Jp2)
     408              :             END DO
     409              :          END DO
     410       341172 :          J2stb = 1
     411              :       END DO
     412              :    END SUBROUTINE P_mpiswitch_upcorn
     413              : 
     414              : ! **************************************************************************************************
     415              : !> \brief ...
     416              : !> \param nfft ...
     417              : !> \param n2 ...
     418              : !> \param lot ...
     419              : !> \param n1 ...
     420              : !> \param lzt ...
     421              : !> \param zt ...
     422              : !> \param zw ...
     423              : ! **************************************************************************************************
     424       169146 :    SUBROUTINE P_switch_upcorn(nfft, n2, lot, n1, lzt, zt, zw)
     425              :       INTEGER, INTENT(in)                                :: nfft, n2, lot, n1, lzt
     426              :       REAL(KIND=dp), DIMENSION(2, lzt, n1), INTENT(in)   :: zt
     427              :       REAL(KIND=dp), DIMENSION(2, lot, n2), &
     428              :          INTENT(inout)                                   :: zw
     429              : 
     430              :       INTEGER                                            :: i, j
     431              : 
     432      3810974 :       DO j = 1, nfft
     433     95909450 :          DO i = 1, n2
     434     92098476 :             zw(1, j, i) = zt(1, i, j)
     435     95740304 :             zw(2, j, i) = zt(2, i, j)
     436              :          END DO
     437              :       END DO
     438              : 
     439       169146 :    END SUBROUTINE P_switch_upcorn
     440              : 
     441              : ! **************************************************************************************************
     442              : !> \brief ...
     443              : !> \param nfft ...
     444              : !> \param n2 ...
     445              : !> \param lot ...
     446              : !> \param n1 ...
     447              : !> \param lzt ...
     448              : !> \param zw ...
     449              : !> \param zt ...
     450              : ! **************************************************************************************************
     451       169146 :    SUBROUTINE P_unswitch_downcorn(nfft, n2, lot, n1, lzt, zw, zt)
     452              :       INTEGER, INTENT(in)                                :: nfft, n2, lot, n1, lzt
     453              :       REAL(KIND=dp), DIMENSION(2, lot, n2), INTENT(in)   :: zw
     454              :       REAL(KIND=dp), DIMENSION(2, lzt, n1), &
     455              :          INTENT(inout)                                   :: zt
     456              : 
     457              :       INTEGER                                            :: i, j
     458              : 
     459      3810974 :       DO j = 1, nfft
     460     95909450 :          DO i = 1, n2
     461     92098476 :             zt(1, i, j) = zw(1, j, i)
     462     95740304 :             zt(2, i, j) = zw(2, j, i)
     463              :          END DO
     464              :       END DO
     465              : 
     466       169146 :    END SUBROUTINE P_unswitch_downcorn
     467              : 
     468              : ! **************************************************************************************************
     469              : !> \brief ...
     470              : !> \param j3 ...
     471              : !> \param nfft ...
     472              : !> \param Jp2stf ...
     473              : !> \param J2stf ...
     474              : !> \param lot ...
     475              : !> \param n1 ...
     476              : !> \param md2 ...
     477              : !> \param nd3 ...
     478              : !> \param nproc ...
     479              : !> \param zw ...
     480              : !> \param zmpi1 ...
     481              : ! **************************************************************************************************
     482       169146 :    SUBROUTINE P_unmpiswitch_downcorn(j3, nfft, Jp2stf, J2stf, lot, n1, md2, nd3, nproc, zw, zmpi1)
     483              :       INTEGER, INTENT(in)                                :: j3, nfft
     484              :       INTEGER, INTENT(inout)                             :: Jp2stf, J2stf
     485              :       INTEGER, INTENT(in)                                :: lot, n1, md2, nd3, nproc
     486              :       REAL(KIND=dp), DIMENSION(2, lot, n1), INTENT(in)   :: zw
     487              :       REAL(KIND=dp), &
     488              :          DIMENSION(2, n1, md2/nproc, nd3/nproc, nproc), &
     489              :          INTENT(inout)                                   :: zmpi1
     490              : 
     491              :       INTEGER                                            :: I1, J2, Jp2, mfft
     492              : 
     493       169146 :       mfft = 0
     494       354650 :       DO Jp2 = Jp2stf, nproc
     495      3840810 :          DO J2 = J2stf, md2/nproc
     496      3655306 :             mfft = mfft + 1
     497      3655306 :             IF (mfft .GT. nfft) THEN
     498        13478 :                Jp2stf = Jp2
     499        13478 :                J2stf = J2
     500        13478 :                RETURN
     501              :             END IF
     502     95925808 :             DO I1 = 1, n1
     503     92098476 :                zmpi1(1, I1, J2, j3, Jp2) = zw(1, mfft, I1)
     504     95740304 :                zmpi1(2, I1, J2, j3, Jp2) = zw(2, mfft, I1)
     505              :             END DO
     506              :          END DO
     507       341172 :          J2stf = 1
     508              :       END DO
     509              :    END SUBROUTINE P_unmpiswitch_downcorn
     510              : 
     511              : ! **************************************************************************************************
     512              : !> \brief (Based on suitable modifications of S.Goedecker routines)
     513              : !>      Restore data into output array
     514              : !> \param md1 Dimensions of the undistributed part of the real grid
     515              : !> \param md3 Dimensions of the undistributed part of the real grid
     516              : !> \param lot ...
     517              : !> \param nfft number of planes
     518              : !> \param n3 (twice the) dimension of the last FFTtransform
     519              : !> \param zw FFT work array
     520              : !> \param zf Original distributed density as well as
     521              : !>                   Distributed solution of the poisson equation (inout)
     522              : !> \param scal Needed to achieve unitarity and correct dimensions
     523              : !> \date February 2006
     524              : !> \author S. Goedecker, L. Genovese
     525              : !> \note Assuming that high frequencies are in the corners
     526              : !>      and that n3 is multiple of 4
     527              : !>
     528              : !>  RESTRICTIONS on USAGE
     529              : !>      Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
     530              : !>      Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1999
     531              : !>      Copyright (C) 2002 Stefan Goedecker, CEA Grenoble
     532              : !>      This file is distributed under the terms of the
     533              : !>      GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
     534              : ! **************************************************************************************************
     535       311348 :    SUBROUTINE P_unfill_downcorn(md1, md3, lot, nfft, n3, zw, zf, scal)
     536              :       INTEGER, INTENT(in)                                :: md1, md3, lot, nfft, n3
     537              :       REAL(KIND=dp), DIMENSION(2, lot, n3), INTENT(in)   :: zw
     538              :       REAL(KIND=dp), DIMENSION(md1, md3), INTENT(inout)  :: zf
     539              :       REAL(KIND=dp), INTENT(in)                          :: scal
     540              : 
     541              :       INTEGER                                            :: i1, i3
     542              :       REAL(KIND=dp)                                      :: pot1
     543              : 
     544      7534542 :       DO i3 = 1, n3
     545    179338112 :          DO i1 = 1, nfft
     546    171803570 :             pot1 = scal*zw(1, i1, i3)
     547    179026764 :             zf(i1, i3) = pot1
     548              :          END DO
     549              :       END DO
     550              : 
     551       311348 :    END SUBROUTINE P_unfill_downcorn
     552              : 
     553              : ! **************************************************************************************************
     554              : !> \brief ...
     555              : !> \param md1 ...
     556              : !> \param md3 ...
     557              : !> \param lot ...
     558              : !> \param nfft ...
     559              : !> \param n3 ...
     560              : !> \param zf ...
     561              : !> \param zw ...
     562              : ! **************************************************************************************************
     563       311348 :    SUBROUTINE P_fill_upcorn(md1, md3, lot, nfft, n3, zf, zw)
     564              :       INTEGER, INTENT(in)                                :: md1, md3, lot, nfft, n3
     565              :       REAL(KIND=dp), DIMENSION(md1, md3), INTENT(in)     :: zf
     566              :       REAL(KIND=dp), DIMENSION(2, lot, n3), &
     567              :          INTENT(inout)                                   :: zw
     568              : 
     569              :       INTEGER                                            :: i1, i3
     570              : 
     571      7534542 :       DO i3 = 1, n3
     572    179338112 :          DO i1 = 1, nfft
     573    171803570 :             zw(1, i1, i3) = zf(i1, i3)
     574    179026764 :             zw(2, i1, i3) = 0._dp
     575              :          END DO
     576              :       END DO
     577              : 
     578       311348 :    END SUBROUTINE P_fill_upcorn
     579              : 
     580              : ! **************************************************************************************************
     581              : !> \brief (Based on suitable modifications of S.Goedecker routines)
     582              : !>      Assign the correct planes to the work array zmpi2
     583              : !>      in order to prepare for interprocessor data transposition.
     584              : !> \param i1 Starting points of the plane and number of remaining lines
     585              : !> \param j2 Starting points of the plane and number of remaining lines
     586              : !> \param lot Starting points of the plane and number of remaining lines
     587              : !> \param nfft Starting points of the plane and number of remaining lines
     588              : !> \param n1 logical dimension of the FFT transform, reference for work arrays
     589              : !> \param n3 logical dimension of the FFT transform, reference for work arrays
     590              : !> \param md2 Dimensions of real grid
     591              : !> \param nproc ...
     592              : !> \param nd3 Dimensions of the kernel
     593              : !> \param zw Work array (input)
     594              : !> \param zmpi2 Work array for multiprocessor manipulation (output)
     595              : !> \date February 2006
     596              : !> \author S. Goedecker, L. Genovese
     597              : !> \note
     598              : !>  RESTRICTIONS on USAGE
     599              : !>      Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
     600              : !>      Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1999
     601              : !>      Copyright (C) 2002 Stefan Goedecker, CEA Grenoble
     602              : !>      This file is distributed under the terms of the
     603              : !>      GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
     604              : ! **************************************************************************************************
     605       311348 :    SUBROUTINE scramble_P(i1, j2, lot, nfft, n1, n3, md2, nproc, nd3, zw, zmpi2)
     606              :       INTEGER, INTENT(in)                                :: i1, j2, lot, nfft, n1, n3, md2, nproc, &
     607              :                                                             nd3
     608              :       REAL(KIND=dp), DIMENSION(2, lot, n3), INTENT(in)   :: zw
     609              :       REAL(KIND=dp), DIMENSION(2, n1, md2/nproc, nd3), &
     610              :          INTENT(inout)                                   :: zmpi2
     611              : 
     612              :       INTEGER                                            :: i, i3
     613              : 
     614      4205680 :       DO i3 = 1, n3/2 + 1
     615     96304156 :          DO i = 0, nfft - 1
     616     92098476 :             zmpi2(1, i1 + i, j2, i3) = zw(1, i + 1, i3)
     617     95992808 :             zmpi2(2, i1 + i, j2, i3) = zw(2, i + 1, i3)
     618              :          END DO
     619              :       END DO
     620              : 
     621       311348 :    END SUBROUTINE scramble_P
     622              : 
     623              : ! **************************************************************************************************
     624              : !> \brief (Based on suitable modifications of S.Goedecker routines)
     625              : !>      Insert the correct planes of the work array zmpi2
     626              : !>      in order to prepare for backward FFT transform
     627              : !> \param i1 Starting points of the plane and number of remaining lines
     628              : !> \param j2 Starting points of the plane and number of remaining lines
     629              : !> \param lot Starting points of the plane and number of remaining lines
     630              : !> \param nfft Starting points of the plane and number of remaining lines
     631              : !> \param n1 logical dimension of the FFT transform, reference for work arrays
     632              : !> \param n3 logical dimension of the FFT transform, reference for work arrays
     633              : !> \param md2 Dimensions of real grid
     634              : !> \param nproc ...
     635              : !> \param nd3 Dimensions of the kernel
     636              : !> \param zmpi2 Work array for multiprocessor manipulation (output)
     637              : !> \param zw Work array (input)
     638              : !> \date February 2006
     639              : !> \author S. Goedecker, L. Genovese
     640              : !> \note
     641              : !>  RESTRICTIONS on USAGE
     642              : !>      Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
     643              : !>      Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1999
     644              : !>      Copyright (C) 2002 Stefan Goedecker, CEA Grenoble
     645              : !>      This file is distributed under the terms of the
     646              : !>      GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
     647              : ! **************************************************************************************************
     648       311348 :    SUBROUTINE unscramble_P(i1, j2, lot, nfft, n1, n3, md2, nproc, nd3, zmpi2, zw)
     649              :       INTEGER, INTENT(in)                                :: i1, j2, lot, nfft, n1, n3, md2, nproc, &
     650              :                                                             nd3
     651              :       REAL(KIND=dp), DIMENSION(2, n1, md2/nproc, nd3), &
     652              :          INTENT(in)                                      :: zmpi2
     653              :       REAL(KIND=dp), DIMENSION(2, lot, n3), &
     654              :          INTENT(inout)                                   :: zw
     655              : 
     656              :       INTEGER                                            :: i, i3, j3
     657              : 
     658       311348 :       i3 = 1
     659      7047570 :       DO i = 0, nfft - 1
     660      6736222 :          zw(1, i + 1, i3) = zmpi2(1, i1 + i, j2, i3)
     661      7047570 :          zw(2, i + 1, i3) = zmpi2(2, i1 + i, j2, i3)
     662              :       END DO
     663              : 
     664      3894332 :       DO i3 = 2, n3/2 + 1
     665      3582984 :          j3 = n3 + 2 - i3
     666     89256586 :          DO i = 0, nfft - 1
     667     85362254 :             zw(1, i + 1, j3) = zmpi2(1, i1 + i, j2, i3)
     668     85362254 :             zw(2, i + 1, j3) = -zmpi2(2, i1 + i, j2, i3)
     669     85362254 :             zw(1, i + 1, i3) = zmpi2(1, i1 + i, j2, i3)
     670     88945238 :             zw(2, i + 1, i3) = zmpi2(2, i1 + i, j2, i3)
     671              :          END DO
     672              :       END DO
     673              : 
     674       311348 :    END SUBROUTINE unscramble_P
     675              : 
     676              : ! **************************************************************************************************
     677              : !> \brief (Based on suitable modifications of S.Goedecker routines)
     678              : !>      Multiply with the kernel taking into account its symmetry
     679              : !>      Conceived to be used into convolution loops
     680              : !> \param n1 ...
     681              : !> \param n2 ...
     682              : !> \param n3 ...
     683              : !> \param lot ...
     684              : !> \param nfft ...
     685              : !> \param jS ...
     686              : !> \param i3 ...
     687              : !> \param zw Work array (input/output)
     688              : !>      n1,n2:      logical dimension of the FFT transform, reference for zw
     689              : !>      nd1,nd2:    Dimensions of POT
     690              : !>      jS,j3,nfft: starting point of the plane and number of remaining lines
     691              : !>
     692              : !>  RESTRICTIONS on USAGE
     693              : !>      Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
     694              : !>      Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1999
     695              : !>      Copyright (C) 2002 Stefan Goedecker, CEA Grenoble
     696              : !>      This file is distributed under the terms of the
     697              : !>       GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
     698              : !> \param hx ...
     699              : !> \param hy ...
     700              : !> \param hz ...
     701              : !> \date February 2006
     702              : !> \author S. Goedecker, L. Genovese
     703              : ! **************************************************************************************************
     704       169146 :    SUBROUTINE P_multkernel(n1, n2, n3, lot, nfft, jS, i3, zw, hx, hy, hz)
     705              :       INTEGER, INTENT(in)                                :: n1, n2, n3, lot, nfft, jS, i3
     706              :       REAL(KIND=dp), DIMENSION(2, lot, n2), &
     707              :          INTENT(inout)                                   :: zw
     708              :       REAL(KIND=dp), INTENT(in)                          :: hx, hy, hz
     709              : 
     710              :       INTEGER                                            :: i1, i2, j1, j2, j3
     711              :       REAL(KIND=dp)                                      :: fourpi2, ker, mu3, p1, p2, pi
     712              : 
     713       169146 :       pi = 4._dp*ATAN(1._dp)
     714       169146 :       fourpi2 = 4._dp*pi**2
     715       169146 :       j3 = i3 !n3/2+1-abs(n3/2+2-i3)
     716       169146 :       mu3 = REAL(j3 - 1, KIND=dp)/REAL(n3, KIND=dp)
     717       169146 :       mu3 = (mu3/hy)**2 !beware of the exchanged dimension
     718              :       !Body
     719              :       !generic case
     720      4063478 :       DO i2 = 1, n2
     721     96161954 :          DO i1 = 1, nfft
     722     92098476 :             j1 = i1 + jS - 1
     723     92098476 :             j1 = j1 - (j1/(n1/2 + 2))*n1 !n1/2+1-abs(n1/2+2-jS-i1)
     724     92098476 :             j2 = i2 - (i2/(n2/2 + 2))*n2 !n2/2+1-abs(n2/2+1-i2)
     725     92098476 :             p1 = REAL(j1 - 1, KIND=dp)/REAL(n1, KIND=dp)
     726     92098476 :             p2 = REAL(j2 - 1, KIND=dp)/REAL(n2, KIND=dp)
     727     92098476 :             ker = -fourpi2*((p1/hx)**2 + (p2/hz)**2 + mu3) !beware of the exchanged dimension
     728     92098476 :             IF (ker /= 0._dp) ker = 1._dp/ker
     729     92098476 :             zw(1, i1, i2) = zw(1, i1, i2)*ker
     730     95992808 :             zw(2, i1, i2) = zw(2, i1, i2)*ker
     731              :          END DO
     732              :       END DO
     733              : 
     734       169146 :    END SUBROUTINE P_multkernel
     735              : 
     736              : ! **************************************************************************************************
     737              : !> \brief (Based on suitable modifications of S.Goedecker routines)
     738              : !>      Multiply with the kernel taking into account its symmetry
     739              : !>      Conceived to be used into convolution loops
     740              : !> \param nd1 ...
     741              : !> \param nd2 ...
     742              : !> \param n1 ...
     743              : !> \param n2 ...
     744              : !> \param lot ...
     745              : !> \param nfft ...
     746              : !> \param jS ...
     747              : !> \param pot Kernel, symmetric and real, half the length
     748              : !> \param zw Work array (input/output)
     749              : !>      n1,n2:    logical dimension of the FFT transform, reference for zw
     750              : !>      nd1,nd2:  Dimensions of POT
     751              : !>      jS, nfft: starting point of the plane and number of remaining lines
     752              : !>
     753              : !>  RESTRICTIONS on USAGE
     754              : !>      Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
     755              : !>      Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1999
     756              : !>      Copyright (C) 2002 Stefan Goedecker, CEA Grenoble
     757              : !>      This file is distributed under the terms of the
     758              : !>       GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
     759              : !> \date February 2006
     760              : !> \author S. Goedecker, L. Genovese
     761              : ! **************************************************************************************************
     762      1713732 :    SUBROUTINE multkernel(nd1, nd2, n1, n2, lot, nfft, jS, pot, zw)
     763              :       INTEGER, INTENT(in)                                :: nd1, nd2, n1, n2, lot, nfft, jS
     764              :       REAL(KIND=dp), DIMENSION(nd1, nd2), INTENT(in)     :: pot
     765              :       REAL(KIND=dp), DIMENSION(2, lot, n2), &
     766              :          INTENT(inout)                                   :: zw
     767              : 
     768              :       INTEGER                                            :: i2, j, j1, j2
     769              : 
     770     34715394 :       DO j = 1, nfft
     771     33001662 :          j1 = j + jS - 1
     772     33001662 :          j1 = j1 + (j1/(n1/2 + 2))*(n1 + 2 - 2*j1)
     773     33001662 :          zw(1, j, 1) = zw(1, j, 1)*pot(j1, 1)
     774     34715394 :          zw(2, j, 1) = zw(2, j, 1)*pot(j1, 1)
     775              :       END DO
     776              : 
     777              :       !generic case
     778     83794692 :       DO i2 = 2, n2/2
     779   1533631536 :          DO j = 1, nfft
     780   1449836844 :             j1 = j + jS - 1
     781   1449836844 :             j1 = j1 + (j1/(n1/2 + 2))*(n1 + 2 - 2*j1)
     782   1449836844 :             j2 = n2 + 2 - i2
     783   1449836844 :             zw(1, j, i2) = zw(1, j, i2)*pot(j1, i2)
     784   1449836844 :             zw(2, j, i2) = zw(2, j, i2)*pot(j1, i2)
     785   1449836844 :             zw(1, j, j2) = zw(1, j, j2)*pot(j1, i2)
     786   1531917804 :             zw(2, j, j2) = zw(2, j, j2)*pot(j1, i2)
     787              :          END DO
     788              :       END DO
     789              : 
     790              :       !case i2=n2/2+1
     791     34715394 :       DO j = 1, nfft
     792     33001662 :          j1 = j + jS - 1
     793     33001662 :          j1 = j1 + (j1/(n1/2 + 2))*(n1 + 2 - 2*j1)
     794     33001662 :          j2 = n2/2 + 1
     795     33001662 :          zw(1, j, j2) = zw(1, j, j2)*pot(j1, j2)
     796     34715394 :          zw(2, j, j2) = zw(2, j, j2)*pot(j1, j2)
     797              :       END DO
     798              : 
     799      1713732 :    END SUBROUTINE multkernel
     800              : 
     801              : ! **************************************************************************************************
     802              : !> \brief !HERE POT MUST BE THE KERNEL (BEWARE THE HALF DIMENSION)
     803              : !> ****h* BigDFT/S_PoissonSolver
     804              : !>      (Based on suitable modifications of S.Goedecker routines)
     805              : !>      Applies the local FFT space Kernel to the density in Real space.
     806              : !>      Does NOT calculate the LDA exchange-correlation terms
     807              : !> \param n1 logical dimension of the transform.
     808              : !> \param n2 logical dimension of the transform.
     809              : !> \param n3 logical dimension of the transform.
     810              : !> \param nd1 Dimension of POT
     811              : !> \param nd2 Dimension of POT
     812              : !> \param nd3 Dimension of POT
     813              : !> \param md1 Dimension of ZF
     814              : !> \param md2 Dimension of ZF
     815              : !> \param md3 Dimension of ZF
     816              : !> \param nproc number of processors used as returned by MPI_COMM_SIZE
     817              : !> \param iproc [0:nproc-1] number of processor as returned by MPI_COMM_RANK
     818              : !> \param pot Kernel, only the distributed part (REAL)
     819              : !>                   POT(i1,i2,i3)
     820              : !>                   i1=1,nd1 , i2=1,nd2 , i3=1,nd3/nproc
     821              : !> \param zf Density (input/output)
     822              : !>                   ZF(i1,i3,i2)
     823              : !>                   i1=1,md1 , i2=1,md2/nproc , i3=1,md3
     824              : !> \param scal factor of renormalization of the FFT in order to acheve unitarity
     825              : !>                   and the correct dimension
     826              : !> \param mpi_group ...
     827              : !> \date October 2006
     828              : !> \author S. Goedecker, L. Genovese
     829              : !> \note   As transform lengths most products of the prime factors 2,3,5 are allowed.
     830              : !>                   The detailed table with allowed transform lengths can
     831              : !>                   be found in subroutine CTRIG
     832              : !> \note
     833              : !>  RESTRICTIONS on USAGE
     834              : !>      Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
     835              : !>      Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1999
     836              : !>      Copyright (C) 2002 Stefan Goedecker, CEA Grenoble
     837              : !>      This file is distributed under the terms of the
     838              : !>       GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
     839              : ! **************************************************************************************************
     840           18 :    SUBROUTINE S_PoissonSolver(n1, n2, n3, nd1, nd2, nd3, md1, md2, md3, nproc, iproc, pot, zf, &
     841              :                               scal, mpi_group)
     842              :       INTEGER, INTENT(in)                                :: n1, n2, n3, nd1, nd2, nd3, md1, md2, &
     843              :                                                             md3, nproc, iproc
     844              :       REAL(KIND=dp), DIMENSION(nd1, nd2, nd3/nproc), &
     845              :          INTENT(in)                                      :: pot
     846              :       REAL(KIND=dp), DIMENSION(md1, md3, md2/nproc), &
     847              :          INTENT(inout)                                   :: zf
     848              :       REAL(KIND=dp), INTENT(in)                          :: scal
     849              : 
     850              :       CLASS(mp_comm_type), INTENT(in)                     :: mpi_group
     851              : 
     852              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'S_PoissonSolver'
     853              :       INTEGER, PARAMETER                                 :: ncache_optimal = 8*1024
     854              : 
     855              :       INTEGER                                            :: handle, i, i1, i3, ic1, ic2, ic3, inzee, &
     856              :                                                             j, j2, J2stb, J2stf, j3, Jp2stb, &
     857              :                                                             Jp2stf, lot, lzt, ma, mb, ncache, nfft
     858           18 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: after1, after2, after3, before1, &
     859           18 :                                                             before2, before3, now1, now2, now3
     860              :       REAL(kind=dp)                                      :: twopion
     861           18 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: btrig1, btrig2, btrig3, cosinarr, &
     862           18 :                                                             ftrig1, ftrig2, ftrig3
     863           18 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :)     :: zt, zw
     864              :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :, :)  :: zmpi2
     865              :       REAL(KIND=dp), ALLOCATABLE, &
     866           18 :          DIMENSION(:, :, :, :, :)                        :: zmpi1
     867              : 
     868           18 :       CALL timeset(routineN, handle)
     869              :       ! check input
     870           18 :       IF (MOD(n3, 2) .NE. 0) CPABORT("Parallel convolution:ERROR:n3")
     871           18 :       IF (nd1 .LT. n1/2 + 1) CPABORT("Parallel convolution:ERROR:nd1")
     872           18 :       IF (nd2 .LT. n2/2 + 1) CPABORT("Parallel convolution:ERROR:nd2")
     873           18 :       IF (nd3 .LT. n3/2 + 1) CPABORT("Parallel convolution:ERROR:nd3")
     874           18 :       IF (md1 .LT. n1) CPABORT("Parallel convolution:ERROR:md1")
     875           18 :       IF (md2 .LT. n2) CPABORT("Parallel convolution:ERROR:md2")
     876           18 :       IF (md3 .LT. n3/2) CPABORT("Parallel convolution:ERROR:md3")
     877           18 :       IF (MOD(nd3, nproc) .NE. 0) CPABORT("Parallel convolution:ERROR:nd3")
     878           18 :       IF (MOD(md2, nproc) .NE. 0) CPABORT("Parallel convolution:ERROR:md2")
     879              : 
     880              :       !defining work arrays dimensions
     881           18 :       ncache = ncache_optimal
     882           18 :       IF (ncache <= MAX(n1, n2, n3/2)*4) ncache = MAX(n1, n2, n3/2)*4
     883              : 
     884              :       !  if (timing_flag == 1 .and. iproc ==0) print *,'parallel ncache=',ncache
     885              : 
     886           18 :       lzt = n2
     887           18 :       IF (MOD(n2, 2) .EQ. 0) lzt = lzt + 1
     888           18 :       IF (MOD(n2, 4) .EQ. 0) lzt = lzt + 1 !maybe this is useless
     889              : 
     890              :       !Allocations
     891           18 :       ALLOCATE (btrig1(2, ctrig_length))
     892           18 :       ALLOCATE (ftrig1(2, ctrig_length))
     893           18 :       ALLOCATE (after1(7))
     894           18 :       ALLOCATE (now1(7))
     895           18 :       ALLOCATE (before1(7))
     896           18 :       ALLOCATE (btrig2(2, ctrig_length))
     897           18 :       ALLOCATE (ftrig2(2, ctrig_length))
     898           18 :       ALLOCATE (after2(7))
     899           18 :       ALLOCATE (now2(7))
     900           18 :       ALLOCATE (before2(7))
     901           18 :       ALLOCATE (btrig3(2, ctrig_length))
     902           18 :       ALLOCATE (ftrig3(2, ctrig_length))
     903           18 :       ALLOCATE (after3(7))
     904           18 :       ALLOCATE (now3(7))
     905           18 :       ALLOCATE (before3(7))
     906           72 :       ALLOCATE (zw(2, ncache/4, 2))
     907           72 :       ALLOCATE (zt(2, lzt, n1))
     908           90 :       ALLOCATE (zmpi2(2, n1, md2/nproc, nd3))
     909           72 :       ALLOCATE (cosinarr(2, n3/2))
     910           18 :       IF (nproc .GT. 1) THEN
     911          108 :          ALLOCATE (zmpi1(2, n1, md2/nproc, nd3/nproc, nproc))
     912    116450550 :          zmpi1 = 0.0_dp
     913              :       END IF
     914    116450514 :       zmpi2 = 0.0_dp
     915       221238 :       zw = 0.0_dp
     916      1428858 :       zt = 0.0_dp
     917              : 
     918              :       !calculating the FFT work arrays (beware on the HalFFT in n3 dimension)
     919           18 :       CALL ctrig(n3/2, btrig3, after3, before3, now3, 1, ic3)
     920           18 :       CALL ctrig(n1, btrig1, after1, before1, now1, 1, ic1)
     921           18 :       CALL ctrig(n2, btrig2, after2, before2, now2, 1, ic2)
     922         2934 :       DO j = 1, n1
     923         2916 :          ftrig1(1, j) = btrig1(1, j)
     924         2934 :          ftrig1(2, j) = -btrig1(2, j)
     925              :       END DO
     926         2934 :       DO j = 1, n2
     927         2916 :          ftrig2(1, j) = btrig2(1, j)
     928         2934 :          ftrig2(2, j) = -btrig2(2, j)
     929              :       END DO
     930         2934 :       DO j = 1, n3/2
     931         2916 :          ftrig3(1, j) = btrig3(1, j)
     932         2934 :          ftrig3(2, j) = -btrig3(2, j)
     933              :       END DO
     934              : 
     935              :       !Calculating array of phases for HalFFT decoding
     936           18 :       twopion = 8._dp*ATAN(1._dp)/REAL(n3, KIND=dp)
     937         2934 :       DO i3 = 1, n3/2
     938         2916 :          cosinarr(1, i3) = COS(twopion*(i3 - 1))
     939         2934 :          cosinarr(2, i3) = -SIN(twopion*(i3 - 1))
     940              :       END DO
     941              : 
     942              :       !initializing integral
     943              :       !ehartree=0._dp
     944              : 
     945              :       ! transform along z axis
     946           18 :       lot = ncache/(2*n3)
     947           18 :       IF (lot .LT. 1) THEN
     948              :          WRITE (*, *) &
     949              :             'convolxc_off:ncache has to be enlarged to be able to hold at'// &
     950              :             'least one 1-d FFT of this size even though this will'// &
     951            0 :             'reduce the performance for shorter transform lengths', n1, n2, n3, nd1, nd2, nd3, md1, md2, md3, nproc, iproc
     952            0 :          CPABORT("")
     953              :       END IF
     954              : 
     955         1476 :       DO j2 = 1, md2/nproc
     956              :          !this condition ensures that we manage only the interesting part for the FFT
     957         1476 :          IF (iproc*(md2/nproc) + j2 .LE. n2) THEN
     958        21870 :             DO i1 = 1, n1, lot
     959        20412 :                ma = i1
     960        20412 :                mb = MIN(i1 + (lot - 1), n1)
     961        20412 :                nfft = mb - ma + 1
     962              : 
     963              :                !inserting real data into complex array of half length
     964        20412 :                CALL halfill_upcorn(md1, md3, lot, nfft, n3, zf(i1, 1, j2), zw(1, 1, 1))
     965              : 
     966              :                !performing FFT
     967              :                !input: I1,I3,J2,(Jp2)
     968        20412 :                inzee = 1
     969       102060 :                DO i = 1, ic3
     970              :                   CALL fftstp(lot, nfft, n3/2, lot, n3/2, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
     971        81648 :                               btrig3, after3(i), now3(i), before3(i), 1)
     972       102060 :                   inzee = 3 - inzee
     973              :                END DO
     974              :                !output: I1,i3,J2,(Jp2)
     975              :                !unpacking FFT in order to restore correct result,
     976              :                !while exchanging components
     977              :                !input: I1,i3,J2,(Jp2)
     978        21870 :                CALL scramble_unpack(i1, j2, lot, nfft, n1, n3, md2, nproc, nd3, zw(1, 1, inzee), zmpi2, cosinarr)
     979              :                !output: I1,J2,i3,(Jp2)
     980              :             END DO
     981              :          END IF
     982              :       END DO
     983              :       !Interprocessor data transposition
     984              :       !input: I1,J2,j3,jp3,(Jp2)
     985           18 :       IF (nproc .GT. 1) THEN
     986           18 :          CALL mpi_group%alltoall(zmpi2, zmpi1, 2*n1*(md2/nproc)*(nd3/nproc))
     987              :       END IF
     988              :       !output: I1,J2,j3,Jp2,(jp3)
     989              : 
     990              :       !now each process perform complete convolution of its planes
     991         1494 :       DO j3 = 1, nd3/nproc
     992              :          !this condition ensures that we manage only the interesting part for the FFT
     993         1494 :          IF (iproc*(nd3/nproc) + j3 .LE. n3/2 + 1) THEN
     994         1467 :             Jp2stb = 1
     995         1467 :             J2stb = 1
     996         1467 :             Jp2stf = 1
     997         1467 :             J2stf = 1
     998              : 
     999              :             ! transform along x axis
    1000         1467 :             lot = ncache/(4*n1)
    1001         1467 :             IF (lot .LT. 1) THEN
    1002              :                WRITE (*, *) &
    1003              :                   'convolxc_off:ncache has to be enlarged to be able to hold at'// &
    1004              :                   'least one 1-d FFT of this size even though this will'// &
    1005            0 :                   'reduce the performance for shorter transform lengths'
    1006            0 :                CPABORT("")
    1007              :             END IF
    1008              : 
    1009        22005 :             DO j = 1, n2, lot
    1010        20538 :                ma = j
    1011        20538 :                mb = MIN(j + (lot - 1), n2)
    1012        20538 :                nfft = mb - ma + 1
    1013              : 
    1014              :                !reverse index ordering, leaving the planes to be transformed at the end
    1015              :                !input: I1,J2,j3,Jp2,(jp3)
    1016        20538 :                IF (nproc .EQ. 1) THEN
    1017            0 :                   CALL S_mpiswitch_upcorn(j3, nfft, Jp2stb, J2stb, lot, n1, md2, nd3, nproc, zmpi2, zw(1, 1, 1))
    1018              :                ELSE
    1019        20538 :                   CALL S_mpiswitch_upcorn(j3, nfft, Jp2stb, J2stb, lot, n1, md2, nd3, nproc, zmpi1, zw(1, 1, 1))
    1020              :                END IF
    1021              :                !output: J2,Jp2,I1,j3,(jp3)
    1022              : 
    1023              :                !performing FFT
    1024              :                !input: I2,I1,j3,(jp3)
    1025        20538 :                inzee = 1
    1026        82152 :                DO i = 1, ic1 - 1
    1027              :                   CALL fftstp(lot, nfft, n1, lot, n1, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
    1028        61614 :                               btrig1, after1(i), now1(i), before1(i), 1)
    1029        82152 :                   inzee = 3 - inzee
    1030              :                END DO
    1031              : 
    1032              :                !storing the last step into zt array
    1033        20538 :                i = ic1
    1034              :                CALL fftstp(lot, nfft, n1, lzt, n1, zw(1, 1, inzee), zt(1, j, 1), &
    1035        22005 :                            btrig1, after1(i), now1(i), before1(i), 1)
    1036              :                !output: I2,i1,j3,(jp3)
    1037              :             END DO
    1038              : 
    1039              :             !transform along y axis
    1040         1467 :             lot = ncache/(4*n2)
    1041         1467 :             IF (lot .LT. 1) THEN
    1042              :                WRITE (*, *) &
    1043              :                   'convolxc_off:ncache has to be enlarged to be able to hold at'// &
    1044              :                   'least one 1-d FFT of this size even though this will'// &
    1045            0 :                   'reduce the performance for shorter transform lengths'
    1046            0 :                CPABORT("")
    1047              :             END IF
    1048              : 
    1049        22005 :             DO j = 1, n1, lot
    1050        20538 :                ma = j
    1051        20538 :                mb = MIN(j + (lot - 1), n1)
    1052        20538 :                nfft = mb - ma + 1
    1053              : 
    1054              :                !reverse ordering
    1055              :                !input: I2,i1,j3,(jp3)
    1056        20538 :                CALL S_switch_upcorn(nfft, n2, lot, n1, lzt, zt(1, 1, j), zw(1, 1, 1))
    1057              :                !output: i1,I2,j3,(jp3)
    1058              : 
    1059              :                !performing FFT
    1060              :                !input: i1,I2,j3,(jp3)
    1061        20538 :                inzee = 1
    1062       102690 :                DO i = 1, ic2
    1063              :                   CALL fftstp(lot, nfft, n2, lot, n2, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
    1064        82152 :                               btrig2, after2(i), now2(i), before2(i), 1)
    1065       102690 :                   inzee = 3 - inzee
    1066              :                END DO
    1067              :                !output: i1,i2,j3,(jp3)
    1068              : 
    1069              :                !Multiply with kernel in fourier space
    1070        20538 :                CALL multkernel(nd1, nd2, n1, n2, lot, nfft, j, pot(1, 1, j3), zw(1, 1, inzee))
    1071              : 
    1072              :                !TRANSFORM BACK IN REAL SPACE
    1073              : 
    1074              :                !transform along y axis
    1075              :                !input: i1,i2,j3,(jp3)
    1076       102690 :                DO i = 1, ic2
    1077              :                   CALL fftstp(lot, nfft, n2, lot, n2, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
    1078        82152 :                               ftrig2, after2(i), now2(i), before2(i), -1)
    1079       102690 :                   inzee = 3 - inzee
    1080              :                END DO
    1081              : 
    1082              :                !reverse ordering
    1083              :                !input: i1,I2,j3,(jp3)
    1084        22005 :                CALL S_unswitch_downcorn(nfft, n2, lot, n1, lzt, zw(1, 1, inzee), zt(1, 1, j))
    1085              :                !output: I2,i1,j3,(jp3)
    1086              :             END DO
    1087              : 
    1088              :             !transform along x axis
    1089              :             !input: I2,i1,j3,(jp3)
    1090         1467 :             lot = ncache/(4*n1)
    1091        22005 :             DO j = 1, n2, lot
    1092        20538 :                ma = j
    1093        20538 :                mb = MIN(j + (lot - 1), n2)
    1094        20538 :                nfft = mb - ma + 1
    1095              : 
    1096              :                !performing FFT
    1097        20538 :                i = 1
    1098              :                CALL fftstp(lzt, nfft, n1, lot, n1, zt(1, j, 1), zw(1, 1, 1), &
    1099        20538 :                            ftrig1, after1(i), now1(i), before1(i), -1)
    1100              : 
    1101        20538 :                inzee = 1
    1102        82152 :                DO i = 2, ic1
    1103              :                   CALL fftstp(lot, nfft, n1, lot, n1, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
    1104        61614 :                               ftrig1, after1(i), now1(i), before1(i), -1)
    1105        82152 :                   inzee = 3 - inzee
    1106              :                END DO
    1107              :                !output: I2,I1,j3,(jp3)
    1108              : 
    1109              :                !reverse ordering
    1110              :                !input: J2,Jp2,I1,j3,(jp3)
    1111        22005 :                IF (nproc .EQ. 1) THEN
    1112            0 :                   CALL S_unmpiswitch_downcorn(j3, nfft, Jp2stf, J2stf, lot, n1, md2, nd3, nproc, zw(1, 1, inzee), zmpi2)
    1113              :                ELSE
    1114        20538 :                   CALL S_unmpiswitch_downcorn(j3, nfft, Jp2stf, J2stf, lot, n1, md2, nd3, nproc, zw(1, 1, inzee), zmpi1)
    1115              :                END IF
    1116              :                ! output: I1,J2,j3,Jp2,(jp3)
    1117              :             END DO
    1118              :          END IF
    1119              :       END DO
    1120              : 
    1121              :       !Interprocessor data transposition
    1122              :       !input: I1,J2,j3,Jp2,(jp3)
    1123           18 :       IF (nproc .GT. 1) THEN
    1124              :          !communication scheduling
    1125           18 :          CALL mpi_group%alltoall(zmpi1, zmpi2, 2*n1*(md2/nproc)*(nd3/nproc))
    1126              :       END IF
    1127              : 
    1128              :       !output: I1,J2,j3,jp3,(Jp2)
    1129              : 
    1130              :       !transform along z axis
    1131              :       !input: I1,J2,i3,(Jp2)
    1132           18 :       lot = ncache/(2*n3)
    1133         1476 :       DO j2 = 1, md2/nproc
    1134              :          !this condition ensures that we manage only the interesting part for the FFT
    1135         1476 :          IF (iproc*(md2/nproc) + j2 .LE. n2) THEN
    1136        21870 :             DO i1 = 1, n1, lot
    1137        20412 :                ma = i1
    1138        20412 :                mb = MIN(i1 + (lot - 1), n1)
    1139        20412 :                nfft = mb - ma + 1
    1140              : 
    1141              :                !reverse ordering and repack the FFT data in order to be backward HalFFT transformed
    1142              :                !input: I1,J2,i3,(Jp2)
    1143        20412 :                CALL unscramble_pack(i1, j2, lot, nfft, n1, n3, md2, nproc, nd3, zmpi2, zw(1, 1, 1), cosinarr)
    1144              :                !output: I1,i3,J2,(Jp2)
    1145              : 
    1146              :                !performing FFT
    1147              :                !input: I1,i3,J2,(Jp2)
    1148        20412 :                inzee = 1
    1149       102060 :                DO i = 1, ic3
    1150              :                   CALL fftstp(lot, nfft, n3/2, lot, n3/2, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
    1151        81648 :                               ftrig3, after3(i), now3(i), before3(i), -1)
    1152       102060 :                   inzee = 3 - inzee
    1153              :                END DO
    1154              :                !output: I1,I3,J2,(Jp2)
    1155              : 
    1156              :                !rebuild the output array
    1157              :                CALL unfill_downcorn(md1, md3, lot, nfft, n3, zw(1, 1, inzee), zf(i1, 1, j2) &
    1158        21870 :                                     , scal)
    1159              : 
    1160              :                !integrate local pieces together
    1161              :                !ehartree=ehartree+0.5_dp*ehartreetmp*hx*hy*hz
    1162              :             END DO
    1163              :          END IF
    1164              :       END DO
    1165              : 
    1166              :       !De-allocations
    1167           18 :       DEALLOCATE (btrig1)
    1168           18 :       DEALLOCATE (ftrig1)
    1169           18 :       DEALLOCATE (after1)
    1170           18 :       DEALLOCATE (now1)
    1171           18 :       DEALLOCATE (before1)
    1172           18 :       DEALLOCATE (btrig2)
    1173           18 :       DEALLOCATE (ftrig2)
    1174           18 :       DEALLOCATE (after2)
    1175           18 :       DEALLOCATE (now2)
    1176           18 :       DEALLOCATE (before2)
    1177           18 :       DEALLOCATE (btrig3)
    1178           18 :       DEALLOCATE (ftrig3)
    1179           18 :       DEALLOCATE (after3)
    1180           18 :       DEALLOCATE (now3)
    1181           18 :       DEALLOCATE (before3)
    1182           18 :       DEALLOCATE (zmpi2)
    1183           18 :       DEALLOCATE (zw)
    1184           18 :       DEALLOCATE (zt)
    1185           18 :       DEALLOCATE (cosinarr)
    1186           18 :       IF (nproc .GT. 1) DEALLOCATE (zmpi1)
    1187              : 
    1188              :       !  call timing(iproc,'PSolv_comput  ','OF')
    1189           18 :       CALL timestop(handle)
    1190           36 :    END SUBROUTINE S_PoissonSolver
    1191              : 
    1192              : ! **************************************************************************************************
    1193              : !> \brief ...
    1194              : !> \param j3 ...
    1195              : !> \param nfft ...
    1196              : !> \param Jp2stb ...
    1197              : !> \param J2stb ...
    1198              : !> \param lot ...
    1199              : !> \param n1 ...
    1200              : !> \param md2 ...
    1201              : !> \param nd3 ...
    1202              : !> \param nproc ...
    1203              : !> \param zmpi1 ...
    1204              : !> \param zw ...
    1205              : ! **************************************************************************************************
    1206        20538 :    SUBROUTINE S_mpiswitch_upcorn(j3, nfft, Jp2stb, J2stb, lot, n1, md2, nd3, nproc, zmpi1, zw)
    1207              :       INTEGER, INTENT(in)                                :: j3, nfft
    1208              :       INTEGER, INTENT(inout)                             :: Jp2stb, J2stb
    1209              :       INTEGER, INTENT(in)                                :: lot, n1, md2, nd3, nproc
    1210              :       REAL(KIND=dp), &
    1211              :          DIMENSION(2, n1, md2/nproc, nd3/nproc, nproc), &
    1212              :          INTENT(in)                                      :: zmpi1
    1213              :       REAL(KIND=dp), DIMENSION(2, lot, n1), &
    1214              :          INTENT(inout)                                   :: zw
    1215              : 
    1216              :       INTEGER                                            :: I1, J2, Jp2, mfft
    1217              : 
    1218        20538 :       mfft = 0
    1219        23472 :       DO Jp2 = Jp2stb, nproc
    1220       259659 :          DO J2 = J2stb, md2/nproc
    1221       256725 :             mfft = mfft + 1
    1222       256725 :             IF (mfft .GT. nfft) THEN
    1223        19071 :                Jp2stb = Jp2
    1224        19071 :                J2stb = J2
    1225        19071 :                RETURN
    1226              :             END IF
    1227     38740536 :             DO I1 = 1, n1
    1228     38499948 :                zw(1, mfft, I1) = zmpi1(1, I1, J2, j3, Jp2)
    1229     38737602 :                zw(2, mfft, I1) = zmpi1(2, I1, J2, j3, Jp2)
    1230              :             END DO
    1231              :          END DO
    1232         4401 :          J2stb = 1
    1233              :       END DO
    1234              :    END SUBROUTINE S_mpiswitch_upcorn
    1235              : 
    1236              : ! **************************************************************************************************
    1237              : !> \brief ...
    1238              : !> \param nfft ...
    1239              : !> \param n2 ...
    1240              : !> \param lot ...
    1241              : !> \param n1 ...
    1242              : !> \param lzt ...
    1243              : !> \param zt ...
    1244              : !> \param zw ...
    1245              : ! **************************************************************************************************
    1246        20538 :    SUBROUTINE S_switch_upcorn(nfft, n2, lot, n1, lzt, zt, zw)
    1247              :       INTEGER, INTENT(in)                                :: nfft, n2, lot, n1, lzt
    1248              :       REAL(KIND=dp), DIMENSION(2, lzt, n1), INTENT(in)   :: zt
    1249              :       REAL(KIND=dp), DIMENSION(2, lot, n2), &
    1250              :          INTENT(inout)                                   :: zw
    1251              : 
    1252              :       INTEGER                                            :: i, j
    1253              : 
    1254       258192 :       DO j = 1, nfft
    1255     38758140 :          DO i = 1, n2
    1256     38499948 :             zw(1, j, i) = zt(1, i, j)
    1257     38737602 :             zw(2, j, i) = zt(2, i, j)
    1258              :          END DO
    1259              :       END DO
    1260        20538 :    END SUBROUTINE S_switch_upcorn
    1261              : 
    1262              : ! **************************************************************************************************
    1263              : !> \brief ...
    1264              : !> \param nfft ...
    1265              : !> \param n2 ...
    1266              : !> \param lot ...
    1267              : !> \param n1 ...
    1268              : !> \param lzt ...
    1269              : !> \param zw ...
    1270              : !> \param zt ...
    1271              : ! **************************************************************************************************
    1272        20538 :    SUBROUTINE S_unswitch_downcorn(nfft, n2, lot, n1, lzt, zw, zt)
    1273              :       INTEGER, INTENT(in)                                :: nfft, n2, lot, n1, lzt
    1274              :       REAL(KIND=dp), DIMENSION(2, lot, n2), INTENT(in)   :: zw
    1275              :       REAL(KIND=dp), DIMENSION(2, lzt, n1), &
    1276              :          INTENT(inout)                                   :: zt
    1277              : 
    1278              :       INTEGER                                            :: i, j
    1279              : 
    1280       258192 :       DO j = 1, nfft
    1281     38758140 :          DO i = 1, n2
    1282     38499948 :             zt(1, i, j) = zw(1, j, i)
    1283     38737602 :             zt(2, i, j) = zw(2, j, i)
    1284              :          END DO
    1285              :       END DO
    1286        20538 :    END SUBROUTINE S_unswitch_downcorn
    1287              : 
    1288              : ! **************************************************************************************************
    1289              : !> \brief ...
    1290              : !> \param j3 ...
    1291              : !> \param nfft ...
    1292              : !> \param Jp2stf ...
    1293              : !> \param J2stf ...
    1294              : !> \param lot ...
    1295              : !> \param n1 ...
    1296              : !> \param md2 ...
    1297              : !> \param nd3 ...
    1298              : !> \param nproc ...
    1299              : !> \param zw ...
    1300              : !> \param zmpi1 ...
    1301              : ! **************************************************************************************************
    1302        20538 :    SUBROUTINE S_unmpiswitch_downcorn(j3, nfft, Jp2stf, J2stf, lot, n1, md2, nd3, nproc, zw, zmpi1)
    1303              :       INTEGER, INTENT(in)                                :: j3, nfft
    1304              :       INTEGER, INTENT(inout)                             :: Jp2stf, J2stf
    1305              :       INTEGER, INTENT(in)                                :: lot, n1, md2, nd3, nproc
    1306              :       REAL(KIND=dp), DIMENSION(2, lot, n1), INTENT(in)   :: zw
    1307              :       REAL(KIND=dp), &
    1308              :          DIMENSION(2, n1, md2/nproc, nd3/nproc, nproc), &
    1309              :          INTENT(inout)                                   :: zmpi1
    1310              : 
    1311              :       INTEGER                                            :: I1, J2, Jp2, mfft
    1312              : 
    1313        20538 :       mfft = 0
    1314        23472 :       DO Jp2 = Jp2stf, nproc
    1315       259659 :          DO J2 = J2stf, md2/nproc
    1316       256725 :             mfft = mfft + 1
    1317       256725 :             IF (mfft .GT. nfft) THEN
    1318        19071 :                Jp2stf = Jp2
    1319        19071 :                J2stf = J2
    1320        19071 :                RETURN
    1321              :             END IF
    1322     38740536 :             DO I1 = 1, n1
    1323     38499948 :                zmpi1(1, I1, J2, j3, Jp2) = zw(1, mfft, I1)
    1324     38737602 :                zmpi1(2, I1, J2, j3, Jp2) = zw(2, mfft, I1)
    1325              :             END DO
    1326              :          END DO
    1327         4401 :          J2stf = 1
    1328              :       END DO
    1329              :    END SUBROUTINE S_unmpiswitch_downcorn
    1330              : 
    1331              : ! **************************************************************************************************
    1332              : !> \brief (Based on suitable modifications of S.Goedecker routines)
    1333              : !>      Restore data into output array, calculating in the meanwhile
    1334              : !>      Hartree energy of the potential
    1335              : !> \param md1 Dimensions of the undistributed part of the real grid
    1336              : !> \param md3 Dimensions of the undistributed part of the real grid
    1337              : !> \param lot ...
    1338              : !> \param nfft number of planes
    1339              : !> \param n3 (twice the) dimension of the last FFTtransform.
    1340              : !> \param zw FFT work array
    1341              : !> \param zf Original distributed density as well as
    1342              : !>                   Distributed solution of the poisson equation (inout)
    1343              : !> \param scal Needed to achieve unitarity and correct dimensions
    1344              : !> \date February 2006
    1345              : !> \author S. Goedecker, L. Genovese
    1346              : !> \note Assuming that high frequencies are in the corners
    1347              : !>      and that n3 is multiple of 4
    1348              : !>
    1349              : !>  RESTRICTIONS on USAGE
    1350              : !>      Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
    1351              : !>      Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1999
    1352              : !>      Copyright (C) 2002 Stefan Goedecker, CEA Grenoble
    1353              : !>      This file is distributed under the terms of the
    1354              : !>      GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
    1355              : ! **************************************************************************************************
    1356       559582 :    SUBROUTINE unfill_downcorn(md1, md3, lot, nfft, n3, zw, zf, scal)
    1357              :       INTEGER, INTENT(in)                                :: md1, md3, lot, nfft, n3
    1358              :       REAL(KIND=dp), DIMENSION(2, lot, n3/2), INTENT(in) :: zw
    1359              :       REAL(KIND=dp), DIMENSION(md1, md3), INTENT(inout)  :: zf
    1360              :       REAL(KIND=dp), INTENT(in)                          :: scal
    1361              : 
    1362              :       INTEGER                                            :: i1, i3
    1363              :       REAL(KIND=dp)                                      :: pot1
    1364              : 
    1365     14697248 :       DO i3 = 1, n3/4
    1366    391862792 :          DO i1 = 1, nfft
    1367    377165544 :             pot1 = scal*zw(1, i1, i3)
    1368              :             !ehartreetmp =ehartreetmp + pot1* zf(i1,2*i3-1)
    1369    377165544 :             zf(i1, 2*i3 - 1) = pot1
    1370    377165544 :             pot1 = scal*zw(2, i1, i3)
    1371              :             !ehartreetmp =ehartreetmp + pot1* zf(i1,2*i3)
    1372    391303210 :             zf(i1, 2*i3) = pot1
    1373              :          END DO
    1374              :       END DO
    1375       559582 :    END SUBROUTINE unfill_downcorn
    1376              : 
    1377              : ! **************************************************************************************************
    1378              : !> \brief ...
    1379              : !> \param md1 ...
    1380              : !> \param md3 ...
    1381              : !> \param lot ...
    1382              : !> \param nfft ...
    1383              : !> \param n3 ...
    1384              : !> \param zf ...
    1385              : !> \param zw ...
    1386              : ! **************************************************************************************************
    1387       559582 :    SUBROUTINE halfill_upcorn(md1, md3, lot, nfft, n3, zf, zw)
    1388              :       INTEGER                                            :: md1, md3, lot, nfft, n3
    1389              :       REAL(KIND=dp)                                      :: zf(md1, md3), zw(2, lot, n3/2)
    1390              : 
    1391              :       INTEGER                                            :: i1, i3
    1392              : 
    1393     14697248 :       DO i3 = 1, n3/4
    1394              :          ! WARNING: Assuming that high frequencies are in the corners
    1395              :          !          and that n3 is multiple of 4
    1396              :          !in principle we can relax this condition
    1397    391862792 :          DO i1 = 1, nfft
    1398    377165544 :             zw(1, i1, i3) = 0._dp
    1399    391303210 :             zw(2, i1, i3) = 0._dp
    1400              :          END DO
    1401              :       END DO
    1402     14697248 :       DO i3 = n3/4 + 1, n3/2
    1403    391862792 :          DO i1 = 1, nfft
    1404    377165544 :             zw(1, i1, i3) = zf(i1, 2*i3 - 1 - n3/2)
    1405    391303210 :             zw(2, i1, i3) = zf(i1, 2*i3 - n3/2)
    1406              :          END DO
    1407              :       END DO
    1408              : 
    1409       559582 :    END SUBROUTINE halfill_upcorn
    1410              : 
    1411              : ! **************************************************************************************************
    1412              : !> \brief (Based on suitable modifications of S.Goedecker routines)
    1413              : !>      Assign the correct planes to the work array zmpi2
    1414              : !>      in order to prepare for interprocessor data transposition.
    1415              : !>      In the meanwhile, it unpacks the data of the HalFFT in order to prepare for
    1416              : !>      multiplication with the kernel
    1417              : !> \param i1 Starting points of the plane and number of remaining lines
    1418              : !> \param j2 Starting points of the plane and number of remaining lines
    1419              : !> \param lot Starting points of the plane and number of remaining lines
    1420              : !> \param nfft Starting points of the plane and number of remaining lines
    1421              : !> \param n1 logical dimension of the FFT transform, reference for work arrays
    1422              : !> \param n3 logical dimension of the FFT transform, reference for work arrays
    1423              : !> \param md2 Dimensions of real grid
    1424              : !> \param nproc ...
    1425              : !> \param nd3 Dimensions of the kernel
    1426              : !> \param zw Work array (input)
    1427              : !> \param zmpi2 Work array for multiprocessor manipulation (output)
    1428              : !> \param cosinarr Array of the phases needed for unpacking
    1429              : !> \date February 2006
    1430              : !> \author S. Goedecker, L. Genovese
    1431              : !> \note
    1432              : !>  RESTRICTIONS on USAGE
    1433              : !>      Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
    1434              : !>      Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1999
    1435              : !>      Copyright (C) 2002 Stefan Goedecker, CEA Grenoble
    1436              : !>      This file is distributed under the terms of the
    1437              : !>      GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
    1438              : ! **************************************************************************************************
    1439       630488 :    SUBROUTINE scramble_unpack(i1, j2, lot, nfft, n1, n3, md2, nproc, nd3, zw, zmpi2, cosinarr)
    1440              :       INTEGER, INTENT(in)                                :: i1, j2, lot, nfft, n1, n3, md2, nproc, &
    1441              :                                                             nd3
    1442              :       REAL(KIND=dp), DIMENSION(2, lot, n3/2), INTENT(in) :: zw
    1443              :       REAL(KIND=dp), DIMENSION(2, n1, md2/nproc, nd3), &
    1444              :          INTENT(inout)                                   :: zmpi2
    1445              :       REAL(KIND=dp), DIMENSION(2, n3/2), INTENT(in)      :: cosinarr
    1446              : 
    1447              :       INTEGER                                            :: i, i3, ind1, ind2
    1448              :       REAL(KIND=dp)                                      :: a, b, c, cp, d, feI, feR, fI, foI, foR, &
    1449              :                                                             fR, sp
    1450              : 
    1451              : !case i3=1 and i3=n3/2+1
    1452              : 
    1453     18987622 :       DO i = 0, nfft - 1
    1454     18357134 :          a = zw(1, i + 1, 1)
    1455     18357134 :          b = zw(2, i + 1, 1)
    1456     18357134 :          zmpi2(1, i1 + i, j2, 1) = a + b
    1457     18357134 :          zmpi2(2, i1 + i, j2, 1) = 0._dp
    1458     18357134 :          zmpi2(1, i1 + i, j2, n3/2 + 1) = a - b
    1459     18987622 :          zmpi2(2, i1 + i, j2, n3/2 + 1) = 0._dp
    1460              :       END DO
    1461              :       !case 2<=i3<=n3/2
    1462     32253340 :       DO i3 = 2, n3/2
    1463     31622852 :          ind1 = i3
    1464     31622852 :          ind2 = n3/2 - i3 + 2
    1465     31622852 :          cp = cosinarr(1, i3)
    1466     31622852 :          sp = cosinarr(2, i3)
    1467    890605574 :          DO i = 0, nfft - 1
    1468    858352234 :             a = zw(1, i + 1, ind1)
    1469    858352234 :             b = zw(2, i + 1, ind1)
    1470    858352234 :             c = zw(1, i + 1, ind2)
    1471    858352234 :             d = zw(2, i + 1, ind2)
    1472    858352234 :             feR = .5_dp*(a + c)
    1473    858352234 :             feI = .5_dp*(b - d)
    1474    858352234 :             foR = .5_dp*(a - c)
    1475    858352234 :             foI = .5_dp*(b + d)
    1476    858352234 :             fR = feR + cp*foI - sp*foR
    1477    858352234 :             fI = feI - cp*foR - sp*foI
    1478    858352234 :             zmpi2(1, i1 + i, j2, ind1) = fR
    1479    889975086 :             zmpi2(2, i1 + i, j2, ind1) = fI
    1480              :          END DO
    1481              :       END DO
    1482              : 
    1483       630488 :    END SUBROUTINE scramble_unpack
    1484              : 
    1485              : ! **************************************************************************************************
    1486              : !> \brief (Based on suitable modifications of S.Goedecker routines)
    1487              : !>      Insert the correct planes of the work array zmpi2
    1488              : !>      in order to prepare for backward FFT transform
    1489              : !>      In the meanwhile, it packs the data in order to be transformed with the HalFFT
    1490              : !>      procedure
    1491              : !> \param i1 Starting points of the plane and number of remaining lines
    1492              : !> \param j2 Starting points of the plane and number of remaining lines
    1493              : !> \param lot Starting points of the plane and number of remaining lines
    1494              : !> \param nfft Starting points of the plane and number of remaining lines
    1495              : !> \param n1 logical dimension of the FFT transform, reference for work arrays
    1496              : !> \param n3 logical dimension of the FFT transform, reference for work arrays
    1497              : !> \param md2 Dimensions of real grid
    1498              : !> \param nproc ...
    1499              : !> \param nd3 Dimensions of the kernel
    1500              : !> \param zmpi2 Work array for multiprocessor manipulation (output)
    1501              : !> \param zw Work array (inout)
    1502              : !> \param cosinarr Array of the phases needed for packing
    1503              : !> \date February 2006
    1504              : !> \author S. Goedecker, L. Genovese
    1505              : !> \note
    1506              : !>  RESTRICTIONS on USAGE
    1507              : !>      Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
    1508              : !>      Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1999
    1509              : !>      Copyright (C) 2002 Stefan Goedecker, CEA Grenoble
    1510              : !>      This file is distributed under the terms of the
    1511              : !>      GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
    1512              : ! **************************************************************************************************
    1513       559582 :    SUBROUTINE unscramble_pack(i1, j2, lot, nfft, n1, n3, md2, nproc, nd3, zmpi2, zw, cosinarr)
    1514              :       INTEGER, INTENT(in)                                :: i1, j2, lot, nfft, n1, n3, md2, nproc, &
    1515              :                                                             nd3
    1516              :       REAL(KIND=dp), DIMENSION(2, n1, md2/nproc, nd3), &
    1517              :          INTENT(in)                                      :: zmpi2
    1518              :       REAL(KIND=dp), DIMENSION(2, lot, n3/2), &
    1519              :          INTENT(inout)                                   :: zw
    1520              :       REAL(KIND=dp), DIMENSION(2, n3/2), INTENT(in)      :: cosinarr
    1521              : 
    1522              :       INTEGER                                            :: i, i3, indA, indB
    1523              :       REAL(KIND=dp)                                      :: a, b, c, cp, d, ie, ih, io, re, rh, ro, &
    1524              :                                                             sp
    1525              : 
    1526     28834914 :       DO i3 = 1, n3/2
    1527     28275332 :          indA = i3
    1528     28275332 :          indB = n3/2 + 2 - i3
    1529     28275332 :          cp = cosinarr(1, i3)
    1530     28275332 :          sp = cosinarr(2, i3)
    1531    783166002 :          DO i = 0, nfft - 1
    1532    754331088 :             a = zmpi2(1, i1 + i, j2, indA)
    1533    754331088 :             b = zmpi2(2, i1 + i, j2, indA)
    1534    754331088 :             c = zmpi2(1, i1 + i, j2, indB)
    1535    754331088 :             d = -zmpi2(2, i1 + i, j2, indB)
    1536    754331088 :             re = (a + c)
    1537    754331088 :             ie = (b + d)
    1538    754331088 :             ro = (a - c)*cp - (b - d)*sp
    1539    754331088 :             io = (a - c)*sp + (b - d)*cp
    1540    754331088 :             rh = re - io
    1541    754331088 :             ih = ie + ro
    1542    754331088 :             zw(1, i + 1, indA) = rh
    1543    782606420 :             zw(2, i + 1, indA) = ih
    1544              :          END DO
    1545              :       END DO
    1546              : 
    1547       559582 :    END SUBROUTINE unscramble_pack
    1548              : 
    1549              : ! **************************************************************************************************
    1550              : !> \brief (Based on suitable modifications of S.Goedecker routines)
    1551              : !>      Applies the local FFT space Kernel to the density in Real space.
    1552              : !>      Calculates also the LDA exchange-correlation terms
    1553              : !> \param n1 logical dimension of the transform.
    1554              : !> \param n2 logical dimension of the transform.
    1555              : !> \param n3 logical dimension of the transform.
    1556              : !> \param nd1 Dimension of POT
    1557              : !> \param nd2 Dimension of POT
    1558              : !> \param nd3 Dimension of POT
    1559              : !> \param md1 Dimension of ZF
    1560              : !> \param md2 Dimension of ZF
    1561              : !> \param md3 Dimension of ZF
    1562              : !> \param nproc number of processors used as returned by MPI_COMM_SIZE
    1563              : !> \param iproc [0:nproc-1] number of processor as returned by MPI_COMM_RANK
    1564              : !> \param pot Kernel, only the distributed part (REAL)
    1565              : !>                   POT(i1,i2,i3)
    1566              : !>                   i1=1,nd1 , i2=1,nd2 , i3=1,nd3/nproc
    1567              : !> \param zf Density (input/output)
    1568              : !>                   ZF(i1,i3,i2)
    1569              : !>                   i1=1,md1 , i2=1,md2/nproc , i3=1,md3
    1570              : !> \param scal factor of renormalization of the FFT in order to acheve unitarity
    1571              : !>                   and the correct dimension
    1572              : !> \param mpi_group ...
    1573              : !> \date February 2006
    1574              : !> \author S. Goedecker, L. Genovese
    1575              : !> \note As transform lengths
    1576              : !>                     most products of the prime factors 2,3,5 are allowed.
    1577              : !>                   The detailed table with allowed transform lengths can
    1578              : !>                   be found in subroutine CTRIG
    1579              : !> \note
    1580              : !>  RESTRICTIONS on USAGE
    1581              : !>      Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
    1582              : !>      Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1999
    1583              : !>      Copyright (C) 2002 Stefan Goedecker, CEA Grenoble
    1584              : !>      This file is distributed under the terms of the
    1585              : !>       GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
    1586              : ! **************************************************************************************************
    1587        15208 :    SUBROUTINE F_PoissonSolver(n1, n2, n3, nd1, nd2, nd3, md1, md2, md3, nproc, iproc, pot, zf, &
    1588              :                               scal, mpi_group)
    1589              :       INTEGER, INTENT(in)                                :: n1, n2, n3, nd1, nd2, nd3, md1, md2, &
    1590              :                                                             md3, nproc, iproc
    1591              :       REAL(KIND=dp), DIMENSION(nd1, nd2, nd3/nproc), &
    1592              :          INTENT(in)                                      :: pot
    1593              :       REAL(KIND=dp), DIMENSION(md1, md3, md2/nproc), &
    1594              :          INTENT(inout)                                   :: zf
    1595              :       REAL(KIND=dp), INTENT(in)                          :: scal
    1596              : 
    1597              :       CLASS(mp_comm_type), INTENT(in)                     :: mpi_group
    1598              : 
    1599              :       INTEGER, PARAMETER                                 :: ncache_optimal = 8*1024
    1600              : 
    1601              :       INTEGER                                            :: i, i1, i3, ic1, ic2, ic3, inzee, j, j2, &
    1602              :                                                             J2stb, J2stf, j3, Jp2stb, Jp2stf, lot, &
    1603              :                                                             lzt, ma, mb, ncache, nfft
    1604        15208 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: after1, after2, after3, before1, &
    1605        15208 :                                                             before2, before3, now1, now2, now3
    1606              :       REAL(kind=dp)                                      :: twopion
    1607        15208 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: btrig1, btrig2, btrig3, cosinarr, &
    1608        15208 :                                                             ftrig1, ftrig2, ftrig3
    1609        15208 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :)     :: zt, zw
    1610              :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :, :)  :: zmpi2
    1611              :       REAL(KIND=dp), ALLOCATABLE, &
    1612        15208 :          DIMENSION(:, :, :, :, :)                        :: zmpi1
    1613              : 
    1614        15208 :       IF (MOD(n1, 2) .NE. 0) CPABORT("Parallel convolution:ERROR:n1")
    1615        15208 :       IF (MOD(n2, 2) .NE. 0) CPABORT("Parallel convolution:ERROR:n2")
    1616        15208 :       IF (MOD(n3, 2) .NE. 0) CPABORT("Parallel convolution:ERROR:n3")
    1617        15208 :       IF (nd1 .LT. n1/2 + 1) CPABORT("Parallel convolution:ERROR:nd1")
    1618        15208 :       IF (nd2 .LT. n2/2 + 1) CPABORT("Parallel convolution:ERROR:nd2")
    1619        15208 :       IF (nd3 .LT. n3/2 + 1) CPABORT("Parallel convolution:ERROR:nd3")
    1620        15208 :       IF (md1 .LT. n1/2) CPABORT("Parallel convolution:ERROR:md1")
    1621        15208 :       IF (md2 .LT. n2/2) CPABORT("Parallel convolution:ERROR:md2")
    1622        15208 :       IF (md3 .LT. n3/2) CPABORT("Parallel convolution:ERROR:md3")
    1623        15208 :       IF (MOD(nd3, nproc) .NE. 0) CPABORT("Parallel convolution:ERROR:nd3")
    1624        15208 :       IF (MOD(md2, nproc) .NE. 0) CPABORT("Parallel convolution:ERROR:md2")
    1625              : 
    1626              :       !defining work arrays dimensions
    1627              : 
    1628        15208 :       ncache = ncache_optimal
    1629        15208 :       IF (ncache <= MAX(n1, n2, n3/2)*4) ncache = MAX(n1, n2, n3/2)*4
    1630        15208 :       lzt = n2/2
    1631        15208 :       IF (MOD(n2/2, 2) .EQ. 0) lzt = lzt + 1
    1632        15208 :       IF (MOD(n2/2, 4) .EQ. 0) lzt = lzt + 1
    1633              : 
    1634              :       !Allocations
    1635        15208 :       ALLOCATE (btrig1(2, ctrig_length))
    1636        15208 :       ALLOCATE (ftrig1(2, ctrig_length))
    1637        15208 :       ALLOCATE (after1(7))
    1638        15208 :       ALLOCATE (now1(7))
    1639        15208 :       ALLOCATE (before1(7))
    1640        15208 :       ALLOCATE (btrig2(2, ctrig_length))
    1641        15208 :       ALLOCATE (ftrig2(2, ctrig_length))
    1642        15208 :       ALLOCATE (after2(7))
    1643        15208 :       ALLOCATE (now2(7))
    1644        15208 :       ALLOCATE (before2(7))
    1645        15208 :       ALLOCATE (btrig3(2, ctrig_length))
    1646        15208 :       ALLOCATE (ftrig3(2, ctrig_length))
    1647        15208 :       ALLOCATE (after3(7))
    1648        15208 :       ALLOCATE (now3(7))
    1649        15208 :       ALLOCATE (before3(7))
    1650        60832 :       ALLOCATE (zw(2, ncache/4, 2))
    1651        60832 :       ALLOCATE (zt(2, lzt, n1))
    1652        76040 :       ALLOCATE (zmpi2(2, n1, md2/nproc, nd3))
    1653   4464468616 :       zmpi2 = 1.0E99_dp ! init mpi'ed buffers to silence warnings under valgrind
    1654        60832 :       ALLOCATE (cosinarr(2, n3/2))
    1655        53418 :       IF (nproc .GT. 1) ALLOCATE (zmpi1(2, n1, md2/nproc, nd3/nproc, nproc))
    1656              : 
    1657              :       !calculating the FFT work arrays (beware on the HalFFT in n3 dimension)
    1658        15208 :       CALL ctrig(n3/2, btrig3, after3, before3, now3, 1, ic3)
    1659        15208 :       CALL ctrig(n1, btrig1, after1, before1, now1, 1, ic1)
    1660        15208 :       CALL ctrig(n2, btrig2, after2, before2, now2, 1, ic2)
    1661      1138536 :       DO j = 1, n1
    1662      1123328 :          ftrig1(1, j) = btrig1(1, j)
    1663      1138536 :          ftrig1(2, j) = -btrig1(2, j)
    1664              :       END DO
    1665      1138536 :       DO j = 1, n2
    1666      1123328 :          ftrig2(1, j) = btrig2(1, j)
    1667      1138536 :          ftrig2(2, j) = -btrig2(2, j)
    1668              :       END DO
    1669      1161216 :       DO j = 1, n3
    1670      1146008 :          ftrig3(1, j) = btrig3(1, j)
    1671      1161216 :          ftrig3(2, j) = -btrig3(2, j)
    1672              :       END DO
    1673              : 
    1674              :       !Calculating array of phases for HalFFT decoding
    1675        15208 :       twopion = 8._dp*ATAN(1._dp)/REAL(n3, KIND=dp)
    1676       588212 :       DO i3 = 1, n3/2
    1677       573004 :          cosinarr(1, i3) = COS(twopion*(i3 - 1))
    1678       588212 :          cosinarr(2, i3) = -SIN(twopion*(i3 - 1))
    1679              :       END DO
    1680              : 
    1681              :       ! transform along z axis
    1682        15208 :       lot = ncache/(2*n3)
    1683        15208 :       IF (lot .LT. 1) THEN
    1684              :          WRITE (*, *) &
    1685              :             'convolxc_on:ncache has to be enlarged to be able to hold at'// &
    1686              :             'least one 1-d FFT of this size even though this will'// &
    1687            0 :             'reduce the performance for shorter transform lengths'
    1688            0 :          CPABORT("")
    1689              :       END IF
    1690              : 
    1691       410366 :       DO j2 = 1, md2/nproc
    1692              :          !this condition ensures that we manage only the interesting part for the FFT
    1693       410366 :          IF (iproc*(md2/nproc) + j2 .LE. n2/2) THEN
    1694       933298 :             DO i1 = 1, (n1/2), lot
    1695       539170 :                ma = i1
    1696       539170 :                mb = MIN(i1 + (lot - 1), (n1/2))
    1697       539170 :                nfft = mb - ma + 1
    1698              : 
    1699              :                !inserting real data into complex array of half length
    1700       539170 :                CALL halfill_upcorn(md1, md3, lot, nfft, n3, zf(i1, 1, j2), zw(1, 1, 1))
    1701              : 
    1702              :                !performing FFT
    1703              :                !input: I1,I3,J2,(Jp2)
    1704       539170 :                inzee = 1
    1705      1901426 :                DO i = 1, ic3
    1706              :                   CALL fftstp(lot, nfft, n3/2, lot, n3/2, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
    1707      1362256 :                               btrig3, after3(i), now3(i), before3(i), 1)
    1708      1901426 :                   inzee = 3 - inzee
    1709              :                END DO
    1710              :                !output: I1,i3,J2,(Jp2)
    1711              : 
    1712              :                !unpacking FFT in order to restore correct result,
    1713              :                !while exchanging components
    1714              :                !input: I1,i3,J2,(Jp2)
    1715       933298 :                CALL scramble_unpack(i1, j2, lot, nfft, n1/2, n3, md2, nproc, nd3, zw(1, 1, inzee), zmpi2, cosinarr)
    1716              :                !output: I1,J2,i3,(Jp2)
    1717              :             END DO
    1718              :          END IF
    1719              :       END DO
    1720              : 
    1721              :       !Interprocessor data transposition
    1722              :       !input: I1,J2,j3,jp3,(Jp2)
    1723        15208 :       IF (nproc .GT. 1) THEN
    1724              :          !communication scheduling
    1725         7642 :          CALL mpi_group%alltoall(zmpi2, zmpi1, n1*(md2/nproc)*(nd3/nproc))
    1726              :       END IF
    1727              :       !output: I1,J2,j3,Jp2,(jp3)
    1728              : 
    1729              :       !now each process perform complete convolution of its planes
    1730       432638 :       DO j3 = 1, nd3/nproc
    1731              :          !this condition ensures that we manage only the interesting part for the FFT
    1732       432638 :          IF (iproc*(nd3/nproc) + j3 .LE. n3/2 + 1) THEN
    1733       413609 :             Jp2stb = 1
    1734       413609 :             J2stb = 1
    1735       413609 :             Jp2stf = 1
    1736       413609 :             J2stf = 1
    1737              : 
    1738              :             ! transform along x axis
    1739       413609 :             lot = ncache/(4*n1)
    1740       413609 :             IF (lot .LT. 1) THEN
    1741              :                WRITE (*, *) &
    1742              :                   'convolxc_on:ncache has to be enlarged to be able to hold at'// &
    1743              :                   'least one 1-d FFT of this size even though this will'// &
    1744            0 :                   'reduce the performance for shorter transform lengths'
    1745            0 :                CPABORT("")
    1746              :             END IF
    1747              : 
    1748      1323595 :             DO j = 1, n2/2, lot
    1749       909986 :                ma = j
    1750       909986 :                mb = MIN(j + (lot - 1), n2/2)
    1751       909986 :                nfft = mb - ma + 1
    1752              : 
    1753              :                !reverse index ordering, leaving the planes to be transformed at the end
    1754              :                !input: I1,J2,j3,Jp2,(jp3)
    1755       909986 :                IF (nproc .EQ. 1) THEN
    1756       421221 :                   CALL mpiswitch_upcorn(j3, nfft, Jp2stb, J2stb, lot, n1, md2, nd3, nproc, zmpi2, zw(1, 1, 1))
    1757              :                ELSE
    1758       488765 :                   CALL mpiswitch_upcorn(j3, nfft, Jp2stb, J2stb, lot, n1, md2, nd3, nproc, zmpi1, zw(1, 1, 1))
    1759              :                END IF
    1760              :                !output: J2,Jp2,I1,j3,(jp3)
    1761              : 
    1762              :                !performing FFT
    1763              :                !input: I2,I1,j3,(jp3)
    1764       909986 :                inzee = 1
    1765      2893259 :                DO i = 1, ic1 - 1
    1766              :                   CALL fftstp(lot, nfft, n1, lot, n1, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
    1767      1983273 :                               btrig1, after1(i), now1(i), before1(i), 1)
    1768      2893259 :                   inzee = 3 - inzee
    1769              :                END DO
    1770              : 
    1771              :                !storing the last step into zt array
    1772       909986 :                i = ic1
    1773              :                CALL fftstp(lot, nfft, n1, lzt, n1, zw(1, 1, inzee), zt(1, j, 1), &
    1774      1323595 :                            btrig1, after1(i), now1(i), before1(i), 1)
    1775              :                !output: I2,i1,j3,(jp3)
    1776              :             END DO
    1777              : 
    1778              :             !transform along y axis
    1779       413609 :             lot = ncache/(4*n2)
    1780       413609 :             IF (lot .LT. 1) THEN
    1781              :                WRITE (*, *) &
    1782              :                   'convolxc_on:ncache has to be enlarged to be able to hold at'// &
    1783              :                   'least one 1-d FFT of this size even though this will'// &
    1784            0 :                   'reduce the performance for shorter transform lengths'
    1785            0 :                CPABORT("")
    1786              :             END IF
    1787              : 
    1788      2106803 :             DO j = 1, n1, lot
    1789      1693194 :                ma = j
    1790      1693194 :                mb = MIN(j + (lot - 1), n1)
    1791      1693194 :                nfft = mb - ma + 1
    1792              : 
    1793              :                !reverse ordering
    1794              :                !input: I2,i1,j3,(jp3)
    1795      1693194 :                CALL switch_upcorn(nfft, n2, lot, n1, lzt, zt(1, 1, j), zw(1, 1, 1))
    1796              :                !output: i1,I2,j3,(jp3)
    1797              : 
    1798              :                !performing FFT
    1799              :                !input: i1,I2,j3,(jp3)
    1800      1693194 :                inzee = 1
    1801      7108762 :                DO i = 1, ic2
    1802              :                   CALL fftstp(lot, nfft, n2, lot, n2, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
    1803      5415568 :                               btrig2, after2(i), now2(i), before2(i), 1)
    1804      7108762 :                   inzee = 3 - inzee
    1805              :                END DO
    1806              :                !output: i1,i2,j3,(jp3)
    1807              : 
    1808              :                !Multiply with kernel in fourier space
    1809      1693194 :                CALL multkernel(nd1, nd2, n1, n2, lot, nfft, j, pot(1, 1, j3), zw(1, 1, inzee))
    1810              : 
    1811              :                !TRANSFORM BACK IN REAL SPACE
    1812              : 
    1813              :                !transform along y axis
    1814              :                !input: i1,i2,j3,(jp3)
    1815      7108762 :                DO i = 1, ic2
    1816              :                   CALL fftstp(lot, nfft, n2, lot, n2, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
    1817      5415568 :                               ftrig2, after2(i), now2(i), before2(i), -1)
    1818      7108762 :                   inzee = 3 - inzee
    1819              :                END DO
    1820              : 
    1821              :                !reverse ordering
    1822              :                !input: i1,I2,j3,(jp3)
    1823      2106803 :                CALL unswitch_downcorn(nfft, n2, lot, n1, lzt, zw(1, 1, inzee), zt(1, 1, j))
    1824              :                !output: I2,i1,j3,(jp3)
    1825              :             END DO
    1826              : 
    1827              :             !transform along x axis
    1828              :             !input: I2,i1,j3,(jp3)
    1829       413609 :             lot = ncache/(4*n1)
    1830      1323595 :             DO j = 1, n2/2, lot
    1831       909986 :                ma = j
    1832       909986 :                mb = MIN(j + (lot - 1), n2/2)
    1833       909986 :                nfft = mb - ma + 1
    1834              : 
    1835              :                !performing FFT
    1836       909986 :                i = 1
    1837              :                CALL fftstp(lzt, nfft, n1, lot, n1, zt(1, j, 1), zw(1, 1, 1), &
    1838       909986 :                            ftrig1, after1(i), now1(i), before1(i), -1)
    1839              : 
    1840       909986 :                inzee = 1
    1841      2893259 :                DO i = 2, ic1
    1842              :                   CALL fftstp(lot, nfft, n1, lot, n1, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
    1843      1983273 :                               ftrig1, after1(i), now1(i), before1(i), -1)
    1844      2893259 :                   inzee = 3 - inzee
    1845              :                END DO
    1846              :                !output: I2,I1,j3,(jp3)
    1847              : 
    1848              :                !reverse ordering
    1849              :                !input: J2,Jp2,I1,j3,(jp3)
    1850      1323595 :                IF (nproc .EQ. 1) THEN
    1851       421221 :                   CALL unmpiswitch_downcorn(j3, nfft, Jp2stf, J2stf, lot, n1, md2, nd3, nproc, zw(1, 1, inzee), zmpi2)
    1852              :                ELSE
    1853       488765 :                   CALL unmpiswitch_downcorn(j3, nfft, Jp2stf, J2stf, lot, n1, md2, nd3, nproc, zw(1, 1, inzee), zmpi1)
    1854              :                END IF
    1855              :                ! output: I1,J2,j3,Jp2,(jp3)
    1856              :             END DO
    1857              :          END IF
    1858              :       END DO
    1859              : 
    1860              :       !Interprocessor data transposition
    1861              :       !input: I1,J2,j3,Jp2,(jp3)
    1862        15208 :       IF (nproc .GT. 1) THEN
    1863              :          !communication scheduling
    1864         7642 :          CALL mpi_group%alltoall(zmpi1, zmpi2, n1*(md2/nproc)*(nd3/nproc))
    1865              :          !output: I1,J2,j3,jp3,(Jp2)
    1866              :       END IF
    1867              : 
    1868              :       !transform along z axis
    1869              :       !input: I1,J2,i3,(Jp2)
    1870        15208 :       lot = ncache/(2*n3)
    1871       410366 :       DO j2 = 1, md2/nproc
    1872              :          !this condition ensures that we manage only the interesting part for the FFT
    1873       410366 :          IF (iproc*(md2/nproc) + j2 .LE. n2/2) THEN
    1874       933298 :             DO i1 = 1, (n1/2), lot
    1875       539170 :                ma = i1
    1876       539170 :                mb = MIN(i1 + (lot - 1), (n1/2))
    1877       539170 :                nfft = mb - ma + 1
    1878              : 
    1879              :                !reverse ordering and repack the FFT data in order to be backward HalFFT transformed
    1880              :                !input: I1,J2,i3,(Jp2)
    1881       539170 :                CALL unscramble_pack(i1, j2, lot, nfft, n1/2, n3, md2, nproc, nd3, zmpi2, zw(1, 1, 1), cosinarr)
    1882              :                !output: I1,i3,J2,(Jp2)
    1883              : 
    1884              :                !performing FFT
    1885              :                !input: I1,i3,J2,(Jp2)
    1886       539170 :                inzee = 1
    1887      1901426 :                DO i = 1, ic3
    1888              :                   CALL fftstp(lot, nfft, n3/2, lot, n3/2, zw(1, 1, inzee), zw(1, 1, 3 - inzee), &
    1889      1362256 :                               ftrig3, after3(i), now3(i), before3(i), -1)
    1890      1901426 :                   inzee = 3 - inzee
    1891              :                END DO
    1892              :                !output: I1,I3,J2,(Jp2)
    1893              : 
    1894              :                !calculates the exchange correlation terms locally and rebuild the output array
    1895              :                CALL unfill_downcorn(md1, md3, lot, nfft, n3, zw(1, 1, inzee), zf(i1, 1, j2) &
    1896       933298 :                                     , scal) !,ehartreetmp)
    1897              : 
    1898              :                !integrate local pieces together
    1899              :                !ehartree=ehartree+0.5_dp*ehartreetmp*hgrid**3
    1900              :             END DO
    1901              :          END IF
    1902              :       END DO
    1903              : 
    1904              :       !De-allocations
    1905        15208 :       DEALLOCATE (btrig1)
    1906        15208 :       DEALLOCATE (ftrig1)
    1907        15208 :       DEALLOCATE (after1)
    1908        15208 :       DEALLOCATE (now1)
    1909        15208 :       DEALLOCATE (before1)
    1910        15208 :       DEALLOCATE (btrig2)
    1911        15208 :       DEALLOCATE (ftrig2)
    1912        15208 :       DEALLOCATE (after2)
    1913        15208 :       DEALLOCATE (now2)
    1914        15208 :       DEALLOCATE (before2)
    1915        15208 :       DEALLOCATE (btrig3)
    1916        15208 :       DEALLOCATE (ftrig3)
    1917        15208 :       DEALLOCATE (after3)
    1918        15208 :       DEALLOCATE (now3)
    1919        15208 :       DEALLOCATE (before3)
    1920        15208 :       DEALLOCATE (zmpi2)
    1921        15208 :       DEALLOCATE (zw)
    1922        15208 :       DEALLOCATE (zt)
    1923        15208 :       DEALLOCATE (cosinarr)
    1924        15208 :       IF (nproc .GT. 1) DEALLOCATE (zmpi1)
    1925              : 
    1926        15208 :    END SUBROUTINE F_PoissonSolver
    1927              : 
    1928              : ! **************************************************************************************************
    1929              : !> \brief ...
    1930              : !> \param nfft ...
    1931              : !> \param n2 ...
    1932              : !> \param lot ...
    1933              : !> \param n1 ...
    1934              : !> \param lzt ...
    1935              : !> \param zt ...
    1936              : !> \param zw ...
    1937              : ! **************************************************************************************************
    1938      1693194 :    SUBROUTINE switch_upcorn(nfft, n2, lot, n1, lzt, zt, zw)
    1939              :       INTEGER                                            :: nfft, n2, lot, n1, lzt
    1940              :       REAL(KIND=dp)                                      :: zt(2, lzt, n1), zw(2, lot, n2)
    1941              : 
    1942              :       INTEGER                                            :: i, j
    1943              : 
    1944              : ! WARNING: Assuming that high frequencies are in the corners
    1945              : !          and that n2 is multiple of 2
    1946              : ! Low frequencies
    1947              : 
    1948     34457202 :       DO j = 1, nfft
    1949   1498045734 :          DO i = n2/2 + 1, n2
    1950   1463588532 :             zw(1, j, i) = zt(1, i - n2/2, j)
    1951   1496352540 :             zw(2, j, i) = zt(2, i - n2/2, j)
    1952              :          END DO
    1953              :       END DO
    1954              :       ! High frequencies
    1955     83824308 :       DO i = 1, n2/2
    1956   1547412840 :          DO j = 1, nfft
    1957   1463588532 :             zw(1, j, i) = 0._dp
    1958   1545719646 :             zw(2, j, i) = 0._dp
    1959              :          END DO
    1960              :       END DO
    1961      1693194 :    END SUBROUTINE switch_upcorn
    1962              : 
    1963              : ! **************************************************************************************************
    1964              : !> \brief ...
    1965              : !> \param j3 ...
    1966              : !> \param nfft ...
    1967              : !> \param Jp2stb ...
    1968              : !> \param J2stb ...
    1969              : !> \param lot ...
    1970              : !> \param n1 ...
    1971              : !> \param md2 ...
    1972              : !> \param nd3 ...
    1973              : !> \param nproc ...
    1974              : !> \param zmpi1 ...
    1975              : !> \param zw ...
    1976              : ! **************************************************************************************************
    1977       909986 :    SUBROUTINE mpiswitch_upcorn(j3, nfft, Jp2stb, J2stb, lot, n1, md2, nd3, nproc, zmpi1, zw)
    1978              :       INTEGER                                            :: j3, nfft, Jp2stb, J2stb, lot, n1, md2, &
    1979              :                                                             nd3, nproc
    1980              :       REAL(KIND=dp) :: zmpi1(2, n1/2, md2/nproc, nd3/nproc, nproc), zw(2, lot, n1)
    1981              : 
    1982              :       INTEGER                                            :: i1, j2, jp2, mfft
    1983              : 
    1984              : ! WARNING: Assuming that high frequencies are in the corners
    1985              : !          and that n1 is multiple of 2
    1986              : 
    1987       909986 :       mfft = 0
    1988      1460010 :       Main: DO Jp2 = Jp2stb, nproc
    1989     17466593 :          DO J2 = J2stb, md2/nproc
    1990     16916569 :             mfft = mfft + 1
    1991     16916569 :             IF (mfft .GT. nfft) THEN
    1992       534565 :                Jp2stb = Jp2
    1993       534565 :                J2stb = J2
    1994       534565 :                EXIT Main
    1995              :             END IF
    1996    748176270 :             DO I1 = 1, n1/2
    1997    731794266 :                zw(1, mfft, I1) = 0._dp
    1998    748176270 :                zw(2, mfft, I1) = 0._dp
    1999              :             END DO
    2000    748726294 :             DO I1 = n1/2 + 1, n1
    2001    731794266 :                zw(1, mfft, I1) = zmpi1(1, I1 - n1/2, J2, j3, Jp2)
    2002    748176270 :                zw(2, mfft, I1) = zmpi1(2, I1 - n1/2, J2, j3, Jp2)
    2003              :             END DO
    2004              :          END DO
    2005       925445 :          J2stb = 1
    2006              :       END DO Main
    2007       909986 :    END SUBROUTINE mpiswitch_upcorn
    2008              : 
    2009              : ! **************************************************************************************************
    2010              : !> \brief ...
    2011              : !> \param nfft ...
    2012              : !> \param n2 ...
    2013              : !> \param lot ...
    2014              : !> \param n1 ...
    2015              : !> \param lzt ...
    2016              : !> \param zw ...
    2017              : !> \param zt ...
    2018              : ! **************************************************************************************************
    2019      1693194 :    SUBROUTINE unswitch_downcorn(nfft, n2, lot, n1, lzt, zw, zt)
    2020              :       INTEGER                                            :: nfft, n2, lot, n1, lzt
    2021              :       REAL(KIND=dp)                                      :: zw(2, lot, n2), zt(2, lzt, n1)
    2022              : 
    2023              :       INTEGER                                            :: i, j
    2024              : 
    2025              : ! WARNING: Assuming that high frequencies are in the corners
    2026              : !          and that n2 is multiple of 2
    2027              : ! Low frequencies
    2028              : 
    2029     34457202 :       DO j = 1, nfft
    2030   1498045734 :          DO i = 1, n2/2
    2031   1463588532 :             zt(1, i, j) = zw(1, j, i)
    2032   1496352540 :             zt(2, i, j) = zw(2, j, i)
    2033              :          END DO
    2034              :       END DO
    2035      1693194 :       RETURN
    2036              :    END SUBROUTINE unswitch_downcorn
    2037              : 
    2038              : ! **************************************************************************************************
    2039              : !> \brief ...
    2040              : !> \param j3 ...
    2041              : !> \param nfft ...
    2042              : !> \param Jp2stf ...
    2043              : !> \param J2stf ...
    2044              : !> \param lot ...
    2045              : !> \param n1 ...
    2046              : !> \param md2 ...
    2047              : !> \param nd3 ...
    2048              : !> \param nproc ...
    2049              : !> \param zw ...
    2050              : !> \param zmpi1 ...
    2051              : ! **************************************************************************************************
    2052       909986 :    SUBROUTINE unmpiswitch_downcorn(j3, nfft, Jp2stf, J2stf, lot, n1, md2, nd3, nproc, zw, zmpi1)
    2053              :       INTEGER                                            :: j3, nfft, Jp2stf, J2stf, lot, n1, md2, &
    2054              :                                                             nd3, nproc
    2055              :       REAL(KIND=dp) :: zw(2, lot, n1), zmpi1(2, n1/2, md2/nproc, nd3/nproc, nproc)
    2056              : 
    2057              :       INTEGER                                            :: i1, j2, jp2, mfft
    2058              : 
    2059              : ! WARNING: Assuming that high frequencies are in the corners
    2060              : !          and that n1 is multiple of 2
    2061              : 
    2062       909986 :       mfft = 0
    2063      1460010 :       Main: DO Jp2 = Jp2stf, nproc
    2064     17466593 :          DO J2 = J2stf, md2/nproc
    2065     16916569 :             mfft = mfft + 1
    2066     16916569 :             IF (mfft .GT. nfft) THEN
    2067       534565 :                Jp2stf = Jp2
    2068       534565 :                J2stf = J2
    2069       534565 :                EXIT Main
    2070              :             END IF
    2071    748726294 :             DO I1 = 1, n1/2
    2072    731794266 :                zmpi1(1, I1, J2, j3, Jp2) = zw(1, mfft, I1)
    2073    748176270 :                zmpi1(2, I1, J2, j3, Jp2) = zw(2, mfft, I1)
    2074              :             END DO
    2075              :          END DO
    2076       925445 :          J2stf = 1
    2077              :       END DO Main
    2078       909986 :    END SUBROUTINE unmpiswitch_downcorn
    2079              : 
    2080              : ! **************************************************************************************************
    2081              : !> \brief (Based on suitable modifications of S.Goedecker routines)
    2082              : !>      Restore data into output array, calculating in the meanwhile
    2083              : !>      Hartree energy of the potential
    2084              : !> \param md1 Dimensions of the undistributed part of the real grid
    2085              : !> \param md3 Dimensions of the undistributed part of the real grid
    2086              : !> \param lot ...
    2087              : !> \param nfft number of planes
    2088              : !> \param n3 (twice the) dimension of the last FFTtransform.
    2089              : !> \param zw FFT work array
    2090              : !> \param zf Original distributed density as well as
    2091              : !>                   Distributed solution of the poisson equation (inout)
    2092              : !> \param scal Needed to achieve unitarity and correct dimensions
    2093              : !> \param ehartreetmp Hartree energy
    2094              : !> \date February 2006
    2095              : !> \author S. Goedecker, L. Genovese
    2096              : !> \note Assuming that high frequencies are in the corners
    2097              : !>      and that n3 is multiple of 4
    2098              : !>
    2099              : !>  RESTRICTIONS on USAGE
    2100              : !>      Copyright (C) Stefan Goedecker, Cornell University, Ithaca, USA, 1994
    2101              : !>      Copyright (C) Stefan Goedecker, MPI Stuttgart, Germany, 1999
    2102              : !>      Copyright (C) 2002 Stefan Goedecker, CEA Grenoble
    2103              : !>      This file is distributed under the terms of the
    2104              : !>      GNU General Public License, see http://www.gnu.org/copyleft/gpl.txt .
    2105              : ! **************************************************************************************************
    2106            0 :    SUBROUTINE F_unfill_downcorn(md1, md3, lot, nfft, n3, zw, zf, scal, ehartreetmp)
    2107              :       INTEGER, INTENT(in)                                :: md1, md3, lot, nfft, n3
    2108              :       REAL(KIND=dp), DIMENSION(2, lot, n3/2), INTENT(in) :: zw
    2109              :       REAL(KIND=dp), DIMENSION(md1, md3), INTENT(inout)  :: zf
    2110              :       REAL(KIND=dp), INTENT(in)                          :: scal
    2111              :       REAL(KIND=dp), INTENT(out)                         :: ehartreetmp
    2112              : 
    2113              :       INTEGER                                            :: i1, i3
    2114              :       REAL(KIND=dp)                                      :: pot1
    2115              : 
    2116            0 :       ehartreetmp = 0._dp
    2117            0 :       DO i3 = 1, n3/4
    2118            0 :          DO i1 = 1, nfft
    2119            0 :             pot1 = scal*zw(1, i1, i3)
    2120            0 :             ehartreetmp = ehartreetmp + pot1*zf(i1, 2*i3 - 1)
    2121            0 :             zf(i1, 2*i3 - 1) = pot1
    2122            0 :             pot1 = scal*zw(2, i1, i3)
    2123            0 :             ehartreetmp = ehartreetmp + pot1*zf(i1, 2*i3)
    2124            0 :             zf(i1, 2*i3) = pot1
    2125              :          END DO
    2126              :       END DO
    2127            0 :    END SUBROUTINE F_unfill_downcorn
    2128              : 
    2129              : END MODULE ps_wavelet_base
        

Generated by: LCOV version 2.0-1