LCOV - code coverage report
Current view: top level - src - hfx_communication.F (source / functions) Coverage Total Hit
Test: CP2K Regtests (git:936074a) Lines: 94.6 % 257 243
Test Date: 2025-12-04 06:27:48 Functions: 80.0 % 5 4

            Line data    Source code
       1              : !--------------------------------------------------------------------------------------------------!
       2              : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3              : !   Copyright 2000-2025 CP2K developers group <https://cp2k.org>                                   !
       4              : !                                                                                                  !
       5              : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6              : !--------------------------------------------------------------------------------------------------!
       7              : 
       8              : ! **************************************************************************************************
       9              : !> \brief Routines for data exchange between MPI processes
      10              : !> \par History
      11              : !>      04.2008 created [Manuel Guidon]
      12              : !> \author Manuel Guidon
      13              : ! **************************************************************************************************
      14              : MODULE hfx_communication
      15              :    USE atomic_kind_types,               ONLY: atomic_kind_type,&
      16              :                                               get_atomic_kind_set
      17              :    USE cp_control_types,                ONLY: dft_control_type
      18              :    USE cp_dbcsr_api,                    ONLY: dbcsr_get_block_p,&
      19              :                                               dbcsr_iterator_blocks_left,&
      20              :                                               dbcsr_iterator_next_block,&
      21              :                                               dbcsr_iterator_start,&
      22              :                                               dbcsr_iterator_stop,&
      23              :                                               dbcsr_iterator_type,&
      24              :                                               dbcsr_p_type,&
      25              :                                               dbcsr_type
      26              :    USE hfx_types,                       ONLY: hfx_2D_map,&
      27              :                                               hfx_basis_type,&
      28              :                                               hfx_type
      29              :    USE kinds,                           ONLY: dp,&
      30              :                                               int_8
      31              :    USE message_passing,                 ONLY: mp_para_env_type,&
      32              :                                               mp_request_type,&
      33              :                                               mp_waitall
      34              :    USE particle_types,                  ONLY: particle_type
      35              :    USE qs_environment_types,            ONLY: get_qs_env,&
      36              :                                               qs_environment_type
      37              : #include "./base/base_uses.f90"
      38              : 
      39              :    IMPLICIT NONE
      40              :    PRIVATE
      41              : 
      42              :    PUBLIC :: get_full_density, &
      43              :              distribute_ks_matrix, &
      44              :              scale_and_add_fock_to_ks_matrix, &
      45              :              get_atomic_block_maps
      46              :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'hfx_communication'
      47              : 
      48              : !***
      49              : 
      50              : CONTAINS
      51              : 
      52              : ! **************************************************************************************************
      53              : !> \brief - Collects full density matrix from all CPUs
      54              : !> \param para_env ...
      55              : !> \param full_density The full Density matrix
      56              : !> \param rho Distributed density
      57              : !> \param number_of_p_entries Maximal buffer size
      58              : !> \param block_offset ...
      59              : !> \param kind_of ...
      60              : !> \param basis_parameter ...
      61              : !> \param get_max_vals_spin ...
      62              : !> \param rho_beta ...
      63              : !> \param antisymmetric ...
      64              : !> \par History
      65              : !>      11.2007 created [Manuel Guidon]
      66              : !> \author Manuel Guidon
      67              : !> \note
      68              : !>      - Communication with left/right node only
      69              : !>        added a mpi_sync before and after the ring of isendrecv. This *speed up* the
      70              : !>        communication, and might protect against idle neighbors flooding a busy node
      71              : !>        with messages [Joost]
      72              : ! **************************************************************************************************
      73        45913 :    SUBROUTINE get_full_density(para_env, full_density, rho, number_of_p_entries, &
      74              :                                block_offset, kind_of, basis_parameter, &
      75              :                                get_max_vals_spin, rho_beta, antisymmetric)
      76              : 
      77              :       TYPE(mp_para_env_type), POINTER                    :: para_env
      78              :       REAL(dp), DIMENSION(:)                             :: full_density
      79              :       TYPE(dbcsr_type), POINTER                          :: rho
      80              :       INTEGER, INTENT(IN)                                :: number_of_p_entries
      81              :       INTEGER, DIMENSION(:), POINTER                     :: block_offset
      82              :       INTEGER                                            :: kind_of(*)
      83              :       TYPE(hfx_basis_type), DIMENSION(:), POINTER        :: basis_parameter
      84              :       LOGICAL, INTENT(IN)                                :: get_max_vals_spin
      85              :       TYPE(dbcsr_type), OPTIONAL, POINTER                :: rho_beta
      86              :       LOGICAL, INTENT(IN)                                :: antisymmetric
      87              : 
      88              :       INTEGER :: block_size, data_from, dest, i, iatom, icpu, ikind, iset, jatom, jkind, jset, &
      89              :          mepos, ncpu, nseta, nsetb, pa, pa1, pb, pb1, source, source_cpu
      90        45913 :       INTEGER, DIMENSION(:), POINTER                     :: nsgfa, nsgfb
      91              :       LOGICAL                                            :: found
      92              :       REAL(dp)                                           :: symmfac
      93        45913 :       REAL(dp), DIMENSION(:), POINTER                    :: recbuffer, sendbuffer, swapbuffer
      94        45913 :       REAL(dp), DIMENSION(:, :), POINTER                 :: sparse_block, sparse_block_beta
      95              :       TYPE(dbcsr_iterator_type)                          :: iter
      96       137739 :       TYPE(mp_request_type), DIMENSION(2)                :: req
      97              : 
      98     13548522 :       full_density = 0.0_dp
      99       137739 :       ALLOCATE (sendbuffer(number_of_p_entries))
     100        91826 :       ALLOCATE (recbuffer(number_of_p_entries))
     101      9169167 :       sendbuffer = 0.0_dp
     102      9169167 :       recbuffer = 0.0_dp
     103              : 
     104        45913 :       i = 1
     105        45913 :       CALL dbcsr_iterator_start(iter, rho, shared=.FALSE.)
     106       191808 :       DO WHILE (dbcsr_iterator_blocks_left(iter))
     107       145895 :          CALL dbcsr_iterator_next_block(iter, iatom, jatom, sparse_block)
     108              :          ! the resulting vector will be only the upper triangle.
     109              :          ! in case of antisymmetry take care to change signs if a lower block gets copied
     110       145895 :          symmfac = 1.0_dp
     111       145895 :          IF (antisymmetric .AND. iatom > jatom) symmfac = -1.0_dp
     112       145895 :          ikind = kind_of(iatom)
     113       145895 :          nseta = basis_parameter(ikind)%nset
     114       145895 :          nsgfa => basis_parameter(ikind)%nsgf
     115       145895 :          jkind = kind_of(jatom)
     116       145895 :          nsetb = basis_parameter(jkind)%nset
     117       145895 :          nsgfb => basis_parameter(jkind)%nsgf
     118       191808 :          IF (get_max_vals_spin) THEN
     119              :             CALL dbcsr_get_block_p(rho_beta, &
     120            0 :                                    row=iatom, col=jatom, BLOCK=sparse_block_beta, found=found)
     121            0 :             pa = 0
     122            0 :             DO iset = 1, nseta
     123              :                pb = 0
     124            0 :                DO jset = 1, nsetb
     125            0 :                   DO pa1 = pa + 1, pa + nsgfa(iset)
     126            0 :                      DO pb1 = pb + 1, pb + nsgfb(jset)
     127            0 :                         sendbuffer(i) = MAX(ABS(sparse_block(pa1, pb1)), ABS(sparse_block_beta(pa1, pb1)))
     128            0 :                         i = i + 1
     129              :                      END DO
     130              :                   END DO
     131            0 :                   pb = pb + nsgfb(jset)
     132              :                END DO
     133            0 :                pa = pa + nsgfa(iset)
     134              :             END DO
     135              :          ELSE
     136              :             pa = 0
     137       558097 :             DO iset = 1, nseta
     138              :                pb = 0
     139      1698419 :                DO jset = 1, nsetb
     140      4259694 :                   DO pa1 = pa + 1, pa + nsgfa(iset)
     141     10999773 :                      DO pb1 = pb + 1, pb + nsgfb(jset)
     142      6740079 :                         sendbuffer(i) = sparse_block(pa1, pb1)*symmfac
     143      9713556 :                         i = i + 1
     144              :                      END DO
     145              :                   END DO
     146      1698419 :                   pb = pb + nsgfb(jset)
     147              :                END DO
     148       558097 :                pa = pa + nsgfa(iset)
     149              :             END DO
     150              :          END IF
     151              :       END DO
     152        45913 :       CALL dbcsr_iterator_stop(iter)
     153              : 
     154              :       ! sync before/after a ring of isendrecv
     155        45913 :       CALL para_env%sync()
     156        45913 :       ncpu = para_env%num_pe
     157        45913 :       mepos = para_env%mepos
     158        45913 :       dest = MODULO(mepos + 1, ncpu)
     159        45913 :       source = MODULO(mepos - 1, ncpu)
     160       137656 :       DO icpu = 0, ncpu - 1
     161        91743 :          IF (icpu /= ncpu - 1) THEN
     162              :             CALL para_env%isendrecv(sendbuffer, dest, recbuffer, source, &
     163        45830 :                                     req(1), req(2), 13)
     164              :          END IF
     165        91743 :          data_from = MODULO(mepos - icpu, ncpu)
     166        91743 :          source_cpu = MODULO(data_from, ncpu) + 1
     167        91743 :          block_size = block_offset(source_cpu + 1) - block_offset(source_cpu)
     168     13548439 :          full_density(block_offset(source_cpu):block_offset(source_cpu) + block_size - 1) = sendbuffer(1:block_size)
     169              : 
     170        91743 :          IF (icpu /= ncpu - 1) THEN
     171        45830 :             CALL mp_waitall(req)
     172              :          END IF
     173        91743 :          swapbuffer => sendbuffer
     174        91743 :          sendbuffer => recbuffer
     175       137656 :          recbuffer => swapbuffer
     176              :       END DO
     177        45913 :       DEALLOCATE (sendbuffer, recbuffer)
     178              :       ! sync before/after a ring of isendrecv
     179        45913 :       CALL para_env%sync()
     180              : 
     181        91826 :    END SUBROUTINE get_full_density
     182              : 
     183              : ! **************************************************************************************************
     184              : !> \brief - Distributes the local full Kohn-Sham matrix to all CPUS
     185              : !> \param para_env ...
     186              : !> \param full_ks The full Kohn-Sham matrix
     187              : !> \param ks_matrix Distributed Kohn-Sham matrix
     188              : !> \param number_of_p_entries Maximal buffer size
     189              : !> \param block_offset ...
     190              : !> \param kind_of ...
     191              : !> \param basis_parameter ...
     192              : !> \param off_diag_fac ...
     193              : !> \param diag_fac ...
     194              : !> \par History
     195              : !>      11.2007 created [Manuel Guidon]
     196              : !> \author Manuel Guidon
     197              : !> \note
     198              : !>      - Communication with left/right node only
     199              : ! **************************************************************************************************
     200        43057 :    SUBROUTINE distribute_ks_matrix(para_env, full_ks, ks_matrix, number_of_p_entries, &
     201              :                                    block_offset, kind_of, basis_parameter, &
     202              :                                    off_diag_fac, diag_fac)
     203              : 
     204              :       TYPE(mp_para_env_type), POINTER                    :: para_env
     205              :       REAL(dp), DIMENSION(:)                             :: full_ks
     206              :       TYPE(dbcsr_type), POINTER                          :: ks_matrix
     207              :       INTEGER, INTENT(IN)                                :: number_of_p_entries
     208              :       INTEGER, DIMENSION(:), POINTER                     :: block_offset
     209              :       INTEGER                                            :: kind_of(*)
     210              :       TYPE(hfx_basis_type), DIMENSION(:), POINTER        :: basis_parameter
     211              :       REAL(dp), INTENT(IN), OPTIONAL                     :: off_diag_fac, diag_fac
     212              : 
     213              :       INTEGER :: block_size, data_to, dest, dest_cpu, i, iatom, icpu, ikind, iset, jatom, jkind, &
     214              :          jset, mepos, ncpu, nseta, nsetb, pa, pa1, pb, pb1, source
     215        43057 :       INTEGER, DIMENSION(:), POINTER                     :: nsgfa, nsgfb
     216              :       REAL(dp)                                           :: my_fac, myd_fac
     217        43057 :       REAL(dp), DIMENSION(:), POINTER                    :: recbuffer, sendbuffer, swapbuffer
     218        43057 :       REAL(dp), DIMENSION(:, :), POINTER                 :: sparse_block
     219              :       TYPE(dbcsr_iterator_type)                          :: iter
     220       129171 :       TYPE(mp_request_type), DIMENSION(2)                :: req
     221              : 
     222        43057 :       my_fac = 1.0_dp; myd_fac = 1.0_dp
     223        43057 :       IF (PRESENT(off_diag_fac)) my_fac = off_diag_fac
     224        43057 :       IF (PRESENT(diag_fac)) myd_fac = diag_fac
     225              : 
     226       129171 :       ALLOCATE (sendbuffer(number_of_p_entries))
     227      8620473 :       sendbuffer = 0.0_dp
     228        86114 :       ALLOCATE (recbuffer(number_of_p_entries))
     229      8620473 :       recbuffer = 0.0_dp
     230              : 
     231        43057 :       ncpu = para_env%num_pe
     232        43057 :       mepos = para_env%mepos
     233        43057 :       dest = MODULO(mepos + 1, ncpu)
     234        43057 :       source = MODULO(mepos - 1, ncpu)
     235              : 
     236              :       ! sync before/after a ring of isendrecv
     237        43057 :       CALL para_env%sync()
     238        86031 :       DO icpu = 1, ncpu
     239        86031 :          i = 1
     240        86031 :          data_to = mepos - icpu
     241        86031 :          dest_cpu = MODULO(data_to, ncpu) + 1
     242        86031 :          block_size = block_offset(dest_cpu + 1) - block_offset(dest_cpu)
     243     12755945 :        sendbuffer(1:block_size) = sendbuffer(1:block_size) + full_ks(block_offset(dest_cpu):block_offset(dest_cpu) + block_size - 1)
     244        86031 :          IF (icpu == ncpu) EXIT
     245              :          CALL para_env%isendrecv(sendbuffer, dest, recbuffer, source, &
     246        42974 :                                  req(1), req(2), 13)
     247              : 
     248        42974 :          CALL mp_waitall(req)
     249        42974 :          swapbuffer => sendbuffer
     250        42974 :          sendbuffer => recbuffer
     251        86031 :          recbuffer => swapbuffer
     252              :       END DO
     253              :       ! sync before/after a ring of isendrecv
     254        43057 :       CALL para_env%sync()
     255              : 
     256        43057 :       i = 1
     257        43057 :       CALL dbcsr_iterator_start(iter, ks_matrix, shared=.FALSE.)
     258       180645 :       DO WHILE (dbcsr_iterator_blocks_left(iter))
     259       137588 :          CALL dbcsr_iterator_next_block(iter, iatom, jatom, sparse_block)
     260              : 
     261       137588 :          ikind = kind_of(iatom)
     262       137588 :          nseta = basis_parameter(ikind)%nset
     263       137588 :          nsgfa => basis_parameter(ikind)%nsgf
     264       137588 :          jkind = kind_of(jatom)
     265       137588 :          nsetb = basis_parameter(jkind)%nset
     266       137588 :          nsgfb => basis_parameter(jkind)%nsgf
     267       137588 :          pa = 0
     268       569868 :          DO iset = 1, nseta
     269              :             pb = 0
     270      1606187 :             DO jset = 1, nsetb
     271      4025033 :                DO pa1 = pa + 1, pa + nsgfa(iset)
     272     10375511 :                   DO pb1 = pb + 1, pb + nsgfb(jset)
     273      6350478 :                      IF (iatom == jatom .AND. pa1 == pb1) THEN
     274       398359 :                         sparse_block(pa1, pb1) = sendbuffer(i)*myd_fac + sparse_block(pa1, pb1)
     275              :                      ELSE
     276      5952119 :                         sparse_block(pa1, pb1) = sendbuffer(i)*my_fac + sparse_block(pa1, pb1)
     277              :                      END IF
     278      9158547 :                      i = i + 1
     279              :                   END DO
     280              :                END DO
     281      1606187 :                pb = pb + nsgfb(jset)
     282              :             END DO
     283       526811 :             pa = pa + nsgfa(iset)
     284              :          END DO
     285              :       END DO
     286        43057 :       CALL dbcsr_iterator_stop(iter)
     287              : 
     288        43057 :       DEALLOCATE (sendbuffer, recbuffer)
     289              : 
     290        86114 :    END SUBROUTINE distribute_ks_matrix
     291              : 
     292              : ! **************************************************************************************************
     293              : !> \brief - Distributes the local full Kohn-Sham matrix to all CPUS. Is called in
     294              : !>        case of adiabatic rescaling. This is just a refactored version of
     295              : !>        distribute_ks_matrix
     296              : !> \param para_env ...
     297              : !> \param qs_env ...
     298              : !> \param ks_matrix Distributed Kohn-Sham matrix
     299              : !> \param irep ...
     300              : !> \param scaling_factor ...
     301              : !> \par History
     302              : !>      11.2007 created [Manuel Guidon]
     303              : !> \author Manuel Guidon
     304              : !> \note
     305              : !>      - Communication with left/right node only
     306              : ! **************************************************************************************************
     307           88 :    SUBROUTINE scale_and_add_fock_to_ks_matrix(para_env, qs_env, ks_matrix, irep, &
     308              :                                               scaling_factor)
     309              : 
     310              :       TYPE(mp_para_env_type), POINTER                    :: para_env
     311              :       TYPE(qs_environment_type), POINTER                 :: qs_env
     312              :       TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER       :: ks_matrix
     313              :       INTEGER, INTENT(IN)                                :: irep
     314              :       REAL(dp), INTENT(IN)                               :: scaling_factor
     315              : 
     316              :       INTEGER                                            :: iatom, ikind, img, natom, nimages, nspins
     317           88 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: kind_of, last_sgf_global
     318           88 :       REAL(dp), DIMENSION(:, :), POINTER                 :: full_ks
     319           88 :       TYPE(atomic_kind_type), DIMENSION(:), POINTER      :: atomic_kind_set
     320              :       TYPE(dft_control_type), POINTER                    :: dft_control
     321              :       TYPE(hfx_basis_type), DIMENSION(:), POINTER        :: basis_parameter
     322              :       TYPE(hfx_type), POINTER                            :: actual_x_data
     323           88 :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
     324              : 
     325              : !! All shared data is saved in i_thread = 1!
     326              : 
     327           88 :       NULLIFY (dft_control)
     328           88 :       actual_x_data => qs_env%x_data(irep, 1)
     329           88 :       basis_parameter => actual_x_data%basis_parameter
     330              : 
     331              :       CALL get_qs_env(qs_env=qs_env, &
     332              :                       atomic_kind_set=atomic_kind_set, &
     333              :                       particle_set=particle_set, &
     334           88 :                       dft_control=dft_control)
     335              : 
     336           88 :       nspins = dft_control%nspins
     337           88 :       nimages = dft_control%nimages
     338           88 :       CPASSERT(nimages == 1)
     339              : 
     340           88 :       CALL get_atomic_kind_set(atomic_kind_set=atomic_kind_set, kind_of=kind_of)
     341              : 
     342           88 :       natom = SIZE(particle_set, 1)
     343          264 :       ALLOCATE (last_sgf_global(0:natom))
     344           88 :       last_sgf_global(0) = 0
     345          176 :       DO iatom = 1, natom
     346           88 :          ikind = kind_of(iatom)
     347          176 :          last_sgf_global(iatom) = last_sgf_global(iatom - 1) + basis_parameter(ikind)%nsgf_total
     348              :       END DO
     349           88 :       full_ks => actual_x_data%full_ks_alpha
     350           88 :       IF (scaling_factor /= 1.0_dp) THEN
     351        17512 :          full_ks = full_ks*scaling_factor
     352              :       END IF
     353          176 :       DO img = 1, nimages
     354              :          CALL distribute_ks_matrix(para_env, full_ks(:, img), ks_matrix(1, img)%matrix, actual_x_data%number_of_p_entries, &
     355              :                                    actual_x_data%block_offset, kind_of, basis_parameter, &
     356          176 :                                    off_diag_fac=0.5_dp)
     357              :       END DO
     358           88 :       DEALLOCATE (actual_x_data%full_ks_alpha)
     359              : 
     360           88 :       IF (nspins == 2) THEN
     361           52 :          full_ks => actual_x_data%full_ks_beta
     362           52 :          IF (scaling_factor /= 1.0_dp) THEN
     363        10348 :             full_ks = full_ks*scaling_factor
     364              :          END IF
     365          104 :          DO img = 1, nimages
     366              :             CALL distribute_ks_matrix(para_env, full_ks(:, img), ks_matrix(2, img)%matrix, actual_x_data%number_of_p_entries, &
     367              :                                       actual_x_data%block_offset, kind_of, basis_parameter, &
     368          104 :                                       off_diag_fac=0.5_dp)
     369              :          END DO
     370           52 :          DEALLOCATE (actual_x_data%full_ks_beta)
     371              :       END IF
     372              : 
     373           88 :       DEALLOCATE (last_sgf_global)
     374              : 
     375          176 :    END SUBROUTINE scale_and_add_fock_to_ks_matrix
     376              : 
     377              : ! **************************************************************************************************
     378              : !> \brief Given a 2d index pair, this function returns a 1d index pair for
     379              : !>        a symmetric upper triangle NxN matrix
     380              : !>        The compiler should inline this function, therefore it appears in
     381              : !>        several modules
     382              : !> \param i 2d index
     383              : !> \param j 2d index
     384              : !> \param N matrix size
     385              : !> \return ...
     386              : !> \par History
     387              : !>      03.2009 created [Manuel Guidon]
     388              : !> \author Manuel Guidon
     389              : ! **************************************************************************************************
     390            0 :    PURE FUNCTION get_1D_idx(i, j, N)
     391              :       INTEGER, INTENT(IN)                                :: i, j
     392              :       INTEGER(int_8), INTENT(IN)                         :: N
     393              :       INTEGER(int_8)                                     :: get_1D_idx
     394              : 
     395              :       INTEGER(int_8)                                     :: min_ij
     396              : 
     397            0 :       min_ij = MIN(i, j)
     398            0 :       get_1D_idx = min_ij*N + MAX(i, j) - (min_ij - 1)*min_ij/2 - N
     399              : 
     400            0 :    END FUNCTION get_1D_idx
     401              : 
     402              : ! **************************************************************************************************
     403              : !> \brief create a several maps array that reflects the ks matrix sparsity
     404              : !> \param matrix ...
     405              : !> \param basis_parameter ...
     406              : !> \param kind_of ...
     407              : !> \param is_assoc_atomic_block ...
     408              : !> \param number_of_p_entries ...
     409              : !> \param para_env ...
     410              : !> \param atomic_block_offset ...
     411              : !> \param set_offset ...
     412              : !> \param block_offset ...
     413              : !> \param map_atoms_to_cpus ...
     414              : !> \param nkind ...
     415              : !> \par History
     416              : !>      11.2007 refactored [Joost VandeVondele]
     417              : !>      07.2009 add new maps
     418              : !> \author Manuel Guidon
     419              : !> \notes
     420              : !>      is_assoc_atomic_block returns the mpi rank + 1 for associated blocks,
     421              : !>      zero for unassiated blocks
     422              : ! **************************************************************************************************
     423         2188 :    SUBROUTINE get_atomic_block_maps(matrix, basis_parameter, kind_of, &
     424         2188 :                                     is_assoc_atomic_block, number_of_p_entries, &
     425              :                                     para_env, atomic_block_offset, set_offset, &
     426              :                                     block_offset, map_atoms_to_cpus, nkind)
     427              : 
     428              :       TYPE(dbcsr_type), POINTER                          :: matrix
     429              :       TYPE(hfx_basis_type), DIMENSION(:)                 :: basis_parameter
     430              :       INTEGER, DIMENSION(:)                              :: kind_of
     431              :       INTEGER, DIMENSION(:, :), INTENT(OUT)              :: is_assoc_atomic_block
     432              :       INTEGER, INTENT(OUT)                               :: number_of_p_entries
     433              :       TYPE(mp_para_env_type), POINTER                    :: para_env
     434              :       INTEGER, DIMENSION(:, :), POINTER                  :: atomic_block_offset
     435              :       INTEGER, DIMENSION(:, :, :, :), POINTER            :: set_offset
     436              :       INTEGER, DIMENSION(:), POINTER                     :: block_offset
     437              :       TYPE(hfx_2D_map), DIMENSION(:), POINTER            :: map_atoms_to_cpus
     438              :       INTEGER                                            :: nkind
     439              : 
     440              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'get_atomic_block_maps'
     441              : 
     442              :       INTEGER :: handle, iatom, ibuf, icpu, ikind, ilist, iset, itask, jatom, jkind, jset, natom, &
     443              :          ncpu, nseta, nsetb, number_of_p_blocks, offset, tmp(2)
     444         2188 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: buffer_in, buffer_out, counter, rcount, &
     445         2188 :                                                             rdispl
     446         2188 :       INTEGER, DIMENSION(:), POINTER                     :: iatom_list, jatom_list, nsgfa, nsgfb
     447         2188 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: sparse_block
     448              :       TYPE(dbcsr_iterator_type)                          :: iter
     449              : 
     450         2188 :       CALL timeset(routineN, handle)
     451              : 
     452        32536 :       is_assoc_atomic_block = 0
     453         2188 :       number_of_p_entries = 0
     454         2188 :       number_of_p_blocks = 0
     455              : 
     456              :       !
     457              :       ! count number_of_p_blocks and number_of_p_entries
     458              :       !
     459         2188 :       CALL dbcsr_iterator_start(iter, matrix, shared=.FALSE.)
     460         9732 :       DO WHILE (dbcsr_iterator_blocks_left(iter))
     461         7544 :          CALL dbcsr_iterator_next_block(iter, iatom, jatom, sparse_block)
     462         7544 :          ikind = kind_of(iatom)
     463         7544 :          jkind = kind_of(jatom)
     464         7544 :          number_of_p_blocks = number_of_p_blocks + 1
     465              :          number_of_p_entries = number_of_p_entries + &
     466         7544 :                                basis_parameter(ikind)%nsgf_total*basis_parameter(jkind)%nsgf_total
     467              :       END DO
     468         2188 :       CALL dbcsr_iterator_stop(iter)
     469              : 
     470         6564 :       tmp = [number_of_p_entries, number_of_p_blocks]
     471         2188 :       CALL para_env%max(tmp)
     472         2188 :       number_of_p_entries = tmp(1)
     473         2188 :       number_of_p_blocks = tmp(2)
     474              :       !
     475              :       ! send this info around, so we can construct is_assoc_atomic_block
     476              :       ! pack all data in buffers and use allgatherv
     477              :       !
     478         6564 :       ALLOCATE (buffer_in(3*number_of_p_blocks))
     479         6564 :       ALLOCATE (buffer_out(3*number_of_p_blocks*para_env%num_pe))
     480         8752 :       ALLOCATE (rcount(para_env%num_pe), rdispl(para_env%num_pe))
     481              : 
     482        31090 :       buffer_in = 0
     483         2188 :       ibuf = 0
     484              : 
     485         2188 :       CALL dbcsr_iterator_start(iter, matrix, shared=.FALSE.)
     486         9732 :       DO WHILE (dbcsr_iterator_blocks_left(iter))
     487         7544 :          CALL dbcsr_iterator_next_block(iter, iatom, jatom, sparse_block)
     488              : 
     489         7544 :          buffer_in(ibuf + 1) = iatom
     490         7544 :          buffer_in(ibuf + 2) = jatom
     491         7544 :          buffer_in(ibuf + 3) = para_env%mepos + 1
     492         7544 :          ibuf = ibuf + 3
     493              :       END DO
     494         2188 :       CALL dbcsr_iterator_stop(iter)
     495              : 
     496         6562 :       rcount = SIZE(buffer_in)
     497         2188 :       rdispl(1) = 0
     498         4374 :       DO icpu = 2, para_env%num_pe
     499         4374 :          rdispl(icpu) = rdispl(icpu - 1) + rcount(icpu - 1)
     500              :       END DO
     501         2188 :       CALL para_env%allgatherv(buffer_in, buffer_out, rcount, rdispl)
     502              : 
     503        21444 :       DO ibuf = 0, number_of_p_blocks*para_env%num_pe*3 - 3, 3
     504        19256 :          itask = buffer_out(ibuf + 3)
     505              :          ! buffer_out can be 0 if buffer_in contained less elements than the max number of atom pairs
     506              :          ! is_assoc_atomic_block is a map for atom pairs to a processor (assumes symmetry, i,j on the ame as j,i)
     507        21444 :          IF (itask /= 0) THEN
     508        15076 :             iatom = buffer_out(ibuf + 1)
     509        15076 :             jatom = buffer_out(ibuf + 2)
     510        15076 :             is_assoc_atomic_block(iatom, jatom) = itask
     511        15076 :             is_assoc_atomic_block(jatom, iatom) = itask
     512              :          END IF
     513              :       END DO
     514              : 
     515         2188 :       IF (ASSOCIATED(map_atoms_to_cpus)) THEN
     516         2856 :          DO icpu = 1, para_env%num_pe
     517         1904 :             DEALLOCATE (map_atoms_to_cpus(icpu)%iatom_list)
     518         2856 :             DEALLOCATE (map_atoms_to_cpus(icpu)%jatom_list)
     519              :          END DO
     520          952 :          DEALLOCATE (map_atoms_to_cpus)
     521              :       END IF
     522              : 
     523         2188 :       natom = SIZE(is_assoc_atomic_block, 1)
     524        10938 :       ALLOCATE (map_atoms_to_cpus(para_env%num_pe))
     525         6564 :       ALLOCATE (counter(para_env%num_pe))
     526         6562 :       counter = 0
     527              : 
     528         8742 :       DO iatom = 1, natom
     529        23916 :          DO jatom = iatom, natom
     530        15174 :             icpu = is_assoc_atomic_block(jatom, iatom)
     531        21728 :             IF (icpu > 0) counter(icpu) = counter(icpu) + 1
     532              :          END DO
     533              :       END DO
     534         6562 :       DO icpu = 1, para_env%num_pe
     535        13066 :          ALLOCATE (map_atoms_to_cpus(icpu)%iatom_list(counter(icpu)))
     536        10880 :          ALLOCATE (map_atoms_to_cpus(icpu)%jatom_list(counter(icpu)))
     537              :       END DO
     538         6562 :       counter = 0
     539         8742 :       DO iatom = 1, natom
     540        23916 :          DO jatom = iatom, natom
     541        15174 :             icpu = is_assoc_atomic_block(jatom, iatom)
     542        21728 :             IF (icpu > 0) THEN
     543        15076 :                counter(icpu) = counter(icpu) + 1
     544        15076 :                map_atoms_to_cpus(icpu)%jatom_list(counter(icpu)) = jatom
     545        15076 :                map_atoms_to_cpus(icpu)%iatom_list(counter(icpu)) = iatom
     546              :             END IF
     547              :          END DO
     548              :       END DO
     549              : 
     550         2188 :       DEALLOCATE (counter)
     551              : 
     552         2188 :       ncpu = para_env%num_pe
     553         2188 :       offset = 1
     554        32536 :       atomic_block_offset = 0
     555         8750 :       block_offset = 0
     556         6562 :       DO icpu = 1, ncpu
     557         4374 :          iatom_list => map_atoms_to_cpus(icpu)%iatom_list
     558         4374 :          jatom_list => map_atoms_to_cpus(icpu)%jatom_list
     559         4374 :          block_offset(icpu) = offset
     560        21638 :          DO ilist = 1, SIZE(iatom_list)
     561        15076 :             iatom = iatom_list(ilist)
     562        15076 :             ikind = kind_of(iatom)
     563        15076 :             jatom = jatom_list(ilist)
     564        15076 :             jkind = kind_of(jatom)
     565        15076 :             atomic_block_offset(iatom, jatom) = offset
     566        15076 :             atomic_block_offset(jatom, iatom) = offset
     567        19450 :             offset = offset + basis_parameter(ikind)%nsgf_total*basis_parameter(jkind)%nsgf_total
     568              :          END DO
     569              :       END DO
     570         2188 :       block_offset(ncpu + 1) = offset
     571       152780 :       set_offset = 0
     572         6120 :       DO ikind = 1, nkind
     573         3932 :          nseta = basis_parameter(ikind)%nset
     574         3932 :          nsgfa => basis_parameter(ikind)%nsgf
     575        14740 :          DO jkind = 1, nkind
     576         8620 :             nsetb = basis_parameter(jkind)%nset
     577         8620 :             nsgfb => basis_parameter(jkind)%nsgf
     578         8620 :             offset = 1
     579        35502 :             DO iset = 1, nseta
     580       109836 :                DO jset = 1, nsetb
     581        78266 :                   set_offset(jset, iset, jkind, ikind) = offset
     582       101216 :                   offset = offset + nsgfa(iset)*nsgfb(jset)
     583              :                END DO
     584              :             END DO
     585              :          END DO
     586              :       END DO
     587              : 
     588         2188 :       CALL timestop(handle)
     589              : 
     590         4376 :    END SUBROUTINE get_atomic_block_maps
     591              : 
     592              : END MODULE hfx_communication
        

Generated by: LCOV version 2.0-1