LCOV - code coverage report
Current view: top level - src - hfx_communication.F (source / functions) Coverage Total Hit
Test: CP2K Regtests (git:42dac4a) Lines: 94.5 % 255 241
Test Date: 2025-07-25 12:55:17 Functions: 80.0 % 5 4

            Line data    Source code
       1              : !--------------------------------------------------------------------------------------------------!
       2              : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3              : !   Copyright 2000-2025 CP2K developers group <https://cp2k.org>                                   !
       4              : !                                                                                                  !
       5              : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6              : !--------------------------------------------------------------------------------------------------!
       7              : 
       8              : ! **************************************************************************************************
       9              : !> \brief Routines for data exchange between MPI processes
      10              : !> \par History
      11              : !>      04.2008 created [Manuel Guidon]
      12              : !> \author Manuel Guidon
      13              : ! **************************************************************************************************
      14              : MODULE hfx_communication
      15              :    USE atomic_kind_types,               ONLY: atomic_kind_type,&
      16              :                                               get_atomic_kind_set
      17              :    USE cp_control_types,                ONLY: dft_control_type
      18              :    USE cp_dbcsr_api,                    ONLY: dbcsr_get_block_p,&
      19              :                                               dbcsr_iterator_blocks_left,&
      20              :                                               dbcsr_iterator_next_block,&
      21              :                                               dbcsr_iterator_start,&
      22              :                                               dbcsr_iterator_stop,&
      23              :                                               dbcsr_iterator_type,&
      24              :                                               dbcsr_p_type,&
      25              :                                               dbcsr_type
      26              :    USE hfx_types,                       ONLY: hfx_2D_map,&
      27              :                                               hfx_basis_type,&
      28              :                                               hfx_type
      29              :    USE kinds,                           ONLY: dp,&
      30              :                                               int_8
      31              :    USE message_passing,                 ONLY: mp_para_env_type,&
      32              :                                               mp_request_type,&
      33              :                                               mp_waitall
      34              :    USE particle_types,                  ONLY: particle_type
      35              :    USE qs_environment_types,            ONLY: get_qs_env,&
      36              :                                               qs_environment_type
      37              : #include "./base/base_uses.f90"
      38              : 
      39              :    IMPLICIT NONE
      40              :    PRIVATE
      41              : 
      42              :    PUBLIC :: get_full_density, &
      43              :              distribute_ks_matrix, &
      44              :              scale_and_add_fock_to_ks_matrix, &
      45              :              get_atomic_block_maps
      46              :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'hfx_communication'
      47              : 
      48              : !***
      49              : 
      50              : CONTAINS
      51              : 
      52              : ! **************************************************************************************************
      53              : !> \brief - Collects full density matrix from all CPUs
      54              : !> \param para_env ...
      55              : !> \param full_density The full Density matrix
      56              : !> \param rho Distributed density
      57              : !> \param number_of_p_entries Maximal buffer size
      58              : !> \param block_offset ...
      59              : !> \param kind_of ...
      60              : !> \param basis_parameter ...
      61              : !> \param get_max_vals_spin ...
      62              : !> \param rho_beta ...
      63              : !> \param antisymmetric ...
      64              : !> \par History
      65              : !>      11.2007 created [Manuel Guidon]
      66              : !> \author Manuel Guidon
      67              : !> \note
      68              : !>      - Communication with left/right node only
      69              : !>        added a mpi_sync before and after the ring of isendrecv. This *speed up* the
      70              : !>        communication, and might protect against idle neighbors flooding a busy node
      71              : !>        with messages [Joost]
      72              : ! **************************************************************************************************
      73        43865 :    SUBROUTINE get_full_density(para_env, full_density, rho, number_of_p_entries, &
      74              :                                block_offset, kind_of, basis_parameter, &
      75              :                                get_max_vals_spin, rho_beta, antisymmetric)
      76              : 
      77              :       TYPE(mp_para_env_type), POINTER                    :: para_env
      78              :       REAL(dp), DIMENSION(:)                             :: full_density
      79              :       TYPE(dbcsr_type), POINTER                          :: rho
      80              :       INTEGER, INTENT(IN)                                :: number_of_p_entries
      81              :       INTEGER, DIMENSION(:), POINTER                     :: block_offset
      82              :       INTEGER                                            :: kind_of(*)
      83              :       TYPE(hfx_basis_type), DIMENSION(:), POINTER        :: basis_parameter
      84              :       LOGICAL, INTENT(IN)                                :: get_max_vals_spin
      85              :       TYPE(dbcsr_type), OPTIONAL, POINTER                :: rho_beta
      86              :       LOGICAL, INTENT(IN)                                :: antisymmetric
      87              : 
      88              :       INTEGER :: block_size, data_from, dest, i, iatom, icpu, ikind, iset, jatom, jkind, jset, &
      89              :          mepos, ncpu, nseta, nsetb, pa, pa1, pb, pb1, source, source_cpu
      90        43865 :       INTEGER, DIMENSION(:), POINTER                     :: nsgfa, nsgfb
      91              :       LOGICAL                                            :: found
      92              :       REAL(dp)                                           :: symmfac
      93        43865 :       REAL(dp), DIMENSION(:), POINTER                    :: recbuffer, sendbuffer, swapbuffer
      94        43865 :       REAL(dp), DIMENSION(:, :), POINTER                 :: sparse_block, sparse_block_beta
      95              :       TYPE(dbcsr_iterator_type)                          :: iter
      96       131595 :       TYPE(mp_request_type), DIMENSION(2)                :: req
      97              : 
      98     12935778 :       full_density = 0.0_dp
      99       131595 :       ALLOCATE (sendbuffer(number_of_p_entries))
     100        87730 :       ALLOCATE (recbuffer(number_of_p_entries))
     101              : 
     102        43865 :       i = 1
     103        43865 :       CALL dbcsr_iterator_start(iter, rho, shared=.FALSE.)
     104       184581 :       DO WHILE (dbcsr_iterator_blocks_left(iter))
     105       140716 :          CALL dbcsr_iterator_next_block(iter, iatom, jatom, sparse_block)
     106              :          ! the resulting vector will be only the upper triangle.
     107              :          ! in case of antisymmetry take care to change signs if a lower block gets copied
     108       140716 :          symmfac = 1.0_dp
     109       140716 :          IF (antisymmetric .AND. iatom > jatom) symmfac = -1.0_dp
     110       140716 :          ikind = kind_of(iatom)
     111       140716 :          nseta = basis_parameter(ikind)%nset
     112       140716 :          nsgfa => basis_parameter(ikind)%nsgf
     113       140716 :          jkind = kind_of(jatom)
     114       140716 :          nsetb = basis_parameter(jkind)%nset
     115       140716 :          nsgfb => basis_parameter(jkind)%nsgf
     116       184581 :          IF (get_max_vals_spin) THEN
     117              :             CALL dbcsr_get_block_p(rho_beta, &
     118            0 :                                    row=iatom, col=jatom, BLOCK=sparse_block_beta, found=found)
     119            0 :             pa = 0
     120            0 :             DO iset = 1, nseta
     121              :                pb = 0
     122            0 :                DO jset = 1, nsetb
     123            0 :                   DO pa1 = pa + 1, pa + nsgfa(iset)
     124            0 :                      DO pb1 = pb + 1, pb + nsgfb(jset)
     125            0 :                         sendbuffer(i) = MAX(ABS(sparse_block(pa1, pb1)), ABS(sparse_block_beta(pa1, pb1)))
     126            0 :                         i = i + 1
     127              :                      END DO
     128              :                   END DO
     129            0 :                   pb = pb + nsgfb(jset)
     130              :                END DO
     131            0 :                pa = pa + nsgfa(iset)
     132              :             END DO
     133              :          ELSE
     134              :             pa = 0
     135       539927 :             DO iset = 1, nseta
     136              :                pb = 0
     137      1646225 :                DO jset = 1, nsetb
     138      4110559 :                   DO pa1 = pa + 1, pa + nsgfa(iset)
     139     10546314 :                      DO pb1 = pb + 1, pb + nsgfb(jset)
     140      6435755 :                         sendbuffer(i) = sparse_block(pa1, pb1)*symmfac
     141      9299300 :                         i = i + 1
     142              :                      END DO
     143              :                   END DO
     144      1646225 :                   pb = pb + nsgfb(jset)
     145              :                END DO
     146       539927 :                pa = pa + nsgfa(iset)
     147              :             END DO
     148              :          END IF
     149              :       END DO
     150        43865 :       CALL dbcsr_iterator_stop(iter)
     151              : 
     152              :       ! sync before/after a ring of isendrecv
     153        43865 :       CALL para_env%sync()
     154        43865 :       ncpu = para_env%num_pe
     155        43865 :       mepos = para_env%mepos
     156        43865 :       dest = MODULO(mepos + 1, ncpu)
     157        43865 :       source = MODULO(mepos - 1, ncpu)
     158       131512 :       DO icpu = 0, ncpu - 1
     159        87647 :          IF (icpu .NE. ncpu - 1) THEN
     160              :             CALL para_env%isendrecv(sendbuffer, dest, recbuffer, source, &
     161        43782 :                                     req(1), req(2), 13)
     162              :          END IF
     163        87647 :          data_from = MODULO(mepos - icpu, ncpu)
     164        87647 :          source_cpu = MODULO(data_from, ncpu) + 1
     165        87647 :          block_size = block_offset(source_cpu + 1) - block_offset(source_cpu)
     166     12935695 :          full_density(block_offset(source_cpu):block_offset(source_cpu) + block_size - 1) = sendbuffer(1:block_size)
     167              : 
     168        87647 :          IF (icpu .NE. ncpu - 1) THEN
     169        43782 :             CALL mp_waitall(req)
     170              :          END IF
     171        87647 :          swapbuffer => sendbuffer
     172        87647 :          sendbuffer => recbuffer
     173       131512 :          recbuffer => swapbuffer
     174              :       END DO
     175        43865 :       DEALLOCATE (sendbuffer, recbuffer)
     176              :       ! sync before/after a ring of isendrecv
     177        43865 :       CALL para_env%sync()
     178              : 
     179        87730 :    END SUBROUTINE get_full_density
     180              : 
     181              : ! **************************************************************************************************
     182              : !> \brief - Distributes the local full Kohn-Sham matrix to all CPUS
     183              : !> \param para_env ...
     184              : !> \param full_ks The full Kohn-Sham matrix
     185              : !> \param ks_matrix Distributed Kohn-Sham matrix
     186              : !> \param number_of_p_entries Maximal buffer size
     187              : !> \param block_offset ...
     188              : !> \param kind_of ...
     189              : !> \param basis_parameter ...
     190              : !> \param off_diag_fac ...
     191              : !> \param diag_fac ...
     192              : !> \par History
     193              : !>      11.2007 created [Manuel Guidon]
     194              : !> \author Manuel Guidon
     195              : !> \note
     196              : !>      - Communication with left/right node only
     197              : ! **************************************************************************************************
     198        41081 :    SUBROUTINE distribute_ks_matrix(para_env, full_ks, ks_matrix, number_of_p_entries, &
     199              :                                    block_offset, kind_of, basis_parameter, &
     200              :                                    off_diag_fac, diag_fac)
     201              : 
     202              :       TYPE(mp_para_env_type), POINTER                    :: para_env
     203              :       REAL(dp), DIMENSION(:)                             :: full_ks
     204              :       TYPE(dbcsr_type), POINTER                          :: ks_matrix
     205              :       INTEGER, INTENT(IN)                                :: number_of_p_entries
     206              :       INTEGER, DIMENSION(:), POINTER                     :: block_offset
     207              :       INTEGER                                            :: kind_of(*)
     208              :       TYPE(hfx_basis_type), DIMENSION(:), POINTER        :: basis_parameter
     209              :       REAL(dp), INTENT(IN), OPTIONAL                     :: off_diag_fac, diag_fac
     210              : 
     211              :       INTEGER :: block_size, data_to, dest, dest_cpu, i, iatom, icpu, ikind, iset, jatom, jkind, &
     212              :          jset, mepos, ncpu, nseta, nsetb, pa, pa1, pb, pb1, source
     213        41081 :       INTEGER, DIMENSION(:), POINTER                     :: nsgfa, nsgfb
     214              :       REAL(dp)                                           :: my_fac, myd_fac
     215        41081 :       REAL(dp), DIMENSION(:), POINTER                    :: recbuffer, sendbuffer, swapbuffer
     216        41081 :       REAL(dp), DIMENSION(:, :), POINTER                 :: sparse_block
     217              :       TYPE(dbcsr_iterator_type)                          :: iter
     218       123243 :       TYPE(mp_request_type), DIMENSION(2)                :: req
     219              : 
     220        41081 :       my_fac = 1.0_dp; myd_fac = 1.0_dp
     221        41081 :       IF (PRESENT(off_diag_fac)) my_fac = off_diag_fac
     222        41081 :       IF (PRESENT(diag_fac)) myd_fac = diag_fac
     223              : 
     224       123243 :       ALLOCATE (sendbuffer(number_of_p_entries))
     225      8189361 :       sendbuffer = 0.0_dp
     226        82162 :       ALLOCATE (recbuffer(number_of_p_entries))
     227      8189361 :       recbuffer = 0.0_dp
     228              : 
     229        41081 :       ncpu = para_env%num_pe
     230        41081 :       mepos = para_env%mepos
     231        41081 :       dest = MODULO(mepos + 1, ncpu)
     232        41081 :       source = MODULO(mepos - 1, ncpu)
     233              : 
     234              :       ! sync before/after a ring of isendrecv
     235        41081 :       CALL para_env%sync()
     236        82079 :       DO icpu = 1, ncpu
     237        82079 :          i = 1
     238        82079 :          data_to = mepos - icpu
     239        82079 :          dest_cpu = MODULO(data_to, ncpu) + 1
     240        82079 :          block_size = block_offset(dest_cpu + 1) - block_offset(dest_cpu)
     241     12168513 :        sendbuffer(1:block_size) = sendbuffer(1:block_size) + full_ks(block_offset(dest_cpu):block_offset(dest_cpu) + block_size - 1)
     242        82079 :          IF (icpu .EQ. ncpu) EXIT
     243              :          CALL para_env%isendrecv(sendbuffer, dest, recbuffer, source, &
     244        40998 :                                  req(1), req(2), 13)
     245              : 
     246        40998 :          CALL mp_waitall(req)
     247        40998 :          swapbuffer => sendbuffer
     248        40998 :          sendbuffer => recbuffer
     249        82079 :          recbuffer => swapbuffer
     250              :       END DO
     251              :       ! sync before/after a ring of isendrecv
     252        41081 :       CALL para_env%sync()
     253              : 
     254        41081 :       i = 1
     255        41081 :       CALL dbcsr_iterator_start(iter, ks_matrix, shared=.FALSE.)
     256       173658 :       DO WHILE (dbcsr_iterator_blocks_left(iter))
     257       132577 :          CALL dbcsr_iterator_next_block(iter, iatom, jatom, sparse_block)
     258              : 
     259       132577 :          ikind = kind_of(iatom)
     260       132577 :          nseta = basis_parameter(ikind)%nset
     261       132577 :          nsgfa => basis_parameter(ikind)%nsgf
     262       132577 :          jkind = kind_of(jatom)
     263       132577 :          nsetb = basis_parameter(jkind)%nset
     264       132577 :          nsgfb => basis_parameter(jkind)%nsgf
     265       132577 :          pa = 0
     266       550328 :          DO iset = 1, nseta
     267              :             pb = 0
     268      1555917 :             DO jset = 1, nsetb
     269      3881756 :                DO pa1 = pa + 1, pa + nsgfa(iset)
     270      9940494 :                   DO pb1 = pb + 1, pb + nsgfb(jset)
     271      6058738 :                      IF (iatom == jatom .AND. pa1 == pb1) THEN
     272       379763 :                         sparse_block(pa1, pb1) = sendbuffer(i)*myd_fac + sparse_block(pa1, pb1)
     273              :                      ELSE
     274      5678975 :                         sparse_block(pa1, pb1) = sendbuffer(i)*my_fac + sparse_block(pa1, pb1)
     275              :                      END IF
     276      8761247 :                      i = i + 1
     277              :                   END DO
     278              :                END DO
     279      1555917 :                pb = pb + nsgfb(jset)
     280              :             END DO
     281       509247 :             pa = pa + nsgfa(iset)
     282              :          END DO
     283              :       END DO
     284        41081 :       CALL dbcsr_iterator_stop(iter)
     285              : 
     286        41081 :       DEALLOCATE (sendbuffer, recbuffer)
     287              : 
     288        82162 :    END SUBROUTINE distribute_ks_matrix
     289              : 
     290              : ! **************************************************************************************************
     291              : !> \brief - Distributes the local full Kohn-Sham matrix to all CPUS. Is called in
     292              : !>        case of adiabatic rescaling. This is just a refactored version of
     293              : !>        distribute_ks_matrix
     294              : !> \param para_env ...
     295              : !> \param qs_env ...
     296              : !> \param ks_matrix Distributed Kohn-Sham matrix
     297              : !> \param irep ...
     298              : !> \param scaling_factor ...
     299              : !> \par History
     300              : !>      11.2007 created [Manuel Guidon]
     301              : !> \author Manuel Guidon
     302              : !> \note
     303              : !>      - Communication with left/right node only
     304              : ! **************************************************************************************************
     305           88 :    SUBROUTINE scale_and_add_fock_to_ks_matrix(para_env, qs_env, ks_matrix, irep, &
     306              :                                               scaling_factor)
     307              : 
     308              :       TYPE(mp_para_env_type), POINTER                    :: para_env
     309              :       TYPE(qs_environment_type), POINTER                 :: qs_env
     310              :       TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER       :: ks_matrix
     311              :       INTEGER, INTENT(IN)                                :: irep
     312              :       REAL(dp), INTENT(IN)                               :: scaling_factor
     313              : 
     314              :       INTEGER                                            :: iatom, ikind, img, natom, nimages, nspins
     315           88 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: kind_of, last_sgf_global
     316           88 :       REAL(dp), DIMENSION(:, :), POINTER                 :: full_ks
     317           88 :       TYPE(atomic_kind_type), DIMENSION(:), POINTER      :: atomic_kind_set
     318              :       TYPE(dft_control_type), POINTER                    :: dft_control
     319              :       TYPE(hfx_basis_type), DIMENSION(:), POINTER        :: basis_parameter
     320              :       TYPE(hfx_type), POINTER                            :: actual_x_data
     321           88 :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
     322              : 
     323              : !! All shared data is saved in i_thread = 1!
     324              : 
     325           88 :       NULLIFY (dft_control)
     326           88 :       actual_x_data => qs_env%x_data(irep, 1)
     327           88 :       basis_parameter => actual_x_data%basis_parameter
     328              : 
     329              :       CALL get_qs_env(qs_env=qs_env, &
     330              :                       atomic_kind_set=atomic_kind_set, &
     331              :                       particle_set=particle_set, &
     332           88 :                       dft_control=dft_control)
     333              : 
     334           88 :       nspins = dft_control%nspins
     335           88 :       nimages = dft_control%nimages
     336           88 :       CPASSERT(nimages == 1)
     337              : 
     338           88 :       CALL get_atomic_kind_set(atomic_kind_set=atomic_kind_set, kind_of=kind_of)
     339              : 
     340           88 :       natom = SIZE(particle_set, 1)
     341          264 :       ALLOCATE (last_sgf_global(0:natom))
     342           88 :       last_sgf_global(0) = 0
     343          176 :       DO iatom = 1, natom
     344           88 :          ikind = kind_of(iatom)
     345          176 :          last_sgf_global(iatom) = last_sgf_global(iatom - 1) + basis_parameter(ikind)%nsgf_total
     346              :       END DO
     347           88 :       full_ks => actual_x_data%full_ks_alpha
     348           88 :       IF (scaling_factor /= 1.0_dp) THEN
     349        17512 :          full_ks = full_ks*scaling_factor
     350              :       END IF
     351          176 :       DO img = 1, nimages
     352              :          CALL distribute_ks_matrix(para_env, full_ks(:, img), ks_matrix(1, img)%matrix, actual_x_data%number_of_p_entries, &
     353              :                                    actual_x_data%block_offset, kind_of, basis_parameter, &
     354          176 :                                    off_diag_fac=0.5_dp)
     355              :       END DO
     356           88 :       DEALLOCATE (actual_x_data%full_ks_alpha)
     357              : 
     358           88 :       IF (nspins == 2) THEN
     359           52 :          full_ks => actual_x_data%full_ks_beta
     360           52 :          IF (scaling_factor /= 1.0_dp) THEN
     361        10348 :             full_ks = full_ks*scaling_factor
     362              :          END IF
     363          104 :          DO img = 1, nimages
     364              :             CALL distribute_ks_matrix(para_env, full_ks(:, img), ks_matrix(2, img)%matrix, actual_x_data%number_of_p_entries, &
     365              :                                       actual_x_data%block_offset, kind_of, basis_parameter, &
     366          104 :                                       off_diag_fac=0.5_dp)
     367              :          END DO
     368           52 :          DEALLOCATE (actual_x_data%full_ks_beta)
     369              :       END IF
     370              : 
     371           88 :       DEALLOCATE (last_sgf_global)
     372              : 
     373          176 :    END SUBROUTINE scale_and_add_fock_to_ks_matrix
     374              : 
     375              : ! **************************************************************************************************
     376              : !> \brief Given a 2d index pair, this function returns a 1d index pair for
     377              : !>        a symmetric upper triangle NxN matrix
     378              : !>        The compiler should inline this function, therefore it appears in
     379              : !>        several modules
     380              : !> \param i 2d index
     381              : !> \param j 2d index
     382              : !> \param N matrix size
     383              : !> \return ...
     384              : !> \par History
     385              : !>      03.2009 created [Manuel Guidon]
     386              : !> \author Manuel Guidon
     387              : ! **************************************************************************************************
     388            0 :    PURE FUNCTION get_1D_idx(i, j, N)
     389              :       INTEGER, INTENT(IN)                                :: i, j
     390              :       INTEGER(int_8), INTENT(IN)                         :: N
     391              :       INTEGER(int_8)                                     :: get_1D_idx
     392              : 
     393              :       INTEGER(int_8)                                     :: min_ij
     394              : 
     395            0 :       min_ij = MIN(i, j)
     396            0 :       get_1D_idx = min_ij*N + MAX(i, j) - (min_ij - 1)*min_ij/2 - N
     397              : 
     398            0 :    END FUNCTION get_1D_idx
     399              : 
     400              : ! **************************************************************************************************
     401              : !> \brief create a several maps array that reflects the ks matrix sparsity
     402              : !> \param matrix ...
     403              : !> \param basis_parameter ...
     404              : !> \param kind_of ...
     405              : !> \param is_assoc_atomic_block ...
     406              : !> \param number_of_p_entries ...
     407              : !> \param para_env ...
     408              : !> \param atomic_block_offset ...
     409              : !> \param set_offset ...
     410              : !> \param block_offset ...
     411              : !> \param map_atoms_to_cpus ...
     412              : !> \param nkind ...
     413              : !> \par History
     414              : !>      11.2007 refactored [Joost VandeVondele]
     415              : !>      07.2009 add new maps
     416              : !> \author Manuel Guidon
     417              : !> \notes
     418              : !>      is_assoc_atomic_block returns the mpi rank + 1 for associated blocks,
     419              : !>      zero for unassiated blocks
     420              : ! **************************************************************************************************
     421         2086 :    SUBROUTINE get_atomic_block_maps(matrix, basis_parameter, kind_of, &
     422         2086 :                                     is_assoc_atomic_block, number_of_p_entries, &
     423              :                                     para_env, atomic_block_offset, set_offset, &
     424              :                                     block_offset, map_atoms_to_cpus, nkind)
     425              : 
     426              :       TYPE(dbcsr_type), POINTER                          :: matrix
     427              :       TYPE(hfx_basis_type), DIMENSION(:)                 :: basis_parameter
     428              :       INTEGER, DIMENSION(:)                              :: kind_of
     429              :       INTEGER, DIMENSION(:, :), INTENT(OUT)              :: is_assoc_atomic_block
     430              :       INTEGER, INTENT(OUT)                               :: number_of_p_entries
     431              :       TYPE(mp_para_env_type), POINTER                    :: para_env
     432              :       INTEGER, DIMENSION(:, :), POINTER                  :: atomic_block_offset
     433              :       INTEGER, DIMENSION(:, :, :, :), POINTER            :: set_offset
     434              :       INTEGER, DIMENSION(:), POINTER                     :: block_offset
     435              :       TYPE(hfx_2D_map), DIMENSION(:), POINTER            :: map_atoms_to_cpus
     436              :       INTEGER                                            :: nkind
     437              : 
     438              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'get_atomic_block_maps'
     439              : 
     440              :       INTEGER :: handle, iatom, ibuf, icpu, ikind, ilist, iset, itask, jatom, jkind, jset, natom, &
     441              :          ncpu, nseta, nsetb, number_of_p_blocks, offset, tmp(2)
     442         2086 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: buffer_in, buffer_out, counter, rcount, &
     443         2086 :                                                             rdispl
     444         2086 :       INTEGER, DIMENSION(:), POINTER                     :: iatom_list, jatom_list, nsgfa, nsgfb
     445         2086 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: sparse_block
     446              :       TYPE(dbcsr_iterator_type)                          :: iter
     447              : 
     448         2086 :       CALL timeset(routineN, handle)
     449              : 
     450        31654 :       is_assoc_atomic_block = 0
     451         2086 :       number_of_p_entries = 0
     452         2086 :       number_of_p_blocks = 0
     453              : 
     454              :       !
     455              :       ! count number_of_p_blocks and number_of_p_entries
     456              :       !
     457         2086 :       CALL dbcsr_iterator_start(iter, matrix, shared=.FALSE.)
     458         9435 :       DO WHILE (dbcsr_iterator_blocks_left(iter))
     459         7349 :          CALL dbcsr_iterator_next_block(iter, iatom, jatom, sparse_block)
     460         7349 :          ikind = kind_of(iatom)
     461         7349 :          jkind = kind_of(jatom)
     462         7349 :          number_of_p_blocks = number_of_p_blocks + 1
     463              :          number_of_p_entries = number_of_p_entries + &
     464         7349 :                                basis_parameter(ikind)%nsgf_total*basis_parameter(jkind)%nsgf_total
     465              :       END DO
     466         2086 :       CALL dbcsr_iterator_stop(iter)
     467              : 
     468         6258 :       tmp = (/number_of_p_entries, number_of_p_blocks/)
     469         2086 :       CALL para_env%max(tmp)
     470         2086 :       number_of_p_entries = tmp(1)
     471         2086 :       number_of_p_blocks = tmp(2)
     472              :       !
     473              :       ! send this info around, so we can construct is_assoc_atomic_block
     474              :       ! pack all data in buffers and use allgatherv
     475              :       !
     476         6258 :       ALLOCATE (buffer_in(3*number_of_p_blocks))
     477         6258 :       ALLOCATE (buffer_out(3*number_of_p_blocks*para_env%num_pe))
     478         8344 :       ALLOCATE (rcount(para_env%num_pe), rdispl(para_env%num_pe))
     479              : 
     480        30208 :       buffer_in = 0
     481         2086 :       ibuf = 0
     482              : 
     483         2086 :       CALL dbcsr_iterator_start(iter, matrix, shared=.FALSE.)
     484         9435 :       DO WHILE (dbcsr_iterator_blocks_left(iter))
     485         7349 :          CALL dbcsr_iterator_next_block(iter, iatom, jatom, sparse_block)
     486              : 
     487         7349 :          buffer_in(ibuf + 1) = iatom
     488         7349 :          buffer_in(ibuf + 2) = jatom
     489         7349 :          buffer_in(ibuf + 3) = para_env%mepos + 1
     490         7349 :          ibuf = ibuf + 3
     491              :       END DO
     492         2086 :       CALL dbcsr_iterator_stop(iter)
     493              : 
     494         6256 :       rcount = SIZE(buffer_in)
     495         2086 :       rdispl(1) = 0
     496         4170 :       DO icpu = 2, para_env%num_pe
     497         4170 :          rdispl(icpu) = rdispl(icpu - 1) + rcount(icpu - 1)
     498              :       END DO
     499         2086 :       CALL para_env%allgatherv(buffer_in, buffer_out, rcount, rdispl)
     500              : 
     501        20822 :       DO ibuf = 0, number_of_p_blocks*para_env%num_pe*3 - 3, 3
     502        18736 :          itask = buffer_out(ibuf + 3)
     503              :          ! buffer_out can be 0 if buffer_in contained less elements than the max number of atom pairs
     504              :          ! is_assoc_atomic_block is a map for atom pairs to a processor (assumes symmetry, i,j on the ame as j,i)
     505        20822 :          IF (itask .NE. 0) THEN
     506        14686 :             iatom = buffer_out(ibuf + 1)
     507        14686 :             jatom = buffer_out(ibuf + 2)
     508        14686 :             is_assoc_atomic_block(iatom, jatom) = itask
     509        14686 :             is_assoc_atomic_block(jatom, iatom) = itask
     510              :          END IF
     511              :       END DO
     512              : 
     513         2086 :       IF (ASSOCIATED(map_atoms_to_cpus)) THEN
     514         2670 :          DO icpu = 1, para_env%num_pe
     515         1780 :             DEALLOCATE (map_atoms_to_cpus(icpu)%iatom_list)
     516         2670 :             DEALLOCATE (map_atoms_to_cpus(icpu)%jatom_list)
     517              :          END DO
     518          890 :          DEALLOCATE (map_atoms_to_cpus)
     519              :       END IF
     520              : 
     521         2086 :       natom = SIZE(is_assoc_atomic_block, 1)
     522        10428 :       ALLOCATE (map_atoms_to_cpus(para_env%num_pe))
     523         6258 :       ALLOCATE (counter(para_env%num_pe))
     524         6256 :       counter = 0
     525              : 
     526         8408 :       DO iatom = 1, natom
     527        23192 :          DO jatom = iatom, natom
     528        14784 :             icpu = is_assoc_atomic_block(jatom, iatom)
     529        21106 :             IF (icpu > 0) counter(icpu) = counter(icpu) + 1
     530              :          END DO
     531              :       END DO
     532         6256 :       DO icpu = 1, para_env%num_pe
     533        12454 :          ALLOCATE (map_atoms_to_cpus(icpu)%iatom_list(counter(icpu)))
     534        10370 :          ALLOCATE (map_atoms_to_cpus(icpu)%jatom_list(counter(icpu)))
     535              :       END DO
     536         6256 :       counter = 0
     537         8408 :       DO iatom = 1, natom
     538        23192 :          DO jatom = iatom, natom
     539        14784 :             icpu = is_assoc_atomic_block(jatom, iatom)
     540        21106 :             IF (icpu > 0) THEN
     541        14686 :                counter(icpu) = counter(icpu) + 1
     542        14686 :                map_atoms_to_cpus(icpu)%jatom_list(counter(icpu)) = jatom
     543        14686 :                map_atoms_to_cpus(icpu)%iatom_list(counter(icpu)) = iatom
     544              :             END IF
     545              :          END DO
     546              :       END DO
     547              : 
     548         2086 :       DEALLOCATE (counter)
     549              : 
     550         2086 :       ncpu = para_env%num_pe
     551         2086 :       offset = 1
     552        31654 :       atomic_block_offset = 0
     553         8342 :       block_offset = 0
     554         6256 :       DO icpu = 1, ncpu
     555         4170 :          iatom_list => map_atoms_to_cpus(icpu)%iatom_list
     556         4170 :          jatom_list => map_atoms_to_cpus(icpu)%jatom_list
     557         4170 :          block_offset(icpu) = offset
     558        20942 :          DO ilist = 1, SIZE(iatom_list)
     559        14686 :             iatom = iatom_list(ilist)
     560        14686 :             ikind = kind_of(iatom)
     561        14686 :             jatom = jatom_list(ilist)
     562        14686 :             jkind = kind_of(jatom)
     563        14686 :             atomic_block_offset(iatom, jatom) = offset
     564        14686 :             atomic_block_offset(jatom, iatom) = offset
     565        18856 :             offset = offset + basis_parameter(ikind)%nsgf_total*basis_parameter(jkind)%nsgf_total
     566              :          END DO
     567              :       END DO
     568         2086 :       block_offset(ncpu + 1) = offset
     569       148478 :       set_offset = 0
     570         5888 :       DO ikind = 1, nkind
     571         3802 :          nseta = basis_parameter(ikind)%nset
     572         3802 :          nsgfa => basis_parameter(ikind)%nsgf
     573        14322 :          DO jkind = 1, nkind
     574         8434 :             nsetb = basis_parameter(jkind)%nset
     575         8434 :             nsgfb => basis_parameter(jkind)%nsgf
     576         8434 :             offset = 1
     577        34554 :             DO iset = 1, nseta
     578       106470 :                DO jset = 1, nsetb
     579        75718 :                   set_offset(jset, iset, jkind, ikind) = offset
     580        98036 :                   offset = offset + nsgfa(iset)*nsgfb(jset)
     581              :                END DO
     582              :             END DO
     583              :          END DO
     584              :       END DO
     585              : 
     586         2086 :       CALL timestop(handle)
     587              : 
     588         4172 :    END SUBROUTINE get_atomic_block_maps
     589              : 
     590              : END MODULE hfx_communication
        

Generated by: LCOV version 2.0-1