LCOV - code coverage report
Current view: top level - src - qs_fb_atomic_matrix_methods.F (source / functions) Coverage Total Hit
Test: CP2K Regtests (git:936074a) Lines: 98.0 % 251 246
Test Date: 2025-12-04 06:27:48 Functions: 100.0 % 5 5

            Line data    Source code
       1              : !--------------------------------------------------------------------------------------------------!
       2              : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3              : !   Copyright 2000-2025 CP2K developers group <https://cp2k.org>                                   !
       4              : !                                                                                                  !
       5              : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6              : !--------------------------------------------------------------------------------------------------!
       7              : 
       8              : MODULE qs_fb_atomic_matrix_methods
       9              : 
      10              :    USE cp_dbcsr_api,                    ONLY: dbcsr_get_block_p,&
      11              :                                               dbcsr_get_info,&
      12              :                                               dbcsr_get_stored_coordinates,&
      13              :                                               dbcsr_type
      14              :    USE kinds,                           ONLY: dp,&
      15              :                                               int_8
      16              :    USE message_passing,                 ONLY: mp_para_env_type
      17              :    USE qs_fb_atomic_halo_types,         ONLY: fb_atomic_halo_atom_global2halo,&
      18              :                                               fb_atomic_halo_get,&
      19              :                                               fb_atomic_halo_has_data,&
      20              :                                               fb_atomic_halo_list_get,&
      21              :                                               fb_atomic_halo_list_obj,&
      22              :                                               fb_atomic_halo_obj
      23              :    USE qs_fb_com_tasks_types,           ONLY: &
      24              :         TASK_COST, TASK_DEST, TASK_N_RECORDS, TASK_PAIR, TASK_SRC, &
      25              :         fb_com_atom_pairs_calc_buffer_sizes, fb_com_atom_pairs_create, fb_com_atom_pairs_decode, &
      26              :         fb_com_atom_pairs_get, fb_com_atom_pairs_has_data, fb_com_atom_pairs_init, &
      27              :         fb_com_atom_pairs_nullify, fb_com_atom_pairs_obj, fb_com_atom_pairs_release, &
      28              :         fb_com_tasks_build_atom_pairs, fb_com_tasks_create, fb_com_tasks_decode_pair, &
      29              :         fb_com_tasks_encode_pair, fb_com_tasks_get, fb_com_tasks_nullify, fb_com_tasks_obj, &
      30              :         fb_com_tasks_release, fb_com_tasks_set, fb_com_tasks_transpose_dest_src
      31              :    USE qs_fb_matrix_data_types,         ONLY: fb_matrix_data_get,&
      32              :                                               fb_matrix_data_has_data,&
      33              :                                               fb_matrix_data_obj
      34              : #include "./base/base_uses.f90"
      35              : 
      36              :    IMPLICIT NONE
      37              : 
      38              :    PRIVATE
      39              : 
      40              :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'qs_fb_atomic_matrix_methods'
      41              : 
      42              :    PUBLIC :: fb_atmatrix_calc_size, &
      43              :              fb_atmatrix_construct, &
      44              :              fb_atmatrix_construct_2, &
      45              :              fb_atmatrix_generate_com_pairs_2
      46              : 
      47              : CONTAINS
      48              : 
      49              : ! **********************************************************************
      50              : !> \brief Calculates the atomic matrix size from a given DBCSR matrix
      51              : !>        and atomic halo. It also calculates the first row (col) or the
      52              : !>        row (col) atomic blocks in the atomic matrix
      53              : !> \param dbcsr_mat : pointer to the DBCSR matrix the atomic matrix is
      54              : !>                    to be constructed from
      55              : !> \param atomic_halo : the atomic halo used for defining the atomic
      56              : !>                      matrix from the DBCSR matrix
      57              : !> \param nrows : outputs total number of rows in the atomic matrix
      58              : !> \param ncols : outputs total number of cols in the atomic matrix
      59              : !> \param blk_row_start : first row in each atomic blk row in the
      60              : !>                        atomic matrix
      61              : !> \param blk_col_start : first col in each atomic blk col in the
      62              : !>                        atomic matrix
      63              : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
      64              : ! **************************************************************************************************
      65         1280 :    SUBROUTINE fb_atmatrix_calc_size(dbcsr_mat, &
      66              :                                     atomic_halo, &
      67              :                                     nrows, &
      68              :                                     ncols, &
      69          640 :                                     blk_row_start, &
      70          640 :                                     blk_col_start)
      71              :       TYPE(dbcsr_type), POINTER                          :: dbcsr_mat
      72              :       TYPE(fb_atomic_halo_obj), INTENT(IN)               :: atomic_halo
      73              :       INTEGER, INTENT(OUT)                               :: nrows, ncols
      74              :       INTEGER, DIMENSION(:), INTENT(OUT)                 :: blk_row_start, blk_col_start
      75              : 
      76              :       INTEGER                                            :: ii, natoms_in_halo
      77          640 :       INTEGER, DIMENSION(:), POINTER                     :: col_block_size_data, halo_atoms, &
      78          640 :                                                             row_block_size_data
      79              :       LOGICAL                                            :: check_ok
      80              : 
      81          640 :       NULLIFY (halo_atoms, row_block_size_data, col_block_size_data)
      82              : 
      83          640 :       CALL dbcsr_get_info(dbcsr_mat, row_blk_size=row_block_size_data, col_blk_size=col_block_size_data)
      84              :       CALL fb_atomic_halo_get(atomic_halo=atomic_halo, &
      85              :                               natoms=natoms_in_halo, &
      86          640 :                               halo_atoms=halo_atoms)
      87          640 :       check_ok = SIZE(blk_row_start) >= (natoms_in_halo + 1)
      88          640 :       CPASSERT(check_ok)
      89          640 :       check_ok = SIZE(blk_col_start) >= (natoms_in_halo + 1)
      90          640 :       CPASSERT(check_ok)
      91         6400 :       blk_row_start = 0
      92         6400 :       blk_col_start = 0
      93          640 :       nrows = 0
      94          640 :       ncols = 0
      95         5760 :       DO ii = 1, natoms_in_halo
      96         5120 :          blk_row_start(ii) = nrows + 1
      97         5120 :          blk_col_start(ii) = ncols + 1
      98         5120 :          nrows = nrows + row_block_size_data(halo_atoms(ii))
      99         5760 :          ncols = ncols + col_block_size_data(halo_atoms(ii))
     100              :       END DO
     101          640 :       blk_row_start(natoms_in_halo + 1) = nrows + 1
     102          640 :       blk_col_start(natoms_in_halo + 1) = ncols + 1
     103          640 :    END SUBROUTINE fb_atmatrix_calc_size
     104              : 
     105              : ! ****************************************************************************
     106              : !> \brief Constructs atomic matrix for filter basis method from a given
     107              : !>        DBCSR matrix and a set of atomic send and recv pairs
     108              : !>        corresponding to the matrix blocks that needs to be included
     109              : !>        in the atomic matrix. This version is for when we do MPI
     110              : !>        communications at every step, for each atomic matrix.
     111              : !> \param dbcsr_mat : the DBCSR matrix the atomic matrix is to be
     112              : !>                    constructed from
     113              : !> \param atomic_halo : the atomic halo conrresponding to this atomic
     114              : !>                      matrix
     115              : !> \param para_env : cp2k parallel environment
     116              : !> \param atomic_matrix : the atomic matrix to be constructed, it should
     117              : !>                        have already been allocated prior entering
     118              : !>                        this subroutine
     119              : !> \param blk_row_start : first row in each atomic blk row in the
     120              : !>                        atomic matrix
     121              : !> \param blk_col_start : first col in each atomic blk col in the
     122              : !>                        atomic matrix
     123              : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     124              : ! **************************************************************************************************
     125          512 :    SUBROUTINE fb_atmatrix_construct(dbcsr_mat, &
     126              :                                     atomic_halo, &
     127              :                                     para_env, &
     128          512 :                                     atomic_matrix, &
     129          512 :                                     blk_row_start, &
     130          512 :                                     blk_col_start)
     131              :       TYPE(dbcsr_type), POINTER                          :: dbcsr_mat
     132              :       TYPE(fb_atomic_halo_obj), INTENT(IN)               :: atomic_halo
     133              :       TYPE(mp_para_env_type), POINTER                    :: para_env
     134              :       REAL(KIND=dp), DIMENSION(:, :), INTENT(OUT)        :: atomic_matrix
     135              :       INTEGER, DIMENSION(:), INTENT(IN)                  :: blk_row_start, blk_col_start
     136              : 
     137              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'fb_atmatrix_construct'
     138              : 
     139              :       INTEGER :: handle, iatom, iatom_in_halo, ii, ind, ipair, ipe, jatom, jatom_in_halo, jj, &
     140              :          ncols_blk, npairs_recv, npairs_send, nrows_blk, numprocs, pe, recv_encode, send_encode
     141          512 :       INTEGER(KIND=int_8), DIMENSION(:), POINTER         :: pairs_recv, pairs_send
     142          512 :       INTEGER, ALLOCATABLE, DIMENSION(:) :: recv_disps, recv_pair_count, recv_pair_disps, &
     143          512 :          recv_sizes, send_disps, send_pair_count, send_pair_disps, send_sizes
     144          512 :       INTEGER, DIMENSION(:), POINTER                     :: col_block_size_data, row_block_size_data
     145              :       LOGICAL                                            :: found
     146          512 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: recv_buf, send_buf
     147          512 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: mat_block
     148              :       TYPE(fb_com_atom_pairs_obj)                        :: atom_pairs_recv, atom_pairs_send
     149              : 
     150          512 :       CALL timeset(routineN, handle)
     151              : 
     152          512 :       NULLIFY (pairs_send, pairs_recv, mat_block, &
     153          512 :                row_block_size_data, col_block_size_data)
     154          512 :       CALL fb_com_atom_pairs_nullify(atom_pairs_send)
     155          512 :       CALL fb_com_atom_pairs_nullify(atom_pairs_recv)
     156              : 
     157              :       ! initialise atomic matrix
     158          512 :       IF (SIZE(atomic_matrix, 1) > 0 .AND. SIZE(atomic_matrix, 2) > 0) THEN
     159      5591552 :          atomic_matrix = 0.0_dp
     160              :       END IF
     161              : 
     162              :       ! generate send and receive atomic pairs
     163          512 :       CALL fb_com_atom_pairs_create(atom_pairs_send)
     164          512 :       CALL fb_com_atom_pairs_create(atom_pairs_recv)
     165              :       CALL fb_atmatrix_generate_com_pairs(dbcsr_mat, &
     166              :                                           atomic_halo, &
     167              :                                           para_env, &
     168              :                                           atom_pairs_send, &
     169          512 :                                           atom_pairs_recv)
     170              : 
     171              :       ! get com pair informations
     172              :       CALL fb_com_atom_pairs_get(atom_pairs=atom_pairs_send, &
     173              :                                  pairs=pairs_send, &
     174              :                                  npairs=npairs_send, &
     175          512 :                                  natoms_encode=send_encode)
     176              :       CALL fb_com_atom_pairs_get(atom_pairs=atom_pairs_recv, &
     177              :                                  pairs=pairs_recv, &
     178              :                                  npairs=npairs_recv, &
     179          512 :                                  natoms_encode=recv_encode)
     180              : 
     181              :       ! get para_env info
     182          512 :       numprocs = para_env%num_pe
     183              : 
     184              :       ! get dbcsr row and col block sizes
     185          512 :       CALL dbcsr_get_info(dbcsr_mat, row_blk_size=row_block_size_data, col_blk_size=col_block_size_data)
     186              : 
     187              :       ! allocate temporary arrays for send
     188         1536 :       ALLOCATE (send_sizes(numprocs))
     189         1024 :       ALLOCATE (send_disps(numprocs))
     190         1024 :       ALLOCATE (send_pair_count(numprocs))
     191         1024 :       ALLOCATE (send_pair_disps(numprocs))
     192              : 
     193              :       ! setup send buffer sizes
     194              :       CALL fb_com_atom_pairs_calc_buffer_sizes(atom_pairs_send, &
     195              :                                                numprocs, &
     196              :                                                row_block_size_data, &
     197              :                                                col_block_size_data, &
     198              :                                                send_sizes, &
     199              :                                                send_disps, &
     200              :                                                send_pair_count, &
     201          512 :                                                send_pair_disps)
     202              :       ! allocate send buffer
     203         2560 :       ALLOCATE (send_buf(SUM(send_sizes)))
     204              : 
     205              :       ! allocate temporary arrays for recv
     206         1024 :       ALLOCATE (recv_sizes(numprocs))
     207         1024 :       ALLOCATE (recv_disps(numprocs))
     208         1024 :       ALLOCATE (recv_pair_count(numprocs))
     209         1024 :       ALLOCATE (recv_pair_disps(numprocs))
     210              : 
     211              :       ! setup recv buffer sizes
     212              :       CALL fb_com_atom_pairs_calc_buffer_sizes(atom_pairs_recv, &
     213              :                                                numprocs, &
     214              :                                                row_block_size_data, &
     215              :                                                col_block_size_data, &
     216              :                                                recv_sizes, &
     217              :                                                recv_disps, &
     218              :                                                recv_pair_count, &
     219          512 :                                                recv_pair_disps)
     220              :       ! allocate recv buffer
     221         2560 :       ALLOCATE (recv_buf(SUM(recv_sizes)))
     222              :       ! do packing
     223         1536 :       DO ipe = 1, numprocs
     224              :          ! need to reuse send_sizes as an accumulative displacement, so recalculate
     225         1024 :          send_sizes(ipe) = 0
     226        19968 :          DO ipair = 1, send_pair_count(ipe)
     227              :             CALL fb_com_atom_pairs_decode(pairs_send(send_pair_disps(ipe) + ipair), &
     228        18432 :                                           pe, iatom, jatom, send_encode)
     229        18432 :             nrows_blk = row_block_size_data(iatom)
     230        18432 :             ncols_blk = col_block_size_data(jatom)
     231              :             CALL dbcsr_get_block_p(matrix=dbcsr_mat, &
     232              :                                    row=iatom, col=jatom, block=mat_block, &
     233        18432 :                                    found=found)
     234        19456 :             IF (.NOT. found) THEN
     235            0 :                CPABORT("Matrix block not found")
     236              :             ELSE
     237              :                ! we have found the matrix block
     238       258048 :                DO jj = 1, ncols_blk
     239      3373056 :                   DO ii = 1, nrows_blk
     240              :                      ! column major format in blocks
     241      3115008 :                      ind = send_disps(ipe) + send_sizes(ipe) + ii + (jj - 1)*nrows_blk
     242      3354624 :                      send_buf(ind) = mat_block(ii, jj)
     243              :                   END DO ! ii
     244              :                END DO ! jj
     245        18432 :                send_sizes(ipe) = send_sizes(ipe) + nrows_blk*ncols_blk
     246              :             END IF
     247              :          END DO ! ipair
     248              :       END DO ! ipe
     249              : 
     250              :       ! do communication
     251              :       CALL para_env%alltoall(send_buf, send_sizes, send_disps, &
     252          512 :                              recv_buf, recv_sizes, recv_disps)
     253              : 
     254              :       ! cleanup temporary arrays no longer needed
     255          512 :       DEALLOCATE (send_buf)
     256          512 :       DEALLOCATE (send_sizes)
     257          512 :       DEALLOCATE (send_disps)
     258          512 :       DEALLOCATE (send_pair_count)
     259          512 :       DEALLOCATE (send_pair_disps)
     260              : 
     261              :       ! do unpacking
     262         1536 :       DO ipe = 1, numprocs
     263         1024 :          recv_sizes(ipe) = 0
     264        19968 :          DO ipair = 1, recv_pair_count(ipe)
     265              :             CALL fb_com_atom_pairs_decode(pairs_recv(recv_pair_disps(ipe) + ipair), &
     266        18432 :                                           pe, iatom, jatom, recv_encode)
     267              :             ! nrows_blk = last_row(iatom) - first_row(iatom) + 1
     268              :             ! ncols_blk = last_col(jatom) - first_col(jatom) + 1
     269        18432 :             nrows_blk = row_block_size_data(iatom)
     270        18432 :             ncols_blk = col_block_size_data(jatom)
     271              :             ! get the corresponding atom indices in halo
     272              :             ! the atoms from the recv_pairs should be in the atomic_halo, because
     273              :             ! the recv_pairs are the matrix blocks requested by the local proc for
     274              :             ! this particular atomic_halo
     275              :             CALL fb_atomic_halo_atom_global2halo(atomic_halo, &
     276              :                                                  iatom, iatom_in_halo, &
     277        18432 :                                                  found)
     278        18432 :             CPASSERT(found)
     279              :             CALL fb_atomic_halo_atom_global2halo(atomic_halo, &
     280              :                                                  jatom, jatom_in_halo, &
     281        18432 :                                                  found)
     282        18432 :             CPASSERT(found)
     283              :             ! put block into the full conventional matrix
     284       258048 :             DO jj = 1, ncols_blk
     285      3373056 :                DO ii = 1, nrows_blk
     286              :                   ! column major format in blocks
     287      3115008 :                   ind = recv_disps(ipe) + recv_sizes(ipe) + ii + (jj - 1)*nrows_blk
     288              :                   atomic_matrix(blk_row_start(iatom_in_halo) + ii - 1, &
     289      3354624 :                                 blk_col_start(jatom_in_halo) + jj - 1) = recv_buf(ind)
     290              : 
     291              :                END DO ! ii
     292              :             END DO ! jj
     293        56320 :             recv_sizes(ipe) = recv_sizes(ipe) + nrows_blk*ncols_blk
     294              :          END DO ! ipair
     295              :       END DO ! ipe
     296              : 
     297              :       ! the constructed matrix is upper triangular, fill it up to full
     298        53248 :       DO ii = 2, SIZE(atomic_matrix, 1)
     299      2795520 :          DO jj = 1, ii - 1
     300      2795008 :             atomic_matrix(ii, jj) = atomic_matrix(jj, ii)
     301              :          END DO
     302              :       END DO
     303              : 
     304              :       ! cleanup rest of the temporary arrays
     305          512 :       DEALLOCATE (recv_buf)
     306          512 :       DEALLOCATE (recv_sizes)
     307          512 :       DEALLOCATE (recv_disps)
     308          512 :       DEALLOCATE (recv_pair_count)
     309          512 :       DEALLOCATE (recv_pair_disps)
     310          512 :       CALL fb_com_atom_pairs_release(atom_pairs_send)
     311          512 :       CALL fb_com_atom_pairs_release(atom_pairs_recv)
     312              : 
     313          512 :       CALL timestop(handle)
     314              : 
     315         1024 :    END SUBROUTINE fb_atmatrix_construct
     316              : 
     317              : ! ****************************************************************************
     318              : !> \brief Constructs atomic matrix for filter basis method from a given
     319              : !>        DBCSR matrix and a set of atomic send and recv pairs
     320              : !>        corresponding to the matrix blocks that needs to be included
     321              : !>        in the atomic matrix. This version is for when we do MPI
     322              : !>        communications collectively in one go at the beginning.
     323              : !> \param matrix_storage : data storing the relevant DBCSR matrix blocks
     324              : !>                         needed for constructing the atomic matrix
     325              : !> \param atomic_halo : the atomic halo conrresponding to this atomic
     326              : !>                      matrix
     327              : !> \param atomic_matrix : the atomic matrix to be constructed, it should
     328              : !>                        have already been allocated prior entering
     329              : !>                        this subroutine
     330              : !> \param blk_row_start : first row in each atomic blk row in the
     331              : !>                        atomic matrix
     332              : !> \param blk_col_start : first col in each atomic blk col in the
     333              : !>                        atomic matrix
     334              : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     335              : ! **************************************************************************************************
     336          384 :    SUBROUTINE fb_atmatrix_construct_2(matrix_storage, &
     337              :                                       atomic_halo, &
     338          128 :                                       atomic_matrix, &
     339          256 :                                       blk_row_start, &
     340          128 :                                       blk_col_start)
     341              :       TYPE(fb_matrix_data_obj), INTENT(IN)               :: matrix_storage
     342              :       TYPE(fb_atomic_halo_obj), INTENT(IN)               :: atomic_halo
     343              :       REAL(KIND=dp), DIMENSION(:, :), INTENT(OUT)        :: atomic_matrix
     344              :       INTEGER, DIMENSION(:), INTENT(IN)                  :: blk_row_start, blk_col_start
     345              : 
     346              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'fb_atmatrix_construct_2'
     347              : 
     348              :       INTEGER                                            :: handle, iatom, iatom_global, icol, ii, &
     349              :                                                             irow, jatom, jatom_global, jj, &
     350              :                                                             natoms_in_halo
     351          128 :       INTEGER, DIMENSION(:), POINTER                     :: halo_atoms
     352              :       LOGICAL                                            :: check_ok, found
     353          128 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: blk_p
     354              : 
     355          128 :       CALL timeset(routineN, handle)
     356              : 
     357          128 :       check_ok = fb_matrix_data_has_data(matrix_storage)
     358          128 :       CPASSERT(check_ok)
     359          128 :       check_ok = fb_atomic_halo_has_data(atomic_halo)
     360          128 :       CPASSERT(check_ok)
     361              : 
     362          128 :       NULLIFY (halo_atoms, blk_p)
     363              : 
     364              :       ! initialise atomic matrix
     365          128 :       IF (SIZE(atomic_matrix, 1) > 0 .AND. SIZE(atomic_matrix, 2) > 0) THEN
     366      1397888 :          atomic_matrix = 0.0_dp
     367              :       END IF
     368              : 
     369              :       ! get atomic halo information
     370              :       CALL fb_atomic_halo_get(atomic_halo=atomic_halo, &
     371              :                               natoms=natoms_in_halo, &
     372          128 :                               halo_atoms=halo_atoms)
     373              : 
     374              :       ! construct atomic matrix using data from matrix_storage
     375         1152 :       DO iatom = 1, natoms_in_halo
     376         1024 :          iatom_global = halo_atoms(iatom)
     377         9344 :          DO jatom = 1, natoms_in_halo
     378         8192 :             jatom_global = halo_atoms(jatom)
     379              :             ! atomic matrices are symmetric, fill only the top
     380              :             ! triangular part
     381         9216 :             IF (jatom_global >= iatom_global) THEN
     382              :                CALL fb_matrix_data_get(matrix_storage, &
     383              :                                        iatom_global, &
     384              :                                        jatom_global, &
     385              :                                        blk_p, &
     386         4608 :                                        found)
     387              :                ! copy data to atomic_matrix if found
     388         4608 :                IF (found) THEN
     389        64512 :                   DO jj = 1, SIZE(blk_p, 2)
     390        59904 :                      icol = blk_col_start(jatom) + jj - 1
     391       843264 :                      DO ii = 1, SIZE(blk_p, 1)
     392       778752 :                         irow = blk_row_start(iatom) + ii - 1
     393       838656 :                         atomic_matrix(irow, icol) = blk_p(ii, jj)
     394              :                      END DO ! ii
     395              :                   END DO ! jj
     396              :                END IF
     397              :             END IF
     398              :          END DO ! jatom
     399              :       END DO ! iatom
     400              : 
     401              :       ! the constructed matrix is upper triangular, fill it up to full
     402        13312 :       DO ii = 2, SIZE(atomic_matrix, 1)
     403       698880 :          DO jj = 1, ii - 1
     404       698752 :             atomic_matrix(ii, jj) = atomic_matrix(jj, ii)
     405              :          END DO
     406              :       END DO
     407              : 
     408          128 :       CALL timestop(handle)
     409              : 
     410          128 :    END SUBROUTINE fb_atmatrix_construct_2
     411              : 
     412              : ! ****************************************************************************
     413              : !> \brief generate list of blocks (atom pairs) of a DBCSR matrix to be
     414              : !>        sent and received in order to construct an atomic matrix
     415              : !>        corresponding to a given atomic halo. This version is for the case
     416              : !>        when we do MPI communications at each step, for each atomic matrix.
     417              : !> \param dbcsr_mat : The DBCSR matrix the atom blocks come from
     418              : !> \param atomic_halo : the atomic halo used to construct the atomic
     419              : !>                      matrix
     420              : !> \param para_env : cp2k parallel environment
     421              : !> \param atom_pairs_send : list of atom blocks from local DBCSR matrix
     422              : !>                          data to be sent
     423              : !> \param atom_pairs_recv : list of atom blocks from remote DBCSR matrix
     424              : !>                          data to be recveived
     425              : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     426              : ! **************************************************************************************************
     427          512 :    SUBROUTINE fb_atmatrix_generate_com_pairs(dbcsr_mat, &
     428              :                                              atomic_halo, &
     429              :                                              para_env, &
     430              :                                              atom_pairs_send, &
     431              :                                              atom_pairs_recv)
     432              :       TYPE(dbcsr_type), POINTER                          :: dbcsr_mat
     433              :       TYPE(fb_atomic_halo_obj), INTENT(IN)               :: atomic_halo
     434              :       TYPE(mp_para_env_type), POINTER                    :: para_env
     435              :       TYPE(fb_com_atom_pairs_obj), INTENT(INOUT)         :: atom_pairs_send, atom_pairs_recv
     436              : 
     437              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'fb_atmatrix_generate_com_pairs'
     438              : 
     439              :       INTEGER :: counter, handle, iatom, iatom_global, itask, jatom, jatom_global, natoms_in_halo, &
     440              :          nblkrows_total, nencode, ntasks_recv, ntasks_send, src
     441              :       INTEGER(KIND=int_8)                                :: pair
     442          512 :       INTEGER(KIND=int_8), DIMENSION(:, :), POINTER      :: tasks_recv, tasks_send
     443          512 :       INTEGER, DIMENSION(:), POINTER                     :: halo_atoms
     444              :       LOGICAL                                            :: found
     445          512 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: mat_block
     446              :       TYPE(fb_com_tasks_obj)                             :: com_tasks_recv, com_tasks_send
     447              : 
     448          512 :       CALL timeset(routineN, handle)
     449              : 
     450          512 :       NULLIFY (halo_atoms, tasks_send, tasks_recv)
     451          512 :       CALL fb_com_tasks_nullify(com_tasks_send)
     452          512 :       CALL fb_com_tasks_nullify(com_tasks_recv)
     453              : 
     454              :       ! initialise atom_pairs_send and atom_pairs_receive
     455          512 :       IF (fb_com_atom_pairs_has_data(atom_pairs_send)) THEN
     456          512 :          CALL fb_com_atom_pairs_init(atom_pairs_send)
     457              :       ELSE
     458            0 :          CALL fb_com_atom_pairs_create(atom_pairs_send)
     459              :       END IF
     460          512 :       IF (fb_com_atom_pairs_has_data(atom_pairs_recv)) THEN
     461          512 :          CALL fb_com_atom_pairs_init(atom_pairs_recv)
     462              :       ELSE
     463            0 :          CALL fb_com_atom_pairs_create(atom_pairs_recv)
     464              :       END IF
     465              : 
     466              :       ! get atomic halo information
     467              :       CALL fb_atomic_halo_get(atomic_halo=atomic_halo, &
     468              :                               natoms=natoms_in_halo, &
     469          512 :                               halo_atoms=halo_atoms)
     470              : 
     471              :       ! get the total number of atoms, we can obtain this directly
     472              :       ! from the global block row dimension of the dbcsr matrix
     473              :       CALL dbcsr_get_info(matrix=dbcsr_mat, &
     474          512 :                           nblkrows_total=nblkrows_total)
     475              : 
     476              :       ! generate recv task list (tasks_recv)
     477              : 
     478              :       ! a recv task corresponds to the copying or transferring of a
     479              :       ! matrix block in the part of the DBCSR matrix owned by the src
     480              :       ! proc to this proc in order to construct the atomic matrix
     481              :       ! corresponding to the given atomic halo. As an upper-bound, the
     482              :       ! number of matrix blocks required do not exceed natoms_in_halo**2
     483          512 :       ntasks_recv = natoms_in_halo*natoms_in_halo
     484              : 
     485         1536 :       ALLOCATE (tasks_recv(TASK_N_RECORDS, ntasks_recv))
     486              : 
     487              :       ! destination proc is always the local processor
     488              :       ASSOCIATE (dest => para_env%mepos)
     489              :          ! now that tasks_recv has been allocated, generate the tasks
     490          512 :          itask = 1
     491         4608 :          DO iatom = 1, natoms_in_halo
     492         4096 :             iatom_global = halo_atoms(iatom)
     493        37376 :             DO jatom = 1, natoms_in_halo
     494        32768 :                jatom_global = halo_atoms(jatom)
     495              :                ! atomic matrix is symmetric, and only upper triangular part
     496              :                ! is stored in DBCSR matrix
     497        36864 :                IF (jatom_global >= iatom_global) THEN
     498              :                   ! find the source proc that supposed to own the block
     499              :                   ! (iatom_global, jatom_global)
     500              :                   CALL dbcsr_get_stored_coordinates(dbcsr_mat, &
     501              :                                                     iatom_global, &
     502              :                                                     jatom_global, &
     503        18432 :                                                     processor=src)
     504              :                   ! we must encode the global atom indices rather the halo
     505              :                   ! atomic indices in each task, because halo atomic
     506              :                   ! indices are local to each halo, and each processor is
     507              :                   ! working on a different halo local to them. So one
     508              :                   ! processor would not have the information about the halo
     509              :                   ! on another processor, rendering the halo atomic indices
     510              :                   ! rather useless outside the local processor.
     511        18432 :                   tasks_recv(TASK_DEST, itask) = dest
     512        18432 :                   tasks_recv(TASK_SRC, itask) = src
     513              : 
     514              :                   CALL fb_com_tasks_encode_pair(tasks_recv(TASK_PAIR, itask), &
     515              :                                                 iatom_global, jatom_global, &
     516        18432 :                                                 nblkrows_total)
     517              :                   ! calculation of cost not implemented at the moment
     518        18432 :                   tasks_recv(TASK_COST, itask) = 0
     519        18432 :                   itask = itask + 1
     520              :                END IF
     521              :             END DO ! jatom
     522              :          END DO ! iatom
     523              :       END ASSOCIATE
     524              : 
     525              :       ! get the actual number of tasks
     526          512 :       ntasks_recv = itask - 1
     527              : 
     528              :       ! create tasks
     529          512 :       CALL fb_com_tasks_create(com_tasks_recv)
     530          512 :       CALL fb_com_tasks_create(com_tasks_send)
     531              : 
     532              :       CALL fb_com_tasks_set(com_tasks=com_tasks_recv, &
     533              :                             task_dim=TASK_N_RECORDS, &
     534              :                             ntasks=ntasks_recv, &
     535              :                             nencode=nblkrows_total, &
     536          512 :                             tasks=tasks_recv)
     537              : 
     538              :       ! genearte the send task list (tasks_send) from the recv task list
     539              :       CALL fb_com_tasks_transpose_dest_src(com_tasks_recv, ">", com_tasks_send, &
     540          512 :                                            para_env)
     541              : 
     542              :       CALL fb_com_tasks_get(com_tasks=com_tasks_send, &
     543              :                             ntasks=ntasks_send, &
     544              :                             tasks=tasks_send, &
     545          512 :                             nencode=nencode)
     546              : 
     547              :       ! because the atomic_halos and the neighbor_list_set used to
     548              :       ! generate the sparse structure of the DBCSR matrix do not
     549              :       ! necessarily have to coincide, we must check of the blocks in
     550              :       ! tasks_send (these should be local to the processor) do indeed
     551              :       ! exist in the DBCSR matrix, if not, then we need to prune these
     552              :       ! out of the task list
     553              : 
     554          512 :       counter = 0
     555        18944 :       DO itask = 1, ntasks_send
     556        18432 :          pair = tasks_send(TASK_PAIR, itask)
     557        18432 :          CALL fb_com_tasks_decode_pair(pair, iatom_global, jatom_global, nencode)
     558              :          ! check if block exists in DBCSR matrix
     559              :          CALL dbcsr_get_block_p(matrix=dbcsr_mat, &
     560              :                                 row=iatom_global, col=jatom_global, block=mat_block, &
     561        18432 :                                 found=found)
     562        18944 :          IF (found) THEN
     563        18432 :             counter = counter + 1
     564              :             ! we can do this here, because essencially we are inspecting
     565              :             ! the send tasks one by one, and then omit ones which the
     566              :             ! block is not found in the DBCSR matrix. itask is always
     567              :             ! >= counter
     568        92160 :             tasks_send(1:TASK_N_RECORDS, counter) = tasks_send(1:TASK_N_RECORDS, itask)
     569              :          END IF
     570              :       END DO
     571              :       ! the new send task list should have size counter. counter
     572              :       ! <= the old ntasks_send, thus the task list does not really
     573              :       ! need to be reallocated (as it is just a temporary array), and
     574              :       ! the useful data will cutoff at counter, and the rest of the
     575              :       ! array will just be garbage
     576          512 :       ntasks_send = counter
     577              : 
     578              :       ! tasks_send is set through the pointer already
     579              :       CALL fb_com_tasks_set(com_tasks=com_tasks_send, &
     580          512 :                             ntasks=ntasks_send)
     581              : 
     582              :       ! now, re-distribute the new send tasks list to other processors
     583              :       ! to build the updated recv tasks list
     584              :       CALL fb_com_tasks_transpose_dest_src(com_tasks_recv, "<", com_tasks_send, &
     585          512 :                                            para_env)
     586              : 
     587              :       ! task lists are now complete, now construct the atom_pairs_send
     588              :       ! and atom_pairs_recv from the tasks lists
     589              :       CALL fb_com_tasks_build_atom_pairs(com_tasks=com_tasks_send, &
     590              :                                          atom_pairs=atom_pairs_send, &
     591              :                                          natoms_encode=nencode, &
     592          512 :                                          send_or_recv="send")
     593              :       CALL fb_com_tasks_build_atom_pairs(com_tasks=com_tasks_recv, &
     594              :                                          atom_pairs=atom_pairs_recv, &
     595              :                                          natoms_encode=nencode, &
     596          512 :                                          send_or_recv="recv")
     597              : 
     598              :       ! cleanup
     599          512 :       CALL fb_com_tasks_release(com_tasks_recv)
     600          512 :       CALL fb_com_tasks_release(com_tasks_send)
     601              : 
     602          512 :       CALL timestop(handle)
     603              : 
     604         1536 :    END SUBROUTINE fb_atmatrix_generate_com_pairs
     605              : 
     606              : ! ****************************************************************************
     607              : !> \brief generate list of blocks (atom pairs) of a DBCSR matrix to be
     608              : !>        sent and received in order to construct all local atomic matrices
     609              : !>        corresponding to the atomic halos. This version is for the case
     610              : !>        when we do MPI communications collectively in one go at the
     611              : !>        beginning.
     612              : !> \param dbcsr_mat : The DBCSR matrix the atom blocks come from
     613              : !> \param atomic_halos : the list of all atomic halos local to the process
     614              : !> \param para_env : cp2k parallel environment
     615              : !> \param atom_pairs_send : list of atom blocks from local DBCSR matrix
     616              : !>                          data to be sent
     617              : !> \param atom_pairs_recv : list of atom blocks from remote DBCSR matrix
     618              : !>                          data to be recveived
     619              : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     620              : ! **************************************************************************************************
     621           32 :    SUBROUTINE fb_atmatrix_generate_com_pairs_2(dbcsr_mat, &
     622              :                                                atomic_halos, &
     623              :                                                para_env, &
     624              :                                                atom_pairs_send, &
     625              :                                                atom_pairs_recv)
     626              :       TYPE(dbcsr_type), POINTER                          :: dbcsr_mat
     627              :       TYPE(fb_atomic_halo_list_obj), INTENT(IN)          :: atomic_halos
     628              :       TYPE(mp_para_env_type), POINTER                    :: para_env
     629              :       TYPE(fb_com_atom_pairs_obj), INTENT(INOUT)         :: atom_pairs_send, atom_pairs_recv
     630              : 
     631              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'fb_atmatrix_generate_com_pairs_2'
     632              : 
     633              :       INTEGER :: counter, handle, iatom, iatom_global, ihalo, itask, jatom, jatom_global, &
     634              :          natoms_in_halo, nblkrows_total, nencode, nhalos, ntasks_recv, ntasks_send, src
     635              :       INTEGER(KIND=int_8)                                :: pair
     636           32 :       INTEGER(KIND=int_8), DIMENSION(:, :), POINTER      :: tasks_recv, tasks_send
     637           32 :       INTEGER, DIMENSION(:), POINTER                     :: halo_atoms
     638              :       LOGICAL                                            :: found
     639           32 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: mat_block
     640           32 :       TYPE(fb_atomic_halo_obj), DIMENSION(:), POINTER    :: halos
     641              :       TYPE(fb_com_tasks_obj)                             :: com_tasks_recv, com_tasks_send
     642              : 
     643           32 :       CALL timeset(routineN, handle)
     644              : 
     645           32 :       NULLIFY (halo_atoms, tasks_send, tasks_recv)
     646           32 :       CALL fb_com_tasks_nullify(com_tasks_send)
     647           32 :       CALL fb_com_tasks_nullify(com_tasks_recv)
     648              : 
     649              :       ! initialise atom_pairs_send and atom_pairs_receive
     650           32 :       IF (fb_com_atom_pairs_has_data(atom_pairs_send)) THEN
     651           32 :          CALL fb_com_atom_pairs_init(atom_pairs_send)
     652              :       ELSE
     653            0 :          CALL fb_com_atom_pairs_create(atom_pairs_send)
     654              :       END IF
     655           32 :       IF (fb_com_atom_pairs_has_data(atom_pairs_recv)) THEN
     656           32 :          CALL fb_com_atom_pairs_init(atom_pairs_recv)
     657              :       ELSE
     658            0 :          CALL fb_com_atom_pairs_create(atom_pairs_recv)
     659              :       END IF
     660              : 
     661              :       ! get atomic halo list information
     662              :       CALL fb_atomic_halo_list_get(atomic_halos=atomic_halos, &
     663              :                                    nhalos=nhalos, &
     664           32 :                                    halos=halos)
     665              :       ! get the total number of atoms, we can obtain this directly
     666              :       ! from the global block row dimension of the dbcsr matrix
     667              :       CALL dbcsr_get_info(matrix=dbcsr_mat, &
     668           32 :                           nblkrows_total=nblkrows_total)
     669              : 
     670              :       ! estimate the maximum number of blocks to be received
     671           32 :       ntasks_recv = 0
     672          160 :       DO ihalo = 1, nhalos
     673              :          CALL fb_atomic_halo_get(atomic_halo=halos(ihalo), &
     674          128 :                                  natoms=natoms_in_halo)
     675          160 :          ntasks_recv = ntasks_recv + natoms_in_halo*natoms_in_halo
     676              :       END DO
     677           96 :       ALLOCATE (tasks_recv(TASK_N_RECORDS, ntasks_recv))
     678              : 
     679              :       ! now that tasks_recv has been allocated, generate the tasks
     680              : 
     681              :       ! destination proc is always the local process
     682              :       ASSOCIATE (dest => para_env%mepos)
     683           32 :          itask = 1
     684          160 :          DO ihalo = 1, nhalos
     685              :             CALL fb_atomic_halo_get(atomic_halo=halos(ihalo), &
     686              :                                     natoms=natoms_in_halo, &
     687          128 :                                     halo_atoms=halo_atoms)
     688         1184 :             DO iatom = 1, natoms_in_halo
     689         1024 :                iatom_global = halo_atoms(iatom)
     690         9344 :                DO jatom = 1, natoms_in_halo
     691         8192 :                   jatom_global = halo_atoms(jatom)
     692              :                   ! atomic matrices are always symmetric, treat it as such.
     693              :                   ! so only deal with upper triangular parts
     694         9216 :                   IF (jatom_global >= iatom_global) THEN
     695              :                      ! find the source proc that supposed to own the block
     696              :                      ! (iatom_global, jatom_global)
     697              :                      CALL dbcsr_get_stored_coordinates(dbcsr_mat, &
     698              :                                                        iatom_global, &
     699              :                                                        jatom_global, &
     700         4608 :                                                        processor=src)
     701              :                      ! we must encode the global atom indices rather the halo
     702              :                      ! atomic indices in each task, because halo atomic indices
     703              :                      ! are local to each halo, and each processor is working on a
     704              :                      ! different halo local to them. So one processor would not
     705              :                      ! have the information about the halo on another processor,
     706              :                      ! rendering the halo atomic indices rather useless outside
     707              :                      ! the local processor.
     708         4608 :                      tasks_recv(TASK_DEST, itask) = dest
     709         4608 :                      tasks_recv(TASK_SRC, itask) = src
     710              :                      CALL fb_com_tasks_encode_pair(tasks_recv(TASK_PAIR, itask), &
     711              :                                                    iatom_global, jatom_global, &
     712         4608 :                                                    nblkrows_total)
     713              :                      ! calculation of cost not implemented at the moment
     714         4608 :                      tasks_recv(TASK_COST, itask) = 0
     715         4608 :                      itask = itask + 1
     716              :                   END IF
     717              :                END DO ! jatom
     718              :             END DO ! iatom
     719              :          END DO ! ihalo
     720              :       END ASSOCIATE
     721              : 
     722              :       ! set the actual number of tasks obtained
     723           32 :       ntasks_recv = itask - 1
     724              : 
     725              :       ! create tasks
     726           32 :       CALL fb_com_tasks_create(com_tasks_recv)
     727           32 :       CALL fb_com_tasks_create(com_tasks_send)
     728              : 
     729              :       CALL fb_com_tasks_set(com_tasks=com_tasks_recv, &
     730              :                             task_dim=TASK_N_RECORDS, &
     731              :                             ntasks=ntasks_recv, &
     732              :                             nencode=nblkrows_total, &
     733           32 :                             tasks=tasks_recv)
     734              : 
     735              :       ! genearte the send task list (tasks_send) from the recv task list
     736              :       CALL fb_com_tasks_transpose_dest_src(com_tasks_recv, ">", com_tasks_send, &
     737           32 :                                            para_env)
     738              : 
     739              :       CALL fb_com_tasks_get(com_tasks=com_tasks_send, &
     740              :                             ntasks=ntasks_send, &
     741              :                             tasks=tasks_send, &
     742           32 :                             nencode=nencode)
     743              : 
     744              :       ! because the atomic_halos and the neighbor_list_set used to
     745              :       ! generate the sparse structure of the DBCSR matrix do not
     746              :       ! necessarily have to coincide, we must check of the blocks in
     747              :       ! tasks_send (these should be local to the processor) do indeed
     748              :       ! exist in the DBCSR matrix, if not, then we need to prune these
     749              :       ! out of the task list
     750              : 
     751           32 :       counter = 0
     752         4640 :       DO itask = 1, ntasks_send
     753         4608 :          pair = tasks_send(TASK_PAIR, itask)
     754         4608 :          CALL fb_com_tasks_decode_pair(pair, iatom_global, jatom_global, nencode)
     755              :          ! check if block exists in DBCSR matrix
     756              :          CALL dbcsr_get_block_p(matrix=dbcsr_mat, row=iatom_global, &
     757              :                                 col=jatom_global, block=mat_block, &
     758         4608 :                                 found=found)
     759         4640 :          IF (found) THEN
     760         4608 :             counter = counter + 1
     761              :             ! we can do this here, because essencially we are inspecting
     762              :             ! the send tasks one by one, and then omit ones which the
     763              :             ! block is not found in the DBCSR matrix. itask is always
     764              :             ! >= counter
     765        23040 :             tasks_send(1:TASK_N_RECORDS, counter) = tasks_send(1:TASK_N_RECORDS, itask)
     766              :          END IF
     767              :       END DO
     768              :       ! the new send task list should have size counter. counter
     769              :       ! <= the old ntasks_send, thus the task list does not really
     770              :       ! need to be reallocated (as it is just a temporary array), and
     771              :       ! the useful data will cutoff at counter, and the rest of the
     772              :       ! array will just be garbage
     773           32 :       ntasks_send = counter
     774              : 
     775              :       ! tasks_send is set through the pointer already
     776              :       CALL fb_com_tasks_set(com_tasks=com_tasks_send, &
     777           32 :                             ntasks=ntasks_send)
     778              : 
     779              :       ! now, re-distribute the new send tasks list to other processors
     780              :       ! to build the updated recv tasks list
     781              :       CALL fb_com_tasks_transpose_dest_src(com_tasks_recv, "<", com_tasks_send, &
     782           32 :                                            para_env)
     783              : 
     784              :       ! task lists are now complete, now construct the atom_pairs_send
     785              :       ! and atom_pairs_recv from the tasks lists
     786              :       CALL fb_com_tasks_build_atom_pairs(com_tasks=com_tasks_send, &
     787              :                                          atom_pairs=atom_pairs_send, &
     788              :                                          natoms_encode=nencode, &
     789           32 :                                          send_or_recv="send")
     790              :       CALL fb_com_tasks_build_atom_pairs(com_tasks=com_tasks_recv, &
     791              :                                          atom_pairs=atom_pairs_recv, &
     792              :                                          natoms_encode=nencode, &
     793           32 :                                          send_or_recv="recv")
     794              : 
     795              :       ! cleanup
     796           32 :       CALL fb_com_tasks_release(com_tasks_recv)
     797           32 :       CALL fb_com_tasks_release(com_tasks_send)
     798              : 
     799           32 :       CALL timestop(handle)
     800              : 
     801           96 :    END SUBROUTINE fb_atmatrix_generate_com_pairs_2
     802              : 
     803              : END MODULE qs_fb_atomic_matrix_methods
        

Generated by: LCOV version 2.0-1