LCOV - code coverage report
Current view: top level - src/fm - cp_cfm_types.F (source / functions) Coverage Total Hit
Test: CP2K Regtests (git:42dac4a) Lines: 87.8 % 436 383
Test Date: 2025-07-25 12:55:17 Functions: 75.0 % 20 15

            Line data    Source code
       1              : !--------------------------------------------------------------------------------------------------!
       2              : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3              : !   Copyright 2000-2025 CP2K developers group <https://cp2k.org>                                   !
       4              : !                                                                                                  !
       5              : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6              : !--------------------------------------------------------------------------------------------------!
       7              : 
       8              : ! **************************************************************************************************
       9              : !> \brief Represents a complex full matrix distributed on many processors.
      10              : !> \author Joost VandeVondele, based on Fawzi's cp_fm_* routines
      11              : ! **************************************************************************************************
      12              : MODULE cp_cfm_types
      13              :    USE cp_blacs_env,                    ONLY: cp_blacs_env_type
      14              :    USE cp_fm_struct,                    ONLY: cp_fm_struct_equivalent,&
      15              :                                               cp_fm_struct_get,&
      16              :                                               cp_fm_struct_release,&
      17              :                                               cp_fm_struct_retain,&
      18              :                                               cp_fm_struct_type
      19              :    USE cp_fm_types,                     ONLY: cp_fm_type
      20              :    USE kinds,                           ONLY: dp
      21              :    USE mathconstants,                   ONLY: z_one,&
      22              :                                               z_zero
      23              :    USE message_passing,                 ONLY: cp2k_is_parallel,&
      24              :                                               mp_any_source,&
      25              :                                               mp_para_env_type,&
      26              :                                               mp_proc_null,&
      27              :                                               mp_request_null,&
      28              :                                               mp_request_type,&
      29              :                                               mp_waitall
      30              : #include "../base/base_uses.f90"
      31              : 
      32              :    IMPLICIT NONE
      33              :    PRIVATE
      34              : 
      35              :    LOGICAL, PRIVATE, PARAMETER :: debug_this_module = .TRUE.
      36              :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_cfm_types'
      37              :    INTEGER, PARAMETER, PRIVATE :: src_tag = 3, dest_tag = 5, send_tag = 7, recv_tag = 11
      38              : 
      39              :    PUBLIC :: cp_cfm_type, cp_cfm_p_type, copy_cfm_info_type
      40              :    PUBLIC :: cp_cfm_cleanup_copy_general, &
      41              :              cp_cfm_create, &
      42              :              cp_cfm_finish_copy_general, &
      43              :              cp_cfm_get_element, &
      44              :              cp_cfm_get_info, &
      45              :              cp_cfm_get_submatrix, &
      46              :              cp_cfm_release, &
      47              :              cp_cfm_set_all, &
      48              :              cp_cfm_set_element, &
      49              :              cp_cfm_set_submatrix, &
      50              :              cp_cfm_start_copy_general, &
      51              :              cp_cfm_to_cfm, &
      52              :              cp_cfm_to_fm, &
      53              :              cp_fm_to_cfm
      54              : 
      55              :    INTERFACE cp_cfm_to_cfm
      56              :       MODULE PROCEDURE cp_cfm_to_cfm_matrix, & ! a full matrix
      57              :          cp_cfm_to_cfm_columns ! just a number of columns
      58              :    END INTERFACE
      59              : 
      60              : ! **************************************************************************************************
      61              : !> \brief Represent a complex full matrix.
      62              : !> \param name           the name of the matrix, used for printing
      63              : !> \param matrix_struct structure of this matrix
      64              : !> \param local_data    array with the data of the matrix (its content depends on
      65              : !>                      the matrix type used: in parallel run it will be in
      66              : !>                      ScaLAPACK format, in sequential run it will simply contain the matrix)
      67              : ! **************************************************************************************************
      68              :    TYPE cp_cfm_type
      69              :       CHARACTER(len=60) :: name = ""
      70              :       TYPE(cp_fm_struct_type), POINTER :: matrix_struct => NULL()
      71              :       COMPLEX(kind=dp), DIMENSION(:, :), POINTER, CONTIGUOUS :: local_data => NULL()
      72              :    END TYPE cp_cfm_type
      73              : 
      74              : ! **************************************************************************************************
      75              : !> \brief Just to build arrays of pointers to matrices.
      76              : !> \param matrix the pointer to the matrix
      77              : ! **************************************************************************************************
      78              :    TYPE cp_cfm_p_type
      79              :       TYPE(cp_cfm_type), POINTER :: matrix => NULL()
      80              :    END TYPE cp_cfm_p_type
      81              : 
      82              : ! **************************************************************************************************
      83              : !> \brief Stores the state of a copy between cp_cfm_start_copy_general
      84              : !>        and cp_cfm_finish_copy_general.
      85              : !> \par History
      86              : !>      Jan 2017  derived type 'copy_info_type' has been created [Mark T]
      87              : !>      Jan 2018  the type 'copy_info_type' has been adapted for complex matrices [Sergey Chulkov]
      88              : ! **************************************************************************************************
      89              :    TYPE copy_cfm_info_type
      90              :       !> number of MPI processes that send data
      91              :       INTEGER                                     :: send_size = -1
      92              :       !> number of locally stored rows (1) and columns (2) of the destination matrix
      93              :       INTEGER, DIMENSION(2)                       :: nlocal_recv = -1
      94              :       !> number of rows (1) and columns (2) of the ScaLAPACK block of the source matrix
      95              :       INTEGER, DIMENSION(2)                       :: nblock_src = -1
      96              :       !> BLACS process grid shape of the source matrix: (1) nproc_row, (2) nproc_col
      97              :       INTEGER, DIMENSION(2)                       :: src_num_pe = -1
      98              :       !> displacements into recv_buf
      99              :       INTEGER, ALLOCATABLE, DIMENSION(:)          :: recv_disp
     100              :       !> MPI requests for non-blocking receive and send operations
     101              :       TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:)          :: recv_request, send_request
     102              :       !> global column and row indices of locally stored elements of the destination matrix
     103              :       INTEGER, DIMENSION(:), POINTER              :: recv_col_indices => NULL(), recv_row_indices => NULL()
     104              :       !> rank of MPI process with BLACS coordinates (prow, pcol)
     105              :       INTEGER, ALLOCATABLE, DIMENSION(:, :)       :: src_blacs2mpi
     106              :       !> receiving and sending buffers for non-blocking MPI communication
     107              :       COMPLEX(kind=dp), ALLOCATABLE, DIMENSION(:) :: recv_buf, send_buf
     108              :    END TYPE copy_cfm_info_type
     109              : 
     110              : CONTAINS
     111              : 
     112              : ! **************************************************************************************************
     113              : !> \brief Creates a new full matrix with the given structure.
     114              : !> \param matrix        matrix to be created
     115              : !> \param matrix_struct structure of the matrix
     116              : !> \param name          name of the matrix
     117              : !> \note
     118              : !>      preferred allocation routine
     119              : ! **************************************************************************************************
     120       166859 :    SUBROUTINE cp_cfm_create(matrix, matrix_struct, name)
     121              :       TYPE(cp_cfm_type), INTENT(OUT)                     :: matrix
     122              :       TYPE(cp_fm_struct_type), INTENT(IN), TARGET        :: matrix_struct
     123              :       CHARACTER(len=*), INTENT(in), OPTIONAL             :: name
     124              : 
     125              :       INTEGER                                            :: ncol_local, npcol, nprow, nrow_local
     126              :       TYPE(cp_blacs_env_type), POINTER                   :: context
     127              : 
     128       166859 :       context => matrix_struct%context
     129       166859 :       matrix%matrix_struct => matrix_struct
     130       166859 :       CALL cp_fm_struct_retain(matrix%matrix_struct)
     131              : 
     132       166859 :       nprow = context%num_pe(1)
     133       166859 :       npcol = context%num_pe(2)
     134       166859 :       NULLIFY (matrix%local_data)
     135              : 
     136       166859 :       nrow_local = matrix_struct%local_leading_dimension
     137       166859 :       ncol_local = MAX(1, matrix_struct%ncol_locals(context%mepos(2)))
     138       667436 :       ALLOCATE (matrix%local_data(nrow_local, ncol_local))
     139              : 
     140              :       ! do not initialise created matrix as it is up to the user to do so
     141              :       !CALL zcopy(nrow_local*ncol_local, z_zero, 0, matrix%local_data, 1)
     142              : 
     143       166859 :       IF (PRESENT(name)) THEN
     144        18320 :          matrix%name = name
     145              :       ELSE
     146       148539 :          matrix%name = 'full complex matrix'
     147              :       END IF
     148       166859 :    END SUBROUTINE cp_cfm_create
     149              : 
     150              : ! **************************************************************************************************
     151              : !> \brief Releases a full matrix.
     152              : !> \param matrix the matrix to release
     153              : ! **************************************************************************************************
     154       182888 :    SUBROUTINE cp_cfm_release(matrix)
     155              :       TYPE(cp_cfm_type), INTENT(INOUT)                   :: matrix
     156              : 
     157       182888 :       IF (ASSOCIATED(matrix%local_data)) THEN
     158       166859 :          DEALLOCATE (matrix%local_data)
     159              :       END IF
     160       182888 :       matrix%name = ""
     161       182888 :       CALL cp_fm_struct_release(matrix%matrix_struct)
     162       182888 :    END SUBROUTINE cp_cfm_release
     163              : 
     164              : ! **************************************************************************************************
     165              : !> \brief Set all elements of the full matrix to alpha. Besides, set all
     166              : !>        diagonal matrix elements to beta (if given explicitly).
     167              : !> \param matrix  matrix to initialise
     168              : !> \param alpha   value of off-diagonal matrix elements
     169              : !> \param beta    value of diagonal matrix elements (equal to alpha if absent)
     170              : !> \date    12.06.2001
     171              : !> \author  Matthias Krack
     172              : !> \version 1.0
     173              : ! **************************************************************************************************
     174        50244 :    SUBROUTINE cp_cfm_set_all(matrix, alpha, beta)
     175              :       TYPE(cp_cfm_type), INTENT(IN)                   :: matrix
     176              :       COMPLEX(kind=dp), INTENT(in)                       :: alpha
     177              :       COMPLEX(kind=dp), INTENT(in), OPTIONAL             :: beta
     178              : 
     179              :       INTEGER                                            :: irow_local, nrow_local
     180              : #if defined(__parallel)
     181              :       INTEGER                                            :: icol_local, ncol_local
     182        50244 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
     183              : #endif
     184              : 
     185       150732 :       CALL zcopy(SIZE(matrix%local_data), alpha, 0, matrix%local_data(1, 1), 1)
     186              : 
     187        50244 :       IF (PRESENT(beta)) THEN
     188              : #if defined(__parallel)
     189              :          CALL cp_cfm_get_info(matrix, nrow_local=nrow_local, ncol_local=ncol_local, &
     190         9274 :                               row_indices=row_indices, col_indices=col_indices)
     191              : 
     192         9274 :          icol_local = 1
     193         9274 :          irow_local = 1
     194              : 
     195        60894 :          DO WHILE (irow_local <= nrow_local .AND. icol_local <= ncol_local)
     196        60894 :             IF (row_indices(irow_local) < col_indices(icol_local)) THEN
     197            0 :                irow_local = irow_local + 1
     198        51620 :             ELSE IF (row_indices(irow_local) > col_indices(icol_local)) THEN
     199        21116 :                icol_local = icol_local + 1
     200              :             ELSE
     201        30504 :                matrix%local_data(irow_local, icol_local) = beta
     202        30504 :                irow_local = irow_local + 1
     203        30504 :                icol_local = icol_local + 1
     204              :             END IF
     205              :          END DO
     206              : #else
     207              :          nrow_local = MIN(matrix%matrix_struct%nrow_global, matrix%matrix_struct%ncol_global)
     208              : 
     209              :          DO irow_local = 1, nrow_local
     210              :             matrix%local_data(irow_local, irow_local) = beta
     211              :          END DO
     212              : #endif
     213              :       END IF
     214              : 
     215        50244 :    END SUBROUTINE cp_cfm_set_all
     216              : 
     217              : ! **************************************************************************************************
     218              : !> \brief Get the matrix element by its global index.
     219              : !> \param matrix      full matrix
     220              : !> \param irow_global global row index
     221              : !> \param icol_global global column index
     222              : !> \param alpha       matrix element
     223              : !> \par History
     224              : !>      , TCH, created
     225              : !>      always return the answer
     226              : ! **************************************************************************************************
     227         9282 :    SUBROUTINE cp_cfm_get_element(matrix, irow_global, icol_global, alpha)
     228              :       TYPE(cp_cfm_type), INTENT(IN)                      :: matrix
     229              :       INTEGER, INTENT(in)                                :: irow_global, icol_global
     230              :       COMPLEX(kind=dp), INTENT(out)                      :: alpha
     231              : 
     232              : #if defined(__parallel)
     233              :       INTEGER                                            :: icol_local, ipcol, iprow, irow_local, &
     234              :                                                             mypcol, myprow, npcol, nprow
     235              :       INTEGER, DIMENSION(9)                              :: desca
     236              :       TYPE(cp_blacs_env_type), POINTER                   :: context
     237              : #endif
     238              : 
     239              : #if defined(__parallel)
     240         9282 :       context => matrix%matrix_struct%context
     241         9282 :       myprow = context%mepos(1)
     242         9282 :       mypcol = context%mepos(2)
     243         9282 :       nprow = context%num_pe(1)
     244         9282 :       npcol = context%num_pe(2)
     245              : 
     246        92820 :       desca(:) = matrix%matrix_struct%descriptor(:)
     247              : 
     248              :       CALL infog2l(irow_global, icol_global, desca, nprow, npcol, myprow, mypcol, &
     249         9282 :                    irow_local, icol_local, iprow, ipcol)
     250              : 
     251         9282 :       IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
     252         4641 :          alpha = matrix%local_data(irow_local, icol_local)
     253         4641 :          CALL context%ZGEBS2D('All', ' ', 1, 1, alpha, 1)
     254              :       ELSE
     255         4641 :          CALL context%ZGEBR2D('All', ' ', 1, 1, alpha, 1, iprow, ipcol)
     256              :       END IF
     257              : 
     258              : #else
     259              :       alpha = matrix%local_data(irow_global, icol_global)
     260              : #endif
     261              : 
     262         9282 :    END SUBROUTINE cp_cfm_get_element
     263              : 
     264              : ! **************************************************************************************************
     265              : !> \brief Set the matrix element (irow_global,icol_global) of the full matrix to alpha.
     266              : !> \param matrix      full matrix
     267              : !> \param irow_global global row index
     268              : !> \param icol_global global column index
     269              : !> \param alpha       value of the matrix element
     270              : !> \date    12.06.2001
     271              : !> \author  Matthias Krack
     272              : !> \version 1.0
     273              : ! **************************************************************************************************
     274        79956 :    SUBROUTINE cp_cfm_set_element(matrix, irow_global, icol_global, alpha)
     275              :       TYPE(cp_cfm_type), INTENT(IN)                   :: matrix
     276              :       INTEGER, INTENT(in)                                :: irow_global, icol_global
     277              :       COMPLEX(kind=dp), INTENT(in)                       :: alpha
     278              : 
     279              : #if defined(__parallel)
     280              :       INTEGER                                            :: icol_local, ipcol, iprow, irow_local, &
     281              :                                                             mypcol, myprow, npcol, nprow
     282              :       INTEGER, DIMENSION(9)                              :: desca
     283              :       TYPE(cp_blacs_env_type), POINTER                   :: context
     284              : #endif
     285              : 
     286              : #if defined(__parallel)
     287        79956 :       context => matrix%matrix_struct%context
     288        79956 :       myprow = context%mepos(1)
     289        79956 :       mypcol = context%mepos(2)
     290        79956 :       nprow = context%num_pe(1)
     291        79956 :       npcol = context%num_pe(2)
     292              : 
     293       799560 :       desca(:) = matrix%matrix_struct%descriptor(:)
     294              : 
     295              :       CALL infog2l(irow_global, icol_global, desca, nprow, npcol, myprow, mypcol, &
     296        79956 :                    irow_local, icol_local, iprow, ipcol)
     297              : 
     298        79956 :       IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
     299        39978 :          matrix%local_data(irow_local, icol_local) = alpha
     300              :       END IF
     301              : 
     302              : #else
     303              :       matrix%local_data(irow_global, icol_global) = alpha
     304              : #endif
     305              : 
     306        79956 :    END SUBROUTINE cp_cfm_set_element
     307              : 
     308              : ! **************************************************************************************************
     309              : !> \brief Extract a sub-matrix from the full matrix:
     310              : !>        op(target_m)(1:n_rows,1:n_cols) = fm(start_row:start_row+n_rows,start_col:start_col+n_cols).
     311              : !>        Sub-matrix 'target_m' is replicated on each CPU. Using this call is expensive.
     312              : !> \param fm          full matrix you want to get the elements from
     313              : !> \param target_m    2-D array to store the extracted sub-matrix
     314              : !> \param start_row   global row index of the matrix element target_m(1,1) (defaults to 1)
     315              : !> \param start_col   global column index of the matrix element target_m(1,1) (defaults to 1)
     316              : !> \param n_rows      number of rows to extract (defaults to size(op(target_m),1))
     317              : !> \param n_cols      number of columns to extract (defaults to size(op(target_m),2))
     318              : !> \param transpose   indicates that the extracted sub-matrix target_m should be transposed:
     319              : !>                    op(target_m) = target_m^T if .TRUE.,
     320              : !>                    op(target_m) = target_m   if .FALSE. (defaults to false)
     321              : !> \par History
     322              : !>   * 04.2016 created borrowing from Fawzi's cp_fm_get_submatrix [Lianheng Tong]
     323              : !>   * 01.2018 drop innermost conditional branching [Sergey Chulkov]
     324              : !> \author Lianheng Tong
     325              : !> \note
     326              : !>      Optimized for full column updates. The matrix target_m is replicated and valid on all CPUs.
     327              : ! **************************************************************************************************
     328          392 :    SUBROUTINE cp_cfm_get_submatrix(fm, target_m, start_row, start_col, n_rows, n_cols, transpose)
     329              :       TYPE(cp_cfm_type), INTENT(IN)                      :: fm
     330              :       COMPLEX(kind=dp), DIMENSION(:, :), INTENT(out)     :: target_m
     331              :       INTEGER, INTENT(in), OPTIONAL                      :: start_row, start_col, n_rows, n_cols
     332              :       LOGICAL, INTENT(in), OPTIONAL                      :: transpose
     333              : 
     334              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_cfm_get_submatrix'
     335              : 
     336          392 :       COMPLEX(kind=dp), DIMENSION(:, :), POINTER         :: local_data
     337              :       INTEGER :: end_col_global, end_col_local, end_row_global, end_row_local, handle, i, j, &
     338              :          ncol_global, ncol_local, nrow_global, nrow_local, start_col_global, start_col_local, &
     339              :          start_row_global, start_row_local, this_col
     340          392 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
     341              :       LOGICAL                                            :: do_zero, tr_a
     342              :       TYPE(mp_para_env_type), POINTER                    :: para_env
     343              : 
     344          392 :       CALL timeset(routineN, handle)
     345              : 
     346         1176 :       IF (SIZE(target_m) /= 0) THEN
     347              : #if defined(__parallel)
     348          392 :          do_zero = .TRUE.
     349              : #else
     350              :          do_zero = .FALSE.
     351              : #endif
     352              : 
     353          392 :          tr_a = .FALSE.
     354          392 :          IF (PRESENT(transpose)) tr_a = transpose
     355              : 
     356              :          ! find out the first and last global row/column indices
     357          392 :          start_row_global = 1
     358          392 :          start_col_global = 1
     359          392 :          IF (PRESENT(start_row)) start_row_global = start_row
     360          392 :          IF (PRESENT(start_col)) start_col_global = start_col
     361              : 
     362          392 :          IF (tr_a) THEN
     363            0 :             end_row_global = SIZE(target_m, 2)
     364            0 :             end_col_global = SIZE(target_m, 1)
     365              :          ELSE
     366          392 :             end_row_global = SIZE(target_m, 1)
     367          392 :             end_col_global = SIZE(target_m, 2)
     368              :          END IF
     369          392 :          IF (PRESENT(n_rows)) end_row_global = n_rows
     370          392 :          IF (PRESENT(n_cols)) end_col_global = n_cols
     371              : 
     372          392 :          end_row_global = end_row_global + start_row_global - 1
     373          392 :          end_col_global = end_col_global + start_col_global - 1
     374              : 
     375              :          CALL cp_cfm_get_info(matrix=fm, &
     376              :                               nrow_global=nrow_global, ncol_global=ncol_global, &
     377              :                               nrow_local=nrow_local, ncol_local=ncol_local, &
     378          392 :                               row_indices=row_indices, col_indices=col_indices)
     379          392 :          IF (end_row_global > nrow_global) THEN
     380              :             end_row_global = nrow_global
     381              :             do_zero = .TRUE.
     382              :          END IF
     383          392 :          IF (end_col_global > ncol_global) THEN
     384              :             end_col_global = ncol_global
     385              :             do_zero = .TRUE.
     386              :          END IF
     387              : 
     388              :          ! find out row/column indices of locally stored matrix elements that needs to be copied.
     389              :          ! Arrays row_indices and col_indices are assumed to be sorted in ascending order
     390         1868 :          DO start_row_local = 1, nrow_local
     391         1868 :             IF (row_indices(start_row_local) >= start_row_global) EXIT
     392              :          END DO
     393              : 
     394         6237 :          DO end_row_local = start_row_local, nrow_local
     395         6237 :             IF (row_indices(end_row_local) > end_row_global) EXIT
     396              :          END DO
     397          392 :          end_row_local = end_row_local - 1
     398              : 
     399         3898 :          DO start_col_local = 1, ncol_local
     400         3898 :             IF (col_indices(start_col_local) >= start_col_global) EXIT
     401              :          END DO
     402              : 
     403         9734 :          DO end_col_local = start_col_local, ncol_local
     404         9734 :             IF (col_indices(end_col_local) > end_col_global) EXIT
     405              :          END DO
     406          392 :          end_col_local = end_col_local - 1
     407              : 
     408          392 :          para_env => fm%matrix_struct%para_env
     409          392 :          local_data => fm%local_data
     410              : 
     411              :          ! wipe the content of the target matrix if:
     412              :          !  * the source matrix is distributed across a number of processes, or
     413              :          !  * not all elements of the target matrix will be assigned, e.g.
     414              :          !        when the target matrix is larger then the source matrix
     415              :          IF (do_zero) &
     416         1176 :             CALL zcopy(SIZE(target_m), z_zero, 0, target_m(1, 1), 1)
     417              : 
     418          392 :          IF (tr_a) THEN
     419            0 :             DO j = start_col_local, end_col_local
     420            0 :                this_col = col_indices(j) - start_col_global + 1
     421            0 :                DO i = start_row_local, end_row_local
     422            0 :                   target_m(this_col, row_indices(i) - start_row_global + 1) = local_data(i, j)
     423              :                END DO
     424              :             END DO
     425              :          ELSE
     426         9734 :             DO j = start_col_local, end_col_local
     427         9342 :                this_col = col_indices(j) - start_col_global + 1
     428       147316 :                DO i = start_row_local, end_row_local
     429       146924 :                   target_m(row_indices(i) - start_row_global + 1, this_col) = local_data(i, j)
     430              :                END DO
     431              :             END DO
     432              :          END IF
     433              : 
     434       577852 :          CALL para_env%sum(target_m)
     435              :       END IF
     436              : 
     437          392 :       CALL timestop(handle)
     438          392 :    END SUBROUTINE cp_cfm_get_submatrix
     439              : 
     440              : ! **************************************************************************************************
     441              : !> \brief Set a sub-matrix of the full matrix:
     442              : !>       matrix(start_row:start_row+n_rows,start_col:start_col+n_cols)
     443              : !>       = alpha*op(new_values)(1:n_rows,1:n_cols) +
     444              : !>         beta*matrix(start_row:start_row+n_rows,start_col:start_col+n_cols)
     445              : !> \param matrix      full to update
     446              : !> \param new_values  replicated 2-D array that holds new elements of the updated sub-matrix
     447              : !> \param start_row   global row index of the matrix element new_values(1,1) (defaults to 1)
     448              : !> \param start_col   global column index of the matrix element new_values(1,1) (defaults to 1)
     449              : !> \param n_rows      number of rows to update (defaults to size(op(new_values),1))
     450              : !> \param n_cols      number of columns to update (defaults to size(op(new_values),2))
     451              : !> \param alpha       scale factor for the new values (defaults to (1.0,0.0))
     452              : !> \param beta        scale factor for the old values (defaults to (0.0,0.0))
     453              : !> \param transpose   indicates that the matrix new_values should be transposed:
     454              : !>                    op(new_values) = new_values^T if .TRUE.,
     455              : !>                    op(new_values) = new_values   if .FALSE. (defaults to false)
     456              : !> \par History
     457              : !>   * 04.2016 created borrowing from Fawzi's cp_fm_set_submatrix [Lianheng Tong]
     458              : !>   * 01.2018 drop innermost conditional branching [Sergey Chulkov]
     459              : !> \author Lianheng Tong
     460              : !> \note
     461              : !>      Optimized for alpha=(1.0,0.0), beta=(0.0,0.0)
     462              : !>      All matrix elements 'new_values' need to be valid on all CPUs
     463              : ! **************************************************************************************************
     464          240 :    SUBROUTINE cp_cfm_set_submatrix(matrix, new_values, start_row, &
     465              :                                    start_col, n_rows, n_cols, alpha, beta, transpose)
     466              :       TYPE(cp_cfm_type), INTENT(IN)                      :: matrix
     467              :       COMPLEX(kind=dp), DIMENSION(:, :), INTENT(in)      :: new_values
     468              :       INTEGER, INTENT(in), OPTIONAL                      :: start_row, start_col, n_rows, n_cols
     469              :       COMPLEX(kind=dp), INTENT(in), OPTIONAL             :: alpha, beta
     470              :       LOGICAL, INTENT(in), OPTIONAL                      :: transpose
     471              : 
     472              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_cfm_set_submatrix'
     473              : 
     474              :       COMPLEX(kind=dp)                                   :: al, be
     475          240 :       COMPLEX(kind=dp), DIMENSION(:, :), POINTER         :: local_data
     476              :       INTEGER :: end_col_global, end_col_local, end_row_global, end_row_local, handle, i, j, &
     477              :          ncol_global, ncol_local, nrow_global, nrow_local, start_col_global, start_col_local, &
     478              :          start_row_global, start_row_local, this_col
     479          240 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
     480              :       LOGICAL                                            :: tr_a
     481              : 
     482          240 :       CALL timeset(routineN, handle)
     483              : 
     484          240 :       al = z_one
     485          240 :       be = z_zero
     486          240 :       IF (PRESENT(alpha)) al = alpha
     487          240 :       IF (PRESENT(beta)) be = beta
     488              : 
     489              :       ! find out the first and last global row/column indices
     490          240 :       start_row_global = 1
     491          240 :       start_col_global = 1
     492          240 :       IF (PRESENT(start_row)) start_row_global = start_row
     493          240 :       IF (PRESENT(start_col)) start_col_global = start_col
     494              : 
     495          240 :       tr_a = .FALSE.
     496          240 :       IF (PRESENT(transpose)) tr_a = transpose
     497              : 
     498            0 :       IF (tr_a) THEN
     499            0 :          end_row_global = SIZE(new_values, 2)
     500            0 :          end_col_global = SIZE(new_values, 1)
     501              :       ELSE
     502          240 :          end_row_global = SIZE(new_values, 1)
     503          240 :          end_col_global = SIZE(new_values, 2)
     504              :       END IF
     505          240 :       IF (PRESENT(n_rows)) end_row_global = n_rows
     506          240 :       IF (PRESENT(n_cols)) end_col_global = n_cols
     507              : 
     508          240 :       end_row_global = end_row_global + start_row_global - 1
     509          240 :       end_col_global = end_col_global + start_col_global - 1
     510              : 
     511              :       CALL cp_cfm_get_info(matrix=matrix, &
     512              :                            nrow_global=nrow_global, ncol_global=ncol_global, &
     513              :                            nrow_local=nrow_local, ncol_local=ncol_local, &
     514          240 :                            row_indices=row_indices, col_indices=col_indices)
     515          240 :       IF (end_row_global > nrow_global) end_row_global = nrow_global
     516          240 :       IF (end_col_global > ncol_global) end_col_global = ncol_global
     517              : 
     518              :       ! find out row/column indices of locally stored matrix elements that needs to be set.
     519              :       ! Arrays row_indices and col_indices are assumed to be sorted in ascending order
     520          978 :       DO start_row_local = 1, nrow_local
     521          978 :          IF (row_indices(start_row_local) >= start_row_global) EXIT
     522              :       END DO
     523              : 
     524         2919 :       DO end_row_local = start_row_local, nrow_local
     525         2919 :          IF (row_indices(end_row_local) > end_row_global) EXIT
     526              :       END DO
     527          240 :       end_row_local = end_row_local - 1
     528              : 
     529          944 :       DO start_col_local = 1, ncol_local
     530          944 :          IF (col_indices(start_col_local) >= start_col_global) EXIT
     531              :       END DO
     532              : 
     533         6698 :       DO end_col_local = start_col_local, ncol_local
     534         6698 :          IF (col_indices(end_col_local) > end_col_global) EXIT
     535              :       END DO
     536          240 :       end_col_local = end_col_local - 1
     537              : 
     538          240 :       local_data => matrix%local_data
     539              : 
     540          240 :       IF (al == z_one .AND. be == z_zero) THEN
     541          240 :          IF (tr_a) THEN
     542            0 :             DO j = start_col_local, end_col_local
     543            0 :                this_col = col_indices(j) - start_col_global + 1
     544            0 :                DO i = start_row_local, end_row_local
     545            0 :                   local_data(i, j) = new_values(this_col, row_indices(i) - start_row_global + 1)
     546              :                END DO
     547              :             END DO
     548              :          ELSE
     549         6698 :             DO j = start_col_local, end_col_local
     550         6458 :                this_col = col_indices(j) - start_col_global + 1
     551        84098 :                DO i = start_row_local, end_row_local
     552        83858 :                   local_data(i, j) = new_values(row_indices(i) - start_row_global + 1, this_col)
     553              :                END DO
     554              :             END DO
     555              :          END IF
     556              :       ELSE
     557            0 :          IF (tr_a) THEN
     558            0 :             DO j = start_col_local, end_col_local
     559            0 :                this_col = col_indices(j) - start_col_global + 1
     560            0 :                DO i = start_row_local, end_row_local
     561              :                   local_data(i, j) = al*new_values(this_col, row_indices(i) - start_row_global + 1) + &
     562            0 :                                      be*local_data(i, j)
     563              :                END DO
     564              :             END DO
     565              :          ELSE
     566            0 :             DO j = start_col_local, end_col_local
     567            0 :                this_col = col_indices(j) - start_col_global + 1
     568            0 :                DO i = start_row_local, end_row_local
     569              :                   local_data(i, j) = al*new_values(row_indices(i) - start_row_global + 1, this_col) + &
     570            0 :                                      be*local_data(i, j)
     571              :                END DO
     572              :             END DO
     573              :          END IF
     574              :       END IF
     575              : 
     576          240 :       CALL timestop(handle)
     577          240 :    END SUBROUTINE cp_cfm_set_submatrix
     578              : 
     579              : ! **************************************************************************************************
     580              : !> \brief Returns information about a full matrix.
     581              : !> \param matrix        matrix
     582              : !> \param name          name of the matrix
     583              : !> \param nrow_global   total number of rows
     584              : !> \param ncol_global   total number of columns
     585              : !> \param nrow_block    number of rows per ScaLAPACK block
     586              : !> \param ncol_block    number of columns per ScaLAPACK block
     587              : !> \param nrow_local    number of locally stored rows
     588              : !> \param ncol_local    number of locally stored columns
     589              : !> \param row_indices   global indices of locally stored rows
     590              : !> \param col_indices   global indices of locally stored columns
     591              : !> \param local_data    locally stored matrix elements
     592              : !> \param context       BLACS context
     593              : !> \param matrix_struct matrix structure
     594              : !> \param para_env      parallel environment
     595              : !> \date    12.06.2001
     596              : !> \author  Matthias Krack
     597              : !> \version 1.0
     598              : ! **************************************************************************************************
     599       248038 :    SUBROUTINE cp_cfm_get_info(matrix, name, nrow_global, ncol_global, &
     600              :                               nrow_block, ncol_block, nrow_local, ncol_local, &
     601              :                               row_indices, col_indices, local_data, context, &
     602              :                               matrix_struct, para_env)
     603              :       TYPE(cp_cfm_type), INTENT(IN)                      :: matrix
     604              :       CHARACTER(len=*), INTENT(OUT), OPTIONAL            :: name
     605              :       INTEGER, INTENT(OUT), OPTIONAL                     :: nrow_global, ncol_global, nrow_block, &
     606              :                                                             ncol_block, nrow_local, ncol_local
     607              :       INTEGER, DIMENSION(:), OPTIONAL, POINTER           :: row_indices, col_indices
     608              :       COMPLEX(kind=dp), CONTIGUOUS, DIMENSION(:, :), &
     609              :          OPTIONAL, POINTER                               :: local_data
     610              :       TYPE(cp_blacs_env_type), OPTIONAL, POINTER         :: context
     611              :       TYPE(cp_fm_struct_type), OPTIONAL, POINTER         :: matrix_struct
     612              :       TYPE(mp_para_env_type), OPTIONAL, POINTER          :: para_env
     613              : 
     614            0 :       IF (PRESENT(name)) name = matrix%name
     615       248038 :       IF (PRESENT(matrix_struct)) matrix_struct => matrix%matrix_struct
     616       248038 :       IF (PRESENT(local_data)) local_data => matrix%local_data ! not hiding things anymore :-(
     617              : 
     618              :       CALL cp_fm_struct_get(matrix%matrix_struct, nrow_local=nrow_local, &
     619              :                             ncol_local=ncol_local, nrow_global=nrow_global, &
     620              :                             ncol_global=ncol_global, nrow_block=nrow_block, &
     621              :                             ncol_block=ncol_block, context=context, &
     622       248038 :                             row_indices=row_indices, col_indices=col_indices, para_env=para_env)
     623              : 
     624       248038 :    END SUBROUTINE cp_cfm_get_info
     625              : 
     626              : ! **************************************************************************************************
     627              : !> \brief Copy content of a full matrix into another full matrix of the same size.
     628              : !> \param source      source matrix
     629              : !> \param destination destination matrix
     630              : !> \author Joost VandeVondele
     631              : ! **************************************************************************************************
     632       215708 :    SUBROUTINE cp_cfm_to_cfm_matrix(source, destination)
     633              :       TYPE(cp_cfm_type), INTENT(IN)                      :: source, destination
     634              : 
     635              :       INTEGER                                            :: npcol, nprow
     636              : 
     637       215708 :       nprow = source%matrix_struct%context%num_pe(1)
     638       215708 :       npcol = source%matrix_struct%context%num_pe(2)
     639              : 
     640       215708 :       IF (.NOT. cp2k_is_parallel .OR. &
     641              :           cp_fm_struct_equivalent(source%matrix_struct, &
     642              :                                   destination%matrix_struct)) THEN
     643       215708 :          IF (SIZE(source%local_data, 1) /= SIZE(destination%local_data, 1) .OR. &
     644              :              SIZE(source%local_data, 2) /= SIZE(destination%local_data, 2)) &
     645            0 :             CPABORT("internal local_data has different sizes")
     646       647124 :          CALL zcopy(SIZE(source%local_data), source%local_data(1, 1), 1, destination%local_data(1, 1), 1)
     647              :       ELSE
     648            0 :          IF (source%matrix_struct%nrow_global /= destination%matrix_struct%nrow_global) &
     649            0 :             CPABORT("cannot copy between full matrixes of differen sizes")
     650            0 :          IF (source%matrix_struct%ncol_global /= destination%matrix_struct%ncol_global) &
     651            0 :             CPABORT("cannot copy between full matrixes of differen sizes")
     652              : #if defined(__parallel)
     653              :          CALL pzcopy(source%matrix_struct%nrow_global* &
     654              :                      source%matrix_struct%ncol_global, &
     655              :                      source%local_data(1, 1), 1, 1, source%matrix_struct%descriptor, 1, &
     656            0 :                      destination%local_data(1, 1), 1, 1, destination%matrix_struct%descriptor, 1)
     657              : #else
     658              :          CPABORT("")
     659              : #endif
     660              :       END IF
     661       215708 :    END SUBROUTINE cp_cfm_to_cfm_matrix
     662              : 
     663              : ! **************************************************************************************************
     664              : !> \brief Copy a number of sequential columns of a full matrix into another full matrix.
     665              : !> \param msource      source matrix
     666              : !> \param mtarget      destination matrix
     667              : !> \param ncol         number of columns to copy
     668              : !> \param source_start global index of the first column to copy within the source matrix
     669              : !> \param target_start global index of the first column to copy within the destination matrix
     670              : ! **************************************************************************************************
     671        16710 :    SUBROUTINE cp_cfm_to_cfm_columns(msource, mtarget, ncol, source_start, &
     672              :                                     target_start)
     673              : 
     674              :       TYPE(cp_cfm_type), INTENT(IN)                   :: msource, mtarget
     675              :       INTEGER, INTENT(IN)                                :: ncol
     676              :       INTEGER, INTENT(IN), OPTIONAL                      :: source_start, target_start
     677              : 
     678              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_cfm_to_cfm_columns'
     679              : 
     680        16710 :       COMPLEX(kind=dp), DIMENSION(:, :), POINTER         :: a, b
     681              :       INTEGER                                            :: handle, n, ss, ts
     682              : #if defined(__parallel)
     683              :       INTEGER                                            :: i
     684              :       INTEGER, DIMENSION(9)                              :: desca, descb
     685              : #endif
     686              : 
     687        16710 :       CALL timeset(routineN, handle)
     688              : 
     689        16710 :       ss = 1
     690        16710 :       ts = 1
     691              : 
     692        16710 :       IF (PRESENT(source_start)) ss = source_start
     693        16710 :       IF (PRESENT(target_start)) ts = target_start
     694              : 
     695        16710 :       n = msource%matrix_struct%nrow_global
     696              : 
     697        16710 :       a => msource%local_data
     698        16710 :       b => mtarget%local_data
     699              : 
     700              : #if defined(__parallel)
     701       167100 :       desca(:) = msource%matrix_struct%descriptor(:)
     702       167100 :       descb(:) = mtarget%matrix_struct%descriptor(:)
     703       333414 :       DO i = 0, ncol - 1
     704       333414 :          CALL pzcopy(n, a(1, 1), 1, ss + i, desca, 1, b(1, 1), 1, ts + i, descb, 1)
     705              :       END DO
     706              : #else
     707              :       CALL zcopy(ncol*n, a(1, ss), 1, b(1, ts), 1)
     708              : #endif
     709              : 
     710        16710 :       CALL timestop(handle)
     711              : 
     712        16710 :    END SUBROUTINE cp_cfm_to_cfm_columns
     713              : 
     714              : ! **************************************************************************************************
     715              : !> \brief Copy just a triangular matrix.
     716              : !> \param msource source matrix
     717              : !> \param mtarget target matrix
     718              : !> \param uplo    'U' for upper triangular, 'L' for lower triangular
     719              : ! **************************************************************************************************
     720            0 :    SUBROUTINE cp_cfm_to_cfm_triangular(msource, mtarget, uplo)
     721              :       TYPE(cp_cfm_type), INTENT(IN)                   :: msource, mtarget
     722              :       CHARACTER(len=*), INTENT(IN)                       :: uplo
     723              : 
     724              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_cfm_to_cfm_triangular'
     725              : 
     726            0 :       COMPLEX(kind=dp), DIMENSION(:, :), POINTER         :: aa, bb
     727              :       INTEGER                                            :: handle, ncol, nrow
     728              : #if defined(__parallel)
     729              :       INTEGER, DIMENSION(9)                              :: desca, descb
     730              : #endif
     731              : 
     732            0 :       CALL timeset(routineN, handle)
     733              : 
     734            0 :       nrow = msource%matrix_struct%nrow_global
     735            0 :       ncol = msource%matrix_struct%ncol_global
     736              : 
     737            0 :       aa => msource%local_data
     738            0 :       bb => mtarget%local_data
     739              : 
     740              : #if defined(__parallel)
     741            0 :       desca(:) = msource%matrix_struct%descriptor(:)
     742            0 :       descb(:) = mtarget%matrix_struct%descriptor(:)
     743            0 :       CALL pzlacpy(uplo, nrow, ncol, aa(1, 1), 1, 1, desca, bb(1, 1), 1, 1, descb)
     744              : #else
     745              :       CALL zlacpy(uplo, nrow, ncol, aa(1, 1), nrow, bb(1, 1), nrow)
     746              : #endif
     747              : 
     748            0 :       CALL timestop(handle)
     749            0 :    END SUBROUTINE cp_cfm_to_cfm_triangular
     750              : 
     751              : ! **************************************************************************************************
     752              : !> \brief Copy real and imaginary parts of a complex full matrix into
     753              : !>        separate real-value full matrices.
     754              : !> \param msource    complex matrix
     755              : !> \param mtargetr   (optional) real part of the source matrix
     756              : !> \param mtargeti   (optional) imaginary part of the source matrix
     757              : !> \note
     758              : !>        Matrix structures are assumed to be equivalent.
     759              : ! **************************************************************************************************
     760        82172 :    SUBROUTINE cp_cfm_to_fm(msource, mtargetr, mtargeti)
     761              : 
     762              :       TYPE(cp_cfm_type), INTENT(IN)                      :: msource
     763              :       TYPE(cp_fm_type), INTENT(IN), OPTIONAL             :: mtargetr, mtargeti
     764              : 
     765              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'cp_cfm_to_fm'
     766              : 
     767        82172 :       COMPLEX(kind=dp), DIMENSION(:, :), POINTER         :: zmat
     768              :       INTEGER                                            :: handle
     769        82172 :       REAL(kind=dp), DIMENSION(:, :), POINTER            :: imat, rmat
     770              : 
     771        82172 :       CALL timeset(routineN, handle)
     772              : 
     773        82172 :       zmat => msource%local_data
     774        82172 :       IF (PRESENT(mtargetr)) THEN
     775        73992 :          rmat => mtargetr%local_data
     776              :          IF ((.NOT. cp_fm_struct_equivalent(mtargetr%matrix_struct, msource%matrix_struct)) .OR. &
     777        73992 :              (SIZE(rmat, 1) .NE. SIZE(zmat, 1)) .OR. &
     778              :              (SIZE(rmat, 2) .NE. SIZE(zmat, 2))) THEN
     779            0 :             CPABORT("size of local_data of mtargetr differ to msource")
     780              :          END IF
     781              :          ! copy local data
     782     40851917 :          rmat = REAL(zmat, kind=dp)
     783              :       ELSE
     784        82172 :          NULLIFY (rmat)
     785              :       END IF
     786        82172 :       IF (PRESENT(mtargeti)) THEN
     787        66962 :          imat => mtargeti%local_data
     788              :          IF ((.NOT. cp_fm_struct_equivalent(mtargeti%matrix_struct, msource%matrix_struct)) .OR. &
     789        66962 :              (SIZE(imat, 1) .NE. SIZE(zmat, 1)) .OR. &
     790              :              (SIZE(imat, 2) .NE. SIZE(zmat, 2))) THEN
     791            0 :             CPABORT("size of local_data of mtargeti differ to msource")
     792              :          END IF
     793              :          ! copy local data
     794     40512335 :          imat = REAL(AIMAG(zmat), kind=dp)
     795              :       ELSE
     796        82172 :          NULLIFY (imat)
     797              :       END IF
     798              : 
     799        82172 :       CALL timestop(handle)
     800              : 
     801        82172 :    END SUBROUTINE cp_cfm_to_fm
     802              : 
     803              : ! **************************************************************************************************
     804              : !> \brief Construct a complex full matrix by taking its real and imaginary parts from
     805              : !>        two separate real-value full matrices.
     806              : !> \param msourcer   (optional) real part of the complex matrix (defaults to 0.0)
     807              : !> \param msourcei   (optional) imaginary part of the complex matrix (defaults to 0.0)
     808              : !> \param mtarget    resulting complex matrix
     809              : !> \note
     810              : !>        Matrix structures are assumed to be equivalent.
     811              : ! **************************************************************************************************
     812       107067 :    SUBROUTINE cp_fm_to_cfm(msourcer, msourcei, mtarget)
     813              :       TYPE(cp_fm_type), INTENT(IN), OPTIONAL             :: msourcer, msourcei
     814              :       TYPE(cp_cfm_type), INTENT(IN)                      :: mtarget
     815              : 
     816              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'cp_fm_to_cfm'
     817              : 
     818       107067 :       COMPLEX(kind=dp), DIMENSION(:, :), POINTER         :: zmat
     819              :       INTEGER                                            :: handle, mode
     820       107067 :       REAL(kind=dp), DIMENSION(:, :), POINTER            :: imat, rmat
     821              : 
     822       107067 :       CALL timeset(routineN, handle)
     823              : 
     824       107067 :       mode = 0
     825       107067 :       zmat => mtarget%local_data
     826       107067 :       IF (PRESENT(msourcer)) THEN
     827       102093 :          rmat => msourcer%local_data
     828              :          IF ((.NOT. cp_fm_struct_equivalent(msourcer%matrix_struct, mtarget%matrix_struct)) .OR. &
     829       102093 :              (SIZE(rmat, 1) .NE. SIZE(zmat, 1)) .OR. &
     830              :              (SIZE(rmat, 2) .NE. SIZE(zmat, 2))) THEN
     831            0 :             CPABORT("size of local_data of msourcer differ to mtarget")
     832              :          END IF
     833              :          mode = mode + 1
     834              :       ELSE
     835              :          NULLIFY (rmat)
     836              :       END IF
     837       107067 :       IF (PRESENT(msourcei)) THEN
     838        65070 :          imat => msourcei%local_data
     839              :          IF ((.NOT. cp_fm_struct_equivalent(msourcei%matrix_struct, mtarget%matrix_struct)) .OR. &
     840        65070 :              (SIZE(imat, 1) .NE. SIZE(zmat, 1)) .OR. &
     841              :              (SIZE(imat, 2) .NE. SIZE(zmat, 2))) THEN
     842            0 :             CPABORT("size of local_data of msourcei differ to mtarget")
     843              :          END IF
     844        65070 :          mode = mode + 2
     845              :       ELSE
     846              :          NULLIFY (imat)
     847              :       END IF
     848              :       ! copy local data
     849              :       SELECT CASE (mode)
     850              :       CASE (0)
     851            0 :          zmat(:, :) = z_zero
     852              :       CASE (1)
     853      5834878 :          zmat(:, :) = CMPLX(rmat(:, :), 0.0_dp, kind=dp)
     854              :       CASE (2)
     855        65798 :          zmat(:, :) = CMPLX(0.0_dp, imat(:, :), kind=dp)
     856              :       CASE (3)
     857     21009787 :          zmat(:, :) = CMPLX(rmat(:, :), imat(:, :), kind=dp)
     858              :       END SELECT
     859              : 
     860       107067 :       CALL timestop(handle)
     861              : 
     862       107067 :    END SUBROUTINE cp_fm_to_cfm
     863              : 
     864              : ! **************************************************************************************************
     865              : !> \brief Initiate the copy operation: get distribution data, post MPI isend and irecvs.
     866              : !> \param source       input complex-valued fm matrix
     867              : !> \param destination  output complex-valued fm matrix
     868              : !> \param para_env     parallel environment corresponding to the BLACS env that covers all parts
     869              : !>                     of the input and output matrices
     870              : !> \param info         all of the data that will be needed to complete the copy operation
     871              : !> \note a slightly modified version of the subroutine cp_fm_start_copy_general() that uses
     872              : !>       allocatable arrays instead of pointers wherever possible.
     873              : ! **************************************************************************************************
     874        91000 :    SUBROUTINE cp_cfm_start_copy_general(source, destination, para_env, info)
     875              :       TYPE(cp_cfm_type), INTENT(IN)                      :: source, destination
     876              :       TYPE(mp_para_env_type), INTENT(IN), POINTER        :: para_env
     877              :       TYPE(copy_cfm_info_type), INTENT(out)              :: info
     878              : 
     879              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_cfm_start_copy_general'
     880              : 
     881              :       INTEGER :: dest_p_i, dest_q_j, global_rank, global_size, handle, i, j, k, mpi_rank, &
     882              :          ncol_block_dest, ncol_block_src, ncol_local_recv, ncol_local_send, ncols, &
     883              :          nrow_block_dest, nrow_block_src, nrow_local_recv, nrow_local_send, nrows, p, q, &
     884              :          recv_rank, recv_size, send_rank, send_size
     885         9100 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: all_ranks, dest2global, dest_p, dest_q, &
     886        18200 :                                                             recv_count, send_count, send_disp, &
     887         9100 :                                                             source2global, src_p, src_q
     888         9100 :       INTEGER, ALLOCATABLE, DIMENSION(:, :)              :: dest_blacs2mpi
     889              :       INTEGER, DIMENSION(2)                              :: dest_block, dest_block_tmp, dest_num_pe, &
     890              :                                                             src_block, src_block_tmp, src_num_pe
     891        18200 :       INTEGER, DIMENSION(:), POINTER                     :: recv_col_indices, recv_row_indices, &
     892        18200 :                                                             send_col_indices, send_row_indices
     893              :       TYPE(cp_fm_struct_type), POINTER                   :: recv_dist, send_dist
     894       127400 :       TYPE(mp_request_type), DIMENSION(6)                :: recv_req, send_req
     895              : 
     896         9100 :       CALL timeset(routineN, handle)
     897              : 
     898              :       IF (.NOT. cp2k_is_parallel) THEN
     899              :          ! Just copy all of the matrix data into a 'send buffer', to be unpacked later
     900              :          nrow_local_send = SIZE(source%local_data, 1)
     901              :          ncol_local_send = SIZE(source%local_data, 2)
     902              :          ALLOCATE (info%send_buf(nrow_local_send*ncol_local_send))
     903              :          k = 0
     904              :          DO j = 1, ncol_local_send
     905              :             DO i = 1, nrow_local_send
     906              :                k = k + 1
     907              :                info%send_buf(k) = source%local_data(i, j)
     908              :             END DO
     909              :          END DO
     910              :       ELSE
     911         9100 :          NULLIFY (recv_dist, send_dist)
     912         9100 :          NULLIFY (recv_col_indices, recv_row_indices, send_col_indices, send_row_indices)
     913              : 
     914              :          ! The 'global' communicator contains both the source and destination decompositions
     915         9100 :          global_size = para_env%num_pe
     916         9100 :          global_rank = para_env%mepos
     917              : 
     918              :          ! The source/send decomposition and destination/recv decompositions may only exist on
     919              :          ! on a subset of the processes involved in the communication
     920              :          ! Check if the source and/or destination arguments are .not. ASSOCIATED():
     921              :          ! if so, skip the send / recv parts (since these processes do not participate in the sending/receiving distribution)
     922         9100 :          IF (ASSOCIATED(destination%matrix_struct)) THEN
     923         9100 :             recv_dist => destination%matrix_struct
     924         9100 :             recv_rank = recv_dist%para_env%mepos
     925              :          ELSE
     926            0 :             recv_rank = mp_proc_null
     927              :          END IF
     928              : 
     929         9100 :          IF (ASSOCIATED(source%matrix_struct)) THEN
     930         4550 :             send_dist => source%matrix_struct
     931         4550 :             send_rank = send_dist%para_env%mepos
     932              :          ELSE
     933         4550 :             send_rank = mp_proc_null
     934              :          END IF
     935              : 
     936              :          ! Map the rank in the source/dest communicator to the global rank
     937        27300 :          ALLOCATE (all_ranks(0:global_size - 1))
     938              : 
     939         9100 :          CALL para_env%allgather(send_rank, all_ranks)
     940         9100 :          IF (ASSOCIATED(destination%matrix_struct)) THEN
     941        45500 :             ALLOCATE (source2global(0:COUNT(all_ranks .NE. mp_proc_null) - 1))
     942        27300 :             DO i = 0, global_size - 1
     943        27300 :                IF (all_ranks(i) .NE. mp_proc_null) THEN
     944         9100 :                   source2global(all_ranks(i)) = i
     945              :                END IF
     946              :             END DO
     947              :          END IF
     948              : 
     949         9100 :          CALL para_env%allgather(recv_rank, all_ranks)
     950         9100 :          IF (ASSOCIATED(source%matrix_struct)) THEN
     951        22750 :             ALLOCATE (dest2global(0:COUNT(all_ranks .NE. mp_proc_null) - 1))
     952        13650 :             DO i = 0, global_size - 1
     953        13650 :                IF (all_ranks(i) .NE. mp_proc_null) THEN
     954         9100 :                   dest2global(all_ranks(i)) = i
     955              :                END IF
     956              :             END DO
     957              :          END IF
     958         9100 :          DEALLOCATE (all_ranks)
     959              : 
     960              :          ! Some data from the two decompositions will be needed by all processes in the global group :
     961              :          ! process grid shape, block size, and the BLACS-to-MPI mapping
     962              : 
     963              :          ! The global root process will receive the data (from the root process in each decomposition)
     964        63700 :          send_req(:) = mp_request_null
     965         9100 :          IF (global_rank == 0) THEN
     966        31850 :             recv_req(:) = mp_request_null
     967         4550 :             CALL para_env%irecv(src_block, mp_any_source, recv_req(1), tag=src_tag)
     968         4550 :             CALL para_env%irecv(dest_block, mp_any_source, recv_req(2), tag=dest_tag)
     969         4550 :             CALL para_env%irecv(src_num_pe, mp_any_source, recv_req(3), tag=src_tag)
     970         4550 :             CALL para_env%irecv(dest_num_pe, mp_any_source, recv_req(4), tag=dest_tag)
     971              :          END IF
     972              : 
     973         9100 :          IF (ASSOCIATED(source%matrix_struct)) THEN
     974         4550 :             IF ((send_rank == 0)) THEN
     975              :                ! need to use separate buffers here in case this is actually global rank 0
     976        13650 :                src_block_tmp = (/send_dist%nrow_block, send_dist%ncol_block/)
     977         4550 :                CALL para_env%isend(src_block_tmp, 0, send_req(1), tag=src_tag)
     978         4550 :                CALL para_env%isend(send_dist%context%num_pe, 0, send_req(2), tag=src_tag)
     979              :             END IF
     980              :          END IF
     981              : 
     982         9100 :          IF (ASSOCIATED(destination%matrix_struct)) THEN
     983         9100 :             IF ((recv_rank == 0)) THEN
     984        13650 :                dest_block_tmp = (/recv_dist%nrow_block, recv_dist%ncol_block/)
     985         4550 :                CALL para_env%isend(dest_block_tmp, 0, send_req(3), tag=dest_tag)
     986         4550 :                CALL para_env%isend(recv_dist%context%num_pe, 0, send_req(4), tag=dest_tag)
     987              :             END IF
     988              :          END IF
     989              : 
     990         9100 :          IF (global_rank == 0) THEN
     991         4550 :             CALL mp_waitall(recv_req(1:4))
     992              :             ! Now we know the process decomposition, we can allocate the arrays to hold the blacs2mpi mapping
     993            0 :             ALLOCATE (info%src_blacs2mpi(0:src_num_pe(1) - 1, 0:src_num_pe(2) - 1), &
     994        31850 :                       dest_blacs2mpi(0:dest_num_pe(1) - 1, 0:dest_num_pe(2) - 1))
     995         4550 :             CALL para_env%irecv(info%src_blacs2mpi, mp_any_source, recv_req(5), tag=src_tag)
     996         4550 :             CALL para_env%irecv(dest_blacs2mpi, mp_any_source, recv_req(6), tag=dest_tag)
     997              :          END IF
     998              : 
     999         9100 :          IF (ASSOCIATED(source%matrix_struct)) THEN
    1000         4550 :             IF ((send_rank == 0)) THEN
    1001         4550 :                CALL para_env%isend(send_dist%context%blacs2mpi(:, :), 0, send_req(5), tag=src_tag)
    1002              :             END IF
    1003              :          END IF
    1004              : 
    1005         9100 :          IF (ASSOCIATED(destination%matrix_struct)) THEN
    1006         9100 :             IF ((recv_rank == 0)) THEN
    1007         4550 :                CALL para_env%isend(recv_dist%context%blacs2mpi(:, :), 0, send_req(6), tag=dest_tag)
    1008              :             END IF
    1009              :          END IF
    1010              : 
    1011         9100 :          IF (global_rank == 0) THEN
    1012         4550 :             CALL mp_waitall(recv_req(5:6))
    1013              :          END IF
    1014              : 
    1015              :          ! Finally, broadcast the data to all processes in the global communicator
    1016         9100 :          CALL para_env%bcast(src_block, 0)
    1017         9100 :          CALL para_env%bcast(dest_block, 0)
    1018         9100 :          CALL para_env%bcast(src_num_pe, 0)
    1019         9100 :          CALL para_env%bcast(dest_num_pe, 0)
    1020        27300 :          info%src_num_pe(1:2) = src_num_pe(1:2)
    1021        27300 :          info%nblock_src(1:2) = src_block(1:2)
    1022         9100 :          IF (global_rank /= 0) THEN
    1023            0 :             ALLOCATE (info%src_blacs2mpi(0:src_num_pe(1) - 1, 0:src_num_pe(2) - 1), &
    1024        31850 :                       dest_blacs2mpi(0:dest_num_pe(1) - 1, 0:dest_num_pe(2) - 1))
    1025              :          END IF
    1026         9100 :          CALL para_env%bcast(info%src_blacs2mpi, 0)
    1027         9100 :          CALL para_env%bcast(dest_blacs2mpi, 0)
    1028              : 
    1029         9100 :          recv_size = dest_num_pe(1)*dest_num_pe(2)
    1030         9100 :          send_size = src_num_pe(1)*src_num_pe(2)
    1031         9100 :          info%send_size = send_size
    1032         9100 :          CALL mp_waitall(send_req(:))
    1033              : 
    1034              :          ! Setup is now complete, we can start the actual communication here.
    1035              :          ! The order implemented here is:
    1036              :          !  DEST_1
    1037              :          !      compute recv sizes
    1038              :          !      call irecv
    1039              :          !  SRC_1
    1040              :          !      compute send sizes
    1041              :          !      pack send buffers
    1042              :          !      call isend
    1043              :          !  DEST_2
    1044              :          !      wait for the recvs and unpack buffers (this part eventually will go into another routine to allow comms to run concurrently)
    1045              :          !  SRC_2
    1046              :          !      wait for the sends
    1047              : 
    1048              :          ! DEST_1
    1049         9100 :          IF (ASSOCIATED(destination%matrix_struct)) THEN
    1050              :             CALL cp_fm_struct_get(recv_dist, row_indices=recv_row_indices, &
    1051         9100 :                                   col_indices=recv_col_indices)
    1052         9100 :             info%recv_col_indices => recv_col_indices
    1053         9100 :             info%recv_row_indices => recv_row_indices
    1054         9100 :             nrow_block_src = src_block(1)
    1055         9100 :             ncol_block_src = src_block(2)
    1056        54600 :             ALLOCATE (recv_count(0:send_size - 1), info%recv_disp(0:send_size - 1), info%recv_request(0:send_size - 1))
    1057              : 
    1058              :             ! Determine the recv counts, allocate the receive buffers, call mpi_irecv for all the non-zero sized receives
    1059         9100 :             nrow_local_recv = recv_dist%nrow_locals(recv_dist%context%mepos(1))
    1060         9100 :             ncol_local_recv = recv_dist%ncol_locals(recv_dist%context%mepos(2))
    1061         9100 :             info%nlocal_recv(1) = nrow_local_recv
    1062         9100 :             info%nlocal_recv(2) = ncol_local_recv
    1063              :             ! Initialise src_p, src_q arrays (sized using number of rows/cols in the receiving distribution)
    1064        45500 :             ALLOCATE (src_p(nrow_local_recv), src_q(ncol_local_recv))
    1065        98140 :             DO i = 1, nrow_local_recv
    1066              :                ! For each local row we will receive, we look up its global row (in recv_row_indices),
    1067              :                ! then work out which row block it comes from, and which process row that row block comes from.
    1068        98140 :                src_p(i) = MOD(((recv_row_indices(i) - 1)/nrow_block_src), src_num_pe(1))
    1069              :             END DO
    1070       187180 :             DO j = 1, ncol_local_recv
    1071              :                ! Similarly for the columns
    1072       187180 :                src_q(j) = MOD(((recv_col_indices(j) - 1)/ncol_block_src), src_num_pe(2))
    1073              :             END DO
    1074              :             ! src_p/q now contains the process row/column ID that will send data to that row/column
    1075              : 
    1076        18200 :             DO q = 0, src_num_pe(2) - 1
    1077       187180 :                ncols = COUNT(src_q .EQ. q)
    1078        27300 :                DO p = 0, src_num_pe(1) - 1
    1079        98140 :                   nrows = COUNT(src_p .EQ. p)
    1080              :                   ! Use the send_dist here as we are looking up the processes where the data comes from
    1081        18200 :                   recv_count(info%src_blacs2mpi(p, q)) = nrows*ncols
    1082              :                END DO
    1083              :             END DO
    1084         9100 :             DEALLOCATE (src_p, src_q)
    1085              : 
    1086              :             ! Use one long buffer (and displacements into that buffer)
    1087              :             !     this prevents the need for a rectangular array where not all elements will be populated
    1088        36400 :             ALLOCATE (info%recv_buf(SUM(recv_count(:))))
    1089         9100 :             info%recv_disp(0) = 0
    1090         9100 :             DO i = 1, send_size - 1
    1091         9100 :                info%recv_disp(i) = info%recv_disp(i - 1) + recv_count(i - 1)
    1092              :             END DO
    1093              : 
    1094              :             ! Issue receive calls on ranks which expect data
    1095        18200 :             DO k = 0, send_size - 1
    1096        18200 :                IF (recv_count(k) .GT. 0) THEN
    1097              :                   CALL para_env%irecv(info%recv_buf(info%recv_disp(k) + 1:info%recv_disp(k) + recv_count(k)), &
    1098         9100 :                                       source2global(k), info%recv_request(k))
    1099              :                ELSE
    1100            0 :                   info%recv_request(k) = mp_request_null
    1101              :                END IF
    1102              :             END DO
    1103         9100 :             DEALLOCATE (source2global)
    1104              :          END IF ! ASSOCIATED(destination)
    1105              : 
    1106              :          ! SRC_1
    1107         9100 :          IF (ASSOCIATED(source%matrix_struct)) THEN
    1108              :             CALL cp_fm_struct_get(send_dist, row_indices=send_row_indices, &
    1109         4550 :                                   col_indices=send_col_indices)
    1110         4550 :             nrow_block_dest = dest_block(1)
    1111         4550 :             ncol_block_dest = dest_block(2)
    1112        31850 :             ALLOCATE (send_count(0:recv_size - 1), send_disp(0:recv_size - 1), info%send_request(0:recv_size - 1))
    1113              : 
    1114              :             ! Determine the send counts, allocate the send buffers
    1115         4550 :             nrow_local_send = send_dist%nrow_locals(send_dist%context%mepos(1))
    1116         4550 :             ncol_local_send = send_dist%ncol_locals(send_dist%context%mepos(2))
    1117              : 
    1118              :             ! Initialise dest_p, dest_q arrays (sized nrow_local, ncol_local)
    1119              :             !   i.e. number of rows,cols in the sending distribution
    1120        22750 :             ALLOCATE (dest_p(nrow_local_send), dest_q(ncol_local_send))
    1121              : 
    1122        93590 :             DO i = 1, nrow_local_send
    1123              :                ! Use the send_dist%row_indices() here (we are looping over the local rows we will send)
    1124        93590 :                dest_p(i) = MOD(((send_row_indices(i) - 1)/nrow_block_dest), dest_num_pe(1))
    1125              :             END DO
    1126        93590 :             DO j = 1, ncol_local_send
    1127        93590 :                dest_q(j) = MOD(((send_col_indices(j) - 1)/ncol_block_dest), dest_num_pe(2))
    1128              :             END DO
    1129              :             ! dest_p/q now contain the process row/column ID that will receive data from that row/column
    1130              : 
    1131         9100 :             DO q = 0, dest_num_pe(2) - 1
    1132        93590 :                ncols = COUNT(dest_q .EQ. q)
    1133        18200 :                DO p = 0, dest_num_pe(1) - 1
    1134       187180 :                   nrows = COUNT(dest_p .EQ. p)
    1135        13650 :                   send_count(dest_blacs2mpi(p, q)) = nrows*ncols
    1136              :                END DO
    1137              :             END DO
    1138         4550 :             DEALLOCATE (dest_p, dest_q)
    1139              : 
    1140              :             ! Allocate the send buffer using send_count -- and calculate the offset into the buffer for each process
    1141        22750 :             ALLOCATE (info%send_buf(SUM(send_count(:))))
    1142         4550 :             send_disp(0) = 0
    1143         9100 :             DO k = 1, recv_size - 1
    1144         9100 :                send_disp(k) = send_disp(k - 1) + send_count(k - 1)
    1145              :             END DO
    1146              : 
    1147              :             ! Loop over the smat, pack the send buffers
    1148        13650 :             send_count(:) = 0
    1149        93590 :             DO j = 1, ncol_local_send
    1150              :                ! Use send_col_indices and row_indices here, as we are looking up the global row/column number of local rows.
    1151        89040 :                dest_q_j = MOD(((send_col_indices(j) - 1)/ncol_block_dest), dest_num_pe(2))
    1152      2069270 :                DO i = 1, nrow_local_send
    1153      1975680 :                   dest_p_i = MOD(((send_row_indices(i) - 1)/nrow_block_dest), dest_num_pe(1))
    1154      1975680 :                   mpi_rank = dest_blacs2mpi(dest_p_i, dest_q_j)
    1155      1975680 :                   send_count(mpi_rank) = send_count(mpi_rank) + 1
    1156      2064720 :                   info%send_buf(send_disp(mpi_rank) + send_count(mpi_rank)) = source%local_data(i, j)
    1157              :                END DO
    1158              :             END DO
    1159              : 
    1160              :             ! For each non-zero send_count, call mpi_isend
    1161        13650 :             DO k = 0, recv_size - 1
    1162        13650 :                IF (send_count(k) .GT. 0) THEN
    1163              :                   CALL para_env%isend(info%send_buf(send_disp(k) + 1:send_disp(k) + send_count(k)), &
    1164         9100 :                                       dest2global(k), info%send_request(k))
    1165              :                ELSE
    1166            0 :                   info%send_request(k) = mp_request_null
    1167              :                END IF
    1168              :             END DO
    1169         4550 :             DEALLOCATE (send_count, send_disp, dest2global)
    1170              :          END IF ! ASSOCIATED(source)
    1171         9100 :          DEALLOCATE (dest_blacs2mpi)
    1172              : 
    1173              :       END IF !IF (.NOT. cp2k_is_parallel)
    1174              : 
    1175         9100 :       CALL timestop(handle)
    1176              : 
    1177        36400 :    END SUBROUTINE cp_cfm_start_copy_general
    1178              : 
    1179              : ! **************************************************************************************************
    1180              : !> \brief Complete the copy operation: wait for comms, unpack, clean up MPI state.
    1181              : !> \param destination  output cfm matrix
    1182              : !> \param info         all of the data that will be needed to complete the copy operation
    1183              : !> \note a slightly modified version of the subroutine cp_fm_finish_copy_general() that uses
    1184              : !>       allocatable arrays instead of pointers wherever possible.
    1185              : ! **************************************************************************************************
    1186         9100 :    SUBROUTINE cp_cfm_finish_copy_general(destination, info)
    1187              :       TYPE(cp_cfm_type), INTENT(IN)                      :: destination
    1188              :       TYPE(copy_cfm_info_type), INTENT(inout)            :: info
    1189              : 
    1190              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_cfm_finish_copy_general'
    1191              : 
    1192              :       INTEGER                                            :: handle, i, j, k, mpi_rank, ni, nj, &
    1193              :                                                             src_q_j
    1194         9100 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: recv_count, src_p_i
    1195         9100 :       INTEGER, DIMENSION(:), POINTER                     :: recv_col_indices, recv_row_indices
    1196              : 
    1197         9100 :       CALL timeset(routineN, handle)
    1198              : 
    1199              :       IF (.NOT. cp2k_is_parallel) THEN
    1200              :          ! Now unpack the data from the 'send buffer'
    1201              :          k = 0
    1202              :          DO j = 1, SIZE(destination%local_data, 2)
    1203              :             DO i = 1, SIZE(destination%local_data, 1)
    1204              :                k = k + 1
    1205              :                destination%local_data(i, j) = info%send_buf(k)
    1206              :             END DO
    1207              :          END DO
    1208              :          DEALLOCATE (info%send_buf)
    1209              :       ELSE
    1210              :          ! Set up local variables ...
    1211         9100 :          recv_col_indices => info%recv_col_indices
    1212         9100 :          recv_row_indices => info%recv_row_indices
    1213              : 
    1214              :          ! ... use the local variables to do the work
    1215              :          ! DEST_2
    1216         9100 :          CALL mp_waitall(info%recv_request(:))
    1217              : 
    1218         9100 :          nj = info%nlocal_recv(2)
    1219         9100 :          ni = info%nlocal_recv(1)
    1220        45500 :          ALLOCATE (recv_count(0:info%send_size - 1), src_p_i(ni))
    1221              :          ! Loop over the rmat, filling it in with data from the recv buffers
    1222              :          ! (here the block sizes, num_pes refer to the distribution of the source matrix)
    1223        18200 :          recv_count(:) = 0
    1224        98140 :          DO i = 1, ni
    1225        98140 :             src_p_i(i) = MOD(((recv_row_indices(i) - 1)/info%nblock_src(1)), info%src_num_pe(1))
    1226              :          END DO
    1227              : 
    1228       187180 :          DO j = 1, nj
    1229       178080 :             src_q_j = MOD(((recv_col_indices(j) - 1)/info%nblock_src(2)), info%src_num_pe(2))
    1230      2162860 :             DO i = 1, ni
    1231      1975680 :                mpi_rank = info%src_blacs2mpi(src_p_i(i), src_q_j)
    1232      1975680 :                recv_count(mpi_rank) = recv_count(mpi_rank) + 1
    1233      2153760 :                destination%local_data(i, j) = info%recv_buf(info%recv_disp(mpi_rank) + recv_count(mpi_rank))
    1234              :             END DO
    1235              :          END DO
    1236              : 
    1237         9100 :          DEALLOCATE (recv_count, src_p_i)
    1238              :          ! Invalidate the stored state
    1239         9100 :          NULLIFY (info%recv_col_indices, info%recv_row_indices)
    1240         9100 :          DEALLOCATE (info%recv_disp, info%recv_request, info%recv_buf, info%src_blacs2mpi)
    1241              :       END IF
    1242              : 
    1243         9100 :       CALL timestop(handle)
    1244              : 
    1245         9100 :    END SUBROUTINE cp_cfm_finish_copy_general
    1246              : 
    1247              : ! **************************************************************************************************
    1248              : !> \brief Complete the copy operation: wait for comms clean up MPI state.
    1249              : !> \param info    all of the data that will be needed to complete the copy operation
    1250              : !> \note a slightly modified version of the subroutine cp_fm_cleanup_copy_general() that uses
    1251              : !>       allocatable arrays instead of pointers wherever possible.
    1252              : ! **************************************************************************************************
    1253         4550 :    SUBROUTINE cp_cfm_cleanup_copy_general(info)
    1254              :       TYPE(copy_cfm_info_type), INTENT(inout)            :: info
    1255              : 
    1256              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_cfm_cleanup_copy_general'
    1257              : 
    1258              :       INTEGER                                            :: handle
    1259              : 
    1260         4550 :       CALL timeset(routineN, handle)
    1261              : 
    1262              :       IF (cp2k_is_parallel) THEN
    1263              :          ! SRC_2
    1264              :          ! If this process is also in the destination decomposition, this deallocate
    1265              :          ! Was already done in cp_fm_finish_copy_general
    1266         4550 :          IF (ALLOCATED(info%src_blacs2mpi)) DEALLOCATE (info%src_blacs2mpi)
    1267         4550 :          CALL mp_waitall(info%send_request(:))
    1268         4550 :          DEALLOCATE (info%send_request, info%send_buf)
    1269              :       END IF
    1270              : 
    1271         4550 :       CALL timestop(handle)
    1272         4550 :    END SUBROUTINE cp_cfm_cleanup_copy_general
    1273            0 : END MODULE cp_cfm_types
        

Generated by: LCOV version 2.0-1