LCOV - code coverage report
Current view: top level - src/fm - cp_fm_types.F (source / functions) Coverage Total Hit
Test: CP2K Regtests (git:936074a) Lines: 85.6 % 842 721
Test Date: 2025-12-04 06:27:48 Functions: 80.4 % 46 37

            Line data    Source code
       1              : !--------------------------------------------------------------------------------------------------!
       2              : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3              : !   Copyright 2000-2025 CP2K developers group <https://cp2k.org>                                   !
       4              : !                                                                                                  !
       5              : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6              : !--------------------------------------------------------------------------------------------------!
       7              : 
       8              : ! **************************************************************************************************
       9              : !> \brief represent a full matrix distributed on many processors
      10              : !> \par History
      11              : !>      3) separated structure object, removed globenv, renamed to full matrix
      12              : !>         many changes (fawzi 08.2002)
      13              : !> \author Matthias Krack (22.05.2001)
      14              : ! **************************************************************************************************
      15              : MODULE cp_fm_types
      16              :    USE cp_blacs_env,                    ONLY: cp_blacs_env_type
      17              :    USE cp_blacs_types,                  ONLY: cp_blacs_type
      18              :    USE cp_fm_struct,                    ONLY: cp_fm_struct_equivalent,&
      19              :                                               cp_fm_struct_get,&
      20              :                                               cp_fm_struct_release,&
      21              :                                               cp_fm_struct_retain,&
      22              :                                               cp_fm_struct_type,&
      23              :                                               cp_fm_struct_write_info
      24              :    USE kinds,                           ONLY: dp,&
      25              :                                               sp
      26              :    USE message_passing,                 ONLY: cp2k_is_parallel,&
      27              :                                               mp_any_source,&
      28              :                                               mp_para_env_type,&
      29              :                                               mp_proc_null,&
      30              :                                               mp_request_null,&
      31              :                                               mp_request_type,&
      32              :                                               mp_waitall
      33              :    USE parallel_rng_types,              ONLY: UNIFORM,&
      34              :                                               rng_stream_type
      35              : #include "../base/base_uses.f90"
      36              : 
      37              :    IMPLICIT NONE
      38              : 
      39              :    PRIVATE
      40              : 
      41              :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_fm_types'
      42              :    LOGICAL, PARAMETER          :: debug_this_module = .TRUE.
      43              :    INTEGER, PARAMETER :: src_tag = 3, dest_tag = 5, send_tag = 7, recv_tag = 11
      44              : 
      45              :    INTEGER, PRIVATE :: cp_fm_mm_type = 1
      46              : 
      47              :    PUBLIC :: cp_fm_type, &
      48              :              cp_fm_p_type, copy_info_type
      49              : 
      50              :    PUBLIC :: cp_fm_add_to_element, &
      51              :              cp_fm_create, &
      52              :              cp_fm_release, &
      53              :              cp_fm_get_info, &
      54              :              cp_fm_set_element, &
      55              :              cp_fm_get_element, &
      56              :              cp_fm_get_diag, & ! get diagonal
      57              :              cp_fm_set_all, & ! set all elements and diagonal
      58              :              cp_fm_set_all_submatrix, & ! set a submatrix to a given value
      59              :              cp_fm_set_submatrix, & ! set a submatrix to given values
      60              :              cp_fm_get_submatrix, & ! get a submatrix of given values
      61              :              cp_fm_init_random, &
      62              :              cp_fm_maxabsval, & ! find the maximum absolute value
      63              :              cp_fm_maxabsrownorm, & ! find the maximum of the sum of the abs of the elements of a row
      64              :              cp_fm_to_fm, & ! copy (parts of) a fm to a fm
      65              :              cp_fm_vectorsnorm, & ! compute the norm of the column-vectors
      66              :              cp_fm_vectorssum, & ! compute the sum of all elements of the column-vectors
      67              :              cp_fm_to_fm_submat, & ! copy (parts of) a fm to a fm
      68              :              cp_fm_to_fm_triangular, &
      69              :              cp_fm_copy_general, &
      70              :              cp_fm_start_copy_general, &
      71              :              cp_fm_finish_copy_general, &
      72              :              cp_fm_cleanup_copy_general, &
      73              :              cp_fm_write_unformatted, & ! writes a full matrix to an open unit
      74              :              cp_fm_write_formatted, & ! writes a full matrix to an open unit
      75              :              cp_fm_read_unformatted, & ! reads a full matrix from an open unit
      76              :              cp_fm_setup, & ! allows to set flags for fms
      77              :              cp_fm_get_mm_type, &
      78              :              cp_fm_write_info, &
      79              :              cp_fm_to_fm_submat_general ! copy matrix across different contexts
      80              : 
      81              :    PUBLIC :: cp_fm_pilaenv
      82              : 
      83              :    INTERFACE cp_fm_to_fm
      84              :       MODULE PROCEDURE cp_fm_to_fm_matrix, & ! a full matrix
      85              :          cp_fm_to_fm_columns ! just a number of columns
      86              :    END INTERFACE
      87              : 
      88              :    INTERFACE cp_fm_release
      89              :       MODULE PROCEDURE cp_fm_release_aa0, &
      90              :          cp_fm_release_aa1, &
      91              :          cp_fm_release_aa2, &
      92              :          cp_fm_release_aa3, &
      93              :          cp_fm_release_ap1, &
      94              :          cp_fm_release_ap2, &
      95              :          cp_fm_release_pa1, &
      96              :          cp_fm_release_pa2, &
      97              :          cp_fm_release_pa3, &
      98              :          cp_fm_release_pp1, &
      99              :          cp_fm_release_pp2
     100              :    END INTERFACE
     101              : 
     102              : ! **************************************************************************************************
     103              : !> \brief represent a full matrix
     104              : !> \param name the name of the matrix, used for printing
     105              : !> \param matrix_struct structure of this matrix
     106              : !> \param local_data array with the data of the matrix (its contents
     107              : !>        depend on the matrix type used: in parallel runs it will be
     108              : !>        in scalapack format, in sequential, it will simply contain
     109              : !>        the matrix)
     110              : !> \par History
     111              : !>      08.2002 created [fawzi]
     112              : !> \author fawzi
     113              : ! **************************************************************************************************
     114              :    TYPE cp_fm_type
     115              : !    PRIVATE
     116              :       CHARACTER(LEN=60) :: name = ""
     117              :       LOGICAL :: use_sp = .FALSE.
     118              :       TYPE(cp_fm_struct_type), POINTER :: matrix_struct => NULL()
     119              :       REAL(KIND=dp), DIMENSION(:, :), POINTER, CONTIGUOUS :: local_data => NULL()
     120              :       REAL(KIND=sp), DIMENSION(:, :), POINTER, CONTIGUOUS :: local_data_sp => NULL()
     121              :    END TYPE cp_fm_type
     122              : 
     123              : ! **************************************************************************************************
     124              : !> \brief just to build arrays of pointers to matrices
     125              : !> \param matrix the pointer to the matrix
     126              : !> \par History
     127              : !>      08.2002 created [fawzi]
     128              : !> \author fawzi
     129              : ! **************************************************************************************************
     130              :    TYPE cp_fm_p_type
     131              :       TYPE(cp_fm_type), POINTER :: matrix => NULL()
     132              :    END TYPE cp_fm_p_type
     133              : 
     134              : ! **************************************************************************************************
     135              : !> \brief Stores the state of a copy between cp_fm_start_copy_general
     136              : !>        and cp_fm_finish_copy_general
     137              : !> \par History
     138              : !>      Jan 2017  [Mark T]
     139              : ! **************************************************************************************************
     140              :    TYPE copy_info_type
     141              :       INTEGER :: send_size = -1
     142              :       INTEGER, DIMENSION(2) :: nlocal_recv = -1, nblock_src = -1, src_num_pe = -1 ! 1->row  2->col
     143              :       TYPE(mp_request_type), DIMENSION(:), ALLOCATABLE :: send_request, recv_request
     144              :       INTEGER, DIMENSION(:), ALLOCATABLE   :: recv_disp
     145              :       INTEGER, DIMENSION(:), POINTER       :: recv_col_indices => NULL(), recv_row_indices => NULL()
     146              :       INTEGER, DIMENSION(:, :), ALLOCATABLE :: src_blacs2mpi
     147              :       REAL(KIND=dp), DIMENSION(:), ALLOCATABLE :: recv_buf, send_buf
     148              :    END TYPE copy_info_type
     149              : 
     150              : CONTAINS
     151              : 
     152              : ! **************************************************************************************************
     153              : !> \brief creates a new full matrix with the given structure
     154              : !> \param matrix the matrix to be created
     155              : !> \param matrix_struct the structure of matrix
     156              : !> \param name ...
     157              : !> \param use_sp ...
     158              : !> \param set_zero ...
     159              : !> \par History
     160              : !>      08.2002 created [fawzi]
     161              : !> \author Fawzi Mohamed
     162              : !> \note
     163              : !>      preferred allocation routine
     164              : ! **************************************************************************************************
     165      1338009 :    SUBROUTINE cp_fm_create(matrix, matrix_struct, name, use_sp, set_zero)
     166              :       TYPE(cp_fm_type), INTENT(OUT)                      :: matrix
     167              :       TYPE(cp_fm_struct_type), INTENT(IN), TARGET        :: matrix_struct
     168              :       CHARACTER(LEN=*), INTENT(in), OPTIONAL             :: name
     169              :       LOGICAL, INTENT(in), OPTIONAL                      :: use_sp, set_zero
     170              : 
     171              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'cp_fm_create'
     172              : 
     173              :       INTEGER                                            :: handle, ncol_local, npcol, nprow, &
     174              :                                                             nrow_local
     175              :       TYPE(cp_blacs_env_type), POINTER                   :: context
     176              : 
     177      1338009 :       CALL timeset(routineN, handle)
     178              : 
     179      1338009 :       context => matrix_struct%context
     180      1338009 :       matrix%matrix_struct => matrix_struct
     181      1338009 :       CALL cp_fm_struct_retain(matrix%matrix_struct)
     182              : 
     183      1338009 :       matrix%use_sp = .FALSE.
     184      1338009 :       IF (PRESENT(use_sp)) matrix%use_sp = use_sp
     185              : 
     186      1338009 :       nprow = context%num_pe(1)
     187      1338009 :       npcol = context%num_pe(2)
     188      1338009 :       NULLIFY (matrix%local_data)
     189      1338009 :       NULLIFY (matrix%local_data_sp)
     190              : 
     191              :       ! OK, we allocate here at least a 1 x 1 matrix
     192              :       ! this must (and is) compatible with the descinit call
     193              :       ! in cp_fm_struct
     194      1338009 :       nrow_local = matrix_struct%local_leading_dimension
     195      1338009 :       ncol_local = MAX(1, matrix_struct%ncol_locals(context%mepos(2)))
     196      1338009 :       IF (matrix%use_sp) THEN
     197            0 :          ALLOCATE (matrix%local_data_sp(nrow_local, ncol_local))
     198              :       ELSE
     199      5352036 :          ALLOCATE (matrix%local_data(nrow_local, ncol_local))
     200              :       END IF
     201              : 
     202      1338009 :       IF (PRESENT(set_zero)) THEN
     203       185549 :          IF (set_zero) THEN
     204       185549 :             IF (matrix%use_sp) THEN
     205            0 :                matrix%local_data_sp(1:nrow_local, 1:ncol_local) = 0.0_sp
     206              :             ELSE
     207     53160382 :                matrix%local_data(1:nrow_local, 1:ncol_local) = 0.0_dp
     208              :             END IF
     209              :          END IF
     210              :       END IF
     211              : 
     212      1338009 :       IF (PRESENT(name)) THEN
     213       676735 :          matrix%name = name
     214              :       ELSE
     215       661274 :          matrix%name = 'full matrix'
     216              :       END IF
     217              : 
     218      1338009 :       CALL timestop(handle)
     219              : 
     220      1338009 :    END SUBROUTINE cp_fm_create
     221              : 
     222              : ! **************************************************************************************************
     223              : !> \brief releases a full matrix
     224              : !> \param matrix the matrix to release
     225              : !> \par History
     226              : !>      08.2002 created [fawzi]
     227              : !> \author Fawzi Mohamed
     228              : ! **************************************************************************************************
     229      1349305 :    SUBROUTINE cp_fm_release_aa0(matrix)
     230              :       TYPE(cp_fm_type), INTENT(INOUT)                    :: matrix
     231              : 
     232      1349305 :       IF (ASSOCIATED(matrix%local_data)) THEN
     233      1337293 :          DEALLOCATE (matrix%local_data)
     234              :          NULLIFY (matrix%local_data)
     235              :       END IF
     236      1349305 :       IF (ASSOCIATED(matrix%local_data_sp)) THEN
     237            0 :          DEALLOCATE (matrix%local_data_sp)
     238              :          NULLIFY (matrix%local_data_sp)
     239              :       END IF
     240      1349305 :       matrix%name = ""
     241      1349305 :       CALL cp_fm_struct_release(matrix%matrix_struct)
     242              : 
     243      1349305 :    END SUBROUTINE cp_fm_release_aa0
     244              : 
     245              : ! **************************************************************************************************
     246              : !> \brief ...
     247              : !> \param matrices ...
     248              : ! **************************************************************************************************
     249        73690 :    SUBROUTINE cp_fm_release_aa1(matrices)
     250              :       TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:)        :: matrices
     251              : 
     252              :       INTEGER                                            :: i
     253              : 
     254        73690 :       IF (ALLOCATED(matrices)) THEN
     255       198707 :          DO i = 1, SIZE(matrices)
     256       198707 :             CALL cp_fm_release(matrices(i))
     257              :          END DO
     258        72544 :          DEALLOCATE (matrices)
     259              :       END IF
     260        73690 :    END SUBROUTINE cp_fm_release_aa1
     261              : 
     262              : ! **************************************************************************************************
     263              : !> \brief ...
     264              : !> \param matrices ...
     265              : ! **************************************************************************************************
     266         8150 :    SUBROUTINE cp_fm_release_aa2(matrices)
     267              :       TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:, :)     :: matrices
     268              : 
     269              :       INTEGER                                            :: i, j
     270              : 
     271         8150 :       IF (ALLOCATED(matrices)) THEN
     272        17312 :          DO i = 1, SIZE(matrices, 1)
     273        47916 :             DO j = 1, SIZE(matrices, 2)
     274        42156 :                CALL cp_fm_release(matrices(i, j))
     275              :             END DO
     276              :          END DO
     277         5760 :          DEALLOCATE (matrices)
     278              :       END IF
     279         8150 :    END SUBROUTINE cp_fm_release_aa2
     280              : 
     281              : ! **************************************************************************************************
     282              : !> \brief ...
     283              : !> \param matrices ...
     284              : ! **************************************************************************************************
     285           36 :    SUBROUTINE cp_fm_release_aa3(matrices)
     286              :       TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:, :, :)  :: matrices
     287              : 
     288              :       INTEGER                                            :: i, j, k
     289              : 
     290           36 :       IF (ALLOCATED(matrices)) THEN
     291          388 :          DO i = 1, SIZE(matrices, 1)
     292         1120 :             DO j = 1, SIZE(matrices, 2)
     293         1896 :                DO k = 1, SIZE(matrices, 3)
     294         1544 :                   CALL cp_fm_release(matrices(i, j, k))
     295              :                END DO
     296              :             END DO
     297              :          END DO
     298           36 :          DEALLOCATE (matrices)
     299              :       END IF
     300           36 :    END SUBROUTINE cp_fm_release_aa3
     301              : 
     302              : ! **************************************************************************************************
     303              : !> \brief ...
     304              : !> \param matrices ...
     305              : ! **************************************************************************************************
     306       152284 :    SUBROUTINE cp_fm_release_pa1(matrices)
     307              :       TYPE(cp_fm_type), DIMENSION(:), POINTER            :: matrices
     308              : 
     309              :       INTEGER                                            :: i
     310              : 
     311       152284 :       IF (ASSOCIATED(matrices)) THEN
     312       121447 :          DO i = 1, SIZE(matrices)
     313       121447 :             CALL cp_fm_release(matrices(i))
     314              :          END DO
     315        52146 :          DEALLOCATE (matrices)
     316              :          NULLIFY (matrices)
     317              :       END IF
     318       152284 :    END SUBROUTINE cp_fm_release_pa1
     319              : 
     320              : ! **************************************************************************************************
     321              : !> \brief ...
     322              : !> \param matrices ...
     323              : ! **************************************************************************************************
     324        29042 :    SUBROUTINE cp_fm_release_pa2(matrices)
     325              :       TYPE(cp_fm_type), DIMENSION(:, :), POINTER         :: matrices
     326              : 
     327              :       INTEGER                                            :: i, j
     328              : 
     329        29042 :       IF (ASSOCIATED(matrices)) THEN
     330        45662 :          DO i = 1, SIZE(matrices, 1)
     331        89382 :             DO j = 1, SIZE(matrices, 2)
     332        78030 :                CALL cp_fm_release(matrices(i, j))
     333              :             END DO
     334              :          END DO
     335        11352 :          DEALLOCATE (matrices)
     336              :          NULLIFY (matrices)
     337              :       END IF
     338        29042 :    END SUBROUTINE cp_fm_release_pa2
     339              : 
     340              : ! **************************************************************************************************
     341              : !> \brief ...
     342              : !> \param matrices ...
     343              : ! **************************************************************************************************
     344            0 :    SUBROUTINE cp_fm_release_pa3(matrices)
     345              :       TYPE(cp_fm_type), DIMENSION(:, :, :), POINTER      :: matrices
     346              : 
     347              :       INTEGER                                            :: i, j, k
     348              : 
     349            0 :       IF (ASSOCIATED(matrices)) THEN
     350            0 :          DO i = 1, SIZE(matrices, 1)
     351            0 :             DO j = 1, SIZE(matrices, 2)
     352            0 :                DO k = 1, SIZE(matrices, 3)
     353            0 :                   CALL cp_fm_release(matrices(i, j, k))
     354              :                END DO
     355              :             END DO
     356              :          END DO
     357            0 :          DEALLOCATE (matrices)
     358              :          NULLIFY (matrices)
     359              :       END IF
     360            0 :    END SUBROUTINE cp_fm_release_pa3
     361              : 
     362              : ! **************************************************************************************************
     363              : !> \brief ...
     364              : !> \param matrices ...
     365              : ! **************************************************************************************************
     366            0 :    SUBROUTINE cp_fm_release_ap1(matrices)
     367              :       TYPE(cp_fm_p_type), ALLOCATABLE, DIMENSION(:)      :: matrices
     368              : 
     369              :       INTEGER                                            :: i
     370              : 
     371            0 :       IF (ALLOCATED(matrices)) THEN
     372            0 :          DO i = 1, SIZE(matrices)
     373            0 :             CALL cp_fm_release(matrices(i)%matrix)
     374            0 :             DEALLOCATE (matrices(i)%matrix)
     375              :          END DO
     376            0 :          DEALLOCATE (matrices)
     377              :       END IF
     378            0 :    END SUBROUTINE cp_fm_release_ap1
     379              : 
     380              : ! **************************************************************************************************
     381              : !> \brief ...
     382              : !> \param matrices ...
     383              : ! **************************************************************************************************
     384            0 :    SUBROUTINE cp_fm_release_ap2(matrices)
     385              :       TYPE(cp_fm_p_type), ALLOCATABLE, DIMENSION(:, :)   :: matrices
     386              : 
     387              :       INTEGER                                            :: i, j
     388              : 
     389            0 :       IF (ALLOCATED(matrices)) THEN
     390            0 :          DO i = 1, SIZE(matrices, 1)
     391            0 :             DO j = 1, SIZE(matrices, 2)
     392            0 :                CALL cp_fm_release(matrices(i, j)%matrix)
     393            0 :                DEALLOCATE (matrices(i, j)%matrix)
     394              :             END DO
     395              :          END DO
     396            0 :          DEALLOCATE (matrices)
     397              :       END IF
     398            0 :    END SUBROUTINE cp_fm_release_ap2
     399              : 
     400              : ! **************************************************************************************************
     401              : !> \brief ...
     402              : !> \param matrices ...
     403              : ! **************************************************************************************************
     404            0 :    SUBROUTINE cp_fm_release_pp1(matrices)
     405              :       TYPE(cp_fm_p_type), DIMENSION(:), POINTER          :: matrices
     406              : 
     407              :       INTEGER                                            :: i
     408              : 
     409            0 :       IF (ASSOCIATED(matrices)) THEN
     410            0 :          DO i = 1, SIZE(matrices)
     411            0 :             CALL cp_fm_release(matrices(i)%matrix)
     412            0 :             DEALLOCATE (matrices(i)%matrix)
     413              :          END DO
     414            0 :          DEALLOCATE (matrices)
     415              :          NULLIFY (matrices)
     416              :       END IF
     417            0 :    END SUBROUTINE cp_fm_release_pp1
     418              : 
     419              : ! **************************************************************************************************
     420              : !> \brief ...
     421              : !> \param matrices ...
     422              : ! **************************************************************************************************
     423            0 :    SUBROUTINE cp_fm_release_pp2(matrices)
     424              :       TYPE(cp_fm_p_type), DIMENSION(:, :), POINTER       :: matrices
     425              : 
     426              :       INTEGER                                            :: i, j
     427              : 
     428            0 :       IF (ASSOCIATED(matrices)) THEN
     429            0 :          DO i = 1, SIZE(matrices, 1)
     430            0 :             DO j = 1, SIZE(matrices, 2)
     431            0 :                CALL cp_fm_release(matrices(i, j)%matrix)
     432            0 :                DEALLOCATE (matrices(i, j)%matrix)
     433              :             END DO
     434              :          END DO
     435            0 :          DEALLOCATE (matrices)
     436              :          NULLIFY (matrices)
     437              :       END IF
     438            0 :    END SUBROUTINE cp_fm_release_pp2
     439              : 
     440              : ! **************************************************************************************************
     441              : !> \brief fills a matrix with random numbers
     442              : !> \param matrix : to be initialized
     443              : !> \param ncol : numbers of cols to fill
     444              : !> \param start_col : starting at coll number
     445              : !> \author Joost VandeVondele
     446              : !> \note
     447              : !>      the value of a_ij is independent of the number of cpus
     448              : ! **************************************************************************************************
     449         2634 :    SUBROUTINE cp_fm_init_random(matrix, ncol, start_col)
     450              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix
     451              :       INTEGER, INTENT(IN), OPTIONAL                      :: ncol, start_col
     452              : 
     453              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'cp_fm_init_random'
     454              : 
     455              :       INTEGER :: handle, icol_global, icol_local, irow_local, my_ncol, my_start_col, ncol_global, &
     456              :          ncol_local, nrow_global, nrow_local
     457         5268 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
     458         2634 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: buff
     459              :       REAL(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
     460         2634 :          POINTER                                         :: local_data
     461              :       REAL(KIND=dp), DIMENSION(3, 2), SAVE :: &
     462              :          seed = RESHAPE([1.0_dp, 2.0_dp, 3.0_dp, 4.0_dp, 5.0_dp, 6.0_dp], [3, 2])
     463              :       TYPE(rng_stream_type)                              :: rng
     464              : 
     465         2634 :       CALL timeset(routineN, handle)
     466              : 
     467              :       ! guarantee same seed over all tasks
     468         2634 :       CALL matrix%matrix_struct%para_env%bcast(seed, 0)
     469              : 
     470              :       rng = rng_stream_type("cp_fm_init_random_stream", distribution_type=UNIFORM, &
     471         2634 :                             extended_precision=.TRUE., seed=seed)
     472              : 
     473         2634 :       CPASSERT(.NOT. matrix%use_sp)
     474              : 
     475              :       CALL cp_fm_get_info(matrix, nrow_global=nrow_global, ncol_global=ncol_global, &
     476              :                           nrow_local=nrow_local, ncol_local=ncol_local, &
     477              :                           local_data=local_data, &
     478         2634 :                           row_indices=row_indices, col_indices=col_indices)
     479              : 
     480         2634 :       my_start_col = 1
     481         2634 :       IF (PRESENT(start_col)) my_start_col = start_col
     482         2634 :       my_ncol = matrix%matrix_struct%ncol_global
     483         2634 :       IF (PRESENT(ncol)) my_ncol = ncol
     484              : 
     485         2634 :       IF (ncol_global < (my_start_col + my_ncol - 1)) &
     486            0 :          CPABORT("ncol_global>=(my_start_col+my_ncol-1)")
     487              : 
     488         7902 :       ALLOCATE (buff(nrow_global))
     489              : 
     490              :       ! each global row has its own substream, in order to reach the stream for the local col,
     491              :       ! we just reset to the next substream
     492              :       ! following this, we fill the full buff with random numbers, and pick those we need
     493         2634 :       icol_global = 0
     494        21729 :       DO icol_local = 1, ncol_local
     495        19095 :          CPASSERT(col_indices(icol_local) > icol_global)
     496              :          DO
     497        19095 :             CALL rng%reset_to_next_substream()
     498        19095 :             icol_global = icol_global + 1
     499        19095 :             IF (icol_global == col_indices(icol_local)) EXIT
     500              :          END DO
     501        19095 :          CALL rng%fill(buff)
     502       790280 :          DO irow_local = 1, nrow_local
     503       787646 :             local_data(irow_local, icol_local) = buff(row_indices(irow_local))
     504              :          END DO
     505              :       END DO
     506              : 
     507         2634 :       DEALLOCATE (buff)
     508              : 
     509              :       ! store seed before deletion (unclear if this is the proper seed)
     510              : 
     511              :       ! Note that, the initial state (rng%ig) instead of the current state (rng%cg) is stored in the
     512              :       ! seed variable. As a consequence, each invocation of cp_fm_init_random uses exactly the same
     513              :       ! stream of random numbers. While this seems odd and contrary to the original design,
     514              :       ! it was probably introduced to improve reproducibility.
     515              :       ! See also https://github.com/cp2k/cp2k/pull/506
     516         2634 :       CALL rng%get(ig=seed)
     517              : 
     518         2634 :       CALL timestop(handle)
     519              : 
     520        73752 :    END SUBROUTINE cp_fm_init_random
     521              : 
     522              : ! **************************************************************************************************
     523              : !> \brief set all elements of a matrix to the same value,
     524              : !>      and optionally the diagonal to a different one
     525              : !> \param matrix input matrix
     526              : !> \param alpha scalar used to set all elements of the matrix
     527              : !> \param beta scalar used to set diagonal of the matrix
     528              : !> \note
     529              : !>      can be used to zero a matrix
     530              : !>      can be used to create a unit matrix (I-matrix) alpha=0.0_dp beta=1.0_dp
     531              : ! **************************************************************************************************
     532       346843 :    SUBROUTINE cp_fm_set_all(matrix, alpha, beta)
     533              : 
     534              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix
     535              :       REAL(KIND=dp), INTENT(IN)                          :: alpha
     536              :       REAL(KIND=dp), INTENT(IN), OPTIONAL                :: beta
     537              : 
     538              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'cp_fm_set_all'
     539              : 
     540              :       INTEGER                                            :: handle, i, n
     541              : 
     542       346843 :       CALL timeset(routineN, handle)
     543              : 
     544       346843 :       IF (matrix%use_sp) THEN
     545            0 :          matrix%local_data_sp(:, :) = REAL(alpha, sp)
     546              :       ELSE
     547    194753306 :          matrix%local_data(:, :) = alpha
     548              :       END IF
     549              : 
     550       346843 :       IF (PRESENT(beta)) THEN
     551        58341 :          CPASSERT(.NOT. matrix%use_sp)
     552        58341 :          n = MIN(matrix%matrix_struct%nrow_global, matrix%matrix_struct%ncol_global)
     553       529389 :          DO i = 1, n
     554       529389 :             CALL cp_fm_set_element(matrix, i, i, beta)
     555              :          END DO
     556              :       END IF
     557              : 
     558       346843 :       CALL timestop(handle)
     559              : 
     560       346843 :    END SUBROUTINE cp_fm_set_all
     561              : 
     562              : ! **************************************************************************************************
     563              : !> \brief returns the diagonal elements of a fm
     564              : !> \param matrix ...
     565              : !> \param diag ...
     566              : ! **************************************************************************************************
     567        13954 :    SUBROUTINE cp_fm_get_diag(matrix, diag)
     568              : 
     569              :       ! arguments
     570              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix
     571              :       REAL(KIND=dp), DIMENSION(:), INTENT(OUT) :: diag
     572              : 
     573              :       ! locals
     574              :       INTEGER :: i, nrow_global
     575              : 
     576              : #if defined(__parallel)
     577              :       INTEGER, DIMENSION(9) :: desca
     578              :       TYPE(cp_blacs_env_type), POINTER :: context
     579              :       INTEGER :: icol_local, ipcol, iprow, irow_local, mypcol, myprow, npcol, &
     580              :                  nprow
     581        13954 :       REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
     582        13954 :       REAL(KIND=sp), DIMENSION(:, :), POINTER :: a_sp
     583              : #endif
     584              : 
     585        13954 :       CALL cp_fm_get_info(matrix, nrow_global=nrow_global)
     586              : 
     587              : #if defined(__parallel)
     588       178660 :       diag = 0.0_dp
     589        13954 :       context => matrix%matrix_struct%context
     590        13954 :       myprow = context%mepos(1)
     591        13954 :       mypcol = context%mepos(2)
     592        13954 :       nprow = context%num_pe(1)
     593        13954 :       npcol = context%num_pe(2)
     594              : 
     595        13954 :       a => matrix%local_data
     596        13954 :       a_sp => matrix%local_data_sp
     597       139540 :       desca(:) = matrix%matrix_struct%descriptor(:)
     598              : 
     599       178660 :       DO i = 1, nrow_global
     600              :          CALL infog2l(i, i, desca, nprow, npcol, myprow, mypcol, &
     601       164706 :                       irow_local, icol_local, iprow, ipcol)
     602       178660 :          IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
     603        82437 :             IF (matrix%use_sp) THEN
     604            0 :                diag(i) = REAL(a_sp(irow_local, icol_local), dp)
     605              :             ELSE
     606        82437 :                diag(i) = a(irow_local, icol_local)
     607              :             END IF
     608              :          END IF
     609              :       END DO
     610              : #else
     611              :       DO i = 1, nrow_global
     612              :          IF (matrix%use_sp) THEN
     613              :             diag(i) = REAL(matrix%local_data_sp(i, i), dp)
     614              :          ELSE
     615              :             diag(i) = matrix%local_data(i, i)
     616              :          END IF
     617              :       END DO
     618              : #endif
     619       343366 :       CALL matrix%matrix_struct%para_env%sum(diag)
     620              : 
     621        13954 :    END SUBROUTINE cp_fm_get_diag
     622              : 
     623              : ! **************************************************************************************************
     624              : !> \brief returns an element of a fm
     625              : !>      this value is valid on every cpu
     626              : !>      using this call is expensive
     627              : !> \param matrix the matrix to read
     628              : !> \param irow_global the row
     629              : !> \param icol_global the col
     630              : !> \param alpha the value of matrix(irow_global, icol_global)
     631              : !> \param local true if the element is on this cpu, false otherwise
     632              : !> \note
     633              : !>      - modified semantics. now this function always returns the value
     634              : !>        previously the value was zero on cpus that didn't own the relevant
     635              : !>        part of the matrix (Joost VandeVondele, May 2003)
     636              : !>      - usage of the function should be avoided, as it is likely to rather slow
     637              : !>        using row_indices/col_indices/local_data + some smart scheme normally
     638              : !>        yields a real parallel code
     639              : ! **************************************************************************************************
     640      2504148 :    SUBROUTINE cp_fm_get_element(matrix, irow_global, icol_global, alpha, local)
     641              : 
     642              :       ! arguments
     643              :       TYPE(cp_fm_type), INTENT(IN)          :: matrix
     644              :       REAL(KIND=dp), INTENT(OUT)                     :: alpha
     645              :       INTEGER, INTENT(IN)                       :: icol_global, &
     646              :                                                    irow_global
     647              :       LOGICAL, INTENT(OUT), OPTIONAL            :: local
     648              : 
     649              :       ! locals
     650              : #if defined(__parallel)
     651              :       INTEGER, DIMENSION(9) :: desca
     652              :       TYPE(cp_blacs_env_type), POINTER :: context
     653              :       INTEGER :: icol_local, ipcol, iprow, irow_local, mypcol, myprow, npcol, &
     654              :                  nprow
     655      2504148 :       REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
     656              : #endif
     657              : 
     658              : #if defined(__parallel)
     659      2504148 :       context => matrix%matrix_struct%context
     660      2504148 :       myprow = context%mepos(1)
     661      2504148 :       mypcol = context%mepos(2)
     662      2504148 :       nprow = context%num_pe(1)
     663      2504148 :       npcol = context%num_pe(2)
     664              : 
     665      2504148 :       a => matrix%local_data
     666     25041480 :       desca(:) = matrix%matrix_struct%descriptor(:)
     667              : 
     668              :       CALL infog2l(irow_global, icol_global, desca, nprow, npcol, myprow, mypcol, &
     669      2504148 :                    irow_local, icol_local, iprow, ipcol)
     670              : 
     671      2504148 :       IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
     672      1252074 :          alpha = a(irow_local, icol_local)
     673      1252074 :          CALL context%dgebs2d('All', ' ', 1, 1, alpha, 1)
     674      1252074 :          IF (PRESENT(local)) local = .TRUE.
     675              :       ELSE
     676      1252074 :          CALL context%dgebr2d('All', ' ', 1, 1, alpha, 1, iprow, ipcol)
     677      1252074 :          IF (PRESENT(local)) local = .FALSE.
     678              :       END IF
     679              : 
     680              : #else
     681              :       IF (PRESENT(local)) local = .TRUE.
     682              :       alpha = matrix%local_data(irow_global, icol_global)
     683              : #endif
     684              : 
     685      2504148 :    END SUBROUTINE cp_fm_get_element
     686              : 
     687              : ! **************************************************************************************************
     688              : !> \brief sets an element of a matrix
     689              : !> \param matrix ...
     690              : !> \param irow_global ...
     691              : !> \param icol_global ...
     692              : !> \param alpha ...
     693              : !> \note
     694              : !>      we expect all cpus to have the same arguments in the call to this function
     695              : !>      (otherwise one should use local_data tricks)
     696              : ! **************************************************************************************************
     697       701258 :    SUBROUTINE cp_fm_set_element(matrix, irow_global, icol_global, alpha)
     698              :       TYPE(cp_fm_type), INTENT(IN)          :: matrix
     699              :       INTEGER, INTENT(IN)                      :: irow_global, icol_global
     700              :       REAL(KIND=dp), INTENT(IN)                :: alpha
     701              : 
     702              :       INTEGER                                  :: mypcol, myprow, npcol, nprow
     703              :       TYPE(cp_blacs_env_type), POINTER         :: context
     704              : #if defined(__parallel)
     705              :       INTEGER                                  :: icol_local, ipcol, iprow, &
     706              :                                                   irow_local
     707              :       INTEGER, DIMENSION(9)                    :: desca
     708       701258 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
     709              : #endif
     710              : 
     711       701258 :       context => matrix%matrix_struct%context
     712       701258 :       myprow = context%mepos(1)
     713       701258 :       mypcol = context%mepos(2)
     714       701258 :       nprow = context%num_pe(1)
     715       701258 :       npcol = context%num_pe(2)
     716              : 
     717            0 :       CPASSERT(.NOT. matrix%use_sp)
     718              : 
     719              : #if defined(__parallel)
     720              : 
     721       701258 :       a => matrix%local_data
     722              : 
     723      7012580 :       desca(:) = matrix%matrix_struct%descriptor(:)
     724              : 
     725              :       CALL infog2l(irow_global, icol_global, desca, nprow, npcol, myprow, mypcol, &
     726       701258 :                    irow_local, icol_local, iprow, ipcol)
     727              : 
     728       701258 :       IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
     729       354439 :          a(irow_local, icol_local) = alpha
     730              :       END IF
     731              : 
     732              : #else
     733              : 
     734              :       matrix%local_data(irow_global, icol_global) = alpha
     735              : 
     736              : #endif
     737       701258 :    END SUBROUTINE cp_fm_set_element
     738              : 
     739              : ! **************************************************************************************************
     740              : !> \brief sets a submatrix of a full matrix
     741              : !>       fm(start_row:start_row+n_rows,start_col:start_col+n_cols)
     742              : !>       = alpha*op(new_values)(1:n_rows,1:n_cols)+ beta
     743              : !>       * fm(start_row:start_row+n_rows,start_col:start_col+n_cols)
     744              : !> \param fm the full to change
     745              : !> \param new_values a replicated full matrix with the new values
     746              : !> \param start_row the starting row of b_matrix (defaults to 1)
     747              : !> \param start_col the starting col of b_matrix (defaults to 1)
     748              : !> \param n_rows the number of row to change in b (defaults to
     749              : !>        size(op(new_values),1))
     750              : !> \param n_cols the number of columns to change in b (defaults to
     751              : !>        size(op(new_values),2))
     752              : !> \param alpha rescaling factor for the new values (defaults to 1.0)
     753              : !> \param beta rescaling factor for the old values (defaults to 0.0)
     754              : !> \param transpose if new_values should be transposed: if true
     755              : !>        op(new_values)=new_values^T, else op(new_values)=new_values
     756              : !>        (defaults to false)
     757              : !> \par History
     758              : !>      07.2002 created borrowing from Joost's blacs_replicated_copy [fawzi]
     759              : !> \author Fawzi Mohamed
     760              : !> \note
     761              : !>      optimized for full column updates and alpha=1.0, beta=0.0
     762              : !>      the new_values need to be valid on all cpus
     763              : ! **************************************************************************************************
     764        62535 :    SUBROUTINE cp_fm_set_submatrix(fm, new_values, start_row, &
     765              :                                   start_col, n_rows, n_cols, alpha, beta, transpose)
     766              :       TYPE(cp_fm_type), INTENT(IN)                       :: fm
     767              :       REAL(KIND=dp), DIMENSION(:, :), INTENT(in)         :: new_values
     768              :       INTEGER, INTENT(in), OPTIONAL                      :: start_row, start_col, n_rows, n_cols
     769              :       REAL(KIND=dp), INTENT(in), OPTIONAL                :: alpha, beta
     770              :       LOGICAL, INTENT(in), OPTIONAL                      :: transpose
     771              : 
     772              :       INTEGER                                            :: i, i0, j, j0, ncol, ncol_block, &
     773              :                                                             ncol_global, ncol_local, nrow, &
     774              :                                                             nrow_block, nrow_global, nrow_local, &
     775              :                                                             this_col, this_row
     776        62535 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
     777              :       LOGICAL                                            :: tr_a
     778              :       REAL(KIND=dp)                                      :: al, be
     779        62535 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: full_block
     780              : 
     781        62535 :       al = 1.0_dp; be = 0.0_dp; i0 = 1; j0 = 1; tr_a = .FALSE.
     782              :       ! can be called too many times, making it a bit useless
     783              :       ! CALL timeset(routineN//','//moduleN,handle)
     784              : 
     785            0 :       CPASSERT(.NOT. fm%use_sp)
     786              : 
     787        62535 :       IF (PRESENT(alpha)) al = alpha
     788        62535 :       IF (PRESENT(beta)) be = beta
     789        62535 :       IF (PRESENT(start_row)) i0 = start_row
     790        62535 :       IF (PRESENT(start_col)) j0 = start_col
     791        62535 :       IF (PRESENT(transpose)) tr_a = transpose
     792        15721 :       IF (tr_a) THEN
     793        15629 :          nrow = SIZE(new_values, 2)
     794        15629 :          ncol = SIZE(new_values, 1)
     795              :       ELSE
     796        46906 :          nrow = SIZE(new_values, 1)
     797        46906 :          ncol = SIZE(new_values, 2)
     798              :       END IF
     799        62535 :       IF (PRESENT(n_rows)) nrow = n_rows
     800        62535 :       IF (PRESENT(n_cols)) ncol = n_cols
     801              : 
     802        62535 :       full_block => fm%local_data
     803              : 
     804              :       CALL cp_fm_get_info(matrix=fm, &
     805              :                           nrow_global=nrow_global, ncol_global=ncol_global, &
     806              :                           nrow_block=nrow_block, ncol_block=ncol_block, &
     807              :                           nrow_local=nrow_local, ncol_local=ncol_local, &
     808        62535 :                           row_indices=row_indices, col_indices=col_indices)
     809              : 
     810        62535 :       IF (al == 1.0 .AND. be == 0.0) THEN
     811      1298820 :          DO j = 1, ncol_local
     812      1245423 :             this_col = col_indices(j) - j0 + 1
     813      1298820 :             IF (this_col >= 1 .AND. this_col <= ncol) THEN
     814       284905 :                IF (tr_a) THEN
     815         6491 :                   IF (i0 == 1 .AND. nrow_global == nrow) THEN
     816       158082 :                      DO i = 1, nrow_local
     817       158082 :                         full_block(i, j) = new_values(this_col, row_indices(i))
     818              :                      END DO
     819              :                   ELSE
     820          594 :                      DO i = 1, nrow_local
     821          510 :                         this_row = row_indices(i) - i0 + 1
     822          594 :                         IF (this_row >= 1 .AND. this_row <= nrow) THEN
     823          255 :                            full_block(i, j) = new_values(this_col, this_row)
     824              :                         END IF
     825              :                      END DO
     826              :                   END IF
     827              :                ELSE
     828       278414 :                   IF (i0 == 1 .AND. nrow_global == nrow) THEN
     829      8910230 :                      DO i = 1, nrow_local
     830      8910230 :                         full_block(i, j) = new_values(row_indices(i), this_col)
     831              :                      END DO
     832              :                   ELSE
     833         4347 :                      DO i = 1, nrow_local
     834         3745 :                         this_row = row_indices(i) - i0 + 1
     835         4347 :                         IF (this_row >= 1 .AND. this_row <= nrow) THEN
     836          301 :                            full_block(i, j) = new_values(this_row, this_col)
     837              :                         END IF
     838              :                      END DO
     839              :                   END IF
     840              :                END IF
     841              :             END IF
     842              :          END DO
     843              :       ELSE
     844       831608 :          DO j = 1, ncol_local
     845       822470 :             this_col = col_indices(j) - j0 + 1
     846       831608 :             IF (this_col >= 1 .AND. this_col <= ncol) THEN
     847       822470 :                IF (tr_a) THEN
     848     88223375 :                   DO i = 1, nrow_local
     849     87400905 :                      this_row = row_indices(i) - i0 + 1
     850     88223375 :                      IF (this_row >= 1 .AND. this_row <= nrow) THEN
     851              :                         full_block(i, j) = al*new_values(this_col, this_row) + &
     852       411235 :                                            be*full_block(i, j)
     853              :                      END IF
     854              :                   END DO
     855              :                ELSE
     856            0 :                   DO i = 1, nrow_local
     857            0 :                      this_row = row_indices(i) - i0 + 1
     858            0 :                      IF (this_row >= 1 .AND. this_row <= nrow) THEN
     859              :                         full_block(i, j) = al*new_values(this_row, this_col) + &
     860            0 :                                            be*full_block(i, j)
     861              :                      END IF
     862              :                   END DO
     863              :                END IF
     864              :             END IF
     865              :          END DO
     866              :       END IF
     867              : 
     868              :       ! CALL timestop(handle)
     869              : 
     870        62535 :    END SUBROUTINE cp_fm_set_submatrix
     871              : 
     872              : ! **************************************************************************************************
     873              : !> \brief sets a submatrix of a full matrix to a given value
     874              : !>       fm(start_row:start_row+n_rows,start_col:start_col+n_cols) = value
     875              : !> \param fm the full to change
     876              : !> \param new_value  ...
     877              : !> \param start_row the starting row of matrix
     878              : !> \param start_col the starting col of matrix
     879              : !> \param n_rows the number of rows to change
     880              : !> \param n_cols the number of columns to change
     881              : !> \par History
     882              : !>      07.2002 created borrowing from Joost's blacs_replicated_copy [fawzi]
     883              : !>      12.2025 created from cp_fm_set_submatrix
     884              : !> \author JGH
     885              : ! **************************************************************************************************
     886      1759110 :    SUBROUTINE cp_fm_set_all_submatrix(fm, new_value, start_row, start_col, n_rows, n_cols)
     887              :       TYPE(cp_fm_type), INTENT(IN)                       :: fm
     888              :       REAL(KIND=dp), INTENT(in)                          :: new_value
     889              :       INTEGER, INTENT(in)                                :: start_row, start_col, n_rows, n_cols
     890              : 
     891              :       INTEGER                                            :: i, i0, j, j0, ncol_global, ncol_local, &
     892              :                                                             nrow_global, nrow_local, this_col, &
     893              :                                                             this_row
     894       879555 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
     895       879555 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: full_block
     896              : 
     897            0 :       CPASSERT(.NOT. fm%use_sp)
     898              : 
     899       879555 :       full_block => fm%local_data
     900              : 
     901              :       CALL cp_fm_get_info(matrix=fm, &
     902              :                           nrow_global=nrow_global, ncol_global=ncol_global, &
     903              :                           nrow_local=nrow_local, ncol_local=ncol_local, &
     904       879555 :                           row_indices=row_indices, col_indices=col_indices)
     905              : 
     906       879555 :       i0 = start_row
     907       879555 :       j0 = start_col
     908     16184616 :       DO j = 1, ncol_local
     909     15305061 :          this_col = col_indices(j) - j0 + 1
     910     16184616 :          IF (this_col >= 1 .AND. this_col <= n_cols) THEN
     911    530547503 :             DO i = 1, nrow_local
     912    515301724 :                this_row = row_indices(i) - i0 + 1
     913    530547503 :                IF (this_row >= 1 .AND. this_row <= n_rows) THEN
     914    508262398 :                   full_block(i, j) = new_value
     915              :                END IF
     916              :             END DO
     917              :          END IF
     918              :       END DO
     919              : 
     920       879555 :    END SUBROUTINE cp_fm_set_all_submatrix
     921              : 
     922              : ! **************************************************************************************************
     923              : !> \brief gets a submatrix of a full matrix
     924              : !>       op(target_m)(1:n_rows,1:n_cols)
     925              : !>       =fm(start_row:start_row+n_rows,start_col:start_col+n_cols)
     926              : !>      target_m is replicated on all cpus
     927              : !>      using this call is expensive
     928              : !> \param fm the full you want to get the info from
     929              : !> \param target_m a replicated full matrix that will contain the result
     930              : !> \param start_row the starting row of b_matrix (defaults to 1)
     931              : !> \param start_col the starting col of b_matrix (defaults to 1)
     932              : !> \param n_rows the number of row to change in b (defaults to
     933              : !>        size(op(new_values),1))
     934              : !> \param n_cols the number of columns to change in b (defaults to
     935              : !>        size(op(new_values),2))
     936              : !> \param transpose if target_m should be transposed: if true
     937              : !>        op(target_m)=target_m^T, else op(target_m)=target_m
     938              : !>        (defaults to false)
     939              : !> \par History
     940              : !>      07.2002 created borrowing from Joost's blacs_replicated_copy [fawzi]
     941              : !> \author Fawzi Mohamed
     942              : !> \note
     943              : !>      optimized for full column updates. Zeros out a little too much
     944              : !>      of target_m
     945              : !>      the target_m is replicated and valid on all cpus
     946              : ! **************************************************************************************************
     947        85194 :    SUBROUTINE cp_fm_get_submatrix(fm, target_m, start_row, &
     948              :                                   start_col, n_rows, n_cols, transpose)
     949              :       TYPE(cp_fm_type), INTENT(IN)                       :: fm
     950              :       REAL(KIND=dp), DIMENSION(:, :), INTENT(out)        :: target_m
     951              :       INTEGER, INTENT(in), OPTIONAL                      :: start_row, start_col, n_rows, n_cols
     952              :       LOGICAL, INTENT(in), OPTIONAL                      :: transpose
     953              : 
     954              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_get_submatrix'
     955              : 
     956              :       INTEGER                                            :: handle, i, i0, j, j0, ncol, ncol_global, &
     957              :                                                             ncol_local, nrow, nrow_global, &
     958              :                                                             nrow_local, this_col, this_row
     959        85194 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
     960              :       LOGICAL                                            :: tr_a
     961        85194 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: full_block
     962              :       TYPE(mp_para_env_type), POINTER                    :: para_env
     963              : 
     964        85194 :       CALL timeset(routineN, handle)
     965              : 
     966        85194 :       i0 = 1; j0 = 1; tr_a = .FALSE.
     967              : 
     968        85194 :       CPASSERT(.NOT. fm%use_sp)
     969              : 
     970        85194 :       IF (PRESENT(start_row)) i0 = start_row
     971        85194 :       IF (PRESENT(start_col)) j0 = start_col
     972        85194 :       IF (PRESENT(transpose)) tr_a = transpose
     973         6366 :       IF (tr_a) THEN
     974         2446 :          nrow = SIZE(target_m, 2)
     975         2446 :          ncol = SIZE(target_m, 1)
     976              :       ELSE
     977        82748 :          nrow = SIZE(target_m, 1)
     978        82748 :          ncol = SIZE(target_m, 2)
     979              :       END IF
     980        85194 :       IF (PRESENT(n_rows)) nrow = n_rows
     981        85194 :       IF (PRESENT(n_cols)) ncol = n_cols
     982              : 
     983        85194 :       para_env => fm%matrix_struct%para_env
     984              : 
     985        85194 :       full_block => fm%local_data
     986              : #if defined(__parallel)
     987              :       ! zero-out whole target_m
     988        85194 :       IF (SIZE(target_m, 1)*SIZE(target_m, 2) /= 0) THEN
     989       101554 :          CALL dcopy(SIZE(target_m, 1)*SIZE(target_m, 2), [0.0_dp], 0, target_m, 1)
     990              :       END IF
     991              : #endif
     992              : 
     993              :       CALL cp_fm_get_info(matrix=fm, &
     994              :                           nrow_global=nrow_global, ncol_global=ncol_global, &
     995              :                           nrow_local=nrow_local, ncol_local=ncol_local, &
     996        85194 :                           row_indices=row_indices, col_indices=col_indices)
     997              : 
     998       553898 :       DO j = 1, ncol_local
     999       468704 :          this_col = col_indices(j) - j0 + 1
    1000       553898 :          IF (this_col >= 1 .AND. this_col <= ncol) THEN
    1001       280502 :             IF (tr_a) THEN
    1002         2446 :                IF (i0 == 1 .AND. nrow_global == nrow) THEN
    1003        83016 :                   DO i = 1, nrow_local
    1004        83016 :                      target_m(this_col, row_indices(i)) = full_block(i, j)
    1005              :                   END DO
    1006              :                ELSE
    1007            0 :                   DO i = 1, nrow_local
    1008            0 :                      this_row = row_indices(i) - i0 + 1
    1009            0 :                      IF (this_row >= 1 .AND. this_row <= nrow) THEN
    1010            0 :                         target_m(this_col, this_row) = full_block(i, j)
    1011              :                      END IF
    1012              :                   END DO
    1013              :                END IF
    1014              :             ELSE
    1015       278056 :                IF (i0 == 1 .AND. nrow_global == nrow) THEN
    1016      5985647 :                   DO i = 1, nrow_local
    1017      5985647 :                      target_m(row_indices(i), this_col) = full_block(i, j)
    1018              :                   END DO
    1019              :                ELSE
    1020      1276396 :                   DO i = 1, nrow_local
    1021      1235440 :                      this_row = row_indices(i) - i0 + 1
    1022      1276396 :                      IF (this_row >= 1 .AND. this_row <= nrow) THEN
    1023        32148 :                         target_m(this_row, this_col) = full_block(i, j)
    1024              :                      END IF
    1025              :                   END DO
    1026              :                END IF
    1027              :             END IF
    1028              :          END IF
    1029              :       END DO
    1030              : 
    1031     15930174 :       CALL para_env%sum(target_m)
    1032              : 
    1033        85194 :       CALL timestop(handle)
    1034              : 
    1035        85194 :    END SUBROUTINE cp_fm_get_submatrix
    1036              : 
    1037              : ! **************************************************************************************************
    1038              : !> \brief returns all kind of information about the full matrix
    1039              : !> \param matrix ...
    1040              : !> \param name ...
    1041              : !> \param nrow_global ...
    1042              : !> \param ncol_global ...
    1043              : !> \param nrow_block ...
    1044              : !> \param ncol_block ...
    1045              : !> \param nrow_local ...
    1046              : !> \param ncol_local ...
    1047              : !> \param row_indices ...
    1048              : !> \param col_indices ...
    1049              : !> \param local_data ...
    1050              : !> \param context ...
    1051              : !> \param nrow_locals ...
    1052              : !> \param ncol_locals ...
    1053              : !> \param matrix_struct ...
    1054              : !> \param para_env ...
    1055              : !> \note
    1056              : !>       see also cp_fm_struct for explanation
    1057              : !>       - nrow_local, ncol_local, row_indices, col_indices, local_data are hooks for efficient
    1058              : !>         access to the local blacs block
    1059              : ! **************************************************************************************************
    1060      6449277 :    SUBROUTINE cp_fm_get_info(matrix, name, nrow_global, ncol_global, &
    1061              :                              nrow_block, ncol_block, nrow_local, ncol_local, &
    1062              :                              row_indices, col_indices, local_data, context, &
    1063              :                              nrow_locals, ncol_locals, matrix_struct, para_env)
    1064              : 
    1065              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix
    1066              :       CHARACTER(LEN=*), INTENT(OUT), OPTIONAL            :: name
    1067              :       INTEGER, INTENT(OUT), OPTIONAL                     :: nrow_global, ncol_global, nrow_block, &
    1068              :                                                             ncol_block, nrow_local, ncol_local
    1069              :       INTEGER, DIMENSION(:), OPTIONAL, POINTER           :: row_indices, col_indices
    1070              :       REAL(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    1071              :          OPTIONAL, POINTER                               :: local_data
    1072              :       TYPE(cp_blacs_env_type), OPTIONAL, POINTER         :: context
    1073              :       INTEGER, DIMENSION(:), OPTIONAL, POINTER           :: nrow_locals, ncol_locals
    1074              :       TYPE(cp_fm_struct_type), OPTIONAL, POINTER         :: matrix_struct
    1075              :       TYPE(mp_para_env_type), OPTIONAL, POINTER          :: para_env
    1076              : 
    1077            8 :       IF (PRESENT(name)) name = matrix%name
    1078      6449277 :       IF (PRESENT(matrix_struct)) matrix_struct => matrix%matrix_struct
    1079      6449277 :       IF (PRESENT(local_data)) local_data => matrix%local_data ! not hiding things anymore :-(
    1080              : 
    1081              :       CALL cp_fm_struct_get(matrix%matrix_struct, nrow_local=nrow_local, &
    1082              :                             ncol_local=ncol_local, nrow_global=nrow_global, &
    1083              :                             ncol_global=ncol_global, nrow_block=nrow_block, &
    1084              :                             ncol_block=ncol_block, row_indices=row_indices, &
    1085              :                             col_indices=col_indices, nrow_locals=nrow_locals, &
    1086      6449277 :                             ncol_locals=ncol_locals, context=context, para_env=para_env)
    1087              : 
    1088      6449277 :    END SUBROUTINE cp_fm_get_info
    1089              : 
    1090              : ! **************************************************************************************************
    1091              : !> \brief Write nicely formatted info about the FM to the given I/O unit (including the underlying FM struct)
    1092              : !> \param matrix a cp_fm_type instance
    1093              : !> \param io_unit the I/O unit to use for writing
    1094              : ! **************************************************************************************************
    1095            3 :    SUBROUTINE cp_fm_write_info(matrix, io_unit)
    1096              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix
    1097              :       INTEGER, INTENT(IN)                                :: io_unit
    1098              : 
    1099            3 :       WRITE (io_unit, '(/,A,A12)') "CP_FM | Name:                           ", matrix%name
    1100            3 :       CALL cp_fm_struct_write_info(matrix%matrix_struct, io_unit)
    1101            3 :    END SUBROUTINE cp_fm_write_info
    1102              : 
    1103              : ! **************************************************************************************************
    1104              : !> \brief find the maximum absolute value of the matrix element
    1105              : !>      maxval(abs(matrix))
    1106              : !> \param matrix ...
    1107              : !> \param a_max ...
    1108              : !> \param ir_max ...
    1109              : !> \param ic_max ...
    1110              : ! **************************************************************************************************
    1111        92723 :    SUBROUTINE cp_fm_maxabsval(matrix, a_max, ir_max, ic_max)
    1112              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix
    1113              :       REAL(KIND=dp), INTENT(OUT)                         :: a_max
    1114              :       INTEGER, INTENT(OUT), OPTIONAL                     :: ir_max, ic_max
    1115              : 
    1116              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'cp_fm_maxabsval'
    1117              : 
    1118              :       INTEGER                                            :: handle, i, ic_max_local, ir_max_local, &
    1119              :                                                             j, mepos, ncol_local, nrow_local, &
    1120              :                                                             num_pe
    1121        92723 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: ic_max_vec, ir_max_vec
    1122        92723 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
    1123              :       REAL(dp)                                           :: my_max
    1124        92723 :       REAL(dp), ALLOCATABLE, DIMENSION(:)                :: a_max_vec
    1125        92723 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: my_block
    1126        92723 :       REAL(KIND=sp), DIMENSION(:, :), POINTER            :: my_block_sp
    1127              : 
    1128        92723 :       CALL timeset(routineN, handle)
    1129              : 
    1130        92723 :       my_block => matrix%local_data
    1131        92723 :       my_block_sp => matrix%local_data_sp
    1132              : 
    1133              :       CALL cp_fm_get_info(matrix, nrow_local=nrow_local, ncol_local=ncol_local, &
    1134        92723 :                           row_indices=row_indices, col_indices=col_indices)
    1135              : 
    1136        92723 :       IF (matrix%use_sp) THEN
    1137            0 :          a_max = REAL(MAXVAL(ABS(my_block_sp(1:nrow_local, 1:ncol_local))), dp)
    1138              :       ELSE
    1139     54697870 :          a_max = MAXVAL(ABS(my_block(1:nrow_local, 1:ncol_local)))
    1140              :       END IF
    1141              : 
    1142        92723 :       IF (PRESENT(ir_max)) THEN
    1143            0 :          num_pe = matrix%matrix_struct%para_env%num_pe
    1144            0 :          mepos = matrix%matrix_struct%para_env%mepos
    1145            0 :          ALLOCATE (ir_max_vec(0:num_pe - 1))
    1146            0 :          ir_max_vec(0:num_pe - 1) = 0
    1147            0 :          ALLOCATE (ic_max_vec(0:num_pe - 1))
    1148            0 :          ic_max_vec(0:num_pe - 1) = 0
    1149            0 :          ALLOCATE (a_max_vec(0:num_pe - 1))
    1150            0 :          a_max_vec(0:num_pe - 1) = 0.0_dp
    1151            0 :          my_max = 0.0_dp
    1152              : 
    1153            0 :          IF ((ncol_local > 0) .AND. (nrow_local > 0)) THEN
    1154            0 :             DO i = 1, ncol_local
    1155            0 :                DO j = 1, nrow_local
    1156            0 :                   IF (matrix%use_sp) THEN
    1157            0 :                      IF (ABS(my_block_sp(j, i)) > my_max) THEN
    1158            0 :                         my_max = REAL(my_block_sp(j, i), dp)
    1159            0 :                         ir_max_local = j
    1160            0 :                         ic_max_local = i
    1161              :                      END IF
    1162              :                   ELSE
    1163            0 :                      IF (ABS(my_block(j, i)) > my_max) THEN
    1164            0 :                         my_max = my_block(j, i)
    1165            0 :                         ir_max_local = j
    1166            0 :                         ic_max_local = i
    1167              :                      END IF
    1168              :                   END IF
    1169              :                END DO
    1170              :             END DO
    1171              : 
    1172            0 :             a_max_vec(mepos) = my_max
    1173            0 :             ir_max_vec(mepos) = row_indices(ir_max_local)
    1174            0 :             ic_max_vec(mepos) = col_indices(ic_max_local)
    1175              : 
    1176              :          END IF
    1177              : 
    1178            0 :          CALL matrix%matrix_struct%para_env%sum(a_max_vec)
    1179            0 :          CALL matrix%matrix_struct%para_env%sum(ir_max_vec)
    1180            0 :          CALL matrix%matrix_struct%para_env%sum(ic_max_vec)
    1181              : 
    1182            0 :          my_max = 0.0_dp
    1183            0 :          DO i = 0, num_pe - 1
    1184            0 :             IF (a_max_vec(i) > my_max) THEN
    1185            0 :                ir_max = ir_max_vec(i)
    1186            0 :                ic_max = ic_max_vec(i)
    1187              :             END IF
    1188              :          END DO
    1189              : 
    1190            0 :          DEALLOCATE (ir_max_vec, ic_max_vec, a_max_vec)
    1191            0 :          CPASSERT(ic_max > 0)
    1192            0 :          CPASSERT(ir_max > 0)
    1193              : 
    1194              :       END IF
    1195              : 
    1196        92723 :       CALL matrix%matrix_struct%para_env%max(a_max)
    1197              : 
    1198        92723 :       CALL timestop(handle)
    1199              : 
    1200       185446 :    END SUBROUTINE cp_fm_maxabsval
    1201              : 
    1202              : ! **************************************************************************************************
    1203              : !> \brief find the maximum over the rows of the sum of the absolute values of the elements of a given row
    1204              : !>      = || A ||_infinity
    1205              : !> \param matrix ...
    1206              : !> \param a_max ...
    1207              : !> \note
    1208              : !>      for a real symmetric matrix it holds that || A ||_2 = |lambda_max| < || A ||_infinity
    1209              : !>      Hence this can be used to estimate an upper bound for the eigenvalues of a matrix
    1210              : !>      http://mathworld.wolfram.com/MatrixNorm.html
    1211              : !>      (but the bound is not so tight in the general case)
    1212              : ! **************************************************************************************************
    1213         4246 :    SUBROUTINE cp_fm_maxabsrownorm(matrix, a_max)
    1214              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix
    1215              :       REAL(KIND=dp), INTENT(OUT)                         :: a_max
    1216              : 
    1217              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_maxabsrownorm'
    1218              : 
    1219              :       INTEGER                                            :: handle, i, j, ncol_local, nrow_global, &
    1220              :                                                             nrow_local
    1221         4246 :       INTEGER, DIMENSION(:), POINTER                     :: row_indices
    1222              :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: values
    1223         4246 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: my_block
    1224              : 
    1225         4246 :       CALL timeset(routineN, handle)
    1226              : 
    1227         4246 :       my_block => matrix%local_data
    1228              : 
    1229         4246 :       CPASSERT(.NOT. matrix%use_sp)
    1230              : 
    1231              :       CALL cp_fm_get_info(matrix, row_indices=row_indices, nrow_global=nrow_global, &
    1232         4246 :                           nrow_local=nrow_local, ncol_local=ncol_local)
    1233              : 
    1234              :       ! the efficiency could be improved by making use of the row-col distribution of scalapack
    1235        12738 :       ALLOCATE (values(nrow_global))
    1236        62136 :       values = 0.0_dp
    1237        62136 :       DO j = 1, ncol_local
    1238       525661 :          DO i = 1, nrow_local
    1239       521415 :             values(row_indices(i)) = values(row_indices(i)) + ABS(my_block(i, j))
    1240              :          END DO
    1241              :       END DO
    1242         4246 :       CALL matrix%matrix_struct%para_env%sum(values)
    1243        66382 :       a_max = MAXVAL(values)
    1244         4246 :       DEALLOCATE (values)
    1245              : 
    1246         4246 :       CALL timestop(handle)
    1247         8492 :    END SUBROUTINE cp_fm_maxabsrownorm
    1248              : 
    1249              : ! **************************************************************************************************
    1250              : !> \brief find the inorm of each column norm_{j}= sqrt( \sum_{i} A_{ij}*A_{ij} )
    1251              : !> \param matrix ...
    1252              : !> \param norm_array ...
    1253              : ! **************************************************************************************************
    1254         1282 :    SUBROUTINE cp_fm_vectorsnorm(matrix, norm_array)
    1255              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix
    1256              :       REAL(KIND=dp), DIMENSION(:), INTENT(OUT)           :: norm_array
    1257              : 
    1258              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'cp_fm_vectorsnorm'
    1259              : 
    1260              :       INTEGER                                            :: handle, i, j, ncol_global, ncol_local, &
    1261              :                                                             nrow_local
    1262         1282 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices
    1263         1282 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: my_block
    1264              : 
    1265         1282 :       CALL timeset(routineN, handle)
    1266              : 
    1267         1282 :       my_block => matrix%local_data
    1268              : 
    1269         1282 :       CPASSERT(.NOT. matrix%use_sp)
    1270              : 
    1271              :       CALL cp_fm_get_info(matrix, col_indices=col_indices, ncol_global=ncol_global, &
    1272         1282 :                           nrow_local=nrow_local, ncol_local=ncol_local)
    1273              : 
    1274              :       ! the efficiency could be improved by making use of the row-col distribution of scalapack
    1275        29746 :       norm_array = 0.0_dp
    1276        29746 :       DO j = 1, ncol_local
    1277      1261016 :          DO i = 1, nrow_local
    1278      1259734 :             norm_array(col_indices(j)) = norm_array(col_indices(j)) + my_block(i, j)*my_block(i, j)
    1279              :          END DO
    1280              :       END DO
    1281        58210 :       CALL matrix%matrix_struct%para_env%sum(norm_array)
    1282        29746 :       norm_array = SQRT(norm_array)
    1283              : 
    1284         1282 :       CALL timestop(handle)
    1285         1282 :    END SUBROUTINE cp_fm_vectorsnorm
    1286              : 
    1287              : ! **************************************************************************************************
    1288              : !> \brief summing up all the elements along the matrix's i-th index
    1289              : !>        \f$ \mathrm{sum}_{j} = \sum_{i} A_{ij} \f$
    1290              : !>        or
    1291              : !>        \f$ \mathrm{sum}_{i} = \sum_{j} A_{ij} \f$
    1292              : !> \param matrix     an input matrix A
    1293              : !> \param sum_array  sums of elements in each column/row
    1294              : !> \param dir ...
    1295              : !> \note  forked from cp_fm_vectorsnorm() to be used with
    1296              : !>        the maximum overlap method
    1297              : !>        added row variation
    1298              : ! **************************************************************************************************
    1299         7174 :    SUBROUTINE cp_fm_vectorssum(matrix, sum_array, dir)
    1300              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix
    1301              :       REAL(KIND=dp), DIMENSION(:), INTENT(OUT)           :: sum_array
    1302              :       CHARACTER(LEN=1), INTENT(IN), OPTIONAL             :: dir
    1303              : 
    1304              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'cp_fm_vectorssum'
    1305              : 
    1306              :       INTEGER                                            :: handle, i, j, ncol_local, nrow_local
    1307         7174 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
    1308              :       LOGICAL                                            :: docol
    1309         7174 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: my_block
    1310              : 
    1311         7174 :       CALL timeset(routineN, handle)
    1312              : 
    1313         7174 :       IF (PRESENT(dir)) THEN
    1314         7134 :          IF (dir == 'c' .OR. dir == 'C') THEN
    1315              :             docol = .TRUE.
    1316              :          ELSEIF (dir == 'r' .OR. dir == 'R') THEN
    1317              :             docol = .FALSE.
    1318              :          ELSE
    1319            0 :             CPABORT('Wrong argument DIR')
    1320              :          END IF
    1321              :       ELSE
    1322              :          docol = .TRUE.
    1323              :       END IF
    1324              : 
    1325         7174 :       my_block => matrix%local_data
    1326              : 
    1327         7174 :       CPASSERT(.NOT. matrix%use_sp)
    1328              : 
    1329              :       CALL cp_fm_get_info(matrix, col_indices=col_indices, row_indices=row_indices, &
    1330         7174 :                           nrow_local=nrow_local, ncol_local=ncol_local)
    1331              : 
    1332              :       ! the efficiency could be improved by making use of the row-col distribution of scalapack
    1333       220970 :       sum_array(:) = 0.0_dp
    1334         7174 :       IF (docol) THEN
    1335          448 :       DO j = 1, ncol_local
    1336         3628 :          DO i = 1, nrow_local
    1337         3588 :             sum_array(col_indices(j)) = sum_array(col_indices(j)) + my_block(i, j)
    1338              :          END DO
    1339              :       END DO
    1340              :       ELSE
    1341        81994 :       DO j = 1, ncol_local
    1342      6429038 :          DO i = 1, nrow_local
    1343      6421904 :             sum_array(row_indices(i)) = sum_array(row_indices(i)) + my_block(i, j)
    1344              :          END DO
    1345              :       END DO
    1346              :       END IF
    1347       434766 :       CALL matrix%matrix_struct%para_env%sum(sum_array)
    1348              : 
    1349         7174 :       CALL timestop(handle)
    1350         7174 :    END SUBROUTINE cp_fm_vectorssum
    1351              : 
    1352              : ! **************************************************************************************************
    1353              : !> \brief copy one identically sized matrix in the other
    1354              : !> \param source ...
    1355              : !> \param destination ...
    1356              : !> \note
    1357              : !>      see also cp_fm_to_fm_columns
    1358              : ! **************************************************************************************************
    1359       368011 :    SUBROUTINE cp_fm_to_fm_matrix(source, destination)
    1360              : 
    1361              :       TYPE(cp_fm_type), INTENT(IN)                       :: source, destination
    1362              : 
    1363              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_to_fm_matrix'
    1364              : 
    1365              :       INTEGER                                            :: handle, npcol, nprow
    1366              : 
    1367       368011 :       CALL timeset(routineN, handle)
    1368              : 
    1369       368011 :       nprow = source%matrix_struct%context%num_pe(1)
    1370       368011 :       npcol = source%matrix_struct%context%num_pe(2)
    1371              : 
    1372       368011 :       IF ((.NOT. cp2k_is_parallel) .OR. &
    1373              :           cp_fm_struct_equivalent(source%matrix_struct, &
    1374              :                                   destination%matrix_struct)) THEN
    1375       368011 :          IF (source%use_sp .AND. destination%use_sp) THEN
    1376            0 :             IF (SIZE(source%local_data_sp, 1) /= SIZE(destination%local_data_sp, 1) .OR. &
    1377              :                 SIZE(source%local_data_sp, 2) /= SIZE(destination%local_data_sp, 2)) &
    1378              :                CALL cp_abort(__LOCATION__, &
    1379              :                              "Cannot copy full matrix <"//TRIM(source%name)// &
    1380              :                              "> to full matrix <"//TRIM(destination%name)// &
    1381            0 :                              ">. The local_data blocks have different sizes.")
    1382              :             CALL scopy(SIZE(source%local_data_sp, 1)*SIZE(source%local_data_sp, 2), &
    1383            0 :                        source%local_data_sp, 1, destination%local_data_sp, 1)
    1384       368011 :          ELSE IF (source%use_sp .AND. .NOT. destination%use_sp) THEN
    1385            0 :             IF (SIZE(source%local_data_sp, 1) /= SIZE(destination%local_data, 1) .OR. &
    1386              :                 SIZE(source%local_data_sp, 2) /= SIZE(destination%local_data, 2)) &
    1387              :                CALL cp_abort(__LOCATION__, &
    1388              :                              "Cannot copy full matrix <"//TRIM(source%name)// &
    1389              :                              "> to full matrix <"//TRIM(destination%name)// &
    1390            0 :                              ">. The local_data blocks have different sizes.")
    1391            0 :             destination%local_data = REAL(source%local_data_sp, dp)
    1392       368011 :          ELSE IF (.NOT. source%use_sp .AND. destination%use_sp) THEN
    1393            0 :             IF (SIZE(source%local_data, 1) /= SIZE(destination%local_data_sp, 1) .OR. &
    1394              :                 SIZE(source%local_data, 2) /= SIZE(destination%local_data_sp, 2)) &
    1395              :                CALL cp_abort(__LOCATION__, &
    1396              :                              "Cannot copy full matrix <"//TRIM(source%name)// &
    1397              :                              "> to full matrix <"//TRIM(destination%name)// &
    1398            0 :                              ">. The local_data blocks have different sizes.")
    1399            0 :             destination%local_data_sp = REAL(source%local_data, sp)
    1400              :          ELSE
    1401       368011 :             IF (SIZE(source%local_data, 1) /= SIZE(destination%local_data, 1) .OR. &
    1402              :                 SIZE(source%local_data, 2) /= SIZE(destination%local_data, 2)) &
    1403              :                CALL cp_abort(__LOCATION__, &
    1404              :                              "Cannot copy full matrix <"//TRIM(source%name)// &
    1405              :                              "> to full matrix <"//TRIM(destination%name)// &
    1406            0 :                              ">. The local_data blocks have different sizes.")
    1407              :             CALL dcopy(SIZE(source%local_data, 1)*SIZE(source%local_data, 2), &
    1408       368011 :                        source%local_data, 1, destination%local_data, 1)
    1409              :          END IF
    1410              :       ELSE
    1411            0 :          CPABORT("Data structures of source and target full matrix are not equivalent")
    1412              :       END IF
    1413              : 
    1414       368011 :       CALL timestop(handle)
    1415              : 
    1416       368011 :    END SUBROUTINE cp_fm_to_fm_matrix
    1417              : 
    1418              : ! **************************************************************************************************
    1419              : !> \brief copy just a subset of columns of a fm to a fm
    1420              : !> \param msource ...
    1421              : !> \param mtarget ...
    1422              : !> \param ncol ...
    1423              : !> \param source_start ...
    1424              : !> \param target_start ...
    1425              : ! **************************************************************************************************
    1426       164402 :    SUBROUTINE cp_fm_to_fm_columns(msource, mtarget, ncol, source_start, &
    1427              :                                   target_start)
    1428              : 
    1429              :       TYPE(cp_fm_type), INTENT(IN)          :: msource, mtarget
    1430              :       INTEGER, INTENT(IN)                      :: ncol
    1431              :       INTEGER, INTENT(IN), OPTIONAL            :: source_start, target_start
    1432              : 
    1433              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_to_fm_columns'
    1434              : 
    1435              :       INTEGER                                  :: handle, n, ss, ts
    1436              :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a, b
    1437              : #if defined(__parallel)
    1438              :       INTEGER                                  :: i
    1439              :       INTEGER, DIMENSION(9)                    :: desca, descb
    1440              : #endif
    1441              : 
    1442       164402 :       CALL timeset(routineN, handle)
    1443              : 
    1444       164402 :       ss = 1
    1445       164402 :       ts = 1
    1446              : 
    1447       164402 :       IF (PRESENT(source_start)) ss = source_start
    1448       164402 :       IF (PRESENT(target_start)) ts = target_start
    1449              : 
    1450       164402 :       n = msource%matrix_struct%nrow_global
    1451              : 
    1452       164402 :       a => msource%local_data
    1453       164402 :       b => mtarget%local_data
    1454              : 
    1455              : #if defined(__parallel)
    1456      1644020 :       desca(:) = msource%matrix_struct%descriptor(:)
    1457      1644020 :       descb(:) = mtarget%matrix_struct%descriptor(:)
    1458       662412 :       DO i = 0, ncol - 1
    1459       662412 :          CALL pdcopy(n, a, 1, ss + i, desca, 1, b, 1, ts + i, descb, 1)
    1460              :       END DO
    1461              : #else
    1462              :       IF (ss <= SIZE(a, 2) .AND. ts <= SIZE(b, 2)) THEN
    1463              :          CALL dcopy(ncol*n, a(:, ss), 1, b(:, ts), 1)
    1464              :       END IF
    1465              : #endif
    1466              : 
    1467       164402 :       CALL timestop(handle)
    1468              : 
    1469       164402 :    END SUBROUTINE cp_fm_to_fm_columns
    1470              : 
    1471              : ! **************************************************************************************************
    1472              : !> \brief copy just a triangular matrix
    1473              : !> \param msource ...
    1474              : !> \param mtarget ...
    1475              : !> \param uplo ...
    1476              : ! **************************************************************************************************
    1477          370 :    SUBROUTINE cp_fm_to_fm_triangular(msource, mtarget, uplo)
    1478              : 
    1479              :       TYPE(cp_fm_type), INTENT(IN)             :: msource, mtarget
    1480              :       CHARACTER(LEN=1), OPTIONAL, INTENT(IN)   :: uplo
    1481              : 
    1482              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_to_fm_triangular'
    1483              : 
    1484              :       CHARACTER(LEN=1)                         :: myuplo
    1485              :       INTEGER                                  :: handle, ncol, nrow
    1486          370 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a, b
    1487              : #if defined(__parallel)
    1488              :       INTEGER, DIMENSION(9)                    :: desca, descb
    1489              : #endif
    1490              : 
    1491          370 :       CALL timeset(routineN, handle)
    1492              : 
    1493          370 :       myuplo = 'U'
    1494          370 :       IF (PRESENT(uplo)) myuplo = uplo
    1495              : 
    1496          370 :       nrow = msource%matrix_struct%nrow_global
    1497          370 :       ncol = msource%matrix_struct%ncol_global
    1498              : 
    1499          370 :       a => msource%local_data
    1500          370 :       b => mtarget%local_data
    1501              : 
    1502              : #if defined(__parallel)
    1503         3700 :       desca(:) = msource%matrix_struct%descriptor(:)
    1504         3700 :       descb(:) = mtarget%matrix_struct%descriptor(:)
    1505          370 :       CALL pdlacpy(myuplo, nrow, ncol, a(1, 1), 1, 1, desca, b(1, 1), 1, 1, descb)
    1506              : #else
    1507              :       CALL dlacpy(myuplo, nrow, ncol, a(1, 1), nrow, b(1, 1), nrow)
    1508              : #endif
    1509              : 
    1510          370 :       CALL timestop(handle)
    1511              : 
    1512          370 :    END SUBROUTINE cp_fm_to_fm_triangular
    1513              : 
    1514              : ! **************************************************************************************************
    1515              : !> \brief copy just a part ot the matrix
    1516              : !> \param msource ...
    1517              : !> \param mtarget ...
    1518              : !> \param nrow ...
    1519              : !> \param ncol ...
    1520              : !> \param s_firstrow ...
    1521              : !> \param s_firstcol ...
    1522              : !> \param t_firstrow ...
    1523              : !> \param t_firstcol ...
    1524              : ! **************************************************************************************************
    1525              : 
    1526        10940 :    SUBROUTINE cp_fm_to_fm_submat(msource, mtarget, nrow, ncol, s_firstrow, s_firstcol, t_firstrow, t_firstcol)
    1527              : 
    1528              :       TYPE(cp_fm_type), INTENT(IN)             :: msource, mtarget
    1529              :       INTEGER, INTENT(IN)                      :: nrow, ncol, s_firstrow, &
    1530              :                                                   s_firstcol, t_firstrow, &
    1531              :                                                   t_firstcol
    1532              : 
    1533              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_to_fm_submat'
    1534              : 
    1535              :       INTEGER                                  :: handle, i, na, nb, ss, ts
    1536              :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a, b
    1537              : #if defined(__parallel)
    1538              :       INTEGER, DIMENSION(9)                    :: desca, descb
    1539              : #endif
    1540              : 
    1541        10940 :       CALL timeset(routineN, handle)
    1542              : 
    1543        10940 :       a => msource%local_data
    1544        10940 :       b => mtarget%local_data
    1545              : 
    1546        10940 :       na = msource%matrix_struct%nrow_global
    1547        10940 :       nb = mtarget%matrix_struct%nrow_global
    1548              : !    nrow must be <= na and nb
    1549        10940 :       IF (nrow > na) &
    1550            0 :          CPABORT("cannot copy because nrow > number of rows of source matrix")
    1551        10940 :       IF (nrow > nb) &
    1552            0 :          CPABORT("cannot copy because nrow > number of rows of target matrix")
    1553        10940 :       na = msource%matrix_struct%ncol_global
    1554        10940 :       nb = mtarget%matrix_struct%ncol_global
    1555              : !    ncol must be <= na_col and nb_col
    1556        10940 :       IF (ncol > na) &
    1557            0 :          CPABORT("cannot copy because nrow > number of rows of source matrix")
    1558        10940 :       IF (ncol > nb) &
    1559            0 :          CPABORT("cannot copy because nrow > number of rows of target matrix")
    1560              : 
    1561              : #if defined(__parallel)
    1562       109400 :       desca(:) = msource%matrix_struct%descriptor(:)
    1563       109400 :       descb(:) = mtarget%matrix_struct%descriptor(:)
    1564       119984 :       DO i = 0, ncol - 1
    1565       109044 :          ss = s_firstcol + i
    1566       109044 :          ts = t_firstcol + i
    1567       119984 :          CALL pdcopy(nrow, a, s_firstrow, ss, desca, 1, b, t_firstrow, ts, descb, 1)
    1568              :       END DO
    1569              : #else
    1570              :       DO i = 0, ncol - 1
    1571              :          ss = s_firstcol + i
    1572              :          ts = t_firstcol + i
    1573              :          CALL dcopy(nrow, a(s_firstrow:, ss), 1, b(t_firstrow:, ts), 1)
    1574              :       END DO
    1575              : #endif
    1576              : 
    1577        10940 :       CALL timestop(handle)
    1578        10940 :    END SUBROUTINE cp_fm_to_fm_submat
    1579              : 
    1580              : ! **************************************************************************************************
    1581              : !> \brief General copy of a fm matrix to another fm matrix.
    1582              : !>        Uses non-blocking MPI rather than ScaLAPACK.
    1583              : !>
    1584              : !> \param source          input fm matrix
    1585              : !> \param destination     output fm matrix
    1586              : !> \param para_env        parallel environment corresponding to the BLACS env that covers all parts
    1587              : !>                        of the input and output matrices
    1588              : !> \par History
    1589              : !>      31-Jan-2017 : Re-implemented using non-blocking MPI [IainB, MarkT]
    1590              : ! **************************************************************************************************
    1591         7982 :    SUBROUTINE cp_fm_copy_general(source, destination, para_env)
    1592              :       TYPE(cp_fm_type), INTENT(IN)                       :: source, destination
    1593              :       TYPE(mp_para_env_type), INTENT(IN)                 :: para_env
    1594              : 
    1595              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_copy_general'
    1596              : 
    1597              :       INTEGER                                            :: handle
    1598        71838 :       TYPE(copy_info_type)                               :: info
    1599              : 
    1600         7982 :       CALL timeset(routineN, handle)
    1601              : 
    1602         7982 :       CALL cp_fm_start_copy_general(source, destination, para_env, info)
    1603         7982 :       IF (ASSOCIATED(destination%matrix_struct)) THEN
    1604         7968 :          CALL cp_fm_finish_copy_general(destination, info)
    1605              :       END IF
    1606         7982 :       IF (ASSOCIATED(source%matrix_struct)) THEN
    1607         7830 :          CALL cp_fm_cleanup_copy_general(info)
    1608              :       END IF
    1609              : 
    1610         7982 :       CALL timestop(handle)
    1611         7982 :    END SUBROUTINE cp_fm_copy_general
    1612              : 
    1613              : ! **************************************************************************************************
    1614              : !> \brief Initiates the copy operation: get distribution data, post MPI isend and irecvs
    1615              : !> \param source input fm matrix
    1616              : !> \param destination output fm matrix
    1617              : !> \param para_env parallel environment corresponding to the BLACS env that covers all parts
    1618              : !>                        of the input and output matrices
    1619              : !> \param info all of the data that will be needed to complete the copy operation
    1620              : ! **************************************************************************************************
    1621      1747460 :    SUBROUTINE cp_fm_start_copy_general(source, destination, para_env, info)
    1622              :       TYPE(cp_fm_type), INTENT(IN)                       :: source, destination
    1623              :       TYPE(mp_para_env_type), INTENT(IN)                 :: para_env
    1624              :       TYPE(copy_info_type), INTENT(OUT)                  :: info
    1625              : 
    1626              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_start_copy_general'
    1627              : 
    1628              :       INTEGER :: dest_p_i, dest_q_j, global_rank, global_size, handle, i, j, k, mpi_rank, &
    1629              :          ncol_block_dest, ncol_block_src, ncol_local_recv, ncol_local_send, ncols, &
    1630              :          nrow_block_dest, nrow_block_src, nrow_local_recv, nrow_local_send, nrows, p, q, &
    1631              :          recv_rank, recv_size, send_rank, send_size
    1632       174746 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: all_ranks, dest2global, dest_p, dest_q, &
    1633       349492 :                                                             recv_count, send_count, send_disp, &
    1634       174746 :                                                             source2global, src_p, src_q
    1635       174746 :       INTEGER, ALLOCATABLE, DIMENSION(:, :)              :: dest_blacs2mpi
    1636              :       INTEGER, DIMENSION(2)                              :: dest_block, dest_block_tmp, dest_num_pe, &
    1637              :                                                             src_block, src_block_tmp, src_num_pe
    1638       349492 :       INTEGER, DIMENSION(:), POINTER                     :: recv_col_indices, recv_row_indices, &
    1639       349492 :                                                             send_col_indices, send_row_indices
    1640              :       TYPE(cp_fm_struct_type), POINTER                   :: recv_dist, send_dist
    1641      2446444 :       TYPE(mp_request_type), DIMENSION(6)                :: recv_req, send_req
    1642              : 
    1643       174746 :       CALL timeset(routineN, handle)
    1644              : 
    1645              :       IF (.NOT. cp2k_is_parallel) THEN
    1646              :          ! Just copy all of the matrix data into a 'send buffer', to be unpacked later
    1647              :          nrow_local_send = SIZE(source%local_data, 1)
    1648              :          ncol_local_send = SIZE(source%local_data, 2)
    1649              :          ALLOCATE (info%send_buf(nrow_local_send*ncol_local_send))
    1650              :          k = 0
    1651              :          DO j = 1, ncol_local_send
    1652              :             DO i = 1, nrow_local_send
    1653              :                k = k + 1
    1654              :                info%send_buf(k) = source%local_data(i, j)
    1655              :             END DO
    1656              :          END DO
    1657              :       ELSE
    1658              :          ! assumption of double precision data is carried forward from ScaLAPACK version
    1659       174746 :          IF (source%use_sp) CPABORT("only DP kind implemented")
    1660       174746 :          IF (destination%use_sp) CPABORT("only DP kind implemented")
    1661              : 
    1662       174746 :          NULLIFY (recv_dist, send_dist)
    1663       174746 :          NULLIFY (recv_col_indices, recv_row_indices, send_col_indices, send_row_indices)
    1664              : 
    1665              :          ! The 'global' communicator contains both the source and destination decompositions
    1666       174746 :          global_size = para_env%num_pe
    1667       174746 :          global_rank = para_env%mepos
    1668              : 
    1669              :          ! The source/send decomposition and destination/recv decompositions may only exist on
    1670              :          ! on a subset of the processes involved in the communication
    1671              :          ! Check if the source and/or destination arguments are .not. ASSOCIATED():
    1672              :          ! if so, skip the send / recv parts (since these processes do not participate in the sending/receiving distribution)
    1673       174746 :          IF (ASSOCIATED(destination%matrix_struct)) THEN
    1674       143780 :             recv_dist => destination%matrix_struct
    1675       143780 :             recv_rank = recv_dist%para_env%mepos
    1676              :          ELSE
    1677        30966 :             recv_rank = mp_proc_null
    1678              :          END IF
    1679              : 
    1680       174746 :          IF (ASSOCIATED(source%matrix_struct)) THEN
    1681       156714 :             send_dist => source%matrix_struct
    1682       156714 :             send_rank = send_dist%para_env%mepos
    1683              :          ELSE
    1684        18032 :             send_rank = mp_proc_null
    1685              :          END IF
    1686              : 
    1687              :          ! Map the rank in the source/dest communicator to the global rank
    1688       524238 :          ALLOCATE (all_ranks(0:global_size - 1))
    1689              : 
    1690       174746 :          CALL para_env%allgather(send_rank, all_ranks)
    1691       174746 :          IF (ASSOCIATED(recv_dist)) THEN
    1692       718900 :             ALLOCATE (source2global(0:COUNT(all_ranks /= mp_proc_null) - 1))
    1693       431340 :             DO i = 0, global_size - 1
    1694       431340 :                IF (all_ranks(i) /= mp_proc_null) THEN
    1695       251496 :                   source2global(all_ranks(i)) = i
    1696              :                END IF
    1697              :             END DO
    1698              :          END IF
    1699              : 
    1700       174746 :          CALL para_env%allgather(recv_rank, all_ranks)
    1701       174746 :          IF (ASSOCIATED(send_dist)) THEN
    1702       783570 :             ALLOCATE (dest2global(0:COUNT(all_ranks /= mp_proc_null) - 1))
    1703       470142 :             DO i = 0, global_size - 1
    1704       470142 :                IF (all_ranks(i) /= mp_proc_null) THEN
    1705       251496 :                   dest2global(all_ranks(i)) = i
    1706              :                END IF
    1707              :             END DO
    1708              :          END IF
    1709       174746 :          DEALLOCATE (all_ranks)
    1710              : 
    1711              :          ! Some data from the two decompositions will be needed by all processes in the global group :
    1712              :          ! process grid shape, block size, and the BLACS-to-MPI mapping
    1713              : 
    1714              :          ! The global root process will receive the data (from the root process in each decomposition)
    1715      1223222 :          send_req(:) = mp_request_null
    1716       174746 :          IF (global_rank == 0) THEN
    1717       611611 :             recv_req(:) = mp_request_null
    1718        87373 :             CALL para_env%irecv(src_block, mp_any_source, recv_req(1), tag=src_tag)
    1719        87373 :             CALL para_env%irecv(dest_block, mp_any_source, recv_req(2), tag=dest_tag)
    1720        87373 :             CALL para_env%irecv(src_num_pe, mp_any_source, recv_req(3), tag=src_tag)
    1721        87373 :             CALL para_env%irecv(dest_num_pe, mp_any_source, recv_req(4), tag=dest_tag)
    1722              :          END IF
    1723              : 
    1724       174746 :          IF (ASSOCIATED(send_dist)) THEN
    1725       156714 :             IF ((send_rank == 0)) THEN
    1726              :                ! need to use separate buffers here in case this is actually global rank 0
    1727       262119 :                src_block_tmp = [send_dist%nrow_block, send_dist%ncol_block]
    1728        87373 :                CALL para_env%isend(src_block_tmp, 0, send_req(1), tag=src_tag)
    1729        87373 :                CALL para_env%isend(send_dist%context%num_pe, 0, send_req(2), tag=src_tag)
    1730              :             END IF
    1731              :          END IF
    1732              : 
    1733       174746 :          IF (ASSOCIATED(recv_dist)) THEN
    1734       143780 :             IF ((recv_rank == 0)) THEN
    1735       262119 :                dest_block_tmp = [recv_dist%nrow_block, recv_dist%ncol_block]
    1736        87373 :                CALL para_env%isend(dest_block_tmp, 0, send_req(3), tag=dest_tag)
    1737        87373 :                CALL para_env%isend(recv_dist%context%num_pe, 0, send_req(4), tag=dest_tag)
    1738              :             END IF
    1739              :          END IF
    1740              : 
    1741       174746 :          IF (global_rank == 0) THEN
    1742        87373 :             CALL mp_waitall(recv_req(1:4))
    1743              :             ! Now we know the process decomposition, we can allocate the arrays to hold the blacs2mpi mapping
    1744            0 :             ALLOCATE (info%src_blacs2mpi(0:src_num_pe(1) - 1, 0:src_num_pe(2) - 1), &
    1745              :                       dest_blacs2mpi(0:dest_num_pe(1) - 1, 0:dest_num_pe(2) - 1) &
    1746       611611 :                       )
    1747        87373 :             CALL para_env%irecv(info%src_blacs2mpi, mp_any_source, recv_req(5), tag=src_tag)
    1748        87373 :             CALL para_env%irecv(dest_blacs2mpi, mp_any_source, recv_req(6), tag=dest_tag)
    1749              :          END IF
    1750              : 
    1751       174746 :          IF (ASSOCIATED(send_dist)) THEN
    1752       156714 :             IF ((send_rank == 0)) THEN
    1753        87373 :                CALL para_env%isend(send_dist%context%blacs2mpi(:, :), 0, send_req(5), tag=src_tag)
    1754              :             END IF
    1755              :          END IF
    1756              : 
    1757       174746 :          IF (ASSOCIATED(recv_dist)) THEN
    1758       143780 :             IF ((recv_rank == 0)) THEN
    1759        87373 :                CALL para_env%isend(recv_dist%context%blacs2mpi(:, :), 0, send_req(6), tag=dest_tag)
    1760              :             END IF
    1761              :          END IF
    1762              : 
    1763       174746 :          IF (global_rank == 0) THEN
    1764        87373 :             CALL mp_waitall(recv_req(5:6))
    1765              :          END IF
    1766              : 
    1767              :          ! Finally, broadcast the data to all processes in the global communicator
    1768       174746 :          CALL para_env%bcast(src_block, 0)
    1769       174746 :          CALL para_env%bcast(dest_block, 0)
    1770       174746 :          CALL para_env%bcast(src_num_pe, 0)
    1771       174746 :          CALL para_env%bcast(dest_num_pe, 0)
    1772       524238 :          info%src_num_pe(1:2) = src_num_pe(1:2)
    1773       524238 :          info%nblock_src(1:2) = src_block(1:2)
    1774       174746 :          IF (global_rank /= 0) THEN
    1775            0 :             ALLOCATE (info%src_blacs2mpi(0:src_num_pe(1) - 1, 0:src_num_pe(2) - 1), &
    1776            0 :                       dest_blacs2mpi(0:dest_num_pe(1) - 1, 0:dest_num_pe(2) - 1) &
    1777       611611 :                       )
    1778              :          END IF
    1779       174746 :          CALL para_env%bcast(info%src_blacs2mpi, 0)
    1780       174746 :          CALL para_env%bcast(dest_blacs2mpi, 0)
    1781              : 
    1782       174746 :          recv_size = dest_num_pe(1)*dest_num_pe(2)
    1783       174746 :          send_size = src_num_pe(1)*src_num_pe(2)
    1784       174746 :          info%send_size = send_size
    1785       174746 :          CALL mp_waitall(send_req(:))
    1786              : 
    1787              :          ! Setup is now complete, we can start the actual communication here.
    1788              :          ! The order implemented here is:
    1789              :          !  DEST_1
    1790              :          !      compute recv sizes
    1791              :          !      call irecv
    1792              :          !  SRC_1
    1793              :          !      compute send sizes
    1794              :          !      pack send buffers
    1795              :          !      call isend
    1796              :          !  DEST_2
    1797              :          !      wait for the recvs and unpack buffers (this part eventually will go into another routine to allow comms to run concurrently)
    1798              :          !  SRC_2
    1799              :          !      wait for the sends
    1800              : 
    1801              :          ! DEST_1
    1802       174746 :          IF (ASSOCIATED(recv_dist)) THEN
    1803              :             CALL cp_fm_struct_get(recv_dist, row_indices=recv_row_indices, &
    1804              :                                   col_indices=recv_col_indices &
    1805       143780 :                                   )
    1806       143780 :             info%recv_col_indices => recv_col_indices
    1807       143780 :             info%recv_row_indices => recv_row_indices
    1808       143780 :             nrow_block_src = src_block(1)
    1809       143780 :             ncol_block_src = src_block(2)
    1810       970396 :             ALLOCATE (recv_count(0:send_size - 1), info%recv_disp(0:send_size - 1), info%recv_request(0:send_size - 1))
    1811              : 
    1812              :             ! Determine the recv counts, allocate the receive buffers, call mpi_irecv for all the non-zero sized receives
    1813       143780 :             nrow_local_recv = recv_dist%nrow_locals(recv_dist%context%mepos(1))
    1814       143780 :             ncol_local_recv = recv_dist%ncol_locals(recv_dist%context%mepos(2))
    1815       143780 :             info%nlocal_recv(1) = nrow_local_recv
    1816       143780 :             info%nlocal_recv(2) = ncol_local_recv
    1817              :             ! Initialise src_p, src_q arrays (sized using number of rows/cols in the receiving distribution)
    1818       718900 :             ALLOCATE (src_p(nrow_local_recv), src_q(ncol_local_recv))
    1819      2544581 :             DO i = 1, nrow_local_recv
    1820              :                ! For each local row we will receive, we look up its global row (in recv_row_indices),
    1821              :                ! then work out which row block it comes from, and which process row that row block comes from.
    1822      2544581 :                src_p(i) = MOD(((recv_row_indices(i) - 1)/nrow_block_src), src_num_pe(1))
    1823              :             END DO
    1824      4295772 :             DO j = 1, ncol_local_recv
    1825              :                ! Similarly for the columns
    1826      4295772 :                src_q(j) = MOD(((recv_col_indices(j) - 1)/ncol_block_src), src_num_pe(2))
    1827              :             END DO
    1828              :             ! src_p/q now contains the process row/column ID that will send data to that row/column
    1829              : 
    1830       287560 :             DO q = 0, src_num_pe(2) - 1
    1831      4295772 :                ncols = COUNT(src_q == q)
    1832       539056 :                DO p = 0, src_num_pe(1) - 1
    1833      4678370 :                   nrows = COUNT(src_p == p)
    1834              :                   ! Use the send_dist here as we are looking up the processes where the data comes from
    1835       395276 :                   recv_count(info%src_blacs2mpi(p, q)) = nrows*ncols
    1836              :                END DO
    1837              :             END DO
    1838       143780 :             DEALLOCATE (src_p, src_q)
    1839              : 
    1840              :             ! Use one long buffer (and displacements into that buffer)
    1841              :             !     this prevents the need for a rectangular array where not all elements will be populated
    1842       682836 :             ALLOCATE (info%recv_buf(SUM(recv_count(:))))
    1843       143780 :             info%recv_disp(0) = 0
    1844       251496 :             DO i = 1, send_size - 1
    1845       251496 :                info%recv_disp(i) = info%recv_disp(i - 1) + recv_count(i - 1)
    1846              :             END DO
    1847              : 
    1848              :             ! Issue receive calls on ranks which expect data
    1849       395276 :             DO k = 0, send_size - 1
    1850       395276 :                IF (recv_count(k) > 0) THEN
    1851              :                   CALL para_env%irecv(info%recv_buf(info%recv_disp(k) + 1:info%recv_disp(k) + recv_count(k)), &
    1852       175190 :                                       source2global(k), info%recv_request(k))
    1853              :                END IF
    1854              :             END DO
    1855       143780 :             DEALLOCATE (source2global)
    1856              :          END IF ! ASSOCIATED(recv_dist)
    1857              : 
    1858              :          ! SRC_1
    1859       174746 :          IF (ASSOCIATED(send_dist)) THEN
    1860              :             CALL cp_fm_struct_get(send_dist, row_indices=send_row_indices, &
    1861              :                                   col_indices=send_col_indices &
    1862       156714 :                                   )
    1863       156714 :             nrow_block_dest = dest_block(1)
    1864       156714 :             ncol_block_dest = dest_block(2)
    1865      1035066 :             ALLOCATE (send_count(0:recv_size - 1), send_disp(0:recv_size - 1), info%send_request(0:recv_size - 1))
    1866              : 
    1867              :             ! Determine the send counts, allocate the send buffers
    1868       156714 :             nrow_local_send = send_dist%nrow_locals(send_dist%context%mepos(1))
    1869       156714 :             ncol_local_send = send_dist%ncol_locals(send_dist%context%mepos(2))
    1870              : 
    1871              :             ! Initialise dest_p, dest_q arrays (sized nrow_local, ncol_local)
    1872              :             !   i.e. number of rows,cols in the sending distribution
    1873       783570 :             ALLOCATE (dest_p(nrow_local_send), dest_q(ncol_local_send))
    1874              : 
    1875      2557515 :             DO i = 1, nrow_local_send
    1876              :                ! Use the send_dist%row_indices() here (we are looping over the local rows we will send)
    1877      2557515 :                dest_p(i) = MOD(((send_row_indices(i) - 1)/nrow_block_dest), dest_num_pe(1))
    1878              :             END DO
    1879      4576642 :             DO j = 1, ncol_local_send
    1880      4576642 :                dest_q(j) = MOD(((send_col_indices(j) - 1)/ncol_block_dest), dest_num_pe(2))
    1881              :             END DO
    1882              :             ! dest_p/q now contain the process row/column ID that will receive data from that row/column
    1883              : 
    1884       313428 :             DO q = 0, dest_num_pe(2) - 1
    1885      4576642 :                ncols = COUNT(dest_q == q)
    1886       564924 :                DO p = 0, dest_num_pe(1) - 1
    1887      4410794 :                   nrows = COUNT(dest_p == p)
    1888       408210 :                   send_count(dest_blacs2mpi(p, q)) = nrows*ncols
    1889              :                END DO
    1890              :             END DO
    1891       156714 :             DEALLOCATE (dest_p, dest_q)
    1892              : 
    1893              :             ! Allocate the send buffer using send_count -- and calculate the offset into the buffer for each process
    1894       721638 :             ALLOCATE (info%send_buf(SUM(send_count(:))))
    1895       156714 :             send_disp(0) = 0
    1896       251496 :             DO k = 1, recv_size - 1
    1897       251496 :                send_disp(k) = send_disp(k - 1) + send_count(k - 1)
    1898              :             END DO
    1899              : 
    1900              :             ! Loop over the smat, pack the send buffers
    1901       408210 :             send_count(:) = 0
    1902      4576642 :             DO j = 1, ncol_local_send
    1903              :                ! Use send_col_indices and row_indices here, as we are looking up the global row/column number of local rows.
    1904      4419928 :                dest_q_j = MOD(((send_col_indices(j) - 1)/ncol_block_dest), dest_num_pe(2))
    1905    200734687 :                DO i = 1, nrow_local_send
    1906    196158045 :                   dest_p_i = MOD(((send_row_indices(i) - 1)/nrow_block_dest), dest_num_pe(1))
    1907    196158045 :                   mpi_rank = dest_blacs2mpi(dest_p_i, dest_q_j)
    1908    196158045 :                   send_count(mpi_rank) = send_count(mpi_rank) + 1
    1909    200577973 :                   info%send_buf(send_disp(mpi_rank) + send_count(mpi_rank)) = source%local_data(i, j)
    1910              :                END DO
    1911              :             END DO
    1912              : 
    1913              :             ! For each non-zero send_count, call mpi_isend
    1914       408210 :             DO k = 0, recv_size - 1
    1915       408210 :                IF (send_count(k) > 0) THEN
    1916              :                   CALL para_env%isend(info%send_buf(send_disp(k) + 1:send_disp(k) + send_count(k)), &
    1917       175190 :                                       dest2global(k), info%send_request(k))
    1918              :                END IF
    1919              :             END DO
    1920       156714 :             DEALLOCATE (send_count, send_disp, dest2global)
    1921              :          END IF ! ASSOCIATED(send_dist)
    1922       174746 :          DEALLOCATE (dest_blacs2mpi)
    1923              : 
    1924              :       END IF !IF (.NOT. cp2k_is_parallel)
    1925              : 
    1926       174746 :       CALL timestop(handle)
    1927              : 
    1928       698984 :    END SUBROUTINE cp_fm_start_copy_general
    1929              : 
    1930              : ! **************************************************************************************************
    1931              : !> \brief Completes the copy operation: wait for comms, unpack, clean up MPI state
    1932              : !> \param destination output fm matrix
    1933              : !> \param info all of the data that will be needed to complete the copy operation
    1934              : ! **************************************************************************************************
    1935       143780 :    SUBROUTINE cp_fm_finish_copy_general(destination, info)
    1936              :       TYPE(cp_fm_type), INTENT(IN)                       :: destination
    1937              :       TYPE(copy_info_type), INTENT(INOUT)                :: info
    1938              : 
    1939              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_finish_copy_general'
    1940              : 
    1941              :       INTEGER                                            :: handle, i, j, k, mpi_rank, send_size, &
    1942              :                                                             src_p_i, src_q_j
    1943       143780 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: recv_count
    1944              :       INTEGER, DIMENSION(2)                              :: nblock_src, nlocal_recv, src_num_pe
    1945       143780 :       INTEGER, DIMENSION(:), POINTER                     :: recv_col_indices, recv_row_indices
    1946              : 
    1947       143780 :       CALL timeset(routineN, handle)
    1948              : 
    1949              :       IF (.NOT. cp2k_is_parallel) THEN
    1950              :          ! Now unpack the data from the 'send buffer'
    1951              :          k = 0
    1952              :          DO j = 1, SIZE(destination%local_data, 2)
    1953              :             DO i = 1, SIZE(destination%local_data, 1)
    1954              :                k = k + 1
    1955              :                destination%local_data(i, j) = info%send_buf(k)
    1956              :             END DO
    1957              :          END DO
    1958              :          DEALLOCATE (info%send_buf)
    1959              :       ELSE
    1960              :          ! Set up local variables ...
    1961       143780 :          send_size = info%send_size
    1962       431340 :          nlocal_recv(1:2) = info%nlocal_recv(:)
    1963       431340 :          nblock_src(1:2) = info%nblock_src(:)
    1964       431340 :          src_num_pe(1:2) = info%src_num_pe(:)
    1965       143780 :          recv_col_indices => info%recv_col_indices
    1966       143780 :          recv_row_indices => info%recv_row_indices
    1967              : 
    1968              :          ! ... use the local variables to do the work
    1969              :          ! DEST_2
    1970       143780 :          CALL mp_waitall(info%recv_request(:))
    1971       431340 :          ALLOCATE (recv_count(0:send_size - 1))
    1972              :          ! Loop over the rmat, filling it in with data from the recv buffers
    1973              :          ! (here the block sizes, num_pes refer to the distribution of the source matrix)
    1974       395276 :          recv_count(:) = 0
    1975      4295772 :          DO j = 1, nlocal_recv(2)
    1976      4151992 :             src_q_j = MOD(((recv_col_indices(j) - 1)/nblock_src(2)), src_num_pe(2))
    1977    200453817 :             DO i = 1, nlocal_recv(1)
    1978    196158045 :                src_p_i = MOD(((recv_row_indices(i) - 1)/nblock_src(1)), src_num_pe(1))
    1979    196158045 :                mpi_rank = info%src_blacs2mpi(src_p_i, src_q_j)
    1980    196158045 :                recv_count(mpi_rank) = recv_count(mpi_rank) + 1
    1981    200310037 :                destination%local_data(i, j) = info%recv_buf(info%recv_disp(mpi_rank) + recv_count(mpi_rank))
    1982              :             END DO
    1983              :          END DO
    1984       143780 :          DEALLOCATE (recv_count, info%recv_disp, info%recv_request, info%recv_buf, info%src_blacs2mpi)
    1985              :          ! Invalidate the stored state
    1986              :          NULLIFY (info%recv_col_indices, &
    1987       143780 :                   info%recv_row_indices)
    1988              : 
    1989              :       END IF
    1990              : 
    1991       143780 :       CALL timestop(handle)
    1992              : 
    1993       143780 :    END SUBROUTINE cp_fm_finish_copy_general
    1994              : 
    1995              : ! **************************************************************************************************
    1996              : !> \brief Completes the copy operation: wait for comms clean up MPI state
    1997              : !> \param info all of the data that will be needed to complete the copy operation
    1998              : ! **************************************************************************************************
    1999       156050 :    SUBROUTINE cp_fm_cleanup_copy_general(info)
    2000              :       TYPE(copy_info_type), INTENT(INOUT)                :: info
    2001              : 
    2002              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_cleanup_copy_general'
    2003              : 
    2004              :       INTEGER                                            :: handle
    2005              : 
    2006       156050 :       CALL timeset(routineN, handle)
    2007              : 
    2008              :       IF (.NOT. cp2k_is_parallel) THEN
    2009              :          ! Don't do anything - no MPI state for the serial case
    2010              :       ELSE
    2011              :          ! SRC_2
    2012              :          ! If this process is also in the destination decomposition, this deallocate
    2013              :          ! Was already done in cp_fm_finish_copy_general
    2014       156050 :          IF (ALLOCATED(info%src_blacs2mpi)) THEN
    2015        30302 :             DEALLOCATE (info%src_blacs2mpi)
    2016              :          END IF
    2017       156050 :          CALL mp_waitall(info%send_request)
    2018       156050 :          DEALLOCATE (info%send_request, info%send_buf)
    2019              : 
    2020              :       END IF
    2021              : 
    2022       156050 :       CALL timestop(handle)
    2023              : 
    2024       156050 :    END SUBROUTINE cp_fm_cleanup_copy_general
    2025              : 
    2026              : ! **************************************************************************************************
    2027              : !> \brief General copy of a submatrix of fm matrix to  a submatrix of another fm matrix.
    2028              : !>        The two matrices can have different contexts.
    2029              : !>
    2030              : !>        Summary of distribution routines for dense matrices
    2031              : !>        The following will copy A(iA:iA+M-1,jA:jA+N-1) to B(iB:iB+M-1,jB:jB+N-1):
    2032              : !>
    2033              : !>        call pdgemr2d(M,N,Aloc,iA,jA,descA,Bloc,iB,jB,descB,context)
    2034              : !>
    2035              : !>        A process that is not a part of the context of A should set descA(2)
    2036              : !>        to -1, and similarly for B.
    2037              : !>
    2038              : !> \param source          input fm matrix
    2039              : !> \param destination     output fm matrix
    2040              : !> \param nrows           number of rows of sub matrix to be copied
    2041              : !> \param ncols           number of cols of sub matrix to be copied
    2042              : !> \param s_firstrow      starting global row index of sub matrix in source
    2043              : !> \param s_firstcol      starting global col index of sub matrix in source
    2044              : !> \param d_firstrow      starting global row index of sub matrix in destination
    2045              : !> \param d_firstcol      starting global col index of sub matrix in destination
    2046              : !> \param global_context  process grid that covers all parts of either A or B.
    2047              : ! **************************************************************************************************
    2048        11174 :    SUBROUTINE cp_fm_to_fm_submat_general(source, &
    2049              :                                          destination, &
    2050              :                                          nrows, &
    2051              :                                          ncols, &
    2052              :                                          s_firstrow, &
    2053              :                                          s_firstcol, &
    2054              :                                          d_firstrow, &
    2055              :                                          d_firstcol, &
    2056              :                                          global_context)
    2057              : 
    2058              :       TYPE(cp_fm_type), INTENT(IN)                       :: source, destination
    2059              :       INTEGER, INTENT(IN)                                :: nrows, ncols, s_firstrow, s_firstcol, &
    2060              :                                                             d_firstrow, d_firstcol
    2061              : 
    2062              :       CLASS(cp_blacs_type), INTENT(IN)        :: global_context
    2063              : 
    2064              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_to_fm_submat_general'
    2065              : 
    2066              :       LOGICAL                                  :: debug
    2067              :       INTEGER                                  :: handle
    2068              : #if defined(__parallel)
    2069              :       INTEGER, DIMENSION(9)                    :: desca, descb
    2070              :       REAL(KIND=dp), DIMENSION(1, 1), TARGET   :: dummy
    2071        11174 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: smat, dmat
    2072              : #endif
    2073              : 
    2074        11174 :       CALL timeset(routineN, handle)
    2075              : 
    2076        11174 :       debug = debug_this_module
    2077              : 
    2078              :       IF (.NOT. cp2k_is_parallel) THEN
    2079              :          CALL cp_fm_to_fm_submat(source, &
    2080              :                                  destination, &
    2081              :                                  nrows, &
    2082              :                                  ncols, &
    2083              :                                  s_firstrow, &
    2084              :                                  s_firstcol, &
    2085              :                                  d_firstrow, &
    2086              :                                  d_firstcol)
    2087              :       ELSE
    2088              : #ifdef __parallel
    2089        11174 :          NULLIFY (smat, dmat)
    2090              :          ! check whether source is available on this process
    2091        11174 :          IF (ASSOCIATED(source%matrix_struct)) THEN
    2092       111740 :             desca = source%matrix_struct%descriptor
    2093        11174 :             IF (source%use_sp) CPABORT("only DP kind implemented")
    2094        11174 :             IF (nrows > source%matrix_struct%nrow_global) &
    2095            0 :                CPABORT("nrows is greater than nrow_global of source")
    2096        11174 :             IF (ncols > source%matrix_struct%ncol_global) &
    2097            0 :                CPABORT("ncols is greater than ncol_global of source")
    2098        11174 :             smat => source%local_data
    2099              :          ELSE
    2100            0 :             desca = -1
    2101            0 :             smat => dummy
    2102              :          END IF
    2103              :          ! check destination is available on this process
    2104        11174 :          IF (ASSOCIATED(destination%matrix_struct)) THEN
    2105       111740 :             descb = destination%matrix_struct%descriptor
    2106        11174 :             IF (destination%use_sp) CPABORT("only DP kind implemented")
    2107        11174 :             IF (nrows > destination%matrix_struct%nrow_global) &
    2108            0 :                CPABORT("nrows is greater than nrow_global of destination")
    2109        11174 :             IF (ncols > destination%matrix_struct%ncol_global) &
    2110            0 :                CPABORT("ncols is greater than ncol_global of destination")
    2111        11174 :             dmat => destination%local_data
    2112              :          ELSE
    2113            0 :             descb = -1
    2114            0 :             dmat => dummy
    2115              :          END IF
    2116              :          ! do copy
    2117              : 
    2118              :          CALL pdgemr2d(nrows, &
    2119              :                        ncols, &
    2120              :                        smat, &
    2121              :                        s_firstrow, &
    2122              :                        s_firstcol, &
    2123              :                        desca, &
    2124              :                        dmat, &
    2125              :                        d_firstrow, &
    2126              :                        d_firstcol, &
    2127              :                        descb, &
    2128        11174 :                        global_context%get_handle())
    2129              : #else
    2130              :          MARK_USED(global_context)
    2131              :          CPABORT("this subroutine only supports SCALAPACK")
    2132              : #endif
    2133              :       END IF
    2134              : 
    2135        11174 :       CALL timestop(handle)
    2136              : 
    2137        11174 :    END SUBROUTINE cp_fm_to_fm_submat_general
    2138              : 
    2139              : ! **************************************************************************************************
    2140              : !> \brief ...
    2141              : !> \param matrix ...
    2142              : !> \param irow_global ...
    2143              : !> \param icol_global ...
    2144              : !> \param alpha ...
    2145              : ! **************************************************************************************************
    2146          240 :    SUBROUTINE cp_fm_add_to_element(matrix, irow_global, icol_global, alpha)
    2147              : 
    2148              :       ! Add alpha to the matrix element specified by the global indices
    2149              :       ! irow_global and icol_global
    2150              : 
    2151              :       ! - Creation (05.05.06,MK)
    2152              : 
    2153              :       TYPE(cp_fm_type), INTENT(IN)          :: matrix
    2154              :       INTEGER, INTENT(IN)                      :: irow_global, icol_global
    2155              :       REAL(KIND=dp), INTENT(IN)                :: alpha
    2156              : 
    2157              :       INTEGER                                  :: mypcol, myprow, npcol, nprow
    2158          240 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
    2159              :       TYPE(cp_blacs_env_type), POINTER         :: context
    2160              : #if defined(__parallel)
    2161              :       INTEGER                                  :: icol_local, ipcol, iprow, &
    2162              :                                                   irow_local
    2163              :       INTEGER, DIMENSION(9)                    :: desca
    2164              : #endif
    2165              : 
    2166          240 :       context => matrix%matrix_struct%context
    2167              : 
    2168          240 :       myprow = context%mepos(1)
    2169          240 :       mypcol = context%mepos(2)
    2170              : 
    2171          240 :       nprow = context%num_pe(1)
    2172          240 :       npcol = context%num_pe(2)
    2173              : 
    2174          240 :       a => matrix%local_data
    2175              : 
    2176              : #if defined(__parallel)
    2177              : 
    2178         2400 :       desca(:) = matrix%matrix_struct%descriptor(:)
    2179              : 
    2180              :       CALL infog2l(irow_global, icol_global, desca, nprow, npcol, myprow, mypcol, &
    2181          240 :                    irow_local, icol_local, iprow, ipcol)
    2182              : 
    2183          240 :       IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
    2184          120 :          a(irow_local, icol_local) = a(irow_local, icol_local) + alpha
    2185              :       END IF
    2186              : 
    2187              : #else
    2188              : 
    2189              :       a(irow_global, icol_global) = a(irow_global, icol_global) + alpha
    2190              : 
    2191              : #endif
    2192              : 
    2193          240 :    END SUBROUTINE cp_fm_add_to_element
    2194              : 
    2195              : ! **************************************************************************************************
    2196              : !> \brief ...
    2197              : !> \param fm ...
    2198              : !> \param unit ...
    2199              : ! **************************************************************************************************
    2200        79169 :    SUBROUTINE cp_fm_write_unformatted(fm, unit)
    2201              :       TYPE(cp_fm_type), INTENT(IN)             :: fm
    2202              :       INTEGER, INTENT(IN)                      :: unit
    2203              : 
    2204              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_write_unformatted'
    2205              : 
    2206              :       INTEGER                                  :: handle, j, max_block, &
    2207              :                                                   ncol_global, nrow_global
    2208              :       TYPE(mp_para_env_type), POINTER          :: para_env
    2209              : #if defined(__parallel)
    2210              :       INTEGER                                  :: i, i_block, icol_local, &
    2211              :                                                   in, info, ipcol, &
    2212              :                                                   iprow, irow_local, &
    2213              :                                                   mepos, &
    2214              :                                                   num_pe, rb, tag
    2215              :       INTEGER, DIMENSION(9)                    :: desc
    2216        79169 :       REAL(KIND=dp), DIMENSION(:), POINTER     :: vecbuf
    2217              :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: newdat
    2218              :       TYPE(cp_blacs_type) :: ictxt_loc
    2219              :       INTEGER, EXTERNAL                        :: numroc
    2220              : #endif
    2221              : 
    2222        79169 :       CALL timeset(routineN, handle)
    2223              :       CALL cp_fm_get_info(fm, nrow_global=nrow_global, ncol_global=ncol_global, ncol_block=max_block, &
    2224        79169 :                           para_env=para_env)
    2225              : 
    2226              : #if defined(__parallel)
    2227        79169 :       num_pe = para_env%num_pe
    2228        79169 :       mepos = para_env%mepos
    2229        79169 :       rb = nrow_global
    2230        79169 :       tag = 0
    2231              :       ! get a new context
    2232        79169 :       CALL ictxt_loc%gridinit(para_env, 'R', 1, num_pe)
    2233        79169 :       CALL descinit(desc, nrow_global, ncol_global, rb, max_block, 0, 0, ictxt_loc%get_handle(), nrow_global, info)
    2234        79169 :       CPASSERT(info == 0)
    2235              :       ASSOCIATE (nprow => ictxt_loc%num_pe(1), npcol => ictxt_loc%num_pe(2), &
    2236              :                  myprow => ictxt_loc%mepos(1), mypcol => ictxt_loc%mepos(2))
    2237       158338 :          in = numroc(ncol_global, max_block, mypcol, 0, npcol)
    2238              : 
    2239       316676 :          ALLOCATE (newdat(nrow_global, MAX(1, in)))
    2240              : 
    2241              :          ! do the actual scalapack to cols reordering
    2242              :          CALL pdgemr2d(nrow_global, ncol_global, fm%local_data, 1, 1, &
    2243              :                        fm%matrix_struct%descriptor, &
    2244        79169 :                        newdat, 1, 1, desc, ictxt_loc%get_handle())
    2245              : 
    2246       237507 :          ALLOCATE (vecbuf(nrow_global*max_block))
    2247     55114469 :          vecbuf = HUGE(1.0_dp) ! init for valgrind
    2248              : 
    2249       348203 :          DO i = 1, ncol_global, MAX(max_block, 1)
    2250       189865 :             i_block = MIN(max_block, ncol_global - i + 1)
    2251              :             CALL infog2l(1, i, desc, nprow, npcol, myprow, mypcol, &
    2252       189865 :                          irow_local, icol_local, iprow, ipcol)
    2253       189865 :             IF (ipcol == mypcol) THEN
    2254       940875 :                DO j = 1, i_block
    2255    146258055 :                   vecbuf((j - 1)*nrow_global + 1:nrow_global*j) = newdat(:, icol_local + j - 1)
    2256              :                END DO
    2257              :             END IF
    2258              : 
    2259       189865 :             IF (ipcol == 0) THEN
    2260              :                ! do nothing
    2261              :             ELSE
    2262        65252 :                IF (ipcol == mypcol) THEN
    2263     36162328 :                   CALL para_env%send(vecbuf(:), 0, tag)
    2264              :                END IF
    2265        65252 :                IF (mypcol == 0) THEN
    2266     72292030 :                   CALL para_env%recv(vecbuf(:), ipcol, tag)
    2267              :                END IF
    2268              :             END IF
    2269              : 
    2270       269034 :             IF (unit > 0) THEN
    2271       909891 :                DO j = 1, i_block
    2272       909891 :                   WRITE (unit) vecbuf((j - 1)*nrow_global + 1:nrow_global*j)
    2273              :                END DO
    2274              :             END IF
    2275              : 
    2276              :          END DO
    2277              :       END ASSOCIATE
    2278        79169 :       DEALLOCATE (vecbuf)
    2279              : 
    2280        79169 :       CALL ictxt_loc%gridexit()
    2281              : 
    2282        79169 :       DEALLOCATE (newdat)
    2283              : 
    2284              : #else
    2285              : 
    2286              :       IF (unit > 0) THEN
    2287              :          DO j = 1, ncol_global
    2288              :             WRITE (unit) fm%local_data(:, j)
    2289              :          END DO
    2290              :       END IF
    2291              : 
    2292              : #endif
    2293        79169 :       CALL timestop(handle)
    2294              : 
    2295       395845 :    END SUBROUTINE cp_fm_write_unformatted
    2296              : 
    2297              : ! **************************************************************************************************
    2298              : !> \brief Write out a full matrix in plain text.
    2299              : !> \param fm the matrix to be outputted
    2300              : !> \param unit the unit number for I/O
    2301              : !> \param header optional header
    2302              : !> \param value_format ...
    2303              : ! **************************************************************************************************
    2304         1122 :    SUBROUTINE cp_fm_write_formatted(fm, unit, header, value_format)
    2305              :       TYPE(cp_fm_type), INTENT(IN)             :: fm
    2306              :       INTEGER, INTENT(IN)                      :: unit
    2307              :       CHARACTER(LEN=*), INTENT(IN), OPTIONAL   :: header, value_format
    2308              : 
    2309              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_write_formatted'
    2310              : 
    2311              :       CHARACTER(LEN=21)                        :: my_value_format
    2312              :       INTEGER                                  :: handle, i, j, max_block, &
    2313              :                                                   ncol_global, nrow_global
    2314              :       TYPE(mp_para_env_type), POINTER          :: para_env
    2315              : #if defined(__parallel)
    2316              :       INTEGER                                  :: i_block, icol_local, &
    2317              :                                                   in, info, ipcol, &
    2318              :                                                   iprow, irow_local, &
    2319              :                                                   mepos, num_pe, rb, tag, k, &
    2320              :                                                   icol, irow
    2321              :       INTEGER, DIMENSION(9)                    :: desc
    2322         1122 :       REAL(KIND=dp), DIMENSION(:), POINTER     :: vecbuf
    2323         1122 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: newdat
    2324              :       TYPE(cp_blacs_type) :: ictxt_loc
    2325              :       INTEGER, EXTERNAL                        :: numroc
    2326              : #endif
    2327              : 
    2328         1122 :       CALL timeset(routineN, handle)
    2329              :       CALL cp_fm_get_info(fm, nrow_global=nrow_global, ncol_global=ncol_global, ncol_block=max_block, &
    2330         1122 :                           para_env=para_env)
    2331              : 
    2332         1122 :       IF (PRESENT(value_format)) THEN
    2333            0 :          CPASSERT(LEN_TRIM(ADJUSTL(value_format)) < 11)
    2334            0 :          my_value_format = "(I10, I10, "//TRIM(ADJUSTL(value_format))//")"
    2335              :       ELSE
    2336         1122 :          my_value_format = "(I10, I10, ES24.12)"
    2337              :       END IF
    2338              : 
    2339         1122 :       IF (unit > 0) THEN
    2340            5 :          IF (PRESENT(header)) WRITE (unit, *) header
    2341            5 :          WRITE (unit, "(A2, A8, A10, A24)") "#", "Row", "Column", ADJUSTL("Value")
    2342              :       END IF
    2343              : 
    2344              : #if defined(__parallel)
    2345         1122 :       num_pe = para_env%num_pe
    2346         1122 :       mepos = para_env%mepos
    2347         1122 :       rb = nrow_global
    2348         1122 :       tag = 0
    2349              :       ! get a new context
    2350         1122 :       CALL ictxt_loc%gridinit(para_env, 'R', 1, num_pe)
    2351         1122 :       CALL descinit(desc, nrow_global, ncol_global, rb, max_block, 0, 0, ictxt_loc%get_handle(), nrow_global, info)
    2352         1122 :       CPASSERT(info == 0)
    2353              :       ASSOCIATE (nprow => ictxt_loc%num_pe(1), npcol => ictxt_loc%num_pe(2), &
    2354              :                  myprow => ictxt_loc%mepos(1), mypcol => ictxt_loc%mepos(2))
    2355         2244 :          in = numroc(ncol_global, max_block, mypcol, 0, npcol)
    2356              : 
    2357         4488 :          ALLOCATE (newdat(nrow_global, MAX(1, in)))
    2358              : 
    2359              :          ! do the actual scalapack to cols reordering
    2360              :          CALL pdgemr2d(nrow_global, ncol_global, fm%local_data, 1, 1, &
    2361              :                        fm%matrix_struct%descriptor, &
    2362         1122 :                        newdat, 1, 1, desc, ictxt_loc%get_handle())
    2363              : 
    2364         3366 :          ALLOCATE (vecbuf(nrow_global*max_block))
    2365         3872 :          vecbuf = HUGE(1.0_dp) ! init for valgrind
    2366         1122 :          irow = 1
    2367         1122 :          icol = 1
    2368              : 
    2369         4488 :          DO i = 1, ncol_global, MAX(max_block, 1)
    2370         2244 :             i_block = MIN(max_block, ncol_global - i + 1)
    2371              :             CALL infog2l(1, i, desc, nprow, npcol, myprow, mypcol, &
    2372         2244 :                          irow_local, icol_local, iprow, ipcol)
    2373         2244 :             IF (ipcol == mypcol) THEN
    2374         2264 :                DO j = 1, i_block
    2375         7932 :                   vecbuf((j - 1)*nrow_global + 1:nrow_global*j) = newdat(:, icol_local + j - 1)
    2376              :                END DO
    2377              :             END IF
    2378              : 
    2379         2244 :             IF (ipcol == 0) THEN
    2380              :                ! do nothing
    2381              :             ELSE
    2382         1118 :                IF (ipcol == mypcol) THEN
    2383         1857 :                   CALL para_env%send(vecbuf(:), 0, tag)
    2384              :                END IF
    2385         1118 :                IF (mypcol == 0) THEN
    2386         3155 :                   CALL para_env%recv(vecbuf(:), ipcol, tag)
    2387              :                END IF
    2388              :             END IF
    2389              : 
    2390         3366 :             IF (unit > 0) THEN
    2391           40 :                DO j = 1, i_block
    2392          650 :                   DO k = (j - 1)*nrow_global + 1, nrow_global*j
    2393          610 :                      WRITE (UNIT=unit, FMT=my_value_format) irow, icol, vecbuf(k)
    2394          610 :                      irow = irow + 1
    2395          640 :                      IF (irow > nrow_global) THEN
    2396           30 :                         irow = 1
    2397           30 :                         icol = icol + 1
    2398              :                      END IF
    2399              :                   END DO
    2400              :                END DO
    2401              :             END IF
    2402              : 
    2403              :          END DO
    2404              :       END ASSOCIATE
    2405         1122 :       DEALLOCATE (vecbuf)
    2406              : 
    2407         1122 :       CALL ictxt_loc%gridexit()
    2408              : 
    2409         1122 :       DEALLOCATE (newdat)
    2410              : 
    2411              : #else
    2412              : 
    2413              :       IF (unit > 0) THEN
    2414              :          DO j = 1, ncol_global
    2415              :             DO i = 1, nrow_global
    2416              :                WRITE (UNIT=unit, FMT=my_value_format) i, j, fm%local_data(i, j)
    2417              :             END DO
    2418              :          END DO
    2419              :       END IF
    2420              : 
    2421              : #endif
    2422         1122 :       CALL timestop(handle)
    2423              : 
    2424         5610 :    END SUBROUTINE cp_fm_write_formatted
    2425              : 
    2426              : ! **************************************************************************************************
    2427              : !> \brief ...
    2428              : !> \param fm ...
    2429              : !> \param unit ...
    2430              : ! **************************************************************************************************
    2431         1352 :    SUBROUTINE cp_fm_read_unformatted(fm, unit)
    2432              :       TYPE(cp_fm_type), INTENT(INOUT)          :: fm
    2433              :       INTEGER, INTENT(IN)                      :: unit
    2434              : 
    2435              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_read_unformatted'
    2436              : 
    2437              :       INTEGER                                  :: handle, j, max_block, &
    2438              :                                                   ncol_global, nrow_global
    2439              :       TYPE(mp_para_env_type), POINTER          :: para_env
    2440              : #if defined(__parallel)
    2441              :       INTEGER                                  :: k, n_cols
    2442              :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: vecbuf
    2443              : #endif
    2444              : 
    2445         1352 :       CALL timeset(routineN, handle)
    2446              : 
    2447              :       CALL cp_fm_get_info(fm, nrow_global=nrow_global, ncol_global=ncol_global, ncol_block=max_block, &
    2448         1352 :                           para_env=para_env)
    2449              : 
    2450              : #if defined(__parallel)
    2451              : 
    2452              :       ! the parallel case could be made more efficient (see cp_fm_write_unformatted)
    2453              : 
    2454         5408 :       ALLOCATE (vecbuf(nrow_global, max_block))
    2455              : 
    2456         5190 :       DO j = 1, ncol_global, max_block
    2457              : 
    2458         3838 :          n_cols = MIN(max_block, ncol_global - j + 1)
    2459         3838 :          IF (para_env%mepos == 0) THEN
    2460        31344 :             DO k = 1, n_cols
    2461        31344 :                READ (unit) vecbuf(:, k)
    2462              :             END DO
    2463              :          END IF
    2464     11457886 :          CALL para_env%bcast(vecbuf, 0)
    2465         5190 :          CALL cp_fm_set_submatrix(fm, vecbuf, start_row=1, start_col=j, n_cols=n_cols)
    2466              : 
    2467              :       END DO
    2468              : 
    2469         1352 :       DEALLOCATE (vecbuf)
    2470              : 
    2471              : #else
    2472              : 
    2473              :       DO j = 1, ncol_global
    2474              :          READ (unit) fm%local_data(:, j)
    2475              :       END DO
    2476              : 
    2477              : #endif
    2478              : 
    2479         1352 :       CALL timestop(handle)
    2480              : 
    2481         1352 :    END SUBROUTINE cp_fm_read_unformatted
    2482              : 
    2483              : ! **************************************************************************************************
    2484              : !> \brief ...
    2485              : !> \param mm_type ...
    2486              : ! **************************************************************************************************
    2487         9881 :    SUBROUTINE cp_fm_setup(mm_type)
    2488              :       INTEGER, INTENT(IN)                                :: mm_type
    2489              : 
    2490         9881 :       cp_fm_mm_type = mm_type
    2491         9881 :    END SUBROUTINE cp_fm_setup
    2492              : 
    2493              : ! **************************************************************************************************
    2494              : !> \brief ...
    2495              : !> \return ...
    2496              : ! **************************************************************************************************
    2497      1444282 :    FUNCTION cp_fm_get_mm_type() RESULT(res)
    2498              :       INTEGER                                            :: res
    2499              : 
    2500      1444282 :       res = cp_fm_mm_type
    2501      1444282 :    END FUNCTION cp_fm_get_mm_type
    2502              : 
    2503              : ! **************************************************************************************************
    2504              : !> \brief ...
    2505              : !> \param ictxt ...
    2506              : !> \param prec ...
    2507              : !> \return ...
    2508              : ! **************************************************************************************************
    2509           10 :    FUNCTION cp_fm_pilaenv(ictxt, prec) RESULT(res)
    2510              :       INTEGER                                            :: ictxt
    2511              :       CHARACTER(LEN=1)                                   :: prec
    2512              :       INTEGER                                            :: res
    2513              : #if defined(__parallel)
    2514              :       INTEGER                                            :: pilaenv
    2515           10 :       res = pilaenv(ictxt, prec)
    2516              : #else
    2517              :       MARK_USED(ictxt)
    2518              :       MARK_USED(prec)
    2519              :       res = -1
    2520              : #endif
    2521              : 
    2522           10 :    END FUNCTION cp_fm_pilaenv
    2523              : 
    2524            0 : END MODULE cp_fm_types
        

Generated by: LCOV version 2.0-1