LCOV - code coverage report
Current view: top level - src/fm - cp_fm_types.F (source / functions) Coverage Total Hit
Test: CP2K Regtests (git:42dac4a) Lines: 85.4 % 824 704
Test Date: 2025-07-25 12:55:17 Functions: 80.0 % 45 36

            Line data    Source code
       1              : !--------------------------------------------------------------------------------------------------!
       2              : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3              : !   Copyright 2000-2025 CP2K developers group <https://cp2k.org>                                   !
       4              : !                                                                                                  !
       5              : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6              : !--------------------------------------------------------------------------------------------------!
       7              : 
       8              : ! **************************************************************************************************
       9              : !> \brief represent a full matrix distributed on many processors
      10              : !> \par History
      11              : !>      3) separated structure object, removed globenv, renamed to full matrix
      12              : !>         many changes (fawzi 08.2002)
      13              : !> \author Matthias Krack (22.05.2001)
      14              : ! **************************************************************************************************
      15              : MODULE cp_fm_types
      16              :    USE cp_blacs_env,                    ONLY: cp_blacs_env_type
      17              :    USE cp_blacs_types,                  ONLY: cp_blacs_type
      18              :    USE cp_fm_struct,                    ONLY: cp_fm_struct_equivalent,&
      19              :                                               cp_fm_struct_get,&
      20              :                                               cp_fm_struct_release,&
      21              :                                               cp_fm_struct_retain,&
      22              :                                               cp_fm_struct_type,&
      23              :                                               cp_fm_struct_write_info
      24              :    USE kinds,                           ONLY: dp,&
      25              :                                               sp
      26              :    USE message_passing,                 ONLY: cp2k_is_parallel,&
      27              :                                               mp_any_source,&
      28              :                                               mp_para_env_type,&
      29              :                                               mp_proc_null,&
      30              :                                               mp_request_null,&
      31              :                                               mp_request_type,&
      32              :                                               mp_waitall
      33              :    USE parallel_rng_types,              ONLY: UNIFORM,&
      34              :                                               rng_stream_type
      35              : #include "../base/base_uses.f90"
      36              : 
      37              :    IMPLICIT NONE
      38              : 
      39              :    PRIVATE
      40              : 
      41              :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_fm_types'
      42              :    LOGICAL, PARAMETER          :: debug_this_module = .TRUE.
      43              :    INTEGER, PARAMETER :: src_tag = 3, dest_tag = 5, send_tag = 7, recv_tag = 11
      44              : 
      45              :    INTEGER, PRIVATE :: cp_fm_mm_type = 1
      46              : 
      47              :    PUBLIC :: cp_fm_type, &
      48              :              cp_fm_p_type, copy_info_type
      49              : 
      50              :    PUBLIC :: cp_fm_add_to_element, &
      51              :              cp_fm_create, &
      52              :              cp_fm_release, &
      53              :              cp_fm_get_info, &
      54              :              cp_fm_set_element, &
      55              :              cp_fm_get_element, &
      56              :              cp_fm_get_diag, & ! get diagonal
      57              :              cp_fm_set_all, & ! set all elements and diagonal
      58              :              cp_fm_set_submatrix, & ! set a submatrix to given values
      59              :              cp_fm_get_submatrix, & ! get a submatrix of given values
      60              :              cp_fm_init_random, &
      61              :              cp_fm_maxabsval, & ! find the maximum absolute value
      62              :              cp_fm_maxabsrownorm, & ! find the maximum of the sum of the abs of the elements of a row
      63              :              cp_fm_to_fm, & ! copy (parts of) a fm to a fm
      64              :              cp_fm_vectorsnorm, & ! compute the norm of the column-vectors
      65              :              cp_fm_vectorssum, & ! compute the sum of all elements of the column-vectors
      66              :              cp_fm_to_fm_submat, & ! copy (parts of) a fm to a fm
      67              :              cp_fm_to_fm_triangular, &
      68              :              cp_fm_copy_general, &
      69              :              cp_fm_start_copy_general, &
      70              :              cp_fm_finish_copy_general, &
      71              :              cp_fm_cleanup_copy_general, &
      72              :              cp_fm_write_unformatted, & ! writes a full matrix to an open unit
      73              :              cp_fm_write_formatted, & ! writes a full matrix to an open unit
      74              :              cp_fm_read_unformatted, & ! reads a full matrix from an open unit
      75              :              cp_fm_setup, & ! allows to set flags for fms
      76              :              cp_fm_get_mm_type, &
      77              :              cp_fm_write_info, &
      78              :              cp_fm_to_fm_submat_general ! copy matrix across different contexts
      79              : 
      80              :    PUBLIC :: cp_fm_pilaenv
      81              : 
      82              :    INTERFACE cp_fm_to_fm
      83              :       MODULE PROCEDURE cp_fm_to_fm_matrix, & ! a full matrix
      84              :          cp_fm_to_fm_columns ! just a number of columns
      85              :    END INTERFACE
      86              : 
      87              :    INTERFACE cp_fm_release
      88              :       MODULE PROCEDURE cp_fm_release_aa0, &
      89              :          cp_fm_release_aa1, &
      90              :          cp_fm_release_aa2, &
      91              :          cp_fm_release_aa3, &
      92              :          cp_fm_release_ap1, &
      93              :          cp_fm_release_ap2, &
      94              :          cp_fm_release_pa1, &
      95              :          cp_fm_release_pa2, &
      96              :          cp_fm_release_pa3, &
      97              :          cp_fm_release_pp1, &
      98              :          cp_fm_release_pp2
      99              :    END INTERFACE
     100              : 
     101              : ! **************************************************************************************************
     102              : !> \brief represent a full matrix
     103              : !> \param name the name of the matrix, used for printing
     104              : !> \param matrix_struct structure of this matrix
     105              : !> \param local_data array with the data of the matrix (its contents
     106              : !>        depend on the matrix type used: in parallel runs it will be
     107              : !>        in scalapack format, in sequential, it will simply contain
     108              : !>        the matrix)
     109              : !> \par History
     110              : !>      08.2002 created [fawzi]
     111              : !> \author fawzi
     112              : ! **************************************************************************************************
     113              :    TYPE cp_fm_type
     114              : !    PRIVATE
     115              :       CHARACTER(LEN=60) :: name = ""
     116              :       LOGICAL :: use_sp = .FALSE.
     117              :       TYPE(cp_fm_struct_type), POINTER :: matrix_struct => NULL()
     118              :       REAL(KIND=dp), DIMENSION(:, :), POINTER, CONTIGUOUS :: local_data => NULL()
     119              :       REAL(KIND=sp), DIMENSION(:, :), POINTER, CONTIGUOUS :: local_data_sp => NULL()
     120              :    END TYPE cp_fm_type
     121              : 
     122              : ! **************************************************************************************************
     123              : !> \brief just to build arrays of pointers to matrices
     124              : !> \param matrix the pointer to the matrix
     125              : !> \par History
     126              : !>      08.2002 created [fawzi]
     127              : !> \author fawzi
     128              : ! **************************************************************************************************
     129              :    TYPE cp_fm_p_type
     130              :       TYPE(cp_fm_type), POINTER :: matrix => NULL()
     131              :    END TYPE cp_fm_p_type
     132              : 
     133              : ! **************************************************************************************************
     134              : !> \brief Stores the state of a copy between cp_fm_start_copy_general
     135              : !>        and cp_fm_finish_copy_general
     136              : !> \par History
     137              : !>      Jan 2017  [Mark T]
     138              : ! **************************************************************************************************
     139              :    TYPE copy_info_type
     140              :       INTEGER :: send_size = -1
     141              :       INTEGER, DIMENSION(2) :: nlocal_recv = -1, nblock_src = -1, src_num_pe = -1 ! 1->row  2->col
     142              :       TYPE(mp_request_type), DIMENSION(:), ALLOCATABLE :: send_request, recv_request
     143              :       INTEGER, DIMENSION(:), ALLOCATABLE   :: recv_disp
     144              :       INTEGER, DIMENSION(:), POINTER       :: recv_col_indices => NULL(), recv_row_indices => NULL()
     145              :       INTEGER, DIMENSION(:, :), ALLOCATABLE :: src_blacs2mpi
     146              :       REAL(KIND=dp), DIMENSION(:), ALLOCATABLE :: recv_buf, send_buf
     147              :    END TYPE copy_info_type
     148              : 
     149              : CONTAINS
     150              : 
     151              : ! **************************************************************************************************
     152              : !> \brief creates a new full matrix with the given structure
     153              : !> \param matrix the matrix to be created
     154              : !> \param matrix_struct the structure of matrix
     155              : !> \param name ...
     156              : !> \param use_sp ...
     157              : !> \par History
     158              : !>      08.2002 created [fawzi]
     159              : !> \author Fawzi Mohamed
     160              : !> \note
     161              : !>      preferred allocation routine
     162              : ! **************************************************************************************************
     163      1310522 :    SUBROUTINE cp_fm_create(matrix, matrix_struct, name, use_sp)
     164              :       TYPE(cp_fm_type), INTENT(OUT)                      :: matrix
     165              :       TYPE(cp_fm_struct_type), INTENT(IN), TARGET        :: matrix_struct
     166              :       CHARACTER(LEN=*), INTENT(in), OPTIONAL             :: name
     167              :       LOGICAL, INTENT(in), OPTIONAL                      :: use_sp
     168              : 
     169              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'cp_fm_create'
     170              : 
     171              :       INTEGER                                            :: handle, ncol_local, npcol, nprow, &
     172              :                                                             nrow_local
     173              :       TYPE(cp_blacs_env_type), POINTER                   :: context
     174              : 
     175      1310522 :       CALL timeset(routineN, handle)
     176              : 
     177      1310522 :       context => matrix_struct%context
     178      1310522 :       matrix%matrix_struct => matrix_struct
     179      1310522 :       CALL cp_fm_struct_retain(matrix%matrix_struct)
     180              : 
     181      1310522 :       matrix%use_sp = .FALSE.
     182      1310522 :       IF (PRESENT(use_sp)) matrix%use_sp = use_sp
     183              : 
     184      1310522 :       nprow = context%num_pe(1)
     185      1310522 :       npcol = context%num_pe(2)
     186      1310522 :       NULLIFY (matrix%local_data)
     187      1310522 :       NULLIFY (matrix%local_data_sp)
     188              : 
     189              :       ! OK, we allocate here at least a 1 x 1 matrix
     190              :       ! this must (and is) compatible with the descinit call
     191              :       ! in cp_fm_struct
     192      1310522 :       nrow_local = matrix_struct%local_leading_dimension
     193      1310522 :       ncol_local = MAX(1, matrix_struct%ncol_locals(context%mepos(2)))
     194      1310522 :       IF (matrix%use_sp) THEN
     195            0 :          ALLOCATE (matrix%local_data_sp(nrow_local, ncol_local))
     196              :       ELSE
     197      5242088 :          ALLOCATE (matrix%local_data(nrow_local, ncol_local))
     198              :       END IF
     199              : 
     200              :       ! JVDV we should remove this, as it is up to the user to zero afterwards
     201      1310522 :       IF (matrix%use_sp) THEN
     202            0 :          matrix%local_data_sp(1:nrow_local, 1:ncol_local) = 0.0_sp
     203              :       ELSE
     204    627550519 :          matrix%local_data(1:nrow_local, 1:ncol_local) = 0.0_dp
     205              :       END IF
     206              : 
     207      1310522 :       IF (PRESENT(name)) THEN
     208       654012 :          matrix%name = name
     209              :       ELSE
     210       656510 :          matrix%name = 'full matrix'
     211              :       END IF
     212              : 
     213      1310522 :       CALL timestop(handle)
     214              : 
     215      1310522 :    END SUBROUTINE cp_fm_create
     216              : 
     217              : ! **************************************************************************************************
     218              : !> \brief releases a full matrix
     219              : !> \param matrix the matrix to release
     220              : !> \par History
     221              : !>      08.2002 created [fawzi]
     222              : !> \author Fawzi Mohamed
     223              : ! **************************************************************************************************
     224      1321786 :    SUBROUTINE cp_fm_release_aa0(matrix)
     225              :       TYPE(cp_fm_type), INTENT(INOUT)                    :: matrix
     226              : 
     227      1321786 :       IF (ASSOCIATED(matrix%local_data)) THEN
     228      1309806 :          DEALLOCATE (matrix%local_data)
     229              :          NULLIFY (matrix%local_data)
     230              :       END IF
     231      1321786 :       IF (ASSOCIATED(matrix%local_data_sp)) THEN
     232            0 :          DEALLOCATE (matrix%local_data_sp)
     233              :          NULLIFY (matrix%local_data_sp)
     234              :       END IF
     235      1321786 :       matrix%name = ""
     236      1321786 :       CALL cp_fm_struct_release(matrix%matrix_struct)
     237              : 
     238      1321786 :    END SUBROUTINE cp_fm_release_aa0
     239              : 
     240              : ! **************************************************************************************************
     241              : !> \brief ...
     242              : !> \param matrices ...
     243              : ! **************************************************************************************************
     244        72602 :    SUBROUTINE cp_fm_release_aa1(matrices)
     245              :       TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:)        :: matrices
     246              : 
     247              :       INTEGER                                            :: i
     248              : 
     249        72602 :       IF (ALLOCATED(matrices)) THEN
     250       196367 :          DO i = 1, SIZE(matrices)
     251       196367 :             CALL cp_fm_release(matrices(i))
     252              :          END DO
     253        71496 :          DEALLOCATE (matrices)
     254              :       END IF
     255        72602 :    END SUBROUTINE cp_fm_release_aa1
     256              : 
     257              : ! **************************************************************************************************
     258              : !> \brief ...
     259              : !> \param matrices ...
     260              : ! **************************************************************************************************
     261         7946 :    SUBROUTINE cp_fm_release_aa2(matrices)
     262              :       TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:, :)     :: matrices
     263              : 
     264              :       INTEGER                                            :: i, j
     265              : 
     266         7946 :       IF (ALLOCATED(matrices)) THEN
     267        16968 :          DO i = 1, SIZE(matrices, 1)
     268        47012 :             DO j = 1, SIZE(matrices, 2)
     269        41376 :                CALL cp_fm_release(matrices(i, j))
     270              :             END DO
     271              :          END DO
     272         5636 :          DEALLOCATE (matrices)
     273              :       END IF
     274         7946 :    END SUBROUTINE cp_fm_release_aa2
     275              : 
     276              : ! **************************************************************************************************
     277              : !> \brief ...
     278              : !> \param matrices ...
     279              : ! **************************************************************************************************
     280           36 :    SUBROUTINE cp_fm_release_aa3(matrices)
     281              :       TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:, :, :)  :: matrices
     282              : 
     283              :       INTEGER                                            :: i, j, k
     284              : 
     285           36 :       IF (ALLOCATED(matrices)) THEN
     286          388 :          DO i = 1, SIZE(matrices, 1)
     287         1120 :             DO j = 1, SIZE(matrices, 2)
     288         1896 :                DO k = 1, SIZE(matrices, 3)
     289         1544 :                   CALL cp_fm_release(matrices(i, j, k))
     290              :                END DO
     291              :             END DO
     292              :          END DO
     293           36 :          DEALLOCATE (matrices)
     294              :       END IF
     295           36 :    END SUBROUTINE cp_fm_release_aa3
     296              : 
     297              : ! **************************************************************************************************
     298              : !> \brief ...
     299              : !> \param matrices ...
     300              : ! **************************************************************************************************
     301       152098 :    SUBROUTINE cp_fm_release_pa1(matrices)
     302              :       TYPE(cp_fm_type), DIMENSION(:), POINTER            :: matrices
     303              : 
     304              :       INTEGER                                            :: i
     305              : 
     306       152098 :       IF (ASSOCIATED(matrices)) THEN
     307       120989 :          DO i = 1, SIZE(matrices)
     308       120989 :             CALL cp_fm_release(matrices(i))
     309              :          END DO
     310        52018 :          DEALLOCATE (matrices)
     311              :          NULLIFY (matrices)
     312              :       END IF
     313       152098 :    END SUBROUTINE cp_fm_release_pa1
     314              : 
     315              : ! **************************************************************************************************
     316              : !> \brief ...
     317              : !> \param matrices ...
     318              : ! **************************************************************************************************
     319        22188 :    SUBROUTINE cp_fm_release_pa2(matrices)
     320              :       TYPE(cp_fm_type), DIMENSION(:, :), POINTER         :: matrices
     321              : 
     322              :       INTEGER                                            :: i, j
     323              : 
     324        22188 :       IF (ASSOCIATED(matrices)) THEN
     325        46222 :          DO i = 1, SIZE(matrices, 1)
     326        90210 :             DO j = 1, SIZE(matrices, 2)
     327        78660 :                CALL cp_fm_release(matrices(i, j))
     328              :             END DO
     329              :          END DO
     330        11550 :          DEALLOCATE (matrices)
     331              :          NULLIFY (matrices)
     332              :       END IF
     333        22188 :    END SUBROUTINE cp_fm_release_pa2
     334              : 
     335              : ! **************************************************************************************************
     336              : !> \brief ...
     337              : !> \param matrices ...
     338              : ! **************************************************************************************************
     339            0 :    SUBROUTINE cp_fm_release_pa3(matrices)
     340              :       TYPE(cp_fm_type), DIMENSION(:, :, :), POINTER      :: matrices
     341              : 
     342              :       INTEGER                                            :: i, j, k
     343              : 
     344            0 :       IF (ASSOCIATED(matrices)) THEN
     345            0 :          DO i = 1, SIZE(matrices, 1)
     346            0 :             DO j = 1, SIZE(matrices, 2)
     347            0 :                DO k = 1, SIZE(matrices, 3)
     348            0 :                   CALL cp_fm_release(matrices(i, j, k))
     349              :                END DO
     350              :             END DO
     351              :          END DO
     352            0 :          DEALLOCATE (matrices)
     353              :          NULLIFY (matrices)
     354              :       END IF
     355            0 :    END SUBROUTINE cp_fm_release_pa3
     356              : 
     357              : ! **************************************************************************************************
     358              : !> \brief ...
     359              : !> \param matrices ...
     360              : ! **************************************************************************************************
     361            0 :    SUBROUTINE cp_fm_release_ap1(matrices)
     362              :       TYPE(cp_fm_p_type), ALLOCATABLE, DIMENSION(:)      :: matrices
     363              : 
     364              :       INTEGER                                            :: i
     365              : 
     366            0 :       IF (ALLOCATED(matrices)) THEN
     367            0 :          DO i = 1, SIZE(matrices)
     368            0 :             CALL cp_fm_release(matrices(i)%matrix)
     369            0 :             DEALLOCATE (matrices(i)%matrix)
     370              :          END DO
     371            0 :          DEALLOCATE (matrices)
     372              :       END IF
     373            0 :    END SUBROUTINE cp_fm_release_ap1
     374              : 
     375              : ! **************************************************************************************************
     376              : !> \brief ...
     377              : !> \param matrices ...
     378              : ! **************************************************************************************************
     379            0 :    SUBROUTINE cp_fm_release_ap2(matrices)
     380              :       TYPE(cp_fm_p_type), ALLOCATABLE, DIMENSION(:, :)   :: matrices
     381              : 
     382              :       INTEGER                                            :: i, j
     383              : 
     384            0 :       IF (ALLOCATED(matrices)) THEN
     385            0 :          DO i = 1, SIZE(matrices, 1)
     386            0 :             DO j = 1, SIZE(matrices, 2)
     387            0 :                CALL cp_fm_release(matrices(i, j)%matrix)
     388            0 :                DEALLOCATE (matrices(i, j)%matrix)
     389              :             END DO
     390              :          END DO
     391            0 :          DEALLOCATE (matrices)
     392              :       END IF
     393            0 :    END SUBROUTINE cp_fm_release_ap2
     394              : 
     395              : ! **************************************************************************************************
     396              : !> \brief ...
     397              : !> \param matrices ...
     398              : ! **************************************************************************************************
     399            0 :    SUBROUTINE cp_fm_release_pp1(matrices)
     400              :       TYPE(cp_fm_p_type), DIMENSION(:), POINTER          :: matrices
     401              : 
     402              :       INTEGER                                            :: i
     403              : 
     404            0 :       IF (ASSOCIATED(matrices)) THEN
     405            0 :          DO i = 1, SIZE(matrices)
     406            0 :             CALL cp_fm_release(matrices(i)%matrix)
     407            0 :             DEALLOCATE (matrices(i)%matrix)
     408              :          END DO
     409            0 :          DEALLOCATE (matrices)
     410              :          NULLIFY (matrices)
     411              :       END IF
     412            0 :    END SUBROUTINE cp_fm_release_pp1
     413              : 
     414              : ! **************************************************************************************************
     415              : !> \brief ...
     416              : !> \param matrices ...
     417              : ! **************************************************************************************************
     418            0 :    SUBROUTINE cp_fm_release_pp2(matrices)
     419              :       TYPE(cp_fm_p_type), DIMENSION(:, :), POINTER       :: matrices
     420              : 
     421              :       INTEGER                                            :: i, j
     422              : 
     423            0 :       IF (ASSOCIATED(matrices)) THEN
     424            0 :          DO i = 1, SIZE(matrices, 1)
     425            0 :             DO j = 1, SIZE(matrices, 2)
     426            0 :                CALL cp_fm_release(matrices(i, j)%matrix)
     427            0 :                DEALLOCATE (matrices(i, j)%matrix)
     428              :             END DO
     429              :          END DO
     430            0 :          DEALLOCATE (matrices)
     431              :          NULLIFY (matrices)
     432              :       END IF
     433            0 :    END SUBROUTINE cp_fm_release_pp2
     434              : 
     435              : ! **************************************************************************************************
     436              : !> \brief fills a matrix with random numbers
     437              : !> \param matrix : to be initialized
     438              : !> \param ncol : numbers of cols to fill
     439              : !> \param start_col : starting at coll number
     440              : !> \author Joost VandeVondele
     441              : !> \note
     442              : !>      the value of a_ij is independent of the number of cpus
     443              : ! **************************************************************************************************
     444         2578 :    SUBROUTINE cp_fm_init_random(matrix, ncol, start_col)
     445              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix
     446              :       INTEGER, INTENT(IN), OPTIONAL                      :: ncol, start_col
     447              : 
     448              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'cp_fm_init_random'
     449              : 
     450              :       INTEGER :: handle, icol_global, icol_local, irow_local, my_ncol, my_start_col, ncol_global, &
     451              :          ncol_local, nrow_global, nrow_local
     452         5156 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
     453         2578 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: buff
     454              :       REAL(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
     455         2578 :          POINTER                                         :: local_data
     456              :       REAL(KIND=dp), DIMENSION(3, 2), SAVE :: &
     457              :          seed = RESHAPE((/1.0_dp, 2.0_dp, 3.0_dp, 4.0_dp, 5.0_dp, 6.0_dp/), (/3, 2/))
     458              :       TYPE(rng_stream_type)                              :: rng
     459              : 
     460         2578 :       CALL timeset(routineN, handle)
     461              : 
     462              :       ! guarantee same seed over all tasks
     463         2578 :       CALL matrix%matrix_struct%para_env%bcast(seed, 0)
     464              : 
     465              :       rng = rng_stream_type("cp_fm_init_random_stream", distribution_type=UNIFORM, &
     466         2578 :                             extended_precision=.TRUE., seed=seed)
     467              : 
     468         2578 :       CPASSERT(.NOT. matrix%use_sp)
     469              : 
     470              :       CALL cp_fm_get_info(matrix, nrow_global=nrow_global, ncol_global=ncol_global, &
     471              :                           nrow_local=nrow_local, ncol_local=ncol_local, &
     472              :                           local_data=local_data, &
     473         2578 :                           row_indices=row_indices, col_indices=col_indices)
     474              : 
     475         2578 :       my_start_col = 1
     476         2578 :       IF (PRESENT(start_col)) my_start_col = start_col
     477         2578 :       my_ncol = matrix%matrix_struct%ncol_global
     478         2578 :       IF (PRESENT(ncol)) my_ncol = ncol
     479              : 
     480         2578 :       IF (ncol_global < (my_start_col + my_ncol - 1)) &
     481            0 :          CPABORT("ncol_global>=(my_start_col+my_ncol-1)")
     482              : 
     483         7734 :       ALLOCATE (buff(nrow_global))
     484              : 
     485              :       ! each global row has its own substream, in order to reach the stream for the local col,
     486              :       ! we just reset to the next substream
     487              :       ! following this, we fill the full buff with random numbers, and pick those we need
     488         2578 :       icol_global = 0
     489        21437 :       DO icol_local = 1, ncol_local
     490        18859 :          CPASSERT(col_indices(icol_local) > icol_global)
     491              :          DO
     492        18859 :             CALL rng%reset_to_next_substream()
     493        18859 :             icol_global = icol_global + 1
     494        18859 :             IF (icol_global == col_indices(icol_local)) EXIT
     495              :          END DO
     496        18859 :          CALL rng%fill(buff)
     497       787210 :          DO irow_local = 1, nrow_local
     498       784632 :             local_data(irow_local, icol_local) = buff(row_indices(irow_local))
     499              :          END DO
     500              :       END DO
     501              : 
     502         2578 :       DEALLOCATE (buff)
     503              : 
     504              :       ! store seed before deletion (unclear if this is the proper seed)
     505              : 
     506              :       ! Note that, the initial state (rng%ig) instead of the current state (rng%cg) is stored in the
     507              :       ! seed variable. As a consequence, each invocation of cp_fm_init_random uses exactly the same
     508              :       ! stream of random numbers. While this seems odd and contrary to the original design,
     509              :       ! it was probably introduced to improve reproducibility.
     510              :       ! See also https://github.com/cp2k/cp2k/pull/506
     511         2578 :       CALL rng%get(ig=seed)
     512              : 
     513         2578 :       CALL timestop(handle)
     514              : 
     515        72184 :    END SUBROUTINE cp_fm_init_random
     516              : 
     517              : ! **************************************************************************************************
     518              : !> \brief set all elements of a matrix to the same value,
     519              : !>      and optionally the diagonal to a different one
     520              : !> \param matrix input matrix
     521              : !> \param alpha scalar used to set all elements of the matrix
     522              : !> \param beta scalar used to set diagonal of the matrix
     523              : !> \note
     524              : !>      can be used to zero a matrix
     525              : !>      can be used to create a unit matrix (I-matrix) alpha=0.0_dp beta=1.0_dp
     526              : ! **************************************************************************************************
     527       331146 :    SUBROUTINE cp_fm_set_all(matrix, alpha, beta)
     528              : 
     529              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix
     530              :       REAL(KIND=dp), INTENT(IN)                          :: alpha
     531              :       REAL(KIND=dp), INTENT(IN), OPTIONAL                :: beta
     532              : 
     533              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'cp_fm_set_all'
     534              : 
     535              :       INTEGER                                            :: handle, i, n
     536              : 
     537       331146 :       CALL timeset(routineN, handle)
     538              : 
     539       331146 :       IF (matrix%use_sp) THEN
     540            0 :          matrix%local_data_sp(:, :) = REAL(alpha, sp)
     541              :       ELSE
     542    187332947 :          matrix%local_data(:, :) = alpha
     543              :       END IF
     544              : 
     545       331146 :       IF (PRESENT(beta)) THEN
     546        57273 :          CPASSERT(.NOT. matrix%use_sp)
     547        57273 :          n = MIN(matrix%matrix_struct%nrow_global, matrix%matrix_struct%ncol_global)
     548       523299 :          DO i = 1, n
     549       523299 :             CALL cp_fm_set_element(matrix, i, i, beta)
     550              :          END DO
     551              :       END IF
     552              : 
     553       331146 :       CALL timestop(handle)
     554              : 
     555       331146 :    END SUBROUTINE cp_fm_set_all
     556              : 
     557              : ! **************************************************************************************************
     558              : !> \brief returns the diagonal elements of a fm
     559              : !> \param matrix ...
     560              : !> \param diag ...
     561              : ! **************************************************************************************************
     562        13888 :    SUBROUTINE cp_fm_get_diag(matrix, diag)
     563              : 
     564              :       ! arguments
     565              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix
     566              :       REAL(KIND=dp), DIMENSION(:), INTENT(OUT) :: diag
     567              : 
     568              :       ! locals
     569              :       INTEGER :: i, nrow_global
     570              : 
     571              : #if defined(__parallel)
     572              :       INTEGER, DIMENSION(9) :: desca
     573              :       TYPE(cp_blacs_env_type), POINTER :: context
     574              :       INTEGER :: icol_local, ipcol, iprow, irow_local, mypcol, myprow, npcol, &
     575              :                  nprow
     576        13888 :       REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
     577        13888 :       REAL(KIND=sp), DIMENSION(:, :), POINTER :: a_sp
     578              : #endif
     579              : 
     580        13888 :       CALL cp_fm_get_info(matrix, nrow_global=nrow_global)
     581              : 
     582              : #if defined(__parallel)
     583       178580 :       diag = 0.0_dp
     584        13888 :       context => matrix%matrix_struct%context
     585        13888 :       myprow = context%mepos(1)
     586        13888 :       mypcol = context%mepos(2)
     587        13888 :       nprow = context%num_pe(1)
     588        13888 :       npcol = context%num_pe(2)
     589              : 
     590        13888 :       a => matrix%local_data
     591        13888 :       a_sp => matrix%local_data_sp
     592       138880 :       desca(:) = matrix%matrix_struct%descriptor(:)
     593              : 
     594       178580 :       DO i = 1, nrow_global
     595              :          CALL infog2l(i, i, desca, nprow, npcol, myprow, mypcol, &
     596       164692 :                       irow_local, icol_local, iprow, ipcol)
     597       178580 :          IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
     598        82430 :             IF (matrix%use_sp) THEN
     599            0 :                diag(i) = REAL(a_sp(irow_local, icol_local), dp)
     600              :             ELSE
     601        82430 :                diag(i) = a(irow_local, icol_local)
     602              :             END IF
     603              :          END IF
     604              :       END DO
     605              : #else
     606              :       DO i = 1, nrow_global
     607              :          IF (matrix%use_sp) THEN
     608              :             diag(i) = REAL(matrix%local_data_sp(i, i), dp)
     609              :          ELSE
     610              :             diag(i) = matrix%local_data(i, i)
     611              :          END IF
     612              :       END DO
     613              : #endif
     614       343272 :       CALL matrix%matrix_struct%para_env%sum(diag)
     615              : 
     616        13888 :    END SUBROUTINE cp_fm_get_diag
     617              : 
     618              : ! **************************************************************************************************
     619              : !> \brief returns an element of a fm
     620              : !>      this value is valid on every cpu
     621              : !>      using this call is expensive
     622              : !> \param matrix the matrix to read
     623              : !> \param irow_global the row
     624              : !> \param icol_global the col
     625              : !> \param alpha the value of matrix(irow_global, icol_global)
     626              : !> \param local true if the element is on this cpu, false otherwise
     627              : !> \note
     628              : !>      - modified semantics. now this function always returns the value
     629              : !>        previously the value was zero on cpus that didn't own the relevant
     630              : !>        part of the matrix (Joost VandeVondele, May 2003)
     631              : !>      - usage of the function should be avoided, as it is likely to rather slow
     632              : !>        using row_indices/col_indices/local_data + some smart scheme normally
     633              : !>        yields a real parallel code
     634              : ! **************************************************************************************************
     635      2504322 :    SUBROUTINE cp_fm_get_element(matrix, irow_global, icol_global, alpha, local)
     636              : 
     637              :       ! arguments
     638              :       TYPE(cp_fm_type), INTENT(IN)          :: matrix
     639              :       REAL(KIND=dp), INTENT(OUT)                     :: alpha
     640              :       INTEGER, INTENT(IN)                       :: icol_global, &
     641              :                                                    irow_global
     642              :       LOGICAL, INTENT(OUT), OPTIONAL            :: local
     643              : 
     644              :       ! locals
     645              : #if defined(__parallel)
     646              :       INTEGER, DIMENSION(9) :: desca
     647              :       TYPE(cp_blacs_env_type), POINTER :: context
     648              :       INTEGER :: icol_local, ipcol, iprow, irow_local, mypcol, myprow, npcol, &
     649              :                  nprow
     650      2504322 :       REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
     651              : #endif
     652              : 
     653              : #if defined(__parallel)
     654      2504322 :       context => matrix%matrix_struct%context
     655      2504322 :       myprow = context%mepos(1)
     656      2504322 :       mypcol = context%mepos(2)
     657      2504322 :       nprow = context%num_pe(1)
     658      2504322 :       npcol = context%num_pe(2)
     659              : 
     660      2504322 :       a => matrix%local_data
     661     25043220 :       desca(:) = matrix%matrix_struct%descriptor(:)
     662              : 
     663              :       CALL infog2l(irow_global, icol_global, desca, nprow, npcol, myprow, mypcol, &
     664      2504322 :                    irow_local, icol_local, iprow, ipcol)
     665              : 
     666      2504322 :       IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
     667      1252161 :          alpha = a(irow_local, icol_local)
     668      1252161 :          CALL context%dgebs2d('All', ' ', 1, 1, alpha, 1)
     669      1252161 :          IF (PRESENT(local)) local = .TRUE.
     670              :       ELSE
     671      1252161 :          CALL context%dgebr2d('All', ' ', 1, 1, alpha, 1, iprow, ipcol)
     672      1252161 :          IF (PRESENT(local)) local = .FALSE.
     673              :       END IF
     674              : 
     675              : #else
     676              :       IF (PRESENT(local)) local = .TRUE.
     677              :       alpha = matrix%local_data(irow_global, icol_global)
     678              : #endif
     679              : 
     680      2504322 :    END SUBROUTINE cp_fm_get_element
     681              : 
     682              : ! **************************************************************************************************
     683              : !> \brief sets an element of a matrix
     684              : !> \param matrix ...
     685              : !> \param irow_global ...
     686              : !> \param icol_global ...
     687              : !> \param alpha ...
     688              : !> \note
     689              : !>      we expect all cpus to have the same arguments in the call to this function
     690              : !>      (otherwise one should use local_data tricks)
     691              : ! **************************************************************************************************
     692       696156 :    SUBROUTINE cp_fm_set_element(matrix, irow_global, icol_global, alpha)
     693              :       TYPE(cp_fm_type), INTENT(IN)          :: matrix
     694              :       INTEGER, INTENT(IN)                      :: irow_global, icol_global
     695              :       REAL(KIND=dp), INTENT(IN)                :: alpha
     696              : 
     697              :       INTEGER                                  :: mypcol, myprow, npcol, nprow
     698              :       TYPE(cp_blacs_env_type), POINTER         :: context
     699              : #if defined(__parallel)
     700              :       INTEGER                                  :: icol_local, ipcol, iprow, &
     701              :                                                   irow_local
     702              :       INTEGER, DIMENSION(9)                    :: desca
     703       696156 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
     704              : #endif
     705              : 
     706       696156 :       context => matrix%matrix_struct%context
     707       696156 :       myprow = context%mepos(1)
     708       696156 :       mypcol = context%mepos(2)
     709       696156 :       nprow = context%num_pe(1)
     710       696156 :       npcol = context%num_pe(2)
     711              : 
     712            0 :       CPASSERT(.NOT. matrix%use_sp)
     713              : 
     714              : #if defined(__parallel)
     715              : 
     716       696156 :       a => matrix%local_data
     717              : 
     718      6961560 :       desca(:) = matrix%matrix_struct%descriptor(:)
     719              : 
     720              :       CALL infog2l(irow_global, icol_global, desca, nprow, npcol, myprow, mypcol, &
     721       696156 :                    irow_local, icol_local, iprow, ipcol)
     722              : 
     723       696156 :       IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
     724       351888 :          a(irow_local, icol_local) = alpha
     725              :       END IF
     726              : 
     727              : #else
     728              : 
     729              :       matrix%local_data(irow_global, icol_global) = alpha
     730              : 
     731              : #endif
     732       696156 :    END SUBROUTINE cp_fm_set_element
     733              : 
     734              : ! **************************************************************************************************
     735              : !> \brief sets a submatrix of a full matrix
     736              : !>       fm(start_row:start_row+n_rows,start_col:start_col+n_cols)
     737              : !>       = alpha*op(new_values)(1:n_rows,1:n_cols)+ beta
     738              : !>       * fm(start_row:start_row+n_rows,start_col:start_col+n_cols)
     739              : !> \param fm the full to change
     740              : !> \param new_values a replicated full matrix with the new values
     741              : !> \param start_row the starting row of b_matrix (defaults to 1)
     742              : !> \param start_col the starting col of b_matrix (defaults to 1)
     743              : !> \param n_rows the number of row to change in b (defaults to
     744              : !>        size(op(new_values),1))
     745              : !> \param n_cols the number of columns to change in b (defaults to
     746              : !>        size(op(new_values),2))
     747              : !> \param alpha rescaling factor for the new values (defaults to 1.0)
     748              : !> \param beta rescaling factor for the old values (defaults to 0.0)
     749              : !> \param transpose if new_values should be transposed: if true
     750              : !>        op(new_values)=new_values^T, else op(new_values)=new_values
     751              : !>        (defaults to false)
     752              : !> \par History
     753              : !>      07.2002 created borrowing from Joost's blacs_replicated_copy [fawzi]
     754              : !> \author Fawzi Mohamed
     755              : !> \note
     756              : !>      optimized for full column updates and alpha=1.0, beta=0.0
     757              : !>      the new_values need to be valid on all cpus
     758              : ! **************************************************************************************************
     759        62353 :    SUBROUTINE cp_fm_set_submatrix(fm, new_values, start_row, &
     760              :                                   start_col, n_rows, n_cols, alpha, beta, transpose)
     761              :       TYPE(cp_fm_type), INTENT(IN)                       :: fm
     762              :       REAL(KIND=dp), DIMENSION(:, :), INTENT(in)         :: new_values
     763              :       INTEGER, INTENT(in), OPTIONAL                      :: start_row, start_col, n_rows, n_cols
     764              :       REAL(KIND=dp), INTENT(in), OPTIONAL                :: alpha, beta
     765              :       LOGICAL, INTENT(in), OPTIONAL                      :: transpose
     766              : 
     767              :       INTEGER                                            :: i, i0, j, j0, ncol, ncol_block, &
     768              :                                                             ncol_global, ncol_local, nrow, &
     769              :                                                             nrow_block, nrow_global, nrow_local, &
     770              :                                                             this_col, this_row
     771        62353 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
     772              :       LOGICAL                                            :: tr_a
     773              :       REAL(KIND=dp)                                      :: al, be
     774        62353 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: full_block
     775              : 
     776        62353 :       al = 1.0_dp; be = 0.0_dp; i0 = 1; j0 = 1; tr_a = .FALSE.
     777              :       ! can be called too many times, making it a bit useless
     778              :       ! CALL timeset(routineN//','//moduleN,handle)
     779              : 
     780            0 :       CPASSERT(.NOT. fm%use_sp)
     781              : 
     782        62353 :       IF (PRESENT(alpha)) al = alpha
     783        62353 :       IF (PRESENT(beta)) be = beta
     784        62353 :       IF (PRESENT(start_row)) i0 = start_row
     785        62353 :       IF (PRESENT(start_col)) j0 = start_col
     786        62353 :       IF (PRESENT(transpose)) tr_a = transpose
     787        15715 :       IF (tr_a) THEN
     788        15623 :          nrow = SIZE(new_values, 2)
     789        15623 :          ncol = SIZE(new_values, 1)
     790              :       ELSE
     791        46730 :          nrow = SIZE(new_values, 1)
     792        46730 :          ncol = SIZE(new_values, 2)
     793              :       END IF
     794        62353 :       IF (PRESENT(n_rows)) nrow = n_rows
     795        62353 :       IF (PRESENT(n_cols)) ncol = n_cols
     796              : 
     797        62353 :       full_block => fm%local_data
     798              : 
     799              :       CALL cp_fm_get_info(matrix=fm, &
     800              :                           nrow_global=nrow_global, ncol_global=ncol_global, &
     801              :                           nrow_block=nrow_block, ncol_block=ncol_block, &
     802              :                           nrow_local=nrow_local, ncol_local=ncol_local, &
     803        62353 :                           row_indices=row_indices, col_indices=col_indices)
     804              : 
     805        62353 :       IF (al == 1.0 .AND. be == 0.0) THEN
     806      1297114 :          DO j = 1, ncol_local
     807      1243899 :             this_col = col_indices(j) - j0 + 1
     808      1297114 :             IF (this_col .GE. 1 .AND. this_col .LE. ncol) THEN
     809       283381 :                IF (tr_a) THEN
     810         6485 :                   IF (i0 == 1 .AND. nrow_global == nrow) THEN
     811       158016 :                      DO i = 1, nrow_local
     812       158016 :                         full_block(i, j) = new_values(this_col, row_indices(i))
     813              :                      END DO
     814              :                   ELSE
     815          594 :                      DO i = 1, nrow_local
     816          510 :                         this_row = row_indices(i) - i0 + 1
     817          594 :                         IF (this_row >= 1 .AND. this_row <= nrow) THEN
     818          255 :                            full_block(i, j) = new_values(this_col, this_row)
     819              :                         END IF
     820              :                      END DO
     821              :                   END IF
     822              :                ELSE
     823       276896 :                   IF (i0 == 1 .AND. nrow_global == nrow) THEN
     824      8901573 :                      DO i = 1, nrow_local
     825      8901573 :                         full_block(i, j) = new_values(row_indices(i), this_col)
     826              :                      END DO
     827              :                   ELSE
     828         4347 :                      DO i = 1, nrow_local
     829         3745 :                         this_row = row_indices(i) - i0 + 1
     830         4347 :                         IF (this_row >= 1 .AND. this_row <= nrow) THEN
     831          301 :                            full_block(i, j) = new_values(this_row, this_col)
     832              :                         END IF
     833              :                      END DO
     834              :                   END IF
     835              :                END IF
     836              :             END IF
     837              :          END DO
     838              :       ELSE
     839       831608 :          DO j = 1, ncol_local
     840       822470 :             this_col = col_indices(j) - j0 + 1
     841       831608 :             IF (this_col .GE. 1 .AND. this_col .LE. ncol) THEN
     842       822470 :                IF (tr_a) THEN
     843     88223375 :                   DO i = 1, nrow_local
     844     87400905 :                      this_row = row_indices(i) - i0 + 1
     845     88223375 :                      IF (this_row >= 1 .AND. this_row <= nrow) THEN
     846              :                         full_block(i, j) = al*new_values(this_col, this_row) + &
     847       411235 :                                            be*full_block(i, j)
     848              :                      END IF
     849              :                   END DO
     850              :                ELSE
     851            0 :                   DO i = 1, nrow_local
     852            0 :                      this_row = row_indices(i) - i0 + 1
     853            0 :                      IF (this_row >= 1 .AND. this_row <= nrow) THEN
     854              :                         full_block(i, j) = al*new_values(this_row, this_col) + &
     855            0 :                                            be*full_block(i, j)
     856              :                      END IF
     857              :                   END DO
     858              :                END IF
     859              :             END IF
     860              :          END DO
     861              :       END IF
     862              : 
     863              :       ! CALL timestop(handle)
     864              : 
     865        62353 :    END SUBROUTINE cp_fm_set_submatrix
     866              : 
     867              : ! **************************************************************************************************
     868              : !> \brief gets a submatrix of a full matrix
     869              : !>       op(target_m)(1:n_rows,1:n_cols)
     870              : !>       =fm(start_row:start_row+n_rows,start_col:start_col+n_cols)
     871              : !>      target_m is replicated on all cpus
     872              : !>      using this call is expensive
     873              : !> \param fm the full you want to get the info from
     874              : !> \param target_m a replicated full matrix that will contain the result
     875              : !> \param start_row the starting row of b_matrix (defaults to 1)
     876              : !> \param start_col the starting col of b_matrix (defaults to 1)
     877              : !> \param n_rows the number of row to change in b (defaults to
     878              : !>        size(op(new_values),1))
     879              : !> \param n_cols the number of columns to change in b (defaults to
     880              : !>        size(op(new_values),2))
     881              : !> \param transpose if target_m should be transposed: if true
     882              : !>        op(target_m)=target_m^T, else op(target_m)=target_m
     883              : !>        (defaults to false)
     884              : !> \par History
     885              : !>      07.2002 created borrowing from Joost's blacs_replicated_copy [fawzi]
     886              : !> \author Fawzi Mohamed
     887              : !> \note
     888              : !>      optimized for full column updates. Zeros out a little too much
     889              : !>      of target_m
     890              : !>      the target_m is replicated and valid on all cpus
     891              : ! **************************************************************************************************
     892        80958 :    SUBROUTINE cp_fm_get_submatrix(fm, target_m, start_row, &
     893              :                                   start_col, n_rows, n_cols, transpose)
     894              :       TYPE(cp_fm_type), INTENT(IN)                       :: fm
     895              :       REAL(KIND=dp), DIMENSION(:, :), INTENT(out)        :: target_m
     896              :       INTEGER, INTENT(in), OPTIONAL                      :: start_row, start_col, n_rows, n_cols
     897              :       LOGICAL, INTENT(in), OPTIONAL                      :: transpose
     898              : 
     899              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_get_submatrix'
     900              : 
     901              :       INTEGER                                            :: handle, i, i0, j, j0, ncol, ncol_global, &
     902              :                                                             ncol_local, nrow, nrow_global, &
     903              :                                                             nrow_local, this_col, this_row
     904        80958 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
     905              :       LOGICAL                                            :: tr_a
     906        80958 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: full_block
     907              :       TYPE(mp_para_env_type), POINTER                    :: para_env
     908              : 
     909        80958 :       CALL timeset(routineN, handle)
     910              : 
     911        80958 :       i0 = 1; j0 = 1; tr_a = .FALSE.
     912              : 
     913        80958 :       CPASSERT(.NOT. fm%use_sp)
     914              : 
     915        80958 :       IF (PRESENT(start_row)) i0 = start_row
     916        80958 :       IF (PRESENT(start_col)) j0 = start_col
     917        80958 :       IF (PRESENT(transpose)) tr_a = transpose
     918         6294 :       IF (tr_a) THEN
     919         2384 :          nrow = SIZE(target_m, 2)
     920         2384 :          ncol = SIZE(target_m, 1)
     921              :       ELSE
     922        78574 :          nrow = SIZE(target_m, 1)
     923        78574 :          ncol = SIZE(target_m, 2)
     924              :       END IF
     925        80958 :       IF (PRESENT(n_rows)) nrow = n_rows
     926        80958 :       IF (PRESENT(n_cols)) ncol = n_cols
     927              : 
     928        80958 :       para_env => fm%matrix_struct%para_env
     929              : 
     930        80958 :       full_block => fm%local_data
     931              : #if defined(__parallel)
     932              :       ! zero-out whole target_m
     933        80958 :       IF (SIZE(target_m, 1)*SIZE(target_m, 2) /= 0) THEN
     934        97318 :          CALL dcopy(SIZE(target_m, 1)*SIZE(target_m, 2), [0.0_dp], 0, target_m, 1)
     935              :       END IF
     936              : #endif
     937              : 
     938              :       CALL cp_fm_get_info(matrix=fm, &
     939              :                           nrow_global=nrow_global, ncol_global=ncol_global, &
     940              :                           nrow_local=nrow_local, ncol_local=ncol_local, &
     941        80958 :                           row_indices=row_indices, col_indices=col_indices)
     942              : 
     943       527426 :       DO j = 1, ncol_local
     944       446468 :          this_col = col_indices(j) - j0 + 1
     945       527426 :          IF (this_col .GE. 1 .AND. this_col .LE. ncol) THEN
     946       284308 :             IF (tr_a) THEN
     947         2384 :                IF (i0 == 1 .AND. nrow_global == nrow) THEN
     948        82110 :                   DO i = 1, nrow_local
     949        82110 :                      target_m(this_col, row_indices(i)) = full_block(i, j)
     950              :                   END DO
     951              :                ELSE
     952            0 :                   DO i = 1, nrow_local
     953            0 :                      this_row = row_indices(i) - i0 + 1
     954            0 :                      IF (this_row >= 1 .AND. this_row <= nrow) THEN
     955            0 :                         target_m(this_col, this_row) = full_block(i, j)
     956              :                      END IF
     957              :                   END DO
     958              :                END IF
     959              :             ELSE
     960       281924 :                IF (i0 == 1 .AND. nrow_global == nrow) THEN
     961      6049292 :                   DO i = 1, nrow_local
     962      6049292 :                      target_m(row_indices(i), this_col) = full_block(i, j)
     963              :                   END DO
     964              :                ELSE
     965      1226014 :                   DO i = 1, nrow_local
     966      1192060 :                      this_row = row_indices(i) - i0 + 1
     967      1226014 :                      IF (this_row >= 1 .AND. this_row <= nrow) THEN
     968        28422 :                         target_m(this_row, this_col) = full_block(i, j)
     969              :                      END IF
     970              :                   END DO
     971              :                END IF
     972              :             END IF
     973              :          END IF
     974              :       END DO
     975              : 
     976     16100910 :       CALL para_env%sum(target_m)
     977              : 
     978        80958 :       CALL timestop(handle)
     979              : 
     980        80958 :    END SUBROUTINE cp_fm_get_submatrix
     981              : 
     982              : ! **************************************************************************************************
     983              : !> \brief returns all kind of information about the full matrix
     984              : !> \param matrix ...
     985              : !> \param name ...
     986              : !> \param nrow_global ...
     987              : !> \param ncol_global ...
     988              : !> \param nrow_block ...
     989              : !> \param ncol_block ...
     990              : !> \param nrow_local ...
     991              : !> \param ncol_local ...
     992              : !> \param row_indices ...
     993              : !> \param col_indices ...
     994              : !> \param local_data ...
     995              : !> \param context ...
     996              : !> \param nrow_locals ...
     997              : !> \param ncol_locals ...
     998              : !> \param matrix_struct ...
     999              : !> \param para_env ...
    1000              : !> \note
    1001              : !>       see also cp_fm_struct for explanation
    1002              : !>       - nrow_local, ncol_local, row_indices, col_indices, local_data are hooks for efficient
    1003              : !>         access to the local blacs block
    1004              : ! **************************************************************************************************
    1005      5503769 :    SUBROUTINE cp_fm_get_info(matrix, name, nrow_global, ncol_global, &
    1006              :                              nrow_block, ncol_block, nrow_local, ncol_local, &
    1007              :                              row_indices, col_indices, local_data, context, &
    1008              :                              nrow_locals, ncol_locals, matrix_struct, para_env)
    1009              : 
    1010              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix
    1011              :       CHARACTER(LEN=*), INTENT(OUT), OPTIONAL            :: name
    1012              :       INTEGER, INTENT(OUT), OPTIONAL                     :: nrow_global, ncol_global, nrow_block, &
    1013              :                                                             ncol_block, nrow_local, ncol_local
    1014              :       INTEGER, DIMENSION(:), OPTIONAL, POINTER           :: row_indices, col_indices
    1015              :       REAL(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    1016              :          OPTIONAL, POINTER                               :: local_data
    1017              :       TYPE(cp_blacs_env_type), OPTIONAL, POINTER         :: context
    1018              :       INTEGER, DIMENSION(:), OPTIONAL, POINTER           :: nrow_locals, ncol_locals
    1019              :       TYPE(cp_fm_struct_type), OPTIONAL, POINTER         :: matrix_struct
    1020              :       TYPE(mp_para_env_type), OPTIONAL, POINTER          :: para_env
    1021              : 
    1022            8 :       IF (PRESENT(name)) name = matrix%name
    1023      5503769 :       IF (PRESENT(matrix_struct)) matrix_struct => matrix%matrix_struct
    1024      5503769 :       IF (PRESENT(local_data)) local_data => matrix%local_data ! not hiding things anymore :-(
    1025              : 
    1026              :       CALL cp_fm_struct_get(matrix%matrix_struct, nrow_local=nrow_local, &
    1027              :                             ncol_local=ncol_local, nrow_global=nrow_global, &
    1028              :                             ncol_global=ncol_global, nrow_block=nrow_block, &
    1029              :                             ncol_block=ncol_block, row_indices=row_indices, &
    1030              :                             col_indices=col_indices, nrow_locals=nrow_locals, &
    1031      5503769 :                             ncol_locals=ncol_locals, context=context, para_env=para_env)
    1032              : 
    1033      5503769 :    END SUBROUTINE cp_fm_get_info
    1034              : 
    1035              : ! **************************************************************************************************
    1036              : !> \brief Write nicely formatted info about the FM to the given I/O unit (including the underlying FM struct)
    1037              : !> \param matrix a cp_fm_type instance
    1038              : !> \param io_unit the I/O unit to use for writing
    1039              : ! **************************************************************************************************
    1040            3 :    SUBROUTINE cp_fm_write_info(matrix, io_unit)
    1041              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix
    1042              :       INTEGER, INTENT(IN)                                :: io_unit
    1043              : 
    1044            3 :       WRITE (io_unit, '(/,A,A12)') "CP_FM | Name:                           ", matrix%name
    1045            3 :       CALL cp_fm_struct_write_info(matrix%matrix_struct, io_unit)
    1046            3 :    END SUBROUTINE cp_fm_write_info
    1047              : 
    1048              : ! **************************************************************************************************
    1049              : !> \brief find the maximum absolute value of the matrix element
    1050              : !>      maxval(abs(matrix))
    1051              : !> \param matrix ...
    1052              : !> \param a_max ...
    1053              : !> \param ir_max ...
    1054              : !> \param ic_max ...
    1055              : ! **************************************************************************************************
    1056        99219 :    SUBROUTINE cp_fm_maxabsval(matrix, a_max, ir_max, ic_max)
    1057              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix
    1058              :       REAL(KIND=dp), INTENT(OUT)                         :: a_max
    1059              :       INTEGER, INTENT(OUT), OPTIONAL                     :: ir_max, ic_max
    1060              : 
    1061              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'cp_fm_maxabsval'
    1062              : 
    1063              :       INTEGER                                            :: handle, i, ic_max_local, ir_max_local, &
    1064              :                                                             j, mepos, ncol_local, nrow_local, &
    1065              :                                                             num_pe
    1066        99219 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: ic_max_vec, ir_max_vec
    1067        99219 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
    1068              :       REAL(dp)                                           :: my_max
    1069        99219 :       REAL(dp), ALLOCATABLE, DIMENSION(:)                :: a_max_vec
    1070        99219 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: my_block
    1071        99219 :       REAL(KIND=sp), DIMENSION(:, :), POINTER            :: my_block_sp
    1072              : 
    1073        99219 :       CALL timeset(routineN, handle)
    1074              : 
    1075        99219 :       my_block => matrix%local_data
    1076        99219 :       my_block_sp => matrix%local_data_sp
    1077              : 
    1078              :       CALL cp_fm_get_info(matrix, nrow_local=nrow_local, ncol_local=ncol_local, &
    1079        99219 :                           row_indices=row_indices, col_indices=col_indices)
    1080              : 
    1081        99219 :       IF (matrix%use_sp) THEN
    1082            0 :          a_max = REAL(MAXVAL(ABS(my_block_sp(1:nrow_local, 1:ncol_local))), dp)
    1083              :       ELSE
    1084     55935052 :          a_max = MAXVAL(ABS(my_block(1:nrow_local, 1:ncol_local)))
    1085              :       END IF
    1086              : 
    1087        99219 :       IF (PRESENT(ir_max)) THEN
    1088            0 :          num_pe = matrix%matrix_struct%para_env%num_pe
    1089            0 :          mepos = matrix%matrix_struct%para_env%mepos
    1090            0 :          ALLOCATE (ir_max_vec(0:num_pe - 1))
    1091            0 :          ir_max_vec(0:num_pe - 1) = 0
    1092            0 :          ALLOCATE (ic_max_vec(0:num_pe - 1))
    1093            0 :          ic_max_vec(0:num_pe - 1) = 0
    1094            0 :          ALLOCATE (a_max_vec(0:num_pe - 1))
    1095            0 :          a_max_vec(0:num_pe - 1) = 0.0_dp
    1096            0 :          my_max = 0.0_dp
    1097              : 
    1098            0 :          IF ((ncol_local > 0) .AND. (nrow_local > 0)) THEN
    1099            0 :             DO i = 1, ncol_local
    1100            0 :                DO j = 1, nrow_local
    1101            0 :                   IF (matrix%use_sp) THEN
    1102            0 :                      IF (ABS(my_block_sp(j, i)) > my_max) THEN
    1103            0 :                         my_max = REAL(my_block_sp(j, i), dp)
    1104            0 :                         ir_max_local = j
    1105            0 :                         ic_max_local = i
    1106              :                      END IF
    1107              :                   ELSE
    1108            0 :                      IF (ABS(my_block(j, i)) > my_max) THEN
    1109            0 :                         my_max = my_block(j, i)
    1110            0 :                         ir_max_local = j
    1111            0 :                         ic_max_local = i
    1112              :                      END IF
    1113              :                   END IF
    1114              :                END DO
    1115              :             END DO
    1116              : 
    1117            0 :             a_max_vec(mepos) = my_max
    1118            0 :             ir_max_vec(mepos) = row_indices(ir_max_local)
    1119            0 :             ic_max_vec(mepos) = col_indices(ic_max_local)
    1120              : 
    1121              :          END IF
    1122              : 
    1123            0 :          CALL matrix%matrix_struct%para_env%sum(a_max_vec)
    1124            0 :          CALL matrix%matrix_struct%para_env%sum(ir_max_vec)
    1125            0 :          CALL matrix%matrix_struct%para_env%sum(ic_max_vec)
    1126              : 
    1127            0 :          my_max = 0.0_dp
    1128            0 :          DO i = 0, num_pe - 1
    1129            0 :             IF (a_max_vec(i) > my_max) THEN
    1130            0 :                ir_max = ir_max_vec(i)
    1131            0 :                ic_max = ic_max_vec(i)
    1132              :             END IF
    1133              :          END DO
    1134              : 
    1135            0 :          DEALLOCATE (ir_max_vec, ic_max_vec, a_max_vec)
    1136            0 :          CPASSERT(ic_max > 0)
    1137            0 :          CPASSERT(ir_max > 0)
    1138              : 
    1139              :       END IF
    1140              : 
    1141        99219 :       CALL matrix%matrix_struct%para_env%max(a_max)
    1142              : 
    1143        99219 :       CALL timestop(handle)
    1144              : 
    1145       198438 :    END SUBROUTINE cp_fm_maxabsval
    1146              : 
    1147              : ! **************************************************************************************************
    1148              : !> \brief find the maximum over the rows of the sum of the absolute values of the elements of a given row
    1149              : !>      = || A ||_infinity
    1150              : !> \param matrix ...
    1151              : !> \param a_max ...
    1152              : !> \note
    1153              : !>      for a real symmetric matrix it holds that || A ||_2 = |lambda_max| < || A ||_infinity
    1154              : !>      Hence this can be used to estimate an upper bound for the eigenvalues of a matrix
    1155              : !>      http://mathworld.wolfram.com/MatrixNorm.html
    1156              : !>      (but the bound is not so tight in the general case)
    1157              : ! **************************************************************************************************
    1158         4246 :    SUBROUTINE cp_fm_maxabsrownorm(matrix, a_max)
    1159              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix
    1160              :       REAL(KIND=dp), INTENT(OUT)                         :: a_max
    1161              : 
    1162              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_maxabsrownorm'
    1163              : 
    1164              :       INTEGER                                            :: handle, i, j, ncol_local, nrow_global, &
    1165              :                                                             nrow_local
    1166         4246 :       INTEGER, DIMENSION(:), POINTER                     :: row_indices
    1167              :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: values
    1168         4246 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: my_block
    1169              : 
    1170         4246 :       CALL timeset(routineN, handle)
    1171              : 
    1172         4246 :       my_block => matrix%local_data
    1173              : 
    1174         4246 :       CPASSERT(.NOT. matrix%use_sp)
    1175              : 
    1176              :       CALL cp_fm_get_info(matrix, row_indices=row_indices, nrow_global=nrow_global, &
    1177         4246 :                           nrow_local=nrow_local, ncol_local=ncol_local)
    1178              : 
    1179              :       ! the efficiency could be improved by making use of the row-col distribution of scalapack
    1180        12738 :       ALLOCATE (values(nrow_global))
    1181        62136 :       values = 0.0_dp
    1182        62136 :       DO j = 1, ncol_local
    1183       525661 :          DO i = 1, nrow_local
    1184       521415 :             values(row_indices(i)) = values(row_indices(i)) + ABS(my_block(i, j))
    1185              :          END DO
    1186              :       END DO
    1187         4246 :       CALL matrix%matrix_struct%para_env%sum(values)
    1188        66382 :       a_max = MAXVAL(values)
    1189         4246 :       DEALLOCATE (values)
    1190              : 
    1191         4246 :       CALL timestop(handle)
    1192         8492 :    END SUBROUTINE
    1193              : 
    1194              : ! **************************************************************************************************
    1195              : !> \brief find the inorm of each column norm_{j}= sqrt( \sum_{i} A_{ij}*A_{ij} )
    1196              : !> \param matrix ...
    1197              : !> \param norm_array ...
    1198              : ! **************************************************************************************************
    1199         1282 :    SUBROUTINE cp_fm_vectorsnorm(matrix, norm_array)
    1200              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix
    1201              :       REAL(KIND=dp), DIMENSION(:), INTENT(OUT)           :: norm_array
    1202              : 
    1203              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'cp_fm_vectorsnorm'
    1204              : 
    1205              :       INTEGER                                            :: handle, i, j, ncol_global, ncol_local, &
    1206              :                                                             nrow_local
    1207         1282 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices
    1208         1282 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: my_block
    1209              : 
    1210         1282 :       CALL timeset(routineN, handle)
    1211              : 
    1212         1282 :       my_block => matrix%local_data
    1213              : 
    1214         1282 :       CPASSERT(.NOT. matrix%use_sp)
    1215              : 
    1216              :       CALL cp_fm_get_info(matrix, col_indices=col_indices, ncol_global=ncol_global, &
    1217         1282 :                           nrow_local=nrow_local, ncol_local=ncol_local)
    1218              : 
    1219              :       ! the efficiency could be improved by making use of the row-col distribution of scalapack
    1220        29746 :       norm_array = 0.0_dp
    1221        29746 :       DO j = 1, ncol_local
    1222      1261016 :          DO i = 1, nrow_local
    1223      1259734 :             norm_array(col_indices(j)) = norm_array(col_indices(j)) + my_block(i, j)*my_block(i, j)
    1224              :          END DO
    1225              :       END DO
    1226        58210 :       CALL matrix%matrix_struct%para_env%sum(norm_array)
    1227        29746 :       norm_array = SQRT(norm_array)
    1228              : 
    1229         1282 :       CALL timestop(handle)
    1230         1282 :    END SUBROUTINE
    1231              : 
    1232              : ! **************************************************************************************************
    1233              : !> \brief summing up all the elements along the matrix's i-th index
    1234              : !>        \f$ \mathrm{sum}_{j} = \sum_{i} A_{ij} \f$
    1235              : !>        or
    1236              : !>        \f$ \mathrm{sum}_{i} = \sum_{j} A_{ij} \f$
    1237              : !> \param matrix     an input matrix A
    1238              : !> \param sum_array  sums of elements in each column/row
    1239              : !> \param dir ...
    1240              : !> \note  forked from cp_fm_vectorsnorm() to be used with
    1241              : !>        the maximum overlap method
    1242              : !>        added row variation
    1243              : ! **************************************************************************************************
    1244         7174 :    SUBROUTINE cp_fm_vectorssum(matrix, sum_array, dir)
    1245              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix
    1246              :       REAL(KIND=dp), DIMENSION(:), INTENT(OUT)           :: sum_array
    1247              :       CHARACTER(LEN=1), INTENT(IN), OPTIONAL             :: dir
    1248              : 
    1249              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'cp_fm_vectorssum'
    1250              : 
    1251              :       INTEGER                                            :: handle, i, j, ncol_local, nrow_local
    1252         7174 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
    1253              :       LOGICAL                                            :: docol
    1254         7174 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: my_block
    1255              : 
    1256         7174 :       CALL timeset(routineN, handle)
    1257              : 
    1258         7174 :       IF (PRESENT(dir)) THEN
    1259         7134 :          IF (dir == 'c' .OR. dir == 'C') THEN
    1260              :             docol = .TRUE.
    1261              :          ELSEIF (dir == 'r' .OR. dir == 'R') THEN
    1262              :             docol = .FALSE.
    1263              :          ELSE
    1264            0 :             CPABORT('Wrong argument DIR')
    1265              :          END IF
    1266              :       ELSE
    1267              :          docol = .TRUE.
    1268              :       END IF
    1269              : 
    1270         7174 :       my_block => matrix%local_data
    1271              : 
    1272         7174 :       CPASSERT(.NOT. matrix%use_sp)
    1273              : 
    1274              :       CALL cp_fm_get_info(matrix, col_indices=col_indices, row_indices=row_indices, &
    1275         7174 :                           nrow_local=nrow_local, ncol_local=ncol_local)
    1276              : 
    1277              :       ! the efficiency could be improved by making use of the row-col distribution of scalapack
    1278       220970 :       sum_array(:) = 0.0_dp
    1279         7174 :       IF (docol) THEN
    1280          448 :       DO j = 1, ncol_local
    1281         3628 :          DO i = 1, nrow_local
    1282         3588 :             sum_array(col_indices(j)) = sum_array(col_indices(j)) + my_block(i, j)
    1283              :          END DO
    1284              :       END DO
    1285              :       ELSE
    1286        81994 :       DO j = 1, ncol_local
    1287      6429038 :          DO i = 1, nrow_local
    1288      6421904 :             sum_array(row_indices(i)) = sum_array(row_indices(i)) + my_block(i, j)
    1289              :          END DO
    1290              :       END DO
    1291              :       END IF
    1292       434766 :       CALL matrix%matrix_struct%para_env%sum(sum_array)
    1293              : 
    1294         7174 :       CALL timestop(handle)
    1295         7174 :    END SUBROUTINE
    1296              : 
    1297              : ! **************************************************************************************************
    1298              : !> \brief copy one identically sized matrix in the other
    1299              : !> \param source ...
    1300              : !> \param destination ...
    1301              : !> \note
    1302              : !>      see also cp_fm_to_fm_columns
    1303              : ! **************************************************************************************************
    1304       369522 :    SUBROUTINE cp_fm_to_fm_matrix(source, destination)
    1305              : 
    1306              :       TYPE(cp_fm_type), INTENT(IN)                       :: source, destination
    1307              : 
    1308              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_to_fm_matrix'
    1309              : 
    1310              :       INTEGER                                            :: handle, npcol, nprow
    1311              : 
    1312       369522 :       CALL timeset(routineN, handle)
    1313              : 
    1314       369522 :       nprow = source%matrix_struct%context%num_pe(1)
    1315       369522 :       npcol = source%matrix_struct%context%num_pe(2)
    1316              : 
    1317       369522 :       IF ((.NOT. cp2k_is_parallel) .OR. &
    1318              :           cp_fm_struct_equivalent(source%matrix_struct, &
    1319              :                                   destination%matrix_struct)) THEN
    1320       369522 :          IF (source%use_sp .AND. destination%use_sp) THEN
    1321            0 :             IF (SIZE(source%local_data_sp, 1) /= SIZE(destination%local_data_sp, 1) .OR. &
    1322              :                 SIZE(source%local_data_sp, 2) /= SIZE(destination%local_data_sp, 2)) &
    1323              :                CALL cp_abort(__LOCATION__, &
    1324              :                              "Cannot copy full matrix <"//TRIM(source%name)// &
    1325              :                              "> to full matrix <"//TRIM(destination%name)// &
    1326            0 :                              ">. The local_data blocks have different sizes.")
    1327              :             CALL scopy(SIZE(source%local_data_sp, 1)*SIZE(source%local_data_sp, 2), &
    1328            0 :                        source%local_data_sp, 1, destination%local_data_sp, 1)
    1329       369522 :          ELSE IF (source%use_sp .AND. .NOT. destination%use_sp) THEN
    1330            0 :             IF (SIZE(source%local_data_sp, 1) /= SIZE(destination%local_data, 1) .OR. &
    1331              :                 SIZE(source%local_data_sp, 2) /= SIZE(destination%local_data, 2)) &
    1332              :                CALL cp_abort(__LOCATION__, &
    1333              :                              "Cannot copy full matrix <"//TRIM(source%name)// &
    1334              :                              "> to full matrix <"//TRIM(destination%name)// &
    1335            0 :                              ">. The local_data blocks have different sizes.")
    1336            0 :             destination%local_data = REAL(source%local_data_sp, dp)
    1337       369522 :          ELSE IF (.NOT. source%use_sp .AND. destination%use_sp) THEN
    1338            0 :             IF (SIZE(source%local_data, 1) /= SIZE(destination%local_data_sp, 1) .OR. &
    1339              :                 SIZE(source%local_data, 2) /= SIZE(destination%local_data_sp, 2)) &
    1340              :                CALL cp_abort(__LOCATION__, &
    1341              :                              "Cannot copy full matrix <"//TRIM(source%name)// &
    1342              :                              "> to full matrix <"//TRIM(destination%name)// &
    1343            0 :                              ">. The local_data blocks have different sizes.")
    1344            0 :             destination%local_data_sp = REAL(source%local_data, sp)
    1345              :          ELSE
    1346       369522 :             IF (SIZE(source%local_data, 1) /= SIZE(destination%local_data, 1) .OR. &
    1347              :                 SIZE(source%local_data, 2) /= SIZE(destination%local_data, 2)) &
    1348              :                CALL cp_abort(__LOCATION__, &
    1349              :                              "Cannot copy full matrix <"//TRIM(source%name)// &
    1350              :                              "> to full matrix <"//TRIM(destination%name)// &
    1351            0 :                              ">. The local_data blocks have different sizes.")
    1352              :             CALL dcopy(SIZE(source%local_data, 1)*SIZE(source%local_data, 2), &
    1353       369522 :                        source%local_data, 1, destination%local_data, 1)
    1354              :          END IF
    1355              :       ELSE
    1356            0 :          CPABORT("Data structures of source and target full matrix are not equivalent")
    1357              :       END IF
    1358              : 
    1359       369522 :       CALL timestop(handle)
    1360              : 
    1361       369522 :    END SUBROUTINE cp_fm_to_fm_matrix
    1362              : 
    1363              : ! **************************************************************************************************
    1364              : !> \brief copy just a subset of columns of a fm to a fm
    1365              : !> \param msource ...
    1366              : !> \param mtarget ...
    1367              : !> \param ncol ...
    1368              : !> \param source_start ...
    1369              : !> \param target_start ...
    1370              : ! **************************************************************************************************
    1371       165914 :    SUBROUTINE cp_fm_to_fm_columns(msource, mtarget, ncol, source_start, &
    1372              :                                   target_start)
    1373              : 
    1374              :       TYPE(cp_fm_type), INTENT(IN)          :: msource, mtarget
    1375              :       INTEGER, INTENT(IN)                      :: ncol
    1376              :       INTEGER, INTENT(IN), OPTIONAL            :: source_start, target_start
    1377              : 
    1378              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_to_fm_columns'
    1379              : 
    1380              :       INTEGER                                  :: handle, n, ss, ts
    1381              :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a, b
    1382              : #if defined(__parallel)
    1383              :       INTEGER                                  :: i
    1384              :       INTEGER, DIMENSION(9)                    :: desca, descb
    1385              : #endif
    1386              : 
    1387       165914 :       CALL timeset(routineN, handle)
    1388              : 
    1389       165914 :       ss = 1
    1390       165914 :       ts = 1
    1391              : 
    1392       165914 :       IF (PRESENT(source_start)) ss = source_start
    1393       165914 :       IF (PRESENT(target_start)) ts = target_start
    1394              : 
    1395       165914 :       n = msource%matrix_struct%nrow_global
    1396              : 
    1397       165914 :       a => msource%local_data
    1398       165914 :       b => mtarget%local_data
    1399              : 
    1400              : #if defined(__parallel)
    1401      1659140 :       desca(:) = msource%matrix_struct%descriptor(:)
    1402      1659140 :       descb(:) = mtarget%matrix_struct%descriptor(:)
    1403       666840 :       DO i = 0, ncol - 1
    1404       666840 :          CALL pdcopy(n, a, 1, ss + i, desca, 1, b, 1, ts + i, descb, 1)
    1405              :       END DO
    1406              : #else
    1407              :       IF (ss <= SIZE(a, 2) .AND. ts <= SIZE(b, 2)) THEN
    1408              :          CALL dcopy(ncol*n, a(:, ss), 1, b(:, ts), 1)
    1409              :       END IF
    1410              : #endif
    1411              : 
    1412       165914 :       CALL timestop(handle)
    1413              : 
    1414       165914 :    END SUBROUTINE cp_fm_to_fm_columns
    1415              : 
    1416              : ! **************************************************************************************************
    1417              : !> \brief copy just a triangular matrix
    1418              : !> \param msource ...
    1419              : !> \param mtarget ...
    1420              : !> \param uplo ...
    1421              : ! **************************************************************************************************
    1422          370 :    SUBROUTINE cp_fm_to_fm_triangular(msource, mtarget, uplo)
    1423              : 
    1424              :       TYPE(cp_fm_type), INTENT(IN)             :: msource, mtarget
    1425              :       CHARACTER(LEN=1), OPTIONAL, INTENT(IN)   :: uplo
    1426              : 
    1427              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_to_fm_triangular'
    1428              : 
    1429              :       CHARACTER(LEN=1)                         :: myuplo
    1430              :       INTEGER                                  :: handle, ncol, nrow
    1431          370 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a, b
    1432              : #if defined(__parallel)
    1433              :       INTEGER, DIMENSION(9)                    :: desca, descb
    1434              : #endif
    1435              : 
    1436          370 :       CALL timeset(routineN, handle)
    1437              : 
    1438          370 :       myuplo = 'U'
    1439          370 :       IF (PRESENT(uplo)) myuplo = uplo
    1440              : 
    1441          370 :       nrow = msource%matrix_struct%nrow_global
    1442          370 :       ncol = msource%matrix_struct%ncol_global
    1443              : 
    1444          370 :       a => msource%local_data
    1445          370 :       b => mtarget%local_data
    1446              : 
    1447              : #if defined(__parallel)
    1448         3700 :       desca(:) = msource%matrix_struct%descriptor(:)
    1449         3700 :       descb(:) = mtarget%matrix_struct%descriptor(:)
    1450          370 :       CALL pdlacpy(myuplo, nrow, ncol, a(1, 1), 1, 1, desca, b(1, 1), 1, 1, descb)
    1451              : #else
    1452              :       CALL dlacpy(myuplo, nrow, ncol, a(1, 1), nrow, b(1, 1), nrow)
    1453              : #endif
    1454              : 
    1455          370 :       CALL timestop(handle)
    1456              : 
    1457          370 :    END SUBROUTINE cp_fm_to_fm_triangular
    1458              : 
    1459              : ! **************************************************************************************************
    1460              : !> \brief copy just a part ot the matrix
    1461              : !> \param msource ...
    1462              : !> \param mtarget ...
    1463              : !> \param nrow ...
    1464              : !> \param ncol ...
    1465              : !> \param s_firstrow ...
    1466              : !> \param s_firstcol ...
    1467              : !> \param t_firstrow ...
    1468              : !> \param t_firstcol ...
    1469              : ! **************************************************************************************************
    1470              : 
    1471         9524 :    SUBROUTINE cp_fm_to_fm_submat(msource, mtarget, nrow, ncol, s_firstrow, s_firstcol, t_firstrow, t_firstcol)
    1472              : 
    1473              :       TYPE(cp_fm_type), INTENT(IN)             :: msource, mtarget
    1474              :       INTEGER, INTENT(IN)                      :: nrow, ncol, s_firstrow, &
    1475              :                                                   s_firstcol, t_firstrow, &
    1476              :                                                   t_firstcol
    1477              : 
    1478              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_to_fm_submat'
    1479              : 
    1480              :       INTEGER                                  :: handle, i, na, nb, ss, ts
    1481              :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a, b
    1482              : #if defined(__parallel)
    1483              :       INTEGER, DIMENSION(9)                    :: desca, descb
    1484              : #endif
    1485              : 
    1486         9524 :       CALL timeset(routineN, handle)
    1487              : 
    1488         9524 :       a => msource%local_data
    1489         9524 :       b => mtarget%local_data
    1490              : 
    1491         9524 :       na = msource%matrix_struct%nrow_global
    1492         9524 :       nb = mtarget%matrix_struct%nrow_global
    1493              : !    nrow must be <= na and nb
    1494         9524 :       IF (nrow > na) &
    1495            0 :          CPABORT("cannot copy because nrow > number of rows of source matrix")
    1496         9524 :       IF (nrow > nb) &
    1497            0 :          CPABORT("cannot copy because nrow > number of rows of target matrix")
    1498         9524 :       na = msource%matrix_struct%ncol_global
    1499         9524 :       nb = mtarget%matrix_struct%ncol_global
    1500              : !    ncol must be <= na_col and nb_col
    1501         9524 :       IF (ncol > na) &
    1502            0 :          CPABORT("cannot copy because nrow > number of rows of source matrix")
    1503         9524 :       IF (ncol > nb) &
    1504            0 :          CPABORT("cannot copy because nrow > number of rows of target matrix")
    1505              : 
    1506              : #if defined(__parallel)
    1507        95240 :       desca(:) = msource%matrix_struct%descriptor(:)
    1508        95240 :       descb(:) = mtarget%matrix_struct%descriptor(:)
    1509       111562 :       DO i = 0, ncol - 1
    1510       102038 :          ss = s_firstcol + i
    1511       102038 :          ts = t_firstcol + i
    1512       111562 :          CALL pdcopy(nrow, a, s_firstrow, ss, desca, 1, b, t_firstrow, ts, descb, 1)
    1513              :       END DO
    1514              : #else
    1515              :       DO i = 0, ncol - 1
    1516              :          ss = s_firstcol + i
    1517              :          ts = t_firstcol + i
    1518              :          CALL dcopy(nrow, a(s_firstrow:, ss), 1, b(t_firstrow:, ts), 1)
    1519              :       END DO
    1520              : #endif
    1521              : 
    1522         9524 :       CALL timestop(handle)
    1523         9524 :    END SUBROUTINE cp_fm_to_fm_submat
    1524              : 
    1525              : ! **************************************************************************************************
    1526              : !> \brief General copy of a fm matrix to another fm matrix.
    1527              : !>        Uses non-blocking MPI rather than ScaLAPACK.
    1528              : !>
    1529              : !> \param source          input fm matrix
    1530              : !> \param destination     output fm matrix
    1531              : !> \param para_env        parallel environment corresponding to the BLACS env that covers all parts
    1532              : !>                        of the input and output matrices
    1533              : !> \par History
    1534              : !>      31-Jan-2017 : Re-implemented using non-blocking MPI [IainB, MarkT]
    1535              : ! **************************************************************************************************
    1536         7954 :    SUBROUTINE cp_fm_copy_general(source, destination, para_env)
    1537              :       TYPE(cp_fm_type), INTENT(IN)                       :: source, destination
    1538              :       TYPE(mp_para_env_type), INTENT(IN)                 :: para_env
    1539              : 
    1540              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_copy_general'
    1541              : 
    1542              :       INTEGER                                            :: handle
    1543        71586 :       TYPE(copy_info_type)                               :: info
    1544              : 
    1545         7954 :       CALL timeset(routineN, handle)
    1546              : 
    1547         7954 :       CALL cp_fm_start_copy_general(source, destination, para_env, info)
    1548         7954 :       IF (ASSOCIATED(destination%matrix_struct)) THEN
    1549         7942 :          CALL cp_fm_finish_copy_general(destination, info)
    1550              :       END IF
    1551         7954 :       IF (ASSOCIATED(source%matrix_struct)) THEN
    1552         7802 :          CALL cp_fm_cleanup_copy_general(info)
    1553              :       END IF
    1554              : 
    1555         7954 :       CALL timestop(handle)
    1556         7954 :    END SUBROUTINE cp_fm_copy_general
    1557              : 
    1558              : ! **************************************************************************************************
    1559              : !> \brief Initiates the copy operation: get distribution data, post MPI isend and irecvs
    1560              : !> \param source input fm matrix
    1561              : !> \param destination output fm matrix
    1562              : !> \param para_env parallel environment corresponding to the BLACS env that covers all parts
    1563              : !>                        of the input and output matrices
    1564              : !> \param info all of the data that will be needed to complete the copy operation
    1565              : ! **************************************************************************************************
    1566      1751020 :    SUBROUTINE cp_fm_start_copy_general(source, destination, para_env, info)
    1567              :       TYPE(cp_fm_type), INTENT(IN)                       :: source, destination
    1568              :       TYPE(mp_para_env_type), INTENT(IN)                 :: para_env
    1569              :       TYPE(copy_info_type), INTENT(OUT)                  :: info
    1570              : 
    1571              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_start_copy_general'
    1572              : 
    1573              :       INTEGER :: dest_p_i, dest_q_j, global_rank, global_size, handle, i, j, k, mpi_rank, &
    1574              :          ncol_block_dest, ncol_block_src, ncol_local_recv, ncol_local_send, ncols, &
    1575              :          nrow_block_dest, nrow_block_src, nrow_local_recv, nrow_local_send, nrows, p, q, &
    1576              :          recv_rank, recv_size, send_rank, send_size
    1577       175102 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: all_ranks, dest2global, dest_p, dest_q, &
    1578       350204 :                                                             recv_count, send_count, send_disp, &
    1579       175102 :                                                             source2global, src_p, src_q
    1580       175102 :       INTEGER, ALLOCATABLE, DIMENSION(:, :)              :: dest_blacs2mpi
    1581              :       INTEGER, DIMENSION(2)                              :: dest_block, dest_block_tmp, dest_num_pe, &
    1582              :                                                             src_block, src_block_tmp, src_num_pe
    1583       350204 :       INTEGER, DIMENSION(:), POINTER                     :: recv_col_indices, recv_row_indices, &
    1584       350204 :                                                             send_col_indices, send_row_indices
    1585              :       TYPE(cp_fm_struct_type), POINTER                   :: recv_dist, send_dist
    1586      2451428 :       TYPE(mp_request_type), DIMENSION(6)                :: recv_req, send_req
    1587              : 
    1588       175102 :       CALL timeset(routineN, handle)
    1589              : 
    1590              :       IF (.NOT. cp2k_is_parallel) THEN
    1591              :          ! Just copy all of the matrix data into a 'send buffer', to be unpacked later
    1592              :          nrow_local_send = SIZE(source%local_data, 1)
    1593              :          ncol_local_send = SIZE(source%local_data, 2)
    1594              :          ALLOCATE (info%send_buf(nrow_local_send*ncol_local_send))
    1595              :          k = 0
    1596              :          DO j = 1, ncol_local_send
    1597              :             DO i = 1, nrow_local_send
    1598              :                k = k + 1
    1599              :                info%send_buf(k) = source%local_data(i, j)
    1600              :             END DO
    1601              :          END DO
    1602              :       ELSE
    1603              :          ! assumption of double precision data is carried forward from ScaLAPACK version
    1604       175102 :          IF (source%use_sp) CPABORT("only DP kind implemented")
    1605       175102 :          IF (destination%use_sp) CPABORT("only DP kind implemented")
    1606              : 
    1607       175102 :          NULLIFY (recv_dist, send_dist)
    1608       175102 :          NULLIFY (recv_col_indices, recv_row_indices, send_col_indices, send_row_indices)
    1609              : 
    1610              :          ! The 'global' communicator contains both the source and destination decompositions
    1611       175102 :          global_size = para_env%num_pe
    1612       175102 :          global_rank = para_env%mepos
    1613              : 
    1614              :          ! The source/send decomposition and destination/recv decompositions may only exist on
    1615              :          ! on a subset of the processes involved in the communication
    1616              :          ! Check if the source and/or destination arguments are .not. ASSOCIATED():
    1617              :          ! if so, skip the send / recv parts (since these processes do not participate in the sending/receiving distribution)
    1618       175102 :          IF (ASSOCIATED(destination%matrix_struct)) THEN
    1619       143114 :             recv_dist => destination%matrix_struct
    1620       143114 :             recv_rank = recv_dist%para_env%mepos
    1621              :          ELSE
    1622        31988 :             recv_rank = mp_proc_null
    1623              :          END IF
    1624              : 
    1625       175102 :          IF (ASSOCIATED(source%matrix_struct)) THEN
    1626       156558 :             send_dist => source%matrix_struct
    1627       156558 :             send_rank = send_dist%para_env%mepos
    1628              :          ELSE
    1629        18544 :             send_rank = mp_proc_null
    1630              :          END IF
    1631              : 
    1632              :          ! Map the rank in the source/dest communicator to the global rank
    1633       525306 :          ALLOCATE (all_ranks(0:global_size - 1))
    1634              : 
    1635       175102 :          CALL para_env%allgather(send_rank, all_ranks)
    1636       175102 :          IF (ASSOCIATED(recv_dist)) THEN
    1637       715570 :             ALLOCATE (source2global(0:COUNT(all_ranks .NE. mp_proc_null) - 1))
    1638       429342 :             DO i = 0, global_size - 1
    1639       429342 :                IF (all_ranks(i) .NE. mp_proc_null) THEN
    1640       249140 :                   source2global(all_ranks(i)) = i
    1641              :                END IF
    1642              :             END DO
    1643              :          END IF
    1644              : 
    1645       175102 :          CALL para_env%allgather(recv_rank, all_ranks)
    1646       175102 :          IF (ASSOCIATED(send_dist)) THEN
    1647       782790 :             ALLOCATE (dest2global(0:COUNT(all_ranks .NE. mp_proc_null) - 1))
    1648       469674 :             DO i = 0, global_size - 1
    1649       469674 :                IF (all_ranks(i) .NE. mp_proc_null) THEN
    1650       249140 :                   dest2global(all_ranks(i)) = i
    1651              :                END IF
    1652              :             END DO
    1653              :          END IF
    1654       175102 :          DEALLOCATE (all_ranks)
    1655              : 
    1656              :          ! Some data from the two decompositions will be needed by all processes in the global group :
    1657              :          ! process grid shape, block size, and the BLACS-to-MPI mapping
    1658              : 
    1659              :          ! The global root process will receive the data (from the root process in each decomposition)
    1660      1225714 :          send_req(:) = mp_request_null
    1661       175102 :          IF (global_rank == 0) THEN
    1662       612857 :             recv_req(:) = mp_request_null
    1663        87551 :             CALL para_env%irecv(src_block, mp_any_source, recv_req(1), tag=src_tag)
    1664        87551 :             CALL para_env%irecv(dest_block, mp_any_source, recv_req(2), tag=dest_tag)
    1665        87551 :             CALL para_env%irecv(src_num_pe, mp_any_source, recv_req(3), tag=src_tag)
    1666        87551 :             CALL para_env%irecv(dest_num_pe, mp_any_source, recv_req(4), tag=dest_tag)
    1667              :          END IF
    1668              : 
    1669       175102 :          IF (ASSOCIATED(send_dist)) THEN
    1670       156558 :             IF ((send_rank .EQ. 0)) THEN
    1671              :                ! need to use separate buffers here in case this is actually global rank 0
    1672       262653 :                src_block_tmp = (/send_dist%nrow_block, send_dist%ncol_block/)
    1673        87551 :                CALL para_env%isend(src_block_tmp, 0, send_req(1), tag=src_tag)
    1674        87551 :                CALL para_env%isend(send_dist%context%num_pe, 0, send_req(2), tag=src_tag)
    1675              :             END IF
    1676              :          END IF
    1677              : 
    1678       175102 :          IF (ASSOCIATED(recv_dist)) THEN
    1679       143114 :             IF ((recv_rank .EQ. 0)) THEN
    1680       262653 :                dest_block_tmp = (/recv_dist%nrow_block, recv_dist%ncol_block/)
    1681        87551 :                CALL para_env%isend(dest_block_tmp, 0, send_req(3), tag=dest_tag)
    1682        87551 :                CALL para_env%isend(recv_dist%context%num_pe, 0, send_req(4), tag=dest_tag)
    1683              :             END IF
    1684              :          END IF
    1685              : 
    1686       175102 :          IF (global_rank == 0) THEN
    1687        87551 :             CALL mp_waitall(recv_req(1:4))
    1688              :             ! Now we know the process decomposition, we can allocate the arrays to hold the blacs2mpi mapping
    1689            0 :             ALLOCATE (info%src_blacs2mpi(0:src_num_pe(1) - 1, 0:src_num_pe(2) - 1), &
    1690              :                       dest_blacs2mpi(0:dest_num_pe(1) - 1, 0:dest_num_pe(2) - 1) &
    1691       612857 :                       )
    1692        87551 :             CALL para_env%irecv(info%src_blacs2mpi, mp_any_source, recv_req(5), tag=src_tag)
    1693        87551 :             CALL para_env%irecv(dest_blacs2mpi, mp_any_source, recv_req(6), tag=dest_tag)
    1694              :          END IF
    1695              : 
    1696       175102 :          IF (ASSOCIATED(send_dist)) THEN
    1697       156558 :             IF ((send_rank .EQ. 0)) THEN
    1698        87551 :                CALL para_env%isend(send_dist%context%blacs2mpi(:, :), 0, send_req(5), tag=src_tag)
    1699              :             END IF
    1700              :          END IF
    1701              : 
    1702       175102 :          IF (ASSOCIATED(recv_dist)) THEN
    1703       143114 :             IF ((recv_rank .EQ. 0)) THEN
    1704        87551 :                CALL para_env%isend(recv_dist%context%blacs2mpi(:, :), 0, send_req(6), tag=dest_tag)
    1705              :             END IF
    1706              :          END IF
    1707              : 
    1708       175102 :          IF (global_rank == 0) THEN
    1709        87551 :             CALL mp_waitall(recv_req(5:6))
    1710              :          END IF
    1711              : 
    1712              :          ! Finally, broadcast the data to all processes in the global communicator
    1713       175102 :          CALL para_env%bcast(src_block, 0)
    1714       175102 :          CALL para_env%bcast(dest_block, 0)
    1715       175102 :          CALL para_env%bcast(src_num_pe, 0)
    1716       175102 :          CALL para_env%bcast(dest_num_pe, 0)
    1717       525306 :          info%src_num_pe(1:2) = src_num_pe(1:2)
    1718       525306 :          info%nblock_src(1:2) = src_block(1:2)
    1719       175102 :          IF (global_rank .NE. 0) THEN
    1720            0 :             ALLOCATE (info%src_blacs2mpi(0:src_num_pe(1) - 1, 0:src_num_pe(2) - 1), &
    1721            0 :                       dest_blacs2mpi(0:dest_num_pe(1) - 1, 0:dest_num_pe(2) - 1) &
    1722       612857 :                       )
    1723              :          END IF
    1724       175102 :          CALL para_env%bcast(info%src_blacs2mpi, 0)
    1725       175102 :          CALL para_env%bcast(dest_blacs2mpi, 0)
    1726              : 
    1727       175102 :          recv_size = dest_num_pe(1)*dest_num_pe(2)
    1728       175102 :          send_size = src_num_pe(1)*src_num_pe(2)
    1729       175102 :          info%send_size = send_size
    1730       175102 :          CALL mp_waitall(send_req(:))
    1731              : 
    1732              :          ! Setup is now complete, we can start the actual communication here.
    1733              :          ! The order implemented here is:
    1734              :          !  DEST_1
    1735              :          !      compute recv sizes
    1736              :          !      call irecv
    1737              :          !  SRC_1
    1738              :          !      compute send sizes
    1739              :          !      pack send buffers
    1740              :          !      call isend
    1741              :          !  DEST_2
    1742              :          !      wait for the recvs and unpack buffers (this part eventually will go into another routine to allow comms to run concurrently)
    1743              :          !  SRC_2
    1744              :          !      wait for the sends
    1745              : 
    1746              :          ! DEST_1
    1747       175102 :          IF (ASSOCIATED(recv_dist)) THEN
    1748              :             CALL cp_fm_struct_get(recv_dist, row_indices=recv_row_indices, &
    1749              :                                   col_indices=recv_col_indices &
    1750       143114 :                                   )
    1751       143114 :             info%recv_col_indices => recv_col_indices
    1752       143114 :             info%recv_row_indices => recv_row_indices
    1753       143114 :             nrow_block_src = src_block(1)
    1754       143114 :             ncol_block_src = src_block(2)
    1755       964710 :             ALLOCATE (recv_count(0:send_size - 1), info%recv_disp(0:send_size - 1), info%recv_request(0:send_size - 1))
    1756              : 
    1757              :             ! Determine the recv counts, allocate the receive buffers, call mpi_irecv for all the non-zero sized receives
    1758       143114 :             nrow_local_recv = recv_dist%nrow_locals(recv_dist%context%mepos(1))
    1759       143114 :             ncol_local_recv = recv_dist%ncol_locals(recv_dist%context%mepos(2))
    1760       143114 :             info%nlocal_recv(1) = nrow_local_recv
    1761       143114 :             info%nlocal_recv(2) = ncol_local_recv
    1762              :             ! Initialise src_p, src_q arrays (sized using number of rows/cols in the receiving distribution)
    1763       715570 :             ALLOCATE (src_p(nrow_local_recv), src_q(ncol_local_recv))
    1764      2550523 :             DO i = 1, nrow_local_recv
    1765              :                ! For each local row we will receive, we look up its global row (in recv_row_indices),
    1766              :                ! then work out which row block it comes from, and which process row that row block comes from.
    1767      2550523 :                src_p(i) = MOD(((recv_row_indices(i) - 1)/nrow_block_src), src_num_pe(1))
    1768              :             END DO
    1769      4300742 :             DO j = 1, ncol_local_recv
    1770              :                ! Similarly for the columns
    1771      4300742 :                src_q(j) = MOD(((recv_col_indices(j) - 1)/ncol_block_src), src_num_pe(2))
    1772              :             END DO
    1773              :             ! src_p/q now contains the process row/column ID that will send data to that row/column
    1774              : 
    1775       286228 :             DO q = 0, src_num_pe(2) - 1
    1776      4300742 :                ncols = COUNT(src_q .EQ. q)
    1777       535368 :                DO p = 0, src_num_pe(1) - 1
    1778      4685078 :                   nrows = COUNT(src_p .EQ. p)
    1779              :                   ! Use the send_dist here as we are looking up the processes where the data comes from
    1780       392254 :                   recv_count(info%src_blacs2mpi(p, q)) = nrows*ncols
    1781              :                END DO
    1782              :             END DO
    1783       143114 :             DEALLOCATE (src_p, src_q)
    1784              : 
    1785              :             ! Use one long buffer (and displacements into that buffer)
    1786              :             !     this prevents the need for a rectangular array where not all elements will be populated
    1787       678482 :             ALLOCATE (info%recv_buf(SUM(recv_count(:))))
    1788       143114 :             info%recv_disp(0) = 0
    1789       249140 :             DO i = 1, send_size - 1
    1790       249140 :                info%recv_disp(i) = info%recv_disp(i - 1) + recv_count(i - 1)
    1791              :             END DO
    1792              : 
    1793              :             ! Issue receive calls on ranks which expect data
    1794       392254 :             DO k = 0, send_size - 1
    1795       392254 :                IF (recv_count(k) .GT. 0) THEN
    1796              :                   CALL para_env%irecv(info%recv_buf(info%recv_disp(k) + 1:info%recv_disp(k) + recv_count(k)), &
    1797       175546 :                                       source2global(k), info%recv_request(k))
    1798              :                END IF
    1799              :             END DO
    1800       143114 :             DEALLOCATE (source2global)
    1801              :          END IF ! ASSOCIATED(recv_dist)
    1802              : 
    1803              :          ! SRC_1
    1804       175102 :          IF (ASSOCIATED(send_dist)) THEN
    1805              :             CALL cp_fm_struct_get(send_dist, row_indices=send_row_indices, &
    1806              :                                   col_indices=send_col_indices &
    1807       156558 :                                   )
    1808       156558 :             nrow_block_dest = dest_block(1)
    1809       156558 :             ncol_block_dest = dest_block(2)
    1810      1031930 :             ALLOCATE (send_count(0:recv_size - 1), send_disp(0:recv_size - 1), info%send_request(0:recv_size - 1))
    1811              : 
    1812              :             ! Determine the send counts, allocate the send buffers
    1813       156558 :             nrow_local_send = send_dist%nrow_locals(send_dist%context%mepos(1))
    1814       156558 :             ncol_local_send = send_dist%ncol_locals(send_dist%context%mepos(2))
    1815              : 
    1816              :             ! Initialise dest_p, dest_q arrays (sized nrow_local, ncol_local)
    1817              :             !   i.e. number of rows,cols in the sending distribution
    1818       782790 :             ALLOCATE (dest_p(nrow_local_send), dest_q(ncol_local_send))
    1819              : 
    1820      2563967 :             DO i = 1, nrow_local_send
    1821              :                ! Use the send_dist%row_indices() here (we are looping over the local rows we will send)
    1822      2563967 :                dest_p(i) = MOD(((send_row_indices(i) - 1)/nrow_block_dest), dest_num_pe(1))
    1823              :             END DO
    1824      4586150 :             DO j = 1, ncol_local_send
    1825      4586150 :                dest_q(j) = MOD(((send_col_indices(j) - 1)/ncol_block_dest), dest_num_pe(2))
    1826              :             END DO
    1827              :             ! dest_p/q now contain the process row/column ID that will receive data from that row/column
    1828              : 
    1829       313116 :             DO q = 0, dest_num_pe(2) - 1
    1830      4586150 :                ncols = COUNT(dest_q .EQ. q)
    1831       562256 :                DO p = 0, dest_num_pe(1) - 1
    1832      4413618 :                   nrows = COUNT(dest_p .EQ. p)
    1833       405698 :                   send_count(dest_blacs2mpi(p, q)) = nrows*ncols
    1834              :                END DO
    1835              :             END DO
    1836       156558 :             DEALLOCATE (dest_p, dest_q)
    1837              : 
    1838              :             ! Allocate the send buffer using send_count -- and calculate the offset into the buffer for each process
    1839       718814 :             ALLOCATE (info%send_buf(SUM(send_count(:))))
    1840       156558 :             send_disp(0) = 0
    1841       249140 :             DO k = 1, recv_size - 1
    1842       249140 :                send_disp(k) = send_disp(k - 1) + send_count(k - 1)
    1843              :             END DO
    1844              : 
    1845              :             ! Loop over the smat, pack the send buffers
    1846       405698 :             send_count(:) = 0
    1847      4586150 :             DO j = 1, ncol_local_send
    1848              :                ! Use send_col_indices and row_indices here, as we are looking up the global row/column number of local rows.
    1849      4429592 :                dest_q_j = MOD(((send_col_indices(j) - 1)/ncol_block_dest), dest_num_pe(2))
    1850    200821067 :                DO i = 1, nrow_local_send
    1851    196234917 :                   dest_p_i = MOD(((send_row_indices(i) - 1)/nrow_block_dest), dest_num_pe(1))
    1852    196234917 :                   mpi_rank = dest_blacs2mpi(dest_p_i, dest_q_j)
    1853    196234917 :                   send_count(mpi_rank) = send_count(mpi_rank) + 1
    1854    200664509 :                   info%send_buf(send_disp(mpi_rank) + send_count(mpi_rank)) = source%local_data(i, j)
    1855              :                END DO
    1856              :             END DO
    1857              : 
    1858              :             ! For each non-zero send_count, call mpi_isend
    1859       405698 :             DO k = 0, recv_size - 1
    1860       405698 :                IF (send_count(k) .GT. 0) THEN
    1861              :                   CALL para_env%isend(info%send_buf(send_disp(k) + 1:send_disp(k) + send_count(k)), &
    1862       175546 :                                       dest2global(k), info%send_request(k))
    1863              :                END IF
    1864              :             END DO
    1865       156558 :             DEALLOCATE (send_count, send_disp, dest2global)
    1866              :          END IF ! ASSOCIATED(send_dist)
    1867       175102 :          DEALLOCATE (dest_blacs2mpi)
    1868              : 
    1869              :       END IF !IF (.NOT. cp2k_is_parallel)
    1870              : 
    1871       175102 :       CALL timestop(handle)
    1872              : 
    1873       700408 :    END SUBROUTINE cp_fm_start_copy_general
    1874              : 
    1875              : ! **************************************************************************************************
    1876              : !> \brief Completes the copy operation: wait for comms, unpack, clean up MPI state
    1877              : !> \param destination output fm matrix
    1878              : !> \param info all of the data that will be needed to complete the copy operation
    1879              : ! **************************************************************************************************
    1880       143114 :    SUBROUTINE cp_fm_finish_copy_general(destination, info)
    1881              :       TYPE(cp_fm_type), INTENT(IN)                       :: destination
    1882              :       TYPE(copy_info_type), INTENT(INOUT)                :: info
    1883              : 
    1884              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_finish_copy_general'
    1885              : 
    1886              :       INTEGER                                            :: handle, i, j, k, mpi_rank, send_size, &
    1887              :                                                             src_p_i, src_q_j
    1888       143114 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: recv_count
    1889              :       INTEGER, DIMENSION(2)                              :: nblock_src, nlocal_recv, src_num_pe
    1890       143114 :       INTEGER, DIMENSION(:), POINTER                     :: recv_col_indices, recv_row_indices
    1891              : 
    1892       143114 :       CALL timeset(routineN, handle)
    1893              : 
    1894              :       IF (.NOT. cp2k_is_parallel) THEN
    1895              :          ! Now unpack the data from the 'send buffer'
    1896              :          k = 0
    1897              :          DO j = 1, SIZE(destination%local_data, 2)
    1898              :             DO i = 1, SIZE(destination%local_data, 1)
    1899              :                k = k + 1
    1900              :                destination%local_data(i, j) = info%send_buf(k)
    1901              :             END DO
    1902              :          END DO
    1903              :          DEALLOCATE (info%send_buf)
    1904              :       ELSE
    1905              :          ! Set up local variables ...
    1906       143114 :          send_size = info%send_size
    1907       429342 :          nlocal_recv(1:2) = info%nlocal_recv(:)
    1908       429342 :          nblock_src(1:2) = info%nblock_src(:)
    1909       429342 :          src_num_pe(1:2) = info%src_num_pe(:)
    1910       143114 :          recv_col_indices => info%recv_col_indices
    1911       143114 :          recv_row_indices => info%recv_row_indices
    1912              : 
    1913              :          ! ... use the local variables to do the work
    1914              :          ! DEST_2
    1915       143114 :          CALL mp_waitall(info%recv_request(:))
    1916       429342 :          ALLOCATE (recv_count(0:send_size - 1))
    1917              :          ! Loop over the rmat, filling it in with data from the recv buffers
    1918              :          ! (here the block sizes, num_pes refer to the distribution of the source matrix)
    1919       392254 :          recv_count(:) = 0
    1920      4300742 :          DO j = 1, nlocal_recv(2)
    1921      4157628 :             src_q_j = MOD(((recv_col_indices(j) - 1)/nblock_src(2)), src_num_pe(2))
    1922    200535659 :             DO i = 1, nlocal_recv(1)
    1923    196234917 :                src_p_i = MOD(((recv_row_indices(i) - 1)/nblock_src(1)), src_num_pe(1))
    1924    196234917 :                mpi_rank = info%src_blacs2mpi(src_p_i, src_q_j)
    1925    196234917 :                recv_count(mpi_rank) = recv_count(mpi_rank) + 1
    1926    200392545 :                destination%local_data(i, j) = info%recv_buf(info%recv_disp(mpi_rank) + recv_count(mpi_rank))
    1927              :             END DO
    1928              :          END DO
    1929       143114 :          DEALLOCATE (recv_count, info%recv_disp, info%recv_request, info%recv_buf, info%src_blacs2mpi)
    1930              :          ! Invalidate the stored state
    1931              :          NULLIFY (info%recv_col_indices, &
    1932       143114 :                   info%recv_row_indices)
    1933              : 
    1934              :       END IF
    1935              : 
    1936       143114 :       CALL timestop(handle)
    1937              : 
    1938       143114 :    END SUBROUTINE cp_fm_finish_copy_general
    1939              : 
    1940              : ! **************************************************************************************************
    1941              : !> \brief Completes the copy operation: wait for comms clean up MPI state
    1942              : !> \param info all of the data that will be needed to complete the copy operation
    1943              : ! **************************************************************************************************
    1944       155894 :    SUBROUTINE cp_fm_cleanup_copy_general(info)
    1945              :       TYPE(copy_info_type), INTENT(INOUT)                :: info
    1946              : 
    1947              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_cleanup_copy_general'
    1948              : 
    1949              :       INTEGER                                            :: handle
    1950              : 
    1951       155894 :       CALL timeset(routineN, handle)
    1952              : 
    1953              :       IF (.NOT. cp2k_is_parallel) THEN
    1954              :          ! Don't do anything - no MPI state for the serial case
    1955              :       ELSE
    1956              :          ! SRC_2
    1957              :          ! If this process is also in the destination decomposition, this deallocate
    1958              :          ! Was already done in cp_fm_finish_copy_general
    1959       155894 :          IF (ALLOCATED(info%src_blacs2mpi)) THEN
    1960        31324 :             DEALLOCATE (info%src_blacs2mpi)
    1961              :          END IF
    1962       155894 :          CALL mp_waitall(info%send_request)
    1963       155894 :          DEALLOCATE (info%send_request, info%send_buf)
    1964              : 
    1965              :       END IF
    1966              : 
    1967       155894 :       CALL timestop(handle)
    1968              : 
    1969       155894 :    END SUBROUTINE cp_fm_cleanup_copy_general
    1970              : 
    1971              : ! **************************************************************************************************
    1972              : !> \brief General copy of a submatrix of fm matrix to  a submatrix of another fm matrix.
    1973              : !>        The two matrices can have different contexts.
    1974              : !>
    1975              : !>        Summary of distribution routines for dense matrices
    1976              : !>        The following will copy A(iA:iA+M-1,jA:jA+N-1) to B(iB:iB+M-1,jB:jB+N-1):
    1977              : !>
    1978              : !>        call pdgemr2d(M,N,Aloc,iA,jA,descA,Bloc,iB,jB,descB,context)
    1979              : !>
    1980              : !>        A process that is not a part of the context of A should set descA(2)
    1981              : !>        to -1, and similarly for B.
    1982              : !>
    1983              : !> \param source          input fm matrix
    1984              : !> \param destination     output fm matrix
    1985              : !> \param nrows           number of rows of sub matrix to be copied
    1986              : !> \param ncols           number of cols of sub matrix to be copied
    1987              : !> \param s_firstrow      starting global row index of sub matrix in source
    1988              : !> \param s_firstcol      starting global col index of sub matrix in source
    1989              : !> \param d_firstrow      starting global row index of sub matrix in destination
    1990              : !> \param d_firstcol      starting global col index of sub matrix in destination
    1991              : !> \param global_context  process grid that covers all parts of either A or B.
    1992              : ! **************************************************************************************************
    1993        11174 :    SUBROUTINE cp_fm_to_fm_submat_general(source, &
    1994              :                                          destination, &
    1995              :                                          nrows, &
    1996              :                                          ncols, &
    1997              :                                          s_firstrow, &
    1998              :                                          s_firstcol, &
    1999              :                                          d_firstrow, &
    2000              :                                          d_firstcol, &
    2001              :                                          global_context)
    2002              : 
    2003              :       TYPE(cp_fm_type), INTENT(IN)                       :: source, destination
    2004              :       INTEGER, INTENT(IN)                                :: nrows, ncols, s_firstrow, s_firstcol, &
    2005              :                                                             d_firstrow, d_firstcol
    2006              : 
    2007              :       CLASS(cp_blacs_type), INTENT(IN)        :: global_context
    2008              : 
    2009              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_to_fm_submat_general'
    2010              : 
    2011              :       LOGICAL                                  :: debug
    2012              :       INTEGER                                  :: handle
    2013              : #if defined(__parallel)
    2014              :       INTEGER, DIMENSION(9)                    :: desca, descb
    2015              :       REAL(KIND=dp), DIMENSION(1, 1), TARGET   :: dummy
    2016        11174 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: smat, dmat
    2017              : #endif
    2018              : 
    2019        11174 :       CALL timeset(routineN, handle)
    2020              : 
    2021        11174 :       debug = debug_this_module
    2022              : 
    2023              :       IF (.NOT. cp2k_is_parallel) THEN
    2024              :          CALL cp_fm_to_fm_submat(source, &
    2025              :                                  destination, &
    2026              :                                  nrows, &
    2027              :                                  ncols, &
    2028              :                                  s_firstrow, &
    2029              :                                  s_firstcol, &
    2030              :                                  d_firstrow, &
    2031              :                                  d_firstcol)
    2032              :       ELSE
    2033              : #ifdef __parallel
    2034        11174 :          NULLIFY (smat, dmat)
    2035              :          ! check whether source is available on this process
    2036        11174 :          IF (ASSOCIATED(source%matrix_struct)) THEN
    2037       111740 :             desca = source%matrix_struct%descriptor
    2038        11174 :             IF (source%use_sp) CPABORT("only DP kind implemented")
    2039        11174 :             IF (nrows .GT. source%matrix_struct%nrow_global) &
    2040            0 :                CPABORT("nrows is greater than nrow_global of source")
    2041        11174 :             IF (ncols .GT. source%matrix_struct%ncol_global) &
    2042            0 :                CPABORT("ncols is greater than ncol_global of source")
    2043        11174 :             smat => source%local_data
    2044              :          ELSE
    2045            0 :             desca = -1
    2046            0 :             smat => dummy
    2047              :          END IF
    2048              :          ! check destination is available on this process
    2049        11174 :          IF (ASSOCIATED(destination%matrix_struct)) THEN
    2050       111740 :             descb = destination%matrix_struct%descriptor
    2051        11174 :             IF (destination%use_sp) CPABORT("only DP kind implemented")
    2052        11174 :             IF (nrows .GT. destination%matrix_struct%nrow_global) &
    2053            0 :                CPABORT("nrows is greater than nrow_global of destination")
    2054        11174 :             IF (ncols .GT. destination%matrix_struct%ncol_global) &
    2055            0 :                CPABORT("ncols is greater than ncol_global of destination")
    2056        11174 :             dmat => destination%local_data
    2057              :          ELSE
    2058            0 :             descb = -1
    2059            0 :             dmat => dummy
    2060              :          END IF
    2061              :          ! do copy
    2062              : 
    2063              :          CALL pdgemr2d(nrows, &
    2064              :                        ncols, &
    2065              :                        smat, &
    2066              :                        s_firstrow, &
    2067              :                        s_firstcol, &
    2068              :                        desca, &
    2069              :                        dmat, &
    2070              :                        d_firstrow, &
    2071              :                        d_firstcol, &
    2072              :                        descb, &
    2073        11174 :                        global_context%get_handle())
    2074              : #else
    2075              :          MARK_USED(global_context)
    2076              :          CPABORT("this subroutine only supports SCALAPACK")
    2077              : #endif
    2078              :       END IF
    2079              : 
    2080        11174 :       CALL timestop(handle)
    2081              : 
    2082        11174 :    END SUBROUTINE cp_fm_to_fm_submat_general
    2083              : 
    2084              : ! **************************************************************************************************
    2085              : !> \brief ...
    2086              : !> \param matrix ...
    2087              : !> \param irow_global ...
    2088              : !> \param icol_global ...
    2089              : !> \param alpha ...
    2090              : ! **************************************************************************************************
    2091          240 :    SUBROUTINE cp_fm_add_to_element(matrix, irow_global, icol_global, alpha)
    2092              : 
    2093              :       ! Add alpha to the matrix element specified by the global indices
    2094              :       ! irow_global and icol_global
    2095              : 
    2096              :       ! - Creation (05.05.06,MK)
    2097              : 
    2098              :       TYPE(cp_fm_type), INTENT(IN)          :: matrix
    2099              :       INTEGER, INTENT(IN)                      :: irow_global, icol_global
    2100              :       REAL(KIND=dp), INTENT(IN)                :: alpha
    2101              : 
    2102              :       INTEGER                                  :: mypcol, myprow, npcol, nprow
    2103          240 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
    2104              :       TYPE(cp_blacs_env_type), POINTER         :: context
    2105              : #if defined(__parallel)
    2106              :       INTEGER                                  :: icol_local, ipcol, iprow, &
    2107              :                                                   irow_local
    2108              :       INTEGER, DIMENSION(9)                    :: desca
    2109              : #endif
    2110              : 
    2111          240 :       context => matrix%matrix_struct%context
    2112              : 
    2113          240 :       myprow = context%mepos(1)
    2114          240 :       mypcol = context%mepos(2)
    2115              : 
    2116          240 :       nprow = context%num_pe(1)
    2117          240 :       npcol = context%num_pe(2)
    2118              : 
    2119          240 :       a => matrix%local_data
    2120              : 
    2121              : #if defined(__parallel)
    2122              : 
    2123         2400 :       desca(:) = matrix%matrix_struct%descriptor(:)
    2124              : 
    2125              :       CALL infog2l(irow_global, icol_global, desca, nprow, npcol, myprow, mypcol, &
    2126          240 :                    irow_local, icol_local, iprow, ipcol)
    2127              : 
    2128          240 :       IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
    2129          120 :          a(irow_local, icol_local) = a(irow_local, icol_local) + alpha
    2130              :       END IF
    2131              : 
    2132              : #else
    2133              : 
    2134              :       a(irow_global, icol_global) = a(irow_global, icol_global) + alpha
    2135              : 
    2136              : #endif
    2137              : 
    2138          240 :    END SUBROUTINE cp_fm_add_to_element
    2139              : 
    2140              : ! **************************************************************************************************
    2141              : !> \brief ...
    2142              : !> \param fm ...
    2143              : !> \param unit ...
    2144              : ! **************************************************************************************************
    2145        79769 :    SUBROUTINE cp_fm_write_unformatted(fm, unit)
    2146              :       TYPE(cp_fm_type), INTENT(IN)             :: fm
    2147              :       INTEGER, INTENT(IN)                      :: unit
    2148              : 
    2149              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_write_unformatted'
    2150              : 
    2151              :       INTEGER                                  :: handle, j, max_block, &
    2152              :                                                   ncol_global, nrow_global
    2153              :       TYPE(mp_para_env_type), POINTER          :: para_env
    2154              : #if defined(__parallel)
    2155              :       INTEGER                                  :: i, i_block, icol_local, &
    2156              :                                                   in, info, ipcol, &
    2157              :                                                   iprow, irow_local, &
    2158              :                                                   mepos, &
    2159              :                                                   num_pe, rb, tag
    2160              :       INTEGER, DIMENSION(9)                    :: desc
    2161        79769 :       REAL(KIND=dp), DIMENSION(:), POINTER     :: vecbuf
    2162              :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: newdat
    2163              :       TYPE(cp_blacs_type) :: ictxt_loc
    2164              :       INTEGER, EXTERNAL                        :: numroc
    2165              : #endif
    2166              : 
    2167        79769 :       CALL timeset(routineN, handle)
    2168              :       CALL cp_fm_get_info(fm, nrow_global=nrow_global, ncol_global=ncol_global, ncol_block=max_block, &
    2169        79769 :                           para_env=para_env)
    2170              : 
    2171              : #if defined(__parallel)
    2172        79769 :       num_pe = para_env%num_pe
    2173        79769 :       mepos = para_env%mepos
    2174        79769 :       rb = nrow_global
    2175        79769 :       tag = 0
    2176              :       ! get a new context
    2177        79769 :       CALL ictxt_loc%gridinit(para_env, 'R', 1, num_pe)
    2178        79769 :       CALL descinit(desc, nrow_global, ncol_global, rb, max_block, 0, 0, ictxt_loc%get_handle(), nrow_global, info)
    2179        79769 :       CPASSERT(info == 0)
    2180              :       ASSOCIATE (nprow => ictxt_loc%num_pe(1), npcol => ictxt_loc%num_pe(2), &
    2181              :                  myprow => ictxt_loc%mepos(1), mypcol => ictxt_loc%mepos(2))
    2182       159538 :          in = numroc(ncol_global, max_block, mypcol, 0, npcol)
    2183              : 
    2184       319076 :          ALLOCATE (newdat(nrow_global, MAX(1, in)))
    2185              : 
    2186              :          ! do the actual scalapack to cols reordering
    2187              :          CALL pdgemr2d(nrow_global, ncol_global, fm%local_data, 1, 1, &
    2188              :                        fm%matrix_struct%descriptor, &
    2189        79769 :                        newdat, 1, 1, desc, ictxt_loc%get_handle())
    2190              : 
    2191       239307 :          ALLOCATE (vecbuf(nrow_global*max_block))
    2192     55210315 :          vecbuf = HUGE(1.0_dp) ! init for valgrind
    2193              : 
    2194       350719 :          DO i = 1, ncol_global, MAX(max_block, 1)
    2195       191181 :             i_block = MIN(max_block, ncol_global - i + 1)
    2196              :             CALL infog2l(1, i, desc, nprow, npcol, myprow, mypcol, &
    2197       191181 :                          irow_local, icol_local, iprow, ipcol)
    2198       191181 :             IF (ipcol == mypcol) THEN
    2199       946863 :                DO j = 1, i_block
    2200    146483253 :                   vecbuf((j - 1)*nrow_global + 1:nrow_global*j) = newdat(:, icol_local + j - 1)
    2201              :                END DO
    2202              :             END IF
    2203              : 
    2204       191181 :             IF (ipcol == 0) THEN
    2205              :                ! do nothing
    2206              :             ELSE
    2207        65854 :                IF (ipcol == mypcol) THEN
    2208     36211342 :                   CALL para_env%send(vecbuf(:), 0, tag)
    2209              :                END IF
    2210        65854 :                IF (mypcol == 0) THEN
    2211     72389757 :                   CALL para_env%recv(vecbuf(:), ipcol, tag)
    2212              :                END IF
    2213              :             END IF
    2214              : 
    2215       270950 :             IF (unit > 0) THEN
    2216       917623 :                DO j = 1, i_block
    2217       917623 :                   WRITE (unit) vecbuf((j - 1)*nrow_global + 1:nrow_global*j)
    2218              :                END DO
    2219              :             END IF
    2220              : 
    2221              :          END DO
    2222              :       END ASSOCIATE
    2223        79769 :       DEALLOCATE (vecbuf)
    2224              : 
    2225        79769 :       CALL ictxt_loc%gridexit()
    2226              : 
    2227        79769 :       DEALLOCATE (newdat)
    2228              : 
    2229              : #else
    2230              : 
    2231              :       IF (unit > 0) THEN
    2232              :          DO j = 1, ncol_global
    2233              :             WRITE (unit) fm%local_data(:, j)
    2234              :          END DO
    2235              :       END IF
    2236              : 
    2237              : #endif
    2238        79769 :       CALL timestop(handle)
    2239              : 
    2240       398845 :    END SUBROUTINE cp_fm_write_unformatted
    2241              : 
    2242              : ! **************************************************************************************************
    2243              : !> \brief Write out a full matrix in plain text.
    2244              : !> \param fm the matrix to be outputted
    2245              : !> \param unit the unit number for I/O
    2246              : !> \param header optional header
    2247              : !> \param value_format ...
    2248              : ! **************************************************************************************************
    2249         1022 :    SUBROUTINE cp_fm_write_formatted(fm, unit, header, value_format)
    2250              :       TYPE(cp_fm_type), INTENT(IN)             :: fm
    2251              :       INTEGER, INTENT(IN)                      :: unit
    2252              :       CHARACTER(LEN=*), INTENT(IN), OPTIONAL   :: header, value_format
    2253              : 
    2254              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_write_formatted'
    2255              : 
    2256              :       CHARACTER(LEN=21)                        :: my_value_format
    2257              :       INTEGER                                  :: handle, i, j, max_block, &
    2258              :                                                   ncol_global, nrow_global
    2259              :       TYPE(mp_para_env_type), POINTER          :: para_env
    2260              : #if defined(__parallel)
    2261              :       INTEGER                                  :: i_block, icol_local, &
    2262              :                                                   in, info, ipcol, &
    2263              :                                                   iprow, irow_local, &
    2264              :                                                   mepos, num_pe, rb, tag, k, &
    2265              :                                                   icol, irow
    2266              :       INTEGER, DIMENSION(9)                    :: desc
    2267         1022 :       REAL(KIND=dp), DIMENSION(:), POINTER     :: vecbuf
    2268         1022 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: newdat
    2269              :       TYPE(cp_blacs_type) :: ictxt_loc
    2270              :       INTEGER, EXTERNAL                        :: numroc
    2271              : #endif
    2272              : 
    2273         1022 :       CALL timeset(routineN, handle)
    2274              :       CALL cp_fm_get_info(fm, nrow_global=nrow_global, ncol_global=ncol_global, ncol_block=max_block, &
    2275         1022 :                           para_env=para_env)
    2276              : 
    2277         1022 :       IF (PRESENT(value_format)) THEN
    2278            0 :          CPASSERT(LEN_TRIM(ADJUSTL(value_format)) < 11)
    2279            0 :          my_value_format = "(I10, I10, "//TRIM(ADJUSTL(value_format))//")"
    2280              :       ELSE
    2281         1022 :          my_value_format = "(I10, I10, ES24.12)"
    2282              :       END IF
    2283              : 
    2284         1022 :       IF (unit > 0) THEN
    2285            5 :          IF (PRESENT(header)) WRITE (unit, *) header
    2286            5 :          WRITE (unit, "(A2, A8, A10, A24)") "#", "Row", "Column", ADJUSTL("Value")
    2287              :       END IF
    2288              : 
    2289              : #if defined(__parallel)
    2290         1022 :       num_pe = para_env%num_pe
    2291         1022 :       mepos = para_env%mepos
    2292         1022 :       rb = nrow_global
    2293         1022 :       tag = 0
    2294              :       ! get a new context
    2295         1022 :       CALL ictxt_loc%gridinit(para_env, 'R', 1, num_pe)
    2296         1022 :       CALL descinit(desc, nrow_global, ncol_global, rb, max_block, 0, 0, ictxt_loc%get_handle(), nrow_global, info)
    2297         1022 :       CPASSERT(info == 0)
    2298              :       ASSOCIATE (nprow => ictxt_loc%num_pe(1), npcol => ictxt_loc%num_pe(2), &
    2299              :                  myprow => ictxt_loc%mepos(1), mypcol => ictxt_loc%mepos(2))
    2300         2044 :          in = numroc(ncol_global, max_block, mypcol, 0, npcol)
    2301              : 
    2302         4088 :          ALLOCATE (newdat(nrow_global, MAX(1, in)))
    2303              : 
    2304              :          ! do the actual scalapack to cols reordering
    2305              :          CALL pdgemr2d(nrow_global, ncol_global, fm%local_data, 1, 1, &
    2306              :                        fm%matrix_struct%descriptor, &
    2307         1022 :                        newdat, 1, 1, desc, ictxt_loc%get_handle())
    2308              : 
    2309         3066 :          ALLOCATE (vecbuf(nrow_global*max_block))
    2310         4124 :          vecbuf = HUGE(1.0_dp) ! init for valgrind
    2311         1022 :          irow = 1
    2312         1022 :          icol = 1
    2313              : 
    2314         4082 :          DO i = 1, ncol_global, MAX(max_block, 1)
    2315         2038 :             i_block = MIN(max_block, ncol_global - i + 1)
    2316              :             CALL infog2l(1, i, desc, nprow, npcol, myprow, mypcol, &
    2317         2038 :                          irow_local, icol_local, iprow, ipcol)
    2318         2038 :             IF (ipcol == mypcol) THEN
    2319         2061 :                DO j = 1, i_block
    2320         7329 :                   vecbuf((j - 1)*nrow_global + 1:nrow_global*j) = newdat(:, icol_local + j - 1)
    2321              :                END DO
    2322              :             END IF
    2323              : 
    2324         2038 :             IF (ipcol == 0) THEN
    2325              :                ! do nothing
    2326              :             ELSE
    2327         1016 :                IF (ipcol == mypcol) THEN
    2328         1890 :                   CALL para_env%send(vecbuf(:), 0, tag)
    2329              :                END IF
    2330         1016 :                IF (mypcol == 0) THEN
    2331         3272 :                   CALL para_env%recv(vecbuf(:), ipcol, tag)
    2332              :                END IF
    2333              :             END IF
    2334              : 
    2335         3060 :             IF (unit > 0) THEN
    2336           37 :                DO j = 1, i_block
    2337          647 :                   DO k = (j - 1)*nrow_global + 1, nrow_global*j
    2338          610 :                      WRITE (UNIT=unit, FMT=my_value_format) irow, icol, vecbuf(k)
    2339          610 :                      irow = irow + 1
    2340          640 :                      IF (irow > nrow_global) THEN
    2341           30 :                         irow = 1
    2342           30 :                         icol = icol + 1
    2343              :                      END IF
    2344              :                   END DO
    2345              :                END DO
    2346              :             END IF
    2347              : 
    2348              :          END DO
    2349              :       END ASSOCIATE
    2350         1022 :       DEALLOCATE (vecbuf)
    2351              : 
    2352         1022 :       CALL ictxt_loc%gridexit()
    2353              : 
    2354         1022 :       DEALLOCATE (newdat)
    2355              : 
    2356              : #else
    2357              : 
    2358              :       IF (unit > 0) THEN
    2359              :          DO j = 1, ncol_global
    2360              :             DO i = 1, nrow_global
    2361              :                WRITE (UNIT=unit, FMT=my_value_format) i, j, fm%local_data(i, j)
    2362              :             END DO
    2363              :          END DO
    2364              :       END IF
    2365              : 
    2366              : #endif
    2367         1022 :       CALL timestop(handle)
    2368              : 
    2369         5110 :    END SUBROUTINE cp_fm_write_formatted
    2370              : 
    2371              : ! **************************************************************************************************
    2372              : !> \brief ...
    2373              : !> \param fm ...
    2374              : !> \param unit ...
    2375              : ! **************************************************************************************************
    2376         1352 :    SUBROUTINE cp_fm_read_unformatted(fm, unit)
    2377              :       TYPE(cp_fm_type), INTENT(INOUT)          :: fm
    2378              :       INTEGER, INTENT(IN)                      :: unit
    2379              : 
    2380              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_read_unformatted'
    2381              : 
    2382              :       INTEGER                                  :: handle, j, max_block, &
    2383              :                                                   ncol_global, nrow_global
    2384              :       TYPE(mp_para_env_type), POINTER          :: para_env
    2385              : #if defined(__parallel)
    2386              :       INTEGER                                  :: k, n_cols
    2387              :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: vecbuf
    2388              : #endif
    2389              : 
    2390         1352 :       CALL timeset(routineN, handle)
    2391              : 
    2392              :       CALL cp_fm_get_info(fm, nrow_global=nrow_global, ncol_global=ncol_global, ncol_block=max_block, &
    2393         1352 :                           para_env=para_env)
    2394              : 
    2395              : #if defined(__parallel)
    2396              : 
    2397              :       ! the parallel case could be made more efficient (see cp_fm_write_unformatted)
    2398              : 
    2399         5408 :       ALLOCATE (vecbuf(nrow_global, max_block))
    2400              : 
    2401         5190 :       DO j = 1, ncol_global, max_block
    2402              : 
    2403         3838 :          n_cols = MIN(max_block, ncol_global - j + 1)
    2404         3838 :          IF (para_env%mepos == 0) THEN
    2405        31344 :             DO k = 1, n_cols
    2406        31344 :                READ (unit) vecbuf(:, k)
    2407              :             END DO
    2408              :          END IF
    2409     11457886 :          CALL para_env%bcast(vecbuf, 0)
    2410         5190 :          CALL cp_fm_set_submatrix(fm, vecbuf, start_row=1, start_col=j, n_cols=n_cols)
    2411              : 
    2412              :       END DO
    2413              : 
    2414         1352 :       DEALLOCATE (vecbuf)
    2415              : 
    2416              : #else
    2417              : 
    2418              :       DO j = 1, ncol_global
    2419              :          READ (unit) fm%local_data(:, j)
    2420              :       END DO
    2421              : 
    2422              : #endif
    2423              : 
    2424         1352 :       CALL timestop(handle)
    2425              : 
    2426         1352 :    END SUBROUTINE cp_fm_read_unformatted
    2427              : 
    2428              : ! **************************************************************************************************
    2429              : !> \brief ...
    2430              : !> \param mm_type ...
    2431              : ! **************************************************************************************************
    2432         9835 :    SUBROUTINE cp_fm_setup(mm_type)
    2433              :       INTEGER, INTENT(IN)                                :: mm_type
    2434              : 
    2435         9835 :       cp_fm_mm_type = mm_type
    2436         9835 :    END SUBROUTINE cp_fm_setup
    2437              : 
    2438              : ! **************************************************************************************************
    2439              : !> \brief ...
    2440              : !> \return ...
    2441              : ! **************************************************************************************************
    2442      1455137 :    FUNCTION cp_fm_get_mm_type() RESULT(res)
    2443              :       INTEGER                                            :: res
    2444              : 
    2445      1455137 :       res = cp_fm_mm_type
    2446      1455137 :    END FUNCTION
    2447              : 
    2448              : ! **************************************************************************************************
    2449              : !> \brief ...
    2450              : !> \param ictxt ...
    2451              : !> \param prec ...
    2452              : !> \return ...
    2453              : ! **************************************************************************************************
    2454           10 :    FUNCTION cp_fm_pilaenv(ictxt, prec) RESULT(res)
    2455              :       INTEGER                                            :: ictxt
    2456              :       CHARACTER(LEN=1)                                   :: prec
    2457              :       INTEGER                                            :: res
    2458              : #if defined(__parallel)
    2459              :       INTEGER                                            :: pilaenv
    2460           10 :       res = pilaenv(ictxt, prec)
    2461              : #else
    2462              :       MARK_USED(ictxt)
    2463              :       MARK_USED(prec)
    2464              :       res = -1
    2465              : #endif
    2466              : 
    2467           10 :    END FUNCTION
    2468              : 
    2469            0 : END MODULE cp_fm_types
        

Generated by: LCOV version 2.0-1