LCOV - code coverage report
Current view: top level - src/fm - cp_fm_types.F (source / functions) Coverage Total Hit
Test: CP2K Regtests (git:c24029e) Lines: 87.8 % 800 702
Test Date: 2026-07-04 06:36:57 Functions: 80.4 % 46 37

            Line data    Source code
       1              : !--------------------------------------------------------------------------------------------------!
       2              : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3              : !   Copyright 2000-2026 CP2K developers group <https://cp2k.org>                                   !
       4              : !                                                                                                  !
       5              : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6              : !--------------------------------------------------------------------------------------------------!
       7              : 
       8              : ! **************************************************************************************************
       9              : !> \brief represent a full matrix distributed on many processors
      10              : !> \par History
      11              : !>      3) separated structure object, removed globenv, renamed to full matrix
      12              : !>         many changes (fawzi 08.2002)
      13              : !> \author Matthias Krack (22.05.2001)
      14              : ! **************************************************************************************************
      15              : MODULE cp_fm_types
      16              :    USE cp_blacs_env,                    ONLY: cp_blacs_env_type
      17              :    USE cp_blacs_types,                  ONLY: cp_blacs_type
      18              :    USE cp_fm_struct,                    ONLY: cp_fm_struct_create,&
      19              :                                               cp_fm_struct_equivalent,&
      20              :                                               cp_fm_struct_get,&
      21              :                                               cp_fm_struct_release,&
      22              :                                               cp_fm_struct_retain,&
      23              :                                               cp_fm_struct_type,&
      24              :                                               cp_fm_struct_write_info
      25              :    USE kinds,                           ONLY: dp
      26              :    USE message_passing,                 ONLY: cp2k_is_parallel,&
      27              :                                               mp_any_source,&
      28              :                                               mp_para_env_type,&
      29              :                                               mp_proc_null,&
      30              :                                               mp_request_null,&
      31              :                                               mp_request_type,&
      32              :                                               mp_waitall
      33              :    USE parallel_rng_types,              ONLY: UNIFORM,&
      34              :                                               rng_stream_type
      35              : #include "../base/base_uses.f90"
      36              : 
      37              :    IMPLICIT NONE
      38              : 
      39              :    PRIVATE
      40              : 
      41              :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_fm_types'
      42              :    LOGICAL, PARAMETER          :: debug_this_module = .TRUE.
      43              :    INTEGER, PARAMETER :: src_tag = 3, dest_tag = 5, send_tag = 7, recv_tag = 11
      44              : 
      45              :    INTEGER, PRIVATE :: cp_fm_mm_type = 1
      46              : 
      47              :    PUBLIC :: cp_fm_type, &
      48              :              cp_fm_p_type, copy_info_type
      49              : 
      50              :    PUBLIC :: cp_fm_add_to_element, &
      51              :              cp_fm_create, &
      52              :              cp_fm_release, &
      53              :              cp_fm_get_info, &
      54              :              cp_fm_set_element, &
      55              :              cp_fm_get_element, &
      56              :              cp_fm_get_diag, & ! get diagonal
      57              :              cp_fm_set_all, & ! set all elements and diagonal
      58              :              cp_fm_set_all_submatrix, & ! set a submatrix to a given value
      59              :              cp_fm_set_submatrix, & ! set a submatrix to given values
      60              :              cp_fm_get_submatrix, & ! get a submatrix of given values
      61              :              cp_fm_init_random, &
      62              :              cp_fm_maxabsval, & ! find the maximum absolute value
      63              :              cp_fm_maxabsrownorm, & ! find the maximum of the sum of the abs of the elements of a row
      64              :              cp_fm_to_fm, & ! copy (parts of) a fm to a fm
      65              :              cp_fm_vectorsnorm, & ! compute the norm of the column-vectors
      66              :              cp_fm_vectorssum, & ! compute the sum of all elements of the column-vectors
      67              :              cp_fm_to_fm_submat, & ! copy (parts of) a fm to a fm
      68              :              cp_fm_to_fm_triangular, &
      69              :              cp_fm_copy_general, &
      70              :              cp_fm_start_copy_general, &
      71              :              cp_fm_finish_copy_general, &
      72              :              cp_fm_cleanup_copy_general, &
      73              :              cp_fm_write_unformatted, & ! writes a full matrix to an open unit
      74              :              cp_fm_write_formatted, & ! writes a full matrix to an open unit
      75              :              cp_fm_read_unformatted, & ! reads a full matrix from an open unit
      76              :              cp_fm_setup, & ! allows to set flags for fms
      77              :              cp_fm_get_mm_type, &
      78              :              cp_fm_write_info, &
      79              :              cp_fm_to_fm_submat_general ! copy matrix across different contexts
      80              : 
      81              :    PUBLIC :: cp_fm_pilaenv
      82              : 
      83              :    INTERFACE cp_fm_to_fm
      84              :       MODULE PROCEDURE cp_fm_to_fm_matrix, & ! a full matrix
      85              :          cp_fm_to_fm_columns ! just a number of columns
      86              :    END INTERFACE
      87              : 
      88              :    INTERFACE cp_fm_release
      89              :       MODULE PROCEDURE cp_fm_release_aa0, &
      90              :          cp_fm_release_aa1, &
      91              :          cp_fm_release_aa2, &
      92              :          cp_fm_release_aa3, &
      93              :          cp_fm_release_ap1, &
      94              :          cp_fm_release_ap2, &
      95              :          cp_fm_release_pa1, &
      96              :          cp_fm_release_pa2, &
      97              :          cp_fm_release_pa3, &
      98              :          cp_fm_release_pp1, &
      99              :          cp_fm_release_pp2
     100              :    END INTERFACE
     101              : 
     102              : ! **************************************************************************************************
     103              : !> \brief represent a full matrix
     104              : !> \param name the name of the matrix, used for printing
     105              : !> \param matrix_struct structure of this matrix
     106              : !> \param local_data array with the data of the matrix (its contents
     107              : !>        depend on the matrix type used: in parallel runs it will be
     108              : !>        in scalapack format, in sequential, it will simply contain
     109              : !>        the matrix)
     110              : !> \par History
     111              : !>      08.2002 created [fawzi]
     112              : !> \author fawzi
     113              : ! **************************************************************************************************
     114              :    TYPE cp_fm_type
     115              : !    PRIVATE
     116              :       CHARACTER(LEN=60) :: name = ""
     117              :       TYPE(cp_fm_struct_type), POINTER :: matrix_struct => NULL()
     118              :       REAL(KIND=dp), DIMENSION(:, :), POINTER, CONTIGUOUS :: local_data => NULL()
     119              :    END TYPE cp_fm_type
     120              : 
     121              : ! **************************************************************************************************
     122              : !> \brief just to build arrays of pointers to matrices
     123              : !> \param matrix the pointer to the matrix
     124              : !> \par History
     125              : !>      08.2002 created [fawzi]
     126              : !> \author fawzi
     127              : ! **************************************************************************************************
     128              :    TYPE cp_fm_p_type
     129              :       TYPE(cp_fm_type), POINTER :: matrix => NULL()
     130              :    END TYPE cp_fm_p_type
     131              : 
     132              : ! **************************************************************************************************
     133              : !> \brief Stores the state of a copy between cp_fm_start_copy_general
     134              : !>        and cp_fm_finish_copy_general
     135              : !> \par History
     136              : !>      Jan 2017  [Mark T]
     137              : ! **************************************************************************************************
     138              :    TYPE copy_info_type
     139              :       INTEGER :: send_size = -1
     140              :       INTEGER, DIMENSION(2) :: nlocal_recv = -1, nblock_src = -1, src_num_pe = -1 ! 1->row  2->col
     141              :       TYPE(mp_request_type), DIMENSION(:), ALLOCATABLE :: send_request, recv_request
     142              :       INTEGER, DIMENSION(:), ALLOCATABLE   :: recv_disp
     143              :       INTEGER, DIMENSION(:), POINTER       :: recv_col_indices => NULL(), recv_row_indices => NULL()
     144              :       INTEGER, DIMENSION(:, :), ALLOCATABLE :: src_blacs2mpi
     145              :       REAL(KIND=dp), DIMENSION(:), ALLOCATABLE :: recv_buf, send_buf
     146              :    END TYPE copy_info_type
     147              : 
     148              : CONTAINS
     149              : 
     150              : ! **************************************************************************************************
     151              : !> \brief creates a new full matrix with the given structure
     152              : !> \param matrix the matrix to be created
     153              : !> \param matrix_struct the structure of matrix
     154              : !> \param name ...
     155              : !> \param nrow ...
     156              : !> \param ncol ...
     157              : !> \param set_zero ...
     158              : !> \par History
     159              : !>      08.2002 created [fawzi]
     160              : !> \author Fawzi Mohamed
     161              : !> \note
     162              : !>      preferred allocation routine
     163              : ! **************************************************************************************************
     164      1855479 :    SUBROUTINE cp_fm_create(matrix, matrix_struct, name, nrow, ncol, set_zero)
     165              :       TYPE(cp_fm_type), INTENT(OUT)                      :: matrix
     166              :       TYPE(cp_fm_struct_type), INTENT(IN), TARGET        :: matrix_struct
     167              :       CHARACTER(LEN=*), INTENT(in), OPTIONAL             :: name
     168              :       INTEGER, INTENT(IN), OPTIONAL                      :: nrow, ncol
     169              :       LOGICAL, INTENT(in), OPTIONAL                      :: set_zero
     170              : 
     171              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'cp_fm_create'
     172              : 
     173              :       INTEGER                                            :: handle, ncol_global, ncol_local, &
     174              :                                                             nrow_global, nrow_local
     175              :       TYPE(cp_blacs_env_type), POINTER                   :: context
     176              :       TYPE(cp_fm_struct_type), POINTER                   :: fm_struct
     177              : 
     178      1855479 :       CALL timeset(routineN, handle)
     179              : 
     180      1855479 :       IF (PRESENT(nrow) .OR. PRESENT(ncol)) THEN
     181         1912 :          CALL cp_fm_struct_get(matrix_struct, nrow_global=nrow_global, ncol_global=ncol_global)
     182         1912 :          IF (PRESENT(nrow)) nrow_global = nrow
     183         1912 :          IF (PRESENT(ncol)) ncol_global = ncol
     184              :          CALL cp_fm_struct_create(fm_struct, template_fmstruct=matrix_struct, &
     185         1912 :                                   nrow_global=nrow_global, ncol_global=ncol_global)
     186              : 
     187         1912 :          context => fm_struct%context
     188         1912 :          matrix%matrix_struct => fm_struct
     189         1912 :          CALL cp_fm_struct_retain(matrix%matrix_struct)
     190              : 
     191         1912 :          nrow_local = fm_struct%local_leading_dimension
     192         1912 :          ncol_local = MAX(1, fm_struct%ncol_locals(context%mepos(2)))
     193              : 
     194         1912 :          CALL cp_fm_struct_release(fm_struct)
     195              :       ELSE
     196              : 
     197      1853567 :          context => matrix_struct%context
     198      1853567 :          matrix%matrix_struct => matrix_struct
     199      1853567 :          CALL cp_fm_struct_retain(matrix%matrix_struct)
     200              : 
     201              :          ! OK, we allocate here at least a 1 x 1 matrix
     202              :          ! this must (and is) compatible with the descinit call
     203              :          ! in cp_fm_struct
     204      1853567 :          nrow_local = matrix_struct%local_leading_dimension
     205      1853567 :          ncol_local = MAX(1, matrix_struct%ncol_locals(context%mepos(2)))
     206              :       END IF
     207              : 
     208      1855479 :       NULLIFY (matrix%local_data)
     209              : 
     210      7421916 :       ALLOCATE (matrix%local_data(nrow_local, ncol_local))
     211              : 
     212      1855479 :       IF (PRESENT(set_zero)) THEN
     213       219927 :          IF (set_zero) THEN
     214     70983010 :             matrix%local_data(1:nrow_local, 1:ncol_local) = 0.0_dp
     215              :          END IF
     216              :       END IF
     217              : 
     218      1855479 :       IF (PRESENT(name)) THEN
     219       849353 :          matrix%name = name
     220              :       ELSE
     221      1006126 :          matrix%name = 'full matrix'
     222              :       END IF
     223              : 
     224      1855479 :       CALL timestop(handle)
     225              : 
     226      1855479 :    END SUBROUTINE cp_fm_create
     227              : 
     228              : ! **************************************************************************************************
     229              : !> \brief releases a full matrix
     230              : !> \param matrix the matrix to release
     231              : !> \par History
     232              : !>      08.2002 created [fawzi]
     233              : !> \author Fawzi Mohamed
     234              : ! **************************************************************************************************
     235      1875282 :    SUBROUTINE cp_fm_release_aa0(matrix)
     236              :       TYPE(cp_fm_type), INTENT(INOUT)                    :: matrix
     237              : 
     238      1875282 :       IF (ASSOCIATED(matrix%local_data)) THEN
     239      1854755 :          DEALLOCATE (matrix%local_data)
     240              :          NULLIFY (matrix%local_data)
     241              :       END IF
     242      1875282 :       matrix%name = ""
     243      1875282 :       CALL cp_fm_struct_release(matrix%matrix_struct)
     244              : 
     245      1875282 :    END SUBROUTINE cp_fm_release_aa0
     246              : 
     247              : ! **************************************************************************************************
     248              : !> \brief ...
     249              : !> \param matrices ...
     250              : ! **************************************************************************************************
     251        79358 :    SUBROUTINE cp_fm_release_aa1(matrices)
     252              :       TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:)        :: matrices
     253              : 
     254              :       INTEGER                                            :: i
     255              : 
     256        79358 :       IF (ALLOCATED(matrices)) THEN
     257       210485 :          DO i = 1, SIZE(matrices)
     258       210485 :             CALL cp_fm_release(matrices(i))
     259              :          END DO
     260        77942 :          DEALLOCATE (matrices)
     261              :       END IF
     262        79358 :    END SUBROUTINE cp_fm_release_aa1
     263              : 
     264              : ! **************************************************************************************************
     265              : !> \brief ...
     266              : !> \param matrices ...
     267              : ! **************************************************************************************************
     268         8982 :    SUBROUTINE cp_fm_release_aa2(matrices)
     269              :       TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:, :)     :: matrices
     270              : 
     271              :       INTEGER                                            :: i, j
     272              : 
     273         8982 :       IF (ALLOCATED(matrices)) THEN
     274        17692 :          DO i = 1, SIZE(matrices, 1)
     275        48856 :             DO j = 1, SIZE(matrices, 2)
     276        42808 :                CALL cp_fm_release(matrices(i, j))
     277              :             END DO
     278              :          END DO
     279         6048 :          DEALLOCATE (matrices)
     280              :       END IF
     281         8982 :    END SUBROUTINE cp_fm_release_aa2
     282              : 
     283              : ! **************************************************************************************************
     284              : !> \brief ...
     285              : !> \param matrices ...
     286              : ! **************************************************************************************************
     287           48 :    SUBROUTINE cp_fm_release_aa3(matrices)
     288              :       TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:, :, :)  :: matrices
     289              : 
     290              :       INTEGER                                            :: i, j, k
     291              : 
     292           48 :       IF (ALLOCATED(matrices)) THEN
     293          550 :          DO i = 1, SIZE(matrices, 1)
     294         1582 :             DO j = 1, SIZE(matrices, 2)
     295         2686 :                DO k = 1, SIZE(matrices, 3)
     296         2184 :                   CALL cp_fm_release(matrices(i, j, k))
     297              :                END DO
     298              :             END DO
     299              :          END DO
     300           48 :          DEALLOCATE (matrices)
     301              :       END IF
     302           48 :    END SUBROUTINE cp_fm_release_aa3
     303              : 
     304              : ! **************************************************************************************************
     305              : !> \brief ...
     306              : !> \param matrices ...
     307              : ! **************************************************************************************************
     308       188992 :    SUBROUTINE cp_fm_release_pa1(matrices)
     309              :       TYPE(cp_fm_type), DIMENSION(:), POINTER            :: matrices
     310              : 
     311              :       INTEGER                                            :: i
     312              : 
     313       188992 :       IF (ASSOCIATED(matrices)) THEN
     314       145519 :          DO i = 1, SIZE(matrices)
     315       145519 :             CALL cp_fm_release(matrices(i))
     316              :          END DO
     317        60668 :          DEALLOCATE (matrices)
     318              :          NULLIFY (matrices)
     319              :       END IF
     320       188992 :    END SUBROUTINE cp_fm_release_pa1
     321              : 
     322              : ! **************************************************************************************************
     323              : !> \brief ...
     324              : !> \param matrices ...
     325              : ! **************************************************************************************************
     326        70274 :    SUBROUTINE cp_fm_release_pa2(matrices)
     327              :       TYPE(cp_fm_type), DIMENSION(:, :), POINTER         :: matrices
     328              : 
     329              :       INTEGER                                            :: i, j
     330              : 
     331        70274 :       IF (ASSOCIATED(matrices)) THEN
     332        79942 :          DO i = 1, SIZE(matrices, 1)
     333       147650 :             DO j = 1, SIZE(matrices, 2)
     334       125358 :                CALL cp_fm_release(matrices(i, j))
     335              :             END DO
     336              :          END DO
     337        22292 :          DEALLOCATE (matrices)
     338              :          NULLIFY (matrices)
     339              :       END IF
     340        70274 :    END SUBROUTINE cp_fm_release_pa2
     341              : 
     342              : ! **************************************************************************************************
     343              : !> \brief ...
     344              : !> \param matrices ...
     345              : ! **************************************************************************************************
     346            0 :    SUBROUTINE cp_fm_release_pa3(matrices)
     347              :       TYPE(cp_fm_type), DIMENSION(:, :, :), POINTER      :: matrices
     348              : 
     349              :       INTEGER                                            :: i, j, k
     350              : 
     351            0 :       IF (ASSOCIATED(matrices)) THEN
     352            0 :          DO i = 1, SIZE(matrices, 1)
     353            0 :             DO j = 1, SIZE(matrices, 2)
     354            0 :                DO k = 1, SIZE(matrices, 3)
     355            0 :                   CALL cp_fm_release(matrices(i, j, k))
     356              :                END DO
     357              :             END DO
     358              :          END DO
     359            0 :          DEALLOCATE (matrices)
     360              :          NULLIFY (matrices)
     361              :       END IF
     362            0 :    END SUBROUTINE cp_fm_release_pa3
     363              : 
     364              : ! **************************************************************************************************
     365              : !> \brief ...
     366              : !> \param matrices ...
     367              : ! **************************************************************************************************
     368            0 :    SUBROUTINE cp_fm_release_ap1(matrices)
     369              :       TYPE(cp_fm_p_type), ALLOCATABLE, DIMENSION(:)      :: matrices
     370              : 
     371              :       INTEGER                                            :: i
     372              : 
     373            0 :       IF (ALLOCATED(matrices)) THEN
     374            0 :          DO i = 1, SIZE(matrices)
     375            0 :             CALL cp_fm_release(matrices(i)%matrix)
     376            0 :             DEALLOCATE (matrices(i)%matrix)
     377              :          END DO
     378            0 :          DEALLOCATE (matrices)
     379              :       END IF
     380            0 :    END SUBROUTINE cp_fm_release_ap1
     381              : 
     382              : ! **************************************************************************************************
     383              : !> \brief ...
     384              : !> \param matrices ...
     385              : ! **************************************************************************************************
     386            0 :    SUBROUTINE cp_fm_release_ap2(matrices)
     387              :       TYPE(cp_fm_p_type), ALLOCATABLE, DIMENSION(:, :)   :: matrices
     388              : 
     389              :       INTEGER                                            :: i, j
     390              : 
     391            0 :       IF (ALLOCATED(matrices)) THEN
     392            0 :          DO i = 1, SIZE(matrices, 1)
     393            0 :             DO j = 1, SIZE(matrices, 2)
     394            0 :                CALL cp_fm_release(matrices(i, j)%matrix)
     395            0 :                DEALLOCATE (matrices(i, j)%matrix)
     396              :             END DO
     397              :          END DO
     398            0 :          DEALLOCATE (matrices)
     399              :       END IF
     400            0 :    END SUBROUTINE cp_fm_release_ap2
     401              : 
     402              : ! **************************************************************************************************
     403              : !> \brief ...
     404              : !> \param matrices ...
     405              : ! **************************************************************************************************
     406            0 :    SUBROUTINE cp_fm_release_pp1(matrices)
     407              :       TYPE(cp_fm_p_type), DIMENSION(:), POINTER          :: matrices
     408              : 
     409              :       INTEGER                                            :: i
     410              : 
     411            0 :       IF (ASSOCIATED(matrices)) THEN
     412            0 :          DO i = 1, SIZE(matrices)
     413            0 :             CALL cp_fm_release(matrices(i)%matrix)
     414            0 :             DEALLOCATE (matrices(i)%matrix)
     415              :          END DO
     416            0 :          DEALLOCATE (matrices)
     417              :          NULLIFY (matrices)
     418              :       END IF
     419            0 :    END SUBROUTINE cp_fm_release_pp1
     420              : 
     421              : ! **************************************************************************************************
     422              : !> \brief ...
     423              : !> \param matrices ...
     424              : ! **************************************************************************************************
     425            0 :    SUBROUTINE cp_fm_release_pp2(matrices)
     426              :       TYPE(cp_fm_p_type), DIMENSION(:, :), POINTER       :: matrices
     427              : 
     428              :       INTEGER                                            :: i, j
     429              : 
     430            0 :       IF (ASSOCIATED(matrices)) THEN
     431            0 :          DO i = 1, SIZE(matrices, 1)
     432            0 :             DO j = 1, SIZE(matrices, 2)
     433            0 :                CALL cp_fm_release(matrices(i, j)%matrix)
     434            0 :                DEALLOCATE (matrices(i, j)%matrix)
     435              :             END DO
     436              :          END DO
     437            0 :          DEALLOCATE (matrices)
     438              :          NULLIFY (matrices)
     439              :       END IF
     440            0 :    END SUBROUTINE cp_fm_release_pp2
     441              : 
     442              : ! **************************************************************************************************
     443              : !> \brief fills a matrix with random numbers
     444              : !> \param matrix : to be initialized
     445              : !> \param ncol : numbers of cols to fill
     446              : !> \param start_col : starting at coll number
     447              : !> \author Joost VandeVondele
     448              : !> \note
     449              : !>      the value of a_ij is independent of the number of cpus
     450              : ! **************************************************************************************************
     451         2924 :    SUBROUTINE cp_fm_init_random(matrix, ncol, start_col)
     452              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix
     453              :       INTEGER, INTENT(IN), OPTIONAL                      :: ncol, start_col
     454              : 
     455              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'cp_fm_init_random'
     456              : 
     457              :       INTEGER :: handle, icol_global, icol_local, irow_local, my_ncol, my_start_col, ncol_global, &
     458              :          ncol_local, nrow_global, nrow_local
     459         5848 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
     460              :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: buff
     461              :       REAL(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
     462         2924 :          POINTER                                         :: local_data
     463              :       REAL(KIND=dp), DIMENSION(3, 2), SAVE :: &
     464              :          seed = RESHAPE([1.0_dp, 2.0_dp, 3.0_dp, 4.0_dp, 5.0_dp, 6.0_dp], [3, 2])
     465              :       TYPE(rng_stream_type)                              :: rng
     466              : 
     467         2924 :       CALL timeset(routineN, handle)
     468              : 
     469              :       ! guarantee same seed over all tasks
     470         2924 :       CALL matrix%matrix_struct%para_env%bcast(seed, 0)
     471              : 
     472              :       rng = rng_stream_type("cp_fm_init_random_stream", distribution_type=UNIFORM, &
     473         2924 :                             extended_precision=.TRUE., seed=seed)
     474              : 
     475              :       CALL cp_fm_get_info(matrix, nrow_global=nrow_global, ncol_global=ncol_global, &
     476              :                           nrow_local=nrow_local, ncol_local=ncol_local, &
     477              :                           local_data=local_data, &
     478         2924 :                           row_indices=row_indices, col_indices=col_indices)
     479              : 
     480         2924 :       my_start_col = 1
     481         2924 :       IF (PRESENT(start_col)) my_start_col = start_col
     482         2924 :       my_ncol = matrix%matrix_struct%ncol_global
     483         2924 :       IF (PRESENT(ncol)) my_ncol = ncol
     484              : 
     485         2924 :       IF (ncol_global < (my_start_col + my_ncol - 1)) &
     486            0 :          CPABORT("ncol_global>=(my_start_col+my_ncol-1)")
     487              : 
     488         8772 :       ALLOCATE (buff(nrow_global))
     489              : 
     490              :       ! each global row has its own substream, in order to reach the stream for the local col,
     491              :       ! we just reset to the next substream
     492              :       ! following this, we fill the full buff with random numbers, and pick those we need
     493         2924 :       icol_global = 0
     494        22869 :       DO icol_local = 1, ncol_local
     495        19945 :          CPASSERT(col_indices(icol_local) > icol_global)
     496              :          DO
     497        19945 :             CALL rng%reset_to_next_substream()
     498        19945 :             icol_global = icol_global + 1
     499        19945 :             IF (icol_global == col_indices(icol_local)) EXIT
     500              :          END DO
     501        19945 :          CALL rng%fill(buff)
     502       722521 :          DO irow_local = 1, nrow_local
     503       719597 :             local_data(irow_local, icol_local) = buff(row_indices(irow_local))
     504              :          END DO
     505              :       END DO
     506              : 
     507         2924 :       DEALLOCATE (buff)
     508              : 
     509              :       ! store seed before deletion (unclear if this is the proper seed)
     510              : 
     511              :       ! Note that, the initial state (rng%ig) instead of the current state (rng%cg) is stored in the
     512              :       ! seed variable. As a consequence, each invocation of cp_fm_init_random uses exactly the same
     513              :       ! stream of random numbers. While this seems odd and contrary to the original design,
     514              :       ! it was probably introduced to improve reproducibility.
     515              :       ! See also https://github.com/cp2k/cp2k/pull/506
     516         2924 :       CALL rng%get(ig=seed)
     517              : 
     518         2924 :       CALL timestop(handle)
     519              : 
     520        81872 :    END SUBROUTINE cp_fm_init_random
     521              : 
     522              : ! **************************************************************************************************
     523              : !> \brief set all elements of a matrix to the same value,
     524              : !>      and optionally the diagonal to a different one
     525              : !> \param matrix input matrix
     526              : !> \param alpha scalar used to set all elements of the matrix
     527              : !> \param beta scalar used to set diagonal of the matrix
     528              : !> \note
     529              : !>      can be used to zero a matrix
     530              : !>      can be used to create a unit matrix (I-matrix) alpha=0.0_dp beta=1.0_dp
     531              : ! **************************************************************************************************
     532       397920 :    SUBROUTINE cp_fm_set_all(matrix, alpha, beta)
     533              : 
     534              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix
     535              :       REAL(KIND=dp), INTENT(IN)                          :: alpha
     536              :       REAL(KIND=dp), INTENT(IN), OPTIONAL                :: beta
     537              : 
     538              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'cp_fm_set_all'
     539              : 
     540              :       INTEGER                                            :: handle, i, n
     541              : 
     542       397920 :       CALL timeset(routineN, handle)
     543              : 
     544    231885938 :       matrix%local_data(:, :) = alpha
     545              : 
     546       397920 :       IF (PRESENT(beta)) THEN
     547        65718 :          n = MIN(matrix%matrix_struct%nrow_global, matrix%matrix_struct%ncol_global)
     548       509666 :          DO i = 1, n
     549       509666 :             CALL cp_fm_set_element(matrix, i, i, beta)
     550              :          END DO
     551              :       END IF
     552              : 
     553       397920 :       CALL timestop(handle)
     554              : 
     555       397920 :    END SUBROUTINE cp_fm_set_all
     556              : 
     557              : ! **************************************************************************************************
     558              : !> \brief returns the diagonal elements of a fm
     559              : !> \param matrix ...
     560              : !> \param diag ...
     561              : ! **************************************************************************************************
     562        13654 :    SUBROUTINE cp_fm_get_diag(matrix, diag)
     563              : 
     564              :       ! arguments
     565              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix
     566              :       REAL(KIND=dp), DIMENSION(:), INTENT(OUT) :: diag
     567              : 
     568              :       ! locals
     569              :       INTEGER :: i, nrow_global
     570              : 
     571              : #if defined(__parallel)
     572              :       INTEGER, DIMENSION(9) :: desca
     573              :       TYPE(cp_blacs_env_type), POINTER :: context
     574              :       INTEGER :: icol_local, ipcol, iprow, irow_local, mypcol, myprow, npcol, &
     575              :                  nprow
     576        13654 :       REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
     577              : #endif
     578              : 
     579        13654 :       CALL cp_fm_get_info(matrix, nrow_global=nrow_global)
     580              : 
     581              : #if defined(__parallel)
     582       146436 :       diag = 0.0_dp
     583        13654 :       context => matrix%matrix_struct%context
     584        13654 :       myprow = context%mepos(1)
     585        13654 :       mypcol = context%mepos(2)
     586        13654 :       nprow = context%num_pe(1)
     587        13654 :       npcol = context%num_pe(2)
     588              : 
     589        13654 :       a => matrix%local_data
     590       136540 :       desca(:) = matrix%matrix_struct%descriptor(:)
     591              : 
     592       146436 :       DO i = 1, nrow_global
     593              :          CALL infog2l(i, i, desca, nprow, npcol, myprow, mypcol, &
     594       132782 :                       irow_local, icol_local, iprow, ipcol)
     595       146436 :          IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
     596        66475 :             diag(i) = a(irow_local, icol_local)
     597              :          END IF
     598              :       END DO
     599              : #else
     600              :       DO i = 1, nrow_global
     601              :          diag(i) = matrix%local_data(i, i)
     602              :       END DO
     603              : #endif
     604       279218 :       CALL matrix%matrix_struct%para_env%sum(diag)
     605              : 
     606        13654 :    END SUBROUTINE cp_fm_get_diag
     607              : 
     608              : ! **************************************************************************************************
     609              : !> \brief returns an element of a fm
     610              : !>      this value is valid on every cpu
     611              : !>      using this call is expensive
     612              : !> \param matrix the matrix to read
     613              : !> \param irow_global the row
     614              : !> \param icol_global the col
     615              : !> \param alpha the value of matrix(irow_global, icol_global)
     616              : !> \param local true if the element is on this cpu, false otherwise
     617              : !> \note
     618              : !>      - modified semantics. now this function always returns the value
     619              : !>        previously the value was zero on cpus that didn't own the relevant
     620              : !>        part of the matrix (Joost VandeVondele, May 2003)
     621              : !>      - usage of the function should be avoided, as it is likely to rather slow
     622              : !>        using row_indices/col_indices/local_data + some smart scheme normally
     623              : !>        yields a real parallel code
     624              : ! **************************************************************************************************
     625      1193594 :    SUBROUTINE cp_fm_get_element(matrix, irow_global, icol_global, alpha, local)
     626              : 
     627              :       ! arguments
     628              :       TYPE(cp_fm_type), INTENT(IN)          :: matrix
     629              :       REAL(KIND=dp), INTENT(OUT)                     :: alpha
     630              :       INTEGER, INTENT(IN)                       :: icol_global, &
     631              :                                                    irow_global
     632              :       LOGICAL, INTENT(OUT), OPTIONAL            :: local
     633              : 
     634              :       ! locals
     635              : #if defined(__parallel)
     636              :       INTEGER, DIMENSION(9) :: desca
     637              :       TYPE(cp_blacs_env_type), POINTER :: context
     638              :       INTEGER :: icol_local, ipcol, iprow, irow_local, mypcol, myprow, npcol, &
     639              :                  nprow
     640      1193594 :       REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
     641              : #endif
     642              : 
     643              : #if defined(__parallel)
     644      1193594 :       context => matrix%matrix_struct%context
     645      1193594 :       myprow = context%mepos(1)
     646      1193594 :       mypcol = context%mepos(2)
     647      1193594 :       nprow = context%num_pe(1)
     648      1193594 :       npcol = context%num_pe(2)
     649              : 
     650      1193594 :       a => matrix%local_data
     651     11935940 :       desca(:) = matrix%matrix_struct%descriptor(:)
     652              : 
     653              :       CALL infog2l(irow_global, icol_global, desca, nprow, npcol, myprow, mypcol, &
     654      1193594 :                    irow_local, icol_local, iprow, ipcol)
     655              : 
     656      1193594 :       IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
     657       596847 :          alpha = a(irow_local, icol_local)
     658       596847 :          CALL context%dgebs2d('All', ' ', 1, 1, alpha, 1)
     659       596847 :          IF (PRESENT(local)) local = .TRUE.
     660              :       ELSE
     661       596747 :          CALL context%dgebr2d('All', ' ', 1, 1, alpha, 1, iprow, ipcol)
     662       596747 :          IF (PRESENT(local)) local = .FALSE.
     663              :       END IF
     664              : 
     665              : #else
     666              :       IF (PRESENT(local)) local = .TRUE.
     667              :       alpha = matrix%local_data(irow_global, icol_global)
     668              : #endif
     669              : 
     670      1193594 :    END SUBROUTINE cp_fm_get_element
     671              : 
     672              : ! **************************************************************************************************
     673              : !> \brief sets an element of a matrix
     674              : !> \param matrix ...
     675              : !> \param irow_global ...
     676              : !> \param icol_global ...
     677              : !> \param alpha ...
     678              : !> \note
     679              : !>      we expect all cpus to have the same arguments in the call to this function
     680              : !>      (otherwise one should use local_data tricks)
     681              : ! **************************************************************************************************
     682       702782 :    SUBROUTINE cp_fm_set_element(matrix, irow_global, icol_global, alpha)
     683              :       TYPE(cp_fm_type), INTENT(IN)          :: matrix
     684              :       INTEGER, INTENT(IN)                      :: irow_global, icol_global
     685              :       REAL(KIND=dp), INTENT(IN)                :: alpha
     686              : 
     687              :       INTEGER                                  :: mypcol, myprow, npcol, nprow
     688              :       TYPE(cp_blacs_env_type), POINTER         :: context
     689              : #if defined(__parallel)
     690              :       INTEGER                                  :: icol_local, ipcol, iprow, &
     691              :                                                   irow_local
     692              :       INTEGER, DIMENSION(9)                    :: desca
     693       702782 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
     694              : #endif
     695              : 
     696       702782 :       context => matrix%matrix_struct%context
     697       702782 :       myprow = context%mepos(1)
     698       702782 :       mypcol = context%mepos(2)
     699       702782 :       nprow = context%num_pe(1)
     700       702782 :       npcol = context%num_pe(2)
     701              : 
     702              : #if defined(__parallel)
     703              : 
     704       702782 :       a => matrix%local_data
     705              : 
     706      7027820 :       desca(:) = matrix%matrix_struct%descriptor(:)
     707              : 
     708              :       CALL infog2l(irow_global, icol_global, desca, nprow, npcol, myprow, mypcol, &
     709       702782 :                    irow_local, icol_local, iprow, ipcol)
     710              : 
     711       702782 :       IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
     712       353296 :          a(irow_local, icol_local) = alpha
     713              :       END IF
     714              : 
     715              : #else
     716              : 
     717              :       matrix%local_data(irow_global, icol_global) = alpha
     718              : 
     719              : #endif
     720       702782 :    END SUBROUTINE cp_fm_set_element
     721              : 
     722              : ! **************************************************************************************************
     723              : !> \brief sets a submatrix of a full matrix
     724              : !>       fm(start_row:start_row+n_rows,start_col:start_col+n_cols)
     725              : !>       = alpha*op(new_values)(1:n_rows,1:n_cols)+ beta
     726              : !>       * fm(start_row:start_row+n_rows,start_col:start_col+n_cols)
     727              : !> \param fm the full to change
     728              : !> \param new_values a replicated full matrix with the new values
     729              : !> \param start_row the starting row of b_matrix (defaults to 1)
     730              : !> \param start_col the starting col of b_matrix (defaults to 1)
     731              : !> \param n_rows the number of row to change in b (defaults to
     732              : !>        size(op(new_values),1))
     733              : !> \param n_cols the number of columns to change in b (defaults to
     734              : !>        size(op(new_values),2))
     735              : !> \param alpha rescaling factor for the new values (defaults to 1.0)
     736              : !> \param beta rescaling factor for the old values (defaults to 0.0)
     737              : !> \param transpose if new_values should be transposed: if true
     738              : !>        op(new_values)=new_values^T, else op(new_values)=new_values
     739              : !>        (defaults to false)
     740              : !> \par History
     741              : !>      07.2002 created borrowing from Joost's blacs_replicated_copy [fawzi]
     742              : !> \author Fawzi Mohamed
     743              : !> \note
     744              : !>      optimized for full column updates and alpha=1.0, beta=0.0
     745              : !>      the new_values need to be valid on all cpus
     746              : ! **************************************************************************************************
     747        77583 :    SUBROUTINE cp_fm_set_submatrix(fm, new_values, start_row, &
     748              :                                   start_col, n_rows, n_cols, alpha, beta, transpose)
     749              :       TYPE(cp_fm_type), INTENT(IN)                       :: fm
     750              :       REAL(KIND=dp), DIMENSION(:, :), INTENT(in)         :: new_values
     751              :       INTEGER, INTENT(in), OPTIONAL                      :: start_row, start_col, n_rows, n_cols
     752              :       REAL(KIND=dp), INTENT(in), OPTIONAL                :: alpha, beta
     753              :       LOGICAL, INTENT(in), OPTIONAL                      :: transpose
     754              : 
     755              :       INTEGER                                            :: i, i0, j, j0, ncol, ncol_block, &
     756              :                                                             ncol_global, ncol_local, nrow, &
     757              :                                                             nrow_block, nrow_global, nrow_local, &
     758              :                                                             this_col, this_row
     759        77583 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
     760              :       LOGICAL                                            :: tr_a
     761              :       REAL(KIND=dp)                                      :: al, be
     762        77583 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: full_block
     763              : 
     764        77583 :       al = 1.0_dp; be = 0.0_dp; i0 = 1; j0 = 1; tr_a = .FALSE.
     765              : 
     766        18876 :       IF (PRESENT(alpha)) al = alpha
     767        77583 :       IF (PRESENT(beta)) be = beta
     768        77583 :       IF (PRESENT(start_row)) i0 = start_row
     769        77583 :       IF (PRESENT(start_col)) j0 = start_col
     770        77583 :       IF (PRESENT(transpose)) tr_a = transpose
     771        15913 :       IF (tr_a) THEN
     772        15821 :          nrow = SIZE(new_values, 2)
     773        15821 :          ncol = SIZE(new_values, 1)
     774              :       ELSE
     775        61762 :          nrow = SIZE(new_values, 1)
     776        61762 :          ncol = SIZE(new_values, 2)
     777              :       END IF
     778        77583 :       IF (PRESENT(n_rows)) nrow = n_rows
     779        77583 :       IF (PRESENT(n_cols)) ncol = n_cols
     780              : 
     781        77583 :       full_block => fm%local_data
     782              : 
     783              :       CALL cp_fm_get_info(matrix=fm, &
     784              :                           nrow_global=nrow_global, ncol_global=ncol_global, &
     785              :                           nrow_block=nrow_block, ncol_block=ncol_block, &
     786              :                           nrow_local=nrow_local, ncol_local=ncol_local, &
     787        77583 :                           row_indices=row_indices, col_indices=col_indices)
     788              : 
     789        77583 :       IF (al == 1.0 .AND. be == 0.0) THEN
     790      1322906 :          DO j = 1, ncol_local
     791      1254669 :             this_col = col_indices(j) - j0 + 1
     792      1322906 :             IF (this_col >= 1 .AND. this_col <= ncol) THEN
     793       322871 :                IF (tr_a) THEN
     794         6475 :                   IF (i0 == 1 .AND. nrow_global == nrow) THEN
     795       158370 :                      DO i = 1, nrow_local
     796       158370 :                         full_block(i, j) = new_values(this_col, row_indices(i))
     797              :                      END DO
     798              :                   ELSE
     799          594 :                      DO i = 1, nrow_local
     800          510 :                         this_row = row_indices(i) - i0 + 1
     801          594 :                         IF (this_row >= 1 .AND. this_row <= nrow) THEN
     802          255 :                            full_block(i, j) = new_values(this_col, this_row)
     803              :                         END IF
     804              :                      END DO
     805              :                   END IF
     806              :                ELSE
     807       316396 :                   IF (i0 == 1 .AND. nrow_global == nrow) THEN
     808      7552865 :                      DO i = 1, nrow_local
     809      7552865 :                         full_block(i, j) = new_values(row_indices(i), this_col)
     810              :                      END DO
     811              :                   ELSE
     812       538491 :                      DO i = 1, nrow_local
     813       528241 :                         this_row = row_indices(i) - i0 + 1
     814       538491 :                         IF (this_row >= 1 .AND. this_row <= nrow) THEN
     815        58493 :                            full_block(i, j) = new_values(this_row, this_col)
     816              :                         END IF
     817              :                      END DO
     818              :                   END IF
     819              :                END IF
     820              :             END IF
     821              :          END DO
     822              :       ELSE
     823       836336 :          DO j = 1, ncol_local
     824       826990 :             this_col = col_indices(j) - j0 + 1
     825       836336 :             IF (this_col >= 1 .AND. this_col <= ncol) THEN
     826       826990 :                IF (tr_a) THEN
     827     88278291 :                   DO i = 1, nrow_local
     828     87451301 :                      this_row = row_indices(i) - i0 + 1
     829     88278291 :                      IF (this_row >= 1 .AND. this_row <= nrow) THEN
     830              :                         full_block(i, j) = al*new_values(this_col, this_row) + &
     831       413495 :                                            be*full_block(i, j)
     832              :                      END IF
     833              :                   END DO
     834              :                ELSE
     835            0 :                   DO i = 1, nrow_local
     836            0 :                      this_row = row_indices(i) - i0 + 1
     837            0 :                      IF (this_row >= 1 .AND. this_row <= nrow) THEN
     838              :                         full_block(i, j) = al*new_values(this_row, this_col) + &
     839            0 :                                            be*full_block(i, j)
     840              :                      END IF
     841              :                   END DO
     842              :                END IF
     843              :             END IF
     844              :          END DO
     845              :       END IF
     846              : 
     847        77583 :    END SUBROUTINE cp_fm_set_submatrix
     848              : 
     849              : ! **************************************************************************************************
     850              : !> \brief sets a submatrix of a full matrix to a given value
     851              : !>       fm(start_row:start_row+n_rows,start_col:start_col+n_cols) = value
     852              : !> \param fm the full to change
     853              : !> \param new_value  ...
     854              : !> \param start_row the starting row of matrix
     855              : !> \param start_col the starting col of matrix
     856              : !> \param n_rows the number of rows to change
     857              : !> \param n_cols the number of columns to change
     858              : !> \par History
     859              : !>      07.2002 created borrowing from Joost's blacs_replicated_copy [fawzi]
     860              : !>      12.2025 created from cp_fm_set_submatrix
     861              : !> \author JGH
     862              : ! **************************************************************************************************
     863      1064067 :    SUBROUTINE cp_fm_set_all_submatrix(fm, new_value, start_row, start_col, n_rows, n_cols)
     864              :       TYPE(cp_fm_type), INTENT(IN)                       :: fm
     865              :       REAL(KIND=dp), INTENT(in)                          :: new_value
     866              :       INTEGER, INTENT(in)                                :: start_row, start_col, n_rows, n_cols
     867              : 
     868              :       INTEGER                                            :: i, i0, j, j0, ncol_global, ncol_local, &
     869              :                                                             nrow_global, nrow_local, this_col, &
     870              :                                                             this_row
     871      1064067 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
     872      1064067 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: full_block
     873              : 
     874      1064067 :       full_block => fm%local_data
     875              : 
     876              :       CALL cp_fm_get_info(matrix=fm, &
     877              :                           nrow_global=nrow_global, ncol_global=ncol_global, &
     878              :                           nrow_local=nrow_local, ncol_local=ncol_local, &
     879      1064067 :                           row_indices=row_indices, col_indices=col_indices)
     880              : 
     881      1064067 :       i0 = start_row
     882      1064067 :       j0 = start_col
     883     20354767 :       DO j = 1, ncol_local
     884     19290700 :          this_col = col_indices(j) - j0 + 1
     885     20354767 :          IF (this_col >= 1 .AND. this_col <= n_cols) THEN
     886    640146006 :             DO i = 1, nrow_local
     887    620915268 :                this_row = row_indices(i) - i0 + 1
     888    640146006 :                IF (this_row >= 1 .AND. this_row <= n_rows) THEN
     889    613751456 :                   full_block(i, j) = new_value
     890              :                END IF
     891              :             END DO
     892              :          END IF
     893              :       END DO
     894              : 
     895      1064067 :    END SUBROUTINE cp_fm_set_all_submatrix
     896              : 
     897              : ! **************************************************************************************************
     898              : !> \brief gets a submatrix of a full matrix
     899              : !>       op(target_m)(1:n_rows,1:n_cols)
     900              : !>       =fm(start_row:start_row+n_rows,start_col:start_col+n_cols)
     901              : !>      target_m is replicated on all cpus
     902              : !>      using this call is expensive
     903              : !> \param fm the full you want to get the info from
     904              : !> \param target_m a replicated full matrix that will contain the result
     905              : !> \param start_row the starting row of b_matrix (defaults to 1)
     906              : !> \param start_col the starting col of b_matrix (defaults to 1)
     907              : !> \param n_rows the number of row to change in b (defaults to
     908              : !>        size(op(new_values),1))
     909              : !> \param n_cols the number of columns to change in b (defaults to
     910              : !>        size(op(new_values),2))
     911              : !> \param transpose if target_m should be transposed: if true
     912              : !>        op(target_m)=target_m^T, else op(target_m)=target_m
     913              : !>        (defaults to false)
     914              : !> \par History
     915              : !>      07.2002 created borrowing from Joost's blacs_replicated_copy [fawzi]
     916              : !> \author Fawzi Mohamed
     917              : !> \note
     918              : !>      optimized for full column updates. Zeros out a little too much
     919              : !>      of target_m
     920              : !>      the target_m is replicated and valid on all cpus
     921              : ! **************************************************************************************************
     922       146652 :    SUBROUTINE cp_fm_get_submatrix(fm, target_m, start_row, &
     923              :                                   start_col, n_rows, n_cols, transpose)
     924              :       TYPE(cp_fm_type), INTENT(IN)                       :: fm
     925              :       REAL(KIND=dp), DIMENSION(:, :), INTENT(out)        :: target_m
     926              :       INTEGER, INTENT(in), OPTIONAL                      :: start_row, start_col, n_rows, n_cols
     927              :       LOGICAL, INTENT(in), OPTIONAL                      :: transpose
     928              : 
     929              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_get_submatrix'
     930              : 
     931              :       INTEGER                                            :: handle, i, i0, j, j0, ncol, ncol_global, &
     932              :                                                             ncol_local, nrow, nrow_global, &
     933              :                                                             nrow_local, this_col, this_row
     934       146652 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
     935              :       LOGICAL                                            :: tr_a
     936       146652 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: full_block
     937              :       TYPE(mp_para_env_type), POINTER                    :: para_env
     938              : 
     939       146652 :       CALL timeset(routineN, handle)
     940              : 
     941       146652 :       i0 = 1; j0 = 1; tr_a = .FALSE.
     942              : 
     943       146652 :       IF (PRESENT(start_row)) i0 = start_row
     944       146652 :       IF (PRESENT(start_col)) j0 = start_col
     945       146652 :       IF (PRESENT(transpose)) tr_a = transpose
     946         6152 :       IF (tr_a) THEN
     947         2206 :          nrow = SIZE(target_m, 2)
     948         2206 :          ncol = SIZE(target_m, 1)
     949              :       ELSE
     950       144446 :          nrow = SIZE(target_m, 1)
     951       144446 :          ncol = SIZE(target_m, 2)
     952              :       END IF
     953       146652 :       IF (PRESENT(n_rows)) nrow = n_rows
     954       146652 :       IF (PRESENT(n_cols)) ncol = n_cols
     955              : 
     956       146652 :       para_env => fm%matrix_struct%para_env
     957              : 
     958       146652 :       full_block => fm%local_data
     959              : #if defined(__parallel)
     960              :       ! zero-out whole target_m
     961       146652 :       IF (SIZE(target_m, 1)*SIZE(target_m, 2) /= 0) THEN
     962       164842 :          CALL dcopy(SIZE(target_m, 1)*SIZE(target_m, 2), [0.0_dp], 0, target_m, 1)
     963              :       END IF
     964              : #endif
     965              : 
     966              :       CALL cp_fm_get_info(matrix=fm, &
     967              :                           nrow_global=nrow_global, ncol_global=ncol_global, &
     968              :                           nrow_local=nrow_local, ncol_local=ncol_local, &
     969       146652 :                           row_indices=row_indices, col_indices=col_indices)
     970              : 
     971       843124 :       DO j = 1, ncol_local
     972       696472 :          this_col = col_indices(j) - j0 + 1
     973       843124 :          IF (this_col >= 1 .AND. this_col <= ncol) THEN
     974       560004 :             IF (tr_a) THEN
     975         2206 :                IF (i0 == 1 .AND. nrow_global == nrow) THEN
     976        80516 :                   DO i = 1, nrow_local
     977        80516 :                      target_m(this_col, row_indices(i)) = full_block(i, j)
     978              :                   END DO
     979              :                ELSE
     980            0 :                   DO i = 1, nrow_local
     981            0 :                      this_row = row_indices(i) - i0 + 1
     982            0 :                      IF (this_row >= 1 .AND. this_row <= nrow) THEN
     983            0 :                         target_m(this_col, this_row) = full_block(i, j)
     984              :                      END IF
     985              :                   END DO
     986              :                END IF
     987              :             ELSE
     988       557798 :                IF (i0 == 1 .AND. nrow_global == nrow) THEN
     989     11846989 :                   DO i = 1, nrow_local
     990     11846989 :                      target_m(row_indices(i), this_col) = full_block(i, j)
     991              :                   END DO
     992              :                ELSE
     993      1735300 :                   DO i = 1, nrow_local
     994      1694848 :                      this_row = row_indices(i) - i0 + 1
     995      1735300 :                      IF (this_row >= 1 .AND. this_row <= nrow) THEN
     996        85570 :                         target_m(this_row, this_col) = full_block(i, j)
     997              :                      END IF
     998              :                   END DO
     999              :                END IF
    1000              :             END IF
    1001              :          END IF
    1002              :       END DO
    1003              : 
    1004     38939236 :       CALL para_env%sum(target_m)
    1005              : 
    1006       146652 :       CALL timestop(handle)
    1007              : 
    1008       146652 :    END SUBROUTINE cp_fm_get_submatrix
    1009              : 
    1010              : ! **************************************************************************************************
    1011              : !> \brief returns all kind of information about the full matrix
    1012              : !> \param matrix ...
    1013              : !> \param name ...
    1014              : !> \param nrow_global ...
    1015              : !> \param ncol_global ...
    1016              : !> \param nrow_block ...
    1017              : !> \param ncol_block ...
    1018              : !> \param nrow_local ...
    1019              : !> \param ncol_local ...
    1020              : !> \param row_indices ...
    1021              : !> \param col_indices ...
    1022              : !> \param local_data ...
    1023              : !> \param context ...
    1024              : !> \param nrow_locals ...
    1025              : !> \param ncol_locals ...
    1026              : !> \param matrix_struct ...
    1027              : !> \param para_env ...
    1028              : !> \note
    1029              : !>       see also cp_fm_struct for explanation
    1030              : !>       - nrow_local, ncol_local, row_indices, col_indices, local_data are hooks for efficient
    1031              : !>         access to the local blacs block
    1032              : ! **************************************************************************************************
    1033      6159393 :    SUBROUTINE cp_fm_get_info(matrix, name, nrow_global, ncol_global, &
    1034              :                              nrow_block, ncol_block, nrow_local, ncol_local, &
    1035              :                              row_indices, col_indices, local_data, context, &
    1036              :                              nrow_locals, ncol_locals, matrix_struct, para_env)
    1037              : 
    1038              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix
    1039              :       CHARACTER(LEN=*), INTENT(OUT), OPTIONAL            :: name
    1040              :       INTEGER, INTENT(OUT), OPTIONAL                     :: nrow_global, ncol_global, nrow_block, &
    1041              :                                                             ncol_block, nrow_local, ncol_local
    1042              :       INTEGER, DIMENSION(:), OPTIONAL, POINTER           :: row_indices, col_indices
    1043              :       REAL(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    1044              :          OPTIONAL, POINTER                               :: local_data
    1045              :       TYPE(cp_blacs_env_type), OPTIONAL, POINTER         :: context
    1046              :       INTEGER, DIMENSION(:), OPTIONAL, POINTER           :: nrow_locals, ncol_locals
    1047              :       TYPE(cp_fm_struct_type), OPTIONAL, POINTER         :: matrix_struct
    1048              :       TYPE(mp_para_env_type), OPTIONAL, POINTER          :: para_env
    1049              : 
    1050            8 :       IF (PRESENT(name)) name = matrix%name
    1051      6159393 :       IF (PRESENT(matrix_struct)) matrix_struct => matrix%matrix_struct
    1052      6159393 :       IF (PRESENT(local_data)) local_data => matrix%local_data ! not hiding things anymore :-(
    1053              : 
    1054              :       CALL cp_fm_struct_get(matrix%matrix_struct, nrow_local=nrow_local, &
    1055              :                             ncol_local=ncol_local, nrow_global=nrow_global, &
    1056              :                             ncol_global=ncol_global, nrow_block=nrow_block, &
    1057              :                             ncol_block=ncol_block, row_indices=row_indices, &
    1058              :                             col_indices=col_indices, nrow_locals=nrow_locals, &
    1059      6159393 :                             ncol_locals=ncol_locals, context=context, para_env=para_env)
    1060              : 
    1061      6159393 :    END SUBROUTINE cp_fm_get_info
    1062              : 
    1063              : ! **************************************************************************************************
    1064              : !> \brief Write nicely formatted info about the FM to the given I/O unit (including the underlying FM struct)
    1065              : !> \param matrix a cp_fm_type instance
    1066              : !> \param io_unit the I/O unit to use for writing
    1067              : ! **************************************************************************************************
    1068            3 :    SUBROUTINE cp_fm_write_info(matrix, io_unit)
    1069              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix
    1070              :       INTEGER, INTENT(IN)                                :: io_unit
    1071              : 
    1072            3 :       WRITE (io_unit, '(/,A,A12)') "CP_FM | Name:                           ", matrix%name
    1073            3 :       CALL cp_fm_struct_write_info(matrix%matrix_struct, io_unit)
    1074            3 :    END SUBROUTINE cp_fm_write_info
    1075              : 
    1076              : ! **************************************************************************************************
    1077              : !> \brief find the maximum absolute value of the matrix element
    1078              : !>      maxval(abs(matrix))
    1079              : !> \param matrix ...
    1080              : !> \param a_max ...
    1081              : !> \param ir_max ...
    1082              : !> \param ic_max ...
    1083              : ! **************************************************************************************************
    1084       123695 :    SUBROUTINE cp_fm_maxabsval(matrix, a_max, ir_max, ic_max)
    1085              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix
    1086              :       REAL(KIND=dp), INTENT(OUT)                         :: a_max
    1087              :       INTEGER, INTENT(OUT), OPTIONAL                     :: ir_max, ic_max
    1088              : 
    1089              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'cp_fm_maxabsval'
    1090              : 
    1091              :       INTEGER                                            :: handle, i, ic_max_local, ir_max_local, &
    1092              :                                                             j, mepos, ncol_local, nrow_local, &
    1093              :                                                             num_pe
    1094       123695 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: ic_max_vec, ir_max_vec
    1095       123695 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
    1096              :       REAL(dp)                                           :: my_max
    1097       123695 :       REAL(dp), ALLOCATABLE, DIMENSION(:)                :: a_max_vec
    1098       123695 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: my_block
    1099              : 
    1100       123695 :       CALL timeset(routineN, handle)
    1101              : 
    1102       123695 :       my_block => matrix%local_data
    1103              : 
    1104              :       CALL cp_fm_get_info(matrix, nrow_local=nrow_local, ncol_local=ncol_local, &
    1105       123695 :                           row_indices=row_indices, col_indices=col_indices)
    1106              : 
    1107     75967129 :       a_max = MAXVAL(ABS(my_block(1:nrow_local, 1:ncol_local)))
    1108              : 
    1109       123695 :       IF (PRESENT(ir_max)) THEN
    1110            0 :          num_pe = matrix%matrix_struct%para_env%num_pe
    1111            0 :          mepos = matrix%matrix_struct%para_env%mepos
    1112            0 :          ALLOCATE (ir_max_vec(0:num_pe - 1))
    1113            0 :          ir_max_vec(0:num_pe - 1) = 0
    1114            0 :          ALLOCATE (ic_max_vec(0:num_pe - 1))
    1115            0 :          ic_max_vec(0:num_pe - 1) = 0
    1116            0 :          ALLOCATE (a_max_vec(0:num_pe - 1))
    1117            0 :          a_max_vec(0:num_pe - 1) = 0.0_dp
    1118            0 :          my_max = 0.0_dp
    1119              : 
    1120            0 :          IF ((ncol_local > 0) .AND. (nrow_local > 0)) THEN
    1121            0 :             DO i = 1, ncol_local
    1122            0 :                DO j = 1, nrow_local
    1123            0 :                   IF (ABS(my_block(j, i)) > my_max) THEN
    1124            0 :                      my_max = my_block(j, i)
    1125            0 :                      ir_max_local = j
    1126            0 :                      ic_max_local = i
    1127              :                   END IF
    1128              :                END DO
    1129              :             END DO
    1130              : 
    1131            0 :             a_max_vec(mepos) = my_max
    1132            0 :             ir_max_vec(mepos) = row_indices(ir_max_local)
    1133            0 :             ic_max_vec(mepos) = col_indices(ic_max_local)
    1134              : 
    1135              :          END IF
    1136              : 
    1137            0 :          CALL matrix%matrix_struct%para_env%sum(a_max_vec)
    1138            0 :          CALL matrix%matrix_struct%para_env%sum(ir_max_vec)
    1139            0 :          CALL matrix%matrix_struct%para_env%sum(ic_max_vec)
    1140              : 
    1141            0 :          my_max = 0.0_dp
    1142            0 :          DO i = 0, num_pe - 1
    1143            0 :             IF (a_max_vec(i) > my_max) THEN
    1144            0 :                ir_max = ir_max_vec(i)
    1145            0 :                ic_max = ic_max_vec(i)
    1146              :             END IF
    1147              :          END DO
    1148              : 
    1149            0 :          DEALLOCATE (ir_max_vec, ic_max_vec, a_max_vec)
    1150            0 :          CPASSERT(ic_max > 0)
    1151            0 :          CPASSERT(ir_max > 0)
    1152              : 
    1153              :       END IF
    1154              : 
    1155       123695 :       CALL matrix%matrix_struct%para_env%max(a_max)
    1156              : 
    1157       123695 :       CALL timestop(handle)
    1158              : 
    1159       247390 :    END SUBROUTINE cp_fm_maxabsval
    1160              : 
    1161              : ! **************************************************************************************************
    1162              : !> \brief find the maximum over the rows of the sum of the absolute values of the elements of a given row
    1163              : !>      = || A ||_infinity
    1164              : !> \param matrix ...
    1165              : !> \param a_max ...
    1166              : !> \note
    1167              : !>      for a real symmetric matrix it holds that || A ||_2 = |lambda_max| < || A ||_infinity
    1168              : !>      Hence this can be used to estimate an upper bound for the eigenvalues of a matrix
    1169              : !>      http://mathworld.wolfram.com/MatrixNorm.html
    1170              : !>      (but the bound is not so tight in the general case)
    1171              : ! **************************************************************************************************
    1172         4986 :    SUBROUTINE cp_fm_maxabsrownorm(matrix, a_max)
    1173              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix
    1174              :       REAL(KIND=dp), INTENT(OUT)                         :: a_max
    1175              : 
    1176              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_maxabsrownorm'
    1177              : 
    1178              :       INTEGER                                            :: handle, i, j, ncol_local, nrow_global, &
    1179              :                                                             nrow_local
    1180         4986 :       INTEGER, DIMENSION(:), POINTER                     :: row_indices
    1181              :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: values
    1182         4986 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: my_block
    1183              : 
    1184         4986 :       CALL timeset(routineN, handle)
    1185              : 
    1186         4986 :       my_block => matrix%local_data
    1187              : 
    1188              :       CALL cp_fm_get_info(matrix, row_indices=row_indices, nrow_global=nrow_global, &
    1189         4986 :                           nrow_local=nrow_local, ncol_local=ncol_local)
    1190              : 
    1191              :       ! the efficiency could be improved by making use of the row-col distribution of scalapack
    1192        14958 :       ALLOCATE (values(nrow_global))
    1193         4986 :       values = 0.0_dp
    1194        65834 :       DO j = 1, ncol_local
    1195       535250 :          DO i = 1, nrow_local
    1196       530264 :             values(row_indices(i)) = values(row_indices(i)) + ABS(my_block(i, j))
    1197              :          END DO
    1198              :       END DO
    1199         4986 :       CALL matrix%matrix_struct%para_env%sum(values)
    1200        65834 :       a_max = MAXVAL(values)
    1201         4986 :       DEALLOCATE (values)
    1202              : 
    1203         4986 :       CALL timestop(handle)
    1204         4986 :    END SUBROUTINE cp_fm_maxabsrownorm
    1205              : 
    1206              : ! **************************************************************************************************
    1207              : !> \brief find the inorm of each column norm_{j}= sqrt( \sum_{i} A_{ij}*A_{ij} )
    1208              : !> \param matrix ...
    1209              : !> \param norm_array ...
    1210              : ! **************************************************************************************************
    1211         1316 :    SUBROUTINE cp_fm_vectorsnorm(matrix, norm_array)
    1212              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix
    1213              :       REAL(KIND=dp), DIMENSION(:), INTENT(OUT)           :: norm_array
    1214              : 
    1215              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'cp_fm_vectorsnorm'
    1216              : 
    1217              :       INTEGER                                            :: handle, i, j, ncol_global, ncol_local, &
    1218              :                                                             nrow_local
    1219         1316 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices
    1220         1316 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: my_block
    1221              : 
    1222         1316 :       CALL timeset(routineN, handle)
    1223              : 
    1224         1316 :       my_block => matrix%local_data
    1225              : 
    1226              :       CALL cp_fm_get_info(matrix, col_indices=col_indices, ncol_global=ncol_global, &
    1227         1316 :                           nrow_local=nrow_local, ncol_local=ncol_local)
    1228              : 
    1229              :       ! the efficiency could be improved by making use of the row-col distribution of scalapack
    1230        31004 :       norm_array = 0.0_dp
    1231        31004 :       DO j = 1, ncol_local
    1232      1325922 :          DO i = 1, nrow_local
    1233      1324606 :             norm_array(col_indices(j)) = norm_array(col_indices(j)) + my_block(i, j)*my_block(i, j)
    1234              :          END DO
    1235              :       END DO
    1236        60692 :       CALL matrix%matrix_struct%para_env%sum(norm_array)
    1237        31004 :       norm_array = SQRT(norm_array)
    1238              : 
    1239         1316 :       CALL timestop(handle)
    1240         1316 :    END SUBROUTINE cp_fm_vectorsnorm
    1241              : 
    1242              : ! **************************************************************************************************
    1243              : !> \brief summing up all the elements along the matrix's i-th index
    1244              : !>        \f$ \mathrm{sum}_{j} = \sum_{i} A_{ij} \f$
    1245              : !>        or
    1246              : !>        \f$ \mathrm{sum}_{i} = \sum_{j} A_{ij} \f$
    1247              : !> \param matrix     an input matrix A
    1248              : !> \param sum_array  sums of elements in each column/row
    1249              : !> \param dir ...
    1250              : !> \note  forked from cp_fm_vectorsnorm() to be used with
    1251              : !>        the maximum overlap method
    1252              : !>        added row variation
    1253              : ! **************************************************************************************************
    1254         7582 :    SUBROUTINE cp_fm_vectorssum(matrix, sum_array, dir)
    1255              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix
    1256              :       REAL(KIND=dp), DIMENSION(:), INTENT(OUT)           :: sum_array
    1257              :       CHARACTER(LEN=1), INTENT(IN), OPTIONAL             :: dir
    1258              : 
    1259              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'cp_fm_vectorssum'
    1260              : 
    1261              :       INTEGER                                            :: handle, i, j, ncol_local, nrow_local
    1262         7582 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
    1263              :       LOGICAL                                            :: docol
    1264         7582 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: my_block
    1265              : 
    1266         7582 :       CALL timeset(routineN, handle)
    1267              : 
    1268         7582 :       IF (PRESENT(dir)) THEN
    1269         7542 :          IF (dir == 'c' .OR. dir == 'C') THEN
    1270              :             docol = .TRUE.
    1271              :          ELSEIF (dir == 'r' .OR. dir == 'R') THEN
    1272              :             docol = .FALSE.
    1273              :          ELSE
    1274            0 :             CPABORT('Wrong argument DIR')
    1275              :          END IF
    1276              :       ELSE
    1277              :          docol = .TRUE.
    1278              :       END IF
    1279              : 
    1280         7582 :       my_block => matrix%local_data
    1281              : 
    1282              :       CALL cp_fm_get_info(matrix, col_indices=col_indices, row_indices=row_indices, &
    1283         7582 :                           nrow_local=nrow_local, ncol_local=ncol_local)
    1284              : 
    1285              :       ! the efficiency could be improved by making use of the row-col distribution of scalapack
    1286       248628 :       sum_array(:) = 0.0_dp
    1287         7582 :       IF (docol) THEN
    1288          448 :       DO j = 1, ncol_local
    1289         3628 :          DO i = 1, nrow_local
    1290         3588 :             sum_array(col_indices(j)) = sum_array(col_indices(j)) + my_block(i, j)
    1291              :          END DO
    1292              :       END DO
    1293              :       ELSE
    1294        83014 :       DO j = 1, ncol_local
    1295      6445414 :          DO i = 1, nrow_local
    1296      6437872 :             sum_array(row_indices(i)) = sum_array(row_indices(i)) + my_block(i, j)
    1297              :          END DO
    1298              :       END DO
    1299              :       END IF
    1300       489674 :       CALL matrix%matrix_struct%para_env%sum(sum_array)
    1301              : 
    1302         7582 :       CALL timestop(handle)
    1303         7582 :    END SUBROUTINE cp_fm_vectorssum
    1304              : 
    1305              : ! **************************************************************************************************
    1306              : !> \brief copy one identically sized matrix in the other
    1307              : !> \param source ...
    1308              : !> \param destination ...
    1309              : !> \note
    1310              : !>      see also cp_fm_to_fm_columns
    1311              : ! **************************************************************************************************
    1312       549847 :    SUBROUTINE cp_fm_to_fm_matrix(source, destination)
    1313              : 
    1314              :       TYPE(cp_fm_type), INTENT(IN)                       :: source, destination
    1315              : 
    1316              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_to_fm_matrix'
    1317              : 
    1318              :       INTEGER                                            :: handle, npcol, nprow
    1319              : 
    1320       549847 :       CALL timeset(routineN, handle)
    1321              : 
    1322       549847 :       nprow = source%matrix_struct%context%num_pe(1)
    1323       549847 :       npcol = source%matrix_struct%context%num_pe(2)
    1324              : 
    1325       549847 :       IF ((.NOT. cp2k_is_parallel) .OR. &
    1326              :           cp_fm_struct_equivalent(source%matrix_struct, &
    1327              :                                   destination%matrix_struct)) THEN
    1328       549847 :          IF (SIZE(source%local_data, 1) /= SIZE(destination%local_data, 1) .OR. &
    1329              :              SIZE(source%local_data, 2) /= SIZE(destination%local_data, 2)) &
    1330              :             CALL cp_abort(__LOCATION__, &
    1331              :                           "Cannot copy full matrix <"//TRIM(source%name)// &
    1332              :                           "> to full matrix <"//TRIM(destination%name)// &
    1333            0 :                           ">. The local_data blocks have different sizes.")
    1334              :          CALL dcopy(SIZE(source%local_data, 1)*SIZE(source%local_data, 2), &
    1335       549847 :                     source%local_data, 1, destination%local_data, 1)
    1336              :       ELSE
    1337            0 :          CPABORT("Data structures of source and target full matrix are not equivalent")
    1338              :       END IF
    1339              : 
    1340       549847 :       CALL timestop(handle)
    1341              : 
    1342       549847 :    END SUBROUTINE cp_fm_to_fm_matrix
    1343              : 
    1344              : ! **************************************************************************************************
    1345              : !> \brief copy just a subset of columns of a fm to a fm
    1346              : !> \param msource ...
    1347              : !> \param mtarget ...
    1348              : !> \param ncol ...
    1349              : !> \param source_start ...
    1350              : !> \param target_start ...
    1351              : ! **************************************************************************************************
    1352       172628 :    SUBROUTINE cp_fm_to_fm_columns(msource, mtarget, ncol, source_start, &
    1353              :                                   target_start)
    1354              : 
    1355              :       TYPE(cp_fm_type), INTENT(IN)          :: msource, mtarget
    1356              :       INTEGER, INTENT(IN)                      :: ncol
    1357              :       INTEGER, INTENT(IN), OPTIONAL            :: source_start, target_start
    1358              : 
    1359              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_to_fm_columns'
    1360              : 
    1361              :       INTEGER                                  :: handle, n, ss, ts
    1362              :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a, b
    1363              : #if defined(__parallel)
    1364              :       INTEGER                                  :: i
    1365              :       INTEGER, DIMENSION(9)                    :: desca, descb
    1366              : #endif
    1367              : 
    1368       172628 :       CALL timeset(routineN, handle)
    1369              : 
    1370       172628 :       ss = 1
    1371       172628 :       ts = 1
    1372              : 
    1373       172628 :       IF (PRESENT(source_start)) ss = source_start
    1374       172628 :       IF (PRESENT(target_start)) ts = target_start
    1375              : 
    1376       172628 :       n = msource%matrix_struct%nrow_global
    1377              : 
    1378       172628 :       a => msource%local_data
    1379       172628 :       b => mtarget%local_data
    1380              : 
    1381              : #if defined(__parallel)
    1382      1726280 :       desca(:) = msource%matrix_struct%descriptor(:)
    1383      1726280 :       descb(:) = mtarget%matrix_struct%descriptor(:)
    1384       742692 :       DO i = 0, ncol - 1
    1385       742692 :          CALL pdcopy(n, a, 1, ss + i, desca, 1, b, 1, ts + i, descb, 1)
    1386              :       END DO
    1387              : #else
    1388              :       IF (ss <= SIZE(a, 2) .AND. ts <= SIZE(b, 2)) THEN
    1389              :          CALL dcopy(ncol*n, a(:, ss), 1, b(:, ts), 1)
    1390              :       END IF
    1391              : #endif
    1392              : 
    1393       172628 :       CALL timestop(handle)
    1394              : 
    1395       172628 :    END SUBROUTINE cp_fm_to_fm_columns
    1396              : 
    1397              : ! **************************************************************************************************
    1398              : !> \brief copy just a triangular matrix
    1399              : !> \param msource ...
    1400              : !> \param mtarget ...
    1401              : !> \param uplo ...
    1402              : ! **************************************************************************************************
    1403           58 :    SUBROUTINE cp_fm_to_fm_triangular(msource, mtarget, uplo)
    1404              : 
    1405              :       TYPE(cp_fm_type), INTENT(IN)             :: msource, mtarget
    1406              :       CHARACTER(LEN=1), OPTIONAL, INTENT(IN)   :: uplo
    1407              : 
    1408              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_to_fm_triangular'
    1409              : 
    1410              :       CHARACTER(LEN=1)                         :: myuplo
    1411              :       INTEGER                                  :: handle, ncol, nrow
    1412           58 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a, b
    1413              : #if defined(__parallel)
    1414              :       INTEGER, DIMENSION(9)                    :: desca, descb
    1415              : #endif
    1416              : 
    1417           58 :       CALL timeset(routineN, handle)
    1418              : 
    1419           58 :       myuplo = 'U'
    1420           58 :       IF (PRESENT(uplo)) myuplo = uplo
    1421              : 
    1422           58 :       nrow = msource%matrix_struct%nrow_global
    1423           58 :       ncol = msource%matrix_struct%ncol_global
    1424              : 
    1425           58 :       a => msource%local_data
    1426           58 :       b => mtarget%local_data
    1427              : 
    1428              : #if defined(__parallel)
    1429          580 :       desca(:) = msource%matrix_struct%descriptor(:)
    1430          580 :       descb(:) = mtarget%matrix_struct%descriptor(:)
    1431           58 :       CALL pdlacpy(myuplo, nrow, ncol, a(1, 1), 1, 1, desca, b(1, 1), 1, 1, descb)
    1432              : #else
    1433              :       CALL dlacpy(myuplo, nrow, ncol, a(1, 1), nrow, b(1, 1), nrow)
    1434              : #endif
    1435              : 
    1436           58 :       CALL timestop(handle)
    1437              : 
    1438           58 :    END SUBROUTINE cp_fm_to_fm_triangular
    1439              : 
    1440              : ! **************************************************************************************************
    1441              : !> \brief copy just a part ot the matrix
    1442              : !> \param msource ...
    1443              : !> \param mtarget ...
    1444              : !> \param nrow ...
    1445              : !> \param ncol ...
    1446              : !> \param s_firstrow ...
    1447              : !> \param s_firstcol ...
    1448              : !> \param t_firstrow ...
    1449              : !> \param t_firstcol ...
    1450              : ! **************************************************************************************************
    1451              : 
    1452        13764 :    SUBROUTINE cp_fm_to_fm_submat(msource, mtarget, nrow, ncol, s_firstrow, s_firstcol, t_firstrow, t_firstcol)
    1453              : 
    1454              :       TYPE(cp_fm_type), INTENT(IN)             :: msource, mtarget
    1455              :       INTEGER, INTENT(IN)                      :: nrow, ncol, s_firstrow, &
    1456              :                                                   s_firstcol, t_firstrow, &
    1457              :                                                   t_firstcol
    1458              : 
    1459              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_to_fm_submat'
    1460              : 
    1461              :       INTEGER                                  :: handle, i, na, nb, ss, ts
    1462              :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a, b
    1463              : #if defined(__parallel)
    1464              :       INTEGER, DIMENSION(9)                    :: desca, descb
    1465              : #endif
    1466              : 
    1467        13764 :       CALL timeset(routineN, handle)
    1468              : 
    1469        13764 :       a => msource%local_data
    1470        13764 :       b => mtarget%local_data
    1471              : 
    1472        13764 :       na = msource%matrix_struct%nrow_global
    1473        13764 :       nb = mtarget%matrix_struct%nrow_global
    1474              : !    nrow must be <= na and nb
    1475        13764 :       IF (nrow > na) &
    1476            0 :          CPABORT("cannot copy because nrow > number of rows of source matrix")
    1477        13764 :       IF (nrow > nb) &
    1478            0 :          CPABORT("cannot copy because nrow > number of rows of target matrix")
    1479        13764 :       na = msource%matrix_struct%ncol_global
    1480        13764 :       nb = mtarget%matrix_struct%ncol_global
    1481              : !    ncol must be <= na_col and nb_col
    1482        13764 :       IF (ncol > na) &
    1483            0 :          CPABORT("cannot copy because nrow > number of rows of source matrix")
    1484        13764 :       IF (ncol > nb) &
    1485            0 :          CPABORT("cannot copy because nrow > number of rows of target matrix")
    1486              : 
    1487              : #if defined(__parallel)
    1488       137640 :       desca(:) = msource%matrix_struct%descriptor(:)
    1489       137640 :       descb(:) = mtarget%matrix_struct%descriptor(:)
    1490       145704 :       DO i = 0, ncol - 1
    1491       131940 :          ss = s_firstcol + i
    1492       131940 :          ts = t_firstcol + i
    1493       145704 :          CALL pdcopy(nrow, a, s_firstrow, ss, desca, 1, b, t_firstrow, ts, descb, 1)
    1494              :       END DO
    1495              : #else
    1496              :       DO i = 0, ncol - 1
    1497              :          ss = s_firstcol + i
    1498              :          ts = t_firstcol + i
    1499              :          CALL dcopy(nrow, a(s_firstrow:, ss), 1, b(t_firstrow:, ts), 1)
    1500              :       END DO
    1501              : #endif
    1502              : 
    1503        13764 :       CALL timestop(handle)
    1504        13764 :    END SUBROUTINE cp_fm_to_fm_submat
    1505              : 
    1506              : ! **************************************************************************************************
    1507              : !> \brief General copy of a fm matrix to another fm matrix.
    1508              : !>        Uses non-blocking MPI rather than ScaLAPACK.
    1509              : !>
    1510              : !> \param source          input fm matrix
    1511              : !> \param destination     output fm matrix
    1512              : !> \param para_env        parallel environment corresponding to the BLACS env that covers all parts
    1513              : !>                        of the input and output matrices
    1514              : !> \par History
    1515              : !>      31-Jan-2017 : Re-implemented using non-blocking MPI [IainB, MarkT]
    1516              : ! **************************************************************************************************
    1517        18980 :    SUBROUTINE cp_fm_copy_general(source, destination, para_env)
    1518              :       TYPE(cp_fm_type), INTENT(IN)                       :: source, destination
    1519              :       TYPE(mp_para_env_type), INTENT(IN)                 :: para_env
    1520              : 
    1521              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_copy_general'
    1522              : 
    1523              :       INTEGER                                            :: handle
    1524       170820 :       TYPE(copy_info_type)                               :: info
    1525              : 
    1526        18980 :       CALL timeset(routineN, handle)
    1527              : 
    1528        18980 :       CALL cp_fm_start_copy_general(source, destination, para_env, info)
    1529        18980 :       IF (ASSOCIATED(destination%matrix_struct)) THEN
    1530        18966 :          CALL cp_fm_finish_copy_general(destination, info)
    1531              :       END IF
    1532        18980 :       IF (ASSOCIATED(source%matrix_struct)) THEN
    1533        18775 :          CALL cp_fm_cleanup_copy_general(info)
    1534              :       END IF
    1535              : 
    1536        18980 :       CALL timestop(handle)
    1537        18980 :    END SUBROUTINE cp_fm_copy_general
    1538              : 
    1539              : ! **************************************************************************************************
    1540              : !> \brief Initiates the copy operation: get distribution data, post MPI isend and irecvs
    1541              : !> \param source input fm matrix
    1542              : !> \param destination output fm matrix
    1543              : !> \param para_env parallel environment corresponding to the BLACS env that covers all parts
    1544              : !>                        of the input and output matrices
    1545              : !> \param info all of the data that will be needed to complete the copy operation
    1546              : ! **************************************************************************************************
    1547      6635320 :    SUBROUTINE cp_fm_start_copy_general(source, destination, para_env, info)
    1548              :       TYPE(cp_fm_type), INTENT(IN)                       :: source, destination
    1549              :       TYPE(mp_para_env_type), INTENT(IN)                 :: para_env
    1550              :       TYPE(copy_info_type), INTENT(OUT)                  :: info
    1551              : 
    1552              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_start_copy_general'
    1553              : 
    1554              :       INTEGER :: dest_p_i, dest_q_j, global_rank, global_size, handle, i, j, k, mpi_rank, &
    1555              :          ncol_block_dest, ncol_block_src, ncol_local_recv, ncol_local_send, ncols, &
    1556              :          nrow_block_dest, nrow_block_src, nrow_local_recv, nrow_local_send, nrows, p, q, &
    1557              :          recv_rank, recv_size, send_rank, send_size
    1558       663532 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: all_ranks, dest2global, dest_p, dest_q, &
    1559      1327064 :                                                             recv_count, send_count, send_disp, &
    1560       663532 :                                                             source2global, src_p, src_q
    1561       663532 :       INTEGER, ALLOCATABLE, DIMENSION(:, :)              :: dest_blacs2mpi
    1562              :       INTEGER, DIMENSION(2)                              :: dest_block, dest_block_tmp, dest_num_pe, &
    1563              :                                                             src_block, src_block_tmp, src_num_pe
    1564      1327064 :       INTEGER, DIMENSION(:), POINTER                     :: recv_col_indices, recv_row_indices, &
    1565      1327064 :                                                             send_col_indices, send_row_indices
    1566              :       TYPE(cp_fm_struct_type), POINTER                   :: recv_dist, send_dist
    1567      9289448 :       TYPE(mp_request_type), DIMENSION(6)                :: recv_req, send_req
    1568              : 
    1569       663532 :       CALL timeset(routineN, handle)
    1570              : 
    1571              :       IF (.NOT. cp2k_is_parallel) THEN
    1572              :          ! Just copy all of the matrix data into a 'send buffer', to be unpacked later
    1573              :          nrow_local_send = SIZE(source%local_data, 1)
    1574              :          ncol_local_send = SIZE(source%local_data, 2)
    1575              :          ALLOCATE (info%send_buf(nrow_local_send*ncol_local_send))
    1576              :          k = 0
    1577              :          DO j = 1, ncol_local_send
    1578              :             DO i = 1, nrow_local_send
    1579              :                k = k + 1
    1580              :                info%send_buf(k) = source%local_data(i, j)
    1581              :             END DO
    1582              :          END DO
    1583              :       ELSE
    1584       663532 :          NULLIFY (recv_dist, send_dist)
    1585       663532 :          NULLIFY (recv_col_indices, recv_row_indices, send_col_indices, send_row_indices)
    1586              : 
    1587              :          ! The 'global' communicator contains both the source and destination decompositions
    1588       663532 :          global_size = para_env%num_pe
    1589       663532 :          global_rank = para_env%mepos
    1590              : 
    1591              :          ! The source/send decomposition and destination/recv decompositions may only exist on
    1592              :          ! on a subset of the processes involved in the communication
    1593              :          ! Check if the source and/or destination arguments are .not. ASSOCIATED():
    1594              :          ! if so, skip the send / recv parts (since these processes do not participate in the sending/receiving distribution)
    1595       663532 :          IF (ASSOCIATED(destination%matrix_struct)) THEN
    1596       495266 :             recv_dist => destination%matrix_struct
    1597       495266 :             recv_rank = recv_dist%para_env%mepos
    1598              :          ELSE
    1599       168266 :             recv_rank = mp_proc_null
    1600              :          END IF
    1601              : 
    1602       663532 :          IF (ASSOCIATED(source%matrix_struct)) THEN
    1603       578863 :             send_dist => source%matrix_struct
    1604       578863 :             send_rank = send_dist%para_env%mepos
    1605              :          ELSE
    1606        84669 :             send_rank = mp_proc_null
    1607              :          END IF
    1608              : 
    1609              :          ! Map the rank in the source/dest communicator to the global rank
    1610      1990596 :          ALLOCATE (all_ranks(0:global_size - 1))
    1611              : 
    1612       663532 :          CALL para_env%allgather(send_rank, all_ranks)
    1613       663532 :          IF (ASSOCIATED(recv_dist)) THEN
    1614      2476330 :             ALLOCATE (source2global(0:COUNT(all_ranks /= mp_proc_null) - 1))
    1615      1485798 :             DO i = 0, global_size - 1
    1616      1485798 :                IF (all_ranks(i) /= mp_proc_null) THEN
    1617       821194 :                   source2global(all_ranks(i)) = i
    1618              :                END IF
    1619              :             END DO
    1620              :          END IF
    1621              : 
    1622       663532 :          CALL para_env%allgather(recv_rank, all_ranks)
    1623       663532 :          IF (ASSOCIATED(send_dist)) THEN
    1624      2894315 :             ALLOCATE (dest2global(0:COUNT(all_ranks /= mp_proc_null) - 1))
    1625      1736589 :             DO i = 0, global_size - 1
    1626      1736589 :                IF (all_ranks(i) /= mp_proc_null) THEN
    1627       821194 :                   dest2global(all_ranks(i)) = i
    1628              :                END IF
    1629              :             END DO
    1630              :          END IF
    1631       663532 :          DEALLOCATE (all_ranks)
    1632              : 
    1633              :          ! Some data from the two decompositions will be needed by all processes in the global group :
    1634              :          ! process grid shape, block size, and the BLACS-to-MPI mapping
    1635              : 
    1636              :          ! The global root process will receive the data (from the root process in each decomposition)
    1637      4644724 :          send_req(:) = mp_request_null
    1638       663532 :          IF (global_rank == 0) THEN
    1639      2322362 :             recv_req(:) = mp_request_null
    1640       331766 :             CALL para_env%irecv(src_block, mp_any_source, recv_req(1), tag=src_tag)
    1641       331766 :             CALL para_env%irecv(dest_block, mp_any_source, recv_req(2), tag=dest_tag)
    1642       331766 :             CALL para_env%irecv(src_num_pe, mp_any_source, recv_req(3), tag=src_tag)
    1643       331766 :             CALL para_env%irecv(dest_num_pe, mp_any_source, recv_req(4), tag=dest_tag)
    1644              :          END IF
    1645              : 
    1646       663532 :          IF (ASSOCIATED(send_dist)) THEN
    1647       578863 :             IF ((send_rank == 0)) THEN
    1648              :                ! need to use separate buffers here in case this is actually global rank 0
    1649       995298 :                src_block_tmp = [send_dist%nrow_block, send_dist%ncol_block]
    1650       331766 :                CALL para_env%isend(src_block_tmp, 0, send_req(1), tag=src_tag)
    1651       331766 :                CALL para_env%isend(send_dist%context%num_pe, 0, send_req(2), tag=src_tag)
    1652              :             END IF
    1653              :          END IF
    1654              : 
    1655       663532 :          IF (ASSOCIATED(recv_dist)) THEN
    1656       495266 :             IF ((recv_rank == 0)) THEN
    1657       995298 :                dest_block_tmp = [recv_dist%nrow_block, recv_dist%ncol_block]
    1658       331766 :                CALL para_env%isend(dest_block_tmp, 0, send_req(3), tag=dest_tag)
    1659       331766 :                CALL para_env%isend(recv_dist%context%num_pe, 0, send_req(4), tag=dest_tag)
    1660              :             END IF
    1661              :          END IF
    1662              : 
    1663       663532 :          IF (global_rank == 0) THEN
    1664       331766 :             CALL mp_waitall(recv_req(1:4))
    1665              :             ! Now we know the process decomposition, we can allocate the arrays to hold the blacs2mpi mapping
    1666            0 :             ALLOCATE (info%src_blacs2mpi(0:src_num_pe(1) - 1, 0:src_num_pe(2) - 1), &
    1667              :                       dest_blacs2mpi(0:dest_num_pe(1) - 1, 0:dest_num_pe(2) - 1) &
    1668      2322362 :                       )
    1669       331766 :             CALL para_env%irecv(info%src_blacs2mpi, mp_any_source, recv_req(5), tag=src_tag)
    1670       331766 :             CALL para_env%irecv(dest_blacs2mpi, mp_any_source, recv_req(6), tag=dest_tag)
    1671              :          END IF
    1672              : 
    1673       663532 :          IF (ASSOCIATED(send_dist)) THEN
    1674       578863 :             IF ((send_rank == 0)) THEN
    1675       331766 :                CALL para_env%isend(send_dist%context%blacs2mpi(:, :), 0, send_req(5), tag=src_tag)
    1676              :             END IF
    1677              :          END IF
    1678              : 
    1679       663532 :          IF (ASSOCIATED(recv_dist)) THEN
    1680       495266 :             IF ((recv_rank == 0)) THEN
    1681       331766 :                CALL para_env%isend(recv_dist%context%blacs2mpi(:, :), 0, send_req(6), tag=dest_tag)
    1682              :             END IF
    1683              :          END IF
    1684              : 
    1685       663532 :          IF (global_rank == 0) THEN
    1686       331766 :             CALL mp_waitall(recv_req(5:6))
    1687              :          END IF
    1688              : 
    1689              :          ! Finally, broadcast the data to all processes in the global communicator
    1690       663532 :          CALL para_env%bcast(src_block, 0)
    1691       663532 :          CALL para_env%bcast(dest_block, 0)
    1692       663532 :          CALL para_env%bcast(src_num_pe, 0)
    1693       663532 :          CALL para_env%bcast(dest_num_pe, 0)
    1694      1990596 :          info%src_num_pe(1:2) = src_num_pe(1:2)
    1695      1990596 :          info%nblock_src(1:2) = src_block(1:2)
    1696       663532 :          IF (global_rank /= 0) THEN
    1697            0 :             ALLOCATE (info%src_blacs2mpi(0:src_num_pe(1) - 1, 0:src_num_pe(2) - 1), &
    1698            0 :                       dest_blacs2mpi(0:dest_num_pe(1) - 1, 0:dest_num_pe(2) - 1) &
    1699      2322362 :                       )
    1700              :          END IF
    1701       663532 :          CALL para_env%bcast(info%src_blacs2mpi, 0)
    1702       663532 :          CALL para_env%bcast(dest_blacs2mpi, 0)
    1703              : 
    1704       663532 :          recv_size = dest_num_pe(1)*dest_num_pe(2)
    1705       663532 :          send_size = src_num_pe(1)*src_num_pe(2)
    1706       663532 :          info%send_size = send_size
    1707       663532 :          CALL mp_waitall(send_req(:))
    1708              : 
    1709              :          ! Setup is now complete, we can start the actual communication here.
    1710              :          ! The order implemented here is:
    1711              :          !  DEST_1
    1712              :          !      compute recv sizes
    1713              :          !      call irecv
    1714              :          !  SRC_1
    1715              :          !      compute send sizes
    1716              :          !      pack send buffers
    1717              :          !      call isend
    1718              :          !  DEST_2
    1719              :          !      wait for the recvs and unpack buffers (this part eventually will go into another routine to allow comms to run concurrently)
    1720              :          !  SRC_2
    1721              :          !      wait for the sends
    1722              : 
    1723              :          ! DEST_1
    1724       663532 :          IF (ASSOCIATED(recv_dist)) THEN
    1725              :             CALL cp_fm_struct_get(recv_dist, row_indices=recv_row_indices, &
    1726              :                                   col_indices=recv_col_indices &
    1727       495266 :                                   )
    1728       495266 :             info%recv_col_indices => recv_col_indices
    1729       495266 :             info%recv_row_indices => recv_row_indices
    1730       495266 :             nrow_block_src = src_block(1)
    1731       495266 :             ncol_block_src = src_block(2)
    1732      3297524 :             ALLOCATE (recv_count(0:send_size - 1), info%recv_disp(0:send_size - 1), info%recv_request(0:send_size - 1))
    1733              : 
    1734              :             ! Determine the recv counts, allocate the receive buffers, call mpi_irecv for all the non-zero sized receives
    1735       495266 :             nrow_local_recv = recv_dist%nrow_locals(recv_dist%context%mepos(1))
    1736       495266 :             ncol_local_recv = recv_dist%ncol_locals(recv_dist%context%mepos(2))
    1737       495266 :             info%nlocal_recv(1) = nrow_local_recv
    1738       495266 :             info%nlocal_recv(2) = ncol_local_recv
    1739              :             ! Initialise src_p, src_q arrays (sized using number of rows/cols in the receiving distribution)
    1740      2476330 :             ALLOCATE (src_p(nrow_local_recv), src_q(ncol_local_recv))
    1741     10030703 :             DO i = 1, nrow_local_recv
    1742              :                ! For each local row we will receive, we look up its global row (in recv_row_indices),
    1743              :                ! then work out which row block it comes from, and which process row that row block comes from.
    1744     10030703 :                src_p(i) = MOD(((recv_row_indices(i) - 1)/nrow_block_src), src_num_pe(1))
    1745              :             END DO
    1746     14479942 :             DO j = 1, ncol_local_recv
    1747              :                ! Similarly for the columns
    1748     14479942 :                src_q(j) = MOD(((recv_col_indices(j) - 1)/ncol_block_src), src_num_pe(2))
    1749              :             END DO
    1750              :             ! src_p/q now contains the process row/column ID that will send data to that row/column
    1751              : 
    1752       990532 :             DO q = 0, src_num_pe(2) - 1
    1753     14479942 :                ncols = COUNT(src_q == q)
    1754      1811726 :                DO p = 0, src_num_pe(1) - 1
    1755     17538594 :                   nrows = COUNT(src_p == p)
    1756              :                   ! Use the send_dist here as we are looking up the processes where the data comes from
    1757      1316460 :                   recv_count(info%src_blacs2mpi(p, q)) = nrows*ncols
    1758              :                END DO
    1759              :             END DO
    1760       495266 :             DEALLOCATE (src_p, src_q)
    1761              : 
    1762              :             ! Use one long buffer (and displacements into that buffer)
    1763              :             !     this prevents the need for a rectangular array where not all elements will be populated
    1764      2306992 :             ALLOCATE (info%recv_buf(SUM(recv_count(:))))
    1765       495266 :             info%recv_disp(0) = 0
    1766       821194 :             DO i = 1, send_size - 1
    1767       821194 :                info%recv_disp(i) = info%recv_disp(i - 1) + recv_count(i - 1)
    1768              :             END DO
    1769              : 
    1770              :             ! Issue receive calls on ranks which expect data
    1771      1316460 :             DO k = 0, send_size - 1
    1772      1316460 :                IF (recv_count(k) > 0) THEN
    1773              :                   CALL para_env%irecv(info%recv_buf(info%recv_disp(k) + 1:info%recv_disp(k) + recv_count(k)), &
    1774       664094 :                                       source2global(k), info%recv_request(k))
    1775              :                END IF
    1776              :             END DO
    1777       495266 :             DEALLOCATE (source2global)
    1778              :          END IF ! ASSOCIATED(recv_dist)
    1779              : 
    1780              :          ! SRC_1
    1781       663532 :          IF (ASSOCIATED(send_dist)) THEN
    1782              :             CALL cp_fm_struct_get(send_dist, row_indices=send_row_indices, &
    1783              :                                   col_indices=send_col_indices &
    1784       578863 :                                   )
    1785       578863 :             nrow_block_dest = dest_block(1)
    1786       578863 :             ncol_block_dest = dest_block(2)
    1787      3715509 :             ALLOCATE (send_count(0:recv_size - 1), send_disp(0:recv_size - 1), info%send_request(0:recv_size - 1))
    1788              : 
    1789              :             ! Determine the send counts, allocate the send buffers
    1790       578863 :             nrow_local_send = send_dist%nrow_locals(send_dist%context%mepos(1))
    1791       578863 :             ncol_local_send = send_dist%ncol_locals(send_dist%context%mepos(2))
    1792              : 
    1793              :             ! Initialise dest_p, dest_q arrays (sized nrow_local, ncol_local)
    1794              :             !   i.e. number of rows,cols in the sending distribution
    1795      2894315 :             ALLOCATE (dest_p(nrow_local_send), dest_q(ncol_local_send))
    1796              : 
    1797     10114300 :             DO i = 1, nrow_local_send
    1798              :                ! Use the send_dist%row_indices() here (we are looping over the local rows we will send)
    1799     10114300 :                dest_p(i) = MOD(((send_row_indices(i) - 1)/nrow_block_dest), dest_num_pe(1))
    1800              :             END DO
    1801     16962651 :             DO j = 1, ncol_local_send
    1802     16962651 :                dest_q(j) = MOD(((send_col_indices(j) - 1)/ncol_block_dest), dest_num_pe(2))
    1803              :             END DO
    1804              :             ! dest_p/q now contain the process row/column ID that will receive data from that row/column
    1805              : 
    1806      1157726 :             DO q = 0, dest_num_pe(2) - 1
    1807     16962651 :                ncols = COUNT(dest_q == q)
    1808      1978920 :                DO p = 0, dest_num_pe(1) - 1
    1809     15139984 :                   nrows = COUNT(dest_p == p)
    1810      1400057 :                   send_count(dest_blacs2mpi(p, q)) = nrows*ncols
    1811              :                END DO
    1812              :             END DO
    1813       578863 :             DEALLOCATE (dest_p, dest_q)
    1814              : 
    1815              :             ! Allocate the send buffer using send_count -- and calculate the offset into the buffer for each process
    1816      2557783 :             ALLOCATE (info%send_buf(SUM(send_count(:))))
    1817       578863 :             send_disp(0) = 0
    1818       821194 :             DO k = 1, recv_size - 1
    1819       821194 :                send_disp(k) = send_disp(k - 1) + send_count(k - 1)
    1820              :             END DO
    1821              : 
    1822              :             ! Loop over the smat, pack the send buffers
    1823       578863 :             send_count(:) = 0
    1824     16962651 :             DO j = 1, ncol_local_send
    1825              :                ! Use send_col_indices and row_indices here, as we are looking up the global row/column number of local rows.
    1826     16383788 :                dest_q_j = MOD(((send_col_indices(j) - 1)/ncol_block_dest), dest_num_pe(2))
    1827    540934331 :                DO i = 1, nrow_local_send
    1828    523971680 :                   dest_p_i = MOD(((send_row_indices(i) - 1)/nrow_block_dest), dest_num_pe(1))
    1829    523971680 :                   mpi_rank = dest_blacs2mpi(dest_p_i, dest_q_j)
    1830    523971680 :                   send_count(mpi_rank) = send_count(mpi_rank) + 1
    1831    540355468 :                   info%send_buf(send_disp(mpi_rank) + send_count(mpi_rank)) = source%local_data(i, j)
    1832              :                END DO
    1833              :             END DO
    1834              : 
    1835              :             ! For each non-zero send_count, call mpi_isend
    1836      1400057 :             DO k = 0, recv_size - 1
    1837      1400057 :                IF (send_count(k) > 0) THEN
    1838              :                   CALL para_env%isend(info%send_buf(send_disp(k) + 1:send_disp(k) + send_count(k)), &
    1839       664094 :                                       dest2global(k), info%send_request(k))
    1840              :                END IF
    1841              :             END DO
    1842       578863 :             DEALLOCATE (send_count, send_disp, dest2global)
    1843              :          END IF ! ASSOCIATED(send_dist)
    1844       663532 :          DEALLOCATE (dest_blacs2mpi)
    1845              : 
    1846              :       END IF !IF (.NOT. cp2k_is_parallel)
    1847              : 
    1848       663532 :       CALL timestop(handle)
    1849              : 
    1850      2654128 :    END SUBROUTINE cp_fm_start_copy_general
    1851              : 
    1852              : ! **************************************************************************************************
    1853              : !> \brief Completes the copy operation: wait for comms, unpack, clean up MPI state
    1854              : !> \param destination output fm matrix
    1855              : !> \param info all of the data that will be needed to complete the copy operation
    1856              : ! **************************************************************************************************
    1857       495266 :    SUBROUTINE cp_fm_finish_copy_general(destination, info)
    1858              :       TYPE(cp_fm_type), INTENT(IN)                       :: destination
    1859              :       TYPE(copy_info_type), INTENT(INOUT)                :: info
    1860              : 
    1861              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_finish_copy_general'
    1862              : 
    1863              :       INTEGER                                            :: handle, i, j, k, mpi_rank, send_size, &
    1864              :                                                             src_p_i, src_q_j
    1865       495266 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: recv_count
    1866              :       INTEGER, DIMENSION(2)                              :: nblock_src, nlocal_recv, src_num_pe
    1867       495266 :       INTEGER, DIMENSION(:), POINTER                     :: recv_col_indices, recv_row_indices
    1868              : 
    1869       495266 :       CALL timeset(routineN, handle)
    1870              : 
    1871              :       IF (.NOT. cp2k_is_parallel) THEN
    1872              :          ! Now unpack the data from the 'send buffer'
    1873              :          k = 0
    1874              :          DO j = 1, SIZE(destination%local_data, 2)
    1875              :             DO i = 1, SIZE(destination%local_data, 1)
    1876              :                k = k + 1
    1877              :                destination%local_data(i, j) = info%send_buf(k)
    1878              :             END DO
    1879              :          END DO
    1880              :          DEALLOCATE (info%send_buf)
    1881              :       ELSE
    1882              :          ! Set up local variables ...
    1883       495266 :          send_size = info%send_size
    1884      1485798 :          nlocal_recv(1:2) = info%nlocal_recv(:)
    1885      1485798 :          nblock_src(1:2) = info%nblock_src(:)
    1886      1485798 :          src_num_pe(1:2) = info%src_num_pe(:)
    1887       495266 :          recv_col_indices => info%recv_col_indices
    1888       495266 :          recv_row_indices => info%recv_row_indices
    1889              : 
    1890              :          ! ... use the local variables to do the work
    1891              :          ! DEST_2
    1892       495266 :          CALL mp_waitall(info%recv_request(:))
    1893      1485798 :          ALLOCATE (recv_count(0:send_size - 1))
    1894              :          ! Loop over the rmat, filling it in with data from the recv buffers
    1895              :          ! (here the block sizes, num_pes refer to the distribution of the source matrix)
    1896       495266 :          recv_count(:) = 0
    1897     14479942 :          DO j = 1, nlocal_recv(2)
    1898     13984676 :             src_q_j = MOD(((recv_col_indices(j) - 1)/nblock_src(2)), src_num_pe(2))
    1899    538451622 :             DO i = 1, nlocal_recv(1)
    1900    523971680 :                src_p_i = MOD(((recv_row_indices(i) - 1)/nblock_src(1)), src_num_pe(1))
    1901    523971680 :                mpi_rank = info%src_blacs2mpi(src_p_i, src_q_j)
    1902    523971680 :                recv_count(mpi_rank) = recv_count(mpi_rank) + 1
    1903    537956356 :                destination%local_data(i, j) = info%recv_buf(info%recv_disp(mpi_rank) + recv_count(mpi_rank))
    1904              :             END DO
    1905              :          END DO
    1906       495266 :          DEALLOCATE (recv_count, info%recv_disp, info%recv_request, info%recv_buf, info%src_blacs2mpi)
    1907              :          ! Invalidate the stored state
    1908              :          NULLIFY (info%recv_col_indices, &
    1909       495266 :                   info%recv_row_indices)
    1910              : 
    1911              :       END IF
    1912              : 
    1913       495266 :       CALL timestop(handle)
    1914              : 
    1915       495266 :    END SUBROUTINE cp_fm_finish_copy_general
    1916              : 
    1917              : ! **************************************************************************************************
    1918              : !> \brief Completes the copy operation: wait for comms clean up MPI state
    1919              : !> \param info all of the data that will be needed to complete the copy operation
    1920              : ! **************************************************************************************************
    1921       578167 :    SUBROUTINE cp_fm_cleanup_copy_general(info)
    1922              :       TYPE(copy_info_type), INTENT(INOUT)                :: info
    1923              : 
    1924              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_cleanup_copy_general'
    1925              : 
    1926              :       INTEGER                                            :: handle
    1927              : 
    1928       578167 :       CALL timeset(routineN, handle)
    1929              : 
    1930              :       IF (.NOT. cp2k_is_parallel) THEN
    1931              :          ! Don't do anything - no MPI state for the serial case
    1932              :       ELSE
    1933              :          ! SRC_2
    1934              :          ! If this process is also in the destination decomposition, this deallocate
    1935              :          ! Was already done in cp_fm_finish_copy_general
    1936       578167 :          IF (ALLOCATED(info%src_blacs2mpi)) THEN
    1937       167570 :             DEALLOCATE (info%src_blacs2mpi)
    1938              :          END IF
    1939       578167 :          CALL mp_waitall(info%send_request)
    1940       578167 :          DEALLOCATE (info%send_request, info%send_buf)
    1941              : 
    1942              :       END IF
    1943              : 
    1944       578167 :       CALL timestop(handle)
    1945              : 
    1946       578167 :    END SUBROUTINE cp_fm_cleanup_copy_general
    1947              : 
    1948              : ! **************************************************************************************************
    1949              : !> \brief General copy of a submatrix of fm matrix to  a submatrix of another fm matrix.
    1950              : !>        The two matrices can have different contexts.
    1951              : !>
    1952              : !>        Summary of distribution routines for dense matrices
    1953              : !>        The following will copy A(iA:iA+M-1,jA:jA+N-1) to B(iB:iB+M-1,jB:jB+N-1):
    1954              : !>
    1955              : !>        call pdgemr2d(M,N,Aloc,iA,jA,descA,Bloc,iB,jB,descB,context)
    1956              : !>
    1957              : !>        A process that is not a part of the context of A should set descA(2)
    1958              : !>        to -1, and similarly for B.
    1959              : !>
    1960              : !> \param source          input fm matrix
    1961              : !> \param destination     output fm matrix
    1962              : !> \param nrows           number of rows of sub matrix to be copied
    1963              : !> \param ncols           number of cols of sub matrix to be copied
    1964              : !> \param s_firstrow      starting global row index of sub matrix in source
    1965              : !> \param s_firstcol      starting global col index of sub matrix in source
    1966              : !> \param d_firstrow      starting global row index of sub matrix in destination
    1967              : !> \param d_firstcol      starting global col index of sub matrix in destination
    1968              : !> \param global_context  process grid that covers all parts of either A or B.
    1969              : ! **************************************************************************************************
    1970        11579 :    SUBROUTINE cp_fm_to_fm_submat_general(source, &
    1971              :                                          destination, &
    1972              :                                          nrows, &
    1973              :                                          ncols, &
    1974              :                                          s_firstrow, &
    1975              :                                          s_firstcol, &
    1976              :                                          d_firstrow, &
    1977              :                                          d_firstcol, &
    1978              :                                          global_context)
    1979              : 
    1980              :       TYPE(cp_fm_type), INTENT(IN)                       :: source, destination
    1981              :       INTEGER, INTENT(IN)                                :: nrows, ncols, s_firstrow, s_firstcol, &
    1982              :                                                             d_firstrow, d_firstcol
    1983              : 
    1984              :       CLASS(cp_blacs_type), INTENT(IN)        :: global_context
    1985              : 
    1986              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_to_fm_submat_general'
    1987              : 
    1988              :       LOGICAL                                  :: debug
    1989              :       INTEGER                                  :: handle
    1990              : #if defined(__parallel)
    1991              :       INTEGER, DIMENSION(9)                    :: desca, descb
    1992              :       REAL(KIND=dp), DIMENSION(1, 1), TARGET   :: dummy
    1993        11579 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: smat, dmat
    1994              : #endif
    1995              : 
    1996        11579 :       CALL timeset(routineN, handle)
    1997              : 
    1998        11579 :       debug = debug_this_module
    1999              : 
    2000              :       IF (.NOT. cp2k_is_parallel) THEN
    2001              :          CALL cp_fm_to_fm_submat(source, &
    2002              :                                  destination, &
    2003              :                                  nrows, &
    2004              :                                  ncols, &
    2005              :                                  s_firstrow, &
    2006              :                                  s_firstcol, &
    2007              :                                  d_firstrow, &
    2008              :                                  d_firstcol)
    2009              :       ELSE
    2010              : #ifdef __parallel
    2011              :          NULLIFY (smat, dmat)
    2012              :          ! check whether source is available on this process
    2013        11579 :          IF (ASSOCIATED(source%matrix_struct)) THEN
    2014       115790 :             desca = source%matrix_struct%descriptor
    2015        11579 :             IF (nrows > source%matrix_struct%nrow_global) &
    2016            0 :                CPABORT("nrows is greater than nrow_global of source")
    2017        11579 :             IF (ncols > source%matrix_struct%ncol_global) &
    2018            0 :                CPABORT("ncols is greater than ncol_global of source")
    2019        11579 :             smat => source%local_data
    2020              :          ELSE
    2021            0 :             desca = -1
    2022            0 :             smat => dummy
    2023              :          END IF
    2024              :          ! check destination is available on this process
    2025        11579 :          IF (ASSOCIATED(destination%matrix_struct)) THEN
    2026       115790 :             descb = destination%matrix_struct%descriptor
    2027        11579 :             IF (nrows > destination%matrix_struct%nrow_global) &
    2028            0 :                CPABORT("nrows is greater than nrow_global of destination")
    2029        11579 :             IF (ncols > destination%matrix_struct%ncol_global) &
    2030            0 :                CPABORT("ncols is greater than ncol_global of destination")
    2031        11579 :             dmat => destination%local_data
    2032              :          ELSE
    2033            0 :             descb = -1
    2034            0 :             dmat => dummy
    2035              :          END IF
    2036              :          ! do copy
    2037              : 
    2038              :          CALL pdgemr2d(nrows, &
    2039              :                        ncols, &
    2040              :                        smat, &
    2041              :                        s_firstrow, &
    2042              :                        s_firstcol, &
    2043              :                        desca, &
    2044              :                        dmat, &
    2045              :                        d_firstrow, &
    2046              :                        d_firstcol, &
    2047              :                        descb, &
    2048        11579 :                        global_context%get_handle())
    2049              : #else
    2050              :          MARK_USED(global_context)
    2051              :          CPABORT("this subroutine only supports SCALAPACK")
    2052              : #endif
    2053              :       END IF
    2054              : 
    2055        11579 :       CALL timestop(handle)
    2056              : 
    2057        11579 :    END SUBROUTINE cp_fm_to_fm_submat_general
    2058              : 
    2059              : ! **************************************************************************************************
    2060              : !> \brief ...
    2061              : !> \param matrix ...
    2062              : !> \param irow_global ...
    2063              : !> \param icol_global ...
    2064              : !> \param alpha ...
    2065              : ! **************************************************************************************************
    2066          240 :    SUBROUTINE cp_fm_add_to_element(matrix, irow_global, icol_global, alpha)
    2067              : 
    2068              :       ! Add alpha to the matrix element specified by the global indices
    2069              :       ! irow_global and icol_global
    2070              : 
    2071              :       ! - Creation (05.05.06,MK)
    2072              : 
    2073              :       TYPE(cp_fm_type), INTENT(IN)          :: matrix
    2074              :       INTEGER, INTENT(IN)                      :: irow_global, icol_global
    2075              :       REAL(KIND=dp), INTENT(IN)                :: alpha
    2076              : 
    2077              :       INTEGER                                  :: mypcol, myprow, npcol, nprow
    2078          240 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
    2079              :       TYPE(cp_blacs_env_type), POINTER         :: context
    2080              : #if defined(__parallel)
    2081              :       INTEGER                                  :: icol_local, ipcol, iprow, &
    2082              :                                                   irow_local
    2083              :       INTEGER, DIMENSION(9)                    :: desca
    2084              : #endif
    2085              : 
    2086          240 :       context => matrix%matrix_struct%context
    2087              : 
    2088          240 :       myprow = context%mepos(1)
    2089          240 :       mypcol = context%mepos(2)
    2090              : 
    2091          240 :       nprow = context%num_pe(1)
    2092          240 :       npcol = context%num_pe(2)
    2093              : 
    2094          240 :       a => matrix%local_data
    2095              : 
    2096              : #if defined(__parallel)
    2097              : 
    2098         2400 :       desca(:) = matrix%matrix_struct%descriptor(:)
    2099              : 
    2100              :       CALL infog2l(irow_global, icol_global, desca, nprow, npcol, myprow, mypcol, &
    2101          240 :                    irow_local, icol_local, iprow, ipcol)
    2102              : 
    2103          240 :       IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
    2104          120 :          a(irow_local, icol_local) = a(irow_local, icol_local) + alpha
    2105              :       END IF
    2106              : 
    2107              : #else
    2108              : 
    2109              :       a(irow_global, icol_global) = a(irow_global, icol_global) + alpha
    2110              : 
    2111              : #endif
    2112              : 
    2113          240 :    END SUBROUTINE cp_fm_add_to_element
    2114              : 
    2115              : ! **************************************************************************************************
    2116              : !> \brief ...
    2117              : !> \param fm ...
    2118              : !> \param unit ...
    2119              : ! **************************************************************************************************
    2120        75035 :    SUBROUTINE cp_fm_write_unformatted(fm, unit)
    2121              :       TYPE(cp_fm_type), INTENT(IN)             :: fm
    2122              :       INTEGER, INTENT(IN)                      :: unit
    2123              : 
    2124              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_write_unformatted'
    2125              : 
    2126              :       INTEGER                                  :: handle, j, max_block, &
    2127              :                                                   ncol_global, nrow_global
    2128              :       TYPE(mp_para_env_type), POINTER          :: para_env
    2129              : #if defined(__parallel)
    2130              :       INTEGER                                  :: i, i_block, icol_local, &
    2131              :                                                   in, info, ipcol, &
    2132              :                                                   iprow, irow_local, &
    2133              :                                                   mepos, &
    2134              :                                                   num_pe, rb, tag
    2135              :       INTEGER, DIMENSION(9)                    :: desc
    2136        75035 :       REAL(KIND=dp), DIMENSION(:), POINTER     :: vecbuf
    2137              :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: newdat
    2138              :       TYPE(cp_blacs_type) :: ictxt_loc
    2139              :       INTEGER, EXTERNAL                        :: numroc
    2140              : #endif
    2141              : 
    2142        75035 :       CALL timeset(routineN, handle)
    2143              :       CALL cp_fm_get_info(fm, nrow_global=nrow_global, ncol_global=ncol_global, ncol_block=max_block, &
    2144        75035 :                           para_env=para_env)
    2145              : 
    2146              : #if defined(__parallel)
    2147        75035 :       num_pe = para_env%num_pe
    2148        75035 :       mepos = para_env%mepos
    2149        75035 :       rb = nrow_global
    2150        75035 :       tag = 0
    2151              :       ! get a new context
    2152        75035 :       CALL ictxt_loc%gridinit(para_env, 'R', 1, num_pe)
    2153        75035 :       CALL descinit(desc, nrow_global, ncol_global, rb, max_block, 0, 0, ictxt_loc%get_handle(), nrow_global, info)
    2154        75035 :       CPASSERT(info == 0)
    2155              :       ASSOCIATE (nprow => ictxt_loc%num_pe(1), npcol => ictxt_loc%num_pe(2), &
    2156              :                  myprow => ictxt_loc%mepos(1), mypcol => ictxt_loc%mepos(2))
    2157       150070 :          in = numroc(ncol_global, max_block, mypcol, 0, npcol)
    2158              : 
    2159       300140 :          ALLOCATE (newdat(nrow_global, MAX(1, in)))
    2160              : 
    2161              :          ! do the actual scalapack to cols reordering
    2162              :          CALL pdgemr2d(nrow_global, ncol_global, fm%local_data, 1, 1, &
    2163              :                        fm%matrix_struct%descriptor, &
    2164        75035 :                        newdat, 1, 1, desc, ictxt_loc%get_handle())
    2165              : 
    2166       225105 :          ALLOCATE (vecbuf(nrow_global*max_block))
    2167     29993577 :          vecbuf = HUGE(1.0_dp) ! init for valgrind
    2168              : 
    2169       323327 :          DO i = 1, ncol_global, MAX(max_block, 1)
    2170       173257 :             i_block = MIN(max_block, ncol_global - i + 1)
    2171              :             CALL infog2l(1, i, desc, nprow, npcol, myprow, mypcol, &
    2172       173257 :                          irow_local, icol_local, iprow, ipcol)
    2173       173257 :             IF (ipcol == mypcol) THEN
    2174       661752 :                DO j = 1, i_block
    2175     76560060 :                   vecbuf((j - 1)*nrow_global + 1:nrow_global*j) = newdat(:, icol_local + j - 1)
    2176              :                END DO
    2177              :             END IF
    2178              : 
    2179       173257 :             IF (ipcol == 0) THEN
    2180              :                ! do nothing
    2181              :             ELSE
    2182        58944 :                IF (ipcol == mypcol) THEN
    2183     16890600 :                   CALL para_env%send(vecbuf(:), 0, tag)
    2184              :                END IF
    2185        58944 :                IF (mypcol == 0) THEN
    2186     33751728 :                   CALL para_env%recv(vecbuf(:), ipcol, tag)
    2187              :                END IF
    2188              :             END IF
    2189              : 
    2190       248292 :             IF (unit > 0) THEN
    2191       659136 :                DO j = 1, i_block
    2192     38583974 :                   WRITE (unit) vecbuf((j - 1)*nrow_global + 1:nrow_global*j)
    2193              :                END DO
    2194              :             END IF
    2195              : 
    2196              :          END DO
    2197              :       END ASSOCIATE
    2198        75035 :       DEALLOCATE (vecbuf)
    2199              : 
    2200        75035 :       CALL ictxt_loc%gridexit()
    2201              : 
    2202        75035 :       DEALLOCATE (newdat)
    2203              : 
    2204              : #else
    2205              : 
    2206              :       IF (unit > 0) THEN
    2207              :          DO j = 1, ncol_global
    2208              :             WRITE (unit) fm%local_data(:, j)
    2209              :          END DO
    2210              :       END IF
    2211              : 
    2212              : #endif
    2213        75035 :       CALL timestop(handle)
    2214              : 
    2215       375175 :    END SUBROUTINE cp_fm_write_unformatted
    2216              : 
    2217              : ! **************************************************************************************************
    2218              : !> \brief Write out a full matrix in plain text.
    2219              : !> \param fm the matrix to be outputted
    2220              : !> \param unit the unit number for I/O
    2221              : !> \param header optional header
    2222              : !> \param value_format ...
    2223              : ! **************************************************************************************************
    2224         1378 :    SUBROUTINE cp_fm_write_formatted(fm, unit, header, value_format)
    2225              :       TYPE(cp_fm_type), INTENT(IN)             :: fm
    2226              :       INTEGER, INTENT(IN)                      :: unit
    2227              :       CHARACTER(LEN=*), INTENT(IN), OPTIONAL   :: header, value_format
    2228              : 
    2229              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_write_formatted'
    2230              : 
    2231              :       CHARACTER(LEN=21)                        :: my_value_format
    2232              :       INTEGER                                  :: handle, i, j, max_block, &
    2233              :                                                   ncol_global, nrow_global
    2234              :       TYPE(mp_para_env_type), POINTER          :: para_env
    2235              : #if defined(__parallel)
    2236              :       INTEGER                                  :: i_block, icol_local, &
    2237              :                                                   in, info, ipcol, &
    2238              :                                                   iprow, irow_local, &
    2239              :                                                   mepos, num_pe, rb, tag, k, &
    2240              :                                                   icol, irow
    2241              :       INTEGER, DIMENSION(9)                    :: desc
    2242         1378 :       REAL(KIND=dp), DIMENSION(:), POINTER     :: vecbuf
    2243         1378 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: newdat
    2244              :       TYPE(cp_blacs_type) :: ictxt_loc
    2245              :       INTEGER, EXTERNAL                        :: numroc
    2246              : #endif
    2247              : 
    2248         1378 :       CALL timeset(routineN, handle)
    2249              :       CALL cp_fm_get_info(fm, nrow_global=nrow_global, ncol_global=ncol_global, ncol_block=max_block, &
    2250         1378 :                           para_env=para_env)
    2251              : 
    2252         1378 :       IF (PRESENT(value_format)) THEN
    2253            0 :          CPASSERT(LEN_TRIM(ADJUSTL(value_format)) < 11)
    2254            0 :          my_value_format = "(I10, I10, "//TRIM(ADJUSTL(value_format))//")"
    2255              :       ELSE
    2256         1378 :          my_value_format = "(I10, I10, ES24.12)"
    2257              :       END IF
    2258              : 
    2259         1378 :       IF (unit > 0) THEN
    2260           11 :          IF (PRESENT(header)) WRITE (unit, *) header
    2261           11 :          WRITE (unit, "(A2, A8, A10, A24)") "#", "Row", "Column", ADJUSTL("Value")
    2262              :       END IF
    2263              : 
    2264              : #if defined(__parallel)
    2265         1378 :       num_pe = para_env%num_pe
    2266         1378 :       mepos = para_env%mepos
    2267         1378 :       rb = nrow_global
    2268         1378 :       tag = 0
    2269              :       ! get a new context
    2270         1378 :       CALL ictxt_loc%gridinit(para_env, 'R', 1, num_pe)
    2271         1378 :       CALL descinit(desc, nrow_global, ncol_global, rb, max_block, 0, 0, ictxt_loc%get_handle(), nrow_global, info)
    2272         1378 :       CPASSERT(info == 0)
    2273              :       ASSOCIATE (nprow => ictxt_loc%num_pe(1), npcol => ictxt_loc%num_pe(2), &
    2274              :                  myprow => ictxt_loc%mepos(1), mypcol => ictxt_loc%mepos(2))
    2275         2756 :          in = numroc(ncol_global, max_block, mypcol, 0, npcol)
    2276              : 
    2277         5512 :          ALLOCATE (newdat(nrow_global, MAX(1, in)))
    2278              : 
    2279              :          ! do the actual scalapack to cols reordering
    2280              :          CALL pdgemr2d(nrow_global, ncol_global, fm%local_data, 1, 1, &
    2281              :                        fm%matrix_struct%descriptor, &
    2282         1378 :                        newdat, 1, 1, desc, ictxt_loc%get_handle())
    2283              : 
    2284         4134 :          ALLOCATE (vecbuf(nrow_global*max_block))
    2285        14136 :          vecbuf = HUGE(1.0_dp) ! init for valgrind
    2286         1378 :          irow = 1
    2287         1378 :          icol = 1
    2288              : 
    2289         5510 :          DO i = 1, ncol_global, MAX(max_block, 1)
    2290         2754 :             i_block = MIN(max_block, ncol_global - i + 1)
    2291              :             CALL infog2l(1, i, desc, nprow, npcol, myprow, mypcol, &
    2292         2754 :                          irow_local, icol_local, iprow, ipcol)
    2293         2754 :             IF (ipcol == mypcol) THEN
    2294         2922 :                DO j = 1, i_block
    2295        20802 :                   vecbuf((j - 1)*nrow_global + 1:nrow_global*j) = newdat(:, icol_local + j - 1)
    2296              :                END DO
    2297              :             END IF
    2298              : 
    2299         2754 :             IF (ipcol == 0) THEN
    2300              :                ! do nothing
    2301              :             ELSE
    2302         1370 :                IF (ipcol == mypcol) THEN
    2303         5019 :                   CALL para_env%send(vecbuf(:), 0, tag)
    2304              :                END IF
    2305         1370 :                IF (mypcol == 0) THEN
    2306         9353 :                   CALL para_env%recv(vecbuf(:), ipcol, tag)
    2307              :                END IF
    2308              :             END IF
    2309              : 
    2310         4132 :             IF (unit > 0) THEN
    2311          210 :                DO j = 1, i_block
    2312         6438 :                   DO k = (j - 1)*nrow_global + 1, nrow_global*j
    2313         6228 :                      WRITE (UNIT=unit, FMT=my_value_format) irow, icol, vecbuf(k)
    2314         6228 :                      irow = irow + 1
    2315         6417 :                      IF (irow > nrow_global) THEN
    2316          189 :                         irow = 1
    2317          189 :                         icol = icol + 1
    2318              :                      END IF
    2319              :                   END DO
    2320              :                END DO
    2321              :             END IF
    2322              : 
    2323              :          END DO
    2324              :       END ASSOCIATE
    2325         1378 :       DEALLOCATE (vecbuf)
    2326              : 
    2327         1378 :       CALL ictxt_loc%gridexit()
    2328              : 
    2329         1378 :       DEALLOCATE (newdat)
    2330              : 
    2331              : #else
    2332              : 
    2333              :       IF (unit > 0) THEN
    2334              :          DO j = 1, ncol_global
    2335              :             DO i = 1, nrow_global
    2336              :                WRITE (UNIT=unit, FMT=my_value_format) i, j, fm%local_data(i, j)
    2337              :             END DO
    2338              :          END DO
    2339              :       END IF
    2340              : 
    2341              : #endif
    2342         1378 :       CALL timestop(handle)
    2343              : 
    2344         6890 :    END SUBROUTINE cp_fm_write_formatted
    2345              : 
    2346              : ! **************************************************************************************************
    2347              : !> \brief ...
    2348              : !> \param fm ...
    2349              : !> \param unit ...
    2350              : ! **************************************************************************************************
    2351         1974 :    SUBROUTINE cp_fm_read_unformatted(fm, unit)
    2352              :       TYPE(cp_fm_type), INTENT(INOUT)          :: fm
    2353              :       INTEGER, INTENT(IN)                      :: unit
    2354              : 
    2355              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_read_unformatted'
    2356              : 
    2357              :       INTEGER                                  :: handle, j, max_block, &
    2358              :                                                   ncol_global, nrow_global
    2359              :       TYPE(mp_para_env_type), POINTER          :: para_env
    2360              : #if defined(__parallel)
    2361              :       INTEGER                                  :: k, n_cols
    2362              :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: vecbuf
    2363              : #endif
    2364              : 
    2365         1974 :       CALL timeset(routineN, handle)
    2366              : 
    2367              :       CALL cp_fm_get_info(fm, nrow_global=nrow_global, ncol_global=ncol_global, ncol_block=max_block, &
    2368         1974 :                           para_env=para_env)
    2369              : 
    2370              : #if defined(__parallel)
    2371              : 
    2372              :       ! the parallel case could be made more efficient (see cp_fm_write_unformatted)
    2373              : 
    2374         7896 :       ALLOCATE (vecbuf(nrow_global, max_block))
    2375              : 
    2376         6152 :       DO j = 1, ncol_global, max_block
    2377              : 
    2378         4178 :          n_cols = MIN(max_block, ncol_global - j + 1)
    2379         4178 :          IF (para_env%mepos == 0) THEN
    2380        16918 :             DO k = 1, n_cols
    2381       368077 :                READ (unit) vecbuf(:, k)
    2382              :             END DO
    2383              :          END IF
    2384      1482498 :          CALL para_env%bcast(vecbuf, 0)
    2385         6152 :          CALL cp_fm_set_submatrix(fm, vecbuf, start_row=1, start_col=j, n_cols=n_cols)
    2386              : 
    2387              :       END DO
    2388              : 
    2389         1974 :       DEALLOCATE (vecbuf)
    2390              : 
    2391              : #else
    2392              : 
    2393              :       DO j = 1, ncol_global
    2394              :          READ (unit) fm%local_data(:, j)
    2395              :       END DO
    2396              : 
    2397              : #endif
    2398              : 
    2399         1974 :       CALL timestop(handle)
    2400              : 
    2401         1974 :    END SUBROUTINE cp_fm_read_unformatted
    2402              : 
    2403              : ! **************************************************************************************************
    2404              : !> \brief ...
    2405              : !> \param mm_type ...
    2406              : ! **************************************************************************************************
    2407        10991 :    SUBROUTINE cp_fm_setup(mm_type)
    2408              :       INTEGER, INTENT(IN)                                :: mm_type
    2409              : 
    2410        10991 :       cp_fm_mm_type = mm_type
    2411        10991 :    END SUBROUTINE cp_fm_setup
    2412              : 
    2413              : ! **************************************************************************************************
    2414              : !> \brief ...
    2415              : !> \return ...
    2416              : ! **************************************************************************************************
    2417      1961671 :    FUNCTION cp_fm_get_mm_type() RESULT(res)
    2418              :       INTEGER                                            :: res
    2419              : 
    2420      1961671 :       res = cp_fm_mm_type
    2421      1961671 :    END FUNCTION cp_fm_get_mm_type
    2422              : 
    2423              : ! **************************************************************************************************
    2424              : !> \brief ...
    2425              : !> \param ictxt ...
    2426              : !> \param prec ...
    2427              : !> \return ...
    2428              : ! **************************************************************************************************
    2429           10 :    FUNCTION cp_fm_pilaenv(ictxt, prec) RESULT(res)
    2430              :       INTEGER                                            :: ictxt
    2431              :       CHARACTER(LEN=1)                                   :: prec
    2432              :       INTEGER                                            :: res
    2433              : #if defined(__parallel)
    2434              :       INTEGER                                            :: pilaenv
    2435           10 :       res = pilaenv(ictxt, prec)
    2436              : #else
    2437              :       MARK_USED(ictxt)
    2438              :       MARK_USED(prec)
    2439              :       res = -1
    2440              : #endif
    2441              : 
    2442           10 :    END FUNCTION cp_fm_pilaenv
    2443              : 
    2444            0 : END MODULE cp_fm_types
        

Generated by: LCOV version 2.0-1