LCOV - code coverage report
Current view: top level - src/fm - cp_fm_types.F (source / functions) Coverage Total Hit
Test: CP2K Regtests (git:ca6acae) Lines: 85.8 % 851 730
Test Date: 2026-01-02 06:29:53 Functions: 80.4 % 46 37

            Line data    Source code
       1              : !--------------------------------------------------------------------------------------------------!
       2              : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3              : !   Copyright 2000-2026 CP2K developers group <https://cp2k.org>                                   !
       4              : !                                                                                                  !
       5              : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6              : !--------------------------------------------------------------------------------------------------!
       7              : 
       8              : ! **************************************************************************************************
       9              : !> \brief represent a full matrix distributed on many processors
      10              : !> \par History
      11              : !>      3) separated structure object, removed globenv, renamed to full matrix
      12              : !>         many changes (fawzi 08.2002)
      13              : !> \author Matthias Krack (22.05.2001)
      14              : ! **************************************************************************************************
      15              : MODULE cp_fm_types
      16              :    USE cp_blacs_env,                    ONLY: cp_blacs_env_type
      17              :    USE cp_blacs_types,                  ONLY: cp_blacs_type
      18              :    USE cp_fm_struct,                    ONLY: cp_fm_struct_create,&
      19              :                                               cp_fm_struct_equivalent,&
      20              :                                               cp_fm_struct_get,&
      21              :                                               cp_fm_struct_release,&
      22              :                                               cp_fm_struct_retain,&
      23              :                                               cp_fm_struct_type,&
      24              :                                               cp_fm_struct_write_info
      25              :    USE kinds,                           ONLY: dp,&
      26              :                                               sp
      27              :    USE message_passing,                 ONLY: cp2k_is_parallel,&
      28              :                                               mp_any_source,&
      29              :                                               mp_para_env_type,&
      30              :                                               mp_proc_null,&
      31              :                                               mp_request_null,&
      32              :                                               mp_request_type,&
      33              :                                               mp_waitall
      34              :    USE parallel_rng_types,              ONLY: UNIFORM,&
      35              :                                               rng_stream_type
      36              : #include "../base/base_uses.f90"
      37              : 
      38              :    IMPLICIT NONE
      39              : 
      40              :    PRIVATE
      41              : 
      42              :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_fm_types'
      43              :    LOGICAL, PARAMETER          :: debug_this_module = .TRUE.
      44              :    INTEGER, PARAMETER :: src_tag = 3, dest_tag = 5, send_tag = 7, recv_tag = 11
      45              : 
      46              :    INTEGER, PRIVATE :: cp_fm_mm_type = 1
      47              : 
      48              :    PUBLIC :: cp_fm_type, &
      49              :              cp_fm_p_type, copy_info_type
      50              : 
      51              :    PUBLIC :: cp_fm_add_to_element, &
      52              :              cp_fm_create, &
      53              :              cp_fm_release, &
      54              :              cp_fm_get_info, &
      55              :              cp_fm_set_element, &
      56              :              cp_fm_get_element, &
      57              :              cp_fm_get_diag, & ! get diagonal
      58              :              cp_fm_set_all, & ! set all elements and diagonal
      59              :              cp_fm_set_all_submatrix, & ! set a submatrix to a given value
      60              :              cp_fm_set_submatrix, & ! set a submatrix to given values
      61              :              cp_fm_get_submatrix, & ! get a submatrix of given values
      62              :              cp_fm_init_random, &
      63              :              cp_fm_maxabsval, & ! find the maximum absolute value
      64              :              cp_fm_maxabsrownorm, & ! find the maximum of the sum of the abs of the elements of a row
      65              :              cp_fm_to_fm, & ! copy (parts of) a fm to a fm
      66              :              cp_fm_vectorsnorm, & ! compute the norm of the column-vectors
      67              :              cp_fm_vectorssum, & ! compute the sum of all elements of the column-vectors
      68              :              cp_fm_to_fm_submat, & ! copy (parts of) a fm to a fm
      69              :              cp_fm_to_fm_triangular, &
      70              :              cp_fm_copy_general, &
      71              :              cp_fm_start_copy_general, &
      72              :              cp_fm_finish_copy_general, &
      73              :              cp_fm_cleanup_copy_general, &
      74              :              cp_fm_write_unformatted, & ! writes a full matrix to an open unit
      75              :              cp_fm_write_formatted, & ! writes a full matrix to an open unit
      76              :              cp_fm_read_unformatted, & ! reads a full matrix from an open unit
      77              :              cp_fm_setup, & ! allows to set flags for fms
      78              :              cp_fm_get_mm_type, &
      79              :              cp_fm_write_info, &
      80              :              cp_fm_to_fm_submat_general ! copy matrix across different contexts
      81              : 
      82              :    PUBLIC :: cp_fm_pilaenv
      83              : 
      84              :    INTERFACE cp_fm_to_fm
      85              :       MODULE PROCEDURE cp_fm_to_fm_matrix, & ! a full matrix
      86              :          cp_fm_to_fm_columns ! just a number of columns
      87              :    END INTERFACE
      88              : 
      89              :    INTERFACE cp_fm_release
      90              :       MODULE PROCEDURE cp_fm_release_aa0, &
      91              :          cp_fm_release_aa1, &
      92              :          cp_fm_release_aa2, &
      93              :          cp_fm_release_aa3, &
      94              :          cp_fm_release_ap1, &
      95              :          cp_fm_release_ap2, &
      96              :          cp_fm_release_pa1, &
      97              :          cp_fm_release_pa2, &
      98              :          cp_fm_release_pa3, &
      99              :          cp_fm_release_pp1, &
     100              :          cp_fm_release_pp2
     101              :    END INTERFACE
     102              : 
     103              : ! **************************************************************************************************
     104              : !> \brief represent a full matrix
     105              : !> \param name the name of the matrix, used for printing
     106              : !> \param matrix_struct structure of this matrix
     107              : !> \param local_data array with the data of the matrix (its contents
     108              : !>        depend on the matrix type used: in parallel runs it will be
     109              : !>        in scalapack format, in sequential, it will simply contain
     110              : !>        the matrix)
     111              : !> \par History
     112              : !>      08.2002 created [fawzi]
     113              : !> \author fawzi
     114              : ! **************************************************************************************************
     115              :    TYPE cp_fm_type
     116              : !    PRIVATE
     117              :       CHARACTER(LEN=60) :: name = ""
     118              :       LOGICAL :: use_sp = .FALSE.
     119              :       TYPE(cp_fm_struct_type), POINTER :: matrix_struct => NULL()
     120              :       REAL(KIND=dp), DIMENSION(:, :), POINTER, CONTIGUOUS :: local_data => NULL()
     121              :       REAL(KIND=sp), DIMENSION(:, :), POINTER, CONTIGUOUS :: local_data_sp => NULL()
     122              :    END TYPE cp_fm_type
     123              : 
     124              : ! **************************************************************************************************
     125              : !> \brief just to build arrays of pointers to matrices
     126              : !> \param matrix the pointer to the matrix
     127              : !> \par History
     128              : !>      08.2002 created [fawzi]
     129              : !> \author fawzi
     130              : ! **************************************************************************************************
     131              :    TYPE cp_fm_p_type
     132              :       TYPE(cp_fm_type), POINTER :: matrix => NULL()
     133              :    END TYPE cp_fm_p_type
     134              : 
     135              : ! **************************************************************************************************
     136              : !> \brief Stores the state of a copy between cp_fm_start_copy_general
     137              : !>        and cp_fm_finish_copy_general
     138              : !> \par History
     139              : !>      Jan 2017  [Mark T]
     140              : ! **************************************************************************************************
     141              :    TYPE copy_info_type
     142              :       INTEGER :: send_size = -1
     143              :       INTEGER, DIMENSION(2) :: nlocal_recv = -1, nblock_src = -1, src_num_pe = -1 ! 1->row  2->col
     144              :       TYPE(mp_request_type), DIMENSION(:), ALLOCATABLE :: send_request, recv_request
     145              :       INTEGER, DIMENSION(:), ALLOCATABLE   :: recv_disp
     146              :       INTEGER, DIMENSION(:), POINTER       :: recv_col_indices => NULL(), recv_row_indices => NULL()
     147              :       INTEGER, DIMENSION(:, :), ALLOCATABLE :: src_blacs2mpi
     148              :       REAL(KIND=dp), DIMENSION(:), ALLOCATABLE :: recv_buf, send_buf
     149              :    END TYPE copy_info_type
     150              : 
     151              : CONTAINS
     152              : 
     153              : ! **************************************************************************************************
     154              : !> \brief creates a new full matrix with the given structure
     155              : !> \param matrix the matrix to be created
     156              : !> \param matrix_struct the structure of matrix
     157              : !> \param name ...
     158              : !> \param use_sp ...
     159              : !> \param nrow ...
     160              : !> \param ncol ...
     161              : !> \param set_zero ...
     162              : !> \par History
     163              : !>      08.2002 created [fawzi]
     164              : !> \author Fawzi Mohamed
     165              : !> \note
     166              : !>      preferred allocation routine
     167              : ! **************************************************************************************************
     168      1338943 :    SUBROUTINE cp_fm_create(matrix, matrix_struct, name, use_sp, nrow, ncol, set_zero)
     169              :       TYPE(cp_fm_type), INTENT(OUT)                      :: matrix
     170              :       TYPE(cp_fm_struct_type), INTENT(IN), TARGET        :: matrix_struct
     171              :       CHARACTER(LEN=*), INTENT(in), OPTIONAL             :: name
     172              :       LOGICAL, INTENT(in), OPTIONAL                      :: use_sp
     173              :       INTEGER, INTENT(IN), OPTIONAL                      :: nrow, ncol
     174              :       LOGICAL, INTENT(in), OPTIONAL                      :: set_zero
     175              : 
     176              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'cp_fm_create'
     177              : 
     178              :       INTEGER                                            :: handle, ncol_global, ncol_local, &
     179              :                                                             nrow_global, nrow_local
     180              :       TYPE(cp_blacs_env_type), POINTER                   :: context
     181              :       TYPE(cp_fm_struct_type), POINTER                   :: fm_struct
     182              : 
     183      1338943 :       CALL timeset(routineN, handle)
     184              : 
     185      1338943 :       IF (PRESENT(nrow) .OR. PRESENT(ncol)) THEN
     186         1308 :          CALL cp_fm_struct_get(matrix_struct, nrow_global=nrow_global, ncol_global=ncol_global)
     187         1308 :          IF (PRESENT(nrow)) nrow_global = nrow
     188         1308 :          IF (PRESENT(ncol)) ncol_global = ncol
     189              :          CALL cp_fm_struct_create(fm_struct, template_fmstruct=matrix_struct, &
     190         1308 :                                   nrow_global=nrow_global, ncol_global=ncol_global)
     191              : 
     192         1308 :          context => fm_struct%context
     193         1308 :          matrix%matrix_struct => fm_struct
     194         1308 :          CALL cp_fm_struct_retain(matrix%matrix_struct)
     195              : 
     196         1308 :          nrow_local = fm_struct%local_leading_dimension
     197         1308 :          ncol_local = MAX(1, fm_struct%ncol_locals(context%mepos(2)))
     198              : 
     199         1308 :          CALL cp_fm_struct_release(fm_struct)
     200              :       ELSE
     201              : 
     202      1337635 :          context => matrix_struct%context
     203      1337635 :          matrix%matrix_struct => matrix_struct
     204      1337635 :          CALL cp_fm_struct_retain(matrix%matrix_struct)
     205              : 
     206              :          ! OK, we allocate here at least a 1 x 1 matrix
     207              :          ! this must (and is) compatible with the descinit call
     208              :          ! in cp_fm_struct
     209      1337635 :          nrow_local = matrix_struct%local_leading_dimension
     210      1337635 :          ncol_local = MAX(1, matrix_struct%ncol_locals(context%mepos(2)))
     211              :       END IF
     212              : 
     213      1338943 :       matrix%use_sp = .FALSE.
     214      1338943 :       IF (PRESENT(use_sp)) matrix%use_sp = use_sp
     215              : 
     216      1338943 :       NULLIFY (matrix%local_data)
     217      1338943 :       NULLIFY (matrix%local_data_sp)
     218              : 
     219      1338943 :       IF (matrix%use_sp) THEN
     220            0 :          ALLOCATE (matrix%local_data_sp(nrow_local, ncol_local))
     221              :       ELSE
     222      5355772 :          ALLOCATE (matrix%local_data(nrow_local, ncol_local))
     223              :       END IF
     224              : 
     225      1338943 :       IF (PRESENT(set_zero)) THEN
     226       186271 :          IF (set_zero) THEN
     227       186271 :             IF (matrix%use_sp) THEN
     228            0 :                matrix%local_data_sp(1:nrow_local, 1:ncol_local) = 0.0_sp
     229              :             ELSE
     230     53190707 :                matrix%local_data(1:nrow_local, 1:ncol_local) = 0.0_dp
     231              :             END IF
     232              :          END IF
     233              :       END IF
     234              : 
     235      1338943 :       IF (PRESENT(name)) THEN
     236       676843 :          matrix%name = name
     237              :       ELSE
     238       662100 :          matrix%name = 'full matrix'
     239              :       END IF
     240              : 
     241      1338943 :       CALL timestop(handle)
     242              : 
     243      1338943 :    END SUBROUTINE cp_fm_create
     244              : 
     245              : ! **************************************************************************************************
     246              : !> \brief releases a full matrix
     247              : !> \param matrix the matrix to release
     248              : !> \par History
     249              : !>      08.2002 created [fawzi]
     250              : !> \author Fawzi Mohamed
     251              : ! **************************************************************************************************
     252      1350239 :    SUBROUTINE cp_fm_release_aa0(matrix)
     253              :       TYPE(cp_fm_type), INTENT(INOUT)                    :: matrix
     254              : 
     255      1350239 :       IF (ASSOCIATED(matrix%local_data)) THEN
     256      1338227 :          DEALLOCATE (matrix%local_data)
     257              :          NULLIFY (matrix%local_data)
     258              :       END IF
     259      1350239 :       IF (ASSOCIATED(matrix%local_data_sp)) THEN
     260            0 :          DEALLOCATE (matrix%local_data_sp)
     261              :          NULLIFY (matrix%local_data_sp)
     262              :       END IF
     263      1350239 :       matrix%name = ""
     264      1350239 :       CALL cp_fm_struct_release(matrix%matrix_struct)
     265              : 
     266      1350239 :    END SUBROUTINE cp_fm_release_aa0
     267              : 
     268              : ! **************************************************************************************************
     269              : !> \brief ...
     270              : !> \param matrices ...
     271              : ! **************************************************************************************************
     272        73738 :    SUBROUTINE cp_fm_release_aa1(matrices)
     273              :       TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:)        :: matrices
     274              : 
     275              :       INTEGER                                            :: i
     276              : 
     277        73738 :       IF (ALLOCATED(matrices)) THEN
     278       198799 :          DO i = 1, SIZE(matrices)
     279       198799 :             CALL cp_fm_release(matrices(i))
     280              :          END DO
     281        72590 :          DEALLOCATE (matrices)
     282              :       END IF
     283        73738 :    END SUBROUTINE cp_fm_release_aa1
     284              : 
     285              : ! **************************************************************************************************
     286              : !> \brief ...
     287              : !> \param matrices ...
     288              : ! **************************************************************************************************
     289         8164 :    SUBROUTINE cp_fm_release_aa2(matrices)
     290              :       TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:, :)     :: matrices
     291              : 
     292              :       INTEGER                                            :: i, j
     293              : 
     294         8164 :       IF (ALLOCATED(matrices)) THEN
     295        17328 :          DO i = 1, SIZE(matrices, 1)
     296        47950 :             DO j = 1, SIZE(matrices, 2)
     297        42184 :                CALL cp_fm_release(matrices(i, j))
     298              :             END DO
     299              :          END DO
     300         5766 :          DEALLOCATE (matrices)
     301              :       END IF
     302         8164 :    END SUBROUTINE cp_fm_release_aa2
     303              : 
     304              : ! **************************************************************************************************
     305              : !> \brief ...
     306              : !> \param matrices ...
     307              : ! **************************************************************************************************
     308           36 :    SUBROUTINE cp_fm_release_aa3(matrices)
     309              :       TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:, :, :)  :: matrices
     310              : 
     311              :       INTEGER                                            :: i, j, k
     312              : 
     313           36 :       IF (ALLOCATED(matrices)) THEN
     314          388 :          DO i = 1, SIZE(matrices, 1)
     315         1120 :             DO j = 1, SIZE(matrices, 2)
     316         1896 :                DO k = 1, SIZE(matrices, 3)
     317         1544 :                   CALL cp_fm_release(matrices(i, j, k))
     318              :                END DO
     319              :             END DO
     320              :          END DO
     321           36 :          DEALLOCATE (matrices)
     322              :       END IF
     323           36 :    END SUBROUTINE cp_fm_release_aa3
     324              : 
     325              : ! **************************************************************************************************
     326              : !> \brief ...
     327              : !> \param matrices ...
     328              : ! **************************************************************************************************
     329       152382 :    SUBROUTINE cp_fm_release_pa1(matrices)
     330              :       TYPE(cp_fm_type), DIMENSION(:), POINTER            :: matrices
     331              : 
     332              :       INTEGER                                            :: i
     333              : 
     334       152382 :       IF (ASSOCIATED(matrices)) THEN
     335       121485 :          DO i = 1, SIZE(matrices)
     336       121485 :             CALL cp_fm_release(matrices(i))
     337              :          END DO
     338        52156 :          DEALLOCATE (matrices)
     339              :          NULLIFY (matrices)
     340              :       END IF
     341       152382 :    END SUBROUTINE cp_fm_release_pa1
     342              : 
     343              : ! **************************************************************************************************
     344              : !> \brief ...
     345              : !> \param matrices ...
     346              : ! **************************************************************************************************
     347        29342 :    SUBROUTINE cp_fm_release_pa2(matrices)
     348              :       TYPE(cp_fm_type), DIMENSION(:, :), POINTER         :: matrices
     349              : 
     350              :       INTEGER                                            :: i, j
     351              : 
     352        29342 :       IF (ASSOCIATED(matrices)) THEN
     353        45970 :          DO i = 1, SIZE(matrices, 1)
     354        89898 :             DO j = 1, SIZE(matrices, 2)
     355        78446 :                CALL cp_fm_release(matrices(i, j))
     356              :             END DO
     357              :          END DO
     358        11452 :          DEALLOCATE (matrices)
     359              :          NULLIFY (matrices)
     360              :       END IF
     361        29342 :    END SUBROUTINE cp_fm_release_pa2
     362              : 
     363              : ! **************************************************************************************************
     364              : !> \brief ...
     365              : !> \param matrices ...
     366              : ! **************************************************************************************************
     367            0 :    SUBROUTINE cp_fm_release_pa3(matrices)
     368              :       TYPE(cp_fm_type), DIMENSION(:, :, :), POINTER      :: matrices
     369              : 
     370              :       INTEGER                                            :: i, j, k
     371              : 
     372            0 :       IF (ASSOCIATED(matrices)) THEN
     373            0 :          DO i = 1, SIZE(matrices, 1)
     374            0 :             DO j = 1, SIZE(matrices, 2)
     375            0 :                DO k = 1, SIZE(matrices, 3)
     376            0 :                   CALL cp_fm_release(matrices(i, j, k))
     377              :                END DO
     378              :             END DO
     379              :          END DO
     380            0 :          DEALLOCATE (matrices)
     381              :          NULLIFY (matrices)
     382              :       END IF
     383            0 :    END SUBROUTINE cp_fm_release_pa3
     384              : 
     385              : ! **************************************************************************************************
     386              : !> \brief ...
     387              : !> \param matrices ...
     388              : ! **************************************************************************************************
     389            0 :    SUBROUTINE cp_fm_release_ap1(matrices)
     390              :       TYPE(cp_fm_p_type), ALLOCATABLE, DIMENSION(:)      :: matrices
     391              : 
     392              :       INTEGER                                            :: i
     393              : 
     394            0 :       IF (ALLOCATED(matrices)) THEN
     395            0 :          DO i = 1, SIZE(matrices)
     396            0 :             CALL cp_fm_release(matrices(i)%matrix)
     397            0 :             DEALLOCATE (matrices(i)%matrix)
     398              :          END DO
     399            0 :          DEALLOCATE (matrices)
     400              :       END IF
     401            0 :    END SUBROUTINE cp_fm_release_ap1
     402              : 
     403              : ! **************************************************************************************************
     404              : !> \brief ...
     405              : !> \param matrices ...
     406              : ! **************************************************************************************************
     407            0 :    SUBROUTINE cp_fm_release_ap2(matrices)
     408              :       TYPE(cp_fm_p_type), ALLOCATABLE, DIMENSION(:, :)   :: matrices
     409              : 
     410              :       INTEGER                                            :: i, j
     411              : 
     412            0 :       IF (ALLOCATED(matrices)) THEN
     413            0 :          DO i = 1, SIZE(matrices, 1)
     414            0 :             DO j = 1, SIZE(matrices, 2)
     415            0 :                CALL cp_fm_release(matrices(i, j)%matrix)
     416            0 :                DEALLOCATE (matrices(i, j)%matrix)
     417              :             END DO
     418              :          END DO
     419            0 :          DEALLOCATE (matrices)
     420              :       END IF
     421            0 :    END SUBROUTINE cp_fm_release_ap2
     422              : 
     423              : ! **************************************************************************************************
     424              : !> \brief ...
     425              : !> \param matrices ...
     426              : ! **************************************************************************************************
     427            0 :    SUBROUTINE cp_fm_release_pp1(matrices)
     428              :       TYPE(cp_fm_p_type), DIMENSION(:), POINTER          :: matrices
     429              : 
     430              :       INTEGER                                            :: i
     431              : 
     432            0 :       IF (ASSOCIATED(matrices)) THEN
     433            0 :          DO i = 1, SIZE(matrices)
     434            0 :             CALL cp_fm_release(matrices(i)%matrix)
     435            0 :             DEALLOCATE (matrices(i)%matrix)
     436              :          END DO
     437            0 :          DEALLOCATE (matrices)
     438              :          NULLIFY (matrices)
     439              :       END IF
     440            0 :    END SUBROUTINE cp_fm_release_pp1
     441              : 
     442              : ! **************************************************************************************************
     443              : !> \brief ...
     444              : !> \param matrices ...
     445              : ! **************************************************************************************************
     446            0 :    SUBROUTINE cp_fm_release_pp2(matrices)
     447              :       TYPE(cp_fm_p_type), DIMENSION(:, :), POINTER       :: matrices
     448              : 
     449              :       INTEGER                                            :: i, j
     450              : 
     451            0 :       IF (ASSOCIATED(matrices)) THEN
     452            0 :          DO i = 1, SIZE(matrices, 1)
     453            0 :             DO j = 1, SIZE(matrices, 2)
     454            0 :                CALL cp_fm_release(matrices(i, j)%matrix)
     455            0 :                DEALLOCATE (matrices(i, j)%matrix)
     456              :             END DO
     457              :          END DO
     458            0 :          DEALLOCATE (matrices)
     459              :          NULLIFY (matrices)
     460              :       END IF
     461            0 :    END SUBROUTINE cp_fm_release_pp2
     462              : 
     463              : ! **************************************************************************************************
     464              : !> \brief fills a matrix with random numbers
     465              : !> \param matrix : to be initialized
     466              : !> \param ncol : numbers of cols to fill
     467              : !> \param start_col : starting at coll number
     468              : !> \author Joost VandeVondele
     469              : !> \note
     470              : !>      the value of a_ij is independent of the number of cpus
     471              : ! **************************************************************************************************
     472         2634 :    SUBROUTINE cp_fm_init_random(matrix, ncol, start_col)
     473              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix
     474              :       INTEGER, INTENT(IN), OPTIONAL                      :: ncol, start_col
     475              : 
     476              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'cp_fm_init_random'
     477              : 
     478              :       INTEGER :: handle, icol_global, icol_local, irow_local, my_ncol, my_start_col, ncol_global, &
     479              :          ncol_local, nrow_global, nrow_local
     480         5268 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
     481         2634 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: buff
     482              :       REAL(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
     483         2634 :          POINTER                                         :: local_data
     484              :       REAL(KIND=dp), DIMENSION(3, 2), SAVE :: &
     485              :          seed = RESHAPE([1.0_dp, 2.0_dp, 3.0_dp, 4.0_dp, 5.0_dp, 6.0_dp], [3, 2])
     486              :       TYPE(rng_stream_type)                              :: rng
     487              : 
     488         2634 :       CALL timeset(routineN, handle)
     489              : 
     490              :       ! guarantee same seed over all tasks
     491         2634 :       CALL matrix%matrix_struct%para_env%bcast(seed, 0)
     492              : 
     493              :       rng = rng_stream_type("cp_fm_init_random_stream", distribution_type=UNIFORM, &
     494         2634 :                             extended_precision=.TRUE., seed=seed)
     495              : 
     496         2634 :       CPASSERT(.NOT. matrix%use_sp)
     497              : 
     498              :       CALL cp_fm_get_info(matrix, nrow_global=nrow_global, ncol_global=ncol_global, &
     499              :                           nrow_local=nrow_local, ncol_local=ncol_local, &
     500              :                           local_data=local_data, &
     501         2634 :                           row_indices=row_indices, col_indices=col_indices)
     502              : 
     503         2634 :       my_start_col = 1
     504         2634 :       IF (PRESENT(start_col)) my_start_col = start_col
     505         2634 :       my_ncol = matrix%matrix_struct%ncol_global
     506         2634 :       IF (PRESENT(ncol)) my_ncol = ncol
     507              : 
     508         2634 :       IF (ncol_global < (my_start_col + my_ncol - 1)) &
     509            0 :          CPABORT("ncol_global>=(my_start_col+my_ncol-1)")
     510              : 
     511         7902 :       ALLOCATE (buff(nrow_global))
     512              : 
     513              :       ! each global row has its own substream, in order to reach the stream for the local col,
     514              :       ! we just reset to the next substream
     515              :       ! following this, we fill the full buff with random numbers, and pick those we need
     516         2634 :       icol_global = 0
     517        21729 :       DO icol_local = 1, ncol_local
     518        19095 :          CPASSERT(col_indices(icol_local) > icol_global)
     519              :          DO
     520        19095 :             CALL rng%reset_to_next_substream()
     521        19095 :             icol_global = icol_global + 1
     522        19095 :             IF (icol_global == col_indices(icol_local)) EXIT
     523              :          END DO
     524        19095 :          CALL rng%fill(buff)
     525       790280 :          DO irow_local = 1, nrow_local
     526       787646 :             local_data(irow_local, icol_local) = buff(row_indices(irow_local))
     527              :          END DO
     528              :       END DO
     529              : 
     530         2634 :       DEALLOCATE (buff)
     531              : 
     532              :       ! store seed before deletion (unclear if this is the proper seed)
     533              : 
     534              :       ! Note that, the initial state (rng%ig) instead of the current state (rng%cg) is stored in the
     535              :       ! seed variable. As a consequence, each invocation of cp_fm_init_random uses exactly the same
     536              :       ! stream of random numbers. While this seems odd and contrary to the original design,
     537              :       ! it was probably introduced to improve reproducibility.
     538              :       ! See also https://github.com/cp2k/cp2k/pull/506
     539         2634 :       CALL rng%get(ig=seed)
     540              : 
     541         2634 :       CALL timestop(handle)
     542              : 
     543        73752 :    END SUBROUTINE cp_fm_init_random
     544              : 
     545              : ! **************************************************************************************************
     546              : !> \brief set all elements of a matrix to the same value,
     547              : !>      and optionally the diagonal to a different one
     548              : !> \param matrix input matrix
     549              : !> \param alpha scalar used to set all elements of the matrix
     550              : !> \param beta scalar used to set diagonal of the matrix
     551              : !> \note
     552              : !>      can be used to zero a matrix
     553              : !>      can be used to create a unit matrix (I-matrix) alpha=0.0_dp beta=1.0_dp
     554              : ! **************************************************************************************************
     555       346645 :    SUBROUTINE cp_fm_set_all(matrix, alpha, beta)
     556              : 
     557              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix
     558              :       REAL(KIND=dp), INTENT(IN)                          :: alpha
     559              :       REAL(KIND=dp), INTENT(IN), OPTIONAL                :: beta
     560              : 
     561              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'cp_fm_set_all'
     562              : 
     563              :       INTEGER                                            :: handle, i, n
     564              : 
     565       346645 :       CALL timeset(routineN, handle)
     566              : 
     567       346645 :       IF (matrix%use_sp) THEN
     568            0 :          matrix%local_data_sp(:, :) = REAL(alpha, sp)
     569              :       ELSE
     570    194749014 :          matrix%local_data(:, :) = alpha
     571              :       END IF
     572              : 
     573       346645 :       IF (PRESENT(beta)) THEN
     574        58341 :          CPASSERT(.NOT. matrix%use_sp)
     575        58341 :          n = MIN(matrix%matrix_struct%nrow_global, matrix%matrix_struct%ncol_global)
     576       529389 :          DO i = 1, n
     577       529389 :             CALL cp_fm_set_element(matrix, i, i, beta)
     578              :          END DO
     579              :       END IF
     580              : 
     581       346645 :       CALL timestop(handle)
     582              : 
     583       346645 :    END SUBROUTINE cp_fm_set_all
     584              : 
     585              : ! **************************************************************************************************
     586              : !> \brief returns the diagonal elements of a fm
     587              : !> \param matrix ...
     588              : !> \param diag ...
     589              : ! **************************************************************************************************
     590        13960 :    SUBROUTINE cp_fm_get_diag(matrix, diag)
     591              : 
     592              :       ! arguments
     593              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix
     594              :       REAL(KIND=dp), DIMENSION(:), INTENT(OUT) :: diag
     595              : 
     596              :       ! locals
     597              :       INTEGER :: i, nrow_global
     598              : 
     599              : #if defined(__parallel)
     600              :       INTEGER, DIMENSION(9) :: desca
     601              :       TYPE(cp_blacs_env_type), POINTER :: context
     602              :       INTEGER :: icol_local, ipcol, iprow, irow_local, mypcol, myprow, npcol, &
     603              :                  nprow
     604        13960 :       REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
     605        13960 :       REAL(KIND=sp), DIMENSION(:, :), POINTER :: a_sp
     606              : #endif
     607              : 
     608        13960 :       CALL cp_fm_get_info(matrix, nrow_global=nrow_global)
     609              : 
     610              : #if defined(__parallel)
     611       178684 :       diag = 0.0_dp
     612        13960 :       context => matrix%matrix_struct%context
     613        13960 :       myprow = context%mepos(1)
     614        13960 :       mypcol = context%mepos(2)
     615        13960 :       nprow = context%num_pe(1)
     616        13960 :       npcol = context%num_pe(2)
     617              : 
     618        13960 :       a => matrix%local_data
     619        13960 :       a_sp => matrix%local_data_sp
     620       139600 :       desca(:) = matrix%matrix_struct%descriptor(:)
     621              : 
     622       178684 :       DO i = 1, nrow_global
     623              :          CALL infog2l(i, i, desca, nprow, npcol, myprow, mypcol, &
     624       164724 :                       irow_local, icol_local, iprow, ipcol)
     625       178684 :          IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
     626        82446 :             IF (matrix%use_sp) THEN
     627            0 :                diag(i) = REAL(a_sp(irow_local, icol_local), dp)
     628              :             ELSE
     629        82446 :                diag(i) = a(irow_local, icol_local)
     630              :             END IF
     631              :          END IF
     632              :       END DO
     633              : #else
     634              :       DO i = 1, nrow_global
     635              :          IF (matrix%use_sp) THEN
     636              :             diag(i) = REAL(matrix%local_data_sp(i, i), dp)
     637              :          ELSE
     638              :             diag(i) = matrix%local_data(i, i)
     639              :          END IF
     640              :       END DO
     641              : #endif
     642       343408 :       CALL matrix%matrix_struct%para_env%sum(diag)
     643              : 
     644        13960 :    END SUBROUTINE cp_fm_get_diag
     645              : 
     646              : ! **************************************************************************************************
     647              : !> \brief returns an element of a fm
     648              : !>      this value is valid on every cpu
     649              : !>      using this call is expensive
     650              : !> \param matrix the matrix to read
     651              : !> \param irow_global the row
     652              : !> \param icol_global the col
     653              : !> \param alpha the value of matrix(irow_global, icol_global)
     654              : !> \param local true if the element is on this cpu, false otherwise
     655              : !> \note
     656              : !>      - modified semantics. now this function always returns the value
     657              : !>        previously the value was zero on cpus that didn't own the relevant
     658              : !>        part of the matrix (Joost VandeVondele, May 2003)
     659              : !>      - usage of the function should be avoided, as it is likely to rather slow
     660              : !>        using row_indices/col_indices/local_data + some smart scheme normally
     661              : !>        yields a real parallel code
     662              : ! **************************************************************************************************
     663      2505044 :    SUBROUTINE cp_fm_get_element(matrix, irow_global, icol_global, alpha, local)
     664              : 
     665              :       ! arguments
     666              :       TYPE(cp_fm_type), INTENT(IN)          :: matrix
     667              :       REAL(KIND=dp), INTENT(OUT)                     :: alpha
     668              :       INTEGER, INTENT(IN)                       :: icol_global, &
     669              :                                                    irow_global
     670              :       LOGICAL, INTENT(OUT), OPTIONAL            :: local
     671              : 
     672              :       ! locals
     673              : #if defined(__parallel)
     674              :       INTEGER, DIMENSION(9) :: desca
     675              :       TYPE(cp_blacs_env_type), POINTER :: context
     676              :       INTEGER :: icol_local, ipcol, iprow, irow_local, mypcol, myprow, npcol, &
     677              :                  nprow
     678      2505044 :       REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
     679              : #endif
     680              : 
     681              : #if defined(__parallel)
     682      2505044 :       context => matrix%matrix_struct%context
     683      2505044 :       myprow = context%mepos(1)
     684      2505044 :       mypcol = context%mepos(2)
     685      2505044 :       nprow = context%num_pe(1)
     686      2505044 :       npcol = context%num_pe(2)
     687              : 
     688      2505044 :       a => matrix%local_data
     689     25050440 :       desca(:) = matrix%matrix_struct%descriptor(:)
     690              : 
     691              :       CALL infog2l(irow_global, icol_global, desca, nprow, npcol, myprow, mypcol, &
     692      2505044 :                    irow_local, icol_local, iprow, ipcol)
     693              : 
     694      2505044 :       IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
     695      1252522 :          alpha = a(irow_local, icol_local)
     696      1252522 :          CALL context%dgebs2d('All', ' ', 1, 1, alpha, 1)
     697      1252522 :          IF (PRESENT(local)) local = .TRUE.
     698              :       ELSE
     699      1252522 :          CALL context%dgebr2d('All', ' ', 1, 1, alpha, 1, iprow, ipcol)
     700      1252522 :          IF (PRESENT(local)) local = .FALSE.
     701              :       END IF
     702              : 
     703              : #else
     704              :       IF (PRESENT(local)) local = .TRUE.
     705              :       alpha = matrix%local_data(irow_global, icol_global)
     706              : #endif
     707              : 
     708      2505044 :    END SUBROUTINE cp_fm_get_element
     709              : 
     710              : ! **************************************************************************************************
     711              : !> \brief sets an element of a matrix
     712              : !> \param matrix ...
     713              : !> \param irow_global ...
     714              : !> \param icol_global ...
     715              : !> \param alpha ...
     716              : !> \note
     717              : !>      we expect all cpus to have the same arguments in the call to this function
     718              : !>      (otherwise one should use local_data tricks)
     719              : ! **************************************************************************************************
     720       701336 :    SUBROUTINE cp_fm_set_element(matrix, irow_global, icol_global, alpha)
     721              :       TYPE(cp_fm_type), INTENT(IN)          :: matrix
     722              :       INTEGER, INTENT(IN)                      :: irow_global, icol_global
     723              :       REAL(KIND=dp), INTENT(IN)                :: alpha
     724              : 
     725              :       INTEGER                                  :: mypcol, myprow, npcol, nprow
     726              :       TYPE(cp_blacs_env_type), POINTER         :: context
     727              : #if defined(__parallel)
     728              :       INTEGER                                  :: icol_local, ipcol, iprow, &
     729              :                                                   irow_local
     730              :       INTEGER, DIMENSION(9)                    :: desca
     731       701336 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
     732              : #endif
     733              : 
     734       701336 :       context => matrix%matrix_struct%context
     735       701336 :       myprow = context%mepos(1)
     736       701336 :       mypcol = context%mepos(2)
     737       701336 :       nprow = context%num_pe(1)
     738       701336 :       npcol = context%num_pe(2)
     739              : 
     740            0 :       CPASSERT(.NOT. matrix%use_sp)
     741              : 
     742              : #if defined(__parallel)
     743              : 
     744       701336 :       a => matrix%local_data
     745              : 
     746      7013360 :       desca(:) = matrix%matrix_struct%descriptor(:)
     747              : 
     748              :       CALL infog2l(irow_global, icol_global, desca, nprow, npcol, myprow, mypcol, &
     749       701336 :                    irow_local, icol_local, iprow, ipcol)
     750              : 
     751       701336 :       IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
     752       354478 :          a(irow_local, icol_local) = alpha
     753              :       END IF
     754              : 
     755              : #else
     756              : 
     757              :       matrix%local_data(irow_global, icol_global) = alpha
     758              : 
     759              : #endif
     760       701336 :    END SUBROUTINE cp_fm_set_element
     761              : 
     762              : ! **************************************************************************************************
     763              : !> \brief sets a submatrix of a full matrix
     764              : !>       fm(start_row:start_row+n_rows,start_col:start_col+n_cols)
     765              : !>       = alpha*op(new_values)(1:n_rows,1:n_cols)+ beta
     766              : !>       * fm(start_row:start_row+n_rows,start_col:start_col+n_cols)
     767              : !> \param fm the full to change
     768              : !> \param new_values a replicated full matrix with the new values
     769              : !> \param start_row the starting row of b_matrix (defaults to 1)
     770              : !> \param start_col the starting col of b_matrix (defaults to 1)
     771              : !> \param n_rows the number of row to change in b (defaults to
     772              : !>        size(op(new_values),1))
     773              : !> \param n_cols the number of columns to change in b (defaults to
     774              : !>        size(op(new_values),2))
     775              : !> \param alpha rescaling factor for the new values (defaults to 1.0)
     776              : !> \param beta rescaling factor for the old values (defaults to 0.0)
     777              : !> \param transpose if new_values should be transposed: if true
     778              : !>        op(new_values)=new_values^T, else op(new_values)=new_values
     779              : !>        (defaults to false)
     780              : !> \par History
     781              : !>      07.2002 created borrowing from Joost's blacs_replicated_copy [fawzi]
     782              : !> \author Fawzi Mohamed
     783              : !> \note
     784              : !>      optimized for full column updates and alpha=1.0, beta=0.0
     785              : !>      the new_values need to be valid on all cpus
     786              : ! **************************************************************************************************
     787        63451 :    SUBROUTINE cp_fm_set_submatrix(fm, new_values, start_row, &
     788              :                                   start_col, n_rows, n_cols, alpha, beta, transpose)
     789              :       TYPE(cp_fm_type), INTENT(IN)                       :: fm
     790              :       REAL(KIND=dp), DIMENSION(:, :), INTENT(in)         :: new_values
     791              :       INTEGER, INTENT(in), OPTIONAL                      :: start_row, start_col, n_rows, n_cols
     792              :       REAL(KIND=dp), INTENT(in), OPTIONAL                :: alpha, beta
     793              :       LOGICAL, INTENT(in), OPTIONAL                      :: transpose
     794              : 
     795              :       INTEGER                                            :: i, i0, j, j0, ncol, ncol_block, &
     796              :                                                             ncol_global, ncol_local, nrow, &
     797              :                                                             nrow_block, nrow_global, nrow_local, &
     798              :                                                             this_col, this_row
     799        63451 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
     800              :       LOGICAL                                            :: tr_a
     801              :       REAL(KIND=dp)                                      :: al, be
     802        63451 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: full_block
     803              : 
     804        63451 :       al = 1.0_dp; be = 0.0_dp; i0 = 1; j0 = 1; tr_a = .FALSE.
     805              :       ! can be called too many times, making it a bit useless
     806              :       ! CALL timeset(routineN//','//moduleN,handle)
     807              : 
     808            0 :       CPASSERT(.NOT. fm%use_sp)
     809              : 
     810        63451 :       IF (PRESENT(alpha)) al = alpha
     811        63451 :       IF (PRESENT(beta)) be = beta
     812        63451 :       IF (PRESENT(start_row)) i0 = start_row
     813        63451 :       IF (PRESENT(start_col)) j0 = start_col
     814        63451 :       IF (PRESENT(transpose)) tr_a = transpose
     815        15721 :       IF (tr_a) THEN
     816        15629 :          nrow = SIZE(new_values, 2)
     817        15629 :          ncol = SIZE(new_values, 1)
     818              :       ELSE
     819        47822 :          nrow = SIZE(new_values, 1)
     820        47822 :          ncol = SIZE(new_values, 2)
     821              :       END IF
     822        63451 :       IF (PRESENT(n_rows)) nrow = n_rows
     823        63451 :       IF (PRESENT(n_cols)) ncol = n_cols
     824              : 
     825        63451 :       full_block => fm%local_data
     826              : 
     827              :       CALL cp_fm_get_info(matrix=fm, &
     828              :                           nrow_global=nrow_global, ncol_global=ncol_global, &
     829              :                           nrow_block=nrow_block, ncol_block=ncol_block, &
     830              :                           nrow_local=nrow_local, ncol_local=ncol_local, &
     831        63451 :                           row_indices=row_indices, col_indices=col_indices)
     832              : 
     833        63451 :       IF (al == 1.0 .AND. be == 0.0) THEN
     834      1307040 :          DO j = 1, ncol_local
     835      1252727 :             this_col = col_indices(j) - j0 + 1
     836      1307040 :             IF (this_col >= 1 .AND. this_col <= ncol) THEN
     837       288593 :                IF (tr_a) THEN
     838         6491 :                   IF (i0 == 1 .AND. nrow_global == nrow) THEN
     839       158082 :                      DO i = 1, nrow_local
     840       158082 :                         full_block(i, j) = new_values(this_col, row_indices(i))
     841              :                      END DO
     842              :                   ELSE
     843          594 :                      DO i = 1, nrow_local
     844          510 :                         this_row = row_indices(i) - i0 + 1
     845          594 :                         IF (this_row >= 1 .AND. this_row <= nrow) THEN
     846          255 :                            full_block(i, j) = new_values(this_col, this_row)
     847              :                         END IF
     848              :                      END DO
     849              :                   END IF
     850              :                ELSE
     851       282102 :                   IF (i0 == 1 .AND. nrow_global == nrow) THEN
     852      8928634 :                      DO i = 1, nrow_local
     853      8928634 :                         full_block(i, j) = new_values(row_indices(i), this_col)
     854              :                      END DO
     855              :                   ELSE
     856         4347 :                      DO i = 1, nrow_local
     857         3745 :                         this_row = row_indices(i) - i0 + 1
     858         4347 :                         IF (this_row >= 1 .AND. this_row <= nrow) THEN
     859          301 :                            full_block(i, j) = new_values(this_row, this_col)
     860              :                         END IF
     861              :                      END DO
     862              :                   END IF
     863              :                END IF
     864              :             END IF
     865              :          END DO
     866              :       ELSE
     867       831608 :          DO j = 1, ncol_local
     868       822470 :             this_col = col_indices(j) - j0 + 1
     869       831608 :             IF (this_col >= 1 .AND. this_col <= ncol) THEN
     870       822470 :                IF (tr_a) THEN
     871     88223375 :                   DO i = 1, nrow_local
     872     87400905 :                      this_row = row_indices(i) - i0 + 1
     873     88223375 :                      IF (this_row >= 1 .AND. this_row <= nrow) THEN
     874              :                         full_block(i, j) = al*new_values(this_col, this_row) + &
     875       411235 :                                            be*full_block(i, j)
     876              :                      END IF
     877              :                   END DO
     878              :                ELSE
     879            0 :                   DO i = 1, nrow_local
     880            0 :                      this_row = row_indices(i) - i0 + 1
     881            0 :                      IF (this_row >= 1 .AND. this_row <= nrow) THEN
     882              :                         full_block(i, j) = al*new_values(this_row, this_col) + &
     883            0 :                                            be*full_block(i, j)
     884              :                      END IF
     885              :                   END DO
     886              :                END IF
     887              :             END IF
     888              :          END DO
     889              :       END IF
     890              : 
     891              :       ! CALL timestop(handle)
     892              : 
     893        63451 :    END SUBROUTINE cp_fm_set_submatrix
     894              : 
     895              : ! **************************************************************************************************
     896              : !> \brief sets a submatrix of a full matrix to a given value
     897              : !>       fm(start_row:start_row+n_rows,start_col:start_col+n_cols) = value
     898              : !> \param fm the full to change
     899              : !> \param new_value  ...
     900              : !> \param start_row the starting row of matrix
     901              : !> \param start_col the starting col of matrix
     902              : !> \param n_rows the number of rows to change
     903              : !> \param n_cols the number of columns to change
     904              : !> \par History
     905              : !>      07.2002 created borrowing from Joost's blacs_replicated_copy [fawzi]
     906              : !>      12.2025 created from cp_fm_set_submatrix
     907              : !> \author JGH
     908              : ! **************************************************************************************************
     909      1761338 :    SUBROUTINE cp_fm_set_all_submatrix(fm, new_value, start_row, start_col, n_rows, n_cols)
     910              :       TYPE(cp_fm_type), INTENT(IN)                       :: fm
     911              :       REAL(KIND=dp), INTENT(in)                          :: new_value
     912              :       INTEGER, INTENT(in)                                :: start_row, start_col, n_rows, n_cols
     913              : 
     914              :       INTEGER                                            :: i, i0, j, j0, ncol_global, ncol_local, &
     915              :                                                             nrow_global, nrow_local, this_col, &
     916              :                                                             this_row
     917       880669 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
     918       880669 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: full_block
     919              : 
     920            0 :       CPASSERT(.NOT. fm%use_sp)
     921              : 
     922       880669 :       full_block => fm%local_data
     923              : 
     924              :       CALL cp_fm_get_info(matrix=fm, &
     925              :                           nrow_global=nrow_global, ncol_global=ncol_global, &
     926              :                           nrow_local=nrow_local, ncol_local=ncol_local, &
     927       880669 :                           row_indices=row_indices, col_indices=col_indices)
     928              : 
     929       880669 :       i0 = start_row
     930       880669 :       j0 = start_col
     931     16194216 :       DO j = 1, ncol_local
     932     15313547 :          this_col = col_indices(j) - j0 + 1
     933     16194216 :          IF (this_col >= 1 .AND. this_col <= n_cols) THEN
     934    530623577 :             DO i = 1, nrow_local
     935    515369352 :                this_row = row_indices(i) - i0 + 1
     936    530623577 :                IF (this_row >= 1 .AND. this_row <= n_rows) THEN
     937    508330026 :                   full_block(i, j) = new_value
     938              :                END IF
     939              :             END DO
     940              :          END IF
     941              :       END DO
     942              : 
     943       880669 :    END SUBROUTINE cp_fm_set_all_submatrix
     944              : 
     945              : ! **************************************************************************************************
     946              : !> \brief gets a submatrix of a full matrix
     947              : !>       op(target_m)(1:n_rows,1:n_cols)
     948              : !>       =fm(start_row:start_row+n_rows,start_col:start_col+n_cols)
     949              : !>      target_m is replicated on all cpus
     950              : !>      using this call is expensive
     951              : !> \param fm the full you want to get the info from
     952              : !> \param target_m a replicated full matrix that will contain the result
     953              : !> \param start_row the starting row of b_matrix (defaults to 1)
     954              : !> \param start_col the starting col of b_matrix (defaults to 1)
     955              : !> \param n_rows the number of row to change in b (defaults to
     956              : !>        size(op(new_values),1))
     957              : !> \param n_cols the number of columns to change in b (defaults to
     958              : !>        size(op(new_values),2))
     959              : !> \param transpose if target_m should be transposed: if true
     960              : !>        op(target_m)=target_m^T, else op(target_m)=target_m
     961              : !>        (defaults to false)
     962              : !> \par History
     963              : !>      07.2002 created borrowing from Joost's blacs_replicated_copy [fawzi]
     964              : !> \author Fawzi Mohamed
     965              : !> \note
     966              : !>      optimized for full column updates. Zeros out a little too much
     967              : !>      of target_m
     968              : !>      the target_m is replicated and valid on all cpus
     969              : ! **************************************************************************************************
     970        83070 :    SUBROUTINE cp_fm_get_submatrix(fm, target_m, start_row, &
     971              :                                   start_col, n_rows, n_cols, transpose)
     972              :       TYPE(cp_fm_type), INTENT(IN)                       :: fm
     973              :       REAL(KIND=dp), DIMENSION(:, :), INTENT(out)        :: target_m
     974              :       INTEGER, INTENT(in), OPTIONAL                      :: start_row, start_col, n_rows, n_cols
     975              :       LOGICAL, INTENT(in), OPTIONAL                      :: transpose
     976              : 
     977              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_get_submatrix'
     978              : 
     979              :       INTEGER                                            :: handle, i, i0, j, j0, ncol, ncol_global, &
     980              :                                                             ncol_local, nrow, nrow_global, &
     981              :                                                             nrow_local, this_col, this_row
     982        83070 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
     983              :       LOGICAL                                            :: tr_a
     984        83070 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: full_block
     985              :       TYPE(mp_para_env_type), POINTER                    :: para_env
     986              : 
     987        83070 :       CALL timeset(routineN, handle)
     988              : 
     989        83070 :       i0 = 1; j0 = 1; tr_a = .FALSE.
     990              : 
     991        83070 :       CPASSERT(.NOT. fm%use_sp)
     992              : 
     993        83070 :       IF (PRESENT(start_row)) i0 = start_row
     994        83070 :       IF (PRESENT(start_col)) j0 = start_col
     995        83070 :       IF (PRESENT(transpose)) tr_a = transpose
     996         6366 :       IF (tr_a) THEN
     997         2446 :          nrow = SIZE(target_m, 2)
     998         2446 :          ncol = SIZE(target_m, 1)
     999              :       ELSE
    1000        80624 :          nrow = SIZE(target_m, 1)
    1001        80624 :          ncol = SIZE(target_m, 2)
    1002              :       END IF
    1003        83070 :       IF (PRESENT(n_rows)) nrow = n_rows
    1004        83070 :       IF (PRESENT(n_cols)) ncol = n_cols
    1005              : 
    1006        83070 :       para_env => fm%matrix_struct%para_env
    1007              : 
    1008        83070 :       full_block => fm%local_data
    1009              : #if defined(__parallel)
    1010              :       ! zero-out whole target_m
    1011        83070 :       IF (SIZE(target_m, 1)*SIZE(target_m, 2) /= 0) THEN
    1012        99430 :          CALL dcopy(SIZE(target_m, 1)*SIZE(target_m, 2), [0.0_dp], 0, target_m, 1)
    1013              :       END IF
    1014              : #endif
    1015              : 
    1016              :       CALL cp_fm_get_info(matrix=fm, &
    1017              :                           nrow_global=nrow_global, ncol_global=ncol_global, &
    1018              :                           nrow_local=nrow_local, ncol_local=ncol_local, &
    1019        83070 :                           row_indices=row_indices, col_indices=col_indices)
    1020              : 
    1021       539222 :       DO j = 1, ncol_local
    1022       456152 :          this_col = col_indices(j) - j0 + 1
    1023       539222 :          IF (this_col >= 1 .AND. this_col <= ncol) THEN
    1024       278498 :             IF (tr_a) THEN
    1025         2446 :                IF (i0 == 1 .AND. nrow_global == nrow) THEN
    1026        83016 :                   DO i = 1, nrow_local
    1027        83016 :                      target_m(this_col, row_indices(i)) = full_block(i, j)
    1028              :                   END DO
    1029              :                ELSE
    1030            0 :                   DO i = 1, nrow_local
    1031            0 :                      this_row = row_indices(i) - i0 + 1
    1032            0 :                      IF (this_row >= 1 .AND. this_row <= nrow) THEN
    1033            0 :                         target_m(this_col, this_row) = full_block(i, j)
    1034              :                      END IF
    1035              :                   END DO
    1036              :                END IF
    1037              :             ELSE
    1038       276052 :                IF (i0 == 1 .AND. nrow_global == nrow) THEN
    1039      5985881 :                   DO i = 1, nrow_local
    1040      5985881 :                      target_m(row_indices(i), this_col) = full_block(i, j)
    1041              :                   END DO
    1042              :                ELSE
    1043      1259554 :                   DO i = 1, nrow_local
    1044      1220674 :                      this_row = row_indices(i) - i0 + 1
    1045      1259554 :                      IF (this_row >= 1 .AND. this_row <= nrow) THEN
    1046        31218 :                         target_m(this_row, this_col) = full_block(i, j)
    1047              :                      END IF
    1048              :                   END DO
    1049              :                END IF
    1050              :             END IF
    1051              :          END IF
    1052              :       END DO
    1053              : 
    1054     15920970 :       CALL para_env%sum(target_m)
    1055              : 
    1056        83070 :       CALL timestop(handle)
    1057              : 
    1058        83070 :    END SUBROUTINE cp_fm_get_submatrix
    1059              : 
    1060              : ! **************************************************************************************************
    1061              : !> \brief returns all kind of information about the full matrix
    1062              : !> \param matrix ...
    1063              : !> \param name ...
    1064              : !> \param nrow_global ...
    1065              : !> \param ncol_global ...
    1066              : !> \param nrow_block ...
    1067              : !> \param ncol_block ...
    1068              : !> \param nrow_local ...
    1069              : !> \param ncol_local ...
    1070              : !> \param row_indices ...
    1071              : !> \param col_indices ...
    1072              : !> \param local_data ...
    1073              : !> \param context ...
    1074              : !> \param nrow_locals ...
    1075              : !> \param ncol_locals ...
    1076              : !> \param matrix_struct ...
    1077              : !> \param para_env ...
    1078              : !> \note
    1079              : !>       see also cp_fm_struct for explanation
    1080              : !>       - nrow_local, ncol_local, row_indices, col_indices, local_data are hooks for efficient
    1081              : !>         access to the local blacs block
    1082              : ! **************************************************************************************************
    1083      6452735 :    SUBROUTINE cp_fm_get_info(matrix, name, nrow_global, ncol_global, &
    1084              :                              nrow_block, ncol_block, nrow_local, ncol_local, &
    1085              :                              row_indices, col_indices, local_data, context, &
    1086              :                              nrow_locals, ncol_locals, matrix_struct, para_env)
    1087              : 
    1088              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix
    1089              :       CHARACTER(LEN=*), INTENT(OUT), OPTIONAL            :: name
    1090              :       INTEGER, INTENT(OUT), OPTIONAL                     :: nrow_global, ncol_global, nrow_block, &
    1091              :                                                             ncol_block, nrow_local, ncol_local
    1092              :       INTEGER, DIMENSION(:), OPTIONAL, POINTER           :: row_indices, col_indices
    1093              :       REAL(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    1094              :          OPTIONAL, POINTER                               :: local_data
    1095              :       TYPE(cp_blacs_env_type), OPTIONAL, POINTER         :: context
    1096              :       INTEGER, DIMENSION(:), OPTIONAL, POINTER           :: nrow_locals, ncol_locals
    1097              :       TYPE(cp_fm_struct_type), OPTIONAL, POINTER         :: matrix_struct
    1098              :       TYPE(mp_para_env_type), OPTIONAL, POINTER          :: para_env
    1099              : 
    1100            8 :       IF (PRESENT(name)) name = matrix%name
    1101      6452735 :       IF (PRESENT(matrix_struct)) matrix_struct => matrix%matrix_struct
    1102      6452735 :       IF (PRESENT(local_data)) local_data => matrix%local_data ! not hiding things anymore :-(
    1103              : 
    1104              :       CALL cp_fm_struct_get(matrix%matrix_struct, nrow_local=nrow_local, &
    1105              :                             ncol_local=ncol_local, nrow_global=nrow_global, &
    1106              :                             ncol_global=ncol_global, nrow_block=nrow_block, &
    1107              :                             ncol_block=ncol_block, row_indices=row_indices, &
    1108              :                             col_indices=col_indices, nrow_locals=nrow_locals, &
    1109      6452735 :                             ncol_locals=ncol_locals, context=context, para_env=para_env)
    1110              : 
    1111      6452735 :    END SUBROUTINE cp_fm_get_info
    1112              : 
    1113              : ! **************************************************************************************************
    1114              : !> \brief Write nicely formatted info about the FM to the given I/O unit (including the underlying FM struct)
    1115              : !> \param matrix a cp_fm_type instance
    1116              : !> \param io_unit the I/O unit to use for writing
    1117              : ! **************************************************************************************************
    1118            3 :    SUBROUTINE cp_fm_write_info(matrix, io_unit)
    1119              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix
    1120              :       INTEGER, INTENT(IN)                                :: io_unit
    1121              : 
    1122            3 :       WRITE (io_unit, '(/,A,A12)') "CP_FM | Name:                           ", matrix%name
    1123            3 :       CALL cp_fm_struct_write_info(matrix%matrix_struct, io_unit)
    1124            3 :    END SUBROUTINE cp_fm_write_info
    1125              : 
    1126              : ! **************************************************************************************************
    1127              : !> \brief find the maximum absolute value of the matrix element
    1128              : !>      maxval(abs(matrix))
    1129              : !> \param matrix ...
    1130              : !> \param a_max ...
    1131              : !> \param ir_max ...
    1132              : !> \param ic_max ...
    1133              : ! **************************************************************************************************
    1134        92761 :    SUBROUTINE cp_fm_maxabsval(matrix, a_max, ir_max, ic_max)
    1135              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix
    1136              :       REAL(KIND=dp), INTENT(OUT)                         :: a_max
    1137              :       INTEGER, INTENT(OUT), OPTIONAL                     :: ir_max, ic_max
    1138              : 
    1139              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'cp_fm_maxabsval'
    1140              : 
    1141              :       INTEGER                                            :: handle, i, ic_max_local, ir_max_local, &
    1142              :                                                             j, mepos, ncol_local, nrow_local, &
    1143              :                                                             num_pe
    1144        92761 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: ic_max_vec, ir_max_vec
    1145        92761 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
    1146              :       REAL(dp)                                           :: my_max
    1147        92761 :       REAL(dp), ALLOCATABLE, DIMENSION(:)                :: a_max_vec
    1148        92761 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: my_block
    1149        92761 :       REAL(KIND=sp), DIMENSION(:, :), POINTER            :: my_block_sp
    1150              : 
    1151        92761 :       CALL timeset(routineN, handle)
    1152              : 
    1153        92761 :       my_block => matrix%local_data
    1154        92761 :       my_block_sp => matrix%local_data_sp
    1155              : 
    1156              :       CALL cp_fm_get_info(matrix, nrow_local=nrow_local, ncol_local=ncol_local, &
    1157        92761 :                           row_indices=row_indices, col_indices=col_indices)
    1158              : 
    1159        92761 :       IF (matrix%use_sp) THEN
    1160            0 :          a_max = REAL(MAXVAL(ABS(my_block_sp(1:nrow_local, 1:ncol_local))), dp)
    1161              :       ELSE
    1162     54700824 :          a_max = MAXVAL(ABS(my_block(1:nrow_local, 1:ncol_local)))
    1163              :       END IF
    1164              : 
    1165        92761 :       IF (PRESENT(ir_max)) THEN
    1166            0 :          num_pe = matrix%matrix_struct%para_env%num_pe
    1167            0 :          mepos = matrix%matrix_struct%para_env%mepos
    1168            0 :          ALLOCATE (ir_max_vec(0:num_pe - 1))
    1169            0 :          ir_max_vec(0:num_pe - 1) = 0
    1170            0 :          ALLOCATE (ic_max_vec(0:num_pe - 1))
    1171            0 :          ic_max_vec(0:num_pe - 1) = 0
    1172            0 :          ALLOCATE (a_max_vec(0:num_pe - 1))
    1173            0 :          a_max_vec(0:num_pe - 1) = 0.0_dp
    1174            0 :          my_max = 0.0_dp
    1175              : 
    1176            0 :          IF ((ncol_local > 0) .AND. (nrow_local > 0)) THEN
    1177            0 :             DO i = 1, ncol_local
    1178            0 :                DO j = 1, nrow_local
    1179            0 :                   IF (matrix%use_sp) THEN
    1180            0 :                      IF (ABS(my_block_sp(j, i)) > my_max) THEN
    1181            0 :                         my_max = REAL(my_block_sp(j, i), dp)
    1182            0 :                         ir_max_local = j
    1183            0 :                         ic_max_local = i
    1184              :                      END IF
    1185              :                   ELSE
    1186            0 :                      IF (ABS(my_block(j, i)) > my_max) THEN
    1187            0 :                         my_max = my_block(j, i)
    1188            0 :                         ir_max_local = j
    1189            0 :                         ic_max_local = i
    1190              :                      END IF
    1191              :                   END IF
    1192              :                END DO
    1193              :             END DO
    1194              : 
    1195            0 :             a_max_vec(mepos) = my_max
    1196            0 :             ir_max_vec(mepos) = row_indices(ir_max_local)
    1197            0 :             ic_max_vec(mepos) = col_indices(ic_max_local)
    1198              : 
    1199              :          END IF
    1200              : 
    1201            0 :          CALL matrix%matrix_struct%para_env%sum(a_max_vec)
    1202            0 :          CALL matrix%matrix_struct%para_env%sum(ir_max_vec)
    1203            0 :          CALL matrix%matrix_struct%para_env%sum(ic_max_vec)
    1204              : 
    1205            0 :          my_max = 0.0_dp
    1206            0 :          DO i = 0, num_pe - 1
    1207            0 :             IF (a_max_vec(i) > my_max) THEN
    1208            0 :                ir_max = ir_max_vec(i)
    1209            0 :                ic_max = ic_max_vec(i)
    1210              :             END IF
    1211              :          END DO
    1212              : 
    1213            0 :          DEALLOCATE (ir_max_vec, ic_max_vec, a_max_vec)
    1214            0 :          CPASSERT(ic_max > 0)
    1215            0 :          CPASSERT(ir_max > 0)
    1216              : 
    1217              :       END IF
    1218              : 
    1219        92761 :       CALL matrix%matrix_struct%para_env%max(a_max)
    1220              : 
    1221        92761 :       CALL timestop(handle)
    1222              : 
    1223       185522 :    END SUBROUTINE cp_fm_maxabsval
    1224              : 
    1225              : ! **************************************************************************************************
    1226              : !> \brief find the maximum over the rows of the sum of the absolute values of the elements of a given row
    1227              : !>      = || A ||_infinity
    1228              : !> \param matrix ...
    1229              : !> \param a_max ...
    1230              : !> \note
    1231              : !>      for a real symmetric matrix it holds that || A ||_2 = |lambda_max| < || A ||_infinity
    1232              : !>      Hence this can be used to estimate an upper bound for the eigenvalues of a matrix
    1233              : !>      http://mathworld.wolfram.com/MatrixNorm.html
    1234              : !>      (but the bound is not so tight in the general case)
    1235              : ! **************************************************************************************************
    1236         4246 :    SUBROUTINE cp_fm_maxabsrownorm(matrix, a_max)
    1237              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix
    1238              :       REAL(KIND=dp), INTENT(OUT)                         :: a_max
    1239              : 
    1240              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_maxabsrownorm'
    1241              : 
    1242              :       INTEGER                                            :: handle, i, j, ncol_local, nrow_global, &
    1243              :                                                             nrow_local
    1244         4246 :       INTEGER, DIMENSION(:), POINTER                     :: row_indices
    1245              :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: values
    1246         4246 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: my_block
    1247              : 
    1248         4246 :       CALL timeset(routineN, handle)
    1249              : 
    1250         4246 :       my_block => matrix%local_data
    1251              : 
    1252         4246 :       CPASSERT(.NOT. matrix%use_sp)
    1253              : 
    1254              :       CALL cp_fm_get_info(matrix, row_indices=row_indices, nrow_global=nrow_global, &
    1255         4246 :                           nrow_local=nrow_local, ncol_local=ncol_local)
    1256              : 
    1257              :       ! the efficiency could be improved by making use of the row-col distribution of scalapack
    1258        12738 :       ALLOCATE (values(nrow_global))
    1259        62136 :       values = 0.0_dp
    1260        62136 :       DO j = 1, ncol_local
    1261       525661 :          DO i = 1, nrow_local
    1262       521415 :             values(row_indices(i)) = values(row_indices(i)) + ABS(my_block(i, j))
    1263              :          END DO
    1264              :       END DO
    1265         4246 :       CALL matrix%matrix_struct%para_env%sum(values)
    1266        66382 :       a_max = MAXVAL(values)
    1267         4246 :       DEALLOCATE (values)
    1268              : 
    1269         4246 :       CALL timestop(handle)
    1270         8492 :    END SUBROUTINE cp_fm_maxabsrownorm
    1271              : 
    1272              : ! **************************************************************************************************
    1273              : !> \brief find the inorm of each column norm_{j}= sqrt( \sum_{i} A_{ij}*A_{ij} )
    1274              : !> \param matrix ...
    1275              : !> \param norm_array ...
    1276              : ! **************************************************************************************************
    1277         1282 :    SUBROUTINE cp_fm_vectorsnorm(matrix, norm_array)
    1278              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix
    1279              :       REAL(KIND=dp), DIMENSION(:), INTENT(OUT)           :: norm_array
    1280              : 
    1281              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'cp_fm_vectorsnorm'
    1282              : 
    1283              :       INTEGER                                            :: handle, i, j, ncol_global, ncol_local, &
    1284              :                                                             nrow_local
    1285         1282 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices
    1286         1282 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: my_block
    1287              : 
    1288         1282 :       CALL timeset(routineN, handle)
    1289              : 
    1290         1282 :       my_block => matrix%local_data
    1291              : 
    1292         1282 :       CPASSERT(.NOT. matrix%use_sp)
    1293              : 
    1294              :       CALL cp_fm_get_info(matrix, col_indices=col_indices, ncol_global=ncol_global, &
    1295         1282 :                           nrow_local=nrow_local, ncol_local=ncol_local)
    1296              : 
    1297              :       ! the efficiency could be improved by making use of the row-col distribution of scalapack
    1298        29746 :       norm_array = 0.0_dp
    1299        29746 :       DO j = 1, ncol_local
    1300      1261016 :          DO i = 1, nrow_local
    1301      1259734 :             norm_array(col_indices(j)) = norm_array(col_indices(j)) + my_block(i, j)*my_block(i, j)
    1302              :          END DO
    1303              :       END DO
    1304        58210 :       CALL matrix%matrix_struct%para_env%sum(norm_array)
    1305        29746 :       norm_array = SQRT(norm_array)
    1306              : 
    1307         1282 :       CALL timestop(handle)
    1308         1282 :    END SUBROUTINE cp_fm_vectorsnorm
    1309              : 
    1310              : ! **************************************************************************************************
    1311              : !> \brief summing up all the elements along the matrix's i-th index
    1312              : !>        \f$ \mathrm{sum}_{j} = \sum_{i} A_{ij} \f$
    1313              : !>        or
    1314              : !>        \f$ \mathrm{sum}_{i} = \sum_{j} A_{ij} \f$
    1315              : !> \param matrix     an input matrix A
    1316              : !> \param sum_array  sums of elements in each column/row
    1317              : !> \param dir ...
    1318              : !> \note  forked from cp_fm_vectorsnorm() to be used with
    1319              : !>        the maximum overlap method
    1320              : !>        added row variation
    1321              : ! **************************************************************************************************
    1322         7174 :    SUBROUTINE cp_fm_vectorssum(matrix, sum_array, dir)
    1323              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix
    1324              :       REAL(KIND=dp), DIMENSION(:), INTENT(OUT)           :: sum_array
    1325              :       CHARACTER(LEN=1), INTENT(IN), OPTIONAL             :: dir
    1326              : 
    1327              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'cp_fm_vectorssum'
    1328              : 
    1329              :       INTEGER                                            :: handle, i, j, ncol_local, nrow_local
    1330         7174 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
    1331              :       LOGICAL                                            :: docol
    1332         7174 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: my_block
    1333              : 
    1334         7174 :       CALL timeset(routineN, handle)
    1335              : 
    1336         7174 :       IF (PRESENT(dir)) THEN
    1337         7134 :          IF (dir == 'c' .OR. dir == 'C') THEN
    1338              :             docol = .TRUE.
    1339              :          ELSEIF (dir == 'r' .OR. dir == 'R') THEN
    1340              :             docol = .FALSE.
    1341              :          ELSE
    1342            0 :             CPABORT('Wrong argument DIR')
    1343              :          END IF
    1344              :       ELSE
    1345              :          docol = .TRUE.
    1346              :       END IF
    1347              : 
    1348         7174 :       my_block => matrix%local_data
    1349              : 
    1350         7174 :       CPASSERT(.NOT. matrix%use_sp)
    1351              : 
    1352              :       CALL cp_fm_get_info(matrix, col_indices=col_indices, row_indices=row_indices, &
    1353         7174 :                           nrow_local=nrow_local, ncol_local=ncol_local)
    1354              : 
    1355              :       ! the efficiency could be improved by making use of the row-col distribution of scalapack
    1356       220970 :       sum_array(:) = 0.0_dp
    1357         7174 :       IF (docol) THEN
    1358          448 :       DO j = 1, ncol_local
    1359         3628 :          DO i = 1, nrow_local
    1360         3588 :             sum_array(col_indices(j)) = sum_array(col_indices(j)) + my_block(i, j)
    1361              :          END DO
    1362              :       END DO
    1363              :       ELSE
    1364        81994 :       DO j = 1, ncol_local
    1365      6429038 :          DO i = 1, nrow_local
    1366      6421904 :             sum_array(row_indices(i)) = sum_array(row_indices(i)) + my_block(i, j)
    1367              :          END DO
    1368              :       END DO
    1369              :       END IF
    1370       434766 :       CALL matrix%matrix_struct%para_env%sum(sum_array)
    1371              : 
    1372         7174 :       CALL timestop(handle)
    1373         7174 :    END SUBROUTINE cp_fm_vectorssum
    1374              : 
    1375              : ! **************************************************************************************************
    1376              : !> \brief copy one identically sized matrix in the other
    1377              : !> \param source ...
    1378              : !> \param destination ...
    1379              : !> \note
    1380              : !>      see also cp_fm_to_fm_columns
    1381              : ! **************************************************************************************************
    1382       368985 :    SUBROUTINE cp_fm_to_fm_matrix(source, destination)
    1383              : 
    1384              :       TYPE(cp_fm_type), INTENT(IN)                       :: source, destination
    1385              : 
    1386              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_to_fm_matrix'
    1387              : 
    1388              :       INTEGER                                            :: handle, npcol, nprow
    1389              : 
    1390       368985 :       CALL timeset(routineN, handle)
    1391              : 
    1392       368985 :       nprow = source%matrix_struct%context%num_pe(1)
    1393       368985 :       npcol = source%matrix_struct%context%num_pe(2)
    1394              : 
    1395       368985 :       IF ((.NOT. cp2k_is_parallel) .OR. &
    1396              :           cp_fm_struct_equivalent(source%matrix_struct, &
    1397              :                                   destination%matrix_struct)) THEN
    1398       368985 :          IF (source%use_sp .AND. destination%use_sp) THEN
    1399            0 :             IF (SIZE(source%local_data_sp, 1) /= SIZE(destination%local_data_sp, 1) .OR. &
    1400              :                 SIZE(source%local_data_sp, 2) /= SIZE(destination%local_data_sp, 2)) &
    1401              :                CALL cp_abort(__LOCATION__, &
    1402              :                              "Cannot copy full matrix <"//TRIM(source%name)// &
    1403              :                              "> to full matrix <"//TRIM(destination%name)// &
    1404            0 :                              ">. The local_data blocks have different sizes.")
    1405              :             CALL scopy(SIZE(source%local_data_sp, 1)*SIZE(source%local_data_sp, 2), &
    1406            0 :                        source%local_data_sp, 1, destination%local_data_sp, 1)
    1407       368985 :          ELSE IF (source%use_sp .AND. .NOT. destination%use_sp) THEN
    1408            0 :             IF (SIZE(source%local_data_sp, 1) /= SIZE(destination%local_data, 1) .OR. &
    1409              :                 SIZE(source%local_data_sp, 2) /= SIZE(destination%local_data, 2)) &
    1410              :                CALL cp_abort(__LOCATION__, &
    1411              :                              "Cannot copy full matrix <"//TRIM(source%name)// &
    1412              :                              "> to full matrix <"//TRIM(destination%name)// &
    1413            0 :                              ">. The local_data blocks have different sizes.")
    1414            0 :             destination%local_data = REAL(source%local_data_sp, dp)
    1415       368985 :          ELSE IF (.NOT. source%use_sp .AND. destination%use_sp) THEN
    1416            0 :             IF (SIZE(source%local_data, 1) /= SIZE(destination%local_data_sp, 1) .OR. &
    1417              :                 SIZE(source%local_data, 2) /= SIZE(destination%local_data_sp, 2)) &
    1418              :                CALL cp_abort(__LOCATION__, &
    1419              :                              "Cannot copy full matrix <"//TRIM(source%name)// &
    1420              :                              "> to full matrix <"//TRIM(destination%name)// &
    1421            0 :                              ">. The local_data blocks have different sizes.")
    1422            0 :             destination%local_data_sp = REAL(source%local_data, sp)
    1423              :          ELSE
    1424       368985 :             IF (SIZE(source%local_data, 1) /= SIZE(destination%local_data, 1) .OR. &
    1425              :                 SIZE(source%local_data, 2) /= SIZE(destination%local_data, 2)) &
    1426              :                CALL cp_abort(__LOCATION__, &
    1427              :                              "Cannot copy full matrix <"//TRIM(source%name)// &
    1428              :                              "> to full matrix <"//TRIM(destination%name)// &
    1429            0 :                              ">. The local_data blocks have different sizes.")
    1430              :             CALL dcopy(SIZE(source%local_data, 1)*SIZE(source%local_data, 2), &
    1431       368985 :                        source%local_data, 1, destination%local_data, 1)
    1432              :          END IF
    1433              :       ELSE
    1434            0 :          CPABORT("Data structures of source and target full matrix are not equivalent")
    1435              :       END IF
    1436              : 
    1437       368985 :       CALL timestop(handle)
    1438              : 
    1439       368985 :    END SUBROUTINE cp_fm_to_fm_matrix
    1440              : 
    1441              : ! **************************************************************************************************
    1442              : !> \brief copy just a subset of columns of a fm to a fm
    1443              : !> \param msource ...
    1444              : !> \param mtarget ...
    1445              : !> \param ncol ...
    1446              : !> \param source_start ...
    1447              : !> \param target_start ...
    1448              : ! **************************************************************************************************
    1449       164486 :    SUBROUTINE cp_fm_to_fm_columns(msource, mtarget, ncol, source_start, &
    1450              :                                   target_start)
    1451              : 
    1452              :       TYPE(cp_fm_type), INTENT(IN)          :: msource, mtarget
    1453              :       INTEGER, INTENT(IN)                      :: ncol
    1454              :       INTEGER, INTENT(IN), OPTIONAL            :: source_start, target_start
    1455              : 
    1456              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_to_fm_columns'
    1457              : 
    1458              :       INTEGER                                  :: handle, n, ss, ts
    1459              :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a, b
    1460              : #if defined(__parallel)
    1461              :       INTEGER                                  :: i
    1462              :       INTEGER, DIMENSION(9)                    :: desca, descb
    1463              : #endif
    1464              : 
    1465       164486 :       CALL timeset(routineN, handle)
    1466              : 
    1467       164486 :       ss = 1
    1468       164486 :       ts = 1
    1469              : 
    1470       164486 :       IF (PRESENT(source_start)) ss = source_start
    1471       164486 :       IF (PRESENT(target_start)) ts = target_start
    1472              : 
    1473       164486 :       n = msource%matrix_struct%nrow_global
    1474              : 
    1475       164486 :       a => msource%local_data
    1476       164486 :       b => mtarget%local_data
    1477              : 
    1478              : #if defined(__parallel)
    1479      1644860 :       desca(:) = msource%matrix_struct%descriptor(:)
    1480      1644860 :       descb(:) = mtarget%matrix_struct%descriptor(:)
    1481       662832 :       DO i = 0, ncol - 1
    1482       662832 :          CALL pdcopy(n, a, 1, ss + i, desca, 1, b, 1, ts + i, descb, 1)
    1483              :       END DO
    1484              : #else
    1485              :       IF (ss <= SIZE(a, 2) .AND. ts <= SIZE(b, 2)) THEN
    1486              :          CALL dcopy(ncol*n, a(:, ss), 1, b(:, ts), 1)
    1487              :       END IF
    1488              : #endif
    1489              : 
    1490       164486 :       CALL timestop(handle)
    1491              : 
    1492       164486 :    END SUBROUTINE cp_fm_to_fm_columns
    1493              : 
    1494              : ! **************************************************************************************************
    1495              : !> \brief copy just a triangular matrix
    1496              : !> \param msource ...
    1497              : !> \param mtarget ...
    1498              : !> \param uplo ...
    1499              : ! **************************************************************************************************
    1500          370 :    SUBROUTINE cp_fm_to_fm_triangular(msource, mtarget, uplo)
    1501              : 
    1502              :       TYPE(cp_fm_type), INTENT(IN)             :: msource, mtarget
    1503              :       CHARACTER(LEN=1), OPTIONAL, INTENT(IN)   :: uplo
    1504              : 
    1505              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_to_fm_triangular'
    1506              : 
    1507              :       CHARACTER(LEN=1)                         :: myuplo
    1508              :       INTEGER                                  :: handle, ncol, nrow
    1509          370 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a, b
    1510              : #if defined(__parallel)
    1511              :       INTEGER, DIMENSION(9)                    :: desca, descb
    1512              : #endif
    1513              : 
    1514          370 :       CALL timeset(routineN, handle)
    1515              : 
    1516          370 :       myuplo = 'U'
    1517          370 :       IF (PRESENT(uplo)) myuplo = uplo
    1518              : 
    1519          370 :       nrow = msource%matrix_struct%nrow_global
    1520          370 :       ncol = msource%matrix_struct%ncol_global
    1521              : 
    1522          370 :       a => msource%local_data
    1523          370 :       b => mtarget%local_data
    1524              : 
    1525              : #if defined(__parallel)
    1526         3700 :       desca(:) = msource%matrix_struct%descriptor(:)
    1527         3700 :       descb(:) = mtarget%matrix_struct%descriptor(:)
    1528          370 :       CALL pdlacpy(myuplo, nrow, ncol, a(1, 1), 1, 1, desca, b(1, 1), 1, 1, descb)
    1529              : #else
    1530              :       CALL dlacpy(myuplo, nrow, ncol, a(1, 1), nrow, b(1, 1), nrow)
    1531              : #endif
    1532              : 
    1533          370 :       CALL timestop(handle)
    1534              : 
    1535          370 :    END SUBROUTINE cp_fm_to_fm_triangular
    1536              : 
    1537              : ! **************************************************************************************************
    1538              : !> \brief copy just a part ot the matrix
    1539              : !> \param msource ...
    1540              : !> \param mtarget ...
    1541              : !> \param nrow ...
    1542              : !> \param ncol ...
    1543              : !> \param s_firstrow ...
    1544              : !> \param s_firstcol ...
    1545              : !> \param t_firstrow ...
    1546              : !> \param t_firstcol ...
    1547              : ! **************************************************************************************************
    1548              : 
    1549        11004 :    SUBROUTINE cp_fm_to_fm_submat(msource, mtarget, nrow, ncol, s_firstrow, s_firstcol, t_firstrow, t_firstcol)
    1550              : 
    1551              :       TYPE(cp_fm_type), INTENT(IN)             :: msource, mtarget
    1552              :       INTEGER, INTENT(IN)                      :: nrow, ncol, s_firstrow, &
    1553              :                                                   s_firstcol, t_firstrow, &
    1554              :                                                   t_firstcol
    1555              : 
    1556              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_to_fm_submat'
    1557              : 
    1558              :       INTEGER                                  :: handle, i, na, nb, ss, ts
    1559              :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a, b
    1560              : #if defined(__parallel)
    1561              :       INTEGER, DIMENSION(9)                    :: desca, descb
    1562              : #endif
    1563              : 
    1564        11004 :       CALL timeset(routineN, handle)
    1565              : 
    1566        11004 :       a => msource%local_data
    1567        11004 :       b => mtarget%local_data
    1568              : 
    1569        11004 :       na = msource%matrix_struct%nrow_global
    1570        11004 :       nb = mtarget%matrix_struct%nrow_global
    1571              : !    nrow must be <= na and nb
    1572        11004 :       IF (nrow > na) &
    1573            0 :          CPABORT("cannot copy because nrow > number of rows of source matrix")
    1574        11004 :       IF (nrow > nb) &
    1575            0 :          CPABORT("cannot copy because nrow > number of rows of target matrix")
    1576        11004 :       na = msource%matrix_struct%ncol_global
    1577        11004 :       nb = mtarget%matrix_struct%ncol_global
    1578              : !    ncol must be <= na_col and nb_col
    1579        11004 :       IF (ncol > na) &
    1580            0 :          CPABORT("cannot copy because nrow > number of rows of source matrix")
    1581        11004 :       IF (ncol > nb) &
    1582            0 :          CPABORT("cannot copy because nrow > number of rows of target matrix")
    1583              : 
    1584              : #if defined(__parallel)
    1585       110040 :       desca(:) = msource%matrix_struct%descriptor(:)
    1586       110040 :       descb(:) = mtarget%matrix_struct%descriptor(:)
    1587       120228 :       DO i = 0, ncol - 1
    1588       109224 :          ss = s_firstcol + i
    1589       109224 :          ts = t_firstcol + i
    1590       120228 :          CALL pdcopy(nrow, a, s_firstrow, ss, desca, 1, b, t_firstrow, ts, descb, 1)
    1591              :       END DO
    1592              : #else
    1593              :       DO i = 0, ncol - 1
    1594              :          ss = s_firstcol + i
    1595              :          ts = t_firstcol + i
    1596              :          CALL dcopy(nrow, a(s_firstrow:, ss), 1, b(t_firstrow:, ts), 1)
    1597              :       END DO
    1598              : #endif
    1599              : 
    1600        11004 :       CALL timestop(handle)
    1601        11004 :    END SUBROUTINE cp_fm_to_fm_submat
    1602              : 
    1603              : ! **************************************************************************************************
    1604              : !> \brief General copy of a fm matrix to another fm matrix.
    1605              : !>        Uses non-blocking MPI rather than ScaLAPACK.
    1606              : !>
    1607              : !> \param source          input fm matrix
    1608              : !> \param destination     output fm matrix
    1609              : !> \param para_env        parallel environment corresponding to the BLACS env that covers all parts
    1610              : !>                        of the input and output matrices
    1611              : !> \par History
    1612              : !>      31-Jan-2017 : Re-implemented using non-blocking MPI [IainB, MarkT]
    1613              : ! **************************************************************************************************
    1614         7982 :    SUBROUTINE cp_fm_copy_general(source, destination, para_env)
    1615              :       TYPE(cp_fm_type), INTENT(IN)                       :: source, destination
    1616              :       TYPE(mp_para_env_type), INTENT(IN)                 :: para_env
    1617              : 
    1618              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_copy_general'
    1619              : 
    1620              :       INTEGER                                            :: handle
    1621        71838 :       TYPE(copy_info_type)                               :: info
    1622              : 
    1623         7982 :       CALL timeset(routineN, handle)
    1624              : 
    1625         7982 :       CALL cp_fm_start_copy_general(source, destination, para_env, info)
    1626         7982 :       IF (ASSOCIATED(destination%matrix_struct)) THEN
    1627         7968 :          CALL cp_fm_finish_copy_general(destination, info)
    1628              :       END IF
    1629         7982 :       IF (ASSOCIATED(source%matrix_struct)) THEN
    1630         7830 :          CALL cp_fm_cleanup_copy_general(info)
    1631              :       END IF
    1632              : 
    1633         7982 :       CALL timestop(handle)
    1634         7982 :    END SUBROUTINE cp_fm_copy_general
    1635              : 
    1636              : ! **************************************************************************************************
    1637              : !> \brief Initiates the copy operation: get distribution data, post MPI isend and irecvs
    1638              : !> \param source input fm matrix
    1639              : !> \param destination output fm matrix
    1640              : !> \param para_env parallel environment corresponding to the BLACS env that covers all parts
    1641              : !>                        of the input and output matrices
    1642              : !> \param info all of the data that will be needed to complete the copy operation
    1643              : ! **************************************************************************************************
    1644      1801220 :    SUBROUTINE cp_fm_start_copy_general(source, destination, para_env, info)
    1645              :       TYPE(cp_fm_type), INTENT(IN)                       :: source, destination
    1646              :       TYPE(mp_para_env_type), INTENT(IN)                 :: para_env
    1647              :       TYPE(copy_info_type), INTENT(OUT)                  :: info
    1648              : 
    1649              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_start_copy_general'
    1650              : 
    1651              :       INTEGER :: dest_p_i, dest_q_j, global_rank, global_size, handle, i, j, k, mpi_rank, &
    1652              :          ncol_block_dest, ncol_block_src, ncol_local_recv, ncol_local_send, ncols, &
    1653              :          nrow_block_dest, nrow_block_src, nrow_local_recv, nrow_local_send, nrows, p, q, &
    1654              :          recv_rank, recv_size, send_rank, send_size
    1655       180122 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: all_ranks, dest2global, dest_p, dest_q, &
    1656       360244 :                                                             recv_count, send_count, send_disp, &
    1657       180122 :                                                             source2global, src_p, src_q
    1658       180122 :       INTEGER, ALLOCATABLE, DIMENSION(:, :)              :: dest_blacs2mpi
    1659              :       INTEGER, DIMENSION(2)                              :: dest_block, dest_block_tmp, dest_num_pe, &
    1660              :                                                             src_block, src_block_tmp, src_num_pe
    1661       360244 :       INTEGER, DIMENSION(:), POINTER                     :: recv_col_indices, recv_row_indices, &
    1662       360244 :                                                             send_col_indices, send_row_indices
    1663              :       TYPE(cp_fm_struct_type), POINTER                   :: recv_dist, send_dist
    1664      2521708 :       TYPE(mp_request_type), DIMENSION(6)                :: recv_req, send_req
    1665              : 
    1666       180122 :       CALL timeset(routineN, handle)
    1667              : 
    1668              :       IF (.NOT. cp2k_is_parallel) THEN
    1669              :          ! Just copy all of the matrix data into a 'send buffer', to be unpacked later
    1670              :          nrow_local_send = SIZE(source%local_data, 1)
    1671              :          ncol_local_send = SIZE(source%local_data, 2)
    1672              :          ALLOCATE (info%send_buf(nrow_local_send*ncol_local_send))
    1673              :          k = 0
    1674              :          DO j = 1, ncol_local_send
    1675              :             DO i = 1, nrow_local_send
    1676              :                k = k + 1
    1677              :                info%send_buf(k) = source%local_data(i, j)
    1678              :             END DO
    1679              :          END DO
    1680              :       ELSE
    1681              :          ! assumption of double precision data is carried forward from ScaLAPACK version
    1682       180122 :          IF (source%use_sp) CPABORT("only DP kind implemented")
    1683       180122 :          IF (destination%use_sp) CPABORT("only DP kind implemented")
    1684              : 
    1685       180122 :          NULLIFY (recv_dist, send_dist)
    1686       180122 :          NULLIFY (recv_col_indices, recv_row_indices, send_col_indices, send_row_indices)
    1687              : 
    1688              :          ! The 'global' communicator contains both the source and destination decompositions
    1689       180122 :          global_size = para_env%num_pe
    1690       180122 :          global_rank = para_env%mepos
    1691              : 
    1692              :          ! The source/send decomposition and destination/recv decompositions may only exist on
    1693              :          ! on a subset of the processes involved in the communication
    1694              :          ! Check if the source and/or destination arguments are .not. ASSOCIATED():
    1695              :          ! if so, skip the send / recv parts (since these processes do not participate in the sending/receiving distribution)
    1696       180122 :          IF (ASSOCIATED(destination%matrix_struct)) THEN
    1697       147364 :             recv_dist => destination%matrix_struct
    1698       147364 :             recv_rank = recv_dist%para_env%mepos
    1699              :          ELSE
    1700        32758 :             recv_rank = mp_proc_null
    1701              :          END IF
    1702              : 
    1703       180122 :          IF (ASSOCIATED(source%matrix_struct)) THEN
    1704       161194 :             send_dist => source%matrix_struct
    1705       161194 :             send_rank = send_dist%para_env%mepos
    1706              :          ELSE
    1707        18928 :             send_rank = mp_proc_null
    1708              :          END IF
    1709              : 
    1710              :          ! Map the rank in the source/dest communicator to the global rank
    1711       540366 :          ALLOCATE (all_ranks(0:global_size - 1))
    1712              : 
    1713       180122 :          CALL para_env%allgather(send_rank, all_ranks)
    1714       180122 :          IF (ASSOCIATED(recv_dist)) THEN
    1715       736820 :             ALLOCATE (source2global(0:COUNT(all_ranks /= mp_proc_null) - 1))
    1716       442092 :             DO i = 0, global_size - 1
    1717       442092 :                IF (all_ranks(i) /= mp_proc_null) THEN
    1718       256872 :                   source2global(all_ranks(i)) = i
    1719              :                END IF
    1720              :             END DO
    1721              :          END IF
    1722              : 
    1723       180122 :          CALL para_env%allgather(recv_rank, all_ranks)
    1724       180122 :          IF (ASSOCIATED(send_dist)) THEN
    1725       805970 :             ALLOCATE (dest2global(0:COUNT(all_ranks /= mp_proc_null) - 1))
    1726       483582 :             DO i = 0, global_size - 1
    1727       483582 :                IF (all_ranks(i) /= mp_proc_null) THEN
    1728       256872 :                   dest2global(all_ranks(i)) = i
    1729              :                END IF
    1730              :             END DO
    1731              :          END IF
    1732       180122 :          DEALLOCATE (all_ranks)
    1733              : 
    1734              :          ! Some data from the two decompositions will be needed by all processes in the global group :
    1735              :          ! process grid shape, block size, and the BLACS-to-MPI mapping
    1736              : 
    1737              :          ! The global root process will receive the data (from the root process in each decomposition)
    1738      1260854 :          send_req(:) = mp_request_null
    1739       180122 :          IF (global_rank == 0) THEN
    1740       630427 :             recv_req(:) = mp_request_null
    1741        90061 :             CALL para_env%irecv(src_block, mp_any_source, recv_req(1), tag=src_tag)
    1742        90061 :             CALL para_env%irecv(dest_block, mp_any_source, recv_req(2), tag=dest_tag)
    1743        90061 :             CALL para_env%irecv(src_num_pe, mp_any_source, recv_req(3), tag=src_tag)
    1744        90061 :             CALL para_env%irecv(dest_num_pe, mp_any_source, recv_req(4), tag=dest_tag)
    1745              :          END IF
    1746              : 
    1747       180122 :          IF (ASSOCIATED(send_dist)) THEN
    1748       161194 :             IF ((send_rank == 0)) THEN
    1749              :                ! need to use separate buffers here in case this is actually global rank 0
    1750       270183 :                src_block_tmp = [send_dist%nrow_block, send_dist%ncol_block]
    1751        90061 :                CALL para_env%isend(src_block_tmp, 0, send_req(1), tag=src_tag)
    1752        90061 :                CALL para_env%isend(send_dist%context%num_pe, 0, send_req(2), tag=src_tag)
    1753              :             END IF
    1754              :          END IF
    1755              : 
    1756       180122 :          IF (ASSOCIATED(recv_dist)) THEN
    1757       147364 :             IF ((recv_rank == 0)) THEN
    1758       270183 :                dest_block_tmp = [recv_dist%nrow_block, recv_dist%ncol_block]
    1759        90061 :                CALL para_env%isend(dest_block_tmp, 0, send_req(3), tag=dest_tag)
    1760        90061 :                CALL para_env%isend(recv_dist%context%num_pe, 0, send_req(4), tag=dest_tag)
    1761              :             END IF
    1762              :          END IF
    1763              : 
    1764       180122 :          IF (global_rank == 0) THEN
    1765        90061 :             CALL mp_waitall(recv_req(1:4))
    1766              :             ! Now we know the process decomposition, we can allocate the arrays to hold the blacs2mpi mapping
    1767            0 :             ALLOCATE (info%src_blacs2mpi(0:src_num_pe(1) - 1, 0:src_num_pe(2) - 1), &
    1768              :                       dest_blacs2mpi(0:dest_num_pe(1) - 1, 0:dest_num_pe(2) - 1) &
    1769       630427 :                       )
    1770        90061 :             CALL para_env%irecv(info%src_blacs2mpi, mp_any_source, recv_req(5), tag=src_tag)
    1771        90061 :             CALL para_env%irecv(dest_blacs2mpi, mp_any_source, recv_req(6), tag=dest_tag)
    1772              :          END IF
    1773              : 
    1774       180122 :          IF (ASSOCIATED(send_dist)) THEN
    1775       161194 :             IF ((send_rank == 0)) THEN
    1776        90061 :                CALL para_env%isend(send_dist%context%blacs2mpi(:, :), 0, send_req(5), tag=src_tag)
    1777              :             END IF
    1778              :          END IF
    1779              : 
    1780       180122 :          IF (ASSOCIATED(recv_dist)) THEN
    1781       147364 :             IF ((recv_rank == 0)) THEN
    1782        90061 :                CALL para_env%isend(recv_dist%context%blacs2mpi(:, :), 0, send_req(6), tag=dest_tag)
    1783              :             END IF
    1784              :          END IF
    1785              : 
    1786       180122 :          IF (global_rank == 0) THEN
    1787        90061 :             CALL mp_waitall(recv_req(5:6))
    1788              :          END IF
    1789              : 
    1790              :          ! Finally, broadcast the data to all processes in the global communicator
    1791       180122 :          CALL para_env%bcast(src_block, 0)
    1792       180122 :          CALL para_env%bcast(dest_block, 0)
    1793       180122 :          CALL para_env%bcast(src_num_pe, 0)
    1794       180122 :          CALL para_env%bcast(dest_num_pe, 0)
    1795       540366 :          info%src_num_pe(1:2) = src_num_pe(1:2)
    1796       540366 :          info%nblock_src(1:2) = src_block(1:2)
    1797       180122 :          IF (global_rank /= 0) THEN
    1798            0 :             ALLOCATE (info%src_blacs2mpi(0:src_num_pe(1) - 1, 0:src_num_pe(2) - 1), &
    1799            0 :                       dest_blacs2mpi(0:dest_num_pe(1) - 1, 0:dest_num_pe(2) - 1) &
    1800       630427 :                       )
    1801              :          END IF
    1802       180122 :          CALL para_env%bcast(info%src_blacs2mpi, 0)
    1803       180122 :          CALL para_env%bcast(dest_blacs2mpi, 0)
    1804              : 
    1805       180122 :          recv_size = dest_num_pe(1)*dest_num_pe(2)
    1806       180122 :          send_size = src_num_pe(1)*src_num_pe(2)
    1807       180122 :          info%send_size = send_size
    1808       180122 :          CALL mp_waitall(send_req(:))
    1809              : 
    1810              :          ! Setup is now complete, we can start the actual communication here.
    1811              :          ! The order implemented here is:
    1812              :          !  DEST_1
    1813              :          !      compute recv sizes
    1814              :          !      call irecv
    1815              :          !  SRC_1
    1816              :          !      compute send sizes
    1817              :          !      pack send buffers
    1818              :          !      call isend
    1819              :          !  DEST_2
    1820              :          !      wait for the recvs and unpack buffers (this part eventually will go into another routine to allow comms to run concurrently)
    1821              :          !  SRC_2
    1822              :          !      wait for the sends
    1823              : 
    1824              :          ! DEST_1
    1825       180122 :          IF (ASSOCIATED(recv_dist)) THEN
    1826              :             CALL cp_fm_struct_get(recv_dist, row_indices=recv_row_indices, &
    1827              :                                   col_indices=recv_col_indices &
    1828       147364 :                                   )
    1829       147364 :             info%recv_col_indices => recv_col_indices
    1830       147364 :             info%recv_row_indices => recv_row_indices
    1831       147364 :             nrow_block_src = src_block(1)
    1832       147364 :             ncol_block_src = src_block(2)
    1833       993692 :             ALLOCATE (recv_count(0:send_size - 1), info%recv_disp(0:send_size - 1), info%recv_request(0:send_size - 1))
    1834              : 
    1835              :             ! Determine the recv counts, allocate the receive buffers, call mpi_irecv for all the non-zero sized receives
    1836       147364 :             nrow_local_recv = recv_dist%nrow_locals(recv_dist%context%mepos(1))
    1837       147364 :             ncol_local_recv = recv_dist%ncol_locals(recv_dist%context%mepos(2))
    1838       147364 :             info%nlocal_recv(1) = nrow_local_recv
    1839       147364 :             info%nlocal_recv(2) = ncol_local_recv
    1840              :             ! Initialise src_p, src_q arrays (sized using number of rows/cols in the receiving distribution)
    1841       736820 :             ALLOCATE (src_p(nrow_local_recv), src_q(ncol_local_recv))
    1842      2569669 :             DO i = 1, nrow_local_recv
    1843              :                ! For each local row we will receive, we look up its global row (in recv_row_indices),
    1844              :                ! then work out which row block it comes from, and which process row that row block comes from.
    1845      2569669 :                src_p(i) = MOD(((recv_row_indices(i) - 1)/nrow_block_src), src_num_pe(1))
    1846              :             END DO
    1847      4328028 :             DO j = 1, ncol_local_recv
    1848              :                ! Similarly for the columns
    1849      4328028 :                src_q(j) = MOD(((recv_col_indices(j) - 1)/ncol_block_src), src_num_pe(2))
    1850              :             END DO
    1851              :             ! src_p/q now contains the process row/column ID that will send data to that row/column
    1852              : 
    1853       294728 :             DO q = 0, src_num_pe(2) - 1
    1854      4328028 :                ncols = COUNT(src_q == q)
    1855       551600 :                DO p = 0, src_num_pe(1) - 1
    1856      4719586 :                   nrows = COUNT(src_p == p)
    1857              :                   ! Use the send_dist here as we are looking up the processes where the data comes from
    1858       404236 :                   recv_count(info%src_blacs2mpi(p, q)) = nrows*ncols
    1859              :                END DO
    1860              :             END DO
    1861       147364 :             DEALLOCATE (src_p, src_q)
    1862              : 
    1863              :             ! Use one long buffer (and displacements into that buffer)
    1864              :             !     this prevents the need for a rectangular array where not all elements will be populated
    1865       698964 :             ALLOCATE (info%recv_buf(SUM(recv_count(:))))
    1866       147364 :             info%recv_disp(0) = 0
    1867       256872 :             DO i = 1, send_size - 1
    1868       256872 :                info%recv_disp(i) = info%recv_disp(i - 1) + recv_count(i - 1)
    1869              :             END DO
    1870              : 
    1871              :             ! Issue receive calls on ranks which expect data
    1872       404236 :             DO k = 0, send_size - 1
    1873       404236 :                IF (recv_count(k) > 0) THEN
    1874              :                   CALL para_env%irecv(info%recv_buf(info%recv_disp(k) + 1:info%recv_disp(k) + recv_count(k)), &
    1875       180566 :                                       source2global(k), info%recv_request(k))
    1876              :                END IF
    1877              :             END DO
    1878       147364 :             DEALLOCATE (source2global)
    1879              :          END IF ! ASSOCIATED(recv_dist)
    1880              : 
    1881              :          ! SRC_1
    1882       180122 :          IF (ASSOCIATED(send_dist)) THEN
    1883              :             CALL cp_fm_struct_get(send_dist, row_indices=send_row_indices, &
    1884              :                                   col_indices=send_col_indices &
    1885       161194 :                                   )
    1886       161194 :             nrow_block_dest = dest_block(1)
    1887       161194 :             ncol_block_dest = dest_block(2)
    1888      1062842 :             ALLOCATE (send_count(0:recv_size - 1), send_disp(0:recv_size - 1), info%send_request(0:recv_size - 1))
    1889              : 
    1890              :             ! Determine the send counts, allocate the send buffers
    1891       161194 :             nrow_local_send = send_dist%nrow_locals(send_dist%context%mepos(1))
    1892       161194 :             ncol_local_send = send_dist%ncol_locals(send_dist%context%mepos(2))
    1893              : 
    1894              :             ! Initialise dest_p, dest_q arrays (sized nrow_local, ncol_local)
    1895              :             !   i.e. number of rows,cols in the sending distribution
    1896       805970 :             ALLOCATE (dest_p(nrow_local_send), dest_q(ncol_local_send))
    1897              : 
    1898      2583499 :             DO i = 1, nrow_local_send
    1899              :                ! Use the send_dist%row_indices() here (we are looping over the local rows we will send)
    1900      2583499 :                dest_p(i) = MOD(((send_row_indices(i) - 1)/nrow_block_dest), dest_num_pe(1))
    1901              :             END DO
    1902      4616962 :             DO j = 1, ncol_local_send
    1903      4616962 :                dest_q(j) = MOD(((send_col_indices(j) - 1)/ncol_block_dest), dest_num_pe(2))
    1904              :             END DO
    1905              :             ! dest_p/q now contain the process row/column ID that will receive data from that row/column
    1906              : 
    1907       322388 :             DO q = 0, dest_num_pe(2) - 1
    1908      4616962 :                ncols = COUNT(dest_q == q)
    1909       579260 :                DO p = 0, dest_num_pe(1) - 1
    1910      4444842 :                   nrows = COUNT(dest_p == p)
    1911       418066 :                   send_count(dest_blacs2mpi(p, q)) = nrows*ncols
    1912              :                END DO
    1913              :             END DO
    1914       161194 :             DEALLOCATE (dest_p, dest_q)
    1915              : 
    1916              :             ! Allocate the send buffer using send_count -- and calculate the offset into the buffer for each process
    1917       740454 :             ALLOCATE (info%send_buf(SUM(send_count(:))))
    1918       161194 :             send_disp(0) = 0
    1919       256872 :             DO k = 1, recv_size - 1
    1920       256872 :                send_disp(k) = send_disp(k - 1) + send_count(k - 1)
    1921              :             END DO
    1922              : 
    1923              :             ! Loop over the smat, pack the send buffers
    1924       418066 :             send_count(:) = 0
    1925      4616962 :             DO j = 1, ncol_local_send
    1926              :                ! Use send_col_indices and row_indices here, as we are looking up the global row/column number of local rows.
    1927      4455768 :                dest_q_j = MOD(((send_col_indices(j) - 1)/ncol_block_dest), dest_num_pe(2))
    1928    200947039 :                DO i = 1, nrow_local_send
    1929    196330077 :                   dest_p_i = MOD(((send_row_indices(i) - 1)/nrow_block_dest), dest_num_pe(1))
    1930    196330077 :                   mpi_rank = dest_blacs2mpi(dest_p_i, dest_q_j)
    1931    196330077 :                   send_count(mpi_rank) = send_count(mpi_rank) + 1
    1932    200785845 :                   info%send_buf(send_disp(mpi_rank) + send_count(mpi_rank)) = source%local_data(i, j)
    1933              :                END DO
    1934              :             END DO
    1935              : 
    1936              :             ! For each non-zero send_count, call mpi_isend
    1937       418066 :             DO k = 0, recv_size - 1
    1938       418066 :                IF (send_count(k) > 0) THEN
    1939              :                   CALL para_env%isend(info%send_buf(send_disp(k) + 1:send_disp(k) + send_count(k)), &
    1940       180566 :                                       dest2global(k), info%send_request(k))
    1941              :                END IF
    1942              :             END DO
    1943       161194 :             DEALLOCATE (send_count, send_disp, dest2global)
    1944              :          END IF ! ASSOCIATED(send_dist)
    1945       180122 :          DEALLOCATE (dest_blacs2mpi)
    1946              : 
    1947              :       END IF !IF (.NOT. cp2k_is_parallel)
    1948              : 
    1949       180122 :       CALL timestop(handle)
    1950              : 
    1951       720488 :    END SUBROUTINE cp_fm_start_copy_general
    1952              : 
    1953              : ! **************************************************************************************************
    1954              : !> \brief Completes the copy operation: wait for comms, unpack, clean up MPI state
    1955              : !> \param destination output fm matrix
    1956              : !> \param info all of the data that will be needed to complete the copy operation
    1957              : ! **************************************************************************************************
    1958       147364 :    SUBROUTINE cp_fm_finish_copy_general(destination, info)
    1959              :       TYPE(cp_fm_type), INTENT(IN)                       :: destination
    1960              :       TYPE(copy_info_type), INTENT(INOUT)                :: info
    1961              : 
    1962              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_finish_copy_general'
    1963              : 
    1964              :       INTEGER                                            :: handle, i, j, k, mpi_rank, send_size, &
    1965              :                                                             src_p_i, src_q_j
    1966       147364 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: recv_count
    1967              :       INTEGER, DIMENSION(2)                              :: nblock_src, nlocal_recv, src_num_pe
    1968       147364 :       INTEGER, DIMENSION(:), POINTER                     :: recv_col_indices, recv_row_indices
    1969              : 
    1970       147364 :       CALL timeset(routineN, handle)
    1971              : 
    1972              :       IF (.NOT. cp2k_is_parallel) THEN
    1973              :          ! Now unpack the data from the 'send buffer'
    1974              :          k = 0
    1975              :          DO j = 1, SIZE(destination%local_data, 2)
    1976              :             DO i = 1, SIZE(destination%local_data, 1)
    1977              :                k = k + 1
    1978              :                destination%local_data(i, j) = info%send_buf(k)
    1979              :             END DO
    1980              :          END DO
    1981              :          DEALLOCATE (info%send_buf)
    1982              :       ELSE
    1983              :          ! Set up local variables ...
    1984       147364 :          send_size = info%send_size
    1985       442092 :          nlocal_recv(1:2) = info%nlocal_recv(:)
    1986       442092 :          nblock_src(1:2) = info%nblock_src(:)
    1987       442092 :          src_num_pe(1:2) = info%src_num_pe(:)
    1988       147364 :          recv_col_indices => info%recv_col_indices
    1989       147364 :          recv_row_indices => info%recv_row_indices
    1990              : 
    1991              :          ! ... use the local variables to do the work
    1992              :          ! DEST_2
    1993       147364 :          CALL mp_waitall(info%recv_request(:))
    1994       442092 :          ALLOCATE (recv_count(0:send_size - 1))
    1995              :          ! Loop over the rmat, filling it in with data from the recv buffers
    1996              :          ! (here the block sizes, num_pes refer to the distribution of the source matrix)
    1997       404236 :          recv_count(:) = 0
    1998      4328028 :          DO j = 1, nlocal_recv(2)
    1999      4180664 :             src_q_j = MOD(((recv_col_indices(j) - 1)/nblock_src(2)), src_num_pe(2))
    2000    200658105 :             DO i = 1, nlocal_recv(1)
    2001    196330077 :                src_p_i = MOD(((recv_row_indices(i) - 1)/nblock_src(1)), src_num_pe(1))
    2002    196330077 :                mpi_rank = info%src_blacs2mpi(src_p_i, src_q_j)
    2003    196330077 :                recv_count(mpi_rank) = recv_count(mpi_rank) + 1
    2004    200510741 :                destination%local_data(i, j) = info%recv_buf(info%recv_disp(mpi_rank) + recv_count(mpi_rank))
    2005              :             END DO
    2006              :          END DO
    2007       147364 :          DEALLOCATE (recv_count, info%recv_disp, info%recv_request, info%recv_buf, info%src_blacs2mpi)
    2008              :          ! Invalidate the stored state
    2009              :          NULLIFY (info%recv_col_indices, &
    2010       147364 :                   info%recv_row_indices)
    2011              : 
    2012              :       END IF
    2013              : 
    2014       147364 :       CALL timestop(handle)
    2015              : 
    2016       147364 :    END SUBROUTINE cp_fm_finish_copy_general
    2017              : 
    2018              : ! **************************************************************************************************
    2019              : !> \brief Completes the copy operation: wait for comms clean up MPI state
    2020              : !> \param info all of the data that will be needed to complete the copy operation
    2021              : ! **************************************************************************************************
    2022       160530 :    SUBROUTINE cp_fm_cleanup_copy_general(info)
    2023              :       TYPE(copy_info_type), INTENT(INOUT)                :: info
    2024              : 
    2025              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_cleanup_copy_general'
    2026              : 
    2027              :       INTEGER                                            :: handle
    2028              : 
    2029       160530 :       CALL timeset(routineN, handle)
    2030              : 
    2031              :       IF (.NOT. cp2k_is_parallel) THEN
    2032              :          ! Don't do anything - no MPI state for the serial case
    2033              :       ELSE
    2034              :          ! SRC_2
    2035              :          ! If this process is also in the destination decomposition, this deallocate
    2036              :          ! Was already done in cp_fm_finish_copy_general
    2037       160530 :          IF (ALLOCATED(info%src_blacs2mpi)) THEN
    2038        32094 :             DEALLOCATE (info%src_blacs2mpi)
    2039              :          END IF
    2040       160530 :          CALL mp_waitall(info%send_request)
    2041       160530 :          DEALLOCATE (info%send_request, info%send_buf)
    2042              : 
    2043              :       END IF
    2044              : 
    2045       160530 :       CALL timestop(handle)
    2046              : 
    2047       160530 :    END SUBROUTINE cp_fm_cleanup_copy_general
    2048              : 
    2049              : ! **************************************************************************************************
    2050              : !> \brief General copy of a submatrix of fm matrix to  a submatrix of another fm matrix.
    2051              : !>        The two matrices can have different contexts.
    2052              : !>
    2053              : !>        Summary of distribution routines for dense matrices
    2054              : !>        The following will copy A(iA:iA+M-1,jA:jA+N-1) to B(iB:iB+M-1,jB:jB+N-1):
    2055              : !>
    2056              : !>        call pdgemr2d(M,N,Aloc,iA,jA,descA,Bloc,iB,jB,descB,context)
    2057              : !>
    2058              : !>        A process that is not a part of the context of A should set descA(2)
    2059              : !>        to -1, and similarly for B.
    2060              : !>
    2061              : !> \param source          input fm matrix
    2062              : !> \param destination     output fm matrix
    2063              : !> \param nrows           number of rows of sub matrix to be copied
    2064              : !> \param ncols           number of cols of sub matrix to be copied
    2065              : !> \param s_firstrow      starting global row index of sub matrix in source
    2066              : !> \param s_firstcol      starting global col index of sub matrix in source
    2067              : !> \param d_firstrow      starting global row index of sub matrix in destination
    2068              : !> \param d_firstcol      starting global col index of sub matrix in destination
    2069              : !> \param global_context  process grid that covers all parts of either A or B.
    2070              : ! **************************************************************************************************
    2071        11174 :    SUBROUTINE cp_fm_to_fm_submat_general(source, &
    2072              :                                          destination, &
    2073              :                                          nrows, &
    2074              :                                          ncols, &
    2075              :                                          s_firstrow, &
    2076              :                                          s_firstcol, &
    2077              :                                          d_firstrow, &
    2078              :                                          d_firstcol, &
    2079              :                                          global_context)
    2080              : 
    2081              :       TYPE(cp_fm_type), INTENT(IN)                       :: source, destination
    2082              :       INTEGER, INTENT(IN)                                :: nrows, ncols, s_firstrow, s_firstcol, &
    2083              :                                                             d_firstrow, d_firstcol
    2084              : 
    2085              :       CLASS(cp_blacs_type), INTENT(IN)        :: global_context
    2086              : 
    2087              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_to_fm_submat_general'
    2088              : 
    2089              :       LOGICAL                                  :: debug
    2090              :       INTEGER                                  :: handle
    2091              : #if defined(__parallel)
    2092              :       INTEGER, DIMENSION(9)                    :: desca, descb
    2093              :       REAL(KIND=dp), DIMENSION(1, 1), TARGET   :: dummy
    2094        11174 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: smat, dmat
    2095              : #endif
    2096              : 
    2097        11174 :       CALL timeset(routineN, handle)
    2098              : 
    2099        11174 :       debug = debug_this_module
    2100              : 
    2101              :       IF (.NOT. cp2k_is_parallel) THEN
    2102              :          CALL cp_fm_to_fm_submat(source, &
    2103              :                                  destination, &
    2104              :                                  nrows, &
    2105              :                                  ncols, &
    2106              :                                  s_firstrow, &
    2107              :                                  s_firstcol, &
    2108              :                                  d_firstrow, &
    2109              :                                  d_firstcol)
    2110              :       ELSE
    2111              : #ifdef __parallel
    2112        11174 :          NULLIFY (smat, dmat)
    2113              :          ! check whether source is available on this process
    2114        11174 :          IF (ASSOCIATED(source%matrix_struct)) THEN
    2115       111740 :             desca = source%matrix_struct%descriptor
    2116        11174 :             IF (source%use_sp) CPABORT("only DP kind implemented")
    2117        11174 :             IF (nrows > source%matrix_struct%nrow_global) &
    2118            0 :                CPABORT("nrows is greater than nrow_global of source")
    2119        11174 :             IF (ncols > source%matrix_struct%ncol_global) &
    2120            0 :                CPABORT("ncols is greater than ncol_global of source")
    2121        11174 :             smat => source%local_data
    2122              :          ELSE
    2123            0 :             desca = -1
    2124            0 :             smat => dummy
    2125              :          END IF
    2126              :          ! check destination is available on this process
    2127        11174 :          IF (ASSOCIATED(destination%matrix_struct)) THEN
    2128       111740 :             descb = destination%matrix_struct%descriptor
    2129        11174 :             IF (destination%use_sp) CPABORT("only DP kind implemented")
    2130        11174 :             IF (nrows > destination%matrix_struct%nrow_global) &
    2131            0 :                CPABORT("nrows is greater than nrow_global of destination")
    2132        11174 :             IF (ncols > destination%matrix_struct%ncol_global) &
    2133            0 :                CPABORT("ncols is greater than ncol_global of destination")
    2134        11174 :             dmat => destination%local_data
    2135              :          ELSE
    2136            0 :             descb = -1
    2137            0 :             dmat => dummy
    2138              :          END IF
    2139              :          ! do copy
    2140              : 
    2141              :          CALL pdgemr2d(nrows, &
    2142              :                        ncols, &
    2143              :                        smat, &
    2144              :                        s_firstrow, &
    2145              :                        s_firstcol, &
    2146              :                        desca, &
    2147              :                        dmat, &
    2148              :                        d_firstrow, &
    2149              :                        d_firstcol, &
    2150              :                        descb, &
    2151        11174 :                        global_context%get_handle())
    2152              : #else
    2153              :          MARK_USED(global_context)
    2154              :          CPABORT("this subroutine only supports SCALAPACK")
    2155              : #endif
    2156              :       END IF
    2157              : 
    2158        11174 :       CALL timestop(handle)
    2159              : 
    2160        11174 :    END SUBROUTINE cp_fm_to_fm_submat_general
    2161              : 
    2162              : ! **************************************************************************************************
    2163              : !> \brief ...
    2164              : !> \param matrix ...
    2165              : !> \param irow_global ...
    2166              : !> \param icol_global ...
    2167              : !> \param alpha ...
    2168              : ! **************************************************************************************************
    2169          240 :    SUBROUTINE cp_fm_add_to_element(matrix, irow_global, icol_global, alpha)
    2170              : 
    2171              :       ! Add alpha to the matrix element specified by the global indices
    2172              :       ! irow_global and icol_global
    2173              : 
    2174              :       ! - Creation (05.05.06,MK)
    2175              : 
    2176              :       TYPE(cp_fm_type), INTENT(IN)          :: matrix
    2177              :       INTEGER, INTENT(IN)                      :: irow_global, icol_global
    2178              :       REAL(KIND=dp), INTENT(IN)                :: alpha
    2179              : 
    2180              :       INTEGER                                  :: mypcol, myprow, npcol, nprow
    2181          240 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
    2182              :       TYPE(cp_blacs_env_type), POINTER         :: context
    2183              : #if defined(__parallel)
    2184              :       INTEGER                                  :: icol_local, ipcol, iprow, &
    2185              :                                                   irow_local
    2186              :       INTEGER, DIMENSION(9)                    :: desca
    2187              : #endif
    2188              : 
    2189          240 :       context => matrix%matrix_struct%context
    2190              : 
    2191          240 :       myprow = context%mepos(1)
    2192          240 :       mypcol = context%mepos(2)
    2193              : 
    2194          240 :       nprow = context%num_pe(1)
    2195          240 :       npcol = context%num_pe(2)
    2196              : 
    2197          240 :       a => matrix%local_data
    2198              : 
    2199              : #if defined(__parallel)
    2200              : 
    2201         2400 :       desca(:) = matrix%matrix_struct%descriptor(:)
    2202              : 
    2203              :       CALL infog2l(irow_global, icol_global, desca, nprow, npcol, myprow, mypcol, &
    2204          240 :                    irow_local, icol_local, iprow, ipcol)
    2205              : 
    2206          240 :       IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
    2207          120 :          a(irow_local, icol_local) = a(irow_local, icol_local) + alpha
    2208              :       END IF
    2209              : 
    2210              : #else
    2211              : 
    2212              :       a(irow_global, icol_global) = a(irow_global, icol_global) + alpha
    2213              : 
    2214              : #endif
    2215              : 
    2216          240 :    END SUBROUTINE cp_fm_add_to_element
    2217              : 
    2218              : ! **************************************************************************************************
    2219              : !> \brief ...
    2220              : !> \param fm ...
    2221              : !> \param unit ...
    2222              : ! **************************************************************************************************
    2223        80087 :    SUBROUTINE cp_fm_write_unformatted(fm, unit)
    2224              :       TYPE(cp_fm_type), INTENT(IN)             :: fm
    2225              :       INTEGER, INTENT(IN)                      :: unit
    2226              : 
    2227              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_write_unformatted'
    2228              : 
    2229              :       INTEGER                                  :: handle, j, max_block, &
    2230              :                                                   ncol_global, nrow_global
    2231              :       TYPE(mp_para_env_type), POINTER          :: para_env
    2232              : #if defined(__parallel)
    2233              :       INTEGER                                  :: i, i_block, icol_local, &
    2234              :                                                   in, info, ipcol, &
    2235              :                                                   iprow, irow_local, &
    2236              :                                                   mepos, &
    2237              :                                                   num_pe, rb, tag
    2238              :       INTEGER, DIMENSION(9)                    :: desc
    2239        80087 :       REAL(KIND=dp), DIMENSION(:), POINTER     :: vecbuf
    2240              :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: newdat
    2241              :       TYPE(cp_blacs_type) :: ictxt_loc
    2242              :       INTEGER, EXTERNAL                        :: numroc
    2243              : #endif
    2244              : 
    2245        80087 :       CALL timeset(routineN, handle)
    2246              :       CALL cp_fm_get_info(fm, nrow_global=nrow_global, ncol_global=ncol_global, ncol_block=max_block, &
    2247        80087 :                           para_env=para_env)
    2248              : 
    2249              : #if defined(__parallel)
    2250        80087 :       num_pe = para_env%num_pe
    2251        80087 :       mepos = para_env%mepos
    2252        80087 :       rb = nrow_global
    2253        80087 :       tag = 0
    2254              :       ! get a new context
    2255        80087 :       CALL ictxt_loc%gridinit(para_env, 'R', 1, num_pe)
    2256        80087 :       CALL descinit(desc, nrow_global, ncol_global, rb, max_block, 0, 0, ictxt_loc%get_handle(), nrow_global, info)
    2257        80087 :       CPASSERT(info == 0)
    2258              :       ASSOCIATE (nprow => ictxt_loc%num_pe(1), npcol => ictxt_loc%num_pe(2), &
    2259              :                  myprow => ictxt_loc%mepos(1), mypcol => ictxt_loc%mepos(2))
    2260       160174 :          in = numroc(ncol_global, max_block, mypcol, 0, npcol)
    2261              : 
    2262       320348 :          ALLOCATE (newdat(nrow_global, MAX(1, in)))
    2263              : 
    2264              :          ! do the actual scalapack to cols reordering
    2265              :          CALL pdgemr2d(nrow_global, ncol_global, fm%local_data, 1, 1, &
    2266              :                        fm%matrix_struct%descriptor, &
    2267        80087 :                        newdat, 1, 1, desc, ictxt_loc%get_handle())
    2268              : 
    2269       240261 :          ALLOCATE (vecbuf(nrow_global*max_block))
    2270     55144819 :          vecbuf = HUGE(1.0_dp) ! init for valgrind
    2271              : 
    2272       351887 :          DO i = 1, ncol_global, MAX(max_block, 1)
    2273       191713 :             i_block = MIN(max_block, ncol_global - i + 1)
    2274              :             CALL infog2l(1, i, desc, nprow, npcol, myprow, mypcol, &
    2275       191713 :                          irow_local, icol_local, iprow, ipcol)
    2276       191713 :             IF (ipcol == mypcol) THEN
    2277       945442 :                DO j = 1, i_block
    2278    146321450 :                   vecbuf((j - 1)*nrow_global + 1:nrow_global*j) = newdat(:, icol_local + j - 1)
    2279              :                END DO
    2280              :             END IF
    2281              : 
    2282       191713 :             IF (ipcol == 0) THEN
    2283              :                ! do nothing
    2284              :             ELSE
    2285        66170 :                IF (ipcol == mypcol) THEN
    2286     36177503 :                   CALL para_env%send(vecbuf(:), 0, tag)
    2287              :                END IF
    2288        66170 :                IF (mypcol == 0) THEN
    2289     72321921 :                   CALL para_env%recv(vecbuf(:), ipcol, tag)
    2290              :                END IF
    2291              :             END IF
    2292              : 
    2293       271800 :             IF (unit > 0) THEN
    2294       914458 :                DO j = 1, i_block
    2295       914458 :                   WRITE (unit) vecbuf((j - 1)*nrow_global + 1:nrow_global*j)
    2296              :                END DO
    2297              :             END IF
    2298              : 
    2299              :          END DO
    2300              :       END ASSOCIATE
    2301        80087 :       DEALLOCATE (vecbuf)
    2302              : 
    2303        80087 :       CALL ictxt_loc%gridexit()
    2304              : 
    2305        80087 :       DEALLOCATE (newdat)
    2306              : 
    2307              : #else
    2308              : 
    2309              :       IF (unit > 0) THEN
    2310              :          DO j = 1, ncol_global
    2311              :             WRITE (unit) fm%local_data(:, j)
    2312              :          END DO
    2313              :       END IF
    2314              : 
    2315              : #endif
    2316        80087 :       CALL timestop(handle)
    2317              : 
    2318       400435 :    END SUBROUTINE cp_fm_write_unformatted
    2319              : 
    2320              : ! **************************************************************************************************
    2321              : !> \brief Write out a full matrix in plain text.
    2322              : !> \param fm the matrix to be outputted
    2323              : !> \param unit the unit number for I/O
    2324              : !> \param header optional header
    2325              : !> \param value_format ...
    2326              : ! **************************************************************************************************
    2327         1122 :    SUBROUTINE cp_fm_write_formatted(fm, unit, header, value_format)
    2328              :       TYPE(cp_fm_type), INTENT(IN)             :: fm
    2329              :       INTEGER, INTENT(IN)                      :: unit
    2330              :       CHARACTER(LEN=*), INTENT(IN), OPTIONAL   :: header, value_format
    2331              : 
    2332              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_write_formatted'
    2333              : 
    2334              :       CHARACTER(LEN=21)                        :: my_value_format
    2335              :       INTEGER                                  :: handle, i, j, max_block, &
    2336              :                                                   ncol_global, nrow_global
    2337              :       TYPE(mp_para_env_type), POINTER          :: para_env
    2338              : #if defined(__parallel)
    2339              :       INTEGER                                  :: i_block, icol_local, &
    2340              :                                                   in, info, ipcol, &
    2341              :                                                   iprow, irow_local, &
    2342              :                                                   mepos, num_pe, rb, tag, k, &
    2343              :                                                   icol, irow
    2344              :       INTEGER, DIMENSION(9)                    :: desc
    2345         1122 :       REAL(KIND=dp), DIMENSION(:), POINTER     :: vecbuf
    2346         1122 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: newdat
    2347              :       TYPE(cp_blacs_type) :: ictxt_loc
    2348              :       INTEGER, EXTERNAL                        :: numroc
    2349              : #endif
    2350              : 
    2351         1122 :       CALL timeset(routineN, handle)
    2352              :       CALL cp_fm_get_info(fm, nrow_global=nrow_global, ncol_global=ncol_global, ncol_block=max_block, &
    2353         1122 :                           para_env=para_env)
    2354              : 
    2355         1122 :       IF (PRESENT(value_format)) THEN
    2356            0 :          CPASSERT(LEN_TRIM(ADJUSTL(value_format)) < 11)
    2357            0 :          my_value_format = "(I10, I10, "//TRIM(ADJUSTL(value_format))//")"
    2358              :       ELSE
    2359         1122 :          my_value_format = "(I10, I10, ES24.12)"
    2360              :       END IF
    2361              : 
    2362         1122 :       IF (unit > 0) THEN
    2363            5 :          IF (PRESENT(header)) WRITE (unit, *) header
    2364            5 :          WRITE (unit, "(A2, A8, A10, A24)") "#", "Row", "Column", ADJUSTL("Value")
    2365              :       END IF
    2366              : 
    2367              : #if defined(__parallel)
    2368         1122 :       num_pe = para_env%num_pe
    2369         1122 :       mepos = para_env%mepos
    2370         1122 :       rb = nrow_global
    2371         1122 :       tag = 0
    2372              :       ! get a new context
    2373         1122 :       CALL ictxt_loc%gridinit(para_env, 'R', 1, num_pe)
    2374         1122 :       CALL descinit(desc, nrow_global, ncol_global, rb, max_block, 0, 0, ictxt_loc%get_handle(), nrow_global, info)
    2375         1122 :       CPASSERT(info == 0)
    2376              :       ASSOCIATE (nprow => ictxt_loc%num_pe(1), npcol => ictxt_loc%num_pe(2), &
    2377              :                  myprow => ictxt_loc%mepos(1), mypcol => ictxt_loc%mepos(2))
    2378         2244 :          in = numroc(ncol_global, max_block, mypcol, 0, npcol)
    2379              : 
    2380         4488 :          ALLOCATE (newdat(nrow_global, MAX(1, in)))
    2381              : 
    2382              :          ! do the actual scalapack to cols reordering
    2383              :          CALL pdgemr2d(nrow_global, ncol_global, fm%local_data, 1, 1, &
    2384              :                        fm%matrix_struct%descriptor, &
    2385         1122 :                        newdat, 1, 1, desc, ictxt_loc%get_handle())
    2386              : 
    2387         3366 :          ALLOCATE (vecbuf(nrow_global*max_block))
    2388         3872 :          vecbuf = HUGE(1.0_dp) ! init for valgrind
    2389         1122 :          irow = 1
    2390         1122 :          icol = 1
    2391              : 
    2392         4488 :          DO i = 1, ncol_global, MAX(max_block, 1)
    2393         2244 :             i_block = MIN(max_block, ncol_global - i + 1)
    2394              :             CALL infog2l(1, i, desc, nprow, npcol, myprow, mypcol, &
    2395         2244 :                          irow_local, icol_local, iprow, ipcol)
    2396         2244 :             IF (ipcol == mypcol) THEN
    2397         2264 :                DO j = 1, i_block
    2398         7932 :                   vecbuf((j - 1)*nrow_global + 1:nrow_global*j) = newdat(:, icol_local + j - 1)
    2399              :                END DO
    2400              :             END IF
    2401              : 
    2402         2244 :             IF (ipcol == 0) THEN
    2403              :                ! do nothing
    2404              :             ELSE
    2405         1118 :                IF (ipcol == mypcol) THEN
    2406         1857 :                   CALL para_env%send(vecbuf(:), 0, tag)
    2407              :                END IF
    2408         1118 :                IF (mypcol == 0) THEN
    2409         3155 :                   CALL para_env%recv(vecbuf(:), ipcol, tag)
    2410              :                END IF
    2411              :             END IF
    2412              : 
    2413         3366 :             IF (unit > 0) THEN
    2414           40 :                DO j = 1, i_block
    2415          650 :                   DO k = (j - 1)*nrow_global + 1, nrow_global*j
    2416          610 :                      WRITE (UNIT=unit, FMT=my_value_format) irow, icol, vecbuf(k)
    2417          610 :                      irow = irow + 1
    2418          640 :                      IF (irow > nrow_global) THEN
    2419           30 :                         irow = 1
    2420           30 :                         icol = icol + 1
    2421              :                      END IF
    2422              :                   END DO
    2423              :                END DO
    2424              :             END IF
    2425              : 
    2426              :          END DO
    2427              :       END ASSOCIATE
    2428         1122 :       DEALLOCATE (vecbuf)
    2429              : 
    2430         1122 :       CALL ictxt_loc%gridexit()
    2431              : 
    2432         1122 :       DEALLOCATE (newdat)
    2433              : 
    2434              : #else
    2435              : 
    2436              :       IF (unit > 0) THEN
    2437              :          DO j = 1, ncol_global
    2438              :             DO i = 1, nrow_global
    2439              :                WRITE (UNIT=unit, FMT=my_value_format) i, j, fm%local_data(i, j)
    2440              :             END DO
    2441              :          END DO
    2442              :       END IF
    2443              : 
    2444              : #endif
    2445         1122 :       CALL timestop(handle)
    2446              : 
    2447         5610 :    END SUBROUTINE cp_fm_write_formatted
    2448              : 
    2449              : ! **************************************************************************************************
    2450              : !> \brief ...
    2451              : !> \param fm ...
    2452              : !> \param unit ...
    2453              : ! **************************************************************************************************
    2454         1804 :    SUBROUTINE cp_fm_read_unformatted(fm, unit)
    2455              :       TYPE(cp_fm_type), INTENT(INOUT)          :: fm
    2456              :       INTEGER, INTENT(IN)                      :: unit
    2457              : 
    2458              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_read_unformatted'
    2459              : 
    2460              :       INTEGER                                  :: handle, j, max_block, &
    2461              :                                                   ncol_global, nrow_global
    2462              :       TYPE(mp_para_env_type), POINTER          :: para_env
    2463              : #if defined(__parallel)
    2464              :       INTEGER                                  :: k, n_cols
    2465              :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: vecbuf
    2466              : #endif
    2467              : 
    2468         1804 :       CALL timeset(routineN, handle)
    2469              : 
    2470              :       CALL cp_fm_get_info(fm, nrow_global=nrow_global, ncol_global=ncol_global, ncol_block=max_block, &
    2471         1804 :                           para_env=para_env)
    2472              : 
    2473              : #if defined(__parallel)
    2474              : 
    2475              :       ! the parallel case could be made more efficient (see cp_fm_write_unformatted)
    2476              : 
    2477         7216 :       ALLOCATE (vecbuf(nrow_global, max_block))
    2478              : 
    2479         6546 :       DO j = 1, ncol_global, max_block
    2480              : 
    2481         4742 :          n_cols = MIN(max_block, ncol_global - j + 1)
    2482         4742 :          IF (para_env%mepos == 0) THEN
    2483        33604 :             DO k = 1, n_cols
    2484        33604 :                READ (unit) vecbuf(:, k)
    2485              :             END DO
    2486              :          END IF
    2487     11523878 :          CALL para_env%bcast(vecbuf, 0)
    2488         6546 :          CALL cp_fm_set_submatrix(fm, vecbuf, start_row=1, start_col=j, n_cols=n_cols)
    2489              : 
    2490              :       END DO
    2491              : 
    2492         1804 :       DEALLOCATE (vecbuf)
    2493              : 
    2494              : #else
    2495              : 
    2496              :       DO j = 1, ncol_global
    2497              :          READ (unit) fm%local_data(:, j)
    2498              :       END DO
    2499              : 
    2500              : #endif
    2501              : 
    2502         1804 :       CALL timestop(handle)
    2503              : 
    2504         1804 :    END SUBROUTINE cp_fm_read_unformatted
    2505              : 
    2506              : ! **************************************************************************************************
    2507              : !> \brief ...
    2508              : !> \param mm_type ...
    2509              : ! **************************************************************************************************
    2510         9893 :    SUBROUTINE cp_fm_setup(mm_type)
    2511              :       INTEGER, INTENT(IN)                                :: mm_type
    2512              : 
    2513         9893 :       cp_fm_mm_type = mm_type
    2514         9893 :    END SUBROUTINE cp_fm_setup
    2515              : 
    2516              : ! **************************************************************************************************
    2517              : !> \brief ...
    2518              : !> \return ...
    2519              : ! **************************************************************************************************
    2520      1446324 :    FUNCTION cp_fm_get_mm_type() RESULT(res)
    2521              :       INTEGER                                            :: res
    2522              : 
    2523      1446324 :       res = cp_fm_mm_type
    2524      1446324 :    END FUNCTION cp_fm_get_mm_type
    2525              : 
    2526              : ! **************************************************************************************************
    2527              : !> \brief ...
    2528              : !> \param ictxt ...
    2529              : !> \param prec ...
    2530              : !> \return ...
    2531              : ! **************************************************************************************************
    2532           10 :    FUNCTION cp_fm_pilaenv(ictxt, prec) RESULT(res)
    2533              :       INTEGER                                            :: ictxt
    2534              :       CHARACTER(LEN=1)                                   :: prec
    2535              :       INTEGER                                            :: res
    2536              : #if defined(__parallel)
    2537              :       INTEGER                                            :: pilaenv
    2538           10 :       res = pilaenv(ictxt, prec)
    2539              : #else
    2540              :       MARK_USED(ictxt)
    2541              :       MARK_USED(prec)
    2542              :       res = -1
    2543              : #endif
    2544              : 
    2545           10 :    END FUNCTION cp_fm_pilaenv
    2546              : 
    2547            0 : END MODULE cp_fm_types
        

Generated by: LCOV version 2.0-1