LCOV - code coverage report
Current view: top level - src/fm - cp_fm_types.F (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:1a29073) Lines: 712 832 85.6 %
Date: 2024-04-17 06:30:47 Functions: 39 48 81.2 %

          Line data    Source code
       1             : !--------------------------------------------------------------------------------------------------!
       2             : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3             : !   Copyright 2000-2024 CP2K developers group <https://cp2k.org>                                   !
       4             : !                                                                                                  !
       5             : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6             : !--------------------------------------------------------------------------------------------------!
       7             : 
       8             : ! **************************************************************************************************
       9             : !> \brief represent a full matrix distributed on many processors
      10             : !> \par History
      11             : !>      3) separated structure object, removed globenv, renamed to full matrix
      12             : !>         many changes (fawzi 08.2002)
      13             : !> \author Matthias Krack (22.05.2001)
      14             : ! **************************************************************************************************
      15             : MODULE cp_fm_types
      16             :    USE cp_blacs_env,                    ONLY: cp_blacs_env_type
      17             :    USE cp_blacs_types,                  ONLY: cp_blacs_type
      18             :    USE cp_fm_struct,                    ONLY: cp_fm_struct_equivalent,&
      19             :                                               cp_fm_struct_get,&
      20             :                                               cp_fm_struct_release,&
      21             :                                               cp_fm_struct_retain,&
      22             :                                               cp_fm_struct_type,&
      23             :                                               cp_fm_struct_write_info
      24             :    USE kinds,                           ONLY: dp,&
      25             :                                               sp
      26             :    USE message_passing,                 ONLY: cp2k_is_parallel,&
      27             :                                               mp_any_source,&
      28             :                                               mp_para_env_type,&
      29             :                                               mp_proc_null,&
      30             :                                               mp_request_null,&
      31             :                                               mp_request_type,&
      32             :                                               mp_waitall
      33             :    USE parallel_rng_types,              ONLY: UNIFORM,&
      34             :                                               rng_stream_type
      35             : #include "../base/base_uses.f90"
      36             : 
      37             :    IMPLICIT NONE
      38             : 
      39             :    PRIVATE
      40             : 
      41             :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_fm_types'
      42             :    LOGICAL, PARAMETER          :: debug_this_module = .TRUE.
      43             :    INTEGER, PARAMETER :: src_tag = 3, dest_tag = 5, send_tag = 7, recv_tag = 11
      44             : 
      45             :    INTEGER, PRIVATE :: mm_type = 1
      46             : 
      47             :    PUBLIC :: cp_fm_type, &
      48             :              cp_fm_p_type, copy_info_type
      49             : 
      50             :    PUBLIC :: cp_fm_add_to_element, &
      51             :              cp_fm_create, &
      52             :              cp_fm_release, &
      53             :              cp_fm_get_info, &
      54             :              cp_fm_set_element, &
      55             :              cp_fm_get_element, &
      56             :              cp_fm_get_diag, & ! get diagonal
      57             :              cp_fm_set_all, & ! set all elements and diagonal
      58             :              cp_fm_set_submatrix, & ! set a submatrix to given values
      59             :              cp_fm_get_submatrix, & ! get a submatrix of given values
      60             :              cp_fm_init_random, &
      61             :              cp_fm_maxabsval, & ! find the maximum absolute value
      62             :              cp_fm_maxabsrownorm, & ! find the maximum of the sum of the abs of the elements of a row
      63             :              cp_fm_to_fm, & ! copy (parts of) a fm to a fm
      64             :              cp_fm_vectorsnorm, & ! compute the norm of the column-vectors
      65             :              cp_fm_vectorssum, & ! compute the sum of all elements of the column-vectors
      66             :              cp_fm_to_fm_submat, & ! copy (parts of) a fm to a fm
      67             :              cp_fm_to_fm_triangular, &
      68             :              cp_fm_copy_general, &
      69             :              cp_fm_start_copy_general, &
      70             :              cp_fm_finish_copy_general, &
      71             :              cp_fm_cleanup_copy_general, &
      72             :              cp_fm_write_unformatted, & ! writes a full matrix to an open unit
      73             :              cp_fm_write_formatted, & ! writes a full matrix to an open unit
      74             :              cp_fm_read_unformatted, & ! reads a full matrix from an open unit
      75             :              cp_fm_setup, & ! allows to set flags for fms
      76             :              cp_fm_get_mm_type, &
      77             :              cp_fm_write_info, &
      78             :              cp_fm_to_fm_submat_general ! copy matrix across different contexts
      79             : 
      80             :    PUBLIC :: cp_fm_indxg2p, &
      81             :              cp_fm_indxg2l, &
      82             :              cp_fm_indxl2g, &
      83             :              cp_fm_pilaenv
      84             : 
      85             :    INTERFACE cp_fm_to_fm
      86             :       MODULE PROCEDURE cp_fm_to_fm_matrix, & ! a full matrix
      87             :          cp_fm_to_fm_columns ! just a number of columns
      88             :    END INTERFACE
      89             : 
      90             :    INTERFACE cp_fm_release
      91             :       MODULE PROCEDURE cp_fm_release_aa0, &
      92             :          cp_fm_release_aa1, &
      93             :          cp_fm_release_aa2, &
      94             :          cp_fm_release_aa3, &
      95             :          cp_fm_release_ap1, &
      96             :          cp_fm_release_ap2, &
      97             :          cp_fm_release_pa1, &
      98             :          cp_fm_release_pa2, &
      99             :          cp_fm_release_pa3, &
     100             :          cp_fm_release_pp1, &
     101             :          cp_fm_release_pp2
     102             :    END INTERFACE
     103             : 
     104             : ! **************************************************************************************************
     105             : !> \brief represent a full matrix
     106             : !> \param name the name of the matrix, used for printing
     107             : !> \param matrix_struct structure of this matrix
     108             : !> \param local_data array with the data of the matrix (its contents
     109             : !>        depend on the matrix type used: in parallel runs it will be
     110             : !>        in scalapack format, in sequential, it will simply contain
     111             : !>        the matrix)
     112             : !> \par History
     113             : !>      08.2002 created [fawzi]
     114             : !> \author fawzi
     115             : ! **************************************************************************************************
     116             :    TYPE cp_fm_type
     117             : !    PRIVATE
     118             :       CHARACTER(LEN=60) :: name = ""
     119             :       LOGICAL :: use_sp = .FALSE.
     120             :       TYPE(cp_fm_struct_type), POINTER :: matrix_struct => NULL()
     121             :       REAL(KIND=dp), DIMENSION(:, :), POINTER, CONTIGUOUS :: local_data => NULL()
     122             :       REAL(KIND=sp), DIMENSION(:, :), POINTER, CONTIGUOUS :: local_data_sp => NULL()
     123             :    END TYPE cp_fm_type
     124             : 
     125             : ! **************************************************************************************************
     126             : !> \brief just to build arrays of pointers to matrices
     127             : !> \param matrix the pointer to the matrix
     128             : !> \par History
     129             : !>      08.2002 created [fawzi]
     130             : !> \author fawzi
     131             : ! **************************************************************************************************
     132             :    TYPE cp_fm_p_type
     133             :       TYPE(cp_fm_type), POINTER :: matrix => NULL()
     134             :    END TYPE cp_fm_p_type
     135             : 
     136             : ! **************************************************************************************************
     137             : !> \brief Stores the state of a copy between cp_fm_start_copy_general
     138             : !>        and cp_fm_finish_copy_general
     139             : !> \par History
     140             : !>      Jan 2017  [Mark T]
     141             : ! **************************************************************************************************
     142             :    TYPE copy_info_type
     143             :       INTEGER :: send_size = -1
     144             :       INTEGER, DIMENSION(2) :: nlocal_recv = -1, nblock_src = -1, src_num_pe = -1 ! 1->row  2->col
     145             :       TYPE(mp_request_type), DIMENSION(:), ALLOCATABLE :: send_request, recv_request
     146             :       INTEGER, DIMENSION(:), ALLOCATABLE   :: recv_disp
     147             :       INTEGER, DIMENSION(:), POINTER       :: recv_col_indices => NULL(), recv_row_indices => NULL()
     148             :       INTEGER, DIMENSION(:, :), ALLOCATABLE :: src_blacs2mpi
     149             :       REAL(KIND=dp), DIMENSION(:), ALLOCATABLE :: recv_buf, send_buf
     150             :    END TYPE copy_info_type
     151             : 
     152             : CONTAINS
     153             : 
     154             : ! **************************************************************************************************
     155             : !> \brief creates a new full matrix with the given structure
     156             : !> \param matrix the matrix to be created
     157             : !> \param matrix_struct the structure of matrix
     158             : !> \param name ...
     159             : !> \param use_sp ...
     160             : !> \par History
     161             : !>      08.2002 created [fawzi]
     162             : !> \author Fawzi Mohamed
     163             : !> \note
     164             : !>      preferred allocation routine
     165             : ! **************************************************************************************************
     166     1219022 :    SUBROUTINE cp_fm_create(matrix, matrix_struct, name, use_sp)
     167             :       TYPE(cp_fm_type), INTENT(OUT)                      :: matrix
     168             :       TYPE(cp_fm_struct_type), INTENT(IN), TARGET        :: matrix_struct
     169             :       CHARACTER(len=*), INTENT(in), OPTIONAL             :: name
     170             :       LOGICAL, INTENT(in), OPTIONAL                      :: use_sp
     171             : 
     172             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'cp_fm_create'
     173             : 
     174             :       INTEGER                                            :: handle, ncol_local, npcol, nprow, &
     175             :                                                             nrow_local
     176             :       TYPE(cp_blacs_env_type), POINTER                   :: context
     177             : 
     178     1219022 :       CALL timeset(routineN, handle)
     179             : 
     180             : #if defined(__parallel) && ! defined(__SCALAPACK)
     181             :       CPABORT("full matrices need scalapack for parallel runs")
     182             : #endif
     183             : 
     184     1219022 :       context => matrix_struct%context
     185     1219022 :       matrix%matrix_struct => matrix_struct
     186     1219022 :       CALL cp_fm_struct_retain(matrix%matrix_struct)
     187             : 
     188     1219022 :       matrix%use_sp = .FALSE.
     189     1219022 :       IF (PRESENT(use_sp)) matrix%use_sp = use_sp
     190             : 
     191     1219022 :       nprow = context%num_pe(1)
     192     1219022 :       npcol = context%num_pe(2)
     193     1219022 :       NULLIFY (matrix%local_data)
     194     1219022 :       NULLIFY (matrix%local_data_sp)
     195             : 
     196             :       ! OK, we allocate here at least a 1 x 1 matrix
     197             :       ! this must (and is) compatible with the descinit call
     198             :       ! in cp_fm_struct
     199     1219022 :       nrow_local = matrix_struct%local_leading_dimension
     200     1219022 :       ncol_local = MAX(1, matrix_struct%ncol_locals(context%mepos(2)))
     201     1219022 :       IF (matrix%use_sp) THEN
     202           0 :          ALLOCATE (matrix%local_data_sp(nrow_local, ncol_local))
     203             :       ELSE
     204     4876088 :          ALLOCATE (matrix%local_data(nrow_local, ncol_local))
     205             :       END IF
     206             : 
     207             :       ! JVDV we should remove this, as it is up to the user to zero afterwards
     208     1219022 :       IF (matrix%use_sp) THEN
     209           0 :          matrix%local_data_sp(1:nrow_local, 1:ncol_local) = 0.0_sp
     210             :       ELSE
     211   612954651 :          matrix%local_data(1:nrow_local, 1:ncol_local) = 0.0_dp
     212             :       END IF
     213             : 
     214     1219022 :       IF (PRESENT(name)) THEN
     215      624927 :          matrix%name = name
     216             :       ELSE
     217      594095 :          matrix%name = 'full matrix'
     218             :       END IF
     219             : 
     220     1219022 :       CALL timestop(handle)
     221             : 
     222     1219022 :    END SUBROUTINE cp_fm_create
     223             : 
     224             : ! **************************************************************************************************
     225             : !> \brief releases a full matrix
     226             : !> \param matrix the matrix to release
     227             : !> \par History
     228             : !>      08.2002 created [fawzi]
     229             : !> \author Fawzi Mohamed
     230             : ! **************************************************************************************************
     231     1229574 :    SUBROUTINE cp_fm_release_aa0(matrix)
     232             :       TYPE(cp_fm_type), INTENT(INOUT)                    :: matrix
     233             : 
     234     1229574 :       IF (ASSOCIATED(matrix%local_data)) THEN
     235     1218334 :          DEALLOCATE (matrix%local_data)
     236             :          NULLIFY (matrix%local_data)
     237             :       END IF
     238     1229574 :       IF (ASSOCIATED(matrix%local_data_sp)) THEN
     239           0 :          DEALLOCATE (matrix%local_data_sp)
     240             :          NULLIFY (matrix%local_data_sp)
     241             :       END IF
     242     1229574 :       matrix%name = ""
     243     1229574 :       CALL cp_fm_struct_release(matrix%matrix_struct)
     244             : 
     245     1229574 :    END SUBROUTINE cp_fm_release_aa0
     246             : 
     247             : ! **************************************************************************************************
     248             : !> \brief ...
     249             : !> \param matrices ...
     250             : ! **************************************************************************************************
     251       71126 :    SUBROUTINE cp_fm_release_aa1(matrices)
     252             :       TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:)        :: matrices
     253             : 
     254             :       INTEGER                                            :: i
     255             : 
     256       71126 :       IF (ALLOCATED(matrices)) THEN
     257      192423 :          DO i = 1, SIZE(matrices)
     258      192423 :             CALL cp_fm_release(matrices(i))
     259             :          END DO
     260       70042 :          DEALLOCATE (matrices)
     261             :       END IF
     262       71126 :    END SUBROUTINE cp_fm_release_aa1
     263             : 
     264             : ! **************************************************************************************************
     265             : !> \brief ...
     266             : !> \param matrices ...
     267             : ! **************************************************************************************************
     268        7928 :    SUBROUTINE cp_fm_release_aa2(matrices)
     269             :       TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:, :)     :: matrices
     270             : 
     271             :       INTEGER                                            :: i, j
     272             : 
     273        7928 :       IF (ALLOCATED(matrices)) THEN
     274       19148 :          DO i = 1, SIZE(matrices, 1)
     275       51288 :             DO j = 1, SIZE(matrices, 2)
     276       45628 :                CALL cp_fm_release(matrices(i, j))
     277             :             END DO
     278             :          END DO
     279        5660 :          DEALLOCATE (matrices)
     280             :       END IF
     281        7928 :    END SUBROUTINE cp_fm_release_aa2
     282             : 
     283             : ! **************************************************************************************************
     284             : !> \brief ...
     285             : !> \param matrices ...
     286             : ! **************************************************************************************************
     287          32 :    SUBROUTINE cp_fm_release_aa3(matrices)
     288             :       TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:, :, :)  :: matrices
     289             : 
     290             :       INTEGER                                            :: i, j, k
     291             : 
     292          32 :       IF (ALLOCATED(matrices)) THEN
     293         232 :          DO i = 1, SIZE(matrices, 1)
     294         660 :             DO j = 1, SIZE(matrices, 2)
     295        1176 :                DO k = 1, SIZE(matrices, 3)
     296         976 :                   CALL cp_fm_release(matrices(i, j, k))
     297             :                END DO
     298             :             END DO
     299             :          END DO
     300          32 :          DEALLOCATE (matrices)
     301             :       END IF
     302          32 :    END SUBROUTINE cp_fm_release_aa3
     303             : 
     304             : ! **************************************************************************************************
     305             : !> \brief ...
     306             : !> \param matrices ...
     307             : ! **************************************************************************************************
     308      125694 :    SUBROUTINE cp_fm_release_pa1(matrices)
     309             :       TYPE(cp_fm_type), DIMENSION(:), POINTER            :: matrices
     310             : 
     311             :       INTEGER                                            :: i
     312             : 
     313      125694 :       IF (ASSOCIATED(matrices)) THEN
     314      115359 :          DO i = 1, SIZE(matrices)
     315      115359 :             CALL cp_fm_release(matrices(i))
     316             :          END DO
     317       49274 :          DEALLOCATE (matrices)
     318             :          NULLIFY (matrices)
     319             :       END IF
     320      125694 :    END SUBROUTINE cp_fm_release_pa1
     321             : 
     322             : ! **************************************************************************************************
     323             : !> \brief ...
     324             : !> \param matrices ...
     325             : ! **************************************************************************************************
     326       19266 :    SUBROUTINE cp_fm_release_pa2(matrices)
     327             :       TYPE(cp_fm_type), DIMENSION(:, :), POINTER         :: matrices
     328             : 
     329             :       INTEGER                                            :: i, j
     330             : 
     331       19266 :       IF (ASSOCIATED(matrices)) THEN
     332       43646 :          DO i = 1, SIZE(matrices, 1)
     333       85888 :             DO j = 1, SIZE(matrices, 2)
     334       75124 :                CALL cp_fm_release(matrices(i, j))
     335             :             END DO
     336             :          END DO
     337       10764 :          DEALLOCATE (matrices)
     338             :          NULLIFY (matrices)
     339             :       END IF
     340       19266 :    END SUBROUTINE cp_fm_release_pa2
     341             : 
     342             : ! **************************************************************************************************
     343             : !> \brief ...
     344             : !> \param matrices ...
     345             : ! **************************************************************************************************
     346           0 :    SUBROUTINE cp_fm_release_pa3(matrices)
     347             :       TYPE(cp_fm_type), DIMENSION(:, :, :), POINTER      :: matrices
     348             : 
     349             :       INTEGER                                            :: i, j, k
     350             : 
     351           0 :       IF (ASSOCIATED(matrices)) THEN
     352           0 :          DO i = 1, SIZE(matrices, 1)
     353           0 :             DO j = 1, SIZE(matrices, 2)
     354           0 :                DO k = 1, SIZE(matrices, 3)
     355           0 :                   CALL cp_fm_release(matrices(i, j, k))
     356             :                END DO
     357             :             END DO
     358             :          END DO
     359           0 :          DEALLOCATE (matrices)
     360             :          NULLIFY (matrices)
     361             :       END IF
     362           0 :    END SUBROUTINE cp_fm_release_pa3
     363             : 
     364             : ! **************************************************************************************************
     365             : !> \brief ...
     366             : !> \param matrices ...
     367             : ! **************************************************************************************************
     368           0 :    SUBROUTINE cp_fm_release_ap1(matrices)
     369             :       TYPE(cp_fm_p_type), ALLOCATABLE, DIMENSION(:)      :: matrices
     370             : 
     371             :       INTEGER                                            :: i
     372             : 
     373           0 :       IF (ALLOCATED(matrices)) THEN
     374           0 :          DO i = 1, SIZE(matrices)
     375           0 :             CALL cp_fm_release(matrices(i)%matrix)
     376           0 :             DEALLOCATE (matrices(i)%matrix)
     377             :          END DO
     378           0 :          DEALLOCATE (matrices)
     379             :       END IF
     380           0 :    END SUBROUTINE cp_fm_release_ap1
     381             : 
     382             : ! **************************************************************************************************
     383             : !> \brief ...
     384             : !> \param matrices ...
     385             : ! **************************************************************************************************
     386           0 :    SUBROUTINE cp_fm_release_ap2(matrices)
     387             :       TYPE(cp_fm_p_type), ALLOCATABLE, DIMENSION(:, :)   :: matrices
     388             : 
     389             :       INTEGER                                            :: i, j
     390             : 
     391           0 :       IF (ALLOCATED(matrices)) THEN
     392           0 :          DO i = 1, SIZE(matrices, 1)
     393           0 :             DO j = 1, SIZE(matrices, 2)
     394           0 :                CALL cp_fm_release(matrices(i, j)%matrix)
     395           0 :                DEALLOCATE (matrices(i, j)%matrix)
     396             :             END DO
     397             :          END DO
     398           0 :          DEALLOCATE (matrices)
     399             :       END IF
     400           0 :    END SUBROUTINE cp_fm_release_ap2
     401             : 
     402             : ! **************************************************************************************************
     403             : !> \brief ...
     404             : !> \param matrices ...
     405             : ! **************************************************************************************************
     406           0 :    SUBROUTINE cp_fm_release_pp1(matrices)
     407             :       TYPE(cp_fm_p_type), DIMENSION(:), POINTER          :: matrices
     408             : 
     409             :       INTEGER                                            :: i
     410             : 
     411           0 :       IF (ASSOCIATED(matrices)) THEN
     412           0 :          DO i = 1, SIZE(matrices)
     413           0 :             CALL cp_fm_release(matrices(i)%matrix)
     414           0 :             DEALLOCATE (matrices(i)%matrix)
     415             :          END DO
     416           0 :          DEALLOCATE (matrices)
     417             :          NULLIFY (matrices)
     418             :       END IF
     419           0 :    END SUBROUTINE cp_fm_release_pp1
     420             : 
     421             : ! **************************************************************************************************
     422             : !> \brief ...
     423             : !> \param matrices ...
     424             : ! **************************************************************************************************
     425           0 :    SUBROUTINE cp_fm_release_pp2(matrices)
     426             :       TYPE(cp_fm_p_type), DIMENSION(:, :), POINTER       :: matrices
     427             : 
     428             :       INTEGER                                            :: i, j
     429             : 
     430           0 :       IF (ASSOCIATED(matrices)) THEN
     431           0 :          DO i = 1, SIZE(matrices, 1)
     432           0 :             DO j = 1, SIZE(matrices, 2)
     433           0 :                CALL cp_fm_release(matrices(i, j)%matrix)
     434           0 :                DEALLOCATE (matrices(i, j)%matrix)
     435             :             END DO
     436             :          END DO
     437           0 :          DEALLOCATE (matrices)
     438             :          NULLIFY (matrices)
     439             :       END IF
     440           0 :    END SUBROUTINE cp_fm_release_pp2
     441             : 
     442             : ! **************************************************************************************************
     443             : !> \brief fills a matrix with random numbers
     444             : !> \param matrix : to be initialized
     445             : !> \param ncol : numbers of cols to fill
     446             : !> \param start_col : starting at coll number
     447             : !> \author Joost VandeVondele
     448             : !> \note
     449             : !>      the value of a_ij is independent of the number of cpus
     450             : ! **************************************************************************************************
     451        2612 :    SUBROUTINE cp_fm_init_random(matrix, ncol, start_col)
     452             :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix
     453             :       INTEGER, INTENT(IN), OPTIONAL                      :: ncol, start_col
     454             : 
     455             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'cp_fm_init_random'
     456             : 
     457             :       INTEGER :: handle, icol_global, icol_local, irow_local, my_ncol, my_start_col, ncol_global, &
     458             :          ncol_local, nrow_global, nrow_local
     459        5224 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
     460        2612 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: buff
     461             :       REAL(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
     462        2612 :          POINTER                                         :: local_data
     463             :       REAL(KIND=dp), DIMENSION(3, 2), SAVE :: &
     464             :          seed = RESHAPE((/1.0_dp, 2.0_dp, 3.0_dp, 4.0_dp, 5.0_dp, 6.0_dp/), (/3, 2/))
     465             :       TYPE(rng_stream_type)                              :: rng
     466             : 
     467        2612 :       CALL timeset(routineN, handle)
     468             : 
     469             :       ! guarantee same seed over all tasks
     470        2612 :       CALL matrix%matrix_struct%para_env%bcast(seed, 0)
     471             : 
     472             :       rng = rng_stream_type("cp_fm_init_random_stream", distribution_type=UNIFORM, &
     473        2612 :                             extended_precision=.TRUE., seed=seed)
     474             : 
     475        2612 :       CPASSERT(.NOT. matrix%use_sp)
     476             : 
     477             :       CALL cp_fm_get_info(matrix, nrow_global=nrow_global, ncol_global=ncol_global, &
     478             :                           nrow_local=nrow_local, ncol_local=ncol_local, &
     479             :                           local_data=local_data, &
     480        2612 :                           row_indices=row_indices, col_indices=col_indices)
     481             : 
     482        2612 :       my_start_col = 1
     483        2612 :       IF (PRESENT(start_col)) my_start_col = start_col
     484        2612 :       my_ncol = matrix%matrix_struct%ncol_global
     485        2612 :       IF (PRESENT(ncol)) my_ncol = ncol
     486             : 
     487        2612 :       IF (ncol_global < (my_start_col + my_ncol - 1)) &
     488           0 :          CPABORT("ncol_global>=(my_start_col+my_ncol-1)")
     489             : 
     490        7836 :       ALLOCATE (buff(nrow_global))
     491             : 
     492             :       ! each global row has its own substream, in order to reach the stream for the local col,
     493             :       ! we just reset to the next substream
     494             :       ! following this, we fill the full buff with random numbers, and pick those we need
     495        2612 :       icol_global = 0
     496       21375 :       DO icol_local = 1, ncol_local
     497       18763 :          CPASSERT(col_indices(icol_local) > icol_global)
     498             :          DO
     499       18763 :             CALL rng%reset_to_next_substream()
     500       18763 :             icol_global = icol_global + 1
     501       18763 :             IF (icol_global == col_indices(icol_local)) EXIT
     502             :          END DO
     503       18763 :          CALL rng%fill(buff)
     504      785317 :          DO irow_local = 1, nrow_local
     505      782705 :             local_data(irow_local, icol_local) = buff(row_indices(irow_local))
     506             :          END DO
     507             :       END DO
     508             : 
     509        2612 :       DEALLOCATE (buff)
     510             : 
     511             :       ! store seed before deletion (unclear if this is the proper seed)
     512             : 
     513             :       ! Note that, the initial state (rng%ig) instead of the current state (rng%cg) is stored in the
     514             :       ! seed variable. As a consequence, each invocation of cp_fm_init_random uses exactly the same
     515             :       ! stream of random numbers. While this seems odd and contrary to the original design,
     516             :       ! it was probably introduced to improve reproducibility.
     517             :       ! See also https://github.com/cp2k/cp2k/pull/506
     518        2612 :       CALL rng%get(ig=seed)
     519             : 
     520        2612 :       CALL timestop(handle)
     521             : 
     522       73136 :    END SUBROUTINE cp_fm_init_random
     523             : 
     524             : ! **************************************************************************************************
     525             : !> \brief set all elements of a matrix to the same value,
     526             : !>      and optionally the diagonal to a different one
     527             : !> \param matrix input matrix
     528             : !> \param alpha scalar used to set all elements of the matrix
     529             : !> \param beta scalar used to set diagonal of the matrix
     530             : !> \note
     531             : !>      can be used to zero a matrix
     532             : !>      can be used to create a unit matrix (I-matrix) alpha=0.0_dp beta=1.0_dp
     533             : ! **************************************************************************************************
     534      299169 :    SUBROUTINE cp_fm_set_all(matrix, alpha, beta)
     535             : 
     536             :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix
     537             :       REAL(KIND=dp), INTENT(IN)                          :: alpha
     538             :       REAL(KIND=dp), INTENT(IN), OPTIONAL                :: beta
     539             : 
     540             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'cp_fm_set_all'
     541             : 
     542             :       INTEGER                                            :: handle, i, n
     543             : 
     544      299169 :       CALL timeset(routineN, handle)
     545             : 
     546      299169 :       IF (matrix%use_sp) THEN
     547           0 :          matrix%local_data_sp(:, :) = REAL(alpha, sp)
     548             :       ELSE
     549   152194503 :          matrix%local_data(:, :) = alpha
     550             :       END IF
     551             : 
     552      299169 :       IF (PRESENT(beta)) THEN
     553       47430 :          CPASSERT(.NOT. matrix%use_sp)
     554       47430 :          n = MIN(matrix%matrix_struct%nrow_global, matrix%matrix_struct%ncol_global)
     555      459162 :          DO i = 1, n
     556      459162 :             CALL cp_fm_set_element(matrix, i, i, beta)
     557             :          END DO
     558             :       END IF
     559             : 
     560      299169 :       CALL timestop(handle)
     561             : 
     562      299169 :    END SUBROUTINE cp_fm_set_all
     563             : 
     564             : ! **************************************************************************************************
     565             : !> \brief returns the diagonal elements of a fm
     566             : !> \param matrix ...
     567             : !> \param diag ...
     568             : ! **************************************************************************************************
     569       10666 :    SUBROUTINE cp_fm_get_diag(matrix, diag)
     570             : 
     571             :       ! arguments
     572             :       TYPE(cp_fm_type), INTENT(IN)             :: matrix
     573             :       REAL(KIND=dp), DIMENSION(:), INTENT(OUT) :: diag
     574             : 
     575             :       ! locals
     576             :       INTEGER :: i, nrow_global
     577             : 
     578             : #if defined(__SCALAPACK)
     579             :       INTEGER, DIMENSION(9) :: desca
     580             :       TYPE(cp_blacs_env_type), POINTER :: context
     581             :       INTEGER :: icol_local, ipcol, iprow, irow_local, mypcol, myprow, npcol, &
     582             :                  nprow
     583       10666 :       REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
     584       10666 :       REAL(KIND=sp), DIMENSION(:, :), POINTER :: a_sp
     585             : #endif
     586             : 
     587       10666 :       CALL cp_fm_get_info(matrix, nrow_global=nrow_global)
     588             : 
     589             : #if defined(__SCALAPACK)
     590      156202 :       diag = 0.0_dp
     591       10666 :       context => matrix%matrix_struct%context
     592       10666 :       myprow = context%mepos(1)
     593       10666 :       mypcol = context%mepos(2)
     594       10666 :       nprow = context%num_pe(1)
     595       10666 :       npcol = context%num_pe(2)
     596             : 
     597       10666 :       a => matrix%local_data
     598       10666 :       a_sp => matrix%local_data_sp
     599      106660 :       desca(:) = matrix%matrix_struct%descriptor(:)
     600             : 
     601      156202 :       DO i = 1, nrow_global
     602             :          CALL infog2l(i, i, desca, nprow, npcol, myprow, mypcol, &
     603      145536 :                       irow_local, icol_local, iprow, ipcol)
     604      156202 :          IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
     605       72852 :             IF (matrix%use_sp) THEN
     606           0 :                diag(i) = REAL(a_sp(irow_local, icol_local), dp)
     607             :             ELSE
     608       72852 :                diag(i) = a(irow_local, icol_local)
     609             :             END IF
     610             :          END IF
     611             :       END DO
     612             : #else
     613             :       DO i = 1, nrow_global
     614             :          IF (matrix%use_sp) THEN
     615             :             diag(i) = REAL(matrix%local_data_sp(i, i), dp)
     616             :          ELSE
     617             :             diag(i) = matrix%local_data(i, i)
     618             :          END IF
     619             :       END DO
     620             : #endif
     621      301738 :       CALL matrix%matrix_struct%para_env%sum(diag)
     622             : 
     623       10666 :    END SUBROUTINE cp_fm_get_diag
     624             : 
     625             : ! **************************************************************************************************
     626             : !> \brief returns an element of a fm
     627             : !>      this value is valid on every cpu
     628             : !>      using this call is expensive
     629             : !> \param matrix the matrix to read
     630             : !> \param irow_global the row
     631             : !> \param icol_global the col
     632             : !> \param alpha the value of matrix(irow_global, icol_global)
     633             : !> \param local true if the element is on this cpu, false otherwise
     634             : !> \note
     635             : !>      - modified semantics. now this function always returns the value
     636             : !>        previously the value was zero on cpus that didn't own the relevant
     637             : !>        part of the matrix (Joost VandeVondele, May 2003)
     638             : !>      - usage of the function should be avoided, as it is likely to rather slow
     639             : !>        using row_indices/col_indices/local_data + some smart scheme normally
     640             : !>        yields a real parallel code
     641             : ! **************************************************************************************************
     642     2551668 :    SUBROUTINE cp_fm_get_element(matrix, irow_global, icol_global, alpha, local)
     643             : 
     644             :       ! arguments
     645             :       TYPE(cp_fm_type), INTENT(IN)          :: matrix
     646             :       REAL(KIND=dp), INTENT(OUT)                     :: alpha
     647             :       INTEGER, INTENT(IN)                       :: icol_global, &
     648             :                                                    irow_global
     649             :       LOGICAL, INTENT(OUT), OPTIONAL            :: local
     650             : 
     651             :       ! locals
     652             : #if defined(__SCALAPACK)
     653             :       INTEGER, DIMENSION(9) :: desca
     654             :       TYPE(cp_blacs_env_type), POINTER :: context
     655             :       INTEGER :: icol_local, ipcol, iprow, irow_local, mypcol, myprow, npcol, &
     656             :                  nprow
     657     2551668 :       REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
     658             : #endif
     659             : 
     660             : #if defined(__SCALAPACK)
     661     2551668 :       context => matrix%matrix_struct%context
     662     2551668 :       myprow = context%mepos(1)
     663     2551668 :       mypcol = context%mepos(2)
     664     2551668 :       nprow = context%num_pe(1)
     665     2551668 :       npcol = context%num_pe(2)
     666             : 
     667     2551668 :       a => matrix%local_data
     668    25516680 :       desca(:) = matrix%matrix_struct%descriptor(:)
     669             : 
     670             :       CALL infog2l(irow_global, icol_global, desca, nprow, npcol, myprow, mypcol, &
     671     2551668 :                    irow_local, icol_local, iprow, ipcol)
     672             : 
     673     2551668 :       IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
     674     1275834 :          alpha = a(irow_local, icol_local)
     675     1275834 :          CALL context%dgebs2d('All', ' ', 1, 1, alpha, 1)
     676     1275834 :          IF (PRESENT(local)) local = .TRUE.
     677             :       ELSE
     678     1275834 :          CALL context%dgebr2d('All', ' ', 1, 1, alpha, 1, iprow, ipcol)
     679     1275834 :          IF (PRESENT(local)) local = .FALSE.
     680             :       END IF
     681             : 
     682             : #else
     683             :       IF (PRESENT(local)) local = .TRUE.
     684             :       alpha = matrix%local_data(irow_global, icol_global)
     685             : #endif
     686             : 
     687     2551668 :    END SUBROUTINE cp_fm_get_element
     688             : 
     689             : ! **************************************************************************************************
     690             : !> \brief sets an element of a matrix
     691             : !> \param matrix ...
     692             : !> \param irow_global ...
     693             : !> \param icol_global ...
     694             : !> \param alpha ...
     695             : !> \note
     696             : !>      we expect all cpus to have the same arguments in the call to this function
     697             : !>      (otherwise one should use local_data tricks)
     698             : ! **************************************************************************************************
     699      816070 :    SUBROUTINE cp_fm_set_element(matrix, irow_global, icol_global, alpha)
     700             :       TYPE(cp_fm_type), INTENT(IN)          :: matrix
     701             :       INTEGER, INTENT(IN)                      :: irow_global, icol_global
     702             :       REAL(KIND=dp), INTENT(IN)                :: alpha
     703             : 
     704             :       INTEGER                                  :: mypcol, myprow, npcol, nprow
     705             :       TYPE(cp_blacs_env_type), POINTER         :: context
     706             : #if defined(__SCALAPACK)
     707             :       INTEGER                                  :: icol_local, ipcol, iprow, &
     708             :                                                   irow_local
     709             :       INTEGER, DIMENSION(9)                    :: desca
     710      816070 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
     711             : #endif
     712             : 
     713      816070 :       context => matrix%matrix_struct%context
     714      816070 :       myprow = context%mepos(1)
     715      816070 :       mypcol = context%mepos(2)
     716      816070 :       nprow = context%num_pe(1)
     717      816070 :       npcol = context%num_pe(2)
     718             : 
     719           0 :       CPASSERT(.NOT. matrix%use_sp)
     720             : 
     721             : #if defined(__SCALAPACK)
     722             : 
     723      816070 :       a => matrix%local_data
     724             : 
     725     8160700 :       desca(:) = matrix%matrix_struct%descriptor(:)
     726             : 
     727             :       CALL infog2l(irow_global, icol_global, desca, nprow, npcol, myprow, mypcol, &
     728      816070 :                    irow_local, icol_local, iprow, ipcol)
     729             : 
     730      816070 :       IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
     731      412370 :          a(irow_local, icol_local) = alpha
     732             :       END IF
     733             : 
     734             : #else
     735             : 
     736             :       matrix%local_data(irow_global, icol_global) = alpha
     737             : 
     738             : #endif
     739      816070 :    END SUBROUTINE cp_fm_set_element
     740             : 
     741             : ! **************************************************************************************************
     742             : !> \brief sets a submatrix of a full matrix
     743             : !>       fm(start_row:start_row+n_rows,start_col:start_col+n_cols)
     744             : !>       = alpha*op(new_values)(1:n_rows,1:n_cols)+ beta
     745             : !>       * fm(start_row:start_row+n_rows,start_col:start_col+n_cols)
     746             : !> \param fm the full to change
     747             : !> \param new_values a replicated full matrix with the new values
     748             : !> \param start_row the starting row of b_matrix (defaults to 1)
     749             : !> \param start_col the starting col of b_matrix (defaults to 1)
     750             : !> \param n_rows the number of row to change in b (defaults to
     751             : !>        size(op(new_values),1))
     752             : !> \param n_cols the number of columns to change in b (defaults to
     753             : !>        size(op(new_values),2))
     754             : !> \param alpha rescaling factor for the new values (defaults to 1.0)
     755             : !> \param beta rescaling factor for the old values (defaults to 0.0)
     756             : !> \param transpose if new_values should be transposed: if true
     757             : !>        op(new_values)=new_values^T, else op(new_values)=new_values
     758             : !>        (defaults to false)
     759             : !> \par History
     760             : !>      07.2002 created borrowing from Joost's blacs_replicated_copy [fawzi]
     761             : !> \author Fawzi Mohamed
     762             : !> \note
     763             : !>      optimized for full column updates and alpha=1.0, beta=0.0
     764             : !>      the new_values need to be valid on all cpus
     765             : ! **************************************************************************************************
     766       61251 :    SUBROUTINE cp_fm_set_submatrix(fm, new_values, start_row, &
     767             :                                   start_col, n_rows, n_cols, alpha, beta, transpose)
     768             :       TYPE(cp_fm_type), INTENT(IN)                       :: fm
     769             :       REAL(KIND=dp), DIMENSION(:, :), INTENT(in)         :: new_values
     770             :       INTEGER, INTENT(in), OPTIONAL                      :: start_row, start_col, n_rows, n_cols
     771             :       REAL(KIND=dp), INTENT(in), OPTIONAL                :: alpha, beta
     772             :       LOGICAL, INTENT(in), OPTIONAL                      :: transpose
     773             : 
     774             :       INTEGER                                            :: i, i0, j, j0, ncol, ncol_block, &
     775             :                                                             ncol_global, ncol_local, nrow, &
     776             :                                                             nrow_block, nrow_global, nrow_local, &
     777             :                                                             this_col, this_row
     778       61251 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
     779             :       LOGICAL                                            :: tr_a
     780             :       REAL(KIND=dp)                                      :: al, be
     781       61251 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: full_block
     782             : 
     783       61251 :       al = 1.0_dp; be = 0.0_dp; i0 = 1; j0 = 1; tr_a = .FALSE.
     784             :       ! can be called too many times, making it a bit useless
     785             :       ! CALL timeset(routineN//','//moduleN,handle)
     786             : 
     787           0 :       CPASSERT(.NOT. fm%use_sp)
     788             : 
     789       61251 :       IF (PRESENT(alpha)) al = alpha
     790       61251 :       IF (PRESENT(beta)) be = beta
     791       61251 :       IF (PRESENT(start_row)) i0 = start_row
     792       61251 :       IF (PRESENT(start_col)) j0 = start_col
     793       61251 :       IF (PRESENT(transpose)) tr_a = transpose
     794       15695 :       IF (tr_a) THEN
     795       15603 :          nrow = SIZE(new_values, 2)
     796       15603 :          ncol = SIZE(new_values, 1)
     797             :       ELSE
     798       45648 :          nrow = SIZE(new_values, 1)
     799       45648 :          ncol = SIZE(new_values, 2)
     800             :       END IF
     801       61251 :       IF (PRESENT(n_rows)) nrow = n_rows
     802       61251 :       IF (PRESENT(n_cols)) ncol = n_cols
     803             : 
     804       61251 :       full_block => fm%local_data
     805             : 
     806             :       CALL cp_fm_get_info(matrix=fm, &
     807             :                           nrow_global=nrow_global, ncol_global=ncol_global, &
     808             :                           nrow_block=nrow_block, ncol_block=ncol_block, &
     809             :                           nrow_local=nrow_local, ncol_local=ncol_local, &
     810       61251 :                           row_indices=row_indices, col_indices=col_indices)
     811             : 
     812       61251 :       IF (al == 1.0 .AND. be == 0.0) THEN
     813     1289094 :          DO j = 1, ncol_local
     814     1236981 :             this_col = col_indices(j) - j0 + 1
     815     1289094 :             IF (this_col .GE. 1 .AND. this_col .LE. ncol) THEN
     816      283421 :                IF (tr_a) THEN
     817        6465 :                   IF (i0 == 1 .AND. nrow_global == nrow) THEN
     818      157692 :                      DO i = 1, nrow_local
     819      157692 :                         full_block(i, j) = new_values(this_col, row_indices(i))
     820             :                      END DO
     821             :                   ELSE
     822         594 :                      DO i = 1, nrow_local
     823         510 :                         this_row = row_indices(i) - i0 + 1
     824         594 :                         IF (this_row >= 1 .AND. this_row <= nrow) THEN
     825         255 :                            full_block(i, j) = new_values(this_col, this_row)
     826             :                         END IF
     827             :                      END DO
     828             :                   END IF
     829             :                ELSE
     830      276956 :                   IF (i0 == 1 .AND. nrow_global == nrow) THEN
     831     8894821 :                      DO i = 1, nrow_local
     832     8894821 :                         full_block(i, j) = new_values(row_indices(i), this_col)
     833             :                      END DO
     834             :                   ELSE
     835        4662 :                      DO i = 1, nrow_local
     836        3990 :                         this_row = row_indices(i) - i0 + 1
     837        4662 :                         IF (this_row >= 1 .AND. this_row <= nrow) THEN
     838         476 :                            full_block(i, j) = new_values(this_row, this_col)
     839             :                         END IF
     840             :                      END DO
     841             :                   END IF
     842             :                END IF
     843             :             END IF
     844             :          END DO
     845             :       ELSE
     846      831608 :          DO j = 1, ncol_local
     847      822470 :             this_col = col_indices(j) - j0 + 1
     848      831608 :             IF (this_col .GE. 1 .AND. this_col .LE. ncol) THEN
     849      822470 :                IF (tr_a) THEN
     850    88223375 :                   DO i = 1, nrow_local
     851    87400905 :                      this_row = row_indices(i) - i0 + 1
     852    88223375 :                      IF (this_row >= 1 .AND. this_row <= nrow) THEN
     853             :                         full_block(i, j) = al*new_values(this_col, this_row) + &
     854      411235 :                                            be*full_block(i, j)
     855             :                      END IF
     856             :                   END DO
     857             :                ELSE
     858           0 :                   DO i = 1, nrow_local
     859           0 :                      this_row = row_indices(i) - i0 + 1
     860           0 :                      IF (this_row >= 1 .AND. this_row <= nrow) THEN
     861             :                         full_block(i, j) = al*new_values(this_row, this_col) + &
     862           0 :                                            be*full_block(i, j)
     863             :                      END IF
     864             :                   END DO
     865             :                END IF
     866             :             END IF
     867             :          END DO
     868             :       END IF
     869             : 
     870             :       ! CALL timestop(handle)
     871             : 
     872       61251 :    END SUBROUTINE cp_fm_set_submatrix
     873             : 
     874             : ! **************************************************************************************************
     875             : !> \brief gets a submatrix of a full matrix
     876             : !>       op(target_m)(1:n_rows,1:n_cols)
     877             : !>       =fm(start_row:start_row+n_rows,start_col:start_col+n_cols)
     878             : !>      target_m is replicated on all cpus
     879             : !>      using this call is expensive
     880             : !> \param fm the full you want to get the info from
     881             : !> \param target_m a replicated full matrix that will contain the result
     882             : !> \param start_row the starting row of b_matrix (defaults to 1)
     883             : !> \param start_col the starting col of b_matrix (defaults to 1)
     884             : !> \param n_rows the number of row to change in b (defaults to
     885             : !>        size(op(new_values),1))
     886             : !> \param n_cols the number of columns to change in b (defaults to
     887             : !>        size(op(new_values),2))
     888             : !> \param transpose if target_m should be transposed: if true
     889             : !>        op(target_m)=target_m^T, else op(target_m)=target_m
     890             : !>        (defaults to false)
     891             : !> \par History
     892             : !>      07.2002 created borrowing from Joost's blacs_replicated_copy [fawzi]
     893             : !> \author Fawzi Mohamed
     894             : !> \note
     895             : !>      optimized for full column updates. Zeros out a little too much
     896             : !>      of target_m
     897             : !>      the target_m is replicated and valid on all cpus
     898             : ! **************************************************************************************************
     899       70848 :    SUBROUTINE cp_fm_get_submatrix(fm, target_m, start_row, &
     900             :                                   start_col, n_rows, n_cols, transpose)
     901             :       TYPE(cp_fm_type), INTENT(IN)                       :: fm
     902             :       REAL(KIND=dp), DIMENSION(:, :), INTENT(out)        :: target_m
     903             :       INTEGER, INTENT(in), OPTIONAL                      :: start_row, start_col, n_rows, n_cols
     904             :       LOGICAL, INTENT(in), OPTIONAL                      :: transpose
     905             : 
     906             :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_get_submatrix'
     907             : 
     908             :       INTEGER                                            :: handle, i, i0, j, j0, ncol, ncol_global, &
     909             :                                                             ncol_local, nrow, nrow_global, &
     910             :                                                             nrow_local, this_col, this_row
     911       70848 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
     912             :       LOGICAL                                            :: tr_a
     913       70848 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: full_block
     914             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     915             : 
     916       70848 :       CALL timeset(routineN, handle)
     917             : 
     918       70848 :       i0 = 1; j0 = 1; tr_a = .FALSE.
     919             : 
     920       70848 :       CPASSERT(.NOT. fm%use_sp)
     921             : 
     922       70848 :       IF (PRESENT(start_row)) i0 = start_row
     923       70848 :       IF (PRESENT(start_col)) j0 = start_col
     924       70848 :       IF (PRESENT(transpose)) tr_a = transpose
     925        6240 :       IF (tr_a) THEN
     926        2362 :          nrow = SIZE(target_m, 2)
     927        2362 :          ncol = SIZE(target_m, 1)
     928             :       ELSE
     929       68486 :          nrow = SIZE(target_m, 1)
     930       68486 :          ncol = SIZE(target_m, 2)
     931             :       END IF
     932       70848 :       IF (PRESENT(n_rows)) nrow = n_rows
     933       70848 :       IF (PRESENT(n_cols)) ncol = n_cols
     934             : 
     935       70848 :       para_env => fm%matrix_struct%para_env
     936             : 
     937       70848 :       full_block => fm%local_data
     938             : #if defined(__SCALAPACK)
     939             :       ! zero-out whole target_m
     940       70848 :       IF (SIZE(target_m, 1)*SIZE(target_m, 2) /= 0) THEN
     941       70798 :          CALL dcopy(SIZE(target_m, 1)*SIZE(target_m, 2), [0.0_dp], 0, target_m, 1)
     942             :       END IF
     943             : #endif
     944             : 
     945             :       CALL cp_fm_get_info(matrix=fm, &
     946             :                           nrow_global=nrow_global, ncol_global=ncol_global, &
     947             :                           nrow_local=nrow_local, ncol_local=ncol_local, &
     948       70848 :                           row_indices=row_indices, col_indices=col_indices)
     949             : 
     950      447704 :       DO j = 1, ncol_local
     951      376856 :          this_col = col_indices(j) - j0 + 1
     952      447704 :          IF (this_col .GE. 1 .AND. this_col .LE. ncol) THEN
     953      268986 :             IF (tr_a) THEN
     954        2362 :                IF (i0 == 1 .AND. nrow_global == nrow) THEN
     955       81863 :                   DO i = 1, nrow_local
     956       81863 :                      target_m(this_col, row_indices(i)) = full_block(i, j)
     957             :                   END DO
     958             :                ELSE
     959           0 :                   DO i = 1, nrow_local
     960           0 :                      this_row = row_indices(i) - i0 + 1
     961           0 :                      IF (this_row >= 1 .AND. this_row <= nrow) THEN
     962           0 :                         target_m(this_col, this_row) = full_block(i, j)
     963             :                      END IF
     964             :                   END DO
     965             :                END IF
     966             :             ELSE
     967      266624 :                IF (i0 == 1 .AND. nrow_global == nrow) THEN
     968     5988839 :                   DO i = 1, nrow_local
     969     5988839 :                      target_m(row_indices(i), this_col) = full_block(i, j)
     970             :                   END DO
     971             :                ELSE
     972     1168384 :                   DO i = 1, nrow_local
     973     1141908 :                      this_row = row_indices(i) - i0 + 1
     974     1168384 :                      IF (this_row >= 1 .AND. this_row <= nrow) THEN
     975       24612 :                         target_m(this_row, this_col) = full_block(i, j)
     976             :                      END IF
     977             :                   END DO
     978             :                END IF
     979             :             END IF
     980             :          END IF
     981             :       END DO
     982             : 
     983    15453868 :       CALL para_env%sum(target_m)
     984             : 
     985       70848 :       CALL timestop(handle)
     986             : 
     987       70848 :    END SUBROUTINE cp_fm_get_submatrix
     988             : 
     989             : ! **************************************************************************************************
     990             : !> \brief returns all kind of information about the full matrix
     991             : !> \param matrix ...
     992             : !> \param name ...
     993             : !> \param nrow_global ...
     994             : !> \param ncol_global ...
     995             : !> \param nrow_block ...
     996             : !> \param ncol_block ...
     997             : !> \param nrow_local ...
     998             : !> \param ncol_local ...
     999             : !> \param row_indices ...
    1000             : !> \param col_indices ...
    1001             : !> \param local_data ...
    1002             : !> \param context ...
    1003             : !> \param nrow_locals ...
    1004             : !> \param ncol_locals ...
    1005             : !> \param matrix_struct ...
    1006             : !> \param para_env ...
    1007             : !> \note
    1008             : !>       see also cp_fm_struct for explanation
    1009             : !>       - nrow_local, ncol_local, row_indices, col_indices, local_data are hooks for efficient
    1010             : !>         access to the local blacs block
    1011             : ! **************************************************************************************************
    1012     5315253 :    SUBROUTINE cp_fm_get_info(matrix, name, nrow_global, ncol_global, &
    1013             :                              nrow_block, ncol_block, nrow_local, ncol_local, &
    1014             :                              row_indices, col_indices, local_data, context, &
    1015             :                              nrow_locals, ncol_locals, matrix_struct, para_env)
    1016             : 
    1017             :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix
    1018             :       CHARACTER(LEN=*), INTENT(OUT), OPTIONAL            :: name
    1019             :       INTEGER, INTENT(OUT), OPTIONAL                     :: nrow_global, ncol_global, nrow_block, &
    1020             :                                                             ncol_block, nrow_local, ncol_local
    1021             :       INTEGER, DIMENSION(:), OPTIONAL, POINTER           :: row_indices, col_indices
    1022             :       REAL(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
    1023             :          OPTIONAL, POINTER                               :: local_data
    1024             :       TYPE(cp_blacs_env_type), OPTIONAL, POINTER         :: context
    1025             :       INTEGER, DIMENSION(:), OPTIONAL, POINTER           :: nrow_locals, ncol_locals
    1026             :       TYPE(cp_fm_struct_type), OPTIONAL, POINTER         :: matrix_struct
    1027             :       TYPE(mp_para_env_type), OPTIONAL, POINTER          :: para_env
    1028             : 
    1029           8 :       IF (PRESENT(name)) name = matrix%name
    1030     5315253 :       IF (PRESENT(matrix_struct)) matrix_struct => matrix%matrix_struct
    1031     5315253 :       IF (PRESENT(local_data)) local_data => matrix%local_data ! not hiding things anymore :-(
    1032             : 
    1033             :       CALL cp_fm_struct_get(matrix%matrix_struct, nrow_local=nrow_local, &
    1034             :                             ncol_local=ncol_local, nrow_global=nrow_global, &
    1035             :                             ncol_global=ncol_global, nrow_block=nrow_block, &
    1036             :                             ncol_block=ncol_block, row_indices=row_indices, &
    1037             :                             col_indices=col_indices, nrow_locals=nrow_locals, &
    1038    25839921 :                             ncol_locals=ncol_locals, context=context, para_env=para_env)
    1039             : 
    1040     5315253 :    END SUBROUTINE cp_fm_get_info
    1041             : 
    1042             : ! **************************************************************************************************
    1043             : !> \brief Write nicely formatted info about the FM to the given I/O unit (including the underlying FM struct)
    1044             : !> \param matrix a cp_fm_type instance
    1045             : !> \param io_unit the I/O unit to use for writing
    1046             : ! **************************************************************************************************
    1047           3 :    SUBROUTINE cp_fm_write_info(matrix, io_unit)
    1048             :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix
    1049             :       INTEGER, INTENT(IN)                                :: io_unit
    1050             : 
    1051           3 :       WRITE (io_unit, '(/,A,A12)') "CP_FM | Name:                           ", matrix%name
    1052           3 :       CALL cp_fm_struct_write_info(matrix%matrix_struct, io_unit)
    1053           3 :    END SUBROUTINE cp_fm_write_info
    1054             : 
    1055             : ! **************************************************************************************************
    1056             : !> \brief find the maximum absolute value of the matrix element
    1057             : !>      maxval(abs(matrix))
    1058             : !> \param matrix ...
    1059             : !> \param a_max ...
    1060             : !> \param ir_max ...
    1061             : !> \param ic_max ...
    1062             : ! **************************************************************************************************
    1063       89625 :    SUBROUTINE cp_fm_maxabsval(matrix, a_max, ir_max, ic_max)
    1064             :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix
    1065             :       REAL(KIND=dp), INTENT(OUT)                         :: a_max
    1066             :       INTEGER, INTENT(OUT), OPTIONAL                     :: ir_max, ic_max
    1067             : 
    1068             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'cp_fm_maxabsval'
    1069             : 
    1070             :       INTEGER                                            :: handle, i, ic_max_local, ir_max_local, &
    1071             :                                                             j, mepos, ncol_local, nrow_local, &
    1072             :                                                             num_pe
    1073       89625 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: ic_max_vec, ir_max_vec
    1074       89625 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
    1075             :       REAL(dp)                                           :: my_max
    1076       89625 :       REAL(dp), ALLOCATABLE, DIMENSION(:)                :: a_max_vec
    1077       89625 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: my_block
    1078       89625 :       REAL(KIND=sp), DIMENSION(:, :), POINTER            :: my_block_sp
    1079             : 
    1080       89625 :       CALL timeset(routineN, handle)
    1081             : 
    1082       89625 :       my_block => matrix%local_data
    1083       89625 :       my_block_sp => matrix%local_data_sp
    1084             : 
    1085             :       CALL cp_fm_get_info(matrix, nrow_local=nrow_local, ncol_local=ncol_local, &
    1086       89625 :                           row_indices=row_indices, col_indices=col_indices)
    1087             : 
    1088       89625 :       IF (matrix%use_sp) THEN
    1089           0 :          a_max = REAL(MAXVAL(ABS(my_block_sp(1:nrow_local, 1:ncol_local))), dp)
    1090             :       ELSE
    1091    55194339 :          a_max = MAXVAL(ABS(my_block(1:nrow_local, 1:ncol_local)))
    1092             :       END IF
    1093             : 
    1094       89625 :       IF (PRESENT(ir_max)) THEN
    1095           0 :          num_pe = matrix%matrix_struct%para_env%num_pe
    1096           0 :          mepos = matrix%matrix_struct%para_env%mepos
    1097           0 :          ALLOCATE (ir_max_vec(0:num_pe - 1))
    1098           0 :          ir_max_vec(0:num_pe - 1) = 0
    1099           0 :          ALLOCATE (ic_max_vec(0:num_pe - 1))
    1100           0 :          ic_max_vec(0:num_pe - 1) = 0
    1101           0 :          ALLOCATE (a_max_vec(0:num_pe - 1))
    1102           0 :          a_max_vec(0:num_pe - 1) = 0.0_dp
    1103           0 :          my_max = 0.0_dp
    1104             : 
    1105           0 :          IF ((ncol_local > 0) .AND. (nrow_local > 0)) THEN
    1106           0 :             DO i = 1, ncol_local
    1107           0 :                DO j = 1, nrow_local
    1108           0 :                   IF (matrix%use_sp) THEN
    1109           0 :                      IF (ABS(my_block_sp(j, i)) > my_max) THEN
    1110           0 :                         my_max = REAL(my_block_sp(j, i), dp)
    1111           0 :                         ir_max_local = j
    1112           0 :                         ic_max_local = i
    1113             :                      END IF
    1114             :                   ELSE
    1115           0 :                      IF (ABS(my_block(j, i)) > my_max) THEN
    1116           0 :                         my_max = my_block(j, i)
    1117           0 :                         ir_max_local = j
    1118           0 :                         ic_max_local = i
    1119             :                      END IF
    1120             :                   END IF
    1121             :                END DO
    1122             :             END DO
    1123             : 
    1124           0 :             a_max_vec(mepos) = my_max
    1125           0 :             ir_max_vec(mepos) = row_indices(ir_max_local)
    1126           0 :             ic_max_vec(mepos) = col_indices(ic_max_local)
    1127             : 
    1128             :          END IF
    1129             : 
    1130           0 :          CALL matrix%matrix_struct%para_env%sum(a_max_vec)
    1131           0 :          CALL matrix%matrix_struct%para_env%sum(ir_max_vec)
    1132           0 :          CALL matrix%matrix_struct%para_env%sum(ic_max_vec)
    1133             : 
    1134           0 :          my_max = 0.0_dp
    1135           0 :          DO i = 0, num_pe - 1
    1136           0 :             IF (a_max_vec(i) > my_max) THEN
    1137           0 :                ir_max = ir_max_vec(i)
    1138           0 :                ic_max = ic_max_vec(i)
    1139             :             END IF
    1140             :          END DO
    1141             : 
    1142           0 :          DEALLOCATE (ir_max_vec, ic_max_vec, a_max_vec)
    1143           0 :          CPASSERT(ic_max > 0)
    1144           0 :          CPASSERT(ir_max > 0)
    1145             : 
    1146             :       END IF
    1147             : 
    1148       89625 :       CALL matrix%matrix_struct%para_env%max(a_max)
    1149             : 
    1150       89625 :       CALL timestop(handle)
    1151             : 
    1152      179250 :    END SUBROUTINE cp_fm_maxabsval
    1153             : 
    1154             : ! **************************************************************************************************
    1155             : !> \brief find the maximum over the rows of the sum of the absolute values of the elements of a given row
    1156             : !>      = || A ||_infinity
    1157             : !> \param matrix ...
    1158             : !> \param a_max ...
    1159             : !> \note
    1160             : !>      for a real symmetric matrix it holds that || A ||_2 = |lambda_max| < || A ||_infinity
    1161             : !>      Hence this can be used to estimate an upper bound for the eigenvalues of a matrix
    1162             : !>      http://mathworld.wolfram.com/MatrixNorm.html
    1163             : !>      (but the bound is not so tight in the general case)
    1164             : ! **************************************************************************************************
    1165        4294 :    SUBROUTINE cp_fm_maxabsrownorm(matrix, a_max)
    1166             :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix
    1167             :       REAL(KIND=dp), INTENT(OUT)                         :: a_max
    1168             : 
    1169             :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_maxabsrownorm'
    1170             : 
    1171             :       INTEGER                                            :: handle, i, j, ncol_local, nrow_global, &
    1172             :                                                             nrow_local
    1173        4294 :       INTEGER, DIMENSION(:), POINTER                     :: row_indices
    1174             :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: values
    1175        4294 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: my_block
    1176             : 
    1177        4294 :       CALL timeset(routineN, handle)
    1178             : 
    1179        4294 :       my_block => matrix%local_data
    1180             : 
    1181        4294 :       CPASSERT(.NOT. matrix%use_sp)
    1182             : 
    1183             :       CALL cp_fm_get_info(matrix, row_indices=row_indices, nrow_global=nrow_global, &
    1184        4294 :                           nrow_local=nrow_local, ncol_local=ncol_local)
    1185             : 
    1186             :       ! the efficiency could be improved by making use of the row-col distribution of scalapack
    1187       12882 :       ALLOCATE (values(nrow_global))
    1188       62846 :       values = 0.0_dp
    1189       62846 :       DO j = 1, ncol_local
    1190      530930 :          DO i = 1, nrow_local
    1191      526636 :             values(row_indices(i)) = values(row_indices(i)) + ABS(my_block(i, j))
    1192             :          END DO
    1193             :       END DO
    1194        4294 :       CALL matrix%matrix_struct%para_env%sum(values)
    1195       67140 :       a_max = MAXVAL(values)
    1196        4294 :       DEALLOCATE (values)
    1197             : 
    1198        4294 :       CALL timestop(handle)
    1199        8588 :    END SUBROUTINE
    1200             : 
    1201             : ! **************************************************************************************************
    1202             : !> \brief find the inorm of each column norm_{j}= sqrt( \sum_{i} A_{ij}*A_{ij} )
    1203             : !> \param matrix ...
    1204             : !> \param norm_array ...
    1205             : ! **************************************************************************************************
    1206        1282 :    SUBROUTINE cp_fm_vectorsnorm(matrix, norm_array)
    1207             :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix
    1208             :       REAL(KIND=dp), DIMENSION(:), INTENT(OUT)           :: norm_array
    1209             : 
    1210             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'cp_fm_vectorsnorm'
    1211             : 
    1212             :       INTEGER                                            :: handle, i, j, ncol_global, ncol_local, &
    1213             :                                                             nrow_local
    1214        1282 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices
    1215        1282 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: my_block
    1216             : 
    1217        1282 :       CALL timeset(routineN, handle)
    1218             : 
    1219        1282 :       my_block => matrix%local_data
    1220             : 
    1221        1282 :       CPASSERT(.NOT. matrix%use_sp)
    1222             : 
    1223             :       CALL cp_fm_get_info(matrix, col_indices=col_indices, ncol_global=ncol_global, &
    1224        1282 :                           nrow_local=nrow_local, ncol_local=ncol_local)
    1225             : 
    1226             :       ! the efficiency could be improved by making use of the row-col distribution of scalapack
    1227       29746 :       norm_array = 0.0_dp
    1228       29746 :       DO j = 1, ncol_local
    1229     1261016 :          DO i = 1, nrow_local
    1230     1259734 :             norm_array(col_indices(j)) = norm_array(col_indices(j)) + my_block(i, j)*my_block(i, j)
    1231             :          END DO
    1232             :       END DO
    1233       58210 :       CALL matrix%matrix_struct%para_env%sum(norm_array)
    1234       29746 :       norm_array = SQRT(norm_array)
    1235             : 
    1236        1282 :       CALL timestop(handle)
    1237        1282 :    END SUBROUTINE
    1238             : 
    1239             : ! **************************************************************************************************
    1240             : !> \brief summing up all the elements along the matrix's i-th index
    1241             : !>        \f$ \mathrm{sum}_{j} = \sum_{i} A_{ij} \f$
    1242             : !>        or
    1243             : !>        \f$ \mathrm{sum}_{i} = \sum_{j} A_{ij} \f$
    1244             : !> \param matrix     an input matrix A
    1245             : !> \param sum_array  sums of elements in each column/row
    1246             : !> \param dir ...
    1247             : !> \note  forked from cp_fm_vectorsnorm() to be used with
    1248             : !>        the maximum overlap method
    1249             : !>        added row variation
    1250             : ! **************************************************************************************************
    1251        7760 :    SUBROUTINE cp_fm_vectorssum(matrix, sum_array, dir)
    1252             :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix
    1253             :       REAL(KIND=dp), DIMENSION(:), INTENT(OUT)           :: sum_array
    1254             :       CHARACTER(LEN=1), INTENT(IN), OPTIONAL             :: dir
    1255             : 
    1256             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'cp_fm_vectorssum'
    1257             : 
    1258             :       INTEGER                                            :: handle, i, j, ncol_local, nrow_local
    1259        7760 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
    1260             :       LOGICAL                                            :: docol
    1261        7760 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: my_block
    1262             : 
    1263        7760 :       CALL timeset(routineN, handle)
    1264             : 
    1265        7760 :       IF (PRESENT(dir)) THEN
    1266        7720 :          IF (dir == 'c' .OR. dir == 'C') THEN
    1267             :             docol = .TRUE.
    1268        7720 :          ELSEIF (dir == 'r' .OR. dir == 'R') THEN
    1269             :             docol = .FALSE.
    1270             :          ELSE
    1271           0 :             CPABORT('Wrong argument DIR')
    1272             :          END IF
    1273             :       ELSE
    1274             :          docol = .TRUE.
    1275             :       END IF
    1276             : 
    1277        7760 :       my_block => matrix%local_data
    1278             : 
    1279        7760 :       CPASSERT(.NOT. matrix%use_sp)
    1280             : 
    1281             :       CALL cp_fm_get_info(matrix, col_indices=col_indices, row_indices=row_indices, &
    1282        7760 :                           nrow_local=nrow_local, ncol_local=ncol_local)
    1283             : 
    1284             :       ! the efficiency could be improved by making use of the row-col distribution of scalapack
    1285      240690 :       sum_array(:) = 0.0_dp
    1286        7760 :       IF (docol) THEN
    1287         448 :       DO j = 1, ncol_local
    1288        3628 :          DO i = 1, nrow_local
    1289        3588 :             sum_array(col_indices(j)) = sum_array(col_indices(j)) + my_block(i, j)
    1290             :          END DO
    1291             :       END DO
    1292             :       ELSE
    1293       86224 :       DO j = 1, ncol_local
    1294     6633258 :          DO i = 1, nrow_local
    1295     6625538 :             sum_array(row_indices(i)) = sum_array(row_indices(i)) + my_block(i, j)
    1296             :          END DO
    1297             :       END DO
    1298             :       END IF
    1299      473620 :       CALL matrix%matrix_struct%para_env%sum(sum_array)
    1300             : 
    1301        7760 :       CALL timestop(handle)
    1302        7760 :    END SUBROUTINE
    1303             : 
    1304             : ! **************************************************************************************************
    1305             : !> \brief copy one identically sized matrix in the other
    1306             : !> \param source ...
    1307             : !> \param destination ...
    1308             : !> \note
    1309             : !>      see also cp_fm_to_fm_columns
    1310             : ! **************************************************************************************************
    1311      359180 :    SUBROUTINE cp_fm_to_fm_matrix(source, destination)
    1312             : 
    1313             :       TYPE(cp_fm_type), INTENT(IN)                       :: source, destination
    1314             : 
    1315             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_to_fm_matrix'
    1316             : 
    1317             :       INTEGER                                            :: handle, npcol, nprow
    1318             : 
    1319      359180 :       CALL timeset(routineN, handle)
    1320             : 
    1321      359180 :       nprow = source%matrix_struct%context%num_pe(1)
    1322      359180 :       npcol = source%matrix_struct%context%num_pe(2)
    1323             : 
    1324      359180 :       IF ((.NOT. cp2k_is_parallel) .OR. &
    1325             :           cp_fm_struct_equivalent(source%matrix_struct, &
    1326             :                                   destination%matrix_struct)) THEN
    1327      359180 :          IF (source%use_sp .AND. destination%use_sp) THEN
    1328           0 :             IF (SIZE(source%local_data_sp, 1) /= SIZE(destination%local_data_sp, 1) .OR. &
    1329             :                 SIZE(source%local_data_sp, 2) /= SIZE(destination%local_data_sp, 2)) &
    1330             :                CALL cp_abort(__LOCATION__, &
    1331             :                              "Cannot copy full matrix <"//TRIM(source%name)// &
    1332             :                              "> to full matrix <"//TRIM(destination%name)// &
    1333           0 :                              ">. The local_data blocks have different sizes.")
    1334             :             CALL scopy(SIZE(source%local_data_sp, 1)*SIZE(source%local_data_sp, 2), &
    1335           0 :                        source%local_data_sp, 1, destination%local_data_sp, 1)
    1336      359180 :          ELSE IF (source%use_sp .AND. .NOT. destination%use_sp) THEN
    1337           0 :             IF (SIZE(source%local_data_sp, 1) /= SIZE(destination%local_data, 1) .OR. &
    1338             :                 SIZE(source%local_data_sp, 2) /= SIZE(destination%local_data, 2)) &
    1339             :                CALL cp_abort(__LOCATION__, &
    1340             :                              "Cannot copy full matrix <"//TRIM(source%name)// &
    1341             :                              "> to full matrix <"//TRIM(destination%name)// &
    1342           0 :                              ">. The local_data blocks have different sizes.")
    1343           0 :             destination%local_data = REAL(source%local_data_sp, dp)
    1344      359180 :          ELSE IF (.NOT. source%use_sp .AND. destination%use_sp) THEN
    1345           0 :             IF (SIZE(source%local_data, 1) /= SIZE(destination%local_data_sp, 1) .OR. &
    1346             :                 SIZE(source%local_data, 2) /= SIZE(destination%local_data_sp, 2)) &
    1347             :                CALL cp_abort(__LOCATION__, &
    1348             :                              "Cannot copy full matrix <"//TRIM(source%name)// &
    1349             :                              "> to full matrix <"//TRIM(destination%name)// &
    1350           0 :                              ">. The local_data blocks have different sizes.")
    1351           0 :             destination%local_data_sp = REAL(source%local_data, sp)
    1352             :          ELSE
    1353      359180 :             IF (SIZE(source%local_data, 1) /= SIZE(destination%local_data, 1) .OR. &
    1354             :                 SIZE(source%local_data, 2) /= SIZE(destination%local_data, 2)) &
    1355             :                CALL cp_abort(__LOCATION__, &
    1356             :                              "Cannot copy full matrix <"//TRIM(source%name)// &
    1357             :                              "> to full matrix <"//TRIM(destination%name)// &
    1358           0 :                              ">. The local_data blocks have different sizes.")
    1359             :             CALL dcopy(SIZE(source%local_data, 1)*SIZE(source%local_data, 2), &
    1360      359180 :                        source%local_data, 1, destination%local_data, 1)
    1361             :          END IF
    1362             :       ELSE
    1363           0 :          CPABORT("Data structures of source and target full matrix are not equivalent")
    1364             :       END IF
    1365             : 
    1366      359180 :       CALL timestop(handle)
    1367             : 
    1368      359180 :    END SUBROUTINE cp_fm_to_fm_matrix
    1369             : 
    1370             : ! **************************************************************************************************
    1371             : !> \brief copy just a subset of columns of a fm to a fm
    1372             : !> \param msource ...
    1373             : !> \param mtarget ...
    1374             : !> \param ncol ...
    1375             : !> \param source_start ...
    1376             : !> \param target_start ...
    1377             : ! **************************************************************************************************
    1378      162914 :    SUBROUTINE cp_fm_to_fm_columns(msource, mtarget, ncol, source_start, &
    1379             :                                   target_start)
    1380             : 
    1381             :       TYPE(cp_fm_type), INTENT(IN)          :: msource, mtarget
    1382             :       INTEGER, INTENT(IN)                      :: ncol
    1383             :       INTEGER, INTENT(IN), OPTIONAL            :: source_start, target_start
    1384             : 
    1385             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_to_fm_columns'
    1386             : 
    1387             :       INTEGER                                  :: handle, n, ss, ts
    1388             :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a, b
    1389             : #if defined(__SCALAPACK)
    1390             :       INTEGER                                  :: i
    1391             :       INTEGER, DIMENSION(9)                    :: desca, descb
    1392             : #endif
    1393             : 
    1394      162914 :       CALL timeset(routineN, handle)
    1395             : 
    1396      162914 :       ss = 1
    1397      162914 :       ts = 1
    1398             : 
    1399      162914 :       IF (PRESENT(source_start)) ss = source_start
    1400      162914 :       IF (PRESENT(target_start)) ts = target_start
    1401             : 
    1402      162914 :       n = msource%matrix_struct%nrow_global
    1403             : 
    1404      162914 :       a => msource%local_data
    1405      162914 :       b => mtarget%local_data
    1406             : 
    1407             : #if defined(__SCALAPACK)
    1408     1629140 :       desca(:) = msource%matrix_struct%descriptor(:)
    1409     1629140 :       descb(:) = mtarget%matrix_struct%descriptor(:)
    1410      643018 :       DO i = 0, ncol - 1
    1411      643018 :          CALL pdcopy(n, a, 1, ss + i, desca, 1, b, 1, ts + i, descb, 1)
    1412             :       END DO
    1413             : #else
    1414             :       CALL dcopy(ncol*n, a(:, ss), 1, b(:, ts), 1)
    1415             : #endif
    1416             : 
    1417      162914 :       CALL timestop(handle)
    1418             : 
    1419      162914 :    END SUBROUTINE cp_fm_to_fm_columns
    1420             : 
    1421             : ! **************************************************************************************************
    1422             : !> \brief copy just a triangular matrix
    1423             : !> \param msource ...
    1424             : !> \param mtarget ...
    1425             : !> \param uplo ...
    1426             : ! **************************************************************************************************
    1427         370 :    SUBROUTINE cp_fm_to_fm_triangular(msource, mtarget, uplo)
    1428             : 
    1429             :       TYPE(cp_fm_type), INTENT(IN)          :: msource, mtarget
    1430             :       CHARACTER(LEN=*), INTENT(IN)             :: uplo
    1431             : 
    1432             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_to_fm_triangular'
    1433             : 
    1434             :       INTEGER                                  :: handle, ncol, nrow
    1435         370 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a, b
    1436             : #if defined(__SCALAPACK)
    1437             :       INTEGER, DIMENSION(9)                    :: desca, descb
    1438             : #endif
    1439             : 
    1440         370 :       CALL timeset(routineN, handle)
    1441             : 
    1442         370 :       nrow = msource%matrix_struct%nrow_global
    1443         370 :       ncol = msource%matrix_struct%ncol_global
    1444             : 
    1445         370 :       a => msource%local_data
    1446         370 :       b => mtarget%local_data
    1447             : 
    1448             : #if defined(__SCALAPACK)
    1449        3700 :       desca(:) = msource%matrix_struct%descriptor(:)
    1450        3700 :       descb(:) = mtarget%matrix_struct%descriptor(:)
    1451         370 :       CALL pdlacpy(uplo, nrow, ncol, a(1, 1), 1, 1, desca, b(1, 1), 1, 1, descb)
    1452             : #else
    1453             :       CALL dlacpy(uplo, nrow, ncol, a(1, 1), nrow, b(1, 1), nrow)
    1454             : #endif
    1455             : 
    1456         370 :       CALL timestop(handle)
    1457             : 
    1458         370 :    END SUBROUTINE cp_fm_to_fm_triangular
    1459             : 
    1460             : ! **************************************************************************************************
    1461             : !> \brief copy just a part ot the matrix
    1462             : !> \param msource ...
    1463             : !> \param mtarget ...
    1464             : !> \param nrow ...
    1465             : !> \param ncol ...
    1466             : !> \param s_firstrow ...
    1467             : !> \param s_firstcol ...
    1468             : !> \param t_firstrow ...
    1469             : !> \param t_firstcol ...
    1470             : ! **************************************************************************************************
    1471             : 
    1472        6806 :    SUBROUTINE cp_fm_to_fm_submat(msource, mtarget, nrow, ncol, s_firstrow, s_firstcol, t_firstrow, t_firstcol)
    1473             : 
    1474             :       TYPE(cp_fm_type), INTENT(IN)             :: msource, mtarget
    1475             :       INTEGER, INTENT(IN)                      :: nrow, ncol, s_firstrow, &
    1476             :                                                   s_firstcol, t_firstrow, &
    1477             :                                                   t_firstcol
    1478             : 
    1479             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_to_fm_submat'
    1480             : 
    1481             :       INTEGER                                  :: handle, i, na, nb, ss, ts
    1482             :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a, b
    1483             : #if defined(__SCALAPACK)
    1484             :       INTEGER, DIMENSION(9)                    :: desca, descb
    1485             : #endif
    1486             : 
    1487        6806 :       CALL timeset(routineN, handle)
    1488             : 
    1489        6806 :       a => msource%local_data
    1490        6806 :       b => mtarget%local_data
    1491             : 
    1492        6806 :       na = msource%matrix_struct%nrow_global
    1493        6806 :       nb = mtarget%matrix_struct%nrow_global
    1494             : !    nrow must be <= na and nb
    1495        6806 :       IF (nrow > na) &
    1496           0 :          CPABORT("cannot copy because nrow > number of rows of source matrix")
    1497        6806 :       IF (nrow > nb) &
    1498           0 :          CPABORT("cannot copy because nrow > number of rows of target matrix")
    1499        6806 :       na = msource%matrix_struct%ncol_global
    1500        6806 :       nb = mtarget%matrix_struct%ncol_global
    1501             : !    ncol must be <= na_col and nb_col
    1502        6806 :       IF (ncol > na) &
    1503           0 :          CPABORT("cannot copy because nrow > number of rows of source matrix")
    1504        6806 :       IF (ncol > nb) &
    1505           0 :          CPABORT("cannot copy because nrow > number of rows of target matrix")
    1506             : 
    1507             : #if defined(__SCALAPACK)
    1508       68060 :       desca(:) = msource%matrix_struct%descriptor(:)
    1509       68060 :       descb(:) = mtarget%matrix_struct%descriptor(:)
    1510      101378 :       DO i = 0, ncol - 1
    1511       94572 :          ss = s_firstcol + i
    1512       94572 :          ts = t_firstcol + i
    1513      101378 :          CALL pdcopy(nrow, a, s_firstrow, ss, desca, 1, b, t_firstrow, ts, descb, 1)
    1514             :       END DO
    1515             : #else
    1516             :       DO i = 0, ncol - 1
    1517             :          ss = s_firstcol + i
    1518             :          ts = t_firstcol + i
    1519             :          CALL dcopy(nrow, a(s_firstrow:, ss), 1, b(t_firstrow:, ts), 1)
    1520             :       END DO
    1521             : #endif
    1522             : 
    1523        6806 :       CALL timestop(handle)
    1524        6806 :    END SUBROUTINE cp_fm_to_fm_submat
    1525             : 
    1526             : ! **************************************************************************************************
    1527             : !> \brief General copy of a fm matrix to another fm matrix.
    1528             : !>        Uses non-blocking MPI rather than ScaLAPACK.
    1529             : !>
    1530             : !> \param source          input fm matrix
    1531             : !> \param destination     output fm matrix
    1532             : !> \param para_env        parallel environment corresponding to the BLACS env that covers all parts
    1533             : !>                        of the input and output matrices
    1534             : !> \par History
    1535             : !>      31-Jan-2017 : Re-implemented using non-blocking MPI [IainB, MarkT]
    1536             : ! **************************************************************************************************
    1537        8934 :    SUBROUTINE cp_fm_copy_general(source, destination, para_env)
    1538             :       TYPE(cp_fm_type), INTENT(IN)                       :: source, destination
    1539             :       TYPE(mp_para_env_type), INTENT(IN)                 :: para_env
    1540             : 
    1541             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_copy_general'
    1542             : 
    1543             :       INTEGER                                            :: handle
    1544       80406 :       TYPE(copy_info_type)                               :: info
    1545             : 
    1546        8934 :       CALL timeset(routineN, handle)
    1547             : 
    1548        8934 :       CALL cp_fm_start_copy_general(source, destination, para_env, info)
    1549        8934 :       IF (ASSOCIATED(destination%matrix_struct)) THEN
    1550        8920 :          CALL cp_fm_finish_copy_general(destination, info)
    1551             :       END IF
    1552        8934 :       IF (ASSOCIATED(source%matrix_struct)) THEN
    1553        8780 :          CALL cp_fm_cleanup_copy_general(info)
    1554             :       END IF
    1555             : 
    1556        8934 :       CALL timestop(handle)
    1557        8934 :    END SUBROUTINE cp_fm_copy_general
    1558             : 
    1559             : ! **************************************************************************************************
    1560             : !> \brief Initiates the copy operation: get distribution data, post MPI isend and irecvs
    1561             : !> \param source input fm matrix
    1562             : !> \param destination output fm matrix
    1563             : !> \param para_env parallel environment corresponding to the BLACS env that covers all parts
    1564             : !>                        of the input and output matrices
    1565             : !> \param info all of the data that will be needed to complete the copy operation
    1566             : ! **************************************************************************************************
    1567     1417086 :    SUBROUTINE cp_fm_start_copy_general(source, destination, para_env, info)
    1568             :       TYPE(cp_fm_type), INTENT(IN)                       :: source, destination
    1569             :       TYPE(mp_para_env_type), INTENT(IN)                 :: para_env
    1570             :       TYPE(copy_info_type), INTENT(OUT)                  :: info
    1571             : 
    1572             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_start_copy_general'
    1573             : 
    1574             :       INTEGER :: dest_p_i, dest_q_j, global_rank, global_size, handle, i, j, k, mpi_rank, &
    1575             :          ncol_block_dest, ncol_block_src, ncol_local_recv, ncol_local_send, ncols, &
    1576             :          nrow_block_dest, nrow_block_src, nrow_local_recv, nrow_local_send, nrows, p, q, &
    1577             :          recv_rank, recv_size, send_rank, send_size
    1578      157454 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: all_ranks, dest2global, dest_p, dest_q, &
    1579      314908 :                                                             recv_count, send_count, send_disp, &
    1580      157454 :                                                             source2global, src_p, src_q
    1581      157454 :       INTEGER, ALLOCATABLE, DIMENSION(:, :)              :: dest_blacs2mpi
    1582             :       INTEGER, DIMENSION(2)                              :: dest_block, dest_block_tmp, dest_num_pe, &
    1583             :                                                             src_block, src_block_tmp, src_num_pe
    1584      314908 :       INTEGER, DIMENSION(:), POINTER                     :: recv_col_indices, recv_row_indices, &
    1585      314908 :                                                             send_col_indices, send_row_indices
    1586             :       TYPE(cp_fm_struct_type), POINTER                   :: recv_dist, send_dist
    1587     2204356 :       TYPE(mp_request_type), DIMENSION(6)                :: recv_req, send_req
    1588             : 
    1589      157454 :       CALL timeset(routineN, handle)
    1590             : 
    1591             :       IF (.NOT. cp2k_is_parallel) THEN
    1592             :          ! Just copy all of the matrix data into a 'send buffer', to be unpacked later
    1593             :          nrow_local_send = SIZE(source%local_data, 1)
    1594             :          ncol_local_send = SIZE(source%local_data, 2)
    1595             :          ALLOCATE (info%send_buf(nrow_local_send*ncol_local_send))
    1596             :          k = 0
    1597             :          DO j = 1, ncol_local_send
    1598             :             DO i = 1, nrow_local_send
    1599             :                k = k + 1
    1600             :                info%send_buf(k) = source%local_data(i, j)
    1601             :             END DO
    1602             :          END DO
    1603             :       ELSE
    1604             :          ! assumption of double precision data is carried forward from ScaLAPACK version
    1605      157454 :          IF (source%use_sp) CPABORT("only DP kind implemented")
    1606      157454 :          IF (destination%use_sp) CPABORT("only DP kind implemented")
    1607             : 
    1608      157454 :          NULLIFY (recv_dist, send_dist)
    1609      157454 :          NULLIFY (recv_col_indices, recv_row_indices, send_col_indices, send_row_indices)
    1610             : 
    1611             :          ! The 'global' communicator contains both the source and destination decompositions
    1612      157454 :          global_size = para_env%num_pe
    1613      157454 :          global_rank = para_env%mepos
    1614             : 
    1615             :          ! The source/send decomposition and destination/recv decompositions may only exist on
    1616             :          ! on a subset of the processes involved in the communication
    1617             :          ! Check if the source and/or destination arguments are .not. ASSOCIATED():
    1618             :          ! if so, skip the send / recv parts (since these processes do not participate in the sending/receiving distribution)
    1619      157454 :          IF (ASSOCIATED(destination%matrix_struct)) THEN
    1620      126116 :             recv_dist => destination%matrix_struct
    1621      126116 :             recv_rank = recv_dist%para_env%mepos
    1622             :          ELSE
    1623       31338 :             recv_rank = mp_proc_null
    1624             :          END IF
    1625             : 
    1626      157454 :          IF (ASSOCIATED(source%matrix_struct)) THEN
    1627      138864 :             send_dist => source%matrix_struct
    1628      138864 :             send_rank = send_dist%para_env%mepos
    1629             :          ELSE
    1630       18590 :             send_rank = mp_proc_null
    1631             :          END IF
    1632             : 
    1633             :          ! Map the rank in the source/dest communicator to the global rank
    1634      472362 :          ALLOCATE (all_ranks(0:global_size - 1))
    1635             : 
    1636      157454 :          CALL para_env%allgather(send_rank, all_ranks)
    1637      157454 :          IF (ASSOCIATED(recv_dist)) THEN
    1638      630580 :             ALLOCATE (source2global(0:COUNT(all_ranks .NE. mp_proc_null) - 1))
    1639      378348 :             DO i = 0, global_size - 1
    1640      378348 :                IF (all_ranks(i) .NE. mp_proc_null) THEN
    1641      215052 :                   source2global(all_ranks(i)) = i
    1642             :                END IF
    1643             :             END DO
    1644             :          END IF
    1645             : 
    1646      157454 :          CALL para_env%allgather(recv_rank, all_ranks)
    1647      157454 :          IF (ASSOCIATED(send_dist)) THEN
    1648      694320 :             ALLOCATE (dest2global(0:COUNT(all_ranks .NE. mp_proc_null) - 1))
    1649      416592 :             DO i = 0, global_size - 1
    1650      416592 :                IF (all_ranks(i) .NE. mp_proc_null) THEN
    1651      215052 :                   dest2global(all_ranks(i)) = i
    1652             :                END IF
    1653             :             END DO
    1654             :          END IF
    1655      157454 :          DEALLOCATE (all_ranks)
    1656             : 
    1657             :          ! Some data from the two decompositions will be needed by all processes in the global group :
    1658             :          ! process grid shape, block size, and the BLACS-to-MPI mapping
    1659             : 
    1660             :          ! The global root process will receive the data (from the root process in each decomposition)
    1661     1102178 :          send_req(:) = mp_request_null
    1662      157454 :          IF (global_rank == 0) THEN
    1663      551089 :             recv_req(:) = mp_request_null
    1664       78727 :             CALL para_env%irecv(src_block, mp_any_source, recv_req(1), tag=src_tag)
    1665       78727 :             CALL para_env%irecv(dest_block, mp_any_source, recv_req(2), tag=dest_tag)
    1666       78727 :             CALL para_env%irecv(src_num_pe, mp_any_source, recv_req(3), tag=src_tag)
    1667       78727 :             CALL para_env%irecv(dest_num_pe, mp_any_source, recv_req(4), tag=dest_tag)
    1668             :          END IF
    1669             : 
    1670      157454 :          IF (ASSOCIATED(send_dist)) THEN
    1671      138864 :             IF ((send_rank .EQ. 0)) THEN
    1672             :                ! need to use separate buffers here in case this is actually global rank 0
    1673      236181 :                src_block_tmp = (/send_dist%nrow_block, send_dist%ncol_block/)
    1674       78727 :                CALL para_env%isend(src_block_tmp, 0, send_req(1), tag=src_tag)
    1675       78727 :                CALL para_env%isend(send_dist%context%num_pe, 0, send_req(2), tag=src_tag)
    1676             :             END IF
    1677             :          END IF
    1678             : 
    1679      157454 :          IF (ASSOCIATED(recv_dist)) THEN
    1680      126116 :             IF ((recv_rank .EQ. 0)) THEN
    1681      236181 :                dest_block_tmp = (/recv_dist%nrow_block, recv_dist%ncol_block/)
    1682       78727 :                CALL para_env%isend(dest_block_tmp, 0, send_req(3), tag=dest_tag)
    1683       78727 :                CALL para_env%isend(recv_dist%context%num_pe, 0, send_req(4), tag=dest_tag)
    1684             :             END IF
    1685             :          END IF
    1686             : 
    1687      157454 :          IF (global_rank == 0) THEN
    1688       78727 :             CALL mp_waitall(recv_req(1:4))
    1689             :             ! Now we know the process decomposition, we can allocate the arrays to hold the blacs2mpi mapping
    1690           0 :             ALLOCATE (info%src_blacs2mpi(0:src_num_pe(1) - 1, 0:src_num_pe(2) - 1), &
    1691             :                       dest_blacs2mpi(0:dest_num_pe(1) - 1, 0:dest_num_pe(2) - 1) &
    1692      551089 :                       )
    1693       78727 :             CALL para_env%irecv(info%src_blacs2mpi, mp_any_source, recv_req(5), tag=src_tag)
    1694       78727 :             CALL para_env%irecv(dest_blacs2mpi, mp_any_source, recv_req(6), tag=dest_tag)
    1695             :          END IF
    1696             : 
    1697      157454 :          IF (ASSOCIATED(send_dist)) THEN
    1698      138864 :             IF ((send_rank .EQ. 0)) THEN
    1699       78727 :                CALL para_env%isend(send_dist%context%blacs2mpi(:, :), 0, send_req(5), tag=src_tag)
    1700             :             END IF
    1701             :          END IF
    1702             : 
    1703      157454 :          IF (ASSOCIATED(recv_dist)) THEN
    1704      126116 :             IF ((recv_rank .EQ. 0)) THEN
    1705       78727 :                CALL para_env%isend(recv_dist%context%blacs2mpi(:, :), 0, send_req(6), tag=dest_tag)
    1706             :             END IF
    1707             :          END IF
    1708             : 
    1709      157454 :          IF (global_rank == 0) THEN
    1710       78727 :             CALL mp_waitall(recv_req(5:6))
    1711             :          END IF
    1712             : 
    1713             :          ! Finally, broadcast the data to all processes in the global communicator
    1714      157454 :          CALL para_env%bcast(src_block, 0)
    1715      157454 :          CALL para_env%bcast(dest_block, 0)
    1716      157454 :          CALL para_env%bcast(src_num_pe, 0)
    1717      157454 :          CALL para_env%bcast(dest_num_pe, 0)
    1718      472362 :          info%src_num_pe(1:2) = src_num_pe(1:2)
    1719      472362 :          info%nblock_src(1:2) = src_block(1:2)
    1720      157454 :          IF (global_rank .NE. 0) THEN
    1721           0 :             ALLOCATE (info%src_blacs2mpi(0:src_num_pe(1) - 1, 0:src_num_pe(2) - 1), &
    1722           0 :                       dest_blacs2mpi(0:dest_num_pe(1) - 1, 0:dest_num_pe(2) - 1) &
    1723      551089 :                       )
    1724             :          END IF
    1725      157454 :          CALL para_env%bcast(info%src_blacs2mpi, 0)
    1726      157454 :          CALL para_env%bcast(dest_blacs2mpi, 0)
    1727             : 
    1728      157454 :          recv_size = dest_num_pe(1)*dest_num_pe(2)
    1729      157454 :          send_size = src_num_pe(1)*src_num_pe(2)
    1730      157454 :          info%send_size = send_size
    1731      157454 :          CALL mp_waitall(send_req(:))
    1732             : 
    1733             :          ! Setup is now complete, we can start the actual communication here.
    1734             :          ! The order implemented here is:
    1735             :          !  DEST_1
    1736             :          !      compute recv sizes
    1737             :          !      call irecv
    1738             :          !  SRC_1
    1739             :          !      compute send sizes
    1740             :          !      pack send buffers
    1741             :          !      call isend
    1742             :          !  DEST_2
    1743             :          !      wait for the recvs and unpack buffers (this part eventually will go into another routine to allow comms to run concurrently)
    1744             :          !  SRC_2
    1745             :          !      wait for the sends
    1746             : 
    1747             :          ! DEST_1
    1748      157454 :          IF (ASSOCIATED(recv_dist)) THEN
    1749             :             CALL cp_fm_struct_get(recv_dist, row_indices=recv_row_indices, &
    1750             :                                   col_indices=recv_col_indices &
    1751      126116 :                                   )
    1752      126116 :             info%recv_col_indices => recv_col_indices
    1753      126116 :             info%recv_row_indices => recv_row_indices
    1754      126116 :             nrow_block_src = src_block(1)
    1755      126116 :             ncol_block_src = src_block(2)
    1756     1097864 :             ALLOCATE (recv_count(0:send_size - 1), info%recv_disp(0:send_size - 1), info%recv_request(0:send_size - 1))
    1757             : 
    1758             :             ! Determine the recv counts, allocate the receive buffers, call mpi_irecv for all the non-zero sized receives
    1759      126116 :             nrow_local_recv = recv_dist%nrow_locals(recv_dist%context%mepos(1))
    1760      126116 :             ncol_local_recv = recv_dist%ncol_locals(recv_dist%context%mepos(2))
    1761      126116 :             info%nlocal_recv(1) = nrow_local_recv
    1762      126116 :             info%nlocal_recv(2) = ncol_local_recv
    1763             :             ! Initialise src_p, src_q arrays (sized using number of rows/cols in the receiving distribution)
    1764      630580 :             ALLOCATE (src_p(nrow_local_recv), src_q(ncol_local_recv))
    1765     2243920 :             DO i = 1, nrow_local_recv
    1766             :                ! For each local row we will receive, we look up its global row (in recv_row_indices),
    1767             :                ! then work out which row block it comes from, and which process row that row block comes from.
    1768     2243920 :                src_p(i) = MOD(((recv_row_indices(i) - 1)/nrow_block_src), src_num_pe(1))
    1769             :             END DO
    1770     3714150 :             DO j = 1, ncol_local_recv
    1771             :                ! Similarly for the columns
    1772     3714150 :                src_q(j) = MOD(((recv_col_indices(j) - 1)/ncol_block_src), src_num_pe(2))
    1773             :             END DO
    1774             :             ! src_p/q now contains the process row/column ID that will send data to that row/column
    1775             : 
    1776      252232 :             DO q = 0, src_num_pe(2) - 1
    1777     3714150 :                ncols = COUNT(src_q .EQ. q)
    1778      467284 :                DO p = 0, src_num_pe(1) - 1
    1779     4067200 :                   nrows = COUNT(src_p .EQ. p)
    1780             :                   ! Use the send_dist here as we are looking up the processes where the data comes from
    1781      341168 :                   recv_count(info%src_blacs2mpi(p, q)) = nrows*ncols
    1782             :                END DO
    1783             :             END DO
    1784      126116 :             DEALLOCATE (src_p, src_q)
    1785             : 
    1786             :             ! Use one long buffer (and displacements into that buffer)
    1787             :             !     this prevents the need for a rectangular array where not all elements will be populated
    1788      593400 :             ALLOCATE (info%recv_buf(SUM(recv_count(:))))
    1789      126116 :             info%recv_disp(0) = 0
    1790      215052 :             DO i = 1, send_size - 1
    1791      215052 :                info%recv_disp(i) = info%recv_disp(i - 1) + recv_count(i - 1)
    1792             :             END DO
    1793             : 
    1794             :             ! Issue receive calls on ranks which expect data
    1795      341168 :             DO k = 0, send_size - 1
    1796      341168 :                IF (recv_count(k) .GT. 0) THEN
    1797             :                   CALL para_env%irecv(info%recv_buf(info%recv_disp(k) + 1:info%recv_disp(k) + recv_count(k)), &
    1798      157868 :                                       source2global(k), info%recv_request(k))
    1799             :                END IF
    1800             :             END DO
    1801      126116 :             DEALLOCATE (source2global)
    1802             :          END IF ! ASSOCIATED(recv_dist)
    1803             : 
    1804             :          ! SRC_1
    1805      157454 :          IF (ASSOCIATED(send_dist)) THEN
    1806             :             CALL cp_fm_struct_get(send_dist, row_indices=send_row_indices, &
    1807             :                                   col_indices=send_col_indices &
    1808      138864 :                                   )
    1809      138864 :             nrow_block_dest = dest_block(1)
    1810      138864 :             ncol_block_dest = dest_block(2)
    1811     1187100 :             ALLOCATE (send_count(0:recv_size - 1), send_disp(0:recv_size - 1), info%send_request(0:recv_size - 1))
    1812             : 
    1813             :             ! Determine the send counts, allocate the send buffers
    1814      138864 :             nrow_local_send = send_dist%nrow_locals(send_dist%context%mepos(1))
    1815      138864 :             ncol_local_send = send_dist%ncol_locals(send_dist%context%mepos(2))
    1816             : 
    1817             :             ! Initialise dest_p, dest_q arrays (sized nrow_local, ncol_local)
    1818             :             !   i.e. number of rows,cols in the sending distribution
    1819      694320 :             ALLOCATE (dest_p(nrow_local_send), dest_q(ncol_local_send))
    1820             : 
    1821     2256668 :             DO i = 1, nrow_local_send
    1822             :                ! Use the send_dist%row_indices() here (we are looping over the local rows we will send)
    1823     2256668 :                dest_p(i) = MOD(((send_row_indices(i) - 1)/nrow_block_dest), dest_num_pe(1))
    1824             :             END DO
    1825     3986302 :             DO j = 1, ncol_local_send
    1826     3986302 :                dest_q(j) = MOD(((send_col_indices(j) - 1)/ncol_block_dest), dest_num_pe(2))
    1827             :             END DO
    1828             :             ! dest_p/q now contain the process row/column ID that will receive data from that row/column
    1829             : 
    1830      277728 :             DO q = 0, dest_num_pe(2) - 1
    1831     3986302 :                ncols = COUNT(dest_q .EQ. q)
    1832      492780 :                DO p = 0, dest_num_pe(1) - 1
    1833     3808300 :                   nrows = COUNT(dest_p .EQ. p)
    1834      353916 :                   send_count(dest_blacs2mpi(p, q)) = nrows*ncols
    1835             :                END DO
    1836             :             END DO
    1837      138864 :             DEALLOCATE (dest_p, dest_q)
    1838             : 
    1839             :             ! Allocate the send buffer using send_count -- and calculate the offset into the buffer for each process
    1840      631644 :             ALLOCATE (info%send_buf(SUM(send_count(:))))
    1841      138864 :             send_disp(0) = 0
    1842      215052 :             DO k = 1, recv_size - 1
    1843      215052 :                send_disp(k) = send_disp(k - 1) + send_count(k - 1)
    1844             :             END DO
    1845             : 
    1846             :             ! Loop over the smat, pack the send buffers
    1847      353916 :             send_count(:) = 0
    1848     3986302 :             DO j = 1, ncol_local_send
    1849             :                ! Use send_col_indices and row_indices here, as we are looking up the global row/column number of local rows.
    1850     3847438 :                dest_q_j = MOD(((send_col_indices(j) - 1)/ncol_block_dest), dest_num_pe(2))
    1851   120937210 :                DO i = 1, nrow_local_send
    1852   116950908 :                   dest_p_i = MOD(((send_row_indices(i) - 1)/nrow_block_dest), dest_num_pe(1))
    1853   116950908 :                   mpi_rank = dest_blacs2mpi(dest_p_i, dest_q_j)
    1854   116950908 :                   send_count(mpi_rank) = send_count(mpi_rank) + 1
    1855   120798346 :                   info%send_buf(send_disp(mpi_rank) + send_count(mpi_rank)) = source%local_data(i, j)
    1856             :                END DO
    1857             :             END DO
    1858             : 
    1859             :             ! For each non-zero send_count, call mpi_isend
    1860      353916 :             DO k = 0, recv_size - 1
    1861      353916 :                IF (send_count(k) .GT. 0) THEN
    1862             :                   CALL para_env%isend(info%send_buf(send_disp(k) + 1:send_disp(k) + send_count(k)), &
    1863      157868 :                                       dest2global(k), info%send_request(k))
    1864             :                END IF
    1865             :             END DO
    1866      138864 :             DEALLOCATE (send_count, send_disp, dest2global)
    1867             :          END IF ! ASSOCIATED(send_dist)
    1868      157454 :          DEALLOCATE (dest_blacs2mpi)
    1869             : 
    1870             :       END IF !IF (.NOT. cp2k_is_parallel)
    1871             : 
    1872      157454 :       CALL timestop(handle)
    1873             : 
    1874      629816 :    END SUBROUTINE cp_fm_start_copy_general
    1875             : 
    1876             : ! **************************************************************************************************
    1877             : !> \brief Completes the copy operation: wait for comms, unpack, clean up MPI state
    1878             : !> \param destination output fm matrix
    1879             : !> \param info all of the data that will be needed to complete the copy operation
    1880             : ! **************************************************************************************************
    1881      126116 :    SUBROUTINE cp_fm_finish_copy_general(destination, info)
    1882             :       TYPE(cp_fm_type), INTENT(IN)                       :: destination
    1883             :       TYPE(copy_info_type), INTENT(INOUT)                :: info
    1884             : 
    1885             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_finish_copy_general'
    1886             : 
    1887             :       INTEGER                                            :: handle, i, j, k, mpi_rank, send_size, &
    1888             :                                                             src_p_i, src_q_j
    1889      126116 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: recv_count
    1890             :       INTEGER, DIMENSION(2)                              :: nblock_src, nlocal_recv, src_num_pe
    1891      126116 :       INTEGER, DIMENSION(:), POINTER                     :: recv_col_indices, recv_row_indices
    1892             : 
    1893      126116 :       CALL timeset(routineN, handle)
    1894             : 
    1895             :       IF (.NOT. cp2k_is_parallel) THEN
    1896             :          ! Now unpack the data from the 'send buffer'
    1897             :          k = 0
    1898             :          DO j = 1, SIZE(destination%local_data, 2)
    1899             :             DO i = 1, SIZE(destination%local_data, 1)
    1900             :                k = k + 1
    1901             :                destination%local_data(i, j) = info%send_buf(k)
    1902             :             END DO
    1903             :          END DO
    1904             :          DEALLOCATE (info%send_buf)
    1905             :       ELSE
    1906             :          ! Set up local variables ...
    1907      126116 :          send_size = info%send_size
    1908      378348 :          nlocal_recv(1:2) = info%nlocal_recv(:)
    1909      378348 :          nblock_src(1:2) = info%nblock_src(:)
    1910      378348 :          src_num_pe(1:2) = info%src_num_pe(:)
    1911      126116 :          recv_col_indices => info%recv_col_indices
    1912      126116 :          recv_row_indices => info%recv_row_indices
    1913             : 
    1914             :          ! ... use the local variables to do the work
    1915             :          ! DEST_2
    1916      126116 :          CALL mp_waitall(info%recv_request(:))
    1917      378348 :          ALLOCATE (recv_count(0:send_size - 1))
    1918             :          ! Loop over the rmat, filling it in with data from the recv buffers
    1919             :          ! (here the block sizes, num_pes refer to the distribution of the source matrix)
    1920      341168 :          recv_count(:) = 0
    1921     3714150 :          DO j = 1, nlocal_recv(2)
    1922     3588034 :             src_q_j = MOD(((recv_col_indices(j) - 1)/nblock_src(2)), src_num_pe(2))
    1923   120665058 :             DO i = 1, nlocal_recv(1)
    1924   116950908 :                src_p_i = MOD(((recv_row_indices(i) - 1)/nblock_src(1)), src_num_pe(1))
    1925   116950908 :                mpi_rank = info%src_blacs2mpi(src_p_i, src_q_j)
    1926   116950908 :                recv_count(mpi_rank) = recv_count(mpi_rank) + 1
    1927   120538942 :                destination%local_data(i, j) = info%recv_buf(info%recv_disp(mpi_rank) + recv_count(mpi_rank))
    1928             :             END DO
    1929             :          END DO
    1930      126116 :          DEALLOCATE (recv_count, info%recv_disp, info%recv_request, info%recv_buf, info%src_blacs2mpi)
    1931             :          ! Invalidate the stored state
    1932             :          NULLIFY (info%recv_col_indices, &
    1933      126116 :                   info%recv_row_indices)
    1934             : 
    1935             :       END IF
    1936             : 
    1937      126116 :       CALL timestop(handle)
    1938             : 
    1939      126116 :    END SUBROUTINE cp_fm_finish_copy_general
    1940             : 
    1941             : ! **************************************************************************************************
    1942             : !> \brief Completes the copy operation: wait for comms clean up MPI state
    1943             : !> \param info all of the data that will be needed to complete the copy operation
    1944             : ! **************************************************************************************************
    1945      138200 :    SUBROUTINE cp_fm_cleanup_copy_general(info)
    1946             :       TYPE(copy_info_type), INTENT(INOUT)                :: info
    1947             : 
    1948             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_cleanup_copy_general'
    1949             : 
    1950             :       INTEGER                                            :: handle
    1951             : 
    1952      138200 :       CALL timeset(routineN, handle)
    1953             : 
    1954             :       IF (.NOT. cp2k_is_parallel) THEN
    1955             :          ! Don't do anything - no MPI state for the serial case
    1956             :       ELSE
    1957             :          ! SRC_2
    1958             :          ! If this process is also in the destination decomposition, this deallocate
    1959             :          ! Was already done in cp_fm_finish_copy_general
    1960      138200 :          IF (ALLOCATED(info%src_blacs2mpi)) THEN
    1961       30674 :             DEALLOCATE (info%src_blacs2mpi)
    1962             :          END IF
    1963      138200 :          CALL mp_waitall(info%send_request)
    1964      138200 :          DEALLOCATE (info%send_request, info%send_buf)
    1965             : 
    1966             :       END IF
    1967             : 
    1968      138200 :       CALL timestop(handle)
    1969             : 
    1970      138200 :    END SUBROUTINE cp_fm_cleanup_copy_general
    1971             : 
    1972             : ! **************************************************************************************************
    1973             : !> \brief General copy of a submatrix of fm matrix to  a submatrix of another fm matrix.
    1974             : !>        The two matrices can have different contexts.
    1975             : !>
    1976             : !>        Summary of distribution routines for dense matrices
    1977             : !>        The following will copy A(iA:iA+M-1,jA:jA+N-1) to B(iB:iB+M-1,jB:jB+N-1):
    1978             : !>
    1979             : !>        call pdgemr2d(M,N,Aloc,iA,jA,descA,Bloc,iB,jB,descB,context)
    1980             : !>
    1981             : !>        A process that is not a part of the context of A should set descA(2)
    1982             : !>        to -1, and similarly for B.
    1983             : !>
    1984             : !> \param source          input fm matrix
    1985             : !> \param destination     output fm matrix
    1986             : !> \param nrows           number of rows of sub matrix to be copied
    1987             : !> \param ncols           number of cols of sub matrix to be copied
    1988             : !> \param s_firstrow      starting global row index of sub matrix in source
    1989             : !> \param s_firstcol      starting global col index of sub matrix in source
    1990             : !> \param d_firstrow      starting global row index of sub matrix in destination
    1991             : !> \param d_firstcol      starting global col index of sub matrix in destination
    1992             : !> \param global_context  process grid that covers all parts of either A or B.
    1993             : ! **************************************************************************************************
    1994        1536 :    SUBROUTINE cp_fm_to_fm_submat_general(source, &
    1995             :                                          destination, &
    1996             :                                          nrows, &
    1997             :                                          ncols, &
    1998             :                                          s_firstrow, &
    1999             :                                          s_firstcol, &
    2000             :                                          d_firstrow, &
    2001             :                                          d_firstcol, &
    2002             :                                          global_context)
    2003             : 
    2004             :       TYPE(cp_fm_type), INTENT(IN)                       :: source, destination
    2005             :       INTEGER, INTENT(IN)                                :: nrows, ncols, s_firstrow, s_firstcol, &
    2006             :                                                             d_firstrow, d_firstcol
    2007             : 
    2008             :       CLASS(cp_blacs_type), INTENT(IN)        :: global_context
    2009             : 
    2010             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_to_fm_submat_general'
    2011             : 
    2012             :       LOGICAL                                  :: debug
    2013             :       INTEGER                                  :: handle
    2014             : #if defined(__SCALAPACK)
    2015             :       INTEGER, DIMENSION(9)                    :: desca, descb
    2016             :       REAL(KIND=dp), DIMENSION(1, 1), TARGET   :: dummy
    2017        1536 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: smat, dmat
    2018             : #endif
    2019             : 
    2020        1536 :       CALL timeset(routineN, handle)
    2021             : 
    2022        1536 :       debug = debug_this_module
    2023             : 
    2024             :       IF (.NOT. cp2k_is_parallel) THEN
    2025             :          CALL cp_fm_to_fm_submat(source, &
    2026             :                                  destination, &
    2027             :                                  nrows, &
    2028             :                                  ncols, &
    2029             :                                  s_firstrow, &
    2030             :                                  s_firstcol, &
    2031             :                                  d_firstrow, &
    2032             :                                  d_firstcol)
    2033             :       ELSE
    2034             : #ifdef __SCALAPACK
    2035        1536 :          NULLIFY (smat, dmat)
    2036             :          ! check whether source is available on this process
    2037        1536 :          IF (ASSOCIATED(source%matrix_struct)) THEN
    2038       15360 :             desca = source%matrix_struct%descriptor
    2039        1536 :             IF (source%use_sp) CPABORT("only DP kind implemented")
    2040        1536 :             IF (nrows .GT. source%matrix_struct%nrow_global) &
    2041           0 :                CPABORT("nrows is greater than nrow_global of source")
    2042        1536 :             IF (ncols .GT. source%matrix_struct%ncol_global) &
    2043           0 :                CPABORT("ncols is greater than ncol_global of source")
    2044        1536 :             smat => source%local_data
    2045             :          ELSE
    2046           0 :             desca = -1
    2047           0 :             smat => dummy
    2048             :          END IF
    2049             :          ! check destination is available on this process
    2050        1536 :          IF (ASSOCIATED(destination%matrix_struct)) THEN
    2051       15360 :             descb = destination%matrix_struct%descriptor
    2052        1536 :             IF (destination%use_sp) CPABORT("only DP kind implemented")
    2053        1536 :             IF (nrows .GT. destination%matrix_struct%nrow_global) &
    2054           0 :                CPABORT("nrows is greater than nrow_global of destination")
    2055        1536 :             IF (ncols .GT. destination%matrix_struct%ncol_global) &
    2056           0 :                CPABORT("ncols is greater than ncol_global of destination")
    2057        1536 :             dmat => destination%local_data
    2058             :          ELSE
    2059           0 :             descb = -1
    2060           0 :             dmat => dummy
    2061             :          END IF
    2062             :          ! do copy
    2063             : 
    2064             :          CALL pdgemr2d(nrows, &
    2065             :                        ncols, &
    2066             :                        smat, &
    2067             :                        s_firstrow, &
    2068             :                        s_firstcol, &
    2069             :                        desca, &
    2070             :                        dmat, &
    2071             :                        d_firstrow, &
    2072             :                        d_firstcol, &
    2073             :                        descb, &
    2074        1536 :                        global_context%get_handle())
    2075             : #else
    2076             :          MARK_USED(global_context)
    2077             :          CPABORT("this subroutine only supports SCALAPACK")
    2078             : #endif
    2079             :       END IF
    2080             : 
    2081        1536 :       CALL timestop(handle)
    2082             : 
    2083        1536 :    END SUBROUTINE cp_fm_to_fm_submat_general
    2084             : 
    2085             : ! **************************************************************************************************
    2086             : !> \brief ...
    2087             : !> \param matrix ...
    2088             : !> \param irow_global ...
    2089             : !> \param icol_global ...
    2090             : !> \param alpha ...
    2091             : ! **************************************************************************************************
    2092         240 :    SUBROUTINE cp_fm_add_to_element(matrix, irow_global, icol_global, alpha)
    2093             : 
    2094             :       ! Add alpha to the matrix element specified by the global indices
    2095             :       ! irow_global and icol_global
    2096             : 
    2097             :       ! - Creation (05.05.06,MK)
    2098             : 
    2099             :       TYPE(cp_fm_type), INTENT(IN)          :: matrix
    2100             :       INTEGER, INTENT(IN)                      :: irow_global, icol_global
    2101             :       REAL(KIND=dp), INTENT(IN)                :: alpha
    2102             : 
    2103             :       INTEGER                                  :: mypcol, myprow, npcol, nprow
    2104         240 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
    2105             :       TYPE(cp_blacs_env_type), POINTER         :: context
    2106             : #if defined(__SCALAPACK)
    2107             :       INTEGER                                  :: icol_local, ipcol, iprow, &
    2108             :                                                   irow_local
    2109             :       INTEGER, DIMENSION(9)                    :: desca
    2110             : #endif
    2111             : 
    2112         240 :       context => matrix%matrix_struct%context
    2113             : 
    2114         240 :       myprow = context%mepos(1)
    2115         240 :       mypcol = context%mepos(2)
    2116             : 
    2117         240 :       nprow = context%num_pe(1)
    2118         240 :       npcol = context%num_pe(2)
    2119             : 
    2120         240 :       a => matrix%local_data
    2121             : 
    2122             : #if defined(__SCALAPACK)
    2123             : 
    2124        2400 :       desca(:) = matrix%matrix_struct%descriptor(:)
    2125             : 
    2126             :       CALL infog2l(irow_global, icol_global, desca, nprow, npcol, myprow, mypcol, &
    2127         240 :                    irow_local, icol_local, iprow, ipcol)
    2128             : 
    2129         240 :       IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
    2130         120 :          a(irow_local, icol_local) = a(irow_local, icol_local) + alpha
    2131             :       END IF
    2132             : 
    2133             : #else
    2134             : 
    2135             :       a(irow_global, icol_global) = a(irow_global, icol_global) + alpha
    2136             : 
    2137             : #endif
    2138             : 
    2139         240 :    END SUBROUTINE cp_fm_add_to_element
    2140             : 
    2141             : ! **************************************************************************************************
    2142             : !> \brief ...
    2143             : !> \param fm ...
    2144             : !> \param unit ...
    2145             : ! **************************************************************************************************
    2146       78684 :    SUBROUTINE cp_fm_write_unformatted(fm, unit)
    2147             :       TYPE(cp_fm_type), INTENT(IN)             :: fm
    2148             :       INTEGER, INTENT(IN)                      :: unit
    2149             : 
    2150             :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_write_unformatted'
    2151             : 
    2152             :       INTEGER                                  :: handle, j, max_block, &
    2153             :                                                   ncol_global, nrow_global
    2154             :       TYPE(mp_para_env_type), POINTER          :: para_env
    2155             : #if defined(__SCALAPACK)
    2156             :       INTEGER                                  :: i, i_block, icol_local, &
    2157             :                                                   in, info, ipcol, &
    2158             :                                                   iprow, irow_local, &
    2159             :                                                   mepos, &
    2160             :                                                   num_pe, rb, tag
    2161             :       INTEGER, DIMENSION(9)                    :: desc
    2162       78684 :       REAL(KIND=dp), DIMENSION(:), POINTER     :: vecbuf
    2163             :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: newdat
    2164             :       TYPE(cp_blacs_type) :: ictxt_loc
    2165             :       INTEGER, EXTERNAL                        :: numroc
    2166             : #endif
    2167             : 
    2168       78684 :       CALL timeset(routineN, handle)
    2169             :       CALL cp_fm_get_info(fm, nrow_global=nrow_global, ncol_global=ncol_global, ncol_block=max_block, &
    2170       78684 :                           para_env=para_env)
    2171             : 
    2172             : #if defined(__SCALAPACK)
    2173       78684 :       num_pe = para_env%num_pe
    2174       78684 :       mepos = para_env%mepos
    2175       78684 :       rb = nrow_global
    2176       78684 :       tag = 0
    2177             :       ! get a new context
    2178       78684 :       CALL ictxt_loc%gridinit(para_env, 'R', 1, num_pe)
    2179       78684 :       CALL descinit(desc, nrow_global, ncol_global, rb, max_block, 0, 0, ictxt_loc%get_handle(), nrow_global, info)
    2180       78684 :       CPASSERT(info == 0)
    2181             :       ASSOCIATE (nprow => ictxt_loc%num_pe(1), npcol => ictxt_loc%num_pe(2), &
    2182             :                  myprow => ictxt_loc%mepos(1), mypcol => ictxt_loc%mepos(2))
    2183      157368 :          in = numroc(ncol_global, max_block, mypcol, 0, npcol)
    2184             : 
    2185      314736 :          ALLOCATE (newdat(nrow_global, MAX(1, in)))
    2186             : 
    2187             :          ! do the actual scalapack to cols reordering
    2188             :          CALL pdgemr2d(nrow_global, ncol_global, fm%local_data, 1, 1, &
    2189             :                        fm%matrix_struct%descriptor, &
    2190       78684 :                        newdat, 1, 1, desc, ictxt_loc%get_handle())
    2191             : 
    2192      236052 :          ALLOCATE (vecbuf(nrow_global*max_block))
    2193    62359655 :          vecbuf = HUGE(1.0_dp) ! init for valgrind
    2194             : 
    2195      293914 :          DO i = 1, ncol_global, MAX(max_block, 1)
    2196      136546 :             i_block = MIN(max_block, ncol_global - i + 1)
    2197             :             CALL infog2l(1, i, desc, nprow, npcol, myprow, mypcol, &
    2198      136546 :                          irow_local, icol_local, iprow, ipcol)
    2199      136546 :             IF (ipcol == mypcol) THEN
    2200      984905 :                DO j = 1, i_block
    2201   158058191 :                   vecbuf((j - 1)*nrow_global + 1:nrow_global*j) = newdat(:, icol_local + j - 1)
    2202             :                END DO
    2203             :             END IF
    2204             : 
    2205      136546 :             IF (ipcol == 0) THEN
    2206             :                ! do nothing
    2207             :             ELSE
    2208       56164 :                IF (ipcol == mypcol) THEN
    2209    38194718 :                   CALL para_env%send(vecbuf(:), 0, tag)
    2210             :                END IF
    2211       56164 :                IF (mypcol == 0) THEN
    2212    76361354 :                   CALL para_env%recv(vecbuf(:), ipcol, tag)
    2213             :                END IF
    2214             :             END IF
    2215             : 
    2216      215230 :             IF (unit > 0) THEN
    2217      954629 :                DO j = 1, i_block
    2218      954629 :                   WRITE (unit) vecbuf((j - 1)*nrow_global + 1:nrow_global*j)
    2219             :                END DO
    2220             :             END IF
    2221             : 
    2222             :          END DO
    2223             :       END ASSOCIATE
    2224       78684 :       DEALLOCATE (vecbuf)
    2225             : 
    2226       78684 :       CALL ictxt_loc%gridexit()
    2227             : 
    2228       78684 :       DEALLOCATE (newdat)
    2229             : 
    2230             : #else
    2231             : 
    2232             :       IF (unit > 0) THEN
    2233             :          DO j = 1, ncol_global
    2234             :             WRITE (unit) fm%local_data(:, j)
    2235             :          END DO
    2236             :       END IF
    2237             : 
    2238             : #endif
    2239       78684 :       CALL timestop(handle)
    2240             : 
    2241      393420 :    END SUBROUTINE cp_fm_write_unformatted
    2242             : 
    2243             : ! **************************************************************************************************
    2244             : !> \brief Write out a full matrix in plain text.
    2245             : !> \param fm the matrix to be outputted
    2246             : !> \param unit the unit number for I/O
    2247             : !> \param header optional header
    2248             : !> \param value_format ...
    2249             : ! **************************************************************************************************
    2250          10 :    SUBROUTINE cp_fm_write_formatted(fm, unit, header, value_format)
    2251             :       TYPE(cp_fm_type), INTENT(IN)             :: fm
    2252             :       INTEGER, INTENT(IN)                      :: unit
    2253             :       CHARACTER(LEN=*), INTENT(IN), OPTIONAL   :: header, value_format
    2254             : 
    2255             :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_write_formatted'
    2256             : 
    2257             :       CHARACTER(LEN=21)                        :: my_value_format
    2258             :       INTEGER                                  :: handle, i, j, max_block, &
    2259             :                                                   ncol_global, nrow_global, &
    2260             :                                                   nrow_block
    2261             :       TYPE(mp_para_env_type), POINTER          :: para_env
    2262             : #if defined(__SCALAPACK)
    2263             :       INTEGER                                  :: i_block, icol_local, &
    2264             :                                                   in, info, ipcol, &
    2265             :                                                   iprow, irow_local, &
    2266             :                                                   mepos, num_pe, rb, tag, k, &
    2267             :                                                   icol, irow
    2268             :       INTEGER, DIMENSION(9)                    :: desc
    2269          10 :       REAL(KIND=dp), DIMENSION(:), POINTER     :: vecbuf
    2270          10 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: newdat
    2271             :       TYPE(cp_blacs_type) :: ictxt_loc
    2272             :       INTEGER, EXTERNAL                        :: numroc
    2273             : #endif
    2274             : 
    2275          10 :       CALL timeset(routineN, handle)
    2276             :       CALL cp_fm_get_info(fm, nrow_global=nrow_global, ncol_global=ncol_global, ncol_block=max_block, &
    2277          10 :                           nrow_block=nrow_block, para_env=para_env)
    2278             : 
    2279          10 :       IF (PRESENT(value_format)) THEN
    2280           0 :          CPASSERT(LEN_TRIM(ADJUSTL(value_format)) < 11)
    2281           0 :          my_value_format = "(I10, I10, "//TRIM(ADJUSTL(value_format))//")"
    2282             :       ELSE
    2283          10 :          my_value_format = "(I10, I10, ES24.12)"
    2284             :       END IF
    2285             : 
    2286          10 :       IF (unit > 0) THEN
    2287           5 :          IF (PRESENT(header)) WRITE (unit, *) header
    2288           5 :          WRITE (unit, "(A2, A8, A10, A24)") "#", "Row", "Column", ADJUSTL("Value")
    2289             :       END IF
    2290             : 
    2291             : #if defined(__SCALAPACK)
    2292          10 :       num_pe = para_env%num_pe
    2293          10 :       mepos = para_env%mepos
    2294          10 :       rb = nrow_global
    2295          10 :       tag = 0
    2296             :       ! get a new context
    2297          10 :       CALL ictxt_loc%gridinit(para_env, 'R', 1, num_pe)
    2298          10 :       CALL descinit(desc, nrow_global, ncol_global, rb, max_block, 0, 0, ictxt_loc%get_handle(), nrow_global, info)
    2299          10 :       CPASSERT(info == 0)
    2300             :       ASSOCIATE (nprow => ictxt_loc%num_pe(1), npcol => ictxt_loc%num_pe(2), &
    2301             :                  myprow => ictxt_loc%mepos(1), mypcol => ictxt_loc%mepos(2))
    2302          20 :          in = numroc(ncol_global, max_block, mypcol, 0, npcol)
    2303             : 
    2304          40 :          ALLOCATE (newdat(nrow_global, MAX(1, in)))
    2305             : 
    2306             :          ! do the actual scalapack to cols reordering
    2307             :          CALL pdgemr2d(nrow_global, ncol_global, fm%local_data, 1, 1, &
    2308             :                        fm%matrix_struct%descriptor, &
    2309          10 :                        newdat, 1, 1, desc, ictxt_loc%get_handle())
    2310             : 
    2311          30 :          ALLOCATE (vecbuf(nrow_global*max_block))
    2312        1226 :          vecbuf = HUGE(1.0_dp) ! init for valgrind
    2313          10 :          irow = 1
    2314          10 :          icol = 1
    2315             : 
    2316          32 :          DO i = 1, ncol_global, MAX(max_block, 1)
    2317          12 :             i_block = MIN(max_block, ncol_global - i + 1)
    2318             :             CALL infog2l(1, i, desc, nprow, npcol, myprow, mypcol, &
    2319          12 :                          irow_local, icol_local, iprow, ipcol)
    2320          12 :             IF (ipcol == mypcol) THEN
    2321          36 :                DO j = 1, i_block
    2322        1256 :                   vecbuf((j - 1)*nrow_global + 1:nrow_global*j) = newdat(:, icol_local + j - 1)
    2323             :                END DO
    2324             :             END IF
    2325             : 
    2326          12 :             IF (ipcol == 0) THEN
    2327             :                ! do nothing
    2328             :             ELSE
    2329           2 :                IF (ipcol == mypcol) THEN
    2330           3 :                   CALL para_env%send(vecbuf(:), 0, tag)
    2331             :                END IF
    2332           2 :                IF (mypcol == 0) THEN
    2333           5 :                   CALL para_env%recv(vecbuf(:), ipcol, tag)
    2334             :                END IF
    2335             :             END IF
    2336             : 
    2337          22 :             IF (unit > 0) THEN
    2338          36 :                DO j = 1, i_block
    2339         646 :                   DO k = (j - 1)*nrow_global + 1, nrow_global*j
    2340         610 :                      WRITE (UNIT=unit, FMT=my_value_format) irow, icol, vecbuf(k)
    2341         610 :                      irow = irow + 1
    2342         640 :                      IF (irow > nrow_global) THEN
    2343          30 :                         irow = 1
    2344          30 :                         icol = icol + 1
    2345             :                      END IF
    2346             :                   END DO
    2347             :                END DO
    2348             :             END IF
    2349             : 
    2350             :          END DO
    2351             :       END ASSOCIATE
    2352          10 :       DEALLOCATE (vecbuf)
    2353             : 
    2354          10 :       CALL ictxt_loc%gridexit()
    2355             : 
    2356          10 :       DEALLOCATE (newdat)
    2357             : 
    2358             : #else
    2359             : 
    2360             :       IF (unit > 0) THEN
    2361             :          DO j = 1, ncol_global
    2362             :             DO i = 1, nrow_global
    2363             :                WRITE (UNIT=unit, FMT=my_value_format) i, j, fm%local_data(i, j)
    2364             :             END DO
    2365             :          END DO
    2366             :       END IF
    2367             : 
    2368             : #endif
    2369          10 :       CALL timestop(handle)
    2370             : 
    2371          50 :    END SUBROUTINE cp_fm_write_formatted
    2372             : 
    2373             : ! **************************************************************************************************
    2374             : !> \brief ...
    2375             : !> \param fm ...
    2376             : !> \param unit ...
    2377             : ! **************************************************************************************************
    2378        1032 :    SUBROUTINE cp_fm_read_unformatted(fm, unit)
    2379             :       TYPE(cp_fm_type), INTENT(INOUT)          :: fm
    2380             :       INTEGER, INTENT(IN)                      :: unit
    2381             : 
    2382             :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_read_unformatted'
    2383             : 
    2384             :       INTEGER                                  :: handle, j, max_block, &
    2385             :                                                   ncol_global, nrow_global
    2386             :       TYPE(mp_para_env_type), POINTER          :: para_env
    2387             : #if defined(__SCALAPACK)
    2388             :       INTEGER                                  :: k, n_cols
    2389             :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: vecbuf
    2390             : #endif
    2391             : 
    2392        1032 :       CALL timeset(routineN, handle)
    2393             : 
    2394             :       CALL cp_fm_get_info(fm, nrow_global=nrow_global, ncol_global=ncol_global, ncol_block=max_block, &
    2395        1032 :                           para_env=para_env)
    2396             : 
    2397             : #if defined(__SCALAPACK)
    2398             : 
    2399             :       ! the parallel case could be made more efficient (see cp_fm_write_unformatted)
    2400             : 
    2401        4128 :       ALLOCATE (vecbuf(nrow_global, max_block))
    2402             : 
    2403        3080 :       DO j = 1, ncol_global, max_block
    2404             : 
    2405        2048 :          n_cols = MIN(max_block, ncol_global - j + 1)
    2406        2048 :          IF (para_env%mepos == 0) THEN
    2407       30305 :             DO k = 1, n_cols
    2408       30305 :                READ (unit) vecbuf(:, k)
    2409             :             END DO
    2410             :          END IF
    2411    11455440 :          CALL para_env%bcast(vecbuf, 0)
    2412        3080 :          CALL cp_fm_set_submatrix(fm, vecbuf, start_row=1, start_col=j, n_cols=n_cols)
    2413             : 
    2414             :       END DO
    2415             : 
    2416        1032 :       DEALLOCATE (vecbuf)
    2417             : 
    2418             : #else
    2419             : 
    2420             :       DO j = 1, ncol_global
    2421             :          READ (unit) fm%local_data(:, j)
    2422             :       END DO
    2423             : 
    2424             : #endif
    2425             : 
    2426        1032 :       CALL timestop(handle)
    2427             : 
    2428        1032 :    END SUBROUTINE cp_fm_read_unformatted
    2429             : 
    2430             : ! **************************************************************************************************
    2431             : !> \brief wrapper to scalapack function INDXG2P that computes the process
    2432             : !>         coordinate which possesses the entry of a distributed matrix specified
    2433             : !>         by a global index INDXGLOB.
    2434             : !>
    2435             : !>  Arguments
    2436             : !>  =========
    2437             : !>
    2438             : !>  INDXGLOB  (global input) INTEGER
    2439             : !>            The global index of the element.
    2440             : !>
    2441             : !>  NB        (global input) INTEGER
    2442             : !>            Block size, size of the blocks the distributed matrix is
    2443             : !>            split into.
    2444             : !>
    2445             : !>  IPROC     (local dummy) INTEGER
    2446             : !>            Dummy argument in this case in order to unify the calling
    2447             : !>            sequence of the tool-routines.
    2448             : !>
    2449             : !>  ISRCPROC  (global input) INTEGER
    2450             : !>            The coordinate of the process that possesses the first
    2451             : !>            row/column of the distributed matrix.
    2452             : !>
    2453             : !>  NPROCS    (global input) INTEGER
    2454             : !>            The total number processes over which the matrix is
    2455             : !>            distributed.
    2456             : !>
    2457             : !> \param INDXGLOB ...
    2458             : !> \param NB ...
    2459             : !> \param IPROC ...
    2460             : !> \param ISRCPROC ...
    2461             : !> \param NPROCS ...
    2462             : !> \return ...
    2463             : !> \author Mauro Del Ben [MDB] - 12.2012
    2464             : ! **************************************************************************************************
    2465    12628693 :    FUNCTION cp_fm_indxg2p(INDXGLOB, NB, IPROC, ISRCPROC, NPROCS) RESULT(G2P)
    2466             :       INTEGER, INTENT(IN)                      :: INDXGLOB, NB, IPROC, ISRCPROC, NPROCS
    2467             :       INTEGER                                  :: G2P
    2468             : 
    2469             : #if defined(__SCALAPACK)
    2470             :       INTEGER, EXTERNAL :: indxg2p
    2471             : #endif
    2472             : 
    2473             : #if defined(__SCALAPACK)
    2474             : 
    2475    12628693 :       G2P = indxg2p(INDXGLOB, NB, IPROC, ISRCPROC, NPROCS)
    2476             : 
    2477             : #else
    2478             :       MARK_USED(indxglob)
    2479             :       MARK_USED(iproc)
    2480             :       MARK_USED(isrcproc)
    2481             :       MARK_USED(nb)
    2482             :       MARK_USED(nprocs)
    2483             : 
    2484             :       G2P = 0
    2485             : 
    2486             : #endif
    2487             : 
    2488    12628693 :    END FUNCTION cp_fm_indxg2p
    2489             : 
    2490             : ! **************************************************************************************************
    2491             : !> \brief wrapper to scalapack function INDXG2L that computes the local index
    2492             : !>         of a distributed matrix entry pointed to by the global index INDXGLOB.
    2493             : !>
    2494             : !>  Arguments
    2495             : !>  =========
    2496             : !>
    2497             : !>  INDXGLOB  (global input) INTEGER
    2498             : !>            The global index of the distributed matrix entry.
    2499             : !>
    2500             : !>  NB        (global input) INTEGER
    2501             : !>            Block size, size of the blocks the distributed matrix is
    2502             : !>            split into.
    2503             : !>
    2504             : !>  IPROC     (local dummy) INTEGER
    2505             : !>            Dummy argument in this case in order to unify the calling
    2506             : !>            sequence of the tool-routines.
    2507             : !>
    2508             : !>  ISRCPROC  (local dummy) INTEGER
    2509             : !>            Dummy argument in this case in order to unify the calling
    2510             : !>            sequence of the tool-routines.
    2511             : !>
    2512             : !>  NPROCS    (global input) INTEGER
    2513             : !>            The total number processes over which the distributed
    2514             : !>            matrix is distributed.
    2515             : !>
    2516             : !> \param INDXGLOB ...
    2517             : !> \param NB ...
    2518             : !> \param IPROC ...
    2519             : !> \param ISRCPROC ...
    2520             : !> \param NPROCS ...
    2521             : !> \return ...
    2522             : !> \author Mauro Del Ben [MDB] - 12.2012
    2523             : ! **************************************************************************************************
    2524      221218 :    FUNCTION cp_fm_indxg2l(INDXGLOB, NB, IPROC, ISRCPROC, NPROCS) RESULT(G2L)
    2525             :       INTEGER, INTENT(IN)                      :: INDXGLOB, NB, IPROC, ISRCPROC, NPROCS
    2526             :       INTEGER                                  :: G2L
    2527             : 
    2528             : #if defined(__SCALAPACK)
    2529             :       INTEGER, EXTERNAL :: indxg2l
    2530             : #endif
    2531             : 
    2532             : #if defined(__SCALAPACK)
    2533             : 
    2534      221218 :       G2L = indxg2l(INDXGLOB, NB, IPROC, ISRCPROC, NPROCS)
    2535             : 
    2536             : #else
    2537             :       MARK_USED(iproc)
    2538             :       MARK_USED(isrcproc)
    2539             :       MARK_USED(nb)
    2540             :       MARK_USED(nprocs)
    2541             :       MARK_USED(indxglob)
    2542             : 
    2543             :       G2L = INDXGLOB
    2544             : 
    2545             : #endif
    2546             : 
    2547      221218 :    END FUNCTION cp_fm_indxg2l
    2548             : 
    2549             : ! **************************************************************************************************
    2550             : !> \brief wrapper to scalapack function INDXL2G that computes the global index
    2551             : !>         of a distributed matrix entry pointed to by the local index INDXLOC
    2552             : !>         of the process indicated by IPROC.
    2553             : !>
    2554             : !>  Arguments
    2555             : !>  =========
    2556             : !>
    2557             : !>  INDXLOC   (global input) INTEGER
    2558             : !>            The local index of the distributed matrix entry.
    2559             : !>
    2560             : !>  NB        (global input) INTEGER
    2561             : !>            Block size, size of the blocks the distributed matrix is
    2562             : !>            split into.
    2563             : !>
    2564             : !>  IPROC     (local input) INTEGER
    2565             : !>            The coordinate of the process whose local array row or
    2566             : !>            column is to be determined.
    2567             : !>
    2568             : !>  ISRCPROC  (global input) INTEGER
    2569             : !>            The coordinate of the process that possesses the first
    2570             : !>            row/column of the distributed matrix.
    2571             : !>
    2572             : !>  NPROCS    (global input) INTEGER
    2573             : !>            The total number processes over which the distributed
    2574             : !>            matrix is distributed.
    2575             : !>
    2576             : !> \param INDXLOC ...
    2577             : !> \param NB ...
    2578             : !> \param IPROC ...
    2579             : !> \param ISRCPROC ...
    2580             : !> \param NPROCS ...
    2581             : !> \return ...
    2582             : !> \author Mauro Del Ben [MDB] - 12.2012
    2583             : ! **************************************************************************************************
    2584      210841 :    FUNCTION cp_fm_indxl2g(INDXLOC, NB, IPROC, ISRCPROC, NPROCS) RESULT(L2G)
    2585             :       INTEGER, INTENT(IN)                      :: INDXLOC, NB, IPROC, ISRCPROC, NPROCS
    2586             :       INTEGER                                  :: L2G
    2587             : 
    2588             : #if defined(__SCALAPACK)
    2589             :       INTEGER, EXTERNAL :: indxl2g
    2590             : #endif
    2591             : 
    2592             : #if defined(__SCALAPACK)
    2593             : 
    2594      210841 :       L2G = indxl2g(INDXLOC, NB, IPROC, ISRCPROC, NPROCS)
    2595             : 
    2596             : #else
    2597             :       MARK_USED(iproc)
    2598             :       MARK_USED(isrcproc)
    2599             :       MARK_USED(nb)
    2600             :       MARK_USED(nprocs)
    2601             : 
    2602             :       L2G = INDXLOC
    2603             : 
    2604             : #endif
    2605             : 
    2606      210841 :    END FUNCTION cp_fm_indxl2g
    2607             : 
    2608             : ! **************************************************************************************************
    2609             : !> \brief ...
    2610             : !> \param mult_type ...
    2611             : ! **************************************************************************************************
    2612        8989 :    SUBROUTINE cp_fm_setup(mult_type)
    2613             :       INTEGER, INTENT(IN)                                :: mult_type
    2614             : 
    2615        8989 :       mm_type = mult_type
    2616        8989 :    END SUBROUTINE cp_fm_setup
    2617             : 
    2618             : ! **************************************************************************************************
    2619             : !> \brief ...
    2620             : !> \return ...
    2621             : ! **************************************************************************************************
    2622     1323540 :    FUNCTION cp_fm_get_mm_type() RESULT(res)
    2623             :       INTEGER                                            :: res
    2624             : 
    2625     1323540 :       res = mm_type
    2626     1323540 :    END FUNCTION
    2627             : 
    2628             : ! **************************************************************************************************
    2629             : !> \brief ...
    2630             : !> \param ictxt ...
    2631             : !> \param prec ...
    2632             : !> \return ...
    2633             : ! **************************************************************************************************
    2634          10 :    FUNCTION cp_fm_pilaenv(ictxt, prec) RESULT(res)
    2635             :       INTEGER                                            :: ictxt
    2636             :       CHARACTER(LEN=1)                                   :: prec
    2637             :       INTEGER                                            :: res
    2638             : #if defined(__SCALAPACK)
    2639             :       INTEGER                                            :: pilaenv
    2640          10 :       res = pilaenv(ictxt, prec)
    2641             : #else
    2642             :       MARK_USED(ictxt)
    2643             :       MARK_USED(prec)
    2644             :       res = -1
    2645             : #endif
    2646             : 
    2647          10 :    END FUNCTION
    2648             : 
    2649           0 : END MODULE cp_fm_types

Generated by: LCOV version 1.15