LCOV - code coverage report
Current view: top level - src/fm - cp_fm_basic_linalg.F (source / functions) Coverage Total Hit
Test: CP2K Regtests (git:c24029e) Lines: 74.4 % 723 538
Test Date: 2026-07-04 06:36:57 Functions: 67.4 % 43 29

            Line data    Source code
       1              : !--------------------------------------------------------------------------------------------------!
       2              : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3              : !   Copyright 2000-2026 CP2K developers group <https://cp2k.org>                                   !
       4              : !                                                                                                  !
       5              : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6              : !--------------------------------------------------------------------------------------------------!
       7              : 
       8              : ! **************************************************************************************************
       9              : !> \brief Basic linear algebra operations for full matrices.
      10              : !> \par History
      11              : !>      08.2002 split out of qs_blacs [fawzi]
      12              : !> \author Fawzi Mohamed
      13              : ! **************************************************************************************************
      14              : MODULE cp_fm_basic_linalg
      15              :    USE cp_blacs_env, ONLY: cp_blacs_env_type
      16              :    USE cp_fm_struct, ONLY: cp_fm_struct_equivalent
      17              :    USE cp_fm_types, ONLY: &
      18              :       cp_fm_create, cp_fm_get_diag, cp_fm_get_info, cp_fm_get_submatrix, cp_fm_p_type, &
      19              :       cp_fm_release, cp_fm_set_all, cp_fm_set_element, cp_fm_set_submatrix, cp_fm_to_fm, &
      20              :       cp_fm_type
      21              :    USE cp_log_handling, ONLY: cp_logger_get_default_unit_nr, &
      22              :                               cp_to_string
      23              :    USE kahan_sum, ONLY: accurate_dot_product, &
      24              :                         accurate_sum
      25              :    USE kinds, ONLY: dp, &
      26              :                     int_8
      27              :    USE machine, ONLY: m_memory
      28              :    USE mathlib, ONLY: get_pseudo_inverse_svd, &
      29              :                       invert_matrix
      30              :    USE message_passing, ONLY: mp_comm_type
      31              : #if defined (__HAS_IEEE_EXCEPTIONS)
      32              :    USE ieee_exceptions, ONLY: ieee_get_halting_mode, &
      33              :                               ieee_set_halting_mode, &
      34              :                               IEEE_ALL
      35              : #endif
      36              : #include "../base/base_uses.f90"
      37              : 
      38              :    IMPLICIT NONE
      39              :    PRIVATE
      40              : 
      41              :    LOGICAL, PRIVATE, PARAMETER :: debug_this_module = .TRUE.
      42              :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_fm_basic_linalg'
      43              : 
      44              :    PUBLIC :: cp_fm_scale, & ! scale a matrix
      45              :              cp_fm_scale_and_add, & ! scale and add two matrices
      46              :              cp_fm_geadd, & ! general addition
      47              :              cp_fm_add_columns, & ! daxpy type column add
      48              :              cp_fm_column_scale, & ! scale columns of a matrix
      49              :              cp_fm_row_scale, & ! scale rows of a matrix
      50              :              cp_fm_trace, & ! trace of the transpose(A)*B
      51              :              cp_fm_contracted_trace, & ! sum_{i,...,k} Tr [A(i,...,k)^T * B(i,...,k)]
      52              :              cp_fm_norm, & ! different norms of A
      53              :              cp_fm_schur_product, & ! schur product
      54              :              cp_fm_transpose, & ! transpose a matrix
      55              :              cp_fm_uplo_to_full, & ! symmetrise a triangular matrix
      56              :              cp_fm_syrk, & ! rank k update
      57              :              cp_fm_triangular_multiply, & ! triangular matrix multiply / solve
      58              :              cp_fm_symm, & ! multiply a symmetric with a non-symmetric matrix
      59              :              cp_fm_gemm, & ! multiply two matrices
      60              :              cp_complex_fm_gemm, & ! multiply two complex matrices, represented by non_complex fm matrices
      61              :              cp_fm_invert, & ! computes the inverse and determinant
      62              :              cp_fm_frobenius_norm, & ! frobenius norm
      63              :              cp_fm_triangular_invert, & ! compute the reciprocal of a triangular matrix
      64              :              cp_fm_qr_factorization, & ! compute the QR factorization of a rectangular matrix
      65              :              cp_fm_solve, & ! solves the equation  A*B=C A and C are input
      66              :              cp_fm_pdgeqpf, & ! compute a QR factorization with column pivoting of a M-by-N distributed matrix
      67              :              cp_fm_pdorgqr, & ! generates an M-by-N as first N columns of a product of K elementary reflectors
      68              :              cp_fm_potrf, & ! Cholesky decomposition
      69              :              cp_fm_potri, & ! Invert triangular matrix
      70              :              cp_fm_rot_rows, & ! rotates two rows
      71              :              cp_fm_rot_cols, & ! rotates two columns
      72              :              cp_fm_Gram_Schmidt_orthonorm, & ! Gram-Schmidt orthonormalization of columns of a full matrix, &
      73              :              cp_fm_det, & ! determinant of a real matrix with correct sign
      74              :              cp_fm_matvec ! matrix-vector multiplication (vector replicated)
      75              : 
      76              :    REAL(KIND=dp), EXTERNAL :: dlange, pdlange, pdlatra
      77              : 
      78              :    INTERFACE cp_fm_trace
      79              :       MODULE PROCEDURE cp_fm_trace_a0b0t0
      80              :       MODULE PROCEDURE cp_fm_trace_a1b0t1_a
      81              :       MODULE PROCEDURE cp_fm_trace_a1b0t1_p
      82              :       MODULE PROCEDURE cp_fm_trace_a1b1t1_aa
      83              :       MODULE PROCEDURE cp_fm_trace_a1b1t1_ap
      84              :       MODULE PROCEDURE cp_fm_trace_a1b1t1_pa
      85              :       MODULE PROCEDURE cp_fm_trace_a1b1t1_pp
      86              :    END INTERFACE cp_fm_trace
      87              : 
      88              :    INTERFACE cp_fm_contracted_trace
      89              :       MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2_aa
      90              :       MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2_ap
      91              :       MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2_pa
      92              :       MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2_pp
      93              :    END INTERFACE cp_fm_contracted_trace
      94              : CONTAINS
      95              : 
      96              : ! **************************************************************************************************
      97              : !> \brief Computes the determinant (with a correct sign even in parallel environment!) of a real square matrix
      98              : !> \author A. Sinyavskiy (andrey.sinyavskiy@chem.uzh.ch)
      99              : ! **************************************************************************************************
     100            0 :    SUBROUTINE cp_fm_det(matrix_a, det_a)
     101              : 
     102              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix_a
     103              :       REAL(KIND=dp), INTENT(OUT)               :: det_a
     104              :       REAL(KIND=dp)                            :: determinant
     105              :       TYPE(cp_fm_type)                         :: matrix_lu
     106              :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
     107              :       INTEGER                                  :: n, i, info, P
     108            0 :       INTEGER, ALLOCATABLE, DIMENSION(:)       :: ipivot
     109              :       REAL(KIND=dp), DIMENSION(:), POINTER     :: diag
     110              : 
     111              : #if defined(__parallel)
     112              :       INTEGER                                  :: myprow, nprow, npcol, nrow_local, nrow_block, irow_local
     113              :       INTEGER, DIMENSION(9)                    :: desca
     114              : #endif
     115              : 
     116              :       CALL cp_fm_create(matrix=matrix_lu, &
     117              :                         matrix_struct=matrix_a%matrix_struct, &
     118            0 :                         name="A_lu"//TRIM(ADJUSTL(cp_to_string(1)))//"MATRIX")
     119            0 :       CALL cp_fm_to_fm(matrix_a, matrix_lu)
     120              : 
     121            0 :       a => matrix_lu%local_data
     122            0 :       n = matrix_lu%matrix_struct%nrow_global
     123            0 :       ALLOCATE (ipivot(n))
     124            0 :       ipivot(:) = 0
     125            0 :       P = 0
     126            0 :       ALLOCATE (diag(n))
     127            0 :       diag(:) = 0.0_dp
     128              : #if defined(__parallel)
     129              :       ! Use LU decomposition
     130            0 :       desca(:) = matrix_lu%matrix_struct%descriptor(:)
     131            0 :       CALL pdgetrf(n, n, a, 1, 1, desca, ipivot, info)
     132            0 :       CALL cp_fm_get_diag(matrix_lu, diag)
     133            0 :       determinant = PRODUCT(diag)
     134            0 :       myprow = matrix_lu%matrix_struct%context%mepos(1)
     135            0 :       nprow = matrix_lu%matrix_struct%context%num_pe(1)
     136            0 :       npcol = matrix_lu%matrix_struct%context%num_pe(2)
     137            0 :       nrow_local = matrix_lu%matrix_struct%nrow_locals(myprow)
     138            0 :       nrow_block = matrix_lu%matrix_struct%nrow_block
     139            0 :       DO irow_local = 1, nrow_local
     140            0 :          i = matrix_lu%matrix_struct%row_indices(irow_local)
     141            0 :          IF (ipivot(irow_local) /= i) P = P + 1
     142              :       END DO
     143            0 :       CALL matrix_lu%matrix_struct%para_env%sum(P)
     144              :       ! very important fix
     145            0 :       P = P/npcol
     146              : #else
     147              :       CALL dgetrf(n, n, a, n, ipivot, info)
     148              :       CALL cp_fm_get_diag(matrix_lu, diag)
     149              :       determinant = PRODUCT(diag)
     150              :       DO i = 1, n
     151              :          IF (ipivot(i) /= i) P = P + 1
     152              :       END DO
     153              : #endif
     154            0 :       DEALLOCATE (ipivot)
     155            0 :       DEALLOCATE (diag)
     156            0 :       CALL cp_fm_release(matrix_lu)
     157            0 :       det_a = determinant*(-2*MOD(P, 2) + 1.0_dp)
     158            0 :    END SUBROUTINE cp_fm_det
     159              : 
     160              : ! **************************************************************************************************
     161              : !> \brief calc A <- alpha*A + beta*B
     162              : !>      optimized for alpha == 1.0 (just add beta*B) and beta == 0.0 (just
     163              : !>      scale A)
     164              : !> \param alpha ...
     165              : !> \param matrix_a ...
     166              : !> \param beta ...
     167              : !> \param matrix_b ...
     168              : ! **************************************************************************************************
     169      1229924 :    SUBROUTINE cp_fm_scale_and_add(alpha, matrix_a, beta, matrix_b)
     170              : 
     171              :       REAL(KIND=dp), INTENT(IN)                          :: alpha
     172              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix_a
     173              :       REAL(KIND=dp), INTENT(IN), OPTIONAL                :: beta
     174              :       TYPE(cp_fm_type), INTENT(IN), OPTIONAL             :: matrix_b
     175              : 
     176              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_scale_and_add'
     177              : 
     178              :       INTEGER                                            :: handle, size_a, size_b
     179              :       REAL(KIND=dp)                                      :: my_beta
     180              :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: a, b
     181              : 
     182      1229924 :       CALL timeset(routineN, handle)
     183              : 
     184      1229924 :       my_beta = 0.0_dp
     185      1229924 :       IF (PRESENT(matrix_b)) my_beta = 1.0_dp
     186      1229924 :       IF (PRESENT(beta)) my_beta = beta
     187              :       NULLIFY (a, b)
     188              : 
     189      1229924 :       IF (PRESENT(beta)) THEN
     190      1229922 :          CPASSERT(PRESENT(matrix_b))
     191      1229922 :          IF (ASSOCIATED(matrix_a%local_data, matrix_b%local_data)) THEN
     192            0 :             CPWARN("Bad use of routine. Call cp_fm_scale instead")
     193            0 :             CALL cp_fm_scale(alpha + beta, matrix_a)
     194            0 :             CALL timestop(handle)
     195            0 :             RETURN
     196              :          END IF
     197              :       END IF
     198              : 
     199      1229924 :       a => matrix_a%local_data
     200              : 
     201      1229924 :       size_a = SIZE(a, 1)*SIZE(a, 2)
     202              : 
     203      1229924 :       IF (alpha /= 1.0_dp) THEN
     204        80542 :          CALL dscal(size_a, alpha, a, 1)
     205              :       END IF
     206      1229924 :       IF (my_beta /= 0.0_dp) THEN
     207      1223094 :          IF (matrix_a%matrix_struct%context /= matrix_b%matrix_struct%context) &
     208            0 :             CPABORT("Matrices must be in the same blacs context")
     209              : 
     210      1223094 :          IF (cp_fm_struct_equivalent(matrix_a%matrix_struct, &
     211              :                                      matrix_b%matrix_struct)) THEN
     212              : 
     213      1223094 :             b => matrix_b%local_data
     214      1223094 :             size_b = SIZE(b, 1)*SIZE(b, 2)
     215      1223094 :             IF (size_a /= size_b) &
     216            0 :                CPABORT("Matrices must have same local sizes")
     217              : 
     218      1223094 :             CALL daxpy(size_a, my_beta, b, 1, a, 1)
     219              : 
     220              :          ELSE
     221              :             CALL cp_abort(__LOCATION__, &
     222              :                           "cp_fm_scale_and_add is not yet implemented for cases "// &
     223            0 :                           "where two input matrix structures are not equivalent")
     224              :          END IF
     225              : 
     226              :       END IF
     227              : 
     228      1229924 :       CALL timestop(handle)
     229              : 
     230              :    END SUBROUTINE cp_fm_scale_and_add
     231              : 
     232              : ! **************************************************************************************************
     233              : !> \brief interface to BLACS geadd:
     234              : !>                matrix_b = beta*matrix_b + alpha*opt(matrix_a)
     235              : !>        where opt(matrix_a) can be either:
     236              : !>              'N':  matrix_a
     237              : !>              'T':  matrix_a^T
     238              : !>              'C':  matrix_a^H (Hermitian conjugate)
     239              : !>        note that this is a level three routine, use cp_fm_scale_and_add if that
     240              : !>        is sufficient for your needs
     241              : !> \param alpha  : complex scalar
     242              : !> \param trans  : 'N' normal, 'T' transposed
     243              : !> \param matrix_a : input matrix_a
     244              : !> \param beta   : complex scalar
     245              : !> \param matrix_b : input matrix_b, upon out put the updated matrix_b
     246              : !> \author  Lianheng Tong
     247              : ! **************************************************************************************************
     248          602 :    SUBROUTINE cp_fm_geadd(alpha, trans, matrix_a, beta, matrix_b)
     249              :       REAL(KIND=dp), INTENT(IN) :: alpha, beta
     250              :       CHARACTER, INTENT(IN) :: trans
     251              :       TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_b
     252              : 
     253              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_geadd'
     254              : 
     255              :       INTEGER :: nrow_global, ncol_global, handle
     256              :       REAL(KIND=dp), DIMENSION(:, :), POINTER :: aa, bb
     257              : #if defined(__parallel)
     258              :       INTEGER, DIMENSION(9) :: desca, descb
     259              : #elif !defined(__MKL)
     260              :       INTEGER :: ii, jj
     261              : #endif
     262              : 
     263          602 :       CALL timeset(routineN, handle)
     264              : 
     265          602 :       nrow_global = matrix_a%matrix_struct%nrow_global
     266          602 :       ncol_global = matrix_a%matrix_struct%ncol_global
     267          602 :       CPASSERT(nrow_global == matrix_b%matrix_struct%nrow_global)
     268          602 :       CPASSERT(ncol_global == matrix_b%matrix_struct%ncol_global)
     269              : 
     270          602 :       aa => matrix_a%local_data
     271          602 :       bb => matrix_b%local_data
     272              : 
     273              : #if defined(__parallel)
     274         6020 :       desca = matrix_a%matrix_struct%descriptor
     275         6020 :       descb = matrix_b%matrix_struct%descriptor
     276              :       CALL pdgeadd(trans, &
     277              :                    nrow_global, &
     278              :                    ncol_global, &
     279              :                    alpha, &
     280              :                    aa, &
     281              :                    1, 1, &
     282              :                    desca, &
     283              :                    beta, &
     284              :                    bb, &
     285              :                    1, 1, &
     286          602 :                    descb)
     287              : #elif defined(__MKL)
     288              :       CALL mkl_domatadd('C', trans, 'N', nrow_global, ncol_global, &
     289              :                         alpha, aa, nrow_global, beta, bb, nrow_global, bb, nrow_global)
     290              : #else
     291              :       ! dgeadd is not a standard BLAS function, although it is implemented
     292              :       ! in some libraries like OpenBLAS, so not going to use it here
     293              :       SELECT CASE (trans)
     294              :       CASE ('T')
     295              :          DO jj = 1, ncol_global
     296              :             DO ii = 1, nrow_global
     297              :                bb(ii, jj) = beta*bb(ii, jj) + alpha*aa(jj, ii)
     298              :             END DO
     299              :          END DO
     300              :       CASE DEFAULT
     301              :          DO jj = 1, ncol_global
     302              :             DO ii = 1, nrow_global
     303              :                bb(ii, jj) = beta*bb(ii, jj) + alpha*aa(ii, jj)
     304              :             END DO
     305              :          END DO
     306              :       END SELECT
     307              : #endif
     308              : 
     309          602 :       CALL timestop(handle)
     310              : 
     311          602 :    END SUBROUTINE cp_fm_geadd
     312              : 
     313              : ! **************************************************************************************************
     314              : !> \brief Add (and scale) a subset of columns of a fm to a fm
     315              : !>        b = alpha*a + b
     316              : !> \param msource ...
     317              : !> \param mtarget ...
     318              : !> \param ncol ...
     319              : !> \param alpha ...
     320              : !> \param source_start ...
     321              : !> \param target_start ...
     322              : ! **************************************************************************************************
     323            0 :    SUBROUTINE cp_fm_add_columns(msource, mtarget, ncol, alpha, source_start, target_start)
     324              : 
     325              :       TYPE(cp_fm_type), INTENT(IN)             :: msource, mtarget
     326              :       INTEGER, INTENT(IN)                      :: ncol
     327              :       REAL(KIND=dp), INTENT(IN), OPTIONAL      :: alpha
     328              :       INTEGER, INTENT(IN), OPTIONAL            :: source_start, target_start
     329              : 
     330              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_add_columns'
     331              : 
     332              :       INTEGER                                  :: handle, n, ss, ts, i
     333              :       REAL(KIND=dp)                            :: fscale
     334              :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a, b
     335              : #if defined(__parallel)
     336              :       INTEGER, DIMENSION(9)                    :: desca, descb
     337              : #endif
     338              : 
     339            0 :       CALL timeset(routineN, handle)
     340              : 
     341            0 :       ss = 1
     342            0 :       ts = 1
     343              : 
     344            0 :       IF (PRESENT(source_start)) ss = source_start
     345            0 :       IF (PRESENT(target_start)) ts = target_start
     346              : 
     347            0 :       fscale = 1.0_dp
     348            0 :       IF (PRESENT(alpha)) fscale = alpha
     349              : 
     350            0 :       n = msource%matrix_struct%nrow_global
     351              : 
     352            0 :       a => msource%local_data
     353            0 :       b => mtarget%local_data
     354              : 
     355              : #if defined(__parallel)
     356              :       MARK_USED(i)
     357            0 :       desca(:) = msource%matrix_struct%descriptor(:)
     358            0 :       descb(:) = mtarget%matrix_struct%descriptor(:)
     359            0 :       CALL pdgeadd("N", n, ncol, fscale, a, 1, ss, desca, 1.0_dp, b, 1, ts, descb)
     360              : #else
     361              :       DO i = 0, ncol - 1
     362              :          b(1:n, ts + i) = b(1:n, ts + i) + fscale*a(1:n, ss + i)
     363              :       END DO
     364              : #endif
     365              : 
     366            0 :       CALL timestop(handle)
     367              : 
     368            0 :    END SUBROUTINE cp_fm_add_columns
     369              : 
     370              : ! **************************************************************************************************
     371              : !> \brief Computes the LU-decomposition of the matrix, and the determinant of the matrix
     372              : !>      IMPORTANT : the sign of the determinant is not defined correctly yet ....
     373              : !> \param matrix_a ...
     374              : !> \param almost_determinant ...
     375              : !> \param correct_sign ...
     376              : !> \par History
     377              : !>      added correct_sign 02.07 (fschiff)
     378              : !> \author Joost VandeVondele
     379              : !> \note
     380              : !>      - matrix_a is overwritten
     381              : !>      - the sign of the determinant might be wrong
     382              : !>      - SERIOUS WARNING (KNOWN BUG) : the sign of the determinant depends on ipivot
     383              : !>      - one should be able to find out if ipivot is an even or an odd permutation...
     384              : !>        if you need the correct sign, just add correct_sign==.TRUE. (fschiff)
     385              : !>      - Use cp_fm_get_diag instead of n times cp_fm_get_element (A. Bussy)
     386              : ! **************************************************************************************************
     387            0 :    SUBROUTINE cp_fm_lu_decompose(matrix_a, almost_determinant, correct_sign)
     388              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix_a
     389              :       REAL(KIND=dp), INTENT(OUT)               :: almost_determinant
     390              :       LOGICAL, INTENT(IN), OPTIONAL            :: correct_sign
     391              : 
     392              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_lu_decompose'
     393              : 
     394              :       INTEGER                                  :: handle, i, info, n
     395            0 :       INTEGER, ALLOCATABLE, DIMENSION(:)       :: ipivot
     396              :       REAL(KIND=dp)                            :: determinant
     397              :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
     398              : #if defined(__parallel)
     399              :       INTEGER, DIMENSION(9)                    :: desca
     400            0 :       REAL(KIND=dp), DIMENSION(:), POINTER     :: diag
     401              : #else
     402              :       INTEGER                                  :: lda
     403              : #endif
     404              : 
     405            0 :       CALL timeset(routineN, handle)
     406              : 
     407            0 :       a => matrix_a%local_data
     408            0 :       n = matrix_a%matrix_struct%nrow_global
     409            0 :       ALLOCATE (ipivot(n + matrix_a%matrix_struct%nrow_block))
     410              : 
     411              : #if defined(__parallel)
     412              :       MARK_USED(correct_sign)
     413            0 :       desca(:) = matrix_a%matrix_struct%descriptor(:)
     414            0 :       CALL pdgetrf(n, n, a, 1, 1, desca, ipivot, info)
     415              : 
     416            0 :       ALLOCATE (diag(n))
     417            0 :       diag(:) = 0.0_dp
     418            0 :       CALL cp_fm_get_diag(matrix_a, diag)
     419            0 :       determinant = 1.0_dp
     420            0 :       DO i = 1, n
     421            0 :          determinant = determinant*diag(i)
     422              :       END DO
     423            0 :       DEALLOCATE (diag)
     424              : #else
     425              :       lda = SIZE(a, 1)
     426              :       CALL dgetrf(n, n, a, lda, ipivot, info)
     427              :       determinant = 1.0_dp
     428              :       IF (correct_sign) THEN
     429              :          DO i = 1, n
     430              :             IF (ipivot(i) /= i) THEN
     431              :                determinant = -determinant*a(i, i)
     432              :             ELSE
     433              :                determinant = determinant*a(i, i)
     434              :             END IF
     435              :          END DO
     436              :       ELSE
     437              :          DO i = 1, n
     438              :             determinant = determinant*a(i, i)
     439              :          END DO
     440              :       END IF
     441              : #endif
     442              :       ! info is allowed to be zero
     443              :       ! this does just signal a zero diagonal element
     444            0 :       DEALLOCATE (ipivot)
     445            0 :       almost_determinant = determinant ! notice that the sign is random
     446            0 :       CALL timestop(handle)
     447            0 :    END SUBROUTINE cp_fm_lu_decompose
     448              : 
     449              : ! **************************************************************************************************
     450              : !> \brief computes matrix_c = beta * matrix_c + alpha * ( matrix_a  ** transa ) * ( matrix_b ** transb )
     451              : !> \param transa : 'N' -> normal   'T' -> transpose
     452              : !>      alpha,beta :: can be 0.0_dp and 1.0_dp
     453              : !> \param transb ...
     454              : !> \param m ...
     455              : !> \param n ...
     456              : !> \param k ...
     457              : !> \param alpha ...
     458              : !> \param matrix_a : m x k matrix ( ! for transa = 'N')
     459              : !> \param matrix_b : k x n matrix ( ! for transb = 'N')
     460              : !> \param beta ...
     461              : !> \param matrix_c : m x n matrix
     462              : !> \param a_first_col ...
     463              : !> \param a_first_row ...
     464              : !> \param b_first_col : the k x n matrix starts at col b_first_col of matrix_b (avoid usage)
     465              : !> \param b_first_row ...
     466              : !> \param c_first_col ...
     467              : !> \param c_first_row ...
     468              : !> \author Matthias Krack
     469              : !> \note
     470              : !>      matrix_c should have no overlap with matrix_a, matrix_b
     471              : ! **************************************************************************************************
     472          656 :    SUBROUTINE cp_fm_gemm(transa, transb, m, n, k, alpha, matrix_a, matrix_b, beta, &
     473              :                          matrix_c, a_first_col, a_first_row, b_first_col, b_first_row, &
     474              :                          c_first_col, c_first_row)
     475              : 
     476              :       CHARACTER(LEN=1), INTENT(IN)             :: transa, transb
     477              :       INTEGER, INTENT(IN)                      :: m, n, k
     478              :       REAL(KIND=dp), INTENT(IN)                :: alpha
     479              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix_a, matrix_b
     480              :       REAL(KIND=dp), INTENT(IN)                :: beta
     481              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix_c
     482              :       INTEGER, INTENT(IN), OPTIONAL            :: a_first_col, a_first_row, &
     483              :                                                   b_first_col, b_first_row, &
     484              :                                                   c_first_col, c_first_row
     485              : 
     486              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_gemm'
     487              : 
     488              :       INTEGER                                  :: handle, i_a, i_b, i_c, j_a, &
     489              :                                                   j_b, j_c
     490              :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a, b, c
     491              : #if defined(__parallel)
     492              :       INTEGER, DIMENSION(9)                    :: desca, descb, descc
     493              : #else
     494              :       INTEGER                                  :: lda, ldb, ldc
     495              : #endif
     496              : 
     497          656 :       CALL timeset(routineN, handle)
     498              : 
     499              :       !sample peak memory
     500          656 :       CALL m_memory()
     501              : 
     502          656 :       a => matrix_a%local_data
     503          656 :       b => matrix_b%local_data
     504          656 :       c => matrix_c%local_data
     505              : 
     506          656 :       i_a = 1
     507          656 :       IF (PRESENT(a_first_row)) i_a = a_first_row
     508              : 
     509          656 :       j_a = 1
     510          656 :       IF (PRESENT(a_first_col)) j_a = a_first_col
     511              : 
     512          656 :       i_b = 1
     513          656 :       IF (PRESENT(b_first_row)) i_b = b_first_row
     514              : 
     515          656 :       j_b = 1
     516          656 :       IF (PRESENT(b_first_col)) j_b = b_first_col
     517              : 
     518          656 :       i_c = 1
     519          656 :       IF (PRESENT(c_first_row)) i_c = c_first_row
     520              : 
     521          656 :       j_c = 1
     522          656 :       IF (PRESENT(c_first_col)) j_c = c_first_col
     523              : 
     524              : #if defined(__parallel)
     525              : 
     526         6560 :       desca(:) = matrix_a%matrix_struct%descriptor(:)
     527         6560 :       descb(:) = matrix_b%matrix_struct%descriptor(:)
     528         6560 :       descc(:) = matrix_c%matrix_struct%descriptor(:)
     529              : 
     530              :       CALL pdgemm(transa, transb, m, n, k, alpha, a, i_a, j_a, desca, b, i_b, j_b, &
     531          656 :                   descb, beta, c, i_c, j_c, descc)
     532              : 
     533              : #else
     534              : 
     535              :       lda = SIZE(a, 1)
     536              :       ldb = SIZE(b, 1)
     537              :       ldc = SIZE(c, 1)
     538              : 
     539              :       CALL dgemm(transa, transb, m, n, k, alpha, a(i_a, j_a), lda, b(i_b, j_b), ldb, beta, c(i_c, j_c), ldc)
     540              : 
     541              : #endif
     542          656 :       CALL timestop(handle)
     543              : 
     544          656 :    END SUBROUTINE cp_fm_gemm
     545              : 
     546              : ! **************************************************************************************************
     547              : !> \brief computes matrix_c = beta * matrix_c + alpha *  matrix_a  *  matrix_b
     548              : !>      computes matrix_c = beta * matrix_c + alpha *  matrix_b  *  matrix_a
     549              : !>      where matrix_a is symmetric
     550              : !> \param side : 'L' -> matrix_a is on the left 'R' -> matrix_a is on the right
     551              : !>      alpha,beta :: can be 0.0_dp and 1.0_dp
     552              : !> \param uplo triangular format
     553              : !> \param m ...
     554              : !> \param n ...
     555              : !> \param alpha ...
     556              : !> \param matrix_a : m x m matrix
     557              : !> \param matrix_b : m x n matrix
     558              : !> \param beta ...
     559              : !> \param matrix_c : m x n matrix
     560              : !> \author Matthias Krack
     561              : !> \note
     562              : !>      matrix_c should have no overlap with matrix_a, matrix_b
     563              : !>      all matrices in QS are triangular according to uplo
     564              : !>      matrix_a is always an m x m matrix
     565              : !>      typically slower than cp_fm_gemm (especially in parallel easily 50 percent)
     566              : ! **************************************************************************************************
     567       204608 :    SUBROUTINE cp_fm_symm(side, uplo, m, n, alpha, matrix_a, matrix_b, beta, matrix_c)
     568              : 
     569              :       CHARACTER(LEN=1), INTENT(IN)             :: side, uplo
     570              :       INTEGER, INTENT(IN)                      :: m, n
     571              :       REAL(KIND=dp), INTENT(IN)                :: alpha
     572              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix_a, matrix_b
     573              :       REAL(KIND=dp), INTENT(IN)                :: beta
     574              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix_c
     575              : 
     576              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_symm'
     577              : 
     578              :       INTEGER                                  :: handle
     579       204608 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a, b, c
     580              : #if defined(__parallel)
     581              :       INTEGER, DIMENSION(9)                    :: desca, descb, descc
     582              : #else
     583              :       INTEGER                                  :: lda, ldb, ldc
     584              : #endif
     585              : 
     586       204608 :       CALL timeset(routineN, handle)
     587              : 
     588       204608 :       a => matrix_a%local_data
     589       204608 :       b => matrix_b%local_data
     590       204608 :       c => matrix_c%local_data
     591              : 
     592              : #if defined(__parallel)
     593              : 
     594      2046080 :       desca(:) = matrix_a%matrix_struct%descriptor(:)
     595      2046080 :       descb(:) = matrix_b%matrix_struct%descriptor(:)
     596      2046080 :       descc(:) = matrix_c%matrix_struct%descriptor(:)
     597              : 
     598       204608 :       CALL pdsymm(side, uplo, m, n, alpha, a(1, 1), 1, 1, desca, b(1, 1), 1, 1, descb, beta, c(1, 1), 1, 1, descc)
     599              : 
     600              : #else
     601              : 
     602              :       lda = matrix_a%matrix_struct%local_leading_dimension
     603              :       ldb = matrix_b%matrix_struct%local_leading_dimension
     604              :       ldc = matrix_c%matrix_struct%local_leading_dimension
     605              : 
     606              :       CALL dsymm(side, uplo, m, n, alpha, a(1, 1), lda, b(1, 1), ldb, beta, c(1, 1), ldc)
     607              : 
     608              : #endif
     609       204608 :       CALL timestop(handle)
     610              : 
     611       204608 :    END SUBROUTINE cp_fm_symm
     612              : 
     613              : ! **************************************************************************************************
     614              : !> \brief computes the Frobenius norm of matrix_a
     615              : !> \brief computes the Frobenius norm of matrix_a
     616              : !> \param matrix_a : m x n matrix
     617              : !> \return ...
     618              : !> \author VW
     619              : ! **************************************************************************************************
     620         7926 :    FUNCTION cp_fm_frobenius_norm(matrix_a) RESULT(norm)
     621              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix_a
     622              :       REAL(KIND=dp)                            :: norm
     623              : 
     624              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_frobenius_norm'
     625              : 
     626              :       INTEGER                                  :: handle, size_a
     627         7926 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
     628              :       REAL(KIND=dp), EXTERNAL                  :: DDOT
     629              : #if defined(__parallel)
     630              :       TYPE(mp_comm_type)                       :: group
     631              : #endif
     632              : 
     633         7926 :       CALL timeset(routineN, handle)
     634              : 
     635              :       norm = 0.0_dp
     636         7926 :       a => matrix_a%local_data
     637         7926 :       size_a = SIZE(a, 1)*SIZE(a, 2)
     638         7926 :       norm = DDOT(size_a, a(1, 1), 1, a(1, 1), 1)
     639              : #if defined(__parallel)
     640         7926 :       group = matrix_a%matrix_struct%para_env
     641         7926 :       CALL group%sum(norm)
     642              : #endif
     643         7926 :       norm = SQRT(norm)
     644              : 
     645         7926 :       CALL timestop(handle)
     646              : 
     647         7926 :    END FUNCTION cp_fm_frobenius_norm
     648              : 
     649              : ! **************************************************************************************************
     650              : !> \brief performs a rank-k update of a symmetric matrix_c
     651              : !>         matrix_c = beta * matrix_c + alpha * matrix_a * transpose ( matrix_a )
     652              : !> \param uplo : 'U'   ('L')
     653              : !> \param trans : 'N'  ('T')
     654              : !> \param k : number of cols to use in matrix_a
     655              : !>      ia,ja ::  1,1 (could be used for selecting subblock of a)
     656              : !> \param alpha ...
     657              : !> \param matrix_a ...
     658              : !> \param ia ...
     659              : !> \param ja ...
     660              : !> \param beta ...
     661              : !> \param matrix_c ...
     662              : !> \author Matthias Krack
     663              : ! **************************************************************************************************
     664        10542 :    SUBROUTINE cp_fm_syrk(uplo, trans, k, alpha, matrix_a, ia, ja, beta, matrix_c)
     665              :       CHARACTER(LEN=1), INTENT(IN)             :: uplo, trans
     666              :       INTEGER, INTENT(IN)                      :: k
     667              :       REAL(KIND=dp), INTENT(IN)                :: alpha
     668              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix_a
     669              :       INTEGER, INTENT(IN)                      :: ia, ja
     670              :       REAL(KIND=dp), INTENT(IN)                :: beta
     671              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix_c
     672              : 
     673              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_syrk'
     674              : 
     675              :       INTEGER                                  :: handle, n
     676        10542 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a, c
     677              : #if defined(__parallel)
     678              :       INTEGER, DIMENSION(9)                    :: desca, descc
     679              : #else
     680              :       INTEGER                                  :: lda, ldc
     681              : #endif
     682              : #if defined (__HAS_IEEE_EXCEPTIONS)
     683              :       LOGICAL, DIMENSION(5)                    :: halt
     684              : #endif
     685              : 
     686        10542 :       CALL timeset(routineN, handle)
     687              : 
     688        10542 :       n = matrix_c%matrix_struct%nrow_global
     689              : 
     690        10542 :       a => matrix_a%local_data
     691        10542 :       c => matrix_c%local_data
     692              : 
     693              : #if defined (__HAS_IEEE_EXCEPTIONS)
     694              :       CALL ieee_get_halting_mode(IEEE_ALL, halt)
     695              :       CALL ieee_set_halting_mode(IEEE_ALL, .FALSE.)
     696              : #endif
     697              : #if defined(__parallel)
     698       105420 :       desca(:) = matrix_a%matrix_struct%descriptor(:)
     699       105420 :       descc(:) = matrix_c%matrix_struct%descriptor(:)
     700              : 
     701        10542 :       CALL pdsyrk(uplo, trans, n, k, alpha, a(1, 1), ia, ja, desca, beta, c(1, 1), 1, 1, descc)
     702              : #else
     703              :       lda = SIZE(a, 1)
     704              :       ldc = SIZE(c, 1)
     705              : 
     706              :       CALL dsyrk(uplo, trans, n, k, alpha, a(ia, ja), lda, beta, c(1, 1), ldc)
     707              : #endif
     708              : #if defined (__HAS_IEEE_EXCEPTIONS)
     709              :       CALL ieee_set_halting_mode(IEEE_ALL, halt)
     710              : #endif
     711        10542 :       CALL timestop(handle)
     712              : 
     713        10542 :    END SUBROUTINE cp_fm_syrk
     714              : 
     715              : ! **************************************************************************************************
     716              : !> \brief computes the schur product of two matrices
     717              : !>       c_ij = a_ij * b_ij
     718              : !> \param matrix_a ...
     719              : !> \param matrix_b ...
     720              : !> \param matrix_c ...
     721              : !> \author Joost VandeVondele
     722              : ! **************************************************************************************************
     723         9218 :    SUBROUTINE cp_fm_schur_product(matrix_a, matrix_b, matrix_c)
     724              : 
     725              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix_a, matrix_b, matrix_c
     726              : 
     727              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_schur_product'
     728              : 
     729              :       INTEGER                                            :: handle, icol_local, irow_local, mypcol, &
     730              :                                                             myprow, ncol_local, &
     731              :                                                             nrow_local
     732         9218 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: a, b, c
     733              :       TYPE(cp_blacs_env_type), POINTER                   :: context
     734              : 
     735         9218 :       CALL timeset(routineN, handle)
     736              : 
     737         9218 :       context => matrix_a%matrix_struct%context
     738         9218 :       myprow = context%mepos(1)
     739         9218 :       mypcol = context%mepos(2)
     740              : 
     741         9218 :       a => matrix_a%local_data
     742         9218 :       b => matrix_b%local_data
     743         9218 :       c => matrix_c%local_data
     744              : 
     745         9218 :       nrow_local = matrix_a%matrix_struct%nrow_locals(myprow)
     746         9218 :       ncol_local = matrix_a%matrix_struct%ncol_locals(mypcol)
     747              : 
     748        99168 :       DO icol_local = 1, ncol_local
     749      6670463 :          DO irow_local = 1, nrow_local
     750      6661245 :             c(irow_local, icol_local) = a(irow_local, icol_local)*b(irow_local, icol_local)
     751              :          END DO
     752              :       END DO
     753              : 
     754         9218 :       CALL timestop(handle)
     755              : 
     756         9218 :    END SUBROUTINE cp_fm_schur_product
     757              : 
     758              : ! **************************************************************************************************
     759              : !> \brief returns the trace of matrix_a^T matrix_b, i.e
     760              : !>      sum_{i,j}(matrix_a(i,j)*matrix_b(i,j))
     761              : !> \param matrix_a a matrix
     762              : !> \param matrix_b another matrix
     763              : !> \param trace ...
     764              : !> \par History
     765              : !>      11.06.2001 Creation (Matthias Krack)
     766              : !>      12.2002 added doc [fawzi]
     767              : !> \author Matthias Krack
     768              : !> \note
     769              : !>      note the transposition of matrix_a!
     770              : ! **************************************************************************************************
     771       577496 :    SUBROUTINE cp_fm_trace_a0b0t0(matrix_a, matrix_b, trace)
     772              : 
     773              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix_a, matrix_b
     774              :       REAL(KIND=dp), INTENT(OUT)                         :: trace
     775              : 
     776              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a0b0t0'
     777              : 
     778              :       INTEGER                                            :: handle, mypcol, myprow, ncol_local, &
     779              :                                                             nrow_local
     780       577496 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: a, b
     781              :       TYPE(cp_blacs_env_type), POINTER                   :: context
     782              :       TYPE(mp_comm_type)                                 :: group
     783              : 
     784       577496 :       CALL timeset(routineN, handle)
     785              : 
     786       577496 :       context => matrix_a%matrix_struct%context
     787       577496 :       myprow = context%mepos(1)
     788       577496 :       mypcol = context%mepos(2)
     789              : 
     790       577496 :       group = matrix_a%matrix_struct%para_env
     791              : 
     792       577496 :       a => matrix_a%local_data
     793       577496 :       b => matrix_b%local_data
     794              : 
     795       577496 :       nrow_local = MIN(matrix_a%matrix_struct%nrow_locals(myprow), matrix_b%matrix_struct%nrow_locals(myprow))
     796       577496 :       ncol_local = MIN(matrix_a%matrix_struct%ncol_locals(mypcol), matrix_b%matrix_struct%ncol_locals(mypcol))
     797              : 
     798              :       ! cries for an accurate_dot_product
     799              :       trace = accurate_dot_product(a(1:nrow_local, 1:ncol_local), &
     800       577496 :                                    b(1:nrow_local, 1:ncol_local))
     801              : 
     802       577496 :       CALL group%sum(trace)
     803              : 
     804       577496 :       CALL timestop(handle)
     805              : 
     806       577496 :    END SUBROUTINE cp_fm_trace_a0b0t0
     807              : 
     808              :    #:mute
     809              :       #:set types = [("cp_fm_type", "a", ""), ("cp_fm_p_type", "p","%matrix")]
     810              :    #:endmute
     811              : 
     812              : ! **************************************************************************************************
     813              : !> \brief Compute trace(k) = Tr (matrix_a(k)^T matrix_b) for each pair of matrices A_k and B.
     814              : !> \param matrix_a list of A matrices
     815              : !> \param matrix_b B matrix
     816              : !> \param trace    computed traces
     817              : !> \par History
     818              : !>    * 08.2018 forked from cp_fm_trace() [Sergey Chulkov]
     819              : !> \note \parblock
     820              : !>      Computing the trace requires collective communication between involved MPI processes
     821              : !>      that implies a synchronisation point between them. The aim of this subroutine is to reduce
     822              : !>      the amount of time wasted in such synchronisation by performing one large collective
     823              : !>      operation which involves all the matrices in question.
     824              : !>
     825              : !>      The subroutine's suffix reflects dimensionality of dummy arrays; 'a1b0t1' means that
     826              : !>      the dummy variables 'matrix_a' and 'trace' are 1-dimensional arrays, while the variable
     827              : !>      'matrix_b' is a single matrix.
     828              : !>      \endparblock
     829              : ! **************************************************************************************************
     830              :    #:for longname, shortname, appendix in types
     831         3930 :       SUBROUTINE cp_fm_trace_a1b0t1_${shortname}$ (matrix_a, matrix_b, trace)
     832              :          TYPE(${longname}$), DIMENSION(:), INTENT(IN)       :: matrix_a
     833              :          TYPE(cp_fm_type), INTENT(IN)                       :: matrix_b
     834              :          REAL(kind=dp), DIMENSION(:), INTENT(OUT)           :: trace
     835              : 
     836              :          CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a1b0t1_${shortname}$'
     837              : 
     838              :          INTEGER                                            :: handle, imatrix, n_matrices, &
     839              :                                                                ncols_local, nrows_local
     840         3930 :          REAL(kind=dp), DIMENSION(:, :), POINTER            :: ldata_a, ldata_b
     841              :          TYPE(mp_comm_type)                                 :: group
     842              : 
     843         3930 :          CALL timeset(routineN, handle)
     844              : 
     845         3930 :          n_matrices = SIZE(trace)
     846         3930 :          CPASSERT(SIZE(matrix_a) == n_matrices)
     847              : 
     848         3930 :          CALL cp_fm_get_info(matrix_b, nrow_local=nrows_local, ncol_local=ncols_local)
     849              : 
     850         3930 :          ldata_b => matrix_b%local_data(1:nrows_local, 1:ncols_local)
     851              : 
     852              : !$OMP PARALLEL DO DEFAULT(NONE), &
     853              : !$OMP             PRIVATE(imatrix, ldata_a), &
     854              : !$OMP             SHARED(ldata_b, matrix_a, matrix_b), &
     855         3930 : !$OMP             SHARED(ncols_local, nrows_local, n_matrices, trace)
     856              : 
     857              :          DO imatrix = 1, n_matrices
     858              : 
     859              :             ! assume that the matrices A(i) and B have identical shapes and distribution schemes
     860              :             ldata_a => matrix_a(imatrix) ${appendix}$%local_data(1:nrows_local, 1:ncols_local)
     861              :             trace(imatrix) = accurate_dot_product(ldata_a, ldata_b)
     862              :          END DO
     863              : !$OMP END PARALLEL DO
     864              : 
     865         3930 :          group = matrix_b%matrix_struct%para_env
     866        23250 :          CALL group%sum(trace)
     867              : 
     868         3930 :          CALL timestop(handle)
     869         3930 :       END SUBROUTINE cp_fm_trace_a1b0t1_${shortname}$
     870              :    #:endfor
     871              : 
     872              : ! **************************************************************************************************
     873              : !> \brief Compute trace(k) = Tr (matrix_a(k)^T matrix_b(k)) for each pair of matrices A_k and B_k.
     874              : !> \param matrix_a list of A matrices
     875              : !> \param matrix_b list of B matrices
     876              : !> \param trace    computed traces
     877              : !> \param accurate ...
     878              : !> \par History
     879              : !>    * 11.2016 forked from cp_fm_trace() [Sergey Chulkov]
     880              : !> \note \parblock
     881              : !>      Computing the trace requires collective communication between involved MPI processes
     882              : !>      that implies a synchronisation point between them. The aim of this subroutine is to reduce
     883              : !>      the amount of time wasted in such synchronisation by performing one large collective
     884              : !>      operation which involves all the matrices in question.
     885              : !>
     886              : !>      The subroutine's suffix reflects dimensionality of dummy arrays; 'a1b1t1' means that
     887              : !>      all dummy variables (matrix_a, matrix_b, and trace) are 1-dimensional arrays.
     888              : !>      \endparblock
     889              : ! **************************************************************************************************
     890              :    #:for longname1, shortname1, appendix1 in types
     891              :       #:for longname2, shortname2, appendix2 in types
     892       172132 :          SUBROUTINE cp_fm_trace_a1b1t1_${shortname1}$${shortname2}$ (matrix_a, matrix_b, trace, accurate)
     893              :             TYPE(${longname1}$), DIMENSION(:), INTENT(IN)      :: matrix_a
     894              :             TYPE(${longname2}$), DIMENSION(:), INTENT(IN)      :: matrix_b
     895              :             REAL(kind=dp), DIMENSION(:), INTENT(OUT)           :: trace
     896              :             LOGICAL, INTENT(IN), OPTIONAL                      :: accurate
     897              : 
     898              :             CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a1b1t1_${shortname1}$${shortname2}$'
     899              : 
     900              :             INTEGER                                            :: handle, imatrix, n_matrices, &
     901              :                                                                   ncols_local, nrows_local
     902              :             LOGICAL                                            :: use_accurate_sum
     903       172132 :             REAL(kind=dp), DIMENSION(:, :), POINTER            :: ldata_a, ldata_b
     904              :             TYPE(mp_comm_type)                                 :: group
     905              : 
     906       172132 :             CALL timeset(routineN, handle)
     907              : 
     908       172132 :             n_matrices = SIZE(trace)
     909       172132 :             CPASSERT(SIZE(matrix_a) == n_matrices)
     910       172132 :             CPASSERT(SIZE(matrix_b) == n_matrices)
     911              : 
     912       172132 :             use_accurate_sum = .TRUE.
     913       172132 :             IF (PRESENT(accurate)) use_accurate_sum = accurate
     914              : 
     915              : !$OMP PARALLEL DO DEFAULT(NONE), &
     916              : !$OMP             PRIVATE(imatrix, ldata_a, ldata_b, ncols_local), &
     917              : !$OMP             PRIVATE(nrows_local), &
     918       172132 : !$OMP             SHARED(matrix_a, matrix_b, n_matrices, trace, use_accurate_sum)
     919              :             DO imatrix = 1, n_matrices
     920              :                CALL cp_fm_get_info(matrix_a(imatrix) ${appendix1}$, nrow_local=nrows_local, ncol_local=ncols_local)
     921              : 
     922              :                ! assume that the matrices A(i) and B(i) have identical shapes and distribution schemes
     923              :                ldata_a => matrix_a(imatrix) ${appendix1}$%local_data(1:nrows_local, 1:ncols_local)
     924              :                ldata_b => matrix_b(imatrix) ${appendix2}$%local_data(1:nrows_local, 1:ncols_local)
     925              :                IF (use_accurate_sum) THEN
     926              :                   trace(imatrix) = accurate_dot_product(ldata_a, ldata_b)
     927              :                ELSE
     928              :                   trace(imatrix) = SUM(ldata_a*ldata_b)
     929              :                END IF
     930              :             END DO
     931              : !$OMP END PARALLEL DO
     932              : 
     933       172132 :             group = matrix_a(1) ${appendix1}$%matrix_struct%para_env
     934       577196 :             CALL group%sum(trace)
     935              : 
     936       172132 :             CALL timestop(handle)
     937       172132 :          END SUBROUTINE cp_fm_trace_a1b1t1_${shortname1}$${shortname2}$
     938              :       #:endfor
     939              :    #:endfor
     940              : 
     941              : ! **************************************************************************************************
     942              : !> \brief Compute trace(i,j) = \sum_k Tr (matrix_a(k,i)^T matrix_b(k,j)).
     943              : !> \param matrix_a list of A matrices
     944              : !> \param matrix_b list of B matrices
     945              : !> \param trace    computed traces
     946              : !> \param accurate ...
     947              : ! **************************************************************************************************
     948              :    #:for longname1, shortname1, appendix1 in types
     949              :       #:for longname2, shortname2, appendix2 in types
     950        16874 :          SUBROUTINE cp_fm_contracted_trace_a2b2t2_${shortname1}$${shortname2}$ (matrix_a, matrix_b, trace, accurate)
     951              :             TYPE(${longname1}$), DIMENSION(:, :), INTENT(IN)   :: matrix_a
     952              :             TYPE(${longname2}$), DIMENSION(:, :), INTENT(IN)   :: matrix_b
     953              :             REAL(kind=dp), DIMENSION(:, :), INTENT(OUT)        :: trace
     954              :             LOGICAL, INTENT(IN), OPTIONAL                      :: accurate
     955              : 
     956              :             CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_contracted_trace_a2b2t2_${shortname1}$${shortname2}$'
     957              : 
     958              :             INTEGER                                            :: handle, ia, ib, iz, na, nb, ncols_local, &
     959              :                                                                   nrows_local, nz
     960              :             INTEGER(kind=int_8)                                :: ib8, itrace, na8, ntraces
     961              :             LOGICAL                                            :: use_accurate_sum
     962              :             REAL(kind=dp)                                      :: t
     963        16874 :             REAL(kind=dp), DIMENSION(:, :), POINTER            :: ldata_a, ldata_b
     964              :             TYPE(mp_comm_type)                                 :: group
     965              : 
     966        16874 :             CALL timeset(routineN, handle)
     967              : 
     968        16874 :             nz = SIZE(matrix_a, 1)
     969        16874 :             CPASSERT(SIZE(matrix_b, 1) == nz)
     970              : 
     971        16874 :             na = SIZE(matrix_a, 2)
     972        16874 :             nb = SIZE(matrix_b, 2)
     973        16874 :             CPASSERT(SIZE(trace, 1) == na)
     974        16874 :             CPASSERT(SIZE(trace, 2) == nb)
     975              : 
     976        16874 :             use_accurate_sum = .TRUE.
     977        16874 :             IF (PRESENT(accurate)) use_accurate_sum = accurate
     978              : 
     979              :             ! here we use one running index (itrace) instead of two (ia, ib) in order to
     980              :             ! improve load balance between shared-memory threads
     981        16874 :             ntraces = na*nb
     982        16874 :             na8 = INT(na, kind=int_8)
     983              : 
     984              : !$OMP PARALLEL DO DEFAULT(NONE), &
     985              : !$OMP             PRIVATE(ia, ib, ib8, itrace, iz, ldata_a, ldata_b, ncols_local), &
     986              : !$OMP             PRIVATE(nrows_local, t), &
     987        16874 : !$OMP             SHARED(matrix_a, matrix_b, na, na8, nb, ntraces, nz, trace, use_accurate_sum)
     988              :             DO itrace = 1, ntraces
     989              :                ib8 = (itrace - 1)/na8
     990              :                ia = INT(itrace - ib8*na8)
     991              :                ib = INT(ib8) + 1
     992              : 
     993              :                t = 0.0_dp
     994              :                DO iz = 1, nz
     995              :                   CALL cp_fm_get_info(matrix_a(iz, ia) ${appendix1}$, nrow_local=nrows_local, ncol_local=ncols_local)
     996              : 
     997              :                   ! assume that the matrices A(iz, ia) and B(iz, ib) have identical shapes and distribution schemes
     998              :                   ldata_a => matrix_a(iz, ia) ${appendix1}$%local_data(1:nrows_local, 1:ncols_local)
     999              :                   ldata_b => matrix_b(iz, ib) ${appendix2}$%local_data(1:nrows_local, 1:ncols_local)
    1000              :                   IF (use_accurate_sum) THEN
    1001              :                      t = t + accurate_dot_product(ldata_a, ldata_b)
    1002              :                   ELSE
    1003              :                      t = t + SUM(ldata_a*ldata_b)
    1004              :                   END IF
    1005              :                END DO
    1006              :                trace(ia, ib) = t
    1007              :             END DO
    1008              : !$OMP END PARALLEL DO
    1009              : 
    1010        16874 :             group = matrix_a(1, 1) ${appendix1}$%matrix_struct%para_env
    1011       766714 :             CALL group%sum(trace)
    1012              : 
    1013        16874 :             CALL timestop(handle)
    1014        16874 :          END SUBROUTINE cp_fm_contracted_trace_a2b2t2_${shortname1}$${shortname2}$
    1015              :       #:endfor
    1016              :    #:endfor
    1017              : 
    1018              : ! **************************************************************************************************
    1019              : !> \brief multiplies in place by a triangular matrix:
    1020              : !>       matrix_b = alpha op(triangular_matrix) matrix_b
    1021              : !>      or (if side='R')
    1022              : !>       matrix_b = alpha matrix_b op(triangular_matrix)
    1023              : !>      op(triangular_matrix) is:
    1024              : !>       triangular_matrix (if transpose_tr=.false. and invert_tr=.false.)
    1025              : !>       triangular_matrix^T (if transpose_tr=.true. and invert_tr=.false.)
    1026              : !>       triangular_matrix^(-1) (if transpose_tr=.false. and invert_tr=.true.)
    1027              : !>       triangular_matrix^(-T) (if transpose_tr=.true. and invert_tr=.true.)
    1028              : !> \param triangular_matrix the triangular matrix that multiplies the other
    1029              : !> \param matrix_b the matrix that gets multiplied and stores the result
    1030              : !> \param side on which side of matrix_b stays op(triangular_matrix)
    1031              : !>        (defaults to 'L')
    1032              : !> \param transpose_tr if the triangular matrix should be transposed
    1033              : !>        (defaults to false)
    1034              : !> \param invert_tr if the triangular matrix should be inverted
    1035              : !>        (defaults to false)
    1036              : !> \param uplo_tr if triangular_matrix is stored in the upper ('U') or
    1037              : !>        lower ('L') triangle (defaults to 'U')
    1038              : !> \param unit_diag_tr if the diagonal elements of triangular_matrix should
    1039              : !>        be assumed to be 1 (defaults to false)
    1040              : !> \param n_rows the number of rows of the result (defaults to
    1041              : !>        size(matrix_b,1))
    1042              : !> \param n_cols the number of columns of the result (defaults to
    1043              : !>        size(matrix_b,2))
    1044              : !> \param alpha ...
    1045              : !> \par History
    1046              : !>      08.2002 created [fawzi]
    1047              : !> \author Fawzi Mohamed
    1048              : !> \note
    1049              : !>      needs an mpi env
    1050              : ! **************************************************************************************************
    1051       122468 :    SUBROUTINE cp_fm_triangular_multiply(triangular_matrix, matrix_b, side, &
    1052              :                                         transpose_tr, invert_tr, uplo_tr, unit_diag_tr, n_rows, n_cols, &
    1053              :                                         alpha)
    1054              :       TYPE(cp_fm_type), INTENT(IN)                       :: triangular_matrix, matrix_b
    1055              :       CHARACTER, INTENT(IN), OPTIONAL                    :: side
    1056              :       LOGICAL, INTENT(IN), OPTIONAL                      :: transpose_tr, invert_tr
    1057              :       CHARACTER, INTENT(IN), OPTIONAL                    :: uplo_tr
    1058              :       LOGICAL, INTENT(IN), OPTIONAL                      :: unit_diag_tr
    1059              :       INTEGER, INTENT(IN), OPTIONAL                      :: n_rows, n_cols
    1060              :       REAL(KIND=dp), INTENT(IN), OPTIONAL                :: alpha
    1061              : 
    1062              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_triangular_multiply'
    1063              : 
    1064              :       CHARACTER                                          :: side_char, transa, unit_diag, uplo
    1065              :       INTEGER                                            :: handle, mdim, m, n
    1066              :       LOGICAL                                            :: invert
    1067              :       REAL(KIND=dp)                                      :: al
    1068              : 
    1069        61234 :       CALL timeset(routineN, handle)
    1070        61234 :       side_char = 'L'
    1071        61234 :       unit_diag = 'N'
    1072        61234 :       uplo = 'U'
    1073        61234 :       transa = 'N'
    1074        61234 :       invert = .FALSE.
    1075        61234 :       al = 1.0_dp
    1076        61234 :       CALL cp_fm_get_info(matrix_b, nrow_global=m, ncol_global=n)
    1077        61234 :       IF (PRESENT(side)) side_char = side
    1078              :       mdim = MERGE(1, 2, 'L' == side_char)
    1079        61234 :       IF (PRESENT(invert_tr)) invert = invert_tr
    1080        61234 :       IF (PRESENT(uplo_tr)) uplo = uplo_tr
    1081        61234 :       IF (PRESENT(unit_diag_tr)) THEN
    1082            0 :          IF (unit_diag_tr) THEN
    1083            0 :             unit_diag = 'U'
    1084              :          ELSE
    1085              :             unit_diag = 'N'
    1086              :          END IF
    1087              :       END IF
    1088        61234 :       IF (PRESENT(transpose_tr)) THEN
    1089         4648 :          IF (transpose_tr) THEN
    1090         2032 :             transa = 'T'
    1091              :          ELSE
    1092              :             transa = 'N'
    1093              :          END IF
    1094              :       END IF
    1095        61234 :       IF (PRESENT(alpha)) al = alpha
    1096        61234 :       IF (PRESENT(n_rows)) m = n_rows
    1097        61234 :       IF (PRESENT(n_cols)) n = n_cols
    1098              : 
    1099        61234 :       IF (invert) THEN
    1100              : 
    1101              : #if defined(__parallel)
    1102              :          CALL pdtrsm(side_char, uplo, transa, unit_diag, m, n, al, &
    1103              :                      triangular_matrix%local_data(1, 1), 1, 1, &
    1104              :                      triangular_matrix%matrix_struct%descriptor, &
    1105              :                      matrix_b%local_data(1, 1), 1, 1, &
    1106        47256 :                      matrix_b%matrix_struct%descriptor(1))
    1107              : #else
    1108              :          CALL dtrsm(side_char, uplo, transa, unit_diag, m, n, al, &
    1109              :                     triangular_matrix%local_data(1, 1), &
    1110              :                     SIZE(triangular_matrix%local_data, mdim), &
    1111              :                     matrix_b%local_data(1, 1), SIZE(matrix_b%local_data, 1))
    1112              : #endif
    1113              : 
    1114              :       ELSE
    1115              : 
    1116              : #if defined(__parallel)
    1117              :          CALL pdtrmm(side_char, uplo, transa, unit_diag, m, n, al, &
    1118              :                      triangular_matrix%local_data(1, 1), 1, 1, &
    1119              :                      triangular_matrix%matrix_struct%descriptor, &
    1120              :                      matrix_b%local_data(1, 1), 1, 1, &
    1121        13978 :                      matrix_b%matrix_struct%descriptor(1))
    1122              : #else
    1123              :          CALL dtrmm(side_char, uplo, transa, unit_diag, m, n, al, &
    1124              :                     triangular_matrix%local_data(1, 1), &
    1125              :                     SIZE(triangular_matrix%local_data, mdim), &
    1126              :                     matrix_b%local_data(1, 1), SIZE(matrix_b%local_data, 1))
    1127              : #endif
    1128              : 
    1129              :       END IF
    1130              : 
    1131        61234 :       CALL timestop(handle)
    1132        61234 :    END SUBROUTINE cp_fm_triangular_multiply
    1133              : 
    1134              : ! **************************************************************************************************
    1135              : !> \brief scales a matrix
    1136              : !>      matrix_a = alpha * matrix_b
    1137              : !> \param alpha ...
    1138              : !> \param matrix_a ...
    1139              : !> \note
    1140              : !>      use cp_fm_set_all to zero (avoids problems with nan)
    1141              : ! **************************************************************************************************
    1142       203445 :    SUBROUTINE cp_fm_scale(alpha, matrix_a)
    1143              :       REAL(KIND=dp), INTENT(IN)                          :: alpha
    1144              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix_a
    1145              : 
    1146              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'cp_fm_scale'
    1147              : 
    1148              :       INTEGER                                            :: handle, size_a
    1149              :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: a
    1150              : 
    1151       203445 :       CALL timeset(routineN, handle)
    1152              : 
    1153              :       NULLIFY (a)
    1154              : 
    1155       203445 :       a => matrix_a%local_data
    1156       203445 :       size_a = SIZE(a, 1)*SIZE(a, 2)
    1157              : 
    1158       203445 :       CALL DSCAL(size_a, alpha, a, 1)
    1159              : 
    1160       203445 :       CALL timestop(handle)
    1161              : 
    1162       203445 :    END SUBROUTINE cp_fm_scale
    1163              : 
    1164              : ! **************************************************************************************************
    1165              : !> \brief transposes a matrix
    1166              : !>      matrixt = matrix ^ T
    1167              : !> \param matrix ...
    1168              : !> \param matrixt ...
    1169              : !> \note
    1170              : !>      all matrix elements are transposed (see cp_fm_uplo_to_full to symmetrise a matrix)
    1171              : ! **************************************************************************************************
    1172        21692 :    SUBROUTINE cp_fm_transpose(matrix, matrixt)
    1173              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix, matrixt
    1174              : 
    1175              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_transpose'
    1176              : 
    1177              :       INTEGER                                  :: handle, ncol_global, &
    1178              :                                                   nrow_global, ncol_globalt, nrow_globalt
    1179        10846 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a, c
    1180              : #if defined(__parallel)
    1181              :       INTEGER, DIMENSION(9)                    :: desca, descc
    1182              : #elif !defined(__MKL)
    1183              :       INTEGER                                  :: i, j
    1184              : #endif
    1185              : 
    1186        10846 :       nrow_global = matrix%matrix_struct%nrow_global
    1187        10846 :       ncol_global = matrix%matrix_struct%ncol_global
    1188        10846 :       nrow_globalt = matrixt%matrix_struct%nrow_global
    1189        10846 :       ncol_globalt = matrixt%matrix_struct%ncol_global
    1190            0 :       CPASSERT(nrow_global == ncol_globalt)
    1191        10846 :       CPASSERT(nrow_globalt == ncol_global)
    1192              : 
    1193        10846 :       CALL timeset(routineN, handle)
    1194              : 
    1195        10846 :       a => matrix%local_data
    1196        10846 :       c => matrixt%local_data
    1197              : 
    1198              : #if defined(__parallel)
    1199       108460 :       desca(:) = matrix%matrix_struct%descriptor(:)
    1200       108460 :       descc(:) = matrixt%matrix_struct%descriptor(:)
    1201        10846 :       CALL pdtran(ncol_global, nrow_global, 1.0_dp, a(1, 1), 1, 1, desca, 0.0_dp, c(1, 1), 1, 1, descc)
    1202              : #elif defined(__MKL)
    1203              :       CALL mkl_domatcopy('C', 'T', nrow_global, ncol_global, 1.0_dp, a(1, 1), nrow_global, c(1, 1), ncol_global)
    1204              : #else
    1205              :       DO j = 1, ncol_global
    1206              :          DO i = 1, nrow_global
    1207              :             c(j, i) = a(i, j)
    1208              :          END DO
    1209              :       END DO
    1210              : #endif
    1211        10846 :       CALL timestop(handle)
    1212              : 
    1213        10846 :    END SUBROUTINE cp_fm_transpose
    1214              : 
    1215              : ! **************************************************************************************************
    1216              : !> \brief given a triangular matrix according to uplo, computes the corresponding full matrix
    1217              : !> \param matrix the triangular matrix as input, the full matrix as output
    1218              : !> \param work a matrix of the same size as matrix
    1219              : !> \param uplo triangular format; defaults to 'U'
    1220              : !> \author Matthias Krack
    1221              : !> \note
    1222              : !>       the opposite triangular part is irrelevant
    1223              : ! **************************************************************************************************
    1224       354674 :    SUBROUTINE cp_fm_uplo_to_full(matrix, work, uplo)
    1225              : 
    1226              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix, work
    1227              :       CHARACTER, INTENT(IN), OPTIONAL          :: uplo
    1228              : 
    1229              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_uplo_to_full'
    1230              : 
    1231              :       CHARACTER                                :: myuplo
    1232              :       INTEGER                                  :: handle, icol_global, irow_global, &
    1233              :                                                   mypcol, myprow, ncol_global, &
    1234              :                                                   nrow_global
    1235       177337 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
    1236              :       TYPE(cp_blacs_env_type), POINTER         :: context
    1237              : 
    1238              : #if defined(__parallel)
    1239              :       INTEGER                                  :: icol_local, irow_local, &
    1240              :                                                   ncol_block, ncol_local, &
    1241              :                                                   nrow_block, nrow_local
    1242              :       INTEGER, DIMENSION(9)                    :: desca, descc
    1243       177337 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: c
    1244              : #endif
    1245              : 
    1246       177337 :       myuplo = 'U'
    1247            0 :       IF (PRESENT(uplo)) myuplo = uplo
    1248              : 
    1249       177337 :       nrow_global = matrix%matrix_struct%nrow_global
    1250       177337 :       ncol_global = matrix%matrix_struct%ncol_global
    1251       177337 :       CPASSERT(nrow_global == ncol_global)
    1252       177337 :       nrow_global = work%matrix_struct%nrow_global
    1253       177337 :       ncol_global = work%matrix_struct%ncol_global
    1254       177337 :       CPASSERT(nrow_global == ncol_global)
    1255              : 
    1256       177337 :       CALL timeset(routineN, handle)
    1257              : 
    1258       177337 :       context => matrix%matrix_struct%context
    1259       177337 :       myprow = context%mepos(1)
    1260       177337 :       mypcol = context%mepos(2)
    1261              : 
    1262              : #if defined(__parallel)
    1263              : 
    1264       177337 :       nrow_block = matrix%matrix_struct%nrow_block
    1265       177337 :       ncol_block = matrix%matrix_struct%ncol_block
    1266              : 
    1267       177337 :       nrow_local = matrix%matrix_struct%nrow_locals(myprow)
    1268       177337 :       ncol_local = matrix%matrix_struct%ncol_locals(mypcol)
    1269              : 
    1270       177337 :       a => work%local_data
    1271      1773370 :       desca(:) = work%matrix_struct%descriptor(:)
    1272       177337 :       c => matrix%local_data
    1273      1773370 :       descc(:) = matrix%matrix_struct%descriptor(:)
    1274              : 
    1275      5671817 :       DO icol_local = 1, ncol_local
    1276      5494480 :          icol_global = matrix%matrix_struct%col_indices(icol_local)
    1277    234497952 :          DO irow_local = 1, nrow_local
    1278    228826135 :             irow_global = matrix%matrix_struct%row_indices(irow_local)
    1279    234320615 :             IF (MERGE(irow_global > icol_global, irow_global < icol_global, (myuplo == "U") .OR. (myuplo == "u"))) THEN
    1280    112900060 :                c(irow_local, icol_local) = 0.0_dp
    1281    115926075 :             ELSE IF (irow_global == icol_global) THEN
    1282      3026015 :                c(irow_local, icol_local) = 0.5_dp*c(irow_local, icol_local)
    1283              :             END IF
    1284              :          END DO
    1285              :       END DO
    1286              : 
    1287      5671817 :       DO icol_local = 1, ncol_local
    1288    234497952 :       DO irow_local = 1, nrow_local
    1289    234320615 :          a(irow_local, icol_local) = c(irow_local, icol_local)
    1290              :       END DO
    1291              :       END DO
    1292              : 
    1293       177337 :       CALL pdtran(nrow_global, ncol_global, 1.0_dp, a(1, 1), 1, 1, desca, 1.0_dp, c(1, 1), 1, 1, descc)
    1294              : 
    1295              : #else
    1296              : 
    1297              :       a => matrix%local_data
    1298              : 
    1299              :       IF ((myuplo == "U") .OR. (myuplo == "u")) THEN
    1300              :          DO irow_global = 1, nrow_global
    1301              :          DO icol_global = irow_global + 1, ncol_global
    1302              :             a(icol_global, irow_global) = a(irow_global, icol_global)
    1303              :          END DO
    1304              :          END DO
    1305              :       ELSE
    1306              :          DO icol_global = 1, ncol_global
    1307              :          DO irow_global = icol_global + 1, nrow_global
    1308              :             a(irow_global, icol_global) = a(icol_global, irow_global)
    1309              :          END DO
    1310              :          END DO
    1311              :       END IF
    1312              : 
    1313              : #endif
    1314       177337 :       CALL timestop(handle)
    1315              : 
    1316       177337 :    END SUBROUTINE cp_fm_uplo_to_full
    1317              : 
    1318              : ! **************************************************************************************************
    1319              : !> \brief scales column i of matrix a with scaling(i)
    1320              : !> \param matrixa ...
    1321              : !> \param scaling : an array used for scaling the columns,
    1322              : !>                  SIZE(scaling) determines the number of columns to be scaled
    1323              : !> \author Joost VandeVondele
    1324              : !> \note
    1325              : !>      this is very useful as a first step in the computation of C = sum_i alpha_i A_i transpose (A_i)
    1326              : !>      that is a rank-k update (cp_fm_syrk , cp_sm_plus_fm_fm_t)
    1327              : !>      this procedure can be up to 20 times faster than calling cp_fm_syrk n times
    1328              : !>      where every vector has a different prefactor
    1329              : ! **************************************************************************************************
    1330       248232 :    SUBROUTINE cp_fm_column_scale(matrixa, scaling)
    1331              :       TYPE(cp_fm_type), INTENT(IN)             :: matrixa
    1332              :       REAL(KIND=dp), DIMENSION(:), INTENT(IN)  :: scaling
    1333              : 
    1334              :       INTEGER                                  :: k, mypcol, myprow, n, ncol_global, &
    1335              :                                                   npcol, nprow
    1336       248232 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
    1337              : #if defined(__parallel)
    1338              :       INTEGER                                  :: icol_global, icol_local, &
    1339              :                                                   ipcol, iprow, irow_local
    1340              : #else
    1341              :       INTEGER                                  :: i
    1342              : #endif
    1343              : 
    1344       248232 :       myprow = matrixa%matrix_struct%context%mepos(1)
    1345       248232 :       mypcol = matrixa%matrix_struct%context%mepos(2)
    1346       248232 :       nprow = matrixa%matrix_struct%context%num_pe(1)
    1347       248232 :       npcol = matrixa%matrix_struct%context%num_pe(2)
    1348              : 
    1349       248232 :       ncol_global = matrixa%matrix_struct%ncol_global
    1350              : 
    1351       248232 :       a => matrixa%local_data
    1352       248232 :       n = SIZE(a, 1)
    1353       248232 :       k = MIN(SIZE(scaling), ncol_global)
    1354              : 
    1355              : #if defined(__parallel)
    1356              : 
    1357      3893581 :       DO icol_global = 1, k
    1358              :          CALL infog2l(1, icol_global, matrixa%matrix_struct%descriptor, &
    1359              :                       nprow, npcol, myprow, mypcol, &
    1360      3645349 :                       irow_local, icol_local, iprow, ipcol)
    1361      3893581 :          IF ((ipcol == mypcol)) THEN
    1362      3645349 :             CALL DSCAL(n, scaling(icol_global), a(:, icol_local), 1)
    1363              :          END IF
    1364              :       END DO
    1365              : #else
    1366              :       DO i = 1, k
    1367              :          CALL DSCAL(n, scaling(i), a(:, i), 1)
    1368              :       END DO
    1369              : #endif
    1370       248232 :    END SUBROUTINE cp_fm_column_scale
    1371              : 
    1372              : ! **************************************************************************************************
    1373              : !> \brief scales row i of matrix a with scaling(i)
    1374              : !> \param matrixa ...
    1375              : !> \param scaling : an array used for scaling the columns,
    1376              : !> \author JGH
    1377              : !> \note
    1378              : ! **************************************************************************************************
    1379         6322 :    SUBROUTINE cp_fm_row_scale(matrixa, scaling)
    1380              :       TYPE(cp_fm_type), INTENT(IN)             :: matrixa
    1381              :       REAL(KIND=dp), DIMENSION(:), INTENT(IN)  :: scaling
    1382              : 
    1383              :       INTEGER                                  :: n, m, nrow_global, nrow_local, ncol_local
    1384         6322 :       INTEGER, DIMENSION(:), POINTER           :: row_indices
    1385         6322 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
    1386              : #if defined(__parallel)
    1387              :       INTEGER                                  :: irow_global, icol, irow
    1388              : #else
    1389              :       INTEGER                                  :: j
    1390              : #endif
    1391              : 
    1392              :       CALL cp_fm_get_info(matrixa, row_indices=row_indices, nrow_global=nrow_global, &
    1393         6322 :                           nrow_local=nrow_local, ncol_local=ncol_local)
    1394         6322 :       CPASSERT(SIZE(scaling) == nrow_global)
    1395              : 
    1396         6322 :       a => matrixa%local_data
    1397         6322 :       n = SIZE(a, 1)
    1398         6322 :       m = SIZE(a, 2)
    1399              : 
    1400              : #if defined(__parallel)
    1401        79038 :       DO icol = 1, ncol_local
    1402      6488000 :          DO irow = 1, nrow_local
    1403      6408962 :             irow_global = row_indices(irow)
    1404      6481678 :             a(irow, icol) = scaling(irow_global)*a(irow, icol)
    1405              :          END DO
    1406              :       END DO
    1407              : #else
    1408              :       DO j = 1, m
    1409              :          a(1:n, j) = scaling(1:n)*a(1:n, j)
    1410              :       END DO
    1411              : #endif
    1412         6322 :    END SUBROUTINE cp_fm_row_scale
    1413              : 
    1414              : ! **************************************************************************************************
    1415              : !> \brief Inverts a cp_fm_type matrix, optionally returning the determinant of the input matrix
    1416              : !> \param matrix_a the matrix to invert
    1417              : !> \param matrix_inverse the inverse of matrix_a
    1418              : !> \param det_a the determinant of matrix_a
    1419              : !> \param eps_svd optional parameter to active SVD based inversion, singular values below eps_svd
    1420              : !>                are screened
    1421              : !> \param eigval optionally return matrix eigenvalues/singular values
    1422              : !> \par History
    1423              : !>      note of Jan Wilhelm (12.2015)
    1424              : !>      - computation of determinant corrected
    1425              : !>      - determinant only computed if det_a is present
    1426              : !>      12.2016 added option to use SVD instead of LU [Nico Holmberg]
    1427              : !>      - Use cp_fm_get diag instead of n times cp_fm_get_element (A. Bussy)
    1428              : !> \author Florian Schiffmann(02.2007)
    1429              : ! **************************************************************************************************
    1430         1860 :    SUBROUTINE cp_fm_invert(matrix_a, matrix_inverse, det_a, eps_svd, eigval)
    1431              : 
    1432              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix_a, matrix_inverse
    1433              :       REAL(KIND=dp), INTENT(OUT), OPTIONAL     :: det_a
    1434              :       REAL(KIND=dp), INTENT(IN), OPTIONAL      :: eps_svd
    1435              :       REAL(KIND=dp), DIMENSION(:), POINTER, &
    1436              :          INTENT(INOUT), OPTIONAL               :: eigval
    1437              : 
    1438              :       INTEGER                                  :: n
    1439         1860 :       INTEGER, ALLOCATABLE, DIMENSION(:)       :: ipivot
    1440              :       REAL(KIND=dp)                            :: determinant, my_eps_svd
    1441              :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
    1442              :       TYPE(cp_fm_type)                         :: matrix_lu
    1443              : 
    1444              : #if defined(__parallel)
    1445              :       TYPE(cp_fm_type)                         :: u, vt, sigma, inv_sigma_ut
    1446              :       TYPE(mp_comm_type) :: group
    1447              :       INTEGER                                  :: i, info, liwork, lwork, exponent_of_minus_one
    1448              :       INTEGER, DIMENSION(9)                    :: desca
    1449              :       LOGICAL                                  :: quenched
    1450              :       REAL(KIND=dp)                            :: alpha, beta
    1451         1860 :       REAL(KIND=dp), DIMENSION(:), POINTER     :: diag
    1452         1860 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: work
    1453              : #else
    1454              :       LOGICAL                                  :: sign
    1455              :       REAL(KIND=dp)                            :: eps1
    1456              : #endif
    1457              : 
    1458         1860 :       my_eps_svd = 0.0_dp
    1459          478 :       IF (PRESENT(eps_svd)) my_eps_svd = eps_svd
    1460              : 
    1461              :       CALL cp_fm_create(matrix=matrix_lu, &
    1462              :                         matrix_struct=matrix_a%matrix_struct, &
    1463         1860 :                         name="A_lu"//TRIM(ADJUSTL(cp_to_string(1)))//"MATRIX")
    1464         1860 :       CALL cp_fm_to_fm(matrix_a, matrix_lu)
    1465              : 
    1466         1860 :       a => matrix_lu%local_data
    1467         1860 :       n = matrix_lu%matrix_struct%nrow_global
    1468         5580 :       ALLOCATE (ipivot(n + matrix_a%matrix_struct%nrow_block))
    1469         1860 :       ipivot(:) = 0
    1470              : #if defined(__parallel)
    1471         1860 :       IF (my_eps_svd == 0.0_dp) THEN
    1472              :          ! Use LU decomposition
    1473              :          lwork = 3*n
    1474              :          liwork = 3*n
    1475        18300 :          desca(:) = matrix_lu%matrix_struct%descriptor(:)
    1476         1830 :          CALL pdgetrf(n, n, a, 1, 1, desca, ipivot, info)
    1477              : 
    1478         1830 :          IF (PRESENT(det_a) .OR. PRESENT(eigval)) THEN
    1479              : 
    1480         1128 :             ALLOCATE (diag(n))
    1481          924 :             diag(:) = 0.0_dp
    1482          376 :             CALL cp_fm_get_diag(matrix_lu, diag)
    1483              : 
    1484          376 :             exponent_of_minus_one = 0
    1485          376 :             determinant = 1.0_dp
    1486          924 :             DO i = 1, n
    1487          548 :                determinant = determinant*diag(i)
    1488          924 :                IF (ipivot(i) /= i) THEN
    1489          224 :                   exponent_of_minus_one = exponent_of_minus_one + 1
    1490              :                END IF
    1491              :             END DO
    1492          376 :             IF (PRESENT(eigval)) THEN
    1493            0 :                CPASSERT(.NOT. ASSOCIATED(eigval))
    1494            0 :                ALLOCATE (eigval(n))
    1495            0 :                eigval(:) = diag
    1496              :             END IF
    1497          376 :             DEALLOCATE (diag)
    1498              : 
    1499          376 :             group = matrix_lu%matrix_struct%para_env
    1500          376 :             CALL group%sum(exponent_of_minus_one)
    1501              : 
    1502          376 :             determinant = determinant*(-1.0_dp)**exponent_of_minus_one
    1503              : 
    1504              :          END IF
    1505              : 
    1506         1830 :          alpha = 0.0_dp
    1507         1830 :          beta = 1.0_dp
    1508         1830 :          CALL cp_fm_set_all(matrix_inverse, alpha, beta)
    1509         1830 :          CALL pdgetrs('N', n, n, matrix_lu%local_data, 1, 1, desca, ipivot, matrix_inverse%local_data, 1, 1, desca, info)
    1510              :       ELSE
    1511              :          ! Use singular value decomposition
    1512              :          CALL cp_fm_create(matrix=u, &
    1513              :                            matrix_struct=matrix_a%matrix_struct, &
    1514           30 :                            name="LEFT_SINGULAR_MATRIX")
    1515           30 :          CALL cp_fm_set_all(u, alpha=0.0_dp)
    1516              :          CALL cp_fm_create(matrix=vt, &
    1517              :                            matrix_struct=matrix_a%matrix_struct, &
    1518           30 :                            name="RIGHT_SINGULAR_MATRIX")
    1519           30 :          CALL cp_fm_set_all(vt, alpha=0.0_dp)
    1520           90 :          ALLOCATE (diag(n))
    1521           96 :          diag(:) = 0.0_dp
    1522          300 :          desca(:) = matrix_lu%matrix_struct%descriptor(:)
    1523           30 :          ALLOCATE (work(1))
    1524              :          ! Workspace query
    1525           30 :          lwork = -1
    1526              :          CALL pdgesvd('V', 'V', n, n, matrix_lu%local_data, 1, 1, desca, diag, u%local_data, &
    1527           30 :                       1, 1, desca, vt%local_data, 1, 1, desca, work, lwork, info)
    1528           30 :          lwork = INT(work(1))
    1529           30 :          DEALLOCATE (work)
    1530           90 :          ALLOCATE (work(lwork))
    1531              :          ! SVD
    1532              :          CALL pdgesvd('V', 'V', n, n, matrix_lu%local_data, 1, 1, desca, diag, u%local_data, &
    1533           30 :                       1, 1, desca, vt%local_data, 1, 1, desca, work, lwork, info)
    1534              :          ! info == n+1 implies homogeneity error when the number of procs is large
    1535              :          ! this likely isnt a problem, but maybe we should handle it separately
    1536           30 :          IF (info /= 0 .AND. info /= n + 1) &
    1537            0 :             CPABORT("Singular value decomposition of matrix failed.")
    1538              :          ! (Pseudo)inverse and (pseudo)determinant
    1539              :          CALL cp_fm_create(matrix=sigma, &
    1540              :                            matrix_struct=matrix_a%matrix_struct, &
    1541           30 :                            name="SINGULAR_VALUE_MATRIX")
    1542           30 :          CALL cp_fm_set_all(sigma, alpha=0.0_dp)
    1543           30 :          determinant = 1.0_dp
    1544           30 :          quenched = .FALSE.
    1545           30 :          IF (PRESENT(eigval)) THEN
    1546           28 :             CPASSERT(.NOT. ASSOCIATED(eigval))
    1547           84 :             ALLOCATE (eigval(n))
    1548          156 :             eigval(:) = diag
    1549              :          END IF
    1550           96 :          DO i = 1, n
    1551           66 :             IF (diag(i) < my_eps_svd) THEN
    1552           18 :                diag(i) = 0.0_dp
    1553           18 :                quenched = .TRUE.
    1554              :             ELSE
    1555           48 :                determinant = determinant*diag(i)
    1556           48 :                diag(i) = 1.0_dp/diag(i)
    1557              :             END IF
    1558           96 :             CALL cp_fm_set_element(sigma, i, i, diag(i))
    1559              :          END DO
    1560           30 :          DEALLOCATE (diag)
    1561           30 :          IF (quenched) &
    1562              :             CALL cp_warn(__LOCATION__, &
    1563              :                          "Linear dependencies were detected in the SVD inversion of matrix "//TRIM(ADJUSTL(matrix_a%name))// &
    1564           12 :                          ". At least one singular value has been quenched.")
    1565              :          ! Sigma^-1 * U^T
    1566              :          CALL cp_fm_create(matrix=inv_sigma_ut, &
    1567              :                            matrix_struct=matrix_a%matrix_struct, &
    1568           30 :                            name="SINGULAR_VALUE_MATRIX")
    1569           30 :          CALL cp_fm_set_all(inv_sigma_ut, alpha=0.0_dp)
    1570              :          CALL pdgemm('N', 'T', n, n, n, 1.0_dp, sigma%local_data, 1, 1, desca, &
    1571           30 :                      u%local_data, 1, 1, desca, 0.0_dp, inv_sigma_ut%local_data, 1, 1, desca)
    1572              :          ! A^-1 = V * (Sigma^-1 * U^T)
    1573           30 :          CALL cp_fm_set_all(matrix_inverse, alpha=0.0_dp)
    1574              :          CALL pdgemm('T', 'N', n, n, n, 1.0_dp, vt%local_data, 1, 1, desca, &
    1575           30 :                      inv_sigma_ut%local_data, 1, 1, desca, 0.0_dp, matrix_inverse%local_data, 1, 1, desca)
    1576              :          ! Clean up
    1577           30 :          DEALLOCATE (work)
    1578           30 :          CALL cp_fm_release(u)
    1579           30 :          CALL cp_fm_release(vt)
    1580           30 :          CALL cp_fm_release(sigma)
    1581          150 :          CALL cp_fm_release(inv_sigma_ut)
    1582              :       END IF
    1583              : #else
    1584              :       IF (my_eps_svd == 0.0_dp) THEN
    1585              :          sign = .TRUE.
    1586              :          CALL invert_matrix(matrix_a%local_data, matrix_inverse%local_data, &
    1587              :                             eval_error=eps1)
    1588              :          CALL cp_fm_lu_decompose(matrix_lu, determinant, correct_sign=sign)
    1589              :          IF (PRESENT(eigval)) &
    1590              :             CALL cp_abort(__LOCATION__, &
    1591              :                           "NYI. Eigenvalues not available for return without SCALAPACK.")
    1592              :       ELSE
    1593              :          CALL get_pseudo_inverse_svd(matrix_a%local_data, matrix_inverse%local_data, eps_svd, &
    1594              :                                      determinant, eigval)
    1595              :       END IF
    1596              : #endif
    1597         1860 :       CALL cp_fm_release(matrix_lu)
    1598         1860 :       DEALLOCATE (ipivot)
    1599         1860 :       IF (PRESENT(det_a)) det_a = determinant
    1600         1860 :    END SUBROUTINE cp_fm_invert
    1601              : 
    1602              : ! **************************************************************************************************
    1603              : !> \brief inverts a triangular matrix
    1604              : !> \param matrix_a ...
    1605              : !> \param uplo_tr triangular format; defaults to 'U'
    1606              : !> \author MI
    1607              : ! **************************************************************************************************
    1608         5312 :    SUBROUTINE cp_fm_triangular_invert(matrix_a, uplo_tr)
    1609              : 
    1610              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix_a
    1611              :       CHARACTER, INTENT(IN), OPTIONAL          :: uplo_tr
    1612              : 
    1613              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_triangular_invert'
    1614              : 
    1615              :       CHARACTER                                :: unit_diag, uplo
    1616              :       INTEGER                                  :: handle, info, ncol_global
    1617         5312 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
    1618              : #if defined(__parallel)
    1619              :       INTEGER, DIMENSION(9)                    :: desca
    1620              : #endif
    1621              : 
    1622         5312 :       CALL timeset(routineN, handle)
    1623              : 
    1624         5312 :       unit_diag = 'N'
    1625         5312 :       uplo = 'U'
    1626         5312 :       IF (PRESENT(uplo_tr)) uplo = uplo_tr
    1627              : 
    1628         5312 :       ncol_global = matrix_a%matrix_struct%ncol_global
    1629              : 
    1630         5312 :       a => matrix_a%local_data
    1631              : 
    1632              : #if defined(__parallel)
    1633        53120 :       desca(:) = matrix_a%matrix_struct%descriptor(:)
    1634              : 
    1635         5312 :       CALL pdtrtri(uplo, unit_diag, ncol_global, a(1, 1), 1, 1, desca, info)
    1636              : 
    1637              : #else
    1638              :       CALL dtrtri(uplo, unit_diag, ncol_global, a(1, 1), ncol_global, info)
    1639              : #endif
    1640              : 
    1641         5312 :       CALL timestop(handle)
    1642         5312 :    END SUBROUTINE cp_fm_triangular_invert
    1643              : 
    1644              : ! **************************************************************************************************
    1645              : !> \brief  performs a QR factorization of the input rectangular matrix A or of a submatrix of A
    1646              : !>         the computed triangular matrix R is in output of the submatrix sub(A) of size NxN
    1647              : !>         M and M give the dimension of the submatrix that has to be factorized (MxN) with M>N
    1648              : !> \param matrix_a ...
    1649              : !> \param matrix_r ...
    1650              : !> \param nrow_fact ...
    1651              : !> \param ncol_fact ...
    1652              : !> \param first_row ...
    1653              : !> \param first_col ...
    1654              : !> \author MI
    1655              : ! **************************************************************************************************
    1656        19320 :    SUBROUTINE cp_fm_qr_factorization(matrix_a, matrix_r, nrow_fact, ncol_fact, first_row, first_col, uplo)
    1657              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix_a, matrix_r
    1658              :       INTEGER, INTENT(IN), OPTIONAL            :: nrow_fact, ncol_fact, &
    1659              :                                                   first_row, first_col
    1660              :       CHARACTER, INTENT(IN), OPTIONAL          :: uplo
    1661              : 
    1662              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_qr_factorization'
    1663              : 
    1664              :       CHARACTER                                :: myuplo
    1665              :       INTEGER                                  :: handle, i, icol, info, irow, &
    1666              :                                                   j, lda, lwork, ncol, &
    1667              :                                                   ndim, nrow
    1668        19320 :       REAL(dp), ALLOCATABLE, DIMENSION(:)      :: tau, work
    1669        19320 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :)   :: r_mat
    1670              :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
    1671              : #if defined(__parallel)
    1672              :       INTEGER, DIMENSION(9)                    :: desca
    1673              : #endif
    1674              : 
    1675        19320 :       CALL timeset(routineN, handle)
    1676              : 
    1677        19320 :       myuplo = 'U'
    1678        19320 :       IF (PRESENT(uplo)) myuplo = uplo
    1679              : 
    1680        19320 :       ncol = matrix_a%matrix_struct%ncol_global
    1681        19320 :       nrow = matrix_a%matrix_struct%nrow_global
    1682        19320 :       lda = nrow
    1683              : 
    1684        19320 :       a => matrix_a%local_data
    1685              : 
    1686        19320 :       IF (PRESENT(nrow_fact)) nrow = nrow_fact
    1687        19320 :       IF (PRESENT(ncol_fact)) ncol = ncol_fact
    1688        19320 :       irow = 1
    1689        19320 :       IF (PRESENT(first_row)) irow = first_row
    1690        19320 :       icol = 1
    1691        19320 :       IF (PRESENT(first_col)) icol = first_col
    1692              : 
    1693        19320 :       CPASSERT(nrow >= ncol)
    1694        19320 :       ndim = SIZE(a, 2)
    1695        57960 :       ALLOCATE (tau(ndim))
    1696              : 
    1697              : #if defined(__parallel)
    1698              : 
    1699       193200 :       desca(:) = matrix_a%matrix_struct%descriptor(:)
    1700              : 
    1701        19320 :       lwork = -1
    1702        57960 :       ALLOCATE (work(2*ndim))
    1703        19320 :       CALL pdgeqrf(nrow, ncol, a, irow, icol, desca, tau, work, lwork, info)
    1704        19320 :       lwork = INT(work(1))
    1705        19320 :       DEALLOCATE (work)
    1706        57960 :       ALLOCATE (work(lwork))
    1707        19320 :       CALL pdgeqrf(nrow, ncol, a, irow, icol, desca, tau, work, lwork, info)
    1708              : 
    1709              : #else
    1710              :       lwork = -1
    1711              :       ALLOCATE (work(2*ndim))
    1712              :       CALL dgeqrf(nrow, ncol, a, lda, tau, work, lwork, info)
    1713              :       lwork = INT(work(1))
    1714              :       DEALLOCATE (work)
    1715              :       ALLOCATE (work(lwork))
    1716              :       CALL dgeqrf(nrow, ncol, a, lda, tau, work, lwork, info)
    1717              : 
    1718              : #endif
    1719              : 
    1720        77280 :       ALLOCATE (r_mat(ncol, ncol))
    1721        19320 :       CALL cp_fm_get_submatrix(matrix_a, r_mat, 1, 1, ncol, ncol)
    1722        19320 :       IF ((myuplo == "U") .OR. (myuplo == "u")) THEN
    1723        38640 :          DO i = 1, ncol
    1724        38640 :          DO j = i + 1, ncol
    1725        19320 :             r_mat(j, i) = 0.0_dp
    1726              :          END DO
    1727              :          END DO
    1728              :       ELSE
    1729            0 :          DO j = 1, ncol
    1730            0 :          DO i = j + 1, ncol
    1731            0 :             r_mat(i, j) = 0.0_dp
    1732              :          END DO
    1733              :          END DO
    1734              :       END IF
    1735        19320 :       CALL cp_fm_set_submatrix(matrix_r, r_mat, 1, 1, ncol, ncol)
    1736              : 
    1737        19320 :       DEALLOCATE (tau, work, r_mat)
    1738              : 
    1739        19320 :       CALL timestop(handle)
    1740              : 
    1741        19320 :    END SUBROUTINE cp_fm_qr_factorization
    1742              : 
    1743              : ! **************************************************************************************************
    1744              : !> \brief computes the the solution to A*b=A_general using lu decomposition
    1745              : !> \param matrix_a input matrix; will be overwritten
    1746              : !> \param general_a contains the result
    1747              : !> \author Florian Schiffmann
    1748              : ! **************************************************************************************************
    1749         8210 :    SUBROUTINE cp_fm_solve(matrix_a, general_a)
    1750              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix_a, general_a
    1751              : 
    1752              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_solve'
    1753              : 
    1754              :       INTEGER                                  :: handle, info, n, nrhs
    1755         8210 :       INTEGER, ALLOCATABLE, DIMENSION(:)       :: ipivot
    1756              :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a, a_general
    1757              : #if defined(__parallel)
    1758              :       INTEGER, DIMENSION(9)                    :: desca, descb
    1759              : #else
    1760              :       INTEGER                                  :: lda, ldb
    1761              : #endif
    1762              : 
    1763         8210 :       CALL timeset(routineN, handle)
    1764              : 
    1765         8210 :       a => matrix_a%local_data
    1766         8210 :       a_general => general_a%local_data
    1767         8210 :       n = matrix_a%matrix_struct%nrow_global
    1768         8210 :       nrhs = general_a%matrix_struct%ncol_global
    1769        24630 :       ALLOCATE (ipivot(n + matrix_a%matrix_struct%nrow_block))
    1770              : 
    1771              : #if defined(__parallel)
    1772        82100 :       desca(:) = matrix_a%matrix_struct%descriptor(:)
    1773        82100 :       descb(:) = general_a%matrix_struct%descriptor(:)
    1774         8210 :       CALL pdgetrf(n, n, a, 1, 1, desca, ipivot, info)
    1775              :       CALL pdgetrs("N", n, nrhs, a, 1, 1, desca, ipivot, a_general, &
    1776         8210 :                    1, 1, descb, info)
    1777              : 
    1778              : #else
    1779              :       lda = SIZE(a, 1)
    1780              :       ldb = SIZE(a_general, 1)
    1781              :       CALL dgetrf(n, n, a, lda, ipivot, info)
    1782              :       CALL dgetrs("N", n, nrhs, a, lda, ipivot, a_general, ldb, info)
    1783              : 
    1784              : #endif
    1785              :       ! info is allowed to be zero
    1786              :       ! this does just signal a zero diagonal element
    1787         8210 :       DEALLOCATE (ipivot)
    1788         8210 :       CALL timestop(handle)
    1789         8210 :    END SUBROUTINE
    1790              : 
    1791              : ! **************************************************************************************************
    1792              : !> \brief Convenience function. Computes the matrix multiplications needed
    1793              : !>        for the multiplication of complex matrices.
    1794              : !>        C = beta * C + alpha * ( A  ** transa ) * ( B ** transb )
    1795              : !> \param transa : 'N' -> normal   'T' -> transpose
    1796              : !>      alpha,beta :: can be 0.0_dp and 1.0_dp
    1797              : !> \param transb ...
    1798              : !> \param m ...
    1799              : !> \param n ...
    1800              : !> \param k ...
    1801              : !> \param alpha ...
    1802              : !> \param A_re m x k matrix ( ! for transa = 'N'), real part
    1803              : !> \param A_im m x k matrix ( ! for transa = 'N'), imaginary part
    1804              : !> \param B_re k x n matrix ( ! for transa = 'N'), real part
    1805              : !> \param B_im k x n matrix ( ! for transa = 'N'), imaginary part
    1806              : !> \param beta ...
    1807              : !> \param C_re m x n matrix, real part
    1808              : !> \param C_im m x n matrix, imaginary part
    1809              : !> \param a_first_col ...
    1810              : !> \param a_first_row ...
    1811              : !> \param b_first_col : the k x n matrix starts at col b_first_col of matrix_b (avoid usage)
    1812              : !> \param b_first_row ...
    1813              : !> \param c_first_col ...
    1814              : !> \param c_first_row ...
    1815              : !> \author Samuel Andermatt
    1816              : !> \note
    1817              : !>      C should have no overlap with A, B
    1818              : ! **************************************************************************************************
    1819            0 :    SUBROUTINE cp_complex_fm_gemm(transa, transb, m, n, k, alpha, A_re, A_im, B_re, B_im, beta, &
    1820              :                                  C_re, C_im, a_first_col, a_first_row, b_first_col, b_first_row, c_first_col, &
    1821              :                                  c_first_row)
    1822              :       CHARACTER(LEN=1), INTENT(IN)                       :: transa, transb
    1823              :       INTEGER, INTENT(IN)                                :: m, n, k
    1824              :       REAL(KIND=dp), INTENT(IN)                          :: alpha
    1825              :       TYPE(cp_fm_type), INTENT(IN)                       :: A_re, A_im, B_re, B_im
    1826              :       REAL(KIND=dp), INTENT(IN)                          :: beta
    1827              :       TYPE(cp_fm_type), INTENT(IN)                       :: C_re, C_im
    1828              :       INTEGER, INTENT(IN), OPTIONAL                      :: a_first_col, a_first_row, b_first_col, &
    1829              :                                                             b_first_row, c_first_col, c_first_row
    1830              : 
    1831              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_complex_fm_gemm'
    1832              : 
    1833              :       INTEGER                                            :: handle
    1834              : 
    1835            0 :       CALL timeset(routineN, handle)
    1836              : 
    1837              :       CALL cp_fm_gemm(transa, transb, m, n, k, alpha, A_re, B_re, beta, C_re, &
    1838              :                       a_first_col=a_first_col, &
    1839              :                       a_first_row=a_first_row, &
    1840              :                       b_first_col=b_first_col, &
    1841              :                       b_first_row=b_first_row, &
    1842              :                       c_first_col=c_first_col, &
    1843            0 :                       c_first_row=c_first_row)
    1844              :       CALL cp_fm_gemm(transa, transb, m, n, k, -alpha, A_im, B_im, 1.0_dp, C_re, &
    1845              :                       a_first_col=a_first_col, &
    1846              :                       a_first_row=a_first_row, &
    1847              :                       b_first_col=b_first_col, &
    1848              :                       b_first_row=b_first_row, &
    1849              :                       c_first_col=c_first_col, &
    1850            0 :                       c_first_row=c_first_row)
    1851              :       CALL cp_fm_gemm(transa, transb, m, n, k, alpha, A_re, B_im, beta, C_im, &
    1852              :                       a_first_col=a_first_col, &
    1853              :                       a_first_row=a_first_row, &
    1854              :                       b_first_col=b_first_col, &
    1855              :                       b_first_row=b_first_row, &
    1856              :                       c_first_col=c_first_col, &
    1857            0 :                       c_first_row=c_first_row)
    1858              :       CALL cp_fm_gemm(transa, transb, m, n, k, alpha, A_im, B_re, 1.0_dp, C_im, &
    1859              :                       a_first_col=a_first_col, &
    1860              :                       a_first_row=a_first_row, &
    1861              :                       b_first_col=b_first_col, &
    1862              :                       b_first_row=b_first_row, &
    1863              :                       c_first_col=c_first_col, &
    1864            0 :                       c_first_row=c_first_row)
    1865              : 
    1866            0 :       CALL timestop(handle)
    1867              : 
    1868            0 :    END SUBROUTINE cp_complex_fm_gemm
    1869              : 
    1870              : ! **************************************************************************************************
    1871              : !> \brief inverts a matrix using LU decomposition
    1872              : !>        the input matrix will be overwritten
    1873              : !> \param matrix   : input a general square non-singular matrix, outputs its inverse
    1874              : !> \param info_out : optional, if present outputs the info from (p)zgetri
    1875              : !> \author Lianheng Tong
    1876              : ! **************************************************************************************************
    1877            0 :    SUBROUTINE cp_fm_lu_invert(matrix, info_out)
    1878              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix
    1879              :       INTEGER, INTENT(OUT), OPTIONAL           :: info_out
    1880              : 
    1881              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_lu_invert'
    1882              : 
    1883              :       INTEGER :: nrows_global, handle, info, lwork
    1884            0 :       INTEGER, DIMENSION(:), ALLOCATABLE       :: ipivot
    1885              :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: mat
    1886            0 :       REAL(KIND=dp), DIMENSION(:), ALLOCATABLE :: work
    1887              : #if defined(__parallel)
    1888              :       INTEGER                                  :: liwork
    1889              :       INTEGER, DIMENSION(9)                    :: desca
    1890            0 :       INTEGER, DIMENSION(:), ALLOCATABLE       :: iwork
    1891              : #else
    1892              :       INTEGER                                  :: lda
    1893              : #endif
    1894              : 
    1895            0 :       CALL timeset(routineN, handle)
    1896              : 
    1897            0 :       mat => matrix%local_data
    1898            0 :       nrows_global = matrix%matrix_struct%nrow_global
    1899            0 :       CPASSERT(nrows_global == matrix%matrix_struct%ncol_global)
    1900            0 :       ALLOCATE (ipivot(nrows_global))
    1901              :       ! do LU decomposition
    1902              : #if defined(__parallel)
    1903            0 :       desca = matrix%matrix_struct%descriptor
    1904              :       CALL pdgetrf(nrows_global, nrows_global, &
    1905            0 :                    mat, 1, 1, desca, ipivot, info)
    1906              : #else
    1907              :       lda = SIZE(mat, 1)
    1908              :       CALL dgetrf(nrows_global, nrows_global, &
    1909              :                   mat, lda, ipivot, info)
    1910              : #endif
    1911            0 :       IF (info /= 0) THEN
    1912            0 :          CALL cp_abort(__LOCATION__, "LU decomposition has failed")
    1913              :       END IF
    1914              :       ! do inversion
    1915              : #if defined(__parallel)
    1916            0 :       ALLOCATE (iwork(1))
    1917              :       CALL pdgetri(nrows_global, mat, 1, 1, desca, &
    1918            0 :                    ipivot, work, -1, iwork, -1, info)
    1919              :       lwork = INT(work(1))
    1920            0 :       DEALLOCATE (work)
    1921              :       ALLOCATE (work(lwork))
    1922              :       liwork = INT(iwork(1))
    1923              :       DEALLOCATE (iwork)
    1924              :       ALLOCATE (iwork(liwork))
    1925              :       CALL pdgetri(nrows_global, mat, 1, 1, desca, &
    1926              :                    ipivot, work, lwork, iwork, liwork, info)
    1927              :       DEALLOCATE (iwork)
    1928              : #else
    1929              :       CALL dgetri(nrows_global, mat, lda, &
    1930              :                   ipivot, work, -1, info)
    1931              :       lwork = INT(work(1))
    1932              :       DEALLOCATE (work)
    1933              :       ALLOCATE (work(lwork))
    1934              :       CALL dgetri(nrows_global, mat, lda, &
    1935              :                   ipivot, work, lwork, info)
    1936              : #endif
    1937              :       DEALLOCATE (work)
    1938              :       DEALLOCATE (ipivot)
    1939              : 
    1940              :       IF (PRESENT(info_out)) THEN
    1941              :          info_out = info
    1942              :       ELSE
    1943              :          IF (info /= 0) &
    1944              :             CALL cp_abort(__LOCATION__, "LU inversion has failed")
    1945              :       END IF
    1946              : 
    1947              :       CALL timestop(handle)
    1948              : 
    1949            0 :    END SUBROUTINE cp_fm_lu_invert
    1950              : 
    1951              : ! **************************************************************************************************
    1952              : !> \brief norm of matrix using (p)dlange
    1953              : !> \param matrix   : input a general matrix
    1954              : !> \param mode     : 'M' max abs element value,
    1955              : !>                   '1' or 'O' one norm, i.e. maximum column sum
    1956              : !>                   'I' infinity norm, i.e. maximum row sum
    1957              : !>                   'F' or 'E' Frobenius norm, i.e. sqrt of sum of all squares of elements
    1958              : !> \return : the norm according to mode
    1959              : !> \author Lianheng Tong
    1960              : ! **************************************************************************************************
    1961          496 :    FUNCTION cp_fm_norm(matrix, mode) RESULT(res)
    1962              :       TYPE(cp_fm_type), INTENT(IN) :: matrix
    1963              :       CHARACTER, INTENT(IN) :: mode
    1964              :       REAL(KIND=dp) :: res
    1965              : 
    1966              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_norm'
    1967              : 
    1968              :       INTEGER :: nrows, ncols, handle, lwork, nrows_local, ncols_local
    1969              :       REAL(KIND=dp), DIMENSION(:, :), POINTER :: aa
    1970          496 :       REAL(KIND=dp), DIMENSION(:), ALLOCATABLE :: work
    1971              : #if defined(__parallel)
    1972              :       INTEGER, DIMENSION(9) :: desca
    1973              : #else
    1974              :       INTEGER :: lda
    1975              : #endif
    1976              : 
    1977          496 :       CALL timeset(routineN, handle)
    1978              : 
    1979              :       CALL cp_fm_get_info(matrix=matrix, &
    1980              :                           nrow_global=nrows, &
    1981              :                           ncol_global=ncols, &
    1982              :                           nrow_local=nrows_local, &
    1983          496 :                           ncol_local=ncols_local)
    1984          496 :       aa => matrix%local_data
    1985              : 
    1986              : #if defined(__parallel)
    1987         4960 :       desca = matrix%matrix_struct%descriptor
    1988              :       SELECT CASE (mode)
    1989              :       CASE ('M', 'm')
    1990          496 :          lwork = 1
    1991              :       CASE ('1', 'O', 'o')
    1992          496 :          lwork = ncols_local
    1993              :       CASE ('I', 'i')
    1994            0 :          lwork = nrows_local
    1995              :       CASE ('F', 'f', 'E', 'e')
    1996            0 :          lwork = 1
    1997              :       CASE DEFAULT
    1998          496 :          CPABORT("mode input is not valid")
    1999              :       END SELECT
    2000         1488 :       ALLOCATE (work(lwork))
    2001          496 :       res = pdlange(mode, nrows, ncols, aa, 1, 1, desca, work)
    2002          496 :       DEALLOCATE (work)
    2003              : #else
    2004              :       SELECT CASE (mode)
    2005              :       CASE ('M', 'm')
    2006              :          lwork = 1
    2007              :       CASE ('1', 'O', 'o')
    2008              :          lwork = 1
    2009              :       CASE ('I', 'i')
    2010              :          lwork = nrows
    2011              :       CASE ('F', 'f', 'E', 'e')
    2012              :          lwork = 1
    2013              :       CASE DEFAULT
    2014              :          CPABORT("mode input is not valid")
    2015              :       END SELECT
    2016              :       ALLOCATE (work(lwork))
    2017              :       lda = SIZE(aa, 1)
    2018              :       res = dlange(mode, nrows, ncols, aa, lda, work)
    2019              :       DEALLOCATE (work)
    2020              : #endif
    2021              : 
    2022          496 :       CALL timestop(handle)
    2023              : 
    2024          496 :    END FUNCTION cp_fm_norm
    2025              : 
    2026              : ! **************************************************************************************************
    2027              : !> \brief trace of a matrix using pdlatra
    2028              : !> \param matrix   : input a square matrix
    2029              : !> \return : the trace
    2030              : !> \author Lianheng Tong
    2031              : ! **************************************************************************************************
    2032            0 :    FUNCTION cp_fm_latra(matrix) RESULT(res)
    2033              :       TYPE(cp_fm_type), INTENT(IN) :: matrix
    2034              :       REAL(KIND=dp) :: res
    2035              : 
    2036              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_latra'
    2037              : 
    2038              :       INTEGER :: nrows, ncols, handle
    2039              :       REAL(KIND=dp), DIMENSION(:, :), POINTER :: aa
    2040              : #if defined(__parallel)
    2041              :       INTEGER, DIMENSION(9) :: desca
    2042              : #else
    2043              :       INTEGER :: ii
    2044              : #endif
    2045              : 
    2046            0 :       CALL timeset(routineN, handle)
    2047              : 
    2048            0 :       nrows = matrix%matrix_struct%nrow_global
    2049            0 :       ncols = matrix%matrix_struct%ncol_global
    2050            0 :       CPASSERT(nrows == ncols)
    2051            0 :       aa => matrix%local_data
    2052              : 
    2053              : #if defined(__parallel)
    2054            0 :       desca = matrix%matrix_struct%descriptor
    2055            0 :       res = pdlatra(nrows, aa, 1, 1, desca)
    2056              : #else
    2057              :       res = 0.0_dp
    2058              :       DO ii = 1, nrows
    2059              :          res = res + aa(ii, ii)
    2060              :       END DO
    2061              : #endif
    2062              : 
    2063            0 :       CALL timestop(handle)
    2064              : 
    2065            0 :    END FUNCTION cp_fm_latra
    2066              : 
    2067              : ! **************************************************************************************************
    2068              : !> \brief compute a QR factorization with column pivoting of a M-by-N distributed matrix
    2069              : !>        sub( A ) = A(IA:IA+M-1,JA:JA+N-1)
    2070              : !> \param matrix   : input M-by-N distributed matrix sub( A ) which is to be factored
    2071              : !> \param tau      : scalar factors TAU of the elementary reflectors. TAU is tied to the distributed matrix A
    2072              : !> \param nrow ...
    2073              : !> \param ncol ...
    2074              : !> \param first_row ...
    2075              : !> \param first_col ...
    2076              : !> \author MI
    2077              : ! **************************************************************************************************
    2078           38 :    SUBROUTINE cp_fm_pdgeqpf(matrix, tau, nrow, ncol, first_row, first_col)
    2079              : 
    2080              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix
    2081              :       REAL(KIND=dp), DIMENSION(:), POINTER               :: tau
    2082              :       INTEGER, INTENT(IN)                                :: nrow, ncol, first_row, first_col
    2083              : 
    2084              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_pdgeqpf'
    2085              : 
    2086              :       INTEGER                                            :: handle
    2087              :       INTEGER                                            :: info, lwork
    2088           38 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: ipiv
    2089              :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: a
    2090              :       REAL(KIND=dp), DIMENSION(:), POINTER               :: work
    2091              : #if defined(__parallel)
    2092              :       INTEGER, DIMENSION(9) :: descc
    2093              : #else
    2094              :       INTEGER :: lda
    2095              : #endif
    2096              : 
    2097           38 :       CALL timeset(routineN, handle)
    2098              : 
    2099           38 :       a => matrix%local_data
    2100           38 :       lwork = -1
    2101          114 :       ALLOCATE (work(2*nrow))
    2102          114 :       ALLOCATE (ipiv(ncol))
    2103           38 :       info = 0
    2104              : 
    2105              : #if defined(__parallel)
    2106          380 :       descc(:) = matrix%matrix_struct%descriptor(:)
    2107              :       ! Call SCALAPACK routine to get optimal work dimension
    2108           38 :       CALL pdgeqpf(nrow, ncol, a, first_row, first_col, descc, ipiv, tau, work, lwork, info)
    2109           38 :       lwork = INT(work(1))
    2110           38 :       DEALLOCATE (work)
    2111          114 :       ALLOCATE (work(lwork))
    2112          254 :       tau = 0.0_dp
    2113           38 :       ipiv = 0
    2114              : 
    2115              :       ! Call SCALAPACK routine to get QR decomposition of CTs
    2116           38 :       CALL pdgeqpf(nrow, ncol, a, first_row, first_col, descc, ipiv, tau, work, lwork, info)
    2117              : #else
    2118              :       CPASSERT(first_row == 1 .AND. first_col == 1)
    2119              :       lda = SIZE(a, 1)
    2120              :       CALL dgeqp3(nrow, ncol, a, lda, ipiv, tau, work, lwork, info)
    2121              :       lwork = INT(work(1))
    2122              :       DEALLOCATE (work)
    2123              :       ALLOCATE (work(lwork))
    2124              :       tau = 0.0_dp
    2125              :       ipiv = 0
    2126              :       CALL dgeqp3(nrow, ncol, a, lda, ipiv, tau, work, lwork, info)
    2127              : #endif
    2128           38 :       CPASSERT(info == 0)
    2129              : 
    2130           38 :       DEALLOCATE (work)
    2131           38 :       DEALLOCATE (ipiv)
    2132              : 
    2133           38 :       CALL timestop(handle)
    2134              : 
    2135           38 :    END SUBROUTINE cp_fm_pdgeqpf
    2136              : 
    2137              : ! **************************************************************************************************
    2138              : !> \brief generates an M-by-N real distributed matrix Q denoting A(IA:IA+M-1,JA:JA+N-1)
    2139              : !>         with orthonormal columns, which is defined as the first N columns of a product of K
    2140              : !>         elementary reflectors of order M
    2141              : !> \param matrix : On entry, the j-th column must contain the vector which defines the elementary reflector
    2142              : !>                  as returned from PDGEQRF
    2143              : !>                 On exit it contains  the M-by-N distributed matrix Q
    2144              : !> \param tau :   contains the scalar factors TAU of elementary reflectors  as returned by PDGEQRF
    2145              : !> \param nrow ...
    2146              : !> \param first_row ...
    2147              : !> \param first_col ...
    2148              : !> \author MI
    2149              : ! **************************************************************************************************
    2150           38 :    SUBROUTINE cp_fm_pdorgqr(matrix, tau, nrow, first_row, first_col)
    2151              : 
    2152              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix
    2153              :       REAL(KIND=dp), DIMENSION(:), POINTER               :: tau
    2154              :       INTEGER, INTENT(IN)                                :: nrow, first_row, first_col
    2155              : 
    2156              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_pdorgqr'
    2157              : 
    2158              :       INTEGER                                            :: handle
    2159              :       INTEGER                                            :: info, lwork
    2160              :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: a
    2161              :       REAL(KIND=dp), DIMENSION(:), POINTER               :: work
    2162              : #if defined(__parallel)
    2163              :       INTEGER, DIMENSION(9) :: descc
    2164              : #else
    2165              :       INTEGER :: lda
    2166              : #endif
    2167              : 
    2168           38 :       CALL timeset(routineN, handle)
    2169              : 
    2170           38 :       a => matrix%local_data
    2171           38 :       lwork = -1
    2172          114 :       ALLOCATE (work(2*nrow))
    2173           38 :       info = 0
    2174              : 
    2175              : #if defined(__parallel)
    2176          380 :       descc(:) = matrix%matrix_struct%descriptor(:)
    2177              : 
    2178           38 :       CALL pdorgqr(nrow, nrow, nrow, a, first_row, first_col, descc, tau, work, lwork, info)
    2179           38 :       CPASSERT(info == 0)
    2180           38 :       lwork = INT(work(1))
    2181           38 :       DEALLOCATE (work)
    2182          114 :       ALLOCATE (work(lwork))
    2183              : 
    2184              :       ! Call SCALAPACK routine to get Q
    2185           38 :       CALL pdorgqr(nrow, nrow, nrow, a, first_row, first_col, descc, tau, work, lwork, info)
    2186              : #else
    2187              :       CPASSERT(first_row == 1 .AND. first_col == 1)
    2188              :       lda = SIZE(a, 1)
    2189              :       CALL dorgqr(nrow, nrow, nrow, a, lda, tau, work, lwork, info)
    2190              :       lwork = INT(work(1))
    2191              :       DEALLOCATE (work)
    2192              :       ALLOCATE (work(lwork))
    2193              :       CALL dorgqr(nrow, nrow, nrow, a, lda, tau, work, lwork, info)
    2194              : #endif
    2195           38 :       CPASSERT(INFO == 0)
    2196              : 
    2197           38 :       DEALLOCATE (work)
    2198           38 :       CALL timestop(handle)
    2199              : 
    2200           38 :    END SUBROUTINE cp_fm_pdorgqr
    2201              : 
    2202              : ! **************************************************************************************************
    2203              : !> \brief Applies a planar rotation defined by cs and sn to the i'th and j'th rows.
    2204              : !> \param matrix ...
    2205              : !> \param irow ...
    2206              : !> \param jrow ...
    2207              : !> \param cs cosine of the rotation angle
    2208              : !> \param sn sinus of the rotation angle
    2209              : !> \author Ole Schuett
    2210              : ! **************************************************************************************************
    2211        31928 :    SUBROUTINE cp_fm_rot_rows(matrix, irow, jrow, cs, sn)
    2212              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix
    2213              :       INTEGER, INTENT(IN)                      :: irow, jrow
    2214              :       REAL(dp), INTENT(IN)                     :: cs, sn
    2215              : 
    2216              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_rot_rows'
    2217              :       INTEGER                                  :: handle, ncol
    2218              : 
    2219              : #if defined(__parallel)
    2220              :       INTEGER                                  :: info, lwork
    2221              :       INTEGER, DIMENSION(9)                    :: desc
    2222        31928 :       REAL(dp), DIMENSION(:), ALLOCATABLE      :: work
    2223              : #endif
    2224        31928 :       CALL timeset(routineN, handle)
    2225        31928 :       CALL cp_fm_get_info(matrix, ncol_global=ncol)
    2226              : #if defined(__parallel)
    2227        31928 :       IF (1 /= matrix%matrix_struct%context%n_pid) THEN
    2228        31928 :          lwork = 2*ncol + 1
    2229        95784 :          ALLOCATE (work(lwork))
    2230       319280 :          desc(:) = matrix%matrix_struct%descriptor(:)
    2231              :          CALL pdrot(ncol, &
    2232              :                     matrix%local_data(1, 1), irow, 1, desc, ncol, &
    2233              :                     matrix%local_data(1, 1), jrow, 1, desc, ncol, &
    2234        31928 :                     cs, sn, work, lwork, info)
    2235        31928 :          CPASSERT(info == 0)
    2236        31928 :          DEALLOCATE (work)
    2237              :       ELSE
    2238              : #endif
    2239            0 :          CALL drot(ncol, matrix%local_data(irow, 1), ncol, matrix%local_data(jrow, 1), ncol, cs, sn)
    2240              : #if defined(__parallel)
    2241              :       END IF
    2242              : #endif
    2243        31928 :       CALL timestop(handle)
    2244        31928 :    END SUBROUTINE cp_fm_rot_rows
    2245              : 
    2246              : ! **************************************************************************************************
    2247              : !> \brief Applies a planar rotation defined by cs and sn to the i'th and j'th columnns.
    2248              : !> \param matrix ...
    2249              : !> \param icol ...
    2250              : !> \param jcol ...
    2251              : !> \param cs cosine of the rotation angle
    2252              : !> \param sn sinus of the rotation angle
    2253              : !> \author Ole Schuett
    2254              : ! **************************************************************************************************
    2255        36928 :    SUBROUTINE cp_fm_rot_cols(matrix, icol, jcol, cs, sn)
    2256              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix
    2257              :       INTEGER, INTENT(IN)                      :: icol, jcol
    2258              :       REAL(dp), INTENT(IN)                     :: cs, sn
    2259              : 
    2260              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_rot_cols'
    2261              :       INTEGER                                  :: handle, nrow
    2262              : 
    2263              : #if defined(__parallel)
    2264              :       INTEGER                                  :: info, lwork
    2265              :       INTEGER, DIMENSION(9)                    :: desc
    2266        36928 :       REAL(dp), DIMENSION(:), ALLOCATABLE      :: work
    2267              : #endif
    2268        36928 :       CALL timeset(routineN, handle)
    2269        36928 :       CALL cp_fm_get_info(matrix, nrow_global=nrow)
    2270              : #if defined(__parallel)
    2271        36928 :       IF (1 /= matrix%matrix_struct%context%n_pid) THEN
    2272        36928 :          lwork = 2*nrow + 1
    2273       110784 :          ALLOCATE (work(lwork))
    2274       369280 :          desc(:) = matrix%matrix_struct%descriptor(:)
    2275              :          CALL pdrot(nrow, &
    2276              :                     matrix%local_data(1, 1), 1, icol, desc, 1, &
    2277              :                     matrix%local_data(1, 1), 1, jcol, desc, 1, &
    2278        36928 :                     cs, sn, work, lwork, info)
    2279        36928 :          CPASSERT(info == 0)
    2280        36928 :          DEALLOCATE (work)
    2281              :       ELSE
    2282              : #endif
    2283            0 :          CALL drot(nrow, matrix%local_data(1, icol), 1, matrix%local_data(1, jcol), 1, cs, sn)
    2284              : #if defined(__parallel)
    2285              :       END IF
    2286              : #endif
    2287        36928 :       CALL timestop(handle)
    2288        36928 :    END SUBROUTINE cp_fm_rot_cols
    2289              : 
    2290              : ! **************************************************************************************************
    2291              : !> \brief Orthonormalizes selected rows and columns of a full matrix, matrix_a
    2292              : !> \param matrix_a ...
    2293              : !> \param B ...
    2294              : !> \param nrows number of rows of matrix_a, optional, defaults to size(matrix_a,1)
    2295              : !> \param ncols number of columns of matrix_a, optional, defaults to size(matrix_a, 2)
    2296              : !> \param start_row starting index of rows, optional, defaults to 1
    2297              : !> \param start_col starting index of columns, optional, defaults to 1
    2298              : !> \param do_norm ...
    2299              : !> \param do_print ...
    2300              : ! **************************************************************************************************
    2301            0 :    SUBROUTINE cp_fm_Gram_Schmidt_orthonorm(matrix_a, B, nrows, ncols, start_row, start_col, &
    2302              :                                            do_norm, do_print)
    2303              : 
    2304              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix_a
    2305              :       REAL(kind=dp), DIMENSION(:, :), INTENT(OUT)        :: B
    2306              :       INTEGER, INTENT(IN), OPTIONAL                      :: nrows, ncols, start_row, start_col
    2307              :       LOGICAL, INTENT(IN), OPTIONAL                      :: do_norm, do_print
    2308              : 
    2309              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_Gram_Schmidt_orthonorm'
    2310              : 
    2311              :       INTEGER :: end_col_global, end_col_local, end_row_global, end_row_local, handle, i, j, &
    2312              :                  j_col, ncol_global, ncol_local, nrow_global, nrow_local, start_col_global, &
    2313              :                  start_col_local, start_row_global, start_row_local, this_col, unit_nr
    2314            0 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
    2315              :       LOGICAL                                            :: my_do_norm, my_do_print
    2316              :       REAL(KIND=dp)                                      :: norm
    2317            0 :       REAL(kind=dp), DIMENSION(:, :), POINTER            :: a
    2318              : 
    2319            0 :       CALL timeset(routineN, handle)
    2320              : 
    2321            0 :       my_do_norm = .TRUE.
    2322            0 :       IF (PRESENT(do_norm)) my_do_norm = do_norm
    2323              : 
    2324            0 :       my_do_print = .FALSE.
    2325            0 :       IF (PRESENT(do_print) .AND. (my_do_norm)) my_do_print = do_print
    2326              : 
    2327            0 :       unit_nr = -1
    2328            0 :       IF (my_do_print) THEN
    2329            0 :          unit_nr = cp_logger_get_default_unit_nr()
    2330            0 :          IF (unit_nr < 1) my_do_print = .FALSE.
    2331              :       END IF
    2332              : 
    2333            0 :       IF (SIZE(B) /= 0) THEN
    2334            0 :          IF (PRESENT(nrows)) THEN
    2335            0 :             nrow_global = nrows
    2336              :          ELSE
    2337            0 :             nrow_global = SIZE(B, 1)
    2338              :          END IF
    2339              : 
    2340            0 :          IF (PRESENT(ncols)) THEN
    2341            0 :             ncol_global = ncols
    2342              :          ELSE
    2343            0 :             ncol_global = SIZE(B, 2)
    2344              :          END IF
    2345              : 
    2346            0 :          IF (PRESENT(start_row)) THEN
    2347            0 :             start_row_global = start_row
    2348              :          ELSE
    2349              :             start_row_global = 1
    2350              :          END IF
    2351              : 
    2352            0 :          IF (PRESENT(start_col)) THEN
    2353            0 :             start_col_global = start_col
    2354              :          ELSE
    2355              :             start_col_global = 1
    2356              :          END IF
    2357              : 
    2358            0 :          end_row_global = start_row_global + nrow_global - 1
    2359            0 :          end_col_global = start_col_global + ncol_global - 1
    2360              : 
    2361              :          CALL cp_fm_get_info(matrix=matrix_a, &
    2362              :                              nrow_global=nrow_global, ncol_global=ncol_global, &
    2363              :                              nrow_local=nrow_local, ncol_local=ncol_local, &
    2364            0 :                              row_indices=row_indices, col_indices=col_indices)
    2365            0 :          IF (end_row_global > nrow_global) THEN
    2366              :             end_row_global = nrow_global
    2367              :          END IF
    2368            0 :          IF (end_col_global > ncol_global) THEN
    2369              :             end_col_global = ncol_global
    2370              :          END IF
    2371              : 
    2372              :          ! find out row/column indices of locally stored matrix elements that
    2373              :          ! needs to be copied.
    2374              :          ! Arrays row_indices and col_indices are assumed to be sorted in
    2375              :          ! ascending order
    2376            0 :          DO start_row_local = 1, nrow_local
    2377            0 :             IF (row_indices(start_row_local) >= start_row_global) EXIT
    2378              :          END DO
    2379              : 
    2380            0 :          DO end_row_local = start_row_local, nrow_local
    2381            0 :             IF (row_indices(end_row_local) > end_row_global) EXIT
    2382              :          END DO
    2383            0 :          end_row_local = end_row_local - 1
    2384              : 
    2385            0 :          DO start_col_local = 1, ncol_local
    2386            0 :             IF (col_indices(start_col_local) >= start_col_global) EXIT
    2387              :          END DO
    2388              : 
    2389            0 :          DO end_col_local = start_col_local, ncol_local
    2390            0 :             IF (col_indices(end_col_local) > end_col_global) EXIT
    2391              :          END DO
    2392            0 :          end_col_local = end_col_local - 1
    2393              : 
    2394            0 :          a => matrix_a%local_data
    2395              : 
    2396            0 :          this_col = col_indices(start_col_local) - start_col_global + 1
    2397              : 
    2398            0 :          B(:, this_col) = a(:, start_col_local)
    2399              : 
    2400            0 :          IF (my_do_norm) THEN
    2401            0 :             norm = SQRT(accurate_dot_product(B(:, this_col), B(:, this_col)))
    2402            0 :             B(:, this_col) = B(:, this_col)/norm
    2403            0 :             IF (my_do_print) WRITE (unit_nr, '(I3,F8.3)') this_col, norm
    2404              :          END IF
    2405              : 
    2406            0 :          DO i = start_col_local + 1, end_col_local
    2407            0 :             this_col = col_indices(i) - start_col_global + 1
    2408            0 :             B(:, this_col) = a(:, i)
    2409            0 :             DO j = start_col_local, i - 1
    2410            0 :                j_col = col_indices(j) - start_col_global + 1
    2411              :                B(:, this_col) = B(:, this_col) - &
    2412              :                                 accurate_dot_product(B(:, j_col), B(:, this_col))* &
    2413            0 :                                 B(:, j_col)/accurate_dot_product(B(:, j_col), B(:, j_col))
    2414              :             END DO
    2415              : 
    2416            0 :             IF (my_do_norm) THEN
    2417            0 :                norm = SQRT(accurate_dot_product(B(:, this_col), B(:, this_col)))
    2418            0 :                B(:, this_col) = B(:, this_col)/norm
    2419            0 :                IF (my_do_print) WRITE (unit_nr, '(I3,F8.3)') this_col, norm
    2420              :             END IF
    2421              : 
    2422              :          END DO
    2423            0 :          CALL matrix_a%matrix_struct%para_env%sum(B)
    2424              :       END IF
    2425              : 
    2426            0 :       CALL timestop(handle)
    2427              : 
    2428            0 :    END SUBROUTINE cp_fm_Gram_Schmidt_orthonorm
    2429              : 
    2430              : ! **************************************************************************************************
    2431              : !> \brief Cholesky decomposition
    2432              : !> \param fm_matrix ...
    2433              : !> \param n ...
    2434              : !> \param uplo triangular format; defaults to 'U'
    2435              : ! **************************************************************************************************
    2436        10544 :    SUBROUTINE cp_fm_potrf(fm_matrix, n, uplo)
    2437              :       TYPE(cp_fm_type)                         :: fm_matrix
    2438              :       INTEGER, INTENT(IN)                      :: n
    2439              :       CHARACTER, INTENT(IN), OPTIONAL          :: uplo
    2440              : 
    2441              :       CHARACTER                                :: myuplo
    2442              :       INTEGER                                  :: info
    2443        10544 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
    2444              : #if defined(__parallel)
    2445              :       INTEGER, DIMENSION(9)                    :: desca
    2446              : #endif
    2447              : 
    2448        10544 :       myuplo = 'U'
    2449            0 :       IF (PRESENT(uplo)) myuplo = uplo
    2450              : 
    2451        10544 :       a => fm_matrix%local_data
    2452              : #if defined(__parallel)
    2453       105440 :       desca(:) = fm_matrix%matrix_struct%descriptor(:)
    2454        10544 :       CALL pdpotrf(myuplo, n, a(1, 1), 1, 1, desca, info)
    2455              : #else
    2456              :       CALL dpotrf(myuplo, n, a(1, 1), SIZE(a, 1), info)
    2457              : #endif
    2458        10544 :       IF (info /= 0) &
    2459            0 :          CPABORT("Cholesky decomposition failed. Matrix ill-conditioned?")
    2460              : 
    2461        10544 :    END SUBROUTINE cp_fm_potrf
    2462              : 
    2463              : ! **************************************************************************************************
    2464              : !> \brief Invert trianguar matrix
    2465              : !> \param fm_matrix the matrix to invert (triangular matrix according to uplo)
    2466              : !> \param n size of the matrix to invert
    2467              : !> \param uplo triangular format; defaults to 'U'
    2468              : ! **************************************************************************************************
    2469         9750 :    SUBROUTINE cp_fm_potri(fm_matrix, n, uplo)
    2470              :       TYPE(cp_fm_type)                         :: fm_matrix
    2471              :       INTEGER, INTENT(IN)                      :: n
    2472              :       CHARACTER, INTENT(IN), OPTIONAL          :: uplo
    2473              : 
    2474              :       CHARACTER                                :: myuplo
    2475         9750 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
    2476              :       INTEGER                                  :: info
    2477              : #if defined(__parallel)
    2478              :       INTEGER, DIMENSION(9)                    :: desca
    2479              : #endif
    2480              : 
    2481         9750 :       myuplo = 'U'
    2482            0 :       IF (PRESENT(uplo)) myuplo = uplo
    2483              : 
    2484         9750 :       a => fm_matrix%local_data
    2485              : #if defined(__parallel)
    2486        97500 :       desca(:) = fm_matrix%matrix_struct%descriptor(:)
    2487         9750 :       CALL pdpotri(myuplo, n, a(1, 1), 1, 1, desca, info)
    2488              : #else
    2489              :       CALL dpotri(myuplo, n, a(1, 1), SIZE(a, 1), info)
    2490              : #endif
    2491         9750 :       CPASSERT(info == 0)
    2492         9750 :    END SUBROUTINE cp_fm_potri
    2493              : 
    2494              : ! **************************************************************************************************
    2495              : !> \brief Calculates
    2496              : !>        yv = alpha*amat*xv + beta*yv
    2497              : !>        where amat: fm matrix
    2498              : !>              xv  : vector replicated
    2499              : !>              yv  : vector replicated
    2500              : !>        Defaults: alpha = 1, beta = 0
    2501              : ! **************************************************************************************************
    2502        21320 :    SUBROUTINE cp_fm_matvec(amat, xv, yv, alpha, beta)
    2503              :       TYPE(cp_fm_type), INTENT(IN)                   :: amat
    2504              :       REAL(KIND=dp), DIMENSION(:), INTENT(IN)        :: xv
    2505              :       REAL(KIND=dp), DIMENSION(:), INTENT(INOUT)     :: yv
    2506              :       REAL(KIND=dp), OPTIONAL, INTENT(IN)            :: alpha, beta
    2507              : 
    2508              :       INTEGER                                        :: na, nc, nx, ny
    2509              :       REAL(KIND=dp)                                  :: aval, bval
    2510              : #if defined(__parallel)
    2511              :       INTEGER                                        :: nrl, ncl, ic, ir
    2512        21320 :       INTEGER, DIMENSION(:), POINTER                 :: rind, cind
    2513        21320 :       REAL(KIND=dp), DIMENSION(:), ALLOCATABLE       :: xvl, yvl, yvm
    2514              : #endif
    2515              : 
    2516        21320 :       aval = 1.0_dp
    2517        21320 :       IF (PRESENT(alpha)) aval = alpha
    2518        21320 :       bval = 0.0_dp
    2519        21320 :       IF (PRESENT(beta)) bval = beta
    2520              : 
    2521        21320 :       CALL cp_fm_get_info(amat, nrow_global=na, ncol_global=nc)
    2522        21320 :       nx = SIZE(xv)
    2523        21320 :       ny = SIZE(yv)
    2524        21320 :       IF ((nx /= ny) .OR. (nc /= nx)) THEN
    2525            0 :          CPABORT("cp_fm_matvec: incompatible dimensions")
    2526              :       END IF
    2527              : #if defined(__parallel)
    2528              :       CALL cp_fm_get_info(amat, nrow_local=nrl, ncol_local=ncl, &
    2529        21320 :                           row_indices=rind, col_indices=cind)
    2530       149240 :       ALLOCATE (xvl(ncl), yvl(nrl), yvm(ny))
    2531       210636 :       DO ic = 1, ncl
    2532       210636 :          xvl(ic) = xv(cind(ic))
    2533              :       END DO
    2534      4017978 :       yvl(1:nrl) = MATMUL(amat%local_data, xvl(1:ncl))
    2535        21320 :       yvm = 0.0_dp
    2536       115978 :       DO ir = 1, nrl
    2537       115978 :          yvm(rind(ir)) = yvl(ir)
    2538              :       END DO
    2539        21320 :       CALL amat%matrix_struct%para_env%sum(yvm)
    2540        21320 :       IF (bval == 0.0_dp) THEN
    2541       118562 :          yv = aval*yvm
    2542              :       ELSE
    2543        92074 :          yv = bval*yv + aval*yvm
    2544              :       END IF
    2545              : #else
    2546              :       IF (bval == 0.0_dp) THEN
    2547              :          yv = aval*MATMUL(amat%local_data, xv)
    2548              :       ELSE
    2549              :          yv = bval*yv + aval*MATMUL(amat%local_data, xv)
    2550              :       END IF
    2551              : #endif
    2552              : 
    2553        85280 :    END SUBROUTINE cp_fm_matvec
    2554              : 
    2555              : END MODULE cp_fm_basic_linalg
        

Generated by: LCOV version 2.0-1