LCOV - code coverage report
Current view: top level - src/fm - cp_fm_basic_linalg.F (source / functions) Coverage Total Hit
Test: CP2K Regtests (git:936074a) Lines: 70.8 % 845 598
Test Date: 2025-12-04 06:27:48 Functions: 67.4 % 43 29

            Line data    Source code
       1              : !--------------------------------------------------------------------------------------------------!
       2              : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3              : !   Copyright 2000-2025 CP2K developers group <https://cp2k.org>                                   !
       4              : !                                                                                                  !
       5              : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6              : !--------------------------------------------------------------------------------------------------!
       7              : 
       8              : ! **************************************************************************************************
       9              : !> \brief Basic linear algebra operations for full matrices.
      10              : !> \par History
      11              : !>      08.2002 split out of qs_blacs [fawzi]
      12              : !> \author Fawzi Mohamed
      13              : ! **************************************************************************************************
      14              : MODULE cp_fm_basic_linalg
      15              :    USE cp_blacs_env, ONLY: cp_blacs_env_type
      16              :    USE cp_fm_struct, ONLY: cp_fm_struct_equivalent
      17              :    USE cp_fm_types, ONLY: &
      18              :       cp_fm_create, cp_fm_get_diag, cp_fm_get_info, cp_fm_get_submatrix, cp_fm_p_type, &
      19              :       cp_fm_release, cp_fm_set_all, cp_fm_set_element, cp_fm_set_submatrix, cp_fm_to_fm, &
      20              :       cp_fm_type
      21              :    USE cp_log_handling, ONLY: cp_logger_get_default_unit_nr, &
      22              :                               cp_to_string
      23              :    USE kahan_sum, ONLY: accurate_dot_product, &
      24              :                         accurate_sum
      25              :    USE kinds, ONLY: dp, &
      26              :                     int_8, &
      27              :                     sp
      28              :    USE machine, ONLY: m_memory
      29              :    USE mathlib, ONLY: get_pseudo_inverse_svd, &
      30              :                       invert_matrix
      31              :    USE message_passing, ONLY: mp_comm_type
      32              : #include "../base/base_uses.f90"
      33              : 
      34              :    IMPLICIT NONE
      35              :    PRIVATE
      36              : 
      37              :    LOGICAL, PRIVATE, PARAMETER :: debug_this_module = .TRUE.
      38              :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_fm_basic_linalg'
      39              : 
      40              :    PUBLIC :: cp_fm_scale, & ! scale a matrix
      41              :              cp_fm_scale_and_add, & ! scale and add two matrices
      42              :              cp_fm_geadd, & ! general addition
      43              :              cp_fm_add_columns, & ! daxpy type column add
      44              :              cp_fm_column_scale, & ! scale columns of a matrix
      45              :              cp_fm_row_scale, & ! scale rows of a matrix
      46              :              cp_fm_trace, & ! trace of the transpose(A)*B
      47              :              cp_fm_contracted_trace, & ! sum_{i,...,k} Tr [A(i,...,k)^T * B(i,...,k)]
      48              :              cp_fm_norm, & ! different norms of A
      49              :              cp_fm_schur_product, & ! schur product
      50              :              cp_fm_transpose, & ! transpose a matrix
      51              :              cp_fm_uplo_to_full, & ! symmetrise a triangular matrix
      52              :              cp_fm_syrk, & ! rank k update
      53              :              cp_fm_triangular_multiply, & ! triangular matrix multiply / solve
      54              :              cp_fm_symm, & ! multiply a symmetric with a non-symmetric matrix
      55              :              cp_fm_gemm, & ! multiply two matrices
      56              :              cp_complex_fm_gemm, & ! multiply two complex matrices, represented by non_complex fm matrices
      57              :              cp_fm_invert, & ! computes the inverse and determinant
      58              :              cp_fm_frobenius_norm, & ! frobenius norm
      59              :              cp_fm_triangular_invert, & ! compute the reciprocal of a triangular matrix
      60              :              cp_fm_qr_factorization, & ! compute the QR factorization of a rectangular matrix
      61              :              cp_fm_solve, & ! solves the equation  A*B=C A and C are input
      62              :              cp_fm_pdgeqpf, & ! compute a QR factorization with column pivoting of a M-by-N distributed matrix
      63              :              cp_fm_pdorgqr, & ! generates an M-by-N as first N columns of a product of K elementary reflectors
      64              :              cp_fm_potrf, & ! Cholesky decomposition
      65              :              cp_fm_potri, & ! Invert triangular matrix
      66              :              cp_fm_rot_rows, & ! rotates two rows
      67              :              cp_fm_rot_cols, & ! rotates two columns
      68              :              cp_fm_Gram_Schmidt_orthonorm, & ! Gram-Schmidt orthonormalization of columns of a full matrix, &
      69              :              cp_fm_det, & ! determinant of a real matrix with correct sign
      70              :              cp_fm_matvec ! matrix-vector multiplication (vector replicated)
      71              : 
      72              :    REAL(KIND=dp), EXTERNAL :: dlange, pdlange, pdlatra
      73              :    REAL(KIND=sp), EXTERNAL :: slange, pslange, pslatra
      74              : 
      75              :    INTERFACE cp_fm_trace
      76              :       MODULE PROCEDURE cp_fm_trace_a0b0t0
      77              :       MODULE PROCEDURE cp_fm_trace_a1b0t1_a
      78              :       MODULE PROCEDURE cp_fm_trace_a1b0t1_p
      79              :       MODULE PROCEDURE cp_fm_trace_a1b1t1_aa
      80              :       MODULE PROCEDURE cp_fm_trace_a1b1t1_ap
      81              :       MODULE PROCEDURE cp_fm_trace_a1b1t1_pa
      82              :       MODULE PROCEDURE cp_fm_trace_a1b1t1_pp
      83              :    END INTERFACE cp_fm_trace
      84              : 
      85              :    INTERFACE cp_fm_contracted_trace
      86              :       MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2_aa
      87              :       MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2_ap
      88              :       MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2_pa
      89              :       MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2_pp
      90              :    END INTERFACE cp_fm_contracted_trace
      91              : CONTAINS
      92              : 
      93              : ! **************************************************************************************************
      94              : !> \brief Computes the determinant (with a correct sign even in parallel environment!) of a real square matrix
      95              : !> \author A. Sinyavskiy (andrey.sinyavskiy@chem.uzh.ch)
      96              : ! **************************************************************************************************
      97            0 :    SUBROUTINE cp_fm_det(matrix_a, det_a)
      98              : 
      99              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix_a
     100              :       REAL(KIND=dp), INTENT(OUT)               :: det_a
     101              :       REAL(KIND=dp)                            :: determinant
     102              :       TYPE(cp_fm_type)                         :: matrix_lu
     103              :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
     104              :       INTEGER                                  :: n, i, info, P
     105            0 :       INTEGER, ALLOCATABLE, DIMENSION(:)       :: ipivot
     106              :       REAL(KIND=dp), DIMENSION(:), POINTER     :: diag
     107              : 
     108              : #if defined(__parallel)
     109              :       INTEGER                                  :: myprow, nprow, npcol, nrow_local, nrow_block, irow_local
     110              :       INTEGER, DIMENSION(9)                    :: desca
     111              : #endif
     112              : 
     113              :       CALL cp_fm_create(matrix=matrix_lu, &
     114              :                         matrix_struct=matrix_a%matrix_struct, &
     115            0 :                         name="A_lu"//TRIM(ADJUSTL(cp_to_string(1)))//"MATRIX")
     116            0 :       CALL cp_fm_to_fm(matrix_a, matrix_lu)
     117              : 
     118            0 :       a => matrix_lu%local_data
     119            0 :       n = matrix_lu%matrix_struct%nrow_global
     120            0 :       ALLOCATE (ipivot(n))
     121            0 :       ipivot(:) = 0
     122            0 :       P = 0
     123            0 :       ALLOCATE (diag(n))
     124            0 :       diag(:) = 0.0_dp
     125              : #if defined(__parallel)
     126              :       ! Use LU decomposition
     127            0 :       desca(:) = matrix_lu%matrix_struct%descriptor(:)
     128            0 :       CALL pdgetrf(n, n, a, 1, 1, desca, ipivot, info)
     129            0 :       CALL cp_fm_get_diag(matrix_lu, diag)
     130            0 :       determinant = PRODUCT(diag)
     131            0 :       myprow = matrix_lu%matrix_struct%context%mepos(1)
     132            0 :       nprow = matrix_lu%matrix_struct%context%num_pe(1)
     133            0 :       npcol = matrix_lu%matrix_struct%context%num_pe(2)
     134            0 :       nrow_local = matrix_lu%matrix_struct%nrow_locals(myprow)
     135            0 :       nrow_block = matrix_lu%matrix_struct%nrow_block
     136            0 :       DO irow_local = 1, nrow_local
     137            0 :          i = matrix_lu%matrix_struct%row_indices(irow_local)
     138            0 :          IF (ipivot(irow_local) /= i) P = P + 1
     139              :       END DO
     140            0 :       CALL matrix_lu%matrix_struct%para_env%sum(P)
     141              :       ! very important fix
     142            0 :       P = P/npcol
     143              : #else
     144              :       CALL dgetrf(n, n, a, n, ipivot, info)
     145              :       CALL cp_fm_get_diag(matrix_lu, diag)
     146              :       determinant = PRODUCT(diag)
     147              :       DO i = 1, n
     148              :          IF (ipivot(i) /= i) P = P + 1
     149              :       END DO
     150              : #endif
     151            0 :       DEALLOCATE (ipivot)
     152            0 :       DEALLOCATE (diag)
     153            0 :       CALL cp_fm_release(matrix_lu)
     154            0 :       det_a = determinant*(-2*MOD(P, 2) + 1.0_dp)
     155            0 :    END SUBROUTINE cp_fm_det
     156              : 
     157              : ! **************************************************************************************************
     158              : !> \brief calc A <- alpha*A + beta*B
     159              : !>      optimized for alpha == 1.0 (just add beta*B) and beta == 0.0 (just
     160              : !>      scale A)
     161              : !> \param alpha ...
     162              : !> \param matrix_a ...
     163              : !> \param beta ...
     164              : !> \param matrix_b ...
     165              : ! **************************************************************************************************
     166      1069384 :    SUBROUTINE cp_fm_scale_and_add(alpha, matrix_a, beta, matrix_b)
     167              : 
     168              :       REAL(KIND=dp), INTENT(IN)                          :: alpha
     169              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix_a
     170              :       REAL(KIND=dp), INTENT(IN), OPTIONAL                :: beta
     171              :       TYPE(cp_fm_type), INTENT(IN), OPTIONAL             :: matrix_b
     172              : 
     173              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_scale_and_add'
     174              : 
     175              :       INTEGER                                            :: handle, size_a, size_b
     176              :       REAL(KIND=dp)                                      :: my_beta
     177      1069384 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: a, b
     178      1069384 :       REAL(KIND=sp), DIMENSION(:, :), POINTER            :: a_sp, b_sp
     179              : 
     180      1069384 :       CALL timeset(routineN, handle)
     181              : 
     182      1069384 :       my_beta = 0.0_dp
     183      1069384 :       IF (PRESENT(matrix_b)) my_beta = 1.0_dp
     184      1069384 :       IF (PRESENT(beta)) my_beta = beta
     185      1069384 :       NULLIFY (a, b)
     186              : 
     187      1069384 :       IF (PRESENT(beta)) THEN
     188      1069384 :          CPASSERT(PRESENT(matrix_b))
     189      1069384 :          IF (ASSOCIATED(matrix_a%local_data, matrix_b%local_data)) THEN
     190            0 :             CPWARN("Bad use of routine. Call cp_fm_scale instead")
     191            0 :             CALL cp_fm_scale(alpha + beta, matrix_a)
     192            0 :             CALL timestop(handle)
     193            0 :             RETURN
     194              :          END IF
     195              :       END IF
     196              : 
     197      1069384 :       a => matrix_a%local_data
     198      1069384 :       a_sp => matrix_a%local_data_sp
     199              : 
     200      1069384 :       IF (matrix_a%use_sp) THEN
     201            0 :          size_a = SIZE(a_sp, 1)*SIZE(a_sp, 2)
     202              :       ELSE
     203      1069384 :          size_a = SIZE(a, 1)*SIZE(a, 2)
     204              :       END IF
     205              : 
     206      1069384 :       IF (alpha /= 1.0_dp) THEN
     207        79234 :          IF (matrix_a%use_sp) THEN
     208            0 :             CALL sscal(size_a, REAL(alpha, sp), a_sp, 1)
     209              :          ELSE
     210        79234 :             CALL dscal(size_a, alpha, a, 1)
     211              :          END IF
     212              :       END IF
     213      1069384 :       IF (my_beta /= 0.0_dp) THEN
     214      1061898 :          IF (matrix_a%matrix_struct%context /= matrix_b%matrix_struct%context) &
     215            0 :             CPABORT("Matrices must be in the same blacs context")
     216              : 
     217      1061898 :          IF (cp_fm_struct_equivalent(matrix_a%matrix_struct, &
     218              :                                      matrix_b%matrix_struct)) THEN
     219              : 
     220      1061898 :             b => matrix_b%local_data
     221      1061898 :             b_sp => matrix_b%local_data_sp
     222      1061898 :             IF (matrix_b%use_sp) THEN
     223            0 :                size_b = SIZE(b_sp, 1)*SIZE(b_sp, 2)
     224              :             ELSE
     225      1061898 :                size_b = SIZE(b, 1)*SIZE(b, 2)
     226              :             END IF
     227      1061898 :             IF (size_a /= size_b) &
     228            0 :                CPABORT("Matrices must have same local sizes")
     229              : 
     230      1061898 :             IF (matrix_a%use_sp .AND. matrix_b%use_sp) THEN
     231            0 :                CALL saxpy(size_a, REAL(my_beta, sp), b_sp, 1, a_sp, 1)
     232      1061898 :             ELSEIF (matrix_a%use_sp .AND. .NOT. matrix_b%use_sp) THEN
     233            0 :                CALL saxpy(size_a, REAL(my_beta, sp), REAL(b, sp), 1, a_sp, 1)
     234      1061898 :             ELSEIF (.NOT. matrix_a%use_sp .AND. matrix_b%use_sp) THEN
     235            0 :                CALL daxpy(size_a, my_beta, REAL(b_sp, dp), 1, a, 1)
     236              :             ELSE
     237      1061898 :                CALL daxpy(size_a, my_beta, b, 1, a, 1)
     238              :             END IF
     239              : 
     240              :          ELSE
     241              : #ifdef __parallel
     242            0 :             CPABORT("to do (pdscal,pdcopy,pdaxpy)")
     243              : #else
     244              :             CPABORT("")
     245              : #endif
     246              :          END IF
     247              : 
     248              :       END IF
     249              : 
     250      1069384 :       CALL timestop(handle)
     251              : 
     252      1069384 :    END SUBROUTINE cp_fm_scale_and_add
     253              : 
     254              : ! **************************************************************************************************
     255              : !> \brief interface to BLACS geadd:
     256              : !>                matrix_b = beta*matrix_b + alpha*opt(matrix_a)
     257              : !>        where opt(matrix_a) can be either:
     258              : !>              'N':  matrix_a
     259              : !>              'T':  matrix_a^T
     260              : !>              'C':  matrix_a^H (Hermitian conjugate)
     261              : !>        note that this is a level three routine, use cp_fm_scale_and_add if that
     262              : !>        is sufficient for your needs
     263              : !> \param alpha  : complex scalar
     264              : !> \param trans  : 'N' normal, 'T' transposed
     265              : !> \param matrix_a : input matrix_a
     266              : !> \param beta   : complex scalar
     267              : !> \param matrix_b : input matrix_b, upon out put the updated matrix_b
     268              : !> \author  Lianheng Tong
     269              : ! **************************************************************************************************
     270          532 :    SUBROUTINE cp_fm_geadd(alpha, trans, matrix_a, beta, matrix_b)
     271              :       REAL(KIND=dp), INTENT(IN) :: alpha, beta
     272              :       CHARACTER, INTENT(IN) :: trans
     273              :       TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_b
     274              : 
     275              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_geadd'
     276              : 
     277              :       INTEGER :: nrow_global, ncol_global, handle
     278              :       REAL(KIND=dp), DIMENSION(:, :), POINTER :: aa, bb
     279              : #if defined(__parallel)
     280              :       INTEGER, DIMENSION(9) :: desca, descb
     281              : #elif !defined(__MKL)
     282              :       INTEGER :: ii, jj
     283              : #endif
     284              : 
     285          532 :       CALL timeset(routineN, handle)
     286              : 
     287          532 :       nrow_global = matrix_a%matrix_struct%nrow_global
     288          532 :       ncol_global = matrix_a%matrix_struct%ncol_global
     289          532 :       CPASSERT(nrow_global == matrix_b%matrix_struct%nrow_global)
     290          532 :       CPASSERT(ncol_global == matrix_b%matrix_struct%ncol_global)
     291              : 
     292          532 :       aa => matrix_a%local_data
     293          532 :       bb => matrix_b%local_data
     294              : 
     295              : #if defined(__parallel)
     296         5320 :       desca = matrix_a%matrix_struct%descriptor
     297         5320 :       descb = matrix_b%matrix_struct%descriptor
     298              :       CALL pdgeadd(trans, &
     299              :                    nrow_global, &
     300              :                    ncol_global, &
     301              :                    alpha, &
     302              :                    aa, &
     303              :                    1, 1, &
     304              :                    desca, &
     305              :                    beta, &
     306              :                    bb, &
     307              :                    1, 1, &
     308          532 :                    descb)
     309              : #elif defined(__MKL)
     310              :       CALL mkl_domatadd('C', trans, 'N', nrow_global, ncol_global, &
     311              :                         alpha, aa, nrow_global, beta, bb, nrow_global, bb, nrow_global)
     312              : #else
     313              :       ! dgeadd is not a standard BLAS function, although it is implemented
     314              :       ! in some libraries like OpenBLAS, so not going to use it here
     315              :       SELECT CASE (trans)
     316              :       CASE ('T')
     317              :          DO jj = 1, ncol_global
     318              :             DO ii = 1, nrow_global
     319              :                bb(ii, jj) = beta*bb(ii, jj) + alpha*aa(jj, ii)
     320              :             END DO
     321              :          END DO
     322              :       CASE DEFAULT
     323              :          DO jj = 1, ncol_global
     324              :             DO ii = 1, nrow_global
     325              :                bb(ii, jj) = beta*bb(ii, jj) + alpha*aa(ii, jj)
     326              :             END DO
     327              :          END DO
     328              :       END SELECT
     329              : #endif
     330              : 
     331          532 :       CALL timestop(handle)
     332              : 
     333          532 :    END SUBROUTINE cp_fm_geadd
     334              : 
     335              : ! **************************************************************************************************
     336              : !> \brief Add (and scale) a subset of columns of a fm to a fm
     337              : !>        b = alpha*a + b
     338              : !> \param msource ...
     339              : !> \param mtarget ...
     340              : !> \param ncol ...
     341              : !> \param alpha ...
     342              : !> \param source_start ...
     343              : !> \param target_start ...
     344              : ! **************************************************************************************************
     345            0 :    SUBROUTINE cp_fm_add_columns(msource, mtarget, ncol, alpha, source_start, target_start)
     346              : 
     347              :       TYPE(cp_fm_type), INTENT(IN)             :: msource, mtarget
     348              :       INTEGER, INTENT(IN)                      :: ncol
     349              :       REAL(KIND=dp), INTENT(IN), OPTIONAL      :: alpha
     350              :       INTEGER, INTENT(IN), OPTIONAL            :: source_start, target_start
     351              : 
     352              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_add_columns'
     353              : 
     354              :       INTEGER                                  :: handle, n, ss, ts, i
     355              :       REAL(KIND=dp)                            :: fscale
     356              :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a, b
     357              : #if defined(__parallel)
     358              :       INTEGER, DIMENSION(9)                    :: desca, descb
     359              : #endif
     360              : 
     361            0 :       CALL timeset(routineN, handle)
     362              : 
     363            0 :       ss = 1
     364            0 :       ts = 1
     365              : 
     366            0 :       IF (PRESENT(source_start)) ss = source_start
     367            0 :       IF (PRESENT(target_start)) ts = target_start
     368              : 
     369            0 :       fscale = 1.0_dp
     370            0 :       IF (PRESENT(alpha)) fscale = alpha
     371              : 
     372            0 :       n = msource%matrix_struct%nrow_global
     373              : 
     374            0 :       a => msource%local_data
     375            0 :       b => mtarget%local_data
     376              : 
     377              : #if defined(__parallel)
     378              :       MARK_USED(i)
     379            0 :       desca(:) = msource%matrix_struct%descriptor(:)
     380            0 :       descb(:) = mtarget%matrix_struct%descriptor(:)
     381            0 :       CALL pdgeadd("N", n, ncol, fscale, a, 1, ss, desca, 1.0_dp, b, 1, ts, descb)
     382              : #else
     383              :       DO i = 0, ncol - 1
     384              :          b(1:n, ts + i) = b(1:n, ts + i) + fscale*a(1:n, ss + i)
     385              :       END DO
     386              : #endif
     387              : 
     388            0 :       CALL timestop(handle)
     389              : 
     390            0 :    END SUBROUTINE cp_fm_add_columns
     391              : 
     392              : ! **************************************************************************************************
     393              : !> \brief Computes the LU-decomposition of the matrix, and the determinant of the matrix
     394              : !>      IMPORTANT : the sign of the determinant is not defined correctly yet ....
     395              : !> \param matrix_a ...
     396              : !> \param almost_determinant ...
     397              : !> \param correct_sign ...
     398              : !> \par History
     399              : !>      added correct_sign 02.07 (fschiff)
     400              : !> \author Joost VandeVondele
     401              : !> \note
     402              : !>      - matrix_a is overwritten
     403              : !>      - the sign of the determinant might be wrong
     404              : !>      - SERIOUS WARNING (KNOWN BUG) : the sign of the determinant depends on ipivot
     405              : !>      - one should be able to find out if ipivot is an even or an odd permutation...
     406              : !>        if you need the correct sign, just add correct_sign==.TRUE. (fschiff)
     407              : !>      - Use cp_fm_get_diag instead of n times cp_fm_get_element (A. Bussy)
     408              : ! **************************************************************************************************
     409            0 :    SUBROUTINE cp_fm_lu_decompose(matrix_a, almost_determinant, correct_sign)
     410              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix_a
     411              :       REAL(KIND=dp), INTENT(OUT)               :: almost_determinant
     412              :       LOGICAL, INTENT(IN), OPTIONAL            :: correct_sign
     413              : 
     414              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_lu_decompose'
     415              : 
     416              :       INTEGER                                  :: handle, i, info, n
     417            0 :       INTEGER, ALLOCATABLE, DIMENSION(:)       :: ipivot
     418              :       REAL(KIND=dp)                            :: determinant
     419              :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
     420              : #if defined(__parallel)
     421              :       INTEGER, DIMENSION(9)                    :: desca
     422            0 :       REAL(KIND=dp), DIMENSION(:), POINTER     :: diag
     423              : #else
     424              :       INTEGER                                  :: lda
     425              : #endif
     426              : 
     427            0 :       CALL timeset(routineN, handle)
     428              : 
     429            0 :       a => matrix_a%local_data
     430            0 :       n = matrix_a%matrix_struct%nrow_global
     431            0 :       ALLOCATE (ipivot(n + matrix_a%matrix_struct%nrow_block))
     432              : 
     433              : #if defined(__parallel)
     434              :       MARK_USED(correct_sign)
     435            0 :       desca(:) = matrix_a%matrix_struct%descriptor(:)
     436            0 :       CALL pdgetrf(n, n, a, 1, 1, desca, ipivot, info)
     437              : 
     438            0 :       ALLOCATE (diag(n))
     439            0 :       diag(:) = 0.0_dp
     440            0 :       CALL cp_fm_get_diag(matrix_a, diag)
     441            0 :       determinant = 1.0_dp
     442            0 :       DO i = 1, n
     443            0 :          determinant = determinant*diag(i)
     444              :       END DO
     445            0 :       DEALLOCATE (diag)
     446              : #else
     447              :       lda = SIZE(a, 1)
     448              :       CALL dgetrf(n, n, a, lda, ipivot, info)
     449              :       determinant = 1.0_dp
     450              :       IF (correct_sign) THEN
     451              :          DO i = 1, n
     452              :             IF (ipivot(i) /= i) THEN
     453              :                determinant = -determinant*a(i, i)
     454              :             ELSE
     455              :                determinant = determinant*a(i, i)
     456              :             END IF
     457              :          END DO
     458              :       ELSE
     459              :          DO i = 1, n
     460              :             determinant = determinant*a(i, i)
     461              :          END DO
     462              :       END IF
     463              : #endif
     464              :       ! info is allowed to be zero
     465              :       ! this does just signal a zero diagonal element
     466            0 :       DEALLOCATE (ipivot)
     467            0 :       almost_determinant = determinant ! notice that the sign is random
     468            0 :       CALL timestop(handle)
     469            0 :    END SUBROUTINE cp_fm_lu_decompose
     470              : 
     471              : ! **************************************************************************************************
     472              : !> \brief computes matrix_c = beta * matrix_c + alpha * ( matrix_a  ** transa ) * ( matrix_b ** transb )
     473              : !> \param transa : 'N' -> normal   'T' -> transpose
     474              : !>      alpha,beta :: can be 0.0_dp and 1.0_dp
     475              : !> \param transb ...
     476              : !> \param m ...
     477              : !> \param n ...
     478              : !> \param k ...
     479              : !> \param alpha ...
     480              : !> \param matrix_a : m x k matrix ( ! for transa = 'N')
     481              : !> \param matrix_b : k x n matrix ( ! for transb = 'N')
     482              : !> \param beta ...
     483              : !> \param matrix_c : m x n matrix
     484              : !> \param a_first_col ...
     485              : !> \param a_first_row ...
     486              : !> \param b_first_col : the k x n matrix starts at col b_first_col of matrix_b (avoid usage)
     487              : !> \param b_first_row ...
     488              : !> \param c_first_col ...
     489              : !> \param c_first_row ...
     490              : !> \author Matthias Krack
     491              : !> \note
     492              : !>      matrix_c should have no overlap with matrix_a, matrix_b
     493              : ! **************************************************************************************************
     494          520 :    SUBROUTINE cp_fm_gemm(transa, transb, m, n, k, alpha, matrix_a, matrix_b, beta, &
     495              :                          matrix_c, a_first_col, a_first_row, b_first_col, b_first_row, &
     496              :                          c_first_col, c_first_row)
     497              : 
     498              :       CHARACTER(LEN=1), INTENT(IN)             :: transa, transb
     499              :       INTEGER, INTENT(IN)                      :: m, n, k
     500              :       REAL(KIND=dp), INTENT(IN)                :: alpha
     501              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix_a, matrix_b
     502              :       REAL(KIND=dp), INTENT(IN)                :: beta
     503              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix_c
     504              :       INTEGER, INTENT(IN), OPTIONAL            :: a_first_col, a_first_row, &
     505              :                                                   b_first_col, b_first_row, &
     506              :                                                   c_first_col, c_first_row
     507              : 
     508              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_gemm'
     509              : 
     510              :       INTEGER                                  :: handle, i_a, i_b, i_c, j_a, &
     511              :                                                   j_b, j_c
     512              :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a, b, c
     513          520 :       REAL(KIND=sp), DIMENSION(:, :), POINTER  :: a_sp, b_sp, c_sp
     514              : #if defined(__parallel)
     515              :       INTEGER, DIMENSION(9)                    :: desca, descb, descc
     516              : #else
     517              :       INTEGER                                  :: lda, ldb, ldc
     518              : #endif
     519              : 
     520          520 :       CALL timeset(routineN, handle)
     521              : 
     522              :       !sample peak memory
     523          520 :       CALL m_memory()
     524              : 
     525          520 :       a => matrix_a%local_data
     526          520 :       b => matrix_b%local_data
     527          520 :       c => matrix_c%local_data
     528              : 
     529          520 :       a_sp => matrix_a%local_data_sp
     530          520 :       b_sp => matrix_b%local_data_sp
     531          520 :       c_sp => matrix_c%local_data_sp
     532              : 
     533          520 :       i_a = 1
     534          520 :       IF (PRESENT(a_first_row)) i_a = a_first_row
     535              : 
     536          520 :       j_a = 1
     537          520 :       IF (PRESENT(a_first_col)) j_a = a_first_col
     538              : 
     539          520 :       i_b = 1
     540          520 :       IF (PRESENT(b_first_row)) i_b = b_first_row
     541              : 
     542          520 :       j_b = 1
     543          520 :       IF (PRESENT(b_first_col)) j_b = b_first_col
     544              : 
     545          520 :       i_c = 1
     546          520 :       IF (PRESENT(c_first_row)) i_c = c_first_row
     547              : 
     548          520 :       j_c = 1
     549          520 :       IF (PRESENT(c_first_col)) j_c = c_first_col
     550              : 
     551              : #if defined(__parallel)
     552              : 
     553         5200 :       desca(:) = matrix_a%matrix_struct%descriptor(:)
     554         5200 :       descb(:) = matrix_b%matrix_struct%descriptor(:)
     555         5200 :       descc(:) = matrix_c%matrix_struct%descriptor(:)
     556              : 
     557          520 :       IF (matrix_a%use_sp .AND. matrix_b%use_sp .AND. matrix_c%use_sp) THEN
     558              : 
     559              :          CALL psgemm(transa, transb, m, n, k, REAL(alpha, sp), a_sp(1, 1), i_a, j_a, desca, b_sp(1, 1), i_b, j_b, &
     560            0 :                      descb, REAL(beta, sp), c_sp(1, 1), i_c, j_c, descc)
     561              : 
     562          520 :       ELSEIF ((.NOT. matrix_a%use_sp) .AND. (.NOT. matrix_b%use_sp) .AND. (.NOT. matrix_c%use_sp)) THEN
     563              : 
     564              :          CALL pdgemm(transa, transb, m, n, k, alpha, a, i_a, j_a, desca, b, i_b, j_b, &
     565          520 :                      descb, beta, c, i_c, j_c, descc)
     566              : 
     567              :       ELSE
     568            0 :          CPABORT("Mixed precision gemm NYI")
     569              :       END IF
     570              : #else
     571              : 
     572              :       IF (matrix_a%use_sp .AND. matrix_b%use_sp .AND. matrix_c%use_sp) THEN
     573              : 
     574              :          lda = SIZE(a_sp, 1)
     575              :          ldb = SIZE(b_sp, 1)
     576              :          ldc = SIZE(c_sp, 1)
     577              : 
     578              :          CALL sgemm(transa, transb, m, n, k, REAL(alpha, sp), a_sp(i_a, j_a), lda, b_sp(i_b, j_b), ldb, &
     579              :                     REAL(beta, sp), c_sp(i_c, j_c), ldc)
     580              : 
     581              :       ELSEIF ((.NOT. matrix_a%use_sp) .AND. (.NOT. matrix_b%use_sp) .AND. (.NOT. matrix_c%use_sp)) THEN
     582              : 
     583              :          lda = SIZE(a, 1)
     584              :          ldb = SIZE(b, 1)
     585              :          ldc = SIZE(c, 1)
     586              : 
     587              :          CALL dgemm(transa, transb, m, n, k, alpha, a(i_a, j_a), lda, b(i_b, j_b), ldb, beta, c(i_c, j_c), ldc)
     588              : 
     589              :       ELSE
     590              :          CPABORT("Mixed precision gemm NYI")
     591              :       END IF
     592              : 
     593              : #endif
     594          520 :       CALL timestop(handle)
     595              : 
     596          520 :    END SUBROUTINE cp_fm_gemm
     597              : 
     598              : ! **************************************************************************************************
     599              : !> \brief computes matrix_c = beta * matrix_c + alpha *  matrix_a  *  matrix_b
     600              : !>      computes matrix_c = beta * matrix_c + alpha *  matrix_b  *  matrix_a
     601              : !>      where matrix_a is symmetric
     602              : !> \param side : 'L' -> matrix_a is on the left 'R' -> matrix_a is on the right
     603              : !>      alpha,beta :: can be 0.0_dp and 1.0_dp
     604              : !> \param uplo triangular format
     605              : !> \param m ...
     606              : !> \param n ...
     607              : !> \param alpha ...
     608              : !> \param matrix_a : m x m matrix
     609              : !> \param matrix_b : m x n matrix
     610              : !> \param beta ...
     611              : !> \param matrix_c : m x n matrix
     612              : !> \author Matthias Krack
     613              : !> \note
     614              : !>      matrix_c should have no overlap with matrix_a, matrix_b
     615              : !>      all matrices in QS are triangular according to uplo
     616              : !>      matrix_a is always an m x m matrix
     617              : !>      typically slower than cp_fm_gemm (especially in parallel easily 50 percent)
     618              : ! **************************************************************************************************
     619       146338 :    SUBROUTINE cp_fm_symm(side, uplo, m, n, alpha, matrix_a, matrix_b, beta, matrix_c)
     620              : 
     621              :       CHARACTER(LEN=1), INTENT(IN)             :: side, uplo
     622              :       INTEGER, INTENT(IN)                      :: m, n
     623              :       REAL(KIND=dp), INTENT(IN)                :: alpha
     624              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix_a, matrix_b
     625              :       REAL(KIND=dp), INTENT(IN)                :: beta
     626              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix_c
     627              : 
     628              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_symm'
     629              : 
     630              :       INTEGER                                  :: handle
     631       146338 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a, b, c
     632              : #if defined(__parallel)
     633              :       INTEGER, DIMENSION(9)                    :: desca, descb, descc
     634              : #else
     635              :       INTEGER                                  :: lda, ldb, ldc
     636              : #endif
     637              : 
     638       146338 :       CALL timeset(routineN, handle)
     639              : 
     640       146338 :       a => matrix_a%local_data
     641       146338 :       b => matrix_b%local_data
     642       146338 :       c => matrix_c%local_data
     643              : 
     644              : #if defined(__parallel)
     645              : 
     646      1463380 :       desca(:) = matrix_a%matrix_struct%descriptor(:)
     647      1463380 :       descb(:) = matrix_b%matrix_struct%descriptor(:)
     648      1463380 :       descc(:) = matrix_c%matrix_struct%descriptor(:)
     649              : 
     650       146338 :       CALL pdsymm(side, uplo, m, n, alpha, a(1, 1), 1, 1, desca, b(1, 1), 1, 1, descb, beta, c(1, 1), 1, 1, descc)
     651              : 
     652              : #else
     653              : 
     654              :       lda = matrix_a%matrix_struct%local_leading_dimension
     655              :       ldb = matrix_b%matrix_struct%local_leading_dimension
     656              :       ldc = matrix_c%matrix_struct%local_leading_dimension
     657              : 
     658              :       CALL dsymm(side, uplo, m, n, alpha, a(1, 1), lda, b(1, 1), ldb, beta, c(1, 1), ldc)
     659              : 
     660              : #endif
     661       146338 :       CALL timestop(handle)
     662              : 
     663       146338 :    END SUBROUTINE cp_fm_symm
     664              : 
     665              : ! **************************************************************************************************
     666              : !> \brief computes the Frobenius norm of matrix_a
     667              : !> \brief computes the Frobenius norm of matrix_a
     668              : !> \param matrix_a : m x n matrix
     669              : !> \return ...
     670              : !> \author VW
     671              : ! **************************************************************************************************
     672         7926 :    FUNCTION cp_fm_frobenius_norm(matrix_a) RESULT(norm)
     673              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix_a
     674              :       REAL(KIND=dp)                            :: norm
     675              : 
     676              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_frobenius_norm'
     677              : 
     678              :       INTEGER                                  :: handle, size_a
     679         7926 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
     680              :       REAL(KIND=dp), EXTERNAL                  :: DDOT
     681              : #if defined(__parallel)
     682              :       TYPE(mp_comm_type)                       :: group
     683              : #endif
     684              : 
     685         7926 :       CALL timeset(routineN, handle)
     686              : 
     687              :       norm = 0.0_dp
     688         7926 :       a => matrix_a%local_data
     689         7926 :       size_a = SIZE(a, 1)*SIZE(a, 2)
     690         7926 :       norm = DDOT(size_a, a(1, 1), 1, a(1, 1), 1)
     691              : #if defined(__parallel)
     692         7926 :       group = matrix_a%matrix_struct%para_env
     693         7926 :       CALL group%sum(norm)
     694              : #endif
     695         7926 :       norm = SQRT(norm)
     696              : 
     697         7926 :       CALL timestop(handle)
     698              : 
     699         7926 :    END FUNCTION cp_fm_frobenius_norm
     700              : 
     701              : ! **************************************************************************************************
     702              : !> \brief performs a rank-k update of a symmetric matrix_c
     703              : !>         matrix_c = beta * matrix_c + alpha * matrix_a * transpose ( matrix_a )
     704              : !> \param uplo : 'U'   ('L')
     705              : !> \param trans : 'N'  ('T')
     706              : !> \param k : number of cols to use in matrix_a
     707              : !>      ia,ja ::  1,1 (could be used for selecting subblock of a)
     708              : !> \param alpha ...
     709              : !> \param matrix_a ...
     710              : !> \param ia ...
     711              : !> \param ja ...
     712              : !> \param beta ...
     713              : !> \param matrix_c ...
     714              : !> \author Matthias Krack
     715              : ! **************************************************************************************************
     716         7440 :    SUBROUTINE cp_fm_syrk(uplo, trans, k, alpha, matrix_a, ia, ja, beta, matrix_c)
     717              :       CHARACTER(LEN=1), INTENT(IN)             :: uplo, trans
     718              :       INTEGER, INTENT(IN)                      :: k
     719              :       REAL(KIND=dp), INTENT(IN)                :: alpha
     720              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix_a
     721              :       INTEGER, INTENT(IN)                      :: ia, ja
     722              :       REAL(KIND=dp), INTENT(IN)                :: beta
     723              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix_c
     724              : 
     725              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_syrk'
     726              : 
     727              :       INTEGER                                  :: handle, n
     728         7440 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a, c
     729              : #if defined(__parallel)
     730              :       INTEGER, DIMENSION(9)                    :: desca, descc
     731              : #else
     732              :       INTEGER                                  :: lda, ldc
     733              : #endif
     734              : 
     735         7440 :       CALL timeset(routineN, handle)
     736              : 
     737         7440 :       n = matrix_c%matrix_struct%nrow_global
     738              : 
     739         7440 :       a => matrix_a%local_data
     740         7440 :       c => matrix_c%local_data
     741              : 
     742              : #if defined(__parallel)
     743              : 
     744        74400 :       desca(:) = matrix_a%matrix_struct%descriptor(:)
     745        74400 :       descc(:) = matrix_c%matrix_struct%descriptor(:)
     746              : 
     747         7440 :       CALL pdsyrk(uplo, trans, n, k, alpha, a(1, 1), ia, ja, desca, beta, c(1, 1), 1, 1, descc)
     748              : 
     749              : #else
     750              : 
     751              :       lda = SIZE(a, 1)
     752              :       ldc = SIZE(c, 1)
     753              : 
     754              :       CALL dsyrk(uplo, trans, n, k, alpha, a(ia, ja), lda, beta, c(1, 1), ldc)
     755              : 
     756              : #endif
     757         7440 :       CALL timestop(handle)
     758              : 
     759         7440 :    END SUBROUTINE cp_fm_syrk
     760              : 
     761              : ! **************************************************************************************************
     762              : !> \brief computes the schur product of two matrices
     763              : !>       c_ij = a_ij * b_ij
     764              : !> \param matrix_a ...
     765              : !> \param matrix_b ...
     766              : !> \param matrix_c ...
     767              : !> \author Joost VandeVondele
     768              : ! **************************************************************************************************
     769         9148 :    SUBROUTINE cp_fm_schur_product(matrix_a, matrix_b, matrix_c)
     770              : 
     771              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix_a, matrix_b, matrix_c
     772              : 
     773              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_schur_product'
     774              : 
     775              :       INTEGER                                            :: handle, icol_local, irow_local, mypcol, &
     776              :                                                             myprow, ncol_local, &
     777              :                                                             nrow_local
     778         9148 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: a, b, c
     779              :       TYPE(cp_blacs_env_type), POINTER                   :: context
     780              : 
     781         9148 :       CALL timeset(routineN, handle)
     782              : 
     783         9148 :       context => matrix_a%matrix_struct%context
     784         9148 :       myprow = context%mepos(1)
     785         9148 :       mypcol = context%mepos(2)
     786              : 
     787         9148 :       a => matrix_a%local_data
     788         9148 :       b => matrix_b%local_data
     789         9148 :       c => matrix_c%local_data
     790              : 
     791         9148 :       nrow_local = matrix_a%matrix_struct%nrow_locals(myprow)
     792         9148 :       ncol_local = matrix_a%matrix_struct%ncol_locals(mypcol)
     793              : 
     794        98692 :       DO icol_local = 1, ncol_local
     795      6666116 :          DO irow_local = 1, nrow_local
     796      6656968 :             c(irow_local, icol_local) = a(irow_local, icol_local)*b(irow_local, icol_local)
     797              :          END DO
     798              :       END DO
     799              : 
     800         9148 :       CALL timestop(handle)
     801              : 
     802         9148 :    END SUBROUTINE cp_fm_schur_product
     803              : 
     804              : ! **************************************************************************************************
     805              : !> \brief returns the trace of matrix_a^T matrix_b, i.e
     806              : !>      sum_{i,j}(matrix_a(i,j)*matrix_b(i,j))
     807              : !> \param matrix_a a matrix
     808              : !> \param matrix_b another matrix
     809              : !> \param trace ...
     810              : !> \par History
     811              : !>      11.06.2001 Creation (Matthias Krack)
     812              : !>      12.2002 added doc [fawzi]
     813              : !> \author Matthias Krack
     814              : !> \note
     815              : !>      note the transposition of matrix_a!
     816              : ! **************************************************************************************************
     817       512532 :    SUBROUTINE cp_fm_trace_a0b0t0(matrix_a, matrix_b, trace)
     818              : 
     819              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix_a, matrix_b
     820              :       REAL(KIND=dp), INTENT(OUT)                         :: trace
     821              : 
     822              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a0b0t0'
     823              : 
     824              :       INTEGER                                            :: handle, mypcol, myprow, ncol_local, &
     825              :                                                             nrow_local
     826       512532 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: a, b
     827       512532 :       REAL(KIND=sp), DIMENSION(:, :), POINTER            :: a_sp, b_sp
     828              :       TYPE(cp_blacs_env_type), POINTER                   :: context
     829              :       TYPE(mp_comm_type)                                 :: group
     830              : 
     831       512532 :       CALL timeset(routineN, handle)
     832              : 
     833       512532 :       context => matrix_a%matrix_struct%context
     834       512532 :       myprow = context%mepos(1)
     835       512532 :       mypcol = context%mepos(2)
     836              : 
     837       512532 :       group = matrix_a%matrix_struct%para_env
     838              : 
     839       512532 :       a => matrix_a%local_data
     840       512532 :       b => matrix_b%local_data
     841              : 
     842       512532 :       a_sp => matrix_a%local_data_sp
     843       512532 :       b_sp => matrix_b%local_data_sp
     844              : 
     845       512532 :       nrow_local = MIN(matrix_a%matrix_struct%nrow_locals(myprow), matrix_b%matrix_struct%nrow_locals(myprow))
     846       512532 :       ncol_local = MIN(matrix_a%matrix_struct%ncol_locals(mypcol), matrix_b%matrix_struct%ncol_locals(mypcol))
     847              : 
     848              :       ! cries for an accurate_dot_product
     849       512532 :       IF (matrix_a%use_sp .AND. matrix_b%use_sp) THEN
     850              :          trace = accurate_sum(REAL(a_sp(1:nrow_local, 1:ncol_local)* &
     851            0 :                                    b_sp(1:nrow_local, 1:ncol_local), dp))
     852       512532 :       ELSEIF (matrix_a%use_sp .AND. .NOT. matrix_b%use_sp) THEN
     853              :          trace = accurate_sum(REAL(a_sp(1:nrow_local, 1:ncol_local), dp)* &
     854            0 :                               b(1:nrow_local, 1:ncol_local))
     855       512532 :       ELSEIF (.NOT. matrix_a%use_sp .AND. matrix_b%use_sp) THEN
     856              :          trace = accurate_sum(a(1:nrow_local, 1:ncol_local)* &
     857            0 :                               REAL(b_sp(1:nrow_local, 1:ncol_local), dp))
     858              :       ELSE
     859              :          trace = accurate_dot_product(a(1:nrow_local, 1:ncol_local), &
     860       512532 :                                       b(1:nrow_local, 1:ncol_local))
     861              :       END IF
     862              : 
     863       512532 :       CALL group%sum(trace)
     864              : 
     865       512532 :       CALL timestop(handle)
     866              : 
     867       512532 :    END SUBROUTINE cp_fm_trace_a0b0t0
     868              : 
     869              :    #:mute
     870              :       #:set types = [("cp_fm_type", "a", ""), ("cp_fm_p_type", "p","%matrix")]
     871              :    #:endmute
     872              : 
     873              : ! **************************************************************************************************
     874              : !> \brief Compute trace(k) = Tr (matrix_a(k)^T matrix_b) for each pair of matrices A_k and B.
     875              : !> \param matrix_a list of A matrices
     876              : !> \param matrix_b B matrix
     877              : !> \param trace    computed traces
     878              : !> \par History
     879              : !>    * 08.2018 forked from cp_fm_trace() [Sergey Chulkov]
     880              : !> \note \parblock
     881              : !>      Computing the trace requires collective communication between involved MPI processes
     882              : !>      that implies a synchronisation point between them. The aim of this subroutine is to reduce
     883              : !>      the amount of time wasted in such synchronisation by performing one large collective
     884              : !>      operation which involves all the matrices in question.
     885              : !>
     886              : !>      The subroutine's suffix reflects dimensionality of dummy arrays; 'a1b0t1' means that
     887              : !>      the dummy variables 'matrix_a' and 'trace' are 1-dimensional arrays, while the variable
     888              : !>      'matrix_b' is a single matrix.
     889              : !>      \endparblock
     890              : ! **************************************************************************************************
     891              :    #:for longname, shortname, appendix in types
     892         3168 :       SUBROUTINE cp_fm_trace_a1b0t1_${shortname}$ (matrix_a, matrix_b, trace)
     893              :          TYPE(${longname}$), DIMENSION(:), INTENT(IN)       :: matrix_a
     894              :          TYPE(cp_fm_type), INTENT(IN)                       :: matrix_b
     895              :          REAL(kind=dp), DIMENSION(:), INTENT(OUT)           :: trace
     896              : 
     897              :          CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a1b0t1_${shortname}$'
     898              : 
     899              :          INTEGER                                            :: handle, imatrix, n_matrices, &
     900              :                                                                ncols_local, nrows_local
     901              :          LOGICAL                                            :: use_sp_a, use_sp_b
     902         3168 :          REAL(kind=dp), DIMENSION(:, :), POINTER            :: ldata_a, ldata_b
     903         3168 :          REAL(kind=sp), DIMENSION(:, :), POINTER            :: ldata_a_sp, ldata_b_sp
     904              :          TYPE(mp_comm_type)                                 :: group
     905              : 
     906         3168 :          CALL timeset(routineN, handle)
     907              : 
     908         3168 :          n_matrices = SIZE(trace)
     909         3168 :          CPASSERT(SIZE(matrix_a) == n_matrices)
     910              : 
     911         3168 :          CALL cp_fm_get_info(matrix_b, nrow_local=nrows_local, ncol_local=ncols_local)
     912         3168 :          use_sp_b = matrix_b%use_sp
     913              : 
     914         3168 :          IF (use_sp_b) THEN
     915            0 :             ldata_b_sp => matrix_b%local_data_sp(1:nrows_local, 1:ncols_local)
     916              :          ELSE
     917         3168 :             ldata_b => matrix_b%local_data(1:nrows_local, 1:ncols_local)
     918              :          END IF
     919              : 
     920              : !$OMP PARALLEL DO DEFAULT(NONE), &
     921              : !$OMP             PRIVATE(imatrix, ldata_a, ldata_a_sp, use_sp_a), &
     922              : !$OMP             SHARED(ldata_b, ldata_b_sp, matrix_a, matrix_b), &
     923         3168 : !$OMP             SHARED(ncols_local, nrows_local, n_matrices, trace, use_sp_b)
     924              : 
     925              :          DO imatrix = 1, n_matrices
     926              : 
     927              :             use_sp_a = matrix_a(imatrix) ${appendix}$%use_sp
     928              : 
     929              :             ! assume that the matrices A(i) and B have identical shapes and distribution schemes
     930              :             IF (use_sp_a .AND. use_sp_b) THEN
     931              :                ldata_a_sp => matrix_a(imatrix) ${appendix}$%local_data_sp(1:nrows_local, 1:ncols_local)
     932              :                trace(imatrix) = accurate_dot_product(ldata_a_sp, ldata_b_sp)
     933              :             ELSE IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
     934              :                ldata_a => matrix_a(imatrix) ${appendix}$%local_data(1:nrows_local, 1:ncols_local)
     935              :                trace(imatrix) = accurate_dot_product(ldata_a, ldata_b)
     936              :             ELSE
     937              :                CPABORT("Matrices A and B are of different types")
     938              :             END IF
     939              :          END DO
     940              : !$OMP END PARALLEL DO
     941              : 
     942         3168 :          group = matrix_b%matrix_struct%para_env
     943        19836 :          CALL group%sum(trace)
     944              : 
     945         3168 :          CALL timestop(handle)
     946         3168 :       END SUBROUTINE cp_fm_trace_a1b0t1_${shortname}$
     947              :    #:endfor
     948              : 
     949              : ! **************************************************************************************************
     950              : !> \brief Compute trace(k) = Tr (matrix_a(k)^T matrix_b(k)) for each pair of matrices A_k and B_k.
     951              : !> \param matrix_a list of A matrices
     952              : !> \param matrix_b list of B matrices
     953              : !> \param trace    computed traces
     954              : !> \param accurate ...
     955              : !> \par History
     956              : !>    * 11.2016 forked from cp_fm_trace() [Sergey Chulkov]
     957              : !> \note \parblock
     958              : !>      Computing the trace requires collective communication between involved MPI processes
     959              : !>      that implies a synchronisation point between them. The aim of this subroutine is to reduce
     960              : !>      the amount of time wasted in such synchronisation by performing one large collective
     961              : !>      operation which involves all the matrices in question.
     962              : !>
     963              : !>      The subroutine's suffix reflects dimensionality of dummy arrays; 'a1b1t1' means that
     964              : !>      all dummy variables (matrix_a, matrix_b, and trace) are 1-dimensional arrays.
     965              : !>      \endparblock
     966              : ! **************************************************************************************************
     967              :    #:for longname1, shortname1, appendix1 in types
     968              :       #:for longname2, shortname2, appendix2 in types
     969       149172 :          SUBROUTINE cp_fm_trace_a1b1t1_${shortname1}$${shortname2}$ (matrix_a, matrix_b, trace, accurate)
     970              :             TYPE(${longname1}$), DIMENSION(:), INTENT(IN)      :: matrix_a
     971              :             TYPE(${longname2}$), DIMENSION(:), INTENT(IN)      :: matrix_b
     972              :             REAL(kind=dp), DIMENSION(:), INTENT(OUT)           :: trace
     973              :             LOGICAL, INTENT(IN), OPTIONAL                      :: accurate
     974              : 
     975              :             CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a1b1t1_${shortname1}$${shortname2}$'
     976              : 
     977              :             INTEGER                                            :: handle, imatrix, n_matrices, &
     978              :                                                                   ncols_local, nrows_local
     979              :             LOGICAL                                            :: use_accurate_sum, use_sp_a, use_sp_b
     980       149172 :             REAL(kind=dp), DIMENSION(:, :), POINTER            :: ldata_a, ldata_b
     981       149172 :             REAL(kind=sp), DIMENSION(:, :), POINTER            :: ldata_a_sp, ldata_b_sp
     982              :             TYPE(mp_comm_type)                                 :: group
     983              : 
     984       149172 :             CALL timeset(routineN, handle)
     985              : 
     986       149172 :             n_matrices = SIZE(trace)
     987       149172 :             CPASSERT(SIZE(matrix_a) == n_matrices)
     988       149172 :             CPASSERT(SIZE(matrix_b) == n_matrices)
     989              : 
     990       149172 :             use_accurate_sum = .TRUE.
     991       149172 :             IF (PRESENT(accurate)) use_accurate_sum = accurate
     992              : 
     993              : !$OMP PARALLEL DO DEFAULT(NONE), &
     994              : !$OMP             PRIVATE(imatrix, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
     995              : !$OMP             PRIVATE(nrows_local, use_sp_a, use_sp_b), &
     996       149172 : !$OMP             SHARED(matrix_a, matrix_b, n_matrices, trace, use_accurate_sum)
     997              :             DO imatrix = 1, n_matrices
     998              :                CALL cp_fm_get_info(matrix_a(imatrix) ${appendix1}$, nrow_local=nrows_local, ncol_local=ncols_local)
     999              : 
    1000              :                use_sp_a = matrix_a(imatrix) ${appendix1}$%use_sp
    1001              :                use_sp_b = matrix_b(imatrix) ${appendix2}$%use_sp
    1002              : 
    1003              :                ! assume that the matrices A(i) and B(i) have identical shapes and distribution schemes
    1004              :                IF (use_sp_a .AND. use_sp_b) THEN
    1005              :                   ldata_a_sp => matrix_a(imatrix) ${appendix1}$%local_data_sp(1:nrows_local, 1:ncols_local)
    1006              :                   ldata_b_sp => matrix_b(imatrix) ${appendix2}$%local_data_sp(1:nrows_local, 1:ncols_local)
    1007              :                   IF (use_accurate_sum) THEN
    1008              :                      trace(imatrix) = accurate_dot_product(ldata_a_sp, ldata_b_sp)
    1009              :                   ELSE
    1010              :                      trace(imatrix) = SUM(ldata_a_sp*ldata_b_sp)
    1011              :                   END IF
    1012              :                ELSE IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
    1013              :                   ldata_a => matrix_a(imatrix) ${appendix1}$%local_data(1:nrows_local, 1:ncols_local)
    1014              :                   ldata_b => matrix_b(imatrix) ${appendix2}$%local_data(1:nrows_local, 1:ncols_local)
    1015              :                   IF (use_accurate_sum) THEN
    1016              :                      trace(imatrix) = accurate_dot_product(ldata_a, ldata_b)
    1017              :                   ELSE
    1018              :                      trace(imatrix) = SUM(ldata_a*ldata_b)
    1019              :                   END IF
    1020              :                ELSE
    1021              :                   CPABORT("Matrices A and B are of different types")
    1022              :                END IF
    1023              :             END DO
    1024              : !$OMP END PARALLEL DO
    1025              : 
    1026       149172 :             group = matrix_a(1) ${appendix1}$%matrix_struct%para_env
    1027       498696 :             CALL group%sum(trace)
    1028              : 
    1029       149172 :             CALL timestop(handle)
    1030       149172 :          END SUBROUTINE cp_fm_trace_a1b1t1_${shortname1}$${shortname2}$
    1031              :       #:endfor
    1032              :    #:endfor
    1033              : 
    1034              : ! **************************************************************************************************
    1035              : !> \brief Compute trace(i,j) = \sum_k Tr (matrix_a(k,i)^T matrix_b(k,j)).
    1036              : !> \param matrix_a list of A matrices
    1037              : !> \param matrix_b list of B matrices
    1038              : !> \param trace    computed traces
    1039              : !> \param accurate ...
    1040              : ! **************************************************************************************************
    1041              :    #:for longname1, shortname1, appendix1 in types
    1042              :       #:for longname2, shortname2, appendix2 in types
    1043        13876 :          SUBROUTINE cp_fm_contracted_trace_a2b2t2_${shortname1}$${shortname2}$ (matrix_a, matrix_b, trace, accurate)
    1044              :             TYPE(${longname1}$), DIMENSION(:, :), INTENT(IN)   :: matrix_a
    1045              :             TYPE(${longname2}$), DIMENSION(:, :), INTENT(IN)   :: matrix_b
    1046              :             REAL(kind=dp), DIMENSION(:, :), INTENT(OUT)        :: trace
    1047              :             LOGICAL, INTENT(IN), OPTIONAL                      :: accurate
    1048              : 
    1049              :             CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_contracted_trace_a2b2t2_${shortname1}$${shortname2}$'
    1050              : 
    1051              :             INTEGER                                            :: handle, ia, ib, iz, na, nb, ncols_local, &
    1052              :                                                                   nrows_local, nz
    1053              :             INTEGER(kind=int_8)                                :: ib8, itrace, na8, ntraces
    1054              :             LOGICAL                                            :: use_accurate_sum, use_sp_a, use_sp_b
    1055              :             REAL(kind=dp)                                      :: t
    1056        13876 :             REAL(kind=dp), DIMENSION(:, :), POINTER            :: ldata_a, ldata_b
    1057        13876 :             REAL(kind=sp), DIMENSION(:, :), POINTER            :: ldata_a_sp, ldata_b_sp
    1058              :             TYPE(mp_comm_type)                                 :: group
    1059              : 
    1060        13876 :             CALL timeset(routineN, handle)
    1061              : 
    1062        13876 :             nz = SIZE(matrix_a, 1)
    1063        13876 :             CPASSERT(SIZE(matrix_b, 1) == nz)
    1064              : 
    1065        13876 :             na = SIZE(matrix_a, 2)
    1066        13876 :             nb = SIZE(matrix_b, 2)
    1067        13876 :             CPASSERT(SIZE(trace, 1) == na)
    1068        13876 :             CPASSERT(SIZE(trace, 2) == nb)
    1069              : 
    1070        13876 :             use_accurate_sum = .TRUE.
    1071        13876 :             IF (PRESENT(accurate)) use_accurate_sum = accurate
    1072              : 
    1073              :             ! here we use one running index (itrace) instead of two (ia, ib) in order to
    1074              :             ! improve load balance between shared-memory threads
    1075        13876 :             ntraces = na*nb
    1076        13876 :             na8 = INT(na, kind=int_8)
    1077              : 
    1078              : !$OMP PARALLEL DO DEFAULT(NONE), &
    1079              : !$OMP             PRIVATE(ia, ib, ib8, itrace, iz, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
    1080              : !$OMP             PRIVATE(nrows_local, t, use_sp_a, use_sp_b), &
    1081        13876 : !$OMP             SHARED(matrix_a, matrix_b, na, na8, nb, ntraces, nz, trace, use_accurate_sum)
    1082              :             DO itrace = 1, ntraces
    1083              :                ib8 = (itrace - 1)/na8
    1084              :                ia = INT(itrace - ib8*na8)
    1085              :                ib = INT(ib8) + 1
    1086              : 
    1087              :                t = 0.0_dp
    1088              :                DO iz = 1, nz
    1089              :                   CALL cp_fm_get_info(matrix_a(iz, ia) ${appendix1}$, nrow_local=nrows_local, ncol_local=ncols_local)
    1090              :                   use_sp_a = matrix_a(iz, ia) ${appendix1}$%use_sp
    1091              :                   use_sp_b = matrix_b(iz, ib) ${appendix2}$%use_sp
    1092              : 
    1093              :                   ! assume that the matrices A(iz, ia) and B(iz, ib) have identical shapes and distribution schemes
    1094              :                   IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
    1095              :                      ldata_a => matrix_a(iz, ia) ${appendix1}$%local_data(1:nrows_local, 1:ncols_local)
    1096              :                      ldata_b => matrix_b(iz, ib) ${appendix2}$%local_data(1:nrows_local, 1:ncols_local)
    1097              :                      IF (use_accurate_sum) THEN
    1098              :                         t = t + accurate_dot_product(ldata_a, ldata_b)
    1099              :                      ELSE
    1100              :                         t = t + SUM(ldata_a*ldata_b)
    1101              :                      END IF
    1102              :                   ELSE IF (use_sp_a .AND. use_sp_b) THEN
    1103              :                      ldata_a_sp => matrix_a(iz, ia) ${appendix1}$%local_data_sp(1:nrows_local, 1:ncols_local)
    1104              :                      ldata_b_sp => matrix_b(iz, ib) ${appendix2}$%local_data_sp(1:nrows_local, 1:ncols_local)
    1105              :                      IF (use_accurate_sum) THEN
    1106              :                         t = t + accurate_dot_product(ldata_a_sp, ldata_b_sp)
    1107              :                      ELSE
    1108              :                         t = t + SUM(ldata_a_sp*ldata_b_sp)
    1109              :                      END IF
    1110              :                   ELSE
    1111              :                      CPABORT("Matrices A and B are of different types")
    1112              :                   END IF
    1113              :                END DO
    1114              :                trace(ia, ib) = t
    1115              :             END DO
    1116              : !$OMP END PARALLEL DO
    1117              : 
    1118        13876 :             group = matrix_a(1, 1) ${appendix1}$%matrix_struct%para_env
    1119       663128 :             CALL group%sum(trace)
    1120              : 
    1121        13876 :             CALL timestop(handle)
    1122        13876 :          END SUBROUTINE cp_fm_contracted_trace_a2b2t2_${shortname1}$${shortname2}$
    1123              :       #:endfor
    1124              :    #:endfor
    1125              : 
    1126              : ! **************************************************************************************************
    1127              : !> \brief multiplies in place by a triangular matrix:
    1128              : !>       matrix_b = alpha op(triangular_matrix) matrix_b
    1129              : !>      or (if side='R')
    1130              : !>       matrix_b = alpha matrix_b op(triangular_matrix)
    1131              : !>      op(triangular_matrix) is:
    1132              : !>       triangular_matrix (if transpose_tr=.false. and invert_tr=.false.)
    1133              : !>       triangular_matrix^T (if transpose_tr=.true. and invert_tr=.false.)
    1134              : !>       triangular_matrix^(-1) (if transpose_tr=.false. and invert_tr=.true.)
    1135              : !>       triangular_matrix^(-T) (if transpose_tr=.true. and invert_tr=.true.)
    1136              : !> \param triangular_matrix the triangular matrix that multiplies the other
    1137              : !> \param matrix_b the matrix that gets multiplied and stores the result
    1138              : !> \param side on which side of matrix_b stays op(triangular_matrix)
    1139              : !>        (defaults to 'L')
    1140              : !> \param transpose_tr if the triangular matrix should be transposed
    1141              : !>        (defaults to false)
    1142              : !> \param invert_tr if the triangular matrix should be inverted
    1143              : !>        (defaults to false)
    1144              : !> \param uplo_tr if triangular_matrix is stored in the upper ('U') or
    1145              : !>        lower ('L') triangle (defaults to 'U')
    1146              : !> \param unit_diag_tr if the diagonal elements of triangular_matrix should
    1147              : !>        be assumed to be 1 (defaults to false)
    1148              : !> \param n_rows the number of rows of the result (defaults to
    1149              : !>        size(matrix_b,1))
    1150              : !> \param n_cols the number of columns of the result (defaults to
    1151              : !>        size(matrix_b,2))
    1152              : !> \param alpha ...
    1153              : !> \par History
    1154              : !>      08.2002 created [fawzi]
    1155              : !> \author Fawzi Mohamed
    1156              : !> \note
    1157              : !>      needs an mpi env
    1158              : ! **************************************************************************************************
    1159       107336 :    SUBROUTINE cp_fm_triangular_multiply(triangular_matrix, matrix_b, side, &
    1160              :                                         transpose_tr, invert_tr, uplo_tr, unit_diag_tr, n_rows, n_cols, &
    1161              :                                         alpha)
    1162              :       TYPE(cp_fm_type), INTENT(IN)                       :: triangular_matrix, matrix_b
    1163              :       CHARACTER, INTENT(IN), OPTIONAL                    :: side
    1164              :       LOGICAL, INTENT(IN), OPTIONAL                      :: transpose_tr, invert_tr
    1165              :       CHARACTER, INTENT(IN), OPTIONAL                    :: uplo_tr
    1166              :       LOGICAL, INTENT(IN), OPTIONAL                      :: unit_diag_tr
    1167              :       INTEGER, INTENT(IN), OPTIONAL                      :: n_rows, n_cols
    1168              :       REAL(KIND=dp), INTENT(IN), OPTIONAL                :: alpha
    1169              : 
    1170              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_triangular_multiply'
    1171              : 
    1172              :       CHARACTER                                          :: side_char, transa, unit_diag, uplo
    1173              :       INTEGER                                            :: handle, mdim, m, n
    1174              :       LOGICAL                                            :: invert
    1175              :       REAL(KIND=dp)                                      :: al
    1176              : 
    1177        53668 :       CALL timeset(routineN, handle)
    1178        53668 :       side_char = 'L'
    1179        53668 :       unit_diag = 'N'
    1180        53668 :       uplo = 'U'
    1181        53668 :       transa = 'N'
    1182        53668 :       invert = .FALSE.
    1183        53668 :       al = 1.0_dp
    1184        53668 :       CALL cp_fm_get_info(matrix_b, nrow_global=m, ncol_global=n)
    1185        53668 :       IF (PRESENT(side)) side_char = side
    1186              :       mdim = MERGE(1, 2, 'L' == side_char)
    1187        53668 :       IF (PRESENT(invert_tr)) invert = invert_tr
    1188        53668 :       IF (PRESENT(uplo_tr)) uplo = uplo_tr
    1189        53668 :       IF (PRESENT(unit_diag_tr)) THEN
    1190            0 :          IF (unit_diag_tr) THEN
    1191            0 :             unit_diag = 'U'
    1192              :          ELSE
    1193              :             unit_diag = 'N'
    1194              :          END IF
    1195              :       END IF
    1196        53668 :       IF (PRESENT(transpose_tr)) THEN
    1197         3390 :          IF (transpose_tr) THEN
    1198         1278 :             transa = 'T'
    1199              :          ELSE
    1200              :             transa = 'N'
    1201              :          END IF
    1202              :       END IF
    1203        53668 :       IF (PRESENT(alpha)) al = alpha
    1204        53668 :       IF (PRESENT(n_rows)) m = n_rows
    1205        53668 :       IF (PRESENT(n_cols)) n = n_cols
    1206              : 
    1207        53668 :       IF (invert) THEN
    1208              : 
    1209              : #if defined(__parallel)
    1210              :          CALL pdtrsm(side_char, uplo, transa, unit_diag, m, n, al, &
    1211              :                      triangular_matrix%local_data(1, 1), 1, 1, &
    1212              :                      triangular_matrix%matrix_struct%descriptor, &
    1213              :                      matrix_b%local_data(1, 1), 1, 1, &
    1214        43572 :                      matrix_b%matrix_struct%descriptor(1))
    1215              : #else
    1216              :          CALL dtrsm(side_char, uplo, transa, unit_diag, m, n, al, &
    1217              :                     triangular_matrix%local_data(1, 1), &
    1218              :                     SIZE(triangular_matrix%local_data, mdim), &
    1219              :                     matrix_b%local_data(1, 1), SIZE(matrix_b%local_data, 1))
    1220              : #endif
    1221              : 
    1222              :       ELSE
    1223              : 
    1224              : #if defined(__parallel)
    1225              :          CALL pdtrmm(side_char, uplo, transa, unit_diag, m, n, al, &
    1226              :                      triangular_matrix%local_data(1, 1), 1, 1, &
    1227              :                      triangular_matrix%matrix_struct%descriptor, &
    1228              :                      matrix_b%local_data(1, 1), 1, 1, &
    1229        10096 :                      matrix_b%matrix_struct%descriptor(1))
    1230              : #else
    1231              :          CALL dtrmm(side_char, uplo, transa, unit_diag, m, n, al, &
    1232              :                     triangular_matrix%local_data(1, 1), &
    1233              :                     SIZE(triangular_matrix%local_data, mdim), &
    1234              :                     matrix_b%local_data(1, 1), SIZE(matrix_b%local_data, 1))
    1235              : #endif
    1236              : 
    1237              :       END IF
    1238              : 
    1239        53668 :       CALL timestop(handle)
    1240        53668 :    END SUBROUTINE cp_fm_triangular_multiply
    1241              : 
    1242              : ! **************************************************************************************************
    1243              : !> \brief scales a matrix
    1244              : !>      matrix_a = alpha * matrix_b
    1245              : !> \param alpha ...
    1246              : !> \param matrix_a ...
    1247              : !> \note
    1248              : !>      use cp_fm_set_all to zero (avoids problems with nan)
    1249              : ! **************************************************************************************************
    1250        85403 :    SUBROUTINE cp_fm_scale(alpha, matrix_a)
    1251              :       REAL(KIND=dp), INTENT(IN)                          :: alpha
    1252              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix_a
    1253              : 
    1254              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'cp_fm_scale'
    1255              : 
    1256              :       INTEGER                                            :: handle, size_a
    1257              :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: a
    1258              : 
    1259        85403 :       CALL timeset(routineN, handle)
    1260              : 
    1261              :       NULLIFY (a)
    1262              : 
    1263        85403 :       a => matrix_a%local_data
    1264        85403 :       size_a = SIZE(a, 1)*SIZE(a, 2)
    1265              : 
    1266        85403 :       CALL DSCAL(size_a, alpha, a, 1)
    1267              : 
    1268        85403 :       CALL timestop(handle)
    1269              : 
    1270        85403 :    END SUBROUTINE cp_fm_scale
    1271              : 
    1272              : ! **************************************************************************************************
    1273              : !> \brief transposes a matrix
    1274              : !>      matrixt = matrix ^ T
    1275              : !> \param matrix ...
    1276              : !> \param matrixt ...
    1277              : !> \note
    1278              : !>      all matrix elements are transposed (see cp_fm_uplo_to_full to symmetrise a matrix)
    1279              : ! **************************************************************************************************
    1280        24958 :    SUBROUTINE cp_fm_transpose(matrix, matrixt)
    1281              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix, matrixt
    1282              : 
    1283              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_transpose'
    1284              : 
    1285              :       INTEGER                                  :: handle, ncol_global, &
    1286              :                                                   nrow_global, ncol_globalt, nrow_globalt
    1287        12479 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a, c
    1288              : #if defined(__parallel)
    1289              :       INTEGER, DIMENSION(9)                    :: desca, descc
    1290              : #elif !defined(__MKL)
    1291              :       INTEGER                                  :: i, j
    1292              : #endif
    1293              : 
    1294        12479 :       nrow_global = matrix%matrix_struct%nrow_global
    1295        12479 :       ncol_global = matrix%matrix_struct%ncol_global
    1296        12479 :       nrow_globalt = matrixt%matrix_struct%nrow_global
    1297        12479 :       ncol_globalt = matrixt%matrix_struct%ncol_global
    1298            0 :       CPASSERT(nrow_global == ncol_globalt)
    1299        12479 :       CPASSERT(nrow_globalt == ncol_global)
    1300              : 
    1301        12479 :       CALL timeset(routineN, handle)
    1302              : 
    1303        12479 :       a => matrix%local_data
    1304        12479 :       c => matrixt%local_data
    1305              : 
    1306              : #if defined(__parallel)
    1307       124790 :       desca(:) = matrix%matrix_struct%descriptor(:)
    1308       124790 :       descc(:) = matrixt%matrix_struct%descriptor(:)
    1309        12479 :       CALL pdtran(ncol_global, nrow_global, 1.0_dp, a(1, 1), 1, 1, desca, 0.0_dp, c(1, 1), 1, 1, descc)
    1310              : #elif defined(__MKL)
    1311              :       CALL mkl_domatcopy('C', 'T', nrow_global, ncol_global, 1.0_dp, a(1, 1), nrow_global, c(1, 1), ncol_global)
    1312              : #else
    1313              :       DO j = 1, ncol_global
    1314              :          DO i = 1, nrow_global
    1315              :             c(j, i) = a(i, j)
    1316              :          END DO
    1317              :       END DO
    1318              : #endif
    1319        12479 :       CALL timestop(handle)
    1320              : 
    1321        12479 :    END SUBROUTINE cp_fm_transpose
    1322              : 
    1323              : ! **************************************************************************************************
    1324              : !> \brief given a triangular matrix according to uplo, computes the corresponding full matrix
    1325              : !> \param matrix the triangular matrix as input, the full matrix as output
    1326              : !> \param work a matrix of the same size as matrix
    1327              : !> \param uplo triangular format; defaults to 'U'
    1328              : !> \author Matthias Krack
    1329              : !> \note
    1330              : !>       the opposite triangular part is irrelevant
    1331              : ! **************************************************************************************************
    1332       340698 :    SUBROUTINE cp_fm_uplo_to_full(matrix, work, uplo)
    1333              : 
    1334              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix, work
    1335              :       CHARACTER, INTENT(IN), OPTIONAL          :: uplo
    1336              : 
    1337              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_uplo_to_full'
    1338              : 
    1339              :       CHARACTER                                :: myuplo
    1340              :       INTEGER                                  :: handle, icol_global, irow_global, &
    1341              :                                                   mypcol, myprow, ncol_global, &
    1342              :                                                   nrow_global
    1343       170349 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
    1344       170349 :       REAL(KIND=sp), DIMENSION(:, :), POINTER  :: a_sp
    1345              :       TYPE(cp_blacs_env_type), POINTER         :: context
    1346              : 
    1347              : #if defined(__parallel)
    1348              :       INTEGER                                  :: icol_local, irow_local, &
    1349              :                                                   ncol_block, ncol_local, &
    1350              :                                                   nrow_block, nrow_local
    1351              :       INTEGER, DIMENSION(9)                    :: desca, descc
    1352       170349 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: c
    1353       170349 :       REAL(KIND=sp), DIMENSION(:, :), POINTER  :: c_sp
    1354              : #endif
    1355              : 
    1356       170349 :       myuplo = 'U'
    1357            0 :       IF (PRESENT(uplo)) myuplo = uplo
    1358              : 
    1359       170349 :       nrow_global = matrix%matrix_struct%nrow_global
    1360       170349 :       ncol_global = matrix%matrix_struct%ncol_global
    1361       170349 :       CPASSERT(nrow_global == ncol_global)
    1362       170349 :       nrow_global = work%matrix_struct%nrow_global
    1363       170349 :       ncol_global = work%matrix_struct%ncol_global
    1364       170349 :       CPASSERT(nrow_global == ncol_global)
    1365       170349 :       CPASSERT(matrix%use_sp .EQV. work%use_sp)
    1366              : 
    1367       170349 :       CALL timeset(routineN, handle)
    1368              : 
    1369       170349 :       context => matrix%matrix_struct%context
    1370       170349 :       myprow = context%mepos(1)
    1371       170349 :       mypcol = context%mepos(2)
    1372              : 
    1373              : #if defined(__parallel)
    1374              : 
    1375       170349 :       nrow_block = matrix%matrix_struct%nrow_block
    1376       170349 :       ncol_block = matrix%matrix_struct%ncol_block
    1377              : 
    1378       170349 :       nrow_local = matrix%matrix_struct%nrow_locals(myprow)
    1379       170349 :       ncol_local = matrix%matrix_struct%ncol_locals(mypcol)
    1380              : 
    1381       170349 :       a => work%local_data
    1382       170349 :       a_sp => work%local_data_sp
    1383      1703490 :       desca(:) = work%matrix_struct%descriptor(:)
    1384       170349 :       c => matrix%local_data
    1385       170349 :       c_sp => matrix%local_data_sp
    1386      1703490 :       descc(:) = matrix%matrix_struct%descriptor(:)
    1387              : 
    1388      5578319 :       DO icol_local = 1, ncol_local
    1389      5407970 :          icol_global = matrix%matrix_struct%col_indices(icol_local)
    1390    244258595 :          DO irow_local = 1, nrow_local
    1391    238680276 :             irow_global = matrix%matrix_struct%row_indices(irow_local)
    1392    244088246 :             IF (MERGE(irow_global > icol_global, irow_global < icol_global, (myuplo == "U") .OR. (myuplo == "u"))) THEN
    1393    117767217 :                IF (matrix%use_sp) THEN
    1394            0 :                   c_sp(irow_local, icol_local) = 0.0_sp
    1395              :                ELSE
    1396    117767217 :                   c(irow_local, icol_local) = 0.0_dp
    1397              :                END IF
    1398    120913059 :             ELSE IF (irow_global == icol_global) THEN
    1399      3145842 :                IF (matrix%use_sp) THEN
    1400            0 :                   c_sp(irow_local, icol_local) = 0.5_sp*c_sp(irow_local, icol_local)
    1401              :                ELSE
    1402      3145842 :                   c(irow_local, icol_local) = 0.5_dp*c(irow_local, icol_local)
    1403              :                END IF
    1404              :             END IF
    1405              :          END DO
    1406              :       END DO
    1407              : 
    1408      5578319 :       DO icol_local = 1, ncol_local
    1409    244258595 :       DO irow_local = 1, nrow_local
    1410    244088246 :          IF (matrix%use_sp) THEN
    1411            0 :             a_sp(irow_local, icol_local) = c_sp(irow_local, icol_local)
    1412              :          ELSE
    1413    238680276 :             a(irow_local, icol_local) = c(irow_local, icol_local)
    1414              :          END IF
    1415              :       END DO
    1416              :       END DO
    1417              : 
    1418       170349 :       IF (matrix%use_sp) THEN
    1419            0 :          CALL pstran(nrow_global, ncol_global, 1.0_sp, a_sp(1, 1), 1, 1, desca, 1.0_sp, c_sp(1, 1), 1, 1, descc)
    1420              :       ELSE
    1421       170349 :          CALL pdtran(nrow_global, ncol_global, 1.0_dp, a(1, 1), 1, 1, desca, 1.0_dp, c(1, 1), 1, 1, descc)
    1422              :       END IF
    1423              : 
    1424              : #else
    1425              : 
    1426              :       a => matrix%local_data
    1427              :       a_sp => matrix%local_data_sp
    1428              : 
    1429              :       IF ((myuplo == "U") .OR. (myuplo == "u")) THEN
    1430              :          DO irow_global = 1, nrow_global
    1431              :          DO icol_global = irow_global + 1, ncol_global
    1432              :             IF (matrix%use_sp) THEN
    1433              :                a_sp(icol_global, irow_global) = a_sp(irow_global, icol_global)
    1434              :             ELSE
    1435              :                a(icol_global, irow_global) = a(irow_global, icol_global)
    1436              :             END IF
    1437              :          END DO
    1438              :          END DO
    1439              :       ELSE
    1440              :          DO icol_global = 1, ncol_global
    1441              :          DO irow_global = icol_global + 1, nrow_global
    1442              :             IF (matrix%use_sp) THEN
    1443              :                a_sp(irow_global, icol_global) = a_sp(icol_global, irow_global)
    1444              :             ELSE
    1445              :                a(irow_global, icol_global) = a(icol_global, irow_global)
    1446              :             END IF
    1447              :          END DO
    1448              :          END DO
    1449              :       END IF
    1450              : 
    1451              : #endif
    1452       170349 :       CALL timestop(handle)
    1453              : 
    1454       170349 :    END SUBROUTINE cp_fm_uplo_to_full
    1455              : 
    1456              : ! **************************************************************************************************
    1457              : !> \brief scales column i of matrix a with scaling(i)
    1458              : !> \param matrixa ...
    1459              : !> \param scaling : an array used for scaling the columns,
    1460              : !>                  SIZE(scaling) determines the number of columns to be scaled
    1461              : !> \author Joost VandeVondele
    1462              : !> \note
    1463              : !>      this is very useful as a first step in the computation of C = sum_i alpha_i A_i transpose (A_i)
    1464              : !>      that is a rank-k update (cp_fm_syrk , cp_sm_plus_fm_fm_t)
    1465              : !>      this procedure can be up to 20 times faster than calling cp_fm_syrk n times
    1466              : !>      where every vector has a different prefactor
    1467              : ! **************************************************************************************************
    1468       134313 :    SUBROUTINE cp_fm_column_scale(matrixa, scaling)
    1469              :       TYPE(cp_fm_type), INTENT(IN)             :: matrixa
    1470              :       REAL(KIND=dp), DIMENSION(:), INTENT(IN)  :: scaling
    1471              : 
    1472              :       INTEGER                                  :: k, mypcol, myprow, n, ncol_global, &
    1473              :                                                   npcol, nprow
    1474       134313 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
    1475       134313 :       REAL(KIND=sp), DIMENSION(:, :), POINTER  :: a_sp
    1476              : #if defined(__parallel)
    1477              :       INTEGER                                  :: icol_global, icol_local, &
    1478              :                                                   ipcol, iprow, irow_local
    1479              : #else
    1480              :       INTEGER                                  :: i
    1481              : #endif
    1482              : 
    1483       134313 :       myprow = matrixa%matrix_struct%context%mepos(1)
    1484       134313 :       mypcol = matrixa%matrix_struct%context%mepos(2)
    1485       134313 :       nprow = matrixa%matrix_struct%context%num_pe(1)
    1486       134313 :       npcol = matrixa%matrix_struct%context%num_pe(2)
    1487              : 
    1488       134313 :       ncol_global = matrixa%matrix_struct%ncol_global
    1489              : 
    1490       134313 :       a => matrixa%local_data
    1491       134313 :       a_sp => matrixa%local_data_sp
    1492       134313 :       IF (matrixa%use_sp) THEN
    1493            0 :          n = SIZE(a_sp, 1)
    1494              :       ELSE
    1495       134313 :          n = SIZE(a, 1)
    1496              :       END IF
    1497       134313 :       k = MIN(SIZE(scaling), ncol_global)
    1498              : 
    1499              : #if defined(__parallel)
    1500              : 
    1501      2129606 :       DO icol_global = 1, k
    1502              :          CALL infog2l(1, icol_global, matrixa%matrix_struct%descriptor, &
    1503              :                       nprow, npcol, myprow, mypcol, &
    1504      1995293 :                       irow_local, icol_local, iprow, ipcol)
    1505      2129606 :          IF ((ipcol == mypcol)) THEN
    1506      1995293 :             IF (matrixa%use_sp) THEN
    1507            0 :                CALL SSCAL(n, REAL(scaling(icol_global), sp), a_sp(:, icol_local), 1)
    1508              :             ELSE
    1509      1995293 :                CALL DSCAL(n, scaling(icol_global), a(:, icol_local), 1)
    1510              :             END IF
    1511              :          END IF
    1512              :       END DO
    1513              : #else
    1514              :       DO i = 1, k
    1515              :          IF (matrixa%use_sp) THEN
    1516              :             CALL SSCAL(n, REAL(scaling(i), sp), a_sp(:, i), 1)
    1517              :          ELSE
    1518              :             CALL DSCAL(n, scaling(i), a(:, i), 1)
    1519              :          END IF
    1520              :       END DO
    1521              : #endif
    1522       134313 :    END SUBROUTINE cp_fm_column_scale
    1523              : 
    1524              : ! **************************************************************************************************
    1525              : !> \brief scales row i of matrix a with scaling(i)
    1526              : !> \param matrixa ...
    1527              : !> \param scaling : an array used for scaling the columns,
    1528              : !> \author JGH
    1529              : !> \note
    1530              : ! **************************************************************************************************
    1531         6252 :    SUBROUTINE cp_fm_row_scale(matrixa, scaling)
    1532              :       TYPE(cp_fm_type), INTENT(IN)             :: matrixa
    1533              :       REAL(KIND=dp), DIMENSION(:), INTENT(IN)  :: scaling
    1534              : 
    1535              :       INTEGER                                  :: n, m, nrow_global, nrow_local, ncol_local
    1536         6252 :       INTEGER, DIMENSION(:), POINTER           :: row_indices
    1537         6252 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
    1538         6252 :       REAL(KIND=sp), DIMENSION(:, :), POINTER  :: a_sp
    1539              : #if defined(__parallel)
    1540              :       INTEGER                                  :: irow_global, icol, irow
    1541              : #else
    1542              :       INTEGER                                  :: j
    1543              : #endif
    1544              : 
    1545              :       CALL cp_fm_get_info(matrixa, row_indices=row_indices, nrow_global=nrow_global, &
    1546         6252 :                           nrow_local=nrow_local, ncol_local=ncol_local)
    1547         6252 :       CPASSERT(SIZE(scaling) == nrow_global)
    1548              : 
    1549         6252 :       a => matrixa%local_data
    1550         6252 :       a_sp => matrixa%local_data_sp
    1551         6252 :       IF (matrixa%use_sp) THEN
    1552         6252 :          n = SIZE(a_sp, 1)
    1553         6252 :          m = SIZE(a_sp, 2)
    1554              :       ELSE
    1555         6252 :          n = SIZE(a, 1)
    1556         6252 :          m = SIZE(a, 2)
    1557              :       END IF
    1558              : 
    1559              : #if defined(__parallel)
    1560        78676 :       DO icol = 1, ncol_local
    1561        78676 :          IF (matrixa%use_sp) THEN
    1562            0 :             DO irow = 1, nrow_local
    1563            0 :                irow_global = row_indices(irow)
    1564            0 :                a(irow, icol) = REAL(scaling(irow_global), dp)*a(irow, icol)
    1565              :             END DO
    1566              :          ELSE
    1567      6478826 :             DO irow = 1, nrow_local
    1568      6406402 :                irow_global = row_indices(irow)
    1569      6478826 :                a(irow, icol) = scaling(irow_global)*a(irow, icol)
    1570              :             END DO
    1571              :          END IF
    1572              :       END DO
    1573              : #else
    1574              :       IF (matrixa%use_sp) THEN
    1575              :          DO j = 1, m
    1576              :             a_sp(1:n, j) = REAL(scaling(1:n), sp)*a_sp(1:n, j)
    1577              :          END DO
    1578              :       ELSE
    1579              :          DO j = 1, m
    1580              :             a(1:n, j) = scaling(1:n)*a(1:n, j)
    1581              :          END DO
    1582              :       END IF
    1583              : #endif
    1584         6252 :    END SUBROUTINE cp_fm_row_scale
    1585              : 
    1586              : ! **************************************************************************************************
    1587              : !> \brief Inverts a cp_fm_type matrix, optionally returning the determinant of the input matrix
    1588              : !> \param matrix_a the matrix to invert
    1589              : !> \param matrix_inverse the inverse of matrix_a
    1590              : !> \param det_a the determinant of matrix_a
    1591              : !> \param eps_svd optional parameter to active SVD based inversion, singular values below eps_svd
    1592              : !>                are screened
    1593              : !> \param eigval optionally return matrix eigenvalues/singular values
    1594              : !> \par History
    1595              : !>      note of Jan Wilhelm (12.2015)
    1596              : !>      - computation of determinant corrected
    1597              : !>      - determinant only computed if det_a is present
    1598              : !>      12.2016 added option to use SVD instead of LU [Nico Holmberg]
    1599              : !>      - Use cp_fm_get diag instead of n times cp_fm_get_element (A. Bussy)
    1600              : !> \author Florian Schiffmann(02.2007)
    1601              : ! **************************************************************************************************
    1602         1588 :    SUBROUTINE cp_fm_invert(matrix_a, matrix_inverse, det_a, eps_svd, eigval)
    1603              : 
    1604              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix_a, matrix_inverse
    1605              :       REAL(KIND=dp), INTENT(OUT), OPTIONAL     :: det_a
    1606              :       REAL(KIND=dp), INTENT(IN), OPTIONAL      :: eps_svd
    1607              :       REAL(KIND=dp), DIMENSION(:), POINTER, &
    1608              :          INTENT(INOUT), OPTIONAL               :: eigval
    1609              : 
    1610              :       INTEGER                                  :: n
    1611         1588 :       INTEGER, ALLOCATABLE, DIMENSION(:)       :: ipivot
    1612              :       REAL(KIND=dp)                            :: determinant, my_eps_svd
    1613              :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
    1614              :       TYPE(cp_fm_type)                         :: matrix_lu
    1615              : 
    1616              : #if defined(__parallel)
    1617              :       TYPE(cp_fm_type)                         :: u, vt, sigma, inv_sigma_ut
    1618              :       TYPE(mp_comm_type) :: group
    1619              :       INTEGER                                  :: i, info, liwork, lwork, exponent_of_minus_one
    1620              :       INTEGER, DIMENSION(9)                    :: desca
    1621              :       LOGICAL                                  :: quenched
    1622              :       REAL(KIND=dp)                            :: alpha, beta
    1623         1588 :       REAL(KIND=dp), DIMENSION(:), POINTER     :: diag
    1624         1588 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: work
    1625              : #else
    1626              :       LOGICAL                                  :: sign
    1627              :       REAL(KIND=dp)                            :: eps1
    1628              : #endif
    1629              : 
    1630         1588 :       my_eps_svd = 0.0_dp
    1631          468 :       IF (PRESENT(eps_svd)) my_eps_svd = eps_svd
    1632              : 
    1633              :       CALL cp_fm_create(matrix=matrix_lu, &
    1634              :                         matrix_struct=matrix_a%matrix_struct, &
    1635         1588 :                         name="A_lu"//TRIM(ADJUSTL(cp_to_string(1)))//"MATRIX")
    1636         1588 :       CALL cp_fm_to_fm(matrix_a, matrix_lu)
    1637              : 
    1638         1588 :       a => matrix_lu%local_data
    1639         1588 :       n = matrix_lu%matrix_struct%nrow_global
    1640         4764 :       ALLOCATE (ipivot(n + matrix_a%matrix_struct%nrow_block))
    1641        19369 :       ipivot(:) = 0
    1642              : #if defined(__parallel)
    1643         1588 :       IF (my_eps_svd == 0.0_dp) THEN
    1644              :          ! Use LU decomposition
    1645         1560 :          lwork = 3*n
    1646         1560 :          liwork = 3*n
    1647        15600 :          desca(:) = matrix_lu%matrix_struct%descriptor(:)
    1648         1560 :          CALL pdgetrf(n, n, a, 1, 1, desca, ipivot, info)
    1649              : 
    1650         1560 :          IF (PRESENT(det_a) .OR. PRESENT(eigval)) THEN
    1651              : 
    1652         1116 :             ALLOCATE (diag(n))
    1653          916 :             diag(:) = 0.0_dp
    1654          372 :             CALL cp_fm_get_diag(matrix_lu, diag)
    1655              : 
    1656          372 :             exponent_of_minus_one = 0
    1657          372 :             determinant = 1.0_dp
    1658          916 :             DO i = 1, n
    1659          544 :                determinant = determinant*diag(i)
    1660          916 :                IF (ipivot(i) /= i) THEN
    1661          224 :                   exponent_of_minus_one = exponent_of_minus_one + 1
    1662              :                END IF
    1663              :             END DO
    1664          372 :             IF (PRESENT(eigval)) THEN
    1665            0 :                CPASSERT(.NOT. ASSOCIATED(eigval))
    1666            0 :                ALLOCATE (eigval(n))
    1667            0 :                eigval(:) = diag
    1668              :             END IF
    1669          372 :             DEALLOCATE (diag)
    1670              : 
    1671          372 :             group = matrix_lu%matrix_struct%para_env
    1672          372 :             CALL group%sum(exponent_of_minus_one)
    1673              : 
    1674          372 :             determinant = determinant*(-1.0_dp)**exponent_of_minus_one
    1675              : 
    1676              :          END IF
    1677              : 
    1678         1560 :          alpha = 0.0_dp
    1679         1560 :          beta = 1.0_dp
    1680         1560 :          CALL cp_fm_set_all(matrix_inverse, alpha, beta)
    1681         1560 :          CALL pdgetrs('N', n, n, matrix_lu%local_data, 1, 1, desca, ipivot, matrix_inverse%local_data, 1, 1, desca, info)
    1682              :       ELSE
    1683              :          ! Use singular value decomposition
    1684              :          CALL cp_fm_create(matrix=u, &
    1685              :                            matrix_struct=matrix_a%matrix_struct, &
    1686           28 :                            name="LEFT_SINGULAR_MATRIX")
    1687           28 :          CALL cp_fm_set_all(u, alpha=0.0_dp)
    1688              :          CALL cp_fm_create(matrix=vt, &
    1689              :                            matrix_struct=matrix_a%matrix_struct, &
    1690           28 :                            name="RIGHT_SINGULAR_MATRIX")
    1691           28 :          CALL cp_fm_set_all(vt, alpha=0.0_dp)
    1692           84 :          ALLOCATE (diag(n))
    1693           92 :          diag(:) = 0.0_dp
    1694          280 :          desca(:) = matrix_lu%matrix_struct%descriptor(:)
    1695           28 :          ALLOCATE (work(1))
    1696              :          ! Workspace query
    1697           28 :          lwork = -1
    1698              :          CALL pdgesvd('V', 'V', n, n, matrix_lu%local_data, 1, 1, desca, diag, u%local_data, &
    1699           28 :                       1, 1, desca, vt%local_data, 1, 1, desca, work, lwork, info)
    1700           28 :          lwork = INT(work(1))
    1701           28 :          DEALLOCATE (work)
    1702           84 :          ALLOCATE (work(lwork))
    1703              :          ! SVD
    1704              :          CALL pdgesvd('V', 'V', n, n, matrix_lu%local_data, 1, 1, desca, diag, u%local_data, &
    1705           28 :                       1, 1, desca, vt%local_data, 1, 1, desca, work, lwork, info)
    1706              :          ! info == n+1 implies homogeneity error when the number of procs is large
    1707              :          ! this likely isnt a problem, but maybe we should handle it separately
    1708           28 :          IF (info /= 0 .AND. info /= n + 1) &
    1709            0 :             CPABORT("Singular value decomposition of matrix failed.")
    1710              :          ! (Pseudo)inverse and (pseudo)determinant
    1711              :          CALL cp_fm_create(matrix=sigma, &
    1712              :                            matrix_struct=matrix_a%matrix_struct, &
    1713           28 :                            name="SINGULAR_VALUE_MATRIX")
    1714           28 :          CALL cp_fm_set_all(sigma, alpha=0.0_dp)
    1715           28 :          determinant = 1.0_dp
    1716           28 :          quenched = .FALSE.
    1717           28 :          IF (PRESENT(eigval)) THEN
    1718           28 :             CPASSERT(.NOT. ASSOCIATED(eigval))
    1719           84 :             ALLOCATE (eigval(n))
    1720          156 :             eigval(:) = diag
    1721              :          END IF
    1722           92 :          DO i = 1, n
    1723           64 :             IF (diag(i) < my_eps_svd) THEN
    1724           18 :                diag(i) = 0.0_dp
    1725           18 :                quenched = .TRUE.
    1726              :             ELSE
    1727           46 :                determinant = determinant*diag(i)
    1728           46 :                diag(i) = 1.0_dp/diag(i)
    1729              :             END IF
    1730           92 :             CALL cp_fm_set_element(sigma, i, i, diag(i))
    1731              :          END DO
    1732           28 :          DEALLOCATE (diag)
    1733           28 :          IF (quenched) &
    1734              :             CALL cp_warn(__LOCATION__, &
    1735              :                          "Linear dependencies were detected in the SVD inversion of matrix "//TRIM(ADJUSTL(matrix_a%name))// &
    1736           12 :                          ". At least one singular value has been quenched.")
    1737              :          ! Sigma^-1 * U^T
    1738              :          CALL cp_fm_create(matrix=inv_sigma_ut, &
    1739              :                            matrix_struct=matrix_a%matrix_struct, &
    1740           28 :                            name="SINGULAR_VALUE_MATRIX")
    1741           28 :          CALL cp_fm_set_all(inv_sigma_ut, alpha=0.0_dp)
    1742              :          CALL pdgemm('N', 'T', n, n, n, 1.0_dp, sigma%local_data, 1, 1, desca, &
    1743           28 :                      u%local_data, 1, 1, desca, 0.0_dp, inv_sigma_ut%local_data, 1, 1, desca)
    1744              :          ! A^-1 = V * (Sigma^-1 * U^T)
    1745           28 :          CALL cp_fm_set_all(matrix_inverse, alpha=0.0_dp)
    1746              :          CALL pdgemm('T', 'N', n, n, n, 1.0_dp, vt%local_data, 1, 1, desca, &
    1747           28 :                      inv_sigma_ut%local_data, 1, 1, desca, 0.0_dp, matrix_inverse%local_data, 1, 1, desca)
    1748              :          ! Clean up
    1749           28 :          DEALLOCATE (work)
    1750           28 :          CALL cp_fm_release(u)
    1751           28 :          CALL cp_fm_release(vt)
    1752           28 :          CALL cp_fm_release(sigma)
    1753          140 :          CALL cp_fm_release(inv_sigma_ut)
    1754              :       END IF
    1755              : #else
    1756              :       IF (my_eps_svd == 0.0_dp) THEN
    1757              :          sign = .TRUE.
    1758              :          CALL invert_matrix(matrix_a%local_data, matrix_inverse%local_data, &
    1759              :                             eval_error=eps1)
    1760              :          CALL cp_fm_lu_decompose(matrix_lu, determinant, correct_sign=sign)
    1761              :          IF (PRESENT(eigval)) &
    1762              :             CALL cp_abort(__LOCATION__, &
    1763              :                           "NYI. Eigenvalues not available for return without SCALAPACK.")
    1764              :       ELSE
    1765              :          CALL get_pseudo_inverse_svd(matrix_a%local_data, matrix_inverse%local_data, eps_svd, &
    1766              :                                      determinant, eigval)
    1767              :       END IF
    1768              : #endif
    1769         1588 :       CALL cp_fm_release(matrix_lu)
    1770         1588 :       DEALLOCATE (ipivot)
    1771         1588 :       IF (PRESENT(det_a)) det_a = determinant
    1772         1588 :    END SUBROUTINE cp_fm_invert
    1773              : 
    1774              : ! **************************************************************************************************
    1775              : !> \brief inverts a triangular matrix
    1776              : !> \param matrix_a ...
    1777              : !> \param uplo_tr triangular format; defaults to 'U'
    1778              : !> \author MI
    1779              : ! **************************************************************************************************
    1780         5060 :    SUBROUTINE cp_fm_triangular_invert(matrix_a, uplo_tr)
    1781              : 
    1782              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix_a
    1783              :       CHARACTER, INTENT(IN), OPTIONAL          :: uplo_tr
    1784              : 
    1785              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_triangular_invert'
    1786              : 
    1787              :       CHARACTER                                :: unit_diag, uplo
    1788              :       INTEGER                                  :: handle, info, ncol_global
    1789         5060 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
    1790              : #if defined(__parallel)
    1791              :       INTEGER, DIMENSION(9)                    :: desca
    1792              : #endif
    1793              : 
    1794         5060 :       CALL timeset(routineN, handle)
    1795              : 
    1796         5060 :       unit_diag = 'N'
    1797         5060 :       uplo = 'U'
    1798         5060 :       IF (PRESENT(uplo_tr)) uplo = uplo_tr
    1799              : 
    1800         5060 :       ncol_global = matrix_a%matrix_struct%ncol_global
    1801              : 
    1802         5060 :       a => matrix_a%local_data
    1803              : 
    1804              : #if defined(__parallel)
    1805        50600 :       desca(:) = matrix_a%matrix_struct%descriptor(:)
    1806              : 
    1807         5060 :       CALL pdtrtri(uplo, unit_diag, ncol_global, a(1, 1), 1, 1, desca, info)
    1808              : 
    1809              : #else
    1810              :       CALL dtrtri(uplo, unit_diag, ncol_global, a(1, 1), ncol_global, info)
    1811              : #endif
    1812              : 
    1813         5060 :       CALL timestop(handle)
    1814         5060 :    END SUBROUTINE cp_fm_triangular_invert
    1815              : 
    1816              : ! **************************************************************************************************
    1817              : !> \brief  performs a QR factorization of the input rectangular matrix A or of a submatrix of A
    1818              : !>         the computed triangular matrix R is in output of the submatrix sub(A) of size NxN
    1819              : !>         M and M give the dimension of the submatrix that has to be factorized (MxN) with M>N
    1820              : !> \param matrix_a ...
    1821              : !> \param matrix_r ...
    1822              : !> \param nrow_fact ...
    1823              : !> \param ncol_fact ...
    1824              : !> \param first_row ...
    1825              : !> \param first_col ...
    1826              : !> \author MI
    1827              : ! **************************************************************************************************
    1828        19320 :    SUBROUTINE cp_fm_qr_factorization(matrix_a, matrix_r, nrow_fact, ncol_fact, first_row, first_col, uplo)
    1829              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix_a, matrix_r
    1830              :       INTEGER, INTENT(IN), OPTIONAL            :: nrow_fact, ncol_fact, &
    1831              :                                                   first_row, first_col
    1832              :       CHARACTER, INTENT(IN), OPTIONAL          :: uplo
    1833              : 
    1834              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_qr_factorization'
    1835              : 
    1836              :       CHARACTER                                :: myuplo
    1837              :       INTEGER                                  :: handle, i, icol, info, irow, &
    1838              :                                                   j, lda, lwork, ncol, &
    1839              :                                                   ndim, nrow
    1840        19320 :       REAL(dp), ALLOCATABLE, DIMENSION(:)      :: tau, work
    1841        19320 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :)   :: r_mat
    1842              :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
    1843              : #if defined(__parallel)
    1844              :       INTEGER, DIMENSION(9)                    :: desca
    1845              : #endif
    1846              : 
    1847        19320 :       CALL timeset(routineN, handle)
    1848              : 
    1849        19320 :       myuplo = 'U'
    1850        19320 :       IF (PRESENT(uplo)) myuplo = uplo
    1851              : 
    1852        19320 :       ncol = matrix_a%matrix_struct%ncol_global
    1853        19320 :       nrow = matrix_a%matrix_struct%nrow_global
    1854        19320 :       lda = nrow
    1855              : 
    1856        19320 :       a => matrix_a%local_data
    1857              : 
    1858        19320 :       IF (PRESENT(nrow_fact)) nrow = nrow_fact
    1859        19320 :       IF (PRESENT(ncol_fact)) ncol = ncol_fact
    1860        19320 :       irow = 1
    1861        19320 :       IF (PRESENT(first_row)) irow = first_row
    1862        19320 :       icol = 1
    1863        19320 :       IF (PRESENT(first_col)) icol = first_col
    1864              : 
    1865        19320 :       CPASSERT(nrow >= ncol)
    1866        19320 :       ndim = SIZE(a, 2)
    1867        57960 :       ALLOCATE (tau(ndim))
    1868              : 
    1869              : #if defined(__parallel)
    1870              : 
    1871       193200 :       desca(:) = matrix_a%matrix_struct%descriptor(:)
    1872              : 
    1873        19320 :       lwork = -1
    1874        57960 :       ALLOCATE (work(2*ndim))
    1875        19320 :       CALL pdgeqrf(nrow, ncol, a, irow, icol, desca, tau, work, lwork, info)
    1876        19320 :       lwork = INT(work(1))
    1877        19320 :       DEALLOCATE (work)
    1878        57960 :       ALLOCATE (work(lwork))
    1879        19320 :       CALL pdgeqrf(nrow, ncol, a, irow, icol, desca, tau, work, lwork, info)
    1880              : 
    1881              : #else
    1882              :       lwork = -1
    1883              :       ALLOCATE (work(2*ndim))
    1884              :       CALL dgeqrf(nrow, ncol, a, lda, tau, work, lwork, info)
    1885              :       lwork = INT(work(1))
    1886              :       DEALLOCATE (work)
    1887              :       ALLOCATE (work(lwork))
    1888              :       CALL dgeqrf(nrow, ncol, a, lda, tau, work, lwork, info)
    1889              : 
    1890              : #endif
    1891              : 
    1892        77280 :       ALLOCATE (r_mat(ncol, ncol))
    1893        19320 :       CALL cp_fm_get_submatrix(matrix_a, r_mat, 1, 1, ncol, ncol)
    1894        19320 :       IF ((myuplo == "U") .OR. (myuplo == "u")) THEN
    1895        38640 :          DO i = 1, ncol
    1896        38640 :          DO j = i + 1, ncol
    1897        19320 :             r_mat(j, i) = 0.0_dp
    1898              :          END DO
    1899              :          END DO
    1900              :       ELSE
    1901            0 :          DO j = 1, ncol
    1902            0 :          DO i = j + 1, ncol
    1903            0 :             r_mat(i, j) = 0.0_dp
    1904              :          END DO
    1905              :          END DO
    1906              :       END IF
    1907        19320 :       CALL cp_fm_set_submatrix(matrix_r, r_mat, 1, 1, ncol, ncol)
    1908              : 
    1909        19320 :       DEALLOCATE (tau, work, r_mat)
    1910              : 
    1911        19320 :       CALL timestop(handle)
    1912              : 
    1913        19320 :    END SUBROUTINE cp_fm_qr_factorization
    1914              : 
    1915              : ! **************************************************************************************************
    1916              : !> \brief computes the the solution to A*b=A_general using lu decomposition
    1917              : !> \param matrix_a input matrix; will be overwritten
    1918              : !> \param general_a contains the result
    1919              : !> \author Florian Schiffmann
    1920              : ! **************************************************************************************************
    1921         6832 :    SUBROUTINE cp_fm_solve(matrix_a, general_a)
    1922              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix_a, general_a
    1923              : 
    1924              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_solve'
    1925              : 
    1926              :       INTEGER                                  :: handle, info, n, nrhs
    1927         6832 :       INTEGER, ALLOCATABLE, DIMENSION(:)       :: ipivot
    1928              :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a, a_general
    1929              : #if defined(__parallel)
    1930              :       INTEGER, DIMENSION(9)                    :: desca, descb
    1931              : #else
    1932              :       INTEGER                                  :: lda, ldb
    1933              : #endif
    1934              : 
    1935         6832 :       CALL timeset(routineN, handle)
    1936              : 
    1937         6832 :       a => matrix_a%local_data
    1938         6832 :       a_general => general_a%local_data
    1939         6832 :       n = matrix_a%matrix_struct%nrow_global
    1940         6832 :       nrhs = general_a%matrix_struct%ncol_global
    1941        20496 :       ALLOCATE (ipivot(n + matrix_a%matrix_struct%nrow_block))
    1942              : 
    1943              : #if defined(__parallel)
    1944        68320 :       desca(:) = matrix_a%matrix_struct%descriptor(:)
    1945        68320 :       descb(:) = general_a%matrix_struct%descriptor(:)
    1946         6832 :       CALL pdgetrf(n, n, a, 1, 1, desca, ipivot, info)
    1947              :       CALL pdgetrs("N", n, nrhs, a, 1, 1, desca, ipivot, a_general, &
    1948         6832 :                    1, 1, descb, info)
    1949              : 
    1950              : #else
    1951              :       lda = SIZE(a, 1)
    1952              :       ldb = SIZE(a_general, 1)
    1953              :       CALL dgetrf(n, n, a, lda, ipivot, info)
    1954              :       CALL dgetrs("N", n, nrhs, a, lda, ipivot, a_general, ldb, info)
    1955              : 
    1956              : #endif
    1957              :       ! info is allowed to be zero
    1958              :       ! this does just signal a zero diagonal element
    1959         6832 :       DEALLOCATE (ipivot)
    1960         6832 :       CALL timestop(handle)
    1961         6832 :    END SUBROUTINE
    1962              : 
    1963              : ! **************************************************************************************************
    1964              : !> \brief Convenience function. Computes the matrix multiplications needed
    1965              : !>        for the multiplication of complex matrices.
    1966              : !>        C = beta * C + alpha * ( A  ** transa ) * ( B ** transb )
    1967              : !> \param transa : 'N' -> normal   'T' -> transpose
    1968              : !>      alpha,beta :: can be 0.0_dp and 1.0_dp
    1969              : !> \param transb ...
    1970              : !> \param m ...
    1971              : !> \param n ...
    1972              : !> \param k ...
    1973              : !> \param alpha ...
    1974              : !> \param A_re m x k matrix ( ! for transa = 'N'), real part
    1975              : !> \param A_im m x k matrix ( ! for transa = 'N'), imaginary part
    1976              : !> \param B_re k x n matrix ( ! for transa = 'N'), real part
    1977              : !> \param B_im k x n matrix ( ! for transa = 'N'), imaginary part
    1978              : !> \param beta ...
    1979              : !> \param C_re m x n matrix, real part
    1980              : !> \param C_im m x n matrix, imaginary part
    1981              : !> \param a_first_col ...
    1982              : !> \param a_first_row ...
    1983              : !> \param b_first_col : the k x n matrix starts at col b_first_col of matrix_b (avoid usage)
    1984              : !> \param b_first_row ...
    1985              : !> \param c_first_col ...
    1986              : !> \param c_first_row ...
    1987              : !> \author Samuel Andermatt
    1988              : !> \note
    1989              : !>      C should have no overlap with A, B
    1990              : ! **************************************************************************************************
    1991            0 :    SUBROUTINE cp_complex_fm_gemm(transa, transb, m, n, k, alpha, A_re, A_im, B_re, B_im, beta, &
    1992              :                                  C_re, C_im, a_first_col, a_first_row, b_first_col, b_first_row, c_first_col, &
    1993              :                                  c_first_row)
    1994              :       CHARACTER(LEN=1), INTENT(IN)                       :: transa, transb
    1995              :       INTEGER, INTENT(IN)                                :: m, n, k
    1996              :       REAL(KIND=dp), INTENT(IN)                          :: alpha
    1997              :       TYPE(cp_fm_type), INTENT(IN)                       :: A_re, A_im, B_re, B_im
    1998              :       REAL(KIND=dp), INTENT(IN)                          :: beta
    1999              :       TYPE(cp_fm_type), INTENT(IN)                       :: C_re, C_im
    2000              :       INTEGER, INTENT(IN), OPTIONAL                      :: a_first_col, a_first_row, b_first_col, &
    2001              :                                                             b_first_row, c_first_col, c_first_row
    2002              : 
    2003              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_complex_fm_gemm'
    2004              : 
    2005              :       INTEGER                                            :: handle
    2006              : 
    2007            0 :       CALL timeset(routineN, handle)
    2008              : 
    2009              :       CALL cp_fm_gemm(transa, transb, m, n, k, alpha, A_re, B_re, beta, C_re, &
    2010              :                       a_first_col=a_first_col, &
    2011              :                       a_first_row=a_first_row, &
    2012              :                       b_first_col=b_first_col, &
    2013              :                       b_first_row=b_first_row, &
    2014              :                       c_first_col=c_first_col, &
    2015            0 :                       c_first_row=c_first_row)
    2016              :       CALL cp_fm_gemm(transa, transb, m, n, k, -alpha, A_im, B_im, 1.0_dp, C_re, &
    2017              :                       a_first_col=a_first_col, &
    2018              :                       a_first_row=a_first_row, &
    2019              :                       b_first_col=b_first_col, &
    2020              :                       b_first_row=b_first_row, &
    2021              :                       c_first_col=c_first_col, &
    2022            0 :                       c_first_row=c_first_row)
    2023              :       CALL cp_fm_gemm(transa, transb, m, n, k, alpha, A_re, B_im, beta, C_im, &
    2024              :                       a_first_col=a_first_col, &
    2025              :                       a_first_row=a_first_row, &
    2026              :                       b_first_col=b_first_col, &
    2027              :                       b_first_row=b_first_row, &
    2028              :                       c_first_col=c_first_col, &
    2029            0 :                       c_first_row=c_first_row)
    2030              :       CALL cp_fm_gemm(transa, transb, m, n, k, alpha, A_im, B_re, 1.0_dp, C_im, &
    2031              :                       a_first_col=a_first_col, &
    2032              :                       a_first_row=a_first_row, &
    2033              :                       b_first_col=b_first_col, &
    2034              :                       b_first_row=b_first_row, &
    2035              :                       c_first_col=c_first_col, &
    2036            0 :                       c_first_row=c_first_row)
    2037              : 
    2038            0 :       CALL timestop(handle)
    2039              : 
    2040            0 :    END SUBROUTINE cp_complex_fm_gemm
    2041              : 
    2042              : ! **************************************************************************************************
    2043              : !> \brief inverts a matrix using LU decomposition
    2044              : !>        the input matrix will be overwritten
    2045              : !> \param matrix   : input a general square non-singular matrix, outputs its inverse
    2046              : !> \param info_out : optional, if present outputs the info from (p)zgetri
    2047              : !> \author Lianheng Tong
    2048              : ! **************************************************************************************************
    2049            0 :    SUBROUTINE cp_fm_lu_invert(matrix, info_out)
    2050              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix
    2051              :       INTEGER, INTENT(OUT), OPTIONAL           :: info_out
    2052              : 
    2053              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_lu_invert'
    2054              : 
    2055              :       INTEGER :: nrows_global, handle, info, lwork
    2056            0 :       INTEGER, DIMENSION(:), ALLOCATABLE       :: ipivot
    2057              :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: mat
    2058              :       REAL(KIND=sp), DIMENSION(:, :), POINTER  :: mat_sp
    2059            0 :       REAL(KIND=dp), DIMENSION(:), ALLOCATABLE :: work
    2060            0 :       REAL(KIND=sp), DIMENSION(:), ALLOCATABLE :: work_sp
    2061              : #if defined(__parallel)
    2062              :       INTEGER                                  :: liwork
    2063              :       INTEGER, DIMENSION(9)                    :: desca
    2064            0 :       INTEGER, DIMENSION(:), ALLOCATABLE       :: iwork
    2065              : #else
    2066              :       INTEGER                                  :: lda
    2067              : #endif
    2068              : 
    2069            0 :       CALL timeset(routineN, handle)
    2070              : 
    2071            0 :       mat => matrix%local_data
    2072            0 :       mat_sp => matrix%local_data_sp
    2073            0 :       nrows_global = matrix%matrix_struct%nrow_global
    2074            0 :       CPASSERT(nrows_global == matrix%matrix_struct%ncol_global)
    2075            0 :       ALLOCATE (ipivot(nrows_global))
    2076              :       ! do LU decomposition
    2077              : #if defined(__parallel)
    2078            0 :       desca = matrix%matrix_struct%descriptor
    2079            0 :       IF (matrix%use_sp) THEN
    2080              :          CALL psgetrf(nrows_global, nrows_global, &
    2081            0 :                       mat_sp, 1, 1, desca, ipivot, info)
    2082              :       ELSE
    2083              :          CALL pdgetrf(nrows_global, nrows_global, &
    2084            0 :                       mat, 1, 1, desca, ipivot, info)
    2085              :       END IF
    2086              : #else
    2087              :       lda = SIZE(mat, 1)
    2088              :       IF (matrix%use_sp) THEN
    2089              :          CALL sgetrf(nrows_global, nrows_global, &
    2090              :                      mat_sp, lda, ipivot, info)
    2091              :       ELSE
    2092              :          CALL dgetrf(nrows_global, nrows_global, &
    2093              :                      mat, lda, ipivot, info)
    2094              :       END IF
    2095              : #endif
    2096            0 :       IF (info /= 0) THEN
    2097            0 :          CALL cp_abort(__LOCATION__, "LU decomposition has failed")
    2098              :       END IF
    2099              :       ! do inversion
    2100            0 :       IF (matrix%use_sp) THEN
    2101            0 :          ALLOCATE (work(1))
    2102              :       ELSE
    2103            0 :          ALLOCATE (work_sp(1))
    2104              :       END IF
    2105              : #if defined(__parallel)
    2106            0 :       ALLOCATE (iwork(1))
    2107            0 :       IF (matrix%use_sp) THEN
    2108              :          CALL psgetri(nrows_global, mat_sp, 1, 1, desca, &
    2109            0 :                       ipivot, work_sp, -1, iwork, -1, info)
    2110            0 :          lwork = INT(work_sp(1))
    2111            0 :          DEALLOCATE (work_sp)
    2112            0 :          ALLOCATE (work_sp(lwork))
    2113              :       ELSE
    2114              :          CALL pdgetri(nrows_global, mat, 1, 1, desca, &
    2115            0 :                       ipivot, work, -1, iwork, -1, info)
    2116            0 :          lwork = INT(work(1))
    2117            0 :          DEALLOCATE (work)
    2118            0 :          ALLOCATE (work(lwork))
    2119              :       END IF
    2120            0 :       liwork = INT(iwork(1))
    2121            0 :       DEALLOCATE (iwork)
    2122            0 :       ALLOCATE (iwork(liwork))
    2123            0 :       IF (matrix%use_sp) THEN
    2124              :          CALL psgetri(nrows_global, mat_sp, 1, 1, desca, &
    2125            0 :                       ipivot, work_sp, lwork, iwork, liwork, info)
    2126              :       ELSE
    2127              :          CALL pdgetri(nrows_global, mat, 1, 1, desca, &
    2128            0 :                       ipivot, work, lwork, iwork, liwork, info)
    2129              :       END IF
    2130            0 :       DEALLOCATE (iwork)
    2131              : #else
    2132              :       IF (matrix%use_sp) THEN
    2133              :          CALL sgetri(nrows_global, mat_sp, lda, &
    2134              :                      ipivot, work_sp, -1, info)
    2135              :          lwork = INT(work_sp(1))
    2136              :          DEALLOCATE (work_sp)
    2137              :          ALLOCATE (work_sp(lwork))
    2138              :          CALL sgetri(nrows_global, mat_sp, lda, &
    2139              :                      ipivot, work_sp, lwork, info)
    2140              :       ELSE
    2141              :          CALL dgetri(nrows_global, mat, lda, &
    2142              :                      ipivot, work, -1, info)
    2143              :          lwork = INT(work(1))
    2144              :          DEALLOCATE (work)
    2145              :          ALLOCATE (work(lwork))
    2146              :          CALL dgetri(nrows_global, mat, lda, &
    2147              :                      ipivot, work, lwork, info)
    2148              :       END IF
    2149              : #endif
    2150            0 :       IF (matrix%use_sp) THEN
    2151            0 :          DEALLOCATE (work_sp)
    2152              :       ELSE
    2153            0 :          DEALLOCATE (work)
    2154              :       END IF
    2155            0 :       DEALLOCATE (ipivot)
    2156              : 
    2157            0 :       IF (PRESENT(info_out)) THEN
    2158            0 :          info_out = info
    2159              :       ELSE
    2160            0 :          IF (info /= 0) &
    2161            0 :             CALL cp_abort(__LOCATION__, "LU inversion has failed")
    2162              :       END IF
    2163              : 
    2164            0 :       CALL timestop(handle)
    2165              : 
    2166            0 :    END SUBROUTINE cp_fm_lu_invert
    2167              : 
    2168              : ! **************************************************************************************************
    2169              : !> \brief norm of matrix using (p)dlange
    2170              : !> \param matrix   : input a general matrix
    2171              : !> \param mode     : 'M' max abs element value,
    2172              : !>                   '1' or 'O' one norm, i.e. maximum column sum
    2173              : !>                   'I' infinity norm, i.e. maximum row sum
    2174              : !>                   'F' or 'E' Frobenius norm, i.e. sqrt of sum of all squares of elements
    2175              : !> \return : the norm according to mode
    2176              : !> \author Lianheng Tong
    2177              : ! **************************************************************************************************
    2178          492 :    FUNCTION cp_fm_norm(matrix, mode) RESULT(res)
    2179              :       TYPE(cp_fm_type), INTENT(IN) :: matrix
    2180              :       CHARACTER, INTENT(IN) :: mode
    2181              :       REAL(KIND=dp) :: res
    2182              : 
    2183              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_norm'
    2184              : 
    2185              :       INTEGER :: nrows, ncols, handle, lwork, nrows_local, ncols_local
    2186              :       REAL(KIND=sp) :: res_sp
    2187              :       REAL(KIND=dp), DIMENSION(:, :), POINTER :: aa
    2188              :       REAL(KIND=sp), DIMENSION(:, :), POINTER :: aa_sp
    2189          492 :       REAL(KIND=dp), DIMENSION(:), ALLOCATABLE :: work
    2190          492 :       REAL(KIND=sp), DIMENSION(:), ALLOCATABLE :: work_sp
    2191              : #if defined(__parallel)
    2192              :       INTEGER, DIMENSION(9) :: desca
    2193              : #else
    2194              :       INTEGER :: lda
    2195              : #endif
    2196              : 
    2197          492 :       CALL timeset(routineN, handle)
    2198              : 
    2199              :       CALL cp_fm_get_info(matrix=matrix, &
    2200              :                           nrow_global=nrows, &
    2201              :                           ncol_global=ncols, &
    2202              :                           nrow_local=nrows_local, &
    2203          492 :                           ncol_local=ncols_local)
    2204          492 :       aa => matrix%local_data
    2205          492 :       aa_sp => matrix%local_data_sp
    2206              : 
    2207              : #if defined(__parallel)
    2208         4920 :       desca = matrix%matrix_struct%descriptor
    2209              :       SELECT CASE (mode)
    2210              :       CASE ('M', 'm')
    2211          492 :          lwork = 1
    2212              :       CASE ('1', 'O', 'o')
    2213          492 :          lwork = ncols_local
    2214              :       CASE ('I', 'i')
    2215            0 :          lwork = nrows_local
    2216              :       CASE ('F', 'f', 'E', 'e')
    2217            0 :          lwork = 1
    2218              :       CASE DEFAULT
    2219          492 :          CPABORT("mode input is not valid")
    2220              :       END SELECT
    2221          492 :       IF (matrix%use_sp) THEN
    2222            0 :          ALLOCATE (work_sp(lwork))
    2223            0 :          res_sp = pslange(mode, nrows, ncols, aa_sp, 1, 1, desca, work_sp)
    2224            0 :          DEALLOCATE (work_sp)
    2225            0 :          res = REAL(res_sp, KIND=dp)
    2226              :       ELSE
    2227         1476 :          ALLOCATE (work(lwork))
    2228          492 :          res = pdlange(mode, nrows, ncols, aa, 1, 1, desca, work)
    2229          492 :          DEALLOCATE (work)
    2230              :       END IF
    2231              : #else
    2232              :       SELECT CASE (mode)
    2233              :       CASE ('M', 'm')
    2234              :          lwork = 1
    2235              :       CASE ('1', 'O', 'o')
    2236              :          lwork = 1
    2237              :       CASE ('I', 'i')
    2238              :          lwork = nrows
    2239              :       CASE ('F', 'f', 'E', 'e')
    2240              :          lwork = 1
    2241              :       CASE DEFAULT
    2242              :          CPABORT("mode input is not valid")
    2243              :       END SELECT
    2244              :       IF (matrix%use_sp) THEN
    2245              :          ALLOCATE (work_sp(lwork))
    2246              :          lda = SIZE(aa_sp, 1)
    2247              :          res_sp = slange(mode, nrows, ncols, aa_sp, lda, work_sp)
    2248              :          DEALLOCATE (work_sp)
    2249              :          res = REAL(res_sp, KIND=dp)
    2250              :       ELSE
    2251              :          ALLOCATE (work(lwork))
    2252              :          lda = SIZE(aa, 1)
    2253              :          res = dlange(mode, nrows, ncols, aa, lda, work)
    2254              :          DEALLOCATE (work)
    2255              :       END IF
    2256              : #endif
    2257              : 
    2258          492 :       CALL timestop(handle)
    2259              : 
    2260          492 :    END FUNCTION cp_fm_norm
    2261              : 
    2262              : ! **************************************************************************************************
    2263              : !> \brief trace of a matrix using pdlatra
    2264              : !> \param matrix   : input a square matrix
    2265              : !> \return : the trace
    2266              : !> \author Lianheng Tong
    2267              : ! **************************************************************************************************
    2268            0 :    FUNCTION cp_fm_latra(matrix) RESULT(res)
    2269              :       TYPE(cp_fm_type), INTENT(IN) :: matrix
    2270              :       REAL(KIND=dp) :: res
    2271              : 
    2272              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_latra'
    2273              : 
    2274              :       INTEGER :: nrows, ncols, handle
    2275              :       REAL(KIND=sp) :: res_sp
    2276              :       REAL(KIND=dp), DIMENSION(:, :), POINTER :: aa
    2277              :       REAL(KIND=sp), DIMENSION(:, :), POINTER :: aa_sp
    2278              : #if defined(__parallel)
    2279              :       INTEGER, DIMENSION(9) :: desca
    2280              : #else
    2281              :       INTEGER :: ii
    2282              : #endif
    2283              : 
    2284            0 :       CALL timeset(routineN, handle)
    2285              : 
    2286            0 :       nrows = matrix%matrix_struct%nrow_global
    2287            0 :       ncols = matrix%matrix_struct%ncol_global
    2288            0 :       CPASSERT(nrows == ncols)
    2289            0 :       aa => matrix%local_data
    2290            0 :       aa_sp => matrix%local_data_sp
    2291              : 
    2292              : #if defined(__parallel)
    2293            0 :       desca = matrix%matrix_struct%descriptor
    2294            0 :       IF (matrix%use_sp) THEN
    2295            0 :          res_sp = pslatra(nrows, aa_sp, 1, 1, desca)
    2296            0 :          res = REAL(res_sp, KIND=dp)
    2297              :       ELSE
    2298            0 :          res = pdlatra(nrows, aa, 1, 1, desca)
    2299              :       END IF
    2300              : #else
    2301              :       IF (matrix%use_sp) THEN
    2302              :          res_sp = 0.0_sp
    2303              :          DO ii = 1, nrows
    2304              :             res_sp = res_sp + aa_sp(ii, ii)
    2305              :          END DO
    2306              :          res = REAL(res_sp, KIND=dp)
    2307              :       ELSE
    2308              :          res = 0.0_dp
    2309              :          DO ii = 1, nrows
    2310              :             res = res + aa(ii, ii)
    2311              :          END DO
    2312              :       END IF
    2313              : #endif
    2314              : 
    2315            0 :       CALL timestop(handle)
    2316              : 
    2317            0 :    END FUNCTION cp_fm_latra
    2318              : 
    2319              : ! **************************************************************************************************
    2320              : !> \brief compute a QR factorization with column pivoting of a M-by-N distributed matrix
    2321              : !>        sub( A ) = A(IA:IA+M-1,JA:JA+N-1)
    2322              : !> \param matrix   : input M-by-N distributed matrix sub( A ) which is to be factored
    2323              : !> \param tau      : scalar factors TAU of the elementary reflectors. TAU is tied to the distributed matrix A
    2324              : !> \param nrow ...
    2325              : !> \param ncol ...
    2326              : !> \param first_row ...
    2327              : !> \param first_col ...
    2328              : !> \author MI
    2329              : ! **************************************************************************************************
    2330           36 :    SUBROUTINE cp_fm_pdgeqpf(matrix, tau, nrow, ncol, first_row, first_col)
    2331              : 
    2332              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix
    2333              :       REAL(KIND=dp), DIMENSION(:), POINTER               :: tau
    2334              :       INTEGER, INTENT(IN)                                :: nrow, ncol, first_row, first_col
    2335              : 
    2336              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_pdgeqpf'
    2337              : 
    2338              :       INTEGER                                            :: handle
    2339              :       INTEGER                                            :: info, lwork
    2340           36 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: ipiv
    2341              :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: a
    2342              :       REAL(KIND=dp), DIMENSION(:), POINTER               :: work
    2343              : #if defined(__parallel)
    2344              :       INTEGER, DIMENSION(9) :: descc
    2345              : #else
    2346              :       INTEGER :: lda
    2347              : #endif
    2348              : 
    2349           36 :       CALL timeset(routineN, handle)
    2350              : 
    2351           36 :       a => matrix%local_data
    2352           36 :       lwork = -1
    2353          108 :       ALLOCATE (work(2*nrow))
    2354          108 :       ALLOCATE (ipiv(ncol))
    2355           36 :       info = 0
    2356              : 
    2357              : #if defined(__parallel)
    2358          360 :       descc(:) = matrix%matrix_struct%descriptor(:)
    2359              :       ! Call SCALAPACK routine to get optimal work dimension
    2360           36 :       CALL pdgeqpf(nrow, ncol, a, first_row, first_col, descc, ipiv, tau, work, lwork, info)
    2361           36 :       lwork = INT(work(1))
    2362           36 :       DEALLOCATE (work)
    2363          108 :       ALLOCATE (work(lwork))
    2364          244 :       tau = 0.0_dp
    2365          354 :       ipiv = 0
    2366              : 
    2367              :       ! Call SCALAPACK routine to get QR decomposition of CTs
    2368           36 :       CALL pdgeqpf(nrow, ncol, a, first_row, first_col, descc, ipiv, tau, work, lwork, info)
    2369              : #else
    2370              :       CPASSERT(first_row == 1 .AND. first_col == 1)
    2371              :       lda = SIZE(a, 1)
    2372              :       CALL dgeqp3(nrow, ncol, a, lda, ipiv, tau, work, lwork, info)
    2373              :       lwork = INT(work(1))
    2374              :       DEALLOCATE (work)
    2375              :       ALLOCATE (work(lwork))
    2376              :       tau = 0.0_dp
    2377              :       ipiv = 0
    2378              :       CALL dgeqp3(nrow, ncol, a, lda, ipiv, tau, work, lwork, info)
    2379              : #endif
    2380           36 :       CPASSERT(info == 0)
    2381              : 
    2382           36 :       DEALLOCATE (work)
    2383           36 :       DEALLOCATE (ipiv)
    2384              : 
    2385           36 :       CALL timestop(handle)
    2386              : 
    2387           36 :    END SUBROUTINE cp_fm_pdgeqpf
    2388              : 
    2389              : ! **************************************************************************************************
    2390              : !> \brief generates an M-by-N real distributed matrix Q denoting A(IA:IA+M-1,JA:JA+N-1)
    2391              : !>         with orthonormal columns, which is defined as the first N columns of a product of K
    2392              : !>         elementary reflectors of order M
    2393              : !> \param matrix : On entry, the j-th column must contain the vector which defines the elementary reflector
    2394              : !>                  as returned from PDGEQRF
    2395              : !>                 On exit it contains  the M-by-N distributed matrix Q
    2396              : !> \param tau :   contains the scalar factors TAU of elementary reflectors  as returned by PDGEQRF
    2397              : !> \param nrow ...
    2398              : !> \param first_row ...
    2399              : !> \param first_col ...
    2400              : !> \author MI
    2401              : ! **************************************************************************************************
    2402           36 :    SUBROUTINE cp_fm_pdorgqr(matrix, tau, nrow, first_row, first_col)
    2403              : 
    2404              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix
    2405              :       REAL(KIND=dp), DIMENSION(:), POINTER               :: tau
    2406              :       INTEGER, INTENT(IN)                                :: nrow, first_row, first_col
    2407              : 
    2408              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_pdorgqr'
    2409              : 
    2410              :       INTEGER                                            :: handle
    2411              :       INTEGER                                            :: info, lwork
    2412              :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: a
    2413              :       REAL(KIND=dp), DIMENSION(:), POINTER               :: work
    2414              : #if defined(__parallel)
    2415              :       INTEGER, DIMENSION(9) :: descc
    2416              : #else
    2417              :       INTEGER :: lda
    2418              : #endif
    2419              : 
    2420           36 :       CALL timeset(routineN, handle)
    2421              : 
    2422           36 :       a => matrix%local_data
    2423           36 :       lwork = -1
    2424          108 :       ALLOCATE (work(2*nrow))
    2425           36 :       info = 0
    2426              : 
    2427              : #if defined(__parallel)
    2428          360 :       descc(:) = matrix%matrix_struct%descriptor(:)
    2429              : 
    2430           36 :       CALL pdorgqr(nrow, nrow, nrow, a, first_row, first_col, descc, tau, work, lwork, info)
    2431           36 :       CPASSERT(info == 0)
    2432           36 :       lwork = INT(work(1))
    2433           36 :       DEALLOCATE (work)
    2434          108 :       ALLOCATE (work(lwork))
    2435              : 
    2436              :       ! Call SCALAPACK routine to get Q
    2437           36 :       CALL pdorgqr(nrow, nrow, nrow, a, first_row, first_col, descc, tau, work, lwork, info)
    2438              : #else
    2439              :       CPASSERT(first_row == 1 .AND. first_col == 1)
    2440              :       lda = SIZE(a, 1)
    2441              :       CALL dorgqr(nrow, nrow, nrow, a, lda, tau, work, lwork, info)
    2442              :       lwork = INT(work(1))
    2443              :       DEALLOCATE (work)
    2444              :       ALLOCATE (work(lwork))
    2445              :       CALL dorgqr(nrow, nrow, nrow, a, lda, tau, work, lwork, info)
    2446              : #endif
    2447           36 :       CPASSERT(INFO == 0)
    2448              : 
    2449           36 :       DEALLOCATE (work)
    2450           36 :       CALL timestop(handle)
    2451              : 
    2452           36 :    END SUBROUTINE cp_fm_pdorgqr
    2453              : 
    2454              : ! **************************************************************************************************
    2455              : !> \brief Applies a planar rotation defined by cs and sn to the i'th and j'th rows.
    2456              : !> \param matrix ...
    2457              : !> \param irow ...
    2458              : !> \param jrow ...
    2459              : !> \param cs cosine of the rotation angle
    2460              : !> \param sn sinus of the rotation angle
    2461              : !> \author Ole Schuett
    2462              : ! **************************************************************************************************
    2463       565504 :    SUBROUTINE cp_fm_rot_rows(matrix, irow, jrow, cs, sn)
    2464              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix
    2465              :       INTEGER, INTENT(IN)                      :: irow, jrow
    2466              :       REAL(dp), INTENT(IN)                     :: cs, sn
    2467              : 
    2468              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_rot_rows'
    2469              :       INTEGER                                  :: handle, ncol
    2470              : 
    2471              : #if defined(__parallel)
    2472              :       INTEGER                                  :: info, lwork
    2473              :       INTEGER, DIMENSION(9)                    :: desc
    2474       565504 :       REAL(dp), DIMENSION(:), ALLOCATABLE      :: work
    2475              : #endif
    2476       565504 :       CALL timeset(routineN, handle)
    2477       565504 :       CALL cp_fm_get_info(matrix, ncol_global=ncol)
    2478              : #if defined(__parallel)
    2479       565504 :       IF (1 /= matrix%matrix_struct%context%n_pid) THEN
    2480       565504 :          lwork = 2*ncol + 1
    2481      1696512 :          ALLOCATE (work(lwork))
    2482      5655040 :          desc(:) = matrix%matrix_struct%descriptor(:)
    2483              :          CALL pdrot(ncol, &
    2484              :                     matrix%local_data(1, 1), irow, 1, desc, ncol, &
    2485              :                     matrix%local_data(1, 1), jrow, 1, desc, ncol, &
    2486       565504 :                     cs, sn, work, lwork, info)
    2487       565504 :          CPASSERT(info == 0)
    2488       565504 :          DEALLOCATE (work)
    2489              :       ELSE
    2490              : #endif
    2491            0 :          CALL drot(ncol, matrix%local_data(irow, 1), ncol, matrix%local_data(jrow, 1), ncol, cs, sn)
    2492              : #if defined(__parallel)
    2493              :       END IF
    2494              : #endif
    2495       565504 :       CALL timestop(handle)
    2496       565504 :    END SUBROUTINE cp_fm_rot_rows
    2497              : 
    2498              : ! **************************************************************************************************
    2499              : !> \brief Applies a planar rotation defined by cs and sn to the i'th and j'th columnns.
    2500              : !> \param matrix ...
    2501              : !> \param icol ...
    2502              : !> \param jcol ...
    2503              : !> \param cs cosine of the rotation angle
    2504              : !> \param sn sinus of the rotation angle
    2505              : !> \author Ole Schuett
    2506              : ! **************************************************************************************************
    2507       637106 :    SUBROUTINE cp_fm_rot_cols(matrix, icol, jcol, cs, sn)
    2508              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix
    2509              :       INTEGER, INTENT(IN)                      :: icol, jcol
    2510              :       REAL(dp), INTENT(IN)                     :: cs, sn
    2511              : 
    2512              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_rot_cols'
    2513              :       INTEGER                                  :: handle, nrow
    2514              : 
    2515              : #if defined(__parallel)
    2516              :       INTEGER                                  :: info, lwork
    2517              :       INTEGER, DIMENSION(9)                    :: desc
    2518       637106 :       REAL(dp), DIMENSION(:), ALLOCATABLE      :: work
    2519              : #endif
    2520       637106 :       CALL timeset(routineN, handle)
    2521       637106 :       CALL cp_fm_get_info(matrix, nrow_global=nrow)
    2522              : #if defined(__parallel)
    2523       637106 :       IF (1 /= matrix%matrix_struct%context%n_pid) THEN
    2524       637106 :          lwork = 2*nrow + 1
    2525      1911318 :          ALLOCATE (work(lwork))
    2526      6371060 :          desc(:) = matrix%matrix_struct%descriptor(:)
    2527              :          CALL pdrot(nrow, &
    2528              :                     matrix%local_data(1, 1), 1, icol, desc, 1, &
    2529              :                     matrix%local_data(1, 1), 1, jcol, desc, 1, &
    2530       637106 :                     cs, sn, work, lwork, info)
    2531       637106 :          CPASSERT(info == 0)
    2532       637106 :          DEALLOCATE (work)
    2533              :       ELSE
    2534              : #endif
    2535            0 :          CALL drot(nrow, matrix%local_data(1, icol), 1, matrix%local_data(1, jcol), 1, cs, sn)
    2536              : #if defined(__parallel)
    2537              :       END IF
    2538              : #endif
    2539       637106 :       CALL timestop(handle)
    2540       637106 :    END SUBROUTINE cp_fm_rot_cols
    2541              : 
    2542              : ! **************************************************************************************************
    2543              : !> \brief Orthonormalizes selected rows and columns of a full matrix, matrix_a
    2544              : !> \param matrix_a ...
    2545              : !> \param B ...
    2546              : !> \param nrows number of rows of matrix_a, optional, defaults to size(matrix_a,1)
    2547              : !> \param ncols number of columns of matrix_a, optional, defaults to size(matrix_a, 2)
    2548              : !> \param start_row starting index of rows, optional, defaults to 1
    2549              : !> \param start_col starting index of columns, optional, defaults to 1
    2550              : !> \param do_norm ...
    2551              : !> \param do_print ...
    2552              : ! **************************************************************************************************
    2553            0 :    SUBROUTINE cp_fm_Gram_Schmidt_orthonorm(matrix_a, B, nrows, ncols, start_row, start_col, &
    2554              :                                            do_norm, do_print)
    2555              : 
    2556              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix_a
    2557              :       REAL(kind=dp), DIMENSION(:, :), INTENT(OUT)        :: B
    2558              :       INTEGER, INTENT(IN), OPTIONAL                      :: nrows, ncols, start_row, start_col
    2559              :       LOGICAL, INTENT(IN), OPTIONAL                      :: do_norm, do_print
    2560              : 
    2561              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_Gram_Schmidt_orthonorm'
    2562              : 
    2563              :       INTEGER :: end_col_global, end_col_local, end_row_global, end_row_local, handle, i, j, &
    2564              :                  j_col, ncol_global, ncol_local, nrow_global, nrow_local, start_col_global, &
    2565              :                  start_col_local, start_row_global, start_row_local, this_col, unit_nr
    2566            0 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
    2567              :       LOGICAL                                            :: my_do_norm, my_do_print
    2568              :       REAL(KIND=dp)                                      :: norm
    2569            0 :       REAL(kind=dp), DIMENSION(:, :), POINTER            :: a
    2570              : 
    2571            0 :       CALL timeset(routineN, handle)
    2572              : 
    2573            0 :       my_do_norm = .TRUE.
    2574            0 :       IF (PRESENT(do_norm)) my_do_norm = do_norm
    2575              : 
    2576            0 :       my_do_print = .FALSE.
    2577            0 :       IF (PRESENT(do_print) .AND. (my_do_norm)) my_do_print = do_print
    2578              : 
    2579            0 :       unit_nr = -1
    2580            0 :       IF (my_do_print) THEN
    2581            0 :          unit_nr = cp_logger_get_default_unit_nr()
    2582            0 :          IF (unit_nr < 1) my_do_print = .FALSE.
    2583              :       END IF
    2584              : 
    2585            0 :       IF (SIZE(B) /= 0) THEN
    2586            0 :          IF (PRESENT(nrows)) THEN
    2587            0 :             nrow_global = nrows
    2588              :          ELSE
    2589            0 :             nrow_global = SIZE(B, 1)
    2590              :          END IF
    2591              : 
    2592            0 :          IF (PRESENT(ncols)) THEN
    2593            0 :             ncol_global = ncols
    2594              :          ELSE
    2595            0 :             ncol_global = SIZE(B, 2)
    2596              :          END IF
    2597              : 
    2598            0 :          IF (PRESENT(start_row)) THEN
    2599            0 :             start_row_global = start_row
    2600              :          ELSE
    2601              :             start_row_global = 1
    2602              :          END IF
    2603              : 
    2604            0 :          IF (PRESENT(start_col)) THEN
    2605            0 :             start_col_global = start_col
    2606              :          ELSE
    2607              :             start_col_global = 1
    2608              :          END IF
    2609              : 
    2610            0 :          end_row_global = start_row_global + nrow_global - 1
    2611            0 :          end_col_global = start_col_global + ncol_global - 1
    2612              : 
    2613              :          CALL cp_fm_get_info(matrix=matrix_a, &
    2614              :                              nrow_global=nrow_global, ncol_global=ncol_global, &
    2615              :                              nrow_local=nrow_local, ncol_local=ncol_local, &
    2616            0 :                              row_indices=row_indices, col_indices=col_indices)
    2617            0 :          IF (end_row_global > nrow_global) THEN
    2618              :             end_row_global = nrow_global
    2619              :          END IF
    2620            0 :          IF (end_col_global > ncol_global) THEN
    2621              :             end_col_global = ncol_global
    2622              :          END IF
    2623              : 
    2624              :          ! find out row/column indices of locally stored matrix elements that
    2625              :          ! needs to be copied.
    2626              :          ! Arrays row_indices and col_indices are assumed to be sorted in
    2627              :          ! ascending order
    2628            0 :          DO start_row_local = 1, nrow_local
    2629            0 :             IF (row_indices(start_row_local) >= start_row_global) EXIT
    2630              :          END DO
    2631              : 
    2632            0 :          DO end_row_local = start_row_local, nrow_local
    2633            0 :             IF (row_indices(end_row_local) > end_row_global) EXIT
    2634              :          END DO
    2635            0 :          end_row_local = end_row_local - 1
    2636              : 
    2637            0 :          DO start_col_local = 1, ncol_local
    2638            0 :             IF (col_indices(start_col_local) >= start_col_global) EXIT
    2639              :          END DO
    2640              : 
    2641            0 :          DO end_col_local = start_col_local, ncol_local
    2642            0 :             IF (col_indices(end_col_local) > end_col_global) EXIT
    2643              :          END DO
    2644            0 :          end_col_local = end_col_local - 1
    2645              : 
    2646            0 :          a => matrix_a%local_data
    2647              : 
    2648            0 :          this_col = col_indices(start_col_local) - start_col_global + 1
    2649              : 
    2650            0 :          B(:, this_col) = a(:, start_col_local)
    2651              : 
    2652            0 :          IF (my_do_norm) THEN
    2653            0 :             norm = SQRT(accurate_dot_product(B(:, this_col), B(:, this_col)))
    2654            0 :             B(:, this_col) = B(:, this_col)/norm
    2655            0 :             IF (my_do_print) WRITE (unit_nr, '(I3,F8.3)') this_col, norm
    2656              :          END IF
    2657              : 
    2658            0 :          DO i = start_col_local + 1, end_col_local
    2659            0 :             this_col = col_indices(i) - start_col_global + 1
    2660            0 :             B(:, this_col) = a(:, i)
    2661            0 :             DO j = start_col_local, i - 1
    2662            0 :                j_col = col_indices(j) - start_col_global + 1
    2663              :                B(:, this_col) = B(:, this_col) - &
    2664              :                                 accurate_dot_product(B(:, j_col), B(:, this_col))* &
    2665            0 :                                 B(:, j_col)/accurate_dot_product(B(:, j_col), B(:, j_col))
    2666              :             END DO
    2667              : 
    2668            0 :             IF (my_do_norm) THEN
    2669            0 :                norm = SQRT(accurate_dot_product(B(:, this_col), B(:, this_col)))
    2670            0 :                B(:, this_col) = B(:, this_col)/norm
    2671            0 :                IF (my_do_print) WRITE (unit_nr, '(I3,F8.3)') this_col, norm
    2672              :             END IF
    2673              : 
    2674              :          END DO
    2675            0 :          CALL matrix_a%matrix_struct%para_env%sum(B)
    2676              :       END IF
    2677              : 
    2678            0 :       CALL timestop(handle)
    2679              : 
    2680            0 :    END SUBROUTINE cp_fm_Gram_Schmidt_orthonorm
    2681              : 
    2682              : ! **************************************************************************************************
    2683              : !> \brief Cholesky decomposition
    2684              : !> \param fm_matrix ...
    2685              : !> \param n ...
    2686              : !> \param uplo triangular format; defaults to 'U'
    2687              : ! **************************************************************************************************
    2688        10030 :    SUBROUTINE cp_fm_potrf(fm_matrix, n, uplo)
    2689              :       TYPE(cp_fm_type)                         :: fm_matrix
    2690              :       INTEGER, INTENT(IN)                      :: n
    2691              :       CHARACTER, INTENT(IN), OPTIONAL          :: uplo
    2692              : 
    2693              :       CHARACTER                                :: myuplo
    2694              :       INTEGER                                  :: info
    2695        10030 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
    2696        10030 :       REAL(KIND=sp), DIMENSION(:, :), POINTER  :: a_sp
    2697              : #if defined(__parallel)
    2698              :       INTEGER, DIMENSION(9)                    :: desca
    2699              : #endif
    2700              : 
    2701        10030 :       myuplo = 'U'
    2702            0 :       IF (PRESENT(uplo)) myuplo = uplo
    2703              : 
    2704        10030 :       a => fm_matrix%local_data
    2705        10030 :       a_sp => fm_matrix%local_data_sp
    2706              : #if defined(__parallel)
    2707       100300 :       desca(:) = fm_matrix%matrix_struct%descriptor(:)
    2708        10030 :       IF (fm_matrix%use_sp) THEN
    2709            0 :          CALL pspotrf(myuplo, n, a_sp(1, 1), 1, 1, desca, info)
    2710              :       ELSE
    2711        10030 :          CALL pdpotrf(myuplo, n, a(1, 1), 1, 1, desca, info)
    2712              :       END IF
    2713              : #else
    2714              :       IF (fm_matrix%use_sp) THEN
    2715              :          CALL spotrf(myuplo, n, a_sp(1, 1), SIZE(a_sp, 1), info)
    2716              :       ELSE
    2717              :          CALL dpotrf(myuplo, n, a(1, 1), SIZE(a, 1), info)
    2718              :       END IF
    2719              : #endif
    2720        10030 :       IF (info /= 0) &
    2721            0 :          CPABORT("Cholesky decomposition failed. Matrix ill-conditioned?")
    2722              : 
    2723        10030 :    END SUBROUTINE cp_fm_potrf
    2724              : 
    2725              : ! **************************************************************************************************
    2726              : !> \brief Invert trianguar matrix
    2727              : !> \param fm_matrix the matrix to invert (triangular matrix according to uplo)
    2728              : !> \param n size of the matrix to invert
    2729              : !> \param uplo triangular format; defaults to 'U'
    2730              : ! **************************************************************************************************
    2731         9236 :    SUBROUTINE cp_fm_potri(fm_matrix, n, uplo)
    2732              :       TYPE(cp_fm_type)                         :: fm_matrix
    2733              :       INTEGER, INTENT(IN)                      :: n
    2734              :       CHARACTER, INTENT(IN), OPTIONAL          :: uplo
    2735              : 
    2736              :       CHARACTER                                :: myuplo
    2737         9236 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
    2738         9236 :       REAL(KIND=sp), DIMENSION(:, :), POINTER  :: a_sp
    2739              :       INTEGER                                  :: info
    2740              : #if defined(__parallel)
    2741              :       INTEGER, DIMENSION(9)                    :: desca
    2742              : #endif
    2743              : 
    2744         9236 :       myuplo = 'U'
    2745            0 :       IF (PRESENT(uplo)) myuplo = uplo
    2746              : 
    2747         9236 :       a => fm_matrix%local_data
    2748         9236 :       a_sp => fm_matrix%local_data_sp
    2749              : #if defined(__parallel)
    2750        92360 :       desca(:) = fm_matrix%matrix_struct%descriptor(:)
    2751         9236 :       IF (fm_matrix%use_sp) THEN
    2752            0 :          CALL pspotri(myuplo, n, a_sp(1, 1), 1, 1, desca, info)
    2753              :       ELSE
    2754         9236 :          CALL pdpotri(myuplo, n, a(1, 1), 1, 1, desca, info)
    2755              :       END IF
    2756              : #else
    2757              :       IF (fm_matrix%use_sp) THEN
    2758              :          CALL spotri(myuplo, n, a_sp(1, 1), SIZE(a_sp, 1), info)
    2759              :       ELSE
    2760              :          CALL dpotri(myuplo, n, a(1, 1), SIZE(a, 1), info)
    2761              :       END IF
    2762              : #endif
    2763         9236 :       CPASSERT(info == 0)
    2764         9236 :    END SUBROUTINE cp_fm_potri
    2765              : 
    2766              : ! **************************************************************************************************
    2767              : !> \brief Calculates
    2768              : !>        yv = alpha*amat*xv + beta*yv
    2769              : !>        where amat: fm matrix
    2770              : !>              xv  : vector replicated
    2771              : !>              yv  : vector replicated
    2772              : !>        Defaults: alpha = 1, beta = 0
    2773              : ! **************************************************************************************************
    2774        17136 :    SUBROUTINE cp_fm_matvec(amat, xv, yv, alpha, beta)
    2775              :       TYPE(cp_fm_type), INTENT(IN)                   :: amat
    2776              :       REAL(KIND=dp), DIMENSION(:), INTENT(IN)        :: xv
    2777              :       REAL(KIND=dp), DIMENSION(:), INTENT(INOUT)     :: yv
    2778              :       REAL(KIND=dp), OPTIONAL, INTENT(IN)            :: alpha, beta
    2779              : 
    2780              :       INTEGER                                        :: na, nc, nx, ny
    2781              :       REAL(KIND=dp)                                  :: aval, bval
    2782              : #if defined(__parallel)
    2783              :       INTEGER                                        :: nrl, ncl, ic, ir
    2784        17136 :       INTEGER, DIMENSION(:), POINTER                 :: rind, cind
    2785        17136 :       REAL(KIND=dp), DIMENSION(:), ALLOCATABLE       :: xvl, yvl, yvm
    2786              : #endif
    2787              : 
    2788        17136 :       IF (amat%use_sp) THEN
    2789            0 :          CPABORT("cp_fm_matvec: SP option not available")
    2790              :       END IF
    2791        17136 :       aval = 1.0_dp
    2792        17136 :       IF (PRESENT(alpha)) aval = alpha
    2793        17136 :       bval = 0.0_dp
    2794        17136 :       IF (PRESENT(beta)) bval = beta
    2795              : 
    2796        17136 :       CALL cp_fm_get_info(amat, nrow_global=na, ncol_global=nc)
    2797        17136 :       nx = SIZE(xv)
    2798        17136 :       ny = SIZE(yv)
    2799        17136 :       IF ((nx /= ny) .OR. (nc /= nx)) THEN
    2800            0 :          CPABORT("cp_fm_matvec: incompatible dimensions")
    2801              :       END IF
    2802              : #if defined(__parallel)
    2803              :       CALL cp_fm_get_info(amat, nrow_local=nrl, ncol_local=ncl, &
    2804        17136 :                           row_indices=rind, col_indices=cind)
    2805       119952 :       ALLOCATE (xvl(ncl), yvl(nrl), yvm(ny))
    2806       150240 :       DO ic = 1, ncl
    2807       150240 :          xvl(ic) = xv(cind(ic))
    2808              :       END DO
    2809      1690008 :       yvl(1:nrl) = MATMUL(amat%local_data, xvl(1:ncl))
    2810       150240 :       yvm = 0.0_dp
    2811        83688 :       DO ir = 1, nrl
    2812        83688 :          yvm(rind(ir)) = yvl(ir)
    2813              :       END DO
    2814        17136 :       CALL amat%matrix_struct%para_env%sum(yvm)
    2815        17136 :       IF (bval == 0.0_dp) THEN
    2816        75120 :          yv = aval*yvm
    2817              :       ELSE
    2818        75120 :          yv = bval*yv + aval*yvm
    2819              :       END IF
    2820              : #else
    2821              :       IF (bval == 0.0_dp) THEN
    2822              :          yv = aval*MATMUL(amat%local_data, xv)
    2823              :       ELSE
    2824              :          yv = bval*yv + aval*MATMUL(amat%local_data, xv)
    2825              :       END IF
    2826              : #endif
    2827              : 
    2828        68544 :    END SUBROUTINE cp_fm_matvec
    2829              : 
    2830              : END MODULE cp_fm_basic_linalg
        

Generated by: LCOV version 2.0-1