LCOV - code coverage report
Current view: top level - src/fm - cp_fm_basic_linalg.F (source / functions) Coverage Total Hit
Test: CP2K Regtests (git:8ebf9ad) Lines: 70.8 % 845 598
Test Date: 2026-01-22 06:43:13 Functions: 67.4 % 43 29

            Line data    Source code
       1              : !--------------------------------------------------------------------------------------------------!
       2              : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3              : !   Copyright 2000-2026 CP2K developers group <https://cp2k.org>                                   !
       4              : !                                                                                                  !
       5              : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6              : !--------------------------------------------------------------------------------------------------!
       7              : 
       8              : ! **************************************************************************************************
       9              : !> \brief Basic linear algebra operations for full matrices.
      10              : !> \par History
      11              : !>      08.2002 split out of qs_blacs [fawzi]
      12              : !> \author Fawzi Mohamed
      13              : ! **************************************************************************************************
      14              : MODULE cp_fm_basic_linalg
      15              :    USE cp_blacs_env, ONLY: cp_blacs_env_type
      16              :    USE cp_fm_struct, ONLY: cp_fm_struct_equivalent
      17              :    USE cp_fm_types, ONLY: &
      18              :       cp_fm_create, cp_fm_get_diag, cp_fm_get_info, cp_fm_get_submatrix, cp_fm_p_type, &
      19              :       cp_fm_release, cp_fm_set_all, cp_fm_set_element, cp_fm_set_submatrix, cp_fm_to_fm, &
      20              :       cp_fm_type
      21              :    USE cp_log_handling, ONLY: cp_logger_get_default_unit_nr, &
      22              :                               cp_to_string
      23              :    USE kahan_sum, ONLY: accurate_dot_product, &
      24              :                         accurate_sum
      25              :    USE kinds, ONLY: dp, &
      26              :                     int_8, &
      27              :                     sp
      28              :    USE machine, ONLY: m_memory
      29              :    USE mathlib, ONLY: get_pseudo_inverse_svd, &
      30              :                       invert_matrix
      31              :    USE message_passing, ONLY: mp_comm_type
      32              : #if defined (__HAS_IEEE_EXCEPTIONS)
      33              :    USE ieee_exceptions, ONLY: ieee_get_halting_mode, &
      34              :                               ieee_set_halting_mode, &
      35              :                               IEEE_ALL
      36              : #endif
      37              : #include "../base/base_uses.f90"
      38              : 
      39              :    IMPLICIT NONE
      40              :    PRIVATE
      41              : 
      42              :    LOGICAL, PRIVATE, PARAMETER :: debug_this_module = .TRUE.
      43              :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_fm_basic_linalg'
      44              : 
      45              :    PUBLIC :: cp_fm_scale, & ! scale a matrix
      46              :              cp_fm_scale_and_add, & ! scale and add two matrices
      47              :              cp_fm_geadd, & ! general addition
      48              :              cp_fm_add_columns, & ! daxpy type column add
      49              :              cp_fm_column_scale, & ! scale columns of a matrix
      50              :              cp_fm_row_scale, & ! scale rows of a matrix
      51              :              cp_fm_trace, & ! trace of the transpose(A)*B
      52              :              cp_fm_contracted_trace, & ! sum_{i,...,k} Tr [A(i,...,k)^T * B(i,...,k)]
      53              :              cp_fm_norm, & ! different norms of A
      54              :              cp_fm_schur_product, & ! schur product
      55              :              cp_fm_transpose, & ! transpose a matrix
      56              :              cp_fm_uplo_to_full, & ! symmetrise a triangular matrix
      57              :              cp_fm_syrk, & ! rank k update
      58              :              cp_fm_triangular_multiply, & ! triangular matrix multiply / solve
      59              :              cp_fm_symm, & ! multiply a symmetric with a non-symmetric matrix
      60              :              cp_fm_gemm, & ! multiply two matrices
      61              :              cp_complex_fm_gemm, & ! multiply two complex matrices, represented by non_complex fm matrices
      62              :              cp_fm_invert, & ! computes the inverse and determinant
      63              :              cp_fm_frobenius_norm, & ! frobenius norm
      64              :              cp_fm_triangular_invert, & ! compute the reciprocal of a triangular matrix
      65              :              cp_fm_qr_factorization, & ! compute the QR factorization of a rectangular matrix
      66              :              cp_fm_solve, & ! solves the equation  A*B=C A and C are input
      67              :              cp_fm_pdgeqpf, & ! compute a QR factorization with column pivoting of a M-by-N distributed matrix
      68              :              cp_fm_pdorgqr, & ! generates an M-by-N as first N columns of a product of K elementary reflectors
      69              :              cp_fm_potrf, & ! Cholesky decomposition
      70              :              cp_fm_potri, & ! Invert triangular matrix
      71              :              cp_fm_rot_rows, & ! rotates two rows
      72              :              cp_fm_rot_cols, & ! rotates two columns
      73              :              cp_fm_Gram_Schmidt_orthonorm, & ! Gram-Schmidt orthonormalization of columns of a full matrix, &
      74              :              cp_fm_det, & ! determinant of a real matrix with correct sign
      75              :              cp_fm_matvec ! matrix-vector multiplication (vector replicated)
      76              : 
      77              :    REAL(KIND=dp), EXTERNAL :: dlange, pdlange, pdlatra
      78              :    REAL(KIND=sp), EXTERNAL :: slange, pslange, pslatra
      79              : 
      80              :    INTERFACE cp_fm_trace
      81              :       MODULE PROCEDURE cp_fm_trace_a0b0t0
      82              :       MODULE PROCEDURE cp_fm_trace_a1b0t1_a
      83              :       MODULE PROCEDURE cp_fm_trace_a1b0t1_p
      84              :       MODULE PROCEDURE cp_fm_trace_a1b1t1_aa
      85              :       MODULE PROCEDURE cp_fm_trace_a1b1t1_ap
      86              :       MODULE PROCEDURE cp_fm_trace_a1b1t1_pa
      87              :       MODULE PROCEDURE cp_fm_trace_a1b1t1_pp
      88              :    END INTERFACE cp_fm_trace
      89              : 
      90              :    INTERFACE cp_fm_contracted_trace
      91              :       MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2_aa
      92              :       MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2_ap
      93              :       MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2_pa
      94              :       MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2_pp
      95              :    END INTERFACE cp_fm_contracted_trace
      96              : CONTAINS
      97              : 
      98              : ! **************************************************************************************************
      99              : !> \brief Computes the determinant (with a correct sign even in parallel environment!) of a real square matrix
     100              : !> \author A. Sinyavskiy (andrey.sinyavskiy@chem.uzh.ch)
     101              : ! **************************************************************************************************
     102            0 :    SUBROUTINE cp_fm_det(matrix_a, det_a)
     103              : 
     104              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix_a
     105              :       REAL(KIND=dp), INTENT(OUT)               :: det_a
     106              :       REAL(KIND=dp)                            :: determinant
     107              :       TYPE(cp_fm_type)                         :: matrix_lu
     108              :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
     109              :       INTEGER                                  :: n, i, info, P
     110            0 :       INTEGER, ALLOCATABLE, DIMENSION(:)       :: ipivot
     111              :       REAL(KIND=dp), DIMENSION(:), POINTER     :: diag
     112              : 
     113              : #if defined(__parallel)
     114              :       INTEGER                                  :: myprow, nprow, npcol, nrow_local, nrow_block, irow_local
     115              :       INTEGER, DIMENSION(9)                    :: desca
     116              : #endif
     117              : 
     118              :       CALL cp_fm_create(matrix=matrix_lu, &
     119              :                         matrix_struct=matrix_a%matrix_struct, &
     120            0 :                         name="A_lu"//TRIM(ADJUSTL(cp_to_string(1)))//"MATRIX")
     121            0 :       CALL cp_fm_to_fm(matrix_a, matrix_lu)
     122              : 
     123            0 :       a => matrix_lu%local_data
     124            0 :       n = matrix_lu%matrix_struct%nrow_global
     125            0 :       ALLOCATE (ipivot(n))
     126            0 :       ipivot(:) = 0
     127            0 :       P = 0
     128            0 :       ALLOCATE (diag(n))
     129            0 :       diag(:) = 0.0_dp
     130              : #if defined(__parallel)
     131              :       ! Use LU decomposition
     132            0 :       desca(:) = matrix_lu%matrix_struct%descriptor(:)
     133            0 :       CALL pdgetrf(n, n, a, 1, 1, desca, ipivot, info)
     134            0 :       CALL cp_fm_get_diag(matrix_lu, diag)
     135            0 :       determinant = PRODUCT(diag)
     136            0 :       myprow = matrix_lu%matrix_struct%context%mepos(1)
     137            0 :       nprow = matrix_lu%matrix_struct%context%num_pe(1)
     138            0 :       npcol = matrix_lu%matrix_struct%context%num_pe(2)
     139            0 :       nrow_local = matrix_lu%matrix_struct%nrow_locals(myprow)
     140            0 :       nrow_block = matrix_lu%matrix_struct%nrow_block
     141            0 :       DO irow_local = 1, nrow_local
     142            0 :          i = matrix_lu%matrix_struct%row_indices(irow_local)
     143            0 :          IF (ipivot(irow_local) /= i) P = P + 1
     144              :       END DO
     145            0 :       CALL matrix_lu%matrix_struct%para_env%sum(P)
     146              :       ! very important fix
     147            0 :       P = P/npcol
     148              : #else
     149              :       CALL dgetrf(n, n, a, n, ipivot, info)
     150              :       CALL cp_fm_get_diag(matrix_lu, diag)
     151              :       determinant = PRODUCT(diag)
     152              :       DO i = 1, n
     153              :          IF (ipivot(i) /= i) P = P + 1
     154              :       END DO
     155              : #endif
     156            0 :       DEALLOCATE (ipivot)
     157            0 :       DEALLOCATE (diag)
     158            0 :       CALL cp_fm_release(matrix_lu)
     159            0 :       det_a = determinant*(-2*MOD(P, 2) + 1.0_dp)
     160            0 :    END SUBROUTINE cp_fm_det
     161              : 
     162              : ! **************************************************************************************************
     163              : !> \brief calc A <- alpha*A + beta*B
     164              : !>      optimized for alpha == 1.0 (just add beta*B) and beta == 0.0 (just
     165              : !>      scale A)
     166              : !> \param alpha ...
     167              : !> \param matrix_a ...
     168              : !> \param beta ...
     169              : !> \param matrix_b ...
     170              : ! **************************************************************************************************
     171      1109542 :    SUBROUTINE cp_fm_scale_and_add(alpha, matrix_a, beta, matrix_b)
     172              : 
     173              :       REAL(KIND=dp), INTENT(IN)                          :: alpha
     174              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix_a
     175              :       REAL(KIND=dp), INTENT(IN), OPTIONAL                :: beta
     176              :       TYPE(cp_fm_type), INTENT(IN), OPTIONAL             :: matrix_b
     177              : 
     178              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_scale_and_add'
     179              : 
     180              :       INTEGER                                            :: handle, size_a, size_b
     181              :       REAL(KIND=dp)                                      :: my_beta
     182      1109542 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: a, b
     183      1109542 :       REAL(KIND=sp), DIMENSION(:, :), POINTER            :: a_sp, b_sp
     184              : 
     185      1109542 :       CALL timeset(routineN, handle)
     186              : 
     187      1109542 :       my_beta = 0.0_dp
     188      1109542 :       IF (PRESENT(matrix_b)) my_beta = 1.0_dp
     189      1109542 :       IF (PRESENT(beta)) my_beta = beta
     190      1109542 :       NULLIFY (a, b)
     191              : 
     192      1109542 :       IF (PRESENT(beta)) THEN
     193      1109542 :          CPASSERT(PRESENT(matrix_b))
     194      1109542 :          IF (ASSOCIATED(matrix_a%local_data, matrix_b%local_data)) THEN
     195            0 :             CPWARN("Bad use of routine. Call cp_fm_scale instead")
     196            0 :             CALL cp_fm_scale(alpha + beta, matrix_a)
     197            0 :             CALL timestop(handle)
     198            0 :             RETURN
     199              :          END IF
     200              :       END IF
     201              : 
     202      1109542 :       a => matrix_a%local_data
     203      1109542 :       a_sp => matrix_a%local_data_sp
     204              : 
     205      1109542 :       IF (matrix_a%use_sp) THEN
     206            0 :          size_a = SIZE(a_sp, 1)*SIZE(a_sp, 2)
     207              :       ELSE
     208      1109542 :          size_a = SIZE(a, 1)*SIZE(a, 2)
     209              :       END IF
     210              : 
     211      1109542 :       IF (alpha /= 1.0_dp) THEN
     212        79682 :          IF (matrix_a%use_sp) THEN
     213            0 :             CALL sscal(size_a, REAL(alpha, sp), a_sp, 1)
     214              :          ELSE
     215        79682 :             CALL dscal(size_a, alpha, a, 1)
     216              :          END IF
     217              :       END IF
     218      1109542 :       IF (my_beta /= 0.0_dp) THEN
     219      1102070 :          IF (matrix_a%matrix_struct%context /= matrix_b%matrix_struct%context) &
     220            0 :             CPABORT("Matrices must be in the same blacs context")
     221              : 
     222      1102070 :          IF (cp_fm_struct_equivalent(matrix_a%matrix_struct, &
     223              :                                      matrix_b%matrix_struct)) THEN
     224              : 
     225      1102070 :             b => matrix_b%local_data
     226      1102070 :             b_sp => matrix_b%local_data_sp
     227      1102070 :             IF (matrix_b%use_sp) THEN
     228            0 :                size_b = SIZE(b_sp, 1)*SIZE(b_sp, 2)
     229              :             ELSE
     230      1102070 :                size_b = SIZE(b, 1)*SIZE(b, 2)
     231              :             END IF
     232      1102070 :             IF (size_a /= size_b) &
     233            0 :                CPABORT("Matrices must have same local sizes")
     234              : 
     235      1102070 :             IF (matrix_a%use_sp .AND. matrix_b%use_sp) THEN
     236            0 :                CALL saxpy(size_a, REAL(my_beta, sp), b_sp, 1, a_sp, 1)
     237      1102070 :             ELSEIF (matrix_a%use_sp .AND. .NOT. matrix_b%use_sp) THEN
     238            0 :                CALL saxpy(size_a, REAL(my_beta, sp), REAL(b, sp), 1, a_sp, 1)
     239      1102070 :             ELSEIF (.NOT. matrix_a%use_sp .AND. matrix_b%use_sp) THEN
     240            0 :                CALL daxpy(size_a, my_beta, REAL(b_sp, dp), 1, a, 1)
     241              :             ELSE
     242      1102070 :                CALL daxpy(size_a, my_beta, b, 1, a, 1)
     243              :             END IF
     244              : 
     245              :          ELSE
     246              : #ifdef __parallel
     247            0 :             CPABORT("to do (pdscal,pdcopy,pdaxpy)")
     248              : #else
     249              :             CPABORT("")
     250              : #endif
     251              :          END IF
     252              : 
     253              :       END IF
     254              : 
     255      1109542 :       CALL timestop(handle)
     256              : 
     257      1109542 :    END SUBROUTINE cp_fm_scale_and_add
     258              : 
     259              : ! **************************************************************************************************
     260              : !> \brief interface to BLACS geadd:
     261              : !>                matrix_b = beta*matrix_b + alpha*opt(matrix_a)
     262              : !>        where opt(matrix_a) can be either:
     263              : !>              'N':  matrix_a
     264              : !>              'T':  matrix_a^T
     265              : !>              'C':  matrix_a^H (Hermitian conjugate)
     266              : !>        note that this is a level three routine, use cp_fm_scale_and_add if that
     267              : !>        is sufficient for your needs
     268              : !> \param alpha  : complex scalar
     269              : !> \param trans  : 'N' normal, 'T' transposed
     270              : !> \param matrix_a : input matrix_a
     271              : !> \param beta   : complex scalar
     272              : !> \param matrix_b : input matrix_b, upon out put the updated matrix_b
     273              : !> \author  Lianheng Tong
     274              : ! **************************************************************************************************
     275          532 :    SUBROUTINE cp_fm_geadd(alpha, trans, matrix_a, beta, matrix_b)
     276              :       REAL(KIND=dp), INTENT(IN) :: alpha, beta
     277              :       CHARACTER, INTENT(IN) :: trans
     278              :       TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_b
     279              : 
     280              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_geadd'
     281              : 
     282              :       INTEGER :: nrow_global, ncol_global, handle
     283              :       REAL(KIND=dp), DIMENSION(:, :), POINTER :: aa, bb
     284              : #if defined(__parallel)
     285              :       INTEGER, DIMENSION(9) :: desca, descb
     286              : #elif !defined(__MKL)
     287              :       INTEGER :: ii, jj
     288              : #endif
     289              : 
     290          532 :       CALL timeset(routineN, handle)
     291              : 
     292          532 :       nrow_global = matrix_a%matrix_struct%nrow_global
     293          532 :       ncol_global = matrix_a%matrix_struct%ncol_global
     294          532 :       CPASSERT(nrow_global == matrix_b%matrix_struct%nrow_global)
     295          532 :       CPASSERT(ncol_global == matrix_b%matrix_struct%ncol_global)
     296              : 
     297          532 :       aa => matrix_a%local_data
     298          532 :       bb => matrix_b%local_data
     299              : 
     300              : #if defined(__parallel)
     301         5320 :       desca = matrix_a%matrix_struct%descriptor
     302         5320 :       descb = matrix_b%matrix_struct%descriptor
     303              :       CALL pdgeadd(trans, &
     304              :                    nrow_global, &
     305              :                    ncol_global, &
     306              :                    alpha, &
     307              :                    aa, &
     308              :                    1, 1, &
     309              :                    desca, &
     310              :                    beta, &
     311              :                    bb, &
     312              :                    1, 1, &
     313          532 :                    descb)
     314              : #elif defined(__MKL)
     315              :       CALL mkl_domatadd('C', trans, 'N', nrow_global, ncol_global, &
     316              :                         alpha, aa, nrow_global, beta, bb, nrow_global, bb, nrow_global)
     317              : #else
     318              :       ! dgeadd is not a standard BLAS function, although it is implemented
     319              :       ! in some libraries like OpenBLAS, so not going to use it here
     320              :       SELECT CASE (trans)
     321              :       CASE ('T')
     322              :          DO jj = 1, ncol_global
     323              :             DO ii = 1, nrow_global
     324              :                bb(ii, jj) = beta*bb(ii, jj) + alpha*aa(jj, ii)
     325              :             END DO
     326              :          END DO
     327              :       CASE DEFAULT
     328              :          DO jj = 1, ncol_global
     329              :             DO ii = 1, nrow_global
     330              :                bb(ii, jj) = beta*bb(ii, jj) + alpha*aa(ii, jj)
     331              :             END DO
     332              :          END DO
     333              :       END SELECT
     334              : #endif
     335              : 
     336          532 :       CALL timestop(handle)
     337              : 
     338          532 :    END SUBROUTINE cp_fm_geadd
     339              : 
     340              : ! **************************************************************************************************
     341              : !> \brief Add (and scale) a subset of columns of a fm to a fm
     342              : !>        b = alpha*a + b
     343              : !> \param msource ...
     344              : !> \param mtarget ...
     345              : !> \param ncol ...
     346              : !> \param alpha ...
     347              : !> \param source_start ...
     348              : !> \param target_start ...
     349              : ! **************************************************************************************************
     350            0 :    SUBROUTINE cp_fm_add_columns(msource, mtarget, ncol, alpha, source_start, target_start)
     351              : 
     352              :       TYPE(cp_fm_type), INTENT(IN)             :: msource, mtarget
     353              :       INTEGER, INTENT(IN)                      :: ncol
     354              :       REAL(KIND=dp), INTENT(IN), OPTIONAL      :: alpha
     355              :       INTEGER, INTENT(IN), OPTIONAL            :: source_start, target_start
     356              : 
     357              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_add_columns'
     358              : 
     359              :       INTEGER                                  :: handle, n, ss, ts, i
     360              :       REAL(KIND=dp)                            :: fscale
     361              :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a, b
     362              : #if defined(__parallel)
     363              :       INTEGER, DIMENSION(9)                    :: desca, descb
     364              : #endif
     365              : 
     366            0 :       CALL timeset(routineN, handle)
     367              : 
     368            0 :       ss = 1
     369            0 :       ts = 1
     370              : 
     371            0 :       IF (PRESENT(source_start)) ss = source_start
     372            0 :       IF (PRESENT(target_start)) ts = target_start
     373              : 
     374            0 :       fscale = 1.0_dp
     375            0 :       IF (PRESENT(alpha)) fscale = alpha
     376              : 
     377            0 :       n = msource%matrix_struct%nrow_global
     378              : 
     379            0 :       a => msource%local_data
     380            0 :       b => mtarget%local_data
     381              : 
     382              : #if defined(__parallel)
     383              :       MARK_USED(i)
     384            0 :       desca(:) = msource%matrix_struct%descriptor(:)
     385            0 :       descb(:) = mtarget%matrix_struct%descriptor(:)
     386            0 :       CALL pdgeadd("N", n, ncol, fscale, a, 1, ss, desca, 1.0_dp, b, 1, ts, descb)
     387              : #else
     388              :       DO i = 0, ncol - 1
     389              :          b(1:n, ts + i) = b(1:n, ts + i) + fscale*a(1:n, ss + i)
     390              :       END DO
     391              : #endif
     392              : 
     393            0 :       CALL timestop(handle)
     394              : 
     395            0 :    END SUBROUTINE cp_fm_add_columns
     396              : 
     397              : ! **************************************************************************************************
     398              : !> \brief Computes the LU-decomposition of the matrix, and the determinant of the matrix
     399              : !>      IMPORTANT : the sign of the determinant is not defined correctly yet ....
     400              : !> \param matrix_a ...
     401              : !> \param almost_determinant ...
     402              : !> \param correct_sign ...
     403              : !> \par History
     404              : !>      added correct_sign 02.07 (fschiff)
     405              : !> \author Joost VandeVondele
     406              : !> \note
     407              : !>      - matrix_a is overwritten
     408              : !>      - the sign of the determinant might be wrong
     409              : !>      - SERIOUS WARNING (KNOWN BUG) : the sign of the determinant depends on ipivot
     410              : !>      - one should be able to find out if ipivot is an even or an odd permutation...
     411              : !>        if you need the correct sign, just add correct_sign==.TRUE. (fschiff)
     412              : !>      - Use cp_fm_get_diag instead of n times cp_fm_get_element (A. Bussy)
     413              : ! **************************************************************************************************
     414            0 :    SUBROUTINE cp_fm_lu_decompose(matrix_a, almost_determinant, correct_sign)
     415              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix_a
     416              :       REAL(KIND=dp), INTENT(OUT)               :: almost_determinant
     417              :       LOGICAL, INTENT(IN), OPTIONAL            :: correct_sign
     418              : 
     419              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_lu_decompose'
     420              : 
     421              :       INTEGER                                  :: handle, i, info, n
     422            0 :       INTEGER, ALLOCATABLE, DIMENSION(:)       :: ipivot
     423              :       REAL(KIND=dp)                            :: determinant
     424              :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
     425              : #if defined(__parallel)
     426              :       INTEGER, DIMENSION(9)                    :: desca
     427            0 :       REAL(KIND=dp), DIMENSION(:), POINTER     :: diag
     428              : #else
     429              :       INTEGER                                  :: lda
     430              : #endif
     431              : 
     432            0 :       CALL timeset(routineN, handle)
     433              : 
     434            0 :       a => matrix_a%local_data
     435            0 :       n = matrix_a%matrix_struct%nrow_global
     436            0 :       ALLOCATE (ipivot(n + matrix_a%matrix_struct%nrow_block))
     437              : 
     438              : #if defined(__parallel)
     439              :       MARK_USED(correct_sign)
     440            0 :       desca(:) = matrix_a%matrix_struct%descriptor(:)
     441            0 :       CALL pdgetrf(n, n, a, 1, 1, desca, ipivot, info)
     442              : 
     443            0 :       ALLOCATE (diag(n))
     444            0 :       diag(:) = 0.0_dp
     445            0 :       CALL cp_fm_get_diag(matrix_a, diag)
     446            0 :       determinant = 1.0_dp
     447            0 :       DO i = 1, n
     448            0 :          determinant = determinant*diag(i)
     449              :       END DO
     450            0 :       DEALLOCATE (diag)
     451              : #else
     452              :       lda = SIZE(a, 1)
     453              :       CALL dgetrf(n, n, a, lda, ipivot, info)
     454              :       determinant = 1.0_dp
     455              :       IF (correct_sign) THEN
     456              :          DO i = 1, n
     457              :             IF (ipivot(i) /= i) THEN
     458              :                determinant = -determinant*a(i, i)
     459              :             ELSE
     460              :                determinant = determinant*a(i, i)
     461              :             END IF
     462              :          END DO
     463              :       ELSE
     464              :          DO i = 1, n
     465              :             determinant = determinant*a(i, i)
     466              :          END DO
     467              :       END IF
     468              : #endif
     469              :       ! info is allowed to be zero
     470              :       ! this does just signal a zero diagonal element
     471            0 :       DEALLOCATE (ipivot)
     472            0 :       almost_determinant = determinant ! notice that the sign is random
     473            0 :       CALL timestop(handle)
     474            0 :    END SUBROUTINE cp_fm_lu_decompose
     475              : 
     476              : ! **************************************************************************************************
     477              : !> \brief computes matrix_c = beta * matrix_c + alpha * ( matrix_a  ** transa ) * ( matrix_b ** transb )
     478              : !> \param transa : 'N' -> normal   'T' -> transpose
     479              : !>      alpha,beta :: can be 0.0_dp and 1.0_dp
     480              : !> \param transb ...
     481              : !> \param m ...
     482              : !> \param n ...
     483              : !> \param k ...
     484              : !> \param alpha ...
     485              : !> \param matrix_a : m x k matrix ( ! for transa = 'N')
     486              : !> \param matrix_b : k x n matrix ( ! for transb = 'N')
     487              : !> \param beta ...
     488              : !> \param matrix_c : m x n matrix
     489              : !> \param a_first_col ...
     490              : !> \param a_first_row ...
     491              : !> \param b_first_col : the k x n matrix starts at col b_first_col of matrix_b (avoid usage)
     492              : !> \param b_first_row ...
     493              : !> \param c_first_col ...
     494              : !> \param c_first_row ...
     495              : !> \author Matthias Krack
     496              : !> \note
     497              : !>      matrix_c should have no overlap with matrix_a, matrix_b
     498              : ! **************************************************************************************************
     499          610 :    SUBROUTINE cp_fm_gemm(transa, transb, m, n, k, alpha, matrix_a, matrix_b, beta, &
     500              :                          matrix_c, a_first_col, a_first_row, b_first_col, b_first_row, &
     501              :                          c_first_col, c_first_row)
     502              : 
     503              :       CHARACTER(LEN=1), INTENT(IN)             :: transa, transb
     504              :       INTEGER, INTENT(IN)                      :: m, n, k
     505              :       REAL(KIND=dp), INTENT(IN)                :: alpha
     506              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix_a, matrix_b
     507              :       REAL(KIND=dp), INTENT(IN)                :: beta
     508              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix_c
     509              :       INTEGER, INTENT(IN), OPTIONAL            :: a_first_col, a_first_row, &
     510              :                                                   b_first_col, b_first_row, &
     511              :                                                   c_first_col, c_first_row
     512              : 
     513              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_gemm'
     514              : 
     515              :       INTEGER                                  :: handle, i_a, i_b, i_c, j_a, &
     516              :                                                   j_b, j_c
     517              :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a, b, c
     518          610 :       REAL(KIND=sp), DIMENSION(:, :), POINTER  :: a_sp, b_sp, c_sp
     519              : #if defined(__parallel)
     520              :       INTEGER, DIMENSION(9)                    :: desca, descb, descc
     521              : #else
     522              :       INTEGER                                  :: lda, ldb, ldc
     523              : #endif
     524              : 
     525          610 :       CALL timeset(routineN, handle)
     526              : 
     527              :       !sample peak memory
     528          610 :       CALL m_memory()
     529              : 
     530          610 :       a => matrix_a%local_data
     531          610 :       b => matrix_b%local_data
     532          610 :       c => matrix_c%local_data
     533              : 
     534          610 :       a_sp => matrix_a%local_data_sp
     535          610 :       b_sp => matrix_b%local_data_sp
     536          610 :       c_sp => matrix_c%local_data_sp
     537              : 
     538          610 :       i_a = 1
     539          610 :       IF (PRESENT(a_first_row)) i_a = a_first_row
     540              : 
     541          610 :       j_a = 1
     542          610 :       IF (PRESENT(a_first_col)) j_a = a_first_col
     543              : 
     544          610 :       i_b = 1
     545          610 :       IF (PRESENT(b_first_row)) i_b = b_first_row
     546              : 
     547          610 :       j_b = 1
     548          610 :       IF (PRESENT(b_first_col)) j_b = b_first_col
     549              : 
     550          610 :       i_c = 1
     551          610 :       IF (PRESENT(c_first_row)) i_c = c_first_row
     552              : 
     553          610 :       j_c = 1
     554          610 :       IF (PRESENT(c_first_col)) j_c = c_first_col
     555              : 
     556              : #if defined(__parallel)
     557              : 
     558         6100 :       desca(:) = matrix_a%matrix_struct%descriptor(:)
     559         6100 :       descb(:) = matrix_b%matrix_struct%descriptor(:)
     560         6100 :       descc(:) = matrix_c%matrix_struct%descriptor(:)
     561              : 
     562          610 :       IF (matrix_a%use_sp .AND. matrix_b%use_sp .AND. matrix_c%use_sp) THEN
     563              : 
     564              :          CALL psgemm(transa, transb, m, n, k, REAL(alpha, sp), a_sp(1, 1), i_a, j_a, desca, b_sp(1, 1), i_b, j_b, &
     565            0 :                      descb, REAL(beta, sp), c_sp(1, 1), i_c, j_c, descc)
     566              : 
     567          610 :       ELSEIF ((.NOT. matrix_a%use_sp) .AND. (.NOT. matrix_b%use_sp) .AND. (.NOT. matrix_c%use_sp)) THEN
     568              : 
     569              :          CALL pdgemm(transa, transb, m, n, k, alpha, a, i_a, j_a, desca, b, i_b, j_b, &
     570          610 :                      descb, beta, c, i_c, j_c, descc)
     571              : 
     572              :       ELSE
     573            0 :          CPABORT("Mixed precision gemm NYI")
     574              :       END IF
     575              : #else
     576              : 
     577              :       IF (matrix_a%use_sp .AND. matrix_b%use_sp .AND. matrix_c%use_sp) THEN
     578              : 
     579              :          lda = SIZE(a_sp, 1)
     580              :          ldb = SIZE(b_sp, 1)
     581              :          ldc = SIZE(c_sp, 1)
     582              : 
     583              :          CALL sgemm(transa, transb, m, n, k, REAL(alpha, sp), a_sp(i_a, j_a), lda, b_sp(i_b, j_b), ldb, &
     584              :                     REAL(beta, sp), c_sp(i_c, j_c), ldc)
     585              : 
     586              :       ELSEIF ((.NOT. matrix_a%use_sp) .AND. (.NOT. matrix_b%use_sp) .AND. (.NOT. matrix_c%use_sp)) THEN
     587              : 
     588              :          lda = SIZE(a, 1)
     589              :          ldb = SIZE(b, 1)
     590              :          ldc = SIZE(c, 1)
     591              : 
     592              :          CALL dgemm(transa, transb, m, n, k, alpha, a(i_a, j_a), lda, b(i_b, j_b), ldb, beta, c(i_c, j_c), ldc)
     593              : 
     594              :       ELSE
     595              :          CPABORT("Mixed precision gemm NYI")
     596              :       END IF
     597              : 
     598              : #endif
     599          610 :       CALL timestop(handle)
     600              : 
     601          610 :    END SUBROUTINE cp_fm_gemm
     602              : 
     603              : ! **************************************************************************************************
     604              : !> \brief computes matrix_c = beta * matrix_c + alpha *  matrix_a  *  matrix_b
     605              : !>      computes matrix_c = beta * matrix_c + alpha *  matrix_b  *  matrix_a
     606              : !>      where matrix_a is symmetric
     607              : !> \param side : 'L' -> matrix_a is on the left 'R' -> matrix_a is on the right
     608              : !>      alpha,beta :: can be 0.0_dp and 1.0_dp
     609              : !> \param uplo triangular format
     610              : !> \param m ...
     611              : !> \param n ...
     612              : !> \param alpha ...
     613              : !> \param matrix_a : m x m matrix
     614              : !> \param matrix_b : m x n matrix
     615              : !> \param beta ...
     616              : !> \param matrix_c : m x n matrix
     617              : !> \author Matthias Krack
     618              : !> \note
     619              : !>      matrix_c should have no overlap with matrix_a, matrix_b
     620              : !>      all matrices in QS are triangular according to uplo
     621              : !>      matrix_a is always an m x m matrix
     622              : !>      typically slower than cp_fm_gemm (especially in parallel easily 50 percent)
     623              : ! **************************************************************************************************
     624       167310 :    SUBROUTINE cp_fm_symm(side, uplo, m, n, alpha, matrix_a, matrix_b, beta, matrix_c)
     625              : 
     626              :       CHARACTER(LEN=1), INTENT(IN)             :: side, uplo
     627              :       INTEGER, INTENT(IN)                      :: m, n
     628              :       REAL(KIND=dp), INTENT(IN)                :: alpha
     629              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix_a, matrix_b
     630              :       REAL(KIND=dp), INTENT(IN)                :: beta
     631              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix_c
     632              : 
     633              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_symm'
     634              : 
     635              :       INTEGER                                  :: handle
     636       167310 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a, b, c
     637              : #if defined(__parallel)
     638              :       INTEGER, DIMENSION(9)                    :: desca, descb, descc
     639              : #else
     640              :       INTEGER                                  :: lda, ldb, ldc
     641              : #endif
     642              : 
     643       167310 :       CALL timeset(routineN, handle)
     644              : 
     645       167310 :       a => matrix_a%local_data
     646       167310 :       b => matrix_b%local_data
     647       167310 :       c => matrix_c%local_data
     648              : 
     649              : #if defined(__parallel)
     650              : 
     651      1673100 :       desca(:) = matrix_a%matrix_struct%descriptor(:)
     652      1673100 :       descb(:) = matrix_b%matrix_struct%descriptor(:)
     653      1673100 :       descc(:) = matrix_c%matrix_struct%descriptor(:)
     654              : 
     655       167310 :       CALL pdsymm(side, uplo, m, n, alpha, a(1, 1), 1, 1, desca, b(1, 1), 1, 1, descb, beta, c(1, 1), 1, 1, descc)
     656              : 
     657              : #else
     658              : 
     659              :       lda = matrix_a%matrix_struct%local_leading_dimension
     660              :       ldb = matrix_b%matrix_struct%local_leading_dimension
     661              :       ldc = matrix_c%matrix_struct%local_leading_dimension
     662              : 
     663              :       CALL dsymm(side, uplo, m, n, alpha, a(1, 1), lda, b(1, 1), ldb, beta, c(1, 1), ldc)
     664              : 
     665              : #endif
     666       167310 :       CALL timestop(handle)
     667              : 
     668       167310 :    END SUBROUTINE cp_fm_symm
     669              : 
     670              : ! **************************************************************************************************
     671              : !> \brief computes the Frobenius norm of matrix_a
     672              : !> \brief computes the Frobenius norm of matrix_a
     673              : !> \param matrix_a : m x n matrix
     674              : !> \return ...
     675              : !> \author VW
     676              : ! **************************************************************************************************
     677         7926 :    FUNCTION cp_fm_frobenius_norm(matrix_a) RESULT(norm)
     678              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix_a
     679              :       REAL(KIND=dp)                            :: norm
     680              : 
     681              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_frobenius_norm'
     682              : 
     683              :       INTEGER                                  :: handle, size_a
     684         7926 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
     685              :       REAL(KIND=dp), EXTERNAL                  :: DDOT
     686              : #if defined(__parallel)
     687              :       TYPE(mp_comm_type)                       :: group
     688              : #endif
     689              : 
     690         7926 :       CALL timeset(routineN, handle)
     691              : 
     692              :       norm = 0.0_dp
     693         7926 :       a => matrix_a%local_data
     694         7926 :       size_a = SIZE(a, 1)*SIZE(a, 2)
     695         7926 :       norm = DDOT(size_a, a(1, 1), 1, a(1, 1), 1)
     696              : #if defined(__parallel)
     697         7926 :       group = matrix_a%matrix_struct%para_env
     698         7926 :       CALL group%sum(norm)
     699              : #endif
     700         7926 :       norm = SQRT(norm)
     701              : 
     702         7926 :       CALL timestop(handle)
     703              : 
     704         7926 :    END FUNCTION cp_fm_frobenius_norm
     705              : 
     706              : ! **************************************************************************************************
     707              : !> \brief performs a rank-k update of a symmetric matrix_c
     708              : !>         matrix_c = beta * matrix_c + alpha * matrix_a * transpose ( matrix_a )
     709              : !> \param uplo : 'U'   ('L')
     710              : !> \param trans : 'N'  ('T')
     711              : !> \param k : number of cols to use in matrix_a
     712              : !>      ia,ja ::  1,1 (could be used for selecting subblock of a)
     713              : !> \param alpha ...
     714              : !> \param matrix_a ...
     715              : !> \param ia ...
     716              : !> \param ja ...
     717              : !> \param beta ...
     718              : !> \param matrix_c ...
     719              : !> \author Matthias Krack
     720              : ! **************************************************************************************************
     721         8306 :    SUBROUTINE cp_fm_syrk(uplo, trans, k, alpha, matrix_a, ia, ja, beta, matrix_c)
     722              :       CHARACTER(LEN=1), INTENT(IN)             :: uplo, trans
     723              :       INTEGER, INTENT(IN)                      :: k
     724              :       REAL(KIND=dp), INTENT(IN)                :: alpha
     725              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix_a
     726              :       INTEGER, INTENT(IN)                      :: ia, ja
     727              :       REAL(KIND=dp), INTENT(IN)                :: beta
     728              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix_c
     729              : 
     730              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_syrk'
     731              : 
     732              :       INTEGER                                  :: handle, n
     733         8306 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a, c
     734              : #if defined(__parallel)
     735              :       INTEGER, DIMENSION(9)                    :: desca, descc
     736              : #else
     737              :       INTEGER                                  :: lda, ldc
     738              : #endif
     739              : #if defined (__HAS_IEEE_EXCEPTIONS)
     740              :       LOGICAL, DIMENSION(5)                    :: halt
     741              : #endif
     742              : 
     743         8306 :       CALL timeset(routineN, handle)
     744              : 
     745         8306 :       n = matrix_c%matrix_struct%nrow_global
     746              : 
     747         8306 :       a => matrix_a%local_data
     748         8306 :       c => matrix_c%local_data
     749              : 
     750              : #if defined (__HAS_IEEE_EXCEPTIONS)
     751              :       CALL ieee_get_halting_mode(IEEE_ALL, halt)
     752              :       CALL ieee_set_halting_mode(IEEE_ALL, .FALSE.)
     753              : #endif
     754              : #if defined(__parallel)
     755        83060 :       desca(:) = matrix_a%matrix_struct%descriptor(:)
     756        83060 :       descc(:) = matrix_c%matrix_struct%descriptor(:)
     757              : 
     758         8306 :       CALL pdsyrk(uplo, trans, n, k, alpha, a(1, 1), ia, ja, desca, beta, c(1, 1), 1, 1, descc)
     759              : #else
     760              :       lda = SIZE(a, 1)
     761              :       ldc = SIZE(c, 1)
     762              : 
     763              :       CALL dsyrk(uplo, trans, n, k, alpha, a(ia, ja), lda, beta, c(1, 1), ldc)
     764              : #endif
     765              : #if defined (__HAS_IEEE_EXCEPTIONS)
     766              :       CALL ieee_set_halting_mode(IEEE_ALL, halt)
     767              : #endif
     768         8306 :       CALL timestop(handle)
     769              : 
     770         8306 :    END SUBROUTINE cp_fm_syrk
     771              : 
     772              : ! **************************************************************************************************
     773              : !> \brief computes the schur product of two matrices
     774              : !>       c_ij = a_ij * b_ij
     775              : !> \param matrix_a ...
     776              : !> \param matrix_b ...
     777              : !> \param matrix_c ...
     778              : !> \author Joost VandeVondele
     779              : ! **************************************************************************************************
     780         9168 :    SUBROUTINE cp_fm_schur_product(matrix_a, matrix_b, matrix_c)
     781              : 
     782              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix_a, matrix_b, matrix_c
     783              : 
     784              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_schur_product'
     785              : 
     786              :       INTEGER                                            :: handle, icol_local, irow_local, mypcol, &
     787              :                                                             myprow, ncol_local, &
     788              :                                                             nrow_local
     789         9168 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: a, b, c
     790              :       TYPE(cp_blacs_env_type), POINTER                   :: context
     791              : 
     792         9168 :       CALL timeset(routineN, handle)
     793              : 
     794         9168 :       context => matrix_a%matrix_struct%context
     795         9168 :       myprow = context%mepos(1)
     796         9168 :       mypcol = context%mepos(2)
     797              : 
     798         9168 :       a => matrix_a%local_data
     799         9168 :       b => matrix_b%local_data
     800         9168 :       c => matrix_c%local_data
     801              : 
     802         9168 :       nrow_local = matrix_a%matrix_struct%nrow_locals(myprow)
     803         9168 :       ncol_local = matrix_a%matrix_struct%ncol_locals(mypcol)
     804              : 
     805        98792 :       DO icol_local = 1, ncol_local
     806      6666536 :          DO irow_local = 1, nrow_local
     807      6657368 :             c(irow_local, icol_local) = a(irow_local, icol_local)*b(irow_local, icol_local)
     808              :          END DO
     809              :       END DO
     810              : 
     811         9168 :       CALL timestop(handle)
     812              : 
     813         9168 :    END SUBROUTINE cp_fm_schur_product
     814              : 
     815              : ! **************************************************************************************************
     816              : !> \brief returns the trace of matrix_a^T matrix_b, i.e
     817              : !>      sum_{i,j}(matrix_a(i,j)*matrix_b(i,j))
     818              : !> \param matrix_a a matrix
     819              : !> \param matrix_b another matrix
     820              : !> \param trace ...
     821              : !> \par History
     822              : !>      11.06.2001 Creation (Matthias Krack)
     823              : !>      12.2002 added doc [fawzi]
     824              : !> \author Matthias Krack
     825              : !> \note
     826              : !>      note the transposition of matrix_a!
     827              : ! **************************************************************************************************
     828       555016 :    SUBROUTINE cp_fm_trace_a0b0t0(matrix_a, matrix_b, trace)
     829              : 
     830              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix_a, matrix_b
     831              :       REAL(KIND=dp), INTENT(OUT)                         :: trace
     832              : 
     833              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a0b0t0'
     834              : 
     835              :       INTEGER                                            :: handle, mypcol, myprow, ncol_local, &
     836              :                                                             nrow_local
     837       555016 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: a, b
     838       555016 :       REAL(KIND=sp), DIMENSION(:, :), POINTER            :: a_sp, b_sp
     839              :       TYPE(cp_blacs_env_type), POINTER                   :: context
     840              :       TYPE(mp_comm_type)                                 :: group
     841              : 
     842       555016 :       CALL timeset(routineN, handle)
     843              : 
     844       555016 :       context => matrix_a%matrix_struct%context
     845       555016 :       myprow = context%mepos(1)
     846       555016 :       mypcol = context%mepos(2)
     847              : 
     848       555016 :       group = matrix_a%matrix_struct%para_env
     849              : 
     850       555016 :       a => matrix_a%local_data
     851       555016 :       b => matrix_b%local_data
     852              : 
     853       555016 :       a_sp => matrix_a%local_data_sp
     854       555016 :       b_sp => matrix_b%local_data_sp
     855              : 
     856       555016 :       nrow_local = MIN(matrix_a%matrix_struct%nrow_locals(myprow), matrix_b%matrix_struct%nrow_locals(myprow))
     857       555016 :       ncol_local = MIN(matrix_a%matrix_struct%ncol_locals(mypcol), matrix_b%matrix_struct%ncol_locals(mypcol))
     858              : 
     859              :       ! cries for an accurate_dot_product
     860       555016 :       IF (matrix_a%use_sp .AND. matrix_b%use_sp) THEN
     861              :          trace = accurate_sum(REAL(a_sp(1:nrow_local, 1:ncol_local)* &
     862            0 :                                    b_sp(1:nrow_local, 1:ncol_local), dp))
     863       555016 :       ELSEIF (matrix_a%use_sp .AND. .NOT. matrix_b%use_sp) THEN
     864              :          trace = accurate_sum(REAL(a_sp(1:nrow_local, 1:ncol_local), dp)* &
     865            0 :                               b(1:nrow_local, 1:ncol_local))
     866       555016 :       ELSEIF (.NOT. matrix_a%use_sp .AND. matrix_b%use_sp) THEN
     867              :          trace = accurate_sum(a(1:nrow_local, 1:ncol_local)* &
     868            0 :                               REAL(b_sp(1:nrow_local, 1:ncol_local), dp))
     869              :       ELSE
     870              :          trace = accurate_dot_product(a(1:nrow_local, 1:ncol_local), &
     871       555016 :                                       b(1:nrow_local, 1:ncol_local))
     872              :       END IF
     873              : 
     874       555016 :       CALL group%sum(trace)
     875              : 
     876       555016 :       CALL timestop(handle)
     877              : 
     878       555016 :    END SUBROUTINE cp_fm_trace_a0b0t0
     879              : 
     880              :    #:mute
     881              :       #:set types = [("cp_fm_type", "a", ""), ("cp_fm_p_type", "p","%matrix")]
     882              :    #:endmute
     883              : 
     884              : ! **************************************************************************************************
     885              : !> \brief Compute trace(k) = Tr (matrix_a(k)^T matrix_b) for each pair of matrices A_k and B.
     886              : !> \param matrix_a list of A matrices
     887              : !> \param matrix_b B matrix
     888              : !> \param trace    computed traces
     889              : !> \par History
     890              : !>    * 08.2018 forked from cp_fm_trace() [Sergey Chulkov]
     891              : !> \note \parblock
     892              : !>      Computing the trace requires collective communication between involved MPI processes
     893              : !>      that implies a synchronisation point between them. The aim of this subroutine is to reduce
     894              : !>      the amount of time wasted in such synchronisation by performing one large collective
     895              : !>      operation which involves all the matrices in question.
     896              : !>
     897              : !>      The subroutine's suffix reflects dimensionality of dummy arrays; 'a1b0t1' means that
     898              : !>      the dummy variables 'matrix_a' and 'trace' are 1-dimensional arrays, while the variable
     899              : !>      'matrix_b' is a single matrix.
     900              : !>      \endparblock
     901              : ! **************************************************************************************************
     902              :    #:for longname, shortname, appendix in types
     903         3174 :       SUBROUTINE cp_fm_trace_a1b0t1_${shortname}$ (matrix_a, matrix_b, trace)
     904              :          TYPE(${longname}$), DIMENSION(:), INTENT(IN)       :: matrix_a
     905              :          TYPE(cp_fm_type), INTENT(IN)                       :: matrix_b
     906              :          REAL(kind=dp), DIMENSION(:), INTENT(OUT)           :: trace
     907              : 
     908              :          CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a1b0t1_${shortname}$'
     909              : 
     910              :          INTEGER                                            :: handle, imatrix, n_matrices, &
     911              :                                                                ncols_local, nrows_local
     912              :          LOGICAL                                            :: use_sp_a, use_sp_b
     913         3174 :          REAL(kind=dp), DIMENSION(:, :), POINTER            :: ldata_a, ldata_b
     914         3174 :          REAL(kind=sp), DIMENSION(:, :), POINTER            :: ldata_a_sp, ldata_b_sp
     915              :          TYPE(mp_comm_type)                                 :: group
     916              : 
     917         3174 :          CALL timeset(routineN, handle)
     918              : 
     919         3174 :          n_matrices = SIZE(trace)
     920         3174 :          CPASSERT(SIZE(matrix_a) == n_matrices)
     921              : 
     922         3174 :          CALL cp_fm_get_info(matrix_b, nrow_local=nrows_local, ncol_local=ncols_local)
     923         3174 :          use_sp_b = matrix_b%use_sp
     924              : 
     925         3174 :          IF (use_sp_b) THEN
     926            0 :             ldata_b_sp => matrix_b%local_data_sp(1:nrows_local, 1:ncols_local)
     927              :          ELSE
     928         3174 :             ldata_b => matrix_b%local_data(1:nrows_local, 1:ncols_local)
     929              :          END IF
     930              : 
     931              : !$OMP PARALLEL DO DEFAULT(NONE), &
     932              : !$OMP             PRIVATE(imatrix, ldata_a, ldata_a_sp, use_sp_a), &
     933              : !$OMP             SHARED(ldata_b, ldata_b_sp, matrix_a, matrix_b), &
     934         3174 : !$OMP             SHARED(ncols_local, nrows_local, n_matrices, trace, use_sp_b)
     935              : 
     936              :          DO imatrix = 1, n_matrices
     937              : 
     938              :             use_sp_a = matrix_a(imatrix) ${appendix}$%use_sp
     939              : 
     940              :             ! assume that the matrices A(i) and B have identical shapes and distribution schemes
     941              :             IF (use_sp_a .AND. use_sp_b) THEN
     942              :                ldata_a_sp => matrix_a(imatrix) ${appendix}$%local_data_sp(1:nrows_local, 1:ncols_local)
     943              :                trace(imatrix) = accurate_dot_product(ldata_a_sp, ldata_b_sp)
     944              :             ELSE IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
     945              :                ldata_a => matrix_a(imatrix) ${appendix}$%local_data(1:nrows_local, 1:ncols_local)
     946              :                trace(imatrix) = accurate_dot_product(ldata_a, ldata_b)
     947              :             ELSE
     948              :                CPABORT("Matrices A and B are of different types")
     949              :             END IF
     950              :          END DO
     951              : !$OMP END PARALLEL DO
     952              : 
     953         3174 :          group = matrix_b%matrix_struct%para_env
     954        19878 :          CALL group%sum(trace)
     955              : 
     956         3174 :          CALL timestop(handle)
     957         3174 :       END SUBROUTINE cp_fm_trace_a1b0t1_${shortname}$
     958              :    #:endfor
     959              : 
     960              : ! **************************************************************************************************
     961              : !> \brief Compute trace(k) = Tr (matrix_a(k)^T matrix_b(k)) for each pair of matrices A_k and B_k.
     962              : !> \param matrix_a list of A matrices
     963              : !> \param matrix_b list of B matrices
     964              : !> \param trace    computed traces
     965              : !> \param accurate ...
     966              : !> \par History
     967              : !>    * 11.2016 forked from cp_fm_trace() [Sergey Chulkov]
     968              : !> \note \parblock
     969              : !>      Computing the trace requires collective communication between involved MPI processes
     970              : !>      that implies a synchronisation point between them. The aim of this subroutine is to reduce
     971              : !>      the amount of time wasted in such synchronisation by performing one large collective
     972              : !>      operation which involves all the matrices in question.
     973              : !>
     974              : !>      The subroutine's suffix reflects dimensionality of dummy arrays; 'a1b1t1' means that
     975              : !>      all dummy variables (matrix_a, matrix_b, and trace) are 1-dimensional arrays.
     976              : !>      \endparblock
     977              : ! **************************************************************************************************
     978              :    #:for longname1, shortname1, appendix1 in types
     979              :       #:for longname2, shortname2, appendix2 in types
     980       149486 :          SUBROUTINE cp_fm_trace_a1b1t1_${shortname1}$${shortname2}$ (matrix_a, matrix_b, trace, accurate)
     981              :             TYPE(${longname1}$), DIMENSION(:), INTENT(IN)      :: matrix_a
     982              :             TYPE(${longname2}$), DIMENSION(:), INTENT(IN)      :: matrix_b
     983              :             REAL(kind=dp), DIMENSION(:), INTENT(OUT)           :: trace
     984              :             LOGICAL, INTENT(IN), OPTIONAL                      :: accurate
     985              : 
     986              :             CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a1b1t1_${shortname1}$${shortname2}$'
     987              : 
     988              :             INTEGER                                            :: handle, imatrix, n_matrices, &
     989              :                                                                   ncols_local, nrows_local
     990              :             LOGICAL                                            :: use_accurate_sum, use_sp_a, use_sp_b
     991       149486 :             REAL(kind=dp), DIMENSION(:, :), POINTER            :: ldata_a, ldata_b
     992       149486 :             REAL(kind=sp), DIMENSION(:, :), POINTER            :: ldata_a_sp, ldata_b_sp
     993              :             TYPE(mp_comm_type)                                 :: group
     994              : 
     995       149486 :             CALL timeset(routineN, handle)
     996              : 
     997       149486 :             n_matrices = SIZE(trace)
     998       149486 :             CPASSERT(SIZE(matrix_a) == n_matrices)
     999       149486 :             CPASSERT(SIZE(matrix_b) == n_matrices)
    1000              : 
    1001       149486 :             use_accurate_sum = .TRUE.
    1002       149486 :             IF (PRESENT(accurate)) use_accurate_sum = accurate
    1003              : 
    1004              : !$OMP PARALLEL DO DEFAULT(NONE), &
    1005              : !$OMP             PRIVATE(imatrix, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
    1006              : !$OMP             PRIVATE(nrows_local, use_sp_a, use_sp_b), &
    1007       149486 : !$OMP             SHARED(matrix_a, matrix_b, n_matrices, trace, use_accurate_sum)
    1008              :             DO imatrix = 1, n_matrices
    1009              :                CALL cp_fm_get_info(matrix_a(imatrix) ${appendix1}$, nrow_local=nrows_local, ncol_local=ncols_local)
    1010              : 
    1011              :                use_sp_a = matrix_a(imatrix) ${appendix1}$%use_sp
    1012              :                use_sp_b = matrix_b(imatrix) ${appendix2}$%use_sp
    1013              : 
    1014              :                ! assume that the matrices A(i) and B(i) have identical shapes and distribution schemes
    1015              :                IF (use_sp_a .AND. use_sp_b) THEN
    1016              :                   ldata_a_sp => matrix_a(imatrix) ${appendix1}$%local_data_sp(1:nrows_local, 1:ncols_local)
    1017              :                   ldata_b_sp => matrix_b(imatrix) ${appendix2}$%local_data_sp(1:nrows_local, 1:ncols_local)
    1018              :                   IF (use_accurate_sum) THEN
    1019              :                      trace(imatrix) = accurate_dot_product(ldata_a_sp, ldata_b_sp)
    1020              :                   ELSE
    1021              :                      trace(imatrix) = SUM(ldata_a_sp*ldata_b_sp)
    1022              :                   END IF
    1023              :                ELSE IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
    1024              :                   ldata_a => matrix_a(imatrix) ${appendix1}$%local_data(1:nrows_local, 1:ncols_local)
    1025              :                   ldata_b => matrix_b(imatrix) ${appendix2}$%local_data(1:nrows_local, 1:ncols_local)
    1026              :                   IF (use_accurate_sum) THEN
    1027              :                      trace(imatrix) = accurate_dot_product(ldata_a, ldata_b)
    1028              :                   ELSE
    1029              :                      trace(imatrix) = SUM(ldata_a*ldata_b)
    1030              :                   END IF
    1031              :                ELSE
    1032              :                   CPABORT("Matrices A and B are of different types")
    1033              :                END IF
    1034              :             END DO
    1035              : !$OMP END PARALLEL DO
    1036              : 
    1037       149486 :             group = matrix_a(1) ${appendix1}$%matrix_struct%para_env
    1038       499638 :             CALL group%sum(trace)
    1039              : 
    1040       149486 :             CALL timestop(handle)
    1041       149486 :          END SUBROUTINE cp_fm_trace_a1b1t1_${shortname1}$${shortname2}$
    1042              :       #:endfor
    1043              :    #:endfor
    1044              : 
    1045              : ! **************************************************************************************************
    1046              : !> \brief Compute trace(i,j) = \sum_k Tr (matrix_a(k,i)^T matrix_b(k,j)).
    1047              : !> \param matrix_a list of A matrices
    1048              : !> \param matrix_b list of B matrices
    1049              : !> \param trace    computed traces
    1050              : !> \param accurate ...
    1051              : ! **************************************************************************************************
    1052              :    #:for longname1, shortname1, appendix1 in types
    1053              :       #:for longname2, shortname2, appendix2 in types
    1054        13912 :          SUBROUTINE cp_fm_contracted_trace_a2b2t2_${shortname1}$${shortname2}$ (matrix_a, matrix_b, trace, accurate)
    1055              :             TYPE(${longname1}$), DIMENSION(:, :), INTENT(IN)   :: matrix_a
    1056              :             TYPE(${longname2}$), DIMENSION(:, :), INTENT(IN)   :: matrix_b
    1057              :             REAL(kind=dp), DIMENSION(:, :), INTENT(OUT)        :: trace
    1058              :             LOGICAL, INTENT(IN), OPTIONAL                      :: accurate
    1059              : 
    1060              :             CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_contracted_trace_a2b2t2_${shortname1}$${shortname2}$'
    1061              : 
    1062              :             INTEGER                                            :: handle, ia, ib, iz, na, nb, ncols_local, &
    1063              :                                                                   nrows_local, nz
    1064              :             INTEGER(kind=int_8)                                :: ib8, itrace, na8, ntraces
    1065              :             LOGICAL                                            :: use_accurate_sum, use_sp_a, use_sp_b
    1066              :             REAL(kind=dp)                                      :: t
    1067        13912 :             REAL(kind=dp), DIMENSION(:, :), POINTER            :: ldata_a, ldata_b
    1068        13912 :             REAL(kind=sp), DIMENSION(:, :), POINTER            :: ldata_a_sp, ldata_b_sp
    1069              :             TYPE(mp_comm_type)                                 :: group
    1070              : 
    1071        13912 :             CALL timeset(routineN, handle)
    1072              : 
    1073        13912 :             nz = SIZE(matrix_a, 1)
    1074        13912 :             CPASSERT(SIZE(matrix_b, 1) == nz)
    1075              : 
    1076        13912 :             na = SIZE(matrix_a, 2)
    1077        13912 :             nb = SIZE(matrix_b, 2)
    1078        13912 :             CPASSERT(SIZE(trace, 1) == na)
    1079        13912 :             CPASSERT(SIZE(trace, 2) == nb)
    1080              : 
    1081        13912 :             use_accurate_sum = .TRUE.
    1082        13912 :             IF (PRESENT(accurate)) use_accurate_sum = accurate
    1083              : 
    1084              :             ! here we use one running index (itrace) instead of two (ia, ib) in order to
    1085              :             ! improve load balance between shared-memory threads
    1086        13912 :             ntraces = na*nb
    1087        13912 :             na8 = INT(na, kind=int_8)
    1088              : 
    1089              : !$OMP PARALLEL DO DEFAULT(NONE), &
    1090              : !$OMP             PRIVATE(ia, ib, ib8, itrace, iz, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
    1091              : !$OMP             PRIVATE(nrows_local, t, use_sp_a, use_sp_b), &
    1092        13912 : !$OMP             SHARED(matrix_a, matrix_b, na, na8, nb, ntraces, nz, trace, use_accurate_sum)
    1093              :             DO itrace = 1, ntraces
    1094              :                ib8 = (itrace - 1)/na8
    1095              :                ia = INT(itrace - ib8*na8)
    1096              :                ib = INT(ib8) + 1
    1097              : 
    1098              :                t = 0.0_dp
    1099              :                DO iz = 1, nz
    1100              :                   CALL cp_fm_get_info(matrix_a(iz, ia) ${appendix1}$, nrow_local=nrows_local, ncol_local=ncols_local)
    1101              :                   use_sp_a = matrix_a(iz, ia) ${appendix1}$%use_sp
    1102              :                   use_sp_b = matrix_b(iz, ib) ${appendix2}$%use_sp
    1103              : 
    1104              :                   ! assume that the matrices A(iz, ia) and B(iz, ib) have identical shapes and distribution schemes
    1105              :                   IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
    1106              :                      ldata_a => matrix_a(iz, ia) ${appendix1}$%local_data(1:nrows_local, 1:ncols_local)
    1107              :                      ldata_b => matrix_b(iz, ib) ${appendix2}$%local_data(1:nrows_local, 1:ncols_local)
    1108              :                      IF (use_accurate_sum) THEN
    1109              :                         t = t + accurate_dot_product(ldata_a, ldata_b)
    1110              :                      ELSE
    1111              :                         t = t + SUM(ldata_a*ldata_b)
    1112              :                      END IF
    1113              :                   ELSE IF (use_sp_a .AND. use_sp_b) THEN
    1114              :                      ldata_a_sp => matrix_a(iz, ia) ${appendix1}$%local_data_sp(1:nrows_local, 1:ncols_local)
    1115              :                      ldata_b_sp => matrix_b(iz, ib) ${appendix2}$%local_data_sp(1:nrows_local, 1:ncols_local)
    1116              :                      IF (use_accurate_sum) THEN
    1117              :                         t = t + accurate_dot_product(ldata_a_sp, ldata_b_sp)
    1118              :                      ELSE
    1119              :                         t = t + SUM(ldata_a_sp*ldata_b_sp)
    1120              :                      END IF
    1121              :                   ELSE
    1122              :                      CPABORT("Matrices A and B are of different types")
    1123              :                   END IF
    1124              :                END DO
    1125              :                trace(ia, ib) = t
    1126              :             END DO
    1127              : !$OMP END PARALLEL DO
    1128              : 
    1129        13912 :             group = matrix_a(1, 1) ${appendix1}$%matrix_struct%para_env
    1130       664484 :             CALL group%sum(trace)
    1131              : 
    1132        13912 :             CALL timestop(handle)
    1133        13912 :          END SUBROUTINE cp_fm_contracted_trace_a2b2t2_${shortname1}$${shortname2}$
    1134              :       #:endfor
    1135              :    #:endfor
    1136              : 
    1137              : ! **************************************************************************************************
    1138              : !> \brief multiplies in place by a triangular matrix:
    1139              : !>       matrix_b = alpha op(triangular_matrix) matrix_b
    1140              : !>      or (if side='R')
    1141              : !>       matrix_b = alpha matrix_b op(triangular_matrix)
    1142              : !>      op(triangular_matrix) is:
    1143              : !>       triangular_matrix (if transpose_tr=.false. and invert_tr=.false.)
    1144              : !>       triangular_matrix^T (if transpose_tr=.true. and invert_tr=.false.)
    1145              : !>       triangular_matrix^(-1) (if transpose_tr=.false. and invert_tr=.true.)
    1146              : !>       triangular_matrix^(-T) (if transpose_tr=.true. and invert_tr=.true.)
    1147              : !> \param triangular_matrix the triangular matrix that multiplies the other
    1148              : !> \param matrix_b the matrix that gets multiplied and stores the result
    1149              : !> \param side on which side of matrix_b stays op(triangular_matrix)
    1150              : !>        (defaults to 'L')
    1151              : !> \param transpose_tr if the triangular matrix should be transposed
    1152              : !>        (defaults to false)
    1153              : !> \param invert_tr if the triangular matrix should be inverted
    1154              : !>        (defaults to false)
    1155              : !> \param uplo_tr if triangular_matrix is stored in the upper ('U') or
    1156              : !>        lower ('L') triangle (defaults to 'U')
    1157              : !> \param unit_diag_tr if the diagonal elements of triangular_matrix should
    1158              : !>        be assumed to be 1 (defaults to false)
    1159              : !> \param n_rows the number of rows of the result (defaults to
    1160              : !>        size(matrix_b,1))
    1161              : !> \param n_cols the number of columns of the result (defaults to
    1162              : !>        size(matrix_b,2))
    1163              : !> \param alpha ...
    1164              : !> \par History
    1165              : !>      08.2002 created [fawzi]
    1166              : !> \author Fawzi Mohamed
    1167              : !> \note
    1168              : !>      needs an mpi env
    1169              : ! **************************************************************************************************
    1170       110104 :    SUBROUTINE cp_fm_triangular_multiply(triangular_matrix, matrix_b, side, &
    1171              :                                         transpose_tr, invert_tr, uplo_tr, unit_diag_tr, n_rows, n_cols, &
    1172              :                                         alpha)
    1173              :       TYPE(cp_fm_type), INTENT(IN)                       :: triangular_matrix, matrix_b
    1174              :       CHARACTER, INTENT(IN), OPTIONAL                    :: side
    1175              :       LOGICAL, INTENT(IN), OPTIONAL                      :: transpose_tr, invert_tr
    1176              :       CHARACTER, INTENT(IN), OPTIONAL                    :: uplo_tr
    1177              :       LOGICAL, INTENT(IN), OPTIONAL                      :: unit_diag_tr
    1178              :       INTEGER, INTENT(IN), OPTIONAL                      :: n_rows, n_cols
    1179              :       REAL(KIND=dp), INTENT(IN), OPTIONAL                :: alpha
    1180              : 
    1181              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_triangular_multiply'
    1182              : 
    1183              :       CHARACTER                                          :: side_char, transa, unit_diag, uplo
    1184              :       INTEGER                                            :: handle, mdim, m, n
    1185              :       LOGICAL                                            :: invert
    1186              :       REAL(KIND=dp)                                      :: al
    1187              : 
    1188        55052 :       CALL timeset(routineN, handle)
    1189        55052 :       side_char = 'L'
    1190        55052 :       unit_diag = 'N'
    1191        55052 :       uplo = 'U'
    1192        55052 :       transa = 'N'
    1193        55052 :       invert = .FALSE.
    1194        55052 :       al = 1.0_dp
    1195        55052 :       CALL cp_fm_get_info(matrix_b, nrow_global=m, ncol_global=n)
    1196        55052 :       IF (PRESENT(side)) side_char = side
    1197              :       mdim = MERGE(1, 2, 'L' == side_char)
    1198        55052 :       IF (PRESENT(invert_tr)) invert = invert_tr
    1199        55052 :       IF (PRESENT(uplo_tr)) uplo = uplo_tr
    1200        55052 :       IF (PRESENT(unit_diag_tr)) THEN
    1201            0 :          IF (unit_diag_tr) THEN
    1202            0 :             unit_diag = 'U'
    1203              :          ELSE
    1204              :             unit_diag = 'N'
    1205              :          END IF
    1206              :       END IF
    1207        55052 :       IF (PRESENT(transpose_tr)) THEN
    1208         3402 :          IF (transpose_tr) THEN
    1209         1290 :             transa = 'T'
    1210              :          ELSE
    1211              :             transa = 'N'
    1212              :          END IF
    1213              :       END IF
    1214        55052 :       IF (PRESENT(alpha)) al = alpha
    1215        55052 :       IF (PRESENT(n_rows)) m = n_rows
    1216        55052 :       IF (PRESENT(n_cols)) n = n_cols
    1217              : 
    1218        55052 :       IF (invert) THEN
    1219              : 
    1220              : #if defined(__parallel)
    1221              :          CALL pdtrsm(side_char, uplo, transa, unit_diag, m, n, al, &
    1222              :                      triangular_matrix%local_data(1, 1), 1, 1, &
    1223              :                      triangular_matrix%matrix_struct%descriptor, &
    1224              :                      matrix_b%local_data(1, 1), 1, 1, &
    1225        44468 :                      matrix_b%matrix_struct%descriptor(1))
    1226              : #else
    1227              :          CALL dtrsm(side_char, uplo, transa, unit_diag, m, n, al, &
    1228              :                     triangular_matrix%local_data(1, 1), &
    1229              :                     SIZE(triangular_matrix%local_data, mdim), &
    1230              :                     matrix_b%local_data(1, 1), SIZE(matrix_b%local_data, 1))
    1231              : #endif
    1232              : 
    1233              :       ELSE
    1234              : 
    1235              : #if defined(__parallel)
    1236              :          CALL pdtrmm(side_char, uplo, transa, unit_diag, m, n, al, &
    1237              :                      triangular_matrix%local_data(1, 1), 1, 1, &
    1238              :                      triangular_matrix%matrix_struct%descriptor, &
    1239              :                      matrix_b%local_data(1, 1), 1, 1, &
    1240        10584 :                      matrix_b%matrix_struct%descriptor(1))
    1241              : #else
    1242              :          CALL dtrmm(side_char, uplo, transa, unit_diag, m, n, al, &
    1243              :                     triangular_matrix%local_data(1, 1), &
    1244              :                     SIZE(triangular_matrix%local_data, mdim), &
    1245              :                     matrix_b%local_data(1, 1), SIZE(matrix_b%local_data, 1))
    1246              : #endif
    1247              : 
    1248              :       END IF
    1249              : 
    1250        55052 :       CALL timestop(handle)
    1251        55052 :    END SUBROUTINE cp_fm_triangular_multiply
    1252              : 
    1253              : ! **************************************************************************************************
    1254              : !> \brief scales a matrix
    1255              : !>      matrix_a = alpha * matrix_b
    1256              : !> \param alpha ...
    1257              : !> \param matrix_a ...
    1258              : !> \note
    1259              : !>      use cp_fm_set_all to zero (avoids problems with nan)
    1260              : ! **************************************************************************************************
    1261        86473 :    SUBROUTINE cp_fm_scale(alpha, matrix_a)
    1262              :       REAL(KIND=dp), INTENT(IN)                          :: alpha
    1263              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix_a
    1264              : 
    1265              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'cp_fm_scale'
    1266              : 
    1267              :       INTEGER                                            :: handle, size_a
    1268              :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: a
    1269              : 
    1270        86473 :       CALL timeset(routineN, handle)
    1271              : 
    1272              :       NULLIFY (a)
    1273              : 
    1274        86473 :       a => matrix_a%local_data
    1275        86473 :       size_a = SIZE(a, 1)*SIZE(a, 2)
    1276              : 
    1277        86473 :       CALL DSCAL(size_a, alpha, a, 1)
    1278              : 
    1279        86473 :       CALL timestop(handle)
    1280              : 
    1281        86473 :    END SUBROUTINE cp_fm_scale
    1282              : 
    1283              : ! **************************************************************************************************
    1284              : !> \brief transposes a matrix
    1285              : !>      matrixt = matrix ^ T
    1286              : !> \param matrix ...
    1287              : !> \param matrixt ...
    1288              : !> \note
    1289              : !>      all matrix elements are transposed (see cp_fm_uplo_to_full to symmetrise a matrix)
    1290              : ! **************************************************************************************************
    1291        25872 :    SUBROUTINE cp_fm_transpose(matrix, matrixt)
    1292              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix, matrixt
    1293              : 
    1294              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_transpose'
    1295              : 
    1296              :       INTEGER                                  :: handle, ncol_global, &
    1297              :                                                   nrow_global, ncol_globalt, nrow_globalt
    1298        12936 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a, c
    1299              : #if defined(__parallel)
    1300              :       INTEGER, DIMENSION(9)                    :: desca, descc
    1301              : #elif !defined(__MKL)
    1302              :       INTEGER                                  :: i, j
    1303              : #endif
    1304              : 
    1305        12936 :       nrow_global = matrix%matrix_struct%nrow_global
    1306        12936 :       ncol_global = matrix%matrix_struct%ncol_global
    1307        12936 :       nrow_globalt = matrixt%matrix_struct%nrow_global
    1308        12936 :       ncol_globalt = matrixt%matrix_struct%ncol_global
    1309            0 :       CPASSERT(nrow_global == ncol_globalt)
    1310        12936 :       CPASSERT(nrow_globalt == ncol_global)
    1311              : 
    1312        12936 :       CALL timeset(routineN, handle)
    1313              : 
    1314        12936 :       a => matrix%local_data
    1315        12936 :       c => matrixt%local_data
    1316              : 
    1317              : #if defined(__parallel)
    1318       129360 :       desca(:) = matrix%matrix_struct%descriptor(:)
    1319       129360 :       descc(:) = matrixt%matrix_struct%descriptor(:)
    1320        12936 :       CALL pdtran(ncol_global, nrow_global, 1.0_dp, a(1, 1), 1, 1, desca, 0.0_dp, c(1, 1), 1, 1, descc)
    1321              : #elif defined(__MKL)
    1322              :       CALL mkl_domatcopy('C', 'T', nrow_global, ncol_global, 1.0_dp, a(1, 1), nrow_global, c(1, 1), ncol_global)
    1323              : #else
    1324              :       DO j = 1, ncol_global
    1325              :          DO i = 1, nrow_global
    1326              :             c(j, i) = a(i, j)
    1327              :          END DO
    1328              :       END DO
    1329              : #endif
    1330        12936 :       CALL timestop(handle)
    1331              : 
    1332        12936 :    END SUBROUTINE cp_fm_transpose
    1333              : 
    1334              : ! **************************************************************************************************
    1335              : !> \brief given a triangular matrix according to uplo, computes the corresponding full matrix
    1336              : !> \param matrix the triangular matrix as input, the full matrix as output
    1337              : !> \param work a matrix of the same size as matrix
    1338              : !> \param uplo triangular format; defaults to 'U'
    1339              : !> \author Matthias Krack
    1340              : !> \note
    1341              : !>       the opposite triangular part is irrelevant
    1342              : ! **************************************************************************************************
    1343       295114 :    SUBROUTINE cp_fm_uplo_to_full(matrix, work, uplo)
    1344              : 
    1345              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix, work
    1346              :       CHARACTER, INTENT(IN), OPTIONAL          :: uplo
    1347              : 
    1348              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_uplo_to_full'
    1349              : 
    1350              :       CHARACTER                                :: myuplo
    1351              :       INTEGER                                  :: handle, icol_global, irow_global, &
    1352              :                                                   mypcol, myprow, ncol_global, &
    1353              :                                                   nrow_global
    1354       147557 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
    1355       147557 :       REAL(KIND=sp), DIMENSION(:, :), POINTER  :: a_sp
    1356              :       TYPE(cp_blacs_env_type), POINTER         :: context
    1357              : 
    1358              : #if defined(__parallel)
    1359              :       INTEGER                                  :: icol_local, irow_local, &
    1360              :                                                   ncol_block, ncol_local, &
    1361              :                                                   nrow_block, nrow_local
    1362              :       INTEGER, DIMENSION(9)                    :: desca, descc
    1363       147557 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: c
    1364       147557 :       REAL(KIND=sp), DIMENSION(:, :), POINTER  :: c_sp
    1365              : #endif
    1366              : 
    1367       147557 :       myuplo = 'U'
    1368            0 :       IF (PRESENT(uplo)) myuplo = uplo
    1369              : 
    1370       147557 :       nrow_global = matrix%matrix_struct%nrow_global
    1371       147557 :       ncol_global = matrix%matrix_struct%ncol_global
    1372       147557 :       CPASSERT(nrow_global == ncol_global)
    1373       147557 :       nrow_global = work%matrix_struct%nrow_global
    1374       147557 :       ncol_global = work%matrix_struct%ncol_global
    1375       147557 :       CPASSERT(nrow_global == ncol_global)
    1376       147557 :       CPASSERT(matrix%use_sp .EQV. work%use_sp)
    1377              : 
    1378       147557 :       CALL timeset(routineN, handle)
    1379              : 
    1380       147557 :       context => matrix%matrix_struct%context
    1381       147557 :       myprow = context%mepos(1)
    1382       147557 :       mypcol = context%mepos(2)
    1383              : 
    1384              : #if defined(__parallel)
    1385              : 
    1386       147557 :       nrow_block = matrix%matrix_struct%nrow_block
    1387       147557 :       ncol_block = matrix%matrix_struct%ncol_block
    1388              : 
    1389       147557 :       nrow_local = matrix%matrix_struct%nrow_locals(myprow)
    1390       147557 :       ncol_local = matrix%matrix_struct%ncol_locals(mypcol)
    1391              : 
    1392       147557 :       a => work%local_data
    1393       147557 :       a_sp => work%local_data_sp
    1394      1475570 :       desca(:) = work%matrix_struct%descriptor(:)
    1395       147557 :       c => matrix%local_data
    1396       147557 :       c_sp => matrix%local_data_sp
    1397      1475570 :       descc(:) = matrix%matrix_struct%descriptor(:)
    1398              : 
    1399      4966786 :       DO icol_local = 1, ncol_local
    1400      4819229 :          icol_global = matrix%matrix_struct%col_indices(icol_local)
    1401    229384342 :          DO irow_local = 1, nrow_local
    1402    224417556 :             irow_global = matrix%matrix_struct%row_indices(irow_local)
    1403    229236785 :             IF (MERGE(irow_global > icol_global, irow_global < icol_global, (myuplo == "U") .OR. (myuplo == "u"))) THEN
    1404    110920155 :                IF (matrix%use_sp) THEN
    1405            0 :                   c_sp(irow_local, icol_local) = 0.0_sp
    1406              :                ELSE
    1407    110920155 :                   c(irow_local, icol_local) = 0.0_dp
    1408              :                END IF
    1409    113497401 :             ELSE IF (irow_global == icol_global) THEN
    1410      2577246 :                IF (matrix%use_sp) THEN
    1411            0 :                   c_sp(irow_local, icol_local) = 0.5_sp*c_sp(irow_local, icol_local)
    1412              :                ELSE
    1413      2577246 :                   c(irow_local, icol_local) = 0.5_dp*c(irow_local, icol_local)
    1414              :                END IF
    1415              :             END IF
    1416              :          END DO
    1417              :       END DO
    1418              : 
    1419      4966786 :       DO icol_local = 1, ncol_local
    1420    229384342 :       DO irow_local = 1, nrow_local
    1421    229236785 :          IF (matrix%use_sp) THEN
    1422            0 :             a_sp(irow_local, icol_local) = c_sp(irow_local, icol_local)
    1423              :          ELSE
    1424    224417556 :             a(irow_local, icol_local) = c(irow_local, icol_local)
    1425              :          END IF
    1426              :       END DO
    1427              :       END DO
    1428              : 
    1429       147557 :       IF (matrix%use_sp) THEN
    1430            0 :          CALL pstran(nrow_global, ncol_global, 1.0_sp, a_sp(1, 1), 1, 1, desca, 1.0_sp, c_sp(1, 1), 1, 1, descc)
    1431              :       ELSE
    1432       147557 :          CALL pdtran(nrow_global, ncol_global, 1.0_dp, a(1, 1), 1, 1, desca, 1.0_dp, c(1, 1), 1, 1, descc)
    1433              :       END IF
    1434              : 
    1435              : #else
    1436              : 
    1437              :       a => matrix%local_data
    1438              :       a_sp => matrix%local_data_sp
    1439              : 
    1440              :       IF ((myuplo == "U") .OR. (myuplo == "u")) THEN
    1441              :          DO irow_global = 1, nrow_global
    1442              :          DO icol_global = irow_global + 1, ncol_global
    1443              :             IF (matrix%use_sp) THEN
    1444              :                a_sp(icol_global, irow_global) = a_sp(irow_global, icol_global)
    1445              :             ELSE
    1446              :                a(icol_global, irow_global) = a(irow_global, icol_global)
    1447              :             END IF
    1448              :          END DO
    1449              :          END DO
    1450              :       ELSE
    1451              :          DO icol_global = 1, ncol_global
    1452              :          DO irow_global = icol_global + 1, nrow_global
    1453              :             IF (matrix%use_sp) THEN
    1454              :                a_sp(irow_global, icol_global) = a_sp(icol_global, irow_global)
    1455              :             ELSE
    1456              :                a(irow_global, icol_global) = a(icol_global, irow_global)
    1457              :             END IF
    1458              :          END DO
    1459              :          END DO
    1460              :       END IF
    1461              : 
    1462              : #endif
    1463       147557 :       CALL timestop(handle)
    1464              : 
    1465       147557 :    END SUBROUTINE cp_fm_uplo_to_full
    1466              : 
    1467              : ! **************************************************************************************************
    1468              : !> \brief scales column i of matrix a with scaling(i)
    1469              : !> \param matrixa ...
    1470              : !> \param scaling : an array used for scaling the columns,
    1471              : !>                  SIZE(scaling) determines the number of columns to be scaled
    1472              : !> \author Joost VandeVondele
    1473              : !> \note
    1474              : !>      this is very useful as a first step in the computation of C = sum_i alpha_i A_i transpose (A_i)
    1475              : !>      that is a rank-k update (cp_fm_syrk , cp_sm_plus_fm_fm_t)
    1476              : !>      this procedure can be up to 20 times faster than calling cp_fm_syrk n times
    1477              : !>      where every vector has a different prefactor
    1478              : ! **************************************************************************************************
    1479       139973 :    SUBROUTINE cp_fm_column_scale(matrixa, scaling)
    1480              :       TYPE(cp_fm_type), INTENT(IN)             :: matrixa
    1481              :       REAL(KIND=dp), DIMENSION(:), INTENT(IN)  :: scaling
    1482              : 
    1483              :       INTEGER                                  :: k, mypcol, myprow, n, ncol_global, &
    1484              :                                                   npcol, nprow
    1485       139973 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
    1486       139973 :       REAL(KIND=sp), DIMENSION(:, :), POINTER  :: a_sp
    1487              : #if defined(__parallel)
    1488              :       INTEGER                                  :: icol_global, icol_local, &
    1489              :                                                   ipcol, iprow, irow_local
    1490              : #else
    1491              :       INTEGER                                  :: i
    1492              : #endif
    1493              : 
    1494       139973 :       myprow = matrixa%matrix_struct%context%mepos(1)
    1495       139973 :       mypcol = matrixa%matrix_struct%context%mepos(2)
    1496       139973 :       nprow = matrixa%matrix_struct%context%num_pe(1)
    1497       139973 :       npcol = matrixa%matrix_struct%context%num_pe(2)
    1498              : 
    1499       139973 :       ncol_global = matrixa%matrix_struct%ncol_global
    1500              : 
    1501       139973 :       a => matrixa%local_data
    1502       139973 :       a_sp => matrixa%local_data_sp
    1503       139973 :       IF (matrixa%use_sp) THEN
    1504            0 :          n = SIZE(a_sp, 1)
    1505              :       ELSE
    1506       139973 :          n = SIZE(a, 1)
    1507              :       END IF
    1508       139973 :       k = MIN(SIZE(scaling), ncol_global)
    1509              : 
    1510              : #if defined(__parallel)
    1511              : 
    1512      2222214 :       DO icol_global = 1, k
    1513              :          CALL infog2l(1, icol_global, matrixa%matrix_struct%descriptor, &
    1514              :                       nprow, npcol, myprow, mypcol, &
    1515      2082241 :                       irow_local, icol_local, iprow, ipcol)
    1516      2222214 :          IF ((ipcol == mypcol)) THEN
    1517      2082241 :             IF (matrixa%use_sp) THEN
    1518            0 :                CALL SSCAL(n, REAL(scaling(icol_global), sp), a_sp(:, icol_local), 1)
    1519              :             ELSE
    1520      2082241 :                CALL DSCAL(n, scaling(icol_global), a(:, icol_local), 1)
    1521              :             END IF
    1522              :          END IF
    1523              :       END DO
    1524              : #else
    1525              :       DO i = 1, k
    1526              :          IF (matrixa%use_sp) THEN
    1527              :             CALL SSCAL(n, REAL(scaling(i), sp), a_sp(:, i), 1)
    1528              :          ELSE
    1529              :             CALL DSCAL(n, scaling(i), a(:, i), 1)
    1530              :          END IF
    1531              :       END DO
    1532              : #endif
    1533       139973 :    END SUBROUTINE cp_fm_column_scale
    1534              : 
    1535              : ! **************************************************************************************************
    1536              : !> \brief scales row i of matrix a with scaling(i)
    1537              : !> \param matrixa ...
    1538              : !> \param scaling : an array used for scaling the columns,
    1539              : !> \author JGH
    1540              : !> \note
    1541              : ! **************************************************************************************************
    1542         6272 :    SUBROUTINE cp_fm_row_scale(matrixa, scaling)
    1543              :       TYPE(cp_fm_type), INTENT(IN)             :: matrixa
    1544              :       REAL(KIND=dp), DIMENSION(:), INTENT(IN)  :: scaling
    1545              : 
    1546              :       INTEGER                                  :: n, m, nrow_global, nrow_local, ncol_local
    1547         6272 :       INTEGER, DIMENSION(:), POINTER           :: row_indices
    1548         6272 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
    1549         6272 :       REAL(KIND=sp), DIMENSION(:, :), POINTER  :: a_sp
    1550              : #if defined(__parallel)
    1551              :       INTEGER                                  :: irow_global, icol, irow
    1552              : #else
    1553              :       INTEGER                                  :: j
    1554              : #endif
    1555              : 
    1556              :       CALL cp_fm_get_info(matrixa, row_indices=row_indices, nrow_global=nrow_global, &
    1557         6272 :                           nrow_local=nrow_local, ncol_local=ncol_local)
    1558         6272 :       CPASSERT(SIZE(scaling) == nrow_global)
    1559              : 
    1560         6272 :       a => matrixa%local_data
    1561         6272 :       a_sp => matrixa%local_data_sp
    1562         6272 :       IF (matrixa%use_sp) THEN
    1563         6272 :          n = SIZE(a_sp, 1)
    1564         6272 :          m = SIZE(a_sp, 2)
    1565              :       ELSE
    1566         6272 :          n = SIZE(a, 1)
    1567         6272 :          m = SIZE(a, 2)
    1568              :       END IF
    1569              : 
    1570              : #if defined(__parallel)
    1571        78776 :       DO icol = 1, ncol_local
    1572        78776 :          IF (matrixa%use_sp) THEN
    1573            0 :             DO irow = 1, nrow_local
    1574            0 :                irow_global = row_indices(irow)
    1575            0 :                a(irow, icol) = REAL(scaling(irow_global), dp)*a(irow, icol)
    1576              :             END DO
    1577              :          ELSE
    1578      6479226 :             DO irow = 1, nrow_local
    1579      6406722 :                irow_global = row_indices(irow)
    1580      6479226 :                a(irow, icol) = scaling(irow_global)*a(irow, icol)
    1581              :             END DO
    1582              :          END IF
    1583              :       END DO
    1584              : #else
    1585              :       IF (matrixa%use_sp) THEN
    1586              :          DO j = 1, m
    1587              :             a_sp(1:n, j) = REAL(scaling(1:n), sp)*a_sp(1:n, j)
    1588              :          END DO
    1589              :       ELSE
    1590              :          DO j = 1, m
    1591              :             a(1:n, j) = scaling(1:n)*a(1:n, j)
    1592              :          END DO
    1593              :       END IF
    1594              : #endif
    1595         6272 :    END SUBROUTINE cp_fm_row_scale
    1596              : 
    1597              : ! **************************************************************************************************
    1598              : !> \brief Inverts a cp_fm_type matrix, optionally returning the determinant of the input matrix
    1599              : !> \param matrix_a the matrix to invert
    1600              : !> \param matrix_inverse the inverse of matrix_a
    1601              : !> \param det_a the determinant of matrix_a
    1602              : !> \param eps_svd optional parameter to active SVD based inversion, singular values below eps_svd
    1603              : !>                are screened
    1604              : !> \param eigval optionally return matrix eigenvalues/singular values
    1605              : !> \par History
    1606              : !>      note of Jan Wilhelm (12.2015)
    1607              : !>      - computation of determinant corrected
    1608              : !>      - determinant only computed if det_a is present
    1609              : !>      12.2016 added option to use SVD instead of LU [Nico Holmberg]
    1610              : !>      - Use cp_fm_get diag instead of n times cp_fm_get_element (A. Bussy)
    1611              : !> \author Florian Schiffmann(02.2007)
    1612              : ! **************************************************************************************************
    1613         1590 :    SUBROUTINE cp_fm_invert(matrix_a, matrix_inverse, det_a, eps_svd, eigval)
    1614              : 
    1615              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix_a, matrix_inverse
    1616              :       REAL(KIND=dp), INTENT(OUT), OPTIONAL     :: det_a
    1617              :       REAL(KIND=dp), INTENT(IN), OPTIONAL      :: eps_svd
    1618              :       REAL(KIND=dp), DIMENSION(:), POINTER, &
    1619              :          INTENT(INOUT), OPTIONAL               :: eigval
    1620              : 
    1621              :       INTEGER                                  :: n
    1622         1590 :       INTEGER, ALLOCATABLE, DIMENSION(:)       :: ipivot
    1623              :       REAL(KIND=dp)                            :: determinant, my_eps_svd
    1624              :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
    1625              :       TYPE(cp_fm_type)                         :: matrix_lu
    1626              : 
    1627              : #if defined(__parallel)
    1628              :       TYPE(cp_fm_type)                         :: u, vt, sigma, inv_sigma_ut
    1629              :       TYPE(mp_comm_type) :: group
    1630              :       INTEGER                                  :: i, info, liwork, lwork, exponent_of_minus_one
    1631              :       INTEGER, DIMENSION(9)                    :: desca
    1632              :       LOGICAL                                  :: quenched
    1633              :       REAL(KIND=dp)                            :: alpha, beta
    1634         1590 :       REAL(KIND=dp), DIMENSION(:), POINTER     :: diag
    1635         1590 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: work
    1636              : #else
    1637              :       LOGICAL                                  :: sign
    1638              :       REAL(KIND=dp)                            :: eps1
    1639              : #endif
    1640              : 
    1641         1590 :       my_eps_svd = 0.0_dp
    1642          468 :       IF (PRESENT(eps_svd)) my_eps_svd = eps_svd
    1643              : 
    1644              :       CALL cp_fm_create(matrix=matrix_lu, &
    1645              :                         matrix_struct=matrix_a%matrix_struct, &
    1646         1590 :                         name="A_lu"//TRIM(ADJUSTL(cp_to_string(1)))//"MATRIX")
    1647         1590 :       CALL cp_fm_to_fm(matrix_a, matrix_lu)
    1648              : 
    1649         1590 :       a => matrix_lu%local_data
    1650         1590 :       n = matrix_lu%matrix_struct%nrow_global
    1651         4770 :       ALLOCATE (ipivot(n + matrix_a%matrix_struct%nrow_block))
    1652        19381 :       ipivot(:) = 0
    1653              : #if defined(__parallel)
    1654         1590 :       IF (my_eps_svd == 0.0_dp) THEN
    1655              :          ! Use LU decomposition
    1656         1562 :          lwork = 3*n
    1657         1562 :          liwork = 3*n
    1658        15620 :          desca(:) = matrix_lu%matrix_struct%descriptor(:)
    1659         1562 :          CALL pdgetrf(n, n, a, 1, 1, desca, ipivot, info)
    1660              : 
    1661         1562 :          IF (PRESENT(det_a) .OR. PRESENT(eigval)) THEN
    1662              : 
    1663         1116 :             ALLOCATE (diag(n))
    1664          916 :             diag(:) = 0.0_dp
    1665          372 :             CALL cp_fm_get_diag(matrix_lu, diag)
    1666              : 
    1667          372 :             exponent_of_minus_one = 0
    1668          372 :             determinant = 1.0_dp
    1669          916 :             DO i = 1, n
    1670          544 :                determinant = determinant*diag(i)
    1671          916 :                IF (ipivot(i) /= i) THEN
    1672          224 :                   exponent_of_minus_one = exponent_of_minus_one + 1
    1673              :                END IF
    1674              :             END DO
    1675          372 :             IF (PRESENT(eigval)) THEN
    1676            0 :                CPASSERT(.NOT. ASSOCIATED(eigval))
    1677            0 :                ALLOCATE (eigval(n))
    1678            0 :                eigval(:) = diag
    1679              :             END IF
    1680          372 :             DEALLOCATE (diag)
    1681              : 
    1682          372 :             group = matrix_lu%matrix_struct%para_env
    1683          372 :             CALL group%sum(exponent_of_minus_one)
    1684              : 
    1685          372 :             determinant = determinant*(-1.0_dp)**exponent_of_minus_one
    1686              : 
    1687              :          END IF
    1688              : 
    1689         1562 :          alpha = 0.0_dp
    1690         1562 :          beta = 1.0_dp
    1691         1562 :          CALL cp_fm_set_all(matrix_inverse, alpha, beta)
    1692         1562 :          CALL pdgetrs('N', n, n, matrix_lu%local_data, 1, 1, desca, ipivot, matrix_inverse%local_data, 1, 1, desca, info)
    1693              :       ELSE
    1694              :          ! Use singular value decomposition
    1695              :          CALL cp_fm_create(matrix=u, &
    1696              :                            matrix_struct=matrix_a%matrix_struct, &
    1697           28 :                            name="LEFT_SINGULAR_MATRIX")
    1698           28 :          CALL cp_fm_set_all(u, alpha=0.0_dp)
    1699              :          CALL cp_fm_create(matrix=vt, &
    1700              :                            matrix_struct=matrix_a%matrix_struct, &
    1701           28 :                            name="RIGHT_SINGULAR_MATRIX")
    1702           28 :          CALL cp_fm_set_all(vt, alpha=0.0_dp)
    1703           84 :          ALLOCATE (diag(n))
    1704           92 :          diag(:) = 0.0_dp
    1705          280 :          desca(:) = matrix_lu%matrix_struct%descriptor(:)
    1706           28 :          ALLOCATE (work(1))
    1707              :          ! Workspace query
    1708           28 :          lwork = -1
    1709              :          CALL pdgesvd('V', 'V', n, n, matrix_lu%local_data, 1, 1, desca, diag, u%local_data, &
    1710           28 :                       1, 1, desca, vt%local_data, 1, 1, desca, work, lwork, info)
    1711           28 :          lwork = INT(work(1))
    1712           28 :          DEALLOCATE (work)
    1713           84 :          ALLOCATE (work(lwork))
    1714              :          ! SVD
    1715              :          CALL pdgesvd('V', 'V', n, n, matrix_lu%local_data, 1, 1, desca, diag, u%local_data, &
    1716           28 :                       1, 1, desca, vt%local_data, 1, 1, desca, work, lwork, info)
    1717              :          ! info == n+1 implies homogeneity error when the number of procs is large
    1718              :          ! this likely isnt a problem, but maybe we should handle it separately
    1719           28 :          IF (info /= 0 .AND. info /= n + 1) &
    1720            0 :             CPABORT("Singular value decomposition of matrix failed.")
    1721              :          ! (Pseudo)inverse and (pseudo)determinant
    1722              :          CALL cp_fm_create(matrix=sigma, &
    1723              :                            matrix_struct=matrix_a%matrix_struct, &
    1724           28 :                            name="SINGULAR_VALUE_MATRIX")
    1725           28 :          CALL cp_fm_set_all(sigma, alpha=0.0_dp)
    1726           28 :          determinant = 1.0_dp
    1727           28 :          quenched = .FALSE.
    1728           28 :          IF (PRESENT(eigval)) THEN
    1729           28 :             CPASSERT(.NOT. ASSOCIATED(eigval))
    1730           84 :             ALLOCATE (eigval(n))
    1731          156 :             eigval(:) = diag
    1732              :          END IF
    1733           92 :          DO i = 1, n
    1734           64 :             IF (diag(i) < my_eps_svd) THEN
    1735           18 :                diag(i) = 0.0_dp
    1736           18 :                quenched = .TRUE.
    1737              :             ELSE
    1738           46 :                determinant = determinant*diag(i)
    1739           46 :                diag(i) = 1.0_dp/diag(i)
    1740              :             END IF
    1741           92 :             CALL cp_fm_set_element(sigma, i, i, diag(i))
    1742              :          END DO
    1743           28 :          DEALLOCATE (diag)
    1744           28 :          IF (quenched) &
    1745              :             CALL cp_warn(__LOCATION__, &
    1746              :                          "Linear dependencies were detected in the SVD inversion of matrix "//TRIM(ADJUSTL(matrix_a%name))// &
    1747           12 :                          ". At least one singular value has been quenched.")
    1748              :          ! Sigma^-1 * U^T
    1749              :          CALL cp_fm_create(matrix=inv_sigma_ut, &
    1750              :                            matrix_struct=matrix_a%matrix_struct, &
    1751           28 :                            name="SINGULAR_VALUE_MATRIX")
    1752           28 :          CALL cp_fm_set_all(inv_sigma_ut, alpha=0.0_dp)
    1753              :          CALL pdgemm('N', 'T', n, n, n, 1.0_dp, sigma%local_data, 1, 1, desca, &
    1754           28 :                      u%local_data, 1, 1, desca, 0.0_dp, inv_sigma_ut%local_data, 1, 1, desca)
    1755              :          ! A^-1 = V * (Sigma^-1 * U^T)
    1756           28 :          CALL cp_fm_set_all(matrix_inverse, alpha=0.0_dp)
    1757              :          CALL pdgemm('T', 'N', n, n, n, 1.0_dp, vt%local_data, 1, 1, desca, &
    1758           28 :                      inv_sigma_ut%local_data, 1, 1, desca, 0.0_dp, matrix_inverse%local_data, 1, 1, desca)
    1759              :          ! Clean up
    1760           28 :          DEALLOCATE (work)
    1761           28 :          CALL cp_fm_release(u)
    1762           28 :          CALL cp_fm_release(vt)
    1763           28 :          CALL cp_fm_release(sigma)
    1764          140 :          CALL cp_fm_release(inv_sigma_ut)
    1765              :       END IF
    1766              : #else
    1767              :       IF (my_eps_svd == 0.0_dp) THEN
    1768              :          sign = .TRUE.
    1769              :          CALL invert_matrix(matrix_a%local_data, matrix_inverse%local_data, &
    1770              :                             eval_error=eps1)
    1771              :          CALL cp_fm_lu_decompose(matrix_lu, determinant, correct_sign=sign)
    1772              :          IF (PRESENT(eigval)) &
    1773              :             CALL cp_abort(__LOCATION__, &
    1774              :                           "NYI. Eigenvalues not available for return without SCALAPACK.")
    1775              :       ELSE
    1776              :          CALL get_pseudo_inverse_svd(matrix_a%local_data, matrix_inverse%local_data, eps_svd, &
    1777              :                                      determinant, eigval)
    1778              :       END IF
    1779              : #endif
    1780         1590 :       CALL cp_fm_release(matrix_lu)
    1781         1590 :       DEALLOCATE (ipivot)
    1782         1590 :       IF (PRESENT(det_a)) det_a = determinant
    1783         1590 :    END SUBROUTINE cp_fm_invert
    1784              : 
    1785              : ! **************************************************************************************************
    1786              : !> \brief inverts a triangular matrix
    1787              : !> \param matrix_a ...
    1788              : !> \param uplo_tr triangular format; defaults to 'U'
    1789              : !> \author MI
    1790              : ! **************************************************************************************************
    1791         5072 :    SUBROUTINE cp_fm_triangular_invert(matrix_a, uplo_tr)
    1792              : 
    1793              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix_a
    1794              :       CHARACTER, INTENT(IN), OPTIONAL          :: uplo_tr
    1795              : 
    1796              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_triangular_invert'
    1797              : 
    1798              :       CHARACTER                                :: unit_diag, uplo
    1799              :       INTEGER                                  :: handle, info, ncol_global
    1800         5072 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
    1801              : #if defined(__parallel)
    1802              :       INTEGER, DIMENSION(9)                    :: desca
    1803              : #endif
    1804              : 
    1805         5072 :       CALL timeset(routineN, handle)
    1806              : 
    1807         5072 :       unit_diag = 'N'
    1808         5072 :       uplo = 'U'
    1809         5072 :       IF (PRESENT(uplo_tr)) uplo = uplo_tr
    1810              : 
    1811         5072 :       ncol_global = matrix_a%matrix_struct%ncol_global
    1812              : 
    1813         5072 :       a => matrix_a%local_data
    1814              : 
    1815              : #if defined(__parallel)
    1816        50720 :       desca(:) = matrix_a%matrix_struct%descriptor(:)
    1817              : 
    1818         5072 :       CALL pdtrtri(uplo, unit_diag, ncol_global, a(1, 1), 1, 1, desca, info)
    1819              : 
    1820              : #else
    1821              :       CALL dtrtri(uplo, unit_diag, ncol_global, a(1, 1), ncol_global, info)
    1822              : #endif
    1823              : 
    1824         5072 :       CALL timestop(handle)
    1825         5072 :    END SUBROUTINE cp_fm_triangular_invert
    1826              : 
    1827              : ! **************************************************************************************************
    1828              : !> \brief  performs a QR factorization of the input rectangular matrix A or of a submatrix of A
    1829              : !>         the computed triangular matrix R is in output of the submatrix sub(A) of size NxN
    1830              : !>         M and M give the dimension of the submatrix that has to be factorized (MxN) with M>N
    1831              : !> \param matrix_a ...
    1832              : !> \param matrix_r ...
    1833              : !> \param nrow_fact ...
    1834              : !> \param ncol_fact ...
    1835              : !> \param first_row ...
    1836              : !> \param first_col ...
    1837              : !> \author MI
    1838              : ! **************************************************************************************************
    1839        19320 :    SUBROUTINE cp_fm_qr_factorization(matrix_a, matrix_r, nrow_fact, ncol_fact, first_row, first_col, uplo)
    1840              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix_a, matrix_r
    1841              :       INTEGER, INTENT(IN), OPTIONAL            :: nrow_fact, ncol_fact, &
    1842              :                                                   first_row, first_col
    1843              :       CHARACTER, INTENT(IN), OPTIONAL          :: uplo
    1844              : 
    1845              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_qr_factorization'
    1846              : 
    1847              :       CHARACTER                                :: myuplo
    1848              :       INTEGER                                  :: handle, i, icol, info, irow, &
    1849              :                                                   j, lda, lwork, ncol, &
    1850              :                                                   ndim, nrow
    1851        19320 :       REAL(dp), ALLOCATABLE, DIMENSION(:)      :: tau, work
    1852        19320 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :)   :: r_mat
    1853              :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
    1854              : #if defined(__parallel)
    1855              :       INTEGER, DIMENSION(9)                    :: desca
    1856              : #endif
    1857              : 
    1858        19320 :       CALL timeset(routineN, handle)
    1859              : 
    1860        19320 :       myuplo = 'U'
    1861        19320 :       IF (PRESENT(uplo)) myuplo = uplo
    1862              : 
    1863        19320 :       ncol = matrix_a%matrix_struct%ncol_global
    1864        19320 :       nrow = matrix_a%matrix_struct%nrow_global
    1865        19320 :       lda = nrow
    1866              : 
    1867        19320 :       a => matrix_a%local_data
    1868              : 
    1869        19320 :       IF (PRESENT(nrow_fact)) nrow = nrow_fact
    1870        19320 :       IF (PRESENT(ncol_fact)) ncol = ncol_fact
    1871        19320 :       irow = 1
    1872        19320 :       IF (PRESENT(first_row)) irow = first_row
    1873        19320 :       icol = 1
    1874        19320 :       IF (PRESENT(first_col)) icol = first_col
    1875              : 
    1876        19320 :       CPASSERT(nrow >= ncol)
    1877        19320 :       ndim = SIZE(a, 2)
    1878        57960 :       ALLOCATE (tau(ndim))
    1879              : 
    1880              : #if defined(__parallel)
    1881              : 
    1882       193200 :       desca(:) = matrix_a%matrix_struct%descriptor(:)
    1883              : 
    1884        19320 :       lwork = -1
    1885        57960 :       ALLOCATE (work(2*ndim))
    1886        19320 :       CALL pdgeqrf(nrow, ncol, a, irow, icol, desca, tau, work, lwork, info)
    1887        19320 :       lwork = INT(work(1))
    1888        19320 :       DEALLOCATE (work)
    1889        57960 :       ALLOCATE (work(lwork))
    1890        19320 :       CALL pdgeqrf(nrow, ncol, a, irow, icol, desca, tau, work, lwork, info)
    1891              : 
    1892              : #else
    1893              :       lwork = -1
    1894              :       ALLOCATE (work(2*ndim))
    1895              :       CALL dgeqrf(nrow, ncol, a, lda, tau, work, lwork, info)
    1896              :       lwork = INT(work(1))
    1897              :       DEALLOCATE (work)
    1898              :       ALLOCATE (work(lwork))
    1899              :       CALL dgeqrf(nrow, ncol, a, lda, tau, work, lwork, info)
    1900              : 
    1901              : #endif
    1902              : 
    1903        77280 :       ALLOCATE (r_mat(ncol, ncol))
    1904        19320 :       CALL cp_fm_get_submatrix(matrix_a, r_mat, 1, 1, ncol, ncol)
    1905        19320 :       IF ((myuplo == "U") .OR. (myuplo == "u")) THEN
    1906        38640 :          DO i = 1, ncol
    1907        38640 :          DO j = i + 1, ncol
    1908        19320 :             r_mat(j, i) = 0.0_dp
    1909              :          END DO
    1910              :          END DO
    1911              :       ELSE
    1912            0 :          DO j = 1, ncol
    1913            0 :          DO i = j + 1, ncol
    1914            0 :             r_mat(i, j) = 0.0_dp
    1915              :          END DO
    1916              :          END DO
    1917              :       END IF
    1918        19320 :       CALL cp_fm_set_submatrix(matrix_r, r_mat, 1, 1, ncol, ncol)
    1919              : 
    1920        19320 :       DEALLOCATE (tau, work, r_mat)
    1921              : 
    1922        19320 :       CALL timestop(handle)
    1923              : 
    1924        19320 :    END SUBROUTINE cp_fm_qr_factorization
    1925              : 
    1926              : ! **************************************************************************************************
    1927              : !> \brief computes the the solution to A*b=A_general using lu decomposition
    1928              : !> \param matrix_a input matrix; will be overwritten
    1929              : !> \param general_a contains the result
    1930              : !> \author Florian Schiffmann
    1931              : ! **************************************************************************************************
    1932         6832 :    SUBROUTINE cp_fm_solve(matrix_a, general_a)
    1933              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix_a, general_a
    1934              : 
    1935              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_solve'
    1936              : 
    1937              :       INTEGER                                  :: handle, info, n, nrhs
    1938         6832 :       INTEGER, ALLOCATABLE, DIMENSION(:)       :: ipivot
    1939              :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a, a_general
    1940              : #if defined(__parallel)
    1941              :       INTEGER, DIMENSION(9)                    :: desca, descb
    1942              : #else
    1943              :       INTEGER                                  :: lda, ldb
    1944              : #endif
    1945              : 
    1946         6832 :       CALL timeset(routineN, handle)
    1947              : 
    1948         6832 :       a => matrix_a%local_data
    1949         6832 :       a_general => general_a%local_data
    1950         6832 :       n = matrix_a%matrix_struct%nrow_global
    1951         6832 :       nrhs = general_a%matrix_struct%ncol_global
    1952        20496 :       ALLOCATE (ipivot(n + matrix_a%matrix_struct%nrow_block))
    1953              : 
    1954              : #if defined(__parallel)
    1955        68320 :       desca(:) = matrix_a%matrix_struct%descriptor(:)
    1956        68320 :       descb(:) = general_a%matrix_struct%descriptor(:)
    1957         6832 :       CALL pdgetrf(n, n, a, 1, 1, desca, ipivot, info)
    1958              :       CALL pdgetrs("N", n, nrhs, a, 1, 1, desca, ipivot, a_general, &
    1959         6832 :                    1, 1, descb, info)
    1960              : 
    1961              : #else
    1962              :       lda = SIZE(a, 1)
    1963              :       ldb = SIZE(a_general, 1)
    1964              :       CALL dgetrf(n, n, a, lda, ipivot, info)
    1965              :       CALL dgetrs("N", n, nrhs, a, lda, ipivot, a_general, ldb, info)
    1966              : 
    1967              : #endif
    1968              :       ! info is allowed to be zero
    1969              :       ! this does just signal a zero diagonal element
    1970         6832 :       DEALLOCATE (ipivot)
    1971         6832 :       CALL timestop(handle)
    1972         6832 :    END SUBROUTINE
    1973              : 
    1974              : ! **************************************************************************************************
    1975              : !> \brief Convenience function. Computes the matrix multiplications needed
    1976              : !>        for the multiplication of complex matrices.
    1977              : !>        C = beta * C + alpha * ( A  ** transa ) * ( B ** transb )
    1978              : !> \param transa : 'N' -> normal   'T' -> transpose
    1979              : !>      alpha,beta :: can be 0.0_dp and 1.0_dp
    1980              : !> \param transb ...
    1981              : !> \param m ...
    1982              : !> \param n ...
    1983              : !> \param k ...
    1984              : !> \param alpha ...
    1985              : !> \param A_re m x k matrix ( ! for transa = 'N'), real part
    1986              : !> \param A_im m x k matrix ( ! for transa = 'N'), imaginary part
    1987              : !> \param B_re k x n matrix ( ! for transa = 'N'), real part
    1988              : !> \param B_im k x n matrix ( ! for transa = 'N'), imaginary part
    1989              : !> \param beta ...
    1990              : !> \param C_re m x n matrix, real part
    1991              : !> \param C_im m x n matrix, imaginary part
    1992              : !> \param a_first_col ...
    1993              : !> \param a_first_row ...
    1994              : !> \param b_first_col : the k x n matrix starts at col b_first_col of matrix_b (avoid usage)
    1995              : !> \param b_first_row ...
    1996              : !> \param c_first_col ...
    1997              : !> \param c_first_row ...
    1998              : !> \author Samuel Andermatt
    1999              : !> \note
    2000              : !>      C should have no overlap with A, B
    2001              : ! **************************************************************************************************
    2002            0 :    SUBROUTINE cp_complex_fm_gemm(transa, transb, m, n, k, alpha, A_re, A_im, B_re, B_im, beta, &
    2003              :                                  C_re, C_im, a_first_col, a_first_row, b_first_col, b_first_row, c_first_col, &
    2004              :                                  c_first_row)
    2005              :       CHARACTER(LEN=1), INTENT(IN)                       :: transa, transb
    2006              :       INTEGER, INTENT(IN)                                :: m, n, k
    2007              :       REAL(KIND=dp), INTENT(IN)                          :: alpha
    2008              :       TYPE(cp_fm_type), INTENT(IN)                       :: A_re, A_im, B_re, B_im
    2009              :       REAL(KIND=dp), INTENT(IN)                          :: beta
    2010              :       TYPE(cp_fm_type), INTENT(IN)                       :: C_re, C_im
    2011              :       INTEGER, INTENT(IN), OPTIONAL                      :: a_first_col, a_first_row, b_first_col, &
    2012              :                                                             b_first_row, c_first_col, c_first_row
    2013              : 
    2014              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_complex_fm_gemm'
    2015              : 
    2016              :       INTEGER                                            :: handle
    2017              : 
    2018            0 :       CALL timeset(routineN, handle)
    2019              : 
    2020              :       CALL cp_fm_gemm(transa, transb, m, n, k, alpha, A_re, B_re, beta, C_re, &
    2021              :                       a_first_col=a_first_col, &
    2022              :                       a_first_row=a_first_row, &
    2023              :                       b_first_col=b_first_col, &
    2024              :                       b_first_row=b_first_row, &
    2025              :                       c_first_col=c_first_col, &
    2026            0 :                       c_first_row=c_first_row)
    2027              :       CALL cp_fm_gemm(transa, transb, m, n, k, -alpha, A_im, B_im, 1.0_dp, C_re, &
    2028              :                       a_first_col=a_first_col, &
    2029              :                       a_first_row=a_first_row, &
    2030              :                       b_first_col=b_first_col, &
    2031              :                       b_first_row=b_first_row, &
    2032              :                       c_first_col=c_first_col, &
    2033            0 :                       c_first_row=c_first_row)
    2034              :       CALL cp_fm_gemm(transa, transb, m, n, k, alpha, A_re, B_im, beta, C_im, &
    2035              :                       a_first_col=a_first_col, &
    2036              :                       a_first_row=a_first_row, &
    2037              :                       b_first_col=b_first_col, &
    2038              :                       b_first_row=b_first_row, &
    2039              :                       c_first_col=c_first_col, &
    2040            0 :                       c_first_row=c_first_row)
    2041              :       CALL cp_fm_gemm(transa, transb, m, n, k, alpha, A_im, B_re, 1.0_dp, C_im, &
    2042              :                       a_first_col=a_first_col, &
    2043              :                       a_first_row=a_first_row, &
    2044              :                       b_first_col=b_first_col, &
    2045              :                       b_first_row=b_first_row, &
    2046              :                       c_first_col=c_first_col, &
    2047            0 :                       c_first_row=c_first_row)
    2048              : 
    2049            0 :       CALL timestop(handle)
    2050              : 
    2051            0 :    END SUBROUTINE cp_complex_fm_gemm
    2052              : 
    2053              : ! **************************************************************************************************
    2054              : !> \brief inverts a matrix using LU decomposition
    2055              : !>        the input matrix will be overwritten
    2056              : !> \param matrix   : input a general square non-singular matrix, outputs its inverse
    2057              : !> \param info_out : optional, if present outputs the info from (p)zgetri
    2058              : !> \author Lianheng Tong
    2059              : ! **************************************************************************************************
    2060            0 :    SUBROUTINE cp_fm_lu_invert(matrix, info_out)
    2061              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix
    2062              :       INTEGER, INTENT(OUT), OPTIONAL           :: info_out
    2063              : 
    2064              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_lu_invert'
    2065              : 
    2066              :       INTEGER :: nrows_global, handle, info, lwork
    2067            0 :       INTEGER, DIMENSION(:), ALLOCATABLE       :: ipivot
    2068              :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: mat
    2069              :       REAL(KIND=sp), DIMENSION(:, :), POINTER  :: mat_sp
    2070            0 :       REAL(KIND=dp), DIMENSION(:), ALLOCATABLE :: work
    2071            0 :       REAL(KIND=sp), DIMENSION(:), ALLOCATABLE :: work_sp
    2072              : #if defined(__parallel)
    2073              :       INTEGER                                  :: liwork
    2074              :       INTEGER, DIMENSION(9)                    :: desca
    2075            0 :       INTEGER, DIMENSION(:), ALLOCATABLE       :: iwork
    2076              : #else
    2077              :       INTEGER                                  :: lda
    2078              : #endif
    2079              : 
    2080            0 :       CALL timeset(routineN, handle)
    2081              : 
    2082            0 :       mat => matrix%local_data
    2083            0 :       mat_sp => matrix%local_data_sp
    2084            0 :       nrows_global = matrix%matrix_struct%nrow_global
    2085            0 :       CPASSERT(nrows_global == matrix%matrix_struct%ncol_global)
    2086            0 :       ALLOCATE (ipivot(nrows_global))
    2087              :       ! do LU decomposition
    2088              : #if defined(__parallel)
    2089            0 :       desca = matrix%matrix_struct%descriptor
    2090            0 :       IF (matrix%use_sp) THEN
    2091              :          CALL psgetrf(nrows_global, nrows_global, &
    2092            0 :                       mat_sp, 1, 1, desca, ipivot, info)
    2093              :       ELSE
    2094              :          CALL pdgetrf(nrows_global, nrows_global, &
    2095            0 :                       mat, 1, 1, desca, ipivot, info)
    2096              :       END IF
    2097              : #else
    2098              :       lda = SIZE(mat, 1)
    2099              :       IF (matrix%use_sp) THEN
    2100              :          CALL sgetrf(nrows_global, nrows_global, &
    2101              :                      mat_sp, lda, ipivot, info)
    2102              :       ELSE
    2103              :          CALL dgetrf(nrows_global, nrows_global, &
    2104              :                      mat, lda, ipivot, info)
    2105              :       END IF
    2106              : #endif
    2107            0 :       IF (info /= 0) THEN
    2108            0 :          CALL cp_abort(__LOCATION__, "LU decomposition has failed")
    2109              :       END IF
    2110              :       ! do inversion
    2111            0 :       IF (matrix%use_sp) THEN
    2112            0 :          ALLOCATE (work(1))
    2113              :       ELSE
    2114            0 :          ALLOCATE (work_sp(1))
    2115              :       END IF
    2116              : #if defined(__parallel)
    2117            0 :       ALLOCATE (iwork(1))
    2118            0 :       IF (matrix%use_sp) THEN
    2119              :          CALL psgetri(nrows_global, mat_sp, 1, 1, desca, &
    2120            0 :                       ipivot, work_sp, -1, iwork, -1, info)
    2121            0 :          lwork = INT(work_sp(1))
    2122            0 :          DEALLOCATE (work_sp)
    2123            0 :          ALLOCATE (work_sp(lwork))
    2124              :       ELSE
    2125              :          CALL pdgetri(nrows_global, mat, 1, 1, desca, &
    2126            0 :                       ipivot, work, -1, iwork, -1, info)
    2127            0 :          lwork = INT(work(1))
    2128            0 :          DEALLOCATE (work)
    2129            0 :          ALLOCATE (work(lwork))
    2130              :       END IF
    2131            0 :       liwork = INT(iwork(1))
    2132            0 :       DEALLOCATE (iwork)
    2133            0 :       ALLOCATE (iwork(liwork))
    2134            0 :       IF (matrix%use_sp) THEN
    2135              :          CALL psgetri(nrows_global, mat_sp, 1, 1, desca, &
    2136            0 :                       ipivot, work_sp, lwork, iwork, liwork, info)
    2137              :       ELSE
    2138              :          CALL pdgetri(nrows_global, mat, 1, 1, desca, &
    2139            0 :                       ipivot, work, lwork, iwork, liwork, info)
    2140              :       END IF
    2141            0 :       DEALLOCATE (iwork)
    2142              : #else
    2143              :       IF (matrix%use_sp) THEN
    2144              :          CALL sgetri(nrows_global, mat_sp, lda, &
    2145              :                      ipivot, work_sp, -1, info)
    2146              :          lwork = INT(work_sp(1))
    2147              :          DEALLOCATE (work_sp)
    2148              :          ALLOCATE (work_sp(lwork))
    2149              :          CALL sgetri(nrows_global, mat_sp, lda, &
    2150              :                      ipivot, work_sp, lwork, info)
    2151              :       ELSE
    2152              :          CALL dgetri(nrows_global, mat, lda, &
    2153              :                      ipivot, work, -1, info)
    2154              :          lwork = INT(work(1))
    2155              :          DEALLOCATE (work)
    2156              :          ALLOCATE (work(lwork))
    2157              :          CALL dgetri(nrows_global, mat, lda, &
    2158              :                      ipivot, work, lwork, info)
    2159              :       END IF
    2160              : #endif
    2161            0 :       IF (matrix%use_sp) THEN
    2162            0 :          DEALLOCATE (work_sp)
    2163              :       ELSE
    2164            0 :          DEALLOCATE (work)
    2165              :       END IF
    2166            0 :       DEALLOCATE (ipivot)
    2167              : 
    2168            0 :       IF (PRESENT(info_out)) THEN
    2169            0 :          info_out = info
    2170              :       ELSE
    2171            0 :          IF (info /= 0) &
    2172            0 :             CALL cp_abort(__LOCATION__, "LU inversion has failed")
    2173              :       END IF
    2174              : 
    2175            0 :       CALL timestop(handle)
    2176              : 
    2177            0 :    END SUBROUTINE cp_fm_lu_invert
    2178              : 
    2179              : ! **************************************************************************************************
    2180              : !> \brief norm of matrix using (p)dlange
    2181              : !> \param matrix   : input a general matrix
    2182              : !> \param mode     : 'M' max abs element value,
    2183              : !>                   '1' or 'O' one norm, i.e. maximum column sum
    2184              : !>                   'I' infinity norm, i.e. maximum row sum
    2185              : !>                   'F' or 'E' Frobenius norm, i.e. sqrt of sum of all squares of elements
    2186              : !> \return : the norm according to mode
    2187              : !> \author Lianheng Tong
    2188              : ! **************************************************************************************************
    2189          492 :    FUNCTION cp_fm_norm(matrix, mode) RESULT(res)
    2190              :       TYPE(cp_fm_type), INTENT(IN) :: matrix
    2191              :       CHARACTER, INTENT(IN) :: mode
    2192              :       REAL(KIND=dp) :: res
    2193              : 
    2194              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_norm'
    2195              : 
    2196              :       INTEGER :: nrows, ncols, handle, lwork, nrows_local, ncols_local
    2197              :       REAL(KIND=sp) :: res_sp
    2198              :       REAL(KIND=dp), DIMENSION(:, :), POINTER :: aa
    2199              :       REAL(KIND=sp), DIMENSION(:, :), POINTER :: aa_sp
    2200          492 :       REAL(KIND=dp), DIMENSION(:), ALLOCATABLE :: work
    2201          492 :       REAL(KIND=sp), DIMENSION(:), ALLOCATABLE :: work_sp
    2202              : #if defined(__parallel)
    2203              :       INTEGER, DIMENSION(9) :: desca
    2204              : #else
    2205              :       INTEGER :: lda
    2206              : #endif
    2207              : 
    2208          492 :       CALL timeset(routineN, handle)
    2209              : 
    2210              :       CALL cp_fm_get_info(matrix=matrix, &
    2211              :                           nrow_global=nrows, &
    2212              :                           ncol_global=ncols, &
    2213              :                           nrow_local=nrows_local, &
    2214          492 :                           ncol_local=ncols_local)
    2215          492 :       aa => matrix%local_data
    2216          492 :       aa_sp => matrix%local_data_sp
    2217              : 
    2218              : #if defined(__parallel)
    2219         4920 :       desca = matrix%matrix_struct%descriptor
    2220              :       SELECT CASE (mode)
    2221              :       CASE ('M', 'm')
    2222          492 :          lwork = 1
    2223              :       CASE ('1', 'O', 'o')
    2224          492 :          lwork = ncols_local
    2225              :       CASE ('I', 'i')
    2226            0 :          lwork = nrows_local
    2227              :       CASE ('F', 'f', 'E', 'e')
    2228            0 :          lwork = 1
    2229              :       CASE DEFAULT
    2230          492 :          CPABORT("mode input is not valid")
    2231              :       END SELECT
    2232          492 :       IF (matrix%use_sp) THEN
    2233            0 :          ALLOCATE (work_sp(lwork))
    2234            0 :          res_sp = pslange(mode, nrows, ncols, aa_sp, 1, 1, desca, work_sp)
    2235            0 :          DEALLOCATE (work_sp)
    2236            0 :          res = REAL(res_sp, KIND=dp)
    2237              :       ELSE
    2238         1476 :          ALLOCATE (work(lwork))
    2239          492 :          res = pdlange(mode, nrows, ncols, aa, 1, 1, desca, work)
    2240          492 :          DEALLOCATE (work)
    2241              :       END IF
    2242              : #else
    2243              :       SELECT CASE (mode)
    2244              :       CASE ('M', 'm')
    2245              :          lwork = 1
    2246              :       CASE ('1', 'O', 'o')
    2247              :          lwork = 1
    2248              :       CASE ('I', 'i')
    2249              :          lwork = nrows
    2250              :       CASE ('F', 'f', 'E', 'e')
    2251              :          lwork = 1
    2252              :       CASE DEFAULT
    2253              :          CPABORT("mode input is not valid")
    2254              :       END SELECT
    2255              :       IF (matrix%use_sp) THEN
    2256              :          ALLOCATE (work_sp(lwork))
    2257              :          lda = SIZE(aa_sp, 1)
    2258              :          res_sp = slange(mode, nrows, ncols, aa_sp, lda, work_sp)
    2259              :          DEALLOCATE (work_sp)
    2260              :          res = REAL(res_sp, KIND=dp)
    2261              :       ELSE
    2262              :          ALLOCATE (work(lwork))
    2263              :          lda = SIZE(aa, 1)
    2264              :          res = dlange(mode, nrows, ncols, aa, lda, work)
    2265              :          DEALLOCATE (work)
    2266              :       END IF
    2267              : #endif
    2268              : 
    2269          492 :       CALL timestop(handle)
    2270              : 
    2271          492 :    END FUNCTION cp_fm_norm
    2272              : 
    2273              : ! **************************************************************************************************
    2274              : !> \brief trace of a matrix using pdlatra
    2275              : !> \param matrix   : input a square matrix
    2276              : !> \return : the trace
    2277              : !> \author Lianheng Tong
    2278              : ! **************************************************************************************************
    2279            0 :    FUNCTION cp_fm_latra(matrix) RESULT(res)
    2280              :       TYPE(cp_fm_type), INTENT(IN) :: matrix
    2281              :       REAL(KIND=dp) :: res
    2282              : 
    2283              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_latra'
    2284              : 
    2285              :       INTEGER :: nrows, ncols, handle
    2286              :       REAL(KIND=sp) :: res_sp
    2287              :       REAL(KIND=dp), DIMENSION(:, :), POINTER :: aa
    2288              :       REAL(KIND=sp), DIMENSION(:, :), POINTER :: aa_sp
    2289              : #if defined(__parallel)
    2290              :       INTEGER, DIMENSION(9) :: desca
    2291              : #else
    2292              :       INTEGER :: ii
    2293              : #endif
    2294              : 
    2295            0 :       CALL timeset(routineN, handle)
    2296              : 
    2297            0 :       nrows = matrix%matrix_struct%nrow_global
    2298            0 :       ncols = matrix%matrix_struct%ncol_global
    2299            0 :       CPASSERT(nrows == ncols)
    2300            0 :       aa => matrix%local_data
    2301            0 :       aa_sp => matrix%local_data_sp
    2302              : 
    2303              : #if defined(__parallel)
    2304            0 :       desca = matrix%matrix_struct%descriptor
    2305            0 :       IF (matrix%use_sp) THEN
    2306            0 :          res_sp = pslatra(nrows, aa_sp, 1, 1, desca)
    2307            0 :          res = REAL(res_sp, KIND=dp)
    2308              :       ELSE
    2309            0 :          res = pdlatra(nrows, aa, 1, 1, desca)
    2310              :       END IF
    2311              : #else
    2312              :       IF (matrix%use_sp) THEN
    2313              :          res_sp = 0.0_sp
    2314              :          DO ii = 1, nrows
    2315              :             res_sp = res_sp + aa_sp(ii, ii)
    2316              :          END DO
    2317              :          res = REAL(res_sp, KIND=dp)
    2318              :       ELSE
    2319              :          res = 0.0_dp
    2320              :          DO ii = 1, nrows
    2321              :             res = res + aa(ii, ii)
    2322              :          END DO
    2323              :       END IF
    2324              : #endif
    2325              : 
    2326            0 :       CALL timestop(handle)
    2327              : 
    2328            0 :    END FUNCTION cp_fm_latra
    2329              : 
    2330              : ! **************************************************************************************************
    2331              : !> \brief compute a QR factorization with column pivoting of a M-by-N distributed matrix
    2332              : !>        sub( A ) = A(IA:IA+M-1,JA:JA+N-1)
    2333              : !> \param matrix   : input M-by-N distributed matrix sub( A ) which is to be factored
    2334              : !> \param tau      : scalar factors TAU of the elementary reflectors. TAU is tied to the distributed matrix A
    2335              : !> \param nrow ...
    2336              : !> \param ncol ...
    2337              : !> \param first_row ...
    2338              : !> \param first_col ...
    2339              : !> \author MI
    2340              : ! **************************************************************************************************
    2341           36 :    SUBROUTINE cp_fm_pdgeqpf(matrix, tau, nrow, ncol, first_row, first_col)
    2342              : 
    2343              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix
    2344              :       REAL(KIND=dp), DIMENSION(:), POINTER               :: tau
    2345              :       INTEGER, INTENT(IN)                                :: nrow, ncol, first_row, first_col
    2346              : 
    2347              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_pdgeqpf'
    2348              : 
    2349              :       INTEGER                                            :: handle
    2350              :       INTEGER                                            :: info, lwork
    2351           36 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: ipiv
    2352              :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: a
    2353              :       REAL(KIND=dp), DIMENSION(:), POINTER               :: work
    2354              : #if defined(__parallel)
    2355              :       INTEGER, DIMENSION(9) :: descc
    2356              : #else
    2357              :       INTEGER :: lda
    2358              : #endif
    2359              : 
    2360           36 :       CALL timeset(routineN, handle)
    2361              : 
    2362           36 :       a => matrix%local_data
    2363           36 :       lwork = -1
    2364          108 :       ALLOCATE (work(2*nrow))
    2365          108 :       ALLOCATE (ipiv(ncol))
    2366           36 :       info = 0
    2367              : 
    2368              : #if defined(__parallel)
    2369          360 :       descc(:) = matrix%matrix_struct%descriptor(:)
    2370              :       ! Call SCALAPACK routine to get optimal work dimension
    2371           36 :       CALL pdgeqpf(nrow, ncol, a, first_row, first_col, descc, ipiv, tau, work, lwork, info)
    2372           36 :       lwork = INT(work(1))
    2373           36 :       DEALLOCATE (work)
    2374          108 :       ALLOCATE (work(lwork))
    2375          244 :       tau = 0.0_dp
    2376          354 :       ipiv = 0
    2377              : 
    2378              :       ! Call SCALAPACK routine to get QR decomposition of CTs
    2379           36 :       CALL pdgeqpf(nrow, ncol, a, first_row, first_col, descc, ipiv, tau, work, lwork, info)
    2380              : #else
    2381              :       CPASSERT(first_row == 1 .AND. first_col == 1)
    2382              :       lda = SIZE(a, 1)
    2383              :       CALL dgeqp3(nrow, ncol, a, lda, ipiv, tau, work, lwork, info)
    2384              :       lwork = INT(work(1))
    2385              :       DEALLOCATE (work)
    2386              :       ALLOCATE (work(lwork))
    2387              :       tau = 0.0_dp
    2388              :       ipiv = 0
    2389              :       CALL dgeqp3(nrow, ncol, a, lda, ipiv, tau, work, lwork, info)
    2390              : #endif
    2391           36 :       CPASSERT(info == 0)
    2392              : 
    2393           36 :       DEALLOCATE (work)
    2394           36 :       DEALLOCATE (ipiv)
    2395              : 
    2396           36 :       CALL timestop(handle)
    2397              : 
    2398           36 :    END SUBROUTINE cp_fm_pdgeqpf
    2399              : 
    2400              : ! **************************************************************************************************
    2401              : !> \brief generates an M-by-N real distributed matrix Q denoting A(IA:IA+M-1,JA:JA+N-1)
    2402              : !>         with orthonormal columns, which is defined as the first N columns of a product of K
    2403              : !>         elementary reflectors of order M
    2404              : !> \param matrix : On entry, the j-th column must contain the vector which defines the elementary reflector
    2405              : !>                  as returned from PDGEQRF
    2406              : !>                 On exit it contains  the M-by-N distributed matrix Q
    2407              : !> \param tau :   contains the scalar factors TAU of elementary reflectors  as returned by PDGEQRF
    2408              : !> \param nrow ...
    2409              : !> \param first_row ...
    2410              : !> \param first_col ...
    2411              : !> \author MI
    2412              : ! **************************************************************************************************
    2413           36 :    SUBROUTINE cp_fm_pdorgqr(matrix, tau, nrow, first_row, first_col)
    2414              : 
    2415              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix
    2416              :       REAL(KIND=dp), DIMENSION(:), POINTER               :: tau
    2417              :       INTEGER, INTENT(IN)                                :: nrow, first_row, first_col
    2418              : 
    2419              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_pdorgqr'
    2420              : 
    2421              :       INTEGER                                            :: handle
    2422              :       INTEGER                                            :: info, lwork
    2423              :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: a
    2424              :       REAL(KIND=dp), DIMENSION(:), POINTER               :: work
    2425              : #if defined(__parallel)
    2426              :       INTEGER, DIMENSION(9) :: descc
    2427              : #else
    2428              :       INTEGER :: lda
    2429              : #endif
    2430              : 
    2431           36 :       CALL timeset(routineN, handle)
    2432              : 
    2433           36 :       a => matrix%local_data
    2434           36 :       lwork = -1
    2435          108 :       ALLOCATE (work(2*nrow))
    2436           36 :       info = 0
    2437              : 
    2438              : #if defined(__parallel)
    2439          360 :       descc(:) = matrix%matrix_struct%descriptor(:)
    2440              : 
    2441           36 :       CALL pdorgqr(nrow, nrow, nrow, a, first_row, first_col, descc, tau, work, lwork, info)
    2442           36 :       CPASSERT(info == 0)
    2443           36 :       lwork = INT(work(1))
    2444           36 :       DEALLOCATE (work)
    2445          108 :       ALLOCATE (work(lwork))
    2446              : 
    2447              :       ! Call SCALAPACK routine to get Q
    2448           36 :       CALL pdorgqr(nrow, nrow, nrow, a, first_row, first_col, descc, tau, work, lwork, info)
    2449              : #else
    2450              :       CPASSERT(first_row == 1 .AND. first_col == 1)
    2451              :       lda = SIZE(a, 1)
    2452              :       CALL dorgqr(nrow, nrow, nrow, a, lda, tau, work, lwork, info)
    2453              :       lwork = INT(work(1))
    2454              :       DEALLOCATE (work)
    2455              :       ALLOCATE (work(lwork))
    2456              :       CALL dorgqr(nrow, nrow, nrow, a, lda, tau, work, lwork, info)
    2457              : #endif
    2458           36 :       CPASSERT(INFO == 0)
    2459              : 
    2460           36 :       DEALLOCATE (work)
    2461           36 :       CALL timestop(handle)
    2462              : 
    2463           36 :    END SUBROUTINE cp_fm_pdorgqr
    2464              : 
    2465              : ! **************************************************************************************************
    2466              : !> \brief Applies a planar rotation defined by cs and sn to the i'th and j'th rows.
    2467              : !> \param matrix ...
    2468              : !> \param irow ...
    2469              : !> \param jrow ...
    2470              : !> \param cs cosine of the rotation angle
    2471              : !> \param sn sinus of the rotation angle
    2472              : !> \author Ole Schuett
    2473              : ! **************************************************************************************************
    2474       565504 :    SUBROUTINE cp_fm_rot_rows(matrix, irow, jrow, cs, sn)
    2475              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix
    2476              :       INTEGER, INTENT(IN)                      :: irow, jrow
    2477              :       REAL(dp), INTENT(IN)                     :: cs, sn
    2478              : 
    2479              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_rot_rows'
    2480              :       INTEGER                                  :: handle, ncol
    2481              : 
    2482              : #if defined(__parallel)
    2483              :       INTEGER                                  :: info, lwork
    2484              :       INTEGER, DIMENSION(9)                    :: desc
    2485       565504 :       REAL(dp), DIMENSION(:), ALLOCATABLE      :: work
    2486              : #endif
    2487       565504 :       CALL timeset(routineN, handle)
    2488       565504 :       CALL cp_fm_get_info(matrix, ncol_global=ncol)
    2489              : #if defined(__parallel)
    2490       565504 :       IF (1 /= matrix%matrix_struct%context%n_pid) THEN
    2491       565504 :          lwork = 2*ncol + 1
    2492      1696512 :          ALLOCATE (work(lwork))
    2493      5655040 :          desc(:) = matrix%matrix_struct%descriptor(:)
    2494              :          CALL pdrot(ncol, &
    2495              :                     matrix%local_data(1, 1), irow, 1, desc, ncol, &
    2496              :                     matrix%local_data(1, 1), jrow, 1, desc, ncol, &
    2497       565504 :                     cs, sn, work, lwork, info)
    2498       565504 :          CPASSERT(info == 0)
    2499       565504 :          DEALLOCATE (work)
    2500              :       ELSE
    2501              : #endif
    2502            0 :          CALL drot(ncol, matrix%local_data(irow, 1), ncol, matrix%local_data(jrow, 1), ncol, cs, sn)
    2503              : #if defined(__parallel)
    2504              :       END IF
    2505              : #endif
    2506       565504 :       CALL timestop(handle)
    2507       565504 :    END SUBROUTINE cp_fm_rot_rows
    2508              : 
    2509              : ! **************************************************************************************************
    2510              : !> \brief Applies a planar rotation defined by cs and sn to the i'th and j'th columnns.
    2511              : !> \param matrix ...
    2512              : !> \param icol ...
    2513              : !> \param jcol ...
    2514              : !> \param cs cosine of the rotation angle
    2515              : !> \param sn sinus of the rotation angle
    2516              : !> \author Ole Schuett
    2517              : ! **************************************************************************************************
    2518       637106 :    SUBROUTINE cp_fm_rot_cols(matrix, icol, jcol, cs, sn)
    2519              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix
    2520              :       INTEGER, INTENT(IN)                      :: icol, jcol
    2521              :       REAL(dp), INTENT(IN)                     :: cs, sn
    2522              : 
    2523              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_rot_cols'
    2524              :       INTEGER                                  :: handle, nrow
    2525              : 
    2526              : #if defined(__parallel)
    2527              :       INTEGER                                  :: info, lwork
    2528              :       INTEGER, DIMENSION(9)                    :: desc
    2529       637106 :       REAL(dp), DIMENSION(:), ALLOCATABLE      :: work
    2530              : #endif
    2531       637106 :       CALL timeset(routineN, handle)
    2532       637106 :       CALL cp_fm_get_info(matrix, nrow_global=nrow)
    2533              : #if defined(__parallel)
    2534       637106 :       IF (1 /= matrix%matrix_struct%context%n_pid) THEN
    2535       637106 :          lwork = 2*nrow + 1
    2536      1911318 :          ALLOCATE (work(lwork))
    2537      6371060 :          desc(:) = matrix%matrix_struct%descriptor(:)
    2538              :          CALL pdrot(nrow, &
    2539              :                     matrix%local_data(1, 1), 1, icol, desc, 1, &
    2540              :                     matrix%local_data(1, 1), 1, jcol, desc, 1, &
    2541       637106 :                     cs, sn, work, lwork, info)
    2542       637106 :          CPASSERT(info == 0)
    2543       637106 :          DEALLOCATE (work)
    2544              :       ELSE
    2545              : #endif
    2546            0 :          CALL drot(nrow, matrix%local_data(1, icol), 1, matrix%local_data(1, jcol), 1, cs, sn)
    2547              : #if defined(__parallel)
    2548              :       END IF
    2549              : #endif
    2550       637106 :       CALL timestop(handle)
    2551       637106 :    END SUBROUTINE cp_fm_rot_cols
    2552              : 
    2553              : ! **************************************************************************************************
    2554              : !> \brief Orthonormalizes selected rows and columns of a full matrix, matrix_a
    2555              : !> \param matrix_a ...
    2556              : !> \param B ...
    2557              : !> \param nrows number of rows of matrix_a, optional, defaults to size(matrix_a,1)
    2558              : !> \param ncols number of columns of matrix_a, optional, defaults to size(matrix_a, 2)
    2559              : !> \param start_row starting index of rows, optional, defaults to 1
    2560              : !> \param start_col starting index of columns, optional, defaults to 1
    2561              : !> \param do_norm ...
    2562              : !> \param do_print ...
    2563              : ! **************************************************************************************************
    2564            0 :    SUBROUTINE cp_fm_Gram_Schmidt_orthonorm(matrix_a, B, nrows, ncols, start_row, start_col, &
    2565              :                                            do_norm, do_print)
    2566              : 
    2567              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix_a
    2568              :       REAL(kind=dp), DIMENSION(:, :), INTENT(OUT)        :: B
    2569              :       INTEGER, INTENT(IN), OPTIONAL                      :: nrows, ncols, start_row, start_col
    2570              :       LOGICAL, INTENT(IN), OPTIONAL                      :: do_norm, do_print
    2571              : 
    2572              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_Gram_Schmidt_orthonorm'
    2573              : 
    2574              :       INTEGER :: end_col_global, end_col_local, end_row_global, end_row_local, handle, i, j, &
    2575              :                  j_col, ncol_global, ncol_local, nrow_global, nrow_local, start_col_global, &
    2576              :                  start_col_local, start_row_global, start_row_local, this_col, unit_nr
    2577            0 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
    2578              :       LOGICAL                                            :: my_do_norm, my_do_print
    2579              :       REAL(KIND=dp)                                      :: norm
    2580            0 :       REAL(kind=dp), DIMENSION(:, :), POINTER            :: a
    2581              : 
    2582            0 :       CALL timeset(routineN, handle)
    2583              : 
    2584            0 :       my_do_norm = .TRUE.
    2585            0 :       IF (PRESENT(do_norm)) my_do_norm = do_norm
    2586              : 
    2587            0 :       my_do_print = .FALSE.
    2588            0 :       IF (PRESENT(do_print) .AND. (my_do_norm)) my_do_print = do_print
    2589              : 
    2590            0 :       unit_nr = -1
    2591            0 :       IF (my_do_print) THEN
    2592            0 :          unit_nr = cp_logger_get_default_unit_nr()
    2593            0 :          IF (unit_nr < 1) my_do_print = .FALSE.
    2594              :       END IF
    2595              : 
    2596            0 :       IF (SIZE(B) /= 0) THEN
    2597            0 :          IF (PRESENT(nrows)) THEN
    2598            0 :             nrow_global = nrows
    2599              :          ELSE
    2600            0 :             nrow_global = SIZE(B, 1)
    2601              :          END IF
    2602              : 
    2603            0 :          IF (PRESENT(ncols)) THEN
    2604            0 :             ncol_global = ncols
    2605              :          ELSE
    2606            0 :             ncol_global = SIZE(B, 2)
    2607              :          END IF
    2608              : 
    2609            0 :          IF (PRESENT(start_row)) THEN
    2610            0 :             start_row_global = start_row
    2611              :          ELSE
    2612              :             start_row_global = 1
    2613              :          END IF
    2614              : 
    2615            0 :          IF (PRESENT(start_col)) THEN
    2616            0 :             start_col_global = start_col
    2617              :          ELSE
    2618              :             start_col_global = 1
    2619              :          END IF
    2620              : 
    2621            0 :          end_row_global = start_row_global + nrow_global - 1
    2622            0 :          end_col_global = start_col_global + ncol_global - 1
    2623              : 
    2624              :          CALL cp_fm_get_info(matrix=matrix_a, &
    2625              :                              nrow_global=nrow_global, ncol_global=ncol_global, &
    2626              :                              nrow_local=nrow_local, ncol_local=ncol_local, &
    2627            0 :                              row_indices=row_indices, col_indices=col_indices)
    2628            0 :          IF (end_row_global > nrow_global) THEN
    2629              :             end_row_global = nrow_global
    2630              :          END IF
    2631            0 :          IF (end_col_global > ncol_global) THEN
    2632              :             end_col_global = ncol_global
    2633              :          END IF
    2634              : 
    2635              :          ! find out row/column indices of locally stored matrix elements that
    2636              :          ! needs to be copied.
    2637              :          ! Arrays row_indices and col_indices are assumed to be sorted in
    2638              :          ! ascending order
    2639            0 :          DO start_row_local = 1, nrow_local
    2640            0 :             IF (row_indices(start_row_local) >= start_row_global) EXIT
    2641              :          END DO
    2642              : 
    2643            0 :          DO end_row_local = start_row_local, nrow_local
    2644            0 :             IF (row_indices(end_row_local) > end_row_global) EXIT
    2645              :          END DO
    2646            0 :          end_row_local = end_row_local - 1
    2647              : 
    2648            0 :          DO start_col_local = 1, ncol_local
    2649            0 :             IF (col_indices(start_col_local) >= start_col_global) EXIT
    2650              :          END DO
    2651              : 
    2652            0 :          DO end_col_local = start_col_local, ncol_local
    2653            0 :             IF (col_indices(end_col_local) > end_col_global) EXIT
    2654              :          END DO
    2655            0 :          end_col_local = end_col_local - 1
    2656              : 
    2657            0 :          a => matrix_a%local_data
    2658              : 
    2659            0 :          this_col = col_indices(start_col_local) - start_col_global + 1
    2660              : 
    2661            0 :          B(:, this_col) = a(:, start_col_local)
    2662              : 
    2663            0 :          IF (my_do_norm) THEN
    2664            0 :             norm = SQRT(accurate_dot_product(B(:, this_col), B(:, this_col)))
    2665            0 :             B(:, this_col) = B(:, this_col)/norm
    2666            0 :             IF (my_do_print) WRITE (unit_nr, '(I3,F8.3)') this_col, norm
    2667              :          END IF
    2668              : 
    2669            0 :          DO i = start_col_local + 1, end_col_local
    2670            0 :             this_col = col_indices(i) - start_col_global + 1
    2671            0 :             B(:, this_col) = a(:, i)
    2672            0 :             DO j = start_col_local, i - 1
    2673            0 :                j_col = col_indices(j) - start_col_global + 1
    2674              :                B(:, this_col) = B(:, this_col) - &
    2675              :                                 accurate_dot_product(B(:, j_col), B(:, this_col))* &
    2676            0 :                                 B(:, j_col)/accurate_dot_product(B(:, j_col), B(:, j_col))
    2677              :             END DO
    2678              : 
    2679            0 :             IF (my_do_norm) THEN
    2680            0 :                norm = SQRT(accurate_dot_product(B(:, this_col), B(:, this_col)))
    2681            0 :                B(:, this_col) = B(:, this_col)/norm
    2682            0 :                IF (my_do_print) WRITE (unit_nr, '(I3,F8.3)') this_col, norm
    2683              :             END IF
    2684              : 
    2685              :          END DO
    2686            0 :          CALL matrix_a%matrix_struct%para_env%sum(B)
    2687              :       END IF
    2688              : 
    2689            0 :       CALL timestop(handle)
    2690              : 
    2691            0 :    END SUBROUTINE cp_fm_Gram_Schmidt_orthonorm
    2692              : 
    2693              : ! **************************************************************************************************
    2694              : !> \brief Cholesky decomposition
    2695              : !> \param fm_matrix ...
    2696              : !> \param n ...
    2697              : !> \param uplo triangular format; defaults to 'U'
    2698              : ! **************************************************************************************************
    2699        10022 :    SUBROUTINE cp_fm_potrf(fm_matrix, n, uplo)
    2700              :       TYPE(cp_fm_type)                         :: fm_matrix
    2701              :       INTEGER, INTENT(IN)                      :: n
    2702              :       CHARACTER, INTENT(IN), OPTIONAL          :: uplo
    2703              : 
    2704              :       CHARACTER                                :: myuplo
    2705              :       INTEGER                                  :: info
    2706        10022 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
    2707        10022 :       REAL(KIND=sp), DIMENSION(:, :), POINTER  :: a_sp
    2708              : #if defined(__parallel)
    2709              :       INTEGER, DIMENSION(9)                    :: desca
    2710              : #endif
    2711              : 
    2712        10022 :       myuplo = 'U'
    2713            0 :       IF (PRESENT(uplo)) myuplo = uplo
    2714              : 
    2715        10022 :       a => fm_matrix%local_data
    2716        10022 :       a_sp => fm_matrix%local_data_sp
    2717              : #if defined(__parallel)
    2718       100220 :       desca(:) = fm_matrix%matrix_struct%descriptor(:)
    2719        10022 :       IF (fm_matrix%use_sp) THEN
    2720            0 :          CALL pspotrf(myuplo, n, a_sp(1, 1), 1, 1, desca, info)
    2721              :       ELSE
    2722        10022 :          CALL pdpotrf(myuplo, n, a(1, 1), 1, 1, desca, info)
    2723              :       END IF
    2724              : #else
    2725              :       IF (fm_matrix%use_sp) THEN
    2726              :          CALL spotrf(myuplo, n, a_sp(1, 1), SIZE(a_sp, 1), info)
    2727              :       ELSE
    2728              :          CALL dpotrf(myuplo, n, a(1, 1), SIZE(a, 1), info)
    2729              :       END IF
    2730              : #endif
    2731        10022 :       IF (info /= 0) &
    2732            0 :          CPABORT("Cholesky decomposition failed. Matrix ill-conditioned?")
    2733              : 
    2734        10022 :    END SUBROUTINE cp_fm_potrf
    2735              : 
    2736              : ! **************************************************************************************************
    2737              : !> \brief Invert trianguar matrix
    2738              : !> \param fm_matrix the matrix to invert (triangular matrix according to uplo)
    2739              : !> \param n size of the matrix to invert
    2740              : !> \param uplo triangular format; defaults to 'U'
    2741              : ! **************************************************************************************************
    2742         9228 :    SUBROUTINE cp_fm_potri(fm_matrix, n, uplo)
    2743              :       TYPE(cp_fm_type)                         :: fm_matrix
    2744              :       INTEGER, INTENT(IN)                      :: n
    2745              :       CHARACTER, INTENT(IN), OPTIONAL          :: uplo
    2746              : 
    2747              :       CHARACTER                                :: myuplo
    2748         9228 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
    2749         9228 :       REAL(KIND=sp), DIMENSION(:, :), POINTER  :: a_sp
    2750              :       INTEGER                                  :: info
    2751              : #if defined(__parallel)
    2752              :       INTEGER, DIMENSION(9)                    :: desca
    2753              : #endif
    2754              : 
    2755         9228 :       myuplo = 'U'
    2756            0 :       IF (PRESENT(uplo)) myuplo = uplo
    2757              : 
    2758         9228 :       a => fm_matrix%local_data
    2759         9228 :       a_sp => fm_matrix%local_data_sp
    2760              : #if defined(__parallel)
    2761        92280 :       desca(:) = fm_matrix%matrix_struct%descriptor(:)
    2762         9228 :       IF (fm_matrix%use_sp) THEN
    2763            0 :          CALL pspotri(myuplo, n, a_sp(1, 1), 1, 1, desca, info)
    2764              :       ELSE
    2765         9228 :          CALL pdpotri(myuplo, n, a(1, 1), 1, 1, desca, info)
    2766              :       END IF
    2767              : #else
    2768              :       IF (fm_matrix%use_sp) THEN
    2769              :          CALL spotri(myuplo, n, a_sp(1, 1), SIZE(a_sp, 1), info)
    2770              :       ELSE
    2771              :          CALL dpotri(myuplo, n, a(1, 1), SIZE(a, 1), info)
    2772              :       END IF
    2773              : #endif
    2774         9228 :       CPASSERT(info == 0)
    2775         9228 :    END SUBROUTINE cp_fm_potri
    2776              : 
    2777              : ! **************************************************************************************************
    2778              : !> \brief Calculates
    2779              : !>        yv = alpha*amat*xv + beta*yv
    2780              : !>        where amat: fm matrix
    2781              : !>              xv  : vector replicated
    2782              : !>              yv  : vector replicated
    2783              : !>        Defaults: alpha = 1, beta = 0
    2784              : ! **************************************************************************************************
    2785        17152 :    SUBROUTINE cp_fm_matvec(amat, xv, yv, alpha, beta)
    2786              :       TYPE(cp_fm_type), INTENT(IN)                   :: amat
    2787              :       REAL(KIND=dp), DIMENSION(:), INTENT(IN)        :: xv
    2788              :       REAL(KIND=dp), DIMENSION(:), INTENT(INOUT)     :: yv
    2789              :       REAL(KIND=dp), OPTIONAL, INTENT(IN)            :: alpha, beta
    2790              : 
    2791              :       INTEGER                                        :: na, nc, nx, ny
    2792              :       REAL(KIND=dp)                                  :: aval, bval
    2793              : #if defined(__parallel)
    2794              :       INTEGER                                        :: nrl, ncl, ic, ir
    2795        17152 :       INTEGER, DIMENSION(:), POINTER                 :: rind, cind
    2796        17152 :       REAL(KIND=dp), DIMENSION(:), ALLOCATABLE       :: xvl, yvl, yvm
    2797              : #endif
    2798              : 
    2799        17152 :       IF (amat%use_sp) THEN
    2800            0 :          CPABORT("cp_fm_matvec: SP option not available")
    2801              :       END IF
    2802        17152 :       aval = 1.0_dp
    2803        17152 :       IF (PRESENT(alpha)) aval = alpha
    2804        17152 :       bval = 0.0_dp
    2805        17152 :       IF (PRESENT(beta)) bval = beta
    2806              : 
    2807        17152 :       CALL cp_fm_get_info(amat, nrow_global=na, ncol_global=nc)
    2808        17152 :       nx = SIZE(xv)
    2809        17152 :       ny = SIZE(yv)
    2810        17152 :       IF ((nx /= ny) .OR. (nc /= nx)) THEN
    2811            0 :          CPABORT("cp_fm_matvec: incompatible dimensions")
    2812              :       END IF
    2813              : #if defined(__parallel)
    2814              :       CALL cp_fm_get_info(amat, nrow_local=nrl, ncol_local=ncl, &
    2815        17152 :                           row_indices=rind, col_indices=cind)
    2816       120064 :       ALLOCATE (xvl(ncl), yvl(nrl), yvm(ny))
    2817       150320 :       DO ic = 1, ncl
    2818       150320 :          xvl(ic) = xv(cind(ic))
    2819              :       END DO
    2820      1690376 :       yvl(1:nrl) = MATMUL(amat%local_data, xvl(1:ncl))
    2821       150320 :       yvm = 0.0_dp
    2822        83736 :       DO ir = 1, nrl
    2823        83736 :          yvm(rind(ir)) = yvl(ir)
    2824              :       END DO
    2825        17152 :       CALL amat%matrix_struct%para_env%sum(yvm)
    2826        17152 :       IF (bval == 0.0_dp) THEN
    2827        75160 :          yv = aval*yvm
    2828              :       ELSE
    2829        75160 :          yv = bval*yv + aval*yvm
    2830              :       END IF
    2831              : #else
    2832              :       IF (bval == 0.0_dp) THEN
    2833              :          yv = aval*MATMUL(amat%local_data, xv)
    2834              :       ELSE
    2835              :          yv = bval*yv + aval*MATMUL(amat%local_data, xv)
    2836              :       END IF
    2837              : #endif
    2838              : 
    2839        68608 :    END SUBROUTINE cp_fm_matvec
    2840              : 
    2841              : END MODULE cp_fm_basic_linalg
        

Generated by: LCOV version 2.0-1