LCOV - code coverage report
Current view: top level - src/fm - cp_fm_diag.F (source / functions) Coverage Total Hit
Test: CP2K Regtests (git:5064cfc) Lines: 82.2 % 410 337
Test Date: 2026-03-04 06:45:10 Functions: 92.3 % 13 12

            Line data    Source code
       1              : !--------------------------------------------------------------------------------------------------!
       2              : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3              : !   Copyright 2000-2026 CP2K developers group <https://cp2k.org>                                   !
       4              : !                                                                                                  !
       5              : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6              : !--------------------------------------------------------------------------------------------------!
       7              : 
       8              : ! **************************************************************************************************
       9              : !> \brief used for collecting some of the diagonalization schemes available for
      10              : !>      cp_fm_type. cp_fm_power also moved here as it is very related
      11              : !> \note
      12              : !>      first version : most routines imported
      13              : !> \par History
      14              : !>      - unused Jacobi routines removed, cosmetics (05.04.06,MK)
      15              : !> \author Joost VandeVondele (2003-08)
      16              : ! **************************************************************************************************
      17              : MODULE cp_fm_diag
      18              :    USE cp_blacs_types, ONLY: cp_blacs_type
      19              :    USE cp_blacs_env, ONLY: cp_blacs_env_type
      20              :    USE cp_fm_basic_linalg, ONLY: cp_fm_column_scale, &
      21              :                                  cp_fm_gemm, &
      22              :                                  cp_fm_scale, &
      23              :                                  cp_fm_syrk, &
      24              :                                  cp_fm_triangular_invert, &
      25              :                                  cp_fm_triangular_multiply, &
      26              :                                  cp_fm_uplo_to_full
      27              :    USE cp_fm_cholesky, ONLY: cp_fm_cholesky_decompose
      28              :    USE cp_fm_diag_utils, ONLY: cp_fm_redistribute_end, &
      29              :                                cp_fm_redistribute_start
      30              :    USE cp_fm_elpa, ONLY: cp_fm_diag_elpa, &
      31              :                          finalize_elpa_library, &
      32              :                          initialize_elpa_library, &
      33              :                          set_elpa_kernel
      34              :    USE cp_fm_cusolver_api, ONLY: cp_fm_diag_cusolver, &
      35              :                                  cp_fm_general_cusolver
      36              : #if defined(__DLAF)
      37              :    USE cp_fm_dlaf_api, ONLY: cp_fm_diag_dlaf, cp_fm_diag_gen_dlaf
      38              :    USE cp_dlaf_utils_api, ONLY: cp_dlaf_initialize, cp_dlaf_finalize
      39              : #endif
      40              :    USE cp_fm_types, ONLY: cp_fm_get_info, &
      41              :                           cp_fm_set_element, &
      42              :                           cp_fm_to_fm, &
      43              :                           cp_fm_type, &
      44              :                           cp_fm_create, &
      45              :                           cp_fm_get_info, &
      46              :                           cp_fm_release, &
      47              :                           cp_fm_set_all, &
      48              :                           cp_fm_to_fm, &
      49              :                           cp_fm_to_fm_submat, &
      50              :                           cp_fm_type
      51              :    USE cp_fm_struct, ONLY: cp_fm_struct_equivalent, &
      52              :                            cp_fm_struct_create, &
      53              :                            cp_fm_struct_release, &
      54              :                            cp_fm_struct_type
      55              :    USE cp_log_handling, ONLY: cp_logger_get_default_unit_nr, &
      56              :                               cp_get_default_logger, &
      57              :                               cp_logger_get_default_io_unit, &
      58              :                               cp_logger_type, &
      59              :                               cp_to_string
      60              :    USE cp_log_handling, ONLY: cp_get_default_logger, &
      61              :                               cp_logger_get_default_unit_nr, &
      62              :                               cp_logger_get_unit_nr, &
      63              :                               cp_logger_type
      64              :    USE kinds, ONLY: default_string_length, &
      65              :                     dp
      66              :    USE machine, ONLY: default_output_unit, &
      67              :                       m_memory
      68              : #if defined (__parallel)
      69              :    USE message_passing, ONLY: mp_comm_type
      70              : #endif
      71              : #if defined (__HAS_IEEE_EXCEPTIONS)
      72              :    USE ieee_exceptions, ONLY: ieee_get_halting_mode, &
      73              :                               ieee_set_halting_mode, &
      74              :                               IEEE_ALL
      75              : #endif
      76              : #include "../base/base_uses.f90"
      77              : 
      78              :    IMPLICIT NONE
      79              :    PRIVATE
      80              : 
      81              :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_fm_diag'
      82              : 
      83              :    REAL(KIND=dp), PARAMETER, PUBLIC :: eps_check_diag_default = 5.0E-14_dp
      84              : 
      85              :    ! The following saved variables are diagonalization global
      86              :    ! Stores the default library for diagonalization
      87              :    INTEGER, SAVE, PUBLIC    :: diag_type = 0
      88              :    ! Minimum number of eigenvectors for the use of the ELPA eigensolver.
      89              :    ! The ScaLAPACK eigensolver is used as fallback for all smaller cases.
      90              :    INTEGER, SAVE            :: elpa_neigvec_min = 0
      91              : #if defined(__DLAF)
      92              :    ! Minimum number of eigenvectors for the use of the DLAF eigensolver.
      93              :    ! The ScaLAPACK eigensolver is used as fallback for all smaller cases.
      94              :    INTEGER, SAVE, PUBLIC    :: dlaf_neigvec_min = 0
      95              : #endif
      96              :    LOGICAL, SAVE, PUBLIC :: cusolver_generalized = .TRUE.
      97              :    ! Threshold value for the orthonormality check of the eigenvectors obtained
      98              :    ! after a diagonalization. A negative value disables the check.
      99              :    REAL(KIND=dp), SAVE :: eps_check_diag = -1.0_dp
     100              : 
     101              :    ! Constants for the diag_type above
     102              :    INTEGER, PARAMETER, PUBLIC  :: FM_DIAG_TYPE_SCALAPACK = 101, &
     103              :                                   FM_DIAG_TYPE_ELPA = 102, &
     104              :                                   FM_DIAG_TYPE_CUSOLVER = 103, &
     105              :                                   FM_DIAG_TYPE_DLAF = 104
     106              : #if defined(__CUSOLVERMP)
     107              :    INTEGER, PARAMETER, PUBLIC :: FM_DIAG_TYPE_DEFAULT = FM_DIAG_TYPE_CUSOLVER
     108              : #elif defined(__ELPA)
     109              :    INTEGER, PARAMETER, PUBLIC :: FM_DIAG_TYPE_DEFAULT = FM_DIAG_TYPE_ELPA
     110              : #else
     111              :    INTEGER, PARAMETER, PUBLIC :: FM_DIAG_TYPE_DEFAULT = FM_DIAG_TYPE_SCALAPACK
     112              : #endif
     113              : 
     114              :    ! Public subroutines
     115              :    PUBLIC :: choose_eigv_solver, &
     116              :              cp_fm_block_jacobi, &
     117              :              cp_fm_power, &
     118              :              cp_fm_syevd, &
     119              :              cp_fm_syevx, &
     120              :              cp_fm_svd, &
     121              :              cp_fm_geeig, &
     122              :              cp_fm_geeig_canon, &
     123              :              diag_init, &
     124              :              diag_finalize
     125              : 
     126              : CONTAINS
     127              : 
     128              : ! **************************************************************************************************
     129              : !> \brief Setup the diagonalization library to be used
     130              : !> \param diag_lib diag_library flag from GLOBAL section in input
     131              : !> \param fallback_applied .TRUE. if support for the requested library was not compiled-in and fallback
     132              : !>                         to ScaLAPACK was applied, .FALSE. otherwise.
     133              : !> \param elpa_kernel integer that determines which ELPA kernel to use for diagonalization
     134              : !> \param elpa_neigvec_min_input ...
     135              : !> \param elpa_qr logical that determines if ELPA should try to use QR to accelerate the
     136              : !>                diagonalization procedure of suitably sized matrices
     137              : !> \param elpa_print logical that determines if information about the ELPA diagonalization should
     138              : !>                   be printed
     139              : !> \param elpa_one_stage logical that enables the one-stage solver
     140              : !> \param dlaf_neigvec_min_input ...
     141              : !> \param eps_check_diag_input ...
     142              : !> \param cusolver_generalized_input ...
     143              : !> \par History
     144              : !>      - Add support for DLA-Future (05.09.2023, RMeli)
     145              : !> \author  MI 11.2013
     146              : ! **************************************************************************************************
     147        10117 :    SUBROUTINE diag_init(diag_lib, fallback_applied, elpa_kernel, elpa_neigvec_min_input, elpa_qr, &
     148              :                         elpa_print, elpa_one_stage, dlaf_neigvec_min_input, eps_check_diag_input, &
     149              :                         cusolver_generalized_input)
     150              :       CHARACTER(LEN=*), INTENT(IN)                       :: diag_lib
     151              :       LOGICAL, INTENT(OUT)                               :: fallback_applied
     152              :       INTEGER, INTENT(IN)                                :: elpa_kernel, elpa_neigvec_min_input
     153              :       LOGICAL, INTENT(IN)                                :: elpa_qr, elpa_print, elpa_one_stage
     154              :       INTEGER, INTENT(IN)                                :: dlaf_neigvec_min_input
     155              :       REAL(KIND=dp), INTENT(IN)                          :: eps_check_diag_input
     156              :       LOGICAL, INTENT(IN), OPTIONAL                      :: cusolver_generalized_input
     157              : 
     158              :       LOGICAL, SAVE                                      :: initialized = .FALSE.
     159              : 
     160        10117 :       fallback_applied = .FALSE.
     161              : 
     162        10117 :       IF (diag_lib == "ScaLAPACK") THEN
     163          194 :          diag_type = FM_DIAG_TYPE_SCALAPACK
     164         9923 :       ELSE IF (diag_lib == "ELPA") THEN
     165              : #if defined (__ELPA)
     166              :          ! ELPA is requested and available
     167         9923 :          diag_type = FM_DIAG_TYPE_ELPA
     168              : #else
     169              :          ! ELPA library requested but not linked, switch back to SL
     170              :          diag_type = FM_DIAG_TYPE_SCALAPACK
     171              :          fallback_applied = .TRUE.
     172              : #endif
     173            0 :       ELSE IF (diag_lib == "cuSOLVER") THEN
     174            0 :          diag_type = FM_DIAG_TYPE_CUSOLVER
     175            0 :       ELSE IF (diag_lib == "DLAF") THEN
     176              : #if defined (__DLAF)
     177              :          diag_type = FM_DIAG_TYPE_DLAF
     178              : #else
     179            0 :          CPABORT("ERROR in diag_init: CP2K was not compiled with DLA-Future support")
     180              : #endif
     181              :       ELSE
     182            0 :          CPABORT("ERROR in diag_init: Initialization of unknown diagonalization library requested")
     183              :       END IF
     184              : 
     185              :       ! Initialization of requested diagonalization library
     186        10117 :       IF (.NOT. initialized .AND. diag_type == FM_DIAG_TYPE_ELPA) THEN
     187         9326 :          CALL initialize_elpa_library(one_stage=elpa_one_stage, qr=elpa_qr, should_print=elpa_print)
     188         9326 :          CALL set_elpa_kernel(elpa_kernel)
     189         9326 :          initialized = .TRUE.
     190              :       END IF
     191              : #if defined(__DLAF)
     192              :       IF (.NOT. initialized .AND. diag_type == FM_DIAG_TYPE_DLAF) THEN
     193              :          CALL cp_dlaf_initialize()
     194              :          initialized = .TRUE.
     195              :       END IF
     196              :       dlaf_neigvec_min = dlaf_neigvec_min_input
     197              : #else
     198              :       MARK_USED(dlaf_neigvec_min_input)
     199              : #endif
     200              : 
     201        10117 :       elpa_neigvec_min = elpa_neigvec_min_input
     202        10117 :       eps_check_diag = eps_check_diag_input
     203        10117 :       IF (PRESENT(cusolver_generalized_input)) THEN
     204        10117 :          cusolver_generalized = cusolver_generalized_input
     205              :       ELSE
     206            0 :          cusolver_generalized = .TRUE.
     207              :       END IF
     208              : 
     209        10117 :    END SUBROUTINE diag_init
     210              : 
     211              : ! **************************************************************************************************
     212              : !> \brief Finalize the diagonalization library
     213              : ! **************************************************************************************************
     214         9907 :    SUBROUTINE diag_finalize()
     215              : #if defined (__ELPA)
     216         9907 :       IF (diag_type == FM_DIAG_TYPE_ELPA) &
     217         9713 :          CALL finalize_elpa_library()
     218              : #endif
     219              : #if defined (__DLAF)
     220              :       IF (diag_type == FM_DIAG_TYPE_DLAF) &
     221              :          CALL cp_dlaf_finalize()
     222              : #endif
     223         9907 :    END SUBROUTINE diag_finalize
     224              : 
     225              : ! **************************************************************************************************
     226              : !> \brief   Choose the Eigensolver depending on which library is available
     227              : !>          ELPA seems to be unstable for small systems
     228              : !> \param matrix ...
     229              : !> \param eigenvectors ...
     230              : !> \param eigenvalues ...
     231              : !> \param info ...
     232              : !> \par     info If present returns error code and prevents program stops.
     233              : !>               Works currently only for cp_fm_syevd with ScaLAPACK.
     234              : !>               Other solvers will end the program regardless of PRESENT(info).
     235              : !> \par History
     236              : !>      - Do not use ELPA for small matrices and use instead ScaLAPACK as fallback (10.05.2021, MK)
     237              : ! **************************************************************************************************
     238       194923 :    SUBROUTINE choose_eigv_solver(matrix, eigenvectors, eigenvalues, info)
     239              : 
     240              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix, eigenvectors
     241              :       REAL(KIND=dp), DIMENSION(:), INTENT(OUT)           :: eigenvalues
     242              :       INTEGER, INTENT(OUT), OPTIONAL                     :: info
     243              : 
     244              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'choose_eigv_solver'
     245              : 
     246              :       ! Sample peak memory
     247       194923 :       CALL m_memory()
     248              : 
     249       194923 :       IF (PRESENT(info)) info = 0 ! Default for solvers that do not return an info.
     250              : 
     251       194923 :       IF (diag_type == FM_DIAG_TYPE_SCALAPACK) THEN
     252         4576 :          CALL cp_fm_syevd(matrix, eigenvectors, eigenvalues, info)
     253              : 
     254       190347 :       ELSE IF (diag_type == FM_DIAG_TYPE_ELPA) THEN
     255       190347 :          IF (matrix%matrix_struct%nrow_global < elpa_neigvec_min) THEN
     256              :             ! We don't trust ELPA with very small matrices.
     257       179203 :             CALL cp_fm_syevd(matrix, eigenvectors, eigenvalues, info)
     258              :          ELSE
     259        11144 :             CALL cp_fm_diag_elpa(matrix, eigenvectors, eigenvalues)
     260              :          END IF
     261              : 
     262            0 :       ELSE IF (diag_type == FM_DIAG_TYPE_CUSOLVER) THEN
     263            0 :          IF (matrix%matrix_struct%nrow_global < 64) THEN
     264              :             ! We don't trust cuSolver with very small matrices.
     265            0 :             CALL cp_fm_syevd(matrix, eigenvectors, eigenvalues, info)
     266              :          ELSE
     267            0 :             CALL cp_fm_diag_cusolver(matrix, eigenvectors, eigenvalues)
     268              :          END IF
     269              : 
     270              : #if defined(__DLAF)
     271              :       ELSE IF (diag_type == FM_DIAG_TYPE_DLAF) THEN
     272              :          IF (matrix%matrix_struct%nrow_global < dlaf_neigvec_min) THEN
     273              :             ! Use ScaLAPACK for small matrices
     274              :             CALL cp_fm_syevd(matrix, eigenvectors, eigenvalues, info)
     275              :          ELSE
     276              :             CALL cp_fm_diag_dlaf(matrix, eigenvectors, eigenvalues)
     277              :          END IF
     278              : #endif
     279              : 
     280              :       ELSE
     281            0 :          CPABORT("ERROR in "//routineN//": Invalid diagonalization type requested")
     282              :       END IF
     283              : 
     284       194923 :       CALL check_diag(matrix, eigenvectors, nvec=SIZE(eigenvalues))
     285              : 
     286       194923 :    END SUBROUTINE choose_eigv_solver
     287              : 
     288              : ! **************************************************************************************************
     289              : !> \brief   Check result of diagonalization, i.e. the orthonormality of the eigenvectors
     290              : !> \param matrix Work matrix
     291              : !> \param eigenvectors Eigenvectors to be checked
     292              : !> \param nvec ...
     293              : ! **************************************************************************************************
     294       382068 :    SUBROUTINE check_diag(matrix, eigenvectors, nvec)
     295              : 
     296              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix, eigenvectors
     297              :       INTEGER, INTENT(IN)                                :: nvec
     298              : 
     299              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'check_diag'
     300              : 
     301              :       CHARACTER(LEN=default_string_length)               :: diag_type_name
     302              :       REAL(KIND=dp)                                      :: eps, eps_abort, eps_warning, gold, test
     303              :       INTEGER                                            :: handle, i, j, ncol, nrow, output_unit
     304              :       LOGICAL                                            :: check_eigenvectors
     305              : #if defined(__parallel)
     306              :       TYPE(cp_blacs_env_type), POINTER                   :: context
     307              :       INTEGER                                            :: il, jl, ipcol, iprow, &
     308              :                                                             mypcol, myprow, npcol, nprow
     309              :       INTEGER, DIMENSION(9)                              :: desca
     310              : #endif
     311              : 
     312       382068 :       CALL timeset(routineN, handle)
     313              : 
     314       382068 :       output_unit = default_output_unit
     315       382068 :       eps_warning = eps_check_diag_default
     316              : #if defined(__CHECK_DIAG)
     317              :       check_eigenvectors = .TRUE.
     318              :       IF (eps_check_diag >= 0.0_dp) THEN
     319              :          eps_warning = eps_check_diag
     320              :       END IF
     321              : #else
     322       382068 :       IF (eps_check_diag >= 0.0_dp) THEN
     323          332 :          check_eigenvectors = .TRUE.
     324          332 :          eps_warning = eps_check_diag
     325              :       ELSE
     326              :          check_eigenvectors = .FALSE.
     327              :       END IF
     328              : #endif
     329       382068 :       eps_abort = 10.0_dp*eps_warning
     330              : 
     331       382068 :       gold = 0.0_dp
     332       382068 :       test = 0.0_dp
     333       382068 :       eps = 0.0_dp
     334              : 
     335       382068 :       IF (check_eigenvectors) THEN
     336              : #if defined(__parallel)
     337          332 :          nrow = eigenvectors%matrix_struct%nrow_global
     338          332 :          ncol = MIN(eigenvectors%matrix_struct%ncol_global, nvec)
     339          332 :          CALL cp_fm_gemm("T", "N", ncol, ncol, nrow, 1.0_dp, eigenvectors, eigenvectors, 0.0_dp, matrix)
     340          332 :          context => matrix%matrix_struct%context
     341          332 :          myprow = context%mepos(1)
     342          332 :          mypcol = context%mepos(2)
     343          332 :          nprow = context%num_pe(1)
     344          332 :          npcol = context%num_pe(2)
     345         3320 :          desca(:) = matrix%matrix_struct%descriptor(:)
     346         5336 :          outer: DO j = 1, ncol
     347       125564 :             DO i = 1, ncol
     348       120228 :                CALL infog2l(i, j, desca, nprow, npcol, myprow, mypcol, il, jl, iprow, ipcol)
     349       125232 :                IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
     350        60114 :                   gold = MERGE(0.0_dp, 1.0_dp, i /= j)
     351        60114 :                   test = matrix%local_data(il, jl)
     352        60114 :                   eps = ABS(test - gold)
     353        60114 :                   IF (eps > eps_warning) EXIT outer
     354              :                END IF
     355              :             END DO
     356              :          END DO outer
     357              : #else
     358              :          nrow = SIZE(eigenvectors%local_data, 1)
     359              :          ncol = MIN(SIZE(eigenvectors%local_data, 2), nvec)
     360              :          CALL dgemm("T", "N", ncol, ncol, nrow, 1.0_dp, &
     361              :                     eigenvectors%local_data(1, 1), nrow, &
     362              :                     eigenvectors%local_data(1, 1), nrow, &
     363              :                     0.0_dp, matrix%local_data(1, 1), nrow)
     364              :          outer: DO j = 1, ncol
     365              :             DO i = 1, ncol
     366              :                gold = MERGE(0.0_dp, 1.0_dp, i /= j)
     367              :                test = matrix%local_data(i, j)
     368              :                eps = ABS(test - gold)
     369              :                IF (eps > eps_warning) EXIT outer
     370              :             END DO
     371              :          END DO outer
     372              : #endif
     373          332 :          IF (eps > eps_warning) THEN
     374            0 :             IF (diag_type == FM_DIAG_TYPE_SCALAPACK) THEN
     375            0 :                diag_type_name = "SYEVD"
     376            0 :             ELSE IF (diag_type == FM_DIAG_TYPE_ELPA) THEN
     377            0 :                diag_type_name = "ELPA"
     378            0 :             ELSE IF (diag_type == FM_DIAG_TYPE_CUSOLVER) THEN
     379            0 :                diag_type_name = "CUSOLVER"
     380            0 :             ELSE IF (diag_type == FM_DIAG_TYPE_DLAF) THEN
     381            0 :                diag_type_name = "DLAF"
     382              :             ELSE
     383            0 :                CPABORT("Unknown diag_type")
     384              :             END IF
     385              :             WRITE (UNIT=output_unit, FMT="(/,T2,A,/,T2,A,I0,A,I0,A,F0.15,/,T2,A,F0.0,A,ES10.3)") &
     386            0 :                "The eigenvectors returned by "//TRIM(diag_type_name)//" are not orthonormal", &
     387            0 :                "Matrix element (", i, ", ", j, ") = ", test, &
     388            0 :                "The deviation from the expected value ", gold, " is", eps
     389            0 :             IF (eps > eps_abort) THEN
     390            0 :                CPABORT("ERROR in "//routineN//": Check of matrix diagonalization failed")
     391              :             ELSE
     392            0 :                CPWARN("Check of matrix diagonalization failed in routine "//routineN)
     393              :             END IF
     394              :          END IF
     395              :       END IF
     396              : 
     397       382068 :       CALL timestop(handle)
     398              : 
     399       382068 :    END SUBROUTINE check_diag
     400              : 
     401              : ! **************************************************************************************************
     402              : !> \brief Issues an error messages and exits (optionally only warns).
     403              : !> \param mesg message to be issued
     404              : !> \param info error code (optional)
     405              : !> \param warn only warn (optional)
     406              : ! **************************************************************************************************
     407            0 :    SUBROUTINE cp_fm_error(mesg, info, warn)
     408              :       CHARACTER(LEN=*), INTENT(IN)                       :: mesg
     409              :       INTEGER, INTENT(IN), OPTIONAL                      :: info
     410              :       LOGICAL, INTENT(IN), OPTIONAL                      :: warn
     411              : 
     412              :       CHARACTER(LEN=2*default_string_length)             :: message
     413              :       LOGICAL                                            :: warning
     414              : 
     415            0 :       IF (PRESENT(info)) THEN
     416            0 :          WRITE (message, "(A,A,I0,A)") mesg, " (INFO = ", info, ")"
     417              :       ELSE
     418            0 :          WRITE (message, "(A)") mesg
     419              :       END IF
     420              : 
     421            0 :       IF (PRESENT(warn)) THEN
     422            0 :          warning = warn
     423              :       ELSE ! abort
     424              :          warning = .FALSE.
     425              :       END IF
     426              : 
     427            0 :       IF (warning) THEN
     428            0 :          CPWARN(TRIM(message))
     429              :       ELSE
     430            0 :          CPABORT(TRIM(message))
     431              :       END IF
     432            0 :    END SUBROUTINE cp_fm_error
     433              : 
     434              : ! **************************************************************************************************
     435              : !> \brief   Computes all eigenvalues and vectors of a real symmetric matrix
     436              : !>          significantly faster than syevx, scales also much better.
     437              : !>          Needs workspace to allocate all the eigenvectors
     438              : !> \param matrix ...
     439              : !> \param eigenvectors ...
     440              : !> \param eigenvalues ...
     441              : !> \param info ...
     442              : !> \par     matrix is supposed to be in upper triangular form, and overwritten by this routine
     443              : !> \par     info If present returns error code and prevents program stops.
     444              : !>               Works currently only for scalapack.
     445              : !>               Other solvers will end the program regardless of PRESENT(info).
     446              : ! **************************************************************************************************
     447       187105 :    SUBROUTINE cp_fm_syevd(matrix, eigenvectors, eigenvalues, info)
     448              : 
     449              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix, eigenvectors
     450              :       REAL(KIND=dp), DIMENSION(:), INTENT(OUT) :: eigenvalues
     451              :       INTEGER, INTENT(OUT), OPTIONAL           :: info
     452              : 
     453              :       CHARACTER(LEN=*), PARAMETER              :: routineN = 'cp_fm_syevd'
     454              : 
     455              :       INTEGER                                  :: handle, myinfo, n, nmo
     456              :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: eig
     457              : #if defined(__parallel)
     458              :       TYPE(cp_fm_type)                         :: eigenvectors_new, matrix_new
     459              : #else
     460              :       INTEGER                                  :: liwork, lwork
     461              :       INTEGER, DIMENSION(:), POINTER           :: iwork
     462              :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: m
     463              :       REAL(KIND=dp), DIMENSION(:), POINTER     :: work
     464              :       INTEGER, TARGET                          :: v(1)
     465              :       REAL(KIND=dp), TARGET                    :: w(1)
     466              : #endif
     467              : 
     468       187105 :       CALL timeset(routineN, handle)
     469              : 
     470       187105 :       myinfo = 0
     471              : 
     472       187105 :       n = matrix%matrix_struct%nrow_global
     473       561315 :       ALLOCATE (eig(n))
     474              : 
     475              : #if defined(__parallel)
     476              :       ! Determine if the input matrix needs to be redistributed before diagonalization.
     477              :       ! Heuristics are used to determine the optimal number of CPUs for diagonalization.
     478              :       ! The redistributed matrix is stored in matrix_new, which is just a pointer
     479              :       ! to the original matrix if no redistribution is required
     480       187105 :       CALL cp_fm_redistribute_start(matrix, eigenvectors, matrix_new, eigenvectors_new)
     481              : 
     482              :       ! Call scalapack on CPUs that hold the new matrix
     483       187105 :       IF (ASSOCIATED(matrix_new%matrix_struct)) THEN
     484        95597 :          IF (PRESENT(info)) THEN
     485         3251 :             CALL cp_fm_syevd_base(matrix_new, eigenvectors_new, eig, myinfo)
     486              :          ELSE
     487        92346 :             CALL cp_fm_syevd_base(matrix_new, eigenvectors_new, eig)
     488              :          END IF
     489              :       END IF
     490              :       ! Redistribute results and clean up
     491       187105 :       CALL cp_fm_redistribute_end(matrix, eigenvectors, eig, matrix_new, eigenvectors_new)
     492              : #else
     493              :       ! Retrieve the optimal work array sizes first
     494              :       lwork = -1
     495              :       liwork = -1
     496              :       m => matrix%local_data
     497              :       iwork => v
     498              :       work => w
     499              : 
     500              :       CALL dsyevd('V', 'U', n, m(1, 1), SIZE(m, 1), eig(1), work(1), lwork, iwork(1), liwork, myinfo)
     501              : 
     502              :       IF (myinfo /= 0) THEN
     503              :          CALL cp_fm_error("ERROR in DSYEVD: Work space query failed", myinfo, PRESENT(info))
     504              :       END IF
     505              : 
     506              :       ! Reallocate work arrays and perform diagonalisation
     507              :       lwork = NINT(work(1))
     508              :       ALLOCATE (work(lwork))
     509              : 
     510              :       liwork = iwork(1)
     511              :       ALLOCATE (iwork(liwork))
     512              : 
     513              :       CALL dsyevd('V', 'U', n, m(1, 1), SIZE(m, 1), eig(1), work(1), lwork, iwork(1), liwork, myinfo)
     514              : 
     515              :       IF (myinfo /= 0) THEN
     516              :          CALL cp_fm_error("ERROR in DSYEVD: Matrix diagonalization failed", myinfo, PRESENT(info))
     517              :       END IF
     518              : 
     519              :       CALL cp_fm_to_fm(matrix, eigenvectors)
     520              : 
     521              :       DEALLOCATE (iwork)
     522              :       DEALLOCATE (work)
     523              : #endif
     524              : 
     525       187105 :       IF (PRESENT(info)) info = myinfo
     526              : 
     527       187105 :       nmo = SIZE(eigenvalues, 1)
     528       187105 :       IF (nmo > n) THEN
     529            0 :          eigenvalues(1:n) = eig(1:n)
     530              :       ELSE
     531      1778032 :          eigenvalues(1:nmo) = eig(1:nmo)
     532              :       END IF
     533              : 
     534       187105 :       DEALLOCATE (eig)
     535              : 
     536       187105 :       CALL check_diag(matrix, eigenvectors, n)
     537              : 
     538       187105 :       CALL timestop(handle)
     539              : 
     540       374210 :    END SUBROUTINE cp_fm_syevd
     541              : 
     542              : ! **************************************************************************************************
     543              : !> \brief ...
     544              : !> \param matrix ...
     545              : !> \param eigenvectors ...
     546              : !> \param eigenvalues ...
     547              : !> \param info ...
     548              : ! **************************************************************************************************
     549        95597 :    SUBROUTINE cp_fm_syevd_base(matrix, eigenvectors, eigenvalues, info)
     550              : 
     551              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix, eigenvectors
     552              :       REAL(KIND=dp), DIMENSION(:), INTENT(OUT) :: eigenvalues
     553              :       INTEGER, INTENT(OUT), OPTIONAL           :: info
     554              : 
     555              :       CHARACTER(LEN=*), PARAMETER              :: routineN = 'cp_fm_syevd_base'
     556              : 
     557              :       INTEGER                                  :: handle, myinfo
     558              : #if defined(__parallel)
     559              :       TYPE(cp_blacs_env_type), POINTER         :: context
     560              :       INTEGER                                  :: liwork, lwork, n
     561              :       INTEGER, DIMENSION(9)                    :: descm, descv
     562        95597 :       INTEGER, DIMENSION(:), POINTER           :: iwork
     563        95597 :       REAL(KIND=dp), DIMENSION(:), POINTER     :: work
     564        95597 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: m, v
     565              :       REAL(KIND=dp), TARGET                    :: w(1)
     566              : #if defined (__HAS_IEEE_EXCEPTIONS)
     567              :       LOGICAL, DIMENSION(5)                    :: halt
     568              : #endif
     569              : #endif
     570              : 
     571        95597 :       CALL timeset(routineN, handle)
     572              : 
     573        95597 :       myinfo = 0
     574              : 
     575              : #if defined(__parallel)
     576              : 
     577        95597 :       n = matrix%matrix_struct%nrow_global
     578        95597 :       m => matrix%local_data
     579        95597 :       context => matrix%matrix_struct%context
     580       955970 :       descm(:) = matrix%matrix_struct%descriptor(:)
     581              : 
     582        95597 :       v => eigenvectors%local_data
     583       955970 :       descv(:) = eigenvectors%matrix_struct%descriptor(:)
     584              : 
     585        95597 :       liwork = 7*n + 8*context%num_pe(2) + 2
     586       286791 :       ALLOCATE (iwork(liwork))
     587              : 
     588              :       ! Work space query
     589        95597 :       lwork = -1
     590        95597 :       work => w
     591              : 
     592              :       CALL pdsyevd('V', 'U', n, m(1, 1), 1, 1, descm, eigenvalues(1), v(1, 1), 1, 1, descv, &
     593        95597 :                    work(1), lwork, iwork(1), liwork, myinfo)
     594              : 
     595        95597 :       IF (matrix%matrix_struct%para_env%is_source() .AND. (myinfo /= 0)) THEN
     596            0 :          CALL cp_fm_error("ERROR in PDSYEVD: Work space query failed", myinfo, PRESENT(info))
     597              :       END IF
     598              : 
     599        95597 :       lwork = NINT(work(1)) ! can be insufficient due to bug in reference ScaLAPACK
     600              : #if !defined(__SCALAPACK_NO_WA)
     601              :       ! Query workspace for QDORMTR as called by reference ScaLAPACK (PDSYEVD).
     602              :       CALL pdormtr('L', 'U', 'N', n, n, m(1, 1), 1, 1, descm, m(1, 1), &
     603        95597 :                    v(1, 1), 1, 1, descv, work(1), -1, myinfo)
     604              : 
     605        95597 :       IF (matrix%matrix_struct%para_env%is_source() .AND. (myinfo /= 0)) THEN
     606            0 :          CALL cp_fm_error("ERROR in PDORMTR: Work space query failed", myinfo, PRESENT(info))
     607              :       END IF
     608              : 
     609        95597 :       IF (lwork < (work(1) + 2*n)) THEN
     610        44654 :          lwork = NINT(work(1)) + 2*n ! still wrong by 2*N
     611              :       END IF
     612              : #endif
     613       286791 :       ALLOCATE (work(lwork))
     614              : 
     615              :       ! Initial/documented amount of liwork is exceeded (slightly worrisome too).
     616        95597 :       IF (liwork < iwork(1)) THEN
     617            0 :          liwork = iwork(1)
     618            0 :          DEALLOCATE (iwork)
     619            0 :          ALLOCATE (iwork(liwork))
     620              :       END IF
     621              : 
     622              :       ! ScaLAPACK takes advantage of IEEE754 exceptions for speedup.
     623              :       ! Therefore, we disable floating point traps temporarily.
     624              : #if defined (__HAS_IEEE_EXCEPTIONS)
     625              :       CALL ieee_get_halting_mode(IEEE_ALL, halt)
     626              :       CALL ieee_set_halting_mode(IEEE_ALL, .FALSE.)
     627              : #endif
     628              : 
     629              :       CALL pdsyevd('V', 'U', n, m(1, 1), 1, 1, descm, eigenvalues(1), v(1, 1), 1, 1, descv, &
     630        95597 :                    work(1), lwork, iwork(1), liwork, myinfo)
     631              : 
     632              : #if defined (__HAS_IEEE_EXCEPTIONS)
     633              :       CALL ieee_set_halting_mode(IEEE_ALL, halt)
     634              : #endif
     635        95597 :       IF (matrix%matrix_struct%para_env%is_source() .AND. (myinfo /= 0)) THEN
     636            0 :          CALL cp_fm_error("ERROR in PDSYEVD: Matrix diagonalization failed", myinfo, PRESENT(info))
     637              :       END IF
     638              : 
     639        95597 :       IF (PRESENT(info)) info = myinfo
     640              : 
     641        95597 :       DEALLOCATE (work)
     642        95597 :       DEALLOCATE (iwork)
     643              : #else
     644              :       MARK_USED(matrix)
     645              :       MARK_USED(eigenvectors)
     646              :       MARK_USED(eigenvalues)
     647              :       myinfo = -1
     648              :       IF (PRESENT(info)) info = myinfo
     649              :       CALL cp_fm_error("ERROR in "//TRIM(routineN)// &
     650              :                        ": Matrix diagonalization using PDSYEVD requested without ScaLAPACK support")
     651              : #endif
     652              : 
     653        95597 :       CALL timestop(handle)
     654              : 
     655        95597 :    END SUBROUTINE cp_fm_syevd_base
     656              : 
     657              : ! **************************************************************************************************
     658              : !> \brief   compute eigenvalues and optionally eigenvectors of a real symmetric matrix using scalapack.
     659              : !>          If eigenvectors are required this routine will replicate a full matrix on each CPU...
     660              : !>          if more than a handful of vectors are needed, use cp_fm_syevd instead
     661              : !> \param matrix ...
     662              : !> \param eigenvectors ...
     663              : !> \param eigenvalues ...
     664              : !> \param neig ...
     665              : !> \param work_syevx ...
     666              : !> \par     matrix is supposed to be in upper triangular form, and overwritten by this routine
     667              : !>          neig   is the number of vectors needed (default all)
     668              : !>          work_syevx evec calculation only, is the fraction of the working buffer allowed (1.0 use full buffer)
     669              : !>                     reducing this saves time, but might cause the routine to fail
     670              : ! **************************************************************************************************
     671           40 :    SUBROUTINE cp_fm_syevx(matrix, eigenvectors, eigenvalues, neig, work_syevx)
     672              : 
     673              :       ! Diagonalise the symmetric n by n matrix using the LAPACK library.
     674              : 
     675              :       TYPE(cp_fm_type), INTENT(IN)                 :: matrix
     676              :       TYPE(cp_fm_type), OPTIONAL, INTENT(IN)       :: eigenvectors
     677              :       REAL(KIND=dp), OPTIONAL, INTENT(IN)          :: work_syevx
     678              :       INTEGER, INTENT(IN), OPTIONAL                :: neig
     679              :       REAL(KIND=dp), DIMENSION(:), INTENT(OUT)     :: eigenvalues
     680              : 
     681              :       CHARACTER(LEN=*), PARAMETER                  :: routineN = "cp_fm_syevx"
     682              : 
     683              : #if defined(__parallel)
     684              :       REAL(KIND=dp), PARAMETER                     :: orfac = -1.0_dp
     685              : #endif
     686              :       REAL(KIND=dp), PARAMETER                     :: vl = 0.0_dp, &
     687              :                                                       vu = 0.0_dp
     688              : 
     689              :       TYPE(cp_blacs_env_type), POINTER             :: context
     690              :       TYPE(cp_logger_type), POINTER                :: logger
     691              :       CHARACTER(LEN=1)                             :: job_type
     692              :       REAL(KIND=dp)                                :: abstol, work_syevx_local
     693              :       INTEGER                                      :: handle, info, liwork, lwork, &
     694              :                                                       m, n, nb, npcol, nprow, &
     695              :                                                       output_unit, neig_local
     696              :       LOGICAL                                      :: ionode, needs_evecs
     697           40 :       INTEGER, DIMENSION(:), ALLOCATABLE           :: ifail, iwork
     698           40 :       REAL(KIND=dp), DIMENSION(:), ALLOCATABLE     :: w, work
     699           40 :       REAL(KIND=dp), DIMENSION(:, :), POINTER      :: a, z
     700              : 
     701              :       REAL(KIND=dp), EXTERNAL                      :: dlamch
     702              : 
     703              : #if defined(__parallel)
     704              :       INTEGER                                      :: nn, np0, npe, nq0, nz
     705              :       INTEGER, DIMENSION(9)                        :: desca, descz
     706           40 :       INTEGER, DIMENSION(:), ALLOCATABLE           :: iclustr
     707           40 :       REAL(KIND=dp), DIMENSION(:), ALLOCATABLE     :: gap
     708              :       INTEGER, EXTERNAL                            :: iceil, numroc
     709              : #else
     710              :       INTEGER                                      :: nla, nlz
     711              :       INTEGER, EXTERNAL                            :: ilaenv
     712              : #endif
     713              : #if defined (__HAS_IEEE_EXCEPTIONS)
     714              :       LOGICAL, DIMENSION(5)                        :: halt
     715              : #endif
     716              : 
     717              :       ! by default all
     718           40 :       n = matrix%matrix_struct%nrow_global
     719           40 :       neig_local = n
     720           40 :       IF (PRESENT(neig)) neig_local = neig
     721           40 :       IF (neig_local == 0) RETURN
     722              : 
     723           40 :       CALL timeset(routineN, handle)
     724              : 
     725           40 :       needs_evecs = PRESENT(eigenvectors)
     726              : 
     727           40 :       logger => cp_get_default_logger()
     728           40 :       ionode = logger%para_env%is_source()
     729           40 :       n = matrix%matrix_struct%nrow_global
     730              : 
     731              :       ! by default allocate all needed space
     732           40 :       work_syevx_local = 1.0_dp
     733           40 :       IF (PRESENT(work_syevx)) work_syevx_local = work_syevx
     734              : 
     735              :       ! set scalapack job type
     736           40 :       IF (needs_evecs) THEN
     737           40 :          job_type = "V"
     738              :       ELSE
     739            0 :          job_type = "N"
     740              :       END IF
     741              : 
     742              :       ! target the most accurate calculation of the eigenvalues
     743           40 :       abstol = 2.0_dp*dlamch("S")
     744              : 
     745           40 :       context => matrix%matrix_struct%context
     746           40 :       nprow = context%num_pe(1)
     747           40 :       npcol = context%num_pe(2)
     748              : 
     749          120 :       ALLOCATE (w(n))
     750          560 :       eigenvalues(:) = 0.0_dp
     751              : #if defined(__parallel)
     752              : 
     753           40 :       IF (matrix%matrix_struct%nrow_block /= matrix%matrix_struct%ncol_block) THEN
     754            0 :          CPABORT("ERROR in "//routineN//": Invalid blocksize (no square blocks) found")
     755              :       END IF
     756              : 
     757           40 :       a => matrix%local_data
     758          400 :       desca(:) = matrix%matrix_struct%descriptor(:)
     759              : 
     760           40 :       IF (needs_evecs) THEN
     761           40 :          z => eigenvectors%local_data
     762          400 :          descz(:) = eigenvectors%matrix_struct%descriptor(:)
     763              :       ELSE
     764              :          ! z will not be referenced
     765            0 :          z => matrix%local_data
     766            0 :          descz = desca
     767              :       END IF
     768              : 
     769              :       ! Get the optimal work storage size
     770              : 
     771           40 :       npe = nprow*npcol
     772           40 :       nb = matrix%matrix_struct%nrow_block
     773           40 :       nn = MAX(n, nb, 2)
     774           40 :       np0 = numroc(nn, nb, 0, 0, nprow)
     775           40 :       nq0 = MAX(numroc(nn, nb, 0, 0, npcol), nb)
     776              : 
     777           40 :       IF (needs_evecs) THEN
     778              :          lwork = 5*n + MAX(5*nn, np0*nq0) + iceil(neig_local, npe)*nn + 2*nb*nb + &
     779           40 :                  INT(work_syevx_local*REAL((neig_local - 1)*n, dp)) !!!! allocates a full matrix on every CPU !!!!!
     780              :       ELSE
     781            0 :          lwork = 5*n + MAX(5*nn, nb*(np0 + 1))
     782              :       END IF
     783           40 :       liwork = 6*MAX(N, npe + 1, 4)
     784              : 
     785          120 :       ALLOCATE (gap(npe))
     786          120 :       gap = 0.0_dp
     787          120 :       ALLOCATE (iclustr(2*npe))
     788          200 :       iclustr = 0
     789          120 :       ALLOCATE (ifail(n))
     790          560 :       ifail = 0
     791          120 :       ALLOCATE (iwork(liwork))
     792          120 :       ALLOCATE (work(lwork))
     793              : 
     794              :       ! ScaLAPACK takes advantage of IEEE754 exceptions for speedup.
     795              :       ! Therefore, we disable floating point traps temporarily.
     796              : #if defined (__HAS_IEEE_EXCEPTIONS)
     797              :       CALL ieee_get_halting_mode(IEEE_ALL, halt)
     798              :       CALL ieee_set_halting_mode(IEEE_ALL, .FALSE.)
     799              : #endif
     800              :       CALL pdsyevx(job_type, "I", "U", n, a(1, 1), 1, 1, desca, vl, vu, 1, neig_local, abstol, m, nz, w(1), orfac, &
     801           40 :                    z(1, 1), 1, 1, descz, work(1), lwork, iwork(1), liwork, ifail(1), iclustr(1), gap, info)
     802              : #if defined (__HAS_IEEE_EXCEPTIONS)
     803              :       CALL ieee_set_halting_mode(IEEE_ALL, halt)
     804              : #endif
     805              : 
     806              :       ! Error handling
     807           40 :       IF (info /= 0) THEN
     808            0 :          IF (ionode) THEN
     809            0 :             output_unit = cp_logger_get_unit_nr(logger, local=.FALSE.)
     810              :             WRITE (unit=output_unit, FMT="(/,(T3,A,T12,1X,I10))") &
     811            0 :                "info    = ", info, &
     812            0 :                "lwork   = ", lwork, &
     813            0 :                "liwork  = ", liwork, &
     814            0 :                "nz      = ", nz
     815            0 :             IF (info > 0) THEN
     816              :                WRITE (unit=output_unit, FMT="(/,T3,A,(T12,6(1X,I10)))") &
     817            0 :                   "ifail   = ", ifail
     818              :                WRITE (unit=output_unit, FMT="(/,T3,A,(T12,6(1X,I10)))") &
     819            0 :                   "iclustr = ", iclustr
     820              :                WRITE (unit=output_unit, FMT="(/,T3,A,(T12,6(1X,E10.3)))") &
     821            0 :                   "gap     = ", gap
     822              :             END IF
     823              :          END IF
     824            0 :          CPABORT("ERROR in PDSYEVX (ScaLAPACK)")
     825              :       END IF
     826              : 
     827              :       ! Release work storage
     828           40 :       DEALLOCATE (gap)
     829           40 :       DEALLOCATE (iclustr)
     830              : 
     831              : #else
     832              : 
     833              :       a => matrix%local_data
     834              :       IF (needs_evecs) THEN
     835              :          z => eigenvectors%local_data
     836              :       ELSE
     837              :          ! z will not be referenced
     838              :          z => matrix%local_data
     839              :       END IF
     840              : 
     841              :       ! Get the optimal work storage size
     842              : 
     843              :       nb = MAX(ilaenv(1, "DSYTRD", "U", n, -1, -1, -1), &
     844              :                ilaenv(1, "DORMTR", "U", n, -1, -1, -1))
     845              : 
     846              :       lwork = MAX((nb + 3)*n, 8*n) + n ! sun bug fix
     847              :       liwork = 5*n
     848              : 
     849              :       ALLOCATE (ifail(n))
     850              :       ifail = 0
     851              :       ALLOCATE (iwork(liwork))
     852              :       ALLOCATE (work(lwork))
     853              :       info = 0
     854              :       nla = SIZE(a, 1)
     855              :       nlz = SIZE(z, 1)
     856              : 
     857              :       ! LAPACK takes advantage of IEEE754 exceptions for speedup.
     858              :       ! Therefore, we disable floating point traps temporarily.
     859              : #if defined (__HAS_IEEE_EXCEPTIONS)
     860              :       CALL ieee_get_halting_mode(IEEE_ALL, halt)
     861              :       CALL ieee_set_halting_mode(IEEE_ALL, .FALSE.)
     862              : #endif
     863              :       CALL dsyevx(job_type, "I", "U", n, a(1, 1), nla, vl, vu, 1, neig_local, &
     864              :                   abstol, m, w, z(1, 1), nlz, work(1), lwork, iwork(1), ifail(1), info)
     865              : #if defined (__HAS_IEEE_EXCEPTIONS)
     866              :       CALL ieee_set_halting_mode(IEEE_ALL, halt)
     867              : #endif
     868              : 
     869              :       ! Error handling
     870              :       IF (info /= 0) THEN
     871              :          output_unit = cp_logger_get_unit_nr(logger, local=.FALSE.)
     872              :          WRITE (unit=output_unit, FMT="(/,(T3,A,T12,1X,I10))") &
     873              :             "info    = ", info
     874              :          IF (info > 0) THEN
     875              :             WRITE (unit=output_unit, FMT="(/,T3,A,(T12,6(1X,I10)))") &
     876              :                "ifail   = ", ifail
     877              :          END IF
     878              :          CPABORT("Error in DSYEVX (ScaLAPACK)")
     879              :       END IF
     880              : 
     881              : #endif
     882              :       ! Release work storage
     883           40 :       DEALLOCATE (ifail)
     884           40 :       DEALLOCATE (iwork)
     885           40 :       DEALLOCATE (work)
     886          400 :       eigenvalues(1:neig_local) = w(1:neig_local)
     887           40 :       DEALLOCATE (w)
     888              : 
     889           40 :       IF (needs_evecs) CALL check_diag(matrix, eigenvectors, neig_local)
     890              : 
     891           40 :       CALL timestop(handle)
     892              : 
     893          120 :    END SUBROUTINE cp_fm_syevx
     894              : 
     895              : ! **************************************************************************************************
     896              : !> \brief decomposes a quadratic matrix into its singular value decomposition
     897              : !> \param matrix_a ...
     898              : !> \param matrix_eigvl ...
     899              : !> \param matrix_eigvr_t ...
     900              : !> \param eigval ...
     901              : !> \param info ...
     902              : !> \author Maximilian Graml
     903              : ! **************************************************************************************************
     904          100 :    SUBROUTINE cp_fm_svd(matrix_a, matrix_eigvl, matrix_eigvr_t, eigval, info)
     905              : 
     906              :       TYPE(cp_fm_type), INTENT(IN)              :: matrix_a
     907              :       TYPE(cp_fm_type), INTENT(INOUT)           :: matrix_eigvl, matrix_eigvr_t
     908              :       REAL(KIND=dp), DIMENSION(:), POINTER, &
     909              :          INTENT(INOUT)                          :: eigval
     910              :       INTEGER, INTENT(OUT), OPTIONAL            :: info
     911              : 
     912              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_svd'
     913              : 
     914              :       INTEGER                                   :: handle, n, m, myinfo, lwork
     915          100 :       REAL(KIND=dp), DIMENSION(:, :), POINTER   :: a
     916              :       TYPE(cp_fm_type)                          :: matrix_lu
     917              :       REAL(KIND=dp), DIMENSION(:), POINTER      :: work
     918              :       REAL(KIND=dp), TARGET                     :: w(1)
     919              : #if defined(__parallel)
     920              :       INTEGER, DIMENSION(9)                     :: desca, descu, descvt
     921              : #endif
     922              : 
     923          100 :       CALL timeset(routineN, handle)
     924              : 
     925              :       CALL cp_fm_create(matrix=matrix_lu, &
     926              :                         matrix_struct=matrix_a%matrix_struct, &
     927          100 :                         name="A_lu"//TRIM(ADJUSTL(cp_to_string(1)))//"MATRIX")
     928          100 :       CALL cp_fm_to_fm(matrix_a, matrix_lu)
     929          100 :       a => matrix_lu%local_data
     930          100 :       m = matrix_lu%matrix_struct%nrow_global
     931          100 :       n = matrix_lu%matrix_struct%ncol_global
     932              :       ! Assert that incoming matrix is quadratic
     933          100 :       CPASSERT(m == n)
     934              : 
     935              :       ! Prepare for workspace queries
     936          100 :       myinfo = 0
     937          100 :       lwork = -1
     938          100 :       work => w
     939              : #if defined(__parallel)
     940              :       ! To do: That might need a redistribution check as in cp_fm_syevd
     941         1000 :       desca(:) = matrix_lu%matrix_struct%descriptor(:)
     942         1000 :       descu(:) = matrix_eigvl%matrix_struct%descriptor(:)
     943         1000 :       descvt(:) = matrix_eigvr_t%matrix_struct%descriptor(:)
     944              : 
     945              :       ! Workspace query
     946              :       CALL pdgesvd('V', 'V', m, m, matrix_lu%local_data, 1, 1, desca, eigval, matrix_eigvl%local_data, &
     947          100 :                    1, 1, descu, matrix_eigvr_t%local_data, 1, 1, descvt, work, lwork, myinfo)
     948              : 
     949          100 :       IF (matrix_lu%matrix_struct%para_env%is_source() .AND. (myinfo /= 0)) THEN
     950            0 :          CALL cp_fm_error("ERROR in PDGESVD: Work space query failed", myinfo, PRESENT(info))
     951              :       END IF
     952              : 
     953          100 :       lwork = NINT(work(1))
     954          300 :       ALLOCATE (work(lwork))
     955              : 
     956              :       CALL pdgesvd('V', 'V', m, m, matrix_lu%local_data, 1, 1, desca, eigval, matrix_eigvl%local_data, &
     957          100 :                    1, 1, descu, matrix_eigvr_t%local_data, 1, 1, descvt, work, lwork, myinfo)
     958              : 
     959          100 :       IF (matrix_lu%matrix_struct%para_env%is_source() .AND. (myinfo /= 0)) THEN
     960            0 :          CALL cp_fm_error("ERROR in PDGESVD: Matrix diagonalization failed", myinfo, PRESENT(info))
     961              :       END IF
     962              : #else
     963              :       ! Workspace query
     964              :       CALL dgesvd('S', 'S', m, m, matrix_lu%local_data, m, eigval, matrix_eigvl%local_data, &
     965              :                   m, matrix_eigvr_t%local_data, m, work, lwork, myinfo)
     966              : 
     967              :       IF (myinfo /= 0) THEN
     968              :          CALL cp_fm_error("ERROR in DGESVD: Work space query failed", myinfo, PRESENT(info))
     969              :       END IF
     970              : 
     971              :       lwork = NINT(work(1))
     972              :       ALLOCATE (work(lwork))
     973              : 
     974              :       CALL dgesvd('S', 'S', m, m, matrix_lu%local_data, m, eigval, matrix_eigvl%local_data, &
     975              :                   m, matrix_eigvr_t%local_data, m, work, lwork, myinfo)
     976              : 
     977              :       IF (myinfo /= 0) THEN
     978              :          CALL cp_fm_error("ERROR in DGESVD: Matrix diagonalization failed", myinfo, PRESENT(info))
     979              :       END IF
     980              : 
     981              : #endif
     982              :       ! Release intermediary matrices
     983          100 :       DEALLOCATE (work)
     984          100 :       CALL cp_fm_release(matrix_lu)
     985              : 
     986          100 :       IF (PRESENT(info)) info = myinfo
     987              : 
     988          100 :       CALL timestop(handle)
     989          100 :    END SUBROUTINE cp_fm_svd
     990              : 
     991              : ! **************************************************************************************************
     992              : !> \brief ...
     993              : !> \param matrix ...
     994              : !> \param work ...
     995              : !> \param exponent ...
     996              : !> \param threshold ...
     997              : !> \param n_dependent ...
     998              : !> \param verbose ...
     999              : !> \param eigvals ...
    1000              : ! **************************************************************************************************
    1001         2168 :    SUBROUTINE cp_fm_power(matrix, work, exponent, threshold, n_dependent, verbose, eigvals)
    1002              : 
    1003              :       ! Raise the real symmetric n by n matrix to the power given by
    1004              :       ! the exponent. All eigenvectors with a corresponding eigenvalue lower
    1005              :       ! than threshold are quenched. result in matrix
    1006              : 
    1007              :       ! - Creation (29.03.1999, Matthias Krack)
    1008              :       ! - Parallelised using BLACS and ScaLAPACK (06.06.2001,MK)
    1009              : 
    1010              :       TYPE(cp_fm_type), INTENT(IN)               :: matrix, work
    1011              :       REAL(KIND=dp), INTENT(IN)                  :: exponent, threshold
    1012              :       INTEGER, INTENT(OUT)                       :: n_dependent
    1013              :       LOGICAL, INTENT(IN), OPTIONAL              :: verbose
    1014              :       REAL(KIND=dp), DIMENSION(2), INTENT(OUT), &
    1015              :          OPTIONAL                                :: eigvals
    1016              : 
    1017              :       CHARACTER(LEN=*), PARAMETER                :: routineN = 'cp_fm_power'
    1018              : 
    1019              :       INTEGER                                    :: handle, icol_global, &
    1020              :                                                     mypcol, myprow, &
    1021              :                                                     ncol_global, nrow_global
    1022              :       LOGICAL                                    :: my_verbose
    1023              :       REAL(KIND=dp)                              :: condition_number, f, p
    1024              :       REAL(KIND=dp), DIMENSION(:), ALLOCATABLE   :: eigenvalues
    1025         2168 :       REAL(KIND=dp), DIMENSION(:, :), POINTER    :: eigenvectors
    1026              :       TYPE(cp_blacs_env_type), POINTER           :: context
    1027              : 
    1028              : #if defined(__parallel)
    1029              :       INTEGER           :: icol_local, ipcol, iprow, irow_global, irow_local
    1030              : #endif
    1031              : 
    1032         2168 :       CALL timeset(routineN, handle)
    1033              : 
    1034         2168 :       my_verbose = .FALSE.
    1035         2168 :       IF (PRESENT(verbose)) my_verbose = verbose
    1036              : 
    1037         2168 :       context => matrix%matrix_struct%context
    1038         2168 :       myprow = context%mepos(1)
    1039         2168 :       mypcol = context%mepos(2)
    1040         2168 :       n_dependent = 0
    1041         2168 :       p = 0.5_dp*exponent
    1042              : 
    1043         2168 :       nrow_global = matrix%matrix_struct%nrow_global
    1044         2168 :       ncol_global = matrix%matrix_struct%ncol_global
    1045              : 
    1046         6504 :       ALLOCATE (eigenvalues(ncol_global))
    1047        53811 :       eigenvalues(:) = 0.0_dp
    1048              : 
    1049              :       ! Compute the eigenvectors and eigenvalues
    1050              : 
    1051         2168 :       CALL choose_eigv_solver(matrix, work, eigenvalues)
    1052              : 
    1053         2168 :       IF (PRESENT(eigvals)) THEN
    1054          348 :          eigvals(1) = eigenvalues(1)
    1055          348 :          eigvals(2) = eigenvalues(ncol_global)
    1056              :       END IF
    1057              : 
    1058              : #if defined(__parallel)
    1059         2168 :       eigenvectors => work%local_data
    1060              : 
    1061              :       ! Build matrix**exponent with eigenvector quenching
    1062              : 
    1063        53811 :       DO icol_global = 1, ncol_global
    1064              : 
    1065        53811 :          IF (eigenvalues(icol_global) < threshold) THEN
    1066              : 
    1067           50 :             n_dependent = n_dependent + 1
    1068              : 
    1069           50 :             ipcol = work%matrix_struct%g2p_col(icol_global)
    1070              : 
    1071           50 :             IF (mypcol == ipcol) THEN
    1072           50 :                icol_local = work%matrix_struct%g2l_col(icol_global)
    1073         5850 :                DO irow_global = 1, nrow_global
    1074         5800 :                   iprow = work%matrix_struct%g2p_row(irow_global)
    1075         5850 :                   IF (myprow == iprow) THEN
    1076         2900 :                      irow_local = work%matrix_struct%g2l_row(irow_global)
    1077         2900 :                      eigenvectors(irow_local, icol_local) = 0.0_dp
    1078              :                   END IF
    1079              :                END DO
    1080              :             END IF
    1081              : 
    1082              :          ELSE
    1083              : 
    1084        51593 :             f = eigenvalues(icol_global)**p
    1085              : 
    1086        51593 :             ipcol = work%matrix_struct%g2p_col(icol_global)
    1087              : 
    1088        51593 :             IF (mypcol == ipcol) THEN
    1089        51593 :                icol_local = work%matrix_struct%g2l_col(icol_global)
    1090      2990128 :                DO irow_global = 1, nrow_global
    1091      2938535 :                   iprow = work%matrix_struct%g2p_row(irow_global)
    1092      2990128 :                   IF (myprow == iprow) THEN
    1093      1513342 :                      irow_local = work%matrix_struct%g2l_row(irow_global)
    1094              :                      eigenvectors(irow_local, icol_local) = &
    1095      1513342 :                         f*eigenvectors(irow_local, icol_local)
    1096              :                   END IF
    1097              :                END DO
    1098              :             END IF
    1099              : 
    1100              :          END IF
    1101              : 
    1102              :       END DO
    1103              : 
    1104              : #else
    1105              : 
    1106              :       eigenvectors => work%local_data
    1107              : 
    1108              :       ! Build matrix**exponent with eigenvector quenching
    1109              : 
    1110              :       DO icol_global = 1, ncol_global
    1111              : 
    1112              :          IF (eigenvalues(icol_global) < threshold) THEN
    1113              : 
    1114              :             n_dependent = n_dependent + 1
    1115              :             eigenvectors(1:nrow_global, icol_global) = 0.0_dp
    1116              : 
    1117              :          ELSE
    1118              : 
    1119              :             f = eigenvalues(icol_global)**p
    1120              :             eigenvectors(1:nrow_global, icol_global) = &
    1121              :                f*eigenvectors(1:nrow_global, icol_global)
    1122              : 
    1123              :          END IF
    1124              : 
    1125              :       END DO
    1126              : 
    1127              : #endif
    1128         2168 :       CALL cp_fm_syrk("U", "N", ncol_global, 1.0_dp, work, 1, 1, 0.0_dp, matrix)
    1129         2168 :       CALL cp_fm_uplo_to_full(matrix, work)
    1130              : 
    1131              :       ! Print some warnings/notes
    1132         2168 :       IF (matrix%matrix_struct%para_env%is_source() .AND. my_verbose) THEN
    1133            0 :          condition_number = ABS(eigenvalues(ncol_global)/eigenvalues(1))
    1134              :          WRITE (UNIT=cp_logger_get_default_unit_nr(), FMT="(/,(T2,A,ES15.6))") &
    1135            0 :             "CP_FM_POWER: smallest eigenvalue:", eigenvalues(1), &
    1136            0 :             "CP_FM_POWER: largest eigenvalue: ", eigenvalues(ncol_global), &
    1137            0 :             "CP_FM_POWER: condition number:   ", condition_number
    1138            0 :          IF (eigenvalues(1) <= 0.0_dp) THEN
    1139              :             WRITE (UNIT=cp_logger_get_default_unit_nr(), FMT="(/,T2,A)") &
    1140            0 :                "WARNING: matrix has a negative eigenvalue, tighten EPS_DEFAULT"
    1141              :          END IF
    1142            0 :          IF (condition_number > 1.0E12_dp) THEN
    1143              :             WRITE (UNIT=cp_logger_get_default_unit_nr(), FMT="(/,T2,A)") &
    1144            0 :                "WARNING: high condition number => possibly ill-conditioned matrix"
    1145              :          END IF
    1146              :       END IF
    1147              : 
    1148         2168 :       DEALLOCATE (eigenvalues)
    1149              : 
    1150         2168 :       CALL timestop(handle)
    1151              : 
    1152         2168 :    END SUBROUTINE cp_fm_power
    1153              : 
    1154              : ! **************************************************************************************************
    1155              : !> \brief ...
    1156              : !> \param matrix ...
    1157              : !> \param eigenvectors ...
    1158              : !> \param eigval ...
    1159              : !> \param thresh ...
    1160              : !> \param start_sec_block ...
    1161              : ! **************************************************************************************************
    1162           18 :    SUBROUTINE cp_fm_block_jacobi(matrix, eigenvectors, eigval, thresh, start_sec_block)
    1163              : 
    1164              :       ! Calculates block diagonalization of a full symmetric matrix
    1165              :       ! It has its origin in cp_fm_syevx. This routine rotates only elements
    1166              :       ! which are larger than a threshold values "thresh".
    1167              :       ! start_sec_block is the start of the second block.
    1168              :       ! IT DOES ONLY ONE SWEEP!
    1169              : 
    1170              :       ! - Creation (07.10.2002, Martin Fengler)
    1171              :       ! - Cosmetics (05.04.06, MK)
    1172              : 
    1173              :       TYPE(cp_fm_type), INTENT(IN)              :: eigenvectors, matrix
    1174              :       REAL(KIND=dp), DIMENSION(:), INTENT(IN)   :: eigval
    1175              :       INTEGER, INTENT(IN)                       :: start_sec_block
    1176              :       REAL(KIND=dp), INTENT(IN)                 :: thresh
    1177              : 
    1178              :       CHARACTER(len=*), PARAMETER               :: routineN = 'cp_fm_block_jacobi'
    1179              : 
    1180              :       INTEGER :: handle
    1181              :       REAL(KIND=dp), DIMENSION(:, :), POINTER   :: a, ev
    1182              : 
    1183              :       REAL(KIND=dp) :: tan_theta, tau, c, s
    1184              :       INTEGER  :: q, p, N
    1185           18 :       REAL(KIND=dp), DIMENSION(:), ALLOCATABLE  :: c_ip
    1186              : 
    1187              : #if defined(__parallel)
    1188              :       TYPE(cp_blacs_env_type), POINTER :: context
    1189              : 
    1190              :       INTEGER :: nprow, npcol, block_dim_row, block_dim_col, info, &
    1191              :                  ev_row_block_size, iam, mynumrows, mype, npe, q_loc
    1192           18 :       REAL(KIND=dp), DIMENSION(:, :), ALLOCATABLE  :: a_loc, ev_loc
    1193              :       INTEGER, DIMENSION(9)                        :: desca, descz, &
    1194              :                                                       desc_a_block, &
    1195              :                                                       desc_ev_loc
    1196              :       TYPE(mp_comm_type):: allgrp
    1197              :       TYPE(cp_blacs_type) :: ictxt_loc
    1198              :       INTEGER, EXTERNAL :: numroc
    1199              : #endif
    1200              : 
    1201              :       ! -------------------------------------------------------------------------
    1202              : 
    1203           18 :       CALL timeset(routineN, handle)
    1204              : 
    1205              : #if defined(__parallel)
    1206           18 :       context => matrix%matrix_struct%context
    1207           18 :       allgrp = matrix%matrix_struct%para_env
    1208              : 
    1209           18 :       nprow = context%num_pe(1)
    1210           18 :       npcol = context%num_pe(2)
    1211              : 
    1212           18 :       N = matrix%matrix_struct%nrow_global
    1213              : 
    1214           18 :       A => matrix%local_data
    1215          180 :       desca(:) = matrix%matrix_struct%descriptor(:)
    1216           18 :       EV => eigenvectors%local_data
    1217          180 :       descz(:) = eigenvectors%matrix_struct%descriptor(:)
    1218              : 
    1219              :       ! Copy the block to be rotated to the master process firstly and broadcast to all processes
    1220              :       ! start_sec_block defines where the second block starts!
    1221              :       ! Block will be processed together with the OO block
    1222              : 
    1223           18 :       block_dim_row = start_sec_block - 1
    1224           18 :       block_dim_col = N - block_dim_row
    1225           72 :       ALLOCATE (A_loc(block_dim_row, block_dim_col))
    1226              : 
    1227           18 :       mype = matrix%matrix_struct%para_env%mepos
    1228           18 :       npe = matrix%matrix_struct%para_env%num_pe
    1229              :       ! Get a new context
    1230           18 :       CALL ictxt_loc%gridinit(matrix%matrix_struct%para_env, 'Row-major', nprow*npcol, 1)
    1231              : 
    1232              :       CALL descinit(desc_a_block, block_dim_row, block_dim_col, block_dim_row, &
    1233           18 :                     block_dim_col, 0, 0, ictxt_loc%get_handle(), block_dim_row, info)
    1234              : 
    1235              :       CALL pdgemr2d(block_dim_row, block_dim_col, A, 1, start_sec_block, desca, &
    1236           18 :                     A_loc, 1, 1, desc_a_block, context%get_handle())
    1237              :       ! Only the master (root) process received data yet
    1238           18 :       CALL allgrp%bcast(A_loc, 0)
    1239              : 
    1240              :       ! Since each process owns now the upper block, the eigenvectors can be re-sorted in such a way that
    1241              :       ! each process has a NN*1 grid, i.e. the process owns a bunch of rows which can be modified locally
    1242              : 
    1243              :       ! Initialize distribution of the eigenvectors
    1244           18 :       iam = mype
    1245           18 :       ev_row_block_size = n/(nprow*npcol)
    1246           18 :       mynumrows = NUMROC(N, ev_row_block_size, iam, 0, nprow*npcol)
    1247              : 
    1248          108 :       ALLOCATE (EV_loc(mynumrows, N), c_ip(mynumrows))
    1249              : 
    1250              :       CALL descinit(desc_ev_loc, N, N, ev_row_block_size, N, 0, 0, ictxt_loc%get_handle(), &
    1251           18 :                     mynumrows, info)
    1252              : 
    1253           18 :       CALL pdgemr2d(N, N, EV, 1, 1, descz, EV_loc, 1, 1, desc_ev_loc, context%get_handle())
    1254              : 
    1255              :       ! Start block diagonalization of matrix
    1256              : 
    1257           18 :       q_loc = 0
    1258              : 
    1259         1170 :       DO q = start_sec_block, N
    1260         1152 :          q_loc = q_loc + 1
    1261       148626 :          DO p = 1, (start_sec_block - 1)
    1262              : 
    1263       148608 :             IF (ABS(A_loc(p, q_loc)) > thresh) THEN
    1264              : 
    1265       117566 :                tau = (eigval(q) - eigval(p))/(2.0_dp*A_loc(p, q_loc))
    1266              : 
    1267       117566 :                tan_theta = SIGN(1.0_dp, tau)/(ABS(tau) + SQRT(1.0_dp + tau*tau))
    1268              : 
    1269              :                ! Cos(theta)
    1270       117566 :                c = 1.0_dp/SQRT(1.0_dp + tan_theta*tan_theta)
    1271       117566 :                s = tan_theta*c
    1272              : 
    1273              :                ! Calculate eigenvectors: Q*J
    1274              :                ! c_ip = c*EV_loc(:,p) - s*EV_loc(:,q)
    1275              :                ! c_iq = s*EV_loc(:,p) + c*EV_loc(:,q)
    1276              :                ! EV(:,p) = c_ip
    1277              :                ! EV(:,q) = c_iq
    1278       117566 :                CALL dcopy(mynumrows, EV_loc(1, p), 1, c_ip(1), 1)
    1279       117566 :                CALL dscal(mynumrows, c, EV_loc(1, p), 1)
    1280       117566 :                CALL daxpy(mynumrows, -s, EV_loc(1, q), 1, EV_loc(1, p), 1)
    1281       117566 :                CALL dscal(mynumrows, c, EV_loc(1, q), 1)
    1282       117566 :                CALL daxpy(mynumrows, s, c_ip(1), 1, EV_loc(1, q), 1)
    1283              : 
    1284              :             END IF
    1285              : 
    1286              :          END DO
    1287              :       END DO
    1288              : 
    1289              :       ! Copy eigenvectors back to the original distribution
    1290           18 :       CALL pdgemr2d(N, N, EV_loc, 1, 1, desc_ev_loc, EV, 1, 1, descz, context%get_handle())
    1291              : 
    1292              :       ! Release work storage
    1293           18 :       DEALLOCATE (A_loc, EV_loc, c_ip)
    1294              : 
    1295           18 :       CALL ictxt_loc%gridexit()
    1296              : 
    1297              : #else
    1298              : 
    1299              :       N = matrix%matrix_struct%nrow_global
    1300              : 
    1301              :       ALLOCATE (c_ip(N)) ! Local eigenvalue vector
    1302              : 
    1303              :       A => matrix%local_data ! Contains the Matrix to be worked on
    1304              :       EV => eigenvectors%local_data ! Contains the eigenvectors up to blocksize, rest is garbage
    1305              : 
    1306              :       ! Start matrix diagonalization
    1307              : 
    1308              :       tan_theta = 0.0_dp
    1309              :       tau = 0.0_dp
    1310              : 
    1311              :       DO q = start_sec_block, N
    1312              :          DO p = 1, (start_sec_block - 1)
    1313              : 
    1314              :             IF (ABS(A(p, q)) > thresh) THEN
    1315              : 
    1316              :                tau = (eigval(q) - eigval(p))/(2.0_dp*A(p, q))
    1317              : 
    1318              :                tan_theta = SIGN(1.0_dp, tau)/(ABS(tau) + SQRT(1.0_dp + tau*tau))
    1319              : 
    1320              :                ! Cos(theta)
    1321              :                c = 1.0_dp/SQRT(1.0_dp + tan_theta*tan_theta)
    1322              :                s = tan_theta*c
    1323              : 
    1324              :                ! Calculate eigenvectors: Q*J
    1325              :                ! c_ip = c*EV(:,p) - s*EV(:,q)
    1326              :                ! c_iq = s*EV(:,p) + c*EV(:,q)
    1327              :                ! EV(:,p) = c_ip
    1328              :                ! EV(:,q) = c_iq
    1329              :                CALL dcopy(N, EV(1, p), 1, c_ip(1), 1)
    1330              :                CALL dscal(N, c, EV(1, p), 1)
    1331              :                CALL daxpy(N, -s, EV(1, q), 1, EV(1, p), 1)
    1332              :                CALL dscal(N, c, EV(1, q), 1)
    1333              :                CALL daxpy(N, s, c_ip(1), 1, EV(1, q), 1)
    1334              : 
    1335              :             END IF
    1336              : 
    1337              :          END DO
    1338              :       END DO
    1339              : 
    1340              :       ! Release work storage
    1341              : 
    1342              :       DEALLOCATE (c_ip)
    1343              : 
    1344              : #endif
    1345              : 
    1346           18 :       CALL timestop(handle)
    1347              : 
    1348           90 :    END SUBROUTINE cp_fm_block_jacobi
    1349              : 
    1350              : ! **************************************************************************************************
    1351              : !> \brief General Eigenvalue Problem  AX = BXE
    1352              : !>        Single option version: Cholesky decomposition of B
    1353              : !> \param amatrix ...
    1354              : !> \param bmatrix ...
    1355              : !> \param eigenvectors ...
    1356              : !> \param eigenvalues ...
    1357              : !> \param work ...
    1358              : ! **************************************************************************************************
    1359          368 :    SUBROUTINE cp_fm_geeig(amatrix, bmatrix, eigenvectors, eigenvalues, work)
    1360              : 
    1361              :       TYPE(cp_fm_type), INTENT(IN)                       :: amatrix, bmatrix, eigenvectors
    1362              :       REAL(KIND=dp), DIMENSION(:)                        :: eigenvalues
    1363              :       TYPE(cp_fm_type), INTENT(IN)                       :: work
    1364              : 
    1365              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'cp_fm_geeig'
    1366              : 
    1367              :       INTEGER                                            :: handle, nao, nmo
    1368              : 
    1369          368 :       CALL timeset(routineN, handle)
    1370              : 
    1371          368 :       CALL cp_fm_get_info(amatrix, nrow_global=nao)
    1372          368 :       nmo = SIZE(eigenvalues)
    1373              : 
    1374          368 :       IF (diag_type == FM_DIAG_TYPE_CUSOLVER .AND. cusolver_generalized .AND. nao >= 64) THEN
    1375              :          ! Use cuSolverMP generalized eigenvalue solver for large matrices
    1376              :          ! Use work as intermediate buffer since eigenvectors may be smaller (nao x nmo)
    1377            0 :          CALL cp_fm_general_cusolver(amatrix, bmatrix, work, eigenvalues)
    1378            0 :          CALL cp_fm_to_fm(work, eigenvectors, nmo)
    1379              : #if defined(__DLAF)
    1380              :       ELSE IF (diag_type == FM_DIAG_TYPE_DLAF .AND. amatrix%matrix_struct%nrow_global >= dlaf_neigvec_min) THEN
    1381              :          ! Use DLA-Future generalized eigenvalue solver for large matrices
    1382              :          CALL cp_fm_diag_gen_dlaf(amatrix, bmatrix, eigenvectors, eigenvalues)
    1383              : #endif
    1384              :       ELSE
    1385              :          ! Cholesky decompose S=U(T)U
    1386          368 :          CALL cp_fm_cholesky_decompose(bmatrix)
    1387              :          ! Invert to get U^(-1)
    1388          368 :          CALL cp_fm_triangular_invert(bmatrix)
    1389              :          ! Reduce to get U^(-T) * H * U^(-1)
    1390          368 :          CALL cp_fm_triangular_multiply(bmatrix, amatrix, side="R")
    1391          368 :          CALL cp_fm_triangular_multiply(bmatrix, amatrix, transpose_tr=.TRUE.)
    1392              :          ! Diagonalize
    1393              :          CALL choose_eigv_solver(matrix=amatrix, eigenvectors=work, &
    1394          368 :                                  eigenvalues=eigenvalues)
    1395              :          ! Restore vectors C = U^(-1) * C*
    1396          368 :          CALL cp_fm_triangular_multiply(bmatrix, work)
    1397          368 :          CALL cp_fm_to_fm(work, eigenvectors, nmo)
    1398              :       END IF
    1399              : 
    1400          368 :       CALL timestop(handle)
    1401              : 
    1402          368 :    END SUBROUTINE cp_fm_geeig
    1403              : 
    1404              : ! **************************************************************************************************
    1405              : !> \brief General Eigenvalue Problem  AX = BXE
    1406              : !>        Use canonical diagonalization : U*s**(-1/2)
    1407              : !> \param amatrix ...
    1408              : !> \param bmatrix ...
    1409              : !> \param eigenvectors ...
    1410              : !> \param eigenvalues ...
    1411              : !> \param work ...
    1412              : !> \param epseig ...
    1413              : ! **************************************************************************************************
    1414           66 :    SUBROUTINE cp_fm_geeig_canon(amatrix, bmatrix, eigenvectors, eigenvalues, work, epseig)
    1415              : 
    1416              :       TYPE(cp_fm_type), INTENT(IN)                       :: amatrix, bmatrix, eigenvectors
    1417              :       REAL(KIND=dp), DIMENSION(:), INTENT(OUT)           :: eigenvalues
    1418              :       TYPE(cp_fm_type), INTENT(IN)                       :: work
    1419              :       REAL(KIND=dp), INTENT(IN)                          :: epseig
    1420              : 
    1421              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'cp_fm_geeig_canon'
    1422              : 
    1423              :       INTEGER                                            :: handle, i, icol, irow, nao, nc, ncol, &
    1424              :                                                             nmo, nx
    1425              :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: evals
    1426              : 
    1427           66 :       CALL timeset(routineN, handle)
    1428              : 
    1429              :       ! Test sizees
    1430           66 :       CALL cp_fm_get_info(amatrix, nrow_global=nao)
    1431           66 :       nmo = SIZE(eigenvalues)
    1432          198 :       ALLOCATE (evals(nao))
    1433              : 
    1434              :       ! Diagonalize -S matrix, this way the NULL space is at the end of the spectrum
    1435           66 :       CALL cp_fm_scale(-1.0_dp, bmatrix)
    1436           66 :       CALL choose_eigv_solver(matrix=bmatrix, eigenvectors=work, eigenvalues=evals)
    1437         4720 :       evals(:) = -evals(:)
    1438           66 :       nc = nao
    1439         4428 :       DO i = 1, nao
    1440         4428 :          IF (evals(i) < epseig) THEN
    1441           40 :             nc = i - 1
    1442           40 :             EXIT
    1443              :          END IF
    1444              :       END DO
    1445           66 :       CPASSERT(nc /= 0)
    1446              : 
    1447           66 :       IF (nc /= nao) THEN
    1448           40 :          IF (nc < nmo) THEN
    1449              :             ! Copy NULL space definition to last vectors of eigenvectors (if needed)
    1450            0 :             ncol = nmo - nc
    1451            0 :             CALL cp_fm_to_fm(work, eigenvectors, ncol, nc + 1, nc + 1)
    1452              :          END IF
    1453              :          ! Set NULL space in eigenvector matrix of S to zero
    1454          332 :          DO icol = nc + 1, nao
    1455        36172 :             DO irow = 1, nao
    1456        36132 :                CALL cp_fm_set_element(work, irow, icol, 0.0_dp)
    1457              :             END DO
    1458              :          END DO
    1459              :          ! Set small eigenvalues to a dummy save value
    1460          332 :          evals(nc + 1:nao) = 1.0_dp
    1461              :       END IF
    1462              :       ! Calculate U*s**(-1/2)
    1463         4720 :       evals(:) = 1.0_dp/SQRT(evals(:))
    1464           66 :       CALL cp_fm_column_scale(work, evals)
    1465              :       ! Reduce to get U^(-T) * H * U^(-1)
    1466           66 :       CALL cp_fm_gemm("T", "N", nao, nao, nao, 1.0_dp, work, amatrix, 0.0_dp, bmatrix)
    1467           66 :       CALL cp_fm_gemm("N", "N", nao, nao, nao, 1.0_dp, bmatrix, work, 0.0_dp, amatrix)
    1468           66 :       IF (nc /= nao) THEN
    1469              :          ! set diagonal values to save large value
    1470          332 :          DO icol = nc + 1, nao
    1471          332 :             CALL cp_fm_set_element(amatrix, icol, icol, 10000.0_dp)
    1472              :          END DO
    1473              :       END IF
    1474              :       ! Diagonalize
    1475           66 :       CALL choose_eigv_solver(matrix=amatrix, eigenvectors=bmatrix, eigenvalues=eigenvalues)
    1476           66 :       nx = MIN(nc, nmo)
    1477              :       ! Restore vectors C = U^(-1) * C*
    1478           66 :       CALL cp_fm_gemm("N", "N", nao, nx, nc, 1.0_dp, work, bmatrix, 0.0_dp, eigenvectors)
    1479              : 
    1480           66 :       DEALLOCATE (evals)
    1481              : 
    1482           66 :       CALL timestop(handle)
    1483              : 
    1484           66 :    END SUBROUTINE cp_fm_geeig_canon
    1485              : 
    1486              : END MODULE cp_fm_diag
        

Generated by: LCOV version 2.0-1