LCOV - code coverage report
Current view: top level - src/fm - cp_fm_diag.F (source / functions) Coverage Total Hit
Test: CP2K Regtests (git:936074a) Lines: 82.6 % 408 337
Test Date: 2025-12-04 06:27:48 Functions: 92.3 % 13 12

            Line data    Source code
       1              : !--------------------------------------------------------------------------------------------------!
       2              : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3              : !   Copyright 2000-2025 CP2K developers group <https://cp2k.org>                                   !
       4              : !                                                                                                  !
       5              : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6              : !--------------------------------------------------------------------------------------------------!
       7              : 
       8              : ! **************************************************************************************************
       9              : !> \brief used for collecting some of the diagonalization schemes available for
      10              : !>      cp_fm_type. cp_fm_power also moved here as it is very related
      11              : !> \note
      12              : !>      first version : most routines imported
      13              : !> \par History
      14              : !>      - unused Jacobi routines removed, cosmetics (05.04.06,MK)
      15              : !> \author Joost VandeVondele (2003-08)
      16              : ! **************************************************************************************************
      17              : MODULE cp_fm_diag
      18              :    USE cp_blacs_types, ONLY: cp_blacs_type
      19              :    USE cp_blacs_env, ONLY: cp_blacs_env_type
      20              :    USE cp_fm_basic_linalg, ONLY: cp_fm_column_scale, &
      21              :                                  cp_fm_gemm, &
      22              :                                  cp_fm_scale, &
      23              :                                  cp_fm_syrk, &
      24              :                                  cp_fm_triangular_invert, &
      25              :                                  cp_fm_triangular_multiply, &
      26              :                                  cp_fm_uplo_to_full
      27              :    USE cp_fm_cholesky, ONLY: cp_fm_cholesky_decompose
      28              :    USE cp_fm_diag_utils, ONLY: cp_fm_redistribute_end, &
      29              :                                cp_fm_redistribute_start
      30              :    USE cp_fm_elpa, ONLY: cp_fm_diag_elpa, &
      31              :                          finalize_elpa_library, &
      32              :                          initialize_elpa_library, &
      33              :                          set_elpa_kernel, &
      34              :                          set_elpa_print, &
      35              :                          set_elpa_qr
      36              :    USE cp_fm_cusolver_api, ONLY: cp_fm_diag_cusolver, &
      37              :                                  cp_fm_general_cusolver
      38              : #if defined(__DLAF)
      39              :    USE cp_fm_dlaf_api, ONLY: cp_fm_diag_dlaf, cp_fm_diag_gen_dlaf
      40              :    USE cp_dlaf_utils_api, ONLY: cp_dlaf_initialize, cp_dlaf_finalize
      41              : #endif
      42              :    USE cp_fm_types, ONLY: cp_fm_get_info, &
      43              :                           cp_fm_set_element, &
      44              :                           cp_fm_to_fm, &
      45              :                           cp_fm_type, &
      46              :                           cp_fm_create, &
      47              :                           cp_fm_get_info, &
      48              :                           cp_fm_release, &
      49              :                           cp_fm_set_all, &
      50              :                           cp_fm_to_fm, &
      51              :                           cp_fm_to_fm_submat, &
      52              :                           cp_fm_type
      53              :    USE cp_fm_struct, ONLY: cp_fm_struct_equivalent, &
      54              :                            cp_fm_struct_create, &
      55              :                            cp_fm_struct_release, &
      56              :                            cp_fm_struct_type
      57              :    USE cp_log_handling, ONLY: cp_logger_get_default_unit_nr, &
      58              :                               cp_get_default_logger, &
      59              :                               cp_logger_get_default_io_unit, &
      60              :                               cp_logger_type, &
      61              :                               cp_to_string
      62              :    USE cp_log_handling, ONLY: cp_get_default_logger, &
      63              :                               cp_logger_get_default_unit_nr, &
      64              :                               cp_logger_get_unit_nr, &
      65              :                               cp_logger_type
      66              :    USE kinds, ONLY: default_string_length, &
      67              :                     dp
      68              :    USE machine, ONLY: default_output_unit, &
      69              :                       m_memory
      70              : #if defined (__parallel)
      71              :    USE message_passing, ONLY: mp_comm_type
      72              : #endif
      73              : #if defined (__HAS_IEEE_EXCEPTIONS)
      74              :    USE ieee_exceptions, ONLY: ieee_get_halting_mode, &
      75              :                               ieee_set_halting_mode, &
      76              :                               IEEE_ALL
      77              : #endif
      78              : #include "../base/base_uses.f90"
      79              : 
      80              :    IMPLICIT NONE
      81              :    PRIVATE
      82              : 
      83              :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_fm_diag'
      84              : 
      85              :    REAL(KIND=dp), PARAMETER, PUBLIC :: eps_check_diag_default = 5.0E-14_dp
      86              : 
      87              :    ! The following saved variables are diagonalization global
      88              :    ! Stores the default library for diagonalization
      89              :    INTEGER, SAVE, PUBLIC    :: diag_type = 0
      90              :    ! Minimum number of eigenvectors for the use of the ELPA eigensolver.
      91              :    ! The ScaLAPACK eigensolver is used as fallback for all smaller cases.
      92              :    INTEGER, SAVE            :: elpa_neigvec_min = 0
      93              : #if defined(__DLAF)
      94              :    ! Minimum number of eigenvectors for the use of the DLAF eigensolver.
      95              :    ! The ScaLAPACK eigensolver is used as fallback for all smaller cases.
      96              :    INTEGER, SAVE, PUBLIC    :: dlaf_neigvec_min = 0
      97              : #endif
      98              :    ! Threshold value for the orthonormality check of the eigenvectors obtained
      99              :    ! after a diagonalization. A negative value disables the check.
     100              :    REAL(KIND=dp), SAVE :: eps_check_diag = -1.0_dp
     101              : 
     102              :    ! Constants for the diag_type above
     103              :    INTEGER, PARAMETER, PUBLIC  :: FM_DIAG_TYPE_SCALAPACK = 101, &
     104              :                                   FM_DIAG_TYPE_ELPA = 102, &
     105              :                                   FM_DIAG_TYPE_CUSOLVER = 103, &
     106              :                                   FM_DIAG_TYPE_DLAF = 104
     107              : #if defined(__CUSOLVERMP)
     108              :    INTEGER, PARAMETER, PUBLIC :: FM_DIAG_TYPE_DEFAULT = FM_DIAG_TYPE_CUSOLVER
     109              : #elif defined(__ELPA)
     110              :    INTEGER, PARAMETER, PUBLIC :: FM_DIAG_TYPE_DEFAULT = FM_DIAG_TYPE_ELPA
     111              : #else
     112              :    INTEGER, PARAMETER, PUBLIC :: FM_DIAG_TYPE_DEFAULT = FM_DIAG_TYPE_SCALAPACK
     113              : #endif
     114              : 
     115              :    ! Public subroutines
     116              :    PUBLIC :: choose_eigv_solver, &
     117              :              cp_fm_block_jacobi, &
     118              :              cp_fm_power, &
     119              :              cp_fm_syevd, &
     120              :              cp_fm_syevx, &
     121              :              cp_fm_svd, &
     122              :              cp_fm_geeig, &
     123              :              cp_fm_geeig_canon, &
     124              :              diag_init, &
     125              :              diag_finalize
     126              : 
     127              : CONTAINS
     128              : 
     129              : ! **************************************************************************************************
     130              : !> \brief Setup the diagonalization library to be used
     131              : !> \param diag_lib diag_library flag from GLOBAL section in input
     132              : !> \param fallback_applied .TRUE. if support for the requested library was not compiled-in and fallback
     133              : !>                         to ScaLAPACK was applied, .FALSE. otherwise.
     134              : !> \param elpa_kernel integer that determines which ELPA kernel to use for diagonalization
     135              : !> \param elpa_neigvec_min_input ...
     136              : !> \param elpa_qr logical that determines if ELPA should try to use QR to accelerate the
     137              : !>                diagonalization procedure of suitably sized matrices
     138              : !> \param elpa_print logical that determines if information about the ELPA diagonalization should
     139              : !>                   be printed
     140              : !> \param elpa_qr_unsafe logical that enables potentially unsafe ELPA options
     141              : !> \param dlaf_neigvec_min_input ...
     142              : !> \param eps_check_diag_input ...
     143              : !> \par History
     144              : !>      - Add support for DLA-Future (05.09.2023, RMeli)
     145              : !> \author  MI 11.2013
     146              : ! **************************************************************************************************
     147         9881 :    SUBROUTINE diag_init(diag_lib, fallback_applied, elpa_kernel, elpa_neigvec_min_input, elpa_qr, &
     148              :                         elpa_print, elpa_qr_unsafe, dlaf_neigvec_min_input, eps_check_diag_input)
     149              :       CHARACTER(LEN=*), INTENT(IN)                       :: diag_lib
     150              :       LOGICAL, INTENT(OUT)                               :: fallback_applied
     151              :       INTEGER, INTENT(IN)                                :: elpa_kernel, elpa_neigvec_min_input
     152              :       LOGICAL, INTENT(IN)                                :: elpa_qr, elpa_print, elpa_qr_unsafe
     153              :       INTEGER, INTENT(IN)                                :: dlaf_neigvec_min_input
     154              :       REAL(KIND=dp), INTENT(IN)                          :: eps_check_diag_input
     155              : 
     156              :       LOGICAL, SAVE                                      :: initialized = .FALSE.
     157              : 
     158         9881 :       fallback_applied = .FALSE.
     159              : 
     160         9881 :       IF (diag_lib == "ScaLAPACK") THEN
     161          194 :          diag_type = FM_DIAG_TYPE_SCALAPACK
     162         9687 :       ELSE IF (diag_lib == "ELPA") THEN
     163              : #if defined (__ELPA)
     164              :          ! ELPA is requested and available
     165         9687 :          diag_type = FM_DIAG_TYPE_ELPA
     166              : #else
     167              :          ! ELPA library requested but not linked, switch back to SL
     168              :          diag_type = FM_DIAG_TYPE_SCALAPACK
     169              :          fallback_applied = .TRUE.
     170              : #endif
     171            0 :       ELSE IF (diag_lib == "cuSOLVER") THEN
     172            0 :          diag_type = FM_DIAG_TYPE_CUSOLVER
     173            0 :       ELSE IF (diag_lib == "DLAF") THEN
     174              : #if defined (__DLAF)
     175              :          diag_type = FM_DIAG_TYPE_DLAF
     176              : #else
     177            0 :          CPABORT("ERROR in diag_init: CP2K was not compiled with DLA-Future support")
     178              : #endif
     179              :       ELSE
     180            0 :          CPABORT("ERROR in diag_init: Initialization of unknown diagonalization library requested")
     181              :       END IF
     182              : 
     183              :       ! Initialization of requested diagonalization library
     184         9881 :       IF (.NOT. initialized .AND. diag_type == FM_DIAG_TYPE_ELPA) THEN
     185         9090 :          CALL initialize_elpa_library()
     186         9090 :          CALL set_elpa_kernel(elpa_kernel)
     187         9090 :          CALL set_elpa_qr(elpa_qr, elpa_qr_unsafe)
     188         9090 :          CALL set_elpa_print(elpa_print)
     189         9090 :          initialized = .TRUE.
     190              :       END IF
     191              : #if defined(__DLAF)
     192              :       IF (.NOT. initialized .AND. diag_type == FM_DIAG_TYPE_DLAF) THEN
     193              :          CALL cp_dlaf_initialize()
     194              :          initialized = .TRUE.
     195              :       END IF
     196              :       dlaf_neigvec_min = dlaf_neigvec_min_input
     197              : #else
     198              :       MARK_USED(dlaf_neigvec_min_input)
     199              : #endif
     200              : 
     201         9881 :       elpa_neigvec_min = elpa_neigvec_min_input
     202         9881 :       eps_check_diag = eps_check_diag_input
     203              : 
     204         9881 :    END SUBROUTINE diag_init
     205              : 
     206              : ! **************************************************************************************************
     207              : !> \brief Finalize the diagonalization library
     208              : ! **************************************************************************************************
     209         9671 :    SUBROUTINE diag_finalize()
     210              : #if defined (__ELPA)
     211         9671 :       IF (diag_type == FM_DIAG_TYPE_ELPA) &
     212         9477 :          CALL finalize_elpa_library()
     213              : #endif
     214              : #if defined (__DLAF)
     215              :       IF (diag_type == FM_DIAG_TYPE_DLAF) &
     216              :          CALL cp_dlaf_finalize()
     217              : #endif
     218         9671 :    END SUBROUTINE diag_finalize
     219              : 
     220              : ! **************************************************************************************************
     221              : !> \brief   Choose the Eigensolver depending on which library is available
     222              : !>          ELPA seems to be unstable for small systems
     223              : !> \param matrix ...
     224              : !> \param eigenvectors ...
     225              : !> \param eigenvalues ...
     226              : !> \param info ...
     227              : !> \par     info If present returns error code and prevents program stops.
     228              : !>               Works currently only for cp_fm_syevd with ScaLAPACK.
     229              : !>               Other solvers will end the program regardless of PRESENT(info).
     230              : !> \par History
     231              : !>      - Do not use ELPA for small matrices and use instead ScaLAPACK as fallback (10.05.2021, MK)
     232              : ! **************************************************************************************************
     233       177687 :    SUBROUTINE choose_eigv_solver(matrix, eigenvectors, eigenvalues, info)
     234              : 
     235              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix, eigenvectors
     236              :       REAL(KIND=dp), DIMENSION(:), INTENT(OUT)           :: eigenvalues
     237              :       INTEGER, INTENT(OUT), OPTIONAL                     :: info
     238              : 
     239              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'choose_eigv_solver'
     240              : 
     241              :       ! Sample peak memory
     242       177687 :       CALL m_memory()
     243              : 
     244       177687 :       IF (PRESENT(info)) info = 0 ! Default for solvers that do not return an info.
     245              : 
     246       177687 :       IF (diag_type == FM_DIAG_TYPE_SCALAPACK) THEN
     247         4544 :          CALL cp_fm_syevd(matrix, eigenvectors, eigenvalues, info)
     248              : 
     249       173143 :       ELSE IF (diag_type == FM_DIAG_TYPE_ELPA) THEN
     250       173143 :          IF (matrix%matrix_struct%nrow_global < elpa_neigvec_min) THEN
     251              :             ! We don't trust ELPA with very small matrices.
     252       109632 :             CALL cp_fm_syevd(matrix, eigenvectors, eigenvalues, info)
     253              :          ELSE
     254        63511 :             CALL cp_fm_diag_elpa(matrix, eigenvectors, eigenvalues)
     255              :          END IF
     256              : 
     257            0 :       ELSE IF (diag_type == FM_DIAG_TYPE_CUSOLVER) THEN
     258            0 :          IF (matrix%matrix_struct%nrow_global < 64) THEN
     259              :             ! We don't trust cuSolver with very small matrices.
     260            0 :             CALL cp_fm_syevd(matrix, eigenvectors, eigenvalues, info)
     261              :          ELSE
     262            0 :             CALL cp_fm_diag_cusolver(matrix, eigenvectors, eigenvalues)
     263              :          END IF
     264              : 
     265              : #if defined(__DLAF)
     266              :       ELSE IF (diag_type == FM_DIAG_TYPE_DLAF) THEN
     267              :          IF (matrix%matrix_struct%nrow_global < dlaf_neigvec_min) THEN
     268              :             ! Use ScaLAPACK for small matrices
     269              :             CALL cp_fm_syevd(matrix, eigenvectors, eigenvalues, info)
     270              :          ELSE
     271              :             CALL cp_fm_diag_dlaf(matrix, eigenvectors, eigenvalues)
     272              :          END IF
     273              : #endif
     274              : 
     275              :       ELSE
     276            0 :          CPABORT("ERROR in "//routineN//": Invalid diagonalization type requested")
     277              :       END IF
     278              : 
     279       177687 :       CALL check_diag(matrix, eigenvectors, nvec=SIZE(eigenvalues))
     280              : 
     281       177687 :    END SUBROUTINE choose_eigv_solver
     282              : 
     283              : ! **************************************************************************************************
     284              : !> \brief   Check result of diagonalization, i.e. the orthonormality of the eigenvectors
     285              : !> \param matrix Work matrix
     286              : !> \param eigenvectors Eigenvectors to be checked
     287              : !> \param nvec ...
     288              : ! **************************************************************************************************
     289       295229 :    SUBROUTINE check_diag(matrix, eigenvectors, nvec)
     290              : 
     291              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix, eigenvectors
     292              :       INTEGER, INTENT(IN)                                :: nvec
     293              : 
     294              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'check_diag'
     295              : 
     296              :       CHARACTER(LEN=default_string_length)               :: diag_type_name
     297              :       REAL(KIND=dp)                                      :: eps, eps_abort, eps_warning, gold, test
     298              :       INTEGER                                            :: handle, i, j, ncol, nrow, output_unit
     299              :       LOGICAL                                            :: check_eigenvectors
     300              : #if defined(__parallel)
     301              :       TYPE(cp_blacs_env_type), POINTER                   :: context
     302              :       INTEGER                                            :: il, jl, ipcol, iprow, &
     303              :                                                             mypcol, myprow, npcol, nprow
     304              :       INTEGER, DIMENSION(9)                              :: desca
     305              : #endif
     306              : 
     307       295229 :       CALL timeset(routineN, handle)
     308              : 
     309       295229 :       output_unit = default_output_unit
     310       295229 :       eps_warning = eps_check_diag_default
     311              : #if defined(__CHECK_DIAG)
     312              :       check_eigenvectors = .TRUE.
     313              :       IF (eps_check_diag >= 0.0_dp) THEN
     314              :          eps_warning = eps_check_diag
     315              :       END IF
     316              : #else
     317       295229 :       IF (eps_check_diag >= 0.0_dp) THEN
     318          242 :          check_eigenvectors = .TRUE.
     319          242 :          eps_warning = eps_check_diag
     320              :       ELSE
     321              :          check_eigenvectors = .FALSE.
     322              :       END IF
     323              : #endif
     324       295229 :       eps_abort = 10.0_dp*eps_warning
     325              : 
     326       295229 :       gold = 0.0_dp
     327       295229 :       test = 0.0_dp
     328       295229 :       eps = 0.0_dp
     329              : 
     330       295229 :       IF (check_eigenvectors) THEN
     331              : #if defined(__parallel)
     332          242 :          nrow = eigenvectors%matrix_struct%nrow_global
     333          242 :          ncol = MIN(eigenvectors%matrix_struct%ncol_global, nvec)
     334          242 :          CALL cp_fm_gemm("T", "N", ncol, ncol, nrow, 1.0_dp, eigenvectors, eigenvectors, 0.0_dp, matrix)
     335          242 :          context => matrix%matrix_struct%context
     336          242 :          myprow = context%mepos(1)
     337          242 :          mypcol = context%mepos(2)
     338          242 :          nprow = context%num_pe(1)
     339          242 :          npcol = context%num_pe(2)
     340         2420 :          desca(:) = matrix%matrix_struct%descriptor(:)
     341         2792 :          outer: DO j = 1, ncol
     342        41810 :             DO i = 1, ncol
     343        39018 :                CALL infog2l(i, j, desca, nprow, npcol, myprow, mypcol, il, jl, iprow, ipcol)
     344        41568 :                IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
     345        19509 :                   gold = MERGE(0.0_dp, 1.0_dp, i /= j)
     346        19509 :                   test = matrix%local_data(il, jl)
     347        19509 :                   eps = ABS(test - gold)
     348        19509 :                   IF (eps > eps_warning) EXIT outer
     349              :                END IF
     350              :             END DO
     351              :          END DO outer
     352              : #else
     353              :          nrow = SIZE(eigenvectors%local_data, 1)
     354              :          ncol = MIN(SIZE(eigenvectors%local_data, 2), nvec)
     355              :          CALL dgemm("T", "N", ncol, ncol, nrow, 1.0_dp, &
     356              :                     eigenvectors%local_data(1, 1), nrow, &
     357              :                     eigenvectors%local_data(1, 1), nrow, &
     358              :                     0.0_dp, matrix%local_data(1, 1), nrow)
     359              :          outer: DO j = 1, ncol
     360              :             DO i = 1, ncol
     361              :                gold = MERGE(0.0_dp, 1.0_dp, i /= j)
     362              :                test = matrix%local_data(i, j)
     363              :                eps = ABS(test - gold)
     364              :                IF (eps > eps_warning) EXIT outer
     365              :             END DO
     366              :          END DO outer
     367              : #endif
     368          242 :          IF (eps > eps_warning) THEN
     369            0 :             IF (diag_type == FM_DIAG_TYPE_SCALAPACK) THEN
     370            0 :                diag_type_name = "SYEVD"
     371            0 :             ELSE IF (diag_type == FM_DIAG_TYPE_ELPA) THEN
     372            0 :                diag_type_name = "ELPA"
     373            0 :             ELSE IF (diag_type == FM_DIAG_TYPE_CUSOLVER) THEN
     374            0 :                diag_type_name = "CUSOLVER"
     375            0 :             ELSE IF (diag_type == FM_DIAG_TYPE_DLAF) THEN
     376            0 :                diag_type_name = "DLAF"
     377              :             ELSE
     378            0 :                CPABORT("Unknown diag_type")
     379              :             END IF
     380              :             WRITE (UNIT=output_unit, FMT="(/,T2,A,/,T2,A,I0,A,I0,A,F0.15,/,T2,A,F0.0,A,ES10.3)") &
     381            0 :                "The eigenvectors returned by "//TRIM(diag_type_name)//" are not orthonormal", &
     382            0 :                "Matrix element (", i, ", ", j, ") = ", test, &
     383            0 :                "The deviation from the expected value ", gold, " is", eps
     384            0 :             IF (eps > eps_abort) THEN
     385            0 :                CPABORT("ERROR in "//routineN//": Check of matrix diagonalization failed")
     386              :             ELSE
     387            0 :                CPWARN("Check of matrix diagonalization failed in routine "//routineN)
     388              :             END IF
     389              :          END IF
     390              :       END IF
     391              : 
     392       295229 :       CALL timestop(handle)
     393              : 
     394       295229 :    END SUBROUTINE check_diag
     395              : 
     396              : ! **************************************************************************************************
     397              : !> \brief Issues an error messages and exits (optionally only warns).
     398              : !> \param mesg message to be issued
     399              : !> \param info error code (optional)
     400              : !> \param warn only warn (optional)
     401              : ! **************************************************************************************************
     402            0 :    SUBROUTINE cp_fm_error(mesg, info, warn)
     403              :       CHARACTER(LEN=*), INTENT(IN)                       :: mesg
     404              :       INTEGER, INTENT(IN), OPTIONAL                      :: info
     405              :       LOGICAL, INTENT(IN), OPTIONAL                      :: warn
     406              : 
     407              :       CHARACTER(LEN=2*default_string_length)             :: message
     408              :       LOGICAL                                            :: warning
     409              : 
     410            0 :       IF (PRESENT(info)) THEN
     411            0 :          WRITE (message, "(A,A,I0,A)") mesg, " (INFO = ", info, ")"
     412              :       ELSE
     413            0 :          WRITE (message, "(A)") mesg
     414              :       END IF
     415              : 
     416            0 :       IF (PRESENT(warn)) THEN
     417            0 :          warning = warn
     418              :       ELSE ! abort
     419              :          warning = .FALSE.
     420              :       END IF
     421              : 
     422            0 :       IF (warning) THEN
     423            0 :          CPWARN(TRIM(message))
     424              :       ELSE
     425            0 :          CPABORT(TRIM(message))
     426              :       END IF
     427            0 :    END SUBROUTINE cp_fm_error
     428              : 
     429              : ! **************************************************************************************************
     430              : !> \brief   Computes all eigenvalues and vectors of a real symmetric matrix
     431              : !>          significantly faster than syevx, scales also much better.
     432              : !>          Needs workspace to allocate all the eigenvectors
     433              : !> \param matrix ...
     434              : !> \param eigenvectors ...
     435              : !> \param eigenvalues ...
     436              : !> \param info ...
     437              : !> \par     matrix is supposed to be in upper triangular form, and overwritten by this routine
     438              : !> \par     info If present returns error code and prevents program stops.
     439              : !>               Works currently only for scalapack.
     440              : !>               Other solvers will end the program regardless of PRESENT(info).
     441              : ! **************************************************************************************************
     442       117502 :    SUBROUTINE cp_fm_syevd(matrix, eigenvectors, eigenvalues, info)
     443              : 
     444              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix, eigenvectors
     445              :       REAL(KIND=dp), DIMENSION(:), INTENT(OUT) :: eigenvalues
     446              :       INTEGER, INTENT(OUT), OPTIONAL           :: info
     447              : 
     448              :       CHARACTER(LEN=*), PARAMETER              :: routineN = 'cp_fm_syevd'
     449              : 
     450              :       INTEGER                                  :: handle, myinfo, n, nmo
     451              :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: eig
     452              : #if defined(__parallel)
     453              :       TYPE(cp_fm_type)                         :: eigenvectors_new, matrix_new
     454              : #else
     455              :       INTEGER                                  :: liwork, lwork
     456              :       INTEGER, DIMENSION(:), POINTER           :: iwork
     457              :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: m
     458              :       REAL(KIND=dp), DIMENSION(:), POINTER     :: work
     459              :       INTEGER, TARGET                          :: v(1)
     460              :       REAL(KIND=dp), TARGET                    :: w(1)
     461              : #endif
     462              : 
     463       117502 :       CALL timeset(routineN, handle)
     464              : 
     465       117502 :       myinfo = 0
     466              : 
     467       117502 :       n = matrix%matrix_struct%nrow_global
     468       352506 :       ALLOCATE (eig(n))
     469              : 
     470              : #if defined(__parallel)
     471              :       ! Determine if the input matrix needs to be redistributed before diagonalization.
     472              :       ! Heuristics are used to determine the optimal number of CPUs for diagonalization.
     473              :       ! The redistributed matrix is stored in matrix_new, which is just a pointer
     474              :       ! to the original matrix if no redistribution is required
     475       117502 :       CALL cp_fm_redistribute_start(matrix, eigenvectors, matrix_new, eigenvectors_new)
     476              : 
     477              :       ! Call scalapack on CPUs that hold the new matrix
     478       117502 :       IF (ASSOCIATED(matrix_new%matrix_struct)) THEN
     479        59545 :          IF (PRESENT(info)) THEN
     480         1120 :             CALL cp_fm_syevd_base(matrix_new, eigenvectors_new, eig, myinfo)
     481              :          ELSE
     482        58425 :             CALL cp_fm_syevd_base(matrix_new, eigenvectors_new, eig)
     483              :          END IF
     484              :       END IF
     485              :       ! Redistribute results and clean up
     486       117502 :       CALL cp_fm_redistribute_end(matrix, eigenvectors, eig, matrix_new, eigenvectors_new)
     487              : #else
     488              :       ! Retrieve the optimal work array sizes first
     489              :       lwork = -1
     490              :       liwork = -1
     491              :       m => matrix%local_data
     492              :       iwork => v
     493              :       work => w
     494              : 
     495              :       CALL dsyevd('V', 'U', n, m(1, 1), SIZE(m, 1), eig(1), work(1), lwork, iwork(1), liwork, myinfo)
     496              : 
     497              :       IF (myinfo /= 0) THEN
     498              :          CALL cp_fm_error("ERROR in DSYEVD: Work space query failed", myinfo, PRESENT(info))
     499              :       END IF
     500              : 
     501              :       ! Reallocate work arrays and perform diagonalisation
     502              :       lwork = NINT(work(1))
     503              :       ALLOCATE (work(lwork))
     504              : 
     505              :       liwork = iwork(1)
     506              :       ALLOCATE (iwork(liwork))
     507              : 
     508              :       CALL dsyevd('V', 'U', n, m(1, 1), SIZE(m, 1), eig(1), work(1), lwork, iwork(1), liwork, myinfo)
     509              : 
     510              :       IF (myinfo /= 0) THEN
     511              :          CALL cp_fm_error("ERROR in DSYEVD: Matrix diagonalization failed", myinfo, PRESENT(info))
     512              :       END IF
     513              : 
     514              :       CALL cp_fm_to_fm(matrix, eigenvectors)
     515              : 
     516              :       DEALLOCATE (iwork)
     517              :       DEALLOCATE (work)
     518              : #endif
     519              : 
     520       117502 :       IF (PRESENT(info)) info = myinfo
     521              : 
     522       117502 :       nmo = SIZE(eigenvalues, 1)
     523       117502 :       IF (nmo > n) THEN
     524            0 :          eigenvalues(1:n) = eig(1:n)
     525              :       ELSE
     526       807626 :          eigenvalues(1:nmo) = eig(1:nmo)
     527              :       END IF
     528              : 
     529       117502 :       DEALLOCATE (eig)
     530              : 
     531       117502 :       CALL check_diag(matrix, eigenvectors, n)
     532              : 
     533       117502 :       CALL timestop(handle)
     534              : 
     535       235004 :    END SUBROUTINE cp_fm_syevd
     536              : 
     537              : ! **************************************************************************************************
     538              : !> \brief ...
     539              : !> \param matrix ...
     540              : !> \param eigenvectors ...
     541              : !> \param eigenvalues ...
     542              : !> \param info ...
     543              : ! **************************************************************************************************
     544        59545 :    SUBROUTINE cp_fm_syevd_base(matrix, eigenvectors, eigenvalues, info)
     545              : 
     546              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix, eigenvectors
     547              :       REAL(KIND=dp), DIMENSION(:), INTENT(OUT) :: eigenvalues
     548              :       INTEGER, INTENT(OUT), OPTIONAL           :: info
     549              : 
     550              :       CHARACTER(LEN=*), PARAMETER              :: routineN = 'cp_fm_syevd_base'
     551              : 
     552              :       INTEGER                                  :: handle, myinfo
     553              : #if defined(__parallel)
     554              :       TYPE(cp_blacs_env_type), POINTER         :: context
     555              :       INTEGER                                  :: liwork, lwork, n
     556              :       INTEGER, DIMENSION(9)                    :: descm, descv
     557        59545 :       INTEGER, DIMENSION(:), POINTER           :: iwork
     558        59545 :       REAL(KIND=dp), DIMENSION(:), POINTER     :: work
     559        59545 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: m, v
     560              :       REAL(KIND=dp), TARGET                    :: w(1)
     561              : #if defined (__HAS_IEEE_EXCEPTIONS)
     562              :       LOGICAL, DIMENSION(5)                    :: halt
     563              : #endif
     564              : #endif
     565              : 
     566        59545 :       CALL timeset(routineN, handle)
     567              : 
     568        59545 :       myinfo = 0
     569              : 
     570              : #if defined(__parallel)
     571              : 
     572        59545 :       n = matrix%matrix_struct%nrow_global
     573        59545 :       m => matrix%local_data
     574        59545 :       context => matrix%matrix_struct%context
     575       595450 :       descm(:) = matrix%matrix_struct%descriptor(:)
     576              : 
     577        59545 :       v => eigenvectors%local_data
     578       595450 :       descv(:) = eigenvectors%matrix_struct%descriptor(:)
     579              : 
     580        59545 :       liwork = 7*n + 8*context%num_pe(2) + 2
     581       178635 :       ALLOCATE (iwork(liwork))
     582              : 
     583              :       ! Work space query
     584        59545 :       lwork = -1
     585        59545 :       work => w
     586              : 
     587              :       CALL pdsyevd('V', 'U', n, m(1, 1), 1, 1, descm, eigenvalues(1), v(1, 1), 1, 1, descv, &
     588        59545 :                    work(1), lwork, iwork(1), liwork, myinfo)
     589              : 
     590        59545 :       IF (matrix%matrix_struct%para_env%is_source() .AND. (myinfo /= 0)) THEN
     591            0 :          CALL cp_fm_error("ERROR in PDSYEVD: Work space query failed", myinfo, PRESENT(info))
     592              :       END IF
     593              : 
     594        59545 :       lwork = NINT(work(1)) ! can be insufficient due to bug in reference ScaLAPACK
     595              : #if !defined(__SCALAPACK_NO_WA)
     596              :       ! Query workspace for QDORMTR as called by reference ScaLAPACK (PDSYEVD).
     597              :       CALL pdormtr('L', 'U', 'N', n, n, m(1, 1), 1, 1, descm, m(1, 1), &
     598        59545 :                    v(1, 1), 1, 1, descv, work(1), -1, myinfo)
     599              : 
     600        59545 :       IF (matrix%matrix_struct%para_env%is_source() .AND. (myinfo /= 0)) THEN
     601            0 :          CALL cp_fm_error("ERROR in PDORMTR: Work space query failed", myinfo, PRESENT(info))
     602              :       END IF
     603              : 
     604        59545 :       IF (lwork < (work(1) + 2*n)) THEN
     605        11992 :          lwork = NINT(work(1)) + 2*n ! still wrong by 2*N
     606              :       END IF
     607              : #endif
     608       178635 :       ALLOCATE (work(lwork))
     609              : 
     610              :       ! Initial/documented amount of liwork is exceeded (slightly worrisome too).
     611        59545 :       IF (liwork < iwork(1)) THEN
     612            0 :          liwork = iwork(1)
     613            0 :          DEALLOCATE (iwork)
     614            0 :          ALLOCATE (iwork(liwork))
     615              :       END IF
     616              : 
     617              :       ! ScaLAPACK takes advantage of IEEE754 exceptions for speedup.
     618              :       ! Therefore, we disable floating point traps temporarily.
     619              : #if defined (__HAS_IEEE_EXCEPTIONS)
     620              :       CALL ieee_get_halting_mode(IEEE_ALL, halt)
     621              :       CALL ieee_set_halting_mode(IEEE_ALL, .FALSE.)
     622              : #endif
     623              : 
     624              :       CALL pdsyevd('V', 'U', n, m(1, 1), 1, 1, descm, eigenvalues(1), v(1, 1), 1, 1, descv, &
     625        59545 :                    work(1), lwork, iwork(1), liwork, myinfo)
     626              : 
     627              : #if defined (__HAS_IEEE_EXCEPTIONS)
     628              :       CALL ieee_set_halting_mode(IEEE_ALL, halt)
     629              : #endif
     630        59545 :       IF (matrix%matrix_struct%para_env%is_source() .AND. (myinfo /= 0)) THEN
     631            0 :          CALL cp_fm_error("ERROR in PDSYEVD: Matrix diagonalization failed", myinfo, PRESENT(info))
     632              :       END IF
     633              : 
     634        59545 :       IF (PRESENT(info)) info = myinfo
     635              : 
     636        59545 :       DEALLOCATE (work)
     637        59545 :       DEALLOCATE (iwork)
     638              : #else
     639              :       MARK_USED(matrix)
     640              :       MARK_USED(eigenvectors)
     641              :       MARK_USED(eigenvalues)
     642              :       myinfo = -1
     643              :       IF (PRESENT(info)) info = myinfo
     644              :       CALL cp_fm_error("ERROR in "//TRIM(routineN)// &
     645              :                        ": Matrix diagonalization using PDSYEVD requested without ScaLAPACK support")
     646              : #endif
     647              : 
     648        59545 :       CALL timestop(handle)
     649              : 
     650        59545 :    END SUBROUTINE cp_fm_syevd_base
     651              : 
     652              : ! **************************************************************************************************
     653              : !> \brief   compute eigenvalues and optionally eigenvectors of a real symmetric matrix using scalapack.
     654              : !>          If eigenvectors are required this routine will replicate a full matrix on each CPU...
     655              : !>          if more than a handful of vectors are needed, use cp_fm_syevd instead
     656              : !> \param matrix ...
     657              : !> \param eigenvectors ...
     658              : !> \param eigenvalues ...
     659              : !> \param neig ...
     660              : !> \param work_syevx ...
     661              : !> \par     matrix is supposed to be in upper triangular form, and overwritten by this routine
     662              : !>          neig   is the number of vectors needed (default all)
     663              : !>          work_syevx evec calculation only, is the fraction of the working buffer allowed (1.0 use full buffer)
     664              : !>                     reducing this saves time, but might cause the routine to fail
     665              : ! **************************************************************************************************
     666           40 :    SUBROUTINE cp_fm_syevx(matrix, eigenvectors, eigenvalues, neig, work_syevx)
     667              : 
     668              :       ! Diagonalise the symmetric n by n matrix using the LAPACK library.
     669              : 
     670              :       TYPE(cp_fm_type), INTENT(IN)                 :: matrix
     671              :       TYPE(cp_fm_type), OPTIONAL, INTENT(IN)       :: eigenvectors
     672              :       REAL(KIND=dp), OPTIONAL, INTENT(IN)          :: work_syevx
     673              :       INTEGER, INTENT(IN), OPTIONAL                :: neig
     674              :       REAL(KIND=dp), DIMENSION(:), INTENT(OUT)     :: eigenvalues
     675              : 
     676              :       CHARACTER(LEN=*), PARAMETER                  :: routineN = "cp_fm_syevx"
     677              : 
     678              : #if defined(__parallel)
     679              :       REAL(KIND=dp), PARAMETER                     :: orfac = -1.0_dp
     680              : #endif
     681              :       REAL(KIND=dp), PARAMETER                     :: vl = 0.0_dp, &
     682              :                                                       vu = 0.0_dp
     683              : 
     684              :       TYPE(cp_blacs_env_type), POINTER             :: context
     685              :       TYPE(cp_logger_type), POINTER                :: logger
     686              :       CHARACTER(LEN=1)                             :: job_type
     687              :       REAL(KIND=dp)                                :: abstol, work_syevx_local
     688              :       INTEGER                                      :: handle, info, liwork, lwork, &
     689              :                                                       m, n, nb, npcol, nprow, &
     690              :                                                       output_unit, neig_local
     691              :       LOGICAL                                      :: ionode, needs_evecs
     692           40 :       INTEGER, DIMENSION(:), ALLOCATABLE           :: ifail, iwork
     693           40 :       REAL(KIND=dp), DIMENSION(:), ALLOCATABLE     :: w, work
     694           40 :       REAL(KIND=dp), DIMENSION(:, :), POINTER      :: a, z
     695              : 
     696              :       REAL(KIND=dp), EXTERNAL                      :: dlamch
     697              : 
     698              : #if defined(__parallel)
     699              :       INTEGER                                      :: nn, np0, npe, nq0, nz
     700              :       INTEGER, DIMENSION(9)                        :: desca, descz
     701           40 :       INTEGER, DIMENSION(:), ALLOCATABLE           :: iclustr
     702           40 :       REAL(KIND=dp), DIMENSION(:), ALLOCATABLE     :: gap
     703              :       INTEGER, EXTERNAL                            :: iceil, numroc
     704              : #else
     705              :       INTEGER                                      :: nla, nlz
     706              :       INTEGER, EXTERNAL                            :: ilaenv
     707              : #endif
     708              : #if defined (__HAS_IEEE_EXCEPTIONS)
     709              :       LOGICAL, DIMENSION(5)                        :: halt
     710              : #endif
     711              : 
     712              :       ! by default all
     713           40 :       n = matrix%matrix_struct%nrow_global
     714           40 :       neig_local = n
     715           40 :       IF (PRESENT(neig)) neig_local = neig
     716           40 :       IF (neig_local == 0) RETURN
     717              : 
     718           40 :       CALL timeset(routineN, handle)
     719              : 
     720           40 :       needs_evecs = PRESENT(eigenvectors)
     721              : 
     722           40 :       logger => cp_get_default_logger()
     723           40 :       ionode = logger%para_env%is_source()
     724           40 :       n = matrix%matrix_struct%nrow_global
     725              : 
     726              :       ! by default allocate all needed space
     727           40 :       work_syevx_local = 1.0_dp
     728           40 :       IF (PRESENT(work_syevx)) work_syevx_local = work_syevx
     729              : 
     730              :       ! set scalapack job type
     731           40 :       IF (needs_evecs) THEN
     732           40 :          job_type = "V"
     733              :       ELSE
     734            0 :          job_type = "N"
     735              :       END IF
     736              : 
     737              :       ! target the most accurate calculation of the eigenvalues
     738           40 :       abstol = 2.0_dp*dlamch("S")
     739              : 
     740           40 :       context => matrix%matrix_struct%context
     741           40 :       nprow = context%num_pe(1)
     742           40 :       npcol = context%num_pe(2)
     743              : 
     744          120 :       ALLOCATE (w(n))
     745          560 :       eigenvalues(:) = 0.0_dp
     746              : #if defined(__parallel)
     747              : 
     748           40 :       IF (matrix%matrix_struct%nrow_block /= matrix%matrix_struct%ncol_block) THEN
     749            0 :          CPABORT("ERROR in "//routineN//": Invalid blocksize (no square blocks) found")
     750              :       END IF
     751              : 
     752           40 :       a => matrix%local_data
     753          400 :       desca(:) = matrix%matrix_struct%descriptor(:)
     754              : 
     755           40 :       IF (needs_evecs) THEN
     756           40 :          z => eigenvectors%local_data
     757          400 :          descz(:) = eigenvectors%matrix_struct%descriptor(:)
     758              :       ELSE
     759              :          ! z will not be referenced
     760            0 :          z => matrix%local_data
     761            0 :          descz = desca
     762              :       END IF
     763              : 
     764              :       ! Get the optimal work storage size
     765              : 
     766           40 :       npe = nprow*npcol
     767           40 :       nb = matrix%matrix_struct%nrow_block
     768           40 :       nn = MAX(n, nb, 2)
     769           40 :       np0 = numroc(nn, nb, 0, 0, nprow)
     770           40 :       nq0 = MAX(numroc(nn, nb, 0, 0, npcol), nb)
     771              : 
     772           40 :       IF (needs_evecs) THEN
     773              :          lwork = 5*n + MAX(5*nn, np0*nq0) + iceil(neig_local, npe)*nn + 2*nb*nb + &
     774           40 :                  INT(work_syevx_local*REAL((neig_local - 1)*n, dp)) !!!! allocates a full matrix on every CPU !!!!!
     775              :       ELSE
     776            0 :          lwork = 5*n + MAX(5*nn, nb*(np0 + 1))
     777              :       END IF
     778           40 :       liwork = 6*MAX(N, npe + 1, 4)
     779              : 
     780          120 :       ALLOCATE (gap(npe))
     781          120 :       gap = 0.0_dp
     782          120 :       ALLOCATE (iclustr(2*npe))
     783          200 :       iclustr = 0
     784          120 :       ALLOCATE (ifail(n))
     785          560 :       ifail = 0
     786          120 :       ALLOCATE (iwork(liwork))
     787          120 :       ALLOCATE (work(lwork))
     788              : 
     789              :       ! ScaLAPACK takes advantage of IEEE754 exceptions for speedup.
     790              :       ! Therefore, we disable floating point traps temporarily.
     791              : #if defined (__HAS_IEEE_EXCEPTIONS)
     792              :       CALL ieee_get_halting_mode(IEEE_ALL, halt)
     793              :       CALL ieee_set_halting_mode(IEEE_ALL, .FALSE.)
     794              : #endif
     795              :       CALL pdsyevx(job_type, "I", "U", n, a(1, 1), 1, 1, desca, vl, vu, 1, neig_local, abstol, m, nz, w(1), orfac, &
     796           40 :                    z(1, 1), 1, 1, descz, work(1), lwork, iwork(1), liwork, ifail(1), iclustr(1), gap, info)
     797              : #if defined (__HAS_IEEE_EXCEPTIONS)
     798              :       CALL ieee_set_halting_mode(IEEE_ALL, halt)
     799              : #endif
     800              : 
     801              :       ! Error handling
     802           40 :       IF (info /= 0) THEN
     803            0 :          IF (ionode) THEN
     804            0 :             output_unit = cp_logger_get_unit_nr(logger, local=.FALSE.)
     805              :             WRITE (unit=output_unit, FMT="(/,(T3,A,T12,1X,I10))") &
     806            0 :                "info    = ", info, &
     807            0 :                "lwork   = ", lwork, &
     808            0 :                "liwork  = ", liwork, &
     809            0 :                "nz      = ", nz
     810            0 :             IF (info > 0) THEN
     811              :                WRITE (unit=output_unit, FMT="(/,T3,A,(T12,6(1X,I10)))") &
     812            0 :                   "ifail   = ", ifail
     813              :                WRITE (unit=output_unit, FMT="(/,T3,A,(T12,6(1X,I10)))") &
     814            0 :                   "iclustr = ", iclustr
     815              :                WRITE (unit=output_unit, FMT="(/,T3,A,(T12,6(1X,E10.3)))") &
     816            0 :                   "gap     = ", gap
     817              :             END IF
     818              :          END IF
     819            0 :          CPABORT("ERROR in PDSYEVX (ScaLAPACK)")
     820              :       END IF
     821              : 
     822              :       ! Release work storage
     823           40 :       DEALLOCATE (gap)
     824           40 :       DEALLOCATE (iclustr)
     825              : 
     826              : #else
     827              : 
     828              :       a => matrix%local_data
     829              :       IF (needs_evecs) THEN
     830              :          z => eigenvectors%local_data
     831              :       ELSE
     832              :          ! z will not be referenced
     833              :          z => matrix%local_data
     834              :       END IF
     835              : 
     836              :       ! Get the optimal work storage size
     837              : 
     838              :       nb = MAX(ilaenv(1, "DSYTRD", "U", n, -1, -1, -1), &
     839              :                ilaenv(1, "DORMTR", "U", n, -1, -1, -1))
     840              : 
     841              :       lwork = MAX((nb + 3)*n, 8*n) + n ! sun bug fix
     842              :       liwork = 5*n
     843              : 
     844              :       ALLOCATE (ifail(n))
     845              :       ifail = 0
     846              :       ALLOCATE (iwork(liwork))
     847              :       ALLOCATE (work(lwork))
     848              :       info = 0
     849              :       nla = SIZE(a, 1)
     850              :       nlz = SIZE(z, 1)
     851              : 
     852              :       ! LAPACK takes advantage of IEEE754 exceptions for speedup.
     853              :       ! Therefore, we disable floating point traps temporarily.
     854              : #if defined (__HAS_IEEE_EXCEPTIONS)
     855              :       CALL ieee_get_halting_mode(IEEE_ALL, halt)
     856              :       CALL ieee_set_halting_mode(IEEE_ALL, .FALSE.)
     857              : #endif
     858              :       CALL dsyevx(job_type, "I", "U", n, a(1, 1), nla, vl, vu, 1, neig_local, &
     859              :                   abstol, m, w, z(1, 1), nlz, work(1), lwork, iwork(1), ifail(1), info)
     860              : #if defined (__HAS_IEEE_EXCEPTIONS)
     861              :       CALL ieee_set_halting_mode(IEEE_ALL, halt)
     862              : #endif
     863              : 
     864              :       ! Error handling
     865              :       IF (info /= 0) THEN
     866              :          output_unit = cp_logger_get_unit_nr(logger, local=.FALSE.)
     867              :          WRITE (unit=output_unit, FMT="(/,(T3,A,T12,1X,I10))") &
     868              :             "info    = ", info
     869              :          IF (info > 0) THEN
     870              :             WRITE (unit=output_unit, FMT="(/,T3,A,(T12,6(1X,I10)))") &
     871              :                "ifail   = ", ifail
     872              :          END IF
     873              :          CPABORT("Error in DSYEVX (ScaLAPACK)")
     874              :       END IF
     875              : 
     876              : #endif
     877              :       ! Release work storage
     878           40 :       DEALLOCATE (ifail)
     879           40 :       DEALLOCATE (iwork)
     880           40 :       DEALLOCATE (work)
     881          400 :       eigenvalues(1:neig_local) = w(1:neig_local)
     882           40 :       DEALLOCATE (w)
     883              : 
     884           40 :       IF (needs_evecs) CALL check_diag(matrix, eigenvectors, neig_local)
     885              : 
     886           40 :       CALL timestop(handle)
     887              : 
     888          120 :    END SUBROUTINE cp_fm_syevx
     889              : 
     890              : ! **************************************************************************************************
     891              : !> \brief decomposes a quadratic matrix into its singular value decomposition
     892              : !> \param matrix_a ...
     893              : !> \param matrix_eigvl ...
     894              : !> \param matrix_eigvr_t ...
     895              : !> \param eigval ...
     896              : !> \param info ...
     897              : !> \author Maximilian Graml
     898              : ! **************************************************************************************************
     899          100 :    SUBROUTINE cp_fm_svd(matrix_a, matrix_eigvl, matrix_eigvr_t, eigval, info)
     900              : 
     901              :       TYPE(cp_fm_type), INTENT(IN)              :: matrix_a
     902              :       TYPE(cp_fm_type), INTENT(INOUT)           :: matrix_eigvl, matrix_eigvr_t
     903              :       REAL(KIND=dp), DIMENSION(:), POINTER, &
     904              :          INTENT(INOUT)                          :: eigval
     905              :       INTEGER, INTENT(OUT), OPTIONAL            :: info
     906              : 
     907              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_svd'
     908              : 
     909              :       INTEGER                                   :: handle, n, m, myinfo, lwork
     910          100 :       REAL(KIND=dp), DIMENSION(:, :), POINTER   :: a
     911              :       TYPE(cp_fm_type)                          :: matrix_lu
     912              :       REAL(KIND=dp), DIMENSION(:), POINTER      :: work
     913              :       REAL(KIND=dp), TARGET                     :: w(1)
     914              : #if defined(__parallel)
     915              :       INTEGER, DIMENSION(9)                     :: desca, descu, descvt
     916              : #endif
     917              : 
     918          100 :       CALL timeset(routineN, handle)
     919              : 
     920              :       CALL cp_fm_create(matrix=matrix_lu, &
     921              :                         matrix_struct=matrix_a%matrix_struct, &
     922          100 :                         name="A_lu"//TRIM(ADJUSTL(cp_to_string(1)))//"MATRIX")
     923          100 :       CALL cp_fm_to_fm(matrix_a, matrix_lu)
     924          100 :       a => matrix_lu%local_data
     925          100 :       m = matrix_lu%matrix_struct%nrow_global
     926          100 :       n = matrix_lu%matrix_struct%ncol_global
     927              :       ! Assert that incoming matrix is quadratic
     928          100 :       CPASSERT(m == n)
     929              : 
     930              :       ! Prepare for workspace queries
     931          100 :       myinfo = 0
     932          100 :       lwork = -1
     933          100 :       work => w
     934              : #if defined(__parallel)
     935              :       ! To do: That might need a redistribution check as in cp_fm_syevd
     936         1000 :       desca(:) = matrix_lu%matrix_struct%descriptor(:)
     937         1000 :       descu(:) = matrix_eigvl%matrix_struct%descriptor(:)
     938         1000 :       descvt(:) = matrix_eigvr_t%matrix_struct%descriptor(:)
     939              : 
     940              :       ! Workspace query
     941              :       CALL pdgesvd('V', 'V', m, m, matrix_lu%local_data, 1, 1, desca, eigval, matrix_eigvl%local_data, &
     942          100 :                    1, 1, descu, matrix_eigvr_t%local_data, 1, 1, descvt, work, lwork, myinfo)
     943              : 
     944          100 :       IF (matrix_lu%matrix_struct%para_env%is_source() .AND. (myinfo /= 0)) THEN
     945            0 :          CALL cp_fm_error("ERROR in PDGESVD: Work space query failed", myinfo, PRESENT(info))
     946              :       END IF
     947              : 
     948          100 :       lwork = NINT(work(1))
     949          300 :       ALLOCATE (work(lwork))
     950              : 
     951              :       CALL pdgesvd('V', 'V', m, m, matrix_lu%local_data, 1, 1, desca, eigval, matrix_eigvl%local_data, &
     952          100 :                    1, 1, descu, matrix_eigvr_t%local_data, 1, 1, descvt, work, lwork, myinfo)
     953              : 
     954          100 :       IF (matrix_lu%matrix_struct%para_env%is_source() .AND. (myinfo /= 0)) THEN
     955            0 :          CALL cp_fm_error("ERROR in PDGESVD: Matrix diagonalization failed", myinfo, PRESENT(info))
     956              :       END IF
     957              : #else
     958              :       ! Workspace query
     959              :       CALL dgesvd('S', 'S', m, m, matrix_lu%local_data, m, eigval, matrix_eigvl%local_data, &
     960              :                   m, matrix_eigvr_t%local_data, m, work, lwork, myinfo)
     961              : 
     962              :       IF (myinfo /= 0) THEN
     963              :          CALL cp_fm_error("ERROR in DGESVD: Work space query failed", myinfo, PRESENT(info))
     964              :       END IF
     965              : 
     966              :       lwork = NINT(work(1))
     967              :       ALLOCATE (work(lwork))
     968              : 
     969              :       CALL dgesvd('S', 'S', m, m, matrix_lu%local_data, m, eigval, matrix_eigvl%local_data, &
     970              :                   m, matrix_eigvr_t%local_data, m, work, lwork, myinfo)
     971              : 
     972              :       IF (myinfo /= 0) THEN
     973              :          CALL cp_fm_error("ERROR in DGESVD: Matrix diagonalization failed", myinfo, PRESENT(info))
     974              :       END IF
     975              : 
     976              : #endif
     977              :       ! Release intermediary matrices
     978          100 :       DEALLOCATE (work)
     979          100 :       CALL cp_fm_release(matrix_lu)
     980              : 
     981          100 :       IF (PRESENT(info)) info = myinfo
     982              : 
     983          100 :       CALL timestop(handle)
     984          100 :    END SUBROUTINE cp_fm_svd
     985              : 
     986              : ! **************************************************************************************************
     987              : !> \brief ...
     988              : !> \param matrix ...
     989              : !> \param work ...
     990              : !> \param exponent ...
     991              : !> \param threshold ...
     992              : !> \param n_dependent ...
     993              : !> \param verbose ...
     994              : !> \param eigvals ...
     995              : ! **************************************************************************************************
     996         1292 :    SUBROUTINE cp_fm_power(matrix, work, exponent, threshold, n_dependent, verbose, eigvals)
     997              : 
     998              :       ! Raise the real symmetric n by n matrix to the power given by
     999              :       ! the exponent. All eigenvectors with a corresponding eigenvalue lower
    1000              :       ! than threshold are quenched. result in matrix
    1001              : 
    1002              :       ! - Creation (29.03.1999, Matthias Krack)
    1003              :       ! - Parallelised using BLACS and ScaLAPACK (06.06.2001,MK)
    1004              : 
    1005              :       TYPE(cp_fm_type), INTENT(IN)               :: matrix, work
    1006              :       REAL(KIND=dp), INTENT(IN)                  :: exponent, threshold
    1007              :       INTEGER, INTENT(OUT)                       :: n_dependent
    1008              :       LOGICAL, INTENT(IN), OPTIONAL              :: verbose
    1009              :       REAL(KIND=dp), DIMENSION(2), INTENT(OUT), &
    1010              :          OPTIONAL                                :: eigvals
    1011              : 
    1012              :       CHARACTER(LEN=*), PARAMETER                :: routineN = 'cp_fm_power'
    1013              : 
    1014              :       INTEGER                                    :: handle, icol_global, &
    1015              :                                                     mypcol, myprow, &
    1016              :                                                     ncol_global, nrow_global
    1017              :       LOGICAL                                    :: my_verbose
    1018              :       REAL(KIND=dp)                              :: condition_number, f, p
    1019              :       REAL(KIND=dp), DIMENSION(:), ALLOCATABLE   :: eigenvalues
    1020         1292 :       REAL(KIND=dp), DIMENSION(:, :), POINTER    :: eigenvectors
    1021              :       TYPE(cp_blacs_env_type), POINTER           :: context
    1022              : 
    1023              : #if defined(__parallel)
    1024              :       INTEGER           :: icol_local, ipcol, iprow, irow_global, irow_local
    1025              : #endif
    1026              : 
    1027         1292 :       CALL timeset(routineN, handle)
    1028              : 
    1029         1292 :       my_verbose = .FALSE.
    1030         1292 :       IF (PRESENT(verbose)) my_verbose = verbose
    1031              : 
    1032         1292 :       context => matrix%matrix_struct%context
    1033         1292 :       myprow = context%mepos(1)
    1034         1292 :       mypcol = context%mepos(2)
    1035         1292 :       n_dependent = 0
    1036         1292 :       p = 0.5_dp*exponent
    1037              : 
    1038         1292 :       nrow_global = matrix%matrix_struct%nrow_global
    1039         1292 :       ncol_global = matrix%matrix_struct%ncol_global
    1040              : 
    1041         3876 :       ALLOCATE (eigenvalues(ncol_global))
    1042        32257 :       eigenvalues(:) = 0.0_dp
    1043              : 
    1044              :       ! Compute the eigenvectors and eigenvalues
    1045              : 
    1046         1292 :       CALL choose_eigv_solver(matrix, work, eigenvalues)
    1047              : 
    1048         1292 :       IF (PRESENT(eigvals)) THEN
    1049          348 :          eigvals(1) = eigenvalues(1)
    1050          348 :          eigvals(2) = eigenvalues(ncol_global)
    1051              :       END IF
    1052              : 
    1053              : #if defined(__parallel)
    1054         1292 :       eigenvectors => work%local_data
    1055              : 
    1056              :       ! Build matrix**exponent with eigenvector quenching
    1057              : 
    1058        32257 :       DO icol_global = 1, ncol_global
    1059              : 
    1060        32257 :          IF (eigenvalues(icol_global) < threshold) THEN
    1061              : 
    1062           50 :             n_dependent = n_dependent + 1
    1063              : 
    1064           50 :             ipcol = work%matrix_struct%g2p_col(icol_global)
    1065              : 
    1066           50 :             IF (mypcol == ipcol) THEN
    1067           50 :                icol_local = work%matrix_struct%g2l_col(icol_global)
    1068         5850 :                DO irow_global = 1, nrow_global
    1069         5800 :                   iprow = work%matrix_struct%g2p_row(irow_global)
    1070         5850 :                   IF (myprow == iprow) THEN
    1071         2900 :                      irow_local = work%matrix_struct%g2l_row(irow_global)
    1072         2900 :                      eigenvectors(irow_local, icol_local) = 0.0_dp
    1073              :                   END IF
    1074              :                END DO
    1075              :             END IF
    1076              : 
    1077              :          ELSE
    1078              : 
    1079        30915 :             f = eigenvalues(icol_global)**p
    1080              : 
    1081        30915 :             ipcol = work%matrix_struct%g2p_col(icol_global)
    1082              : 
    1083        30915 :             IF (mypcol == ipcol) THEN
    1084        30915 :                icol_local = work%matrix_struct%g2l_col(icol_global)
    1085      2181260 :                DO irow_global = 1, nrow_global
    1086      2150345 :                   iprow = work%matrix_struct%g2p_row(irow_global)
    1087      2181260 :                   IF (myprow == iprow) THEN
    1088      1119247 :                      irow_local = work%matrix_struct%g2l_row(irow_global)
    1089              :                      eigenvectors(irow_local, icol_local) = &
    1090      1119247 :                         f*eigenvectors(irow_local, icol_local)
    1091              :                   END IF
    1092              :                END DO
    1093              :             END IF
    1094              : 
    1095              :          END IF
    1096              : 
    1097              :       END DO
    1098              : 
    1099              : #else
    1100              : 
    1101              :       eigenvectors => work%local_data
    1102              : 
    1103              :       ! Build matrix**exponent with eigenvector quenching
    1104              : 
    1105              :       DO icol_global = 1, ncol_global
    1106              : 
    1107              :          IF (eigenvalues(icol_global) < threshold) THEN
    1108              : 
    1109              :             n_dependent = n_dependent + 1
    1110              :             eigenvectors(1:nrow_global, icol_global) = 0.0_dp
    1111              : 
    1112              :          ELSE
    1113              : 
    1114              :             f = eigenvalues(icol_global)**p
    1115              :             eigenvectors(1:nrow_global, icol_global) = &
    1116              :                f*eigenvectors(1:nrow_global, icol_global)
    1117              : 
    1118              :          END IF
    1119              : 
    1120              :       END DO
    1121              : 
    1122              : #endif
    1123         1292 :       CALL cp_fm_syrk("U", "N", ncol_global, 1.0_dp, work, 1, 1, 0.0_dp, matrix)
    1124         1292 :       CALL cp_fm_uplo_to_full(matrix, work)
    1125              : 
    1126              :       ! Print some warnings/notes
    1127         1292 :       IF (matrix%matrix_struct%para_env%is_source() .AND. my_verbose) THEN
    1128            0 :          condition_number = ABS(eigenvalues(ncol_global)/eigenvalues(1))
    1129              :          WRITE (UNIT=cp_logger_get_default_unit_nr(), FMT="(/,(T2,A,ES15.6))") &
    1130            0 :             "CP_FM_POWER: smallest eigenvalue:", eigenvalues(1), &
    1131            0 :             "CP_FM_POWER: largest eigenvalue: ", eigenvalues(ncol_global), &
    1132            0 :             "CP_FM_POWER: condition number:   ", condition_number
    1133            0 :          IF (eigenvalues(1) <= 0.0_dp) THEN
    1134              :             WRITE (UNIT=cp_logger_get_default_unit_nr(), FMT="(/,T2,A)") &
    1135            0 :                "WARNING: matrix has a negative eigenvalue, tighten EPS_DEFAULT"
    1136              :          END IF
    1137            0 :          IF (condition_number > 1.0E12_dp) THEN
    1138              :             WRITE (UNIT=cp_logger_get_default_unit_nr(), FMT="(/,T2,A)") &
    1139            0 :                "WARNING: high condition number => possibly ill-conditioned matrix"
    1140              :          END IF
    1141              :       END IF
    1142              : 
    1143         1292 :       DEALLOCATE (eigenvalues)
    1144              : 
    1145         1292 :       CALL timestop(handle)
    1146              : 
    1147         1292 :    END SUBROUTINE cp_fm_power
    1148              : 
    1149              : ! **************************************************************************************************
    1150              : !> \brief ...
    1151              : !> \param matrix ...
    1152              : !> \param eigenvectors ...
    1153              : !> \param eigval ...
    1154              : !> \param thresh ...
    1155              : !> \param start_sec_block ...
    1156              : ! **************************************************************************************************
    1157           18 :    SUBROUTINE cp_fm_block_jacobi(matrix, eigenvectors, eigval, thresh, start_sec_block)
    1158              : 
    1159              :       ! Calculates block diagonalization of a full symmetric matrix
    1160              :       ! It has its origin in cp_fm_syevx. This routine rotates only elements
    1161              :       ! which are larger than a threshold values "thresh".
    1162              :       ! start_sec_block is the start of the second block.
    1163              :       ! IT DOES ONLY ONE SWEEP!
    1164              : 
    1165              :       ! - Creation (07.10.2002, Martin Fengler)
    1166              :       ! - Cosmetics (05.04.06, MK)
    1167              : 
    1168              :       TYPE(cp_fm_type), INTENT(IN)              :: eigenvectors, matrix
    1169              :       REAL(KIND=dp), DIMENSION(:), INTENT(IN)   :: eigval
    1170              :       INTEGER, INTENT(IN)                       :: start_sec_block
    1171              :       REAL(KIND=dp), INTENT(IN)                 :: thresh
    1172              : 
    1173              :       CHARACTER(len=*), PARAMETER               :: routineN = 'cp_fm_block_jacobi'
    1174              : 
    1175              :       INTEGER :: handle
    1176              :       REAL(KIND=dp), DIMENSION(:, :), POINTER   :: a, ev
    1177              : 
    1178              :       REAL(KIND=dp) :: tan_theta, tau, c, s
    1179              :       INTEGER  :: q, p, N
    1180           18 :       REAL(KIND=dp), DIMENSION(:), ALLOCATABLE  :: c_ip
    1181              : 
    1182              : #if defined(__parallel)
    1183              :       TYPE(cp_blacs_env_type), POINTER :: context
    1184              : 
    1185              :       INTEGER :: nprow, npcol, block_dim_row, block_dim_col, info, &
    1186              :                  ev_row_block_size, iam, mynumrows, mype, npe, q_loc
    1187           18 :       REAL(KIND=dp), DIMENSION(:, :), ALLOCATABLE  :: a_loc, ev_loc
    1188              :       INTEGER, DIMENSION(9)                        :: desca, descz, &
    1189              :                                                       desc_a_block, &
    1190              :                                                       desc_ev_loc
    1191              :       TYPE(mp_comm_type):: allgrp
    1192              :       TYPE(cp_blacs_type) :: ictxt_loc
    1193              :       INTEGER, EXTERNAL :: numroc
    1194              : #endif
    1195              : 
    1196              :       ! -------------------------------------------------------------------------
    1197              : 
    1198           18 :       CALL timeset(routineN, handle)
    1199              : 
    1200              : #if defined(__parallel)
    1201           18 :       context => matrix%matrix_struct%context
    1202           18 :       allgrp = matrix%matrix_struct%para_env
    1203              : 
    1204           18 :       nprow = context%num_pe(1)
    1205           18 :       npcol = context%num_pe(2)
    1206              : 
    1207           18 :       N = matrix%matrix_struct%nrow_global
    1208              : 
    1209           18 :       A => matrix%local_data
    1210          180 :       desca(:) = matrix%matrix_struct%descriptor(:)
    1211           18 :       EV => eigenvectors%local_data
    1212          180 :       descz(:) = eigenvectors%matrix_struct%descriptor(:)
    1213              : 
    1214              :       ! Copy the block to be rotated to the master process firstly and broadcast to all processes
    1215              :       ! start_sec_block defines where the second block starts!
    1216              :       ! Block will be processed together with the OO block
    1217              : 
    1218           18 :       block_dim_row = start_sec_block - 1
    1219           18 :       block_dim_col = N - block_dim_row
    1220           72 :       ALLOCATE (A_loc(block_dim_row, block_dim_col))
    1221              : 
    1222           18 :       mype = matrix%matrix_struct%para_env%mepos
    1223           18 :       npe = matrix%matrix_struct%para_env%num_pe
    1224              :       ! Get a new context
    1225           18 :       CALL ictxt_loc%gridinit(matrix%matrix_struct%para_env, 'Row-major', nprow*npcol, 1)
    1226              : 
    1227              :       CALL descinit(desc_a_block, block_dim_row, block_dim_col, block_dim_row, &
    1228           18 :                     block_dim_col, 0, 0, ictxt_loc%get_handle(), block_dim_row, info)
    1229              : 
    1230              :       CALL pdgemr2d(block_dim_row, block_dim_col, A, 1, start_sec_block, desca, &
    1231           18 :                     A_loc, 1, 1, desc_a_block, context%get_handle())
    1232              :       ! Only the master (root) process received data yet
    1233           18 :       CALL allgrp%bcast(A_loc, 0)
    1234              : 
    1235              :       ! Since each process owns now the upper block, the eigenvectors can be re-sorted in such a way that
    1236              :       ! each process has a NN*1 grid, i.e. the process owns a bunch of rows which can be modified locally
    1237              : 
    1238              :       ! Initialize distribution of the eigenvectors
    1239           18 :       iam = mype
    1240           18 :       ev_row_block_size = n/(nprow*npcol)
    1241           18 :       mynumrows = NUMROC(N, ev_row_block_size, iam, 0, nprow*npcol)
    1242              : 
    1243          108 :       ALLOCATE (EV_loc(mynumrows, N), c_ip(mynumrows))
    1244              : 
    1245              :       CALL descinit(desc_ev_loc, N, N, ev_row_block_size, N, 0, 0, ictxt_loc%get_handle(), &
    1246           18 :                     mynumrows, info)
    1247              : 
    1248           18 :       CALL pdgemr2d(N, N, EV, 1, 1, descz, EV_loc, 1, 1, desc_ev_loc, context%get_handle())
    1249              : 
    1250              :       ! Start block diagonalization of matrix
    1251              : 
    1252           18 :       q_loc = 0
    1253              : 
    1254         1170 :       DO q = start_sec_block, N
    1255         1152 :          q_loc = q_loc + 1
    1256       148626 :          DO p = 1, (start_sec_block - 1)
    1257              : 
    1258       148608 :             IF (ABS(A_loc(p, q_loc)) > thresh) THEN
    1259              : 
    1260       117566 :                tau = (eigval(q) - eigval(p))/(2.0_dp*A_loc(p, q_loc))
    1261              : 
    1262       117566 :                tan_theta = SIGN(1.0_dp, tau)/(ABS(tau) + SQRT(1.0_dp + tau*tau))
    1263              : 
    1264              :                ! Cos(theta)
    1265       117566 :                c = 1.0_dp/SQRT(1.0_dp + tan_theta*tan_theta)
    1266       117566 :                s = tan_theta*c
    1267              : 
    1268              :                ! Calculate eigenvectors: Q*J
    1269              :                ! c_ip = c*EV_loc(:,p) - s*EV_loc(:,q)
    1270              :                ! c_iq = s*EV_loc(:,p) + c*EV_loc(:,q)
    1271              :                ! EV(:,p) = c_ip
    1272              :                ! EV(:,q) = c_iq
    1273       117566 :                CALL dcopy(mynumrows, EV_loc(1, p), 1, c_ip(1), 1)
    1274       117566 :                CALL dscal(mynumrows, c, EV_loc(1, p), 1)
    1275       117566 :                CALL daxpy(mynumrows, -s, EV_loc(1, q), 1, EV_loc(1, p), 1)
    1276       117566 :                CALL dscal(mynumrows, c, EV_loc(1, q), 1)
    1277       117566 :                CALL daxpy(mynumrows, s, c_ip(1), 1, EV_loc(1, q), 1)
    1278              : 
    1279              :             END IF
    1280              : 
    1281              :          END DO
    1282              :       END DO
    1283              : 
    1284              :       ! Copy eigenvectors back to the original distribution
    1285           18 :       CALL pdgemr2d(N, N, EV_loc, 1, 1, desc_ev_loc, EV, 1, 1, descz, context%get_handle())
    1286              : 
    1287              :       ! Release work storage
    1288           18 :       DEALLOCATE (A_loc, EV_loc, c_ip)
    1289              : 
    1290           18 :       CALL ictxt_loc%gridexit()
    1291              : 
    1292              : #else
    1293              : 
    1294              :       N = matrix%matrix_struct%nrow_global
    1295              : 
    1296              :       ALLOCATE (c_ip(N)) ! Local eigenvalue vector
    1297              : 
    1298              :       A => matrix%local_data ! Contains the Matrix to be worked on
    1299              :       EV => eigenvectors%local_data ! Contains the eigenvectors up to blocksize, rest is garbage
    1300              : 
    1301              :       ! Start matrix diagonalization
    1302              : 
    1303              :       tan_theta = 0.0_dp
    1304              :       tau = 0.0_dp
    1305              : 
    1306              :       DO q = start_sec_block, N
    1307              :          DO p = 1, (start_sec_block - 1)
    1308              : 
    1309              :             IF (ABS(A(p, q)) > thresh) THEN
    1310              : 
    1311              :                tau = (eigval(q) - eigval(p))/(2.0_dp*A(p, q))
    1312              : 
    1313              :                tan_theta = SIGN(1.0_dp, tau)/(ABS(tau) + SQRT(1.0_dp + tau*tau))
    1314              : 
    1315              :                ! Cos(theta)
    1316              :                c = 1.0_dp/SQRT(1.0_dp + tan_theta*tan_theta)
    1317              :                s = tan_theta*c
    1318              : 
    1319              :                ! Calculate eigenvectors: Q*J
    1320              :                ! c_ip = c*EV(:,p) - s*EV(:,q)
    1321              :                ! c_iq = s*EV(:,p) + c*EV(:,q)
    1322              :                ! EV(:,p) = c_ip
    1323              :                ! EV(:,q) = c_iq
    1324              :                CALL dcopy(N, EV(1, p), 1, c_ip(1), 1)
    1325              :                CALL dscal(N, c, EV(1, p), 1)
    1326              :                CALL daxpy(N, -s, EV(1, q), 1, EV(1, p), 1)
    1327              :                CALL dscal(N, c, EV(1, q), 1)
    1328              :                CALL daxpy(N, s, c_ip(1), 1, EV(1, q), 1)
    1329              : 
    1330              :             END IF
    1331              : 
    1332              :          END DO
    1333              :       END DO
    1334              : 
    1335              :       ! Release work storage
    1336              : 
    1337              :       DEALLOCATE (c_ip)
    1338              : 
    1339              : #endif
    1340              : 
    1341           18 :       CALL timestop(handle)
    1342              : 
    1343           90 :    END SUBROUTINE cp_fm_block_jacobi
    1344              : 
    1345              : ! **************************************************************************************************
    1346              : !> \brief General Eigenvalue Problem  AX = BXE
    1347              : !>        Single option version: Cholesky decomposition of B
    1348              : !> \param amatrix ...
    1349              : !> \param bmatrix ...
    1350              : !> \param eigenvectors ...
    1351              : !> \param eigenvalues ...
    1352              : !> \param work ...
    1353              : ! **************************************************************************************************
    1354          284 :    SUBROUTINE cp_fm_geeig(amatrix, bmatrix, eigenvectors, eigenvalues, work)
    1355              : 
    1356              :       TYPE(cp_fm_type), INTENT(IN)                       :: amatrix, bmatrix, eigenvectors
    1357              :       REAL(KIND=dp), DIMENSION(:)                        :: eigenvalues
    1358              :       TYPE(cp_fm_type), INTENT(IN)                       :: work
    1359              : 
    1360              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'cp_fm_geeig'
    1361              : 
    1362              :       INTEGER                                            :: handle, nao, nmo
    1363              : 
    1364          284 :       CALL timeset(routineN, handle)
    1365              : 
    1366          284 :       CALL cp_fm_get_info(amatrix, nrow_global=nao)
    1367          284 :       nmo = SIZE(eigenvalues)
    1368              : 
    1369          284 :       IF (diag_type == FM_DIAG_TYPE_CUSOLVER .AND. nao >= 64) THEN
    1370              :          ! Use cuSolverMP generalized eigenvalue solver for large matrices
    1371            0 :          CALL cp_fm_general_cusolver(amatrix, bmatrix, eigenvectors, eigenvalues)
    1372              : #if defined(__DLAF)
    1373              :       ELSE IF (diag_type == FM_DIAG_TYPE_DLAF .AND. amatrix%matrix_struct%nrow_global >= dlaf_neigvec_min) THEN
    1374              :          ! Use DLA-Future generalized eigenvalue solver for large matrices
    1375              :          CALL cp_fm_diag_gen_dlaf(amatrix, bmatrix, eigenvectors, eigenvalues)
    1376              : #endif
    1377              :       ELSE
    1378              :          ! Cholesky decompose S=U(T)U
    1379          284 :          CALL cp_fm_cholesky_decompose(bmatrix)
    1380              :          ! Invert to get U^(-1)
    1381          284 :          CALL cp_fm_triangular_invert(bmatrix)
    1382              :          ! Reduce to get U^(-T) * H * U^(-1)
    1383          284 :          CALL cp_fm_triangular_multiply(bmatrix, amatrix, side="R")
    1384          284 :          CALL cp_fm_triangular_multiply(bmatrix, amatrix, transpose_tr=.TRUE.)
    1385              :          ! Diagonalize
    1386              :          CALL choose_eigv_solver(matrix=amatrix, eigenvectors=work, &
    1387          284 :                                  eigenvalues=eigenvalues)
    1388              :          ! Restore vectors C = U^(-1) * C*
    1389          284 :          CALL cp_fm_triangular_multiply(bmatrix, work)
    1390          284 :          CALL cp_fm_to_fm(work, eigenvectors, nmo)
    1391              :       END IF
    1392              : 
    1393          284 :       CALL timestop(handle)
    1394              : 
    1395          284 :    END SUBROUTINE cp_fm_geeig
    1396              : 
    1397              : ! **************************************************************************************************
    1398              : !> \brief General Eigenvalue Problem  AX = BXE
    1399              : !>        Use canonical diagonalization : U*s**(-1/2)
    1400              : !> \param amatrix ...
    1401              : !> \param bmatrix ...
    1402              : !> \param eigenvectors ...
    1403              : !> \param eigenvalues ...
    1404              : !> \param work ...
    1405              : !> \param epseig ...
    1406              : ! **************************************************************************************************
    1407           66 :    SUBROUTINE cp_fm_geeig_canon(amatrix, bmatrix, eigenvectors, eigenvalues, work, epseig)
    1408              : 
    1409              :       TYPE(cp_fm_type), INTENT(IN)                       :: amatrix, bmatrix, eigenvectors
    1410              :       REAL(KIND=dp), DIMENSION(:), INTENT(OUT)           :: eigenvalues
    1411              :       TYPE(cp_fm_type), INTENT(IN)                       :: work
    1412              :       REAL(KIND=dp), INTENT(IN)                          :: epseig
    1413              : 
    1414              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'cp_fm_geeig_canon'
    1415              : 
    1416              :       INTEGER                                            :: handle, i, icol, irow, nao, nc, ncol, &
    1417              :                                                             nmo, nx
    1418              :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: evals
    1419              : 
    1420           66 :       CALL timeset(routineN, handle)
    1421              : 
    1422              :       ! Test sizees
    1423           66 :       CALL cp_fm_get_info(amatrix, nrow_global=nao)
    1424           66 :       nmo = SIZE(eigenvalues)
    1425          198 :       ALLOCATE (evals(nao))
    1426              : 
    1427              :       ! Diagonalize -S matrix, this way the NULL space is at the end of the spectrum
    1428           66 :       CALL cp_fm_scale(-1.0_dp, bmatrix)
    1429           66 :       CALL choose_eigv_solver(matrix=bmatrix, eigenvectors=work, eigenvalues=evals)
    1430         4720 :       evals(:) = -evals(:)
    1431           66 :       nc = nao
    1432         4428 :       DO i = 1, nao
    1433         4428 :          IF (evals(i) < epseig) THEN
    1434           40 :             nc = i - 1
    1435           40 :             EXIT
    1436              :          END IF
    1437              :       END DO
    1438           66 :       CPASSERT(nc /= 0)
    1439              : 
    1440           66 :       IF (nc /= nao) THEN
    1441           40 :          IF (nc < nmo) THEN
    1442              :             ! Copy NULL space definition to last vectors of eigenvectors (if needed)
    1443            0 :             ncol = nmo - nc
    1444            0 :             CALL cp_fm_to_fm(work, eigenvectors, ncol, nc + 1, nc + 1)
    1445              :          END IF
    1446              :          ! Set NULL space in eigenvector matrix of S to zero
    1447          332 :          DO icol = nc + 1, nao
    1448        36172 :             DO irow = 1, nao
    1449        36132 :                CALL cp_fm_set_element(work, irow, icol, 0.0_dp)
    1450              :             END DO
    1451              :          END DO
    1452              :          ! Set small eigenvalues to a dummy save value
    1453          332 :          evals(nc + 1:nao) = 1.0_dp
    1454              :       END IF
    1455              :       ! Calculate U*s**(-1/2)
    1456         4720 :       evals(:) = 1.0_dp/SQRT(evals(:))
    1457           66 :       CALL cp_fm_column_scale(work, evals)
    1458              :       ! Reduce to get U^(-T) * H * U^(-1)
    1459           66 :       CALL cp_fm_gemm("T", "N", nao, nao, nao, 1.0_dp, work, amatrix, 0.0_dp, bmatrix)
    1460           66 :       CALL cp_fm_gemm("N", "N", nao, nao, nao, 1.0_dp, bmatrix, work, 0.0_dp, amatrix)
    1461           66 :       IF (nc /= nao) THEN
    1462              :          ! set diagonal values to save large value
    1463          332 :          DO icol = nc + 1, nao
    1464          332 :             CALL cp_fm_set_element(amatrix, icol, icol, 10000.0_dp)
    1465              :          END DO
    1466              :       END IF
    1467              :       ! Diagonalize
    1468           66 :       CALL choose_eigv_solver(matrix=amatrix, eigenvectors=bmatrix, eigenvalues=eigenvalues)
    1469           66 :       nx = MIN(nc, nmo)
    1470              :       ! Restore vectors C = U^(-1) * C*
    1471           66 :       CALL cp_fm_gemm("N", "N", nao, nx, nc, 1.0_dp, work, bmatrix, 0.0_dp, eigenvectors)
    1472              : 
    1473           66 :       DEALLOCATE (evals)
    1474              : 
    1475           66 :       CALL timestop(handle)
    1476              : 
    1477           66 :    END SUBROUTINE cp_fm_geeig_canon
    1478              : 
    1479              : END MODULE cp_fm_diag
        

Generated by: LCOV version 2.0-1