LCOV - CP2K Regtests (git:42dac4a)

LCOV - code coverage report

Current view:	top level - src/fm - cp_fm_diag.F (source / functions)		Coverage	Total	Hit
Test:	CP2K Regtests (git:42dac4a)	Lines:	82.9 %	414	343
Test Date:	2025-07-25 12:55:17	Functions:	100.0 %	12	12

            Line data    Source code

       1              : !--------------------------------------------------------------------------------------------------!
       2              : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3              : !   Copyright 2000-2025 CP2K developers group <https://cp2k.org>                                   !
       4              : !                                                                                                  !
       5              : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6              : !--------------------------------------------------------------------------------------------------!
       7              : 
       8              : ! **************************************************************************************************
       9              : !> \brief used for collecting some of the diagonalization schemes available for
      10              : !>      cp_fm_type. cp_fm_power also moved here as it is very related
      11              : !> \note
      12              : !>      first version : most routines imported
      13              : !> \par History
      14              : !>      - unused Jacobi routines removed, cosmetics (05.04.06,MK)
      15              : !> \author Joost VandeVondele (2003-08)
      16              : ! **************************************************************************************************
      17              : MODULE cp_fm_diag
      18              : 
      19              :    USE cp_blacs_types, ONLY: cp_blacs_type
      20              :    USE cp_blacs_env, ONLY: cp_blacs_env_type
      21              :    USE cp_fm_basic_linalg, ONLY: cp_fm_column_scale, &
      22              :                                  cp_fm_gemm, &
      23              :                                  cp_fm_scale, &
      24              :                                  cp_fm_syrk, &
      25              :                                  cp_fm_triangular_invert, &
      26              :                                  cp_fm_triangular_multiply, &
      27              :                                  cp_fm_uplo_to_full
      28              :    USE cp_fm_cholesky, ONLY: cp_fm_cholesky_decompose
      29              :    USE cp_fm_diag_utils, ONLY: cp_fm_redistribute_end, &
      30              :                                cp_fm_redistribute_start
      31              :    USE cp_fm_elpa, ONLY: cp_fm_diag_elpa, &
      32              :                          finalize_elpa_library, &
      33              :                          initialize_elpa_library, &
      34              :                          set_elpa_kernel, &
      35              :                          set_elpa_print, &
      36              :                          set_elpa_qr
      37              :    USE cp_fm_cusolver_api, ONLY: cp_fm_diag_cusolver, &
      38              :                                  cp_fm_general_cusolver
      39              : #if defined(__DLAF)
      40              :    USE cp_fm_dlaf_api, ONLY: cp_fm_diag_dlaf, cp_fm_diag_gen_dlaf
      41              :    USE cp_dlaf_utils_api, ONLY: cp_dlaf_initialize, cp_dlaf_finalize
      42              : #endif
      43              :    USE cp_fm_types, ONLY: cp_fm_get_info, &
      44              :                           cp_fm_set_element, &
      45              :                           cp_fm_to_fm, &
      46              :                           cp_fm_type, &
      47              :                           cp_fm_create, &
      48              :                           cp_fm_get_info, &
      49              :                           cp_fm_release, &
      50              :                           cp_fm_set_all, &
      51              :                           cp_fm_to_fm, &
      52              :                           cp_fm_to_fm_submat, &
      53              :                           cp_fm_type
      54              :    USE cp_fm_struct, ONLY: cp_fm_struct_equivalent, &
      55              :                            cp_fm_struct_create, &
      56              :                            cp_fm_struct_release, &
      57              :                            cp_fm_struct_type
      58              :    USE cp_log_handling, ONLY: cp_logger_get_default_unit_nr, &
      59              :                               cp_get_default_logger, &
      60              :                               cp_logger_get_default_io_unit, &
      61              :                               cp_logger_type, &
      62              :                               cp_to_string
      63              :    USE cp_log_handling, ONLY: cp_get_default_logger, &
      64              :                               cp_logger_get_default_unit_nr, &
      65              :                               cp_logger_get_unit_nr, &
      66              :                               cp_logger_type
      67              :    USE kinds, ONLY: default_string_length, &
      68              :                     dp
      69              :    USE machine, ONLY: default_output_unit, &
      70              :                       m_memory
      71              : #if defined (__parallel)
      72              :    USE message_passing, ONLY: mp_comm_type
      73              : #endif
      74              : #if defined (__HAS_IEEE_EXCEPTIONS)
      75              :    USE ieee_exceptions, ONLY: ieee_get_halting_mode, &
      76              :                               ieee_set_halting_mode, &
      77              :                               IEEE_ALL
      78              : #endif
      79              : #include "../base/base_uses.f90"
      80              : 
      81              :    IMPLICIT NONE
      82              : 
      83              :    PRIVATE
      84              : 
      85              :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_fm_diag'
      86              : 
      87              :    REAL(KIND=dp), PARAMETER, PUBLIC :: eps_check_diag_default = 5.0E-14_dp
      88              : 
      89              :    ! The following saved variables are diagonalization global
      90              :    ! Stores the default library for diagonalization
      91              :    INTEGER, SAVE, PUBLIC   :: diag_type = 0
      92              :    ! Minimum number of eigenvectors for the use of the ELPA eigensolver.
      93              :    ! The ScaLAPACK eigensolver is used as fallback for all smaller cases.
      94              :    INTEGER, SAVE       :: elpa_neigvec_min = 0
      95              : #if defined(__DLAF)
      96              :    ! Minimum number of eigenvectors for the use of the DLAF eigensolver.
      97              :    ! The ScaLAPACK eigensolver is used as fallback for all smaller cases.
      98              :    INTEGER, SAVE, PUBLIC    :: dlaf_neigvec_min = 0
      99              : #endif
     100              :    ! Threshold value for the orthonormality check of the eigenvectors obtained
     101              :    ! after a diagonalization. A negative value disables the check.
     102              :    REAL(KIND=dp), SAVE :: eps_check_diag = -1.0_dp
     103              : 
     104              :    ! Constants for the diag_type above
     105              :    INTEGER, PARAMETER, PUBLIC  :: FM_DIAG_TYPE_SCALAPACK = 101, &
     106              :                                   FM_DIAG_TYPE_ELPA = 102, &
     107              :                                   FM_DIAG_TYPE_CUSOLVER = 103, &
     108              :                                   FM_DIAG_TYPE_DLAF = 104
     109              : #if defined(__CUSOLVERMP)
     110              :    INTEGER, PARAMETER, PUBLIC :: FM_DIAG_TYPE_DEFAULT = FM_DIAG_TYPE_CUSOLVER
     111              : #elif defined(__ELPA)
     112              :    INTEGER, PARAMETER, PUBLIC :: FM_DIAG_TYPE_DEFAULT = FM_DIAG_TYPE_ELPA
     113              : #else
     114              :    INTEGER, PARAMETER, PUBLIC :: FM_DIAG_TYPE_DEFAULT = FM_DIAG_TYPE_SCALAPACK
     115              : #endif
     116              : 
     117              :    ! Public subroutines
     118              :    PUBLIC :: choose_eigv_solver, &
     119              :              cp_fm_block_jacobi, &
     120              :              cp_fm_power, &
     121              :              cp_fm_syevd, &
     122              :              cp_fm_syevx, &
     123              :              cp_fm_svd, &
     124              :              cp_fm_geeig, &
     125              :              cp_fm_geeig_canon, &
     126              :              diag_init, &
     127              :              diag_finalize
     128              : 
     129              : CONTAINS
     130              : 
     131              : ! **************************************************************************************************
     132              : !> \brief Setup the diagonalization library to be used
     133              : !> \param diag_lib diag_library flag from GLOBAL section in input
     134              : !> \param fallback_applied .TRUE. if support for the requested library was not compiled-in and fallback
     135              : !>                         to ScaLAPACK was applied, .FALSE. otherwise.
     136              : !> \param elpa_kernel integer that determines which ELPA kernel to use for diagonalization
     137              : !> \param elpa_neigvec_min_input ...
     138              : !> \param elpa_qr logical that determines if ELPA should try to use QR to accelerate the
     139              : !>                diagonalization procedure of suitably sized matrices
     140              : !> \param elpa_print logical that determines if information about the ELPA diagonalization should
     141              : !>                   be printed
     142              : !> \param elpa_qr_unsafe logical that enables potentially unsafe ELPA options
     143              : !> \param dlaf_neigvec_min_input ...
     144              : !> \param eps_check_diag_input ...
     145              : !> \par History
     146              : !>      - Add support for DLA-Future (05.09.2023, RMeli)
     147              : !> \author  MI 11.2013
     148              : ! **************************************************************************************************
     149         9835 :    SUBROUTINE diag_init(diag_lib, fallback_applied, elpa_kernel, elpa_neigvec_min_input, elpa_qr, &
     150              :                         elpa_print, elpa_qr_unsafe, dlaf_neigvec_min_input, eps_check_diag_input)
     151              :       CHARACTER(LEN=*), INTENT(IN)                       :: diag_lib
     152              :       LOGICAL, INTENT(OUT)                               :: fallback_applied
     153              :       INTEGER, INTENT(IN)                                :: elpa_kernel, elpa_neigvec_min_input
     154              :       LOGICAL, INTENT(IN)                                :: elpa_qr, elpa_print, elpa_qr_unsafe
     155              :       INTEGER, INTENT(IN)                                :: dlaf_neigvec_min_input
     156              :       REAL(KIND=dp), INTENT(IN)                          :: eps_check_diag_input
     157              : 
     158              :       LOGICAL, SAVE                                      :: initialized = .FALSE.
     159              : 
     160         9835 :       fallback_applied = .FALSE.
     161              : 
     162         9835 :       IF (diag_lib == "ScaLAPACK") THEN
     163          190 :          diag_type = FM_DIAG_TYPE_SCALAPACK
     164         9645 :       ELSE IF (diag_lib == "ELPA") THEN
     165              : #if defined (__ELPA)
     166              :          ! ELPA is requested and available
     167         9645 :          diag_type = FM_DIAG_TYPE_ELPA
     168              : #else
     169              :          ! ELPA library requested but not linked, switch back to SL
     170              :          diag_type = FM_DIAG_TYPE_SCALAPACK
     171              :          fallback_applied = .TRUE.
     172              : #endif
     173            0 :       ELSE IF (diag_lib == "cuSOLVER") THEN
     174            0 :          diag_type = FM_DIAG_TYPE_CUSOLVER
     175            0 :       ELSE IF (diag_lib == "DLAF") THEN
     176              : #if defined (__DLAF)
     177              :          diag_type = FM_DIAG_TYPE_DLAF
     178              : #else
     179            0 :          CPABORT("ERROR in diag_init: CP2K was not compiled with DLA-Future support")
     180              : #endif
     181              :       ELSE
     182            0 :          CPABORT("ERROR in diag_init: Initialization of unknown diagonalization library requested")
     183              :       END IF
     184              : 
     185              :       ! Initialization of requested diagonalization library
     186         9835 :       IF (.NOT. initialized .AND. diag_type == FM_DIAG_TYPE_ELPA) THEN
     187         9048 :          CALL initialize_elpa_library()
     188         9048 :          CALL set_elpa_kernel(elpa_kernel)
     189         9048 :          CALL set_elpa_qr(elpa_qr, elpa_qr_unsafe)
     190         9048 :          CALL set_elpa_print(elpa_print)
     191         9048 :          initialized = .TRUE.
     192              :       END IF
     193              : #if defined(__DLAF)
     194              :       IF (.NOT. initialized .AND. diag_type == FM_DIAG_TYPE_DLAF) THEN
     195              :          CALL cp_dlaf_initialize()
     196              :          initialized = .TRUE.
     197              :       END IF
     198              :       dlaf_neigvec_min = dlaf_neigvec_min_input
     199              : #else
     200              :       MARK_USED(dlaf_neigvec_min_input)
     201              : #endif
     202              : 
     203         9835 :       elpa_neigvec_min = elpa_neigvec_min_input
     204         9835 :       eps_check_diag = eps_check_diag_input
     205              : 
     206         9835 :    END SUBROUTINE diag_init
     207              : 
     208              : ! **************************************************************************************************
     209              : !> \brief Finalize the diagonalization library
     210              : ! **************************************************************************************************
     211         9625 :    SUBROUTINE diag_finalize()
     212              : #if defined (__ELPA)
     213         9625 :       IF (diag_type == FM_DIAG_TYPE_ELPA) &
     214         9435 :          CALL finalize_elpa_library()
     215              : #endif
     216              : #if defined (__DLAF)
     217              :       IF (diag_type == FM_DIAG_TYPE_DLAF) &
     218              :          CALL cp_dlaf_finalize()
     219              : #endif
     220         9625 :    END SUBROUTINE diag_finalize
     221              : 
     222              : ! **************************************************************************************************
     223              : !> \brief   Choose the Eigensolver depending on which library is available
     224              : !>          ELPA seems to be unstable for small systems
     225              : !> \param matrix ...
     226              : !> \param eigenvectors ...
     227              : !> \param eigenvalues ...
     228              : !> \param info ...
     229              : !> \par     info If present returns error code and prevents program stops.
     230              : !>               Works currently only for cp_fm_syevd with ScaLAPACK.
     231              : !>               Other solvers will end the program regardless of PRESENT(info).
     232              : !> \par History
     233              : !>      - Do not use ELPA for small matrices and use instead ScaLAPACK as fallback (10.05.2021, MK)
     234              : ! **************************************************************************************************
     235       177818 :    SUBROUTINE choose_eigv_solver(matrix, eigenvectors, eigenvalues, info)
     236              : 
     237              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix, eigenvectors
     238              :       REAL(KIND=dp), DIMENSION(:), INTENT(OUT)           :: eigenvalues
     239              :       INTEGER, INTENT(OUT), OPTIONAL                     :: info
     240              : 
     241              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'choose_eigv_solver'
     242              : 
     243              :       ! Sample peak memory
     244       177818 :       CALL m_memory()
     245              : 
     246       177818 :       IF (PRESENT(info)) info = 0 ! Default for solvers that do not return an info.
     247              : 
     248       177818 :       IF (diag_type == FM_DIAG_TYPE_SCALAPACK) THEN
     249         4162 :          CALL cp_fm_syevd(matrix, eigenvectors, eigenvalues, info)
     250              : 
     251       173656 :       ELSE IF (diag_type == FM_DIAG_TYPE_ELPA) THEN
     252       173656 :          IF (matrix%matrix_struct%nrow_global < elpa_neigvec_min) THEN
     253              :             ! We don't trust ELPA with very small matrices.
     254       109565 :             CALL cp_fm_syevd(matrix, eigenvectors, eigenvalues, info)
     255              :          ELSE
     256        64091 :             CALL cp_fm_diag_elpa(matrix, eigenvectors, eigenvalues)
     257              :          END IF
     258              : 
     259            0 :       ELSE IF (diag_type == FM_DIAG_TYPE_CUSOLVER) THEN
     260            0 :          IF (matrix%matrix_struct%nrow_global < 64) THEN
     261              :             ! We don't trust cuSolver with very small matrices.
     262            0 :             CALL cp_fm_syevd(matrix, eigenvectors, eigenvalues, info)
     263              :          ELSE
     264            0 :             CALL cp_fm_diag_cusolver(matrix, eigenvectors, eigenvalues)
     265              :          END IF
     266              : 
     267              : #if defined(__DLAF)
     268              :       ELSE IF (diag_type == FM_DIAG_TYPE_DLAF) THEN
     269              :          IF (matrix%matrix_struct%nrow_global < dlaf_neigvec_min) THEN
     270              :             ! Use ScaLAPACK for small matrices
     271              :             CALL cp_fm_syevd(matrix, eigenvectors, eigenvalues, info)
     272              :          ELSE
     273              :             CALL cp_fm_diag_dlaf(matrix, eigenvectors, eigenvalues)
     274              :          END IF
     275              : #endif
     276              : 
     277              :       ELSE
     278            0 :          CPABORT("ERROR in "//routineN//": Invalid diagonalization type requested")
     279              :       END IF
     280              : 
     281       177818 :       CALL check_diag(matrix, eigenvectors, nvec=SIZE(eigenvalues))
     282              : 
     283       177818 :    END SUBROUTINE choose_eigv_solver
     284              : 
     285              : ! **************************************************************************************************
     286              : !> \brief   Check result of diagonalization, i.e. the orthonormality of the eigenvectors
     287              : !> \param matrix Work matrix
     288              : !> \param eigenvectors Eigenvectors to be checked
     289              : !> \param nvec ...
     290              : ! **************************************************************************************************
     291       294911 :    SUBROUTINE check_diag(matrix, eigenvectors, nvec)
     292              : 
     293              :       TYPE(cp_fm_type), INTENT(IN)                    :: matrix, eigenvectors
     294              :       INTEGER, INTENT(IN)                                :: nvec
     295              : 
     296              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'check_diag'
     297              : 
     298              :       CHARACTER(LEN=default_string_length)               :: diag_type_name
     299              :       REAL(KIND=dp)                                      :: eps_abort, eps_warning
     300              :       INTEGER                                            :: handle, i, j, ncol, nrow, output_unit
     301              :       LOGICAL                                            :: check_eigenvectors
     302              : #if defined(__parallel)
     303              :       TYPE(cp_blacs_env_type), POINTER                   :: context
     304              :       INTEGER                                            :: il, jl, ipcol, iprow, &
     305              :                                                             mypcol, myprow, npcol, nprow
     306              :       INTEGER, DIMENSION(9)                              :: desca
     307              : #endif
     308              : 
     309       294911 :       CALL timeset(routineN, handle)
     310              : 
     311       294911 :       IF (diag_type == FM_DIAG_TYPE_SCALAPACK) THEN
     312         8324 :          diag_type_name = "SYEVD"
     313       286587 :       ELSE IF (diag_type == FM_DIAG_TYPE_ELPA) THEN
     314       286587 :          diag_type_name = "ELPA"
     315            0 :       ELSE IF (diag_type == FM_DIAG_TYPE_CUSOLVER) THEN
     316            0 :          diag_type_name = "CUSOLVER"
     317            0 :       ELSE IF (diag_type == FM_DIAG_TYPE_DLAF) THEN
     318            0 :          diag_type_name = "DLAF"
     319              :       ELSE
     320            0 :          CPABORT("Unknown diag_type")
     321              :       END IF
     322              : 
     323       294911 :       output_unit = default_output_unit
     324              : 
     325       294911 :       eps_warning = eps_check_diag_default
     326              : #if defined(__CHECK_DIAG)
     327              :       check_eigenvectors = .TRUE.
     328              :       IF (eps_check_diag >= 0.0_dp) THEN
     329              :          eps_warning = eps_check_diag
     330              :       END IF
     331              : #else
     332       294911 :       IF (eps_check_diag >= 0.0_dp) THEN
     333          242 :          check_eigenvectors = .TRUE.
     334          242 :          eps_warning = eps_check_diag
     335              :       ELSE
     336              :          check_eigenvectors = .FALSE.
     337              :       END IF
     338              : #endif
     339       294911 :       eps_abort = 10.0_dp*eps_warning
     340              : 
     341       294911 :       IF (check_eigenvectors) THEN
     342              : #if defined(__parallel)
     343          242 :          nrow = eigenvectors%matrix_struct%nrow_global
     344          242 :          ncol = MIN(eigenvectors%matrix_struct%ncol_global, nvec)
     345          242 :          CALL cp_fm_gemm("T", "N", ncol, ncol, nrow, 1.0_dp, eigenvectors, eigenvectors, 0.0_dp, matrix)
     346          242 :          context => matrix%matrix_struct%context
     347          242 :          myprow = context%mepos(1)
     348          242 :          mypcol = context%mepos(2)
     349          242 :          nprow = context%num_pe(1)
     350          242 :          npcol = context%num_pe(2)
     351         2420 :          desca(:) = matrix%matrix_struct%descriptor(:)
     352         2792 :          DO i = 1, ncol
     353        41810 :             DO j = 1, ncol
     354        39018 :                CALL infog2l(i, j, desca, nprow, npcol, myprow, mypcol, il, jl, iprow, ipcol)
     355        41568 :                IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
     356        19509 :                   IF (i == j) THEN
     357         1275 :                      IF (ABS(matrix%local_data(il, jl) - 1.0_dp) > eps_warning) THEN
     358              :                         WRITE (UNIT=output_unit, FMT="(/,T2,A,/,T2,A,I0,A,I0,A,F0.15,/,T2,A,ES10.3)") &
     359            0 :                            "The eigenvectors returned by "//TRIM(diag_type_name)//" are not orthonormal", &
     360            0 :                            "Matrix element (", i, ", ", j, ") = ", matrix%local_data(il, jl), &
     361            0 :                            "The deviation from the expected value 1 is", ABS(matrix%local_data(il, jl) - 1.0_dp)
     362            0 :                         IF (ABS(matrix%local_data(il, jl) - 1.0_dp) > eps_abort) THEN
     363            0 :                            CPABORT("ERROR in "//routineN//": Check of matrix diagonalization failed")
     364              :                         ELSE
     365            0 :                            CPWARN("Check of matrix diagonalization failed in routine "//routineN)
     366              :                         END IF
     367              :                      END IF
     368              :                   ELSE
     369        18234 :                      IF (ABS(matrix%local_data(il, jl)) > eps_warning) THEN
     370              :                         WRITE (UNIT=output_unit, FMT="(/,T2,A,/,T2,A,I0,A,I0,A,F0.15,/,T2,A,ES10.3)") &
     371            0 :                            "The eigenvectors returned by "//TRIM(diag_type_name)//" are not orthonormal", &
     372            0 :                            "Matrix element (", i, ", ", j, ") = ", matrix%local_data(il, jl), &
     373            0 :                            "The deviation from the expected value 0 is", ABS(matrix%local_data(il, jl))
     374            0 :                         IF (ABS(matrix%local_data(il, jl)) > eps_abort) THEN
     375            0 :                            CPABORT("ERROR in "//routineN//": Check of matrix diagonalization failed")
     376              :                         ELSE
     377            0 :                            CPWARN("Check of matrix diagonalization failed in routine "//routineN)
     378              :                         END IF
     379              :                      END IF
     380              :                   END IF
     381              :                END IF
     382              :             END DO
     383              :          END DO
     384              : #else
     385              :          nrow = SIZE(eigenvectors%local_data, 1)
     386              :          ncol = MIN(SIZE(eigenvectors%local_data, 2), nvec)
     387              :          CALL dgemm("T", "N", ncol, ncol, nrow, 1.0_dp, &
     388              :                     eigenvectors%local_data(1, 1), nrow, &
     389              :                     eigenvectors%local_data(1, 1), nrow, &
     390              :                     0.0_dp, matrix%local_data(1, 1), nrow)
     391              :          DO i = 1, ncol
     392              :             DO j = 1, ncol
     393              :                IF (i == j) THEN
     394              :                   IF (ABS(matrix%local_data(i, j) - 1.0_dp) > eps_warning) THEN
     395              :                      WRITE (UNIT=output_unit, FMT="(/,T2,A,/,T2,A,I0,A,I0,A,F0.15,/,T2,A,ES10.3)") &
     396              :                         "The eigenvectors returned by "//TRIM(diag_type_name)//" are not orthonormal", &
     397              :                         "Matrix element (", i, ", ", j, ") = ", matrix%local_data(i, j), &
     398              :                         "The deviation from the expected value 1 is", ABS(matrix%local_data(i, j) - 1.0_dp)
     399              :                      IF (ABS(matrix%local_data(i, j) - 1.0_dp) > eps_abort) THEN
     400              :                         CPABORT("ERROR in "//routineN//": Check of matrix diagonalization failed")
     401              :                      ELSE
     402              :                         CPWARN("Check of matrix diagonalization failed in routine "//routineN)
     403              :                      END IF
     404              :                   END IF
     405              :                ELSE
     406              :                   IF (ABS(matrix%local_data(i, j)) > eps_warning) THEN
     407              :                      WRITE (UNIT=output_unit, FMT="(/,T2,A,/,T2,A,I0,A,I0,A,F0.15,/,T2,A,ES10.3)") &
     408              :                         "The eigenvectors returned by "//TRIM(diag_type_name)//" are not orthonormal", &
     409              :                         "Matrix element (", i, ", ", j, ") = ", matrix%local_data(i, j), &
     410              :                         "The deviation from the expected value 0 is", ABS(matrix%local_data(i, j))
     411              :                      IF (ABS(matrix%local_data(i, j)) > eps_abort) THEN
     412              :                         CPABORT("ERROR in "//routineN//": Check of matrix diagonalization failed")
     413              :                      ELSE
     414              :                         CPWARN("Check of matrix diagonalization failed in routine "//routineN)
     415              :                      END IF
     416              :                   END IF
     417              :                END IF
     418              :             END DO
     419              :          END DO
     420              : #endif
     421              :       END IF
     422              : 
     423       294911 :       CALL timestop(handle)
     424              : 
     425       294911 :    END SUBROUTINE check_diag
     426              : 
     427              : ! **************************************************************************************************
     428              : !> \brief   Computes all eigenvalues and vectors of a real symmetric matrix
     429              : !>          significantly faster than syevx, scales also much better.
     430              : !>          Needs workspace to allocate all the eigenvectors
     431              : !> \param matrix ...
     432              : !> \param eigenvectors ...
     433              : !> \param eigenvalues ...
     434              : !> \param info ...
     435              : !> \par     matrix is supposed to be in upper triangular form, and overwritten by this routine
     436              : !> \par     info If present returns error code and prevents program stops.
     437              : !>               Works currently only for scalapack.
     438              : !>               Other solvers will end the program regardless of PRESENT(info).
     439              : ! **************************************************************************************************
     440       117053 :    SUBROUTINE cp_fm_syevd(matrix, eigenvectors, eigenvalues, info)
     441              : 
     442              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix, eigenvectors
     443              :       REAL(KIND=dp), DIMENSION(:), INTENT(OUT) :: eigenvalues
     444              :       INTEGER, INTENT(OUT), OPTIONAL           :: info
     445              : 
     446              :       CHARACTER(LEN=*), PARAMETER              :: routineN = 'cp_fm_syevd'
     447              : 
     448              :       INTEGER                                  :: handle, myinfo, n, nmo
     449              :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: eig
     450              : #if defined(__parallel)
     451              :       TYPE(cp_fm_type)                         :: eigenvectors_new, matrix_new
     452              : #else
     453              :       CHARACTER(LEN=2*default_string_length)   :: message
     454              :       INTEGER                                  :: liwork, lwork, nl
     455              :       INTEGER, DIMENSION(:), POINTER           :: iwork
     456              :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: m
     457              :       REAL(KIND=dp), DIMENSION(:), POINTER     :: work
     458              : #endif
     459              : 
     460       117053 :       CALL timeset(routineN, handle)
     461              : 
     462       117053 :       myinfo = 0
     463              : 
     464       117053 :       n = matrix%matrix_struct%nrow_global
     465       351159 :       ALLOCATE (eig(n))
     466              : 
     467              : #if defined(__parallel)
     468              :       ! Determine if the input matrix needs to be redistributed before diagonalization.
     469              :       ! Heuristics are used to determine the optimal number of CPUs for diagonalization.
     470              :       ! The redistributed matrix is stored in matrix_new, which is just a pointer
     471              :       ! to the original matrix if no redistribution is required
     472       117053 :       CALL cp_fm_redistribute_start(matrix, eigenvectors, matrix_new, eigenvectors_new)
     473              : 
     474              :       ! Call scalapack on CPUs that hold the new matrix
     475       117053 :       IF (ASSOCIATED(matrix_new%matrix_struct)) THEN
     476        59220 :          IF (PRESENT(info)) THEN
     477         1114 :             CALL cp_fm_syevd_base(matrix_new, eigenvectors_new, eig, myinfo)
     478              :          ELSE
     479        58106 :             CALL cp_fm_syevd_base(matrix_new, eigenvectors_new, eig)
     480              :          END IF
     481              :       END IF
     482              :       ! Redistribute results and clean up
     483       117053 :       CALL cp_fm_redistribute_end(matrix, eigenvectors, eig, matrix_new, eigenvectors_new)
     484              : #else
     485              :       ! Retrieve the optimal work array sizes first
     486              :       lwork = -1
     487              :       liwork = -1
     488              :       m => matrix%local_data
     489              :       eig(:) = 0.0_dp
     490              : 
     491              :       ALLOCATE (work(1))
     492              :       work(:) = 0.0_dp
     493              :       ALLOCATE (iwork(1))
     494              :       iwork(:) = 0
     495              :       nl = SIZE(m, 1)
     496              : 
     497              :       CALL dsyevd('V', 'U', n, m(1, 1), nl, eig(1), work(1), lwork, iwork(1), liwork, myinfo)
     498              : 
     499              :       IF (myinfo /= 0) THEN
     500              :          WRITE (message, "(A,I0,A)") "ERROR in DSYEVD: Work space query failed (INFO = ", myinfo, ")"
     501              :          IF (PRESENT(info)) THEN
     502              :             CPWARN(TRIM(message))
     503              :          ELSE
     504              :             CPABORT(TRIM(message))
     505              :          END IF
     506              :       END IF
     507              : 
     508              :       ! Reallocate work arrays and perform diagonalisation
     509              :       lwork = INT(work(1))
     510              :       DEALLOCATE (work)
     511              :       ALLOCATE (work(lwork))
     512              :       work(:) = 0.0_dp
     513              : 
     514              :       liwork = iwork(1)
     515              :       DEALLOCATE (iwork)
     516              :       ALLOCATE (iwork(liwork))
     517              :       iwork(:) = 0
     518              : 
     519              :       CALL dsyevd('V', 'U', n, m(1, 1), nl, eig(1), work(1), lwork, iwork(1), liwork, myinfo)
     520              : 
     521              :       IF (myinfo /= 0) THEN
     522              :          WRITE (message, "(A,I0,A)") "ERROR in DSYEVD: Matrix diagonalization failed (INFO = ", myinfo, ")"
     523              :          IF (PRESENT(info)) THEN
     524              :             CPWARN(TRIM(message))
     525              :          ELSE
     526              :             CPABORT(TRIM(message))
     527              :          END IF
     528              :       END IF
     529              : 
     530              :       CALL cp_fm_to_fm(matrix, eigenvectors)
     531              : 
     532              :       DEALLOCATE (iwork)
     533              :       DEALLOCATE (work)
     534              : #endif
     535              : 
     536       117053 :       IF (PRESENT(info)) info = myinfo
     537              : 
     538       117053 :       nmo = SIZE(eigenvalues, 1)
     539       117053 :       IF (nmo > n) THEN
     540            0 :          eigenvalues(1:n) = eig(1:n)
     541              :       ELSE
     542       813209 :          eigenvalues(1:nmo) = eig(1:nmo)
     543              :       END IF
     544              : 
     545       117053 :       DEALLOCATE (eig)
     546              : 
     547       117053 :       CALL check_diag(matrix, eigenvectors, n)
     548              : 
     549       117053 :       CALL timestop(handle)
     550              : 
     551       234106 :    END SUBROUTINE cp_fm_syevd
     552              : 
     553              : ! **************************************************************************************************
     554              : !> \brief ...
     555              : !> \param matrix ...
     556              : !> \param eigenvectors ...
     557              : !> \param eigenvalues ...
     558              : !> \param info ...
     559              : ! **************************************************************************************************
     560        59220 :    SUBROUTINE cp_fm_syevd_base(matrix, eigenvectors, eigenvalues, info)
     561              : 
     562              :       TYPE(cp_fm_type), INTENT(IN)          :: matrix, eigenvectors
     563              :       REAL(KIND=dp), DIMENSION(:), INTENT(OUT) :: eigenvalues
     564              :       INTEGER, INTENT(OUT), OPTIONAL           :: info
     565              : 
     566              :       CHARACTER(LEN=*), PARAMETER              :: routineN = 'cp_fm_syevd_base'
     567              : 
     568              :       CHARACTER(LEN=2*default_string_length)   :: message
     569              :       INTEGER                                  :: handle, myinfo
     570              : #if defined(__parallel)
     571              :       TYPE(cp_blacs_env_type), POINTER         :: context
     572              :       INTEGER                                  :: liwork, lwork, &
     573              :                                                   mypcol, myprow, n
     574              :       INTEGER, DIMENSION(9)                    :: descm, descv
     575        59220 :       INTEGER, DIMENSION(:), POINTER           :: iwork
     576        59220 :       REAL(KIND=dp), DIMENSION(:), POINTER     :: work
     577        59220 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: m, v
     578              : #if defined (__HAS_IEEE_EXCEPTIONS)
     579              :       LOGICAL, DIMENSION(5)                    :: halt
     580              : #endif
     581              : #endif
     582              : 
     583        59220 :       CALL timeset(routineN, handle)
     584              : 
     585        59220 :       myinfo = 0
     586              : 
     587              : #if defined(__parallel)
     588              : 
     589        59220 :       n = matrix%matrix_struct%nrow_global
     590        59220 :       m => matrix%local_data
     591        59220 :       context => matrix%matrix_struct%context
     592        59220 :       myprow = context%mepos(1)
     593        59220 :       mypcol = context%mepos(2)
     594       592200 :       descm(:) = matrix%matrix_struct%descriptor(:)
     595              : 
     596        59220 :       v => eigenvectors%local_data
     597       592200 :       descv(:) = eigenvectors%matrix_struct%descriptor(:)
     598              : 
     599        59220 :       liwork = 7*n + 8*context%num_pe(2) + 2
     600       177660 :       ALLOCATE (iwork(liwork))
     601              : 
     602              :       ! Work space query
     603        59220 :       lwork = -1
     604        59220 :       ALLOCATE (work(1))
     605              : 
     606              :       CALL pdsyevd('V', 'U', n, m(1, 1), 1, 1, descm, eigenvalues(1), v(1, 1), 1, 1, descv, &
     607        59220 :                    work(1), lwork, iwork(1), liwork, myinfo)
     608              : 
     609        59220 :       IF (matrix%matrix_struct%para_env%is_source() .AND. (myinfo /= 0)) THEN
     610            0 :          WRITE (message, "(A,I0,A)") "ERROR in PDSYEVD: Work space query failed (INFO = ", myinfo, ")"
     611            0 :          IF (PRESENT(info)) THEN
     612            0 :             CPWARN(TRIM(message))
     613              :          ELSE
     614            0 :             CPABORT(TRIM(message))
     615              :          END IF
     616              :       END IF
     617              : 
     618              :       ! look here for a PDORMTR warning :-)
     619              :       ! this routine seems to need more workspace than reported
     620              :       ! (? lapack bug ...)
     621              :       ! arbitrary additional memory  ... we give 100000 more words
     622              :       ! (it seems to depend on the block size used)
     623              : 
     624        59220 :       lwork = NINT(work(1) + 100000)
     625              :       ! lwork = NINT(work(1))
     626        59220 :       DEALLOCATE (work)
     627       177660 :       ALLOCATE (work(lwork))
     628              : 
     629              :       ! Scalapack takes advantage of IEEE754 exceptions for speedup.
     630              :       ! Therefore, we disable floating point traps temporarily.
     631              : #if defined (__HAS_IEEE_EXCEPTIONS)
     632              :       CALL ieee_get_halting_mode(IEEE_ALL, halt)
     633              :       CALL ieee_set_halting_mode(IEEE_ALL, .FALSE.)
     634              : #endif
     635              : 
     636              :       CALL pdsyevd('V', 'U', n, m(1, 1), 1, 1, descm, eigenvalues(1), v(1, 1), 1, 1, descv, &
     637        59220 :                    work(1), lwork, iwork(1), liwork, myinfo)
     638              : 
     639              : #if defined (__HAS_IEEE_EXCEPTIONS)
     640              :       CALL ieee_set_halting_mode(IEEE_ALL, halt)
     641              : #endif
     642        59220 :       IF (matrix%matrix_struct%para_env%is_source() .AND. (myinfo /= 0)) THEN
     643            0 :          WRITE (message, "(A,I0,A)") "ERROR in PDSYEVD: Matrix diagonalization failed (INFO = ", myinfo, ")"
     644            0 :          IF (PRESENT(info)) THEN
     645            0 :             CPWARN(TRIM(message))
     646              :          ELSE
     647            0 :             CPABORT(TRIM(message))
     648              :          END IF
     649              :       END IF
     650              : 
     651        59220 :       IF (PRESENT(info)) info = myinfo
     652              : 
     653        59220 :       DEALLOCATE (work)
     654        59220 :       DEALLOCATE (iwork)
     655              : #else
     656              :       MARK_USED(matrix)
     657              :       MARK_USED(eigenvectors)
     658              :       MARK_USED(eigenvalues)
     659              :       myinfo = -1
     660              :       IF (PRESENT(info)) info = myinfo
     661              :       message = "ERROR in "//TRIM(routineN)// &
     662              :                 ": Matrix diagonalization using PDSYEVD requested without ScaLAPACK support"
     663              :       CPABORT(TRIM(message))
     664              : #endif
     665              : 
     666        59220 :       CALL timestop(handle)
     667              : 
     668        59220 :    END SUBROUTINE cp_fm_syevd_base
     669              : 
     670              : ! **************************************************************************************************
     671              : !> \brief   compute eigenvalues and optionally eigenvectors of a real symmetric matrix using scalapack.
     672              : !>          If eigenvectors are required this routine will replicate a full matrix on each CPU...
     673              : !>          if more than a handful of vectors are needed, use cp_fm_syevd instead
     674              : !> \param matrix ...
     675              : !> \param eigenvectors ...
     676              : !> \param eigenvalues ...
     677              : !> \param neig ...
     678              : !> \param work_syevx ...
     679              : !> \par     matrix is supposed to be in upper triangular form, and overwritten by this routine
     680              : !>          neig   is the number of vectors needed (default all)
     681              : !>          work_syevx evec calculation only, is the fraction of the working buffer allowed (1.0 use full buffer)
     682              : !>                     reducing this saves time, but might cause the routine to fail
     683              : ! **************************************************************************************************
     684           40 :    SUBROUTINE cp_fm_syevx(matrix, eigenvectors, eigenvalues, neig, work_syevx)
     685              : 
     686              :       ! Diagonalise the symmetric n by n matrix using the LAPACK library.
     687              : 
     688              :       TYPE(cp_fm_type), INTENT(IN)                 :: matrix
     689              :       TYPE(cp_fm_type), OPTIONAL, INTENT(IN)       :: eigenvectors
     690              :       REAL(KIND=dp), OPTIONAL, INTENT(IN)          :: work_syevx
     691              :       INTEGER, INTENT(IN), OPTIONAL                :: neig
     692              :       REAL(KIND=dp), DIMENSION(:), INTENT(OUT)     :: eigenvalues
     693              : 
     694              :       CHARACTER(LEN=*), PARAMETER                  :: routineN = "cp_fm_syevx"
     695              : 
     696              : #if defined(__parallel)
     697              :       REAL(KIND=dp), PARAMETER                     :: orfac = -1.0_dp
     698              : #endif
     699              :       REAL(KIND=dp), PARAMETER                     :: vl = 0.0_dp, &
     700              :                                                       vu = 0.0_dp
     701              : 
     702              :       TYPE(cp_blacs_env_type), POINTER             :: context
     703              :       TYPE(cp_logger_type), POINTER                :: logger
     704              :       CHARACTER(LEN=1)                             :: job_type
     705              :       REAL(KIND=dp)                                :: abstol, work_syevx_local
     706              :       INTEGER                                      :: handle, info, &
     707              :                                                       liwork, lwork, m, mypcol, &
     708              :                                                       myprow, n, nb, npcol, &
     709              :                                                       nprow, output_unit, &
     710              :                                                       neig_local
     711              :       LOGICAL                                      :: ionode, needs_evecs
     712           40 :       INTEGER, DIMENSION(:), ALLOCATABLE           :: ifail, iwork
     713           40 :       REAL(KIND=dp), DIMENSION(:), ALLOCATABLE     :: w, work
     714           40 :       REAL(KIND=dp), DIMENSION(:, :), POINTER      :: a, z
     715              : 
     716              :       REAL(KIND=dp), EXTERNAL                      :: dlamch
     717              : 
     718              : #if defined(__parallel)
     719              :       INTEGER                                      :: nn, np0, npe, nq0, nz
     720              :       INTEGER, DIMENSION(9)                        :: desca, descz
     721           40 :       INTEGER, DIMENSION(:), ALLOCATABLE           :: iclustr
     722           40 :       REAL(KIND=dp), DIMENSION(:), ALLOCATABLE     :: gap
     723              :       INTEGER, EXTERNAL                            :: iceil, numroc
     724              : #if defined (__HAS_IEEE_EXCEPTIONS)
     725              :       LOGICAL, DIMENSION(5)                        :: halt
     726              : #endif
     727              : #else
     728              :       INTEGER                                      :: nla, nlz
     729              :       INTEGER, EXTERNAL                            :: ilaenv
     730              : #endif
     731              : 
     732              :       ! by default all
     733           40 :       n = matrix%matrix_struct%nrow_global
     734           40 :       neig_local = n
     735           40 :       IF (PRESENT(neig)) neig_local = neig
     736           40 :       IF (neig_local == 0) RETURN
     737              : 
     738           40 :       CALL timeset(routineN, handle)
     739              : 
     740           40 :       needs_evecs = PRESENT(eigenvectors)
     741              : 
     742           40 :       logger => cp_get_default_logger()
     743           40 :       ionode = logger%para_env%is_source()
     744           40 :       n = matrix%matrix_struct%nrow_global
     745              : 
     746              :       ! by default allocate all needed space
     747           40 :       work_syevx_local = 1.0_dp
     748           40 :       IF (PRESENT(work_syevx)) work_syevx_local = work_syevx
     749              : 
     750              :       ! set scalapack job type
     751           40 :       IF (needs_evecs) THEN
     752           40 :          job_type = "V"
     753              :       ELSE
     754            0 :          job_type = "N"
     755              :       END IF
     756              : 
     757              :       ! target the most accurate calculation of the eigenvalues
     758           40 :       abstol = 2.0_dp*dlamch("S")
     759              : 
     760           40 :       context => matrix%matrix_struct%context
     761           40 :       myprow = context%mepos(1)
     762           40 :       mypcol = context%mepos(2)
     763           40 :       nprow = context%num_pe(1)
     764           40 :       npcol = context%num_pe(2)
     765              : 
     766          120 :       ALLOCATE (w(n))
     767          560 :       eigenvalues(:) = 0.0_dp
     768              : #if defined(__parallel)
     769              : 
     770           40 :       IF (matrix%matrix_struct%nrow_block /= matrix%matrix_struct%ncol_block) THEN
     771            0 :          CPABORT("ERROR in "//routineN//": Invalid blocksize (no square blocks) found")
     772              :       END IF
     773              : 
     774           40 :       a => matrix%local_data
     775          400 :       desca(:) = matrix%matrix_struct%descriptor(:)
     776              : 
     777           40 :       IF (needs_evecs) THEN
     778           40 :          z => eigenvectors%local_data
     779          400 :          descz(:) = eigenvectors%matrix_struct%descriptor(:)
     780              :       ELSE
     781              :          ! z will not be referenced
     782            0 :          z => matrix%local_data
     783            0 :          descz = desca
     784              :       END IF
     785              : 
     786              :       ! Get the optimal work storage size
     787              : 
     788           40 :       npe = nprow*npcol
     789           40 :       nb = matrix%matrix_struct%nrow_block
     790           40 :       nn = MAX(n, nb, 2)
     791           40 :       np0 = numroc(nn, nb, 0, 0, nprow)
     792           40 :       nq0 = MAX(numroc(nn, nb, 0, 0, npcol), nb)
     793              : 
     794           40 :       IF (needs_evecs) THEN
     795              :          lwork = 5*n + MAX(5*nn, np0*nq0) + iceil(neig_local, npe)*nn + 2*nb*nb + &
     796           40 :                  INT(work_syevx_local*REAL((neig_local - 1)*n, dp)) !!!! allocates a full matrix on every CPU !!!!!
     797              :       ELSE
     798            0 :          lwork = 5*n + MAX(5*nn, nb*(np0 + 1))
     799              :       END IF
     800           40 :       liwork = 6*MAX(N, npe + 1, 4)
     801              : 
     802          120 :       ALLOCATE (gap(npe))
     803          120 :       gap = 0.0_dp
     804          120 :       ALLOCATE (iclustr(2*npe))
     805          200 :       iclustr = 0
     806          120 :       ALLOCATE (ifail(n))
     807          560 :       ifail = 0
     808          120 :       ALLOCATE (iwork(liwork))
     809          120 :       ALLOCATE (work(lwork))
     810              : 
     811              :       ! Scalapack takes advantage of IEEE754 exceptions for speedup.
     812              :       ! Therefore, we disable floating point traps temporarily.
     813              : #if defined (__HAS_IEEE_EXCEPTIONS)
     814              :       CALL ieee_get_halting_mode(IEEE_ALL, halt)
     815              :       CALL ieee_set_halting_mode(IEEE_ALL, .FALSE.)
     816              : #endif
     817              : 
     818              :       CALL pdsyevx(job_type, "I", "U", n, a(1, 1), 1, 1, desca, vl, vu, 1, neig_local, abstol, m, nz, w(1), orfac, &
     819           40 :                    z(1, 1), 1, 1, descz, work(1), lwork, iwork(1), liwork, ifail(1), iclustr(1), gap, info)
     820              : 
     821              : #if defined (__HAS_IEEE_EXCEPTIONS)
     822              :       CALL ieee_set_halting_mode(IEEE_ALL, halt)
     823              : #endif
     824              : 
     825              :       ! Error handling
     826              : 
     827           40 :       IF (info /= 0) THEN
     828            0 :          IF (ionode) THEN
     829            0 :             output_unit = cp_logger_get_unit_nr(logger, local=.FALSE.)
     830              :             WRITE (unit=output_unit, FMT="(/,(T3,A,T12,1X,I10))") &
     831            0 :                "info    = ", info, &
     832            0 :                "lwork   = ", lwork, &
     833            0 :                "liwork  = ", liwork, &
     834            0 :                "nz      = ", nz
     835            0 :             IF (info > 0) THEN
     836              :                WRITE (unit=output_unit, FMT="(/,T3,A,(T12,6(1X,I10)))") &
     837            0 :                   "ifail   = ", ifail
     838              :                WRITE (unit=output_unit, FMT="(/,T3,A,(T12,6(1X,I10)))") &
     839            0 :                   "iclustr = ", iclustr
     840              :                WRITE (unit=output_unit, FMT="(/,T3,A,(T12,6(1X,E10.3)))") &
     841            0 :                   "gap     = ", gap
     842              :             END IF
     843              :          END IF
     844            0 :          CPABORT("ERROR in PDSYEVX (ScaLAPACK)")
     845              :       END IF
     846              : 
     847              :       ! Release work storage
     848              : 
     849           40 :       DEALLOCATE (gap)
     850           40 :       DEALLOCATE (iclustr)
     851           40 :       DEALLOCATE (ifail)
     852           40 :       DEALLOCATE (iwork)
     853           40 :       DEALLOCATE (work)
     854              : 
     855              : #else
     856              : 
     857              :       a => matrix%local_data
     858              :       IF (needs_evecs) THEN
     859              :          z => eigenvectors%local_data
     860              :       ELSE
     861              :          ! z will not be referenced
     862              :          z => matrix%local_data
     863              :       END IF
     864              : 
     865              :       ! Get the optimal work storage size
     866              : 
     867              :       nb = MAX(ilaenv(1, "DSYTRD", "U", n, -1, -1, -1), &
     868              :                ilaenv(1, "DORMTR", "U", n, -1, -1, -1))
     869              : 
     870              :       lwork = MAX((nb + 3)*n, 8*n) + n ! sun bug fix
     871              :       liwork = 5*n
     872              : 
     873              :       ALLOCATE (ifail(n))
     874              :       ifail = 0
     875              :       ALLOCATE (iwork(liwork))
     876              :       ALLOCATE (work(lwork))
     877              :       info = 0
     878              :       nla = SIZE(a, 1)
     879              :       nlz = SIZE(z, 1)
     880              : 
     881              :       CALL dsyevx(job_type, "I", "U", n, a(1, 1), nla, vl, vu, 1, neig_local, &
     882              :                   abstol, m, w, z(1, 1), nlz, work(1), lwork, iwork(1), ifail(1), info)
     883              : 
     884              :       ! Error handling
     885              : 
     886              :       IF (info /= 0) THEN
     887              :          output_unit = cp_logger_get_unit_nr(logger, local=.FALSE.)
     888              :          WRITE (unit=output_unit, FMT="(/,(T3,A,T12,1X,I10))") &
     889              :             "info    = ", info
     890              :          IF (info > 0) THEN
     891              :             WRITE (unit=output_unit, FMT="(/,T3,A,(T12,6(1X,I10)))") &
     892              :                "ifail   = ", ifail
     893              :          END IF
     894              :          CPABORT("Error in DSYEVX (ScaLAPACK)")
     895              :       END IF
     896              : 
     897              :       ! Release work storage
     898              : 
     899              :       DEALLOCATE (ifail)
     900              :       DEALLOCATE (iwork)
     901              :       DEALLOCATE (work)
     902              : 
     903              : #endif
     904          400 :       eigenvalues(1:neig_local) = w(1:neig_local)
     905           40 :       DEALLOCATE (w)
     906              : 
     907           40 :       IF (needs_evecs) CALL check_diag(matrix, eigenvectors, neig_local)
     908              : 
     909           40 :       CALL timestop(handle)
     910              : 
     911          120 :    END SUBROUTINE cp_fm_syevx
     912              : 
     913              : ! **************************************************************************************************
     914              : !> \brief decomposes a quadratic matrix into its singular value decomposition
     915              : !> \param matrix_a ...
     916              : !> \param matrix_eigvl ...
     917              : !> \param matrix_eigvr_t ...
     918              : !> \param eigval ...
     919              : !> \param info ...
     920              : !> \author Maximilian Graml
     921              : ! **************************************************************************************************
     922          100 :    SUBROUTINE cp_fm_svd(matrix_a, matrix_eigvl, matrix_eigvr_t, eigval, info)
     923              : 
     924              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix_a
     925              :       TYPE(cp_fm_type), INTENT(INOUT)          :: matrix_eigvl, matrix_eigvr_t
     926              :       REAL(KIND=dp), DIMENSION(:), POINTER, &
     927              :          INTENT(INOUT)                         :: eigval
     928              :       INTEGER, INTENT(OUT), OPTIONAL           :: info
     929              : 
     930              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_svd'
     931              : 
     932              :       CHARACTER(LEN=2*default_string_length)   :: message
     933              :       INTEGER                                  :: handle, n, m, myinfo, lwork
     934          100 :       REAL(KIND=dp), DIMENSION(:, :), POINTER  :: a
     935              :       TYPE(cp_fm_type)                         :: matrix_lu
     936          100 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: work
     937              : 
     938              : #if defined(__parallel)
     939              :       INTEGER, DIMENSION(9)                    :: desca, descu, descvt
     940              : #endif
     941          100 :       CALL timeset(routineN, handle)
     942              : 
     943              :       CALL cp_fm_create(matrix=matrix_lu, &
     944              :                         matrix_struct=matrix_a%matrix_struct, &
     945          100 :                         name="A_lu"//TRIM(ADJUSTL(cp_to_string(1)))//"MATRIX")
     946          100 :       CALL cp_fm_to_fm(matrix_a, matrix_lu)
     947          100 :       a => matrix_lu%local_data
     948          100 :       m = matrix_lu%matrix_struct%nrow_global
     949          100 :       n = matrix_lu%matrix_struct%ncol_global
     950              :       ! Assert that incoming matrix is quadratic
     951          100 :       CPASSERT(m == n)
     952              : 
     953              :       ! Prepare for workspace queries
     954          100 :       myinfo = 0
     955          100 :       lwork = -1
     956          100 :       ALLOCATE (work(1))
     957          200 :       work(:) = 0.0_dp
     958              : #if defined(__parallel)
     959              :       ! To do: That might need a redistribution check as in cp_fm_syevd
     960         1000 :       desca(:) = matrix_lu%matrix_struct%descriptor(:)
     961         1000 :       descu(:) = matrix_eigvl%matrix_struct%descriptor(:)
     962         1000 :       descvt(:) = matrix_eigvr_t%matrix_struct%descriptor(:)
     963              : 
     964              :       ! Workspace query
     965              :       CALL pdgesvd('V', 'V', m, m, matrix_lu%local_data, 1, 1, desca, eigval, matrix_eigvl%local_data, &
     966          100 :                    1, 1, descu, matrix_eigvr_t%local_data, 1, 1, descvt, work, lwork, myinfo)
     967              : 
     968          100 :       IF (matrix_lu%matrix_struct%para_env%is_source() .AND. (myinfo /= 0)) THEN
     969            0 :          WRITE (message, "(A,I0,A)") "ERROR in PDGESVD: Work space query failed (INFO = ", myinfo, ")"
     970            0 :          IF (PRESENT(info)) THEN
     971            0 :             CPWARN(TRIM(message))
     972              :          ELSE
     973            0 :             CPABORT(TRIM(message))
     974              :          END IF
     975              :       END IF
     976              : 
     977          100 :       lwork = INT(work(1))
     978          100 :       DEALLOCATE (work)
     979          300 :       ALLOCATE (work(lwork))
     980              :       ! SVD
     981              :       CALL pdgesvd('V', 'V', m, m, matrix_lu%local_data, 1, 1, desca, eigval, matrix_eigvl%local_data, &
     982          100 :                    1, 1, descu, matrix_eigvr_t%local_data, 1, 1, descvt, work, lwork, myinfo)
     983              : 
     984          100 :       IF (matrix_lu%matrix_struct%para_env%is_source() .AND. (myinfo /= 0)) THEN
     985            0 :          WRITE (message, "(A,I0,A)") "ERROR in PDGESVD: Matrix diagonalization failed (INFO = ", myinfo, ")"
     986            0 :          IF (PRESENT(info)) THEN
     987            0 :             CPWARN(TRIM(message))
     988              :          ELSE
     989            0 :             CPABORT(TRIM(message))
     990              :          END IF
     991              :       END IF
     992              : #else
     993              :       ! Workspace query
     994              :       CALL dgesvd('S', 'S', m, m, matrix_lu%local_data, m, eigval, matrix_eigvl%local_data, &
     995              :                   m, matrix_eigvr_t%local_data, m, work, lwork, myinfo)
     996              : 
     997              :       IF (myinfo /= 0) THEN
     998              :          WRITE (message, "(A,I0,A)") "ERROR in DGESVD: Work space query failed (INFO = ", myinfo, ")"
     999              :          IF (PRESENT(info)) THEN
    1000              :             CPWARN(TRIM(message))
    1001              :          ELSE
    1002              :             CPABORT(TRIM(message))
    1003              :          END IF
    1004              :       END IF
    1005              : 
    1006              :       ! SVD
    1007              :       lwork = INT(work(1))
    1008              :       DEALLOCATE (work)
    1009              :       ALLOCATE (work(lwork))
    1010              :       work(:) = 0.0_dp
    1011              : 
    1012              :       CALL dgesvd('S', 'S', m, m, matrix_lu%local_data, m, eigval, matrix_eigvl%local_data, &
    1013              :                   m, matrix_eigvr_t%local_data, m, work, lwork, myinfo)
    1014              : 
    1015              :       IF (myinfo /= 0) THEN
    1016              :          WRITE (message, "(A,I0,A)") "ERROR in DGESVD: Matrix diagonalization failed (INFO = ", myinfo, ")"
    1017              :          IF (PRESENT(info)) THEN
    1018              :             CPWARN(TRIM(message))
    1019              :          ELSE
    1020              :             CPABORT(TRIM(message))
    1021              :          END IF
    1022              :       END IF
    1023              : 
    1024              : #endif
    1025              :       ! Release intermediary matrices
    1026          100 :       DEALLOCATE (work)
    1027          100 :       CALL cp_fm_release(matrix_lu)
    1028              : 
    1029          100 :       IF (PRESENT(info)) info = myinfo
    1030              : 
    1031          100 :       CALL timestop(handle)
    1032          100 :    END SUBROUTINE cp_fm_svd
    1033              : 
    1034              : ! **************************************************************************************************
    1035              : !> \brief ...
    1036              : !> \param matrix ...
    1037              : !> \param work ...
    1038              : !> \param exponent ...
    1039              : !> \param threshold ...
    1040              : !> \param n_dependent ...
    1041              : !> \param verbose ...
    1042              : !> \param eigvals ...
    1043              : ! **************************************************************************************************
    1044         1522 :    SUBROUTINE cp_fm_power(matrix, work, exponent, threshold, n_dependent, verbose, eigvals)
    1045              : 
    1046              :       ! Raise the real symmetric n by n matrix to the power given by
    1047              :       ! the exponent. All eigenvectors with a corresponding eigenvalue lower
    1048              :       ! than threshold are quenched. result in matrix
    1049              : 
    1050              :       ! - Creation (29.03.1999, Matthias Krack)
    1051              :       ! - Parallelised using BLACS and ScaLAPACK (06.06.2001,MK)
    1052              : 
    1053              :       TYPE(cp_fm_type), INTENT(IN)               :: matrix, work
    1054              :       REAL(KIND=dp), INTENT(IN)                  :: exponent, threshold
    1055              :       INTEGER, INTENT(OUT)                       :: n_dependent
    1056              :       LOGICAL, INTENT(IN), OPTIONAL              :: verbose
    1057              :       REAL(KIND=dp), DIMENSION(2), INTENT(OUT), &
    1058              :          OPTIONAL                                :: eigvals
    1059              : 
    1060              :       CHARACTER(LEN=*), PARAMETER                :: routineN = 'cp_fm_power'
    1061              : 
    1062              :       INTEGER                                    :: handle, icol_global, &
    1063              :                                                     mypcol, myprow, &
    1064              :                                                     ncol_global, npcol, nprow, &
    1065              :                                                     nrow_global
    1066              :       LOGICAL                                    :: my_verbose
    1067              :       REAL(KIND=dp)                              :: condition_number, f, p
    1068              :       REAL(KIND=dp), DIMENSION(:), ALLOCATABLE   :: eigenvalues
    1069         1522 :       REAL(KIND=dp), DIMENSION(:, :), POINTER    :: eigenvectors
    1070              :       TYPE(cp_blacs_env_type), POINTER           :: context
    1071              : 
    1072              : #if defined(__parallel)
    1073              :       INTEGER           :: icol_local, ipcol, iprow, irow_global, &
    1074              :                            irow_local
    1075              : #endif
    1076              : 
    1077         1522 :       CALL timeset(routineN, handle)
    1078              : 
    1079         1522 :       my_verbose = .FALSE.
    1080         1522 :       IF (PRESENT(verbose)) my_verbose = verbose
    1081              : 
    1082         1522 :       context => matrix%matrix_struct%context
    1083         1522 :       myprow = context%mepos(1)
    1084         1522 :       mypcol = context%mepos(2)
    1085         1522 :       nprow = context%num_pe(1)
    1086         1522 :       npcol = context%num_pe(2)
    1087         1522 :       n_dependent = 0
    1088         1522 :       p = 0.5_dp*exponent
    1089              : 
    1090         1522 :       nrow_global = matrix%matrix_struct%nrow_global
    1091         1522 :       ncol_global = matrix%matrix_struct%ncol_global
    1092              : 
    1093         4566 :       ALLOCATE (eigenvalues(ncol_global))
    1094              : 
    1095        38709 :       eigenvalues(:) = 0.0_dp
    1096              : 
    1097              :       ! Compute the eigenvectors and eigenvalues
    1098              : 
    1099         1522 :       CALL choose_eigv_solver(matrix, work, eigenvalues)
    1100              : 
    1101         1522 :       IF (PRESENT(eigvals)) THEN
    1102          348 :          eigvals(1) = eigenvalues(1)
    1103          348 :          eigvals(2) = eigenvalues(ncol_global)
    1104              :       END IF
    1105              : 
    1106              : #if defined(__parallel)
    1107         1522 :       eigenvectors => work%local_data
    1108              : 
    1109              :       ! Build matrix**exponent with eigenvector quenching
    1110              : 
    1111        38709 :       DO icol_global = 1, ncol_global
    1112              : 
    1113        38709 :          IF (eigenvalues(icol_global) < threshold) THEN
    1114              : 
    1115           50 :             n_dependent = n_dependent + 1
    1116              : 
    1117           50 :             ipcol = work%matrix_struct%g2p_col(icol_global)
    1118              : 
    1119           50 :             IF (mypcol == ipcol) THEN
    1120           50 :                icol_local = work%matrix_struct%g2l_col(icol_global)
    1121         5850 :                DO irow_global = 1, nrow_global
    1122         5800 :                   iprow = work%matrix_struct%g2p_row(irow_global)
    1123         5850 :                   IF (myprow == iprow) THEN
    1124         2900 :                      irow_local = work%matrix_struct%g2l_row(irow_global)
    1125         2900 :                      eigenvectors(irow_local, icol_local) = 0.0_dp
    1126              :                   END IF
    1127              :                END DO
    1128              :             END IF
    1129              : 
    1130              :          ELSE
    1131              : 
    1132        37137 :             f = eigenvalues(icol_global)**p
    1133              : 
    1134        37137 :             ipcol = work%matrix_struct%g2p_col(icol_global)
    1135              : 
    1136        37137 :             IF (mypcol == ipcol) THEN
    1137        37137 :                icol_local = work%matrix_struct%g2l_col(icol_global)
    1138      2363936 :                DO irow_global = 1, nrow_global
    1139      2326799 :                   iprow = work%matrix_struct%g2p_row(irow_global)
    1140      2363936 :                   IF (myprow == iprow) THEN
    1141      1207474 :                      irow_local = work%matrix_struct%g2l_row(irow_global)
    1142              :                      eigenvectors(irow_local, icol_local) = &
    1143      1207474 :                         f*eigenvectors(irow_local, icol_local)
    1144              :                   END IF
    1145              :                END DO
    1146              :             END IF
    1147              : 
    1148              :          END IF
    1149              : 
    1150              :       END DO
    1151              : 
    1152              : #else
    1153              : 
    1154              :       eigenvectors => work%local_data
    1155              : 
    1156              :       ! Build matrix**exponent with eigenvector quenching
    1157              : 
    1158              :       DO icol_global = 1, ncol_global
    1159              : 
    1160              :          IF (eigenvalues(icol_global) < threshold) THEN
    1161              : 
    1162              :             n_dependent = n_dependent + 1
    1163              :             eigenvectors(1:nrow_global, icol_global) = 0.0_dp
    1164              : 
    1165              :          ELSE
    1166              : 
    1167              :             f = eigenvalues(icol_global)**p
    1168              :             eigenvectors(1:nrow_global, icol_global) = &
    1169              :                f*eigenvectors(1:nrow_global, icol_global)
    1170              : 
    1171              :          END IF
    1172              : 
    1173              :       END DO
    1174              : 
    1175              : #endif
    1176         1522 :       CALL cp_fm_syrk("U", "N", ncol_global, 1.0_dp, work, 1, 1, 0.0_dp, matrix)
    1177         1522 :       CALL cp_fm_uplo_to_full(matrix, work)
    1178              : 
    1179              :       ! Print some warnings/notes
    1180         1522 :       IF (matrix%matrix_struct%para_env%is_source() .AND. my_verbose) THEN
    1181            0 :          condition_number = ABS(eigenvalues(ncol_global)/eigenvalues(1))
    1182              :          WRITE (UNIT=cp_logger_get_default_unit_nr(), FMT="(/,(T2,A,ES15.6))") &
    1183            0 :             "CP_FM_POWER: smallest eigenvalue:", eigenvalues(1), &
    1184            0 :             "CP_FM_POWER: largest eigenvalue: ", eigenvalues(ncol_global), &
    1185            0 :             "CP_FM_POWER: condition number:   ", condition_number
    1186            0 :          IF (eigenvalues(1) <= 0.0_dp) THEN
    1187              :             WRITE (UNIT=cp_logger_get_default_unit_nr(), FMT="(/,T2,A)") &
    1188            0 :                "WARNING: matrix has a negative eigenvalue, tighten EPS_DEFAULT"
    1189              :          END IF
    1190            0 :          IF (condition_number > 1.0E12_dp) THEN
    1191              :             WRITE (UNIT=cp_logger_get_default_unit_nr(), FMT="(/,T2,A)") &
    1192            0 :                "WARNING: high condition number => possibly ill-conditioned matrix"
    1193              :          END IF
    1194              :       END IF
    1195              : 
    1196         1522 :       DEALLOCATE (eigenvalues)
    1197              : 
    1198         1522 :       CALL timestop(handle)
    1199              : 
    1200         1522 :    END SUBROUTINE cp_fm_power
    1201              : 
    1202              : ! **************************************************************************************************
    1203              : !> \brief ...
    1204              : !> \param matrix ...
    1205              : !> \param eigenvectors ...
    1206              : !> \param eigval ...
    1207              : !> \param thresh ...
    1208              : !> \param start_sec_block ...
    1209              : ! **************************************************************************************************
    1210           18 :    SUBROUTINE cp_fm_block_jacobi(matrix, eigenvectors, eigval, thresh, &
    1211              :                                  start_sec_block)
    1212              : 
    1213              :       ! Calculates block diagonalization of a full symmetric matrix
    1214              :       ! It has its origin in cp_fm_syevx. This routine rotates only elements
    1215              :       ! which are larger than a threshold values "thresh".
    1216              :       ! start_sec_block is the start of the second block.
    1217              :       ! IT DOES ONLY ONE SWEEP!
    1218              : 
    1219              :       ! - Creation (07.10.2002, Martin Fengler)
    1220              :       ! - Cosmetics (05.04.06, MK)
    1221              : 
    1222              :       TYPE(cp_fm_type), INTENT(IN)              :: eigenvectors, matrix
    1223              :       REAL(KIND=dp), DIMENSION(:), INTENT(IN)   :: eigval
    1224              :       INTEGER, INTENT(IN)                       :: start_sec_block
    1225              :       REAL(KIND=dp), INTENT(IN)                 :: thresh
    1226              : 
    1227              :       CHARACTER(len=*), PARAMETER               :: routineN = 'cp_fm_block_jacobi'
    1228              : 
    1229              :       INTEGER :: handle
    1230              :       REAL(KIND=dp), DIMENSION(:, :), POINTER   :: a, ev
    1231              : 
    1232              :       REAL(KIND=dp) :: tan_theta, tau, c, s
    1233              :       INTEGER  :: q, p, N
    1234           18 :       REAL(KIND=dp), DIMENSION(:), ALLOCATABLE  :: c_ip
    1235              : 
    1236              : #if defined(__parallel)
    1237              :       TYPE(cp_blacs_env_type), POINTER :: context
    1238              : 
    1239              :       INTEGER :: myprow, mypcol, nprow, npcol, block_dim_row, block_dim_col, &
    1240              :                  info, ev_row_block_size, iam, mynumrows, mype, npe, &
    1241              :                  q_loc
    1242           18 :       REAL(KIND=dp), DIMENSION(:, :), ALLOCATABLE  :: a_loc, ev_loc
    1243              :       INTEGER, DIMENSION(9)                        :: desca, descz, desc_a_block, &
    1244              :                                                       desc_ev_loc
    1245              :       TYPE(mp_comm_type):: allgrp
    1246              :       TYPE(cp_blacs_type) :: ictxt_loc
    1247              :       INTEGER, EXTERNAL :: numroc
    1248              : #endif
    1249              : 
    1250              :       ! -------------------------------------------------------------------------
    1251              : 
    1252           18 :       CALL timeset(routineN, handle)
    1253              : 
    1254              : #if defined(__parallel)
    1255           18 :       context => matrix%matrix_struct%context
    1256           18 :       allgrp = matrix%matrix_struct%para_env
    1257              : 
    1258           18 :       myprow = context%mepos(1)
    1259           18 :       mypcol = context%mepos(2)
    1260           18 :       nprow = context%num_pe(1)
    1261           18 :       npcol = context%num_pe(2)
    1262              : 
    1263           18 :       N = matrix%matrix_struct%nrow_global
    1264              : 
    1265           18 :       A => matrix%local_data
    1266          180 :       desca(:) = matrix%matrix_struct%descriptor(:)
    1267           18 :       EV => eigenvectors%local_data
    1268          180 :       descz(:) = eigenvectors%matrix_struct%descriptor(:)
    1269              : 
    1270              :       ! Copy the block to be rotated to the master process firstly and broadcast to all processes
    1271              :       ! start_sec_block defines where the second block starts!
    1272              :       ! Block will be processed together with the OO block
    1273              : 
    1274           18 :       block_dim_row = start_sec_block - 1
    1275           18 :       block_dim_col = N - block_dim_row
    1276           72 :       ALLOCATE (A_loc(block_dim_row, block_dim_col))
    1277              : 
    1278           18 :       mype = matrix%matrix_struct%para_env%mepos
    1279           18 :       npe = matrix%matrix_struct%para_env%num_pe
    1280              :       ! Get a new context
    1281           18 :       CALL ictxt_loc%gridinit(matrix%matrix_struct%para_env, 'Row-major', NPROW*NPCOL, 1)
    1282              : 
    1283              :       CALL descinit(desc_a_block, block_dim_row, block_dim_col, block_dim_row, &
    1284           18 :                     block_dim_col, 0, 0, ictxt_loc%get_handle(), block_dim_row, info)
    1285              : 
    1286              :       CALL pdgemr2d(block_dim_row, block_dim_col, A, 1, start_sec_block, desca, &
    1287           18 :                     A_loc, 1, 1, desc_a_block, context%get_handle())
    1288              :       ! Only the master (root) process received data yet
    1289           18 :       CALL allgrp%bcast(A_loc, 0)
    1290              : 
    1291              :       ! Since each process owns now the upper block, the eigenvectors can be re-sorted in such a way that
    1292              :       ! each process has a NN*1 grid, i.e. the process owns a bunch of rows which can be modified locally
    1293              : 
    1294              :       ! Initialize distribution of the eigenvectors
    1295           18 :       iam = mype
    1296           18 :       ev_row_block_size = n/(nprow*npcol)
    1297           18 :       mynumrows = NUMROC(N, ev_row_block_size, iam, 0, NPROW*NPCOL)
    1298              : 
    1299          108 :       ALLOCATE (EV_loc(mynumrows, N), c_ip(mynumrows))
    1300              : 
    1301              :       CALL descinit(desc_ev_loc, N, N, ev_row_block_size, N, 0, 0, ictxt_loc%get_handle(), &
    1302           18 :                     mynumrows, info)
    1303              : 
    1304           18 :       CALL pdgemr2d(N, N, EV, 1, 1, descz, EV_loc, 1, 1, desc_ev_loc, context%get_handle())
    1305              : 
    1306              :       ! Start block diagonalization of matrix
    1307              : 
    1308           18 :       q_loc = 0
    1309              : 
    1310         1170 :       DO q = start_sec_block, N
    1311         1152 :          q_loc = q_loc + 1
    1312       148626 :          DO p = 1, (start_sec_block - 1)
    1313              : 
    1314       148608 :             IF (ABS(A_loc(p, q_loc)) > thresh) THEN
    1315              : 
    1316       117566 :                tau = (eigval(q) - eigval(p))/(2.0_dp*A_loc(p, q_loc))
    1317              : 
    1318       117566 :                tan_theta = SIGN(1.0_dp, tau)/(ABS(tau) + SQRT(1.0_dp + tau*tau))
    1319              : 
    1320              :                ! Cos(theta)
    1321       117566 :                c = 1.0_dp/SQRT(1.0_dp + tan_theta*tan_theta)
    1322       117566 :                s = tan_theta*c
    1323              : 
    1324              :                ! Calculate eigenvectors: Q*J
    1325              :                ! c_ip = c*EV_loc(:,p) - s*EV_loc(:,q)
    1326              :                ! c_iq = s*EV_loc(:,p) + c*EV_loc(:,q)
    1327              :                ! EV(:,p) = c_ip
    1328              :                ! EV(:,q) = c_iq
    1329       117566 :                CALL dcopy(mynumrows, EV_loc(1, p), 1, c_ip(1), 1)
    1330       117566 :                CALL dscal(mynumrows, c, EV_loc(1, p), 1)
    1331       117566 :                CALL daxpy(mynumrows, -s, EV_loc(1, q), 1, EV_loc(1, p), 1)
    1332       117566 :                CALL dscal(mynumrows, c, EV_loc(1, q), 1)
    1333       117566 :                CALL daxpy(mynumrows, s, c_ip(1), 1, EV_loc(1, q), 1)
    1334              : 
    1335              :             END IF
    1336              : 
    1337              :          END DO
    1338              :       END DO
    1339              : 
    1340              :       ! Copy eigenvectors back to the original distribution
    1341           18 :       CALL pdgemr2d(N, N, EV_loc, 1, 1, desc_ev_loc, EV, 1, 1, descz, context%get_handle())
    1342              : 
    1343              :       ! Release work storage
    1344           18 :       DEALLOCATE (A_loc, EV_loc, c_ip)
    1345              : 
    1346           18 :       CALL ictxt_loc%gridexit()
    1347              : 
    1348              : #else
    1349              : 
    1350              :       N = matrix%matrix_struct%nrow_global
    1351              : 
    1352              :       ALLOCATE (c_ip(N)) ! Local eigenvalue vector
    1353              : 
    1354              :       A => matrix%local_data ! Contains the Matrix to be worked on
    1355              :       EV => eigenvectors%local_data ! Contains the eigenvectors up to blocksize, rest is garbage
    1356              : 
    1357              :       ! Start matrix diagonalization
    1358              : 
    1359              :       tan_theta = 0.0_dp
    1360              :       tau = 0.0_dp
    1361              : 
    1362              :       DO q = start_sec_block, N
    1363              :          DO p = 1, (start_sec_block - 1)
    1364              : 
    1365              :             IF (ABS(A(p, q)) > thresh) THEN
    1366              : 
    1367              :                tau = (eigval(q) - eigval(p))/(2.0_dp*A(p, q))
    1368              : 
    1369              :                tan_theta = SIGN(1.0_dp, tau)/(ABS(tau) + SQRT(1.0_dp + tau*tau))
    1370              : 
    1371              :                ! Cos(theta)
    1372              :                c = 1.0_dp/SQRT(1.0_dp + tan_theta*tan_theta)
    1373              :                s = tan_theta*c
    1374              : 
    1375              :                ! Calculate eigenvectors: Q*J
    1376              :                ! c_ip = c*EV(:,p) - s*EV(:,q)
    1377              :                ! c_iq = s*EV(:,p) + c*EV(:,q)
    1378              :                ! EV(:,p) = c_ip
    1379              :                ! EV(:,q) = c_iq
    1380              :                CALL dcopy(N, EV(1, p), 1, c_ip(1), 1)
    1381              :                CALL dscal(N, c, EV(1, p), 1)
    1382              :                CALL daxpy(N, -s, EV(1, q), 1, EV(1, p), 1)
    1383              :                CALL dscal(N, c, EV(1, q), 1)
    1384              :                CALL daxpy(N, s, c_ip(1), 1, EV(1, q), 1)
    1385              : 
    1386              :             END IF
    1387              : 
    1388              :          END DO
    1389              :       END DO
    1390              : 
    1391              :       ! Release work storage
    1392              : 
    1393              :       DEALLOCATE (c_ip)
    1394              : 
    1395              : #endif
    1396              : 
    1397           18 :       CALL timestop(handle)
    1398              : 
    1399           90 :    END SUBROUTINE cp_fm_block_jacobi
    1400              : 
    1401              : ! **************************************************************************************************
    1402              : !> \brief General Eigenvalue Problem  AX = BXE
    1403              : !>        Single option version: Cholesky decomposition of B
    1404              : !> \param amatrix ...
    1405              : !> \param bmatrix ...
    1406              : !> \param eigenvectors ...
    1407              : !> \param eigenvalues ...
    1408              : !> \param work ...
    1409              : ! **************************************************************************************************
    1410          280 :    SUBROUTINE cp_fm_geeig(amatrix, bmatrix, eigenvectors, eigenvalues, work)
    1411              : 
    1412              :       TYPE(cp_fm_type), INTENT(IN)                       :: amatrix, bmatrix, eigenvectors
    1413              :       REAL(KIND=dp), DIMENSION(:)                        :: eigenvalues
    1414              :       TYPE(cp_fm_type), INTENT(IN)                       :: work
    1415              : 
    1416              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'cp_fm_geeig'
    1417              : 
    1418              :       INTEGER                                            :: handle, nao, nmo
    1419              : 
    1420          280 :       CALL timeset(routineN, handle)
    1421              : 
    1422          280 :       CALL cp_fm_get_info(amatrix, nrow_global=nao)
    1423          280 :       nmo = SIZE(eigenvalues)
    1424              : 
    1425          280 :       IF (diag_type == FM_DIAG_TYPE_CUSOLVER .AND. nao >= 64) THEN
    1426              :          ! Use cuSolverMP generalized eigenvalue solver for large matrices
    1427            0 :          CALL cp_fm_general_cusolver(amatrix, bmatrix, eigenvectors, eigenvalues)
    1428              : #if defined(__DLAF)
    1429              :       ELSE IF (diag_type == FM_DIAG_TYPE_DLAF .AND. amatrix%matrix_struct%nrow_global >= dlaf_neigvec_min) THEN
    1430              :          ! Use DLA-Future generalized eigenvalue solver for large matrices
    1431              :          CALL cp_fm_diag_gen_dlaf(amatrix, bmatrix, eigenvectors, eigenvalues)
    1432              : #endif
    1433              :       ELSE
    1434              :          ! Cholesky decompose S=U(T)U
    1435          280 :          CALL cp_fm_cholesky_decompose(bmatrix)
    1436              :          ! Invert to get U^(-1)
    1437          280 :          CALL cp_fm_triangular_invert(bmatrix)
    1438              :          ! Reduce to get U^(-T) * H * U^(-1)
    1439          280 :          CALL cp_fm_triangular_multiply(bmatrix, amatrix, side="R")
    1440          280 :          CALL cp_fm_triangular_multiply(bmatrix, amatrix, transpose_tr=.TRUE.)
    1441              :          ! Diagonalize
    1442              :          CALL choose_eigv_solver(matrix=amatrix, eigenvectors=work, &
    1443          280 :                                  eigenvalues=eigenvalues)
    1444              :          ! Restore vectors C = U^(-1) * C*
    1445          280 :          CALL cp_fm_triangular_multiply(bmatrix, work)
    1446          280 :          CALL cp_fm_to_fm(work, eigenvectors, nmo)
    1447              :       END IF
    1448              : 
    1449          280 :       CALL timestop(handle)
    1450              : 
    1451          280 :    END SUBROUTINE cp_fm_geeig
    1452              : 
    1453              : ! **************************************************************************************************
    1454              : !> \brief General Eigenvalue Problem  AX = BXE
    1455              : !>        Use canonical diagonalization : U*s**(-1/2)
    1456              : !> \param amatrix ...
    1457              : !> \param bmatrix ...
    1458              : !> \param eigenvectors ...
    1459              : !> \param eigenvalues ...
    1460              : !> \param work ...
    1461              : !> \param epseig ...
    1462              : ! **************************************************************************************************
    1463           66 :    SUBROUTINE cp_fm_geeig_canon(amatrix, bmatrix, eigenvectors, eigenvalues, work, epseig)
    1464              : 
    1465              :       TYPE(cp_fm_type), INTENT(IN)                       :: amatrix, bmatrix, eigenvectors
    1466              :       REAL(KIND=dp), DIMENSION(:), INTENT(OUT)           :: eigenvalues
    1467              :       TYPE(cp_fm_type), INTENT(IN)                       :: work
    1468              :       REAL(KIND=dp), INTENT(IN)                          :: epseig
    1469              : 
    1470              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'cp_fm_geeig_canon'
    1471              : 
    1472              :       INTEGER                                            :: handle, i, icol, irow, nao, nc, ncol, &
    1473              :                                                             nmo, nx
    1474              :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: evals
    1475              : 
    1476           66 :       CALL timeset(routineN, handle)
    1477              : 
    1478              :       ! Test sizees
    1479           66 :       CALL cp_fm_get_info(amatrix, nrow_global=nao)
    1480           66 :       nmo = SIZE(eigenvalues)
    1481          198 :       ALLOCATE (evals(nao))
    1482              : 
    1483              :       ! Diagonalize -S matrix, this way the NULL space is at the end of the spectrum
    1484           66 :       CALL cp_fm_scale(-1.0_dp, bmatrix)
    1485           66 :       CALL choose_eigv_solver(matrix=bmatrix, eigenvectors=work, eigenvalues=evals)
    1486         4720 :       evals(:) = -evals(:)
    1487           66 :       nc = nao
    1488         4428 :       DO i = 1, nao
    1489         4428 :          IF (evals(i) < epseig) THEN
    1490           40 :             nc = i - 1
    1491           40 :             EXIT
    1492              :          END IF
    1493              :       END DO
    1494           66 :       CPASSERT(nc /= 0)
    1495              : 
    1496           66 :       IF (nc /= nao) THEN
    1497           40 :          IF (nc < nmo) THEN
    1498              :             ! Copy NULL space definition to last vectors of eigenvectors (if needed)
    1499            0 :             ncol = nmo - nc
    1500            0 :             CALL cp_fm_to_fm(work, eigenvectors, ncol, nc + 1, nc + 1)
    1501              :          END IF
    1502              :          ! Set NULL space in eigenvector matrix of S to zero
    1503          332 :          DO icol = nc + 1, nao
    1504        36172 :             DO irow = 1, nao
    1505        36132 :                CALL cp_fm_set_element(work, irow, icol, 0.0_dp)
    1506              :             END DO
    1507              :          END DO
    1508              :          ! Set small eigenvalues to a dummy save value
    1509          332 :          evals(nc + 1:nao) = 1.0_dp
    1510              :       END IF
    1511              :       ! Calculate U*s**(-1/2)
    1512         4720 :       evals(:) = 1.0_dp/SQRT(evals(:))
    1513           66 :       CALL cp_fm_column_scale(work, evals)
    1514              :       ! Reduce to get U^(-T) * H * U^(-1)
    1515           66 :       CALL cp_fm_gemm("T", "N", nao, nao, nao, 1.0_dp, work, amatrix, 0.0_dp, bmatrix)
    1516           66 :       CALL cp_fm_gemm("N", "N", nao, nao, nao, 1.0_dp, bmatrix, work, 0.0_dp, amatrix)
    1517           66 :       IF (nc /= nao) THEN
    1518              :          ! set diagonal values to save large value
    1519          332 :          DO icol = nc + 1, nao
    1520          332 :             CALL cp_fm_set_element(amatrix, icol, icol, 10000.0_dp)
    1521              :          END DO
    1522              :       END IF
    1523              :       ! Diagonalize
    1524           66 :       CALL choose_eigv_solver(matrix=amatrix, eigenvectors=bmatrix, eigenvalues=eigenvalues)
    1525           66 :       nx = MIN(nc, nmo)
    1526              :       ! Restore vectors C = U^(-1) * C*
    1527           66 :       CALL cp_fm_gemm("N", "N", nao, nx, nc, 1.0_dp, work, bmatrix, 0.0_dp, eigenvectors)
    1528              : 
    1529           66 :       DEALLOCATE (evals)
    1530              : 
    1531           66 :       CALL timestop(handle)
    1532              : 
    1533           66 :    END SUBROUTINE cp_fm_geeig_canon
    1534              : 
    1535              : END MODULE cp_fm_diag

Generated by: LCOV version 2.0-1