LCOV - code coverage report
Current view: top level - src/fm - cp_fm_elpa.F (source / functions) Coverage Total Hit
Test: CP2K Regtests (git:42dac4a) Lines: 85.9 % 156 134
Test Date: 2025-07-25 12:55:17 Functions: 100.0 % 7 7

            Line data    Source code
       1              : !--------------------------------------------------------------------------------------------------!
       2              : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3              : !   Copyright 2000-2025 CP2K developers group <https://cp2k.org>                                   !
       4              : !                                                                                                  !
       5              : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6              : !--------------------------------------------------------------------------------------------------!
       7              : 
       8              : ! **************************************************************************************************
       9              : !> \brief Wrapper for ELPA
      10              : !> \author Ole Schuett
      11              : ! **************************************************************************************************
      12              : MODULE cp_fm_elpa
      13              :    USE cp_log_handling, ONLY: cp_to_string
      14              :    USE machine, ONLY: m_cpuid, &
      15              :                       MACHINE_X86, &
      16              :                       MACHINE_CPU_GENERIC, &
      17              :                       MACHINE_X86_SSE4, &
      18              :                       MACHINE_X86_AVX, &
      19              :                       MACHINE_X86_AVX2
      20              :    USE cp_blacs_env, ONLY: cp_blacs_env_type
      21              :    USE cp_fm_basic_linalg, ONLY: cp_fm_uplo_to_full
      22              :    USE cp_fm_diag_utils, ONLY: cp_fm_redistribute_start, &
      23              :                                cp_fm_redistribute_end, &
      24              :                                cp_fm_redistribute_info
      25              :    USE cp_fm_struct, ONLY: cp_fm_struct_get
      26              :    USE cp_fm_types, ONLY: cp_fm_type, &
      27              :                           cp_fm_to_fm, &
      28              :                           cp_fm_release, &
      29              :                           cp_fm_create, &
      30              :                           cp_fm_write_info
      31              :    USE cp_log_handling, ONLY: cp_get_default_logger, &
      32              :                               cp_logger_get_default_io_unit, &
      33              :                               cp_logger_type
      34              :    USE kinds, ONLY: default_string_length, &
      35              :                     dp
      36              :    USE message_passing, ONLY: mp_comm_type
      37              :    USE OMP_LIB, ONLY: omp_get_max_threads
      38              : 
      39              : #include "../base/base_uses.f90"
      40              : 
      41              : #if defined (__ELPA)
      42              :    USE elpa_constants, ONLY: ELPA_2STAGE_REAL_INVALID, &
      43              :                              ELPA_2STAGE_REAL_DEFAULT, &
      44              :                              ELPA_2STAGE_REAL_GENERIC, &
      45              :                              ELPA_2STAGE_REAL_GENERIC_SIMPLE, &
      46              :                              ELPA_2STAGE_REAL_BGP, &
      47              :                              ELPA_2STAGE_REAL_BGQ, &
      48              :                              ELPA_2STAGE_REAL_SSE_ASSEMBLY, &
      49              :                              ELPA_2STAGE_REAL_SSE_BLOCK2, &
      50              :                              ELPA_2STAGE_REAL_SSE_BLOCK4, &
      51              :                              ELPA_2STAGE_REAL_SSE_BLOCK6, &
      52              :                              ELPA_2STAGE_REAL_AVX_BLOCK2, &
      53              :                              ELPA_2STAGE_REAL_AVX_BLOCK4, &
      54              :                              ELPA_2STAGE_REAL_AVX_BLOCK6, &
      55              :                              ELPA_2STAGE_REAL_AVX2_BLOCK2, &
      56              :                              ELPA_2STAGE_REAL_AVX2_BLOCK4, &
      57              :                              ELPA_2STAGE_REAL_AVX2_BLOCK6, &
      58              :                              ELPA_2STAGE_REAL_AVX512_BLOCK2, &
      59              :                              ELPA_2STAGE_REAL_AVX512_BLOCK4, &
      60              :                              ELPA_2STAGE_REAL_AVX512_BLOCK6, &
      61              :                              ELPA_2STAGE_REAL_NVIDIA_GPU, &
      62              :                              ELPA_2STAGE_REAL_AMD_GPU, &
      63              :                              ELPA_2STAGE_REAL_INTEL_GPU_SYCL
      64              : 
      65              :    USE elpa, ONLY: elpa_t, elpa_solver_2stage, &
      66              :                    elpa_init, elpa_uninit, &
      67              :                    elpa_allocate, elpa_deallocate, elpa_ok
      68              : #endif
      69              : 
      70              :    IMPLICIT NONE
      71              : 
      72              :    PRIVATE
      73              : 
      74              :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_fm_elpa'
      75              : 
      76              : #if defined(__ELPA)
      77              :    INTEGER, DIMENSION(21), PARAMETER :: elpa_kernel_ids = [ &
      78              :                                         ELPA_2STAGE_REAL_INVALID, & ! auto
      79              :                                         ELPA_2STAGE_REAL_GENERIC, &
      80              :                                         ELPA_2STAGE_REAL_GENERIC_SIMPLE, &
      81              :                                         ELPA_2STAGE_REAL_BGP, &
      82              :                                         ELPA_2STAGE_REAL_BGQ, &
      83              :                                         ELPA_2STAGE_REAL_SSE_ASSEMBLY, &
      84              :                                         ELPA_2STAGE_REAL_SSE_BLOCK2, &
      85              :                                         ELPA_2STAGE_REAL_SSE_BLOCK4, &
      86              :                                         ELPA_2STAGE_REAL_SSE_BLOCK6, &
      87              :                                         ELPA_2STAGE_REAL_AVX_BLOCK2, &
      88              :                                         ELPA_2STAGE_REAL_AVX_BLOCK4, &
      89              :                                         ELPA_2STAGE_REAL_AVX_BLOCK6, &
      90              :                                         ELPA_2STAGE_REAL_AVX2_BLOCK2, &
      91              :                                         ELPA_2STAGE_REAL_AVX2_BLOCK4, &
      92              :                                         ELPA_2STAGE_REAL_AVX2_BLOCK6, &
      93              :                                         ELPA_2STAGE_REAL_AVX512_BLOCK2, &
      94              :                                         ELPA_2STAGE_REAL_AVX512_BLOCK4, &
      95              :                                         ELPA_2STAGE_REAL_AVX512_BLOCK6, &
      96              :                                         ELPA_2STAGE_REAL_NVIDIA_GPU, &
      97              :                                         ELPA_2STAGE_REAL_AMD_GPU, &
      98              :                                         ELPA_2STAGE_REAL_INTEL_GPU_SYCL]
      99              : 
     100              :    CHARACTER(len=14), DIMENSION(SIZE(elpa_kernel_ids)), PARAMETER :: &
     101              :       elpa_kernel_names = [CHARACTER(len=14) :: &
     102              :                            "AUTO", &
     103              :                            "GENERIC", &
     104              :                            "GENERIC_SIMPLE", &
     105              :                            "BGP", &
     106              :                            "BGQ", &
     107              :                            "SSE", &
     108              :                            "SSE_BLOCK2", &
     109              :                            "SSE_BLOCK4", &
     110              :                            "SSE_BLOCK6", &
     111              :                            "AVX_BLOCK2", &
     112              :                            "AVX_BLOCK4", &
     113              :                            "AVX_BLOCK6", &
     114              :                            "AVX2_BLOCK2", &
     115              :                            "AVX2_BLOCK4", &
     116              :                            "AVX2_BLOCK6", &
     117              :                            "AVX512_BLOCK2", &
     118              :                            "AVX512_BLOCK4", &
     119              :                            "AVX512_BLOCK6", &
     120              :                            "NVIDIA_GPU", &
     121              :                            "AMD_GPU", &
     122              :                            "INTEL_GPU"]
     123              : 
     124              :    CHARACTER(len=44), DIMENSION(SIZE(elpa_kernel_ids)), PARAMETER :: &
     125              :       elpa_kernel_descriptions = [CHARACTER(len=44) :: &
     126              :                                   "Automatically selected kernel", &
     127              :                                   "Generic kernel", &
     128              :                                   "Simplified generic kernel", &
     129              :                                   "Kernel optimized for IBM BGP", &
     130              :                                   "Kernel optimized for IBM BGQ", &
     131              :                                   "Kernel optimized for x86_64/SSE", &
     132              :                                   "Kernel optimized for x86_64/SSE (block=2)", &
     133              :                                   "Kernel optimized for x86_64/SSE (block=4)", &
     134              :                                   "Kernel optimized for x86_64/SSE (block=6)", &
     135              :                                   "Kernel optimized for Intel AVX (block=2)", &
     136              :                                   "Kernel optimized for Intel AVX (block=4)", &
     137              :                                   "Kernel optimized for Intel AVX (block=6)", &
     138              :                                   "Kernel optimized for Intel AVX2 (block=2)", &
     139              :                                   "Kernel optimized for Intel AVX2 (block=4)", &
     140              :                                   "Kernel optimized for Intel AVX2 (block=6)", &
     141              :                                   "Kernel optimized for Intel AVX-512 (block=2)", &
     142              :                                   "Kernel optimized for Intel AVX-512 (block=4)", &
     143              :                                   "Kernel optimized for Intel AVX-512 (block=6)", &
     144              :                                   "Kernel targeting Nvidia GPUs", &
     145              :                                   "Kernel targeting AMD GPUs", &
     146              :                                   "Kernel targeting Intel GPUs"]
     147              : 
     148              : #else
     149              :    INTEGER, DIMENSION(1), PARAMETER :: elpa_kernel_ids = [-1]
     150              :    CHARACTER(len=14), DIMENSION(1), PARAMETER :: elpa_kernel_names = ["AUTO"]
     151              :    CHARACTER(len=44), DIMENSION(1), PARAMETER :: elpa_kernel_descriptions = ["Automatically selected kernel"]
     152              : #endif
     153              : 
     154              : #if defined(__ELPA)
     155              :    INTEGER, SAVE :: elpa_kernel = elpa_kernel_ids(1) ! auto
     156              : #endif
     157              :    LOGICAL, SAVE :: elpa_qr = .FALSE., &
     158              :                     elpa_qr_unsafe = .FALSE., &
     159              :                     elpa_should_print = .FALSE.
     160              : 
     161              :    PUBLIC :: cp_fm_diag_elpa, &
     162              :              set_elpa_kernel, &
     163              :              set_elpa_qr, &
     164              :              set_elpa_print, &
     165              :              elpa_kernel_ids, &
     166              :              elpa_kernel_names, &
     167              :              elpa_kernel_descriptions, &
     168              :              initialize_elpa_library, &
     169              :              finalize_elpa_library
     170              : 
     171              : CONTAINS
     172              : 
     173              : ! **************************************************************************************************
     174              : !> \brief Initialize the ELPA library
     175              : ! **************************************************************************************************
     176         9048 :    SUBROUTINE initialize_elpa_library()
     177              : #if defined(__ELPA)
     178         9048 :       IF (elpa_init(20180525) /= elpa_ok) &
     179            0 :          CPABORT("The linked ELPA library does not support the required API version")
     180              : #else
     181              :       CPABORT("Initialization of ELPA library requested but not enabled during build")
     182              : #endif
     183         9048 :    END SUBROUTINE
     184              : 
     185              : ! **************************************************************************************************
     186              : !> \brief Finalize the ELPA library
     187              : ! **************************************************************************************************
     188         9435 :    SUBROUTINE finalize_elpa_library()
     189              : #if defined(__ELPA)
     190         9435 :       CALL elpa_uninit()
     191              : #else
     192              :       CPABORT("Finalization of ELPA library requested but not enabled during build")
     193              : #endif
     194         9435 :    END SUBROUTINE
     195              : 
     196              : ! **************************************************************************************************
     197              : !> \brief Sets the active ELPA kernel.
     198              : !> \param requested_kernel one of the elpa_kernel_ids
     199              : ! **************************************************************************************************
     200         9048 :    SUBROUTINE set_elpa_kernel(requested_kernel)
     201              :       INTEGER, INTENT(IN)                                :: requested_kernel
     202              : 
     203              : #if defined (__ELPA)
     204              :       INTEGER                                            :: cpuid
     205              : 
     206         9048 :       elpa_kernel = requested_kernel
     207              : 
     208              :       ! Resolve AUTO kernel.
     209         9048 :       IF (elpa_kernel == ELPA_2STAGE_REAL_INVALID) THEN
     210         9042 :          cpuid = m_cpuid()
     211         9042 :          IF ((MACHINE_CPU_GENERIC .LT. cpuid) .AND. (cpuid .LE. MACHINE_X86)) THEN
     212            0 :             SELECT CASE (cpuid)
     213              :             CASE (MACHINE_X86_SSE4)
     214            0 :                elpa_kernel = ELPA_2STAGE_REAL_SSE_BLOCK4
     215              :             CASE (MACHINE_X86_AVX)
     216            0 :                elpa_kernel = ELPA_2STAGE_REAL_AVX_BLOCK4
     217              :             CASE (MACHINE_X86_AVX2)
     218         9042 :                elpa_kernel = ELPA_2STAGE_REAL_AVX2_BLOCK4
     219              :             CASE DEFAULT
     220         9042 :                elpa_kernel = ELPA_2STAGE_REAL_AVX512_BLOCK4
     221              :             END SELECT
     222              :          END IF
     223              : 
     224              :          ! Prefer GPU kernel if available.
     225              : #if defined (__ELPA_NVIDIA_GPU)
     226              :          elpa_kernel = ELPA_2STAGE_REAL_NVIDIA_GPU
     227              : #endif
     228              : #if defined (__ELPA_AMD_GPU)
     229              :          elpa_kernel = ELPA_2STAGE_REAL_AMD_GPU
     230              : #endif
     231              : #if defined (__ELPA_INTEL_GPU)
     232              :          elpa_kernel = ELPA_2STAGE_REAL_INTEL_GPU_SYCL
     233              : #endif
     234              : 
     235              :          ! If we could not find a suitable kernel then use ELPA_2STAGE_REAL_DEFAULT.
     236         9042 :          IF (elpa_kernel == ELPA_2STAGE_REAL_INVALID) THEN
     237            0 :             elpa_kernel = ELPA_2STAGE_REAL_DEFAULT
     238              :          END IF
     239              :       END IF
     240              : #else
     241              :       MARK_USED(requested_kernel)
     242              : #endif
     243         9048 :    END SUBROUTINE set_elpa_kernel
     244              : 
     245              : ! **************************************************************************************************
     246              : !> \brief Sets flags that determines if ELPA should try to use QR during diagonalization
     247              : !>        If use_qr = .TRUE., the QR step is performed only if the size of the input matrix is
     248              : !>        suitable. Check cp_fm_diag_elpa for further details.
     249              : !> \param use_qr the logical flag
     250              : !> \param use_qr_unsafe logical which determines if block size checks should be bypassed for some
     251              : !>                      ELPA versions, potentially leading to incorrect eigenvalues
     252              : ! **************************************************************************************************
     253         9048 :    SUBROUTINE set_elpa_qr(use_qr, use_qr_unsafe)
     254              :       LOGICAL, INTENT(IN)                                :: use_qr, use_qr_unsafe
     255              : 
     256         9048 :       elpa_qr = use_qr
     257         9048 :       elpa_qr_unsafe = use_qr_unsafe
     258         9048 :    END SUBROUTINE set_elpa_qr
     259              : 
     260              : ! **************************************************************************************************
     261              : !> \brief Sets a flag that determines if additional information about the ELPA diagonalization
     262              : !>        should be printed when the diagonalization routine is called.
     263              : !> \param flag the logical flag
     264              : ! **************************************************************************************************
     265         9048 :    SUBROUTINE set_elpa_print(flag)
     266              :       LOGICAL, INTENT(IN)                                :: flag
     267              : 
     268         9048 :       elpa_should_print = flag
     269         9048 :    END SUBROUTINE set_elpa_print
     270              : 
     271              : ! **************************************************************************************************
     272              : !> \brief Driver routine to diagonalize a FM matrix with the ELPA library.
     273              : !> \param matrix the matrix that is diagonalized
     274              : !> \param eigenvectors eigenvectors of the input matrix
     275              : !> \param eigenvalues eigenvalues of the input matrix
     276              : ! **************************************************************************************************
     277        64091 :    SUBROUTINE cp_fm_diag_elpa(matrix, eigenvectors, eigenvalues)
     278              :       TYPE(cp_fm_type), INTENT(IN)          :: matrix, eigenvectors
     279              :       REAL(KIND=dp), DIMENSION(:), INTENT(OUT) :: eigenvalues
     280              : 
     281              : #if defined(__ELPA)
     282              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_diag_elpa'
     283              : 
     284              :       INTEGER                                  :: handle
     285              :       TYPE(cp_fm_type)                         :: eigenvectors_new, matrix_new
     286              :       TYPE(cp_fm_redistribute_info)            :: rdinfo
     287              : 
     288        64091 :       CALL timeset(routineN, handle)
     289              : 
     290              :       ! Determine if the input matrix needs to be redistributed before diagonalization.
     291              :       ! Heuristics are used to determine the optimal number of CPUs for diagonalization.
     292              :       ! The redistributed matrix is stored in matrix_new, which is just a pointer
     293              :       ! to the original matrix if no redistribution is required.
     294              :       ! With ELPA, we have to make sure that all processor columns have nonzero width
     295              :       CALL cp_fm_redistribute_start(matrix, eigenvectors, matrix_new, eigenvectors_new, &
     296        64091 :                                     caller_is_elpa=.TRUE., redist_info=rdinfo)
     297              : 
     298              :       ! Call ELPA on CPUs that hold the new matrix
     299        64091 :       IF (ASSOCIATED(matrix_new%matrix_struct)) &
     300        43142 :          CALL cp_fm_diag_elpa_base(matrix_new, eigenvectors_new, eigenvalues, rdinfo)
     301              : 
     302              :       ! Redistribute results and clean up
     303        64091 :       CALL cp_fm_redistribute_end(matrix, eigenvectors, eigenvalues, matrix_new, eigenvectors_new)
     304              : 
     305        64091 :       CALL timestop(handle)
     306              : #else
     307              :       MARK_USED(matrix)
     308              :       MARK_USED(eigenvectors)
     309              :       MARK_USED(eigenvalues)
     310              : 
     311              :       CPABORT("CP2K compiled without the ELPA library.")
     312              : #endif
     313        64091 :    END SUBROUTINE cp_fm_diag_elpa
     314              : 
     315              : #if defined(__ELPA)
     316              : ! **************************************************************************************************
     317              : !> \brief Actual routine that calls ELPA to diagonalize a FM matrix.
     318              : !> \param matrix the matrix that is diagonalized
     319              : !> \param eigenvectors eigenvectors of the input matrix
     320              : !> \param eigenvalues eigenvalues of the input matrix
     321              : !> \param rdinfo ...
     322              : ! **************************************************************************************************
     323        43142 :    SUBROUTINE cp_fm_diag_elpa_base(matrix, eigenvectors, eigenvalues, rdinfo)
     324              : 
     325              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix, eigenvectors
     326              :       REAL(KIND=dp), DIMENSION(:), INTENT(OUT)           :: eigenvalues
     327              :       TYPE(cp_fm_redistribute_info), INTENT(IN)          :: rdinfo
     328              : 
     329              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_diag_elpa_base'
     330              : 
     331              :       INTEGER                                            :: handle
     332              : 
     333              :       CLASS(elpa_t), POINTER                   :: elpa_obj
     334              :       CHARACTER(len=default_string_length)     :: kernel_name
     335              :       TYPE(mp_comm_type) :: group
     336              :       INTEGER                                  :: i, &
     337              :                                                   mypcol, myprow, n, &
     338              :                                                   n_rows, n_cols, &
     339              :                                                   nblk, neig, io_unit, &
     340              :                                                   success
     341              :       LOGICAL                                  :: use_qr, check_eigenvalues
     342        43142 :       REAL(KIND=dp), DIMENSION(:), ALLOCATABLE :: eval, eval_noqr
     343              :       TYPE(cp_blacs_env_type), POINTER         :: context
     344              :       TYPE(cp_fm_type)                         :: matrix_noqr, eigenvectors_noqr
     345              :       TYPE(cp_logger_type), POINTER            :: logger
     346              :       REAL(KIND=dp), PARAMETER                 :: th = 1.0E-14_dp
     347        43142 :       INTEGER, DIMENSION(:), POINTER           :: ncol_locals
     348              : 
     349        43142 :       CALL timeset(routineN, handle)
     350        43142 :       NULLIFY (logger)
     351        43142 :       NULLIFY (ncol_locals)
     352              : 
     353        43142 :       check_eigenvalues = .FALSE.
     354              : 
     355        43142 :       logger => cp_get_default_logger()
     356        43142 :       io_unit = cp_logger_get_default_io_unit(logger)
     357              : 
     358        43142 :       n = matrix%matrix_struct%nrow_global
     359        43142 :       context => matrix%matrix_struct%context
     360        43142 :       group = matrix%matrix_struct%para_env
     361              : 
     362        43142 :       myprow = context%mepos(1)
     363        43142 :       mypcol = context%mepos(2)
     364              : 
     365              :       ! elpa needs the full matrix
     366        43142 :       CALL cp_fm_uplo_to_full(matrix, eigenvectors)
     367              : 
     368              :       CALL cp_fm_struct_get(matrix%matrix_struct, &
     369              :                             local_leading_dimension=n_rows, &
     370              :                             ncol_local=n_cols, &
     371              :                             nrow_block=nblk, &
     372        43142 :                             ncol_locals=ncol_locals)
     373              : 
     374              :       ! ELPA will fail in 'solve_tridi', with no useful error message, fail earlier
     375        86284 :       IF (io_unit > 0 .AND. ANY(ncol_locals == 0)) THEN
     376            0 :          CALL rdinfo%write(io_unit)
     377            0 :          CALL cp_fm_write_info(matrix, io_unit)
     378            0 :          CPABORT("ELPA [pre-fail]: Problem contains processor column with zero width.")
     379              :       END IF
     380              : 
     381        43142 :       neig = SIZE(eigenvalues, 1)
     382              :       ! Decide if matrix is suitable for ELPA to use QR
     383              :       ! The definition of what is considered a suitable matrix depends on the ELPA version
     384              :       ! The relevant ELPA files to check are
     385              :       !     - Proper matrix order:  src/elpa2/elpa2_template.F90
     386              :       !     - Proper block size:    test/Fortran/test.F90
     387              :       ! Note that the names of these files might change in different ELPA versions
     388              :       ! Matrix order must be even
     389        43142 :       use_qr = elpa_qr .AND. (MODULO(n, 2) .EQ. 0)
     390              :       ! Matrix order and block size must be greater than or equal to 64
     391        43142 :       IF (.NOT. elpa_qr_unsafe) &
     392        43138 :          use_qr = use_qr .AND. (n .GE. 64) .AND. (nblk .GE. 64)
     393              : 
     394              :       ! Check if eigenvalues computed with ELPA_QR_UNSAFE should be verified
     395            8 :       IF (use_qr .AND. elpa_qr_unsafe .AND. elpa_should_print) &
     396            2 :          check_eigenvalues = .TRUE.
     397              : 
     398        43142 :       CALL matrix%matrix_struct%para_env%bcast(check_eigenvalues)
     399              : 
     400        43142 :       IF (check_eigenvalues) THEN
     401              :          ! Allocate and initialize needed temporaries to compute eigenvalues without ELPA QR
     402           12 :          ALLOCATE (eval_noqr(n))
     403            4 :          CALL cp_fm_create(matrix=matrix_noqr, matrix_struct=matrix%matrix_struct)
     404            4 :          CALL cp_fm_to_fm(matrix, matrix_noqr)
     405            4 :          CALL cp_fm_create(matrix=eigenvectors_noqr, matrix_struct=eigenvectors%matrix_struct)
     406            8 :          CALL cp_fm_uplo_to_full(matrix_noqr, eigenvectors_noqr)
     407              :       END IF
     408              : 
     409        43142 :       IF (io_unit > 0 .AND. elpa_should_print) THEN
     410              :          WRITE (UNIT=io_unit, FMT="(/,T2,A)") &
     411          119 :             "ELPA| Matrix diagonalization information"
     412              : 
     413              :          ! Find name for given kernel id.
     414              :          ! In case ELPA_2STAGE_REAL_DEFAULT was used it might not be in our elpa_kernel_ids list.
     415          119 :          kernel_name = "id: "//TRIM(ADJUSTL(cp_to_string(elpa_kernel)))
     416         2618 :          DO i = 1, SIZE(elpa_kernel_ids)
     417         2618 :             IF (elpa_kernel_ids(i) == elpa_kernel) THEN
     418          119 :                kernel_name = elpa_kernel_names(i)
     419              :             END IF
     420              :          END DO
     421              : 
     422              :          WRITE (UNIT=io_unit, FMT="(T2,A,T71,I10)") &
     423          119 :             "ELPA| Matrix order (NA) ", n, &
     424          119 :             "ELPA| Matrix block size (NBLK) ", nblk, &
     425          119 :             "ELPA| Number of eigenvectors (NEV) ", neig, &
     426          119 :             "ELPA| Local rows (LOCAL_NROWS) ", n_rows, &
     427          238 :             "ELPA| Local columns (LOCAL_NCOLS) ", n_cols
     428              :          WRITE (UNIT=io_unit, FMT="(T2,A,T61,A20)") &
     429          119 :             "ELPA| Kernel ", ADJUSTR(TRIM(kernel_name))
     430          119 :          IF (elpa_qr) THEN
     431              :             WRITE (UNIT=io_unit, FMT="(T2,A,T78,A3)") &
     432            4 :                "ELPA| QR step requested ", "YES"
     433              :          ELSE
     434              :             WRITE (UNIT=io_unit, FMT="(T2,A,T79,A2)") &
     435          115 :                "ELPA| QR step requested ", "NO"
     436              :          END IF
     437              : 
     438          119 :          IF (elpa_qr) THEN
     439            4 :             IF (elpa_qr_unsafe) THEN
     440              :                WRITE (UNIT=io_unit, FMT="(T2,A,T78,A3)") &
     441            2 :                   "ELPA| Use potentially unsafe QR ", "YES"
     442              :             ELSE
     443              :                WRITE (UNIT=io_unit, FMT="(T2,A,T79,A2)") &
     444            2 :                   "ELPA| Use potentially unsafe QR ", "NO"
     445              :             END IF
     446            4 :             IF (use_qr) THEN
     447              :                WRITE (UNIT=io_unit, FMT="(T2,A,T78,A3)") &
     448            4 :                   "ELPA| Matrix is suitable for QR ", "YES"
     449              :             ELSE
     450              :                WRITE (UNIT=io_unit, FMT="(T2,A,T79,A2)") &
     451            0 :                   "ELPA| Matrix is suitable for QR ", "NO"
     452              :             END IF
     453            4 :             IF (.NOT. use_qr) THEN
     454            0 :                IF (MODULO(n, 2) /= 0) THEN
     455              :                   WRITE (UNIT=io_unit, FMT="(T2,A)") &
     456            0 :                      "ELPA| Matrix order is NOT even"
     457              :                END IF
     458            0 :                IF ((nblk < 64) .AND. (.NOT. elpa_qr_unsafe)) THEN
     459              :                   WRITE (UNIT=io_unit, FMT="(T2,A)") &
     460            0 :                      "ELPA| Matrix block size is NOT 64 or greater"
     461              :                END IF
     462              :             ELSE
     463            4 :                IF ((nblk < 64) .AND. elpa_qr_unsafe) THEN
     464              :                   WRITE (UNIT=io_unit, FMT="(T2,A)") &
     465            2 :                      "ELPA| Matrix block size check was bypassed"
     466              :                END IF
     467              :             END IF
     468              :          END IF
     469              :       END IF
     470              : 
     471              :       ! the full eigenvalues vector is needed
     472       129426 :       ALLOCATE (eval(n))
     473              : 
     474        43142 :       elpa_obj => elpa_allocate()
     475              : 
     476        43142 :       CALL elpa_obj%set("na", n, success)
     477        43142 :       CPASSERT(success == elpa_ok)
     478              : 
     479        43142 :       CALL elpa_obj%set("nev", neig, success)
     480        43142 :       CPASSERT(success == elpa_ok)
     481              : 
     482        43142 :       CALL elpa_obj%set("local_nrows", n_rows, success)
     483        43142 :       CPASSERT(success == elpa_ok)
     484              : 
     485        43142 :       CALL elpa_obj%set("local_ncols", n_cols, success)
     486        43142 :       CPASSERT(success == elpa_ok)
     487              : 
     488        43142 :       CALL elpa_obj%set("nblk", nblk, success)
     489        43142 :       CPASSERT(success == elpa_ok)
     490              : 
     491        43142 :       CALL elpa_obj%set("mpi_comm_parent", group%get_handle(), success)
     492        43142 :       CPASSERT(success == elpa_ok)
     493              : 
     494        43142 :       CALL elpa_obj%set("process_row", myprow, success)
     495        43142 :       CPASSERT(success == elpa_ok)
     496              : 
     497        43142 :       CALL elpa_obj%set("process_col", mypcol, success)
     498        43142 :       CPASSERT(success == elpa_ok)
     499              : 
     500        43142 :       success = elpa_obj%setup()
     501        43142 :       CPASSERT(success == elpa_ok)
     502              : 
     503        43142 :       CALL elpa_obj%set("solver", elpa_solver_2stage, success)
     504        43142 :       CPASSERT(success == elpa_ok)
     505              : 
     506              :       ! enabling the GPU must happen before setting the kernel
     507        43142 :       IF (elpa_kernel == ELPA_2STAGE_REAL_NVIDIA_GPU) THEN
     508            0 :          CALL elpa_obj%set("nvidia-gpu", 1, success)
     509            0 :          CPASSERT(success == elpa_ok)
     510              :       END IF
     511        43142 :       IF (elpa_kernel == ELPA_2STAGE_REAL_AMD_GPU) THEN
     512            0 :          CALL elpa_obj%set("amd-gpu", 1, success)
     513            0 :          CPASSERT(success == elpa_ok)
     514              :       END IF
     515        43142 :       IF (elpa_kernel == ELPA_2STAGE_REAL_INTEL_GPU_SYCL) THEN
     516            0 :          CALL elpa_obj%set("intel-gpu", 1, success)
     517            0 :          CPASSERT(success == elpa_ok)
     518              :       END IF
     519              : 
     520        43142 :       CALL elpa_obj%set("real_kernel", elpa_kernel, success)
     521        43142 :       CPWARN_IF(success /= elpa_ok, "Setting real_kernel for ELPA failed")
     522              : 
     523        43142 :       IF (use_qr) THEN
     524            8 :          CALL elpa_obj%set("qr", 1, success)
     525            8 :          CPASSERT(success == elpa_ok)
     526              :       END IF
     527              : 
     528              :       ! Set number of threads only when ELPA was built with OpenMP support.
     529        43142 :       IF (elpa_obj%can_set("omp_threads", omp_get_max_threads()) == ELPA_OK) THEN
     530        43142 :          CALL elpa_obj%set("omp_threads", omp_get_max_threads(), success)
     531        43142 :          CPASSERT(success == elpa_ok)
     532              :       END IF
     533              : 
     534        43142 :       CALL elpa_obj%eigenvectors(matrix%local_data, eval, eigenvectors%local_data, success)
     535        43142 :       IF (success /= elpa_ok) &
     536            0 :          CPABORT("ELPA failed to diagonalize a matrix")
     537              : 
     538        43142 :       IF (check_eigenvalues) THEN
     539              :          ! run again without QR
     540            4 :          CALL elpa_obj%set("qr", 0, success)
     541            4 :          CPASSERT(success == elpa_ok)
     542              : 
     543            4 :          CALL elpa_obj%eigenvectors(matrix_noqr%local_data, eval_noqr, eigenvectors_noqr%local_data, success)
     544            4 :          IF (success /= elpa_ok) &
     545            0 :             CPABORT("ELPA failed to diagonalize a matrix even without QR decomposition")
     546              : 
     547          340 :          IF (ANY(ABS(eval(1:neig) - eval_noqr(1:neig)) .GT. th)) &
     548            0 :             CPABORT("Eigenvalues calculated with QR decomp. in ELPA are wrong. Disable ELPA_QR_UNSAFE.")
     549              : 
     550            4 :          DEALLOCATE (eval_noqr)
     551            4 :          CALL cp_fm_release(matrix_noqr)
     552            4 :          CALL cp_fm_release(eigenvectors_noqr)
     553              :       END IF
     554              : 
     555        43142 :       CALL elpa_deallocate(elpa_obj, success)
     556        43142 :       CPASSERT(success == elpa_ok)
     557              : 
     558      1396282 :       eigenvalues(1:neig) = eval(1:neig)
     559        43142 :       DEALLOCATE (eval)
     560              : 
     561        43142 :       CALL timestop(handle)
     562              : 
     563       172568 :    END SUBROUTINE cp_fm_diag_elpa_base
     564              : #endif
     565              : 
     566              : END MODULE cp_fm_elpa
        

Generated by: LCOV version 2.0-1