LCOV - code coverage report
Current view: top level - src/fm - cp_fm_elpa.F (source / functions) Coverage Total Hit
Test: CP2K Regtests (git:8ebf9ad) Lines: 84.7 % 150 127
Test Date: 2026-01-22 06:43:13 Functions: 100.0 % 6 6

            Line data    Source code
       1              : !--------------------------------------------------------------------------------------------------!
       2              : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3              : !   Copyright 2000-2026 CP2K developers group <https://cp2k.org>                                   !
       4              : !                                                                                                  !
       5              : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6              : !--------------------------------------------------------------------------------------------------!
       7              : 
       8              : ! **************************************************************************************************
       9              : !> \brief Wrapper for ELPA
      10              : !> \author Ole Schuett
      11              : ! **************************************************************************************************
      12              : MODULE cp_fm_elpa
      13              :    USE cp_log_handling, ONLY: cp_to_string
      14              :    USE machine, ONLY: m_cpuid, &
      15              :                       MACHINE_X86, &
      16              :                       MACHINE_CPU_GENERIC, &
      17              :                       MACHINE_X86_SSE4, &
      18              :                       MACHINE_X86_AVX, &
      19              :                       MACHINE_X86_AVX2
      20              :    USE cp_blacs_env, ONLY: cp_blacs_env_type
      21              :    USE cp_fm_basic_linalg, ONLY: cp_fm_uplo_to_full
      22              :    USE cp_fm_diag_utils, ONLY: cp_fm_redistribute_start, &
      23              :                                cp_fm_redistribute_end, &
      24              :                                cp_fm_redistribute_info
      25              :    USE cp_fm_struct, ONLY: cp_fm_struct_get
      26              :    USE cp_fm_types, ONLY: cp_fm_type, &
      27              :                           cp_fm_to_fm, &
      28              :                           cp_fm_release, &
      29              :                           cp_fm_create, &
      30              :                           cp_fm_write_info
      31              :    USE cp_log_handling, ONLY: cp_get_default_logger, &
      32              :                               cp_logger_get_default_io_unit, &
      33              :                               cp_logger_type
      34              :    USE kinds, ONLY: default_string_length, dp
      35              :    USE message_passing, ONLY: mp_comm_type
      36              :    USE OMP_LIB, ONLY: omp_get_max_threads
      37              : #if defined(__HAS_IEEE_EXCEPTIONS)
      38              :    USE ieee_exceptions, ONLY: ieee_get_halting_mode, &
      39              :                               ieee_set_halting_mode, &
      40              :                               IEEE_ALL
      41              : #endif
      42              : #include "../base/base_uses.f90"
      43              : 
      44              : #if defined(__ELPA)
      45              :    USE elpa_constants, ONLY: ELPA_SOLVER_1STAGE, ELPA_SOLVER_2STAGE, ELPA_OK, &
      46              :                              ELPA_2STAGE_REAL_INVALID, &
      47              :                              ELPA_2STAGE_REAL_DEFAULT, &
      48              :                              ELPA_2STAGE_REAL_GENERIC, &
      49              :                              ELPA_2STAGE_REAL_GENERIC_SIMPLE, &
      50              :                              ELPA_2STAGE_REAL_BGP, &
      51              :                              ELPA_2STAGE_REAL_BGQ, &
      52              :                              ELPA_2STAGE_REAL_SSE_ASSEMBLY, &
      53              :                              ELPA_2STAGE_REAL_SSE_BLOCK2, &
      54              :                              ELPA_2STAGE_REAL_SSE_BLOCK4, &
      55              :                              ELPA_2STAGE_REAL_SSE_BLOCK6, &
      56              :                              ELPA_2STAGE_REAL_AVX_BLOCK2, &
      57              :                              ELPA_2STAGE_REAL_AVX_BLOCK4, &
      58              :                              ELPA_2STAGE_REAL_AVX_BLOCK6, &
      59              :                              ELPA_2STAGE_REAL_AVX2_BLOCK2, &
      60              :                              ELPA_2STAGE_REAL_AVX2_BLOCK4, &
      61              :                              ELPA_2STAGE_REAL_AVX2_BLOCK6, &
      62              :                              ELPA_2STAGE_REAL_AVX512_BLOCK2, &
      63              :                              ELPA_2STAGE_REAL_AVX512_BLOCK4, &
      64              :                              ELPA_2STAGE_REAL_AVX512_BLOCK6, &
      65              :                              ELPA_2STAGE_REAL_NVIDIA_GPU, &
      66              :                              ELPA_2STAGE_REAL_AMD_GPU, &
      67              :                              ELPA_2STAGE_REAL_INTEL_GPU_SYCL
      68              : 
      69              :    USE elpa, ONLY: elpa_t, elpa_init, elpa_uninit, &
      70              :                    elpa_allocate, elpa_deallocate
      71              : #endif
      72              : 
      73              :    IMPLICIT NONE
      74              : 
      75              :    PRIVATE
      76              : 
      77              :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_fm_elpa'
      78              : 
      79              : #if defined(__ELPA)
      80              :    INTEGER, DIMENSION(21), PARAMETER :: elpa_kernel_ids = [ &
      81              :                                         ELPA_2STAGE_REAL_INVALID, & ! auto
      82              :                                         ELPA_2STAGE_REAL_GENERIC, &
      83              :                                         ELPA_2STAGE_REAL_GENERIC_SIMPLE, &
      84              :                                         ELPA_2STAGE_REAL_BGP, &
      85              :                                         ELPA_2STAGE_REAL_BGQ, &
      86              :                                         ELPA_2STAGE_REAL_SSE_ASSEMBLY, &
      87              :                                         ELPA_2STAGE_REAL_SSE_BLOCK2, &
      88              :                                         ELPA_2STAGE_REAL_SSE_BLOCK4, &
      89              :                                         ELPA_2STAGE_REAL_SSE_BLOCK6, &
      90              :                                         ELPA_2STAGE_REAL_AVX_BLOCK2, &
      91              :                                         ELPA_2STAGE_REAL_AVX_BLOCK4, &
      92              :                                         ELPA_2STAGE_REAL_AVX_BLOCK6, &
      93              :                                         ELPA_2STAGE_REAL_AVX2_BLOCK2, &
      94              :                                         ELPA_2STAGE_REAL_AVX2_BLOCK4, &
      95              :                                         ELPA_2STAGE_REAL_AVX2_BLOCK6, &
      96              :                                         ELPA_2STAGE_REAL_AVX512_BLOCK2, &
      97              :                                         ELPA_2STAGE_REAL_AVX512_BLOCK4, &
      98              :                                         ELPA_2STAGE_REAL_AVX512_BLOCK6, &
      99              :                                         ELPA_2STAGE_REAL_NVIDIA_GPU, &
     100              :                                         ELPA_2STAGE_REAL_AMD_GPU, &
     101              :                                         ELPA_2STAGE_REAL_INTEL_GPU_SYCL]
     102              : 
     103              :    CHARACTER(len=14), DIMENSION(SIZE(elpa_kernel_ids)), PARAMETER :: &
     104              :       elpa_kernel_names = [CHARACTER(len=14) :: &
     105              :                            "AUTO", &
     106              :                            "GENERIC", &
     107              :                            "GENERIC_SIMPLE", &
     108              :                            "BGP", &
     109              :                            "BGQ", &
     110              :                            "SSE", &
     111              :                            "SSE_BLOCK2", &
     112              :                            "SSE_BLOCK4", &
     113              :                            "SSE_BLOCK6", &
     114              :                            "AVX_BLOCK2", &
     115              :                            "AVX_BLOCK4", &
     116              :                            "AVX_BLOCK6", &
     117              :                            "AVX2_BLOCK2", &
     118              :                            "AVX2_BLOCK4", &
     119              :                            "AVX2_BLOCK6", &
     120              :                            "AVX512_BLOCK2", &
     121              :                            "AVX512_BLOCK4", &
     122              :                            "AVX512_BLOCK6", &
     123              :                            "NVIDIA_GPU", &
     124              :                            "AMD_GPU", &
     125              :                            "INTEL_GPU"]
     126              : 
     127              :    CHARACTER(len=44), DIMENSION(SIZE(elpa_kernel_ids)), PARAMETER :: &
     128              :       elpa_kernel_descriptions = [CHARACTER(len=44) :: &
     129              :                                   "Automatically selected kernel", &
     130              :                                   "Generic kernel", &
     131              :                                   "Simplified generic kernel", &
     132              :                                   "Kernel optimized for IBM BGP", &
     133              :                                   "Kernel optimized for IBM BGQ", &
     134              :                                   "Kernel optimized for x86_64/SSE", &
     135              :                                   "Kernel optimized for x86_64/SSE (block=2)", &
     136              :                                   "Kernel optimized for x86_64/SSE (block=4)", &
     137              :                                   "Kernel optimized for x86_64/SSE (block=6)", &
     138              :                                   "Kernel optimized for Intel AVX (block=2)", &
     139              :                                   "Kernel optimized for Intel AVX (block=4)", &
     140              :                                   "Kernel optimized for Intel AVX (block=6)", &
     141              :                                   "Kernel optimized for Intel AVX2 (block=2)", &
     142              :                                   "Kernel optimized for Intel AVX2 (block=4)", &
     143              :                                   "Kernel optimized for Intel AVX2 (block=6)", &
     144              :                                   "Kernel optimized for Intel AVX-512 (block=2)", &
     145              :                                   "Kernel optimized for Intel AVX-512 (block=4)", &
     146              :                                   "Kernel optimized for Intel AVX-512 (block=6)", &
     147              :                                   "Kernel targeting Nvidia GPUs", &
     148              :                                   "Kernel targeting AMD GPUs", &
     149              :                                   "Kernel targeting Intel GPUs"]
     150              : #else
     151              :    INTEGER, DIMENSION(1), PARAMETER :: elpa_kernel_ids = [-1]
     152              :    CHARACTER(len=14), DIMENSION(1), PARAMETER :: elpa_kernel_names = ["AUTO"]
     153              :    CHARACTER(len=44), DIMENSION(1), PARAMETER :: elpa_kernel_descriptions = ["Automatically selected kernel"]
     154              : #endif
     155              : 
     156              : #if defined(__ELPA)
     157              :    INTEGER, SAVE :: elpa_kernel = elpa_kernel_ids(1) ! auto
     158              : #endif
     159              : 
     160              :    ! elpa_qr_unsafe: disable block size limitations
     161              :    LOGICAL, SAVE :: elpa_should_print = .FALSE., &
     162              :                     elpa_qr_unsafe = .TRUE., &
     163              :                     elpa_qr = .FALSE.
     164              : 
     165              : #if defined(__OFFLOAD_OPENCL)
     166              :    LOGICAL, SAVE :: elpa_one_stage = .TRUE.
     167              : #else
     168              :    LOGICAL, SAVE :: elpa_one_stage = .FALSE.
     169              : #endif
     170              : 
     171              :    PUBLIC :: cp_fm_diag_elpa, &
     172              :              set_elpa_kernel, &
     173              :              set_elpa_print, &
     174              :              elpa_qr, &
     175              :              elpa_one_stage, &
     176              :              elpa_kernel_ids, &
     177              :              elpa_kernel_names, &
     178              :              elpa_kernel_descriptions, &
     179              :              initialize_elpa_library, &
     180              :              finalize_elpa_library
     181              : 
     182              : CONTAINS
     183              : 
     184              : ! **************************************************************************************************
     185              : !> \brief Initialize the ELPA library
     186              : !> \param one_stage ...
     187              : !> \param qr ...
     188              : ! **************************************************************************************************
     189         9156 :    SUBROUTINE initialize_elpa_library(one_stage, qr)
     190              :       LOGICAL, INTENT(IN), OPTIONAL                      :: one_stage, qr
     191              : 
     192              : #if defined(__ELPA)
     193         9156 :       IF (elpa_init(20180525) /= ELPA_OK) &
     194            0 :          CPABORT("The linked ELPA library does not support the required API version")
     195         9156 :       IF (PRESENT(one_stage)) elpa_one_stage = one_stage
     196         9156 :       IF (PRESENT(qr)) elpa_qr = qr
     197              : #else
     198              :       CPABORT("Initialization of ELPA library requested but not enabled during build")
     199              : #endif
     200         9156 :    END SUBROUTINE initialize_elpa_library
     201              : 
     202              : ! **************************************************************************************************
     203              : !> \brief Finalize the ELPA library
     204              : ! **************************************************************************************************
     205         9543 :    SUBROUTINE finalize_elpa_library()
     206              : #if defined(__ELPA)
     207         9543 :       CALL elpa_uninit()
     208              : #else
     209              :       CPABORT("Finalization of ELPA library requested but not enabled during build")
     210              : #endif
     211         9543 :    END SUBROUTINE finalize_elpa_library
     212              : 
     213              : ! **************************************************************************************************
     214              : !> \brief Sets the active ELPA kernel.
     215              : !> \param requested_kernel one of the elpa_kernel_ids
     216              : ! **************************************************************************************************
     217         9156 :    SUBROUTINE set_elpa_kernel(requested_kernel)
     218              :       INTEGER, INTENT(IN)                                :: requested_kernel
     219              : 
     220              : #if defined(__ELPA)
     221              :       INTEGER                                            :: cpuid
     222              : 
     223         9156 :       elpa_kernel = requested_kernel
     224              : 
     225              :       ! Resolve AUTO kernel.
     226         9156 :       IF (elpa_kernel == ELPA_2STAGE_REAL_INVALID) THEN
     227         9152 :          cpuid = m_cpuid()
     228         9152 :          IF ((MACHINE_CPU_GENERIC < cpuid) .AND. (cpuid <= MACHINE_X86)) THEN
     229            0 :             SELECT CASE (cpuid)
     230              :             CASE (MACHINE_X86_SSE4)
     231            0 :                elpa_kernel = ELPA_2STAGE_REAL_SSE_BLOCK4
     232              :             CASE (MACHINE_X86_AVX)
     233            0 :                elpa_kernel = ELPA_2STAGE_REAL_AVX_BLOCK4
     234              :             CASE (MACHINE_X86_AVX2)
     235         9152 :                elpa_kernel = ELPA_2STAGE_REAL_AVX2_BLOCK4
     236              :             CASE DEFAULT
     237         9152 :                elpa_kernel = ELPA_2STAGE_REAL_AVX512_BLOCK4
     238              :             END SELECT
     239              :          END IF
     240              : 
     241              :          ! Prefer GPU kernel if available.
     242              : #if !defined(__NO_OFFLOAD_ELPA)
     243              : #if defined(__OFFLOAD_CUDA)
     244              :          elpa_kernel = ELPA_2STAGE_REAL_NVIDIA_GPU
     245              : #endif
     246              : #if defined(__OFFLOAD_HIP)
     247              :          elpa_kernel = ELPA_2STAGE_REAL_AMD_GPU
     248              : #endif
     249              : #if defined(__OFFLOAD_OPENCL)
     250              :          elpa_kernel = ELPA_2STAGE_REAL_INTEL_GPU_SYCL
     251              : #endif
     252              : #endif
     253              :          ! If we could not find a suitable kernel then use ELPA_2STAGE_REAL_DEFAULT.
     254         9152 :          IF (elpa_kernel == ELPA_2STAGE_REAL_INVALID) THEN
     255            0 :             elpa_kernel = ELPA_2STAGE_REAL_DEFAULT
     256              :          END IF
     257              :       END IF
     258              : #else
     259              :       MARK_USED(requested_kernel)
     260              : #endif
     261         9156 :    END SUBROUTINE set_elpa_kernel
     262              : 
     263              : ! **************************************************************************************************
     264              : !> \brief Sets a flag that determines if additional information about the ELPA diagonalization
     265              : !>        should be printed when the diagonalization routine is called.
     266              : !> \param flag the logical flag
     267              : ! **************************************************************************************************
     268         9156 :    SUBROUTINE set_elpa_print(flag)
     269              :       LOGICAL, INTENT(IN)                                :: flag
     270              : 
     271         9156 :       elpa_should_print = flag
     272         9156 :    END SUBROUTINE set_elpa_print
     273              : 
     274              : ! **************************************************************************************************
     275              : !> \brief Driver routine to diagonalize a FM matrix with the ELPA library.
     276              : !> \param matrix the matrix that is diagonalized
     277              : !> \param eigenvectors eigenvectors of the input matrix
     278              : !> \param eigenvalues eigenvalues of the input matrix
     279              : ! **************************************************************************************************
     280        11144 :    SUBROUTINE cp_fm_diag_elpa(matrix, eigenvectors, eigenvalues)
     281              :       TYPE(cp_fm_type), INTENT(IN)          :: matrix, eigenvectors
     282              :       REAL(KIND=dp), DIMENSION(:), INTENT(OUT) :: eigenvalues
     283              : 
     284              : #if defined(__ELPA)
     285              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_diag_elpa'
     286              : 
     287              :       INTEGER                                  :: handle
     288              :       TYPE(cp_fm_type)                         :: eigenvectors_new, matrix_new
     289              :       TYPE(cp_fm_redistribute_info)            :: rdinfo
     290              : 
     291        11144 :       CALL timeset(routineN, handle)
     292              : 
     293              :       ! Determine if the input matrix needs to be redistributed before diagonalization.
     294              :       ! Heuristics are used to determine the optimal number of CPUs for diagonalization.
     295              :       ! The redistributed matrix is stored in matrix_new, which is just a pointer
     296              :       ! to the original matrix if no redistribution is required.
     297              :       ! With ELPA, we have to make sure that all processor columns have nonzero width
     298              :       CALL cp_fm_redistribute_start(matrix, eigenvectors, matrix_new, eigenvectors_new, &
     299        11144 :                                     caller_is_elpa=.TRUE., redist_info=rdinfo)
     300              : 
     301              :       ! Call ELPA on CPUs that hold the new matrix
     302        11144 :       IF (ASSOCIATED(matrix_new%matrix_struct)) &
     303         8106 :          CALL cp_fm_diag_elpa_base(matrix_new, eigenvectors_new, eigenvalues, rdinfo)
     304              : 
     305              :       ! Redistribute results and clean up
     306        11144 :       CALL cp_fm_redistribute_end(matrix, eigenvectors, eigenvalues, matrix_new, eigenvectors_new)
     307              : 
     308        11144 :       CALL timestop(handle)
     309              : #else
     310              :       eigenvalues = 0
     311              :       MARK_USED(matrix)
     312              :       MARK_USED(eigenvectors)
     313              : 
     314              :       CPABORT("CP2K compiled without the ELPA library.")
     315              : #endif
     316        11144 :    END SUBROUTINE cp_fm_diag_elpa
     317              : 
     318              : #if defined(__ELPA)
     319              : ! **************************************************************************************************
     320              : !> \brief Actual routine that calls ELPA to diagonalize a FM matrix.
     321              : !> \param matrix the matrix that is diagonalized
     322              : !> \param eigenvectors eigenvectors of the input matrix
     323              : !> \param eigenvalues eigenvalues of the input matrix
     324              : !> \param rdinfo ...
     325              : ! **************************************************************************************************
     326         8106 :    SUBROUTINE cp_fm_diag_elpa_base(matrix, eigenvectors, eigenvalues, rdinfo)
     327              : 
     328              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix, eigenvectors
     329              :       REAL(KIND=dp), DIMENSION(:), INTENT(OUT)           :: eigenvalues
     330              :       TYPE(cp_fm_redistribute_info), INTENT(IN)          :: rdinfo
     331              : 
     332              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_diag_elpa_base'
     333              : 
     334              :       INTEGER                                            :: handle
     335              : 
     336              :       CLASS(elpa_t), POINTER                   :: elpa_obj
     337              :       CHARACTER(len=default_string_length)     :: kernel_name
     338              :       TYPE(mp_comm_type) :: group
     339              :       INTEGER                                  :: i, &
     340              :                                                   mypcol, myprow, n, &
     341              :                                                   n_rows, n_cols, &
     342              :                                                   nblk, neig, io_unit, &
     343              :                                                   success
     344              :       LOGICAL                                  :: use_qr, check_eigenvalues
     345         8106 :       REAL(KIND=dp), DIMENSION(:), ALLOCATABLE :: eval, eval_noqr
     346              :       TYPE(cp_blacs_env_type), POINTER         :: context
     347              :       TYPE(cp_fm_type)                         :: matrix_noqr, eigenvectors_noqr
     348              :       TYPE(cp_logger_type), POINTER            :: logger
     349              :       REAL(KIND=dp), PARAMETER                 :: th = 1.0E-14_dp
     350         8106 :       INTEGER, DIMENSION(:), POINTER           :: ncol_locals
     351              : #if defined(__HAS_IEEE_EXCEPTIONS)
     352              :       LOGICAL, DIMENSION(5)                    :: halt
     353              : #endif
     354              : 
     355         8106 :       CALL timeset(routineN, handle)
     356         8106 :       NULLIFY (logger)
     357         8106 :       NULLIFY (ncol_locals)
     358              : 
     359         8106 :       check_eigenvalues = .FALSE.
     360              : 
     361         8106 :       logger => cp_get_default_logger()
     362         8106 :       io_unit = cp_logger_get_default_io_unit(logger)
     363              : 
     364         8106 :       n = matrix%matrix_struct%nrow_global
     365         8106 :       context => matrix%matrix_struct%context
     366         8106 :       group = matrix%matrix_struct%para_env
     367              : 
     368         8106 :       myprow = context%mepos(1)
     369         8106 :       mypcol = context%mepos(2)
     370              : 
     371              :       ! elpa needs the full matrix
     372         8106 :       CALL cp_fm_uplo_to_full(matrix, eigenvectors)
     373              : 
     374              :       CALL cp_fm_struct_get(matrix%matrix_struct, &
     375              :                             local_leading_dimension=n_rows, &
     376              :                             ncol_local=n_cols, &
     377              :                             nrow_block=nblk, &
     378         8106 :                             ncol_locals=ncol_locals)
     379              : 
     380              :       ! ELPA will fail in 'solve_tridi', with no useful error message, fail earlier
     381        16212 :       IF (io_unit > 0 .AND. ANY(ncol_locals == 0)) THEN
     382            0 :          CALL rdinfo%write(io_unit)
     383            0 :          CALL cp_fm_write_info(matrix, io_unit)
     384            0 :          CPABORT("ELPA [pre-fail]: Problem contains processor column with zero width.")
     385              :       END IF
     386              : 
     387         8106 :       neig = SIZE(eigenvalues, 1)
     388              :       ! Decide if matrix is suitable for ELPA to use QR
     389              :       ! The definition of what is considered a suitable matrix depends on the ELPA version
     390              :       ! The relevant ELPA files to check are
     391              :       !     - Proper matrix order:  src/elpa2/elpa2_template.F90
     392              :       !     - Proper block size:    test/Fortran/test.F90
     393              :       ! Note that the names of these files might change in different ELPA versions
     394              :       ! Matrix order must be even
     395         8106 :       use_qr = elpa_qr .AND. (MODULO(n, 2) == 0)
     396              :       ! Matrix order and block size must be greater than or equal to 64
     397         8106 :       IF (.NOT. elpa_qr_unsafe) &
     398            0 :          use_qr = use_qr .AND. (n >= 64) .AND. (nblk >= 64)
     399              : 
     400              :       ! Check if eigenvalues computed with elpa_qr_unsafe should be verified
     401         8106 :       IF (use_qr .AND. elpa_qr_unsafe .AND. elpa_should_print) &
     402            4 :          check_eigenvalues = .TRUE.
     403              : 
     404         8106 :       CALL matrix%matrix_struct%para_env%bcast(check_eigenvalues)
     405              : 
     406         8106 :       IF (check_eigenvalues) THEN
     407              :          ! Allocate and initialize needed temporaries to compute eigenvalues without ELPA QR
     408           24 :          ALLOCATE (eval_noqr(n))
     409            8 :          CALL cp_fm_create(matrix=matrix_noqr, matrix_struct=matrix%matrix_struct)
     410            8 :          CALL cp_fm_to_fm(matrix, matrix_noqr)
     411            8 :          CALL cp_fm_create(matrix=eigenvectors_noqr, matrix_struct=eigenvectors%matrix_struct)
     412           16 :          CALL cp_fm_uplo_to_full(matrix_noqr, eigenvectors_noqr)
     413              :       END IF
     414              : 
     415         8106 :       IF (io_unit > 0 .AND. elpa_should_print) THEN
     416              :          WRITE (UNIT=io_unit, FMT="(/,T2,A)") &
     417           40 :             "ELPA| Matrix diagonalization information"
     418              : 
     419              :          ! Find name for given kernel id.
     420              :          ! In case of ELPA_2STAGE_REAL_DEFAULT was used it might not be in our elpa_kernel_ids list.
     421           40 :          kernel_name = "id: "//TRIM(ADJUSTL(cp_to_string(elpa_kernel)))
     422          880 :          DO i = 1, SIZE(elpa_kernel_ids)
     423          880 :             IF (elpa_kernel_ids(i) == elpa_kernel) THEN
     424           40 :                kernel_name = elpa_kernel_names(i)
     425              :             END IF
     426              :          END DO
     427              : 
     428              :          WRITE (UNIT=io_unit, FMT="(T2,A,T71,I10)") &
     429           40 :             "ELPA| Matrix order (NA) ", n, &
     430           40 :             "ELPA| Matrix block size (NBLK) ", nblk, &
     431           40 :             "ELPA| Number of eigenvectors (NEV) ", neig, &
     432           40 :             "ELPA| Local rows (LOCAL_NROWS) ", n_rows, &
     433           80 :             "ELPA| Local columns (LOCAL_NCOLS) ", n_cols
     434              :          WRITE (UNIT=io_unit, FMT="(T2,A,T61,A20)") &
     435           40 :             "ELPA| Kernel ", ADJUSTR(TRIM(kernel_name))
     436           40 :          IF (elpa_qr) THEN
     437              :             WRITE (UNIT=io_unit, FMT="(T2,A,T78,A3)") &
     438            4 :                "ELPA| QR step requested ", "YES"
     439              :          ELSE
     440              :             WRITE (UNIT=io_unit, FMT="(T2,A,T79,A2)") &
     441           36 :                "ELPA| QR step requested ", "NO"
     442              :          END IF
     443              : 
     444           40 :          IF (elpa_qr) THEN
     445            4 :             IF (use_qr) THEN
     446              :                WRITE (UNIT=io_unit, FMT="(T2,A,T78,A3)") &
     447            4 :                   "ELPA| Matrix is suitable for QR ", "YES"
     448              :             ELSE
     449              :                WRITE (UNIT=io_unit, FMT="(T2,A,T79,A2)") &
     450            0 :                   "ELPA| Matrix is suitable for QR ", "NO"
     451              :             END IF
     452            4 :             IF (.NOT. use_qr) THEN
     453            0 :                IF (MODULO(n, 2) /= 0) THEN
     454              :                   WRITE (UNIT=io_unit, FMT="(T2,A)") &
     455            0 :                      "ELPA| Matrix order is NOT even"
     456              :                END IF
     457            0 :                IF ((nblk < 64) .AND. (.NOT. elpa_qr_unsafe)) THEN
     458              :                   WRITE (UNIT=io_unit, FMT="(T2,A)") &
     459            0 :                      "ELPA| Matrix block size is NOT 64 or greater"
     460              :                END IF
     461              :             ELSE
     462            4 :                IF ((nblk < 64) .AND. elpa_qr_unsafe) THEN
     463              :                   WRITE (UNIT=io_unit, FMT="(T2,A)") &
     464            2 :                      "ELPA| Matrix block size check was bypassed"
     465              :                END IF
     466              :             END IF
     467              :          END IF
     468              :       END IF
     469              : 
     470              :       ! the full eigenvalues vector is needed
     471        24318 :       ALLOCATE (eval(n))
     472              : 
     473         8106 :       elpa_obj => elpa_allocate()
     474              : 
     475         8106 :       CALL elpa_obj%set("na", n, success)
     476         8106 :       CPASSERT(success == ELPA_OK)
     477              : 
     478         8106 :       CALL elpa_obj%set("nev", neig, success)
     479         8106 :       CPASSERT(success == ELPA_OK)
     480              : 
     481         8106 :       CALL elpa_obj%set("local_nrows", n_rows, success)
     482         8106 :       CPASSERT(success == ELPA_OK)
     483              : 
     484         8106 :       CALL elpa_obj%set("local_ncols", n_cols, success)
     485         8106 :       CPASSERT(success == ELPA_OK)
     486              : 
     487         8106 :       CALL elpa_obj%set("nblk", nblk, success)
     488         8106 :       CPASSERT(success == ELPA_OK)
     489              : 
     490         8106 :       CALL elpa_obj%set("mpi_comm_parent", group%get_handle(), success)
     491         8106 :       CPASSERT(success == ELPA_OK)
     492              : 
     493         8106 :       CALL elpa_obj%set("process_row", myprow, success)
     494         8106 :       CPASSERT(success == ELPA_OK)
     495              : 
     496         8106 :       CALL elpa_obj%set("process_col", mypcol, success)
     497         8106 :       CPASSERT(success == ELPA_OK)
     498              : 
     499         8106 :       success = elpa_obj%setup()
     500         8106 :       CPASSERT(success == ELPA_OK)
     501              : 
     502              :       CALL elpa_obj%set("solver", &
     503              :                         MERGE(ELPA_SOLVER_1STAGE, ELPA_SOLVER_2STAGE, elpa_one_stage), &
     504        16208 :                         success)
     505         8106 :       CPASSERT(success == ELPA_OK)
     506              : 
     507              :       ! enabling the GPU must happen before setting the kernel
     508            0 :       SELECT CASE (elpa_kernel)
     509              :       CASE (ELPA_2STAGE_REAL_NVIDIA_GPU)
     510            0 :          CALL elpa_obj%set("nvidia-gpu", 1, success)
     511            0 :          CPASSERT(success == ELPA_OK)
     512              :       CASE (ELPA_2STAGE_REAL_AMD_GPU)
     513            0 :          CALL elpa_obj%set("amd-gpu", 1, success)
     514            0 :          CPASSERT(success == ELPA_OK)
     515              :       CASE (ELPA_2STAGE_REAL_INTEL_GPU_SYCL)
     516            0 :          CALL elpa_obj%set("intel-gpu", 1, success)
     517         8106 :          CPASSERT(success == ELPA_OK)
     518              :       END SELECT
     519              : 
     520         8106 :       IF (.NOT. elpa_one_stage) THEN
     521         8102 :          CALL elpa_obj%set("real_kernel", elpa_kernel, success)
     522         8102 :          CPWARN_IF(success /= ELPA_OK, "Setting real_kernel for ELPA failed")
     523              : 
     524         8102 :          IF (use_qr) THEN
     525            8 :             CALL elpa_obj%set("qr", 1, success)
     526            8 :             CPASSERT(success == ELPA_OK)
     527              :          END IF
     528              :       END IF
     529              : 
     530              :       ! Set number of threads only when ELPA was built with OpenMP support.
     531         8106 :       IF (elpa_obj%can_set("omp_threads", omp_get_max_threads()) == ELPA_OK) THEN
     532         8106 :          CALL elpa_obj%set("omp_threads", omp_get_max_threads(), success)
     533         8106 :          CPASSERT(success == ELPA_OK)
     534              :       END IF
     535              : 
     536              :       ! ELPA solver: calculate the Eigenvalues/vectors
     537              : #if defined(__HAS_IEEE_EXCEPTIONS)
     538              :       CALL ieee_get_halting_mode(IEEE_ALL, halt)
     539              :       CALL ieee_set_halting_mode(IEEE_ALL, .FALSE.)
     540              : #endif
     541         8106 :       CALL elpa_obj%eigenvectors(matrix%local_data, eval, eigenvectors%local_data, success)
     542              : #if defined(__HAS_IEEE_EXCEPTIONS)
     543              :       CALL ieee_set_halting_mode(IEEE_ALL, halt)
     544              : #endif
     545              : 
     546         8106 :       IF (success /= ELPA_OK) &
     547            0 :          CPABORT("ELPA failed to diagonalize a matrix")
     548              : 
     549         8106 :       IF (check_eigenvalues) THEN
     550              :          ! run again without QR
     551            8 :          CALL elpa_obj%set("qr", 0, success)
     552            8 :          CPASSERT(success == ELPA_OK)
     553              : 
     554            8 :          CALL elpa_obj%eigenvectors(matrix_noqr%local_data, eval_noqr, eigenvectors_noqr%local_data, success)
     555            8 :          IF (success /= ELPA_OK) &
     556            0 :             CPABORT("ELPA failed to diagonalize a matrix even without QR decomposition")
     557              : 
     558          680 :          IF (ANY(ABS(eval(1:neig) - eval_noqr(1:neig)) > th)) &
     559            0 :             CPABORT("ELPA failed to calculate Eigenvalues with ELPA's QR decomposition")
     560              : 
     561            8 :          DEALLOCATE (eval_noqr)
     562            8 :          CALL cp_fm_release(matrix_noqr)
     563            8 :          CALL cp_fm_release(eigenvectors_noqr)
     564              :       END IF
     565              : 
     566         8106 :       CALL elpa_deallocate(elpa_obj, success)
     567         8106 :       CPASSERT(success == ELPA_OK)
     568              : 
     569       909217 :       eigenvalues(1:neig) = eval(1:neig)
     570         8106 :       DEALLOCATE (eval)
     571              : 
     572         8106 :       CALL timestop(handle)
     573              : 
     574        32424 :    END SUBROUTINE cp_fm_diag_elpa_base
     575              : #endif
     576              : 
     577              : END MODULE cp_fm_elpa
        

Generated by: LCOV version 2.0-1