Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2026 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief Wrapper for ELPA
10 : !> \author Ole Schuett
11 : ! **************************************************************************************************
12 : MODULE cp_fm_elpa
13 : USE cp_log_handling, ONLY: cp_to_string
14 : USE machine, ONLY: m_cpuid, &
15 : MACHINE_X86, &
16 : MACHINE_CPU_GENERIC, &
17 : MACHINE_X86_SSE4, &
18 : MACHINE_X86_AVX, &
19 : MACHINE_X86_AVX2
20 : USE cp_blacs_env, ONLY: cp_blacs_env_type
21 : USE cp_fm_basic_linalg, ONLY: cp_fm_uplo_to_full
22 : USE cp_fm_diag_utils, ONLY: cp_fm_redistribute_start, &
23 : cp_fm_redistribute_end, &
24 : cp_fm_redistribute_info
25 : USE cp_fm_struct, ONLY: cp_fm_struct_get
26 : USE cp_fm_types, ONLY: cp_fm_type, &
27 : cp_fm_to_fm, &
28 : cp_fm_release, &
29 : cp_fm_create, &
30 : cp_fm_write_info
31 : USE cp_log_handling, ONLY: cp_get_default_logger, &
32 : cp_logger_get_default_io_unit, &
33 : cp_logger_type
34 : USE kinds, ONLY: default_string_length, dp
35 : USE message_passing, ONLY: mp_comm_type
36 : USE OMP_LIB, ONLY: omp_get_max_threads
37 : #if defined(__HAS_IEEE_EXCEPTIONS)
38 : USE ieee_exceptions, ONLY: ieee_get_halting_mode, &
39 : ieee_set_halting_mode, &
40 : IEEE_ALL
41 : #endif
42 : #include "../base/base_uses.f90"
43 :
44 : #if defined(__ELPA)
45 : USE elpa_constants, ONLY: ELPA_SOLVER_1STAGE, ELPA_SOLVER_2STAGE, ELPA_OK, &
46 : ELPA_2STAGE_REAL_INVALID, &
47 : ELPA_2STAGE_REAL_DEFAULT, &
48 : ELPA_2STAGE_REAL_GENERIC, &
49 : ELPA_2STAGE_REAL_GENERIC_SIMPLE, &
50 : ELPA_2STAGE_REAL_BGP, &
51 : ELPA_2STAGE_REAL_BGQ, &
52 : ELPA_2STAGE_REAL_SSE_ASSEMBLY, &
53 : ELPA_2STAGE_REAL_SSE_BLOCK2, &
54 : ELPA_2STAGE_REAL_SSE_BLOCK4, &
55 : ELPA_2STAGE_REAL_SSE_BLOCK6, &
56 : ELPA_2STAGE_REAL_AVX_BLOCK2, &
57 : ELPA_2STAGE_REAL_AVX_BLOCK4, &
58 : ELPA_2STAGE_REAL_AVX_BLOCK6, &
59 : ELPA_2STAGE_REAL_AVX2_BLOCK2, &
60 : ELPA_2STAGE_REAL_AVX2_BLOCK4, &
61 : ELPA_2STAGE_REAL_AVX2_BLOCK6, &
62 : ELPA_2STAGE_REAL_AVX512_BLOCK2, &
63 : ELPA_2STAGE_REAL_AVX512_BLOCK4, &
64 : ELPA_2STAGE_REAL_AVX512_BLOCK6, &
65 : ELPA_2STAGE_REAL_NVIDIA_GPU, &
66 : ELPA_2STAGE_REAL_AMD_GPU, &
67 : ELPA_2STAGE_REAL_INTEL_GPU_SYCL
68 :
69 : USE elpa, ONLY: elpa_t, elpa_init, elpa_uninit, &
70 : elpa_allocate, elpa_deallocate
71 : #endif
72 :
73 : IMPLICIT NONE
74 :
75 : PRIVATE
76 :
77 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_fm_elpa'
78 :
79 : #if defined(__ELPA)
80 : INTEGER, DIMENSION(21), PARAMETER :: elpa_kernel_ids = [ &
81 : ELPA_2STAGE_REAL_INVALID, & ! auto
82 : ELPA_2STAGE_REAL_GENERIC, &
83 : ELPA_2STAGE_REAL_GENERIC_SIMPLE, &
84 : ELPA_2STAGE_REAL_BGP, &
85 : ELPA_2STAGE_REAL_BGQ, &
86 : ELPA_2STAGE_REAL_SSE_ASSEMBLY, &
87 : ELPA_2STAGE_REAL_SSE_BLOCK2, &
88 : ELPA_2STAGE_REAL_SSE_BLOCK4, &
89 : ELPA_2STAGE_REAL_SSE_BLOCK6, &
90 : ELPA_2STAGE_REAL_AVX_BLOCK2, &
91 : ELPA_2STAGE_REAL_AVX_BLOCK4, &
92 : ELPA_2STAGE_REAL_AVX_BLOCK6, &
93 : ELPA_2STAGE_REAL_AVX2_BLOCK2, &
94 : ELPA_2STAGE_REAL_AVX2_BLOCK4, &
95 : ELPA_2STAGE_REAL_AVX2_BLOCK6, &
96 : ELPA_2STAGE_REAL_AVX512_BLOCK2, &
97 : ELPA_2STAGE_REAL_AVX512_BLOCK4, &
98 : ELPA_2STAGE_REAL_AVX512_BLOCK6, &
99 : ELPA_2STAGE_REAL_NVIDIA_GPU, &
100 : ELPA_2STAGE_REAL_AMD_GPU, &
101 : ELPA_2STAGE_REAL_INTEL_GPU_SYCL]
102 :
103 : CHARACTER(len=14), DIMENSION(SIZE(elpa_kernel_ids)), PARAMETER :: &
104 : elpa_kernel_names = [CHARACTER(len=14) :: &
105 : "AUTO", &
106 : "GENERIC", &
107 : "GENERIC_SIMPLE", &
108 : "BGP", &
109 : "BGQ", &
110 : "SSE", &
111 : "SSE_BLOCK2", &
112 : "SSE_BLOCK4", &
113 : "SSE_BLOCK6", &
114 : "AVX_BLOCK2", &
115 : "AVX_BLOCK4", &
116 : "AVX_BLOCK6", &
117 : "AVX2_BLOCK2", &
118 : "AVX2_BLOCK4", &
119 : "AVX2_BLOCK6", &
120 : "AVX512_BLOCK2", &
121 : "AVX512_BLOCK4", &
122 : "AVX512_BLOCK6", &
123 : "NVIDIA_GPU", &
124 : "AMD_GPU", &
125 : "INTEL_GPU"]
126 :
127 : CHARACTER(len=44), DIMENSION(SIZE(elpa_kernel_ids)), PARAMETER :: &
128 : elpa_kernel_descriptions = [CHARACTER(len=44) :: &
129 : "Automatically selected kernel", &
130 : "Generic kernel", &
131 : "Simplified generic kernel", &
132 : "Kernel optimized for IBM BGP", &
133 : "Kernel optimized for IBM BGQ", &
134 : "Kernel optimized for x86_64/SSE", &
135 : "Kernel optimized for x86_64/SSE (block=2)", &
136 : "Kernel optimized for x86_64/SSE (block=4)", &
137 : "Kernel optimized for x86_64/SSE (block=6)", &
138 : "Kernel optimized for Intel AVX (block=2)", &
139 : "Kernel optimized for Intel AVX (block=4)", &
140 : "Kernel optimized for Intel AVX (block=6)", &
141 : "Kernel optimized for Intel AVX2 (block=2)", &
142 : "Kernel optimized for Intel AVX2 (block=4)", &
143 : "Kernel optimized for Intel AVX2 (block=6)", &
144 : "Kernel optimized for Intel AVX-512 (block=2)", &
145 : "Kernel optimized for Intel AVX-512 (block=4)", &
146 : "Kernel optimized for Intel AVX-512 (block=6)", &
147 : "Kernel targeting Nvidia GPUs", &
148 : "Kernel targeting AMD GPUs", &
149 : "Kernel targeting Intel GPUs"]
150 : #else
151 : INTEGER, DIMENSION(1), PARAMETER :: elpa_kernel_ids = [-1]
152 : CHARACTER(len=14), DIMENSION(1), PARAMETER :: elpa_kernel_names = ["AUTO"]
153 : CHARACTER(len=44), DIMENSION(1), PARAMETER :: elpa_kernel_descriptions = ["Automatically selected kernel"]
154 : #endif
155 :
156 : #if defined(__ELPA)
157 : INTEGER, SAVE :: elpa_kernel = elpa_kernel_ids(1) ! auto
158 : #endif
159 :
160 : ! elpa_qr_unsafe: disable block size limitations
161 : LOGICAL, SAVE :: elpa_should_print = .FALSE., &
162 : elpa_qr_unsafe = .TRUE., &
163 : elpa_qr = .FALSE.
164 :
165 : #if defined(__OFFLOAD_OPENCL)
166 : LOGICAL, SAVE :: elpa_one_stage = .TRUE.
167 : #else
168 : LOGICAL, SAVE :: elpa_one_stage = .FALSE.
169 : #endif
170 :
171 : PUBLIC :: cp_fm_diag_elpa, &
172 : set_elpa_kernel, &
173 : set_elpa_print, &
174 : elpa_qr, &
175 : elpa_one_stage, &
176 : elpa_kernel_ids, &
177 : elpa_kernel_names, &
178 : elpa_kernel_descriptions, &
179 : initialize_elpa_library, &
180 : finalize_elpa_library
181 :
182 : CONTAINS
183 :
184 : ! **************************************************************************************************
185 : !> \brief Initialize the ELPA library
186 : !> \param one_stage ...
187 : !> \param qr ...
188 : ! **************************************************************************************************
189 9156 : SUBROUTINE initialize_elpa_library(one_stage, qr)
190 : LOGICAL, INTENT(IN), OPTIONAL :: one_stage, qr
191 :
192 : #if defined(__ELPA)
193 9156 : IF (elpa_init(20180525) /= ELPA_OK) &
194 0 : CPABORT("The linked ELPA library does not support the required API version")
195 9156 : IF (PRESENT(one_stage)) elpa_one_stage = one_stage
196 9156 : IF (PRESENT(qr)) elpa_qr = qr
197 : #else
198 : CPABORT("Initialization of ELPA library requested but not enabled during build")
199 : #endif
200 9156 : END SUBROUTINE initialize_elpa_library
201 :
202 : ! **************************************************************************************************
203 : !> \brief Finalize the ELPA library
204 : ! **************************************************************************************************
205 9543 : SUBROUTINE finalize_elpa_library()
206 : #if defined(__ELPA)
207 9543 : CALL elpa_uninit()
208 : #else
209 : CPABORT("Finalization of ELPA library requested but not enabled during build")
210 : #endif
211 9543 : END SUBROUTINE finalize_elpa_library
212 :
213 : ! **************************************************************************************************
214 : !> \brief Sets the active ELPA kernel.
215 : !> \param requested_kernel one of the elpa_kernel_ids
216 : ! **************************************************************************************************
217 9156 : SUBROUTINE set_elpa_kernel(requested_kernel)
218 : INTEGER, INTENT(IN) :: requested_kernel
219 :
220 : #if defined(__ELPA)
221 : INTEGER :: cpuid
222 :
223 9156 : elpa_kernel = requested_kernel
224 :
225 : ! Resolve AUTO kernel.
226 9156 : IF (elpa_kernel == ELPA_2STAGE_REAL_INVALID) THEN
227 9152 : cpuid = m_cpuid()
228 9152 : IF ((MACHINE_CPU_GENERIC < cpuid) .AND. (cpuid <= MACHINE_X86)) THEN
229 0 : SELECT CASE (cpuid)
230 : CASE (MACHINE_X86_SSE4)
231 0 : elpa_kernel = ELPA_2STAGE_REAL_SSE_BLOCK4
232 : CASE (MACHINE_X86_AVX)
233 0 : elpa_kernel = ELPA_2STAGE_REAL_AVX_BLOCK4
234 : CASE (MACHINE_X86_AVX2)
235 9152 : elpa_kernel = ELPA_2STAGE_REAL_AVX2_BLOCK4
236 : CASE DEFAULT
237 9152 : elpa_kernel = ELPA_2STAGE_REAL_AVX512_BLOCK4
238 : END SELECT
239 : END IF
240 :
241 : ! Prefer GPU kernel if available.
242 : #if !defined(__NO_OFFLOAD_ELPA)
243 : #if defined(__OFFLOAD_CUDA)
244 : elpa_kernel = ELPA_2STAGE_REAL_NVIDIA_GPU
245 : #endif
246 : #if defined(__OFFLOAD_HIP)
247 : elpa_kernel = ELPA_2STAGE_REAL_AMD_GPU
248 : #endif
249 : #if defined(__OFFLOAD_OPENCL)
250 : elpa_kernel = ELPA_2STAGE_REAL_INTEL_GPU_SYCL
251 : #endif
252 : #endif
253 : ! If we could not find a suitable kernel then use ELPA_2STAGE_REAL_DEFAULT.
254 9152 : IF (elpa_kernel == ELPA_2STAGE_REAL_INVALID) THEN
255 0 : elpa_kernel = ELPA_2STAGE_REAL_DEFAULT
256 : END IF
257 : END IF
258 : #else
259 : MARK_USED(requested_kernel)
260 : #endif
261 9156 : END SUBROUTINE set_elpa_kernel
262 :
263 : ! **************************************************************************************************
264 : !> \brief Sets a flag that determines if additional information about the ELPA diagonalization
265 : !> should be printed when the diagonalization routine is called.
266 : !> \param flag the logical flag
267 : ! **************************************************************************************************
268 9156 : SUBROUTINE set_elpa_print(flag)
269 : LOGICAL, INTENT(IN) :: flag
270 :
271 9156 : elpa_should_print = flag
272 9156 : END SUBROUTINE set_elpa_print
273 :
274 : ! **************************************************************************************************
275 : !> \brief Driver routine to diagonalize a FM matrix with the ELPA library.
276 : !> \param matrix the matrix that is diagonalized
277 : !> \param eigenvectors eigenvectors of the input matrix
278 : !> \param eigenvalues eigenvalues of the input matrix
279 : ! **************************************************************************************************
280 11144 : SUBROUTINE cp_fm_diag_elpa(matrix, eigenvectors, eigenvalues)
281 : TYPE(cp_fm_type), INTENT(IN) :: matrix, eigenvectors
282 : REAL(KIND=dp), DIMENSION(:), INTENT(OUT) :: eigenvalues
283 :
284 : #if defined(__ELPA)
285 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_diag_elpa'
286 :
287 : INTEGER :: handle
288 : TYPE(cp_fm_type) :: eigenvectors_new, matrix_new
289 : TYPE(cp_fm_redistribute_info) :: rdinfo
290 :
291 11144 : CALL timeset(routineN, handle)
292 :
293 : ! Determine if the input matrix needs to be redistributed before diagonalization.
294 : ! Heuristics are used to determine the optimal number of CPUs for diagonalization.
295 : ! The redistributed matrix is stored in matrix_new, which is just a pointer
296 : ! to the original matrix if no redistribution is required.
297 : ! With ELPA, we have to make sure that all processor columns have nonzero width
298 : CALL cp_fm_redistribute_start(matrix, eigenvectors, matrix_new, eigenvectors_new, &
299 11144 : caller_is_elpa=.TRUE., redist_info=rdinfo)
300 :
301 : ! Call ELPA on CPUs that hold the new matrix
302 11144 : IF (ASSOCIATED(matrix_new%matrix_struct)) &
303 8106 : CALL cp_fm_diag_elpa_base(matrix_new, eigenvectors_new, eigenvalues, rdinfo)
304 :
305 : ! Redistribute results and clean up
306 11144 : CALL cp_fm_redistribute_end(matrix, eigenvectors, eigenvalues, matrix_new, eigenvectors_new)
307 :
308 11144 : CALL timestop(handle)
309 : #else
310 : eigenvalues = 0
311 : MARK_USED(matrix)
312 : MARK_USED(eigenvectors)
313 :
314 : CPABORT("CP2K compiled without the ELPA library.")
315 : #endif
316 11144 : END SUBROUTINE cp_fm_diag_elpa
317 :
318 : #if defined(__ELPA)
319 : ! **************************************************************************************************
320 : !> \brief Actual routine that calls ELPA to diagonalize a FM matrix.
321 : !> \param matrix the matrix that is diagonalized
322 : !> \param eigenvectors eigenvectors of the input matrix
323 : !> \param eigenvalues eigenvalues of the input matrix
324 : !> \param rdinfo ...
325 : ! **************************************************************************************************
326 8106 : SUBROUTINE cp_fm_diag_elpa_base(matrix, eigenvectors, eigenvalues, rdinfo)
327 :
328 : TYPE(cp_fm_type), INTENT(IN) :: matrix, eigenvectors
329 : REAL(KIND=dp), DIMENSION(:), INTENT(OUT) :: eigenvalues
330 : TYPE(cp_fm_redistribute_info), INTENT(IN) :: rdinfo
331 :
332 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_diag_elpa_base'
333 :
334 : INTEGER :: handle
335 :
336 : CLASS(elpa_t), POINTER :: elpa_obj
337 : CHARACTER(len=default_string_length) :: kernel_name
338 : TYPE(mp_comm_type) :: group
339 : INTEGER :: i, &
340 : mypcol, myprow, n, &
341 : n_rows, n_cols, &
342 : nblk, neig, io_unit, &
343 : success
344 : LOGICAL :: use_qr, check_eigenvalues
345 8106 : REAL(KIND=dp), DIMENSION(:), ALLOCATABLE :: eval, eval_noqr
346 : TYPE(cp_blacs_env_type), POINTER :: context
347 : TYPE(cp_fm_type) :: matrix_noqr, eigenvectors_noqr
348 : TYPE(cp_logger_type), POINTER :: logger
349 : REAL(KIND=dp), PARAMETER :: th = 1.0E-14_dp
350 8106 : INTEGER, DIMENSION(:), POINTER :: ncol_locals
351 : #if defined(__HAS_IEEE_EXCEPTIONS)
352 : LOGICAL, DIMENSION(5) :: halt
353 : #endif
354 :
355 8106 : CALL timeset(routineN, handle)
356 8106 : NULLIFY (logger)
357 8106 : NULLIFY (ncol_locals)
358 :
359 8106 : check_eigenvalues = .FALSE.
360 :
361 8106 : logger => cp_get_default_logger()
362 8106 : io_unit = cp_logger_get_default_io_unit(logger)
363 :
364 8106 : n = matrix%matrix_struct%nrow_global
365 8106 : context => matrix%matrix_struct%context
366 8106 : group = matrix%matrix_struct%para_env
367 :
368 8106 : myprow = context%mepos(1)
369 8106 : mypcol = context%mepos(2)
370 :
371 : ! elpa needs the full matrix
372 8106 : CALL cp_fm_uplo_to_full(matrix, eigenvectors)
373 :
374 : CALL cp_fm_struct_get(matrix%matrix_struct, &
375 : local_leading_dimension=n_rows, &
376 : ncol_local=n_cols, &
377 : nrow_block=nblk, &
378 8106 : ncol_locals=ncol_locals)
379 :
380 : ! ELPA will fail in 'solve_tridi', with no useful error message, fail earlier
381 16212 : IF (io_unit > 0 .AND. ANY(ncol_locals == 0)) THEN
382 0 : CALL rdinfo%write(io_unit)
383 0 : CALL cp_fm_write_info(matrix, io_unit)
384 0 : CPABORT("ELPA [pre-fail]: Problem contains processor column with zero width.")
385 : END IF
386 :
387 8106 : neig = SIZE(eigenvalues, 1)
388 : ! Decide if matrix is suitable for ELPA to use QR
389 : ! The definition of what is considered a suitable matrix depends on the ELPA version
390 : ! The relevant ELPA files to check are
391 : ! - Proper matrix order: src/elpa2/elpa2_template.F90
392 : ! - Proper block size: test/Fortran/test.F90
393 : ! Note that the names of these files might change in different ELPA versions
394 : ! Matrix order must be even
395 8106 : use_qr = elpa_qr .AND. (MODULO(n, 2) == 0)
396 : ! Matrix order and block size must be greater than or equal to 64
397 8106 : IF (.NOT. elpa_qr_unsafe) &
398 0 : use_qr = use_qr .AND. (n >= 64) .AND. (nblk >= 64)
399 :
400 : ! Check if eigenvalues computed with elpa_qr_unsafe should be verified
401 8106 : IF (use_qr .AND. elpa_qr_unsafe .AND. elpa_should_print) &
402 4 : check_eigenvalues = .TRUE.
403 :
404 8106 : CALL matrix%matrix_struct%para_env%bcast(check_eigenvalues)
405 :
406 8106 : IF (check_eigenvalues) THEN
407 : ! Allocate and initialize needed temporaries to compute eigenvalues without ELPA QR
408 24 : ALLOCATE (eval_noqr(n))
409 8 : CALL cp_fm_create(matrix=matrix_noqr, matrix_struct=matrix%matrix_struct)
410 8 : CALL cp_fm_to_fm(matrix, matrix_noqr)
411 8 : CALL cp_fm_create(matrix=eigenvectors_noqr, matrix_struct=eigenvectors%matrix_struct)
412 16 : CALL cp_fm_uplo_to_full(matrix_noqr, eigenvectors_noqr)
413 : END IF
414 :
415 8106 : IF (io_unit > 0 .AND. elpa_should_print) THEN
416 : WRITE (UNIT=io_unit, FMT="(/,T2,A)") &
417 40 : "ELPA| Matrix diagonalization information"
418 :
419 : ! Find name for given kernel id.
420 : ! In case of ELPA_2STAGE_REAL_DEFAULT was used it might not be in our elpa_kernel_ids list.
421 40 : kernel_name = "id: "//TRIM(ADJUSTL(cp_to_string(elpa_kernel)))
422 880 : DO i = 1, SIZE(elpa_kernel_ids)
423 880 : IF (elpa_kernel_ids(i) == elpa_kernel) THEN
424 40 : kernel_name = elpa_kernel_names(i)
425 : END IF
426 : END DO
427 :
428 : WRITE (UNIT=io_unit, FMT="(T2,A,T71,I10)") &
429 40 : "ELPA| Matrix order (NA) ", n, &
430 40 : "ELPA| Matrix block size (NBLK) ", nblk, &
431 40 : "ELPA| Number of eigenvectors (NEV) ", neig, &
432 40 : "ELPA| Local rows (LOCAL_NROWS) ", n_rows, &
433 80 : "ELPA| Local columns (LOCAL_NCOLS) ", n_cols
434 : WRITE (UNIT=io_unit, FMT="(T2,A,T61,A20)") &
435 40 : "ELPA| Kernel ", ADJUSTR(TRIM(kernel_name))
436 40 : IF (elpa_qr) THEN
437 : WRITE (UNIT=io_unit, FMT="(T2,A,T78,A3)") &
438 4 : "ELPA| QR step requested ", "YES"
439 : ELSE
440 : WRITE (UNIT=io_unit, FMT="(T2,A,T79,A2)") &
441 36 : "ELPA| QR step requested ", "NO"
442 : END IF
443 :
444 40 : IF (elpa_qr) THEN
445 4 : IF (use_qr) THEN
446 : WRITE (UNIT=io_unit, FMT="(T2,A,T78,A3)") &
447 4 : "ELPA| Matrix is suitable for QR ", "YES"
448 : ELSE
449 : WRITE (UNIT=io_unit, FMT="(T2,A,T79,A2)") &
450 0 : "ELPA| Matrix is suitable for QR ", "NO"
451 : END IF
452 4 : IF (.NOT. use_qr) THEN
453 0 : IF (MODULO(n, 2) /= 0) THEN
454 : WRITE (UNIT=io_unit, FMT="(T2,A)") &
455 0 : "ELPA| Matrix order is NOT even"
456 : END IF
457 0 : IF ((nblk < 64) .AND. (.NOT. elpa_qr_unsafe)) THEN
458 : WRITE (UNIT=io_unit, FMT="(T2,A)") &
459 0 : "ELPA| Matrix block size is NOT 64 or greater"
460 : END IF
461 : ELSE
462 4 : IF ((nblk < 64) .AND. elpa_qr_unsafe) THEN
463 : WRITE (UNIT=io_unit, FMT="(T2,A)") &
464 2 : "ELPA| Matrix block size check was bypassed"
465 : END IF
466 : END IF
467 : END IF
468 : END IF
469 :
470 : ! the full eigenvalues vector is needed
471 24318 : ALLOCATE (eval(n))
472 :
473 8106 : elpa_obj => elpa_allocate()
474 :
475 8106 : CALL elpa_obj%set("na", n, success)
476 8106 : CPASSERT(success == ELPA_OK)
477 :
478 8106 : CALL elpa_obj%set("nev", neig, success)
479 8106 : CPASSERT(success == ELPA_OK)
480 :
481 8106 : CALL elpa_obj%set("local_nrows", n_rows, success)
482 8106 : CPASSERT(success == ELPA_OK)
483 :
484 8106 : CALL elpa_obj%set("local_ncols", n_cols, success)
485 8106 : CPASSERT(success == ELPA_OK)
486 :
487 8106 : CALL elpa_obj%set("nblk", nblk, success)
488 8106 : CPASSERT(success == ELPA_OK)
489 :
490 8106 : CALL elpa_obj%set("mpi_comm_parent", group%get_handle(), success)
491 8106 : CPASSERT(success == ELPA_OK)
492 :
493 8106 : CALL elpa_obj%set("process_row", myprow, success)
494 8106 : CPASSERT(success == ELPA_OK)
495 :
496 8106 : CALL elpa_obj%set("process_col", mypcol, success)
497 8106 : CPASSERT(success == ELPA_OK)
498 :
499 8106 : success = elpa_obj%setup()
500 8106 : CPASSERT(success == ELPA_OK)
501 :
502 : CALL elpa_obj%set("solver", &
503 : MERGE(ELPA_SOLVER_1STAGE, ELPA_SOLVER_2STAGE, elpa_one_stage), &
504 16208 : success)
505 8106 : CPASSERT(success == ELPA_OK)
506 :
507 : ! enabling the GPU must happen before setting the kernel
508 0 : SELECT CASE (elpa_kernel)
509 : CASE (ELPA_2STAGE_REAL_NVIDIA_GPU)
510 0 : CALL elpa_obj%set("nvidia-gpu", 1, success)
511 0 : CPASSERT(success == ELPA_OK)
512 : CASE (ELPA_2STAGE_REAL_AMD_GPU)
513 0 : CALL elpa_obj%set("amd-gpu", 1, success)
514 0 : CPASSERT(success == ELPA_OK)
515 : CASE (ELPA_2STAGE_REAL_INTEL_GPU_SYCL)
516 0 : CALL elpa_obj%set("intel-gpu", 1, success)
517 8106 : CPASSERT(success == ELPA_OK)
518 : END SELECT
519 :
520 8106 : IF (.NOT. elpa_one_stage) THEN
521 8102 : CALL elpa_obj%set("real_kernel", elpa_kernel, success)
522 8102 : CPWARN_IF(success /= ELPA_OK, "Setting real_kernel for ELPA failed")
523 :
524 8102 : IF (use_qr) THEN
525 8 : CALL elpa_obj%set("qr", 1, success)
526 8 : CPASSERT(success == ELPA_OK)
527 : END IF
528 : END IF
529 :
530 : ! Set number of threads only when ELPA was built with OpenMP support.
531 8106 : IF (elpa_obj%can_set("omp_threads", omp_get_max_threads()) == ELPA_OK) THEN
532 8106 : CALL elpa_obj%set("omp_threads", omp_get_max_threads(), success)
533 8106 : CPASSERT(success == ELPA_OK)
534 : END IF
535 :
536 : ! ELPA solver: calculate the Eigenvalues/vectors
537 : #if defined(__HAS_IEEE_EXCEPTIONS)
538 : CALL ieee_get_halting_mode(IEEE_ALL, halt)
539 : CALL ieee_set_halting_mode(IEEE_ALL, .FALSE.)
540 : #endif
541 8106 : CALL elpa_obj%eigenvectors(matrix%local_data, eval, eigenvectors%local_data, success)
542 : #if defined(__HAS_IEEE_EXCEPTIONS)
543 : CALL ieee_set_halting_mode(IEEE_ALL, halt)
544 : #endif
545 :
546 8106 : IF (success /= ELPA_OK) &
547 0 : CPABORT("ELPA failed to diagonalize a matrix")
548 :
549 8106 : IF (check_eigenvalues) THEN
550 : ! run again without QR
551 8 : CALL elpa_obj%set("qr", 0, success)
552 8 : CPASSERT(success == ELPA_OK)
553 :
554 8 : CALL elpa_obj%eigenvectors(matrix_noqr%local_data, eval_noqr, eigenvectors_noqr%local_data, success)
555 8 : IF (success /= ELPA_OK) &
556 0 : CPABORT("ELPA failed to diagonalize a matrix even without QR decomposition")
557 :
558 680 : IF (ANY(ABS(eval(1:neig) - eval_noqr(1:neig)) > th)) &
559 0 : CPABORT("ELPA failed to calculate Eigenvalues with ELPA's QR decomposition")
560 :
561 8 : DEALLOCATE (eval_noqr)
562 8 : CALL cp_fm_release(matrix_noqr)
563 8 : CALL cp_fm_release(eigenvectors_noqr)
564 : END IF
565 :
566 8106 : CALL elpa_deallocate(elpa_obj, success)
567 8106 : CPASSERT(success == ELPA_OK)
568 :
569 909217 : eigenvalues(1:neig) = eval(1:neig)
570 8106 : DEALLOCATE (eval)
571 :
572 8106 : CALL timestop(handle)
573 :
574 32424 : END SUBROUTINE cp_fm_diag_elpa_base
575 : #endif
576 :
577 : END MODULE cp_fm_elpa
|