Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2026 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief used for collecting some of the diagonalization schemes available for
10 : !> cp_fm_type. cp_fm_power also moved here as it is very related
11 : !> \note
12 : !> first version : most routines imported
13 : !> \par History
14 : !> - unused Jacobi routines removed, cosmetics (05.04.06,MK)
15 : !> \author Joost VandeVondele (2003-08)
16 : ! **************************************************************************************************
17 : MODULE cp_fm_diag
18 : USE cp_blacs_types, ONLY: cp_blacs_type
19 : USE cp_blacs_env, ONLY: cp_blacs_env_type
20 : USE cp_fm_basic_linalg, ONLY: cp_fm_column_scale, &
21 : cp_fm_gemm, &
22 : cp_fm_scale, &
23 : cp_fm_syrk, &
24 : cp_fm_triangular_invert, &
25 : cp_fm_triangular_multiply, &
26 : cp_fm_uplo_to_full
27 : USE cp_fm_cholesky, ONLY: cp_fm_cholesky_decompose
28 : USE cp_fm_diag_utils, ONLY: cp_fm_redistribute_end, &
29 : cp_fm_redistribute_start
30 : USE cp_fm_elpa, ONLY: cp_fm_diag_elpa, &
31 : finalize_elpa_library, &
32 : initialize_elpa_library, &
33 : set_elpa_kernel
34 : USE cp_fm_cusolver_api, ONLY: cp_fm_diag_cusolver, &
35 : cp_fm_general_cusolver
36 : #if defined(__DLAF)
37 : USE cp_fm_dlaf_api, ONLY: cp_fm_diag_dlaf, cp_fm_diag_gen_dlaf
38 : USE cp_dlaf_utils_api, ONLY: cp_dlaf_initialize, cp_dlaf_finalize
39 : #endif
40 : USE cp_fm_types, ONLY: cp_fm_get_info, &
41 : cp_fm_set_element, &
42 : cp_fm_to_fm, &
43 : cp_fm_type, &
44 : cp_fm_create, &
45 : cp_fm_get_info, &
46 : cp_fm_release, &
47 : cp_fm_set_all, &
48 : cp_fm_to_fm, &
49 : cp_fm_to_fm_submat, &
50 : cp_fm_type
51 : USE cp_fm_struct, ONLY: cp_fm_struct_equivalent, &
52 : cp_fm_struct_create, &
53 : cp_fm_struct_release, &
54 : cp_fm_struct_type
55 : USE cp_log_handling, ONLY: cp_logger_get_default_unit_nr, &
56 : cp_get_default_logger, &
57 : cp_logger_get_default_io_unit, &
58 : cp_logger_type, &
59 : cp_to_string
60 : USE cp_log_handling, ONLY: cp_get_default_logger, &
61 : cp_logger_get_default_unit_nr, &
62 : cp_logger_get_unit_nr, &
63 : cp_logger_type
64 : USE kinds, ONLY: default_string_length, &
65 : dp
66 : USE machine, ONLY: default_output_unit, &
67 : m_memory
68 : #if defined (__parallel)
69 : USE message_passing, ONLY: mp_comm_type
70 : #endif
71 : #if defined (__HAS_IEEE_EXCEPTIONS)
72 : USE ieee_exceptions, ONLY: ieee_get_halting_mode, &
73 : ieee_set_halting_mode, &
74 : IEEE_ALL
75 : #endif
76 : #include "../base/base_uses.f90"
77 :
78 : IMPLICIT NONE
79 : PRIVATE
80 :
81 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_fm_diag'
82 :
83 : REAL(KIND=dp), PARAMETER, PUBLIC :: eps_check_diag_default = 5.0E-14_dp
84 :
85 : ! The following saved variables are diagonalization global
86 : ! Stores the default library for diagonalization
87 : INTEGER, SAVE, PUBLIC :: diag_type = 0
88 : ! Minimum number of eigenvectors for the use of the ELPA eigensolver.
89 : ! The ScaLAPACK eigensolver is used as fallback for all smaller cases.
90 : INTEGER, SAVE :: elpa_neigvec_min = 0
91 : #if defined(__DLAF)
92 : ! Minimum number of eigenvectors for the use of the DLAF eigensolver.
93 : ! The ScaLAPACK eigensolver is used as fallback for all smaller cases.
94 : INTEGER, SAVE, PUBLIC :: dlaf_neigvec_min = 0
95 : #endif
96 : LOGICAL, SAVE, PUBLIC :: cusolver_generalized = .TRUE.
97 : ! Threshold value for the orthonormality check of the eigenvectors obtained
98 : ! after a diagonalization. A negative value disables the check.
99 : REAL(KIND=dp), SAVE :: eps_check_diag = -1.0_dp
100 :
101 : ! Constants for the diag_type above
102 : INTEGER, PARAMETER, PUBLIC :: FM_DIAG_TYPE_SCALAPACK = 101, &
103 : FM_DIAG_TYPE_ELPA = 102, &
104 : FM_DIAG_TYPE_CUSOLVER = 103, &
105 : FM_DIAG_TYPE_DLAF = 104
106 : #if defined(__CUSOLVERMP)
107 : INTEGER, PARAMETER, PUBLIC :: FM_DIAG_TYPE_DEFAULT = FM_DIAG_TYPE_CUSOLVER
108 : #elif defined(__ELPA)
109 : INTEGER, PARAMETER, PUBLIC :: FM_DIAG_TYPE_DEFAULT = FM_DIAG_TYPE_ELPA
110 : #else
111 : INTEGER, PARAMETER, PUBLIC :: FM_DIAG_TYPE_DEFAULT = FM_DIAG_TYPE_SCALAPACK
112 : #endif
113 :
114 : ! Public subroutines
115 : PUBLIC :: choose_eigv_solver, &
116 : cp_fm_block_jacobi, &
117 : cp_fm_power, &
118 : cp_fm_syevd, &
119 : cp_fm_syevx, &
120 : cp_fm_svd, &
121 : cp_fm_geeig, &
122 : cp_fm_geeig_canon, &
123 : diag_init, &
124 : diag_finalize
125 :
126 : CONTAINS
127 :
128 : ! **************************************************************************************************
129 : !> \brief Setup the diagonalization library to be used
130 : !> \param diag_lib diag_library flag from GLOBAL section in input
131 : !> \param fallback_applied .TRUE. if support for the requested library was not compiled-in and fallback
132 : !> to ScaLAPACK was applied, .FALSE. otherwise.
133 : !> \param elpa_kernel integer that determines which ELPA kernel to use for diagonalization
134 : !> \param elpa_neigvec_min_input ...
135 : !> \param elpa_qr logical that determines if ELPA should try to use QR to accelerate the
136 : !> diagonalization procedure of suitably sized matrices
137 : !> \param elpa_print logical that determines if information about the ELPA diagonalization should
138 : !> be printed
139 : !> \param elpa_one_stage logical that enables the one-stage solver
140 : !> \param dlaf_neigvec_min_input ...
141 : !> \param eps_check_diag_input ...
142 : !> \param cusolver_generalized_input ...
143 : !> \par History
144 : !> - Add support for DLA-Future (05.09.2023, RMeli)
145 : !> \author MI 11.2013
146 : ! **************************************************************************************************
147 10117 : SUBROUTINE diag_init(diag_lib, fallback_applied, elpa_kernel, elpa_neigvec_min_input, elpa_qr, &
148 : elpa_print, elpa_one_stage, dlaf_neigvec_min_input, eps_check_diag_input, &
149 : cusolver_generalized_input)
150 : CHARACTER(LEN=*), INTENT(IN) :: diag_lib
151 : LOGICAL, INTENT(OUT) :: fallback_applied
152 : INTEGER, INTENT(IN) :: elpa_kernel, elpa_neigvec_min_input
153 : LOGICAL, INTENT(IN) :: elpa_qr, elpa_print, elpa_one_stage
154 : INTEGER, INTENT(IN) :: dlaf_neigvec_min_input
155 : REAL(KIND=dp), INTENT(IN) :: eps_check_diag_input
156 : LOGICAL, INTENT(IN), OPTIONAL :: cusolver_generalized_input
157 :
158 : LOGICAL, SAVE :: initialized = .FALSE.
159 :
160 10117 : fallback_applied = .FALSE.
161 :
162 10117 : IF (diag_lib == "ScaLAPACK") THEN
163 194 : diag_type = FM_DIAG_TYPE_SCALAPACK
164 9923 : ELSE IF (diag_lib == "ELPA") THEN
165 : #if defined (__ELPA)
166 : ! ELPA is requested and available
167 9923 : diag_type = FM_DIAG_TYPE_ELPA
168 : #else
169 : ! ELPA library requested but not linked, switch back to SL
170 : diag_type = FM_DIAG_TYPE_SCALAPACK
171 : fallback_applied = .TRUE.
172 : #endif
173 0 : ELSE IF (diag_lib == "cuSOLVER") THEN
174 0 : diag_type = FM_DIAG_TYPE_CUSOLVER
175 0 : ELSE IF (diag_lib == "DLAF") THEN
176 : #if defined (__DLAF)
177 : diag_type = FM_DIAG_TYPE_DLAF
178 : #else
179 0 : CPABORT("ERROR in diag_init: CP2K was not compiled with DLA-Future support")
180 : #endif
181 : ELSE
182 0 : CPABORT("ERROR in diag_init: Initialization of unknown diagonalization library requested")
183 : END IF
184 :
185 : ! Initialization of requested diagonalization library
186 10117 : IF (.NOT. initialized .AND. diag_type == FM_DIAG_TYPE_ELPA) THEN
187 9326 : CALL initialize_elpa_library(one_stage=elpa_one_stage, qr=elpa_qr, should_print=elpa_print)
188 9326 : CALL set_elpa_kernel(elpa_kernel)
189 9326 : initialized = .TRUE.
190 : END IF
191 : #if defined(__DLAF)
192 : IF (.NOT. initialized .AND. diag_type == FM_DIAG_TYPE_DLAF) THEN
193 : CALL cp_dlaf_initialize()
194 : initialized = .TRUE.
195 : END IF
196 : dlaf_neigvec_min = dlaf_neigvec_min_input
197 : #else
198 : MARK_USED(dlaf_neigvec_min_input)
199 : #endif
200 :
201 10117 : elpa_neigvec_min = elpa_neigvec_min_input
202 10117 : eps_check_diag = eps_check_diag_input
203 10117 : IF (PRESENT(cusolver_generalized_input)) THEN
204 10117 : cusolver_generalized = cusolver_generalized_input
205 : ELSE
206 0 : cusolver_generalized = .TRUE.
207 : END IF
208 :
209 10117 : END SUBROUTINE diag_init
210 :
211 : ! **************************************************************************************************
212 : !> \brief Finalize the diagonalization library
213 : ! **************************************************************************************************
214 9907 : SUBROUTINE diag_finalize()
215 : #if defined (__ELPA)
216 9907 : IF (diag_type == FM_DIAG_TYPE_ELPA) &
217 9713 : CALL finalize_elpa_library()
218 : #endif
219 : #if defined (__DLAF)
220 : IF (diag_type == FM_DIAG_TYPE_DLAF) &
221 : CALL cp_dlaf_finalize()
222 : #endif
223 9907 : END SUBROUTINE diag_finalize
224 :
225 : ! **************************************************************************************************
226 : !> \brief Choose the Eigensolver depending on which library is available
227 : !> ELPA seems to be unstable for small systems
228 : !> \param matrix ...
229 : !> \param eigenvectors ...
230 : !> \param eigenvalues ...
231 : !> \param info ...
232 : !> \par info If present returns error code and prevents program stops.
233 : !> Works currently only for cp_fm_syevd with ScaLAPACK.
234 : !> Other solvers will end the program regardless of PRESENT(info).
235 : !> \par History
236 : !> - Do not use ELPA for small matrices and use instead ScaLAPACK as fallback (10.05.2021, MK)
237 : ! **************************************************************************************************
238 194923 : SUBROUTINE choose_eigv_solver(matrix, eigenvectors, eigenvalues, info)
239 :
240 : TYPE(cp_fm_type), INTENT(IN) :: matrix, eigenvectors
241 : REAL(KIND=dp), DIMENSION(:), INTENT(OUT) :: eigenvalues
242 : INTEGER, INTENT(OUT), OPTIONAL :: info
243 :
244 : CHARACTER(LEN=*), PARAMETER :: routineN = 'choose_eigv_solver'
245 :
246 : ! Sample peak memory
247 194923 : CALL m_memory()
248 :
249 194923 : IF (PRESENT(info)) info = 0 ! Default for solvers that do not return an info.
250 :
251 194923 : IF (diag_type == FM_DIAG_TYPE_SCALAPACK) THEN
252 4576 : CALL cp_fm_syevd(matrix, eigenvectors, eigenvalues, info)
253 :
254 190347 : ELSE IF (diag_type == FM_DIAG_TYPE_ELPA) THEN
255 190347 : IF (matrix%matrix_struct%nrow_global < elpa_neigvec_min) THEN
256 : ! We don't trust ELPA with very small matrices.
257 179203 : CALL cp_fm_syevd(matrix, eigenvectors, eigenvalues, info)
258 : ELSE
259 11144 : CALL cp_fm_diag_elpa(matrix, eigenvectors, eigenvalues)
260 : END IF
261 :
262 0 : ELSE IF (diag_type == FM_DIAG_TYPE_CUSOLVER) THEN
263 0 : IF (matrix%matrix_struct%nrow_global < 64) THEN
264 : ! We don't trust cuSolver with very small matrices.
265 0 : CALL cp_fm_syevd(matrix, eigenvectors, eigenvalues, info)
266 : ELSE
267 0 : CALL cp_fm_diag_cusolver(matrix, eigenvectors, eigenvalues)
268 : END IF
269 :
270 : #if defined(__DLAF)
271 : ELSE IF (diag_type == FM_DIAG_TYPE_DLAF) THEN
272 : IF (matrix%matrix_struct%nrow_global < dlaf_neigvec_min) THEN
273 : ! Use ScaLAPACK for small matrices
274 : CALL cp_fm_syevd(matrix, eigenvectors, eigenvalues, info)
275 : ELSE
276 : CALL cp_fm_diag_dlaf(matrix, eigenvectors, eigenvalues)
277 : END IF
278 : #endif
279 :
280 : ELSE
281 0 : CPABORT("ERROR in "//routineN//": Invalid diagonalization type requested")
282 : END IF
283 :
284 194923 : CALL check_diag(matrix, eigenvectors, nvec=SIZE(eigenvalues))
285 :
286 194923 : END SUBROUTINE choose_eigv_solver
287 :
288 : ! **************************************************************************************************
289 : !> \brief Check result of diagonalization, i.e. the orthonormality of the eigenvectors
290 : !> \param matrix Work matrix
291 : !> \param eigenvectors Eigenvectors to be checked
292 : !> \param nvec ...
293 : ! **************************************************************************************************
294 382068 : SUBROUTINE check_diag(matrix, eigenvectors, nvec)
295 :
296 : TYPE(cp_fm_type), INTENT(IN) :: matrix, eigenvectors
297 : INTEGER, INTENT(IN) :: nvec
298 :
299 : CHARACTER(LEN=*), PARAMETER :: routineN = 'check_diag'
300 :
301 : CHARACTER(LEN=default_string_length) :: diag_type_name
302 : REAL(KIND=dp) :: eps, eps_abort, eps_warning, gold, test
303 : INTEGER :: handle, i, j, ncol, nrow, output_unit
304 : LOGICAL :: check_eigenvectors
305 : #if defined(__parallel)
306 : TYPE(cp_blacs_env_type), POINTER :: context
307 : INTEGER :: il, jl, ipcol, iprow, &
308 : mypcol, myprow, npcol, nprow
309 : INTEGER, DIMENSION(9) :: desca
310 : #endif
311 :
312 382068 : CALL timeset(routineN, handle)
313 :
314 382068 : output_unit = default_output_unit
315 382068 : eps_warning = eps_check_diag_default
316 : #if defined(__CHECK_DIAG)
317 : check_eigenvectors = .TRUE.
318 : IF (eps_check_diag >= 0.0_dp) THEN
319 : eps_warning = eps_check_diag
320 : END IF
321 : #else
322 382068 : IF (eps_check_diag >= 0.0_dp) THEN
323 332 : check_eigenvectors = .TRUE.
324 332 : eps_warning = eps_check_diag
325 : ELSE
326 : check_eigenvectors = .FALSE.
327 : END IF
328 : #endif
329 382068 : eps_abort = 10.0_dp*eps_warning
330 :
331 382068 : gold = 0.0_dp
332 382068 : test = 0.0_dp
333 382068 : eps = 0.0_dp
334 :
335 382068 : IF (check_eigenvectors) THEN
336 : #if defined(__parallel)
337 332 : nrow = eigenvectors%matrix_struct%nrow_global
338 332 : ncol = MIN(eigenvectors%matrix_struct%ncol_global, nvec)
339 332 : CALL cp_fm_gemm("T", "N", ncol, ncol, nrow, 1.0_dp, eigenvectors, eigenvectors, 0.0_dp, matrix)
340 332 : context => matrix%matrix_struct%context
341 332 : myprow = context%mepos(1)
342 332 : mypcol = context%mepos(2)
343 332 : nprow = context%num_pe(1)
344 332 : npcol = context%num_pe(2)
345 3320 : desca(:) = matrix%matrix_struct%descriptor(:)
346 5336 : outer: DO j = 1, ncol
347 125564 : DO i = 1, ncol
348 120228 : CALL infog2l(i, j, desca, nprow, npcol, myprow, mypcol, il, jl, iprow, ipcol)
349 125232 : IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
350 60114 : gold = MERGE(0.0_dp, 1.0_dp, i /= j)
351 60114 : test = matrix%local_data(il, jl)
352 60114 : eps = ABS(test - gold)
353 60114 : IF (eps > eps_warning) EXIT outer
354 : END IF
355 : END DO
356 : END DO outer
357 : #else
358 : nrow = SIZE(eigenvectors%local_data, 1)
359 : ncol = MIN(SIZE(eigenvectors%local_data, 2), nvec)
360 : CALL dgemm("T", "N", ncol, ncol, nrow, 1.0_dp, &
361 : eigenvectors%local_data(1, 1), nrow, &
362 : eigenvectors%local_data(1, 1), nrow, &
363 : 0.0_dp, matrix%local_data(1, 1), nrow)
364 : outer: DO j = 1, ncol
365 : DO i = 1, ncol
366 : gold = MERGE(0.0_dp, 1.0_dp, i /= j)
367 : test = matrix%local_data(i, j)
368 : eps = ABS(test - gold)
369 : IF (eps > eps_warning) EXIT outer
370 : END DO
371 : END DO outer
372 : #endif
373 332 : IF (eps > eps_warning) THEN
374 0 : IF (diag_type == FM_DIAG_TYPE_SCALAPACK) THEN
375 0 : diag_type_name = "SYEVD"
376 0 : ELSE IF (diag_type == FM_DIAG_TYPE_ELPA) THEN
377 0 : diag_type_name = "ELPA"
378 0 : ELSE IF (diag_type == FM_DIAG_TYPE_CUSOLVER) THEN
379 0 : diag_type_name = "CUSOLVER"
380 0 : ELSE IF (diag_type == FM_DIAG_TYPE_DLAF) THEN
381 0 : diag_type_name = "DLAF"
382 : ELSE
383 0 : CPABORT("Unknown diag_type")
384 : END IF
385 : WRITE (UNIT=output_unit, FMT="(/,T2,A,/,T2,A,I0,A,I0,A,F0.15,/,T2,A,F0.0,A,ES10.3)") &
386 0 : "The eigenvectors returned by "//TRIM(diag_type_name)//" are not orthonormal", &
387 0 : "Matrix element (", i, ", ", j, ") = ", test, &
388 0 : "The deviation from the expected value ", gold, " is", eps
389 0 : IF (eps > eps_abort) THEN
390 0 : CPABORT("ERROR in "//routineN//": Check of matrix diagonalization failed")
391 : ELSE
392 0 : CPWARN("Check of matrix diagonalization failed in routine "//routineN)
393 : END IF
394 : END IF
395 : END IF
396 :
397 382068 : CALL timestop(handle)
398 :
399 382068 : END SUBROUTINE check_diag
400 :
401 : ! **************************************************************************************************
402 : !> \brief Issues an error messages and exits (optionally only warns).
403 : !> \param mesg message to be issued
404 : !> \param info error code (optional)
405 : !> \param warn only warn (optional)
406 : ! **************************************************************************************************
407 0 : SUBROUTINE cp_fm_error(mesg, info, warn)
408 : CHARACTER(LEN=*), INTENT(IN) :: mesg
409 : INTEGER, INTENT(IN), OPTIONAL :: info
410 : LOGICAL, INTENT(IN), OPTIONAL :: warn
411 :
412 : CHARACTER(LEN=2*default_string_length) :: message
413 : LOGICAL :: warning
414 :
415 0 : IF (PRESENT(info)) THEN
416 0 : WRITE (message, "(A,A,I0,A)") mesg, " (INFO = ", info, ")"
417 : ELSE
418 0 : WRITE (message, "(A)") mesg
419 : END IF
420 :
421 0 : IF (PRESENT(warn)) THEN
422 0 : warning = warn
423 : ELSE ! abort
424 : warning = .FALSE.
425 : END IF
426 :
427 0 : IF (warning) THEN
428 0 : CPWARN(TRIM(message))
429 : ELSE
430 0 : CPABORT(TRIM(message))
431 : END IF
432 0 : END SUBROUTINE cp_fm_error
433 :
434 : ! **************************************************************************************************
435 : !> \brief Computes all eigenvalues and vectors of a real symmetric matrix
436 : !> significantly faster than syevx, scales also much better.
437 : !> Needs workspace to allocate all the eigenvectors
438 : !> \param matrix ...
439 : !> \param eigenvectors ...
440 : !> \param eigenvalues ...
441 : !> \param info ...
442 : !> \par matrix is supposed to be in upper triangular form, and overwritten by this routine
443 : !> \par info If present returns error code and prevents program stops.
444 : !> Works currently only for scalapack.
445 : !> Other solvers will end the program regardless of PRESENT(info).
446 : ! **************************************************************************************************
447 187105 : SUBROUTINE cp_fm_syevd(matrix, eigenvectors, eigenvalues, info)
448 :
449 : TYPE(cp_fm_type), INTENT(IN) :: matrix, eigenvectors
450 : REAL(KIND=dp), DIMENSION(:), INTENT(OUT) :: eigenvalues
451 : INTEGER, INTENT(OUT), OPTIONAL :: info
452 :
453 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_syevd'
454 :
455 : INTEGER :: handle, myinfo, n, nmo
456 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: eig
457 : #if defined(__parallel)
458 : TYPE(cp_fm_type) :: eigenvectors_new, matrix_new
459 : #else
460 : INTEGER :: liwork, lwork
461 : INTEGER, DIMENSION(:), POINTER :: iwork
462 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: m
463 : REAL(KIND=dp), DIMENSION(:), POINTER :: work
464 : INTEGER, TARGET :: v(1)
465 : REAL(KIND=dp), TARGET :: w(1)
466 : #endif
467 :
468 187105 : CALL timeset(routineN, handle)
469 :
470 187105 : myinfo = 0
471 :
472 187105 : n = matrix%matrix_struct%nrow_global
473 561315 : ALLOCATE (eig(n))
474 :
475 : #if defined(__parallel)
476 : ! Determine if the input matrix needs to be redistributed before diagonalization.
477 : ! Heuristics are used to determine the optimal number of CPUs for diagonalization.
478 : ! The redistributed matrix is stored in matrix_new, which is just a pointer
479 : ! to the original matrix if no redistribution is required
480 187105 : CALL cp_fm_redistribute_start(matrix, eigenvectors, matrix_new, eigenvectors_new)
481 :
482 : ! Call scalapack on CPUs that hold the new matrix
483 187105 : IF (ASSOCIATED(matrix_new%matrix_struct)) THEN
484 95597 : IF (PRESENT(info)) THEN
485 3251 : CALL cp_fm_syevd_base(matrix_new, eigenvectors_new, eig, myinfo)
486 : ELSE
487 92346 : CALL cp_fm_syevd_base(matrix_new, eigenvectors_new, eig)
488 : END IF
489 : END IF
490 : ! Redistribute results and clean up
491 187105 : CALL cp_fm_redistribute_end(matrix, eigenvectors, eig, matrix_new, eigenvectors_new)
492 : #else
493 : ! Retrieve the optimal work array sizes first
494 : lwork = -1
495 : liwork = -1
496 : m => matrix%local_data
497 : iwork => v
498 : work => w
499 :
500 : CALL dsyevd('V', 'U', n, m(1, 1), SIZE(m, 1), eig(1), work(1), lwork, iwork(1), liwork, myinfo)
501 :
502 : IF (myinfo /= 0) THEN
503 : CALL cp_fm_error("ERROR in DSYEVD: Work space query failed", myinfo, PRESENT(info))
504 : END IF
505 :
506 : ! Reallocate work arrays and perform diagonalisation
507 : lwork = NINT(work(1))
508 : ALLOCATE (work(lwork))
509 :
510 : liwork = iwork(1)
511 : ALLOCATE (iwork(liwork))
512 :
513 : CALL dsyevd('V', 'U', n, m(1, 1), SIZE(m, 1), eig(1), work(1), lwork, iwork(1), liwork, myinfo)
514 :
515 : IF (myinfo /= 0) THEN
516 : CALL cp_fm_error("ERROR in DSYEVD: Matrix diagonalization failed", myinfo, PRESENT(info))
517 : END IF
518 :
519 : CALL cp_fm_to_fm(matrix, eigenvectors)
520 :
521 : DEALLOCATE (iwork)
522 : DEALLOCATE (work)
523 : #endif
524 :
525 187105 : IF (PRESENT(info)) info = myinfo
526 :
527 187105 : nmo = SIZE(eigenvalues, 1)
528 187105 : IF (nmo > n) THEN
529 0 : eigenvalues(1:n) = eig(1:n)
530 : ELSE
531 1778032 : eigenvalues(1:nmo) = eig(1:nmo)
532 : END IF
533 :
534 187105 : DEALLOCATE (eig)
535 :
536 187105 : CALL check_diag(matrix, eigenvectors, n)
537 :
538 187105 : CALL timestop(handle)
539 :
540 374210 : END SUBROUTINE cp_fm_syevd
541 :
542 : ! **************************************************************************************************
543 : !> \brief ...
544 : !> \param matrix ...
545 : !> \param eigenvectors ...
546 : !> \param eigenvalues ...
547 : !> \param info ...
548 : ! **************************************************************************************************
549 95597 : SUBROUTINE cp_fm_syevd_base(matrix, eigenvectors, eigenvalues, info)
550 :
551 : TYPE(cp_fm_type), INTENT(IN) :: matrix, eigenvectors
552 : REAL(KIND=dp), DIMENSION(:), INTENT(OUT) :: eigenvalues
553 : INTEGER, INTENT(OUT), OPTIONAL :: info
554 :
555 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_syevd_base'
556 :
557 : INTEGER :: handle, myinfo
558 : #if defined(__parallel)
559 : TYPE(cp_blacs_env_type), POINTER :: context
560 : INTEGER :: liwork, lwork, n
561 : INTEGER, DIMENSION(9) :: descm, descv
562 95597 : INTEGER, DIMENSION(:), POINTER :: iwork
563 95597 : REAL(KIND=dp), DIMENSION(:), POINTER :: work
564 95597 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: m, v
565 : REAL(KIND=dp), TARGET :: w(1)
566 : #if defined (__HAS_IEEE_EXCEPTIONS)
567 : LOGICAL, DIMENSION(5) :: halt
568 : #endif
569 : #endif
570 :
571 95597 : CALL timeset(routineN, handle)
572 :
573 95597 : myinfo = 0
574 :
575 : #if defined(__parallel)
576 :
577 95597 : n = matrix%matrix_struct%nrow_global
578 95597 : m => matrix%local_data
579 95597 : context => matrix%matrix_struct%context
580 955970 : descm(:) = matrix%matrix_struct%descriptor(:)
581 :
582 95597 : v => eigenvectors%local_data
583 955970 : descv(:) = eigenvectors%matrix_struct%descriptor(:)
584 :
585 95597 : liwork = 7*n + 8*context%num_pe(2) + 2
586 286791 : ALLOCATE (iwork(liwork))
587 :
588 : ! Work space query
589 95597 : lwork = -1
590 95597 : work => w
591 :
592 : CALL pdsyevd('V', 'U', n, m(1, 1), 1, 1, descm, eigenvalues(1), v(1, 1), 1, 1, descv, &
593 95597 : work(1), lwork, iwork(1), liwork, myinfo)
594 :
595 95597 : IF (matrix%matrix_struct%para_env%is_source() .AND. (myinfo /= 0)) THEN
596 0 : CALL cp_fm_error("ERROR in PDSYEVD: Work space query failed", myinfo, PRESENT(info))
597 : END IF
598 :
599 95597 : lwork = NINT(work(1)) ! can be insufficient due to bug in reference ScaLAPACK
600 : #if !defined(__SCALAPACK_NO_WA)
601 : ! Query workspace for QDORMTR as called by reference ScaLAPACK (PDSYEVD).
602 : CALL pdormtr('L', 'U', 'N', n, n, m(1, 1), 1, 1, descm, m(1, 1), &
603 95597 : v(1, 1), 1, 1, descv, work(1), -1, myinfo)
604 :
605 95597 : IF (matrix%matrix_struct%para_env%is_source() .AND. (myinfo /= 0)) THEN
606 0 : CALL cp_fm_error("ERROR in PDORMTR: Work space query failed", myinfo, PRESENT(info))
607 : END IF
608 :
609 95597 : IF (lwork < (work(1) + 2*n)) THEN
610 44654 : lwork = NINT(work(1)) + 2*n ! still wrong by 2*N
611 : END IF
612 : #endif
613 286791 : ALLOCATE (work(lwork))
614 :
615 : ! Initial/documented amount of liwork is exceeded (slightly worrisome too).
616 95597 : IF (liwork < iwork(1)) THEN
617 0 : liwork = iwork(1)
618 0 : DEALLOCATE (iwork)
619 0 : ALLOCATE (iwork(liwork))
620 : END IF
621 :
622 : ! ScaLAPACK takes advantage of IEEE754 exceptions for speedup.
623 : ! Therefore, we disable floating point traps temporarily.
624 : #if defined (__HAS_IEEE_EXCEPTIONS)
625 : CALL ieee_get_halting_mode(IEEE_ALL, halt)
626 : CALL ieee_set_halting_mode(IEEE_ALL, .FALSE.)
627 : #endif
628 :
629 : CALL pdsyevd('V', 'U', n, m(1, 1), 1, 1, descm, eigenvalues(1), v(1, 1), 1, 1, descv, &
630 95597 : work(1), lwork, iwork(1), liwork, myinfo)
631 :
632 : #if defined (__HAS_IEEE_EXCEPTIONS)
633 : CALL ieee_set_halting_mode(IEEE_ALL, halt)
634 : #endif
635 95597 : IF (matrix%matrix_struct%para_env%is_source() .AND. (myinfo /= 0)) THEN
636 0 : CALL cp_fm_error("ERROR in PDSYEVD: Matrix diagonalization failed", myinfo, PRESENT(info))
637 : END IF
638 :
639 95597 : IF (PRESENT(info)) info = myinfo
640 :
641 95597 : DEALLOCATE (work)
642 95597 : DEALLOCATE (iwork)
643 : #else
644 : MARK_USED(matrix)
645 : MARK_USED(eigenvectors)
646 : MARK_USED(eigenvalues)
647 : myinfo = -1
648 : IF (PRESENT(info)) info = myinfo
649 : CALL cp_fm_error("ERROR in "//TRIM(routineN)// &
650 : ": Matrix diagonalization using PDSYEVD requested without ScaLAPACK support")
651 : #endif
652 :
653 95597 : CALL timestop(handle)
654 :
655 95597 : END SUBROUTINE cp_fm_syevd_base
656 :
657 : ! **************************************************************************************************
658 : !> \brief compute eigenvalues and optionally eigenvectors of a real symmetric matrix using scalapack.
659 : !> If eigenvectors are required this routine will replicate a full matrix on each CPU...
660 : !> if more than a handful of vectors are needed, use cp_fm_syevd instead
661 : !> \param matrix ...
662 : !> \param eigenvectors ...
663 : !> \param eigenvalues ...
664 : !> \param neig ...
665 : !> \param work_syevx ...
666 : !> \par matrix is supposed to be in upper triangular form, and overwritten by this routine
667 : !> neig is the number of vectors needed (default all)
668 : !> work_syevx evec calculation only, is the fraction of the working buffer allowed (1.0 use full buffer)
669 : !> reducing this saves time, but might cause the routine to fail
670 : ! **************************************************************************************************
671 40 : SUBROUTINE cp_fm_syevx(matrix, eigenvectors, eigenvalues, neig, work_syevx)
672 :
673 : ! Diagonalise the symmetric n by n matrix using the LAPACK library.
674 :
675 : TYPE(cp_fm_type), INTENT(IN) :: matrix
676 : TYPE(cp_fm_type), OPTIONAL, INTENT(IN) :: eigenvectors
677 : REAL(KIND=dp), OPTIONAL, INTENT(IN) :: work_syevx
678 : INTEGER, INTENT(IN), OPTIONAL :: neig
679 : REAL(KIND=dp), DIMENSION(:), INTENT(OUT) :: eigenvalues
680 :
681 : CHARACTER(LEN=*), PARAMETER :: routineN = "cp_fm_syevx"
682 :
683 : #if defined(__parallel)
684 : REAL(KIND=dp), PARAMETER :: orfac = -1.0_dp
685 : #endif
686 : REAL(KIND=dp), PARAMETER :: vl = 0.0_dp, &
687 : vu = 0.0_dp
688 :
689 : TYPE(cp_blacs_env_type), POINTER :: context
690 : TYPE(cp_logger_type), POINTER :: logger
691 : CHARACTER(LEN=1) :: job_type
692 : REAL(KIND=dp) :: abstol, work_syevx_local
693 : INTEGER :: handle, info, liwork, lwork, &
694 : m, n, nb, npcol, nprow, &
695 : output_unit, neig_local
696 : LOGICAL :: ionode, needs_evecs
697 40 : INTEGER, DIMENSION(:), ALLOCATABLE :: ifail, iwork
698 40 : REAL(KIND=dp), DIMENSION(:), ALLOCATABLE :: w, work
699 40 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, z
700 :
701 : REAL(KIND=dp), EXTERNAL :: dlamch
702 :
703 : #if defined(__parallel)
704 : INTEGER :: nn, np0, npe, nq0, nz
705 : INTEGER, DIMENSION(9) :: desca, descz
706 40 : INTEGER, DIMENSION(:), ALLOCATABLE :: iclustr
707 40 : REAL(KIND=dp), DIMENSION(:), ALLOCATABLE :: gap
708 : INTEGER, EXTERNAL :: iceil, numroc
709 : #else
710 : INTEGER :: nla, nlz
711 : INTEGER, EXTERNAL :: ilaenv
712 : #endif
713 : #if defined (__HAS_IEEE_EXCEPTIONS)
714 : LOGICAL, DIMENSION(5) :: halt
715 : #endif
716 :
717 : ! by default all
718 40 : n = matrix%matrix_struct%nrow_global
719 40 : neig_local = n
720 40 : IF (PRESENT(neig)) neig_local = neig
721 40 : IF (neig_local == 0) RETURN
722 :
723 40 : CALL timeset(routineN, handle)
724 :
725 40 : needs_evecs = PRESENT(eigenvectors)
726 :
727 40 : logger => cp_get_default_logger()
728 40 : ionode = logger%para_env%is_source()
729 40 : n = matrix%matrix_struct%nrow_global
730 :
731 : ! by default allocate all needed space
732 40 : work_syevx_local = 1.0_dp
733 40 : IF (PRESENT(work_syevx)) work_syevx_local = work_syevx
734 :
735 : ! set scalapack job type
736 40 : IF (needs_evecs) THEN
737 40 : job_type = "V"
738 : ELSE
739 0 : job_type = "N"
740 : END IF
741 :
742 : ! target the most accurate calculation of the eigenvalues
743 40 : abstol = 2.0_dp*dlamch("S")
744 :
745 40 : context => matrix%matrix_struct%context
746 40 : nprow = context%num_pe(1)
747 40 : npcol = context%num_pe(2)
748 :
749 120 : ALLOCATE (w(n))
750 560 : eigenvalues(:) = 0.0_dp
751 : #if defined(__parallel)
752 :
753 40 : IF (matrix%matrix_struct%nrow_block /= matrix%matrix_struct%ncol_block) THEN
754 0 : CPABORT("ERROR in "//routineN//": Invalid blocksize (no square blocks) found")
755 : END IF
756 :
757 40 : a => matrix%local_data
758 400 : desca(:) = matrix%matrix_struct%descriptor(:)
759 :
760 40 : IF (needs_evecs) THEN
761 40 : z => eigenvectors%local_data
762 400 : descz(:) = eigenvectors%matrix_struct%descriptor(:)
763 : ELSE
764 : ! z will not be referenced
765 0 : z => matrix%local_data
766 0 : descz = desca
767 : END IF
768 :
769 : ! Get the optimal work storage size
770 :
771 40 : npe = nprow*npcol
772 40 : nb = matrix%matrix_struct%nrow_block
773 40 : nn = MAX(n, nb, 2)
774 40 : np0 = numroc(nn, nb, 0, 0, nprow)
775 40 : nq0 = MAX(numroc(nn, nb, 0, 0, npcol), nb)
776 :
777 40 : IF (needs_evecs) THEN
778 : lwork = 5*n + MAX(5*nn, np0*nq0) + iceil(neig_local, npe)*nn + 2*nb*nb + &
779 40 : INT(work_syevx_local*REAL((neig_local - 1)*n, dp)) !!!! allocates a full matrix on every CPU !!!!!
780 : ELSE
781 0 : lwork = 5*n + MAX(5*nn, nb*(np0 + 1))
782 : END IF
783 40 : liwork = 6*MAX(N, npe + 1, 4)
784 :
785 120 : ALLOCATE (gap(npe))
786 120 : gap = 0.0_dp
787 120 : ALLOCATE (iclustr(2*npe))
788 200 : iclustr = 0
789 120 : ALLOCATE (ifail(n))
790 560 : ifail = 0
791 120 : ALLOCATE (iwork(liwork))
792 120 : ALLOCATE (work(lwork))
793 :
794 : ! ScaLAPACK takes advantage of IEEE754 exceptions for speedup.
795 : ! Therefore, we disable floating point traps temporarily.
796 : #if defined (__HAS_IEEE_EXCEPTIONS)
797 : CALL ieee_get_halting_mode(IEEE_ALL, halt)
798 : CALL ieee_set_halting_mode(IEEE_ALL, .FALSE.)
799 : #endif
800 : CALL pdsyevx(job_type, "I", "U", n, a(1, 1), 1, 1, desca, vl, vu, 1, neig_local, abstol, m, nz, w(1), orfac, &
801 40 : z(1, 1), 1, 1, descz, work(1), lwork, iwork(1), liwork, ifail(1), iclustr(1), gap, info)
802 : #if defined (__HAS_IEEE_EXCEPTIONS)
803 : CALL ieee_set_halting_mode(IEEE_ALL, halt)
804 : #endif
805 :
806 : ! Error handling
807 40 : IF (info /= 0) THEN
808 0 : IF (ionode) THEN
809 0 : output_unit = cp_logger_get_unit_nr(logger, local=.FALSE.)
810 : WRITE (unit=output_unit, FMT="(/,(T3,A,T12,1X,I10))") &
811 0 : "info = ", info, &
812 0 : "lwork = ", lwork, &
813 0 : "liwork = ", liwork, &
814 0 : "nz = ", nz
815 0 : IF (info > 0) THEN
816 : WRITE (unit=output_unit, FMT="(/,T3,A,(T12,6(1X,I10)))") &
817 0 : "ifail = ", ifail
818 : WRITE (unit=output_unit, FMT="(/,T3,A,(T12,6(1X,I10)))") &
819 0 : "iclustr = ", iclustr
820 : WRITE (unit=output_unit, FMT="(/,T3,A,(T12,6(1X,E10.3)))") &
821 0 : "gap = ", gap
822 : END IF
823 : END IF
824 0 : CPABORT("ERROR in PDSYEVX (ScaLAPACK)")
825 : END IF
826 :
827 : ! Release work storage
828 40 : DEALLOCATE (gap)
829 40 : DEALLOCATE (iclustr)
830 :
831 : #else
832 :
833 : a => matrix%local_data
834 : IF (needs_evecs) THEN
835 : z => eigenvectors%local_data
836 : ELSE
837 : ! z will not be referenced
838 : z => matrix%local_data
839 : END IF
840 :
841 : ! Get the optimal work storage size
842 :
843 : nb = MAX(ilaenv(1, "DSYTRD", "U", n, -1, -1, -1), &
844 : ilaenv(1, "DORMTR", "U", n, -1, -1, -1))
845 :
846 : lwork = MAX((nb + 3)*n, 8*n) + n ! sun bug fix
847 : liwork = 5*n
848 :
849 : ALLOCATE (ifail(n))
850 : ifail = 0
851 : ALLOCATE (iwork(liwork))
852 : ALLOCATE (work(lwork))
853 : info = 0
854 : nla = SIZE(a, 1)
855 : nlz = SIZE(z, 1)
856 :
857 : ! LAPACK takes advantage of IEEE754 exceptions for speedup.
858 : ! Therefore, we disable floating point traps temporarily.
859 : #if defined (__HAS_IEEE_EXCEPTIONS)
860 : CALL ieee_get_halting_mode(IEEE_ALL, halt)
861 : CALL ieee_set_halting_mode(IEEE_ALL, .FALSE.)
862 : #endif
863 : CALL dsyevx(job_type, "I", "U", n, a(1, 1), nla, vl, vu, 1, neig_local, &
864 : abstol, m, w, z(1, 1), nlz, work(1), lwork, iwork(1), ifail(1), info)
865 : #if defined (__HAS_IEEE_EXCEPTIONS)
866 : CALL ieee_set_halting_mode(IEEE_ALL, halt)
867 : #endif
868 :
869 : ! Error handling
870 : IF (info /= 0) THEN
871 : output_unit = cp_logger_get_unit_nr(logger, local=.FALSE.)
872 : WRITE (unit=output_unit, FMT="(/,(T3,A,T12,1X,I10))") &
873 : "info = ", info
874 : IF (info > 0) THEN
875 : WRITE (unit=output_unit, FMT="(/,T3,A,(T12,6(1X,I10)))") &
876 : "ifail = ", ifail
877 : END IF
878 : CPABORT("Error in DSYEVX (ScaLAPACK)")
879 : END IF
880 :
881 : #endif
882 : ! Release work storage
883 40 : DEALLOCATE (ifail)
884 40 : DEALLOCATE (iwork)
885 40 : DEALLOCATE (work)
886 400 : eigenvalues(1:neig_local) = w(1:neig_local)
887 40 : DEALLOCATE (w)
888 :
889 40 : IF (needs_evecs) CALL check_diag(matrix, eigenvectors, neig_local)
890 :
891 40 : CALL timestop(handle)
892 :
893 120 : END SUBROUTINE cp_fm_syevx
894 :
895 : ! **************************************************************************************************
896 : !> \brief decomposes a quadratic matrix into its singular value decomposition
897 : !> \param matrix_a ...
898 : !> \param matrix_eigvl ...
899 : !> \param matrix_eigvr_t ...
900 : !> \param eigval ...
901 : !> \param info ...
902 : !> \author Maximilian Graml
903 : ! **************************************************************************************************
904 100 : SUBROUTINE cp_fm_svd(matrix_a, matrix_eigvl, matrix_eigvr_t, eigval, info)
905 :
906 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a
907 : TYPE(cp_fm_type), INTENT(INOUT) :: matrix_eigvl, matrix_eigvr_t
908 : REAL(KIND=dp), DIMENSION(:), POINTER, &
909 : INTENT(INOUT) :: eigval
910 : INTEGER, INTENT(OUT), OPTIONAL :: info
911 :
912 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_svd'
913 :
914 : INTEGER :: handle, n, m, myinfo, lwork
915 100 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
916 : TYPE(cp_fm_type) :: matrix_lu
917 : REAL(KIND=dp), DIMENSION(:), POINTER :: work
918 : REAL(KIND=dp), TARGET :: w(1)
919 : #if defined(__parallel)
920 : INTEGER, DIMENSION(9) :: desca, descu, descvt
921 : #endif
922 :
923 100 : CALL timeset(routineN, handle)
924 :
925 : CALL cp_fm_create(matrix=matrix_lu, &
926 : matrix_struct=matrix_a%matrix_struct, &
927 100 : name="A_lu"//TRIM(ADJUSTL(cp_to_string(1)))//"MATRIX")
928 100 : CALL cp_fm_to_fm(matrix_a, matrix_lu)
929 100 : a => matrix_lu%local_data
930 100 : m = matrix_lu%matrix_struct%nrow_global
931 100 : n = matrix_lu%matrix_struct%ncol_global
932 : ! Assert that incoming matrix is quadratic
933 100 : CPASSERT(m == n)
934 :
935 : ! Prepare for workspace queries
936 100 : myinfo = 0
937 100 : lwork = -1
938 100 : work => w
939 : #if defined(__parallel)
940 : ! To do: That might need a redistribution check as in cp_fm_syevd
941 1000 : desca(:) = matrix_lu%matrix_struct%descriptor(:)
942 1000 : descu(:) = matrix_eigvl%matrix_struct%descriptor(:)
943 1000 : descvt(:) = matrix_eigvr_t%matrix_struct%descriptor(:)
944 :
945 : ! Workspace query
946 : CALL pdgesvd('V', 'V', m, m, matrix_lu%local_data, 1, 1, desca, eigval, matrix_eigvl%local_data, &
947 100 : 1, 1, descu, matrix_eigvr_t%local_data, 1, 1, descvt, work, lwork, myinfo)
948 :
949 100 : IF (matrix_lu%matrix_struct%para_env%is_source() .AND. (myinfo /= 0)) THEN
950 0 : CALL cp_fm_error("ERROR in PDGESVD: Work space query failed", myinfo, PRESENT(info))
951 : END IF
952 :
953 100 : lwork = NINT(work(1))
954 300 : ALLOCATE (work(lwork))
955 :
956 : CALL pdgesvd('V', 'V', m, m, matrix_lu%local_data, 1, 1, desca, eigval, matrix_eigvl%local_data, &
957 100 : 1, 1, descu, matrix_eigvr_t%local_data, 1, 1, descvt, work, lwork, myinfo)
958 :
959 100 : IF (matrix_lu%matrix_struct%para_env%is_source() .AND. (myinfo /= 0)) THEN
960 0 : CALL cp_fm_error("ERROR in PDGESVD: Matrix diagonalization failed", myinfo, PRESENT(info))
961 : END IF
962 : #else
963 : ! Workspace query
964 : CALL dgesvd('S', 'S', m, m, matrix_lu%local_data, m, eigval, matrix_eigvl%local_data, &
965 : m, matrix_eigvr_t%local_data, m, work, lwork, myinfo)
966 :
967 : IF (myinfo /= 0) THEN
968 : CALL cp_fm_error("ERROR in DGESVD: Work space query failed", myinfo, PRESENT(info))
969 : END IF
970 :
971 : lwork = NINT(work(1))
972 : ALLOCATE (work(lwork))
973 :
974 : CALL dgesvd('S', 'S', m, m, matrix_lu%local_data, m, eigval, matrix_eigvl%local_data, &
975 : m, matrix_eigvr_t%local_data, m, work, lwork, myinfo)
976 :
977 : IF (myinfo /= 0) THEN
978 : CALL cp_fm_error("ERROR in DGESVD: Matrix diagonalization failed", myinfo, PRESENT(info))
979 : END IF
980 :
981 : #endif
982 : ! Release intermediary matrices
983 100 : DEALLOCATE (work)
984 100 : CALL cp_fm_release(matrix_lu)
985 :
986 100 : IF (PRESENT(info)) info = myinfo
987 :
988 100 : CALL timestop(handle)
989 100 : END SUBROUTINE cp_fm_svd
990 :
991 : ! **************************************************************************************************
992 : !> \brief ...
993 : !> \param matrix ...
994 : !> \param work ...
995 : !> \param exponent ...
996 : !> \param threshold ...
997 : !> \param n_dependent ...
998 : !> \param verbose ...
999 : !> \param eigvals ...
1000 : ! **************************************************************************************************
1001 2168 : SUBROUTINE cp_fm_power(matrix, work, exponent, threshold, n_dependent, verbose, eigvals)
1002 :
1003 : ! Raise the real symmetric n by n matrix to the power given by
1004 : ! the exponent. All eigenvectors with a corresponding eigenvalue lower
1005 : ! than threshold are quenched. result in matrix
1006 :
1007 : ! - Creation (29.03.1999, Matthias Krack)
1008 : ! - Parallelised using BLACS and ScaLAPACK (06.06.2001,MK)
1009 :
1010 : TYPE(cp_fm_type), INTENT(IN) :: matrix, work
1011 : REAL(KIND=dp), INTENT(IN) :: exponent, threshold
1012 : INTEGER, INTENT(OUT) :: n_dependent
1013 : LOGICAL, INTENT(IN), OPTIONAL :: verbose
1014 : REAL(KIND=dp), DIMENSION(2), INTENT(OUT), &
1015 : OPTIONAL :: eigvals
1016 :
1017 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_power'
1018 :
1019 : INTEGER :: handle, icol_global, &
1020 : mypcol, myprow, &
1021 : ncol_global, nrow_global
1022 : LOGICAL :: my_verbose
1023 : REAL(KIND=dp) :: condition_number, f, p
1024 : REAL(KIND=dp), DIMENSION(:), ALLOCATABLE :: eigenvalues
1025 2168 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: eigenvectors
1026 : TYPE(cp_blacs_env_type), POINTER :: context
1027 :
1028 : #if defined(__parallel)
1029 : INTEGER :: icol_local, ipcol, iprow, irow_global, irow_local
1030 : #endif
1031 :
1032 2168 : CALL timeset(routineN, handle)
1033 :
1034 2168 : my_verbose = .FALSE.
1035 2168 : IF (PRESENT(verbose)) my_verbose = verbose
1036 :
1037 2168 : context => matrix%matrix_struct%context
1038 2168 : myprow = context%mepos(1)
1039 2168 : mypcol = context%mepos(2)
1040 2168 : n_dependent = 0
1041 2168 : p = 0.5_dp*exponent
1042 :
1043 2168 : nrow_global = matrix%matrix_struct%nrow_global
1044 2168 : ncol_global = matrix%matrix_struct%ncol_global
1045 :
1046 6504 : ALLOCATE (eigenvalues(ncol_global))
1047 53811 : eigenvalues(:) = 0.0_dp
1048 :
1049 : ! Compute the eigenvectors and eigenvalues
1050 :
1051 2168 : CALL choose_eigv_solver(matrix, work, eigenvalues)
1052 :
1053 2168 : IF (PRESENT(eigvals)) THEN
1054 348 : eigvals(1) = eigenvalues(1)
1055 348 : eigvals(2) = eigenvalues(ncol_global)
1056 : END IF
1057 :
1058 : #if defined(__parallel)
1059 2168 : eigenvectors => work%local_data
1060 :
1061 : ! Build matrix**exponent with eigenvector quenching
1062 :
1063 53811 : DO icol_global = 1, ncol_global
1064 :
1065 53811 : IF (eigenvalues(icol_global) < threshold) THEN
1066 :
1067 50 : n_dependent = n_dependent + 1
1068 :
1069 50 : ipcol = work%matrix_struct%g2p_col(icol_global)
1070 :
1071 50 : IF (mypcol == ipcol) THEN
1072 50 : icol_local = work%matrix_struct%g2l_col(icol_global)
1073 5850 : DO irow_global = 1, nrow_global
1074 5800 : iprow = work%matrix_struct%g2p_row(irow_global)
1075 5850 : IF (myprow == iprow) THEN
1076 2900 : irow_local = work%matrix_struct%g2l_row(irow_global)
1077 2900 : eigenvectors(irow_local, icol_local) = 0.0_dp
1078 : END IF
1079 : END DO
1080 : END IF
1081 :
1082 : ELSE
1083 :
1084 51593 : f = eigenvalues(icol_global)**p
1085 :
1086 51593 : ipcol = work%matrix_struct%g2p_col(icol_global)
1087 :
1088 51593 : IF (mypcol == ipcol) THEN
1089 51593 : icol_local = work%matrix_struct%g2l_col(icol_global)
1090 2990128 : DO irow_global = 1, nrow_global
1091 2938535 : iprow = work%matrix_struct%g2p_row(irow_global)
1092 2990128 : IF (myprow == iprow) THEN
1093 1513342 : irow_local = work%matrix_struct%g2l_row(irow_global)
1094 : eigenvectors(irow_local, icol_local) = &
1095 1513342 : f*eigenvectors(irow_local, icol_local)
1096 : END IF
1097 : END DO
1098 : END IF
1099 :
1100 : END IF
1101 :
1102 : END DO
1103 :
1104 : #else
1105 :
1106 : eigenvectors => work%local_data
1107 :
1108 : ! Build matrix**exponent with eigenvector quenching
1109 :
1110 : DO icol_global = 1, ncol_global
1111 :
1112 : IF (eigenvalues(icol_global) < threshold) THEN
1113 :
1114 : n_dependent = n_dependent + 1
1115 : eigenvectors(1:nrow_global, icol_global) = 0.0_dp
1116 :
1117 : ELSE
1118 :
1119 : f = eigenvalues(icol_global)**p
1120 : eigenvectors(1:nrow_global, icol_global) = &
1121 : f*eigenvectors(1:nrow_global, icol_global)
1122 :
1123 : END IF
1124 :
1125 : END DO
1126 :
1127 : #endif
1128 2168 : CALL cp_fm_syrk("U", "N", ncol_global, 1.0_dp, work, 1, 1, 0.0_dp, matrix)
1129 2168 : CALL cp_fm_uplo_to_full(matrix, work)
1130 :
1131 : ! Print some warnings/notes
1132 2168 : IF (matrix%matrix_struct%para_env%is_source() .AND. my_verbose) THEN
1133 0 : condition_number = ABS(eigenvalues(ncol_global)/eigenvalues(1))
1134 : WRITE (UNIT=cp_logger_get_default_unit_nr(), FMT="(/,(T2,A,ES15.6))") &
1135 0 : "CP_FM_POWER: smallest eigenvalue:", eigenvalues(1), &
1136 0 : "CP_FM_POWER: largest eigenvalue: ", eigenvalues(ncol_global), &
1137 0 : "CP_FM_POWER: condition number: ", condition_number
1138 0 : IF (eigenvalues(1) <= 0.0_dp) THEN
1139 : WRITE (UNIT=cp_logger_get_default_unit_nr(), FMT="(/,T2,A)") &
1140 0 : "WARNING: matrix has a negative eigenvalue, tighten EPS_DEFAULT"
1141 : END IF
1142 0 : IF (condition_number > 1.0E12_dp) THEN
1143 : WRITE (UNIT=cp_logger_get_default_unit_nr(), FMT="(/,T2,A)") &
1144 0 : "WARNING: high condition number => possibly ill-conditioned matrix"
1145 : END IF
1146 : END IF
1147 :
1148 2168 : DEALLOCATE (eigenvalues)
1149 :
1150 2168 : CALL timestop(handle)
1151 :
1152 2168 : END SUBROUTINE cp_fm_power
1153 :
1154 : ! **************************************************************************************************
1155 : !> \brief ...
1156 : !> \param matrix ...
1157 : !> \param eigenvectors ...
1158 : !> \param eigval ...
1159 : !> \param thresh ...
1160 : !> \param start_sec_block ...
1161 : ! **************************************************************************************************
1162 18 : SUBROUTINE cp_fm_block_jacobi(matrix, eigenvectors, eigval, thresh, start_sec_block)
1163 :
1164 : ! Calculates block diagonalization of a full symmetric matrix
1165 : ! It has its origin in cp_fm_syevx. This routine rotates only elements
1166 : ! which are larger than a threshold values "thresh".
1167 : ! start_sec_block is the start of the second block.
1168 : ! IT DOES ONLY ONE SWEEP!
1169 :
1170 : ! - Creation (07.10.2002, Martin Fengler)
1171 : ! - Cosmetics (05.04.06, MK)
1172 :
1173 : TYPE(cp_fm_type), INTENT(IN) :: eigenvectors, matrix
1174 : REAL(KIND=dp), DIMENSION(:), INTENT(IN) :: eigval
1175 : INTEGER, INTENT(IN) :: start_sec_block
1176 : REAL(KIND=dp), INTENT(IN) :: thresh
1177 :
1178 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_block_jacobi'
1179 :
1180 : INTEGER :: handle
1181 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, ev
1182 :
1183 : REAL(KIND=dp) :: tan_theta, tau, c, s
1184 : INTEGER :: q, p, N
1185 18 : REAL(KIND=dp), DIMENSION(:), ALLOCATABLE :: c_ip
1186 :
1187 : #if defined(__parallel)
1188 : TYPE(cp_blacs_env_type), POINTER :: context
1189 :
1190 : INTEGER :: nprow, npcol, block_dim_row, block_dim_col, info, &
1191 : ev_row_block_size, iam, mynumrows, mype, npe, q_loc
1192 18 : REAL(KIND=dp), DIMENSION(:, :), ALLOCATABLE :: a_loc, ev_loc
1193 : INTEGER, DIMENSION(9) :: desca, descz, &
1194 : desc_a_block, &
1195 : desc_ev_loc
1196 : TYPE(mp_comm_type):: allgrp
1197 : TYPE(cp_blacs_type) :: ictxt_loc
1198 : INTEGER, EXTERNAL :: numroc
1199 : #endif
1200 :
1201 : ! -------------------------------------------------------------------------
1202 :
1203 18 : CALL timeset(routineN, handle)
1204 :
1205 : #if defined(__parallel)
1206 18 : context => matrix%matrix_struct%context
1207 18 : allgrp = matrix%matrix_struct%para_env
1208 :
1209 18 : nprow = context%num_pe(1)
1210 18 : npcol = context%num_pe(2)
1211 :
1212 18 : N = matrix%matrix_struct%nrow_global
1213 :
1214 18 : A => matrix%local_data
1215 180 : desca(:) = matrix%matrix_struct%descriptor(:)
1216 18 : EV => eigenvectors%local_data
1217 180 : descz(:) = eigenvectors%matrix_struct%descriptor(:)
1218 :
1219 : ! Copy the block to be rotated to the master process firstly and broadcast to all processes
1220 : ! start_sec_block defines where the second block starts!
1221 : ! Block will be processed together with the OO block
1222 :
1223 18 : block_dim_row = start_sec_block - 1
1224 18 : block_dim_col = N - block_dim_row
1225 72 : ALLOCATE (A_loc(block_dim_row, block_dim_col))
1226 :
1227 18 : mype = matrix%matrix_struct%para_env%mepos
1228 18 : npe = matrix%matrix_struct%para_env%num_pe
1229 : ! Get a new context
1230 18 : CALL ictxt_loc%gridinit(matrix%matrix_struct%para_env, 'Row-major', nprow*npcol, 1)
1231 :
1232 : CALL descinit(desc_a_block, block_dim_row, block_dim_col, block_dim_row, &
1233 18 : block_dim_col, 0, 0, ictxt_loc%get_handle(), block_dim_row, info)
1234 :
1235 : CALL pdgemr2d(block_dim_row, block_dim_col, A, 1, start_sec_block, desca, &
1236 18 : A_loc, 1, 1, desc_a_block, context%get_handle())
1237 : ! Only the master (root) process received data yet
1238 18 : CALL allgrp%bcast(A_loc, 0)
1239 :
1240 : ! Since each process owns now the upper block, the eigenvectors can be re-sorted in such a way that
1241 : ! each process has a NN*1 grid, i.e. the process owns a bunch of rows which can be modified locally
1242 :
1243 : ! Initialize distribution of the eigenvectors
1244 18 : iam = mype
1245 18 : ev_row_block_size = n/(nprow*npcol)
1246 18 : mynumrows = NUMROC(N, ev_row_block_size, iam, 0, nprow*npcol)
1247 :
1248 108 : ALLOCATE (EV_loc(mynumrows, N), c_ip(mynumrows))
1249 :
1250 : CALL descinit(desc_ev_loc, N, N, ev_row_block_size, N, 0, 0, ictxt_loc%get_handle(), &
1251 18 : mynumrows, info)
1252 :
1253 18 : CALL pdgemr2d(N, N, EV, 1, 1, descz, EV_loc, 1, 1, desc_ev_loc, context%get_handle())
1254 :
1255 : ! Start block diagonalization of matrix
1256 :
1257 18 : q_loc = 0
1258 :
1259 1170 : DO q = start_sec_block, N
1260 1152 : q_loc = q_loc + 1
1261 148626 : DO p = 1, (start_sec_block - 1)
1262 :
1263 148608 : IF (ABS(A_loc(p, q_loc)) > thresh) THEN
1264 :
1265 117566 : tau = (eigval(q) - eigval(p))/(2.0_dp*A_loc(p, q_loc))
1266 :
1267 117566 : tan_theta = SIGN(1.0_dp, tau)/(ABS(tau) + SQRT(1.0_dp + tau*tau))
1268 :
1269 : ! Cos(theta)
1270 117566 : c = 1.0_dp/SQRT(1.0_dp + tan_theta*tan_theta)
1271 117566 : s = tan_theta*c
1272 :
1273 : ! Calculate eigenvectors: Q*J
1274 : ! c_ip = c*EV_loc(:,p) - s*EV_loc(:,q)
1275 : ! c_iq = s*EV_loc(:,p) + c*EV_loc(:,q)
1276 : ! EV(:,p) = c_ip
1277 : ! EV(:,q) = c_iq
1278 117566 : CALL dcopy(mynumrows, EV_loc(1, p), 1, c_ip(1), 1)
1279 117566 : CALL dscal(mynumrows, c, EV_loc(1, p), 1)
1280 117566 : CALL daxpy(mynumrows, -s, EV_loc(1, q), 1, EV_loc(1, p), 1)
1281 117566 : CALL dscal(mynumrows, c, EV_loc(1, q), 1)
1282 117566 : CALL daxpy(mynumrows, s, c_ip(1), 1, EV_loc(1, q), 1)
1283 :
1284 : END IF
1285 :
1286 : END DO
1287 : END DO
1288 :
1289 : ! Copy eigenvectors back to the original distribution
1290 18 : CALL pdgemr2d(N, N, EV_loc, 1, 1, desc_ev_loc, EV, 1, 1, descz, context%get_handle())
1291 :
1292 : ! Release work storage
1293 18 : DEALLOCATE (A_loc, EV_loc, c_ip)
1294 :
1295 18 : CALL ictxt_loc%gridexit()
1296 :
1297 : #else
1298 :
1299 : N = matrix%matrix_struct%nrow_global
1300 :
1301 : ALLOCATE (c_ip(N)) ! Local eigenvalue vector
1302 :
1303 : A => matrix%local_data ! Contains the Matrix to be worked on
1304 : EV => eigenvectors%local_data ! Contains the eigenvectors up to blocksize, rest is garbage
1305 :
1306 : ! Start matrix diagonalization
1307 :
1308 : tan_theta = 0.0_dp
1309 : tau = 0.0_dp
1310 :
1311 : DO q = start_sec_block, N
1312 : DO p = 1, (start_sec_block - 1)
1313 :
1314 : IF (ABS(A(p, q)) > thresh) THEN
1315 :
1316 : tau = (eigval(q) - eigval(p))/(2.0_dp*A(p, q))
1317 :
1318 : tan_theta = SIGN(1.0_dp, tau)/(ABS(tau) + SQRT(1.0_dp + tau*tau))
1319 :
1320 : ! Cos(theta)
1321 : c = 1.0_dp/SQRT(1.0_dp + tan_theta*tan_theta)
1322 : s = tan_theta*c
1323 :
1324 : ! Calculate eigenvectors: Q*J
1325 : ! c_ip = c*EV(:,p) - s*EV(:,q)
1326 : ! c_iq = s*EV(:,p) + c*EV(:,q)
1327 : ! EV(:,p) = c_ip
1328 : ! EV(:,q) = c_iq
1329 : CALL dcopy(N, EV(1, p), 1, c_ip(1), 1)
1330 : CALL dscal(N, c, EV(1, p), 1)
1331 : CALL daxpy(N, -s, EV(1, q), 1, EV(1, p), 1)
1332 : CALL dscal(N, c, EV(1, q), 1)
1333 : CALL daxpy(N, s, c_ip(1), 1, EV(1, q), 1)
1334 :
1335 : END IF
1336 :
1337 : END DO
1338 : END DO
1339 :
1340 : ! Release work storage
1341 :
1342 : DEALLOCATE (c_ip)
1343 :
1344 : #endif
1345 :
1346 18 : CALL timestop(handle)
1347 :
1348 90 : END SUBROUTINE cp_fm_block_jacobi
1349 :
1350 : ! **************************************************************************************************
1351 : !> \brief General Eigenvalue Problem AX = BXE
1352 : !> Single option version: Cholesky decomposition of B
1353 : !> \param amatrix ...
1354 : !> \param bmatrix ...
1355 : !> \param eigenvectors ...
1356 : !> \param eigenvalues ...
1357 : !> \param work ...
1358 : ! **************************************************************************************************
1359 368 : SUBROUTINE cp_fm_geeig(amatrix, bmatrix, eigenvectors, eigenvalues, work)
1360 :
1361 : TYPE(cp_fm_type), INTENT(IN) :: amatrix, bmatrix, eigenvectors
1362 : REAL(KIND=dp), DIMENSION(:) :: eigenvalues
1363 : TYPE(cp_fm_type), INTENT(IN) :: work
1364 :
1365 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_geeig'
1366 :
1367 : INTEGER :: handle, nao, nmo
1368 :
1369 368 : CALL timeset(routineN, handle)
1370 :
1371 368 : CALL cp_fm_get_info(amatrix, nrow_global=nao)
1372 368 : nmo = SIZE(eigenvalues)
1373 :
1374 368 : IF (diag_type == FM_DIAG_TYPE_CUSOLVER .AND. cusolver_generalized .AND. nao >= 64) THEN
1375 : ! Use cuSolverMP generalized eigenvalue solver for large matrices
1376 : ! Use work as intermediate buffer since eigenvectors may be smaller (nao x nmo)
1377 0 : CALL cp_fm_general_cusolver(amatrix, bmatrix, work, eigenvalues)
1378 0 : CALL cp_fm_to_fm(work, eigenvectors, nmo)
1379 : #if defined(__DLAF)
1380 : ELSE IF (diag_type == FM_DIAG_TYPE_DLAF .AND. amatrix%matrix_struct%nrow_global >= dlaf_neigvec_min) THEN
1381 : ! Use DLA-Future generalized eigenvalue solver for large matrices
1382 : CALL cp_fm_diag_gen_dlaf(amatrix, bmatrix, eigenvectors, eigenvalues)
1383 : #endif
1384 : ELSE
1385 : ! Cholesky decompose S=U(T)U
1386 368 : CALL cp_fm_cholesky_decompose(bmatrix)
1387 : ! Invert to get U^(-1)
1388 368 : CALL cp_fm_triangular_invert(bmatrix)
1389 : ! Reduce to get U^(-T) * H * U^(-1)
1390 368 : CALL cp_fm_triangular_multiply(bmatrix, amatrix, side="R")
1391 368 : CALL cp_fm_triangular_multiply(bmatrix, amatrix, transpose_tr=.TRUE.)
1392 : ! Diagonalize
1393 : CALL choose_eigv_solver(matrix=amatrix, eigenvectors=work, &
1394 368 : eigenvalues=eigenvalues)
1395 : ! Restore vectors C = U^(-1) * C*
1396 368 : CALL cp_fm_triangular_multiply(bmatrix, work)
1397 368 : CALL cp_fm_to_fm(work, eigenvectors, nmo)
1398 : END IF
1399 :
1400 368 : CALL timestop(handle)
1401 :
1402 368 : END SUBROUTINE cp_fm_geeig
1403 :
1404 : ! **************************************************************************************************
1405 : !> \brief General Eigenvalue Problem AX = BXE
1406 : !> Use canonical diagonalization : U*s**(-1/2)
1407 : !> \param amatrix ...
1408 : !> \param bmatrix ...
1409 : !> \param eigenvectors ...
1410 : !> \param eigenvalues ...
1411 : !> \param work ...
1412 : !> \param epseig ...
1413 : ! **************************************************************************************************
1414 66 : SUBROUTINE cp_fm_geeig_canon(amatrix, bmatrix, eigenvectors, eigenvalues, work, epseig)
1415 :
1416 : TYPE(cp_fm_type), INTENT(IN) :: amatrix, bmatrix, eigenvectors
1417 : REAL(KIND=dp), DIMENSION(:), INTENT(OUT) :: eigenvalues
1418 : TYPE(cp_fm_type), INTENT(IN) :: work
1419 : REAL(KIND=dp), INTENT(IN) :: epseig
1420 :
1421 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_geeig_canon'
1422 :
1423 : INTEGER :: handle, i, icol, irow, nao, nc, ncol, &
1424 : nmo, nx
1425 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: evals
1426 :
1427 66 : CALL timeset(routineN, handle)
1428 :
1429 : ! Test sizees
1430 66 : CALL cp_fm_get_info(amatrix, nrow_global=nao)
1431 66 : nmo = SIZE(eigenvalues)
1432 198 : ALLOCATE (evals(nao))
1433 :
1434 : ! Diagonalize -S matrix, this way the NULL space is at the end of the spectrum
1435 66 : CALL cp_fm_scale(-1.0_dp, bmatrix)
1436 66 : CALL choose_eigv_solver(matrix=bmatrix, eigenvectors=work, eigenvalues=evals)
1437 4720 : evals(:) = -evals(:)
1438 66 : nc = nao
1439 4428 : DO i = 1, nao
1440 4428 : IF (evals(i) < epseig) THEN
1441 40 : nc = i - 1
1442 40 : EXIT
1443 : END IF
1444 : END DO
1445 66 : CPASSERT(nc /= 0)
1446 :
1447 66 : IF (nc /= nao) THEN
1448 40 : IF (nc < nmo) THEN
1449 : ! Copy NULL space definition to last vectors of eigenvectors (if needed)
1450 0 : ncol = nmo - nc
1451 0 : CALL cp_fm_to_fm(work, eigenvectors, ncol, nc + 1, nc + 1)
1452 : END IF
1453 : ! Set NULL space in eigenvector matrix of S to zero
1454 332 : DO icol = nc + 1, nao
1455 36172 : DO irow = 1, nao
1456 36132 : CALL cp_fm_set_element(work, irow, icol, 0.0_dp)
1457 : END DO
1458 : END DO
1459 : ! Set small eigenvalues to a dummy save value
1460 332 : evals(nc + 1:nao) = 1.0_dp
1461 : END IF
1462 : ! Calculate U*s**(-1/2)
1463 4720 : evals(:) = 1.0_dp/SQRT(evals(:))
1464 66 : CALL cp_fm_column_scale(work, evals)
1465 : ! Reduce to get U^(-T) * H * U^(-1)
1466 66 : CALL cp_fm_gemm("T", "N", nao, nao, nao, 1.0_dp, work, amatrix, 0.0_dp, bmatrix)
1467 66 : CALL cp_fm_gemm("N", "N", nao, nao, nao, 1.0_dp, bmatrix, work, 0.0_dp, amatrix)
1468 66 : IF (nc /= nao) THEN
1469 : ! set diagonal values to save large value
1470 332 : DO icol = nc + 1, nao
1471 332 : CALL cp_fm_set_element(amatrix, icol, icol, 10000.0_dp)
1472 : END DO
1473 : END IF
1474 : ! Diagonalize
1475 66 : CALL choose_eigv_solver(matrix=amatrix, eigenvectors=bmatrix, eigenvalues=eigenvalues)
1476 66 : nx = MIN(nc, nmo)
1477 : ! Restore vectors C = U^(-1) * C*
1478 66 : CALL cp_fm_gemm("N", "N", nao, nx, nc, 1.0_dp, work, bmatrix, 0.0_dp, eigenvectors)
1479 :
1480 66 : DEALLOCATE (evals)
1481 :
1482 66 : CALL timestop(handle)
1483 :
1484 66 : END SUBROUTINE cp_fm_geeig_canon
1485 :
1486 : END MODULE cp_fm_diag
|