Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2026 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief Basic linear algebra operations for full matrices.
10 : !> \par History
11 : !> 08.2002 split out of qs_blacs [fawzi]
12 : !> \author Fawzi Mohamed
13 : ! **************************************************************************************************
14 : MODULE cp_fm_basic_linalg
15 : USE cp_blacs_env, ONLY: cp_blacs_env_type
16 : USE cp_fm_struct, ONLY: cp_fm_struct_equivalent
17 : USE cp_fm_types, ONLY: &
18 : cp_fm_create, cp_fm_get_diag, cp_fm_get_info, cp_fm_get_submatrix, cp_fm_p_type, &
19 : cp_fm_release, cp_fm_set_all, cp_fm_set_element, cp_fm_set_submatrix, cp_fm_to_fm, &
20 : cp_fm_type
21 : USE cp_log_handling, ONLY: cp_logger_get_default_unit_nr, &
22 : cp_to_string
23 : USE kahan_sum, ONLY: accurate_dot_product, &
24 : accurate_sum
25 : USE kinds, ONLY: dp, &
26 : int_8
27 : USE machine, ONLY: m_memory
28 : USE mathlib, ONLY: get_pseudo_inverse_svd, &
29 : invert_matrix
30 : USE message_passing, ONLY: mp_comm_type
31 : #if defined (__HAS_IEEE_EXCEPTIONS)
32 : USE ieee_exceptions, ONLY: ieee_get_halting_mode, &
33 : ieee_set_halting_mode, &
34 : IEEE_ALL
35 : #endif
36 : #include "../base/base_uses.f90"
37 :
38 : IMPLICIT NONE
39 : PRIVATE
40 :
41 : LOGICAL, PRIVATE, PARAMETER :: debug_this_module = .TRUE.
42 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_fm_basic_linalg'
43 :
44 : PUBLIC :: cp_fm_scale, & ! scale a matrix
45 : cp_fm_scale_and_add, & ! scale and add two matrices
46 : cp_fm_geadd, & ! general addition
47 : cp_fm_add_columns, & ! daxpy type column add
48 : cp_fm_column_scale, & ! scale columns of a matrix
49 : cp_fm_row_scale, & ! scale rows of a matrix
50 : cp_fm_trace, & ! trace of the transpose(A)*B
51 : cp_fm_contracted_trace, & ! sum_{i,...,k} Tr [A(i,...,k)^T * B(i,...,k)]
52 : cp_fm_norm, & ! different norms of A
53 : cp_fm_schur_product, & ! schur product
54 : cp_fm_transpose, & ! transpose a matrix
55 : cp_fm_uplo_to_full, & ! symmetrise a triangular matrix
56 : cp_fm_syrk, & ! rank k update
57 : cp_fm_triangular_multiply, & ! triangular matrix multiply / solve
58 : cp_fm_symm, & ! multiply a symmetric with a non-symmetric matrix
59 : cp_fm_gemm, & ! multiply two matrices
60 : cp_complex_fm_gemm, & ! multiply two complex matrices, represented by non_complex fm matrices
61 : cp_fm_invert, & ! computes the inverse and determinant
62 : cp_fm_frobenius_norm, & ! frobenius norm
63 : cp_fm_triangular_invert, & ! compute the reciprocal of a triangular matrix
64 : cp_fm_qr_factorization, & ! compute the QR factorization of a rectangular matrix
65 : cp_fm_solve, & ! solves the equation A*B=C A and C are input
66 : cp_fm_pdgeqpf, & ! compute a QR factorization with column pivoting of a M-by-N distributed matrix
67 : cp_fm_pdorgqr, & ! generates an M-by-N as first N columns of a product of K elementary reflectors
68 : cp_fm_potrf, & ! Cholesky decomposition
69 : cp_fm_potri, & ! Invert triangular matrix
70 : cp_fm_rot_rows, & ! rotates two rows
71 : cp_fm_rot_cols, & ! rotates two columns
72 : cp_fm_Gram_Schmidt_orthonorm, & ! Gram-Schmidt orthonormalization of columns of a full matrix, &
73 : cp_fm_det, & ! determinant of a real matrix with correct sign
74 : cp_fm_matvec ! matrix-vector multiplication (vector replicated)
75 :
76 : REAL(KIND=dp), EXTERNAL :: dlange, pdlange, pdlatra
77 :
78 : INTERFACE cp_fm_trace
79 : MODULE PROCEDURE cp_fm_trace_a0b0t0
80 : MODULE PROCEDURE cp_fm_trace_a1b0t1_a
81 : MODULE PROCEDURE cp_fm_trace_a1b0t1_p
82 : MODULE PROCEDURE cp_fm_trace_a1b1t1_aa
83 : MODULE PROCEDURE cp_fm_trace_a1b1t1_ap
84 : MODULE PROCEDURE cp_fm_trace_a1b1t1_pa
85 : MODULE PROCEDURE cp_fm_trace_a1b1t1_pp
86 : END INTERFACE cp_fm_trace
87 :
88 : INTERFACE cp_fm_contracted_trace
89 : MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2_aa
90 : MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2_ap
91 : MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2_pa
92 : MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2_pp
93 : END INTERFACE cp_fm_contracted_trace
94 : CONTAINS
95 :
96 : ! **************************************************************************************************
97 : !> \brief Computes the determinant (with a correct sign even in parallel environment!) of a real square matrix
98 : !> \author A. Sinyavskiy (andrey.sinyavskiy@chem.uzh.ch)
99 : ! **************************************************************************************************
100 0 : SUBROUTINE cp_fm_det(matrix_a, det_a)
101 :
102 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a
103 : REAL(KIND=dp), INTENT(OUT) :: det_a
104 : REAL(KIND=dp) :: determinant
105 : TYPE(cp_fm_type) :: matrix_lu
106 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
107 : INTEGER :: n, i, info, P
108 0 : INTEGER, ALLOCATABLE, DIMENSION(:) :: ipivot
109 : REAL(KIND=dp), DIMENSION(:), POINTER :: diag
110 :
111 : #if defined(__parallel)
112 : INTEGER :: myprow, nprow, npcol, nrow_local, nrow_block, irow_local
113 : INTEGER, DIMENSION(9) :: desca
114 : #endif
115 :
116 : CALL cp_fm_create(matrix=matrix_lu, &
117 : matrix_struct=matrix_a%matrix_struct, &
118 0 : name="A_lu"//TRIM(ADJUSTL(cp_to_string(1)))//"MATRIX")
119 0 : CALL cp_fm_to_fm(matrix_a, matrix_lu)
120 :
121 0 : a => matrix_lu%local_data
122 0 : n = matrix_lu%matrix_struct%nrow_global
123 0 : ALLOCATE (ipivot(n))
124 0 : ipivot(:) = 0
125 0 : P = 0
126 0 : ALLOCATE (diag(n))
127 0 : diag(:) = 0.0_dp
128 : #if defined(__parallel)
129 : ! Use LU decomposition
130 0 : desca(:) = matrix_lu%matrix_struct%descriptor(:)
131 0 : CALL pdgetrf(n, n, a, 1, 1, desca, ipivot, info)
132 0 : CALL cp_fm_get_diag(matrix_lu, diag)
133 0 : determinant = PRODUCT(diag)
134 0 : myprow = matrix_lu%matrix_struct%context%mepos(1)
135 0 : nprow = matrix_lu%matrix_struct%context%num_pe(1)
136 0 : npcol = matrix_lu%matrix_struct%context%num_pe(2)
137 0 : nrow_local = matrix_lu%matrix_struct%nrow_locals(myprow)
138 0 : nrow_block = matrix_lu%matrix_struct%nrow_block
139 0 : DO irow_local = 1, nrow_local
140 0 : i = matrix_lu%matrix_struct%row_indices(irow_local)
141 0 : IF (ipivot(irow_local) /= i) P = P + 1
142 : END DO
143 0 : CALL matrix_lu%matrix_struct%para_env%sum(P)
144 : ! very important fix
145 0 : P = P/npcol
146 : #else
147 : CALL dgetrf(n, n, a, n, ipivot, info)
148 : CALL cp_fm_get_diag(matrix_lu, diag)
149 : determinant = PRODUCT(diag)
150 : DO i = 1, n
151 : IF (ipivot(i) /= i) P = P + 1
152 : END DO
153 : #endif
154 0 : DEALLOCATE (ipivot)
155 0 : DEALLOCATE (diag)
156 0 : CALL cp_fm_release(matrix_lu)
157 0 : det_a = determinant*(-2*MOD(P, 2) + 1.0_dp)
158 0 : END SUBROUTINE cp_fm_det
159 :
160 : ! **************************************************************************************************
161 : !> \brief calc A <- alpha*A + beta*B
162 : !> optimized for alpha == 1.0 (just add beta*B) and beta == 0.0 (just
163 : !> scale A)
164 : !> \param alpha ...
165 : !> \param matrix_a ...
166 : !> \param beta ...
167 : !> \param matrix_b ...
168 : ! **************************************************************************************************
169 1229924 : SUBROUTINE cp_fm_scale_and_add(alpha, matrix_a, beta, matrix_b)
170 :
171 : REAL(KIND=dp), INTENT(IN) :: alpha
172 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a
173 : REAL(KIND=dp), INTENT(IN), OPTIONAL :: beta
174 : TYPE(cp_fm_type), INTENT(IN), OPTIONAL :: matrix_b
175 :
176 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_scale_and_add'
177 :
178 : INTEGER :: handle, size_a, size_b
179 : REAL(KIND=dp) :: my_beta
180 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, b
181 :
182 1229924 : CALL timeset(routineN, handle)
183 :
184 1229924 : my_beta = 0.0_dp
185 1229924 : IF (PRESENT(matrix_b)) my_beta = 1.0_dp
186 1229924 : IF (PRESENT(beta)) my_beta = beta
187 : NULLIFY (a, b)
188 :
189 1229924 : IF (PRESENT(beta)) THEN
190 1229922 : CPASSERT(PRESENT(matrix_b))
191 1229922 : IF (ASSOCIATED(matrix_a%local_data, matrix_b%local_data)) THEN
192 0 : CPWARN("Bad use of routine. Call cp_fm_scale instead")
193 0 : CALL cp_fm_scale(alpha + beta, matrix_a)
194 0 : CALL timestop(handle)
195 0 : RETURN
196 : END IF
197 : END IF
198 :
199 1229924 : a => matrix_a%local_data
200 :
201 1229924 : size_a = SIZE(a, 1)*SIZE(a, 2)
202 :
203 1229924 : IF (alpha /= 1.0_dp) THEN
204 80542 : CALL dscal(size_a, alpha, a, 1)
205 : END IF
206 1229924 : IF (my_beta /= 0.0_dp) THEN
207 1223094 : IF (matrix_a%matrix_struct%context /= matrix_b%matrix_struct%context) &
208 0 : CPABORT("Matrices must be in the same blacs context")
209 :
210 1223094 : IF (cp_fm_struct_equivalent(matrix_a%matrix_struct, &
211 : matrix_b%matrix_struct)) THEN
212 :
213 1223094 : b => matrix_b%local_data
214 1223094 : size_b = SIZE(b, 1)*SIZE(b, 2)
215 1223094 : IF (size_a /= size_b) &
216 0 : CPABORT("Matrices must have same local sizes")
217 :
218 1223094 : CALL daxpy(size_a, my_beta, b, 1, a, 1)
219 :
220 : ELSE
221 : CALL cp_abort(__LOCATION__, &
222 : "cp_fm_scale_and_add is not yet implemented for cases "// &
223 0 : "where two input matrix structures are not equivalent")
224 : END IF
225 :
226 : END IF
227 :
228 1229924 : CALL timestop(handle)
229 :
230 : END SUBROUTINE cp_fm_scale_and_add
231 :
232 : ! **************************************************************************************************
233 : !> \brief interface to BLACS geadd:
234 : !> matrix_b = beta*matrix_b + alpha*opt(matrix_a)
235 : !> where opt(matrix_a) can be either:
236 : !> 'N': matrix_a
237 : !> 'T': matrix_a^T
238 : !> 'C': matrix_a^H (Hermitian conjugate)
239 : !> note that this is a level three routine, use cp_fm_scale_and_add if that
240 : !> is sufficient for your needs
241 : !> \param alpha : complex scalar
242 : !> \param trans : 'N' normal, 'T' transposed
243 : !> \param matrix_a : input matrix_a
244 : !> \param beta : complex scalar
245 : !> \param matrix_b : input matrix_b, upon out put the updated matrix_b
246 : !> \author Lianheng Tong
247 : ! **************************************************************************************************
248 602 : SUBROUTINE cp_fm_geadd(alpha, trans, matrix_a, beta, matrix_b)
249 : REAL(KIND=dp), INTENT(IN) :: alpha, beta
250 : CHARACTER, INTENT(IN) :: trans
251 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_b
252 :
253 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_geadd'
254 :
255 : INTEGER :: nrow_global, ncol_global, handle
256 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: aa, bb
257 : #if defined(__parallel)
258 : INTEGER, DIMENSION(9) :: desca, descb
259 : #elif !defined(__MKL)
260 : INTEGER :: ii, jj
261 : #endif
262 :
263 602 : CALL timeset(routineN, handle)
264 :
265 602 : nrow_global = matrix_a%matrix_struct%nrow_global
266 602 : ncol_global = matrix_a%matrix_struct%ncol_global
267 602 : CPASSERT(nrow_global == matrix_b%matrix_struct%nrow_global)
268 602 : CPASSERT(ncol_global == matrix_b%matrix_struct%ncol_global)
269 :
270 602 : aa => matrix_a%local_data
271 602 : bb => matrix_b%local_data
272 :
273 : #if defined(__parallel)
274 6020 : desca = matrix_a%matrix_struct%descriptor
275 6020 : descb = matrix_b%matrix_struct%descriptor
276 : CALL pdgeadd(trans, &
277 : nrow_global, &
278 : ncol_global, &
279 : alpha, &
280 : aa, &
281 : 1, 1, &
282 : desca, &
283 : beta, &
284 : bb, &
285 : 1, 1, &
286 602 : descb)
287 : #elif defined(__MKL)
288 : CALL mkl_domatadd('C', trans, 'N', nrow_global, ncol_global, &
289 : alpha, aa, nrow_global, beta, bb, nrow_global, bb, nrow_global)
290 : #else
291 : ! dgeadd is not a standard BLAS function, although it is implemented
292 : ! in some libraries like OpenBLAS, so not going to use it here
293 : SELECT CASE (trans)
294 : CASE ('T')
295 : DO jj = 1, ncol_global
296 : DO ii = 1, nrow_global
297 : bb(ii, jj) = beta*bb(ii, jj) + alpha*aa(jj, ii)
298 : END DO
299 : END DO
300 : CASE DEFAULT
301 : DO jj = 1, ncol_global
302 : DO ii = 1, nrow_global
303 : bb(ii, jj) = beta*bb(ii, jj) + alpha*aa(ii, jj)
304 : END DO
305 : END DO
306 : END SELECT
307 : #endif
308 :
309 602 : CALL timestop(handle)
310 :
311 602 : END SUBROUTINE cp_fm_geadd
312 :
313 : ! **************************************************************************************************
314 : !> \brief Add (and scale) a subset of columns of a fm to a fm
315 : !> b = alpha*a + b
316 : !> \param msource ...
317 : !> \param mtarget ...
318 : !> \param ncol ...
319 : !> \param alpha ...
320 : !> \param source_start ...
321 : !> \param target_start ...
322 : ! **************************************************************************************************
323 0 : SUBROUTINE cp_fm_add_columns(msource, mtarget, ncol, alpha, source_start, target_start)
324 :
325 : TYPE(cp_fm_type), INTENT(IN) :: msource, mtarget
326 : INTEGER, INTENT(IN) :: ncol
327 : REAL(KIND=dp), INTENT(IN), OPTIONAL :: alpha
328 : INTEGER, INTENT(IN), OPTIONAL :: source_start, target_start
329 :
330 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_add_columns'
331 :
332 : INTEGER :: handle, n, ss, ts, i
333 : REAL(KIND=dp) :: fscale
334 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, b
335 : #if defined(__parallel)
336 : INTEGER, DIMENSION(9) :: desca, descb
337 : #endif
338 :
339 0 : CALL timeset(routineN, handle)
340 :
341 0 : ss = 1
342 0 : ts = 1
343 :
344 0 : IF (PRESENT(source_start)) ss = source_start
345 0 : IF (PRESENT(target_start)) ts = target_start
346 :
347 0 : fscale = 1.0_dp
348 0 : IF (PRESENT(alpha)) fscale = alpha
349 :
350 0 : n = msource%matrix_struct%nrow_global
351 :
352 0 : a => msource%local_data
353 0 : b => mtarget%local_data
354 :
355 : #if defined(__parallel)
356 : MARK_USED(i)
357 0 : desca(:) = msource%matrix_struct%descriptor(:)
358 0 : descb(:) = mtarget%matrix_struct%descriptor(:)
359 0 : CALL pdgeadd("N", n, ncol, fscale, a, 1, ss, desca, 1.0_dp, b, 1, ts, descb)
360 : #else
361 : DO i = 0, ncol - 1
362 : b(1:n, ts + i) = b(1:n, ts + i) + fscale*a(1:n, ss + i)
363 : END DO
364 : #endif
365 :
366 0 : CALL timestop(handle)
367 :
368 0 : END SUBROUTINE cp_fm_add_columns
369 :
370 : ! **************************************************************************************************
371 : !> \brief Computes the LU-decomposition of the matrix, and the determinant of the matrix
372 : !> IMPORTANT : the sign of the determinant is not defined correctly yet ....
373 : !> \param matrix_a ...
374 : !> \param almost_determinant ...
375 : !> \param correct_sign ...
376 : !> \par History
377 : !> added correct_sign 02.07 (fschiff)
378 : !> \author Joost VandeVondele
379 : !> \note
380 : !> - matrix_a is overwritten
381 : !> - the sign of the determinant might be wrong
382 : !> - SERIOUS WARNING (KNOWN BUG) : the sign of the determinant depends on ipivot
383 : !> - one should be able to find out if ipivot is an even or an odd permutation...
384 : !> if you need the correct sign, just add correct_sign==.TRUE. (fschiff)
385 : !> - Use cp_fm_get_diag instead of n times cp_fm_get_element (A. Bussy)
386 : ! **************************************************************************************************
387 0 : SUBROUTINE cp_fm_lu_decompose(matrix_a, almost_determinant, correct_sign)
388 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a
389 : REAL(KIND=dp), INTENT(OUT) :: almost_determinant
390 : LOGICAL, INTENT(IN), OPTIONAL :: correct_sign
391 :
392 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_lu_decompose'
393 :
394 : INTEGER :: handle, i, info, n
395 0 : INTEGER, ALLOCATABLE, DIMENSION(:) :: ipivot
396 : REAL(KIND=dp) :: determinant
397 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
398 : #if defined(__parallel)
399 : INTEGER, DIMENSION(9) :: desca
400 0 : REAL(KIND=dp), DIMENSION(:), POINTER :: diag
401 : #else
402 : INTEGER :: lda
403 : #endif
404 :
405 0 : CALL timeset(routineN, handle)
406 :
407 0 : a => matrix_a%local_data
408 0 : n = matrix_a%matrix_struct%nrow_global
409 0 : ALLOCATE (ipivot(n + matrix_a%matrix_struct%nrow_block))
410 :
411 : #if defined(__parallel)
412 : MARK_USED(correct_sign)
413 0 : desca(:) = matrix_a%matrix_struct%descriptor(:)
414 0 : CALL pdgetrf(n, n, a, 1, 1, desca, ipivot, info)
415 :
416 0 : ALLOCATE (diag(n))
417 0 : diag(:) = 0.0_dp
418 0 : CALL cp_fm_get_diag(matrix_a, diag)
419 0 : determinant = 1.0_dp
420 0 : DO i = 1, n
421 0 : determinant = determinant*diag(i)
422 : END DO
423 0 : DEALLOCATE (diag)
424 : #else
425 : lda = SIZE(a, 1)
426 : CALL dgetrf(n, n, a, lda, ipivot, info)
427 : determinant = 1.0_dp
428 : IF (correct_sign) THEN
429 : DO i = 1, n
430 : IF (ipivot(i) /= i) THEN
431 : determinant = -determinant*a(i, i)
432 : ELSE
433 : determinant = determinant*a(i, i)
434 : END IF
435 : END DO
436 : ELSE
437 : DO i = 1, n
438 : determinant = determinant*a(i, i)
439 : END DO
440 : END IF
441 : #endif
442 : ! info is allowed to be zero
443 : ! this does just signal a zero diagonal element
444 0 : DEALLOCATE (ipivot)
445 0 : almost_determinant = determinant ! notice that the sign is random
446 0 : CALL timestop(handle)
447 0 : END SUBROUTINE cp_fm_lu_decompose
448 :
449 : ! **************************************************************************************************
450 : !> \brief computes matrix_c = beta * matrix_c + alpha * ( matrix_a ** transa ) * ( matrix_b ** transb )
451 : !> \param transa : 'N' -> normal 'T' -> transpose
452 : !> alpha,beta :: can be 0.0_dp and 1.0_dp
453 : !> \param transb ...
454 : !> \param m ...
455 : !> \param n ...
456 : !> \param k ...
457 : !> \param alpha ...
458 : !> \param matrix_a : m x k matrix ( ! for transa = 'N')
459 : !> \param matrix_b : k x n matrix ( ! for transb = 'N')
460 : !> \param beta ...
461 : !> \param matrix_c : m x n matrix
462 : !> \param a_first_col ...
463 : !> \param a_first_row ...
464 : !> \param b_first_col : the k x n matrix starts at col b_first_col of matrix_b (avoid usage)
465 : !> \param b_first_row ...
466 : !> \param c_first_col ...
467 : !> \param c_first_row ...
468 : !> \author Matthias Krack
469 : !> \note
470 : !> matrix_c should have no overlap with matrix_a, matrix_b
471 : ! **************************************************************************************************
472 656 : SUBROUTINE cp_fm_gemm(transa, transb, m, n, k, alpha, matrix_a, matrix_b, beta, &
473 : matrix_c, a_first_col, a_first_row, b_first_col, b_first_row, &
474 : c_first_col, c_first_row)
475 :
476 : CHARACTER(LEN=1), INTENT(IN) :: transa, transb
477 : INTEGER, INTENT(IN) :: m, n, k
478 : REAL(KIND=dp), INTENT(IN) :: alpha
479 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_b
480 : REAL(KIND=dp), INTENT(IN) :: beta
481 : TYPE(cp_fm_type), INTENT(IN) :: matrix_c
482 : INTEGER, INTENT(IN), OPTIONAL :: a_first_col, a_first_row, &
483 : b_first_col, b_first_row, &
484 : c_first_col, c_first_row
485 :
486 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_gemm'
487 :
488 : INTEGER :: handle, i_a, i_b, i_c, j_a, &
489 : j_b, j_c
490 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, b, c
491 : #if defined(__parallel)
492 : INTEGER, DIMENSION(9) :: desca, descb, descc
493 : #else
494 : INTEGER :: lda, ldb, ldc
495 : #endif
496 :
497 656 : CALL timeset(routineN, handle)
498 :
499 : !sample peak memory
500 656 : CALL m_memory()
501 :
502 656 : a => matrix_a%local_data
503 656 : b => matrix_b%local_data
504 656 : c => matrix_c%local_data
505 :
506 656 : i_a = 1
507 656 : IF (PRESENT(a_first_row)) i_a = a_first_row
508 :
509 656 : j_a = 1
510 656 : IF (PRESENT(a_first_col)) j_a = a_first_col
511 :
512 656 : i_b = 1
513 656 : IF (PRESENT(b_first_row)) i_b = b_first_row
514 :
515 656 : j_b = 1
516 656 : IF (PRESENT(b_first_col)) j_b = b_first_col
517 :
518 656 : i_c = 1
519 656 : IF (PRESENT(c_first_row)) i_c = c_first_row
520 :
521 656 : j_c = 1
522 656 : IF (PRESENT(c_first_col)) j_c = c_first_col
523 :
524 : #if defined(__parallel)
525 :
526 6560 : desca(:) = matrix_a%matrix_struct%descriptor(:)
527 6560 : descb(:) = matrix_b%matrix_struct%descriptor(:)
528 6560 : descc(:) = matrix_c%matrix_struct%descriptor(:)
529 :
530 : CALL pdgemm(transa, transb, m, n, k, alpha, a, i_a, j_a, desca, b, i_b, j_b, &
531 656 : descb, beta, c, i_c, j_c, descc)
532 :
533 : #else
534 :
535 : lda = SIZE(a, 1)
536 : ldb = SIZE(b, 1)
537 : ldc = SIZE(c, 1)
538 :
539 : CALL dgemm(transa, transb, m, n, k, alpha, a(i_a, j_a), lda, b(i_b, j_b), ldb, beta, c(i_c, j_c), ldc)
540 :
541 : #endif
542 656 : CALL timestop(handle)
543 :
544 656 : END SUBROUTINE cp_fm_gemm
545 :
546 : ! **************************************************************************************************
547 : !> \brief computes matrix_c = beta * matrix_c + alpha * matrix_a * matrix_b
548 : !> computes matrix_c = beta * matrix_c + alpha * matrix_b * matrix_a
549 : !> where matrix_a is symmetric
550 : !> \param side : 'L' -> matrix_a is on the left 'R' -> matrix_a is on the right
551 : !> alpha,beta :: can be 0.0_dp and 1.0_dp
552 : !> \param uplo triangular format
553 : !> \param m ...
554 : !> \param n ...
555 : !> \param alpha ...
556 : !> \param matrix_a : m x m matrix
557 : !> \param matrix_b : m x n matrix
558 : !> \param beta ...
559 : !> \param matrix_c : m x n matrix
560 : !> \author Matthias Krack
561 : !> \note
562 : !> matrix_c should have no overlap with matrix_a, matrix_b
563 : !> all matrices in QS are triangular according to uplo
564 : !> matrix_a is always an m x m matrix
565 : !> typically slower than cp_fm_gemm (especially in parallel easily 50 percent)
566 : ! **************************************************************************************************
567 204608 : SUBROUTINE cp_fm_symm(side, uplo, m, n, alpha, matrix_a, matrix_b, beta, matrix_c)
568 :
569 : CHARACTER(LEN=1), INTENT(IN) :: side, uplo
570 : INTEGER, INTENT(IN) :: m, n
571 : REAL(KIND=dp), INTENT(IN) :: alpha
572 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_b
573 : REAL(KIND=dp), INTENT(IN) :: beta
574 : TYPE(cp_fm_type), INTENT(IN) :: matrix_c
575 :
576 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_symm'
577 :
578 : INTEGER :: handle
579 204608 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, b, c
580 : #if defined(__parallel)
581 : INTEGER, DIMENSION(9) :: desca, descb, descc
582 : #else
583 : INTEGER :: lda, ldb, ldc
584 : #endif
585 :
586 204608 : CALL timeset(routineN, handle)
587 :
588 204608 : a => matrix_a%local_data
589 204608 : b => matrix_b%local_data
590 204608 : c => matrix_c%local_data
591 :
592 : #if defined(__parallel)
593 :
594 2046080 : desca(:) = matrix_a%matrix_struct%descriptor(:)
595 2046080 : descb(:) = matrix_b%matrix_struct%descriptor(:)
596 2046080 : descc(:) = matrix_c%matrix_struct%descriptor(:)
597 :
598 204608 : CALL pdsymm(side, uplo, m, n, alpha, a(1, 1), 1, 1, desca, b(1, 1), 1, 1, descb, beta, c(1, 1), 1, 1, descc)
599 :
600 : #else
601 :
602 : lda = matrix_a%matrix_struct%local_leading_dimension
603 : ldb = matrix_b%matrix_struct%local_leading_dimension
604 : ldc = matrix_c%matrix_struct%local_leading_dimension
605 :
606 : CALL dsymm(side, uplo, m, n, alpha, a(1, 1), lda, b(1, 1), ldb, beta, c(1, 1), ldc)
607 :
608 : #endif
609 204608 : CALL timestop(handle)
610 :
611 204608 : END SUBROUTINE cp_fm_symm
612 :
613 : ! **************************************************************************************************
614 : !> \brief computes the Frobenius norm of matrix_a
615 : !> \brief computes the Frobenius norm of matrix_a
616 : !> \param matrix_a : m x n matrix
617 : !> \return ...
618 : !> \author VW
619 : ! **************************************************************************************************
620 7926 : FUNCTION cp_fm_frobenius_norm(matrix_a) RESULT(norm)
621 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a
622 : REAL(KIND=dp) :: norm
623 :
624 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_frobenius_norm'
625 :
626 : INTEGER :: handle, size_a
627 7926 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
628 : REAL(KIND=dp), EXTERNAL :: DDOT
629 : #if defined(__parallel)
630 : TYPE(mp_comm_type) :: group
631 : #endif
632 :
633 7926 : CALL timeset(routineN, handle)
634 :
635 : norm = 0.0_dp
636 7926 : a => matrix_a%local_data
637 7926 : size_a = SIZE(a, 1)*SIZE(a, 2)
638 7926 : norm = DDOT(size_a, a(1, 1), 1, a(1, 1), 1)
639 : #if defined(__parallel)
640 7926 : group = matrix_a%matrix_struct%para_env
641 7926 : CALL group%sum(norm)
642 : #endif
643 7926 : norm = SQRT(norm)
644 :
645 7926 : CALL timestop(handle)
646 :
647 7926 : END FUNCTION cp_fm_frobenius_norm
648 :
649 : ! **************************************************************************************************
650 : !> \brief performs a rank-k update of a symmetric matrix_c
651 : !> matrix_c = beta * matrix_c + alpha * matrix_a * transpose ( matrix_a )
652 : !> \param uplo : 'U' ('L')
653 : !> \param trans : 'N' ('T')
654 : !> \param k : number of cols to use in matrix_a
655 : !> ia,ja :: 1,1 (could be used for selecting subblock of a)
656 : !> \param alpha ...
657 : !> \param matrix_a ...
658 : !> \param ia ...
659 : !> \param ja ...
660 : !> \param beta ...
661 : !> \param matrix_c ...
662 : !> \author Matthias Krack
663 : ! **************************************************************************************************
664 10542 : SUBROUTINE cp_fm_syrk(uplo, trans, k, alpha, matrix_a, ia, ja, beta, matrix_c)
665 : CHARACTER(LEN=1), INTENT(IN) :: uplo, trans
666 : INTEGER, INTENT(IN) :: k
667 : REAL(KIND=dp), INTENT(IN) :: alpha
668 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a
669 : INTEGER, INTENT(IN) :: ia, ja
670 : REAL(KIND=dp), INTENT(IN) :: beta
671 : TYPE(cp_fm_type), INTENT(IN) :: matrix_c
672 :
673 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_syrk'
674 :
675 : INTEGER :: handle, n
676 10542 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, c
677 : #if defined(__parallel)
678 : INTEGER, DIMENSION(9) :: desca, descc
679 : #else
680 : INTEGER :: lda, ldc
681 : #endif
682 : #if defined (__HAS_IEEE_EXCEPTIONS)
683 : LOGICAL, DIMENSION(5) :: halt
684 : #endif
685 :
686 10542 : CALL timeset(routineN, handle)
687 :
688 10542 : n = matrix_c%matrix_struct%nrow_global
689 :
690 10542 : a => matrix_a%local_data
691 10542 : c => matrix_c%local_data
692 :
693 : #if defined (__HAS_IEEE_EXCEPTIONS)
694 : CALL ieee_get_halting_mode(IEEE_ALL, halt)
695 : CALL ieee_set_halting_mode(IEEE_ALL, .FALSE.)
696 : #endif
697 : #if defined(__parallel)
698 105420 : desca(:) = matrix_a%matrix_struct%descriptor(:)
699 105420 : descc(:) = matrix_c%matrix_struct%descriptor(:)
700 :
701 10542 : CALL pdsyrk(uplo, trans, n, k, alpha, a(1, 1), ia, ja, desca, beta, c(1, 1), 1, 1, descc)
702 : #else
703 : lda = SIZE(a, 1)
704 : ldc = SIZE(c, 1)
705 :
706 : CALL dsyrk(uplo, trans, n, k, alpha, a(ia, ja), lda, beta, c(1, 1), ldc)
707 : #endif
708 : #if defined (__HAS_IEEE_EXCEPTIONS)
709 : CALL ieee_set_halting_mode(IEEE_ALL, halt)
710 : #endif
711 10542 : CALL timestop(handle)
712 :
713 10542 : END SUBROUTINE cp_fm_syrk
714 :
715 : ! **************************************************************************************************
716 : !> \brief computes the schur product of two matrices
717 : !> c_ij = a_ij * b_ij
718 : !> \param matrix_a ...
719 : !> \param matrix_b ...
720 : !> \param matrix_c ...
721 : !> \author Joost VandeVondele
722 : ! **************************************************************************************************
723 9218 : SUBROUTINE cp_fm_schur_product(matrix_a, matrix_b, matrix_c)
724 :
725 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_b, matrix_c
726 :
727 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_schur_product'
728 :
729 : INTEGER :: handle, icol_local, irow_local, mypcol, &
730 : myprow, ncol_local, &
731 : nrow_local
732 9218 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, b, c
733 : TYPE(cp_blacs_env_type), POINTER :: context
734 :
735 9218 : CALL timeset(routineN, handle)
736 :
737 9218 : context => matrix_a%matrix_struct%context
738 9218 : myprow = context%mepos(1)
739 9218 : mypcol = context%mepos(2)
740 :
741 9218 : a => matrix_a%local_data
742 9218 : b => matrix_b%local_data
743 9218 : c => matrix_c%local_data
744 :
745 9218 : nrow_local = matrix_a%matrix_struct%nrow_locals(myprow)
746 9218 : ncol_local = matrix_a%matrix_struct%ncol_locals(mypcol)
747 :
748 99168 : DO icol_local = 1, ncol_local
749 6670463 : DO irow_local = 1, nrow_local
750 6661245 : c(irow_local, icol_local) = a(irow_local, icol_local)*b(irow_local, icol_local)
751 : END DO
752 : END DO
753 :
754 9218 : CALL timestop(handle)
755 :
756 9218 : END SUBROUTINE cp_fm_schur_product
757 :
758 : ! **************************************************************************************************
759 : !> \brief returns the trace of matrix_a^T matrix_b, i.e
760 : !> sum_{i,j}(matrix_a(i,j)*matrix_b(i,j))
761 : !> \param matrix_a a matrix
762 : !> \param matrix_b another matrix
763 : !> \param trace ...
764 : !> \par History
765 : !> 11.06.2001 Creation (Matthias Krack)
766 : !> 12.2002 added doc [fawzi]
767 : !> \author Matthias Krack
768 : !> \note
769 : !> note the transposition of matrix_a!
770 : ! **************************************************************************************************
771 577496 : SUBROUTINE cp_fm_trace_a0b0t0(matrix_a, matrix_b, trace)
772 :
773 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_b
774 : REAL(KIND=dp), INTENT(OUT) :: trace
775 :
776 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a0b0t0'
777 :
778 : INTEGER :: handle, mypcol, myprow, ncol_local, &
779 : nrow_local
780 577496 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, b
781 : TYPE(cp_blacs_env_type), POINTER :: context
782 : TYPE(mp_comm_type) :: group
783 :
784 577496 : CALL timeset(routineN, handle)
785 :
786 577496 : context => matrix_a%matrix_struct%context
787 577496 : myprow = context%mepos(1)
788 577496 : mypcol = context%mepos(2)
789 :
790 577496 : group = matrix_a%matrix_struct%para_env
791 :
792 577496 : a => matrix_a%local_data
793 577496 : b => matrix_b%local_data
794 :
795 577496 : nrow_local = MIN(matrix_a%matrix_struct%nrow_locals(myprow), matrix_b%matrix_struct%nrow_locals(myprow))
796 577496 : ncol_local = MIN(matrix_a%matrix_struct%ncol_locals(mypcol), matrix_b%matrix_struct%ncol_locals(mypcol))
797 :
798 : ! cries for an accurate_dot_product
799 : trace = accurate_dot_product(a(1:nrow_local, 1:ncol_local), &
800 577496 : b(1:nrow_local, 1:ncol_local))
801 :
802 577496 : CALL group%sum(trace)
803 :
804 577496 : CALL timestop(handle)
805 :
806 577496 : END SUBROUTINE cp_fm_trace_a0b0t0
807 :
808 : #:mute
809 : #:set types = [("cp_fm_type", "a", ""), ("cp_fm_p_type", "p","%matrix")]
810 : #:endmute
811 :
812 : ! **************************************************************************************************
813 : !> \brief Compute trace(k) = Tr (matrix_a(k)^T matrix_b) for each pair of matrices A_k and B.
814 : !> \param matrix_a list of A matrices
815 : !> \param matrix_b B matrix
816 : !> \param trace computed traces
817 : !> \par History
818 : !> * 08.2018 forked from cp_fm_trace() [Sergey Chulkov]
819 : !> \note \parblock
820 : !> Computing the trace requires collective communication between involved MPI processes
821 : !> that implies a synchronisation point between them. The aim of this subroutine is to reduce
822 : !> the amount of time wasted in such synchronisation by performing one large collective
823 : !> operation which involves all the matrices in question.
824 : !>
825 : !> The subroutine's suffix reflects dimensionality of dummy arrays; 'a1b0t1' means that
826 : !> the dummy variables 'matrix_a' and 'trace' are 1-dimensional arrays, while the variable
827 : !> 'matrix_b' is a single matrix.
828 : !> \endparblock
829 : ! **************************************************************************************************
830 : #:for longname, shortname, appendix in types
831 3930 : SUBROUTINE cp_fm_trace_a1b0t1_${shortname}$ (matrix_a, matrix_b, trace)
832 : TYPE(${longname}$), DIMENSION(:), INTENT(IN) :: matrix_a
833 : TYPE(cp_fm_type), INTENT(IN) :: matrix_b
834 : REAL(kind=dp), DIMENSION(:), INTENT(OUT) :: trace
835 :
836 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a1b0t1_${shortname}$'
837 :
838 : INTEGER :: handle, imatrix, n_matrices, &
839 : ncols_local, nrows_local
840 3930 : REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
841 : TYPE(mp_comm_type) :: group
842 :
843 3930 : CALL timeset(routineN, handle)
844 :
845 3930 : n_matrices = SIZE(trace)
846 3930 : CPASSERT(SIZE(matrix_a) == n_matrices)
847 :
848 3930 : CALL cp_fm_get_info(matrix_b, nrow_local=nrows_local, ncol_local=ncols_local)
849 :
850 3930 : ldata_b => matrix_b%local_data(1:nrows_local, 1:ncols_local)
851 :
852 : !$OMP PARALLEL DO DEFAULT(NONE), &
853 : !$OMP PRIVATE(imatrix, ldata_a), &
854 : !$OMP SHARED(ldata_b, matrix_a, matrix_b), &
855 3930 : !$OMP SHARED(ncols_local, nrows_local, n_matrices, trace)
856 :
857 : DO imatrix = 1, n_matrices
858 :
859 : ! assume that the matrices A(i) and B have identical shapes and distribution schemes
860 : ldata_a => matrix_a(imatrix) ${appendix}$%local_data(1:nrows_local, 1:ncols_local)
861 : trace(imatrix) = accurate_dot_product(ldata_a, ldata_b)
862 : END DO
863 : !$OMP END PARALLEL DO
864 :
865 3930 : group = matrix_b%matrix_struct%para_env
866 23250 : CALL group%sum(trace)
867 :
868 3930 : CALL timestop(handle)
869 3930 : END SUBROUTINE cp_fm_trace_a1b0t1_${shortname}$
870 : #:endfor
871 :
872 : ! **************************************************************************************************
873 : !> \brief Compute trace(k) = Tr (matrix_a(k)^T matrix_b(k)) for each pair of matrices A_k and B_k.
874 : !> \param matrix_a list of A matrices
875 : !> \param matrix_b list of B matrices
876 : !> \param trace computed traces
877 : !> \param accurate ...
878 : !> \par History
879 : !> * 11.2016 forked from cp_fm_trace() [Sergey Chulkov]
880 : !> \note \parblock
881 : !> Computing the trace requires collective communication between involved MPI processes
882 : !> that implies a synchronisation point between them. The aim of this subroutine is to reduce
883 : !> the amount of time wasted in such synchronisation by performing one large collective
884 : !> operation which involves all the matrices in question.
885 : !>
886 : !> The subroutine's suffix reflects dimensionality of dummy arrays; 'a1b1t1' means that
887 : !> all dummy variables (matrix_a, matrix_b, and trace) are 1-dimensional arrays.
888 : !> \endparblock
889 : ! **************************************************************************************************
890 : #:for longname1, shortname1, appendix1 in types
891 : #:for longname2, shortname2, appendix2 in types
892 172132 : SUBROUTINE cp_fm_trace_a1b1t1_${shortname1}$${shortname2}$ (matrix_a, matrix_b, trace, accurate)
893 : TYPE(${longname1}$), DIMENSION(:), INTENT(IN) :: matrix_a
894 : TYPE(${longname2}$), DIMENSION(:), INTENT(IN) :: matrix_b
895 : REAL(kind=dp), DIMENSION(:), INTENT(OUT) :: trace
896 : LOGICAL, INTENT(IN), OPTIONAL :: accurate
897 :
898 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a1b1t1_${shortname1}$${shortname2}$'
899 :
900 : INTEGER :: handle, imatrix, n_matrices, &
901 : ncols_local, nrows_local
902 : LOGICAL :: use_accurate_sum
903 172132 : REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
904 : TYPE(mp_comm_type) :: group
905 :
906 172132 : CALL timeset(routineN, handle)
907 :
908 172132 : n_matrices = SIZE(trace)
909 172132 : CPASSERT(SIZE(matrix_a) == n_matrices)
910 172132 : CPASSERT(SIZE(matrix_b) == n_matrices)
911 :
912 172132 : use_accurate_sum = .TRUE.
913 172132 : IF (PRESENT(accurate)) use_accurate_sum = accurate
914 :
915 : !$OMP PARALLEL DO DEFAULT(NONE), &
916 : !$OMP PRIVATE(imatrix, ldata_a, ldata_b, ncols_local), &
917 : !$OMP PRIVATE(nrows_local), &
918 172132 : !$OMP SHARED(matrix_a, matrix_b, n_matrices, trace, use_accurate_sum)
919 : DO imatrix = 1, n_matrices
920 : CALL cp_fm_get_info(matrix_a(imatrix) ${appendix1}$, nrow_local=nrows_local, ncol_local=ncols_local)
921 :
922 : ! assume that the matrices A(i) and B(i) have identical shapes and distribution schemes
923 : ldata_a => matrix_a(imatrix) ${appendix1}$%local_data(1:nrows_local, 1:ncols_local)
924 : ldata_b => matrix_b(imatrix) ${appendix2}$%local_data(1:nrows_local, 1:ncols_local)
925 : IF (use_accurate_sum) THEN
926 : trace(imatrix) = accurate_dot_product(ldata_a, ldata_b)
927 : ELSE
928 : trace(imatrix) = SUM(ldata_a*ldata_b)
929 : END IF
930 : END DO
931 : !$OMP END PARALLEL DO
932 :
933 172132 : group = matrix_a(1) ${appendix1}$%matrix_struct%para_env
934 577196 : CALL group%sum(trace)
935 :
936 172132 : CALL timestop(handle)
937 172132 : END SUBROUTINE cp_fm_trace_a1b1t1_${shortname1}$${shortname2}$
938 : #:endfor
939 : #:endfor
940 :
941 : ! **************************************************************************************************
942 : !> \brief Compute trace(i,j) = \sum_k Tr (matrix_a(k,i)^T matrix_b(k,j)).
943 : !> \param matrix_a list of A matrices
944 : !> \param matrix_b list of B matrices
945 : !> \param trace computed traces
946 : !> \param accurate ...
947 : ! **************************************************************************************************
948 : #:for longname1, shortname1, appendix1 in types
949 : #:for longname2, shortname2, appendix2 in types
950 16874 : SUBROUTINE cp_fm_contracted_trace_a2b2t2_${shortname1}$${shortname2}$ (matrix_a, matrix_b, trace, accurate)
951 : TYPE(${longname1}$), DIMENSION(:, :), INTENT(IN) :: matrix_a
952 : TYPE(${longname2}$), DIMENSION(:, :), INTENT(IN) :: matrix_b
953 : REAL(kind=dp), DIMENSION(:, :), INTENT(OUT) :: trace
954 : LOGICAL, INTENT(IN), OPTIONAL :: accurate
955 :
956 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_contracted_trace_a2b2t2_${shortname1}$${shortname2}$'
957 :
958 : INTEGER :: handle, ia, ib, iz, na, nb, ncols_local, &
959 : nrows_local, nz
960 : INTEGER(kind=int_8) :: ib8, itrace, na8, ntraces
961 : LOGICAL :: use_accurate_sum
962 : REAL(kind=dp) :: t
963 16874 : REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
964 : TYPE(mp_comm_type) :: group
965 :
966 16874 : CALL timeset(routineN, handle)
967 :
968 16874 : nz = SIZE(matrix_a, 1)
969 16874 : CPASSERT(SIZE(matrix_b, 1) == nz)
970 :
971 16874 : na = SIZE(matrix_a, 2)
972 16874 : nb = SIZE(matrix_b, 2)
973 16874 : CPASSERT(SIZE(trace, 1) == na)
974 16874 : CPASSERT(SIZE(trace, 2) == nb)
975 :
976 16874 : use_accurate_sum = .TRUE.
977 16874 : IF (PRESENT(accurate)) use_accurate_sum = accurate
978 :
979 : ! here we use one running index (itrace) instead of two (ia, ib) in order to
980 : ! improve load balance between shared-memory threads
981 16874 : ntraces = na*nb
982 16874 : na8 = INT(na, kind=int_8)
983 :
984 : !$OMP PARALLEL DO DEFAULT(NONE), &
985 : !$OMP PRIVATE(ia, ib, ib8, itrace, iz, ldata_a, ldata_b, ncols_local), &
986 : !$OMP PRIVATE(nrows_local, t), &
987 16874 : !$OMP SHARED(matrix_a, matrix_b, na, na8, nb, ntraces, nz, trace, use_accurate_sum)
988 : DO itrace = 1, ntraces
989 : ib8 = (itrace - 1)/na8
990 : ia = INT(itrace - ib8*na8)
991 : ib = INT(ib8) + 1
992 :
993 : t = 0.0_dp
994 : DO iz = 1, nz
995 : CALL cp_fm_get_info(matrix_a(iz, ia) ${appendix1}$, nrow_local=nrows_local, ncol_local=ncols_local)
996 :
997 : ! assume that the matrices A(iz, ia) and B(iz, ib) have identical shapes and distribution schemes
998 : ldata_a => matrix_a(iz, ia) ${appendix1}$%local_data(1:nrows_local, 1:ncols_local)
999 : ldata_b => matrix_b(iz, ib) ${appendix2}$%local_data(1:nrows_local, 1:ncols_local)
1000 : IF (use_accurate_sum) THEN
1001 : t = t + accurate_dot_product(ldata_a, ldata_b)
1002 : ELSE
1003 : t = t + SUM(ldata_a*ldata_b)
1004 : END IF
1005 : END DO
1006 : trace(ia, ib) = t
1007 : END DO
1008 : !$OMP END PARALLEL DO
1009 :
1010 16874 : group = matrix_a(1, 1) ${appendix1}$%matrix_struct%para_env
1011 766714 : CALL group%sum(trace)
1012 :
1013 16874 : CALL timestop(handle)
1014 16874 : END SUBROUTINE cp_fm_contracted_trace_a2b2t2_${shortname1}$${shortname2}$
1015 : #:endfor
1016 : #:endfor
1017 :
1018 : ! **************************************************************************************************
1019 : !> \brief multiplies in place by a triangular matrix:
1020 : !> matrix_b = alpha op(triangular_matrix) matrix_b
1021 : !> or (if side='R')
1022 : !> matrix_b = alpha matrix_b op(triangular_matrix)
1023 : !> op(triangular_matrix) is:
1024 : !> triangular_matrix (if transpose_tr=.false. and invert_tr=.false.)
1025 : !> triangular_matrix^T (if transpose_tr=.true. and invert_tr=.false.)
1026 : !> triangular_matrix^(-1) (if transpose_tr=.false. and invert_tr=.true.)
1027 : !> triangular_matrix^(-T) (if transpose_tr=.true. and invert_tr=.true.)
1028 : !> \param triangular_matrix the triangular matrix that multiplies the other
1029 : !> \param matrix_b the matrix that gets multiplied and stores the result
1030 : !> \param side on which side of matrix_b stays op(triangular_matrix)
1031 : !> (defaults to 'L')
1032 : !> \param transpose_tr if the triangular matrix should be transposed
1033 : !> (defaults to false)
1034 : !> \param invert_tr if the triangular matrix should be inverted
1035 : !> (defaults to false)
1036 : !> \param uplo_tr if triangular_matrix is stored in the upper ('U') or
1037 : !> lower ('L') triangle (defaults to 'U')
1038 : !> \param unit_diag_tr if the diagonal elements of triangular_matrix should
1039 : !> be assumed to be 1 (defaults to false)
1040 : !> \param n_rows the number of rows of the result (defaults to
1041 : !> size(matrix_b,1))
1042 : !> \param n_cols the number of columns of the result (defaults to
1043 : !> size(matrix_b,2))
1044 : !> \param alpha ...
1045 : !> \par History
1046 : !> 08.2002 created [fawzi]
1047 : !> \author Fawzi Mohamed
1048 : !> \note
1049 : !> needs an mpi env
1050 : ! **************************************************************************************************
1051 122468 : SUBROUTINE cp_fm_triangular_multiply(triangular_matrix, matrix_b, side, &
1052 : transpose_tr, invert_tr, uplo_tr, unit_diag_tr, n_rows, n_cols, &
1053 : alpha)
1054 : TYPE(cp_fm_type), INTENT(IN) :: triangular_matrix, matrix_b
1055 : CHARACTER, INTENT(IN), OPTIONAL :: side
1056 : LOGICAL, INTENT(IN), OPTIONAL :: transpose_tr, invert_tr
1057 : CHARACTER, INTENT(IN), OPTIONAL :: uplo_tr
1058 : LOGICAL, INTENT(IN), OPTIONAL :: unit_diag_tr
1059 : INTEGER, INTENT(IN), OPTIONAL :: n_rows, n_cols
1060 : REAL(KIND=dp), INTENT(IN), OPTIONAL :: alpha
1061 :
1062 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_triangular_multiply'
1063 :
1064 : CHARACTER :: side_char, transa, unit_diag, uplo
1065 : INTEGER :: handle, mdim, m, n
1066 : LOGICAL :: invert
1067 : REAL(KIND=dp) :: al
1068 :
1069 61234 : CALL timeset(routineN, handle)
1070 61234 : side_char = 'L'
1071 61234 : unit_diag = 'N'
1072 61234 : uplo = 'U'
1073 61234 : transa = 'N'
1074 61234 : invert = .FALSE.
1075 61234 : al = 1.0_dp
1076 61234 : CALL cp_fm_get_info(matrix_b, nrow_global=m, ncol_global=n)
1077 61234 : IF (PRESENT(side)) side_char = side
1078 : mdim = MERGE(1, 2, 'L' == side_char)
1079 61234 : IF (PRESENT(invert_tr)) invert = invert_tr
1080 61234 : IF (PRESENT(uplo_tr)) uplo = uplo_tr
1081 61234 : IF (PRESENT(unit_diag_tr)) THEN
1082 0 : IF (unit_diag_tr) THEN
1083 0 : unit_diag = 'U'
1084 : ELSE
1085 : unit_diag = 'N'
1086 : END IF
1087 : END IF
1088 61234 : IF (PRESENT(transpose_tr)) THEN
1089 4648 : IF (transpose_tr) THEN
1090 2032 : transa = 'T'
1091 : ELSE
1092 : transa = 'N'
1093 : END IF
1094 : END IF
1095 61234 : IF (PRESENT(alpha)) al = alpha
1096 61234 : IF (PRESENT(n_rows)) m = n_rows
1097 61234 : IF (PRESENT(n_cols)) n = n_cols
1098 :
1099 61234 : IF (invert) THEN
1100 :
1101 : #if defined(__parallel)
1102 : CALL pdtrsm(side_char, uplo, transa, unit_diag, m, n, al, &
1103 : triangular_matrix%local_data(1, 1), 1, 1, &
1104 : triangular_matrix%matrix_struct%descriptor, &
1105 : matrix_b%local_data(1, 1), 1, 1, &
1106 47256 : matrix_b%matrix_struct%descriptor(1))
1107 : #else
1108 : CALL dtrsm(side_char, uplo, transa, unit_diag, m, n, al, &
1109 : triangular_matrix%local_data(1, 1), &
1110 : SIZE(triangular_matrix%local_data, mdim), &
1111 : matrix_b%local_data(1, 1), SIZE(matrix_b%local_data, 1))
1112 : #endif
1113 :
1114 : ELSE
1115 :
1116 : #if defined(__parallel)
1117 : CALL pdtrmm(side_char, uplo, transa, unit_diag, m, n, al, &
1118 : triangular_matrix%local_data(1, 1), 1, 1, &
1119 : triangular_matrix%matrix_struct%descriptor, &
1120 : matrix_b%local_data(1, 1), 1, 1, &
1121 13978 : matrix_b%matrix_struct%descriptor(1))
1122 : #else
1123 : CALL dtrmm(side_char, uplo, transa, unit_diag, m, n, al, &
1124 : triangular_matrix%local_data(1, 1), &
1125 : SIZE(triangular_matrix%local_data, mdim), &
1126 : matrix_b%local_data(1, 1), SIZE(matrix_b%local_data, 1))
1127 : #endif
1128 :
1129 : END IF
1130 :
1131 61234 : CALL timestop(handle)
1132 61234 : END SUBROUTINE cp_fm_triangular_multiply
1133 :
1134 : ! **************************************************************************************************
1135 : !> \brief scales a matrix
1136 : !> matrix_a = alpha * matrix_b
1137 : !> \param alpha ...
1138 : !> \param matrix_a ...
1139 : !> \note
1140 : !> use cp_fm_set_all to zero (avoids problems with nan)
1141 : ! **************************************************************************************************
1142 203445 : SUBROUTINE cp_fm_scale(alpha, matrix_a)
1143 : REAL(KIND=dp), INTENT(IN) :: alpha
1144 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a
1145 :
1146 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_scale'
1147 :
1148 : INTEGER :: handle, size_a
1149 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
1150 :
1151 203445 : CALL timeset(routineN, handle)
1152 :
1153 : NULLIFY (a)
1154 :
1155 203445 : a => matrix_a%local_data
1156 203445 : size_a = SIZE(a, 1)*SIZE(a, 2)
1157 :
1158 203445 : CALL DSCAL(size_a, alpha, a, 1)
1159 :
1160 203445 : CALL timestop(handle)
1161 :
1162 203445 : END SUBROUTINE cp_fm_scale
1163 :
1164 : ! **************************************************************************************************
1165 : !> \brief transposes a matrix
1166 : !> matrixt = matrix ^ T
1167 : !> \param matrix ...
1168 : !> \param matrixt ...
1169 : !> \note
1170 : !> all matrix elements are transposed (see cp_fm_uplo_to_full to symmetrise a matrix)
1171 : ! **************************************************************************************************
1172 21692 : SUBROUTINE cp_fm_transpose(matrix, matrixt)
1173 : TYPE(cp_fm_type), INTENT(IN) :: matrix, matrixt
1174 :
1175 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_transpose'
1176 :
1177 : INTEGER :: handle, ncol_global, &
1178 : nrow_global, ncol_globalt, nrow_globalt
1179 10846 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, c
1180 : #if defined(__parallel)
1181 : INTEGER, DIMENSION(9) :: desca, descc
1182 : #elif !defined(__MKL)
1183 : INTEGER :: i, j
1184 : #endif
1185 :
1186 10846 : nrow_global = matrix%matrix_struct%nrow_global
1187 10846 : ncol_global = matrix%matrix_struct%ncol_global
1188 10846 : nrow_globalt = matrixt%matrix_struct%nrow_global
1189 10846 : ncol_globalt = matrixt%matrix_struct%ncol_global
1190 0 : CPASSERT(nrow_global == ncol_globalt)
1191 10846 : CPASSERT(nrow_globalt == ncol_global)
1192 :
1193 10846 : CALL timeset(routineN, handle)
1194 :
1195 10846 : a => matrix%local_data
1196 10846 : c => matrixt%local_data
1197 :
1198 : #if defined(__parallel)
1199 108460 : desca(:) = matrix%matrix_struct%descriptor(:)
1200 108460 : descc(:) = matrixt%matrix_struct%descriptor(:)
1201 10846 : CALL pdtran(ncol_global, nrow_global, 1.0_dp, a(1, 1), 1, 1, desca, 0.0_dp, c(1, 1), 1, 1, descc)
1202 : #elif defined(__MKL)
1203 : CALL mkl_domatcopy('C', 'T', nrow_global, ncol_global, 1.0_dp, a(1, 1), nrow_global, c(1, 1), ncol_global)
1204 : #else
1205 : DO j = 1, ncol_global
1206 : DO i = 1, nrow_global
1207 : c(j, i) = a(i, j)
1208 : END DO
1209 : END DO
1210 : #endif
1211 10846 : CALL timestop(handle)
1212 :
1213 10846 : END SUBROUTINE cp_fm_transpose
1214 :
1215 : ! **************************************************************************************************
1216 : !> \brief given a triangular matrix according to uplo, computes the corresponding full matrix
1217 : !> \param matrix the triangular matrix as input, the full matrix as output
1218 : !> \param work a matrix of the same size as matrix
1219 : !> \param uplo triangular format; defaults to 'U'
1220 : !> \author Matthias Krack
1221 : !> \note
1222 : !> the opposite triangular part is irrelevant
1223 : ! **************************************************************************************************
1224 354674 : SUBROUTINE cp_fm_uplo_to_full(matrix, work, uplo)
1225 :
1226 : TYPE(cp_fm_type), INTENT(IN) :: matrix, work
1227 : CHARACTER, INTENT(IN), OPTIONAL :: uplo
1228 :
1229 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_uplo_to_full'
1230 :
1231 : CHARACTER :: myuplo
1232 : INTEGER :: handle, icol_global, irow_global, &
1233 : mypcol, myprow, ncol_global, &
1234 : nrow_global
1235 177337 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
1236 : TYPE(cp_blacs_env_type), POINTER :: context
1237 :
1238 : #if defined(__parallel)
1239 : INTEGER :: icol_local, irow_local, &
1240 : ncol_block, ncol_local, &
1241 : nrow_block, nrow_local
1242 : INTEGER, DIMENSION(9) :: desca, descc
1243 177337 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: c
1244 : #endif
1245 :
1246 177337 : myuplo = 'U'
1247 0 : IF (PRESENT(uplo)) myuplo = uplo
1248 :
1249 177337 : nrow_global = matrix%matrix_struct%nrow_global
1250 177337 : ncol_global = matrix%matrix_struct%ncol_global
1251 177337 : CPASSERT(nrow_global == ncol_global)
1252 177337 : nrow_global = work%matrix_struct%nrow_global
1253 177337 : ncol_global = work%matrix_struct%ncol_global
1254 177337 : CPASSERT(nrow_global == ncol_global)
1255 :
1256 177337 : CALL timeset(routineN, handle)
1257 :
1258 177337 : context => matrix%matrix_struct%context
1259 177337 : myprow = context%mepos(1)
1260 177337 : mypcol = context%mepos(2)
1261 :
1262 : #if defined(__parallel)
1263 :
1264 177337 : nrow_block = matrix%matrix_struct%nrow_block
1265 177337 : ncol_block = matrix%matrix_struct%ncol_block
1266 :
1267 177337 : nrow_local = matrix%matrix_struct%nrow_locals(myprow)
1268 177337 : ncol_local = matrix%matrix_struct%ncol_locals(mypcol)
1269 :
1270 177337 : a => work%local_data
1271 1773370 : desca(:) = work%matrix_struct%descriptor(:)
1272 177337 : c => matrix%local_data
1273 1773370 : descc(:) = matrix%matrix_struct%descriptor(:)
1274 :
1275 5671817 : DO icol_local = 1, ncol_local
1276 5494480 : icol_global = matrix%matrix_struct%col_indices(icol_local)
1277 234497952 : DO irow_local = 1, nrow_local
1278 228826135 : irow_global = matrix%matrix_struct%row_indices(irow_local)
1279 234320615 : IF (MERGE(irow_global > icol_global, irow_global < icol_global, (myuplo == "U") .OR. (myuplo == "u"))) THEN
1280 112900060 : c(irow_local, icol_local) = 0.0_dp
1281 115926075 : ELSE IF (irow_global == icol_global) THEN
1282 3026015 : c(irow_local, icol_local) = 0.5_dp*c(irow_local, icol_local)
1283 : END IF
1284 : END DO
1285 : END DO
1286 :
1287 5671817 : DO icol_local = 1, ncol_local
1288 234497952 : DO irow_local = 1, nrow_local
1289 234320615 : a(irow_local, icol_local) = c(irow_local, icol_local)
1290 : END DO
1291 : END DO
1292 :
1293 177337 : CALL pdtran(nrow_global, ncol_global, 1.0_dp, a(1, 1), 1, 1, desca, 1.0_dp, c(1, 1), 1, 1, descc)
1294 :
1295 : #else
1296 :
1297 : a => matrix%local_data
1298 :
1299 : IF ((myuplo == "U") .OR. (myuplo == "u")) THEN
1300 : DO irow_global = 1, nrow_global
1301 : DO icol_global = irow_global + 1, ncol_global
1302 : a(icol_global, irow_global) = a(irow_global, icol_global)
1303 : END DO
1304 : END DO
1305 : ELSE
1306 : DO icol_global = 1, ncol_global
1307 : DO irow_global = icol_global + 1, nrow_global
1308 : a(irow_global, icol_global) = a(icol_global, irow_global)
1309 : END DO
1310 : END DO
1311 : END IF
1312 :
1313 : #endif
1314 177337 : CALL timestop(handle)
1315 :
1316 177337 : END SUBROUTINE cp_fm_uplo_to_full
1317 :
1318 : ! **************************************************************************************************
1319 : !> \brief scales column i of matrix a with scaling(i)
1320 : !> \param matrixa ...
1321 : !> \param scaling : an array used for scaling the columns,
1322 : !> SIZE(scaling) determines the number of columns to be scaled
1323 : !> \author Joost VandeVondele
1324 : !> \note
1325 : !> this is very useful as a first step in the computation of C = sum_i alpha_i A_i transpose (A_i)
1326 : !> that is a rank-k update (cp_fm_syrk , cp_sm_plus_fm_fm_t)
1327 : !> this procedure can be up to 20 times faster than calling cp_fm_syrk n times
1328 : !> where every vector has a different prefactor
1329 : ! **************************************************************************************************
1330 248232 : SUBROUTINE cp_fm_column_scale(matrixa, scaling)
1331 : TYPE(cp_fm_type), INTENT(IN) :: matrixa
1332 : REAL(KIND=dp), DIMENSION(:), INTENT(IN) :: scaling
1333 :
1334 : INTEGER :: k, mypcol, myprow, n, ncol_global, &
1335 : npcol, nprow
1336 248232 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
1337 : #if defined(__parallel)
1338 : INTEGER :: icol_global, icol_local, &
1339 : ipcol, iprow, irow_local
1340 : #else
1341 : INTEGER :: i
1342 : #endif
1343 :
1344 248232 : myprow = matrixa%matrix_struct%context%mepos(1)
1345 248232 : mypcol = matrixa%matrix_struct%context%mepos(2)
1346 248232 : nprow = matrixa%matrix_struct%context%num_pe(1)
1347 248232 : npcol = matrixa%matrix_struct%context%num_pe(2)
1348 :
1349 248232 : ncol_global = matrixa%matrix_struct%ncol_global
1350 :
1351 248232 : a => matrixa%local_data
1352 248232 : n = SIZE(a, 1)
1353 248232 : k = MIN(SIZE(scaling), ncol_global)
1354 :
1355 : #if defined(__parallel)
1356 :
1357 3893581 : DO icol_global = 1, k
1358 : CALL infog2l(1, icol_global, matrixa%matrix_struct%descriptor, &
1359 : nprow, npcol, myprow, mypcol, &
1360 3645349 : irow_local, icol_local, iprow, ipcol)
1361 3893581 : IF ((ipcol == mypcol)) THEN
1362 3645349 : CALL DSCAL(n, scaling(icol_global), a(:, icol_local), 1)
1363 : END IF
1364 : END DO
1365 : #else
1366 : DO i = 1, k
1367 : CALL DSCAL(n, scaling(i), a(:, i), 1)
1368 : END DO
1369 : #endif
1370 248232 : END SUBROUTINE cp_fm_column_scale
1371 :
1372 : ! **************************************************************************************************
1373 : !> \brief scales row i of matrix a with scaling(i)
1374 : !> \param matrixa ...
1375 : !> \param scaling : an array used for scaling the columns,
1376 : !> \author JGH
1377 : !> \note
1378 : ! **************************************************************************************************
1379 6322 : SUBROUTINE cp_fm_row_scale(matrixa, scaling)
1380 : TYPE(cp_fm_type), INTENT(IN) :: matrixa
1381 : REAL(KIND=dp), DIMENSION(:), INTENT(IN) :: scaling
1382 :
1383 : INTEGER :: n, m, nrow_global, nrow_local, ncol_local
1384 6322 : INTEGER, DIMENSION(:), POINTER :: row_indices
1385 6322 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
1386 : #if defined(__parallel)
1387 : INTEGER :: irow_global, icol, irow
1388 : #else
1389 : INTEGER :: j
1390 : #endif
1391 :
1392 : CALL cp_fm_get_info(matrixa, row_indices=row_indices, nrow_global=nrow_global, &
1393 6322 : nrow_local=nrow_local, ncol_local=ncol_local)
1394 6322 : CPASSERT(SIZE(scaling) == nrow_global)
1395 :
1396 6322 : a => matrixa%local_data
1397 6322 : n = SIZE(a, 1)
1398 6322 : m = SIZE(a, 2)
1399 :
1400 : #if defined(__parallel)
1401 79038 : DO icol = 1, ncol_local
1402 6488000 : DO irow = 1, nrow_local
1403 6408962 : irow_global = row_indices(irow)
1404 6481678 : a(irow, icol) = scaling(irow_global)*a(irow, icol)
1405 : END DO
1406 : END DO
1407 : #else
1408 : DO j = 1, m
1409 : a(1:n, j) = scaling(1:n)*a(1:n, j)
1410 : END DO
1411 : #endif
1412 6322 : END SUBROUTINE cp_fm_row_scale
1413 :
1414 : ! **************************************************************************************************
1415 : !> \brief Inverts a cp_fm_type matrix, optionally returning the determinant of the input matrix
1416 : !> \param matrix_a the matrix to invert
1417 : !> \param matrix_inverse the inverse of matrix_a
1418 : !> \param det_a the determinant of matrix_a
1419 : !> \param eps_svd optional parameter to active SVD based inversion, singular values below eps_svd
1420 : !> are screened
1421 : !> \param eigval optionally return matrix eigenvalues/singular values
1422 : !> \par History
1423 : !> note of Jan Wilhelm (12.2015)
1424 : !> - computation of determinant corrected
1425 : !> - determinant only computed if det_a is present
1426 : !> 12.2016 added option to use SVD instead of LU [Nico Holmberg]
1427 : !> - Use cp_fm_get diag instead of n times cp_fm_get_element (A. Bussy)
1428 : !> \author Florian Schiffmann(02.2007)
1429 : ! **************************************************************************************************
1430 1860 : SUBROUTINE cp_fm_invert(matrix_a, matrix_inverse, det_a, eps_svd, eigval)
1431 :
1432 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_inverse
1433 : REAL(KIND=dp), INTENT(OUT), OPTIONAL :: det_a
1434 : REAL(KIND=dp), INTENT(IN), OPTIONAL :: eps_svd
1435 : REAL(KIND=dp), DIMENSION(:), POINTER, &
1436 : INTENT(INOUT), OPTIONAL :: eigval
1437 :
1438 : INTEGER :: n
1439 1860 : INTEGER, ALLOCATABLE, DIMENSION(:) :: ipivot
1440 : REAL(KIND=dp) :: determinant, my_eps_svd
1441 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
1442 : TYPE(cp_fm_type) :: matrix_lu
1443 :
1444 : #if defined(__parallel)
1445 : TYPE(cp_fm_type) :: u, vt, sigma, inv_sigma_ut
1446 : TYPE(mp_comm_type) :: group
1447 : INTEGER :: i, info, liwork, lwork, exponent_of_minus_one
1448 : INTEGER, DIMENSION(9) :: desca
1449 : LOGICAL :: quenched
1450 : REAL(KIND=dp) :: alpha, beta
1451 1860 : REAL(KIND=dp), DIMENSION(:), POINTER :: diag
1452 1860 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: work
1453 : #else
1454 : LOGICAL :: sign
1455 : REAL(KIND=dp) :: eps1
1456 : #endif
1457 :
1458 1860 : my_eps_svd = 0.0_dp
1459 478 : IF (PRESENT(eps_svd)) my_eps_svd = eps_svd
1460 :
1461 : CALL cp_fm_create(matrix=matrix_lu, &
1462 : matrix_struct=matrix_a%matrix_struct, &
1463 1860 : name="A_lu"//TRIM(ADJUSTL(cp_to_string(1)))//"MATRIX")
1464 1860 : CALL cp_fm_to_fm(matrix_a, matrix_lu)
1465 :
1466 1860 : a => matrix_lu%local_data
1467 1860 : n = matrix_lu%matrix_struct%nrow_global
1468 5580 : ALLOCATE (ipivot(n + matrix_a%matrix_struct%nrow_block))
1469 1860 : ipivot(:) = 0
1470 : #if defined(__parallel)
1471 1860 : IF (my_eps_svd == 0.0_dp) THEN
1472 : ! Use LU decomposition
1473 : lwork = 3*n
1474 : liwork = 3*n
1475 18300 : desca(:) = matrix_lu%matrix_struct%descriptor(:)
1476 1830 : CALL pdgetrf(n, n, a, 1, 1, desca, ipivot, info)
1477 :
1478 1830 : IF (PRESENT(det_a) .OR. PRESENT(eigval)) THEN
1479 :
1480 1128 : ALLOCATE (diag(n))
1481 924 : diag(:) = 0.0_dp
1482 376 : CALL cp_fm_get_diag(matrix_lu, diag)
1483 :
1484 376 : exponent_of_minus_one = 0
1485 376 : determinant = 1.0_dp
1486 924 : DO i = 1, n
1487 548 : determinant = determinant*diag(i)
1488 924 : IF (ipivot(i) /= i) THEN
1489 224 : exponent_of_minus_one = exponent_of_minus_one + 1
1490 : END IF
1491 : END DO
1492 376 : IF (PRESENT(eigval)) THEN
1493 0 : CPASSERT(.NOT. ASSOCIATED(eigval))
1494 0 : ALLOCATE (eigval(n))
1495 0 : eigval(:) = diag
1496 : END IF
1497 376 : DEALLOCATE (diag)
1498 :
1499 376 : group = matrix_lu%matrix_struct%para_env
1500 376 : CALL group%sum(exponent_of_minus_one)
1501 :
1502 376 : determinant = determinant*(-1.0_dp)**exponent_of_minus_one
1503 :
1504 : END IF
1505 :
1506 1830 : alpha = 0.0_dp
1507 1830 : beta = 1.0_dp
1508 1830 : CALL cp_fm_set_all(matrix_inverse, alpha, beta)
1509 1830 : CALL pdgetrs('N', n, n, matrix_lu%local_data, 1, 1, desca, ipivot, matrix_inverse%local_data, 1, 1, desca, info)
1510 : ELSE
1511 : ! Use singular value decomposition
1512 : CALL cp_fm_create(matrix=u, &
1513 : matrix_struct=matrix_a%matrix_struct, &
1514 30 : name="LEFT_SINGULAR_MATRIX")
1515 30 : CALL cp_fm_set_all(u, alpha=0.0_dp)
1516 : CALL cp_fm_create(matrix=vt, &
1517 : matrix_struct=matrix_a%matrix_struct, &
1518 30 : name="RIGHT_SINGULAR_MATRIX")
1519 30 : CALL cp_fm_set_all(vt, alpha=0.0_dp)
1520 90 : ALLOCATE (diag(n))
1521 96 : diag(:) = 0.0_dp
1522 300 : desca(:) = matrix_lu%matrix_struct%descriptor(:)
1523 30 : ALLOCATE (work(1))
1524 : ! Workspace query
1525 30 : lwork = -1
1526 : CALL pdgesvd('V', 'V', n, n, matrix_lu%local_data, 1, 1, desca, diag, u%local_data, &
1527 30 : 1, 1, desca, vt%local_data, 1, 1, desca, work, lwork, info)
1528 30 : lwork = INT(work(1))
1529 30 : DEALLOCATE (work)
1530 90 : ALLOCATE (work(lwork))
1531 : ! SVD
1532 : CALL pdgesvd('V', 'V', n, n, matrix_lu%local_data, 1, 1, desca, diag, u%local_data, &
1533 30 : 1, 1, desca, vt%local_data, 1, 1, desca, work, lwork, info)
1534 : ! info == n+1 implies homogeneity error when the number of procs is large
1535 : ! this likely isnt a problem, but maybe we should handle it separately
1536 30 : IF (info /= 0 .AND. info /= n + 1) &
1537 0 : CPABORT("Singular value decomposition of matrix failed.")
1538 : ! (Pseudo)inverse and (pseudo)determinant
1539 : CALL cp_fm_create(matrix=sigma, &
1540 : matrix_struct=matrix_a%matrix_struct, &
1541 30 : name="SINGULAR_VALUE_MATRIX")
1542 30 : CALL cp_fm_set_all(sigma, alpha=0.0_dp)
1543 30 : determinant = 1.0_dp
1544 30 : quenched = .FALSE.
1545 30 : IF (PRESENT(eigval)) THEN
1546 28 : CPASSERT(.NOT. ASSOCIATED(eigval))
1547 84 : ALLOCATE (eigval(n))
1548 156 : eigval(:) = diag
1549 : END IF
1550 96 : DO i = 1, n
1551 66 : IF (diag(i) < my_eps_svd) THEN
1552 18 : diag(i) = 0.0_dp
1553 18 : quenched = .TRUE.
1554 : ELSE
1555 48 : determinant = determinant*diag(i)
1556 48 : diag(i) = 1.0_dp/diag(i)
1557 : END IF
1558 96 : CALL cp_fm_set_element(sigma, i, i, diag(i))
1559 : END DO
1560 30 : DEALLOCATE (diag)
1561 30 : IF (quenched) &
1562 : CALL cp_warn(__LOCATION__, &
1563 : "Linear dependencies were detected in the SVD inversion of matrix "//TRIM(ADJUSTL(matrix_a%name))// &
1564 12 : ". At least one singular value has been quenched.")
1565 : ! Sigma^-1 * U^T
1566 : CALL cp_fm_create(matrix=inv_sigma_ut, &
1567 : matrix_struct=matrix_a%matrix_struct, &
1568 30 : name="SINGULAR_VALUE_MATRIX")
1569 30 : CALL cp_fm_set_all(inv_sigma_ut, alpha=0.0_dp)
1570 : CALL pdgemm('N', 'T', n, n, n, 1.0_dp, sigma%local_data, 1, 1, desca, &
1571 30 : u%local_data, 1, 1, desca, 0.0_dp, inv_sigma_ut%local_data, 1, 1, desca)
1572 : ! A^-1 = V * (Sigma^-1 * U^T)
1573 30 : CALL cp_fm_set_all(matrix_inverse, alpha=0.0_dp)
1574 : CALL pdgemm('T', 'N', n, n, n, 1.0_dp, vt%local_data, 1, 1, desca, &
1575 30 : inv_sigma_ut%local_data, 1, 1, desca, 0.0_dp, matrix_inverse%local_data, 1, 1, desca)
1576 : ! Clean up
1577 30 : DEALLOCATE (work)
1578 30 : CALL cp_fm_release(u)
1579 30 : CALL cp_fm_release(vt)
1580 30 : CALL cp_fm_release(sigma)
1581 150 : CALL cp_fm_release(inv_sigma_ut)
1582 : END IF
1583 : #else
1584 : IF (my_eps_svd == 0.0_dp) THEN
1585 : sign = .TRUE.
1586 : CALL invert_matrix(matrix_a%local_data, matrix_inverse%local_data, &
1587 : eval_error=eps1)
1588 : CALL cp_fm_lu_decompose(matrix_lu, determinant, correct_sign=sign)
1589 : IF (PRESENT(eigval)) &
1590 : CALL cp_abort(__LOCATION__, &
1591 : "NYI. Eigenvalues not available for return without SCALAPACK.")
1592 : ELSE
1593 : CALL get_pseudo_inverse_svd(matrix_a%local_data, matrix_inverse%local_data, eps_svd, &
1594 : determinant, eigval)
1595 : END IF
1596 : #endif
1597 1860 : CALL cp_fm_release(matrix_lu)
1598 1860 : DEALLOCATE (ipivot)
1599 1860 : IF (PRESENT(det_a)) det_a = determinant
1600 1860 : END SUBROUTINE cp_fm_invert
1601 :
1602 : ! **************************************************************************************************
1603 : !> \brief inverts a triangular matrix
1604 : !> \param matrix_a ...
1605 : !> \param uplo_tr triangular format; defaults to 'U'
1606 : !> \author MI
1607 : ! **************************************************************************************************
1608 5312 : SUBROUTINE cp_fm_triangular_invert(matrix_a, uplo_tr)
1609 :
1610 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a
1611 : CHARACTER, INTENT(IN), OPTIONAL :: uplo_tr
1612 :
1613 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_triangular_invert'
1614 :
1615 : CHARACTER :: unit_diag, uplo
1616 : INTEGER :: handle, info, ncol_global
1617 5312 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
1618 : #if defined(__parallel)
1619 : INTEGER, DIMENSION(9) :: desca
1620 : #endif
1621 :
1622 5312 : CALL timeset(routineN, handle)
1623 :
1624 5312 : unit_diag = 'N'
1625 5312 : uplo = 'U'
1626 5312 : IF (PRESENT(uplo_tr)) uplo = uplo_tr
1627 :
1628 5312 : ncol_global = matrix_a%matrix_struct%ncol_global
1629 :
1630 5312 : a => matrix_a%local_data
1631 :
1632 : #if defined(__parallel)
1633 53120 : desca(:) = matrix_a%matrix_struct%descriptor(:)
1634 :
1635 5312 : CALL pdtrtri(uplo, unit_diag, ncol_global, a(1, 1), 1, 1, desca, info)
1636 :
1637 : #else
1638 : CALL dtrtri(uplo, unit_diag, ncol_global, a(1, 1), ncol_global, info)
1639 : #endif
1640 :
1641 5312 : CALL timestop(handle)
1642 5312 : END SUBROUTINE cp_fm_triangular_invert
1643 :
1644 : ! **************************************************************************************************
1645 : !> \brief performs a QR factorization of the input rectangular matrix A or of a submatrix of A
1646 : !> the computed triangular matrix R is in output of the submatrix sub(A) of size NxN
1647 : !> M and M give the dimension of the submatrix that has to be factorized (MxN) with M>N
1648 : !> \param matrix_a ...
1649 : !> \param matrix_r ...
1650 : !> \param nrow_fact ...
1651 : !> \param ncol_fact ...
1652 : !> \param first_row ...
1653 : !> \param first_col ...
1654 : !> \author MI
1655 : ! **************************************************************************************************
1656 19320 : SUBROUTINE cp_fm_qr_factorization(matrix_a, matrix_r, nrow_fact, ncol_fact, first_row, first_col, uplo)
1657 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_r
1658 : INTEGER, INTENT(IN), OPTIONAL :: nrow_fact, ncol_fact, &
1659 : first_row, first_col
1660 : CHARACTER, INTENT(IN), OPTIONAL :: uplo
1661 :
1662 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_qr_factorization'
1663 :
1664 : CHARACTER :: myuplo
1665 : INTEGER :: handle, i, icol, info, irow, &
1666 : j, lda, lwork, ncol, &
1667 : ndim, nrow
1668 19320 : REAL(dp), ALLOCATABLE, DIMENSION(:) :: tau, work
1669 19320 : REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: r_mat
1670 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
1671 : #if defined(__parallel)
1672 : INTEGER, DIMENSION(9) :: desca
1673 : #endif
1674 :
1675 19320 : CALL timeset(routineN, handle)
1676 :
1677 19320 : myuplo = 'U'
1678 19320 : IF (PRESENT(uplo)) myuplo = uplo
1679 :
1680 19320 : ncol = matrix_a%matrix_struct%ncol_global
1681 19320 : nrow = matrix_a%matrix_struct%nrow_global
1682 19320 : lda = nrow
1683 :
1684 19320 : a => matrix_a%local_data
1685 :
1686 19320 : IF (PRESENT(nrow_fact)) nrow = nrow_fact
1687 19320 : IF (PRESENT(ncol_fact)) ncol = ncol_fact
1688 19320 : irow = 1
1689 19320 : IF (PRESENT(first_row)) irow = first_row
1690 19320 : icol = 1
1691 19320 : IF (PRESENT(first_col)) icol = first_col
1692 :
1693 19320 : CPASSERT(nrow >= ncol)
1694 19320 : ndim = SIZE(a, 2)
1695 57960 : ALLOCATE (tau(ndim))
1696 :
1697 : #if defined(__parallel)
1698 :
1699 193200 : desca(:) = matrix_a%matrix_struct%descriptor(:)
1700 :
1701 19320 : lwork = -1
1702 57960 : ALLOCATE (work(2*ndim))
1703 19320 : CALL pdgeqrf(nrow, ncol, a, irow, icol, desca, tau, work, lwork, info)
1704 19320 : lwork = INT(work(1))
1705 19320 : DEALLOCATE (work)
1706 57960 : ALLOCATE (work(lwork))
1707 19320 : CALL pdgeqrf(nrow, ncol, a, irow, icol, desca, tau, work, lwork, info)
1708 :
1709 : #else
1710 : lwork = -1
1711 : ALLOCATE (work(2*ndim))
1712 : CALL dgeqrf(nrow, ncol, a, lda, tau, work, lwork, info)
1713 : lwork = INT(work(1))
1714 : DEALLOCATE (work)
1715 : ALLOCATE (work(lwork))
1716 : CALL dgeqrf(nrow, ncol, a, lda, tau, work, lwork, info)
1717 :
1718 : #endif
1719 :
1720 77280 : ALLOCATE (r_mat(ncol, ncol))
1721 19320 : CALL cp_fm_get_submatrix(matrix_a, r_mat, 1, 1, ncol, ncol)
1722 19320 : IF ((myuplo == "U") .OR. (myuplo == "u")) THEN
1723 38640 : DO i = 1, ncol
1724 38640 : DO j = i + 1, ncol
1725 19320 : r_mat(j, i) = 0.0_dp
1726 : END DO
1727 : END DO
1728 : ELSE
1729 0 : DO j = 1, ncol
1730 0 : DO i = j + 1, ncol
1731 0 : r_mat(i, j) = 0.0_dp
1732 : END DO
1733 : END DO
1734 : END IF
1735 19320 : CALL cp_fm_set_submatrix(matrix_r, r_mat, 1, 1, ncol, ncol)
1736 :
1737 19320 : DEALLOCATE (tau, work, r_mat)
1738 :
1739 19320 : CALL timestop(handle)
1740 :
1741 19320 : END SUBROUTINE cp_fm_qr_factorization
1742 :
1743 : ! **************************************************************************************************
1744 : !> \brief computes the the solution to A*b=A_general using lu decomposition
1745 : !> \param matrix_a input matrix; will be overwritten
1746 : !> \param general_a contains the result
1747 : !> \author Florian Schiffmann
1748 : ! **************************************************************************************************
1749 8210 : SUBROUTINE cp_fm_solve(matrix_a, general_a)
1750 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a, general_a
1751 :
1752 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_solve'
1753 :
1754 : INTEGER :: handle, info, n, nrhs
1755 8210 : INTEGER, ALLOCATABLE, DIMENSION(:) :: ipivot
1756 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, a_general
1757 : #if defined(__parallel)
1758 : INTEGER, DIMENSION(9) :: desca, descb
1759 : #else
1760 : INTEGER :: lda, ldb
1761 : #endif
1762 :
1763 8210 : CALL timeset(routineN, handle)
1764 :
1765 8210 : a => matrix_a%local_data
1766 8210 : a_general => general_a%local_data
1767 8210 : n = matrix_a%matrix_struct%nrow_global
1768 8210 : nrhs = general_a%matrix_struct%ncol_global
1769 24630 : ALLOCATE (ipivot(n + matrix_a%matrix_struct%nrow_block))
1770 :
1771 : #if defined(__parallel)
1772 82100 : desca(:) = matrix_a%matrix_struct%descriptor(:)
1773 82100 : descb(:) = general_a%matrix_struct%descriptor(:)
1774 8210 : CALL pdgetrf(n, n, a, 1, 1, desca, ipivot, info)
1775 : CALL pdgetrs("N", n, nrhs, a, 1, 1, desca, ipivot, a_general, &
1776 8210 : 1, 1, descb, info)
1777 :
1778 : #else
1779 : lda = SIZE(a, 1)
1780 : ldb = SIZE(a_general, 1)
1781 : CALL dgetrf(n, n, a, lda, ipivot, info)
1782 : CALL dgetrs("N", n, nrhs, a, lda, ipivot, a_general, ldb, info)
1783 :
1784 : #endif
1785 : ! info is allowed to be zero
1786 : ! this does just signal a zero diagonal element
1787 8210 : DEALLOCATE (ipivot)
1788 8210 : CALL timestop(handle)
1789 8210 : END SUBROUTINE
1790 :
1791 : ! **************************************************************************************************
1792 : !> \brief Convenience function. Computes the matrix multiplications needed
1793 : !> for the multiplication of complex matrices.
1794 : !> C = beta * C + alpha * ( A ** transa ) * ( B ** transb )
1795 : !> \param transa : 'N' -> normal 'T' -> transpose
1796 : !> alpha,beta :: can be 0.0_dp and 1.0_dp
1797 : !> \param transb ...
1798 : !> \param m ...
1799 : !> \param n ...
1800 : !> \param k ...
1801 : !> \param alpha ...
1802 : !> \param A_re m x k matrix ( ! for transa = 'N'), real part
1803 : !> \param A_im m x k matrix ( ! for transa = 'N'), imaginary part
1804 : !> \param B_re k x n matrix ( ! for transa = 'N'), real part
1805 : !> \param B_im k x n matrix ( ! for transa = 'N'), imaginary part
1806 : !> \param beta ...
1807 : !> \param C_re m x n matrix, real part
1808 : !> \param C_im m x n matrix, imaginary part
1809 : !> \param a_first_col ...
1810 : !> \param a_first_row ...
1811 : !> \param b_first_col : the k x n matrix starts at col b_first_col of matrix_b (avoid usage)
1812 : !> \param b_first_row ...
1813 : !> \param c_first_col ...
1814 : !> \param c_first_row ...
1815 : !> \author Samuel Andermatt
1816 : !> \note
1817 : !> C should have no overlap with A, B
1818 : ! **************************************************************************************************
1819 0 : SUBROUTINE cp_complex_fm_gemm(transa, transb, m, n, k, alpha, A_re, A_im, B_re, B_im, beta, &
1820 : C_re, C_im, a_first_col, a_first_row, b_first_col, b_first_row, c_first_col, &
1821 : c_first_row)
1822 : CHARACTER(LEN=1), INTENT(IN) :: transa, transb
1823 : INTEGER, INTENT(IN) :: m, n, k
1824 : REAL(KIND=dp), INTENT(IN) :: alpha
1825 : TYPE(cp_fm_type), INTENT(IN) :: A_re, A_im, B_re, B_im
1826 : REAL(KIND=dp), INTENT(IN) :: beta
1827 : TYPE(cp_fm_type), INTENT(IN) :: C_re, C_im
1828 : INTEGER, INTENT(IN), OPTIONAL :: a_first_col, a_first_row, b_first_col, &
1829 : b_first_row, c_first_col, c_first_row
1830 :
1831 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_complex_fm_gemm'
1832 :
1833 : INTEGER :: handle
1834 :
1835 0 : CALL timeset(routineN, handle)
1836 :
1837 : CALL cp_fm_gemm(transa, transb, m, n, k, alpha, A_re, B_re, beta, C_re, &
1838 : a_first_col=a_first_col, &
1839 : a_first_row=a_first_row, &
1840 : b_first_col=b_first_col, &
1841 : b_first_row=b_first_row, &
1842 : c_first_col=c_first_col, &
1843 0 : c_first_row=c_first_row)
1844 : CALL cp_fm_gemm(transa, transb, m, n, k, -alpha, A_im, B_im, 1.0_dp, C_re, &
1845 : a_first_col=a_first_col, &
1846 : a_first_row=a_first_row, &
1847 : b_first_col=b_first_col, &
1848 : b_first_row=b_first_row, &
1849 : c_first_col=c_first_col, &
1850 0 : c_first_row=c_first_row)
1851 : CALL cp_fm_gemm(transa, transb, m, n, k, alpha, A_re, B_im, beta, C_im, &
1852 : a_first_col=a_first_col, &
1853 : a_first_row=a_first_row, &
1854 : b_first_col=b_first_col, &
1855 : b_first_row=b_first_row, &
1856 : c_first_col=c_first_col, &
1857 0 : c_first_row=c_first_row)
1858 : CALL cp_fm_gemm(transa, transb, m, n, k, alpha, A_im, B_re, 1.0_dp, C_im, &
1859 : a_first_col=a_first_col, &
1860 : a_first_row=a_first_row, &
1861 : b_first_col=b_first_col, &
1862 : b_first_row=b_first_row, &
1863 : c_first_col=c_first_col, &
1864 0 : c_first_row=c_first_row)
1865 :
1866 0 : CALL timestop(handle)
1867 :
1868 0 : END SUBROUTINE cp_complex_fm_gemm
1869 :
1870 : ! **************************************************************************************************
1871 : !> \brief inverts a matrix using LU decomposition
1872 : !> the input matrix will be overwritten
1873 : !> \param matrix : input a general square non-singular matrix, outputs its inverse
1874 : !> \param info_out : optional, if present outputs the info from (p)zgetri
1875 : !> \author Lianheng Tong
1876 : ! **************************************************************************************************
1877 0 : SUBROUTINE cp_fm_lu_invert(matrix, info_out)
1878 : TYPE(cp_fm_type), INTENT(IN) :: matrix
1879 : INTEGER, INTENT(OUT), OPTIONAL :: info_out
1880 :
1881 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_lu_invert'
1882 :
1883 : INTEGER :: nrows_global, handle, info, lwork
1884 0 : INTEGER, DIMENSION(:), ALLOCATABLE :: ipivot
1885 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: mat
1886 0 : REAL(KIND=dp), DIMENSION(:), ALLOCATABLE :: work
1887 : #if defined(__parallel)
1888 : INTEGER :: liwork
1889 : INTEGER, DIMENSION(9) :: desca
1890 0 : INTEGER, DIMENSION(:), ALLOCATABLE :: iwork
1891 : #else
1892 : INTEGER :: lda
1893 : #endif
1894 :
1895 0 : CALL timeset(routineN, handle)
1896 :
1897 0 : mat => matrix%local_data
1898 0 : nrows_global = matrix%matrix_struct%nrow_global
1899 0 : CPASSERT(nrows_global == matrix%matrix_struct%ncol_global)
1900 0 : ALLOCATE (ipivot(nrows_global))
1901 : ! do LU decomposition
1902 : #if defined(__parallel)
1903 0 : desca = matrix%matrix_struct%descriptor
1904 : CALL pdgetrf(nrows_global, nrows_global, &
1905 0 : mat, 1, 1, desca, ipivot, info)
1906 : #else
1907 : lda = SIZE(mat, 1)
1908 : CALL dgetrf(nrows_global, nrows_global, &
1909 : mat, lda, ipivot, info)
1910 : #endif
1911 0 : IF (info /= 0) THEN
1912 0 : CALL cp_abort(__LOCATION__, "LU decomposition has failed")
1913 : END IF
1914 : ! do inversion
1915 : #if defined(__parallel)
1916 0 : ALLOCATE (iwork(1))
1917 : CALL pdgetri(nrows_global, mat, 1, 1, desca, &
1918 0 : ipivot, work, -1, iwork, -1, info)
1919 : lwork = INT(work(1))
1920 0 : DEALLOCATE (work)
1921 : ALLOCATE (work(lwork))
1922 : liwork = INT(iwork(1))
1923 : DEALLOCATE (iwork)
1924 : ALLOCATE (iwork(liwork))
1925 : CALL pdgetri(nrows_global, mat, 1, 1, desca, &
1926 : ipivot, work, lwork, iwork, liwork, info)
1927 : DEALLOCATE (iwork)
1928 : #else
1929 : CALL dgetri(nrows_global, mat, lda, &
1930 : ipivot, work, -1, info)
1931 : lwork = INT(work(1))
1932 : DEALLOCATE (work)
1933 : ALLOCATE (work(lwork))
1934 : CALL dgetri(nrows_global, mat, lda, &
1935 : ipivot, work, lwork, info)
1936 : #endif
1937 : DEALLOCATE (work)
1938 : DEALLOCATE (ipivot)
1939 :
1940 : IF (PRESENT(info_out)) THEN
1941 : info_out = info
1942 : ELSE
1943 : IF (info /= 0) &
1944 : CALL cp_abort(__LOCATION__, "LU inversion has failed")
1945 : END IF
1946 :
1947 : CALL timestop(handle)
1948 :
1949 0 : END SUBROUTINE cp_fm_lu_invert
1950 :
1951 : ! **************************************************************************************************
1952 : !> \brief norm of matrix using (p)dlange
1953 : !> \param matrix : input a general matrix
1954 : !> \param mode : 'M' max abs element value,
1955 : !> '1' or 'O' one norm, i.e. maximum column sum
1956 : !> 'I' infinity norm, i.e. maximum row sum
1957 : !> 'F' or 'E' Frobenius norm, i.e. sqrt of sum of all squares of elements
1958 : !> \return : the norm according to mode
1959 : !> \author Lianheng Tong
1960 : ! **************************************************************************************************
1961 496 : FUNCTION cp_fm_norm(matrix, mode) RESULT(res)
1962 : TYPE(cp_fm_type), INTENT(IN) :: matrix
1963 : CHARACTER, INTENT(IN) :: mode
1964 : REAL(KIND=dp) :: res
1965 :
1966 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_norm'
1967 :
1968 : INTEGER :: nrows, ncols, handle, lwork, nrows_local, ncols_local
1969 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: aa
1970 496 : REAL(KIND=dp), DIMENSION(:), ALLOCATABLE :: work
1971 : #if defined(__parallel)
1972 : INTEGER, DIMENSION(9) :: desca
1973 : #else
1974 : INTEGER :: lda
1975 : #endif
1976 :
1977 496 : CALL timeset(routineN, handle)
1978 :
1979 : CALL cp_fm_get_info(matrix=matrix, &
1980 : nrow_global=nrows, &
1981 : ncol_global=ncols, &
1982 : nrow_local=nrows_local, &
1983 496 : ncol_local=ncols_local)
1984 496 : aa => matrix%local_data
1985 :
1986 : #if defined(__parallel)
1987 4960 : desca = matrix%matrix_struct%descriptor
1988 : SELECT CASE (mode)
1989 : CASE ('M', 'm')
1990 496 : lwork = 1
1991 : CASE ('1', 'O', 'o')
1992 496 : lwork = ncols_local
1993 : CASE ('I', 'i')
1994 0 : lwork = nrows_local
1995 : CASE ('F', 'f', 'E', 'e')
1996 0 : lwork = 1
1997 : CASE DEFAULT
1998 496 : CPABORT("mode input is not valid")
1999 : END SELECT
2000 1488 : ALLOCATE (work(lwork))
2001 496 : res = pdlange(mode, nrows, ncols, aa, 1, 1, desca, work)
2002 496 : DEALLOCATE (work)
2003 : #else
2004 : SELECT CASE (mode)
2005 : CASE ('M', 'm')
2006 : lwork = 1
2007 : CASE ('1', 'O', 'o')
2008 : lwork = 1
2009 : CASE ('I', 'i')
2010 : lwork = nrows
2011 : CASE ('F', 'f', 'E', 'e')
2012 : lwork = 1
2013 : CASE DEFAULT
2014 : CPABORT("mode input is not valid")
2015 : END SELECT
2016 : ALLOCATE (work(lwork))
2017 : lda = SIZE(aa, 1)
2018 : res = dlange(mode, nrows, ncols, aa, lda, work)
2019 : DEALLOCATE (work)
2020 : #endif
2021 :
2022 496 : CALL timestop(handle)
2023 :
2024 496 : END FUNCTION cp_fm_norm
2025 :
2026 : ! **************************************************************************************************
2027 : !> \brief trace of a matrix using pdlatra
2028 : !> \param matrix : input a square matrix
2029 : !> \return : the trace
2030 : !> \author Lianheng Tong
2031 : ! **************************************************************************************************
2032 0 : FUNCTION cp_fm_latra(matrix) RESULT(res)
2033 : TYPE(cp_fm_type), INTENT(IN) :: matrix
2034 : REAL(KIND=dp) :: res
2035 :
2036 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_latra'
2037 :
2038 : INTEGER :: nrows, ncols, handle
2039 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: aa
2040 : #if defined(__parallel)
2041 : INTEGER, DIMENSION(9) :: desca
2042 : #else
2043 : INTEGER :: ii
2044 : #endif
2045 :
2046 0 : CALL timeset(routineN, handle)
2047 :
2048 0 : nrows = matrix%matrix_struct%nrow_global
2049 0 : ncols = matrix%matrix_struct%ncol_global
2050 0 : CPASSERT(nrows == ncols)
2051 0 : aa => matrix%local_data
2052 :
2053 : #if defined(__parallel)
2054 0 : desca = matrix%matrix_struct%descriptor
2055 0 : res = pdlatra(nrows, aa, 1, 1, desca)
2056 : #else
2057 : res = 0.0_dp
2058 : DO ii = 1, nrows
2059 : res = res + aa(ii, ii)
2060 : END DO
2061 : #endif
2062 :
2063 0 : CALL timestop(handle)
2064 :
2065 0 : END FUNCTION cp_fm_latra
2066 :
2067 : ! **************************************************************************************************
2068 : !> \brief compute a QR factorization with column pivoting of a M-by-N distributed matrix
2069 : !> sub( A ) = A(IA:IA+M-1,JA:JA+N-1)
2070 : !> \param matrix : input M-by-N distributed matrix sub( A ) which is to be factored
2071 : !> \param tau : scalar factors TAU of the elementary reflectors. TAU is tied to the distributed matrix A
2072 : !> \param nrow ...
2073 : !> \param ncol ...
2074 : !> \param first_row ...
2075 : !> \param first_col ...
2076 : !> \author MI
2077 : ! **************************************************************************************************
2078 38 : SUBROUTINE cp_fm_pdgeqpf(matrix, tau, nrow, ncol, first_row, first_col)
2079 :
2080 : TYPE(cp_fm_type), INTENT(IN) :: matrix
2081 : REAL(KIND=dp), DIMENSION(:), POINTER :: tau
2082 : INTEGER, INTENT(IN) :: nrow, ncol, first_row, first_col
2083 :
2084 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_pdgeqpf'
2085 :
2086 : INTEGER :: handle
2087 : INTEGER :: info, lwork
2088 38 : INTEGER, ALLOCATABLE, DIMENSION(:) :: ipiv
2089 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
2090 : REAL(KIND=dp), DIMENSION(:), POINTER :: work
2091 : #if defined(__parallel)
2092 : INTEGER, DIMENSION(9) :: descc
2093 : #else
2094 : INTEGER :: lda
2095 : #endif
2096 :
2097 38 : CALL timeset(routineN, handle)
2098 :
2099 38 : a => matrix%local_data
2100 38 : lwork = -1
2101 114 : ALLOCATE (work(2*nrow))
2102 114 : ALLOCATE (ipiv(ncol))
2103 38 : info = 0
2104 :
2105 : #if defined(__parallel)
2106 380 : descc(:) = matrix%matrix_struct%descriptor(:)
2107 : ! Call SCALAPACK routine to get optimal work dimension
2108 38 : CALL pdgeqpf(nrow, ncol, a, first_row, first_col, descc, ipiv, tau, work, lwork, info)
2109 38 : lwork = INT(work(1))
2110 38 : DEALLOCATE (work)
2111 114 : ALLOCATE (work(lwork))
2112 254 : tau = 0.0_dp
2113 38 : ipiv = 0
2114 :
2115 : ! Call SCALAPACK routine to get QR decomposition of CTs
2116 38 : CALL pdgeqpf(nrow, ncol, a, first_row, first_col, descc, ipiv, tau, work, lwork, info)
2117 : #else
2118 : CPASSERT(first_row == 1 .AND. first_col == 1)
2119 : lda = SIZE(a, 1)
2120 : CALL dgeqp3(nrow, ncol, a, lda, ipiv, tau, work, lwork, info)
2121 : lwork = INT(work(1))
2122 : DEALLOCATE (work)
2123 : ALLOCATE (work(lwork))
2124 : tau = 0.0_dp
2125 : ipiv = 0
2126 : CALL dgeqp3(nrow, ncol, a, lda, ipiv, tau, work, lwork, info)
2127 : #endif
2128 38 : CPASSERT(info == 0)
2129 :
2130 38 : DEALLOCATE (work)
2131 38 : DEALLOCATE (ipiv)
2132 :
2133 38 : CALL timestop(handle)
2134 :
2135 38 : END SUBROUTINE cp_fm_pdgeqpf
2136 :
2137 : ! **************************************************************************************************
2138 : !> \brief generates an M-by-N real distributed matrix Q denoting A(IA:IA+M-1,JA:JA+N-1)
2139 : !> with orthonormal columns, which is defined as the first N columns of a product of K
2140 : !> elementary reflectors of order M
2141 : !> \param matrix : On entry, the j-th column must contain the vector which defines the elementary reflector
2142 : !> as returned from PDGEQRF
2143 : !> On exit it contains the M-by-N distributed matrix Q
2144 : !> \param tau : contains the scalar factors TAU of elementary reflectors as returned by PDGEQRF
2145 : !> \param nrow ...
2146 : !> \param first_row ...
2147 : !> \param first_col ...
2148 : !> \author MI
2149 : ! **************************************************************************************************
2150 38 : SUBROUTINE cp_fm_pdorgqr(matrix, tau, nrow, first_row, first_col)
2151 :
2152 : TYPE(cp_fm_type), INTENT(IN) :: matrix
2153 : REAL(KIND=dp), DIMENSION(:), POINTER :: tau
2154 : INTEGER, INTENT(IN) :: nrow, first_row, first_col
2155 :
2156 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_pdorgqr'
2157 :
2158 : INTEGER :: handle
2159 : INTEGER :: info, lwork
2160 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
2161 : REAL(KIND=dp), DIMENSION(:), POINTER :: work
2162 : #if defined(__parallel)
2163 : INTEGER, DIMENSION(9) :: descc
2164 : #else
2165 : INTEGER :: lda
2166 : #endif
2167 :
2168 38 : CALL timeset(routineN, handle)
2169 :
2170 38 : a => matrix%local_data
2171 38 : lwork = -1
2172 114 : ALLOCATE (work(2*nrow))
2173 38 : info = 0
2174 :
2175 : #if defined(__parallel)
2176 380 : descc(:) = matrix%matrix_struct%descriptor(:)
2177 :
2178 38 : CALL pdorgqr(nrow, nrow, nrow, a, first_row, first_col, descc, tau, work, lwork, info)
2179 38 : CPASSERT(info == 0)
2180 38 : lwork = INT(work(1))
2181 38 : DEALLOCATE (work)
2182 114 : ALLOCATE (work(lwork))
2183 :
2184 : ! Call SCALAPACK routine to get Q
2185 38 : CALL pdorgqr(nrow, nrow, nrow, a, first_row, first_col, descc, tau, work, lwork, info)
2186 : #else
2187 : CPASSERT(first_row == 1 .AND. first_col == 1)
2188 : lda = SIZE(a, 1)
2189 : CALL dorgqr(nrow, nrow, nrow, a, lda, tau, work, lwork, info)
2190 : lwork = INT(work(1))
2191 : DEALLOCATE (work)
2192 : ALLOCATE (work(lwork))
2193 : CALL dorgqr(nrow, nrow, nrow, a, lda, tau, work, lwork, info)
2194 : #endif
2195 38 : CPASSERT(INFO == 0)
2196 :
2197 38 : DEALLOCATE (work)
2198 38 : CALL timestop(handle)
2199 :
2200 38 : END SUBROUTINE cp_fm_pdorgqr
2201 :
2202 : ! **************************************************************************************************
2203 : !> \brief Applies a planar rotation defined by cs and sn to the i'th and j'th rows.
2204 : !> \param matrix ...
2205 : !> \param irow ...
2206 : !> \param jrow ...
2207 : !> \param cs cosine of the rotation angle
2208 : !> \param sn sinus of the rotation angle
2209 : !> \author Ole Schuett
2210 : ! **************************************************************************************************
2211 31928 : SUBROUTINE cp_fm_rot_rows(matrix, irow, jrow, cs, sn)
2212 : TYPE(cp_fm_type), INTENT(IN) :: matrix
2213 : INTEGER, INTENT(IN) :: irow, jrow
2214 : REAL(dp), INTENT(IN) :: cs, sn
2215 :
2216 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_rot_rows'
2217 : INTEGER :: handle, ncol
2218 :
2219 : #if defined(__parallel)
2220 : INTEGER :: info, lwork
2221 : INTEGER, DIMENSION(9) :: desc
2222 31928 : REAL(dp), DIMENSION(:), ALLOCATABLE :: work
2223 : #endif
2224 31928 : CALL timeset(routineN, handle)
2225 31928 : CALL cp_fm_get_info(matrix, ncol_global=ncol)
2226 : #if defined(__parallel)
2227 31928 : IF (1 /= matrix%matrix_struct%context%n_pid) THEN
2228 31928 : lwork = 2*ncol + 1
2229 95784 : ALLOCATE (work(lwork))
2230 319280 : desc(:) = matrix%matrix_struct%descriptor(:)
2231 : CALL pdrot(ncol, &
2232 : matrix%local_data(1, 1), irow, 1, desc, ncol, &
2233 : matrix%local_data(1, 1), jrow, 1, desc, ncol, &
2234 31928 : cs, sn, work, lwork, info)
2235 31928 : CPASSERT(info == 0)
2236 31928 : DEALLOCATE (work)
2237 : ELSE
2238 : #endif
2239 0 : CALL drot(ncol, matrix%local_data(irow, 1), ncol, matrix%local_data(jrow, 1), ncol, cs, sn)
2240 : #if defined(__parallel)
2241 : END IF
2242 : #endif
2243 31928 : CALL timestop(handle)
2244 31928 : END SUBROUTINE cp_fm_rot_rows
2245 :
2246 : ! **************************************************************************************************
2247 : !> \brief Applies a planar rotation defined by cs and sn to the i'th and j'th columnns.
2248 : !> \param matrix ...
2249 : !> \param icol ...
2250 : !> \param jcol ...
2251 : !> \param cs cosine of the rotation angle
2252 : !> \param sn sinus of the rotation angle
2253 : !> \author Ole Schuett
2254 : ! **************************************************************************************************
2255 36928 : SUBROUTINE cp_fm_rot_cols(matrix, icol, jcol, cs, sn)
2256 : TYPE(cp_fm_type), INTENT(IN) :: matrix
2257 : INTEGER, INTENT(IN) :: icol, jcol
2258 : REAL(dp), INTENT(IN) :: cs, sn
2259 :
2260 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_rot_cols'
2261 : INTEGER :: handle, nrow
2262 :
2263 : #if defined(__parallel)
2264 : INTEGER :: info, lwork
2265 : INTEGER, DIMENSION(9) :: desc
2266 36928 : REAL(dp), DIMENSION(:), ALLOCATABLE :: work
2267 : #endif
2268 36928 : CALL timeset(routineN, handle)
2269 36928 : CALL cp_fm_get_info(matrix, nrow_global=nrow)
2270 : #if defined(__parallel)
2271 36928 : IF (1 /= matrix%matrix_struct%context%n_pid) THEN
2272 36928 : lwork = 2*nrow + 1
2273 110784 : ALLOCATE (work(lwork))
2274 369280 : desc(:) = matrix%matrix_struct%descriptor(:)
2275 : CALL pdrot(nrow, &
2276 : matrix%local_data(1, 1), 1, icol, desc, 1, &
2277 : matrix%local_data(1, 1), 1, jcol, desc, 1, &
2278 36928 : cs, sn, work, lwork, info)
2279 36928 : CPASSERT(info == 0)
2280 36928 : DEALLOCATE (work)
2281 : ELSE
2282 : #endif
2283 0 : CALL drot(nrow, matrix%local_data(1, icol), 1, matrix%local_data(1, jcol), 1, cs, sn)
2284 : #if defined(__parallel)
2285 : END IF
2286 : #endif
2287 36928 : CALL timestop(handle)
2288 36928 : END SUBROUTINE cp_fm_rot_cols
2289 :
2290 : ! **************************************************************************************************
2291 : !> \brief Orthonormalizes selected rows and columns of a full matrix, matrix_a
2292 : !> \param matrix_a ...
2293 : !> \param B ...
2294 : !> \param nrows number of rows of matrix_a, optional, defaults to size(matrix_a,1)
2295 : !> \param ncols number of columns of matrix_a, optional, defaults to size(matrix_a, 2)
2296 : !> \param start_row starting index of rows, optional, defaults to 1
2297 : !> \param start_col starting index of columns, optional, defaults to 1
2298 : !> \param do_norm ...
2299 : !> \param do_print ...
2300 : ! **************************************************************************************************
2301 0 : SUBROUTINE cp_fm_Gram_Schmidt_orthonorm(matrix_a, B, nrows, ncols, start_row, start_col, &
2302 : do_norm, do_print)
2303 :
2304 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a
2305 : REAL(kind=dp), DIMENSION(:, :), INTENT(OUT) :: B
2306 : INTEGER, INTENT(IN), OPTIONAL :: nrows, ncols, start_row, start_col
2307 : LOGICAL, INTENT(IN), OPTIONAL :: do_norm, do_print
2308 :
2309 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_Gram_Schmidt_orthonorm'
2310 :
2311 : INTEGER :: end_col_global, end_col_local, end_row_global, end_row_local, handle, i, j, &
2312 : j_col, ncol_global, ncol_local, nrow_global, nrow_local, start_col_global, &
2313 : start_col_local, start_row_global, start_row_local, this_col, unit_nr
2314 0 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
2315 : LOGICAL :: my_do_norm, my_do_print
2316 : REAL(KIND=dp) :: norm
2317 0 : REAL(kind=dp), DIMENSION(:, :), POINTER :: a
2318 :
2319 0 : CALL timeset(routineN, handle)
2320 :
2321 0 : my_do_norm = .TRUE.
2322 0 : IF (PRESENT(do_norm)) my_do_norm = do_norm
2323 :
2324 0 : my_do_print = .FALSE.
2325 0 : IF (PRESENT(do_print) .AND. (my_do_norm)) my_do_print = do_print
2326 :
2327 0 : unit_nr = -1
2328 0 : IF (my_do_print) THEN
2329 0 : unit_nr = cp_logger_get_default_unit_nr()
2330 0 : IF (unit_nr < 1) my_do_print = .FALSE.
2331 : END IF
2332 :
2333 0 : IF (SIZE(B) /= 0) THEN
2334 0 : IF (PRESENT(nrows)) THEN
2335 0 : nrow_global = nrows
2336 : ELSE
2337 0 : nrow_global = SIZE(B, 1)
2338 : END IF
2339 :
2340 0 : IF (PRESENT(ncols)) THEN
2341 0 : ncol_global = ncols
2342 : ELSE
2343 0 : ncol_global = SIZE(B, 2)
2344 : END IF
2345 :
2346 0 : IF (PRESENT(start_row)) THEN
2347 0 : start_row_global = start_row
2348 : ELSE
2349 : start_row_global = 1
2350 : END IF
2351 :
2352 0 : IF (PRESENT(start_col)) THEN
2353 0 : start_col_global = start_col
2354 : ELSE
2355 : start_col_global = 1
2356 : END IF
2357 :
2358 0 : end_row_global = start_row_global + nrow_global - 1
2359 0 : end_col_global = start_col_global + ncol_global - 1
2360 :
2361 : CALL cp_fm_get_info(matrix=matrix_a, &
2362 : nrow_global=nrow_global, ncol_global=ncol_global, &
2363 : nrow_local=nrow_local, ncol_local=ncol_local, &
2364 0 : row_indices=row_indices, col_indices=col_indices)
2365 0 : IF (end_row_global > nrow_global) THEN
2366 : end_row_global = nrow_global
2367 : END IF
2368 0 : IF (end_col_global > ncol_global) THEN
2369 : end_col_global = ncol_global
2370 : END IF
2371 :
2372 : ! find out row/column indices of locally stored matrix elements that
2373 : ! needs to be copied.
2374 : ! Arrays row_indices and col_indices are assumed to be sorted in
2375 : ! ascending order
2376 0 : DO start_row_local = 1, nrow_local
2377 0 : IF (row_indices(start_row_local) >= start_row_global) EXIT
2378 : END DO
2379 :
2380 0 : DO end_row_local = start_row_local, nrow_local
2381 0 : IF (row_indices(end_row_local) > end_row_global) EXIT
2382 : END DO
2383 0 : end_row_local = end_row_local - 1
2384 :
2385 0 : DO start_col_local = 1, ncol_local
2386 0 : IF (col_indices(start_col_local) >= start_col_global) EXIT
2387 : END DO
2388 :
2389 0 : DO end_col_local = start_col_local, ncol_local
2390 0 : IF (col_indices(end_col_local) > end_col_global) EXIT
2391 : END DO
2392 0 : end_col_local = end_col_local - 1
2393 :
2394 0 : a => matrix_a%local_data
2395 :
2396 0 : this_col = col_indices(start_col_local) - start_col_global + 1
2397 :
2398 0 : B(:, this_col) = a(:, start_col_local)
2399 :
2400 0 : IF (my_do_norm) THEN
2401 0 : norm = SQRT(accurate_dot_product(B(:, this_col), B(:, this_col)))
2402 0 : B(:, this_col) = B(:, this_col)/norm
2403 0 : IF (my_do_print) WRITE (unit_nr, '(I3,F8.3)') this_col, norm
2404 : END IF
2405 :
2406 0 : DO i = start_col_local + 1, end_col_local
2407 0 : this_col = col_indices(i) - start_col_global + 1
2408 0 : B(:, this_col) = a(:, i)
2409 0 : DO j = start_col_local, i - 1
2410 0 : j_col = col_indices(j) - start_col_global + 1
2411 : B(:, this_col) = B(:, this_col) - &
2412 : accurate_dot_product(B(:, j_col), B(:, this_col))* &
2413 0 : B(:, j_col)/accurate_dot_product(B(:, j_col), B(:, j_col))
2414 : END DO
2415 :
2416 0 : IF (my_do_norm) THEN
2417 0 : norm = SQRT(accurate_dot_product(B(:, this_col), B(:, this_col)))
2418 0 : B(:, this_col) = B(:, this_col)/norm
2419 0 : IF (my_do_print) WRITE (unit_nr, '(I3,F8.3)') this_col, norm
2420 : END IF
2421 :
2422 : END DO
2423 0 : CALL matrix_a%matrix_struct%para_env%sum(B)
2424 : END IF
2425 :
2426 0 : CALL timestop(handle)
2427 :
2428 0 : END SUBROUTINE cp_fm_Gram_Schmidt_orthonorm
2429 :
2430 : ! **************************************************************************************************
2431 : !> \brief Cholesky decomposition
2432 : !> \param fm_matrix ...
2433 : !> \param n ...
2434 : !> \param uplo triangular format; defaults to 'U'
2435 : ! **************************************************************************************************
2436 10544 : SUBROUTINE cp_fm_potrf(fm_matrix, n, uplo)
2437 : TYPE(cp_fm_type) :: fm_matrix
2438 : INTEGER, INTENT(IN) :: n
2439 : CHARACTER, INTENT(IN), OPTIONAL :: uplo
2440 :
2441 : CHARACTER :: myuplo
2442 : INTEGER :: info
2443 10544 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
2444 : #if defined(__parallel)
2445 : INTEGER, DIMENSION(9) :: desca
2446 : #endif
2447 :
2448 10544 : myuplo = 'U'
2449 0 : IF (PRESENT(uplo)) myuplo = uplo
2450 :
2451 10544 : a => fm_matrix%local_data
2452 : #if defined(__parallel)
2453 105440 : desca(:) = fm_matrix%matrix_struct%descriptor(:)
2454 10544 : CALL pdpotrf(myuplo, n, a(1, 1), 1, 1, desca, info)
2455 : #else
2456 : CALL dpotrf(myuplo, n, a(1, 1), SIZE(a, 1), info)
2457 : #endif
2458 10544 : IF (info /= 0) &
2459 0 : CPABORT("Cholesky decomposition failed. Matrix ill-conditioned?")
2460 :
2461 10544 : END SUBROUTINE cp_fm_potrf
2462 :
2463 : ! **************************************************************************************************
2464 : !> \brief Invert trianguar matrix
2465 : !> \param fm_matrix the matrix to invert (triangular matrix according to uplo)
2466 : !> \param n size of the matrix to invert
2467 : !> \param uplo triangular format; defaults to 'U'
2468 : ! **************************************************************************************************
2469 9750 : SUBROUTINE cp_fm_potri(fm_matrix, n, uplo)
2470 : TYPE(cp_fm_type) :: fm_matrix
2471 : INTEGER, INTENT(IN) :: n
2472 : CHARACTER, INTENT(IN), OPTIONAL :: uplo
2473 :
2474 : CHARACTER :: myuplo
2475 9750 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
2476 : INTEGER :: info
2477 : #if defined(__parallel)
2478 : INTEGER, DIMENSION(9) :: desca
2479 : #endif
2480 :
2481 9750 : myuplo = 'U'
2482 0 : IF (PRESENT(uplo)) myuplo = uplo
2483 :
2484 9750 : a => fm_matrix%local_data
2485 : #if defined(__parallel)
2486 97500 : desca(:) = fm_matrix%matrix_struct%descriptor(:)
2487 9750 : CALL pdpotri(myuplo, n, a(1, 1), 1, 1, desca, info)
2488 : #else
2489 : CALL dpotri(myuplo, n, a(1, 1), SIZE(a, 1), info)
2490 : #endif
2491 9750 : CPASSERT(info == 0)
2492 9750 : END SUBROUTINE cp_fm_potri
2493 :
2494 : ! **************************************************************************************************
2495 : !> \brief Calculates
2496 : !> yv = alpha*amat*xv + beta*yv
2497 : !> where amat: fm matrix
2498 : !> xv : vector replicated
2499 : !> yv : vector replicated
2500 : !> Defaults: alpha = 1, beta = 0
2501 : ! **************************************************************************************************
2502 21320 : SUBROUTINE cp_fm_matvec(amat, xv, yv, alpha, beta)
2503 : TYPE(cp_fm_type), INTENT(IN) :: amat
2504 : REAL(KIND=dp), DIMENSION(:), INTENT(IN) :: xv
2505 : REAL(KIND=dp), DIMENSION(:), INTENT(INOUT) :: yv
2506 : REAL(KIND=dp), OPTIONAL, INTENT(IN) :: alpha, beta
2507 :
2508 : INTEGER :: na, nc, nx, ny
2509 : REAL(KIND=dp) :: aval, bval
2510 : #if defined(__parallel)
2511 : INTEGER :: nrl, ncl, ic, ir
2512 21320 : INTEGER, DIMENSION(:), POINTER :: rind, cind
2513 21320 : REAL(KIND=dp), DIMENSION(:), ALLOCATABLE :: xvl, yvl, yvm
2514 : #endif
2515 :
2516 21320 : aval = 1.0_dp
2517 21320 : IF (PRESENT(alpha)) aval = alpha
2518 21320 : bval = 0.0_dp
2519 21320 : IF (PRESENT(beta)) bval = beta
2520 :
2521 21320 : CALL cp_fm_get_info(amat, nrow_global=na, ncol_global=nc)
2522 21320 : nx = SIZE(xv)
2523 21320 : ny = SIZE(yv)
2524 21320 : IF ((nx /= ny) .OR. (nc /= nx)) THEN
2525 0 : CPABORT("cp_fm_matvec: incompatible dimensions")
2526 : END IF
2527 : #if defined(__parallel)
2528 : CALL cp_fm_get_info(amat, nrow_local=nrl, ncol_local=ncl, &
2529 21320 : row_indices=rind, col_indices=cind)
2530 149240 : ALLOCATE (xvl(ncl), yvl(nrl), yvm(ny))
2531 210636 : DO ic = 1, ncl
2532 210636 : xvl(ic) = xv(cind(ic))
2533 : END DO
2534 4017978 : yvl(1:nrl) = MATMUL(amat%local_data, xvl(1:ncl))
2535 21320 : yvm = 0.0_dp
2536 115978 : DO ir = 1, nrl
2537 115978 : yvm(rind(ir)) = yvl(ir)
2538 : END DO
2539 21320 : CALL amat%matrix_struct%para_env%sum(yvm)
2540 21320 : IF (bval == 0.0_dp) THEN
2541 118562 : yv = aval*yvm
2542 : ELSE
2543 92074 : yv = bval*yv + aval*yvm
2544 : END IF
2545 : #else
2546 : IF (bval == 0.0_dp) THEN
2547 : yv = aval*MATMUL(amat%local_data, xv)
2548 : ELSE
2549 : yv = bval*yv + aval*MATMUL(amat%local_data, xv)
2550 : END IF
2551 : #endif
2552 :
2553 85280 : END SUBROUTINE cp_fm_matvec
2554 :
2555 : END MODULE cp_fm_basic_linalg
|