Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2026 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief Basic linear algebra operations for full matrices.
10 : !> \par History
11 : !> 08.2002 split out of qs_blacs [fawzi]
12 : !> \author Fawzi Mohamed
13 : ! **************************************************************************************************
14 : MODULE cp_fm_basic_linalg
15 : USE cp_blacs_env, ONLY: cp_blacs_env_type
16 : USE cp_fm_struct, ONLY: cp_fm_struct_equivalent
17 : USE cp_fm_types, ONLY: &
18 : cp_fm_create, cp_fm_get_diag, cp_fm_get_info, cp_fm_get_submatrix, cp_fm_p_type, &
19 : cp_fm_release, cp_fm_set_all, cp_fm_set_element, cp_fm_set_submatrix, cp_fm_to_fm, &
20 : cp_fm_type
21 : USE cp_log_handling, ONLY: cp_logger_get_default_unit_nr, &
22 : cp_to_string
23 : USE kahan_sum, ONLY: accurate_dot_product, &
24 : accurate_sum
25 : USE kinds, ONLY: dp, &
26 : int_8, &
27 : sp
28 : USE machine, ONLY: m_memory
29 : USE mathlib, ONLY: get_pseudo_inverse_svd, &
30 : invert_matrix
31 : USE message_passing, ONLY: mp_comm_type
32 : #if defined (__HAS_IEEE_EXCEPTIONS)
33 : USE ieee_exceptions, ONLY: ieee_get_halting_mode, &
34 : ieee_set_halting_mode, &
35 : IEEE_ALL
36 : #endif
37 : #include "../base/base_uses.f90"
38 :
39 : IMPLICIT NONE
40 : PRIVATE
41 :
42 : LOGICAL, PRIVATE, PARAMETER :: debug_this_module = .TRUE.
43 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_fm_basic_linalg'
44 :
45 : PUBLIC :: cp_fm_scale, & ! scale a matrix
46 : cp_fm_scale_and_add, & ! scale and add two matrices
47 : cp_fm_geadd, & ! general addition
48 : cp_fm_add_columns, & ! daxpy type column add
49 : cp_fm_column_scale, & ! scale columns of a matrix
50 : cp_fm_row_scale, & ! scale rows of a matrix
51 : cp_fm_trace, & ! trace of the transpose(A)*B
52 : cp_fm_contracted_trace, & ! sum_{i,...,k} Tr [A(i,...,k)^T * B(i,...,k)]
53 : cp_fm_norm, & ! different norms of A
54 : cp_fm_schur_product, & ! schur product
55 : cp_fm_transpose, & ! transpose a matrix
56 : cp_fm_uplo_to_full, & ! symmetrise a triangular matrix
57 : cp_fm_syrk, & ! rank k update
58 : cp_fm_triangular_multiply, & ! triangular matrix multiply / solve
59 : cp_fm_symm, & ! multiply a symmetric with a non-symmetric matrix
60 : cp_fm_gemm, & ! multiply two matrices
61 : cp_complex_fm_gemm, & ! multiply two complex matrices, represented by non_complex fm matrices
62 : cp_fm_invert, & ! computes the inverse and determinant
63 : cp_fm_frobenius_norm, & ! frobenius norm
64 : cp_fm_triangular_invert, & ! compute the reciprocal of a triangular matrix
65 : cp_fm_qr_factorization, & ! compute the QR factorization of a rectangular matrix
66 : cp_fm_solve, & ! solves the equation A*B=C A and C are input
67 : cp_fm_pdgeqpf, & ! compute a QR factorization with column pivoting of a M-by-N distributed matrix
68 : cp_fm_pdorgqr, & ! generates an M-by-N as first N columns of a product of K elementary reflectors
69 : cp_fm_potrf, & ! Cholesky decomposition
70 : cp_fm_potri, & ! Invert triangular matrix
71 : cp_fm_rot_rows, & ! rotates two rows
72 : cp_fm_rot_cols, & ! rotates two columns
73 : cp_fm_Gram_Schmidt_orthonorm, & ! Gram-Schmidt orthonormalization of columns of a full matrix, &
74 : cp_fm_det, & ! determinant of a real matrix with correct sign
75 : cp_fm_matvec ! matrix-vector multiplication (vector replicated)
76 :
77 : REAL(KIND=dp), EXTERNAL :: dlange, pdlange, pdlatra
78 : REAL(KIND=sp), EXTERNAL :: slange, pslange, pslatra
79 :
80 : INTERFACE cp_fm_trace
81 : MODULE PROCEDURE cp_fm_trace_a0b0t0
82 : MODULE PROCEDURE cp_fm_trace_a1b0t1_a
83 : MODULE PROCEDURE cp_fm_trace_a1b0t1_p
84 : MODULE PROCEDURE cp_fm_trace_a1b1t1_aa
85 : MODULE PROCEDURE cp_fm_trace_a1b1t1_ap
86 : MODULE PROCEDURE cp_fm_trace_a1b1t1_pa
87 : MODULE PROCEDURE cp_fm_trace_a1b1t1_pp
88 : END INTERFACE cp_fm_trace
89 :
90 : INTERFACE cp_fm_contracted_trace
91 : MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2_aa
92 : MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2_ap
93 : MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2_pa
94 : MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2_pp
95 : END INTERFACE cp_fm_contracted_trace
96 : CONTAINS
97 :
98 : ! **************************************************************************************************
99 : !> \brief Computes the determinant (with a correct sign even in parallel environment!) of a real square matrix
100 : !> \author A. Sinyavskiy (andrey.sinyavskiy@chem.uzh.ch)
101 : ! **************************************************************************************************
102 0 : SUBROUTINE cp_fm_det(matrix_a, det_a)
103 :
104 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a
105 : REAL(KIND=dp), INTENT(OUT) :: det_a
106 : REAL(KIND=dp) :: determinant
107 : TYPE(cp_fm_type) :: matrix_lu
108 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
109 : INTEGER :: n, i, info, P
110 0 : INTEGER, ALLOCATABLE, DIMENSION(:) :: ipivot
111 : REAL(KIND=dp), DIMENSION(:), POINTER :: diag
112 :
113 : #if defined(__parallel)
114 : INTEGER :: myprow, nprow, npcol, nrow_local, nrow_block, irow_local
115 : INTEGER, DIMENSION(9) :: desca
116 : #endif
117 :
118 : CALL cp_fm_create(matrix=matrix_lu, &
119 : matrix_struct=matrix_a%matrix_struct, &
120 0 : name="A_lu"//TRIM(ADJUSTL(cp_to_string(1)))//"MATRIX")
121 0 : CALL cp_fm_to_fm(matrix_a, matrix_lu)
122 :
123 0 : a => matrix_lu%local_data
124 0 : n = matrix_lu%matrix_struct%nrow_global
125 0 : ALLOCATE (ipivot(n))
126 0 : ipivot(:) = 0
127 0 : P = 0
128 0 : ALLOCATE (diag(n))
129 0 : diag(:) = 0.0_dp
130 : #if defined(__parallel)
131 : ! Use LU decomposition
132 0 : desca(:) = matrix_lu%matrix_struct%descriptor(:)
133 0 : CALL pdgetrf(n, n, a, 1, 1, desca, ipivot, info)
134 0 : CALL cp_fm_get_diag(matrix_lu, diag)
135 0 : determinant = PRODUCT(diag)
136 0 : myprow = matrix_lu%matrix_struct%context%mepos(1)
137 0 : nprow = matrix_lu%matrix_struct%context%num_pe(1)
138 0 : npcol = matrix_lu%matrix_struct%context%num_pe(2)
139 0 : nrow_local = matrix_lu%matrix_struct%nrow_locals(myprow)
140 0 : nrow_block = matrix_lu%matrix_struct%nrow_block
141 0 : DO irow_local = 1, nrow_local
142 0 : i = matrix_lu%matrix_struct%row_indices(irow_local)
143 0 : IF (ipivot(irow_local) /= i) P = P + 1
144 : END DO
145 0 : CALL matrix_lu%matrix_struct%para_env%sum(P)
146 : ! very important fix
147 0 : P = P/npcol
148 : #else
149 : CALL dgetrf(n, n, a, n, ipivot, info)
150 : CALL cp_fm_get_diag(matrix_lu, diag)
151 : determinant = PRODUCT(diag)
152 : DO i = 1, n
153 : IF (ipivot(i) /= i) P = P + 1
154 : END DO
155 : #endif
156 0 : DEALLOCATE (ipivot)
157 0 : DEALLOCATE (diag)
158 0 : CALL cp_fm_release(matrix_lu)
159 0 : det_a = determinant*(-2*MOD(P, 2) + 1.0_dp)
160 0 : END SUBROUTINE cp_fm_det
161 :
162 : ! **************************************************************************************************
163 : !> \brief calc A <- alpha*A + beta*B
164 : !> optimized for alpha == 1.0 (just add beta*B) and beta == 0.0 (just
165 : !> scale A)
166 : !> \param alpha ...
167 : !> \param matrix_a ...
168 : !> \param beta ...
169 : !> \param matrix_b ...
170 : ! **************************************************************************************************
171 1109542 : SUBROUTINE cp_fm_scale_and_add(alpha, matrix_a, beta, matrix_b)
172 :
173 : REAL(KIND=dp), INTENT(IN) :: alpha
174 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a
175 : REAL(KIND=dp), INTENT(IN), OPTIONAL :: beta
176 : TYPE(cp_fm_type), INTENT(IN), OPTIONAL :: matrix_b
177 :
178 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_scale_and_add'
179 :
180 : INTEGER :: handle, size_a, size_b
181 : REAL(KIND=dp) :: my_beta
182 1109542 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, b
183 1109542 : REAL(KIND=sp), DIMENSION(:, :), POINTER :: a_sp, b_sp
184 :
185 1109542 : CALL timeset(routineN, handle)
186 :
187 1109542 : my_beta = 0.0_dp
188 1109542 : IF (PRESENT(matrix_b)) my_beta = 1.0_dp
189 1109542 : IF (PRESENT(beta)) my_beta = beta
190 1109542 : NULLIFY (a, b)
191 :
192 1109542 : IF (PRESENT(beta)) THEN
193 1109542 : CPASSERT(PRESENT(matrix_b))
194 1109542 : IF (ASSOCIATED(matrix_a%local_data, matrix_b%local_data)) THEN
195 0 : CPWARN("Bad use of routine. Call cp_fm_scale instead")
196 0 : CALL cp_fm_scale(alpha + beta, matrix_a)
197 0 : CALL timestop(handle)
198 0 : RETURN
199 : END IF
200 : END IF
201 :
202 1109542 : a => matrix_a%local_data
203 1109542 : a_sp => matrix_a%local_data_sp
204 :
205 1109542 : IF (matrix_a%use_sp) THEN
206 0 : size_a = SIZE(a_sp, 1)*SIZE(a_sp, 2)
207 : ELSE
208 1109542 : size_a = SIZE(a, 1)*SIZE(a, 2)
209 : END IF
210 :
211 1109542 : IF (alpha /= 1.0_dp) THEN
212 79682 : IF (matrix_a%use_sp) THEN
213 0 : CALL sscal(size_a, REAL(alpha, sp), a_sp, 1)
214 : ELSE
215 79682 : CALL dscal(size_a, alpha, a, 1)
216 : END IF
217 : END IF
218 1109542 : IF (my_beta /= 0.0_dp) THEN
219 1102070 : IF (matrix_a%matrix_struct%context /= matrix_b%matrix_struct%context) &
220 0 : CPABORT("Matrices must be in the same blacs context")
221 :
222 1102070 : IF (cp_fm_struct_equivalent(matrix_a%matrix_struct, &
223 : matrix_b%matrix_struct)) THEN
224 :
225 1102070 : b => matrix_b%local_data
226 1102070 : b_sp => matrix_b%local_data_sp
227 1102070 : IF (matrix_b%use_sp) THEN
228 0 : size_b = SIZE(b_sp, 1)*SIZE(b_sp, 2)
229 : ELSE
230 1102070 : size_b = SIZE(b, 1)*SIZE(b, 2)
231 : END IF
232 1102070 : IF (size_a /= size_b) &
233 0 : CPABORT("Matrices must have same local sizes")
234 :
235 1102070 : IF (matrix_a%use_sp .AND. matrix_b%use_sp) THEN
236 0 : CALL saxpy(size_a, REAL(my_beta, sp), b_sp, 1, a_sp, 1)
237 1102070 : ELSEIF (matrix_a%use_sp .AND. .NOT. matrix_b%use_sp) THEN
238 0 : CALL saxpy(size_a, REAL(my_beta, sp), REAL(b, sp), 1, a_sp, 1)
239 1102070 : ELSEIF (.NOT. matrix_a%use_sp .AND. matrix_b%use_sp) THEN
240 0 : CALL daxpy(size_a, my_beta, REAL(b_sp, dp), 1, a, 1)
241 : ELSE
242 1102070 : CALL daxpy(size_a, my_beta, b, 1, a, 1)
243 : END IF
244 :
245 : ELSE
246 : #ifdef __parallel
247 0 : CPABORT("to do (pdscal,pdcopy,pdaxpy)")
248 : #else
249 : CPABORT("")
250 : #endif
251 : END IF
252 :
253 : END IF
254 :
255 1109542 : CALL timestop(handle)
256 :
257 1109542 : END SUBROUTINE cp_fm_scale_and_add
258 :
259 : ! **************************************************************************************************
260 : !> \brief interface to BLACS geadd:
261 : !> matrix_b = beta*matrix_b + alpha*opt(matrix_a)
262 : !> where opt(matrix_a) can be either:
263 : !> 'N': matrix_a
264 : !> 'T': matrix_a^T
265 : !> 'C': matrix_a^H (Hermitian conjugate)
266 : !> note that this is a level three routine, use cp_fm_scale_and_add if that
267 : !> is sufficient for your needs
268 : !> \param alpha : complex scalar
269 : !> \param trans : 'N' normal, 'T' transposed
270 : !> \param matrix_a : input matrix_a
271 : !> \param beta : complex scalar
272 : !> \param matrix_b : input matrix_b, upon out put the updated matrix_b
273 : !> \author Lianheng Tong
274 : ! **************************************************************************************************
275 532 : SUBROUTINE cp_fm_geadd(alpha, trans, matrix_a, beta, matrix_b)
276 : REAL(KIND=dp), INTENT(IN) :: alpha, beta
277 : CHARACTER, INTENT(IN) :: trans
278 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_b
279 :
280 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_geadd'
281 :
282 : INTEGER :: nrow_global, ncol_global, handle
283 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: aa, bb
284 : #if defined(__parallel)
285 : INTEGER, DIMENSION(9) :: desca, descb
286 : #elif !defined(__MKL)
287 : INTEGER :: ii, jj
288 : #endif
289 :
290 532 : CALL timeset(routineN, handle)
291 :
292 532 : nrow_global = matrix_a%matrix_struct%nrow_global
293 532 : ncol_global = matrix_a%matrix_struct%ncol_global
294 532 : CPASSERT(nrow_global == matrix_b%matrix_struct%nrow_global)
295 532 : CPASSERT(ncol_global == matrix_b%matrix_struct%ncol_global)
296 :
297 532 : aa => matrix_a%local_data
298 532 : bb => matrix_b%local_data
299 :
300 : #if defined(__parallel)
301 5320 : desca = matrix_a%matrix_struct%descriptor
302 5320 : descb = matrix_b%matrix_struct%descriptor
303 : CALL pdgeadd(trans, &
304 : nrow_global, &
305 : ncol_global, &
306 : alpha, &
307 : aa, &
308 : 1, 1, &
309 : desca, &
310 : beta, &
311 : bb, &
312 : 1, 1, &
313 532 : descb)
314 : #elif defined(__MKL)
315 : CALL mkl_domatadd('C', trans, 'N', nrow_global, ncol_global, &
316 : alpha, aa, nrow_global, beta, bb, nrow_global, bb, nrow_global)
317 : #else
318 : ! dgeadd is not a standard BLAS function, although it is implemented
319 : ! in some libraries like OpenBLAS, so not going to use it here
320 : SELECT CASE (trans)
321 : CASE ('T')
322 : DO jj = 1, ncol_global
323 : DO ii = 1, nrow_global
324 : bb(ii, jj) = beta*bb(ii, jj) + alpha*aa(jj, ii)
325 : END DO
326 : END DO
327 : CASE DEFAULT
328 : DO jj = 1, ncol_global
329 : DO ii = 1, nrow_global
330 : bb(ii, jj) = beta*bb(ii, jj) + alpha*aa(ii, jj)
331 : END DO
332 : END DO
333 : END SELECT
334 : #endif
335 :
336 532 : CALL timestop(handle)
337 :
338 532 : END SUBROUTINE cp_fm_geadd
339 :
340 : ! **************************************************************************************************
341 : !> \brief Add (and scale) a subset of columns of a fm to a fm
342 : !> b = alpha*a + b
343 : !> \param msource ...
344 : !> \param mtarget ...
345 : !> \param ncol ...
346 : !> \param alpha ...
347 : !> \param source_start ...
348 : !> \param target_start ...
349 : ! **************************************************************************************************
350 0 : SUBROUTINE cp_fm_add_columns(msource, mtarget, ncol, alpha, source_start, target_start)
351 :
352 : TYPE(cp_fm_type), INTENT(IN) :: msource, mtarget
353 : INTEGER, INTENT(IN) :: ncol
354 : REAL(KIND=dp), INTENT(IN), OPTIONAL :: alpha
355 : INTEGER, INTENT(IN), OPTIONAL :: source_start, target_start
356 :
357 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_add_columns'
358 :
359 : INTEGER :: handle, n, ss, ts, i
360 : REAL(KIND=dp) :: fscale
361 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, b
362 : #if defined(__parallel)
363 : INTEGER, DIMENSION(9) :: desca, descb
364 : #endif
365 :
366 0 : CALL timeset(routineN, handle)
367 :
368 0 : ss = 1
369 0 : ts = 1
370 :
371 0 : IF (PRESENT(source_start)) ss = source_start
372 0 : IF (PRESENT(target_start)) ts = target_start
373 :
374 0 : fscale = 1.0_dp
375 0 : IF (PRESENT(alpha)) fscale = alpha
376 :
377 0 : n = msource%matrix_struct%nrow_global
378 :
379 0 : a => msource%local_data
380 0 : b => mtarget%local_data
381 :
382 : #if defined(__parallel)
383 : MARK_USED(i)
384 0 : desca(:) = msource%matrix_struct%descriptor(:)
385 0 : descb(:) = mtarget%matrix_struct%descriptor(:)
386 0 : CALL pdgeadd("N", n, ncol, fscale, a, 1, ss, desca, 1.0_dp, b, 1, ts, descb)
387 : #else
388 : DO i = 0, ncol - 1
389 : b(1:n, ts + i) = b(1:n, ts + i) + fscale*a(1:n, ss + i)
390 : END DO
391 : #endif
392 :
393 0 : CALL timestop(handle)
394 :
395 0 : END SUBROUTINE cp_fm_add_columns
396 :
397 : ! **************************************************************************************************
398 : !> \brief Computes the LU-decomposition of the matrix, and the determinant of the matrix
399 : !> IMPORTANT : the sign of the determinant is not defined correctly yet ....
400 : !> \param matrix_a ...
401 : !> \param almost_determinant ...
402 : !> \param correct_sign ...
403 : !> \par History
404 : !> added correct_sign 02.07 (fschiff)
405 : !> \author Joost VandeVondele
406 : !> \note
407 : !> - matrix_a is overwritten
408 : !> - the sign of the determinant might be wrong
409 : !> - SERIOUS WARNING (KNOWN BUG) : the sign of the determinant depends on ipivot
410 : !> - one should be able to find out if ipivot is an even or an odd permutation...
411 : !> if you need the correct sign, just add correct_sign==.TRUE. (fschiff)
412 : !> - Use cp_fm_get_diag instead of n times cp_fm_get_element (A. Bussy)
413 : ! **************************************************************************************************
414 0 : SUBROUTINE cp_fm_lu_decompose(matrix_a, almost_determinant, correct_sign)
415 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a
416 : REAL(KIND=dp), INTENT(OUT) :: almost_determinant
417 : LOGICAL, INTENT(IN), OPTIONAL :: correct_sign
418 :
419 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_lu_decompose'
420 :
421 : INTEGER :: handle, i, info, n
422 0 : INTEGER, ALLOCATABLE, DIMENSION(:) :: ipivot
423 : REAL(KIND=dp) :: determinant
424 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
425 : #if defined(__parallel)
426 : INTEGER, DIMENSION(9) :: desca
427 0 : REAL(KIND=dp), DIMENSION(:), POINTER :: diag
428 : #else
429 : INTEGER :: lda
430 : #endif
431 :
432 0 : CALL timeset(routineN, handle)
433 :
434 0 : a => matrix_a%local_data
435 0 : n = matrix_a%matrix_struct%nrow_global
436 0 : ALLOCATE (ipivot(n + matrix_a%matrix_struct%nrow_block))
437 :
438 : #if defined(__parallel)
439 : MARK_USED(correct_sign)
440 0 : desca(:) = matrix_a%matrix_struct%descriptor(:)
441 0 : CALL pdgetrf(n, n, a, 1, 1, desca, ipivot, info)
442 :
443 0 : ALLOCATE (diag(n))
444 0 : diag(:) = 0.0_dp
445 0 : CALL cp_fm_get_diag(matrix_a, diag)
446 0 : determinant = 1.0_dp
447 0 : DO i = 1, n
448 0 : determinant = determinant*diag(i)
449 : END DO
450 0 : DEALLOCATE (diag)
451 : #else
452 : lda = SIZE(a, 1)
453 : CALL dgetrf(n, n, a, lda, ipivot, info)
454 : determinant = 1.0_dp
455 : IF (correct_sign) THEN
456 : DO i = 1, n
457 : IF (ipivot(i) /= i) THEN
458 : determinant = -determinant*a(i, i)
459 : ELSE
460 : determinant = determinant*a(i, i)
461 : END IF
462 : END DO
463 : ELSE
464 : DO i = 1, n
465 : determinant = determinant*a(i, i)
466 : END DO
467 : END IF
468 : #endif
469 : ! info is allowed to be zero
470 : ! this does just signal a zero diagonal element
471 0 : DEALLOCATE (ipivot)
472 0 : almost_determinant = determinant ! notice that the sign is random
473 0 : CALL timestop(handle)
474 0 : END SUBROUTINE cp_fm_lu_decompose
475 :
476 : ! **************************************************************************************************
477 : !> \brief computes matrix_c = beta * matrix_c + alpha * ( matrix_a ** transa ) * ( matrix_b ** transb )
478 : !> \param transa : 'N' -> normal 'T' -> transpose
479 : !> alpha,beta :: can be 0.0_dp and 1.0_dp
480 : !> \param transb ...
481 : !> \param m ...
482 : !> \param n ...
483 : !> \param k ...
484 : !> \param alpha ...
485 : !> \param matrix_a : m x k matrix ( ! for transa = 'N')
486 : !> \param matrix_b : k x n matrix ( ! for transb = 'N')
487 : !> \param beta ...
488 : !> \param matrix_c : m x n matrix
489 : !> \param a_first_col ...
490 : !> \param a_first_row ...
491 : !> \param b_first_col : the k x n matrix starts at col b_first_col of matrix_b (avoid usage)
492 : !> \param b_first_row ...
493 : !> \param c_first_col ...
494 : !> \param c_first_row ...
495 : !> \author Matthias Krack
496 : !> \note
497 : !> matrix_c should have no overlap with matrix_a, matrix_b
498 : ! **************************************************************************************************
499 610 : SUBROUTINE cp_fm_gemm(transa, transb, m, n, k, alpha, matrix_a, matrix_b, beta, &
500 : matrix_c, a_first_col, a_first_row, b_first_col, b_first_row, &
501 : c_first_col, c_first_row)
502 :
503 : CHARACTER(LEN=1), INTENT(IN) :: transa, transb
504 : INTEGER, INTENT(IN) :: m, n, k
505 : REAL(KIND=dp), INTENT(IN) :: alpha
506 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_b
507 : REAL(KIND=dp), INTENT(IN) :: beta
508 : TYPE(cp_fm_type), INTENT(IN) :: matrix_c
509 : INTEGER, INTENT(IN), OPTIONAL :: a_first_col, a_first_row, &
510 : b_first_col, b_first_row, &
511 : c_first_col, c_first_row
512 :
513 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_gemm'
514 :
515 : INTEGER :: handle, i_a, i_b, i_c, j_a, &
516 : j_b, j_c
517 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, b, c
518 610 : REAL(KIND=sp), DIMENSION(:, :), POINTER :: a_sp, b_sp, c_sp
519 : #if defined(__parallel)
520 : INTEGER, DIMENSION(9) :: desca, descb, descc
521 : #else
522 : INTEGER :: lda, ldb, ldc
523 : #endif
524 :
525 610 : CALL timeset(routineN, handle)
526 :
527 : !sample peak memory
528 610 : CALL m_memory()
529 :
530 610 : a => matrix_a%local_data
531 610 : b => matrix_b%local_data
532 610 : c => matrix_c%local_data
533 :
534 610 : a_sp => matrix_a%local_data_sp
535 610 : b_sp => matrix_b%local_data_sp
536 610 : c_sp => matrix_c%local_data_sp
537 :
538 610 : i_a = 1
539 610 : IF (PRESENT(a_first_row)) i_a = a_first_row
540 :
541 610 : j_a = 1
542 610 : IF (PRESENT(a_first_col)) j_a = a_first_col
543 :
544 610 : i_b = 1
545 610 : IF (PRESENT(b_first_row)) i_b = b_first_row
546 :
547 610 : j_b = 1
548 610 : IF (PRESENT(b_first_col)) j_b = b_first_col
549 :
550 610 : i_c = 1
551 610 : IF (PRESENT(c_first_row)) i_c = c_first_row
552 :
553 610 : j_c = 1
554 610 : IF (PRESENT(c_first_col)) j_c = c_first_col
555 :
556 : #if defined(__parallel)
557 :
558 6100 : desca(:) = matrix_a%matrix_struct%descriptor(:)
559 6100 : descb(:) = matrix_b%matrix_struct%descriptor(:)
560 6100 : descc(:) = matrix_c%matrix_struct%descriptor(:)
561 :
562 610 : IF (matrix_a%use_sp .AND. matrix_b%use_sp .AND. matrix_c%use_sp) THEN
563 :
564 : CALL psgemm(transa, transb, m, n, k, REAL(alpha, sp), a_sp(1, 1), i_a, j_a, desca, b_sp(1, 1), i_b, j_b, &
565 0 : descb, REAL(beta, sp), c_sp(1, 1), i_c, j_c, descc)
566 :
567 610 : ELSEIF ((.NOT. matrix_a%use_sp) .AND. (.NOT. matrix_b%use_sp) .AND. (.NOT. matrix_c%use_sp)) THEN
568 :
569 : CALL pdgemm(transa, transb, m, n, k, alpha, a, i_a, j_a, desca, b, i_b, j_b, &
570 610 : descb, beta, c, i_c, j_c, descc)
571 :
572 : ELSE
573 0 : CPABORT("Mixed precision gemm NYI")
574 : END IF
575 : #else
576 :
577 : IF (matrix_a%use_sp .AND. matrix_b%use_sp .AND. matrix_c%use_sp) THEN
578 :
579 : lda = SIZE(a_sp, 1)
580 : ldb = SIZE(b_sp, 1)
581 : ldc = SIZE(c_sp, 1)
582 :
583 : CALL sgemm(transa, transb, m, n, k, REAL(alpha, sp), a_sp(i_a, j_a), lda, b_sp(i_b, j_b), ldb, &
584 : REAL(beta, sp), c_sp(i_c, j_c), ldc)
585 :
586 : ELSEIF ((.NOT. matrix_a%use_sp) .AND. (.NOT. matrix_b%use_sp) .AND. (.NOT. matrix_c%use_sp)) THEN
587 :
588 : lda = SIZE(a, 1)
589 : ldb = SIZE(b, 1)
590 : ldc = SIZE(c, 1)
591 :
592 : CALL dgemm(transa, transb, m, n, k, alpha, a(i_a, j_a), lda, b(i_b, j_b), ldb, beta, c(i_c, j_c), ldc)
593 :
594 : ELSE
595 : CPABORT("Mixed precision gemm NYI")
596 : END IF
597 :
598 : #endif
599 610 : CALL timestop(handle)
600 :
601 610 : END SUBROUTINE cp_fm_gemm
602 :
603 : ! **************************************************************************************************
604 : !> \brief computes matrix_c = beta * matrix_c + alpha * matrix_a * matrix_b
605 : !> computes matrix_c = beta * matrix_c + alpha * matrix_b * matrix_a
606 : !> where matrix_a is symmetric
607 : !> \param side : 'L' -> matrix_a is on the left 'R' -> matrix_a is on the right
608 : !> alpha,beta :: can be 0.0_dp and 1.0_dp
609 : !> \param uplo triangular format
610 : !> \param m ...
611 : !> \param n ...
612 : !> \param alpha ...
613 : !> \param matrix_a : m x m matrix
614 : !> \param matrix_b : m x n matrix
615 : !> \param beta ...
616 : !> \param matrix_c : m x n matrix
617 : !> \author Matthias Krack
618 : !> \note
619 : !> matrix_c should have no overlap with matrix_a, matrix_b
620 : !> all matrices in QS are triangular according to uplo
621 : !> matrix_a is always an m x m matrix
622 : !> typically slower than cp_fm_gemm (especially in parallel easily 50 percent)
623 : ! **************************************************************************************************
624 167310 : SUBROUTINE cp_fm_symm(side, uplo, m, n, alpha, matrix_a, matrix_b, beta, matrix_c)
625 :
626 : CHARACTER(LEN=1), INTENT(IN) :: side, uplo
627 : INTEGER, INTENT(IN) :: m, n
628 : REAL(KIND=dp), INTENT(IN) :: alpha
629 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_b
630 : REAL(KIND=dp), INTENT(IN) :: beta
631 : TYPE(cp_fm_type), INTENT(IN) :: matrix_c
632 :
633 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_symm'
634 :
635 : INTEGER :: handle
636 167310 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, b, c
637 : #if defined(__parallel)
638 : INTEGER, DIMENSION(9) :: desca, descb, descc
639 : #else
640 : INTEGER :: lda, ldb, ldc
641 : #endif
642 :
643 167310 : CALL timeset(routineN, handle)
644 :
645 167310 : a => matrix_a%local_data
646 167310 : b => matrix_b%local_data
647 167310 : c => matrix_c%local_data
648 :
649 : #if defined(__parallel)
650 :
651 1673100 : desca(:) = matrix_a%matrix_struct%descriptor(:)
652 1673100 : descb(:) = matrix_b%matrix_struct%descriptor(:)
653 1673100 : descc(:) = matrix_c%matrix_struct%descriptor(:)
654 :
655 167310 : CALL pdsymm(side, uplo, m, n, alpha, a(1, 1), 1, 1, desca, b(1, 1), 1, 1, descb, beta, c(1, 1), 1, 1, descc)
656 :
657 : #else
658 :
659 : lda = matrix_a%matrix_struct%local_leading_dimension
660 : ldb = matrix_b%matrix_struct%local_leading_dimension
661 : ldc = matrix_c%matrix_struct%local_leading_dimension
662 :
663 : CALL dsymm(side, uplo, m, n, alpha, a(1, 1), lda, b(1, 1), ldb, beta, c(1, 1), ldc)
664 :
665 : #endif
666 167310 : CALL timestop(handle)
667 :
668 167310 : END SUBROUTINE cp_fm_symm
669 :
670 : ! **************************************************************************************************
671 : !> \brief computes the Frobenius norm of matrix_a
672 : !> \brief computes the Frobenius norm of matrix_a
673 : !> \param matrix_a : m x n matrix
674 : !> \return ...
675 : !> \author VW
676 : ! **************************************************************************************************
677 7926 : FUNCTION cp_fm_frobenius_norm(matrix_a) RESULT(norm)
678 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a
679 : REAL(KIND=dp) :: norm
680 :
681 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_frobenius_norm'
682 :
683 : INTEGER :: handle, size_a
684 7926 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
685 : REAL(KIND=dp), EXTERNAL :: DDOT
686 : #if defined(__parallel)
687 : TYPE(mp_comm_type) :: group
688 : #endif
689 :
690 7926 : CALL timeset(routineN, handle)
691 :
692 : norm = 0.0_dp
693 7926 : a => matrix_a%local_data
694 7926 : size_a = SIZE(a, 1)*SIZE(a, 2)
695 7926 : norm = DDOT(size_a, a(1, 1), 1, a(1, 1), 1)
696 : #if defined(__parallel)
697 7926 : group = matrix_a%matrix_struct%para_env
698 7926 : CALL group%sum(norm)
699 : #endif
700 7926 : norm = SQRT(norm)
701 :
702 7926 : CALL timestop(handle)
703 :
704 7926 : END FUNCTION cp_fm_frobenius_norm
705 :
706 : ! **************************************************************************************************
707 : !> \brief performs a rank-k update of a symmetric matrix_c
708 : !> matrix_c = beta * matrix_c + alpha * matrix_a * transpose ( matrix_a )
709 : !> \param uplo : 'U' ('L')
710 : !> \param trans : 'N' ('T')
711 : !> \param k : number of cols to use in matrix_a
712 : !> ia,ja :: 1,1 (could be used for selecting subblock of a)
713 : !> \param alpha ...
714 : !> \param matrix_a ...
715 : !> \param ia ...
716 : !> \param ja ...
717 : !> \param beta ...
718 : !> \param matrix_c ...
719 : !> \author Matthias Krack
720 : ! **************************************************************************************************
721 8306 : SUBROUTINE cp_fm_syrk(uplo, trans, k, alpha, matrix_a, ia, ja, beta, matrix_c)
722 : CHARACTER(LEN=1), INTENT(IN) :: uplo, trans
723 : INTEGER, INTENT(IN) :: k
724 : REAL(KIND=dp), INTENT(IN) :: alpha
725 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a
726 : INTEGER, INTENT(IN) :: ia, ja
727 : REAL(KIND=dp), INTENT(IN) :: beta
728 : TYPE(cp_fm_type), INTENT(IN) :: matrix_c
729 :
730 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_syrk'
731 :
732 : INTEGER :: handle, n
733 8306 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, c
734 : #if defined(__parallel)
735 : INTEGER, DIMENSION(9) :: desca, descc
736 : #else
737 : INTEGER :: lda, ldc
738 : #endif
739 : #if defined (__HAS_IEEE_EXCEPTIONS)
740 : LOGICAL, DIMENSION(5) :: halt
741 : #endif
742 :
743 8306 : CALL timeset(routineN, handle)
744 :
745 8306 : n = matrix_c%matrix_struct%nrow_global
746 :
747 8306 : a => matrix_a%local_data
748 8306 : c => matrix_c%local_data
749 :
750 : #if defined (__HAS_IEEE_EXCEPTIONS)
751 : CALL ieee_get_halting_mode(IEEE_ALL, halt)
752 : CALL ieee_set_halting_mode(IEEE_ALL, .FALSE.)
753 : #endif
754 : #if defined(__parallel)
755 83060 : desca(:) = matrix_a%matrix_struct%descriptor(:)
756 83060 : descc(:) = matrix_c%matrix_struct%descriptor(:)
757 :
758 8306 : CALL pdsyrk(uplo, trans, n, k, alpha, a(1, 1), ia, ja, desca, beta, c(1, 1), 1, 1, descc)
759 : #else
760 : lda = SIZE(a, 1)
761 : ldc = SIZE(c, 1)
762 :
763 : CALL dsyrk(uplo, trans, n, k, alpha, a(ia, ja), lda, beta, c(1, 1), ldc)
764 : #endif
765 : #if defined (__HAS_IEEE_EXCEPTIONS)
766 : CALL ieee_set_halting_mode(IEEE_ALL, halt)
767 : #endif
768 8306 : CALL timestop(handle)
769 :
770 8306 : END SUBROUTINE cp_fm_syrk
771 :
772 : ! **************************************************************************************************
773 : !> \brief computes the schur product of two matrices
774 : !> c_ij = a_ij * b_ij
775 : !> \param matrix_a ...
776 : !> \param matrix_b ...
777 : !> \param matrix_c ...
778 : !> \author Joost VandeVondele
779 : ! **************************************************************************************************
780 9168 : SUBROUTINE cp_fm_schur_product(matrix_a, matrix_b, matrix_c)
781 :
782 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_b, matrix_c
783 :
784 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_schur_product'
785 :
786 : INTEGER :: handle, icol_local, irow_local, mypcol, &
787 : myprow, ncol_local, &
788 : nrow_local
789 9168 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, b, c
790 : TYPE(cp_blacs_env_type), POINTER :: context
791 :
792 9168 : CALL timeset(routineN, handle)
793 :
794 9168 : context => matrix_a%matrix_struct%context
795 9168 : myprow = context%mepos(1)
796 9168 : mypcol = context%mepos(2)
797 :
798 9168 : a => matrix_a%local_data
799 9168 : b => matrix_b%local_data
800 9168 : c => matrix_c%local_data
801 :
802 9168 : nrow_local = matrix_a%matrix_struct%nrow_locals(myprow)
803 9168 : ncol_local = matrix_a%matrix_struct%ncol_locals(mypcol)
804 :
805 98792 : DO icol_local = 1, ncol_local
806 6666536 : DO irow_local = 1, nrow_local
807 6657368 : c(irow_local, icol_local) = a(irow_local, icol_local)*b(irow_local, icol_local)
808 : END DO
809 : END DO
810 :
811 9168 : CALL timestop(handle)
812 :
813 9168 : END SUBROUTINE cp_fm_schur_product
814 :
815 : ! **************************************************************************************************
816 : !> \brief returns the trace of matrix_a^T matrix_b, i.e
817 : !> sum_{i,j}(matrix_a(i,j)*matrix_b(i,j))
818 : !> \param matrix_a a matrix
819 : !> \param matrix_b another matrix
820 : !> \param trace ...
821 : !> \par History
822 : !> 11.06.2001 Creation (Matthias Krack)
823 : !> 12.2002 added doc [fawzi]
824 : !> \author Matthias Krack
825 : !> \note
826 : !> note the transposition of matrix_a!
827 : ! **************************************************************************************************
828 555016 : SUBROUTINE cp_fm_trace_a0b0t0(matrix_a, matrix_b, trace)
829 :
830 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_b
831 : REAL(KIND=dp), INTENT(OUT) :: trace
832 :
833 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a0b0t0'
834 :
835 : INTEGER :: handle, mypcol, myprow, ncol_local, &
836 : nrow_local
837 555016 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, b
838 555016 : REAL(KIND=sp), DIMENSION(:, :), POINTER :: a_sp, b_sp
839 : TYPE(cp_blacs_env_type), POINTER :: context
840 : TYPE(mp_comm_type) :: group
841 :
842 555016 : CALL timeset(routineN, handle)
843 :
844 555016 : context => matrix_a%matrix_struct%context
845 555016 : myprow = context%mepos(1)
846 555016 : mypcol = context%mepos(2)
847 :
848 555016 : group = matrix_a%matrix_struct%para_env
849 :
850 555016 : a => matrix_a%local_data
851 555016 : b => matrix_b%local_data
852 :
853 555016 : a_sp => matrix_a%local_data_sp
854 555016 : b_sp => matrix_b%local_data_sp
855 :
856 555016 : nrow_local = MIN(matrix_a%matrix_struct%nrow_locals(myprow), matrix_b%matrix_struct%nrow_locals(myprow))
857 555016 : ncol_local = MIN(matrix_a%matrix_struct%ncol_locals(mypcol), matrix_b%matrix_struct%ncol_locals(mypcol))
858 :
859 : ! cries for an accurate_dot_product
860 555016 : IF (matrix_a%use_sp .AND. matrix_b%use_sp) THEN
861 : trace = accurate_sum(REAL(a_sp(1:nrow_local, 1:ncol_local)* &
862 0 : b_sp(1:nrow_local, 1:ncol_local), dp))
863 555016 : ELSEIF (matrix_a%use_sp .AND. .NOT. matrix_b%use_sp) THEN
864 : trace = accurate_sum(REAL(a_sp(1:nrow_local, 1:ncol_local), dp)* &
865 0 : b(1:nrow_local, 1:ncol_local))
866 555016 : ELSEIF (.NOT. matrix_a%use_sp .AND. matrix_b%use_sp) THEN
867 : trace = accurate_sum(a(1:nrow_local, 1:ncol_local)* &
868 0 : REAL(b_sp(1:nrow_local, 1:ncol_local), dp))
869 : ELSE
870 : trace = accurate_dot_product(a(1:nrow_local, 1:ncol_local), &
871 555016 : b(1:nrow_local, 1:ncol_local))
872 : END IF
873 :
874 555016 : CALL group%sum(trace)
875 :
876 555016 : CALL timestop(handle)
877 :
878 555016 : END SUBROUTINE cp_fm_trace_a0b0t0
879 :
880 : #:mute
881 : #:set types = [("cp_fm_type", "a", ""), ("cp_fm_p_type", "p","%matrix")]
882 : #:endmute
883 :
884 : ! **************************************************************************************************
885 : !> \brief Compute trace(k) = Tr (matrix_a(k)^T matrix_b) for each pair of matrices A_k and B.
886 : !> \param matrix_a list of A matrices
887 : !> \param matrix_b B matrix
888 : !> \param trace computed traces
889 : !> \par History
890 : !> * 08.2018 forked from cp_fm_trace() [Sergey Chulkov]
891 : !> \note \parblock
892 : !> Computing the trace requires collective communication between involved MPI processes
893 : !> that implies a synchronisation point between them. The aim of this subroutine is to reduce
894 : !> the amount of time wasted in such synchronisation by performing one large collective
895 : !> operation which involves all the matrices in question.
896 : !>
897 : !> The subroutine's suffix reflects dimensionality of dummy arrays; 'a1b0t1' means that
898 : !> the dummy variables 'matrix_a' and 'trace' are 1-dimensional arrays, while the variable
899 : !> 'matrix_b' is a single matrix.
900 : !> \endparblock
901 : ! **************************************************************************************************
902 : #:for longname, shortname, appendix in types
903 3174 : SUBROUTINE cp_fm_trace_a1b0t1_${shortname}$ (matrix_a, matrix_b, trace)
904 : TYPE(${longname}$), DIMENSION(:), INTENT(IN) :: matrix_a
905 : TYPE(cp_fm_type), INTENT(IN) :: matrix_b
906 : REAL(kind=dp), DIMENSION(:), INTENT(OUT) :: trace
907 :
908 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a1b0t1_${shortname}$'
909 :
910 : INTEGER :: handle, imatrix, n_matrices, &
911 : ncols_local, nrows_local
912 : LOGICAL :: use_sp_a, use_sp_b
913 3174 : REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
914 3174 : REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
915 : TYPE(mp_comm_type) :: group
916 :
917 3174 : CALL timeset(routineN, handle)
918 :
919 3174 : n_matrices = SIZE(trace)
920 3174 : CPASSERT(SIZE(matrix_a) == n_matrices)
921 :
922 3174 : CALL cp_fm_get_info(matrix_b, nrow_local=nrows_local, ncol_local=ncols_local)
923 3174 : use_sp_b = matrix_b%use_sp
924 :
925 3174 : IF (use_sp_b) THEN
926 0 : ldata_b_sp => matrix_b%local_data_sp(1:nrows_local, 1:ncols_local)
927 : ELSE
928 3174 : ldata_b => matrix_b%local_data(1:nrows_local, 1:ncols_local)
929 : END IF
930 :
931 : !$OMP PARALLEL DO DEFAULT(NONE), &
932 : !$OMP PRIVATE(imatrix, ldata_a, ldata_a_sp, use_sp_a), &
933 : !$OMP SHARED(ldata_b, ldata_b_sp, matrix_a, matrix_b), &
934 3174 : !$OMP SHARED(ncols_local, nrows_local, n_matrices, trace, use_sp_b)
935 :
936 : DO imatrix = 1, n_matrices
937 :
938 : use_sp_a = matrix_a(imatrix) ${appendix}$%use_sp
939 :
940 : ! assume that the matrices A(i) and B have identical shapes and distribution schemes
941 : IF (use_sp_a .AND. use_sp_b) THEN
942 : ldata_a_sp => matrix_a(imatrix) ${appendix}$%local_data_sp(1:nrows_local, 1:ncols_local)
943 : trace(imatrix) = accurate_dot_product(ldata_a_sp, ldata_b_sp)
944 : ELSE IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
945 : ldata_a => matrix_a(imatrix) ${appendix}$%local_data(1:nrows_local, 1:ncols_local)
946 : trace(imatrix) = accurate_dot_product(ldata_a, ldata_b)
947 : ELSE
948 : CPABORT("Matrices A and B are of different types")
949 : END IF
950 : END DO
951 : !$OMP END PARALLEL DO
952 :
953 3174 : group = matrix_b%matrix_struct%para_env
954 19878 : CALL group%sum(trace)
955 :
956 3174 : CALL timestop(handle)
957 3174 : END SUBROUTINE cp_fm_trace_a1b0t1_${shortname}$
958 : #:endfor
959 :
960 : ! **************************************************************************************************
961 : !> \brief Compute trace(k) = Tr (matrix_a(k)^T matrix_b(k)) for each pair of matrices A_k and B_k.
962 : !> \param matrix_a list of A matrices
963 : !> \param matrix_b list of B matrices
964 : !> \param trace computed traces
965 : !> \param accurate ...
966 : !> \par History
967 : !> * 11.2016 forked from cp_fm_trace() [Sergey Chulkov]
968 : !> \note \parblock
969 : !> Computing the trace requires collective communication between involved MPI processes
970 : !> that implies a synchronisation point between them. The aim of this subroutine is to reduce
971 : !> the amount of time wasted in such synchronisation by performing one large collective
972 : !> operation which involves all the matrices in question.
973 : !>
974 : !> The subroutine's suffix reflects dimensionality of dummy arrays; 'a1b1t1' means that
975 : !> all dummy variables (matrix_a, matrix_b, and trace) are 1-dimensional arrays.
976 : !> \endparblock
977 : ! **************************************************************************************************
978 : #:for longname1, shortname1, appendix1 in types
979 : #:for longname2, shortname2, appendix2 in types
980 149486 : SUBROUTINE cp_fm_trace_a1b1t1_${shortname1}$${shortname2}$ (matrix_a, matrix_b, trace, accurate)
981 : TYPE(${longname1}$), DIMENSION(:), INTENT(IN) :: matrix_a
982 : TYPE(${longname2}$), DIMENSION(:), INTENT(IN) :: matrix_b
983 : REAL(kind=dp), DIMENSION(:), INTENT(OUT) :: trace
984 : LOGICAL, INTENT(IN), OPTIONAL :: accurate
985 :
986 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a1b1t1_${shortname1}$${shortname2}$'
987 :
988 : INTEGER :: handle, imatrix, n_matrices, &
989 : ncols_local, nrows_local
990 : LOGICAL :: use_accurate_sum, use_sp_a, use_sp_b
991 149486 : REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
992 149486 : REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
993 : TYPE(mp_comm_type) :: group
994 :
995 149486 : CALL timeset(routineN, handle)
996 :
997 149486 : n_matrices = SIZE(trace)
998 149486 : CPASSERT(SIZE(matrix_a) == n_matrices)
999 149486 : CPASSERT(SIZE(matrix_b) == n_matrices)
1000 :
1001 149486 : use_accurate_sum = .TRUE.
1002 149486 : IF (PRESENT(accurate)) use_accurate_sum = accurate
1003 :
1004 : !$OMP PARALLEL DO DEFAULT(NONE), &
1005 : !$OMP PRIVATE(imatrix, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
1006 : !$OMP PRIVATE(nrows_local, use_sp_a, use_sp_b), &
1007 149486 : !$OMP SHARED(matrix_a, matrix_b, n_matrices, trace, use_accurate_sum)
1008 : DO imatrix = 1, n_matrices
1009 : CALL cp_fm_get_info(matrix_a(imatrix) ${appendix1}$, nrow_local=nrows_local, ncol_local=ncols_local)
1010 :
1011 : use_sp_a = matrix_a(imatrix) ${appendix1}$%use_sp
1012 : use_sp_b = matrix_b(imatrix) ${appendix2}$%use_sp
1013 :
1014 : ! assume that the matrices A(i) and B(i) have identical shapes and distribution schemes
1015 : IF (use_sp_a .AND. use_sp_b) THEN
1016 : ldata_a_sp => matrix_a(imatrix) ${appendix1}$%local_data_sp(1:nrows_local, 1:ncols_local)
1017 : ldata_b_sp => matrix_b(imatrix) ${appendix2}$%local_data_sp(1:nrows_local, 1:ncols_local)
1018 : IF (use_accurate_sum) THEN
1019 : trace(imatrix) = accurate_dot_product(ldata_a_sp, ldata_b_sp)
1020 : ELSE
1021 : trace(imatrix) = SUM(ldata_a_sp*ldata_b_sp)
1022 : END IF
1023 : ELSE IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
1024 : ldata_a => matrix_a(imatrix) ${appendix1}$%local_data(1:nrows_local, 1:ncols_local)
1025 : ldata_b => matrix_b(imatrix) ${appendix2}$%local_data(1:nrows_local, 1:ncols_local)
1026 : IF (use_accurate_sum) THEN
1027 : trace(imatrix) = accurate_dot_product(ldata_a, ldata_b)
1028 : ELSE
1029 : trace(imatrix) = SUM(ldata_a*ldata_b)
1030 : END IF
1031 : ELSE
1032 : CPABORT("Matrices A and B are of different types")
1033 : END IF
1034 : END DO
1035 : !$OMP END PARALLEL DO
1036 :
1037 149486 : group = matrix_a(1) ${appendix1}$%matrix_struct%para_env
1038 499638 : CALL group%sum(trace)
1039 :
1040 149486 : CALL timestop(handle)
1041 149486 : END SUBROUTINE cp_fm_trace_a1b1t1_${shortname1}$${shortname2}$
1042 : #:endfor
1043 : #:endfor
1044 :
1045 : ! **************************************************************************************************
1046 : !> \brief Compute trace(i,j) = \sum_k Tr (matrix_a(k,i)^T matrix_b(k,j)).
1047 : !> \param matrix_a list of A matrices
1048 : !> \param matrix_b list of B matrices
1049 : !> \param trace computed traces
1050 : !> \param accurate ...
1051 : ! **************************************************************************************************
1052 : #:for longname1, shortname1, appendix1 in types
1053 : #:for longname2, shortname2, appendix2 in types
1054 13912 : SUBROUTINE cp_fm_contracted_trace_a2b2t2_${shortname1}$${shortname2}$ (matrix_a, matrix_b, trace, accurate)
1055 : TYPE(${longname1}$), DIMENSION(:, :), INTENT(IN) :: matrix_a
1056 : TYPE(${longname2}$), DIMENSION(:, :), INTENT(IN) :: matrix_b
1057 : REAL(kind=dp), DIMENSION(:, :), INTENT(OUT) :: trace
1058 : LOGICAL, INTENT(IN), OPTIONAL :: accurate
1059 :
1060 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_contracted_trace_a2b2t2_${shortname1}$${shortname2}$'
1061 :
1062 : INTEGER :: handle, ia, ib, iz, na, nb, ncols_local, &
1063 : nrows_local, nz
1064 : INTEGER(kind=int_8) :: ib8, itrace, na8, ntraces
1065 : LOGICAL :: use_accurate_sum, use_sp_a, use_sp_b
1066 : REAL(kind=dp) :: t
1067 13912 : REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
1068 13912 : REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
1069 : TYPE(mp_comm_type) :: group
1070 :
1071 13912 : CALL timeset(routineN, handle)
1072 :
1073 13912 : nz = SIZE(matrix_a, 1)
1074 13912 : CPASSERT(SIZE(matrix_b, 1) == nz)
1075 :
1076 13912 : na = SIZE(matrix_a, 2)
1077 13912 : nb = SIZE(matrix_b, 2)
1078 13912 : CPASSERT(SIZE(trace, 1) == na)
1079 13912 : CPASSERT(SIZE(trace, 2) == nb)
1080 :
1081 13912 : use_accurate_sum = .TRUE.
1082 13912 : IF (PRESENT(accurate)) use_accurate_sum = accurate
1083 :
1084 : ! here we use one running index (itrace) instead of two (ia, ib) in order to
1085 : ! improve load balance between shared-memory threads
1086 13912 : ntraces = na*nb
1087 13912 : na8 = INT(na, kind=int_8)
1088 :
1089 : !$OMP PARALLEL DO DEFAULT(NONE), &
1090 : !$OMP PRIVATE(ia, ib, ib8, itrace, iz, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
1091 : !$OMP PRIVATE(nrows_local, t, use_sp_a, use_sp_b), &
1092 13912 : !$OMP SHARED(matrix_a, matrix_b, na, na8, nb, ntraces, nz, trace, use_accurate_sum)
1093 : DO itrace = 1, ntraces
1094 : ib8 = (itrace - 1)/na8
1095 : ia = INT(itrace - ib8*na8)
1096 : ib = INT(ib8) + 1
1097 :
1098 : t = 0.0_dp
1099 : DO iz = 1, nz
1100 : CALL cp_fm_get_info(matrix_a(iz, ia) ${appendix1}$, nrow_local=nrows_local, ncol_local=ncols_local)
1101 : use_sp_a = matrix_a(iz, ia) ${appendix1}$%use_sp
1102 : use_sp_b = matrix_b(iz, ib) ${appendix2}$%use_sp
1103 :
1104 : ! assume that the matrices A(iz, ia) and B(iz, ib) have identical shapes and distribution schemes
1105 : IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
1106 : ldata_a => matrix_a(iz, ia) ${appendix1}$%local_data(1:nrows_local, 1:ncols_local)
1107 : ldata_b => matrix_b(iz, ib) ${appendix2}$%local_data(1:nrows_local, 1:ncols_local)
1108 : IF (use_accurate_sum) THEN
1109 : t = t + accurate_dot_product(ldata_a, ldata_b)
1110 : ELSE
1111 : t = t + SUM(ldata_a*ldata_b)
1112 : END IF
1113 : ELSE IF (use_sp_a .AND. use_sp_b) THEN
1114 : ldata_a_sp => matrix_a(iz, ia) ${appendix1}$%local_data_sp(1:nrows_local, 1:ncols_local)
1115 : ldata_b_sp => matrix_b(iz, ib) ${appendix2}$%local_data_sp(1:nrows_local, 1:ncols_local)
1116 : IF (use_accurate_sum) THEN
1117 : t = t + accurate_dot_product(ldata_a_sp, ldata_b_sp)
1118 : ELSE
1119 : t = t + SUM(ldata_a_sp*ldata_b_sp)
1120 : END IF
1121 : ELSE
1122 : CPABORT("Matrices A and B are of different types")
1123 : END IF
1124 : END DO
1125 : trace(ia, ib) = t
1126 : END DO
1127 : !$OMP END PARALLEL DO
1128 :
1129 13912 : group = matrix_a(1, 1) ${appendix1}$%matrix_struct%para_env
1130 664484 : CALL group%sum(trace)
1131 :
1132 13912 : CALL timestop(handle)
1133 13912 : END SUBROUTINE cp_fm_contracted_trace_a2b2t2_${shortname1}$${shortname2}$
1134 : #:endfor
1135 : #:endfor
1136 :
1137 : ! **************************************************************************************************
1138 : !> \brief multiplies in place by a triangular matrix:
1139 : !> matrix_b = alpha op(triangular_matrix) matrix_b
1140 : !> or (if side='R')
1141 : !> matrix_b = alpha matrix_b op(triangular_matrix)
1142 : !> op(triangular_matrix) is:
1143 : !> triangular_matrix (if transpose_tr=.false. and invert_tr=.false.)
1144 : !> triangular_matrix^T (if transpose_tr=.true. and invert_tr=.false.)
1145 : !> triangular_matrix^(-1) (if transpose_tr=.false. and invert_tr=.true.)
1146 : !> triangular_matrix^(-T) (if transpose_tr=.true. and invert_tr=.true.)
1147 : !> \param triangular_matrix the triangular matrix that multiplies the other
1148 : !> \param matrix_b the matrix that gets multiplied and stores the result
1149 : !> \param side on which side of matrix_b stays op(triangular_matrix)
1150 : !> (defaults to 'L')
1151 : !> \param transpose_tr if the triangular matrix should be transposed
1152 : !> (defaults to false)
1153 : !> \param invert_tr if the triangular matrix should be inverted
1154 : !> (defaults to false)
1155 : !> \param uplo_tr if triangular_matrix is stored in the upper ('U') or
1156 : !> lower ('L') triangle (defaults to 'U')
1157 : !> \param unit_diag_tr if the diagonal elements of triangular_matrix should
1158 : !> be assumed to be 1 (defaults to false)
1159 : !> \param n_rows the number of rows of the result (defaults to
1160 : !> size(matrix_b,1))
1161 : !> \param n_cols the number of columns of the result (defaults to
1162 : !> size(matrix_b,2))
1163 : !> \param alpha ...
1164 : !> \par History
1165 : !> 08.2002 created [fawzi]
1166 : !> \author Fawzi Mohamed
1167 : !> \note
1168 : !> needs an mpi env
1169 : ! **************************************************************************************************
1170 110104 : SUBROUTINE cp_fm_triangular_multiply(triangular_matrix, matrix_b, side, &
1171 : transpose_tr, invert_tr, uplo_tr, unit_diag_tr, n_rows, n_cols, &
1172 : alpha)
1173 : TYPE(cp_fm_type), INTENT(IN) :: triangular_matrix, matrix_b
1174 : CHARACTER, INTENT(IN), OPTIONAL :: side
1175 : LOGICAL, INTENT(IN), OPTIONAL :: transpose_tr, invert_tr
1176 : CHARACTER, INTENT(IN), OPTIONAL :: uplo_tr
1177 : LOGICAL, INTENT(IN), OPTIONAL :: unit_diag_tr
1178 : INTEGER, INTENT(IN), OPTIONAL :: n_rows, n_cols
1179 : REAL(KIND=dp), INTENT(IN), OPTIONAL :: alpha
1180 :
1181 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_triangular_multiply'
1182 :
1183 : CHARACTER :: side_char, transa, unit_diag, uplo
1184 : INTEGER :: handle, mdim, m, n
1185 : LOGICAL :: invert
1186 : REAL(KIND=dp) :: al
1187 :
1188 55052 : CALL timeset(routineN, handle)
1189 55052 : side_char = 'L'
1190 55052 : unit_diag = 'N'
1191 55052 : uplo = 'U'
1192 55052 : transa = 'N'
1193 55052 : invert = .FALSE.
1194 55052 : al = 1.0_dp
1195 55052 : CALL cp_fm_get_info(matrix_b, nrow_global=m, ncol_global=n)
1196 55052 : IF (PRESENT(side)) side_char = side
1197 : mdim = MERGE(1, 2, 'L' == side_char)
1198 55052 : IF (PRESENT(invert_tr)) invert = invert_tr
1199 55052 : IF (PRESENT(uplo_tr)) uplo = uplo_tr
1200 55052 : IF (PRESENT(unit_diag_tr)) THEN
1201 0 : IF (unit_diag_tr) THEN
1202 0 : unit_diag = 'U'
1203 : ELSE
1204 : unit_diag = 'N'
1205 : END IF
1206 : END IF
1207 55052 : IF (PRESENT(transpose_tr)) THEN
1208 3402 : IF (transpose_tr) THEN
1209 1290 : transa = 'T'
1210 : ELSE
1211 : transa = 'N'
1212 : END IF
1213 : END IF
1214 55052 : IF (PRESENT(alpha)) al = alpha
1215 55052 : IF (PRESENT(n_rows)) m = n_rows
1216 55052 : IF (PRESENT(n_cols)) n = n_cols
1217 :
1218 55052 : IF (invert) THEN
1219 :
1220 : #if defined(__parallel)
1221 : CALL pdtrsm(side_char, uplo, transa, unit_diag, m, n, al, &
1222 : triangular_matrix%local_data(1, 1), 1, 1, &
1223 : triangular_matrix%matrix_struct%descriptor, &
1224 : matrix_b%local_data(1, 1), 1, 1, &
1225 44468 : matrix_b%matrix_struct%descriptor(1))
1226 : #else
1227 : CALL dtrsm(side_char, uplo, transa, unit_diag, m, n, al, &
1228 : triangular_matrix%local_data(1, 1), &
1229 : SIZE(triangular_matrix%local_data, mdim), &
1230 : matrix_b%local_data(1, 1), SIZE(matrix_b%local_data, 1))
1231 : #endif
1232 :
1233 : ELSE
1234 :
1235 : #if defined(__parallel)
1236 : CALL pdtrmm(side_char, uplo, transa, unit_diag, m, n, al, &
1237 : triangular_matrix%local_data(1, 1), 1, 1, &
1238 : triangular_matrix%matrix_struct%descriptor, &
1239 : matrix_b%local_data(1, 1), 1, 1, &
1240 10584 : matrix_b%matrix_struct%descriptor(1))
1241 : #else
1242 : CALL dtrmm(side_char, uplo, transa, unit_diag, m, n, al, &
1243 : triangular_matrix%local_data(1, 1), &
1244 : SIZE(triangular_matrix%local_data, mdim), &
1245 : matrix_b%local_data(1, 1), SIZE(matrix_b%local_data, 1))
1246 : #endif
1247 :
1248 : END IF
1249 :
1250 55052 : CALL timestop(handle)
1251 55052 : END SUBROUTINE cp_fm_triangular_multiply
1252 :
1253 : ! **************************************************************************************************
1254 : !> \brief scales a matrix
1255 : !> matrix_a = alpha * matrix_b
1256 : !> \param alpha ...
1257 : !> \param matrix_a ...
1258 : !> \note
1259 : !> use cp_fm_set_all to zero (avoids problems with nan)
1260 : ! **************************************************************************************************
1261 86473 : SUBROUTINE cp_fm_scale(alpha, matrix_a)
1262 : REAL(KIND=dp), INTENT(IN) :: alpha
1263 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a
1264 :
1265 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_scale'
1266 :
1267 : INTEGER :: handle, size_a
1268 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
1269 :
1270 86473 : CALL timeset(routineN, handle)
1271 :
1272 : NULLIFY (a)
1273 :
1274 86473 : a => matrix_a%local_data
1275 86473 : size_a = SIZE(a, 1)*SIZE(a, 2)
1276 :
1277 86473 : CALL DSCAL(size_a, alpha, a, 1)
1278 :
1279 86473 : CALL timestop(handle)
1280 :
1281 86473 : END SUBROUTINE cp_fm_scale
1282 :
1283 : ! **************************************************************************************************
1284 : !> \brief transposes a matrix
1285 : !> matrixt = matrix ^ T
1286 : !> \param matrix ...
1287 : !> \param matrixt ...
1288 : !> \note
1289 : !> all matrix elements are transposed (see cp_fm_uplo_to_full to symmetrise a matrix)
1290 : ! **************************************************************************************************
1291 25872 : SUBROUTINE cp_fm_transpose(matrix, matrixt)
1292 : TYPE(cp_fm_type), INTENT(IN) :: matrix, matrixt
1293 :
1294 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_transpose'
1295 :
1296 : INTEGER :: handle, ncol_global, &
1297 : nrow_global, ncol_globalt, nrow_globalt
1298 12936 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, c
1299 : #if defined(__parallel)
1300 : INTEGER, DIMENSION(9) :: desca, descc
1301 : #elif !defined(__MKL)
1302 : INTEGER :: i, j
1303 : #endif
1304 :
1305 12936 : nrow_global = matrix%matrix_struct%nrow_global
1306 12936 : ncol_global = matrix%matrix_struct%ncol_global
1307 12936 : nrow_globalt = matrixt%matrix_struct%nrow_global
1308 12936 : ncol_globalt = matrixt%matrix_struct%ncol_global
1309 0 : CPASSERT(nrow_global == ncol_globalt)
1310 12936 : CPASSERT(nrow_globalt == ncol_global)
1311 :
1312 12936 : CALL timeset(routineN, handle)
1313 :
1314 12936 : a => matrix%local_data
1315 12936 : c => matrixt%local_data
1316 :
1317 : #if defined(__parallel)
1318 129360 : desca(:) = matrix%matrix_struct%descriptor(:)
1319 129360 : descc(:) = matrixt%matrix_struct%descriptor(:)
1320 12936 : CALL pdtran(ncol_global, nrow_global, 1.0_dp, a(1, 1), 1, 1, desca, 0.0_dp, c(1, 1), 1, 1, descc)
1321 : #elif defined(__MKL)
1322 : CALL mkl_domatcopy('C', 'T', nrow_global, ncol_global, 1.0_dp, a(1, 1), nrow_global, c(1, 1), ncol_global)
1323 : #else
1324 : DO j = 1, ncol_global
1325 : DO i = 1, nrow_global
1326 : c(j, i) = a(i, j)
1327 : END DO
1328 : END DO
1329 : #endif
1330 12936 : CALL timestop(handle)
1331 :
1332 12936 : END SUBROUTINE cp_fm_transpose
1333 :
1334 : ! **************************************************************************************************
1335 : !> \brief given a triangular matrix according to uplo, computes the corresponding full matrix
1336 : !> \param matrix the triangular matrix as input, the full matrix as output
1337 : !> \param work a matrix of the same size as matrix
1338 : !> \param uplo triangular format; defaults to 'U'
1339 : !> \author Matthias Krack
1340 : !> \note
1341 : !> the opposite triangular part is irrelevant
1342 : ! **************************************************************************************************
1343 295114 : SUBROUTINE cp_fm_uplo_to_full(matrix, work, uplo)
1344 :
1345 : TYPE(cp_fm_type), INTENT(IN) :: matrix, work
1346 : CHARACTER, INTENT(IN), OPTIONAL :: uplo
1347 :
1348 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_uplo_to_full'
1349 :
1350 : CHARACTER :: myuplo
1351 : INTEGER :: handle, icol_global, irow_global, &
1352 : mypcol, myprow, ncol_global, &
1353 : nrow_global
1354 147557 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
1355 147557 : REAL(KIND=sp), DIMENSION(:, :), POINTER :: a_sp
1356 : TYPE(cp_blacs_env_type), POINTER :: context
1357 :
1358 : #if defined(__parallel)
1359 : INTEGER :: icol_local, irow_local, &
1360 : ncol_block, ncol_local, &
1361 : nrow_block, nrow_local
1362 : INTEGER, DIMENSION(9) :: desca, descc
1363 147557 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: c
1364 147557 : REAL(KIND=sp), DIMENSION(:, :), POINTER :: c_sp
1365 : #endif
1366 :
1367 147557 : myuplo = 'U'
1368 0 : IF (PRESENT(uplo)) myuplo = uplo
1369 :
1370 147557 : nrow_global = matrix%matrix_struct%nrow_global
1371 147557 : ncol_global = matrix%matrix_struct%ncol_global
1372 147557 : CPASSERT(nrow_global == ncol_global)
1373 147557 : nrow_global = work%matrix_struct%nrow_global
1374 147557 : ncol_global = work%matrix_struct%ncol_global
1375 147557 : CPASSERT(nrow_global == ncol_global)
1376 147557 : CPASSERT(matrix%use_sp .EQV. work%use_sp)
1377 :
1378 147557 : CALL timeset(routineN, handle)
1379 :
1380 147557 : context => matrix%matrix_struct%context
1381 147557 : myprow = context%mepos(1)
1382 147557 : mypcol = context%mepos(2)
1383 :
1384 : #if defined(__parallel)
1385 :
1386 147557 : nrow_block = matrix%matrix_struct%nrow_block
1387 147557 : ncol_block = matrix%matrix_struct%ncol_block
1388 :
1389 147557 : nrow_local = matrix%matrix_struct%nrow_locals(myprow)
1390 147557 : ncol_local = matrix%matrix_struct%ncol_locals(mypcol)
1391 :
1392 147557 : a => work%local_data
1393 147557 : a_sp => work%local_data_sp
1394 1475570 : desca(:) = work%matrix_struct%descriptor(:)
1395 147557 : c => matrix%local_data
1396 147557 : c_sp => matrix%local_data_sp
1397 1475570 : descc(:) = matrix%matrix_struct%descriptor(:)
1398 :
1399 4966786 : DO icol_local = 1, ncol_local
1400 4819229 : icol_global = matrix%matrix_struct%col_indices(icol_local)
1401 229384342 : DO irow_local = 1, nrow_local
1402 224417556 : irow_global = matrix%matrix_struct%row_indices(irow_local)
1403 229236785 : IF (MERGE(irow_global > icol_global, irow_global < icol_global, (myuplo == "U") .OR. (myuplo == "u"))) THEN
1404 110920155 : IF (matrix%use_sp) THEN
1405 0 : c_sp(irow_local, icol_local) = 0.0_sp
1406 : ELSE
1407 110920155 : c(irow_local, icol_local) = 0.0_dp
1408 : END IF
1409 113497401 : ELSE IF (irow_global == icol_global) THEN
1410 2577246 : IF (matrix%use_sp) THEN
1411 0 : c_sp(irow_local, icol_local) = 0.5_sp*c_sp(irow_local, icol_local)
1412 : ELSE
1413 2577246 : c(irow_local, icol_local) = 0.5_dp*c(irow_local, icol_local)
1414 : END IF
1415 : END IF
1416 : END DO
1417 : END DO
1418 :
1419 4966786 : DO icol_local = 1, ncol_local
1420 229384342 : DO irow_local = 1, nrow_local
1421 229236785 : IF (matrix%use_sp) THEN
1422 0 : a_sp(irow_local, icol_local) = c_sp(irow_local, icol_local)
1423 : ELSE
1424 224417556 : a(irow_local, icol_local) = c(irow_local, icol_local)
1425 : END IF
1426 : END DO
1427 : END DO
1428 :
1429 147557 : IF (matrix%use_sp) THEN
1430 0 : CALL pstran(nrow_global, ncol_global, 1.0_sp, a_sp(1, 1), 1, 1, desca, 1.0_sp, c_sp(1, 1), 1, 1, descc)
1431 : ELSE
1432 147557 : CALL pdtran(nrow_global, ncol_global, 1.0_dp, a(1, 1), 1, 1, desca, 1.0_dp, c(1, 1), 1, 1, descc)
1433 : END IF
1434 :
1435 : #else
1436 :
1437 : a => matrix%local_data
1438 : a_sp => matrix%local_data_sp
1439 :
1440 : IF ((myuplo == "U") .OR. (myuplo == "u")) THEN
1441 : DO irow_global = 1, nrow_global
1442 : DO icol_global = irow_global + 1, ncol_global
1443 : IF (matrix%use_sp) THEN
1444 : a_sp(icol_global, irow_global) = a_sp(irow_global, icol_global)
1445 : ELSE
1446 : a(icol_global, irow_global) = a(irow_global, icol_global)
1447 : END IF
1448 : END DO
1449 : END DO
1450 : ELSE
1451 : DO icol_global = 1, ncol_global
1452 : DO irow_global = icol_global + 1, nrow_global
1453 : IF (matrix%use_sp) THEN
1454 : a_sp(irow_global, icol_global) = a_sp(icol_global, irow_global)
1455 : ELSE
1456 : a(irow_global, icol_global) = a(icol_global, irow_global)
1457 : END IF
1458 : END DO
1459 : END DO
1460 : END IF
1461 :
1462 : #endif
1463 147557 : CALL timestop(handle)
1464 :
1465 147557 : END SUBROUTINE cp_fm_uplo_to_full
1466 :
1467 : ! **************************************************************************************************
1468 : !> \brief scales column i of matrix a with scaling(i)
1469 : !> \param matrixa ...
1470 : !> \param scaling : an array used for scaling the columns,
1471 : !> SIZE(scaling) determines the number of columns to be scaled
1472 : !> \author Joost VandeVondele
1473 : !> \note
1474 : !> this is very useful as a first step in the computation of C = sum_i alpha_i A_i transpose (A_i)
1475 : !> that is a rank-k update (cp_fm_syrk , cp_sm_plus_fm_fm_t)
1476 : !> this procedure can be up to 20 times faster than calling cp_fm_syrk n times
1477 : !> where every vector has a different prefactor
1478 : ! **************************************************************************************************
1479 139973 : SUBROUTINE cp_fm_column_scale(matrixa, scaling)
1480 : TYPE(cp_fm_type), INTENT(IN) :: matrixa
1481 : REAL(KIND=dp), DIMENSION(:), INTENT(IN) :: scaling
1482 :
1483 : INTEGER :: k, mypcol, myprow, n, ncol_global, &
1484 : npcol, nprow
1485 139973 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
1486 139973 : REAL(KIND=sp), DIMENSION(:, :), POINTER :: a_sp
1487 : #if defined(__parallel)
1488 : INTEGER :: icol_global, icol_local, &
1489 : ipcol, iprow, irow_local
1490 : #else
1491 : INTEGER :: i
1492 : #endif
1493 :
1494 139973 : myprow = matrixa%matrix_struct%context%mepos(1)
1495 139973 : mypcol = matrixa%matrix_struct%context%mepos(2)
1496 139973 : nprow = matrixa%matrix_struct%context%num_pe(1)
1497 139973 : npcol = matrixa%matrix_struct%context%num_pe(2)
1498 :
1499 139973 : ncol_global = matrixa%matrix_struct%ncol_global
1500 :
1501 139973 : a => matrixa%local_data
1502 139973 : a_sp => matrixa%local_data_sp
1503 139973 : IF (matrixa%use_sp) THEN
1504 0 : n = SIZE(a_sp, 1)
1505 : ELSE
1506 139973 : n = SIZE(a, 1)
1507 : END IF
1508 139973 : k = MIN(SIZE(scaling), ncol_global)
1509 :
1510 : #if defined(__parallel)
1511 :
1512 2222214 : DO icol_global = 1, k
1513 : CALL infog2l(1, icol_global, matrixa%matrix_struct%descriptor, &
1514 : nprow, npcol, myprow, mypcol, &
1515 2082241 : irow_local, icol_local, iprow, ipcol)
1516 2222214 : IF ((ipcol == mypcol)) THEN
1517 2082241 : IF (matrixa%use_sp) THEN
1518 0 : CALL SSCAL(n, REAL(scaling(icol_global), sp), a_sp(:, icol_local), 1)
1519 : ELSE
1520 2082241 : CALL DSCAL(n, scaling(icol_global), a(:, icol_local), 1)
1521 : END IF
1522 : END IF
1523 : END DO
1524 : #else
1525 : DO i = 1, k
1526 : IF (matrixa%use_sp) THEN
1527 : CALL SSCAL(n, REAL(scaling(i), sp), a_sp(:, i), 1)
1528 : ELSE
1529 : CALL DSCAL(n, scaling(i), a(:, i), 1)
1530 : END IF
1531 : END DO
1532 : #endif
1533 139973 : END SUBROUTINE cp_fm_column_scale
1534 :
1535 : ! **************************************************************************************************
1536 : !> \brief scales row i of matrix a with scaling(i)
1537 : !> \param matrixa ...
1538 : !> \param scaling : an array used for scaling the columns,
1539 : !> \author JGH
1540 : !> \note
1541 : ! **************************************************************************************************
1542 6272 : SUBROUTINE cp_fm_row_scale(matrixa, scaling)
1543 : TYPE(cp_fm_type), INTENT(IN) :: matrixa
1544 : REAL(KIND=dp), DIMENSION(:), INTENT(IN) :: scaling
1545 :
1546 : INTEGER :: n, m, nrow_global, nrow_local, ncol_local
1547 6272 : INTEGER, DIMENSION(:), POINTER :: row_indices
1548 6272 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
1549 6272 : REAL(KIND=sp), DIMENSION(:, :), POINTER :: a_sp
1550 : #if defined(__parallel)
1551 : INTEGER :: irow_global, icol, irow
1552 : #else
1553 : INTEGER :: j
1554 : #endif
1555 :
1556 : CALL cp_fm_get_info(matrixa, row_indices=row_indices, nrow_global=nrow_global, &
1557 6272 : nrow_local=nrow_local, ncol_local=ncol_local)
1558 6272 : CPASSERT(SIZE(scaling) == nrow_global)
1559 :
1560 6272 : a => matrixa%local_data
1561 6272 : a_sp => matrixa%local_data_sp
1562 6272 : IF (matrixa%use_sp) THEN
1563 6272 : n = SIZE(a_sp, 1)
1564 6272 : m = SIZE(a_sp, 2)
1565 : ELSE
1566 6272 : n = SIZE(a, 1)
1567 6272 : m = SIZE(a, 2)
1568 : END IF
1569 :
1570 : #if defined(__parallel)
1571 78776 : DO icol = 1, ncol_local
1572 78776 : IF (matrixa%use_sp) THEN
1573 0 : DO irow = 1, nrow_local
1574 0 : irow_global = row_indices(irow)
1575 0 : a(irow, icol) = REAL(scaling(irow_global), dp)*a(irow, icol)
1576 : END DO
1577 : ELSE
1578 6479226 : DO irow = 1, nrow_local
1579 6406722 : irow_global = row_indices(irow)
1580 6479226 : a(irow, icol) = scaling(irow_global)*a(irow, icol)
1581 : END DO
1582 : END IF
1583 : END DO
1584 : #else
1585 : IF (matrixa%use_sp) THEN
1586 : DO j = 1, m
1587 : a_sp(1:n, j) = REAL(scaling(1:n), sp)*a_sp(1:n, j)
1588 : END DO
1589 : ELSE
1590 : DO j = 1, m
1591 : a(1:n, j) = scaling(1:n)*a(1:n, j)
1592 : END DO
1593 : END IF
1594 : #endif
1595 6272 : END SUBROUTINE cp_fm_row_scale
1596 :
1597 : ! **************************************************************************************************
1598 : !> \brief Inverts a cp_fm_type matrix, optionally returning the determinant of the input matrix
1599 : !> \param matrix_a the matrix to invert
1600 : !> \param matrix_inverse the inverse of matrix_a
1601 : !> \param det_a the determinant of matrix_a
1602 : !> \param eps_svd optional parameter to active SVD based inversion, singular values below eps_svd
1603 : !> are screened
1604 : !> \param eigval optionally return matrix eigenvalues/singular values
1605 : !> \par History
1606 : !> note of Jan Wilhelm (12.2015)
1607 : !> - computation of determinant corrected
1608 : !> - determinant only computed if det_a is present
1609 : !> 12.2016 added option to use SVD instead of LU [Nico Holmberg]
1610 : !> - Use cp_fm_get diag instead of n times cp_fm_get_element (A. Bussy)
1611 : !> \author Florian Schiffmann(02.2007)
1612 : ! **************************************************************************************************
1613 1590 : SUBROUTINE cp_fm_invert(matrix_a, matrix_inverse, det_a, eps_svd, eigval)
1614 :
1615 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_inverse
1616 : REAL(KIND=dp), INTENT(OUT), OPTIONAL :: det_a
1617 : REAL(KIND=dp), INTENT(IN), OPTIONAL :: eps_svd
1618 : REAL(KIND=dp), DIMENSION(:), POINTER, &
1619 : INTENT(INOUT), OPTIONAL :: eigval
1620 :
1621 : INTEGER :: n
1622 1590 : INTEGER, ALLOCATABLE, DIMENSION(:) :: ipivot
1623 : REAL(KIND=dp) :: determinant, my_eps_svd
1624 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
1625 : TYPE(cp_fm_type) :: matrix_lu
1626 :
1627 : #if defined(__parallel)
1628 : TYPE(cp_fm_type) :: u, vt, sigma, inv_sigma_ut
1629 : TYPE(mp_comm_type) :: group
1630 : INTEGER :: i, info, liwork, lwork, exponent_of_minus_one
1631 : INTEGER, DIMENSION(9) :: desca
1632 : LOGICAL :: quenched
1633 : REAL(KIND=dp) :: alpha, beta
1634 1590 : REAL(KIND=dp), DIMENSION(:), POINTER :: diag
1635 1590 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: work
1636 : #else
1637 : LOGICAL :: sign
1638 : REAL(KIND=dp) :: eps1
1639 : #endif
1640 :
1641 1590 : my_eps_svd = 0.0_dp
1642 468 : IF (PRESENT(eps_svd)) my_eps_svd = eps_svd
1643 :
1644 : CALL cp_fm_create(matrix=matrix_lu, &
1645 : matrix_struct=matrix_a%matrix_struct, &
1646 1590 : name="A_lu"//TRIM(ADJUSTL(cp_to_string(1)))//"MATRIX")
1647 1590 : CALL cp_fm_to_fm(matrix_a, matrix_lu)
1648 :
1649 1590 : a => matrix_lu%local_data
1650 1590 : n = matrix_lu%matrix_struct%nrow_global
1651 4770 : ALLOCATE (ipivot(n + matrix_a%matrix_struct%nrow_block))
1652 19381 : ipivot(:) = 0
1653 : #if defined(__parallel)
1654 1590 : IF (my_eps_svd == 0.0_dp) THEN
1655 : ! Use LU decomposition
1656 1562 : lwork = 3*n
1657 1562 : liwork = 3*n
1658 15620 : desca(:) = matrix_lu%matrix_struct%descriptor(:)
1659 1562 : CALL pdgetrf(n, n, a, 1, 1, desca, ipivot, info)
1660 :
1661 1562 : IF (PRESENT(det_a) .OR. PRESENT(eigval)) THEN
1662 :
1663 1116 : ALLOCATE (diag(n))
1664 916 : diag(:) = 0.0_dp
1665 372 : CALL cp_fm_get_diag(matrix_lu, diag)
1666 :
1667 372 : exponent_of_minus_one = 0
1668 372 : determinant = 1.0_dp
1669 916 : DO i = 1, n
1670 544 : determinant = determinant*diag(i)
1671 916 : IF (ipivot(i) /= i) THEN
1672 224 : exponent_of_minus_one = exponent_of_minus_one + 1
1673 : END IF
1674 : END DO
1675 372 : IF (PRESENT(eigval)) THEN
1676 0 : CPASSERT(.NOT. ASSOCIATED(eigval))
1677 0 : ALLOCATE (eigval(n))
1678 0 : eigval(:) = diag
1679 : END IF
1680 372 : DEALLOCATE (diag)
1681 :
1682 372 : group = matrix_lu%matrix_struct%para_env
1683 372 : CALL group%sum(exponent_of_minus_one)
1684 :
1685 372 : determinant = determinant*(-1.0_dp)**exponent_of_minus_one
1686 :
1687 : END IF
1688 :
1689 1562 : alpha = 0.0_dp
1690 1562 : beta = 1.0_dp
1691 1562 : CALL cp_fm_set_all(matrix_inverse, alpha, beta)
1692 1562 : CALL pdgetrs('N', n, n, matrix_lu%local_data, 1, 1, desca, ipivot, matrix_inverse%local_data, 1, 1, desca, info)
1693 : ELSE
1694 : ! Use singular value decomposition
1695 : CALL cp_fm_create(matrix=u, &
1696 : matrix_struct=matrix_a%matrix_struct, &
1697 28 : name="LEFT_SINGULAR_MATRIX")
1698 28 : CALL cp_fm_set_all(u, alpha=0.0_dp)
1699 : CALL cp_fm_create(matrix=vt, &
1700 : matrix_struct=matrix_a%matrix_struct, &
1701 28 : name="RIGHT_SINGULAR_MATRIX")
1702 28 : CALL cp_fm_set_all(vt, alpha=0.0_dp)
1703 84 : ALLOCATE (diag(n))
1704 92 : diag(:) = 0.0_dp
1705 280 : desca(:) = matrix_lu%matrix_struct%descriptor(:)
1706 28 : ALLOCATE (work(1))
1707 : ! Workspace query
1708 28 : lwork = -1
1709 : CALL pdgesvd('V', 'V', n, n, matrix_lu%local_data, 1, 1, desca, diag, u%local_data, &
1710 28 : 1, 1, desca, vt%local_data, 1, 1, desca, work, lwork, info)
1711 28 : lwork = INT(work(1))
1712 28 : DEALLOCATE (work)
1713 84 : ALLOCATE (work(lwork))
1714 : ! SVD
1715 : CALL pdgesvd('V', 'V', n, n, matrix_lu%local_data, 1, 1, desca, diag, u%local_data, &
1716 28 : 1, 1, desca, vt%local_data, 1, 1, desca, work, lwork, info)
1717 : ! info == n+1 implies homogeneity error when the number of procs is large
1718 : ! this likely isnt a problem, but maybe we should handle it separately
1719 28 : IF (info /= 0 .AND. info /= n + 1) &
1720 0 : CPABORT("Singular value decomposition of matrix failed.")
1721 : ! (Pseudo)inverse and (pseudo)determinant
1722 : CALL cp_fm_create(matrix=sigma, &
1723 : matrix_struct=matrix_a%matrix_struct, &
1724 28 : name="SINGULAR_VALUE_MATRIX")
1725 28 : CALL cp_fm_set_all(sigma, alpha=0.0_dp)
1726 28 : determinant = 1.0_dp
1727 28 : quenched = .FALSE.
1728 28 : IF (PRESENT(eigval)) THEN
1729 28 : CPASSERT(.NOT. ASSOCIATED(eigval))
1730 84 : ALLOCATE (eigval(n))
1731 156 : eigval(:) = diag
1732 : END IF
1733 92 : DO i = 1, n
1734 64 : IF (diag(i) < my_eps_svd) THEN
1735 18 : diag(i) = 0.0_dp
1736 18 : quenched = .TRUE.
1737 : ELSE
1738 46 : determinant = determinant*diag(i)
1739 46 : diag(i) = 1.0_dp/diag(i)
1740 : END IF
1741 92 : CALL cp_fm_set_element(sigma, i, i, diag(i))
1742 : END DO
1743 28 : DEALLOCATE (diag)
1744 28 : IF (quenched) &
1745 : CALL cp_warn(__LOCATION__, &
1746 : "Linear dependencies were detected in the SVD inversion of matrix "//TRIM(ADJUSTL(matrix_a%name))// &
1747 12 : ". At least one singular value has been quenched.")
1748 : ! Sigma^-1 * U^T
1749 : CALL cp_fm_create(matrix=inv_sigma_ut, &
1750 : matrix_struct=matrix_a%matrix_struct, &
1751 28 : name="SINGULAR_VALUE_MATRIX")
1752 28 : CALL cp_fm_set_all(inv_sigma_ut, alpha=0.0_dp)
1753 : CALL pdgemm('N', 'T', n, n, n, 1.0_dp, sigma%local_data, 1, 1, desca, &
1754 28 : u%local_data, 1, 1, desca, 0.0_dp, inv_sigma_ut%local_data, 1, 1, desca)
1755 : ! A^-1 = V * (Sigma^-1 * U^T)
1756 28 : CALL cp_fm_set_all(matrix_inverse, alpha=0.0_dp)
1757 : CALL pdgemm('T', 'N', n, n, n, 1.0_dp, vt%local_data, 1, 1, desca, &
1758 28 : inv_sigma_ut%local_data, 1, 1, desca, 0.0_dp, matrix_inverse%local_data, 1, 1, desca)
1759 : ! Clean up
1760 28 : DEALLOCATE (work)
1761 28 : CALL cp_fm_release(u)
1762 28 : CALL cp_fm_release(vt)
1763 28 : CALL cp_fm_release(sigma)
1764 140 : CALL cp_fm_release(inv_sigma_ut)
1765 : END IF
1766 : #else
1767 : IF (my_eps_svd == 0.0_dp) THEN
1768 : sign = .TRUE.
1769 : CALL invert_matrix(matrix_a%local_data, matrix_inverse%local_data, &
1770 : eval_error=eps1)
1771 : CALL cp_fm_lu_decompose(matrix_lu, determinant, correct_sign=sign)
1772 : IF (PRESENT(eigval)) &
1773 : CALL cp_abort(__LOCATION__, &
1774 : "NYI. Eigenvalues not available for return without SCALAPACK.")
1775 : ELSE
1776 : CALL get_pseudo_inverse_svd(matrix_a%local_data, matrix_inverse%local_data, eps_svd, &
1777 : determinant, eigval)
1778 : END IF
1779 : #endif
1780 1590 : CALL cp_fm_release(matrix_lu)
1781 1590 : DEALLOCATE (ipivot)
1782 1590 : IF (PRESENT(det_a)) det_a = determinant
1783 1590 : END SUBROUTINE cp_fm_invert
1784 :
1785 : ! **************************************************************************************************
1786 : !> \brief inverts a triangular matrix
1787 : !> \param matrix_a ...
1788 : !> \param uplo_tr triangular format; defaults to 'U'
1789 : !> \author MI
1790 : ! **************************************************************************************************
1791 5072 : SUBROUTINE cp_fm_triangular_invert(matrix_a, uplo_tr)
1792 :
1793 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a
1794 : CHARACTER, INTENT(IN), OPTIONAL :: uplo_tr
1795 :
1796 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_triangular_invert'
1797 :
1798 : CHARACTER :: unit_diag, uplo
1799 : INTEGER :: handle, info, ncol_global
1800 5072 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
1801 : #if defined(__parallel)
1802 : INTEGER, DIMENSION(9) :: desca
1803 : #endif
1804 :
1805 5072 : CALL timeset(routineN, handle)
1806 :
1807 5072 : unit_diag = 'N'
1808 5072 : uplo = 'U'
1809 5072 : IF (PRESENT(uplo_tr)) uplo = uplo_tr
1810 :
1811 5072 : ncol_global = matrix_a%matrix_struct%ncol_global
1812 :
1813 5072 : a => matrix_a%local_data
1814 :
1815 : #if defined(__parallel)
1816 50720 : desca(:) = matrix_a%matrix_struct%descriptor(:)
1817 :
1818 5072 : CALL pdtrtri(uplo, unit_diag, ncol_global, a(1, 1), 1, 1, desca, info)
1819 :
1820 : #else
1821 : CALL dtrtri(uplo, unit_diag, ncol_global, a(1, 1), ncol_global, info)
1822 : #endif
1823 :
1824 5072 : CALL timestop(handle)
1825 5072 : END SUBROUTINE cp_fm_triangular_invert
1826 :
1827 : ! **************************************************************************************************
1828 : !> \brief performs a QR factorization of the input rectangular matrix A or of a submatrix of A
1829 : !> the computed triangular matrix R is in output of the submatrix sub(A) of size NxN
1830 : !> M and M give the dimension of the submatrix that has to be factorized (MxN) with M>N
1831 : !> \param matrix_a ...
1832 : !> \param matrix_r ...
1833 : !> \param nrow_fact ...
1834 : !> \param ncol_fact ...
1835 : !> \param first_row ...
1836 : !> \param first_col ...
1837 : !> \author MI
1838 : ! **************************************************************************************************
1839 19320 : SUBROUTINE cp_fm_qr_factorization(matrix_a, matrix_r, nrow_fact, ncol_fact, first_row, first_col, uplo)
1840 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_r
1841 : INTEGER, INTENT(IN), OPTIONAL :: nrow_fact, ncol_fact, &
1842 : first_row, first_col
1843 : CHARACTER, INTENT(IN), OPTIONAL :: uplo
1844 :
1845 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_qr_factorization'
1846 :
1847 : CHARACTER :: myuplo
1848 : INTEGER :: handle, i, icol, info, irow, &
1849 : j, lda, lwork, ncol, &
1850 : ndim, nrow
1851 19320 : REAL(dp), ALLOCATABLE, DIMENSION(:) :: tau, work
1852 19320 : REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: r_mat
1853 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
1854 : #if defined(__parallel)
1855 : INTEGER, DIMENSION(9) :: desca
1856 : #endif
1857 :
1858 19320 : CALL timeset(routineN, handle)
1859 :
1860 19320 : myuplo = 'U'
1861 19320 : IF (PRESENT(uplo)) myuplo = uplo
1862 :
1863 19320 : ncol = matrix_a%matrix_struct%ncol_global
1864 19320 : nrow = matrix_a%matrix_struct%nrow_global
1865 19320 : lda = nrow
1866 :
1867 19320 : a => matrix_a%local_data
1868 :
1869 19320 : IF (PRESENT(nrow_fact)) nrow = nrow_fact
1870 19320 : IF (PRESENT(ncol_fact)) ncol = ncol_fact
1871 19320 : irow = 1
1872 19320 : IF (PRESENT(first_row)) irow = first_row
1873 19320 : icol = 1
1874 19320 : IF (PRESENT(first_col)) icol = first_col
1875 :
1876 19320 : CPASSERT(nrow >= ncol)
1877 19320 : ndim = SIZE(a, 2)
1878 57960 : ALLOCATE (tau(ndim))
1879 :
1880 : #if defined(__parallel)
1881 :
1882 193200 : desca(:) = matrix_a%matrix_struct%descriptor(:)
1883 :
1884 19320 : lwork = -1
1885 57960 : ALLOCATE (work(2*ndim))
1886 19320 : CALL pdgeqrf(nrow, ncol, a, irow, icol, desca, tau, work, lwork, info)
1887 19320 : lwork = INT(work(1))
1888 19320 : DEALLOCATE (work)
1889 57960 : ALLOCATE (work(lwork))
1890 19320 : CALL pdgeqrf(nrow, ncol, a, irow, icol, desca, tau, work, lwork, info)
1891 :
1892 : #else
1893 : lwork = -1
1894 : ALLOCATE (work(2*ndim))
1895 : CALL dgeqrf(nrow, ncol, a, lda, tau, work, lwork, info)
1896 : lwork = INT(work(1))
1897 : DEALLOCATE (work)
1898 : ALLOCATE (work(lwork))
1899 : CALL dgeqrf(nrow, ncol, a, lda, tau, work, lwork, info)
1900 :
1901 : #endif
1902 :
1903 77280 : ALLOCATE (r_mat(ncol, ncol))
1904 19320 : CALL cp_fm_get_submatrix(matrix_a, r_mat, 1, 1, ncol, ncol)
1905 19320 : IF ((myuplo == "U") .OR. (myuplo == "u")) THEN
1906 38640 : DO i = 1, ncol
1907 38640 : DO j = i + 1, ncol
1908 19320 : r_mat(j, i) = 0.0_dp
1909 : END DO
1910 : END DO
1911 : ELSE
1912 0 : DO j = 1, ncol
1913 0 : DO i = j + 1, ncol
1914 0 : r_mat(i, j) = 0.0_dp
1915 : END DO
1916 : END DO
1917 : END IF
1918 19320 : CALL cp_fm_set_submatrix(matrix_r, r_mat, 1, 1, ncol, ncol)
1919 :
1920 19320 : DEALLOCATE (tau, work, r_mat)
1921 :
1922 19320 : CALL timestop(handle)
1923 :
1924 19320 : END SUBROUTINE cp_fm_qr_factorization
1925 :
1926 : ! **************************************************************************************************
1927 : !> \brief computes the the solution to A*b=A_general using lu decomposition
1928 : !> \param matrix_a input matrix; will be overwritten
1929 : !> \param general_a contains the result
1930 : !> \author Florian Schiffmann
1931 : ! **************************************************************************************************
1932 6832 : SUBROUTINE cp_fm_solve(matrix_a, general_a)
1933 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a, general_a
1934 :
1935 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_solve'
1936 :
1937 : INTEGER :: handle, info, n, nrhs
1938 6832 : INTEGER, ALLOCATABLE, DIMENSION(:) :: ipivot
1939 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, a_general
1940 : #if defined(__parallel)
1941 : INTEGER, DIMENSION(9) :: desca, descb
1942 : #else
1943 : INTEGER :: lda, ldb
1944 : #endif
1945 :
1946 6832 : CALL timeset(routineN, handle)
1947 :
1948 6832 : a => matrix_a%local_data
1949 6832 : a_general => general_a%local_data
1950 6832 : n = matrix_a%matrix_struct%nrow_global
1951 6832 : nrhs = general_a%matrix_struct%ncol_global
1952 20496 : ALLOCATE (ipivot(n + matrix_a%matrix_struct%nrow_block))
1953 :
1954 : #if defined(__parallel)
1955 68320 : desca(:) = matrix_a%matrix_struct%descriptor(:)
1956 68320 : descb(:) = general_a%matrix_struct%descriptor(:)
1957 6832 : CALL pdgetrf(n, n, a, 1, 1, desca, ipivot, info)
1958 : CALL pdgetrs("N", n, nrhs, a, 1, 1, desca, ipivot, a_general, &
1959 6832 : 1, 1, descb, info)
1960 :
1961 : #else
1962 : lda = SIZE(a, 1)
1963 : ldb = SIZE(a_general, 1)
1964 : CALL dgetrf(n, n, a, lda, ipivot, info)
1965 : CALL dgetrs("N", n, nrhs, a, lda, ipivot, a_general, ldb, info)
1966 :
1967 : #endif
1968 : ! info is allowed to be zero
1969 : ! this does just signal a zero diagonal element
1970 6832 : DEALLOCATE (ipivot)
1971 6832 : CALL timestop(handle)
1972 6832 : END SUBROUTINE
1973 :
1974 : ! **************************************************************************************************
1975 : !> \brief Convenience function. Computes the matrix multiplications needed
1976 : !> for the multiplication of complex matrices.
1977 : !> C = beta * C + alpha * ( A ** transa ) * ( B ** transb )
1978 : !> \param transa : 'N' -> normal 'T' -> transpose
1979 : !> alpha,beta :: can be 0.0_dp and 1.0_dp
1980 : !> \param transb ...
1981 : !> \param m ...
1982 : !> \param n ...
1983 : !> \param k ...
1984 : !> \param alpha ...
1985 : !> \param A_re m x k matrix ( ! for transa = 'N'), real part
1986 : !> \param A_im m x k matrix ( ! for transa = 'N'), imaginary part
1987 : !> \param B_re k x n matrix ( ! for transa = 'N'), real part
1988 : !> \param B_im k x n matrix ( ! for transa = 'N'), imaginary part
1989 : !> \param beta ...
1990 : !> \param C_re m x n matrix, real part
1991 : !> \param C_im m x n matrix, imaginary part
1992 : !> \param a_first_col ...
1993 : !> \param a_first_row ...
1994 : !> \param b_first_col : the k x n matrix starts at col b_first_col of matrix_b (avoid usage)
1995 : !> \param b_first_row ...
1996 : !> \param c_first_col ...
1997 : !> \param c_first_row ...
1998 : !> \author Samuel Andermatt
1999 : !> \note
2000 : !> C should have no overlap with A, B
2001 : ! **************************************************************************************************
2002 0 : SUBROUTINE cp_complex_fm_gemm(transa, transb, m, n, k, alpha, A_re, A_im, B_re, B_im, beta, &
2003 : C_re, C_im, a_first_col, a_first_row, b_first_col, b_first_row, c_first_col, &
2004 : c_first_row)
2005 : CHARACTER(LEN=1), INTENT(IN) :: transa, transb
2006 : INTEGER, INTENT(IN) :: m, n, k
2007 : REAL(KIND=dp), INTENT(IN) :: alpha
2008 : TYPE(cp_fm_type), INTENT(IN) :: A_re, A_im, B_re, B_im
2009 : REAL(KIND=dp), INTENT(IN) :: beta
2010 : TYPE(cp_fm_type), INTENT(IN) :: C_re, C_im
2011 : INTEGER, INTENT(IN), OPTIONAL :: a_first_col, a_first_row, b_first_col, &
2012 : b_first_row, c_first_col, c_first_row
2013 :
2014 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_complex_fm_gemm'
2015 :
2016 : INTEGER :: handle
2017 :
2018 0 : CALL timeset(routineN, handle)
2019 :
2020 : CALL cp_fm_gemm(transa, transb, m, n, k, alpha, A_re, B_re, beta, C_re, &
2021 : a_first_col=a_first_col, &
2022 : a_first_row=a_first_row, &
2023 : b_first_col=b_first_col, &
2024 : b_first_row=b_first_row, &
2025 : c_first_col=c_first_col, &
2026 0 : c_first_row=c_first_row)
2027 : CALL cp_fm_gemm(transa, transb, m, n, k, -alpha, A_im, B_im, 1.0_dp, C_re, &
2028 : a_first_col=a_first_col, &
2029 : a_first_row=a_first_row, &
2030 : b_first_col=b_first_col, &
2031 : b_first_row=b_first_row, &
2032 : c_first_col=c_first_col, &
2033 0 : c_first_row=c_first_row)
2034 : CALL cp_fm_gemm(transa, transb, m, n, k, alpha, A_re, B_im, beta, C_im, &
2035 : a_first_col=a_first_col, &
2036 : a_first_row=a_first_row, &
2037 : b_first_col=b_first_col, &
2038 : b_first_row=b_first_row, &
2039 : c_first_col=c_first_col, &
2040 0 : c_first_row=c_first_row)
2041 : CALL cp_fm_gemm(transa, transb, m, n, k, alpha, A_im, B_re, 1.0_dp, C_im, &
2042 : a_first_col=a_first_col, &
2043 : a_first_row=a_first_row, &
2044 : b_first_col=b_first_col, &
2045 : b_first_row=b_first_row, &
2046 : c_first_col=c_first_col, &
2047 0 : c_first_row=c_first_row)
2048 :
2049 0 : CALL timestop(handle)
2050 :
2051 0 : END SUBROUTINE cp_complex_fm_gemm
2052 :
2053 : ! **************************************************************************************************
2054 : !> \brief inverts a matrix using LU decomposition
2055 : !> the input matrix will be overwritten
2056 : !> \param matrix : input a general square non-singular matrix, outputs its inverse
2057 : !> \param info_out : optional, if present outputs the info from (p)zgetri
2058 : !> \author Lianheng Tong
2059 : ! **************************************************************************************************
2060 0 : SUBROUTINE cp_fm_lu_invert(matrix, info_out)
2061 : TYPE(cp_fm_type), INTENT(IN) :: matrix
2062 : INTEGER, INTENT(OUT), OPTIONAL :: info_out
2063 :
2064 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_lu_invert'
2065 :
2066 : INTEGER :: nrows_global, handle, info, lwork
2067 0 : INTEGER, DIMENSION(:), ALLOCATABLE :: ipivot
2068 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: mat
2069 : REAL(KIND=sp), DIMENSION(:, :), POINTER :: mat_sp
2070 0 : REAL(KIND=dp), DIMENSION(:), ALLOCATABLE :: work
2071 0 : REAL(KIND=sp), DIMENSION(:), ALLOCATABLE :: work_sp
2072 : #if defined(__parallel)
2073 : INTEGER :: liwork
2074 : INTEGER, DIMENSION(9) :: desca
2075 0 : INTEGER, DIMENSION(:), ALLOCATABLE :: iwork
2076 : #else
2077 : INTEGER :: lda
2078 : #endif
2079 :
2080 0 : CALL timeset(routineN, handle)
2081 :
2082 0 : mat => matrix%local_data
2083 0 : mat_sp => matrix%local_data_sp
2084 0 : nrows_global = matrix%matrix_struct%nrow_global
2085 0 : CPASSERT(nrows_global == matrix%matrix_struct%ncol_global)
2086 0 : ALLOCATE (ipivot(nrows_global))
2087 : ! do LU decomposition
2088 : #if defined(__parallel)
2089 0 : desca = matrix%matrix_struct%descriptor
2090 0 : IF (matrix%use_sp) THEN
2091 : CALL psgetrf(nrows_global, nrows_global, &
2092 0 : mat_sp, 1, 1, desca, ipivot, info)
2093 : ELSE
2094 : CALL pdgetrf(nrows_global, nrows_global, &
2095 0 : mat, 1, 1, desca, ipivot, info)
2096 : END IF
2097 : #else
2098 : lda = SIZE(mat, 1)
2099 : IF (matrix%use_sp) THEN
2100 : CALL sgetrf(nrows_global, nrows_global, &
2101 : mat_sp, lda, ipivot, info)
2102 : ELSE
2103 : CALL dgetrf(nrows_global, nrows_global, &
2104 : mat, lda, ipivot, info)
2105 : END IF
2106 : #endif
2107 0 : IF (info /= 0) THEN
2108 0 : CALL cp_abort(__LOCATION__, "LU decomposition has failed")
2109 : END IF
2110 : ! do inversion
2111 0 : IF (matrix%use_sp) THEN
2112 0 : ALLOCATE (work(1))
2113 : ELSE
2114 0 : ALLOCATE (work_sp(1))
2115 : END IF
2116 : #if defined(__parallel)
2117 0 : ALLOCATE (iwork(1))
2118 0 : IF (matrix%use_sp) THEN
2119 : CALL psgetri(nrows_global, mat_sp, 1, 1, desca, &
2120 0 : ipivot, work_sp, -1, iwork, -1, info)
2121 0 : lwork = INT(work_sp(1))
2122 0 : DEALLOCATE (work_sp)
2123 0 : ALLOCATE (work_sp(lwork))
2124 : ELSE
2125 : CALL pdgetri(nrows_global, mat, 1, 1, desca, &
2126 0 : ipivot, work, -1, iwork, -1, info)
2127 0 : lwork = INT(work(1))
2128 0 : DEALLOCATE (work)
2129 0 : ALLOCATE (work(lwork))
2130 : END IF
2131 0 : liwork = INT(iwork(1))
2132 0 : DEALLOCATE (iwork)
2133 0 : ALLOCATE (iwork(liwork))
2134 0 : IF (matrix%use_sp) THEN
2135 : CALL psgetri(nrows_global, mat_sp, 1, 1, desca, &
2136 0 : ipivot, work_sp, lwork, iwork, liwork, info)
2137 : ELSE
2138 : CALL pdgetri(nrows_global, mat, 1, 1, desca, &
2139 0 : ipivot, work, lwork, iwork, liwork, info)
2140 : END IF
2141 0 : DEALLOCATE (iwork)
2142 : #else
2143 : IF (matrix%use_sp) THEN
2144 : CALL sgetri(nrows_global, mat_sp, lda, &
2145 : ipivot, work_sp, -1, info)
2146 : lwork = INT(work_sp(1))
2147 : DEALLOCATE (work_sp)
2148 : ALLOCATE (work_sp(lwork))
2149 : CALL sgetri(nrows_global, mat_sp, lda, &
2150 : ipivot, work_sp, lwork, info)
2151 : ELSE
2152 : CALL dgetri(nrows_global, mat, lda, &
2153 : ipivot, work, -1, info)
2154 : lwork = INT(work(1))
2155 : DEALLOCATE (work)
2156 : ALLOCATE (work(lwork))
2157 : CALL dgetri(nrows_global, mat, lda, &
2158 : ipivot, work, lwork, info)
2159 : END IF
2160 : #endif
2161 0 : IF (matrix%use_sp) THEN
2162 0 : DEALLOCATE (work_sp)
2163 : ELSE
2164 0 : DEALLOCATE (work)
2165 : END IF
2166 0 : DEALLOCATE (ipivot)
2167 :
2168 0 : IF (PRESENT(info_out)) THEN
2169 0 : info_out = info
2170 : ELSE
2171 0 : IF (info /= 0) &
2172 0 : CALL cp_abort(__LOCATION__, "LU inversion has failed")
2173 : END IF
2174 :
2175 0 : CALL timestop(handle)
2176 :
2177 0 : END SUBROUTINE cp_fm_lu_invert
2178 :
2179 : ! **************************************************************************************************
2180 : !> \brief norm of matrix using (p)dlange
2181 : !> \param matrix : input a general matrix
2182 : !> \param mode : 'M' max abs element value,
2183 : !> '1' or 'O' one norm, i.e. maximum column sum
2184 : !> 'I' infinity norm, i.e. maximum row sum
2185 : !> 'F' or 'E' Frobenius norm, i.e. sqrt of sum of all squares of elements
2186 : !> \return : the norm according to mode
2187 : !> \author Lianheng Tong
2188 : ! **************************************************************************************************
2189 492 : FUNCTION cp_fm_norm(matrix, mode) RESULT(res)
2190 : TYPE(cp_fm_type), INTENT(IN) :: matrix
2191 : CHARACTER, INTENT(IN) :: mode
2192 : REAL(KIND=dp) :: res
2193 :
2194 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_norm'
2195 :
2196 : INTEGER :: nrows, ncols, handle, lwork, nrows_local, ncols_local
2197 : REAL(KIND=sp) :: res_sp
2198 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: aa
2199 : REAL(KIND=sp), DIMENSION(:, :), POINTER :: aa_sp
2200 492 : REAL(KIND=dp), DIMENSION(:), ALLOCATABLE :: work
2201 492 : REAL(KIND=sp), DIMENSION(:), ALLOCATABLE :: work_sp
2202 : #if defined(__parallel)
2203 : INTEGER, DIMENSION(9) :: desca
2204 : #else
2205 : INTEGER :: lda
2206 : #endif
2207 :
2208 492 : CALL timeset(routineN, handle)
2209 :
2210 : CALL cp_fm_get_info(matrix=matrix, &
2211 : nrow_global=nrows, &
2212 : ncol_global=ncols, &
2213 : nrow_local=nrows_local, &
2214 492 : ncol_local=ncols_local)
2215 492 : aa => matrix%local_data
2216 492 : aa_sp => matrix%local_data_sp
2217 :
2218 : #if defined(__parallel)
2219 4920 : desca = matrix%matrix_struct%descriptor
2220 : SELECT CASE (mode)
2221 : CASE ('M', 'm')
2222 492 : lwork = 1
2223 : CASE ('1', 'O', 'o')
2224 492 : lwork = ncols_local
2225 : CASE ('I', 'i')
2226 0 : lwork = nrows_local
2227 : CASE ('F', 'f', 'E', 'e')
2228 0 : lwork = 1
2229 : CASE DEFAULT
2230 492 : CPABORT("mode input is not valid")
2231 : END SELECT
2232 492 : IF (matrix%use_sp) THEN
2233 0 : ALLOCATE (work_sp(lwork))
2234 0 : res_sp = pslange(mode, nrows, ncols, aa_sp, 1, 1, desca, work_sp)
2235 0 : DEALLOCATE (work_sp)
2236 0 : res = REAL(res_sp, KIND=dp)
2237 : ELSE
2238 1476 : ALLOCATE (work(lwork))
2239 492 : res = pdlange(mode, nrows, ncols, aa, 1, 1, desca, work)
2240 492 : DEALLOCATE (work)
2241 : END IF
2242 : #else
2243 : SELECT CASE (mode)
2244 : CASE ('M', 'm')
2245 : lwork = 1
2246 : CASE ('1', 'O', 'o')
2247 : lwork = 1
2248 : CASE ('I', 'i')
2249 : lwork = nrows
2250 : CASE ('F', 'f', 'E', 'e')
2251 : lwork = 1
2252 : CASE DEFAULT
2253 : CPABORT("mode input is not valid")
2254 : END SELECT
2255 : IF (matrix%use_sp) THEN
2256 : ALLOCATE (work_sp(lwork))
2257 : lda = SIZE(aa_sp, 1)
2258 : res_sp = slange(mode, nrows, ncols, aa_sp, lda, work_sp)
2259 : DEALLOCATE (work_sp)
2260 : res = REAL(res_sp, KIND=dp)
2261 : ELSE
2262 : ALLOCATE (work(lwork))
2263 : lda = SIZE(aa, 1)
2264 : res = dlange(mode, nrows, ncols, aa, lda, work)
2265 : DEALLOCATE (work)
2266 : END IF
2267 : #endif
2268 :
2269 492 : CALL timestop(handle)
2270 :
2271 492 : END FUNCTION cp_fm_norm
2272 :
2273 : ! **************************************************************************************************
2274 : !> \brief trace of a matrix using pdlatra
2275 : !> \param matrix : input a square matrix
2276 : !> \return : the trace
2277 : !> \author Lianheng Tong
2278 : ! **************************************************************************************************
2279 0 : FUNCTION cp_fm_latra(matrix) RESULT(res)
2280 : TYPE(cp_fm_type), INTENT(IN) :: matrix
2281 : REAL(KIND=dp) :: res
2282 :
2283 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_latra'
2284 :
2285 : INTEGER :: nrows, ncols, handle
2286 : REAL(KIND=sp) :: res_sp
2287 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: aa
2288 : REAL(KIND=sp), DIMENSION(:, :), POINTER :: aa_sp
2289 : #if defined(__parallel)
2290 : INTEGER, DIMENSION(9) :: desca
2291 : #else
2292 : INTEGER :: ii
2293 : #endif
2294 :
2295 0 : CALL timeset(routineN, handle)
2296 :
2297 0 : nrows = matrix%matrix_struct%nrow_global
2298 0 : ncols = matrix%matrix_struct%ncol_global
2299 0 : CPASSERT(nrows == ncols)
2300 0 : aa => matrix%local_data
2301 0 : aa_sp => matrix%local_data_sp
2302 :
2303 : #if defined(__parallel)
2304 0 : desca = matrix%matrix_struct%descriptor
2305 0 : IF (matrix%use_sp) THEN
2306 0 : res_sp = pslatra(nrows, aa_sp, 1, 1, desca)
2307 0 : res = REAL(res_sp, KIND=dp)
2308 : ELSE
2309 0 : res = pdlatra(nrows, aa, 1, 1, desca)
2310 : END IF
2311 : #else
2312 : IF (matrix%use_sp) THEN
2313 : res_sp = 0.0_sp
2314 : DO ii = 1, nrows
2315 : res_sp = res_sp + aa_sp(ii, ii)
2316 : END DO
2317 : res = REAL(res_sp, KIND=dp)
2318 : ELSE
2319 : res = 0.0_dp
2320 : DO ii = 1, nrows
2321 : res = res + aa(ii, ii)
2322 : END DO
2323 : END IF
2324 : #endif
2325 :
2326 0 : CALL timestop(handle)
2327 :
2328 0 : END FUNCTION cp_fm_latra
2329 :
2330 : ! **************************************************************************************************
2331 : !> \brief compute a QR factorization with column pivoting of a M-by-N distributed matrix
2332 : !> sub( A ) = A(IA:IA+M-1,JA:JA+N-1)
2333 : !> \param matrix : input M-by-N distributed matrix sub( A ) which is to be factored
2334 : !> \param tau : scalar factors TAU of the elementary reflectors. TAU is tied to the distributed matrix A
2335 : !> \param nrow ...
2336 : !> \param ncol ...
2337 : !> \param first_row ...
2338 : !> \param first_col ...
2339 : !> \author MI
2340 : ! **************************************************************************************************
2341 36 : SUBROUTINE cp_fm_pdgeqpf(matrix, tau, nrow, ncol, first_row, first_col)
2342 :
2343 : TYPE(cp_fm_type), INTENT(IN) :: matrix
2344 : REAL(KIND=dp), DIMENSION(:), POINTER :: tau
2345 : INTEGER, INTENT(IN) :: nrow, ncol, first_row, first_col
2346 :
2347 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_pdgeqpf'
2348 :
2349 : INTEGER :: handle
2350 : INTEGER :: info, lwork
2351 36 : INTEGER, ALLOCATABLE, DIMENSION(:) :: ipiv
2352 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
2353 : REAL(KIND=dp), DIMENSION(:), POINTER :: work
2354 : #if defined(__parallel)
2355 : INTEGER, DIMENSION(9) :: descc
2356 : #else
2357 : INTEGER :: lda
2358 : #endif
2359 :
2360 36 : CALL timeset(routineN, handle)
2361 :
2362 36 : a => matrix%local_data
2363 36 : lwork = -1
2364 108 : ALLOCATE (work(2*nrow))
2365 108 : ALLOCATE (ipiv(ncol))
2366 36 : info = 0
2367 :
2368 : #if defined(__parallel)
2369 360 : descc(:) = matrix%matrix_struct%descriptor(:)
2370 : ! Call SCALAPACK routine to get optimal work dimension
2371 36 : CALL pdgeqpf(nrow, ncol, a, first_row, first_col, descc, ipiv, tau, work, lwork, info)
2372 36 : lwork = INT(work(1))
2373 36 : DEALLOCATE (work)
2374 108 : ALLOCATE (work(lwork))
2375 244 : tau = 0.0_dp
2376 354 : ipiv = 0
2377 :
2378 : ! Call SCALAPACK routine to get QR decomposition of CTs
2379 36 : CALL pdgeqpf(nrow, ncol, a, first_row, first_col, descc, ipiv, tau, work, lwork, info)
2380 : #else
2381 : CPASSERT(first_row == 1 .AND. first_col == 1)
2382 : lda = SIZE(a, 1)
2383 : CALL dgeqp3(nrow, ncol, a, lda, ipiv, tau, work, lwork, info)
2384 : lwork = INT(work(1))
2385 : DEALLOCATE (work)
2386 : ALLOCATE (work(lwork))
2387 : tau = 0.0_dp
2388 : ipiv = 0
2389 : CALL dgeqp3(nrow, ncol, a, lda, ipiv, tau, work, lwork, info)
2390 : #endif
2391 36 : CPASSERT(info == 0)
2392 :
2393 36 : DEALLOCATE (work)
2394 36 : DEALLOCATE (ipiv)
2395 :
2396 36 : CALL timestop(handle)
2397 :
2398 36 : END SUBROUTINE cp_fm_pdgeqpf
2399 :
2400 : ! **************************************************************************************************
2401 : !> \brief generates an M-by-N real distributed matrix Q denoting A(IA:IA+M-1,JA:JA+N-1)
2402 : !> with orthonormal columns, which is defined as the first N columns of a product of K
2403 : !> elementary reflectors of order M
2404 : !> \param matrix : On entry, the j-th column must contain the vector which defines the elementary reflector
2405 : !> as returned from PDGEQRF
2406 : !> On exit it contains the M-by-N distributed matrix Q
2407 : !> \param tau : contains the scalar factors TAU of elementary reflectors as returned by PDGEQRF
2408 : !> \param nrow ...
2409 : !> \param first_row ...
2410 : !> \param first_col ...
2411 : !> \author MI
2412 : ! **************************************************************************************************
2413 36 : SUBROUTINE cp_fm_pdorgqr(matrix, tau, nrow, first_row, first_col)
2414 :
2415 : TYPE(cp_fm_type), INTENT(IN) :: matrix
2416 : REAL(KIND=dp), DIMENSION(:), POINTER :: tau
2417 : INTEGER, INTENT(IN) :: nrow, first_row, first_col
2418 :
2419 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_pdorgqr'
2420 :
2421 : INTEGER :: handle
2422 : INTEGER :: info, lwork
2423 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
2424 : REAL(KIND=dp), DIMENSION(:), POINTER :: work
2425 : #if defined(__parallel)
2426 : INTEGER, DIMENSION(9) :: descc
2427 : #else
2428 : INTEGER :: lda
2429 : #endif
2430 :
2431 36 : CALL timeset(routineN, handle)
2432 :
2433 36 : a => matrix%local_data
2434 36 : lwork = -1
2435 108 : ALLOCATE (work(2*nrow))
2436 36 : info = 0
2437 :
2438 : #if defined(__parallel)
2439 360 : descc(:) = matrix%matrix_struct%descriptor(:)
2440 :
2441 36 : CALL pdorgqr(nrow, nrow, nrow, a, first_row, first_col, descc, tau, work, lwork, info)
2442 36 : CPASSERT(info == 0)
2443 36 : lwork = INT(work(1))
2444 36 : DEALLOCATE (work)
2445 108 : ALLOCATE (work(lwork))
2446 :
2447 : ! Call SCALAPACK routine to get Q
2448 36 : CALL pdorgqr(nrow, nrow, nrow, a, first_row, first_col, descc, tau, work, lwork, info)
2449 : #else
2450 : CPASSERT(first_row == 1 .AND. first_col == 1)
2451 : lda = SIZE(a, 1)
2452 : CALL dorgqr(nrow, nrow, nrow, a, lda, tau, work, lwork, info)
2453 : lwork = INT(work(1))
2454 : DEALLOCATE (work)
2455 : ALLOCATE (work(lwork))
2456 : CALL dorgqr(nrow, nrow, nrow, a, lda, tau, work, lwork, info)
2457 : #endif
2458 36 : CPASSERT(INFO == 0)
2459 :
2460 36 : DEALLOCATE (work)
2461 36 : CALL timestop(handle)
2462 :
2463 36 : END SUBROUTINE cp_fm_pdorgqr
2464 :
2465 : ! **************************************************************************************************
2466 : !> \brief Applies a planar rotation defined by cs and sn to the i'th and j'th rows.
2467 : !> \param matrix ...
2468 : !> \param irow ...
2469 : !> \param jrow ...
2470 : !> \param cs cosine of the rotation angle
2471 : !> \param sn sinus of the rotation angle
2472 : !> \author Ole Schuett
2473 : ! **************************************************************************************************
2474 565504 : SUBROUTINE cp_fm_rot_rows(matrix, irow, jrow, cs, sn)
2475 : TYPE(cp_fm_type), INTENT(IN) :: matrix
2476 : INTEGER, INTENT(IN) :: irow, jrow
2477 : REAL(dp), INTENT(IN) :: cs, sn
2478 :
2479 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_rot_rows'
2480 : INTEGER :: handle, ncol
2481 :
2482 : #if defined(__parallel)
2483 : INTEGER :: info, lwork
2484 : INTEGER, DIMENSION(9) :: desc
2485 565504 : REAL(dp), DIMENSION(:), ALLOCATABLE :: work
2486 : #endif
2487 565504 : CALL timeset(routineN, handle)
2488 565504 : CALL cp_fm_get_info(matrix, ncol_global=ncol)
2489 : #if defined(__parallel)
2490 565504 : IF (1 /= matrix%matrix_struct%context%n_pid) THEN
2491 565504 : lwork = 2*ncol + 1
2492 1696512 : ALLOCATE (work(lwork))
2493 5655040 : desc(:) = matrix%matrix_struct%descriptor(:)
2494 : CALL pdrot(ncol, &
2495 : matrix%local_data(1, 1), irow, 1, desc, ncol, &
2496 : matrix%local_data(1, 1), jrow, 1, desc, ncol, &
2497 565504 : cs, sn, work, lwork, info)
2498 565504 : CPASSERT(info == 0)
2499 565504 : DEALLOCATE (work)
2500 : ELSE
2501 : #endif
2502 0 : CALL drot(ncol, matrix%local_data(irow, 1), ncol, matrix%local_data(jrow, 1), ncol, cs, sn)
2503 : #if defined(__parallel)
2504 : END IF
2505 : #endif
2506 565504 : CALL timestop(handle)
2507 565504 : END SUBROUTINE cp_fm_rot_rows
2508 :
2509 : ! **************************************************************************************************
2510 : !> \brief Applies a planar rotation defined by cs and sn to the i'th and j'th columnns.
2511 : !> \param matrix ...
2512 : !> \param icol ...
2513 : !> \param jcol ...
2514 : !> \param cs cosine of the rotation angle
2515 : !> \param sn sinus of the rotation angle
2516 : !> \author Ole Schuett
2517 : ! **************************************************************************************************
2518 637106 : SUBROUTINE cp_fm_rot_cols(matrix, icol, jcol, cs, sn)
2519 : TYPE(cp_fm_type), INTENT(IN) :: matrix
2520 : INTEGER, INTENT(IN) :: icol, jcol
2521 : REAL(dp), INTENT(IN) :: cs, sn
2522 :
2523 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_rot_cols'
2524 : INTEGER :: handle, nrow
2525 :
2526 : #if defined(__parallel)
2527 : INTEGER :: info, lwork
2528 : INTEGER, DIMENSION(9) :: desc
2529 637106 : REAL(dp), DIMENSION(:), ALLOCATABLE :: work
2530 : #endif
2531 637106 : CALL timeset(routineN, handle)
2532 637106 : CALL cp_fm_get_info(matrix, nrow_global=nrow)
2533 : #if defined(__parallel)
2534 637106 : IF (1 /= matrix%matrix_struct%context%n_pid) THEN
2535 637106 : lwork = 2*nrow + 1
2536 1911318 : ALLOCATE (work(lwork))
2537 6371060 : desc(:) = matrix%matrix_struct%descriptor(:)
2538 : CALL pdrot(nrow, &
2539 : matrix%local_data(1, 1), 1, icol, desc, 1, &
2540 : matrix%local_data(1, 1), 1, jcol, desc, 1, &
2541 637106 : cs, sn, work, lwork, info)
2542 637106 : CPASSERT(info == 0)
2543 637106 : DEALLOCATE (work)
2544 : ELSE
2545 : #endif
2546 0 : CALL drot(nrow, matrix%local_data(1, icol), 1, matrix%local_data(1, jcol), 1, cs, sn)
2547 : #if defined(__parallel)
2548 : END IF
2549 : #endif
2550 637106 : CALL timestop(handle)
2551 637106 : END SUBROUTINE cp_fm_rot_cols
2552 :
2553 : ! **************************************************************************************************
2554 : !> \brief Orthonormalizes selected rows and columns of a full matrix, matrix_a
2555 : !> \param matrix_a ...
2556 : !> \param B ...
2557 : !> \param nrows number of rows of matrix_a, optional, defaults to size(matrix_a,1)
2558 : !> \param ncols number of columns of matrix_a, optional, defaults to size(matrix_a, 2)
2559 : !> \param start_row starting index of rows, optional, defaults to 1
2560 : !> \param start_col starting index of columns, optional, defaults to 1
2561 : !> \param do_norm ...
2562 : !> \param do_print ...
2563 : ! **************************************************************************************************
2564 0 : SUBROUTINE cp_fm_Gram_Schmidt_orthonorm(matrix_a, B, nrows, ncols, start_row, start_col, &
2565 : do_norm, do_print)
2566 :
2567 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a
2568 : REAL(kind=dp), DIMENSION(:, :), INTENT(OUT) :: B
2569 : INTEGER, INTENT(IN), OPTIONAL :: nrows, ncols, start_row, start_col
2570 : LOGICAL, INTENT(IN), OPTIONAL :: do_norm, do_print
2571 :
2572 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_Gram_Schmidt_orthonorm'
2573 :
2574 : INTEGER :: end_col_global, end_col_local, end_row_global, end_row_local, handle, i, j, &
2575 : j_col, ncol_global, ncol_local, nrow_global, nrow_local, start_col_global, &
2576 : start_col_local, start_row_global, start_row_local, this_col, unit_nr
2577 0 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
2578 : LOGICAL :: my_do_norm, my_do_print
2579 : REAL(KIND=dp) :: norm
2580 0 : REAL(kind=dp), DIMENSION(:, :), POINTER :: a
2581 :
2582 0 : CALL timeset(routineN, handle)
2583 :
2584 0 : my_do_norm = .TRUE.
2585 0 : IF (PRESENT(do_norm)) my_do_norm = do_norm
2586 :
2587 0 : my_do_print = .FALSE.
2588 0 : IF (PRESENT(do_print) .AND. (my_do_norm)) my_do_print = do_print
2589 :
2590 0 : unit_nr = -1
2591 0 : IF (my_do_print) THEN
2592 0 : unit_nr = cp_logger_get_default_unit_nr()
2593 0 : IF (unit_nr < 1) my_do_print = .FALSE.
2594 : END IF
2595 :
2596 0 : IF (SIZE(B) /= 0) THEN
2597 0 : IF (PRESENT(nrows)) THEN
2598 0 : nrow_global = nrows
2599 : ELSE
2600 0 : nrow_global = SIZE(B, 1)
2601 : END IF
2602 :
2603 0 : IF (PRESENT(ncols)) THEN
2604 0 : ncol_global = ncols
2605 : ELSE
2606 0 : ncol_global = SIZE(B, 2)
2607 : END IF
2608 :
2609 0 : IF (PRESENT(start_row)) THEN
2610 0 : start_row_global = start_row
2611 : ELSE
2612 : start_row_global = 1
2613 : END IF
2614 :
2615 0 : IF (PRESENT(start_col)) THEN
2616 0 : start_col_global = start_col
2617 : ELSE
2618 : start_col_global = 1
2619 : END IF
2620 :
2621 0 : end_row_global = start_row_global + nrow_global - 1
2622 0 : end_col_global = start_col_global + ncol_global - 1
2623 :
2624 : CALL cp_fm_get_info(matrix=matrix_a, &
2625 : nrow_global=nrow_global, ncol_global=ncol_global, &
2626 : nrow_local=nrow_local, ncol_local=ncol_local, &
2627 0 : row_indices=row_indices, col_indices=col_indices)
2628 0 : IF (end_row_global > nrow_global) THEN
2629 : end_row_global = nrow_global
2630 : END IF
2631 0 : IF (end_col_global > ncol_global) THEN
2632 : end_col_global = ncol_global
2633 : END IF
2634 :
2635 : ! find out row/column indices of locally stored matrix elements that
2636 : ! needs to be copied.
2637 : ! Arrays row_indices and col_indices are assumed to be sorted in
2638 : ! ascending order
2639 0 : DO start_row_local = 1, nrow_local
2640 0 : IF (row_indices(start_row_local) >= start_row_global) EXIT
2641 : END DO
2642 :
2643 0 : DO end_row_local = start_row_local, nrow_local
2644 0 : IF (row_indices(end_row_local) > end_row_global) EXIT
2645 : END DO
2646 0 : end_row_local = end_row_local - 1
2647 :
2648 0 : DO start_col_local = 1, ncol_local
2649 0 : IF (col_indices(start_col_local) >= start_col_global) EXIT
2650 : END DO
2651 :
2652 0 : DO end_col_local = start_col_local, ncol_local
2653 0 : IF (col_indices(end_col_local) > end_col_global) EXIT
2654 : END DO
2655 0 : end_col_local = end_col_local - 1
2656 :
2657 0 : a => matrix_a%local_data
2658 :
2659 0 : this_col = col_indices(start_col_local) - start_col_global + 1
2660 :
2661 0 : B(:, this_col) = a(:, start_col_local)
2662 :
2663 0 : IF (my_do_norm) THEN
2664 0 : norm = SQRT(accurate_dot_product(B(:, this_col), B(:, this_col)))
2665 0 : B(:, this_col) = B(:, this_col)/norm
2666 0 : IF (my_do_print) WRITE (unit_nr, '(I3,F8.3)') this_col, norm
2667 : END IF
2668 :
2669 0 : DO i = start_col_local + 1, end_col_local
2670 0 : this_col = col_indices(i) - start_col_global + 1
2671 0 : B(:, this_col) = a(:, i)
2672 0 : DO j = start_col_local, i - 1
2673 0 : j_col = col_indices(j) - start_col_global + 1
2674 : B(:, this_col) = B(:, this_col) - &
2675 : accurate_dot_product(B(:, j_col), B(:, this_col))* &
2676 0 : B(:, j_col)/accurate_dot_product(B(:, j_col), B(:, j_col))
2677 : END DO
2678 :
2679 0 : IF (my_do_norm) THEN
2680 0 : norm = SQRT(accurate_dot_product(B(:, this_col), B(:, this_col)))
2681 0 : B(:, this_col) = B(:, this_col)/norm
2682 0 : IF (my_do_print) WRITE (unit_nr, '(I3,F8.3)') this_col, norm
2683 : END IF
2684 :
2685 : END DO
2686 0 : CALL matrix_a%matrix_struct%para_env%sum(B)
2687 : END IF
2688 :
2689 0 : CALL timestop(handle)
2690 :
2691 0 : END SUBROUTINE cp_fm_Gram_Schmidt_orthonorm
2692 :
2693 : ! **************************************************************************************************
2694 : !> \brief Cholesky decomposition
2695 : !> \param fm_matrix ...
2696 : !> \param n ...
2697 : !> \param uplo triangular format; defaults to 'U'
2698 : ! **************************************************************************************************
2699 10022 : SUBROUTINE cp_fm_potrf(fm_matrix, n, uplo)
2700 : TYPE(cp_fm_type) :: fm_matrix
2701 : INTEGER, INTENT(IN) :: n
2702 : CHARACTER, INTENT(IN), OPTIONAL :: uplo
2703 :
2704 : CHARACTER :: myuplo
2705 : INTEGER :: info
2706 10022 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
2707 10022 : REAL(KIND=sp), DIMENSION(:, :), POINTER :: a_sp
2708 : #if defined(__parallel)
2709 : INTEGER, DIMENSION(9) :: desca
2710 : #endif
2711 :
2712 10022 : myuplo = 'U'
2713 0 : IF (PRESENT(uplo)) myuplo = uplo
2714 :
2715 10022 : a => fm_matrix%local_data
2716 10022 : a_sp => fm_matrix%local_data_sp
2717 : #if defined(__parallel)
2718 100220 : desca(:) = fm_matrix%matrix_struct%descriptor(:)
2719 10022 : IF (fm_matrix%use_sp) THEN
2720 0 : CALL pspotrf(myuplo, n, a_sp(1, 1), 1, 1, desca, info)
2721 : ELSE
2722 10022 : CALL pdpotrf(myuplo, n, a(1, 1), 1, 1, desca, info)
2723 : END IF
2724 : #else
2725 : IF (fm_matrix%use_sp) THEN
2726 : CALL spotrf(myuplo, n, a_sp(1, 1), SIZE(a_sp, 1), info)
2727 : ELSE
2728 : CALL dpotrf(myuplo, n, a(1, 1), SIZE(a, 1), info)
2729 : END IF
2730 : #endif
2731 10022 : IF (info /= 0) &
2732 0 : CPABORT("Cholesky decomposition failed. Matrix ill-conditioned?")
2733 :
2734 10022 : END SUBROUTINE cp_fm_potrf
2735 :
2736 : ! **************************************************************************************************
2737 : !> \brief Invert trianguar matrix
2738 : !> \param fm_matrix the matrix to invert (triangular matrix according to uplo)
2739 : !> \param n size of the matrix to invert
2740 : !> \param uplo triangular format; defaults to 'U'
2741 : ! **************************************************************************************************
2742 9228 : SUBROUTINE cp_fm_potri(fm_matrix, n, uplo)
2743 : TYPE(cp_fm_type) :: fm_matrix
2744 : INTEGER, INTENT(IN) :: n
2745 : CHARACTER, INTENT(IN), OPTIONAL :: uplo
2746 :
2747 : CHARACTER :: myuplo
2748 9228 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
2749 9228 : REAL(KIND=sp), DIMENSION(:, :), POINTER :: a_sp
2750 : INTEGER :: info
2751 : #if defined(__parallel)
2752 : INTEGER, DIMENSION(9) :: desca
2753 : #endif
2754 :
2755 9228 : myuplo = 'U'
2756 0 : IF (PRESENT(uplo)) myuplo = uplo
2757 :
2758 9228 : a => fm_matrix%local_data
2759 9228 : a_sp => fm_matrix%local_data_sp
2760 : #if defined(__parallel)
2761 92280 : desca(:) = fm_matrix%matrix_struct%descriptor(:)
2762 9228 : IF (fm_matrix%use_sp) THEN
2763 0 : CALL pspotri(myuplo, n, a_sp(1, 1), 1, 1, desca, info)
2764 : ELSE
2765 9228 : CALL pdpotri(myuplo, n, a(1, 1), 1, 1, desca, info)
2766 : END IF
2767 : #else
2768 : IF (fm_matrix%use_sp) THEN
2769 : CALL spotri(myuplo, n, a_sp(1, 1), SIZE(a_sp, 1), info)
2770 : ELSE
2771 : CALL dpotri(myuplo, n, a(1, 1), SIZE(a, 1), info)
2772 : END IF
2773 : #endif
2774 9228 : CPASSERT(info == 0)
2775 9228 : END SUBROUTINE cp_fm_potri
2776 :
2777 : ! **************************************************************************************************
2778 : !> \brief Calculates
2779 : !> yv = alpha*amat*xv + beta*yv
2780 : !> where amat: fm matrix
2781 : !> xv : vector replicated
2782 : !> yv : vector replicated
2783 : !> Defaults: alpha = 1, beta = 0
2784 : ! **************************************************************************************************
2785 17152 : SUBROUTINE cp_fm_matvec(amat, xv, yv, alpha, beta)
2786 : TYPE(cp_fm_type), INTENT(IN) :: amat
2787 : REAL(KIND=dp), DIMENSION(:), INTENT(IN) :: xv
2788 : REAL(KIND=dp), DIMENSION(:), INTENT(INOUT) :: yv
2789 : REAL(KIND=dp), OPTIONAL, INTENT(IN) :: alpha, beta
2790 :
2791 : INTEGER :: na, nc, nx, ny
2792 : REAL(KIND=dp) :: aval, bval
2793 : #if defined(__parallel)
2794 : INTEGER :: nrl, ncl, ic, ir
2795 17152 : INTEGER, DIMENSION(:), POINTER :: rind, cind
2796 17152 : REAL(KIND=dp), DIMENSION(:), ALLOCATABLE :: xvl, yvl, yvm
2797 : #endif
2798 :
2799 17152 : IF (amat%use_sp) THEN
2800 0 : CPABORT("cp_fm_matvec: SP option not available")
2801 : END IF
2802 17152 : aval = 1.0_dp
2803 17152 : IF (PRESENT(alpha)) aval = alpha
2804 17152 : bval = 0.0_dp
2805 17152 : IF (PRESENT(beta)) bval = beta
2806 :
2807 17152 : CALL cp_fm_get_info(amat, nrow_global=na, ncol_global=nc)
2808 17152 : nx = SIZE(xv)
2809 17152 : ny = SIZE(yv)
2810 17152 : IF ((nx /= ny) .OR. (nc /= nx)) THEN
2811 0 : CPABORT("cp_fm_matvec: incompatible dimensions")
2812 : END IF
2813 : #if defined(__parallel)
2814 : CALL cp_fm_get_info(amat, nrow_local=nrl, ncol_local=ncl, &
2815 17152 : row_indices=rind, col_indices=cind)
2816 120064 : ALLOCATE (xvl(ncl), yvl(nrl), yvm(ny))
2817 150320 : DO ic = 1, ncl
2818 150320 : xvl(ic) = xv(cind(ic))
2819 : END DO
2820 1690376 : yvl(1:nrl) = MATMUL(amat%local_data, xvl(1:ncl))
2821 150320 : yvm = 0.0_dp
2822 83736 : DO ir = 1, nrl
2823 83736 : yvm(rind(ir)) = yvl(ir)
2824 : END DO
2825 17152 : CALL amat%matrix_struct%para_env%sum(yvm)
2826 17152 : IF (bval == 0.0_dp) THEN
2827 75160 : yv = aval*yvm
2828 : ELSE
2829 75160 : yv = bval*yv + aval*yvm
2830 : END IF
2831 : #else
2832 : IF (bval == 0.0_dp) THEN
2833 : yv = aval*MATMUL(amat%local_data, xv)
2834 : ELSE
2835 : yv = bval*yv + aval*MATMUL(amat%local_data, xv)
2836 : END IF
2837 : #endif
2838 :
2839 68608 : END SUBROUTINE cp_fm_matvec
2840 :
2841 : END MODULE cp_fm_basic_linalg
|