Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2025 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief Basic linear algebra operations for full matrices.
10 : !> \par History
11 : !> 08.2002 split out of qs_blacs [fawzi]
12 : !> \author Fawzi Mohamed
13 : ! **************************************************************************************************
14 : MODULE cp_fm_basic_linalg
15 : USE cp_blacs_env, ONLY: cp_blacs_env_type
16 : USE cp_fm_struct, ONLY: cp_fm_struct_equivalent
17 : USE cp_fm_types, ONLY: &
18 : cp_fm_create, cp_fm_get_diag, cp_fm_get_info, cp_fm_get_submatrix, cp_fm_p_type, &
19 : cp_fm_release, cp_fm_set_all, cp_fm_set_element, cp_fm_set_submatrix, cp_fm_to_fm, &
20 : cp_fm_type
21 : USE cp_log_handling, ONLY: cp_logger_get_default_unit_nr, &
22 : cp_to_string
23 : USE kahan_sum, ONLY: accurate_dot_product, &
24 : accurate_sum
25 : USE kinds, ONLY: dp, &
26 : int_8, &
27 : sp
28 : USE machine, ONLY: m_memory
29 : USE mathlib, ONLY: get_pseudo_inverse_svd, &
30 : invert_matrix
31 : USE message_passing, ONLY: mp_comm_type
32 : #include "../base/base_uses.f90"
33 :
34 : IMPLICIT NONE
35 : PRIVATE
36 :
37 : LOGICAL, PRIVATE, PARAMETER :: debug_this_module = .TRUE.
38 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_fm_basic_linalg'
39 :
40 : PUBLIC :: cp_fm_scale, & ! scale a matrix
41 : cp_fm_scale_and_add, & ! scale and add two matrices
42 : cp_fm_geadd, & ! general addition
43 : cp_fm_add_columns, & ! daxpy type column add
44 : cp_fm_column_scale, & ! scale columns of a matrix
45 : cp_fm_row_scale, & ! scale rows of a matrix
46 : cp_fm_trace, & ! trace of the transpose(A)*B
47 : cp_fm_contracted_trace, & ! sum_{i,...,k} Tr [A(i,...,k)^T * B(i,...,k)]
48 : cp_fm_norm, & ! different norms of A
49 : cp_fm_schur_product, & ! schur product
50 : cp_fm_transpose, & ! transpose a matrix
51 : cp_fm_uplo_to_full, & ! symmetrise a triangular matrix
52 : cp_fm_syrk, & ! rank k update
53 : cp_fm_triangular_multiply, & ! triangular matrix multiply / solve
54 : cp_fm_symm, & ! multiply a symmetric with a non-symmetric matrix
55 : cp_fm_gemm, & ! multiply two matrices
56 : cp_complex_fm_gemm, & ! multiply two complex matrices, represented by non_complex fm matrices
57 : cp_fm_invert, & ! computes the inverse and determinant
58 : cp_fm_frobenius_norm, & ! frobenius norm
59 : cp_fm_triangular_invert, & ! compute the reciprocal of a triangular matrix
60 : cp_fm_qr_factorization, & ! compute the QR factorization of a rectangular matrix
61 : cp_fm_solve, & ! solves the equation A*B=C A and C are input
62 : cp_fm_pdgeqpf, & ! compute a QR factorization with column pivoting of a M-by-N distributed matrix
63 : cp_fm_pdorgqr, & ! generates an M-by-N as first N columns of a product of K elementary reflectors
64 : cp_fm_potrf, & ! Cholesky decomposition
65 : cp_fm_potri, & ! Invert triangular matrix
66 : cp_fm_rot_rows, & ! rotates two rows
67 : cp_fm_rot_cols, & ! rotates two columns
68 : cp_fm_Gram_Schmidt_orthonorm, & ! Gram-Schmidt orthonormalization of columns of a full matrix, &
69 : cp_fm_det, & ! determinant of a real matrix with correct sign
70 : cp_fm_matvec ! matrix-vector multiplication (vector replicated)
71 :
72 : REAL(KIND=dp), EXTERNAL :: dlange, pdlange, pdlatra
73 : REAL(KIND=sp), EXTERNAL :: slange, pslange, pslatra
74 :
75 : INTERFACE cp_fm_trace
76 : MODULE PROCEDURE cp_fm_trace_a0b0t0
77 : MODULE PROCEDURE cp_fm_trace_a1b0t1_a
78 : MODULE PROCEDURE cp_fm_trace_a1b0t1_p
79 : MODULE PROCEDURE cp_fm_trace_a1b1t1_aa
80 : MODULE PROCEDURE cp_fm_trace_a1b1t1_ap
81 : MODULE PROCEDURE cp_fm_trace_a1b1t1_pa
82 : MODULE PROCEDURE cp_fm_trace_a1b1t1_pp
83 : END INTERFACE cp_fm_trace
84 :
85 : INTERFACE cp_fm_contracted_trace
86 : MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2_aa
87 : MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2_ap
88 : MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2_pa
89 : MODULE PROCEDURE cp_fm_contracted_trace_a2b2t2_pp
90 : END INTERFACE cp_fm_contracted_trace
91 : CONTAINS
92 :
93 : ! **************************************************************************************************
94 : !> \brief Computes the determinant (with a correct sign even in parallel environment!) of a real square matrix
95 : !> \author A. Sinyavskiy (andrey.sinyavskiy@chem.uzh.ch)
96 : ! **************************************************************************************************
97 0 : SUBROUTINE cp_fm_det(matrix_a, det_a)
98 :
99 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a
100 : REAL(KIND=dp), INTENT(OUT) :: det_a
101 : REAL(KIND=dp) :: determinant
102 : TYPE(cp_fm_type) :: matrix_lu
103 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
104 : INTEGER :: n, i, info, P
105 0 : INTEGER, ALLOCATABLE, DIMENSION(:) :: ipivot
106 : REAL(KIND=dp), DIMENSION(:), POINTER :: diag
107 :
108 : #if defined(__parallel)
109 : INTEGER :: myprow, nprow, npcol, nrow_local, nrow_block, irow_local
110 : INTEGER, DIMENSION(9) :: desca
111 : #endif
112 :
113 : CALL cp_fm_create(matrix=matrix_lu, &
114 : matrix_struct=matrix_a%matrix_struct, &
115 0 : name="A_lu"//TRIM(ADJUSTL(cp_to_string(1)))//"MATRIX")
116 0 : CALL cp_fm_to_fm(matrix_a, matrix_lu)
117 :
118 0 : a => matrix_lu%local_data
119 0 : n = matrix_lu%matrix_struct%nrow_global
120 0 : ALLOCATE (ipivot(n))
121 0 : ipivot(:) = 0
122 0 : P = 0
123 0 : ALLOCATE (diag(n))
124 0 : diag(:) = 0.0_dp
125 : #if defined(__parallel)
126 : ! Use LU decomposition
127 0 : desca(:) = matrix_lu%matrix_struct%descriptor(:)
128 0 : CALL pdgetrf(n, n, a, 1, 1, desca, ipivot, info)
129 0 : CALL cp_fm_get_diag(matrix_lu, diag)
130 0 : determinant = PRODUCT(diag)
131 0 : myprow = matrix_lu%matrix_struct%context%mepos(1)
132 0 : nprow = matrix_lu%matrix_struct%context%num_pe(1)
133 0 : npcol = matrix_lu%matrix_struct%context%num_pe(2)
134 0 : nrow_local = matrix_lu%matrix_struct%nrow_locals(myprow)
135 0 : nrow_block = matrix_lu%matrix_struct%nrow_block
136 0 : DO irow_local = 1, nrow_local
137 0 : i = matrix_lu%matrix_struct%row_indices(irow_local)
138 0 : IF (ipivot(irow_local) /= i) P = P + 1
139 : END DO
140 0 : CALL matrix_lu%matrix_struct%para_env%sum(P)
141 : ! very important fix
142 0 : P = P/npcol
143 : #else
144 : CALL dgetrf(n, n, a, n, ipivot, info)
145 : CALL cp_fm_get_diag(matrix_lu, diag)
146 : determinant = PRODUCT(diag)
147 : DO i = 1, n
148 : IF (ipivot(i) /= i) P = P + 1
149 : END DO
150 : #endif
151 0 : DEALLOCATE (ipivot)
152 0 : DEALLOCATE (diag)
153 0 : CALL cp_fm_release(matrix_lu)
154 0 : det_a = determinant*(-2*MOD(P, 2) + 1.0_dp)
155 0 : END SUBROUTINE cp_fm_det
156 :
157 : ! **************************************************************************************************
158 : !> \brief calc A <- alpha*A + beta*B
159 : !> optimized for alpha == 1.0 (just add beta*B) and beta == 0.0 (just
160 : !> scale A)
161 : !> \param alpha ...
162 : !> \param matrix_a ...
163 : !> \param beta ...
164 : !> \param matrix_b ...
165 : ! **************************************************************************************************
166 1069384 : SUBROUTINE cp_fm_scale_and_add(alpha, matrix_a, beta, matrix_b)
167 :
168 : REAL(KIND=dp), INTENT(IN) :: alpha
169 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a
170 : REAL(KIND=dp), INTENT(IN), OPTIONAL :: beta
171 : TYPE(cp_fm_type), INTENT(IN), OPTIONAL :: matrix_b
172 :
173 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_scale_and_add'
174 :
175 : INTEGER :: handle, size_a, size_b
176 : REAL(KIND=dp) :: my_beta
177 1069384 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, b
178 1069384 : REAL(KIND=sp), DIMENSION(:, :), POINTER :: a_sp, b_sp
179 :
180 1069384 : CALL timeset(routineN, handle)
181 :
182 1069384 : my_beta = 0.0_dp
183 1069384 : IF (PRESENT(matrix_b)) my_beta = 1.0_dp
184 1069384 : IF (PRESENT(beta)) my_beta = beta
185 1069384 : NULLIFY (a, b)
186 :
187 1069384 : IF (PRESENT(beta)) THEN
188 1069384 : CPASSERT(PRESENT(matrix_b))
189 1069384 : IF (ASSOCIATED(matrix_a%local_data, matrix_b%local_data)) THEN
190 0 : CPWARN("Bad use of routine. Call cp_fm_scale instead")
191 0 : CALL cp_fm_scale(alpha + beta, matrix_a)
192 0 : CALL timestop(handle)
193 0 : RETURN
194 : END IF
195 : END IF
196 :
197 1069384 : a => matrix_a%local_data
198 1069384 : a_sp => matrix_a%local_data_sp
199 :
200 1069384 : IF (matrix_a%use_sp) THEN
201 0 : size_a = SIZE(a_sp, 1)*SIZE(a_sp, 2)
202 : ELSE
203 1069384 : size_a = SIZE(a, 1)*SIZE(a, 2)
204 : END IF
205 :
206 1069384 : IF (alpha /= 1.0_dp) THEN
207 79234 : IF (matrix_a%use_sp) THEN
208 0 : CALL sscal(size_a, REAL(alpha, sp), a_sp, 1)
209 : ELSE
210 79234 : CALL dscal(size_a, alpha, a, 1)
211 : END IF
212 : END IF
213 1069384 : IF (my_beta /= 0.0_dp) THEN
214 1061898 : IF (matrix_a%matrix_struct%context /= matrix_b%matrix_struct%context) &
215 0 : CPABORT("Matrices must be in the same blacs context")
216 :
217 1061898 : IF (cp_fm_struct_equivalent(matrix_a%matrix_struct, &
218 : matrix_b%matrix_struct)) THEN
219 :
220 1061898 : b => matrix_b%local_data
221 1061898 : b_sp => matrix_b%local_data_sp
222 1061898 : IF (matrix_b%use_sp) THEN
223 0 : size_b = SIZE(b_sp, 1)*SIZE(b_sp, 2)
224 : ELSE
225 1061898 : size_b = SIZE(b, 1)*SIZE(b, 2)
226 : END IF
227 1061898 : IF (size_a /= size_b) &
228 0 : CPABORT("Matrices must have same local sizes")
229 :
230 1061898 : IF (matrix_a%use_sp .AND. matrix_b%use_sp) THEN
231 0 : CALL saxpy(size_a, REAL(my_beta, sp), b_sp, 1, a_sp, 1)
232 1061898 : ELSEIF (matrix_a%use_sp .AND. .NOT. matrix_b%use_sp) THEN
233 0 : CALL saxpy(size_a, REAL(my_beta, sp), REAL(b, sp), 1, a_sp, 1)
234 1061898 : ELSEIF (.NOT. matrix_a%use_sp .AND. matrix_b%use_sp) THEN
235 0 : CALL daxpy(size_a, my_beta, REAL(b_sp, dp), 1, a, 1)
236 : ELSE
237 1061898 : CALL daxpy(size_a, my_beta, b, 1, a, 1)
238 : END IF
239 :
240 : ELSE
241 : #ifdef __parallel
242 0 : CPABORT("to do (pdscal,pdcopy,pdaxpy)")
243 : #else
244 : CPABORT("")
245 : #endif
246 : END IF
247 :
248 : END IF
249 :
250 1069384 : CALL timestop(handle)
251 :
252 1069384 : END SUBROUTINE cp_fm_scale_and_add
253 :
254 : ! **************************************************************************************************
255 : !> \brief interface to BLACS geadd:
256 : !> matrix_b = beta*matrix_b + alpha*opt(matrix_a)
257 : !> where opt(matrix_a) can be either:
258 : !> 'N': matrix_a
259 : !> 'T': matrix_a^T
260 : !> 'C': matrix_a^H (Hermitian conjugate)
261 : !> note that this is a level three routine, use cp_fm_scale_and_add if that
262 : !> is sufficient for your needs
263 : !> \param alpha : complex scalar
264 : !> \param trans : 'N' normal, 'T' transposed
265 : !> \param matrix_a : input matrix_a
266 : !> \param beta : complex scalar
267 : !> \param matrix_b : input matrix_b, upon out put the updated matrix_b
268 : !> \author Lianheng Tong
269 : ! **************************************************************************************************
270 532 : SUBROUTINE cp_fm_geadd(alpha, trans, matrix_a, beta, matrix_b)
271 : REAL(KIND=dp), INTENT(IN) :: alpha, beta
272 : CHARACTER, INTENT(IN) :: trans
273 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_b
274 :
275 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_geadd'
276 :
277 : INTEGER :: nrow_global, ncol_global, handle
278 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: aa, bb
279 : #if defined(__parallel)
280 : INTEGER, DIMENSION(9) :: desca, descb
281 : #elif !defined(__MKL)
282 : INTEGER :: ii, jj
283 : #endif
284 :
285 532 : CALL timeset(routineN, handle)
286 :
287 532 : nrow_global = matrix_a%matrix_struct%nrow_global
288 532 : ncol_global = matrix_a%matrix_struct%ncol_global
289 532 : CPASSERT(nrow_global == matrix_b%matrix_struct%nrow_global)
290 532 : CPASSERT(ncol_global == matrix_b%matrix_struct%ncol_global)
291 :
292 532 : aa => matrix_a%local_data
293 532 : bb => matrix_b%local_data
294 :
295 : #if defined(__parallel)
296 5320 : desca = matrix_a%matrix_struct%descriptor
297 5320 : descb = matrix_b%matrix_struct%descriptor
298 : CALL pdgeadd(trans, &
299 : nrow_global, &
300 : ncol_global, &
301 : alpha, &
302 : aa, &
303 : 1, 1, &
304 : desca, &
305 : beta, &
306 : bb, &
307 : 1, 1, &
308 532 : descb)
309 : #elif defined(__MKL)
310 : CALL mkl_domatadd('C', trans, 'N', nrow_global, ncol_global, &
311 : alpha, aa, nrow_global, beta, bb, nrow_global, bb, nrow_global)
312 : #else
313 : ! dgeadd is not a standard BLAS function, although it is implemented
314 : ! in some libraries like OpenBLAS, so not going to use it here
315 : SELECT CASE (trans)
316 : CASE ('T')
317 : DO jj = 1, ncol_global
318 : DO ii = 1, nrow_global
319 : bb(ii, jj) = beta*bb(ii, jj) + alpha*aa(jj, ii)
320 : END DO
321 : END DO
322 : CASE DEFAULT
323 : DO jj = 1, ncol_global
324 : DO ii = 1, nrow_global
325 : bb(ii, jj) = beta*bb(ii, jj) + alpha*aa(ii, jj)
326 : END DO
327 : END DO
328 : END SELECT
329 : #endif
330 :
331 532 : CALL timestop(handle)
332 :
333 532 : END SUBROUTINE cp_fm_geadd
334 :
335 : ! **************************************************************************************************
336 : !> \brief Add (and scale) a subset of columns of a fm to a fm
337 : !> b = alpha*a + b
338 : !> \param msource ...
339 : !> \param mtarget ...
340 : !> \param ncol ...
341 : !> \param alpha ...
342 : !> \param source_start ...
343 : !> \param target_start ...
344 : ! **************************************************************************************************
345 0 : SUBROUTINE cp_fm_add_columns(msource, mtarget, ncol, alpha, source_start, target_start)
346 :
347 : TYPE(cp_fm_type), INTENT(IN) :: msource, mtarget
348 : INTEGER, INTENT(IN) :: ncol
349 : REAL(KIND=dp), INTENT(IN), OPTIONAL :: alpha
350 : INTEGER, INTENT(IN), OPTIONAL :: source_start, target_start
351 :
352 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_add_columns'
353 :
354 : INTEGER :: handle, n, ss, ts, i
355 : REAL(KIND=dp) :: fscale
356 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, b
357 : #if defined(__parallel)
358 : INTEGER, DIMENSION(9) :: desca, descb
359 : #endif
360 :
361 0 : CALL timeset(routineN, handle)
362 :
363 0 : ss = 1
364 0 : ts = 1
365 :
366 0 : IF (PRESENT(source_start)) ss = source_start
367 0 : IF (PRESENT(target_start)) ts = target_start
368 :
369 0 : fscale = 1.0_dp
370 0 : IF (PRESENT(alpha)) fscale = alpha
371 :
372 0 : n = msource%matrix_struct%nrow_global
373 :
374 0 : a => msource%local_data
375 0 : b => mtarget%local_data
376 :
377 : #if defined(__parallel)
378 : MARK_USED(i)
379 0 : desca(:) = msource%matrix_struct%descriptor(:)
380 0 : descb(:) = mtarget%matrix_struct%descriptor(:)
381 0 : CALL pdgeadd("N", n, ncol, fscale, a, 1, ss, desca, 1.0_dp, b, 1, ts, descb)
382 : #else
383 : DO i = 0, ncol - 1
384 : b(1:n, ts + i) = b(1:n, ts + i) + fscale*a(1:n, ss + i)
385 : END DO
386 : #endif
387 :
388 0 : CALL timestop(handle)
389 :
390 0 : END SUBROUTINE cp_fm_add_columns
391 :
392 : ! **************************************************************************************************
393 : !> \brief Computes the LU-decomposition of the matrix, and the determinant of the matrix
394 : !> IMPORTANT : the sign of the determinant is not defined correctly yet ....
395 : !> \param matrix_a ...
396 : !> \param almost_determinant ...
397 : !> \param correct_sign ...
398 : !> \par History
399 : !> added correct_sign 02.07 (fschiff)
400 : !> \author Joost VandeVondele
401 : !> \note
402 : !> - matrix_a is overwritten
403 : !> - the sign of the determinant might be wrong
404 : !> - SERIOUS WARNING (KNOWN BUG) : the sign of the determinant depends on ipivot
405 : !> - one should be able to find out if ipivot is an even or an odd permutation...
406 : !> if you need the correct sign, just add correct_sign==.TRUE. (fschiff)
407 : !> - Use cp_fm_get_diag instead of n times cp_fm_get_element (A. Bussy)
408 : ! **************************************************************************************************
409 0 : SUBROUTINE cp_fm_lu_decompose(matrix_a, almost_determinant, correct_sign)
410 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a
411 : REAL(KIND=dp), INTENT(OUT) :: almost_determinant
412 : LOGICAL, INTENT(IN), OPTIONAL :: correct_sign
413 :
414 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_lu_decompose'
415 :
416 : INTEGER :: handle, i, info, n
417 0 : INTEGER, ALLOCATABLE, DIMENSION(:) :: ipivot
418 : REAL(KIND=dp) :: determinant
419 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
420 : #if defined(__parallel)
421 : INTEGER, DIMENSION(9) :: desca
422 0 : REAL(KIND=dp), DIMENSION(:), POINTER :: diag
423 : #else
424 : INTEGER :: lda
425 : #endif
426 :
427 0 : CALL timeset(routineN, handle)
428 :
429 0 : a => matrix_a%local_data
430 0 : n = matrix_a%matrix_struct%nrow_global
431 0 : ALLOCATE (ipivot(n + matrix_a%matrix_struct%nrow_block))
432 :
433 : #if defined(__parallel)
434 : MARK_USED(correct_sign)
435 0 : desca(:) = matrix_a%matrix_struct%descriptor(:)
436 0 : CALL pdgetrf(n, n, a, 1, 1, desca, ipivot, info)
437 :
438 0 : ALLOCATE (diag(n))
439 0 : diag(:) = 0.0_dp
440 0 : CALL cp_fm_get_diag(matrix_a, diag)
441 0 : determinant = 1.0_dp
442 0 : DO i = 1, n
443 0 : determinant = determinant*diag(i)
444 : END DO
445 0 : DEALLOCATE (diag)
446 : #else
447 : lda = SIZE(a, 1)
448 : CALL dgetrf(n, n, a, lda, ipivot, info)
449 : determinant = 1.0_dp
450 : IF (correct_sign) THEN
451 : DO i = 1, n
452 : IF (ipivot(i) /= i) THEN
453 : determinant = -determinant*a(i, i)
454 : ELSE
455 : determinant = determinant*a(i, i)
456 : END IF
457 : END DO
458 : ELSE
459 : DO i = 1, n
460 : determinant = determinant*a(i, i)
461 : END DO
462 : END IF
463 : #endif
464 : ! info is allowed to be zero
465 : ! this does just signal a zero diagonal element
466 0 : DEALLOCATE (ipivot)
467 0 : almost_determinant = determinant ! notice that the sign is random
468 0 : CALL timestop(handle)
469 0 : END SUBROUTINE cp_fm_lu_decompose
470 :
471 : ! **************************************************************************************************
472 : !> \brief computes matrix_c = beta * matrix_c + alpha * ( matrix_a ** transa ) * ( matrix_b ** transb )
473 : !> \param transa : 'N' -> normal 'T' -> transpose
474 : !> alpha,beta :: can be 0.0_dp and 1.0_dp
475 : !> \param transb ...
476 : !> \param m ...
477 : !> \param n ...
478 : !> \param k ...
479 : !> \param alpha ...
480 : !> \param matrix_a : m x k matrix ( ! for transa = 'N')
481 : !> \param matrix_b : k x n matrix ( ! for transb = 'N')
482 : !> \param beta ...
483 : !> \param matrix_c : m x n matrix
484 : !> \param a_first_col ...
485 : !> \param a_first_row ...
486 : !> \param b_first_col : the k x n matrix starts at col b_first_col of matrix_b (avoid usage)
487 : !> \param b_first_row ...
488 : !> \param c_first_col ...
489 : !> \param c_first_row ...
490 : !> \author Matthias Krack
491 : !> \note
492 : !> matrix_c should have no overlap with matrix_a, matrix_b
493 : ! **************************************************************************************************
494 520 : SUBROUTINE cp_fm_gemm(transa, transb, m, n, k, alpha, matrix_a, matrix_b, beta, &
495 : matrix_c, a_first_col, a_first_row, b_first_col, b_first_row, &
496 : c_first_col, c_first_row)
497 :
498 : CHARACTER(LEN=1), INTENT(IN) :: transa, transb
499 : INTEGER, INTENT(IN) :: m, n, k
500 : REAL(KIND=dp), INTENT(IN) :: alpha
501 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_b
502 : REAL(KIND=dp), INTENT(IN) :: beta
503 : TYPE(cp_fm_type), INTENT(IN) :: matrix_c
504 : INTEGER, INTENT(IN), OPTIONAL :: a_first_col, a_first_row, &
505 : b_first_col, b_first_row, &
506 : c_first_col, c_first_row
507 :
508 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_gemm'
509 :
510 : INTEGER :: handle, i_a, i_b, i_c, j_a, &
511 : j_b, j_c
512 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, b, c
513 520 : REAL(KIND=sp), DIMENSION(:, :), POINTER :: a_sp, b_sp, c_sp
514 : #if defined(__parallel)
515 : INTEGER, DIMENSION(9) :: desca, descb, descc
516 : #else
517 : INTEGER :: lda, ldb, ldc
518 : #endif
519 :
520 520 : CALL timeset(routineN, handle)
521 :
522 : !sample peak memory
523 520 : CALL m_memory()
524 :
525 520 : a => matrix_a%local_data
526 520 : b => matrix_b%local_data
527 520 : c => matrix_c%local_data
528 :
529 520 : a_sp => matrix_a%local_data_sp
530 520 : b_sp => matrix_b%local_data_sp
531 520 : c_sp => matrix_c%local_data_sp
532 :
533 520 : i_a = 1
534 520 : IF (PRESENT(a_first_row)) i_a = a_first_row
535 :
536 520 : j_a = 1
537 520 : IF (PRESENT(a_first_col)) j_a = a_first_col
538 :
539 520 : i_b = 1
540 520 : IF (PRESENT(b_first_row)) i_b = b_first_row
541 :
542 520 : j_b = 1
543 520 : IF (PRESENT(b_first_col)) j_b = b_first_col
544 :
545 520 : i_c = 1
546 520 : IF (PRESENT(c_first_row)) i_c = c_first_row
547 :
548 520 : j_c = 1
549 520 : IF (PRESENT(c_first_col)) j_c = c_first_col
550 :
551 : #if defined(__parallel)
552 :
553 5200 : desca(:) = matrix_a%matrix_struct%descriptor(:)
554 5200 : descb(:) = matrix_b%matrix_struct%descriptor(:)
555 5200 : descc(:) = matrix_c%matrix_struct%descriptor(:)
556 :
557 520 : IF (matrix_a%use_sp .AND. matrix_b%use_sp .AND. matrix_c%use_sp) THEN
558 :
559 : CALL psgemm(transa, transb, m, n, k, REAL(alpha, sp), a_sp(1, 1), i_a, j_a, desca, b_sp(1, 1), i_b, j_b, &
560 0 : descb, REAL(beta, sp), c_sp(1, 1), i_c, j_c, descc)
561 :
562 520 : ELSEIF ((.NOT. matrix_a%use_sp) .AND. (.NOT. matrix_b%use_sp) .AND. (.NOT. matrix_c%use_sp)) THEN
563 :
564 : CALL pdgemm(transa, transb, m, n, k, alpha, a, i_a, j_a, desca, b, i_b, j_b, &
565 520 : descb, beta, c, i_c, j_c, descc)
566 :
567 : ELSE
568 0 : CPABORT("Mixed precision gemm NYI")
569 : END IF
570 : #else
571 :
572 : IF (matrix_a%use_sp .AND. matrix_b%use_sp .AND. matrix_c%use_sp) THEN
573 :
574 : lda = SIZE(a_sp, 1)
575 : ldb = SIZE(b_sp, 1)
576 : ldc = SIZE(c_sp, 1)
577 :
578 : CALL sgemm(transa, transb, m, n, k, REAL(alpha, sp), a_sp(i_a, j_a), lda, b_sp(i_b, j_b), ldb, &
579 : REAL(beta, sp), c_sp(i_c, j_c), ldc)
580 :
581 : ELSEIF ((.NOT. matrix_a%use_sp) .AND. (.NOT. matrix_b%use_sp) .AND. (.NOT. matrix_c%use_sp)) THEN
582 :
583 : lda = SIZE(a, 1)
584 : ldb = SIZE(b, 1)
585 : ldc = SIZE(c, 1)
586 :
587 : CALL dgemm(transa, transb, m, n, k, alpha, a(i_a, j_a), lda, b(i_b, j_b), ldb, beta, c(i_c, j_c), ldc)
588 :
589 : ELSE
590 : CPABORT("Mixed precision gemm NYI")
591 : END IF
592 :
593 : #endif
594 520 : CALL timestop(handle)
595 :
596 520 : END SUBROUTINE cp_fm_gemm
597 :
598 : ! **************************************************************************************************
599 : !> \brief computes matrix_c = beta * matrix_c + alpha * matrix_a * matrix_b
600 : !> computes matrix_c = beta * matrix_c + alpha * matrix_b * matrix_a
601 : !> where matrix_a is symmetric
602 : !> \param side : 'L' -> matrix_a is on the left 'R' -> matrix_a is on the right
603 : !> alpha,beta :: can be 0.0_dp and 1.0_dp
604 : !> \param uplo triangular format
605 : !> \param m ...
606 : !> \param n ...
607 : !> \param alpha ...
608 : !> \param matrix_a : m x m matrix
609 : !> \param matrix_b : m x n matrix
610 : !> \param beta ...
611 : !> \param matrix_c : m x n matrix
612 : !> \author Matthias Krack
613 : !> \note
614 : !> matrix_c should have no overlap with matrix_a, matrix_b
615 : !> all matrices in QS are triangular according to uplo
616 : !> matrix_a is always an m x m matrix
617 : !> typically slower than cp_fm_gemm (especially in parallel easily 50 percent)
618 : ! **************************************************************************************************
619 146338 : SUBROUTINE cp_fm_symm(side, uplo, m, n, alpha, matrix_a, matrix_b, beta, matrix_c)
620 :
621 : CHARACTER(LEN=1), INTENT(IN) :: side, uplo
622 : INTEGER, INTENT(IN) :: m, n
623 : REAL(KIND=dp), INTENT(IN) :: alpha
624 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_b
625 : REAL(KIND=dp), INTENT(IN) :: beta
626 : TYPE(cp_fm_type), INTENT(IN) :: matrix_c
627 :
628 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_symm'
629 :
630 : INTEGER :: handle
631 146338 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, b, c
632 : #if defined(__parallel)
633 : INTEGER, DIMENSION(9) :: desca, descb, descc
634 : #else
635 : INTEGER :: lda, ldb, ldc
636 : #endif
637 :
638 146338 : CALL timeset(routineN, handle)
639 :
640 146338 : a => matrix_a%local_data
641 146338 : b => matrix_b%local_data
642 146338 : c => matrix_c%local_data
643 :
644 : #if defined(__parallel)
645 :
646 1463380 : desca(:) = matrix_a%matrix_struct%descriptor(:)
647 1463380 : descb(:) = matrix_b%matrix_struct%descriptor(:)
648 1463380 : descc(:) = matrix_c%matrix_struct%descriptor(:)
649 :
650 146338 : CALL pdsymm(side, uplo, m, n, alpha, a(1, 1), 1, 1, desca, b(1, 1), 1, 1, descb, beta, c(1, 1), 1, 1, descc)
651 :
652 : #else
653 :
654 : lda = matrix_a%matrix_struct%local_leading_dimension
655 : ldb = matrix_b%matrix_struct%local_leading_dimension
656 : ldc = matrix_c%matrix_struct%local_leading_dimension
657 :
658 : CALL dsymm(side, uplo, m, n, alpha, a(1, 1), lda, b(1, 1), ldb, beta, c(1, 1), ldc)
659 :
660 : #endif
661 146338 : CALL timestop(handle)
662 :
663 146338 : END SUBROUTINE cp_fm_symm
664 :
665 : ! **************************************************************************************************
666 : !> \brief computes the Frobenius norm of matrix_a
667 : !> \brief computes the Frobenius norm of matrix_a
668 : !> \param matrix_a : m x n matrix
669 : !> \return ...
670 : !> \author VW
671 : ! **************************************************************************************************
672 7926 : FUNCTION cp_fm_frobenius_norm(matrix_a) RESULT(norm)
673 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a
674 : REAL(KIND=dp) :: norm
675 :
676 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_frobenius_norm'
677 :
678 : INTEGER :: handle, size_a
679 7926 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
680 : REAL(KIND=dp), EXTERNAL :: DDOT
681 : #if defined(__parallel)
682 : TYPE(mp_comm_type) :: group
683 : #endif
684 :
685 7926 : CALL timeset(routineN, handle)
686 :
687 : norm = 0.0_dp
688 7926 : a => matrix_a%local_data
689 7926 : size_a = SIZE(a, 1)*SIZE(a, 2)
690 7926 : norm = DDOT(size_a, a(1, 1), 1, a(1, 1), 1)
691 : #if defined(__parallel)
692 7926 : group = matrix_a%matrix_struct%para_env
693 7926 : CALL group%sum(norm)
694 : #endif
695 7926 : norm = SQRT(norm)
696 :
697 7926 : CALL timestop(handle)
698 :
699 7926 : END FUNCTION cp_fm_frobenius_norm
700 :
701 : ! **************************************************************************************************
702 : !> \brief performs a rank-k update of a symmetric matrix_c
703 : !> matrix_c = beta * matrix_c + alpha * matrix_a * transpose ( matrix_a )
704 : !> \param uplo : 'U' ('L')
705 : !> \param trans : 'N' ('T')
706 : !> \param k : number of cols to use in matrix_a
707 : !> ia,ja :: 1,1 (could be used for selecting subblock of a)
708 : !> \param alpha ...
709 : !> \param matrix_a ...
710 : !> \param ia ...
711 : !> \param ja ...
712 : !> \param beta ...
713 : !> \param matrix_c ...
714 : !> \author Matthias Krack
715 : ! **************************************************************************************************
716 7440 : SUBROUTINE cp_fm_syrk(uplo, trans, k, alpha, matrix_a, ia, ja, beta, matrix_c)
717 : CHARACTER(LEN=1), INTENT(IN) :: uplo, trans
718 : INTEGER, INTENT(IN) :: k
719 : REAL(KIND=dp), INTENT(IN) :: alpha
720 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a
721 : INTEGER, INTENT(IN) :: ia, ja
722 : REAL(KIND=dp), INTENT(IN) :: beta
723 : TYPE(cp_fm_type), INTENT(IN) :: matrix_c
724 :
725 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_syrk'
726 :
727 : INTEGER :: handle, n
728 7440 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, c
729 : #if defined(__parallel)
730 : INTEGER, DIMENSION(9) :: desca, descc
731 : #else
732 : INTEGER :: lda, ldc
733 : #endif
734 :
735 7440 : CALL timeset(routineN, handle)
736 :
737 7440 : n = matrix_c%matrix_struct%nrow_global
738 :
739 7440 : a => matrix_a%local_data
740 7440 : c => matrix_c%local_data
741 :
742 : #if defined(__parallel)
743 :
744 74400 : desca(:) = matrix_a%matrix_struct%descriptor(:)
745 74400 : descc(:) = matrix_c%matrix_struct%descriptor(:)
746 :
747 7440 : CALL pdsyrk(uplo, trans, n, k, alpha, a(1, 1), ia, ja, desca, beta, c(1, 1), 1, 1, descc)
748 :
749 : #else
750 :
751 : lda = SIZE(a, 1)
752 : ldc = SIZE(c, 1)
753 :
754 : CALL dsyrk(uplo, trans, n, k, alpha, a(ia, ja), lda, beta, c(1, 1), ldc)
755 :
756 : #endif
757 7440 : CALL timestop(handle)
758 :
759 7440 : END SUBROUTINE cp_fm_syrk
760 :
761 : ! **************************************************************************************************
762 : !> \brief computes the schur product of two matrices
763 : !> c_ij = a_ij * b_ij
764 : !> \param matrix_a ...
765 : !> \param matrix_b ...
766 : !> \param matrix_c ...
767 : !> \author Joost VandeVondele
768 : ! **************************************************************************************************
769 9148 : SUBROUTINE cp_fm_schur_product(matrix_a, matrix_b, matrix_c)
770 :
771 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_b, matrix_c
772 :
773 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_schur_product'
774 :
775 : INTEGER :: handle, icol_local, irow_local, mypcol, &
776 : myprow, ncol_local, &
777 : nrow_local
778 9148 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, b, c
779 : TYPE(cp_blacs_env_type), POINTER :: context
780 :
781 9148 : CALL timeset(routineN, handle)
782 :
783 9148 : context => matrix_a%matrix_struct%context
784 9148 : myprow = context%mepos(1)
785 9148 : mypcol = context%mepos(2)
786 :
787 9148 : a => matrix_a%local_data
788 9148 : b => matrix_b%local_data
789 9148 : c => matrix_c%local_data
790 :
791 9148 : nrow_local = matrix_a%matrix_struct%nrow_locals(myprow)
792 9148 : ncol_local = matrix_a%matrix_struct%ncol_locals(mypcol)
793 :
794 98692 : DO icol_local = 1, ncol_local
795 6666116 : DO irow_local = 1, nrow_local
796 6656968 : c(irow_local, icol_local) = a(irow_local, icol_local)*b(irow_local, icol_local)
797 : END DO
798 : END DO
799 :
800 9148 : CALL timestop(handle)
801 :
802 9148 : END SUBROUTINE cp_fm_schur_product
803 :
804 : ! **************************************************************************************************
805 : !> \brief returns the trace of matrix_a^T matrix_b, i.e
806 : !> sum_{i,j}(matrix_a(i,j)*matrix_b(i,j))
807 : !> \param matrix_a a matrix
808 : !> \param matrix_b another matrix
809 : !> \param trace ...
810 : !> \par History
811 : !> 11.06.2001 Creation (Matthias Krack)
812 : !> 12.2002 added doc [fawzi]
813 : !> \author Matthias Krack
814 : !> \note
815 : !> note the transposition of matrix_a!
816 : ! **************************************************************************************************
817 512532 : SUBROUTINE cp_fm_trace_a0b0t0(matrix_a, matrix_b, trace)
818 :
819 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_b
820 : REAL(KIND=dp), INTENT(OUT) :: trace
821 :
822 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a0b0t0'
823 :
824 : INTEGER :: handle, mypcol, myprow, ncol_local, &
825 : nrow_local
826 512532 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, b
827 512532 : REAL(KIND=sp), DIMENSION(:, :), POINTER :: a_sp, b_sp
828 : TYPE(cp_blacs_env_type), POINTER :: context
829 : TYPE(mp_comm_type) :: group
830 :
831 512532 : CALL timeset(routineN, handle)
832 :
833 512532 : context => matrix_a%matrix_struct%context
834 512532 : myprow = context%mepos(1)
835 512532 : mypcol = context%mepos(2)
836 :
837 512532 : group = matrix_a%matrix_struct%para_env
838 :
839 512532 : a => matrix_a%local_data
840 512532 : b => matrix_b%local_data
841 :
842 512532 : a_sp => matrix_a%local_data_sp
843 512532 : b_sp => matrix_b%local_data_sp
844 :
845 512532 : nrow_local = MIN(matrix_a%matrix_struct%nrow_locals(myprow), matrix_b%matrix_struct%nrow_locals(myprow))
846 512532 : ncol_local = MIN(matrix_a%matrix_struct%ncol_locals(mypcol), matrix_b%matrix_struct%ncol_locals(mypcol))
847 :
848 : ! cries for an accurate_dot_product
849 512532 : IF (matrix_a%use_sp .AND. matrix_b%use_sp) THEN
850 : trace = accurate_sum(REAL(a_sp(1:nrow_local, 1:ncol_local)* &
851 0 : b_sp(1:nrow_local, 1:ncol_local), dp))
852 512532 : ELSEIF (matrix_a%use_sp .AND. .NOT. matrix_b%use_sp) THEN
853 : trace = accurate_sum(REAL(a_sp(1:nrow_local, 1:ncol_local), dp)* &
854 0 : b(1:nrow_local, 1:ncol_local))
855 512532 : ELSEIF (.NOT. matrix_a%use_sp .AND. matrix_b%use_sp) THEN
856 : trace = accurate_sum(a(1:nrow_local, 1:ncol_local)* &
857 0 : REAL(b_sp(1:nrow_local, 1:ncol_local), dp))
858 : ELSE
859 : trace = accurate_dot_product(a(1:nrow_local, 1:ncol_local), &
860 512532 : b(1:nrow_local, 1:ncol_local))
861 : END IF
862 :
863 512532 : CALL group%sum(trace)
864 :
865 512532 : CALL timestop(handle)
866 :
867 512532 : END SUBROUTINE cp_fm_trace_a0b0t0
868 :
869 : #:mute
870 : #:set types = [("cp_fm_type", "a", ""), ("cp_fm_p_type", "p","%matrix")]
871 : #:endmute
872 :
873 : ! **************************************************************************************************
874 : !> \brief Compute trace(k) = Tr (matrix_a(k)^T matrix_b) for each pair of matrices A_k and B.
875 : !> \param matrix_a list of A matrices
876 : !> \param matrix_b B matrix
877 : !> \param trace computed traces
878 : !> \par History
879 : !> * 08.2018 forked from cp_fm_trace() [Sergey Chulkov]
880 : !> \note \parblock
881 : !> Computing the trace requires collective communication between involved MPI processes
882 : !> that implies a synchronisation point between them. The aim of this subroutine is to reduce
883 : !> the amount of time wasted in such synchronisation by performing one large collective
884 : !> operation which involves all the matrices in question.
885 : !>
886 : !> The subroutine's suffix reflects dimensionality of dummy arrays; 'a1b0t1' means that
887 : !> the dummy variables 'matrix_a' and 'trace' are 1-dimensional arrays, while the variable
888 : !> 'matrix_b' is a single matrix.
889 : !> \endparblock
890 : ! **************************************************************************************************
891 : #:for longname, shortname, appendix in types
892 3168 : SUBROUTINE cp_fm_trace_a1b0t1_${shortname}$ (matrix_a, matrix_b, trace)
893 : TYPE(${longname}$), DIMENSION(:), INTENT(IN) :: matrix_a
894 : TYPE(cp_fm_type), INTENT(IN) :: matrix_b
895 : REAL(kind=dp), DIMENSION(:), INTENT(OUT) :: trace
896 :
897 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a1b0t1_${shortname}$'
898 :
899 : INTEGER :: handle, imatrix, n_matrices, &
900 : ncols_local, nrows_local
901 : LOGICAL :: use_sp_a, use_sp_b
902 3168 : REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
903 3168 : REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
904 : TYPE(mp_comm_type) :: group
905 :
906 3168 : CALL timeset(routineN, handle)
907 :
908 3168 : n_matrices = SIZE(trace)
909 3168 : CPASSERT(SIZE(matrix_a) == n_matrices)
910 :
911 3168 : CALL cp_fm_get_info(matrix_b, nrow_local=nrows_local, ncol_local=ncols_local)
912 3168 : use_sp_b = matrix_b%use_sp
913 :
914 3168 : IF (use_sp_b) THEN
915 0 : ldata_b_sp => matrix_b%local_data_sp(1:nrows_local, 1:ncols_local)
916 : ELSE
917 3168 : ldata_b => matrix_b%local_data(1:nrows_local, 1:ncols_local)
918 : END IF
919 :
920 : !$OMP PARALLEL DO DEFAULT(NONE), &
921 : !$OMP PRIVATE(imatrix, ldata_a, ldata_a_sp, use_sp_a), &
922 : !$OMP SHARED(ldata_b, ldata_b_sp, matrix_a, matrix_b), &
923 3168 : !$OMP SHARED(ncols_local, nrows_local, n_matrices, trace, use_sp_b)
924 :
925 : DO imatrix = 1, n_matrices
926 :
927 : use_sp_a = matrix_a(imatrix) ${appendix}$%use_sp
928 :
929 : ! assume that the matrices A(i) and B have identical shapes and distribution schemes
930 : IF (use_sp_a .AND. use_sp_b) THEN
931 : ldata_a_sp => matrix_a(imatrix) ${appendix}$%local_data_sp(1:nrows_local, 1:ncols_local)
932 : trace(imatrix) = accurate_dot_product(ldata_a_sp, ldata_b_sp)
933 : ELSE IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
934 : ldata_a => matrix_a(imatrix) ${appendix}$%local_data(1:nrows_local, 1:ncols_local)
935 : trace(imatrix) = accurate_dot_product(ldata_a, ldata_b)
936 : ELSE
937 : CPABORT("Matrices A and B are of different types")
938 : END IF
939 : END DO
940 : !$OMP END PARALLEL DO
941 :
942 3168 : group = matrix_b%matrix_struct%para_env
943 19836 : CALL group%sum(trace)
944 :
945 3168 : CALL timestop(handle)
946 3168 : END SUBROUTINE cp_fm_trace_a1b0t1_${shortname}$
947 : #:endfor
948 :
949 : ! **************************************************************************************************
950 : !> \brief Compute trace(k) = Tr (matrix_a(k)^T matrix_b(k)) for each pair of matrices A_k and B_k.
951 : !> \param matrix_a list of A matrices
952 : !> \param matrix_b list of B matrices
953 : !> \param trace computed traces
954 : !> \param accurate ...
955 : !> \par History
956 : !> * 11.2016 forked from cp_fm_trace() [Sergey Chulkov]
957 : !> \note \parblock
958 : !> Computing the trace requires collective communication between involved MPI processes
959 : !> that implies a synchronisation point between them. The aim of this subroutine is to reduce
960 : !> the amount of time wasted in such synchronisation by performing one large collective
961 : !> operation which involves all the matrices in question.
962 : !>
963 : !> The subroutine's suffix reflects dimensionality of dummy arrays; 'a1b1t1' means that
964 : !> all dummy variables (matrix_a, matrix_b, and trace) are 1-dimensional arrays.
965 : !> \endparblock
966 : ! **************************************************************************************************
967 : #:for longname1, shortname1, appendix1 in types
968 : #:for longname2, shortname2, appendix2 in types
969 149172 : SUBROUTINE cp_fm_trace_a1b1t1_${shortname1}$${shortname2}$ (matrix_a, matrix_b, trace, accurate)
970 : TYPE(${longname1}$), DIMENSION(:), INTENT(IN) :: matrix_a
971 : TYPE(${longname2}$), DIMENSION(:), INTENT(IN) :: matrix_b
972 : REAL(kind=dp), DIMENSION(:), INTENT(OUT) :: trace
973 : LOGICAL, INTENT(IN), OPTIONAL :: accurate
974 :
975 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_trace_a1b1t1_${shortname1}$${shortname2}$'
976 :
977 : INTEGER :: handle, imatrix, n_matrices, &
978 : ncols_local, nrows_local
979 : LOGICAL :: use_accurate_sum, use_sp_a, use_sp_b
980 149172 : REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
981 149172 : REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
982 : TYPE(mp_comm_type) :: group
983 :
984 149172 : CALL timeset(routineN, handle)
985 :
986 149172 : n_matrices = SIZE(trace)
987 149172 : CPASSERT(SIZE(matrix_a) == n_matrices)
988 149172 : CPASSERT(SIZE(matrix_b) == n_matrices)
989 :
990 149172 : use_accurate_sum = .TRUE.
991 149172 : IF (PRESENT(accurate)) use_accurate_sum = accurate
992 :
993 : !$OMP PARALLEL DO DEFAULT(NONE), &
994 : !$OMP PRIVATE(imatrix, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
995 : !$OMP PRIVATE(nrows_local, use_sp_a, use_sp_b), &
996 149172 : !$OMP SHARED(matrix_a, matrix_b, n_matrices, trace, use_accurate_sum)
997 : DO imatrix = 1, n_matrices
998 : CALL cp_fm_get_info(matrix_a(imatrix) ${appendix1}$, nrow_local=nrows_local, ncol_local=ncols_local)
999 :
1000 : use_sp_a = matrix_a(imatrix) ${appendix1}$%use_sp
1001 : use_sp_b = matrix_b(imatrix) ${appendix2}$%use_sp
1002 :
1003 : ! assume that the matrices A(i) and B(i) have identical shapes and distribution schemes
1004 : IF (use_sp_a .AND. use_sp_b) THEN
1005 : ldata_a_sp => matrix_a(imatrix) ${appendix1}$%local_data_sp(1:nrows_local, 1:ncols_local)
1006 : ldata_b_sp => matrix_b(imatrix) ${appendix2}$%local_data_sp(1:nrows_local, 1:ncols_local)
1007 : IF (use_accurate_sum) THEN
1008 : trace(imatrix) = accurate_dot_product(ldata_a_sp, ldata_b_sp)
1009 : ELSE
1010 : trace(imatrix) = SUM(ldata_a_sp*ldata_b_sp)
1011 : END IF
1012 : ELSE IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
1013 : ldata_a => matrix_a(imatrix) ${appendix1}$%local_data(1:nrows_local, 1:ncols_local)
1014 : ldata_b => matrix_b(imatrix) ${appendix2}$%local_data(1:nrows_local, 1:ncols_local)
1015 : IF (use_accurate_sum) THEN
1016 : trace(imatrix) = accurate_dot_product(ldata_a, ldata_b)
1017 : ELSE
1018 : trace(imatrix) = SUM(ldata_a*ldata_b)
1019 : END IF
1020 : ELSE
1021 : CPABORT("Matrices A and B are of different types")
1022 : END IF
1023 : END DO
1024 : !$OMP END PARALLEL DO
1025 :
1026 149172 : group = matrix_a(1) ${appendix1}$%matrix_struct%para_env
1027 498696 : CALL group%sum(trace)
1028 :
1029 149172 : CALL timestop(handle)
1030 149172 : END SUBROUTINE cp_fm_trace_a1b1t1_${shortname1}$${shortname2}$
1031 : #:endfor
1032 : #:endfor
1033 :
1034 : ! **************************************************************************************************
1035 : !> \brief Compute trace(i,j) = \sum_k Tr (matrix_a(k,i)^T matrix_b(k,j)).
1036 : !> \param matrix_a list of A matrices
1037 : !> \param matrix_b list of B matrices
1038 : !> \param trace computed traces
1039 : !> \param accurate ...
1040 : ! **************************************************************************************************
1041 : #:for longname1, shortname1, appendix1 in types
1042 : #:for longname2, shortname2, appendix2 in types
1043 13876 : SUBROUTINE cp_fm_contracted_trace_a2b2t2_${shortname1}$${shortname2}$ (matrix_a, matrix_b, trace, accurate)
1044 : TYPE(${longname1}$), DIMENSION(:, :), INTENT(IN) :: matrix_a
1045 : TYPE(${longname2}$), DIMENSION(:, :), INTENT(IN) :: matrix_b
1046 : REAL(kind=dp), DIMENSION(:, :), INTENT(OUT) :: trace
1047 : LOGICAL, INTENT(IN), OPTIONAL :: accurate
1048 :
1049 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_contracted_trace_a2b2t2_${shortname1}$${shortname2}$'
1050 :
1051 : INTEGER :: handle, ia, ib, iz, na, nb, ncols_local, &
1052 : nrows_local, nz
1053 : INTEGER(kind=int_8) :: ib8, itrace, na8, ntraces
1054 : LOGICAL :: use_accurate_sum, use_sp_a, use_sp_b
1055 : REAL(kind=dp) :: t
1056 13876 : REAL(kind=dp), DIMENSION(:, :), POINTER :: ldata_a, ldata_b
1057 13876 : REAL(kind=sp), DIMENSION(:, :), POINTER :: ldata_a_sp, ldata_b_sp
1058 : TYPE(mp_comm_type) :: group
1059 :
1060 13876 : CALL timeset(routineN, handle)
1061 :
1062 13876 : nz = SIZE(matrix_a, 1)
1063 13876 : CPASSERT(SIZE(matrix_b, 1) == nz)
1064 :
1065 13876 : na = SIZE(matrix_a, 2)
1066 13876 : nb = SIZE(matrix_b, 2)
1067 13876 : CPASSERT(SIZE(trace, 1) == na)
1068 13876 : CPASSERT(SIZE(trace, 2) == nb)
1069 :
1070 13876 : use_accurate_sum = .TRUE.
1071 13876 : IF (PRESENT(accurate)) use_accurate_sum = accurate
1072 :
1073 : ! here we use one running index (itrace) instead of two (ia, ib) in order to
1074 : ! improve load balance between shared-memory threads
1075 13876 : ntraces = na*nb
1076 13876 : na8 = INT(na, kind=int_8)
1077 :
1078 : !$OMP PARALLEL DO DEFAULT(NONE), &
1079 : !$OMP PRIVATE(ia, ib, ib8, itrace, iz, ldata_a, ldata_a_sp, ldata_b, ldata_b_sp, ncols_local), &
1080 : !$OMP PRIVATE(nrows_local, t, use_sp_a, use_sp_b), &
1081 13876 : !$OMP SHARED(matrix_a, matrix_b, na, na8, nb, ntraces, nz, trace, use_accurate_sum)
1082 : DO itrace = 1, ntraces
1083 : ib8 = (itrace - 1)/na8
1084 : ia = INT(itrace - ib8*na8)
1085 : ib = INT(ib8) + 1
1086 :
1087 : t = 0.0_dp
1088 : DO iz = 1, nz
1089 : CALL cp_fm_get_info(matrix_a(iz, ia) ${appendix1}$, nrow_local=nrows_local, ncol_local=ncols_local)
1090 : use_sp_a = matrix_a(iz, ia) ${appendix1}$%use_sp
1091 : use_sp_b = matrix_b(iz, ib) ${appendix2}$%use_sp
1092 :
1093 : ! assume that the matrices A(iz, ia) and B(iz, ib) have identical shapes and distribution schemes
1094 : IF (.NOT. use_sp_a .AND. .NOT. use_sp_b) THEN
1095 : ldata_a => matrix_a(iz, ia) ${appendix1}$%local_data(1:nrows_local, 1:ncols_local)
1096 : ldata_b => matrix_b(iz, ib) ${appendix2}$%local_data(1:nrows_local, 1:ncols_local)
1097 : IF (use_accurate_sum) THEN
1098 : t = t + accurate_dot_product(ldata_a, ldata_b)
1099 : ELSE
1100 : t = t + SUM(ldata_a*ldata_b)
1101 : END IF
1102 : ELSE IF (use_sp_a .AND. use_sp_b) THEN
1103 : ldata_a_sp => matrix_a(iz, ia) ${appendix1}$%local_data_sp(1:nrows_local, 1:ncols_local)
1104 : ldata_b_sp => matrix_b(iz, ib) ${appendix2}$%local_data_sp(1:nrows_local, 1:ncols_local)
1105 : IF (use_accurate_sum) THEN
1106 : t = t + accurate_dot_product(ldata_a_sp, ldata_b_sp)
1107 : ELSE
1108 : t = t + SUM(ldata_a_sp*ldata_b_sp)
1109 : END IF
1110 : ELSE
1111 : CPABORT("Matrices A and B are of different types")
1112 : END IF
1113 : END DO
1114 : trace(ia, ib) = t
1115 : END DO
1116 : !$OMP END PARALLEL DO
1117 :
1118 13876 : group = matrix_a(1, 1) ${appendix1}$%matrix_struct%para_env
1119 663128 : CALL group%sum(trace)
1120 :
1121 13876 : CALL timestop(handle)
1122 13876 : END SUBROUTINE cp_fm_contracted_trace_a2b2t2_${shortname1}$${shortname2}$
1123 : #:endfor
1124 : #:endfor
1125 :
1126 : ! **************************************************************************************************
1127 : !> \brief multiplies in place by a triangular matrix:
1128 : !> matrix_b = alpha op(triangular_matrix) matrix_b
1129 : !> or (if side='R')
1130 : !> matrix_b = alpha matrix_b op(triangular_matrix)
1131 : !> op(triangular_matrix) is:
1132 : !> triangular_matrix (if transpose_tr=.false. and invert_tr=.false.)
1133 : !> triangular_matrix^T (if transpose_tr=.true. and invert_tr=.false.)
1134 : !> triangular_matrix^(-1) (if transpose_tr=.false. and invert_tr=.true.)
1135 : !> triangular_matrix^(-T) (if transpose_tr=.true. and invert_tr=.true.)
1136 : !> \param triangular_matrix the triangular matrix that multiplies the other
1137 : !> \param matrix_b the matrix that gets multiplied and stores the result
1138 : !> \param side on which side of matrix_b stays op(triangular_matrix)
1139 : !> (defaults to 'L')
1140 : !> \param transpose_tr if the triangular matrix should be transposed
1141 : !> (defaults to false)
1142 : !> \param invert_tr if the triangular matrix should be inverted
1143 : !> (defaults to false)
1144 : !> \param uplo_tr if triangular_matrix is stored in the upper ('U') or
1145 : !> lower ('L') triangle (defaults to 'U')
1146 : !> \param unit_diag_tr if the diagonal elements of triangular_matrix should
1147 : !> be assumed to be 1 (defaults to false)
1148 : !> \param n_rows the number of rows of the result (defaults to
1149 : !> size(matrix_b,1))
1150 : !> \param n_cols the number of columns of the result (defaults to
1151 : !> size(matrix_b,2))
1152 : !> \param alpha ...
1153 : !> \par History
1154 : !> 08.2002 created [fawzi]
1155 : !> \author Fawzi Mohamed
1156 : !> \note
1157 : !> needs an mpi env
1158 : ! **************************************************************************************************
1159 107336 : SUBROUTINE cp_fm_triangular_multiply(triangular_matrix, matrix_b, side, &
1160 : transpose_tr, invert_tr, uplo_tr, unit_diag_tr, n_rows, n_cols, &
1161 : alpha)
1162 : TYPE(cp_fm_type), INTENT(IN) :: triangular_matrix, matrix_b
1163 : CHARACTER, INTENT(IN), OPTIONAL :: side
1164 : LOGICAL, INTENT(IN), OPTIONAL :: transpose_tr, invert_tr
1165 : CHARACTER, INTENT(IN), OPTIONAL :: uplo_tr
1166 : LOGICAL, INTENT(IN), OPTIONAL :: unit_diag_tr
1167 : INTEGER, INTENT(IN), OPTIONAL :: n_rows, n_cols
1168 : REAL(KIND=dp), INTENT(IN), OPTIONAL :: alpha
1169 :
1170 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_triangular_multiply'
1171 :
1172 : CHARACTER :: side_char, transa, unit_diag, uplo
1173 : INTEGER :: handle, mdim, m, n
1174 : LOGICAL :: invert
1175 : REAL(KIND=dp) :: al
1176 :
1177 53668 : CALL timeset(routineN, handle)
1178 53668 : side_char = 'L'
1179 53668 : unit_diag = 'N'
1180 53668 : uplo = 'U'
1181 53668 : transa = 'N'
1182 53668 : invert = .FALSE.
1183 53668 : al = 1.0_dp
1184 53668 : CALL cp_fm_get_info(matrix_b, nrow_global=m, ncol_global=n)
1185 53668 : IF (PRESENT(side)) side_char = side
1186 : mdim = MERGE(1, 2, 'L' == side_char)
1187 53668 : IF (PRESENT(invert_tr)) invert = invert_tr
1188 53668 : IF (PRESENT(uplo_tr)) uplo = uplo_tr
1189 53668 : IF (PRESENT(unit_diag_tr)) THEN
1190 0 : IF (unit_diag_tr) THEN
1191 0 : unit_diag = 'U'
1192 : ELSE
1193 : unit_diag = 'N'
1194 : END IF
1195 : END IF
1196 53668 : IF (PRESENT(transpose_tr)) THEN
1197 3390 : IF (transpose_tr) THEN
1198 1278 : transa = 'T'
1199 : ELSE
1200 : transa = 'N'
1201 : END IF
1202 : END IF
1203 53668 : IF (PRESENT(alpha)) al = alpha
1204 53668 : IF (PRESENT(n_rows)) m = n_rows
1205 53668 : IF (PRESENT(n_cols)) n = n_cols
1206 :
1207 53668 : IF (invert) THEN
1208 :
1209 : #if defined(__parallel)
1210 : CALL pdtrsm(side_char, uplo, transa, unit_diag, m, n, al, &
1211 : triangular_matrix%local_data(1, 1), 1, 1, &
1212 : triangular_matrix%matrix_struct%descriptor, &
1213 : matrix_b%local_data(1, 1), 1, 1, &
1214 43572 : matrix_b%matrix_struct%descriptor(1))
1215 : #else
1216 : CALL dtrsm(side_char, uplo, transa, unit_diag, m, n, al, &
1217 : triangular_matrix%local_data(1, 1), &
1218 : SIZE(triangular_matrix%local_data, mdim), &
1219 : matrix_b%local_data(1, 1), SIZE(matrix_b%local_data, 1))
1220 : #endif
1221 :
1222 : ELSE
1223 :
1224 : #if defined(__parallel)
1225 : CALL pdtrmm(side_char, uplo, transa, unit_diag, m, n, al, &
1226 : triangular_matrix%local_data(1, 1), 1, 1, &
1227 : triangular_matrix%matrix_struct%descriptor, &
1228 : matrix_b%local_data(1, 1), 1, 1, &
1229 10096 : matrix_b%matrix_struct%descriptor(1))
1230 : #else
1231 : CALL dtrmm(side_char, uplo, transa, unit_diag, m, n, al, &
1232 : triangular_matrix%local_data(1, 1), &
1233 : SIZE(triangular_matrix%local_data, mdim), &
1234 : matrix_b%local_data(1, 1), SIZE(matrix_b%local_data, 1))
1235 : #endif
1236 :
1237 : END IF
1238 :
1239 53668 : CALL timestop(handle)
1240 53668 : END SUBROUTINE cp_fm_triangular_multiply
1241 :
1242 : ! **************************************************************************************************
1243 : !> \brief scales a matrix
1244 : !> matrix_a = alpha * matrix_b
1245 : !> \param alpha ...
1246 : !> \param matrix_a ...
1247 : !> \note
1248 : !> use cp_fm_set_all to zero (avoids problems with nan)
1249 : ! **************************************************************************************************
1250 85403 : SUBROUTINE cp_fm_scale(alpha, matrix_a)
1251 : REAL(KIND=dp), INTENT(IN) :: alpha
1252 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a
1253 :
1254 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_scale'
1255 :
1256 : INTEGER :: handle, size_a
1257 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
1258 :
1259 85403 : CALL timeset(routineN, handle)
1260 :
1261 : NULLIFY (a)
1262 :
1263 85403 : a => matrix_a%local_data
1264 85403 : size_a = SIZE(a, 1)*SIZE(a, 2)
1265 :
1266 85403 : CALL DSCAL(size_a, alpha, a, 1)
1267 :
1268 85403 : CALL timestop(handle)
1269 :
1270 85403 : END SUBROUTINE cp_fm_scale
1271 :
1272 : ! **************************************************************************************************
1273 : !> \brief transposes a matrix
1274 : !> matrixt = matrix ^ T
1275 : !> \param matrix ...
1276 : !> \param matrixt ...
1277 : !> \note
1278 : !> all matrix elements are transposed (see cp_fm_uplo_to_full to symmetrise a matrix)
1279 : ! **************************************************************************************************
1280 24958 : SUBROUTINE cp_fm_transpose(matrix, matrixt)
1281 : TYPE(cp_fm_type), INTENT(IN) :: matrix, matrixt
1282 :
1283 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_transpose'
1284 :
1285 : INTEGER :: handle, ncol_global, &
1286 : nrow_global, ncol_globalt, nrow_globalt
1287 12479 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, c
1288 : #if defined(__parallel)
1289 : INTEGER, DIMENSION(9) :: desca, descc
1290 : #elif !defined(__MKL)
1291 : INTEGER :: i, j
1292 : #endif
1293 :
1294 12479 : nrow_global = matrix%matrix_struct%nrow_global
1295 12479 : ncol_global = matrix%matrix_struct%ncol_global
1296 12479 : nrow_globalt = matrixt%matrix_struct%nrow_global
1297 12479 : ncol_globalt = matrixt%matrix_struct%ncol_global
1298 0 : CPASSERT(nrow_global == ncol_globalt)
1299 12479 : CPASSERT(nrow_globalt == ncol_global)
1300 :
1301 12479 : CALL timeset(routineN, handle)
1302 :
1303 12479 : a => matrix%local_data
1304 12479 : c => matrixt%local_data
1305 :
1306 : #if defined(__parallel)
1307 124790 : desca(:) = matrix%matrix_struct%descriptor(:)
1308 124790 : descc(:) = matrixt%matrix_struct%descriptor(:)
1309 12479 : CALL pdtran(ncol_global, nrow_global, 1.0_dp, a(1, 1), 1, 1, desca, 0.0_dp, c(1, 1), 1, 1, descc)
1310 : #elif defined(__MKL)
1311 : CALL mkl_domatcopy('C', 'T', nrow_global, ncol_global, 1.0_dp, a(1, 1), nrow_global, c(1, 1), ncol_global)
1312 : #else
1313 : DO j = 1, ncol_global
1314 : DO i = 1, nrow_global
1315 : c(j, i) = a(i, j)
1316 : END DO
1317 : END DO
1318 : #endif
1319 12479 : CALL timestop(handle)
1320 :
1321 12479 : END SUBROUTINE cp_fm_transpose
1322 :
1323 : ! **************************************************************************************************
1324 : !> \brief given a triangular matrix according to uplo, computes the corresponding full matrix
1325 : !> \param matrix the triangular matrix as input, the full matrix as output
1326 : !> \param work a matrix of the same size as matrix
1327 : !> \param uplo triangular format; defaults to 'U'
1328 : !> \author Matthias Krack
1329 : !> \note
1330 : !> the opposite triangular part is irrelevant
1331 : ! **************************************************************************************************
1332 340698 : SUBROUTINE cp_fm_uplo_to_full(matrix, work, uplo)
1333 :
1334 : TYPE(cp_fm_type), INTENT(IN) :: matrix, work
1335 : CHARACTER, INTENT(IN), OPTIONAL :: uplo
1336 :
1337 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_uplo_to_full'
1338 :
1339 : CHARACTER :: myuplo
1340 : INTEGER :: handle, icol_global, irow_global, &
1341 : mypcol, myprow, ncol_global, &
1342 : nrow_global
1343 170349 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
1344 170349 : REAL(KIND=sp), DIMENSION(:, :), POINTER :: a_sp
1345 : TYPE(cp_blacs_env_type), POINTER :: context
1346 :
1347 : #if defined(__parallel)
1348 : INTEGER :: icol_local, irow_local, &
1349 : ncol_block, ncol_local, &
1350 : nrow_block, nrow_local
1351 : INTEGER, DIMENSION(9) :: desca, descc
1352 170349 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: c
1353 170349 : REAL(KIND=sp), DIMENSION(:, :), POINTER :: c_sp
1354 : #endif
1355 :
1356 170349 : myuplo = 'U'
1357 0 : IF (PRESENT(uplo)) myuplo = uplo
1358 :
1359 170349 : nrow_global = matrix%matrix_struct%nrow_global
1360 170349 : ncol_global = matrix%matrix_struct%ncol_global
1361 170349 : CPASSERT(nrow_global == ncol_global)
1362 170349 : nrow_global = work%matrix_struct%nrow_global
1363 170349 : ncol_global = work%matrix_struct%ncol_global
1364 170349 : CPASSERT(nrow_global == ncol_global)
1365 170349 : CPASSERT(matrix%use_sp .EQV. work%use_sp)
1366 :
1367 170349 : CALL timeset(routineN, handle)
1368 :
1369 170349 : context => matrix%matrix_struct%context
1370 170349 : myprow = context%mepos(1)
1371 170349 : mypcol = context%mepos(2)
1372 :
1373 : #if defined(__parallel)
1374 :
1375 170349 : nrow_block = matrix%matrix_struct%nrow_block
1376 170349 : ncol_block = matrix%matrix_struct%ncol_block
1377 :
1378 170349 : nrow_local = matrix%matrix_struct%nrow_locals(myprow)
1379 170349 : ncol_local = matrix%matrix_struct%ncol_locals(mypcol)
1380 :
1381 170349 : a => work%local_data
1382 170349 : a_sp => work%local_data_sp
1383 1703490 : desca(:) = work%matrix_struct%descriptor(:)
1384 170349 : c => matrix%local_data
1385 170349 : c_sp => matrix%local_data_sp
1386 1703490 : descc(:) = matrix%matrix_struct%descriptor(:)
1387 :
1388 5578319 : DO icol_local = 1, ncol_local
1389 5407970 : icol_global = matrix%matrix_struct%col_indices(icol_local)
1390 244258595 : DO irow_local = 1, nrow_local
1391 238680276 : irow_global = matrix%matrix_struct%row_indices(irow_local)
1392 244088246 : IF (MERGE(irow_global > icol_global, irow_global < icol_global, (myuplo == "U") .OR. (myuplo == "u"))) THEN
1393 117767217 : IF (matrix%use_sp) THEN
1394 0 : c_sp(irow_local, icol_local) = 0.0_sp
1395 : ELSE
1396 117767217 : c(irow_local, icol_local) = 0.0_dp
1397 : END IF
1398 120913059 : ELSE IF (irow_global == icol_global) THEN
1399 3145842 : IF (matrix%use_sp) THEN
1400 0 : c_sp(irow_local, icol_local) = 0.5_sp*c_sp(irow_local, icol_local)
1401 : ELSE
1402 3145842 : c(irow_local, icol_local) = 0.5_dp*c(irow_local, icol_local)
1403 : END IF
1404 : END IF
1405 : END DO
1406 : END DO
1407 :
1408 5578319 : DO icol_local = 1, ncol_local
1409 244258595 : DO irow_local = 1, nrow_local
1410 244088246 : IF (matrix%use_sp) THEN
1411 0 : a_sp(irow_local, icol_local) = c_sp(irow_local, icol_local)
1412 : ELSE
1413 238680276 : a(irow_local, icol_local) = c(irow_local, icol_local)
1414 : END IF
1415 : END DO
1416 : END DO
1417 :
1418 170349 : IF (matrix%use_sp) THEN
1419 0 : CALL pstran(nrow_global, ncol_global, 1.0_sp, a_sp(1, 1), 1, 1, desca, 1.0_sp, c_sp(1, 1), 1, 1, descc)
1420 : ELSE
1421 170349 : CALL pdtran(nrow_global, ncol_global, 1.0_dp, a(1, 1), 1, 1, desca, 1.0_dp, c(1, 1), 1, 1, descc)
1422 : END IF
1423 :
1424 : #else
1425 :
1426 : a => matrix%local_data
1427 : a_sp => matrix%local_data_sp
1428 :
1429 : IF ((myuplo == "U") .OR. (myuplo == "u")) THEN
1430 : DO irow_global = 1, nrow_global
1431 : DO icol_global = irow_global + 1, ncol_global
1432 : IF (matrix%use_sp) THEN
1433 : a_sp(icol_global, irow_global) = a_sp(irow_global, icol_global)
1434 : ELSE
1435 : a(icol_global, irow_global) = a(irow_global, icol_global)
1436 : END IF
1437 : END DO
1438 : END DO
1439 : ELSE
1440 : DO icol_global = 1, ncol_global
1441 : DO irow_global = icol_global + 1, nrow_global
1442 : IF (matrix%use_sp) THEN
1443 : a_sp(irow_global, icol_global) = a_sp(icol_global, irow_global)
1444 : ELSE
1445 : a(irow_global, icol_global) = a(icol_global, irow_global)
1446 : END IF
1447 : END DO
1448 : END DO
1449 : END IF
1450 :
1451 : #endif
1452 170349 : CALL timestop(handle)
1453 :
1454 170349 : END SUBROUTINE cp_fm_uplo_to_full
1455 :
1456 : ! **************************************************************************************************
1457 : !> \brief scales column i of matrix a with scaling(i)
1458 : !> \param matrixa ...
1459 : !> \param scaling : an array used for scaling the columns,
1460 : !> SIZE(scaling) determines the number of columns to be scaled
1461 : !> \author Joost VandeVondele
1462 : !> \note
1463 : !> this is very useful as a first step in the computation of C = sum_i alpha_i A_i transpose (A_i)
1464 : !> that is a rank-k update (cp_fm_syrk , cp_sm_plus_fm_fm_t)
1465 : !> this procedure can be up to 20 times faster than calling cp_fm_syrk n times
1466 : !> where every vector has a different prefactor
1467 : ! **************************************************************************************************
1468 134313 : SUBROUTINE cp_fm_column_scale(matrixa, scaling)
1469 : TYPE(cp_fm_type), INTENT(IN) :: matrixa
1470 : REAL(KIND=dp), DIMENSION(:), INTENT(IN) :: scaling
1471 :
1472 : INTEGER :: k, mypcol, myprow, n, ncol_global, &
1473 : npcol, nprow
1474 134313 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
1475 134313 : REAL(KIND=sp), DIMENSION(:, :), POINTER :: a_sp
1476 : #if defined(__parallel)
1477 : INTEGER :: icol_global, icol_local, &
1478 : ipcol, iprow, irow_local
1479 : #else
1480 : INTEGER :: i
1481 : #endif
1482 :
1483 134313 : myprow = matrixa%matrix_struct%context%mepos(1)
1484 134313 : mypcol = matrixa%matrix_struct%context%mepos(2)
1485 134313 : nprow = matrixa%matrix_struct%context%num_pe(1)
1486 134313 : npcol = matrixa%matrix_struct%context%num_pe(2)
1487 :
1488 134313 : ncol_global = matrixa%matrix_struct%ncol_global
1489 :
1490 134313 : a => matrixa%local_data
1491 134313 : a_sp => matrixa%local_data_sp
1492 134313 : IF (matrixa%use_sp) THEN
1493 0 : n = SIZE(a_sp, 1)
1494 : ELSE
1495 134313 : n = SIZE(a, 1)
1496 : END IF
1497 134313 : k = MIN(SIZE(scaling), ncol_global)
1498 :
1499 : #if defined(__parallel)
1500 :
1501 2129606 : DO icol_global = 1, k
1502 : CALL infog2l(1, icol_global, matrixa%matrix_struct%descriptor, &
1503 : nprow, npcol, myprow, mypcol, &
1504 1995293 : irow_local, icol_local, iprow, ipcol)
1505 2129606 : IF ((ipcol == mypcol)) THEN
1506 1995293 : IF (matrixa%use_sp) THEN
1507 0 : CALL SSCAL(n, REAL(scaling(icol_global), sp), a_sp(:, icol_local), 1)
1508 : ELSE
1509 1995293 : CALL DSCAL(n, scaling(icol_global), a(:, icol_local), 1)
1510 : END IF
1511 : END IF
1512 : END DO
1513 : #else
1514 : DO i = 1, k
1515 : IF (matrixa%use_sp) THEN
1516 : CALL SSCAL(n, REAL(scaling(i), sp), a_sp(:, i), 1)
1517 : ELSE
1518 : CALL DSCAL(n, scaling(i), a(:, i), 1)
1519 : END IF
1520 : END DO
1521 : #endif
1522 134313 : END SUBROUTINE cp_fm_column_scale
1523 :
1524 : ! **************************************************************************************************
1525 : !> \brief scales row i of matrix a with scaling(i)
1526 : !> \param matrixa ...
1527 : !> \param scaling : an array used for scaling the columns,
1528 : !> \author JGH
1529 : !> \note
1530 : ! **************************************************************************************************
1531 6252 : SUBROUTINE cp_fm_row_scale(matrixa, scaling)
1532 : TYPE(cp_fm_type), INTENT(IN) :: matrixa
1533 : REAL(KIND=dp), DIMENSION(:), INTENT(IN) :: scaling
1534 :
1535 : INTEGER :: n, m, nrow_global, nrow_local, ncol_local
1536 6252 : INTEGER, DIMENSION(:), POINTER :: row_indices
1537 6252 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
1538 6252 : REAL(KIND=sp), DIMENSION(:, :), POINTER :: a_sp
1539 : #if defined(__parallel)
1540 : INTEGER :: irow_global, icol, irow
1541 : #else
1542 : INTEGER :: j
1543 : #endif
1544 :
1545 : CALL cp_fm_get_info(matrixa, row_indices=row_indices, nrow_global=nrow_global, &
1546 6252 : nrow_local=nrow_local, ncol_local=ncol_local)
1547 6252 : CPASSERT(SIZE(scaling) == nrow_global)
1548 :
1549 6252 : a => matrixa%local_data
1550 6252 : a_sp => matrixa%local_data_sp
1551 6252 : IF (matrixa%use_sp) THEN
1552 6252 : n = SIZE(a_sp, 1)
1553 6252 : m = SIZE(a_sp, 2)
1554 : ELSE
1555 6252 : n = SIZE(a, 1)
1556 6252 : m = SIZE(a, 2)
1557 : END IF
1558 :
1559 : #if defined(__parallel)
1560 78676 : DO icol = 1, ncol_local
1561 78676 : IF (matrixa%use_sp) THEN
1562 0 : DO irow = 1, nrow_local
1563 0 : irow_global = row_indices(irow)
1564 0 : a(irow, icol) = REAL(scaling(irow_global), dp)*a(irow, icol)
1565 : END DO
1566 : ELSE
1567 6478826 : DO irow = 1, nrow_local
1568 6406402 : irow_global = row_indices(irow)
1569 6478826 : a(irow, icol) = scaling(irow_global)*a(irow, icol)
1570 : END DO
1571 : END IF
1572 : END DO
1573 : #else
1574 : IF (matrixa%use_sp) THEN
1575 : DO j = 1, m
1576 : a_sp(1:n, j) = REAL(scaling(1:n), sp)*a_sp(1:n, j)
1577 : END DO
1578 : ELSE
1579 : DO j = 1, m
1580 : a(1:n, j) = scaling(1:n)*a(1:n, j)
1581 : END DO
1582 : END IF
1583 : #endif
1584 6252 : END SUBROUTINE cp_fm_row_scale
1585 :
1586 : ! **************************************************************************************************
1587 : !> \brief Inverts a cp_fm_type matrix, optionally returning the determinant of the input matrix
1588 : !> \param matrix_a the matrix to invert
1589 : !> \param matrix_inverse the inverse of matrix_a
1590 : !> \param det_a the determinant of matrix_a
1591 : !> \param eps_svd optional parameter to active SVD based inversion, singular values below eps_svd
1592 : !> are screened
1593 : !> \param eigval optionally return matrix eigenvalues/singular values
1594 : !> \par History
1595 : !> note of Jan Wilhelm (12.2015)
1596 : !> - computation of determinant corrected
1597 : !> - determinant only computed if det_a is present
1598 : !> 12.2016 added option to use SVD instead of LU [Nico Holmberg]
1599 : !> - Use cp_fm_get diag instead of n times cp_fm_get_element (A. Bussy)
1600 : !> \author Florian Schiffmann(02.2007)
1601 : ! **************************************************************************************************
1602 1588 : SUBROUTINE cp_fm_invert(matrix_a, matrix_inverse, det_a, eps_svd, eigval)
1603 :
1604 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_inverse
1605 : REAL(KIND=dp), INTENT(OUT), OPTIONAL :: det_a
1606 : REAL(KIND=dp), INTENT(IN), OPTIONAL :: eps_svd
1607 : REAL(KIND=dp), DIMENSION(:), POINTER, &
1608 : INTENT(INOUT), OPTIONAL :: eigval
1609 :
1610 : INTEGER :: n
1611 1588 : INTEGER, ALLOCATABLE, DIMENSION(:) :: ipivot
1612 : REAL(KIND=dp) :: determinant, my_eps_svd
1613 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
1614 : TYPE(cp_fm_type) :: matrix_lu
1615 :
1616 : #if defined(__parallel)
1617 : TYPE(cp_fm_type) :: u, vt, sigma, inv_sigma_ut
1618 : TYPE(mp_comm_type) :: group
1619 : INTEGER :: i, info, liwork, lwork, exponent_of_minus_one
1620 : INTEGER, DIMENSION(9) :: desca
1621 : LOGICAL :: quenched
1622 : REAL(KIND=dp) :: alpha, beta
1623 1588 : REAL(KIND=dp), DIMENSION(:), POINTER :: diag
1624 1588 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: work
1625 : #else
1626 : LOGICAL :: sign
1627 : REAL(KIND=dp) :: eps1
1628 : #endif
1629 :
1630 1588 : my_eps_svd = 0.0_dp
1631 468 : IF (PRESENT(eps_svd)) my_eps_svd = eps_svd
1632 :
1633 : CALL cp_fm_create(matrix=matrix_lu, &
1634 : matrix_struct=matrix_a%matrix_struct, &
1635 1588 : name="A_lu"//TRIM(ADJUSTL(cp_to_string(1)))//"MATRIX")
1636 1588 : CALL cp_fm_to_fm(matrix_a, matrix_lu)
1637 :
1638 1588 : a => matrix_lu%local_data
1639 1588 : n = matrix_lu%matrix_struct%nrow_global
1640 4764 : ALLOCATE (ipivot(n + matrix_a%matrix_struct%nrow_block))
1641 19369 : ipivot(:) = 0
1642 : #if defined(__parallel)
1643 1588 : IF (my_eps_svd == 0.0_dp) THEN
1644 : ! Use LU decomposition
1645 1560 : lwork = 3*n
1646 1560 : liwork = 3*n
1647 15600 : desca(:) = matrix_lu%matrix_struct%descriptor(:)
1648 1560 : CALL pdgetrf(n, n, a, 1, 1, desca, ipivot, info)
1649 :
1650 1560 : IF (PRESENT(det_a) .OR. PRESENT(eigval)) THEN
1651 :
1652 1116 : ALLOCATE (diag(n))
1653 916 : diag(:) = 0.0_dp
1654 372 : CALL cp_fm_get_diag(matrix_lu, diag)
1655 :
1656 372 : exponent_of_minus_one = 0
1657 372 : determinant = 1.0_dp
1658 916 : DO i = 1, n
1659 544 : determinant = determinant*diag(i)
1660 916 : IF (ipivot(i) /= i) THEN
1661 224 : exponent_of_minus_one = exponent_of_minus_one + 1
1662 : END IF
1663 : END DO
1664 372 : IF (PRESENT(eigval)) THEN
1665 0 : CPASSERT(.NOT. ASSOCIATED(eigval))
1666 0 : ALLOCATE (eigval(n))
1667 0 : eigval(:) = diag
1668 : END IF
1669 372 : DEALLOCATE (diag)
1670 :
1671 372 : group = matrix_lu%matrix_struct%para_env
1672 372 : CALL group%sum(exponent_of_minus_one)
1673 :
1674 372 : determinant = determinant*(-1.0_dp)**exponent_of_minus_one
1675 :
1676 : END IF
1677 :
1678 1560 : alpha = 0.0_dp
1679 1560 : beta = 1.0_dp
1680 1560 : CALL cp_fm_set_all(matrix_inverse, alpha, beta)
1681 1560 : CALL pdgetrs('N', n, n, matrix_lu%local_data, 1, 1, desca, ipivot, matrix_inverse%local_data, 1, 1, desca, info)
1682 : ELSE
1683 : ! Use singular value decomposition
1684 : CALL cp_fm_create(matrix=u, &
1685 : matrix_struct=matrix_a%matrix_struct, &
1686 28 : name="LEFT_SINGULAR_MATRIX")
1687 28 : CALL cp_fm_set_all(u, alpha=0.0_dp)
1688 : CALL cp_fm_create(matrix=vt, &
1689 : matrix_struct=matrix_a%matrix_struct, &
1690 28 : name="RIGHT_SINGULAR_MATRIX")
1691 28 : CALL cp_fm_set_all(vt, alpha=0.0_dp)
1692 84 : ALLOCATE (diag(n))
1693 92 : diag(:) = 0.0_dp
1694 280 : desca(:) = matrix_lu%matrix_struct%descriptor(:)
1695 28 : ALLOCATE (work(1))
1696 : ! Workspace query
1697 28 : lwork = -1
1698 : CALL pdgesvd('V', 'V', n, n, matrix_lu%local_data, 1, 1, desca, diag, u%local_data, &
1699 28 : 1, 1, desca, vt%local_data, 1, 1, desca, work, lwork, info)
1700 28 : lwork = INT(work(1))
1701 28 : DEALLOCATE (work)
1702 84 : ALLOCATE (work(lwork))
1703 : ! SVD
1704 : CALL pdgesvd('V', 'V', n, n, matrix_lu%local_data, 1, 1, desca, diag, u%local_data, &
1705 28 : 1, 1, desca, vt%local_data, 1, 1, desca, work, lwork, info)
1706 : ! info == n+1 implies homogeneity error when the number of procs is large
1707 : ! this likely isnt a problem, but maybe we should handle it separately
1708 28 : IF (info /= 0 .AND. info /= n + 1) &
1709 0 : CPABORT("Singular value decomposition of matrix failed.")
1710 : ! (Pseudo)inverse and (pseudo)determinant
1711 : CALL cp_fm_create(matrix=sigma, &
1712 : matrix_struct=matrix_a%matrix_struct, &
1713 28 : name="SINGULAR_VALUE_MATRIX")
1714 28 : CALL cp_fm_set_all(sigma, alpha=0.0_dp)
1715 28 : determinant = 1.0_dp
1716 28 : quenched = .FALSE.
1717 28 : IF (PRESENT(eigval)) THEN
1718 28 : CPASSERT(.NOT. ASSOCIATED(eigval))
1719 84 : ALLOCATE (eigval(n))
1720 156 : eigval(:) = diag
1721 : END IF
1722 92 : DO i = 1, n
1723 64 : IF (diag(i) < my_eps_svd) THEN
1724 18 : diag(i) = 0.0_dp
1725 18 : quenched = .TRUE.
1726 : ELSE
1727 46 : determinant = determinant*diag(i)
1728 46 : diag(i) = 1.0_dp/diag(i)
1729 : END IF
1730 92 : CALL cp_fm_set_element(sigma, i, i, diag(i))
1731 : END DO
1732 28 : DEALLOCATE (diag)
1733 28 : IF (quenched) &
1734 : CALL cp_warn(__LOCATION__, &
1735 : "Linear dependencies were detected in the SVD inversion of matrix "//TRIM(ADJUSTL(matrix_a%name))// &
1736 12 : ". At least one singular value has been quenched.")
1737 : ! Sigma^-1 * U^T
1738 : CALL cp_fm_create(matrix=inv_sigma_ut, &
1739 : matrix_struct=matrix_a%matrix_struct, &
1740 28 : name="SINGULAR_VALUE_MATRIX")
1741 28 : CALL cp_fm_set_all(inv_sigma_ut, alpha=0.0_dp)
1742 : CALL pdgemm('N', 'T', n, n, n, 1.0_dp, sigma%local_data, 1, 1, desca, &
1743 28 : u%local_data, 1, 1, desca, 0.0_dp, inv_sigma_ut%local_data, 1, 1, desca)
1744 : ! A^-1 = V * (Sigma^-1 * U^T)
1745 28 : CALL cp_fm_set_all(matrix_inverse, alpha=0.0_dp)
1746 : CALL pdgemm('T', 'N', n, n, n, 1.0_dp, vt%local_data, 1, 1, desca, &
1747 28 : inv_sigma_ut%local_data, 1, 1, desca, 0.0_dp, matrix_inverse%local_data, 1, 1, desca)
1748 : ! Clean up
1749 28 : DEALLOCATE (work)
1750 28 : CALL cp_fm_release(u)
1751 28 : CALL cp_fm_release(vt)
1752 28 : CALL cp_fm_release(sigma)
1753 140 : CALL cp_fm_release(inv_sigma_ut)
1754 : END IF
1755 : #else
1756 : IF (my_eps_svd == 0.0_dp) THEN
1757 : sign = .TRUE.
1758 : CALL invert_matrix(matrix_a%local_data, matrix_inverse%local_data, &
1759 : eval_error=eps1)
1760 : CALL cp_fm_lu_decompose(matrix_lu, determinant, correct_sign=sign)
1761 : IF (PRESENT(eigval)) &
1762 : CALL cp_abort(__LOCATION__, &
1763 : "NYI. Eigenvalues not available for return without SCALAPACK.")
1764 : ELSE
1765 : CALL get_pseudo_inverse_svd(matrix_a%local_data, matrix_inverse%local_data, eps_svd, &
1766 : determinant, eigval)
1767 : END IF
1768 : #endif
1769 1588 : CALL cp_fm_release(matrix_lu)
1770 1588 : DEALLOCATE (ipivot)
1771 1588 : IF (PRESENT(det_a)) det_a = determinant
1772 1588 : END SUBROUTINE cp_fm_invert
1773 :
1774 : ! **************************************************************************************************
1775 : !> \brief inverts a triangular matrix
1776 : !> \param matrix_a ...
1777 : !> \param uplo_tr triangular format; defaults to 'U'
1778 : !> \author MI
1779 : ! **************************************************************************************************
1780 5060 : SUBROUTINE cp_fm_triangular_invert(matrix_a, uplo_tr)
1781 :
1782 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a
1783 : CHARACTER, INTENT(IN), OPTIONAL :: uplo_tr
1784 :
1785 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_triangular_invert'
1786 :
1787 : CHARACTER :: unit_diag, uplo
1788 : INTEGER :: handle, info, ncol_global
1789 5060 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
1790 : #if defined(__parallel)
1791 : INTEGER, DIMENSION(9) :: desca
1792 : #endif
1793 :
1794 5060 : CALL timeset(routineN, handle)
1795 :
1796 5060 : unit_diag = 'N'
1797 5060 : uplo = 'U'
1798 5060 : IF (PRESENT(uplo_tr)) uplo = uplo_tr
1799 :
1800 5060 : ncol_global = matrix_a%matrix_struct%ncol_global
1801 :
1802 5060 : a => matrix_a%local_data
1803 :
1804 : #if defined(__parallel)
1805 50600 : desca(:) = matrix_a%matrix_struct%descriptor(:)
1806 :
1807 5060 : CALL pdtrtri(uplo, unit_diag, ncol_global, a(1, 1), 1, 1, desca, info)
1808 :
1809 : #else
1810 : CALL dtrtri(uplo, unit_diag, ncol_global, a(1, 1), ncol_global, info)
1811 : #endif
1812 :
1813 5060 : CALL timestop(handle)
1814 5060 : END SUBROUTINE cp_fm_triangular_invert
1815 :
1816 : ! **************************************************************************************************
1817 : !> \brief performs a QR factorization of the input rectangular matrix A or of a submatrix of A
1818 : !> the computed triangular matrix R is in output of the submatrix sub(A) of size NxN
1819 : !> M and M give the dimension of the submatrix that has to be factorized (MxN) with M>N
1820 : !> \param matrix_a ...
1821 : !> \param matrix_r ...
1822 : !> \param nrow_fact ...
1823 : !> \param ncol_fact ...
1824 : !> \param first_row ...
1825 : !> \param first_col ...
1826 : !> \author MI
1827 : ! **************************************************************************************************
1828 19320 : SUBROUTINE cp_fm_qr_factorization(matrix_a, matrix_r, nrow_fact, ncol_fact, first_row, first_col, uplo)
1829 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a, matrix_r
1830 : INTEGER, INTENT(IN), OPTIONAL :: nrow_fact, ncol_fact, &
1831 : first_row, first_col
1832 : CHARACTER, INTENT(IN), OPTIONAL :: uplo
1833 :
1834 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_qr_factorization'
1835 :
1836 : CHARACTER :: myuplo
1837 : INTEGER :: handle, i, icol, info, irow, &
1838 : j, lda, lwork, ncol, &
1839 : ndim, nrow
1840 19320 : REAL(dp), ALLOCATABLE, DIMENSION(:) :: tau, work
1841 19320 : REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: r_mat
1842 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
1843 : #if defined(__parallel)
1844 : INTEGER, DIMENSION(9) :: desca
1845 : #endif
1846 :
1847 19320 : CALL timeset(routineN, handle)
1848 :
1849 19320 : myuplo = 'U'
1850 19320 : IF (PRESENT(uplo)) myuplo = uplo
1851 :
1852 19320 : ncol = matrix_a%matrix_struct%ncol_global
1853 19320 : nrow = matrix_a%matrix_struct%nrow_global
1854 19320 : lda = nrow
1855 :
1856 19320 : a => matrix_a%local_data
1857 :
1858 19320 : IF (PRESENT(nrow_fact)) nrow = nrow_fact
1859 19320 : IF (PRESENT(ncol_fact)) ncol = ncol_fact
1860 19320 : irow = 1
1861 19320 : IF (PRESENT(first_row)) irow = first_row
1862 19320 : icol = 1
1863 19320 : IF (PRESENT(first_col)) icol = first_col
1864 :
1865 19320 : CPASSERT(nrow >= ncol)
1866 19320 : ndim = SIZE(a, 2)
1867 57960 : ALLOCATE (tau(ndim))
1868 :
1869 : #if defined(__parallel)
1870 :
1871 193200 : desca(:) = matrix_a%matrix_struct%descriptor(:)
1872 :
1873 19320 : lwork = -1
1874 57960 : ALLOCATE (work(2*ndim))
1875 19320 : CALL pdgeqrf(nrow, ncol, a, irow, icol, desca, tau, work, lwork, info)
1876 19320 : lwork = INT(work(1))
1877 19320 : DEALLOCATE (work)
1878 57960 : ALLOCATE (work(lwork))
1879 19320 : CALL pdgeqrf(nrow, ncol, a, irow, icol, desca, tau, work, lwork, info)
1880 :
1881 : #else
1882 : lwork = -1
1883 : ALLOCATE (work(2*ndim))
1884 : CALL dgeqrf(nrow, ncol, a, lda, tau, work, lwork, info)
1885 : lwork = INT(work(1))
1886 : DEALLOCATE (work)
1887 : ALLOCATE (work(lwork))
1888 : CALL dgeqrf(nrow, ncol, a, lda, tau, work, lwork, info)
1889 :
1890 : #endif
1891 :
1892 77280 : ALLOCATE (r_mat(ncol, ncol))
1893 19320 : CALL cp_fm_get_submatrix(matrix_a, r_mat, 1, 1, ncol, ncol)
1894 19320 : IF ((myuplo == "U") .OR. (myuplo == "u")) THEN
1895 38640 : DO i = 1, ncol
1896 38640 : DO j = i + 1, ncol
1897 19320 : r_mat(j, i) = 0.0_dp
1898 : END DO
1899 : END DO
1900 : ELSE
1901 0 : DO j = 1, ncol
1902 0 : DO i = j + 1, ncol
1903 0 : r_mat(i, j) = 0.0_dp
1904 : END DO
1905 : END DO
1906 : END IF
1907 19320 : CALL cp_fm_set_submatrix(matrix_r, r_mat, 1, 1, ncol, ncol)
1908 :
1909 19320 : DEALLOCATE (tau, work, r_mat)
1910 :
1911 19320 : CALL timestop(handle)
1912 :
1913 19320 : END SUBROUTINE cp_fm_qr_factorization
1914 :
1915 : ! **************************************************************************************************
1916 : !> \brief computes the the solution to A*b=A_general using lu decomposition
1917 : !> \param matrix_a input matrix; will be overwritten
1918 : !> \param general_a contains the result
1919 : !> \author Florian Schiffmann
1920 : ! **************************************************************************************************
1921 6832 : SUBROUTINE cp_fm_solve(matrix_a, general_a)
1922 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a, general_a
1923 :
1924 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_solve'
1925 :
1926 : INTEGER :: handle, info, n, nrhs
1927 6832 : INTEGER, ALLOCATABLE, DIMENSION(:) :: ipivot
1928 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, a_general
1929 : #if defined(__parallel)
1930 : INTEGER, DIMENSION(9) :: desca, descb
1931 : #else
1932 : INTEGER :: lda, ldb
1933 : #endif
1934 :
1935 6832 : CALL timeset(routineN, handle)
1936 :
1937 6832 : a => matrix_a%local_data
1938 6832 : a_general => general_a%local_data
1939 6832 : n = matrix_a%matrix_struct%nrow_global
1940 6832 : nrhs = general_a%matrix_struct%ncol_global
1941 20496 : ALLOCATE (ipivot(n + matrix_a%matrix_struct%nrow_block))
1942 :
1943 : #if defined(__parallel)
1944 68320 : desca(:) = matrix_a%matrix_struct%descriptor(:)
1945 68320 : descb(:) = general_a%matrix_struct%descriptor(:)
1946 6832 : CALL pdgetrf(n, n, a, 1, 1, desca, ipivot, info)
1947 : CALL pdgetrs("N", n, nrhs, a, 1, 1, desca, ipivot, a_general, &
1948 6832 : 1, 1, descb, info)
1949 :
1950 : #else
1951 : lda = SIZE(a, 1)
1952 : ldb = SIZE(a_general, 1)
1953 : CALL dgetrf(n, n, a, lda, ipivot, info)
1954 : CALL dgetrs("N", n, nrhs, a, lda, ipivot, a_general, ldb, info)
1955 :
1956 : #endif
1957 : ! info is allowed to be zero
1958 : ! this does just signal a zero diagonal element
1959 6832 : DEALLOCATE (ipivot)
1960 6832 : CALL timestop(handle)
1961 6832 : END SUBROUTINE
1962 :
1963 : ! **************************************************************************************************
1964 : !> \brief Convenience function. Computes the matrix multiplications needed
1965 : !> for the multiplication of complex matrices.
1966 : !> C = beta * C + alpha * ( A ** transa ) * ( B ** transb )
1967 : !> \param transa : 'N' -> normal 'T' -> transpose
1968 : !> alpha,beta :: can be 0.0_dp and 1.0_dp
1969 : !> \param transb ...
1970 : !> \param m ...
1971 : !> \param n ...
1972 : !> \param k ...
1973 : !> \param alpha ...
1974 : !> \param A_re m x k matrix ( ! for transa = 'N'), real part
1975 : !> \param A_im m x k matrix ( ! for transa = 'N'), imaginary part
1976 : !> \param B_re k x n matrix ( ! for transa = 'N'), real part
1977 : !> \param B_im k x n matrix ( ! for transa = 'N'), imaginary part
1978 : !> \param beta ...
1979 : !> \param C_re m x n matrix, real part
1980 : !> \param C_im m x n matrix, imaginary part
1981 : !> \param a_first_col ...
1982 : !> \param a_first_row ...
1983 : !> \param b_first_col : the k x n matrix starts at col b_first_col of matrix_b (avoid usage)
1984 : !> \param b_first_row ...
1985 : !> \param c_first_col ...
1986 : !> \param c_first_row ...
1987 : !> \author Samuel Andermatt
1988 : !> \note
1989 : !> C should have no overlap with A, B
1990 : ! **************************************************************************************************
1991 0 : SUBROUTINE cp_complex_fm_gemm(transa, transb, m, n, k, alpha, A_re, A_im, B_re, B_im, beta, &
1992 : C_re, C_im, a_first_col, a_first_row, b_first_col, b_first_row, c_first_col, &
1993 : c_first_row)
1994 : CHARACTER(LEN=1), INTENT(IN) :: transa, transb
1995 : INTEGER, INTENT(IN) :: m, n, k
1996 : REAL(KIND=dp), INTENT(IN) :: alpha
1997 : TYPE(cp_fm_type), INTENT(IN) :: A_re, A_im, B_re, B_im
1998 : REAL(KIND=dp), INTENT(IN) :: beta
1999 : TYPE(cp_fm_type), INTENT(IN) :: C_re, C_im
2000 : INTEGER, INTENT(IN), OPTIONAL :: a_first_col, a_first_row, b_first_col, &
2001 : b_first_row, c_first_col, c_first_row
2002 :
2003 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_complex_fm_gemm'
2004 :
2005 : INTEGER :: handle
2006 :
2007 0 : CALL timeset(routineN, handle)
2008 :
2009 : CALL cp_fm_gemm(transa, transb, m, n, k, alpha, A_re, B_re, beta, C_re, &
2010 : a_first_col=a_first_col, &
2011 : a_first_row=a_first_row, &
2012 : b_first_col=b_first_col, &
2013 : b_first_row=b_first_row, &
2014 : c_first_col=c_first_col, &
2015 0 : c_first_row=c_first_row)
2016 : CALL cp_fm_gemm(transa, transb, m, n, k, -alpha, A_im, B_im, 1.0_dp, C_re, &
2017 : a_first_col=a_first_col, &
2018 : a_first_row=a_first_row, &
2019 : b_first_col=b_first_col, &
2020 : b_first_row=b_first_row, &
2021 : c_first_col=c_first_col, &
2022 0 : c_first_row=c_first_row)
2023 : CALL cp_fm_gemm(transa, transb, m, n, k, alpha, A_re, B_im, beta, C_im, &
2024 : a_first_col=a_first_col, &
2025 : a_first_row=a_first_row, &
2026 : b_first_col=b_first_col, &
2027 : b_first_row=b_first_row, &
2028 : c_first_col=c_first_col, &
2029 0 : c_first_row=c_first_row)
2030 : CALL cp_fm_gemm(transa, transb, m, n, k, alpha, A_im, B_re, 1.0_dp, C_im, &
2031 : a_first_col=a_first_col, &
2032 : a_first_row=a_first_row, &
2033 : b_first_col=b_first_col, &
2034 : b_first_row=b_first_row, &
2035 : c_first_col=c_first_col, &
2036 0 : c_first_row=c_first_row)
2037 :
2038 0 : CALL timestop(handle)
2039 :
2040 0 : END SUBROUTINE cp_complex_fm_gemm
2041 :
2042 : ! **************************************************************************************************
2043 : !> \brief inverts a matrix using LU decomposition
2044 : !> the input matrix will be overwritten
2045 : !> \param matrix : input a general square non-singular matrix, outputs its inverse
2046 : !> \param info_out : optional, if present outputs the info from (p)zgetri
2047 : !> \author Lianheng Tong
2048 : ! **************************************************************************************************
2049 0 : SUBROUTINE cp_fm_lu_invert(matrix, info_out)
2050 : TYPE(cp_fm_type), INTENT(IN) :: matrix
2051 : INTEGER, INTENT(OUT), OPTIONAL :: info_out
2052 :
2053 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_lu_invert'
2054 :
2055 : INTEGER :: nrows_global, handle, info, lwork
2056 0 : INTEGER, DIMENSION(:), ALLOCATABLE :: ipivot
2057 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: mat
2058 : REAL(KIND=sp), DIMENSION(:, :), POINTER :: mat_sp
2059 0 : REAL(KIND=dp), DIMENSION(:), ALLOCATABLE :: work
2060 0 : REAL(KIND=sp), DIMENSION(:), ALLOCATABLE :: work_sp
2061 : #if defined(__parallel)
2062 : INTEGER :: liwork
2063 : INTEGER, DIMENSION(9) :: desca
2064 0 : INTEGER, DIMENSION(:), ALLOCATABLE :: iwork
2065 : #else
2066 : INTEGER :: lda
2067 : #endif
2068 :
2069 0 : CALL timeset(routineN, handle)
2070 :
2071 0 : mat => matrix%local_data
2072 0 : mat_sp => matrix%local_data_sp
2073 0 : nrows_global = matrix%matrix_struct%nrow_global
2074 0 : CPASSERT(nrows_global == matrix%matrix_struct%ncol_global)
2075 0 : ALLOCATE (ipivot(nrows_global))
2076 : ! do LU decomposition
2077 : #if defined(__parallel)
2078 0 : desca = matrix%matrix_struct%descriptor
2079 0 : IF (matrix%use_sp) THEN
2080 : CALL psgetrf(nrows_global, nrows_global, &
2081 0 : mat_sp, 1, 1, desca, ipivot, info)
2082 : ELSE
2083 : CALL pdgetrf(nrows_global, nrows_global, &
2084 0 : mat, 1, 1, desca, ipivot, info)
2085 : END IF
2086 : #else
2087 : lda = SIZE(mat, 1)
2088 : IF (matrix%use_sp) THEN
2089 : CALL sgetrf(nrows_global, nrows_global, &
2090 : mat_sp, lda, ipivot, info)
2091 : ELSE
2092 : CALL dgetrf(nrows_global, nrows_global, &
2093 : mat, lda, ipivot, info)
2094 : END IF
2095 : #endif
2096 0 : IF (info /= 0) THEN
2097 0 : CALL cp_abort(__LOCATION__, "LU decomposition has failed")
2098 : END IF
2099 : ! do inversion
2100 0 : IF (matrix%use_sp) THEN
2101 0 : ALLOCATE (work(1))
2102 : ELSE
2103 0 : ALLOCATE (work_sp(1))
2104 : END IF
2105 : #if defined(__parallel)
2106 0 : ALLOCATE (iwork(1))
2107 0 : IF (matrix%use_sp) THEN
2108 : CALL psgetri(nrows_global, mat_sp, 1, 1, desca, &
2109 0 : ipivot, work_sp, -1, iwork, -1, info)
2110 0 : lwork = INT(work_sp(1))
2111 0 : DEALLOCATE (work_sp)
2112 0 : ALLOCATE (work_sp(lwork))
2113 : ELSE
2114 : CALL pdgetri(nrows_global, mat, 1, 1, desca, &
2115 0 : ipivot, work, -1, iwork, -1, info)
2116 0 : lwork = INT(work(1))
2117 0 : DEALLOCATE (work)
2118 0 : ALLOCATE (work(lwork))
2119 : END IF
2120 0 : liwork = INT(iwork(1))
2121 0 : DEALLOCATE (iwork)
2122 0 : ALLOCATE (iwork(liwork))
2123 0 : IF (matrix%use_sp) THEN
2124 : CALL psgetri(nrows_global, mat_sp, 1, 1, desca, &
2125 0 : ipivot, work_sp, lwork, iwork, liwork, info)
2126 : ELSE
2127 : CALL pdgetri(nrows_global, mat, 1, 1, desca, &
2128 0 : ipivot, work, lwork, iwork, liwork, info)
2129 : END IF
2130 0 : DEALLOCATE (iwork)
2131 : #else
2132 : IF (matrix%use_sp) THEN
2133 : CALL sgetri(nrows_global, mat_sp, lda, &
2134 : ipivot, work_sp, -1, info)
2135 : lwork = INT(work_sp(1))
2136 : DEALLOCATE (work_sp)
2137 : ALLOCATE (work_sp(lwork))
2138 : CALL sgetri(nrows_global, mat_sp, lda, &
2139 : ipivot, work_sp, lwork, info)
2140 : ELSE
2141 : CALL dgetri(nrows_global, mat, lda, &
2142 : ipivot, work, -1, info)
2143 : lwork = INT(work(1))
2144 : DEALLOCATE (work)
2145 : ALLOCATE (work(lwork))
2146 : CALL dgetri(nrows_global, mat, lda, &
2147 : ipivot, work, lwork, info)
2148 : END IF
2149 : #endif
2150 0 : IF (matrix%use_sp) THEN
2151 0 : DEALLOCATE (work_sp)
2152 : ELSE
2153 0 : DEALLOCATE (work)
2154 : END IF
2155 0 : DEALLOCATE (ipivot)
2156 :
2157 0 : IF (PRESENT(info_out)) THEN
2158 0 : info_out = info
2159 : ELSE
2160 0 : IF (info /= 0) &
2161 0 : CALL cp_abort(__LOCATION__, "LU inversion has failed")
2162 : END IF
2163 :
2164 0 : CALL timestop(handle)
2165 :
2166 0 : END SUBROUTINE cp_fm_lu_invert
2167 :
2168 : ! **************************************************************************************************
2169 : !> \brief norm of matrix using (p)dlange
2170 : !> \param matrix : input a general matrix
2171 : !> \param mode : 'M' max abs element value,
2172 : !> '1' or 'O' one norm, i.e. maximum column sum
2173 : !> 'I' infinity norm, i.e. maximum row sum
2174 : !> 'F' or 'E' Frobenius norm, i.e. sqrt of sum of all squares of elements
2175 : !> \return : the norm according to mode
2176 : !> \author Lianheng Tong
2177 : ! **************************************************************************************************
2178 492 : FUNCTION cp_fm_norm(matrix, mode) RESULT(res)
2179 : TYPE(cp_fm_type), INTENT(IN) :: matrix
2180 : CHARACTER, INTENT(IN) :: mode
2181 : REAL(KIND=dp) :: res
2182 :
2183 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_norm'
2184 :
2185 : INTEGER :: nrows, ncols, handle, lwork, nrows_local, ncols_local
2186 : REAL(KIND=sp) :: res_sp
2187 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: aa
2188 : REAL(KIND=sp), DIMENSION(:, :), POINTER :: aa_sp
2189 492 : REAL(KIND=dp), DIMENSION(:), ALLOCATABLE :: work
2190 492 : REAL(KIND=sp), DIMENSION(:), ALLOCATABLE :: work_sp
2191 : #if defined(__parallel)
2192 : INTEGER, DIMENSION(9) :: desca
2193 : #else
2194 : INTEGER :: lda
2195 : #endif
2196 :
2197 492 : CALL timeset(routineN, handle)
2198 :
2199 : CALL cp_fm_get_info(matrix=matrix, &
2200 : nrow_global=nrows, &
2201 : ncol_global=ncols, &
2202 : nrow_local=nrows_local, &
2203 492 : ncol_local=ncols_local)
2204 492 : aa => matrix%local_data
2205 492 : aa_sp => matrix%local_data_sp
2206 :
2207 : #if defined(__parallel)
2208 4920 : desca = matrix%matrix_struct%descriptor
2209 : SELECT CASE (mode)
2210 : CASE ('M', 'm')
2211 492 : lwork = 1
2212 : CASE ('1', 'O', 'o')
2213 492 : lwork = ncols_local
2214 : CASE ('I', 'i')
2215 0 : lwork = nrows_local
2216 : CASE ('F', 'f', 'E', 'e')
2217 0 : lwork = 1
2218 : CASE DEFAULT
2219 492 : CPABORT("mode input is not valid")
2220 : END SELECT
2221 492 : IF (matrix%use_sp) THEN
2222 0 : ALLOCATE (work_sp(lwork))
2223 0 : res_sp = pslange(mode, nrows, ncols, aa_sp, 1, 1, desca, work_sp)
2224 0 : DEALLOCATE (work_sp)
2225 0 : res = REAL(res_sp, KIND=dp)
2226 : ELSE
2227 1476 : ALLOCATE (work(lwork))
2228 492 : res = pdlange(mode, nrows, ncols, aa, 1, 1, desca, work)
2229 492 : DEALLOCATE (work)
2230 : END IF
2231 : #else
2232 : SELECT CASE (mode)
2233 : CASE ('M', 'm')
2234 : lwork = 1
2235 : CASE ('1', 'O', 'o')
2236 : lwork = 1
2237 : CASE ('I', 'i')
2238 : lwork = nrows
2239 : CASE ('F', 'f', 'E', 'e')
2240 : lwork = 1
2241 : CASE DEFAULT
2242 : CPABORT("mode input is not valid")
2243 : END SELECT
2244 : IF (matrix%use_sp) THEN
2245 : ALLOCATE (work_sp(lwork))
2246 : lda = SIZE(aa_sp, 1)
2247 : res_sp = slange(mode, nrows, ncols, aa_sp, lda, work_sp)
2248 : DEALLOCATE (work_sp)
2249 : res = REAL(res_sp, KIND=dp)
2250 : ELSE
2251 : ALLOCATE (work(lwork))
2252 : lda = SIZE(aa, 1)
2253 : res = dlange(mode, nrows, ncols, aa, lda, work)
2254 : DEALLOCATE (work)
2255 : END IF
2256 : #endif
2257 :
2258 492 : CALL timestop(handle)
2259 :
2260 492 : END FUNCTION cp_fm_norm
2261 :
2262 : ! **************************************************************************************************
2263 : !> \brief trace of a matrix using pdlatra
2264 : !> \param matrix : input a square matrix
2265 : !> \return : the trace
2266 : !> \author Lianheng Tong
2267 : ! **************************************************************************************************
2268 0 : FUNCTION cp_fm_latra(matrix) RESULT(res)
2269 : TYPE(cp_fm_type), INTENT(IN) :: matrix
2270 : REAL(KIND=dp) :: res
2271 :
2272 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_latra'
2273 :
2274 : INTEGER :: nrows, ncols, handle
2275 : REAL(KIND=sp) :: res_sp
2276 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: aa
2277 : REAL(KIND=sp), DIMENSION(:, :), POINTER :: aa_sp
2278 : #if defined(__parallel)
2279 : INTEGER, DIMENSION(9) :: desca
2280 : #else
2281 : INTEGER :: ii
2282 : #endif
2283 :
2284 0 : CALL timeset(routineN, handle)
2285 :
2286 0 : nrows = matrix%matrix_struct%nrow_global
2287 0 : ncols = matrix%matrix_struct%ncol_global
2288 0 : CPASSERT(nrows == ncols)
2289 0 : aa => matrix%local_data
2290 0 : aa_sp => matrix%local_data_sp
2291 :
2292 : #if defined(__parallel)
2293 0 : desca = matrix%matrix_struct%descriptor
2294 0 : IF (matrix%use_sp) THEN
2295 0 : res_sp = pslatra(nrows, aa_sp, 1, 1, desca)
2296 0 : res = REAL(res_sp, KIND=dp)
2297 : ELSE
2298 0 : res = pdlatra(nrows, aa, 1, 1, desca)
2299 : END IF
2300 : #else
2301 : IF (matrix%use_sp) THEN
2302 : res_sp = 0.0_sp
2303 : DO ii = 1, nrows
2304 : res_sp = res_sp + aa_sp(ii, ii)
2305 : END DO
2306 : res = REAL(res_sp, KIND=dp)
2307 : ELSE
2308 : res = 0.0_dp
2309 : DO ii = 1, nrows
2310 : res = res + aa(ii, ii)
2311 : END DO
2312 : END IF
2313 : #endif
2314 :
2315 0 : CALL timestop(handle)
2316 :
2317 0 : END FUNCTION cp_fm_latra
2318 :
2319 : ! **************************************************************************************************
2320 : !> \brief compute a QR factorization with column pivoting of a M-by-N distributed matrix
2321 : !> sub( A ) = A(IA:IA+M-1,JA:JA+N-1)
2322 : !> \param matrix : input M-by-N distributed matrix sub( A ) which is to be factored
2323 : !> \param tau : scalar factors TAU of the elementary reflectors. TAU is tied to the distributed matrix A
2324 : !> \param nrow ...
2325 : !> \param ncol ...
2326 : !> \param first_row ...
2327 : !> \param first_col ...
2328 : !> \author MI
2329 : ! **************************************************************************************************
2330 36 : SUBROUTINE cp_fm_pdgeqpf(matrix, tau, nrow, ncol, first_row, first_col)
2331 :
2332 : TYPE(cp_fm_type), INTENT(IN) :: matrix
2333 : REAL(KIND=dp), DIMENSION(:), POINTER :: tau
2334 : INTEGER, INTENT(IN) :: nrow, ncol, first_row, first_col
2335 :
2336 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_pdgeqpf'
2337 :
2338 : INTEGER :: handle
2339 : INTEGER :: info, lwork
2340 36 : INTEGER, ALLOCATABLE, DIMENSION(:) :: ipiv
2341 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
2342 : REAL(KIND=dp), DIMENSION(:), POINTER :: work
2343 : #if defined(__parallel)
2344 : INTEGER, DIMENSION(9) :: descc
2345 : #else
2346 : INTEGER :: lda
2347 : #endif
2348 :
2349 36 : CALL timeset(routineN, handle)
2350 :
2351 36 : a => matrix%local_data
2352 36 : lwork = -1
2353 108 : ALLOCATE (work(2*nrow))
2354 108 : ALLOCATE (ipiv(ncol))
2355 36 : info = 0
2356 :
2357 : #if defined(__parallel)
2358 360 : descc(:) = matrix%matrix_struct%descriptor(:)
2359 : ! Call SCALAPACK routine to get optimal work dimension
2360 36 : CALL pdgeqpf(nrow, ncol, a, first_row, first_col, descc, ipiv, tau, work, lwork, info)
2361 36 : lwork = INT(work(1))
2362 36 : DEALLOCATE (work)
2363 108 : ALLOCATE (work(lwork))
2364 244 : tau = 0.0_dp
2365 354 : ipiv = 0
2366 :
2367 : ! Call SCALAPACK routine to get QR decomposition of CTs
2368 36 : CALL pdgeqpf(nrow, ncol, a, first_row, first_col, descc, ipiv, tau, work, lwork, info)
2369 : #else
2370 : CPASSERT(first_row == 1 .AND. first_col == 1)
2371 : lda = SIZE(a, 1)
2372 : CALL dgeqp3(nrow, ncol, a, lda, ipiv, tau, work, lwork, info)
2373 : lwork = INT(work(1))
2374 : DEALLOCATE (work)
2375 : ALLOCATE (work(lwork))
2376 : tau = 0.0_dp
2377 : ipiv = 0
2378 : CALL dgeqp3(nrow, ncol, a, lda, ipiv, tau, work, lwork, info)
2379 : #endif
2380 36 : CPASSERT(info == 0)
2381 :
2382 36 : DEALLOCATE (work)
2383 36 : DEALLOCATE (ipiv)
2384 :
2385 36 : CALL timestop(handle)
2386 :
2387 36 : END SUBROUTINE cp_fm_pdgeqpf
2388 :
2389 : ! **************************************************************************************************
2390 : !> \brief generates an M-by-N real distributed matrix Q denoting A(IA:IA+M-1,JA:JA+N-1)
2391 : !> with orthonormal columns, which is defined as the first N columns of a product of K
2392 : !> elementary reflectors of order M
2393 : !> \param matrix : On entry, the j-th column must contain the vector which defines the elementary reflector
2394 : !> as returned from PDGEQRF
2395 : !> On exit it contains the M-by-N distributed matrix Q
2396 : !> \param tau : contains the scalar factors TAU of elementary reflectors as returned by PDGEQRF
2397 : !> \param nrow ...
2398 : !> \param first_row ...
2399 : !> \param first_col ...
2400 : !> \author MI
2401 : ! **************************************************************************************************
2402 36 : SUBROUTINE cp_fm_pdorgqr(matrix, tau, nrow, first_row, first_col)
2403 :
2404 : TYPE(cp_fm_type), INTENT(IN) :: matrix
2405 : REAL(KIND=dp), DIMENSION(:), POINTER :: tau
2406 : INTEGER, INTENT(IN) :: nrow, first_row, first_col
2407 :
2408 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_pdorgqr'
2409 :
2410 : INTEGER :: handle
2411 : INTEGER :: info, lwork
2412 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
2413 : REAL(KIND=dp), DIMENSION(:), POINTER :: work
2414 : #if defined(__parallel)
2415 : INTEGER, DIMENSION(9) :: descc
2416 : #else
2417 : INTEGER :: lda
2418 : #endif
2419 :
2420 36 : CALL timeset(routineN, handle)
2421 :
2422 36 : a => matrix%local_data
2423 36 : lwork = -1
2424 108 : ALLOCATE (work(2*nrow))
2425 36 : info = 0
2426 :
2427 : #if defined(__parallel)
2428 360 : descc(:) = matrix%matrix_struct%descriptor(:)
2429 :
2430 36 : CALL pdorgqr(nrow, nrow, nrow, a, first_row, first_col, descc, tau, work, lwork, info)
2431 36 : CPASSERT(info == 0)
2432 36 : lwork = INT(work(1))
2433 36 : DEALLOCATE (work)
2434 108 : ALLOCATE (work(lwork))
2435 :
2436 : ! Call SCALAPACK routine to get Q
2437 36 : CALL pdorgqr(nrow, nrow, nrow, a, first_row, first_col, descc, tau, work, lwork, info)
2438 : #else
2439 : CPASSERT(first_row == 1 .AND. first_col == 1)
2440 : lda = SIZE(a, 1)
2441 : CALL dorgqr(nrow, nrow, nrow, a, lda, tau, work, lwork, info)
2442 : lwork = INT(work(1))
2443 : DEALLOCATE (work)
2444 : ALLOCATE (work(lwork))
2445 : CALL dorgqr(nrow, nrow, nrow, a, lda, tau, work, lwork, info)
2446 : #endif
2447 36 : CPASSERT(INFO == 0)
2448 :
2449 36 : DEALLOCATE (work)
2450 36 : CALL timestop(handle)
2451 :
2452 36 : END SUBROUTINE cp_fm_pdorgqr
2453 :
2454 : ! **************************************************************************************************
2455 : !> \brief Applies a planar rotation defined by cs and sn to the i'th and j'th rows.
2456 : !> \param matrix ...
2457 : !> \param irow ...
2458 : !> \param jrow ...
2459 : !> \param cs cosine of the rotation angle
2460 : !> \param sn sinus of the rotation angle
2461 : !> \author Ole Schuett
2462 : ! **************************************************************************************************
2463 565504 : SUBROUTINE cp_fm_rot_rows(matrix, irow, jrow, cs, sn)
2464 : TYPE(cp_fm_type), INTENT(IN) :: matrix
2465 : INTEGER, INTENT(IN) :: irow, jrow
2466 : REAL(dp), INTENT(IN) :: cs, sn
2467 :
2468 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_rot_rows'
2469 : INTEGER :: handle, ncol
2470 :
2471 : #if defined(__parallel)
2472 : INTEGER :: info, lwork
2473 : INTEGER, DIMENSION(9) :: desc
2474 565504 : REAL(dp), DIMENSION(:), ALLOCATABLE :: work
2475 : #endif
2476 565504 : CALL timeset(routineN, handle)
2477 565504 : CALL cp_fm_get_info(matrix, ncol_global=ncol)
2478 : #if defined(__parallel)
2479 565504 : IF (1 /= matrix%matrix_struct%context%n_pid) THEN
2480 565504 : lwork = 2*ncol + 1
2481 1696512 : ALLOCATE (work(lwork))
2482 5655040 : desc(:) = matrix%matrix_struct%descriptor(:)
2483 : CALL pdrot(ncol, &
2484 : matrix%local_data(1, 1), irow, 1, desc, ncol, &
2485 : matrix%local_data(1, 1), jrow, 1, desc, ncol, &
2486 565504 : cs, sn, work, lwork, info)
2487 565504 : CPASSERT(info == 0)
2488 565504 : DEALLOCATE (work)
2489 : ELSE
2490 : #endif
2491 0 : CALL drot(ncol, matrix%local_data(irow, 1), ncol, matrix%local_data(jrow, 1), ncol, cs, sn)
2492 : #if defined(__parallel)
2493 : END IF
2494 : #endif
2495 565504 : CALL timestop(handle)
2496 565504 : END SUBROUTINE cp_fm_rot_rows
2497 :
2498 : ! **************************************************************************************************
2499 : !> \brief Applies a planar rotation defined by cs and sn to the i'th and j'th columnns.
2500 : !> \param matrix ...
2501 : !> \param icol ...
2502 : !> \param jcol ...
2503 : !> \param cs cosine of the rotation angle
2504 : !> \param sn sinus of the rotation angle
2505 : !> \author Ole Schuett
2506 : ! **************************************************************************************************
2507 637106 : SUBROUTINE cp_fm_rot_cols(matrix, icol, jcol, cs, sn)
2508 : TYPE(cp_fm_type), INTENT(IN) :: matrix
2509 : INTEGER, INTENT(IN) :: icol, jcol
2510 : REAL(dp), INTENT(IN) :: cs, sn
2511 :
2512 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_rot_cols'
2513 : INTEGER :: handle, nrow
2514 :
2515 : #if defined(__parallel)
2516 : INTEGER :: info, lwork
2517 : INTEGER, DIMENSION(9) :: desc
2518 637106 : REAL(dp), DIMENSION(:), ALLOCATABLE :: work
2519 : #endif
2520 637106 : CALL timeset(routineN, handle)
2521 637106 : CALL cp_fm_get_info(matrix, nrow_global=nrow)
2522 : #if defined(__parallel)
2523 637106 : IF (1 /= matrix%matrix_struct%context%n_pid) THEN
2524 637106 : lwork = 2*nrow + 1
2525 1911318 : ALLOCATE (work(lwork))
2526 6371060 : desc(:) = matrix%matrix_struct%descriptor(:)
2527 : CALL pdrot(nrow, &
2528 : matrix%local_data(1, 1), 1, icol, desc, 1, &
2529 : matrix%local_data(1, 1), 1, jcol, desc, 1, &
2530 637106 : cs, sn, work, lwork, info)
2531 637106 : CPASSERT(info == 0)
2532 637106 : DEALLOCATE (work)
2533 : ELSE
2534 : #endif
2535 0 : CALL drot(nrow, matrix%local_data(1, icol), 1, matrix%local_data(1, jcol), 1, cs, sn)
2536 : #if defined(__parallel)
2537 : END IF
2538 : #endif
2539 637106 : CALL timestop(handle)
2540 637106 : END SUBROUTINE cp_fm_rot_cols
2541 :
2542 : ! **************************************************************************************************
2543 : !> \brief Orthonormalizes selected rows and columns of a full matrix, matrix_a
2544 : !> \param matrix_a ...
2545 : !> \param B ...
2546 : !> \param nrows number of rows of matrix_a, optional, defaults to size(matrix_a,1)
2547 : !> \param ncols number of columns of matrix_a, optional, defaults to size(matrix_a, 2)
2548 : !> \param start_row starting index of rows, optional, defaults to 1
2549 : !> \param start_col starting index of columns, optional, defaults to 1
2550 : !> \param do_norm ...
2551 : !> \param do_print ...
2552 : ! **************************************************************************************************
2553 0 : SUBROUTINE cp_fm_Gram_Schmidt_orthonorm(matrix_a, B, nrows, ncols, start_row, start_col, &
2554 : do_norm, do_print)
2555 :
2556 : TYPE(cp_fm_type), INTENT(IN) :: matrix_a
2557 : REAL(kind=dp), DIMENSION(:, :), INTENT(OUT) :: B
2558 : INTEGER, INTENT(IN), OPTIONAL :: nrows, ncols, start_row, start_col
2559 : LOGICAL, INTENT(IN), OPTIONAL :: do_norm, do_print
2560 :
2561 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_Gram_Schmidt_orthonorm'
2562 :
2563 : INTEGER :: end_col_global, end_col_local, end_row_global, end_row_local, handle, i, j, &
2564 : j_col, ncol_global, ncol_local, nrow_global, nrow_local, start_col_global, &
2565 : start_col_local, start_row_global, start_row_local, this_col, unit_nr
2566 0 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
2567 : LOGICAL :: my_do_norm, my_do_print
2568 : REAL(KIND=dp) :: norm
2569 0 : REAL(kind=dp), DIMENSION(:, :), POINTER :: a
2570 :
2571 0 : CALL timeset(routineN, handle)
2572 :
2573 0 : my_do_norm = .TRUE.
2574 0 : IF (PRESENT(do_norm)) my_do_norm = do_norm
2575 :
2576 0 : my_do_print = .FALSE.
2577 0 : IF (PRESENT(do_print) .AND. (my_do_norm)) my_do_print = do_print
2578 :
2579 0 : unit_nr = -1
2580 0 : IF (my_do_print) THEN
2581 0 : unit_nr = cp_logger_get_default_unit_nr()
2582 0 : IF (unit_nr < 1) my_do_print = .FALSE.
2583 : END IF
2584 :
2585 0 : IF (SIZE(B) /= 0) THEN
2586 0 : IF (PRESENT(nrows)) THEN
2587 0 : nrow_global = nrows
2588 : ELSE
2589 0 : nrow_global = SIZE(B, 1)
2590 : END IF
2591 :
2592 0 : IF (PRESENT(ncols)) THEN
2593 0 : ncol_global = ncols
2594 : ELSE
2595 0 : ncol_global = SIZE(B, 2)
2596 : END IF
2597 :
2598 0 : IF (PRESENT(start_row)) THEN
2599 0 : start_row_global = start_row
2600 : ELSE
2601 : start_row_global = 1
2602 : END IF
2603 :
2604 0 : IF (PRESENT(start_col)) THEN
2605 0 : start_col_global = start_col
2606 : ELSE
2607 : start_col_global = 1
2608 : END IF
2609 :
2610 0 : end_row_global = start_row_global + nrow_global - 1
2611 0 : end_col_global = start_col_global + ncol_global - 1
2612 :
2613 : CALL cp_fm_get_info(matrix=matrix_a, &
2614 : nrow_global=nrow_global, ncol_global=ncol_global, &
2615 : nrow_local=nrow_local, ncol_local=ncol_local, &
2616 0 : row_indices=row_indices, col_indices=col_indices)
2617 0 : IF (end_row_global > nrow_global) THEN
2618 : end_row_global = nrow_global
2619 : END IF
2620 0 : IF (end_col_global > ncol_global) THEN
2621 : end_col_global = ncol_global
2622 : END IF
2623 :
2624 : ! find out row/column indices of locally stored matrix elements that
2625 : ! needs to be copied.
2626 : ! Arrays row_indices and col_indices are assumed to be sorted in
2627 : ! ascending order
2628 0 : DO start_row_local = 1, nrow_local
2629 0 : IF (row_indices(start_row_local) >= start_row_global) EXIT
2630 : END DO
2631 :
2632 0 : DO end_row_local = start_row_local, nrow_local
2633 0 : IF (row_indices(end_row_local) > end_row_global) EXIT
2634 : END DO
2635 0 : end_row_local = end_row_local - 1
2636 :
2637 0 : DO start_col_local = 1, ncol_local
2638 0 : IF (col_indices(start_col_local) >= start_col_global) EXIT
2639 : END DO
2640 :
2641 0 : DO end_col_local = start_col_local, ncol_local
2642 0 : IF (col_indices(end_col_local) > end_col_global) EXIT
2643 : END DO
2644 0 : end_col_local = end_col_local - 1
2645 :
2646 0 : a => matrix_a%local_data
2647 :
2648 0 : this_col = col_indices(start_col_local) - start_col_global + 1
2649 :
2650 0 : B(:, this_col) = a(:, start_col_local)
2651 :
2652 0 : IF (my_do_norm) THEN
2653 0 : norm = SQRT(accurate_dot_product(B(:, this_col), B(:, this_col)))
2654 0 : B(:, this_col) = B(:, this_col)/norm
2655 0 : IF (my_do_print) WRITE (unit_nr, '(I3,F8.3)') this_col, norm
2656 : END IF
2657 :
2658 0 : DO i = start_col_local + 1, end_col_local
2659 0 : this_col = col_indices(i) - start_col_global + 1
2660 0 : B(:, this_col) = a(:, i)
2661 0 : DO j = start_col_local, i - 1
2662 0 : j_col = col_indices(j) - start_col_global + 1
2663 : B(:, this_col) = B(:, this_col) - &
2664 : accurate_dot_product(B(:, j_col), B(:, this_col))* &
2665 0 : B(:, j_col)/accurate_dot_product(B(:, j_col), B(:, j_col))
2666 : END DO
2667 :
2668 0 : IF (my_do_norm) THEN
2669 0 : norm = SQRT(accurate_dot_product(B(:, this_col), B(:, this_col)))
2670 0 : B(:, this_col) = B(:, this_col)/norm
2671 0 : IF (my_do_print) WRITE (unit_nr, '(I3,F8.3)') this_col, norm
2672 : END IF
2673 :
2674 : END DO
2675 0 : CALL matrix_a%matrix_struct%para_env%sum(B)
2676 : END IF
2677 :
2678 0 : CALL timestop(handle)
2679 :
2680 0 : END SUBROUTINE cp_fm_Gram_Schmidt_orthonorm
2681 :
2682 : ! **************************************************************************************************
2683 : !> \brief Cholesky decomposition
2684 : !> \param fm_matrix ...
2685 : !> \param n ...
2686 : !> \param uplo triangular format; defaults to 'U'
2687 : ! **************************************************************************************************
2688 10030 : SUBROUTINE cp_fm_potrf(fm_matrix, n, uplo)
2689 : TYPE(cp_fm_type) :: fm_matrix
2690 : INTEGER, INTENT(IN) :: n
2691 : CHARACTER, INTENT(IN), OPTIONAL :: uplo
2692 :
2693 : CHARACTER :: myuplo
2694 : INTEGER :: info
2695 10030 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
2696 10030 : REAL(KIND=sp), DIMENSION(:, :), POINTER :: a_sp
2697 : #if defined(__parallel)
2698 : INTEGER, DIMENSION(9) :: desca
2699 : #endif
2700 :
2701 10030 : myuplo = 'U'
2702 0 : IF (PRESENT(uplo)) myuplo = uplo
2703 :
2704 10030 : a => fm_matrix%local_data
2705 10030 : a_sp => fm_matrix%local_data_sp
2706 : #if defined(__parallel)
2707 100300 : desca(:) = fm_matrix%matrix_struct%descriptor(:)
2708 10030 : IF (fm_matrix%use_sp) THEN
2709 0 : CALL pspotrf(myuplo, n, a_sp(1, 1), 1, 1, desca, info)
2710 : ELSE
2711 10030 : CALL pdpotrf(myuplo, n, a(1, 1), 1, 1, desca, info)
2712 : END IF
2713 : #else
2714 : IF (fm_matrix%use_sp) THEN
2715 : CALL spotrf(myuplo, n, a_sp(1, 1), SIZE(a_sp, 1), info)
2716 : ELSE
2717 : CALL dpotrf(myuplo, n, a(1, 1), SIZE(a, 1), info)
2718 : END IF
2719 : #endif
2720 10030 : IF (info /= 0) &
2721 0 : CPABORT("Cholesky decomposition failed. Matrix ill-conditioned?")
2722 :
2723 10030 : END SUBROUTINE cp_fm_potrf
2724 :
2725 : ! **************************************************************************************************
2726 : !> \brief Invert trianguar matrix
2727 : !> \param fm_matrix the matrix to invert (triangular matrix according to uplo)
2728 : !> \param n size of the matrix to invert
2729 : !> \param uplo triangular format; defaults to 'U'
2730 : ! **************************************************************************************************
2731 9236 : SUBROUTINE cp_fm_potri(fm_matrix, n, uplo)
2732 : TYPE(cp_fm_type) :: fm_matrix
2733 : INTEGER, INTENT(IN) :: n
2734 : CHARACTER, INTENT(IN), OPTIONAL :: uplo
2735 :
2736 : CHARACTER :: myuplo
2737 9236 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
2738 9236 : REAL(KIND=sp), DIMENSION(:, :), POINTER :: a_sp
2739 : INTEGER :: info
2740 : #if defined(__parallel)
2741 : INTEGER, DIMENSION(9) :: desca
2742 : #endif
2743 :
2744 9236 : myuplo = 'U'
2745 0 : IF (PRESENT(uplo)) myuplo = uplo
2746 :
2747 9236 : a => fm_matrix%local_data
2748 9236 : a_sp => fm_matrix%local_data_sp
2749 : #if defined(__parallel)
2750 92360 : desca(:) = fm_matrix%matrix_struct%descriptor(:)
2751 9236 : IF (fm_matrix%use_sp) THEN
2752 0 : CALL pspotri(myuplo, n, a_sp(1, 1), 1, 1, desca, info)
2753 : ELSE
2754 9236 : CALL pdpotri(myuplo, n, a(1, 1), 1, 1, desca, info)
2755 : END IF
2756 : #else
2757 : IF (fm_matrix%use_sp) THEN
2758 : CALL spotri(myuplo, n, a_sp(1, 1), SIZE(a_sp, 1), info)
2759 : ELSE
2760 : CALL dpotri(myuplo, n, a(1, 1), SIZE(a, 1), info)
2761 : END IF
2762 : #endif
2763 9236 : CPASSERT(info == 0)
2764 9236 : END SUBROUTINE cp_fm_potri
2765 :
2766 : ! **************************************************************************************************
2767 : !> \brief Calculates
2768 : !> yv = alpha*amat*xv + beta*yv
2769 : !> where amat: fm matrix
2770 : !> xv : vector replicated
2771 : !> yv : vector replicated
2772 : !> Defaults: alpha = 1, beta = 0
2773 : ! **************************************************************************************************
2774 17136 : SUBROUTINE cp_fm_matvec(amat, xv, yv, alpha, beta)
2775 : TYPE(cp_fm_type), INTENT(IN) :: amat
2776 : REAL(KIND=dp), DIMENSION(:), INTENT(IN) :: xv
2777 : REAL(KIND=dp), DIMENSION(:), INTENT(INOUT) :: yv
2778 : REAL(KIND=dp), OPTIONAL, INTENT(IN) :: alpha, beta
2779 :
2780 : INTEGER :: na, nc, nx, ny
2781 : REAL(KIND=dp) :: aval, bval
2782 : #if defined(__parallel)
2783 : INTEGER :: nrl, ncl, ic, ir
2784 17136 : INTEGER, DIMENSION(:), POINTER :: rind, cind
2785 17136 : REAL(KIND=dp), DIMENSION(:), ALLOCATABLE :: xvl, yvl, yvm
2786 : #endif
2787 :
2788 17136 : IF (amat%use_sp) THEN
2789 0 : CPABORT("cp_fm_matvec: SP option not available")
2790 : END IF
2791 17136 : aval = 1.0_dp
2792 17136 : IF (PRESENT(alpha)) aval = alpha
2793 17136 : bval = 0.0_dp
2794 17136 : IF (PRESENT(beta)) bval = beta
2795 :
2796 17136 : CALL cp_fm_get_info(amat, nrow_global=na, ncol_global=nc)
2797 17136 : nx = SIZE(xv)
2798 17136 : ny = SIZE(yv)
2799 17136 : IF ((nx /= ny) .OR. (nc /= nx)) THEN
2800 0 : CPABORT("cp_fm_matvec: incompatible dimensions")
2801 : END IF
2802 : #if defined(__parallel)
2803 : CALL cp_fm_get_info(amat, nrow_local=nrl, ncol_local=ncl, &
2804 17136 : row_indices=rind, col_indices=cind)
2805 119952 : ALLOCATE (xvl(ncl), yvl(nrl), yvm(ny))
2806 150240 : DO ic = 1, ncl
2807 150240 : xvl(ic) = xv(cind(ic))
2808 : END DO
2809 1690008 : yvl(1:nrl) = MATMUL(amat%local_data, xvl(1:ncl))
2810 150240 : yvm = 0.0_dp
2811 83688 : DO ir = 1, nrl
2812 83688 : yvm(rind(ir)) = yvl(ir)
2813 : END DO
2814 17136 : CALL amat%matrix_struct%para_env%sum(yvm)
2815 17136 : IF (bval == 0.0_dp) THEN
2816 75120 : yv = aval*yvm
2817 : ELSE
2818 75120 : yv = bval*yv + aval*yvm
2819 : END IF
2820 : #else
2821 : IF (bval == 0.0_dp) THEN
2822 : yv = aval*MATMUL(amat%local_data, xv)
2823 : ELSE
2824 : yv = bval*yv + aval*MATMUL(amat%local_data, xv)
2825 : END IF
2826 : #endif
2827 :
2828 68544 : END SUBROUTINE cp_fm_matvec
2829 :
2830 : END MODULE cp_fm_basic_linalg
|