Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2026 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief represent a full matrix distributed on many processors
10 : !> \par History
11 : !> 3) separated structure object, removed globenv, renamed to full matrix
12 : !> many changes (fawzi 08.2002)
13 : !> \author Matthias Krack (22.05.2001)
14 : ! **************************************************************************************************
15 : MODULE cp_fm_types
16 : USE cp_blacs_env, ONLY: cp_blacs_env_type
17 : USE cp_blacs_types, ONLY: cp_blacs_type
18 : USE cp_fm_struct, ONLY: cp_fm_struct_create,&
19 : cp_fm_struct_equivalent,&
20 : cp_fm_struct_get,&
21 : cp_fm_struct_release,&
22 : cp_fm_struct_retain,&
23 : cp_fm_struct_type,&
24 : cp_fm_struct_write_info
25 : USE kinds, ONLY: dp
26 : USE message_passing, ONLY: cp2k_is_parallel,&
27 : mp_any_source,&
28 : mp_para_env_type,&
29 : mp_proc_null,&
30 : mp_request_null,&
31 : mp_request_type,&
32 : mp_waitall
33 : USE parallel_rng_types, ONLY: UNIFORM,&
34 : rng_stream_type
35 : #include "../base/base_uses.f90"
36 :
37 : IMPLICIT NONE
38 :
39 : PRIVATE
40 :
41 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_fm_types'
42 : LOGICAL, PARAMETER :: debug_this_module = .TRUE.
43 : INTEGER, PARAMETER :: src_tag = 3, dest_tag = 5, send_tag = 7, recv_tag = 11
44 :
45 : INTEGER, PRIVATE :: cp_fm_mm_type = 1
46 :
47 : PUBLIC :: cp_fm_type, &
48 : cp_fm_p_type, copy_info_type
49 :
50 : PUBLIC :: cp_fm_add_to_element, &
51 : cp_fm_create, &
52 : cp_fm_release, &
53 : cp_fm_get_info, &
54 : cp_fm_set_element, &
55 : cp_fm_get_element, &
56 : cp_fm_get_diag, & ! get diagonal
57 : cp_fm_set_all, & ! set all elements and diagonal
58 : cp_fm_set_all_submatrix, & ! set a submatrix to a given value
59 : cp_fm_set_submatrix, & ! set a submatrix to given values
60 : cp_fm_get_submatrix, & ! get a submatrix of given values
61 : cp_fm_init_random, &
62 : cp_fm_maxabsval, & ! find the maximum absolute value
63 : cp_fm_maxabsrownorm, & ! find the maximum of the sum of the abs of the elements of a row
64 : cp_fm_to_fm, & ! copy (parts of) a fm to a fm
65 : cp_fm_vectorsnorm, & ! compute the norm of the column-vectors
66 : cp_fm_vectorssum, & ! compute the sum of all elements of the column-vectors
67 : cp_fm_to_fm_submat, & ! copy (parts of) a fm to a fm
68 : cp_fm_to_fm_triangular, &
69 : cp_fm_copy_general, &
70 : cp_fm_start_copy_general, &
71 : cp_fm_finish_copy_general, &
72 : cp_fm_cleanup_copy_general, &
73 : cp_fm_write_unformatted, & ! writes a full matrix to an open unit
74 : cp_fm_write_formatted, & ! writes a full matrix to an open unit
75 : cp_fm_read_unformatted, & ! reads a full matrix from an open unit
76 : cp_fm_setup, & ! allows to set flags for fms
77 : cp_fm_get_mm_type, &
78 : cp_fm_write_info, &
79 : cp_fm_to_fm_submat_general ! copy matrix across different contexts
80 :
81 : PUBLIC :: cp_fm_pilaenv
82 :
83 : INTERFACE cp_fm_to_fm
84 : MODULE PROCEDURE cp_fm_to_fm_matrix, & ! a full matrix
85 : cp_fm_to_fm_columns ! just a number of columns
86 : END INTERFACE
87 :
88 : INTERFACE cp_fm_release
89 : MODULE PROCEDURE cp_fm_release_aa0, &
90 : cp_fm_release_aa1, &
91 : cp_fm_release_aa2, &
92 : cp_fm_release_aa3, &
93 : cp_fm_release_ap1, &
94 : cp_fm_release_ap2, &
95 : cp_fm_release_pa1, &
96 : cp_fm_release_pa2, &
97 : cp_fm_release_pa3, &
98 : cp_fm_release_pp1, &
99 : cp_fm_release_pp2
100 : END INTERFACE
101 :
102 : ! **************************************************************************************************
103 : !> \brief represent a full matrix
104 : !> \param name the name of the matrix, used for printing
105 : !> \param matrix_struct structure of this matrix
106 : !> \param local_data array with the data of the matrix (its contents
107 : !> depend on the matrix type used: in parallel runs it will be
108 : !> in scalapack format, in sequential, it will simply contain
109 : !> the matrix)
110 : !> \par History
111 : !> 08.2002 created [fawzi]
112 : !> \author fawzi
113 : ! **************************************************************************************************
114 : TYPE cp_fm_type
115 : ! PRIVATE
116 : CHARACTER(LEN=60) :: name = ""
117 : TYPE(cp_fm_struct_type), POINTER :: matrix_struct => NULL()
118 : REAL(KIND=dp), DIMENSION(:, :), POINTER, CONTIGUOUS :: local_data => NULL()
119 : END TYPE cp_fm_type
120 :
121 : ! **************************************************************************************************
122 : !> \brief just to build arrays of pointers to matrices
123 : !> \param matrix the pointer to the matrix
124 : !> \par History
125 : !> 08.2002 created [fawzi]
126 : !> \author fawzi
127 : ! **************************************************************************************************
128 : TYPE cp_fm_p_type
129 : TYPE(cp_fm_type), POINTER :: matrix => NULL()
130 : END TYPE cp_fm_p_type
131 :
132 : ! **************************************************************************************************
133 : !> \brief Stores the state of a copy between cp_fm_start_copy_general
134 : !> and cp_fm_finish_copy_general
135 : !> \par History
136 : !> Jan 2017 [Mark T]
137 : ! **************************************************************************************************
138 : TYPE copy_info_type
139 : INTEGER :: send_size = -1
140 : INTEGER, DIMENSION(2) :: nlocal_recv = -1, nblock_src = -1, src_num_pe = -1 ! 1->row 2->col
141 : TYPE(mp_request_type), DIMENSION(:), ALLOCATABLE :: send_request, recv_request
142 : INTEGER, DIMENSION(:), ALLOCATABLE :: recv_disp
143 : INTEGER, DIMENSION(:), POINTER :: recv_col_indices => NULL(), recv_row_indices => NULL()
144 : INTEGER, DIMENSION(:, :), ALLOCATABLE :: src_blacs2mpi
145 : REAL(KIND=dp), DIMENSION(:), ALLOCATABLE :: recv_buf, send_buf
146 : END TYPE copy_info_type
147 :
148 : CONTAINS
149 :
150 : ! **************************************************************************************************
151 : !> \brief creates a new full matrix with the given structure
152 : !> \param matrix the matrix to be created
153 : !> \param matrix_struct the structure of matrix
154 : !> \param name ...
155 : !> \param nrow ...
156 : !> \param ncol ...
157 : !> \param set_zero ...
158 : !> \par History
159 : !> 08.2002 created [fawzi]
160 : !> \author Fawzi Mohamed
161 : !> \note
162 : !> preferred allocation routine
163 : ! **************************************************************************************************
164 1855479 : SUBROUTINE cp_fm_create(matrix, matrix_struct, name, nrow, ncol, set_zero)
165 : TYPE(cp_fm_type), INTENT(OUT) :: matrix
166 : TYPE(cp_fm_struct_type), INTENT(IN), TARGET :: matrix_struct
167 : CHARACTER(LEN=*), INTENT(in), OPTIONAL :: name
168 : INTEGER, INTENT(IN), OPTIONAL :: nrow, ncol
169 : LOGICAL, INTENT(in), OPTIONAL :: set_zero
170 :
171 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_create'
172 :
173 : INTEGER :: handle, ncol_global, ncol_local, &
174 : nrow_global, nrow_local
175 : TYPE(cp_blacs_env_type), POINTER :: context
176 : TYPE(cp_fm_struct_type), POINTER :: fm_struct
177 :
178 1855479 : CALL timeset(routineN, handle)
179 :
180 1855479 : IF (PRESENT(nrow) .OR. PRESENT(ncol)) THEN
181 1912 : CALL cp_fm_struct_get(matrix_struct, nrow_global=nrow_global, ncol_global=ncol_global)
182 1912 : IF (PRESENT(nrow)) nrow_global = nrow
183 1912 : IF (PRESENT(ncol)) ncol_global = ncol
184 : CALL cp_fm_struct_create(fm_struct, template_fmstruct=matrix_struct, &
185 1912 : nrow_global=nrow_global, ncol_global=ncol_global)
186 :
187 1912 : context => fm_struct%context
188 1912 : matrix%matrix_struct => fm_struct
189 1912 : CALL cp_fm_struct_retain(matrix%matrix_struct)
190 :
191 1912 : nrow_local = fm_struct%local_leading_dimension
192 1912 : ncol_local = MAX(1, fm_struct%ncol_locals(context%mepos(2)))
193 :
194 1912 : CALL cp_fm_struct_release(fm_struct)
195 : ELSE
196 :
197 1853567 : context => matrix_struct%context
198 1853567 : matrix%matrix_struct => matrix_struct
199 1853567 : CALL cp_fm_struct_retain(matrix%matrix_struct)
200 :
201 : ! OK, we allocate here at least a 1 x 1 matrix
202 : ! this must (and is) compatible with the descinit call
203 : ! in cp_fm_struct
204 1853567 : nrow_local = matrix_struct%local_leading_dimension
205 1853567 : ncol_local = MAX(1, matrix_struct%ncol_locals(context%mepos(2)))
206 : END IF
207 :
208 1855479 : NULLIFY (matrix%local_data)
209 :
210 7421916 : ALLOCATE (matrix%local_data(nrow_local, ncol_local))
211 :
212 1855479 : IF (PRESENT(set_zero)) THEN
213 219927 : IF (set_zero) THEN
214 70983010 : matrix%local_data(1:nrow_local, 1:ncol_local) = 0.0_dp
215 : END IF
216 : END IF
217 :
218 1855479 : IF (PRESENT(name)) THEN
219 849353 : matrix%name = name
220 : ELSE
221 1006126 : matrix%name = 'full matrix'
222 : END IF
223 :
224 1855479 : CALL timestop(handle)
225 :
226 1855479 : END SUBROUTINE cp_fm_create
227 :
228 : ! **************************************************************************************************
229 : !> \brief releases a full matrix
230 : !> \param matrix the matrix to release
231 : !> \par History
232 : !> 08.2002 created [fawzi]
233 : !> \author Fawzi Mohamed
234 : ! **************************************************************************************************
235 1875282 : SUBROUTINE cp_fm_release_aa0(matrix)
236 : TYPE(cp_fm_type), INTENT(INOUT) :: matrix
237 :
238 1875282 : IF (ASSOCIATED(matrix%local_data)) THEN
239 1854755 : DEALLOCATE (matrix%local_data)
240 : NULLIFY (matrix%local_data)
241 : END IF
242 1875282 : matrix%name = ""
243 1875282 : CALL cp_fm_struct_release(matrix%matrix_struct)
244 :
245 1875282 : END SUBROUTINE cp_fm_release_aa0
246 :
247 : ! **************************************************************************************************
248 : !> \brief ...
249 : !> \param matrices ...
250 : ! **************************************************************************************************
251 79358 : SUBROUTINE cp_fm_release_aa1(matrices)
252 : TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:) :: matrices
253 :
254 : INTEGER :: i
255 :
256 79358 : IF (ALLOCATED(matrices)) THEN
257 210485 : DO i = 1, SIZE(matrices)
258 210485 : CALL cp_fm_release(matrices(i))
259 : END DO
260 77942 : DEALLOCATE (matrices)
261 : END IF
262 79358 : END SUBROUTINE cp_fm_release_aa1
263 :
264 : ! **************************************************************************************************
265 : !> \brief ...
266 : !> \param matrices ...
267 : ! **************************************************************************************************
268 8982 : SUBROUTINE cp_fm_release_aa2(matrices)
269 : TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:, :) :: matrices
270 :
271 : INTEGER :: i, j
272 :
273 8982 : IF (ALLOCATED(matrices)) THEN
274 17692 : DO i = 1, SIZE(matrices, 1)
275 48856 : DO j = 1, SIZE(matrices, 2)
276 42808 : CALL cp_fm_release(matrices(i, j))
277 : END DO
278 : END DO
279 6048 : DEALLOCATE (matrices)
280 : END IF
281 8982 : END SUBROUTINE cp_fm_release_aa2
282 :
283 : ! **************************************************************************************************
284 : !> \brief ...
285 : !> \param matrices ...
286 : ! **************************************************************************************************
287 48 : SUBROUTINE cp_fm_release_aa3(matrices)
288 : TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:, :, :) :: matrices
289 :
290 : INTEGER :: i, j, k
291 :
292 48 : IF (ALLOCATED(matrices)) THEN
293 550 : DO i = 1, SIZE(matrices, 1)
294 1582 : DO j = 1, SIZE(matrices, 2)
295 2686 : DO k = 1, SIZE(matrices, 3)
296 2184 : CALL cp_fm_release(matrices(i, j, k))
297 : END DO
298 : END DO
299 : END DO
300 48 : DEALLOCATE (matrices)
301 : END IF
302 48 : END SUBROUTINE cp_fm_release_aa3
303 :
304 : ! **************************************************************************************************
305 : !> \brief ...
306 : !> \param matrices ...
307 : ! **************************************************************************************************
308 188992 : SUBROUTINE cp_fm_release_pa1(matrices)
309 : TYPE(cp_fm_type), DIMENSION(:), POINTER :: matrices
310 :
311 : INTEGER :: i
312 :
313 188992 : IF (ASSOCIATED(matrices)) THEN
314 145519 : DO i = 1, SIZE(matrices)
315 145519 : CALL cp_fm_release(matrices(i))
316 : END DO
317 60668 : DEALLOCATE (matrices)
318 : NULLIFY (matrices)
319 : END IF
320 188992 : END SUBROUTINE cp_fm_release_pa1
321 :
322 : ! **************************************************************************************************
323 : !> \brief ...
324 : !> \param matrices ...
325 : ! **************************************************************************************************
326 70274 : SUBROUTINE cp_fm_release_pa2(matrices)
327 : TYPE(cp_fm_type), DIMENSION(:, :), POINTER :: matrices
328 :
329 : INTEGER :: i, j
330 :
331 70274 : IF (ASSOCIATED(matrices)) THEN
332 79942 : DO i = 1, SIZE(matrices, 1)
333 147650 : DO j = 1, SIZE(matrices, 2)
334 125358 : CALL cp_fm_release(matrices(i, j))
335 : END DO
336 : END DO
337 22292 : DEALLOCATE (matrices)
338 : NULLIFY (matrices)
339 : END IF
340 70274 : END SUBROUTINE cp_fm_release_pa2
341 :
342 : ! **************************************************************************************************
343 : !> \brief ...
344 : !> \param matrices ...
345 : ! **************************************************************************************************
346 0 : SUBROUTINE cp_fm_release_pa3(matrices)
347 : TYPE(cp_fm_type), DIMENSION(:, :, :), POINTER :: matrices
348 :
349 : INTEGER :: i, j, k
350 :
351 0 : IF (ASSOCIATED(matrices)) THEN
352 0 : DO i = 1, SIZE(matrices, 1)
353 0 : DO j = 1, SIZE(matrices, 2)
354 0 : DO k = 1, SIZE(matrices, 3)
355 0 : CALL cp_fm_release(matrices(i, j, k))
356 : END DO
357 : END DO
358 : END DO
359 0 : DEALLOCATE (matrices)
360 : NULLIFY (matrices)
361 : END IF
362 0 : END SUBROUTINE cp_fm_release_pa3
363 :
364 : ! **************************************************************************************************
365 : !> \brief ...
366 : !> \param matrices ...
367 : ! **************************************************************************************************
368 0 : SUBROUTINE cp_fm_release_ap1(matrices)
369 : TYPE(cp_fm_p_type), ALLOCATABLE, DIMENSION(:) :: matrices
370 :
371 : INTEGER :: i
372 :
373 0 : IF (ALLOCATED(matrices)) THEN
374 0 : DO i = 1, SIZE(matrices)
375 0 : CALL cp_fm_release(matrices(i)%matrix)
376 0 : DEALLOCATE (matrices(i)%matrix)
377 : END DO
378 0 : DEALLOCATE (matrices)
379 : END IF
380 0 : END SUBROUTINE cp_fm_release_ap1
381 :
382 : ! **************************************************************************************************
383 : !> \brief ...
384 : !> \param matrices ...
385 : ! **************************************************************************************************
386 0 : SUBROUTINE cp_fm_release_ap2(matrices)
387 : TYPE(cp_fm_p_type), ALLOCATABLE, DIMENSION(:, :) :: matrices
388 :
389 : INTEGER :: i, j
390 :
391 0 : IF (ALLOCATED(matrices)) THEN
392 0 : DO i = 1, SIZE(matrices, 1)
393 0 : DO j = 1, SIZE(matrices, 2)
394 0 : CALL cp_fm_release(matrices(i, j)%matrix)
395 0 : DEALLOCATE (matrices(i, j)%matrix)
396 : END DO
397 : END DO
398 0 : DEALLOCATE (matrices)
399 : END IF
400 0 : END SUBROUTINE cp_fm_release_ap2
401 :
402 : ! **************************************************************************************************
403 : !> \brief ...
404 : !> \param matrices ...
405 : ! **************************************************************************************************
406 0 : SUBROUTINE cp_fm_release_pp1(matrices)
407 : TYPE(cp_fm_p_type), DIMENSION(:), POINTER :: matrices
408 :
409 : INTEGER :: i
410 :
411 0 : IF (ASSOCIATED(matrices)) THEN
412 0 : DO i = 1, SIZE(matrices)
413 0 : CALL cp_fm_release(matrices(i)%matrix)
414 0 : DEALLOCATE (matrices(i)%matrix)
415 : END DO
416 0 : DEALLOCATE (matrices)
417 : NULLIFY (matrices)
418 : END IF
419 0 : END SUBROUTINE cp_fm_release_pp1
420 :
421 : ! **************************************************************************************************
422 : !> \brief ...
423 : !> \param matrices ...
424 : ! **************************************************************************************************
425 0 : SUBROUTINE cp_fm_release_pp2(matrices)
426 : TYPE(cp_fm_p_type), DIMENSION(:, :), POINTER :: matrices
427 :
428 : INTEGER :: i, j
429 :
430 0 : IF (ASSOCIATED(matrices)) THEN
431 0 : DO i = 1, SIZE(matrices, 1)
432 0 : DO j = 1, SIZE(matrices, 2)
433 0 : CALL cp_fm_release(matrices(i, j)%matrix)
434 0 : DEALLOCATE (matrices(i, j)%matrix)
435 : END DO
436 : END DO
437 0 : DEALLOCATE (matrices)
438 : NULLIFY (matrices)
439 : END IF
440 0 : END SUBROUTINE cp_fm_release_pp2
441 :
442 : ! **************************************************************************************************
443 : !> \brief fills a matrix with random numbers
444 : !> \param matrix : to be initialized
445 : !> \param ncol : numbers of cols to fill
446 : !> \param start_col : starting at coll number
447 : !> \author Joost VandeVondele
448 : !> \note
449 : !> the value of a_ij is independent of the number of cpus
450 : ! **************************************************************************************************
451 2924 : SUBROUTINE cp_fm_init_random(matrix, ncol, start_col)
452 : TYPE(cp_fm_type), INTENT(IN) :: matrix
453 : INTEGER, INTENT(IN), OPTIONAL :: ncol, start_col
454 :
455 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_init_random'
456 :
457 : INTEGER :: handle, icol_global, icol_local, irow_local, my_ncol, my_start_col, ncol_global, &
458 : ncol_local, nrow_global, nrow_local
459 5848 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
460 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: buff
461 : REAL(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
462 2924 : POINTER :: local_data
463 : REAL(KIND=dp), DIMENSION(3, 2), SAVE :: &
464 : seed = RESHAPE([1.0_dp, 2.0_dp, 3.0_dp, 4.0_dp, 5.0_dp, 6.0_dp], [3, 2])
465 : TYPE(rng_stream_type) :: rng
466 :
467 2924 : CALL timeset(routineN, handle)
468 :
469 : ! guarantee same seed over all tasks
470 2924 : CALL matrix%matrix_struct%para_env%bcast(seed, 0)
471 :
472 : rng = rng_stream_type("cp_fm_init_random_stream", distribution_type=UNIFORM, &
473 2924 : extended_precision=.TRUE., seed=seed)
474 :
475 : CALL cp_fm_get_info(matrix, nrow_global=nrow_global, ncol_global=ncol_global, &
476 : nrow_local=nrow_local, ncol_local=ncol_local, &
477 : local_data=local_data, &
478 2924 : row_indices=row_indices, col_indices=col_indices)
479 :
480 2924 : my_start_col = 1
481 2924 : IF (PRESENT(start_col)) my_start_col = start_col
482 2924 : my_ncol = matrix%matrix_struct%ncol_global
483 2924 : IF (PRESENT(ncol)) my_ncol = ncol
484 :
485 2924 : IF (ncol_global < (my_start_col + my_ncol - 1)) &
486 0 : CPABORT("ncol_global>=(my_start_col+my_ncol-1)")
487 :
488 8772 : ALLOCATE (buff(nrow_global))
489 :
490 : ! each global row has its own substream, in order to reach the stream for the local col,
491 : ! we just reset to the next substream
492 : ! following this, we fill the full buff with random numbers, and pick those we need
493 2924 : icol_global = 0
494 22869 : DO icol_local = 1, ncol_local
495 19945 : CPASSERT(col_indices(icol_local) > icol_global)
496 : DO
497 19945 : CALL rng%reset_to_next_substream()
498 19945 : icol_global = icol_global + 1
499 19945 : IF (icol_global == col_indices(icol_local)) EXIT
500 : END DO
501 19945 : CALL rng%fill(buff)
502 722521 : DO irow_local = 1, nrow_local
503 719597 : local_data(irow_local, icol_local) = buff(row_indices(irow_local))
504 : END DO
505 : END DO
506 :
507 2924 : DEALLOCATE (buff)
508 :
509 : ! store seed before deletion (unclear if this is the proper seed)
510 :
511 : ! Note that, the initial state (rng%ig) instead of the current state (rng%cg) is stored in the
512 : ! seed variable. As a consequence, each invocation of cp_fm_init_random uses exactly the same
513 : ! stream of random numbers. While this seems odd and contrary to the original design,
514 : ! it was probably introduced to improve reproducibility.
515 : ! See also https://github.com/cp2k/cp2k/pull/506
516 2924 : CALL rng%get(ig=seed)
517 :
518 2924 : CALL timestop(handle)
519 :
520 81872 : END SUBROUTINE cp_fm_init_random
521 :
522 : ! **************************************************************************************************
523 : !> \brief set all elements of a matrix to the same value,
524 : !> and optionally the diagonal to a different one
525 : !> \param matrix input matrix
526 : !> \param alpha scalar used to set all elements of the matrix
527 : !> \param beta scalar used to set diagonal of the matrix
528 : !> \note
529 : !> can be used to zero a matrix
530 : !> can be used to create a unit matrix (I-matrix) alpha=0.0_dp beta=1.0_dp
531 : ! **************************************************************************************************
532 397920 : SUBROUTINE cp_fm_set_all(matrix, alpha, beta)
533 :
534 : TYPE(cp_fm_type), INTENT(IN) :: matrix
535 : REAL(KIND=dp), INTENT(IN) :: alpha
536 : REAL(KIND=dp), INTENT(IN), OPTIONAL :: beta
537 :
538 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_set_all'
539 :
540 : INTEGER :: handle, i, n
541 :
542 397920 : CALL timeset(routineN, handle)
543 :
544 231885938 : matrix%local_data(:, :) = alpha
545 :
546 397920 : IF (PRESENT(beta)) THEN
547 65718 : n = MIN(matrix%matrix_struct%nrow_global, matrix%matrix_struct%ncol_global)
548 509666 : DO i = 1, n
549 509666 : CALL cp_fm_set_element(matrix, i, i, beta)
550 : END DO
551 : END IF
552 :
553 397920 : CALL timestop(handle)
554 :
555 397920 : END SUBROUTINE cp_fm_set_all
556 :
557 : ! **************************************************************************************************
558 : !> \brief returns the diagonal elements of a fm
559 : !> \param matrix ...
560 : !> \param diag ...
561 : ! **************************************************************************************************
562 13654 : SUBROUTINE cp_fm_get_diag(matrix, diag)
563 :
564 : ! arguments
565 : TYPE(cp_fm_type), INTENT(IN) :: matrix
566 : REAL(KIND=dp), DIMENSION(:), INTENT(OUT) :: diag
567 :
568 : ! locals
569 : INTEGER :: i, nrow_global
570 :
571 : #if defined(__parallel)
572 : INTEGER, DIMENSION(9) :: desca
573 : TYPE(cp_blacs_env_type), POINTER :: context
574 : INTEGER :: icol_local, ipcol, iprow, irow_local, mypcol, myprow, npcol, &
575 : nprow
576 13654 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
577 : #endif
578 :
579 13654 : CALL cp_fm_get_info(matrix, nrow_global=nrow_global)
580 :
581 : #if defined(__parallel)
582 146436 : diag = 0.0_dp
583 13654 : context => matrix%matrix_struct%context
584 13654 : myprow = context%mepos(1)
585 13654 : mypcol = context%mepos(2)
586 13654 : nprow = context%num_pe(1)
587 13654 : npcol = context%num_pe(2)
588 :
589 13654 : a => matrix%local_data
590 136540 : desca(:) = matrix%matrix_struct%descriptor(:)
591 :
592 146436 : DO i = 1, nrow_global
593 : CALL infog2l(i, i, desca, nprow, npcol, myprow, mypcol, &
594 132782 : irow_local, icol_local, iprow, ipcol)
595 146436 : IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
596 66475 : diag(i) = a(irow_local, icol_local)
597 : END IF
598 : END DO
599 : #else
600 : DO i = 1, nrow_global
601 : diag(i) = matrix%local_data(i, i)
602 : END DO
603 : #endif
604 279218 : CALL matrix%matrix_struct%para_env%sum(diag)
605 :
606 13654 : END SUBROUTINE cp_fm_get_diag
607 :
608 : ! **************************************************************************************************
609 : !> \brief returns an element of a fm
610 : !> this value is valid on every cpu
611 : !> using this call is expensive
612 : !> \param matrix the matrix to read
613 : !> \param irow_global the row
614 : !> \param icol_global the col
615 : !> \param alpha the value of matrix(irow_global, icol_global)
616 : !> \param local true if the element is on this cpu, false otherwise
617 : !> \note
618 : !> - modified semantics. now this function always returns the value
619 : !> previously the value was zero on cpus that didn't own the relevant
620 : !> part of the matrix (Joost VandeVondele, May 2003)
621 : !> - usage of the function should be avoided, as it is likely to rather slow
622 : !> using row_indices/col_indices/local_data + some smart scheme normally
623 : !> yields a real parallel code
624 : ! **************************************************************************************************
625 1193594 : SUBROUTINE cp_fm_get_element(matrix, irow_global, icol_global, alpha, local)
626 :
627 : ! arguments
628 : TYPE(cp_fm_type), INTENT(IN) :: matrix
629 : REAL(KIND=dp), INTENT(OUT) :: alpha
630 : INTEGER, INTENT(IN) :: icol_global, &
631 : irow_global
632 : LOGICAL, INTENT(OUT), OPTIONAL :: local
633 :
634 : ! locals
635 : #if defined(__parallel)
636 : INTEGER, DIMENSION(9) :: desca
637 : TYPE(cp_blacs_env_type), POINTER :: context
638 : INTEGER :: icol_local, ipcol, iprow, irow_local, mypcol, myprow, npcol, &
639 : nprow
640 1193594 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
641 : #endif
642 :
643 : #if defined(__parallel)
644 1193594 : context => matrix%matrix_struct%context
645 1193594 : myprow = context%mepos(1)
646 1193594 : mypcol = context%mepos(2)
647 1193594 : nprow = context%num_pe(1)
648 1193594 : npcol = context%num_pe(2)
649 :
650 1193594 : a => matrix%local_data
651 11935940 : desca(:) = matrix%matrix_struct%descriptor(:)
652 :
653 : CALL infog2l(irow_global, icol_global, desca, nprow, npcol, myprow, mypcol, &
654 1193594 : irow_local, icol_local, iprow, ipcol)
655 :
656 1193594 : IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
657 596847 : alpha = a(irow_local, icol_local)
658 596847 : CALL context%dgebs2d('All', ' ', 1, 1, alpha, 1)
659 596847 : IF (PRESENT(local)) local = .TRUE.
660 : ELSE
661 596747 : CALL context%dgebr2d('All', ' ', 1, 1, alpha, 1, iprow, ipcol)
662 596747 : IF (PRESENT(local)) local = .FALSE.
663 : END IF
664 :
665 : #else
666 : IF (PRESENT(local)) local = .TRUE.
667 : alpha = matrix%local_data(irow_global, icol_global)
668 : #endif
669 :
670 1193594 : END SUBROUTINE cp_fm_get_element
671 :
672 : ! **************************************************************************************************
673 : !> \brief sets an element of a matrix
674 : !> \param matrix ...
675 : !> \param irow_global ...
676 : !> \param icol_global ...
677 : !> \param alpha ...
678 : !> \note
679 : !> we expect all cpus to have the same arguments in the call to this function
680 : !> (otherwise one should use local_data tricks)
681 : ! **************************************************************************************************
682 702782 : SUBROUTINE cp_fm_set_element(matrix, irow_global, icol_global, alpha)
683 : TYPE(cp_fm_type), INTENT(IN) :: matrix
684 : INTEGER, INTENT(IN) :: irow_global, icol_global
685 : REAL(KIND=dp), INTENT(IN) :: alpha
686 :
687 : INTEGER :: mypcol, myprow, npcol, nprow
688 : TYPE(cp_blacs_env_type), POINTER :: context
689 : #if defined(__parallel)
690 : INTEGER :: icol_local, ipcol, iprow, &
691 : irow_local
692 : INTEGER, DIMENSION(9) :: desca
693 702782 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
694 : #endif
695 :
696 702782 : context => matrix%matrix_struct%context
697 702782 : myprow = context%mepos(1)
698 702782 : mypcol = context%mepos(2)
699 702782 : nprow = context%num_pe(1)
700 702782 : npcol = context%num_pe(2)
701 :
702 : #if defined(__parallel)
703 :
704 702782 : a => matrix%local_data
705 :
706 7027820 : desca(:) = matrix%matrix_struct%descriptor(:)
707 :
708 : CALL infog2l(irow_global, icol_global, desca, nprow, npcol, myprow, mypcol, &
709 702782 : irow_local, icol_local, iprow, ipcol)
710 :
711 702782 : IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
712 353296 : a(irow_local, icol_local) = alpha
713 : END IF
714 :
715 : #else
716 :
717 : matrix%local_data(irow_global, icol_global) = alpha
718 :
719 : #endif
720 702782 : END SUBROUTINE cp_fm_set_element
721 :
722 : ! **************************************************************************************************
723 : !> \brief sets a submatrix of a full matrix
724 : !> fm(start_row:start_row+n_rows,start_col:start_col+n_cols)
725 : !> = alpha*op(new_values)(1:n_rows,1:n_cols)+ beta
726 : !> * fm(start_row:start_row+n_rows,start_col:start_col+n_cols)
727 : !> \param fm the full to change
728 : !> \param new_values a replicated full matrix with the new values
729 : !> \param start_row the starting row of b_matrix (defaults to 1)
730 : !> \param start_col the starting col of b_matrix (defaults to 1)
731 : !> \param n_rows the number of row to change in b (defaults to
732 : !> size(op(new_values),1))
733 : !> \param n_cols the number of columns to change in b (defaults to
734 : !> size(op(new_values),2))
735 : !> \param alpha rescaling factor for the new values (defaults to 1.0)
736 : !> \param beta rescaling factor for the old values (defaults to 0.0)
737 : !> \param transpose if new_values should be transposed: if true
738 : !> op(new_values)=new_values^T, else op(new_values)=new_values
739 : !> (defaults to false)
740 : !> \par History
741 : !> 07.2002 created borrowing from Joost's blacs_replicated_copy [fawzi]
742 : !> \author Fawzi Mohamed
743 : !> \note
744 : !> optimized for full column updates and alpha=1.0, beta=0.0
745 : !> the new_values need to be valid on all cpus
746 : ! **************************************************************************************************
747 77583 : SUBROUTINE cp_fm_set_submatrix(fm, new_values, start_row, &
748 : start_col, n_rows, n_cols, alpha, beta, transpose)
749 : TYPE(cp_fm_type), INTENT(IN) :: fm
750 : REAL(KIND=dp), DIMENSION(:, :), INTENT(in) :: new_values
751 : INTEGER, INTENT(in), OPTIONAL :: start_row, start_col, n_rows, n_cols
752 : REAL(KIND=dp), INTENT(in), OPTIONAL :: alpha, beta
753 : LOGICAL, INTENT(in), OPTIONAL :: transpose
754 :
755 : INTEGER :: i, i0, j, j0, ncol, ncol_block, &
756 : ncol_global, ncol_local, nrow, &
757 : nrow_block, nrow_global, nrow_local, &
758 : this_col, this_row
759 77583 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
760 : LOGICAL :: tr_a
761 : REAL(KIND=dp) :: al, be
762 77583 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: full_block
763 :
764 77583 : al = 1.0_dp; be = 0.0_dp; i0 = 1; j0 = 1; tr_a = .FALSE.
765 :
766 18876 : IF (PRESENT(alpha)) al = alpha
767 77583 : IF (PRESENT(beta)) be = beta
768 77583 : IF (PRESENT(start_row)) i0 = start_row
769 77583 : IF (PRESENT(start_col)) j0 = start_col
770 77583 : IF (PRESENT(transpose)) tr_a = transpose
771 15913 : IF (tr_a) THEN
772 15821 : nrow = SIZE(new_values, 2)
773 15821 : ncol = SIZE(new_values, 1)
774 : ELSE
775 61762 : nrow = SIZE(new_values, 1)
776 61762 : ncol = SIZE(new_values, 2)
777 : END IF
778 77583 : IF (PRESENT(n_rows)) nrow = n_rows
779 77583 : IF (PRESENT(n_cols)) ncol = n_cols
780 :
781 77583 : full_block => fm%local_data
782 :
783 : CALL cp_fm_get_info(matrix=fm, &
784 : nrow_global=nrow_global, ncol_global=ncol_global, &
785 : nrow_block=nrow_block, ncol_block=ncol_block, &
786 : nrow_local=nrow_local, ncol_local=ncol_local, &
787 77583 : row_indices=row_indices, col_indices=col_indices)
788 :
789 77583 : IF (al == 1.0 .AND. be == 0.0) THEN
790 1322906 : DO j = 1, ncol_local
791 1254669 : this_col = col_indices(j) - j0 + 1
792 1322906 : IF (this_col >= 1 .AND. this_col <= ncol) THEN
793 322871 : IF (tr_a) THEN
794 6475 : IF (i0 == 1 .AND. nrow_global == nrow) THEN
795 158370 : DO i = 1, nrow_local
796 158370 : full_block(i, j) = new_values(this_col, row_indices(i))
797 : END DO
798 : ELSE
799 594 : DO i = 1, nrow_local
800 510 : this_row = row_indices(i) - i0 + 1
801 594 : IF (this_row >= 1 .AND. this_row <= nrow) THEN
802 255 : full_block(i, j) = new_values(this_col, this_row)
803 : END IF
804 : END DO
805 : END IF
806 : ELSE
807 316396 : IF (i0 == 1 .AND. nrow_global == nrow) THEN
808 7552865 : DO i = 1, nrow_local
809 7552865 : full_block(i, j) = new_values(row_indices(i), this_col)
810 : END DO
811 : ELSE
812 538491 : DO i = 1, nrow_local
813 528241 : this_row = row_indices(i) - i0 + 1
814 538491 : IF (this_row >= 1 .AND. this_row <= nrow) THEN
815 58493 : full_block(i, j) = new_values(this_row, this_col)
816 : END IF
817 : END DO
818 : END IF
819 : END IF
820 : END IF
821 : END DO
822 : ELSE
823 836336 : DO j = 1, ncol_local
824 826990 : this_col = col_indices(j) - j0 + 1
825 836336 : IF (this_col >= 1 .AND. this_col <= ncol) THEN
826 826990 : IF (tr_a) THEN
827 88278291 : DO i = 1, nrow_local
828 87451301 : this_row = row_indices(i) - i0 + 1
829 88278291 : IF (this_row >= 1 .AND. this_row <= nrow) THEN
830 : full_block(i, j) = al*new_values(this_col, this_row) + &
831 413495 : be*full_block(i, j)
832 : END IF
833 : END DO
834 : ELSE
835 0 : DO i = 1, nrow_local
836 0 : this_row = row_indices(i) - i0 + 1
837 0 : IF (this_row >= 1 .AND. this_row <= nrow) THEN
838 : full_block(i, j) = al*new_values(this_row, this_col) + &
839 0 : be*full_block(i, j)
840 : END IF
841 : END DO
842 : END IF
843 : END IF
844 : END DO
845 : END IF
846 :
847 77583 : END SUBROUTINE cp_fm_set_submatrix
848 :
849 : ! **************************************************************************************************
850 : !> \brief sets a submatrix of a full matrix to a given value
851 : !> fm(start_row:start_row+n_rows,start_col:start_col+n_cols) = value
852 : !> \param fm the full to change
853 : !> \param new_value ...
854 : !> \param start_row the starting row of matrix
855 : !> \param start_col the starting col of matrix
856 : !> \param n_rows the number of rows to change
857 : !> \param n_cols the number of columns to change
858 : !> \par History
859 : !> 07.2002 created borrowing from Joost's blacs_replicated_copy [fawzi]
860 : !> 12.2025 created from cp_fm_set_submatrix
861 : !> \author JGH
862 : ! **************************************************************************************************
863 1064067 : SUBROUTINE cp_fm_set_all_submatrix(fm, new_value, start_row, start_col, n_rows, n_cols)
864 : TYPE(cp_fm_type), INTENT(IN) :: fm
865 : REAL(KIND=dp), INTENT(in) :: new_value
866 : INTEGER, INTENT(in) :: start_row, start_col, n_rows, n_cols
867 :
868 : INTEGER :: i, i0, j, j0, ncol_global, ncol_local, &
869 : nrow_global, nrow_local, this_col, &
870 : this_row
871 1064067 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
872 1064067 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: full_block
873 :
874 1064067 : full_block => fm%local_data
875 :
876 : CALL cp_fm_get_info(matrix=fm, &
877 : nrow_global=nrow_global, ncol_global=ncol_global, &
878 : nrow_local=nrow_local, ncol_local=ncol_local, &
879 1064067 : row_indices=row_indices, col_indices=col_indices)
880 :
881 1064067 : i0 = start_row
882 1064067 : j0 = start_col
883 20354767 : DO j = 1, ncol_local
884 19290700 : this_col = col_indices(j) - j0 + 1
885 20354767 : IF (this_col >= 1 .AND. this_col <= n_cols) THEN
886 640146006 : DO i = 1, nrow_local
887 620915268 : this_row = row_indices(i) - i0 + 1
888 640146006 : IF (this_row >= 1 .AND. this_row <= n_rows) THEN
889 613751456 : full_block(i, j) = new_value
890 : END IF
891 : END DO
892 : END IF
893 : END DO
894 :
895 1064067 : END SUBROUTINE cp_fm_set_all_submatrix
896 :
897 : ! **************************************************************************************************
898 : !> \brief gets a submatrix of a full matrix
899 : !> op(target_m)(1:n_rows,1:n_cols)
900 : !> =fm(start_row:start_row+n_rows,start_col:start_col+n_cols)
901 : !> target_m is replicated on all cpus
902 : !> using this call is expensive
903 : !> \param fm the full you want to get the info from
904 : !> \param target_m a replicated full matrix that will contain the result
905 : !> \param start_row the starting row of b_matrix (defaults to 1)
906 : !> \param start_col the starting col of b_matrix (defaults to 1)
907 : !> \param n_rows the number of row to change in b (defaults to
908 : !> size(op(new_values),1))
909 : !> \param n_cols the number of columns to change in b (defaults to
910 : !> size(op(new_values),2))
911 : !> \param transpose if target_m should be transposed: if true
912 : !> op(target_m)=target_m^T, else op(target_m)=target_m
913 : !> (defaults to false)
914 : !> \par History
915 : !> 07.2002 created borrowing from Joost's blacs_replicated_copy [fawzi]
916 : !> \author Fawzi Mohamed
917 : !> \note
918 : !> optimized for full column updates. Zeros out a little too much
919 : !> of target_m
920 : !> the target_m is replicated and valid on all cpus
921 : ! **************************************************************************************************
922 146652 : SUBROUTINE cp_fm_get_submatrix(fm, target_m, start_row, &
923 : start_col, n_rows, n_cols, transpose)
924 : TYPE(cp_fm_type), INTENT(IN) :: fm
925 : REAL(KIND=dp), DIMENSION(:, :), INTENT(out) :: target_m
926 : INTEGER, INTENT(in), OPTIONAL :: start_row, start_col, n_rows, n_cols
927 : LOGICAL, INTENT(in), OPTIONAL :: transpose
928 :
929 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_get_submatrix'
930 :
931 : INTEGER :: handle, i, i0, j, j0, ncol, ncol_global, &
932 : ncol_local, nrow, nrow_global, &
933 : nrow_local, this_col, this_row
934 146652 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
935 : LOGICAL :: tr_a
936 146652 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: full_block
937 : TYPE(mp_para_env_type), POINTER :: para_env
938 :
939 146652 : CALL timeset(routineN, handle)
940 :
941 146652 : i0 = 1; j0 = 1; tr_a = .FALSE.
942 :
943 146652 : IF (PRESENT(start_row)) i0 = start_row
944 146652 : IF (PRESENT(start_col)) j0 = start_col
945 146652 : IF (PRESENT(transpose)) tr_a = transpose
946 6152 : IF (tr_a) THEN
947 2206 : nrow = SIZE(target_m, 2)
948 2206 : ncol = SIZE(target_m, 1)
949 : ELSE
950 144446 : nrow = SIZE(target_m, 1)
951 144446 : ncol = SIZE(target_m, 2)
952 : END IF
953 146652 : IF (PRESENT(n_rows)) nrow = n_rows
954 146652 : IF (PRESENT(n_cols)) ncol = n_cols
955 :
956 146652 : para_env => fm%matrix_struct%para_env
957 :
958 146652 : full_block => fm%local_data
959 : #if defined(__parallel)
960 : ! zero-out whole target_m
961 146652 : IF (SIZE(target_m, 1)*SIZE(target_m, 2) /= 0) THEN
962 164842 : CALL dcopy(SIZE(target_m, 1)*SIZE(target_m, 2), [0.0_dp], 0, target_m, 1)
963 : END IF
964 : #endif
965 :
966 : CALL cp_fm_get_info(matrix=fm, &
967 : nrow_global=nrow_global, ncol_global=ncol_global, &
968 : nrow_local=nrow_local, ncol_local=ncol_local, &
969 146652 : row_indices=row_indices, col_indices=col_indices)
970 :
971 843124 : DO j = 1, ncol_local
972 696472 : this_col = col_indices(j) - j0 + 1
973 843124 : IF (this_col >= 1 .AND. this_col <= ncol) THEN
974 560004 : IF (tr_a) THEN
975 2206 : IF (i0 == 1 .AND. nrow_global == nrow) THEN
976 80516 : DO i = 1, nrow_local
977 80516 : target_m(this_col, row_indices(i)) = full_block(i, j)
978 : END DO
979 : ELSE
980 0 : DO i = 1, nrow_local
981 0 : this_row = row_indices(i) - i0 + 1
982 0 : IF (this_row >= 1 .AND. this_row <= nrow) THEN
983 0 : target_m(this_col, this_row) = full_block(i, j)
984 : END IF
985 : END DO
986 : END IF
987 : ELSE
988 557798 : IF (i0 == 1 .AND. nrow_global == nrow) THEN
989 11846989 : DO i = 1, nrow_local
990 11846989 : target_m(row_indices(i), this_col) = full_block(i, j)
991 : END DO
992 : ELSE
993 1735300 : DO i = 1, nrow_local
994 1694848 : this_row = row_indices(i) - i0 + 1
995 1735300 : IF (this_row >= 1 .AND. this_row <= nrow) THEN
996 85570 : target_m(this_row, this_col) = full_block(i, j)
997 : END IF
998 : END DO
999 : END IF
1000 : END IF
1001 : END IF
1002 : END DO
1003 :
1004 38939236 : CALL para_env%sum(target_m)
1005 :
1006 146652 : CALL timestop(handle)
1007 :
1008 146652 : END SUBROUTINE cp_fm_get_submatrix
1009 :
1010 : ! **************************************************************************************************
1011 : !> \brief returns all kind of information about the full matrix
1012 : !> \param matrix ...
1013 : !> \param name ...
1014 : !> \param nrow_global ...
1015 : !> \param ncol_global ...
1016 : !> \param nrow_block ...
1017 : !> \param ncol_block ...
1018 : !> \param nrow_local ...
1019 : !> \param ncol_local ...
1020 : !> \param row_indices ...
1021 : !> \param col_indices ...
1022 : !> \param local_data ...
1023 : !> \param context ...
1024 : !> \param nrow_locals ...
1025 : !> \param ncol_locals ...
1026 : !> \param matrix_struct ...
1027 : !> \param para_env ...
1028 : !> \note
1029 : !> see also cp_fm_struct for explanation
1030 : !> - nrow_local, ncol_local, row_indices, col_indices, local_data are hooks for efficient
1031 : !> access to the local blacs block
1032 : ! **************************************************************************************************
1033 6159393 : SUBROUTINE cp_fm_get_info(matrix, name, nrow_global, ncol_global, &
1034 : nrow_block, ncol_block, nrow_local, ncol_local, &
1035 : row_indices, col_indices, local_data, context, &
1036 : nrow_locals, ncol_locals, matrix_struct, para_env)
1037 :
1038 : TYPE(cp_fm_type), INTENT(IN) :: matrix
1039 : CHARACTER(LEN=*), INTENT(OUT), OPTIONAL :: name
1040 : INTEGER, INTENT(OUT), OPTIONAL :: nrow_global, ncol_global, nrow_block, &
1041 : ncol_block, nrow_local, ncol_local
1042 : INTEGER, DIMENSION(:), OPTIONAL, POINTER :: row_indices, col_indices
1043 : REAL(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
1044 : OPTIONAL, POINTER :: local_data
1045 : TYPE(cp_blacs_env_type), OPTIONAL, POINTER :: context
1046 : INTEGER, DIMENSION(:), OPTIONAL, POINTER :: nrow_locals, ncol_locals
1047 : TYPE(cp_fm_struct_type), OPTIONAL, POINTER :: matrix_struct
1048 : TYPE(mp_para_env_type), OPTIONAL, POINTER :: para_env
1049 :
1050 8 : IF (PRESENT(name)) name = matrix%name
1051 6159393 : IF (PRESENT(matrix_struct)) matrix_struct => matrix%matrix_struct
1052 6159393 : IF (PRESENT(local_data)) local_data => matrix%local_data ! not hiding things anymore :-(
1053 :
1054 : CALL cp_fm_struct_get(matrix%matrix_struct, nrow_local=nrow_local, &
1055 : ncol_local=ncol_local, nrow_global=nrow_global, &
1056 : ncol_global=ncol_global, nrow_block=nrow_block, &
1057 : ncol_block=ncol_block, row_indices=row_indices, &
1058 : col_indices=col_indices, nrow_locals=nrow_locals, &
1059 6159393 : ncol_locals=ncol_locals, context=context, para_env=para_env)
1060 :
1061 6159393 : END SUBROUTINE cp_fm_get_info
1062 :
1063 : ! **************************************************************************************************
1064 : !> \brief Write nicely formatted info about the FM to the given I/O unit (including the underlying FM struct)
1065 : !> \param matrix a cp_fm_type instance
1066 : !> \param io_unit the I/O unit to use for writing
1067 : ! **************************************************************************************************
1068 3 : SUBROUTINE cp_fm_write_info(matrix, io_unit)
1069 : TYPE(cp_fm_type), INTENT(IN) :: matrix
1070 : INTEGER, INTENT(IN) :: io_unit
1071 :
1072 3 : WRITE (io_unit, '(/,A,A12)') "CP_FM | Name: ", matrix%name
1073 3 : CALL cp_fm_struct_write_info(matrix%matrix_struct, io_unit)
1074 3 : END SUBROUTINE cp_fm_write_info
1075 :
1076 : ! **************************************************************************************************
1077 : !> \brief find the maximum absolute value of the matrix element
1078 : !> maxval(abs(matrix))
1079 : !> \param matrix ...
1080 : !> \param a_max ...
1081 : !> \param ir_max ...
1082 : !> \param ic_max ...
1083 : ! **************************************************************************************************
1084 123695 : SUBROUTINE cp_fm_maxabsval(matrix, a_max, ir_max, ic_max)
1085 : TYPE(cp_fm_type), INTENT(IN) :: matrix
1086 : REAL(KIND=dp), INTENT(OUT) :: a_max
1087 : INTEGER, INTENT(OUT), OPTIONAL :: ir_max, ic_max
1088 :
1089 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_maxabsval'
1090 :
1091 : INTEGER :: handle, i, ic_max_local, ir_max_local, &
1092 : j, mepos, ncol_local, nrow_local, &
1093 : num_pe
1094 123695 : INTEGER, ALLOCATABLE, DIMENSION(:) :: ic_max_vec, ir_max_vec
1095 123695 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1096 : REAL(dp) :: my_max
1097 123695 : REAL(dp), ALLOCATABLE, DIMENSION(:) :: a_max_vec
1098 123695 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: my_block
1099 :
1100 123695 : CALL timeset(routineN, handle)
1101 :
1102 123695 : my_block => matrix%local_data
1103 :
1104 : CALL cp_fm_get_info(matrix, nrow_local=nrow_local, ncol_local=ncol_local, &
1105 123695 : row_indices=row_indices, col_indices=col_indices)
1106 :
1107 75967129 : a_max = MAXVAL(ABS(my_block(1:nrow_local, 1:ncol_local)))
1108 :
1109 123695 : IF (PRESENT(ir_max)) THEN
1110 0 : num_pe = matrix%matrix_struct%para_env%num_pe
1111 0 : mepos = matrix%matrix_struct%para_env%mepos
1112 0 : ALLOCATE (ir_max_vec(0:num_pe - 1))
1113 0 : ir_max_vec(0:num_pe - 1) = 0
1114 0 : ALLOCATE (ic_max_vec(0:num_pe - 1))
1115 0 : ic_max_vec(0:num_pe - 1) = 0
1116 0 : ALLOCATE (a_max_vec(0:num_pe - 1))
1117 0 : a_max_vec(0:num_pe - 1) = 0.0_dp
1118 0 : my_max = 0.0_dp
1119 :
1120 0 : IF ((ncol_local > 0) .AND. (nrow_local > 0)) THEN
1121 0 : DO i = 1, ncol_local
1122 0 : DO j = 1, nrow_local
1123 0 : IF (ABS(my_block(j, i)) > my_max) THEN
1124 0 : my_max = my_block(j, i)
1125 0 : ir_max_local = j
1126 0 : ic_max_local = i
1127 : END IF
1128 : END DO
1129 : END DO
1130 :
1131 0 : a_max_vec(mepos) = my_max
1132 0 : ir_max_vec(mepos) = row_indices(ir_max_local)
1133 0 : ic_max_vec(mepos) = col_indices(ic_max_local)
1134 :
1135 : END IF
1136 :
1137 0 : CALL matrix%matrix_struct%para_env%sum(a_max_vec)
1138 0 : CALL matrix%matrix_struct%para_env%sum(ir_max_vec)
1139 0 : CALL matrix%matrix_struct%para_env%sum(ic_max_vec)
1140 :
1141 0 : my_max = 0.0_dp
1142 0 : DO i = 0, num_pe - 1
1143 0 : IF (a_max_vec(i) > my_max) THEN
1144 0 : ir_max = ir_max_vec(i)
1145 0 : ic_max = ic_max_vec(i)
1146 : END IF
1147 : END DO
1148 :
1149 0 : DEALLOCATE (ir_max_vec, ic_max_vec, a_max_vec)
1150 0 : CPASSERT(ic_max > 0)
1151 0 : CPASSERT(ir_max > 0)
1152 :
1153 : END IF
1154 :
1155 123695 : CALL matrix%matrix_struct%para_env%max(a_max)
1156 :
1157 123695 : CALL timestop(handle)
1158 :
1159 247390 : END SUBROUTINE cp_fm_maxabsval
1160 :
1161 : ! **************************************************************************************************
1162 : !> \brief find the maximum over the rows of the sum of the absolute values of the elements of a given row
1163 : !> = || A ||_infinity
1164 : !> \param matrix ...
1165 : !> \param a_max ...
1166 : !> \note
1167 : !> for a real symmetric matrix it holds that || A ||_2 = |lambda_max| < || A ||_infinity
1168 : !> Hence this can be used to estimate an upper bound for the eigenvalues of a matrix
1169 : !> http://mathworld.wolfram.com/MatrixNorm.html
1170 : !> (but the bound is not so tight in the general case)
1171 : ! **************************************************************************************************
1172 4986 : SUBROUTINE cp_fm_maxabsrownorm(matrix, a_max)
1173 : TYPE(cp_fm_type), INTENT(IN) :: matrix
1174 : REAL(KIND=dp), INTENT(OUT) :: a_max
1175 :
1176 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_maxabsrownorm'
1177 :
1178 : INTEGER :: handle, i, j, ncol_local, nrow_global, &
1179 : nrow_local
1180 4986 : INTEGER, DIMENSION(:), POINTER :: row_indices
1181 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: values
1182 4986 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: my_block
1183 :
1184 4986 : CALL timeset(routineN, handle)
1185 :
1186 4986 : my_block => matrix%local_data
1187 :
1188 : CALL cp_fm_get_info(matrix, row_indices=row_indices, nrow_global=nrow_global, &
1189 4986 : nrow_local=nrow_local, ncol_local=ncol_local)
1190 :
1191 : ! the efficiency could be improved by making use of the row-col distribution of scalapack
1192 14958 : ALLOCATE (values(nrow_global))
1193 4986 : values = 0.0_dp
1194 65834 : DO j = 1, ncol_local
1195 535250 : DO i = 1, nrow_local
1196 530264 : values(row_indices(i)) = values(row_indices(i)) + ABS(my_block(i, j))
1197 : END DO
1198 : END DO
1199 4986 : CALL matrix%matrix_struct%para_env%sum(values)
1200 65834 : a_max = MAXVAL(values)
1201 4986 : DEALLOCATE (values)
1202 :
1203 4986 : CALL timestop(handle)
1204 4986 : END SUBROUTINE cp_fm_maxabsrownorm
1205 :
1206 : ! **************************************************************************************************
1207 : !> \brief find the inorm of each column norm_{j}= sqrt( \sum_{i} A_{ij}*A_{ij} )
1208 : !> \param matrix ...
1209 : !> \param norm_array ...
1210 : ! **************************************************************************************************
1211 1316 : SUBROUTINE cp_fm_vectorsnorm(matrix, norm_array)
1212 : TYPE(cp_fm_type), INTENT(IN) :: matrix
1213 : REAL(KIND=dp), DIMENSION(:), INTENT(OUT) :: norm_array
1214 :
1215 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_vectorsnorm'
1216 :
1217 : INTEGER :: handle, i, j, ncol_global, ncol_local, &
1218 : nrow_local
1219 1316 : INTEGER, DIMENSION(:), POINTER :: col_indices
1220 1316 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: my_block
1221 :
1222 1316 : CALL timeset(routineN, handle)
1223 :
1224 1316 : my_block => matrix%local_data
1225 :
1226 : CALL cp_fm_get_info(matrix, col_indices=col_indices, ncol_global=ncol_global, &
1227 1316 : nrow_local=nrow_local, ncol_local=ncol_local)
1228 :
1229 : ! the efficiency could be improved by making use of the row-col distribution of scalapack
1230 31004 : norm_array = 0.0_dp
1231 31004 : DO j = 1, ncol_local
1232 1325922 : DO i = 1, nrow_local
1233 1324606 : norm_array(col_indices(j)) = norm_array(col_indices(j)) + my_block(i, j)*my_block(i, j)
1234 : END DO
1235 : END DO
1236 60692 : CALL matrix%matrix_struct%para_env%sum(norm_array)
1237 31004 : norm_array = SQRT(norm_array)
1238 :
1239 1316 : CALL timestop(handle)
1240 1316 : END SUBROUTINE cp_fm_vectorsnorm
1241 :
1242 : ! **************************************************************************************************
1243 : !> \brief summing up all the elements along the matrix's i-th index
1244 : !> \f$ \mathrm{sum}_{j} = \sum_{i} A_{ij} \f$
1245 : !> or
1246 : !> \f$ \mathrm{sum}_{i} = \sum_{j} A_{ij} \f$
1247 : !> \param matrix an input matrix A
1248 : !> \param sum_array sums of elements in each column/row
1249 : !> \param dir ...
1250 : !> \note forked from cp_fm_vectorsnorm() to be used with
1251 : !> the maximum overlap method
1252 : !> added row variation
1253 : ! **************************************************************************************************
1254 7582 : SUBROUTINE cp_fm_vectorssum(matrix, sum_array, dir)
1255 : TYPE(cp_fm_type), INTENT(IN) :: matrix
1256 : REAL(KIND=dp), DIMENSION(:), INTENT(OUT) :: sum_array
1257 : CHARACTER(LEN=1), INTENT(IN), OPTIONAL :: dir
1258 :
1259 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_vectorssum'
1260 :
1261 : INTEGER :: handle, i, j, ncol_local, nrow_local
1262 7582 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1263 : LOGICAL :: docol
1264 7582 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: my_block
1265 :
1266 7582 : CALL timeset(routineN, handle)
1267 :
1268 7582 : IF (PRESENT(dir)) THEN
1269 7542 : IF (dir == 'c' .OR. dir == 'C') THEN
1270 : docol = .TRUE.
1271 : ELSEIF (dir == 'r' .OR. dir == 'R') THEN
1272 : docol = .FALSE.
1273 : ELSE
1274 0 : CPABORT('Wrong argument DIR')
1275 : END IF
1276 : ELSE
1277 : docol = .TRUE.
1278 : END IF
1279 :
1280 7582 : my_block => matrix%local_data
1281 :
1282 : CALL cp_fm_get_info(matrix, col_indices=col_indices, row_indices=row_indices, &
1283 7582 : nrow_local=nrow_local, ncol_local=ncol_local)
1284 :
1285 : ! the efficiency could be improved by making use of the row-col distribution of scalapack
1286 248628 : sum_array(:) = 0.0_dp
1287 7582 : IF (docol) THEN
1288 448 : DO j = 1, ncol_local
1289 3628 : DO i = 1, nrow_local
1290 3588 : sum_array(col_indices(j)) = sum_array(col_indices(j)) + my_block(i, j)
1291 : END DO
1292 : END DO
1293 : ELSE
1294 83014 : DO j = 1, ncol_local
1295 6445414 : DO i = 1, nrow_local
1296 6437872 : sum_array(row_indices(i)) = sum_array(row_indices(i)) + my_block(i, j)
1297 : END DO
1298 : END DO
1299 : END IF
1300 489674 : CALL matrix%matrix_struct%para_env%sum(sum_array)
1301 :
1302 7582 : CALL timestop(handle)
1303 7582 : END SUBROUTINE cp_fm_vectorssum
1304 :
1305 : ! **************************************************************************************************
1306 : !> \brief copy one identically sized matrix in the other
1307 : !> \param source ...
1308 : !> \param destination ...
1309 : !> \note
1310 : !> see also cp_fm_to_fm_columns
1311 : ! **************************************************************************************************
1312 549847 : SUBROUTINE cp_fm_to_fm_matrix(source, destination)
1313 :
1314 : TYPE(cp_fm_type), INTENT(IN) :: source, destination
1315 :
1316 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_to_fm_matrix'
1317 :
1318 : INTEGER :: handle, npcol, nprow
1319 :
1320 549847 : CALL timeset(routineN, handle)
1321 :
1322 549847 : nprow = source%matrix_struct%context%num_pe(1)
1323 549847 : npcol = source%matrix_struct%context%num_pe(2)
1324 :
1325 549847 : IF ((.NOT. cp2k_is_parallel) .OR. &
1326 : cp_fm_struct_equivalent(source%matrix_struct, &
1327 : destination%matrix_struct)) THEN
1328 549847 : IF (SIZE(source%local_data, 1) /= SIZE(destination%local_data, 1) .OR. &
1329 : SIZE(source%local_data, 2) /= SIZE(destination%local_data, 2)) &
1330 : CALL cp_abort(__LOCATION__, &
1331 : "Cannot copy full matrix <"//TRIM(source%name)// &
1332 : "> to full matrix <"//TRIM(destination%name)// &
1333 0 : ">. The local_data blocks have different sizes.")
1334 : CALL dcopy(SIZE(source%local_data, 1)*SIZE(source%local_data, 2), &
1335 549847 : source%local_data, 1, destination%local_data, 1)
1336 : ELSE
1337 0 : CPABORT("Data structures of source and target full matrix are not equivalent")
1338 : END IF
1339 :
1340 549847 : CALL timestop(handle)
1341 :
1342 549847 : END SUBROUTINE cp_fm_to_fm_matrix
1343 :
1344 : ! **************************************************************************************************
1345 : !> \brief copy just a subset of columns of a fm to a fm
1346 : !> \param msource ...
1347 : !> \param mtarget ...
1348 : !> \param ncol ...
1349 : !> \param source_start ...
1350 : !> \param target_start ...
1351 : ! **************************************************************************************************
1352 172628 : SUBROUTINE cp_fm_to_fm_columns(msource, mtarget, ncol, source_start, &
1353 : target_start)
1354 :
1355 : TYPE(cp_fm_type), INTENT(IN) :: msource, mtarget
1356 : INTEGER, INTENT(IN) :: ncol
1357 : INTEGER, INTENT(IN), OPTIONAL :: source_start, target_start
1358 :
1359 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_to_fm_columns'
1360 :
1361 : INTEGER :: handle, n, ss, ts
1362 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, b
1363 : #if defined(__parallel)
1364 : INTEGER :: i
1365 : INTEGER, DIMENSION(9) :: desca, descb
1366 : #endif
1367 :
1368 172628 : CALL timeset(routineN, handle)
1369 :
1370 172628 : ss = 1
1371 172628 : ts = 1
1372 :
1373 172628 : IF (PRESENT(source_start)) ss = source_start
1374 172628 : IF (PRESENT(target_start)) ts = target_start
1375 :
1376 172628 : n = msource%matrix_struct%nrow_global
1377 :
1378 172628 : a => msource%local_data
1379 172628 : b => mtarget%local_data
1380 :
1381 : #if defined(__parallel)
1382 1726280 : desca(:) = msource%matrix_struct%descriptor(:)
1383 1726280 : descb(:) = mtarget%matrix_struct%descriptor(:)
1384 742692 : DO i = 0, ncol - 1
1385 742692 : CALL pdcopy(n, a, 1, ss + i, desca, 1, b, 1, ts + i, descb, 1)
1386 : END DO
1387 : #else
1388 : IF (ss <= SIZE(a, 2) .AND. ts <= SIZE(b, 2)) THEN
1389 : CALL dcopy(ncol*n, a(:, ss), 1, b(:, ts), 1)
1390 : END IF
1391 : #endif
1392 :
1393 172628 : CALL timestop(handle)
1394 :
1395 172628 : END SUBROUTINE cp_fm_to_fm_columns
1396 :
1397 : ! **************************************************************************************************
1398 : !> \brief copy just a triangular matrix
1399 : !> \param msource ...
1400 : !> \param mtarget ...
1401 : !> \param uplo ...
1402 : ! **************************************************************************************************
1403 58 : SUBROUTINE cp_fm_to_fm_triangular(msource, mtarget, uplo)
1404 :
1405 : TYPE(cp_fm_type), INTENT(IN) :: msource, mtarget
1406 : CHARACTER(LEN=1), OPTIONAL, INTENT(IN) :: uplo
1407 :
1408 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_to_fm_triangular'
1409 :
1410 : CHARACTER(LEN=1) :: myuplo
1411 : INTEGER :: handle, ncol, nrow
1412 58 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, b
1413 : #if defined(__parallel)
1414 : INTEGER, DIMENSION(9) :: desca, descb
1415 : #endif
1416 :
1417 58 : CALL timeset(routineN, handle)
1418 :
1419 58 : myuplo = 'U'
1420 58 : IF (PRESENT(uplo)) myuplo = uplo
1421 :
1422 58 : nrow = msource%matrix_struct%nrow_global
1423 58 : ncol = msource%matrix_struct%ncol_global
1424 :
1425 58 : a => msource%local_data
1426 58 : b => mtarget%local_data
1427 :
1428 : #if defined(__parallel)
1429 580 : desca(:) = msource%matrix_struct%descriptor(:)
1430 580 : descb(:) = mtarget%matrix_struct%descriptor(:)
1431 58 : CALL pdlacpy(myuplo, nrow, ncol, a(1, 1), 1, 1, desca, b(1, 1), 1, 1, descb)
1432 : #else
1433 : CALL dlacpy(myuplo, nrow, ncol, a(1, 1), nrow, b(1, 1), nrow)
1434 : #endif
1435 :
1436 58 : CALL timestop(handle)
1437 :
1438 58 : END SUBROUTINE cp_fm_to_fm_triangular
1439 :
1440 : ! **************************************************************************************************
1441 : !> \brief copy just a part ot the matrix
1442 : !> \param msource ...
1443 : !> \param mtarget ...
1444 : !> \param nrow ...
1445 : !> \param ncol ...
1446 : !> \param s_firstrow ...
1447 : !> \param s_firstcol ...
1448 : !> \param t_firstrow ...
1449 : !> \param t_firstcol ...
1450 : ! **************************************************************************************************
1451 :
1452 13764 : SUBROUTINE cp_fm_to_fm_submat(msource, mtarget, nrow, ncol, s_firstrow, s_firstcol, t_firstrow, t_firstcol)
1453 :
1454 : TYPE(cp_fm_type), INTENT(IN) :: msource, mtarget
1455 : INTEGER, INTENT(IN) :: nrow, ncol, s_firstrow, &
1456 : s_firstcol, t_firstrow, &
1457 : t_firstcol
1458 :
1459 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_to_fm_submat'
1460 :
1461 : INTEGER :: handle, i, na, nb, ss, ts
1462 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, b
1463 : #if defined(__parallel)
1464 : INTEGER, DIMENSION(9) :: desca, descb
1465 : #endif
1466 :
1467 13764 : CALL timeset(routineN, handle)
1468 :
1469 13764 : a => msource%local_data
1470 13764 : b => mtarget%local_data
1471 :
1472 13764 : na = msource%matrix_struct%nrow_global
1473 13764 : nb = mtarget%matrix_struct%nrow_global
1474 : ! nrow must be <= na and nb
1475 13764 : IF (nrow > na) &
1476 0 : CPABORT("cannot copy because nrow > number of rows of source matrix")
1477 13764 : IF (nrow > nb) &
1478 0 : CPABORT("cannot copy because nrow > number of rows of target matrix")
1479 13764 : na = msource%matrix_struct%ncol_global
1480 13764 : nb = mtarget%matrix_struct%ncol_global
1481 : ! ncol must be <= na_col and nb_col
1482 13764 : IF (ncol > na) &
1483 0 : CPABORT("cannot copy because nrow > number of rows of source matrix")
1484 13764 : IF (ncol > nb) &
1485 0 : CPABORT("cannot copy because nrow > number of rows of target matrix")
1486 :
1487 : #if defined(__parallel)
1488 137640 : desca(:) = msource%matrix_struct%descriptor(:)
1489 137640 : descb(:) = mtarget%matrix_struct%descriptor(:)
1490 145704 : DO i = 0, ncol - 1
1491 131940 : ss = s_firstcol + i
1492 131940 : ts = t_firstcol + i
1493 145704 : CALL pdcopy(nrow, a, s_firstrow, ss, desca, 1, b, t_firstrow, ts, descb, 1)
1494 : END DO
1495 : #else
1496 : DO i = 0, ncol - 1
1497 : ss = s_firstcol + i
1498 : ts = t_firstcol + i
1499 : CALL dcopy(nrow, a(s_firstrow:, ss), 1, b(t_firstrow:, ts), 1)
1500 : END DO
1501 : #endif
1502 :
1503 13764 : CALL timestop(handle)
1504 13764 : END SUBROUTINE cp_fm_to_fm_submat
1505 :
1506 : ! **************************************************************************************************
1507 : !> \brief General copy of a fm matrix to another fm matrix.
1508 : !> Uses non-blocking MPI rather than ScaLAPACK.
1509 : !>
1510 : !> \param source input fm matrix
1511 : !> \param destination output fm matrix
1512 : !> \param para_env parallel environment corresponding to the BLACS env that covers all parts
1513 : !> of the input and output matrices
1514 : !> \par History
1515 : !> 31-Jan-2017 : Re-implemented using non-blocking MPI [IainB, MarkT]
1516 : ! **************************************************************************************************
1517 18980 : SUBROUTINE cp_fm_copy_general(source, destination, para_env)
1518 : TYPE(cp_fm_type), INTENT(IN) :: source, destination
1519 : TYPE(mp_para_env_type), INTENT(IN) :: para_env
1520 :
1521 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_copy_general'
1522 :
1523 : INTEGER :: handle
1524 170820 : TYPE(copy_info_type) :: info
1525 :
1526 18980 : CALL timeset(routineN, handle)
1527 :
1528 18980 : CALL cp_fm_start_copy_general(source, destination, para_env, info)
1529 18980 : IF (ASSOCIATED(destination%matrix_struct)) THEN
1530 18966 : CALL cp_fm_finish_copy_general(destination, info)
1531 : END IF
1532 18980 : IF (ASSOCIATED(source%matrix_struct)) THEN
1533 18775 : CALL cp_fm_cleanup_copy_general(info)
1534 : END IF
1535 :
1536 18980 : CALL timestop(handle)
1537 18980 : END SUBROUTINE cp_fm_copy_general
1538 :
1539 : ! **************************************************************************************************
1540 : !> \brief Initiates the copy operation: get distribution data, post MPI isend and irecvs
1541 : !> \param source input fm matrix
1542 : !> \param destination output fm matrix
1543 : !> \param para_env parallel environment corresponding to the BLACS env that covers all parts
1544 : !> of the input and output matrices
1545 : !> \param info all of the data that will be needed to complete the copy operation
1546 : ! **************************************************************************************************
1547 6635320 : SUBROUTINE cp_fm_start_copy_general(source, destination, para_env, info)
1548 : TYPE(cp_fm_type), INTENT(IN) :: source, destination
1549 : TYPE(mp_para_env_type), INTENT(IN) :: para_env
1550 : TYPE(copy_info_type), INTENT(OUT) :: info
1551 :
1552 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_start_copy_general'
1553 :
1554 : INTEGER :: dest_p_i, dest_q_j, global_rank, global_size, handle, i, j, k, mpi_rank, &
1555 : ncol_block_dest, ncol_block_src, ncol_local_recv, ncol_local_send, ncols, &
1556 : nrow_block_dest, nrow_block_src, nrow_local_recv, nrow_local_send, nrows, p, q, &
1557 : recv_rank, recv_size, send_rank, send_size
1558 663532 : INTEGER, ALLOCATABLE, DIMENSION(:) :: all_ranks, dest2global, dest_p, dest_q, &
1559 1327064 : recv_count, send_count, send_disp, &
1560 663532 : source2global, src_p, src_q
1561 663532 : INTEGER, ALLOCATABLE, DIMENSION(:, :) :: dest_blacs2mpi
1562 : INTEGER, DIMENSION(2) :: dest_block, dest_block_tmp, dest_num_pe, &
1563 : src_block, src_block_tmp, src_num_pe
1564 1327064 : INTEGER, DIMENSION(:), POINTER :: recv_col_indices, recv_row_indices, &
1565 1327064 : send_col_indices, send_row_indices
1566 : TYPE(cp_fm_struct_type), POINTER :: recv_dist, send_dist
1567 9289448 : TYPE(mp_request_type), DIMENSION(6) :: recv_req, send_req
1568 :
1569 663532 : CALL timeset(routineN, handle)
1570 :
1571 : IF (.NOT. cp2k_is_parallel) THEN
1572 : ! Just copy all of the matrix data into a 'send buffer', to be unpacked later
1573 : nrow_local_send = SIZE(source%local_data, 1)
1574 : ncol_local_send = SIZE(source%local_data, 2)
1575 : ALLOCATE (info%send_buf(nrow_local_send*ncol_local_send))
1576 : k = 0
1577 : DO j = 1, ncol_local_send
1578 : DO i = 1, nrow_local_send
1579 : k = k + 1
1580 : info%send_buf(k) = source%local_data(i, j)
1581 : END DO
1582 : END DO
1583 : ELSE
1584 663532 : NULLIFY (recv_dist, send_dist)
1585 663532 : NULLIFY (recv_col_indices, recv_row_indices, send_col_indices, send_row_indices)
1586 :
1587 : ! The 'global' communicator contains both the source and destination decompositions
1588 663532 : global_size = para_env%num_pe
1589 663532 : global_rank = para_env%mepos
1590 :
1591 : ! The source/send decomposition and destination/recv decompositions may only exist on
1592 : ! on a subset of the processes involved in the communication
1593 : ! Check if the source and/or destination arguments are .not. ASSOCIATED():
1594 : ! if so, skip the send / recv parts (since these processes do not participate in the sending/receiving distribution)
1595 663532 : IF (ASSOCIATED(destination%matrix_struct)) THEN
1596 495266 : recv_dist => destination%matrix_struct
1597 495266 : recv_rank = recv_dist%para_env%mepos
1598 : ELSE
1599 168266 : recv_rank = mp_proc_null
1600 : END IF
1601 :
1602 663532 : IF (ASSOCIATED(source%matrix_struct)) THEN
1603 578863 : send_dist => source%matrix_struct
1604 578863 : send_rank = send_dist%para_env%mepos
1605 : ELSE
1606 84669 : send_rank = mp_proc_null
1607 : END IF
1608 :
1609 : ! Map the rank in the source/dest communicator to the global rank
1610 1990596 : ALLOCATE (all_ranks(0:global_size - 1))
1611 :
1612 663532 : CALL para_env%allgather(send_rank, all_ranks)
1613 663532 : IF (ASSOCIATED(recv_dist)) THEN
1614 2476330 : ALLOCATE (source2global(0:COUNT(all_ranks /= mp_proc_null) - 1))
1615 1485798 : DO i = 0, global_size - 1
1616 1485798 : IF (all_ranks(i) /= mp_proc_null) THEN
1617 821194 : source2global(all_ranks(i)) = i
1618 : END IF
1619 : END DO
1620 : END IF
1621 :
1622 663532 : CALL para_env%allgather(recv_rank, all_ranks)
1623 663532 : IF (ASSOCIATED(send_dist)) THEN
1624 2894315 : ALLOCATE (dest2global(0:COUNT(all_ranks /= mp_proc_null) - 1))
1625 1736589 : DO i = 0, global_size - 1
1626 1736589 : IF (all_ranks(i) /= mp_proc_null) THEN
1627 821194 : dest2global(all_ranks(i)) = i
1628 : END IF
1629 : END DO
1630 : END IF
1631 663532 : DEALLOCATE (all_ranks)
1632 :
1633 : ! Some data from the two decompositions will be needed by all processes in the global group :
1634 : ! process grid shape, block size, and the BLACS-to-MPI mapping
1635 :
1636 : ! The global root process will receive the data (from the root process in each decomposition)
1637 4644724 : send_req(:) = mp_request_null
1638 663532 : IF (global_rank == 0) THEN
1639 2322362 : recv_req(:) = mp_request_null
1640 331766 : CALL para_env%irecv(src_block, mp_any_source, recv_req(1), tag=src_tag)
1641 331766 : CALL para_env%irecv(dest_block, mp_any_source, recv_req(2), tag=dest_tag)
1642 331766 : CALL para_env%irecv(src_num_pe, mp_any_source, recv_req(3), tag=src_tag)
1643 331766 : CALL para_env%irecv(dest_num_pe, mp_any_source, recv_req(4), tag=dest_tag)
1644 : END IF
1645 :
1646 663532 : IF (ASSOCIATED(send_dist)) THEN
1647 578863 : IF ((send_rank == 0)) THEN
1648 : ! need to use separate buffers here in case this is actually global rank 0
1649 995298 : src_block_tmp = [send_dist%nrow_block, send_dist%ncol_block]
1650 331766 : CALL para_env%isend(src_block_tmp, 0, send_req(1), tag=src_tag)
1651 331766 : CALL para_env%isend(send_dist%context%num_pe, 0, send_req(2), tag=src_tag)
1652 : END IF
1653 : END IF
1654 :
1655 663532 : IF (ASSOCIATED(recv_dist)) THEN
1656 495266 : IF ((recv_rank == 0)) THEN
1657 995298 : dest_block_tmp = [recv_dist%nrow_block, recv_dist%ncol_block]
1658 331766 : CALL para_env%isend(dest_block_tmp, 0, send_req(3), tag=dest_tag)
1659 331766 : CALL para_env%isend(recv_dist%context%num_pe, 0, send_req(4), tag=dest_tag)
1660 : END IF
1661 : END IF
1662 :
1663 663532 : IF (global_rank == 0) THEN
1664 331766 : CALL mp_waitall(recv_req(1:4))
1665 : ! Now we know the process decomposition, we can allocate the arrays to hold the blacs2mpi mapping
1666 0 : ALLOCATE (info%src_blacs2mpi(0:src_num_pe(1) - 1, 0:src_num_pe(2) - 1), &
1667 : dest_blacs2mpi(0:dest_num_pe(1) - 1, 0:dest_num_pe(2) - 1) &
1668 2322362 : )
1669 331766 : CALL para_env%irecv(info%src_blacs2mpi, mp_any_source, recv_req(5), tag=src_tag)
1670 331766 : CALL para_env%irecv(dest_blacs2mpi, mp_any_source, recv_req(6), tag=dest_tag)
1671 : END IF
1672 :
1673 663532 : IF (ASSOCIATED(send_dist)) THEN
1674 578863 : IF ((send_rank == 0)) THEN
1675 331766 : CALL para_env%isend(send_dist%context%blacs2mpi(:, :), 0, send_req(5), tag=src_tag)
1676 : END IF
1677 : END IF
1678 :
1679 663532 : IF (ASSOCIATED(recv_dist)) THEN
1680 495266 : IF ((recv_rank == 0)) THEN
1681 331766 : CALL para_env%isend(recv_dist%context%blacs2mpi(:, :), 0, send_req(6), tag=dest_tag)
1682 : END IF
1683 : END IF
1684 :
1685 663532 : IF (global_rank == 0) THEN
1686 331766 : CALL mp_waitall(recv_req(5:6))
1687 : END IF
1688 :
1689 : ! Finally, broadcast the data to all processes in the global communicator
1690 663532 : CALL para_env%bcast(src_block, 0)
1691 663532 : CALL para_env%bcast(dest_block, 0)
1692 663532 : CALL para_env%bcast(src_num_pe, 0)
1693 663532 : CALL para_env%bcast(dest_num_pe, 0)
1694 1990596 : info%src_num_pe(1:2) = src_num_pe(1:2)
1695 1990596 : info%nblock_src(1:2) = src_block(1:2)
1696 663532 : IF (global_rank /= 0) THEN
1697 0 : ALLOCATE (info%src_blacs2mpi(0:src_num_pe(1) - 1, 0:src_num_pe(2) - 1), &
1698 0 : dest_blacs2mpi(0:dest_num_pe(1) - 1, 0:dest_num_pe(2) - 1) &
1699 2322362 : )
1700 : END IF
1701 663532 : CALL para_env%bcast(info%src_blacs2mpi, 0)
1702 663532 : CALL para_env%bcast(dest_blacs2mpi, 0)
1703 :
1704 663532 : recv_size = dest_num_pe(1)*dest_num_pe(2)
1705 663532 : send_size = src_num_pe(1)*src_num_pe(2)
1706 663532 : info%send_size = send_size
1707 663532 : CALL mp_waitall(send_req(:))
1708 :
1709 : ! Setup is now complete, we can start the actual communication here.
1710 : ! The order implemented here is:
1711 : ! DEST_1
1712 : ! compute recv sizes
1713 : ! call irecv
1714 : ! SRC_1
1715 : ! compute send sizes
1716 : ! pack send buffers
1717 : ! call isend
1718 : ! DEST_2
1719 : ! wait for the recvs and unpack buffers (this part eventually will go into another routine to allow comms to run concurrently)
1720 : ! SRC_2
1721 : ! wait for the sends
1722 :
1723 : ! DEST_1
1724 663532 : IF (ASSOCIATED(recv_dist)) THEN
1725 : CALL cp_fm_struct_get(recv_dist, row_indices=recv_row_indices, &
1726 : col_indices=recv_col_indices &
1727 495266 : )
1728 495266 : info%recv_col_indices => recv_col_indices
1729 495266 : info%recv_row_indices => recv_row_indices
1730 495266 : nrow_block_src = src_block(1)
1731 495266 : ncol_block_src = src_block(2)
1732 3297524 : ALLOCATE (recv_count(0:send_size - 1), info%recv_disp(0:send_size - 1), info%recv_request(0:send_size - 1))
1733 :
1734 : ! Determine the recv counts, allocate the receive buffers, call mpi_irecv for all the non-zero sized receives
1735 495266 : nrow_local_recv = recv_dist%nrow_locals(recv_dist%context%mepos(1))
1736 495266 : ncol_local_recv = recv_dist%ncol_locals(recv_dist%context%mepos(2))
1737 495266 : info%nlocal_recv(1) = nrow_local_recv
1738 495266 : info%nlocal_recv(2) = ncol_local_recv
1739 : ! Initialise src_p, src_q arrays (sized using number of rows/cols in the receiving distribution)
1740 2476330 : ALLOCATE (src_p(nrow_local_recv), src_q(ncol_local_recv))
1741 10030703 : DO i = 1, nrow_local_recv
1742 : ! For each local row we will receive, we look up its global row (in recv_row_indices),
1743 : ! then work out which row block it comes from, and which process row that row block comes from.
1744 10030703 : src_p(i) = MOD(((recv_row_indices(i) - 1)/nrow_block_src), src_num_pe(1))
1745 : END DO
1746 14479942 : DO j = 1, ncol_local_recv
1747 : ! Similarly for the columns
1748 14479942 : src_q(j) = MOD(((recv_col_indices(j) - 1)/ncol_block_src), src_num_pe(2))
1749 : END DO
1750 : ! src_p/q now contains the process row/column ID that will send data to that row/column
1751 :
1752 990532 : DO q = 0, src_num_pe(2) - 1
1753 14479942 : ncols = COUNT(src_q == q)
1754 1811726 : DO p = 0, src_num_pe(1) - 1
1755 17538594 : nrows = COUNT(src_p == p)
1756 : ! Use the send_dist here as we are looking up the processes where the data comes from
1757 1316460 : recv_count(info%src_blacs2mpi(p, q)) = nrows*ncols
1758 : END DO
1759 : END DO
1760 495266 : DEALLOCATE (src_p, src_q)
1761 :
1762 : ! Use one long buffer (and displacements into that buffer)
1763 : ! this prevents the need for a rectangular array where not all elements will be populated
1764 2306992 : ALLOCATE (info%recv_buf(SUM(recv_count(:))))
1765 495266 : info%recv_disp(0) = 0
1766 821194 : DO i = 1, send_size - 1
1767 821194 : info%recv_disp(i) = info%recv_disp(i - 1) + recv_count(i - 1)
1768 : END DO
1769 :
1770 : ! Issue receive calls on ranks which expect data
1771 1316460 : DO k = 0, send_size - 1
1772 1316460 : IF (recv_count(k) > 0) THEN
1773 : CALL para_env%irecv(info%recv_buf(info%recv_disp(k) + 1:info%recv_disp(k) + recv_count(k)), &
1774 664094 : source2global(k), info%recv_request(k))
1775 : END IF
1776 : END DO
1777 495266 : DEALLOCATE (source2global)
1778 : END IF ! ASSOCIATED(recv_dist)
1779 :
1780 : ! SRC_1
1781 663532 : IF (ASSOCIATED(send_dist)) THEN
1782 : CALL cp_fm_struct_get(send_dist, row_indices=send_row_indices, &
1783 : col_indices=send_col_indices &
1784 578863 : )
1785 578863 : nrow_block_dest = dest_block(1)
1786 578863 : ncol_block_dest = dest_block(2)
1787 3715509 : ALLOCATE (send_count(0:recv_size - 1), send_disp(0:recv_size - 1), info%send_request(0:recv_size - 1))
1788 :
1789 : ! Determine the send counts, allocate the send buffers
1790 578863 : nrow_local_send = send_dist%nrow_locals(send_dist%context%mepos(1))
1791 578863 : ncol_local_send = send_dist%ncol_locals(send_dist%context%mepos(2))
1792 :
1793 : ! Initialise dest_p, dest_q arrays (sized nrow_local, ncol_local)
1794 : ! i.e. number of rows,cols in the sending distribution
1795 2894315 : ALLOCATE (dest_p(nrow_local_send), dest_q(ncol_local_send))
1796 :
1797 10114300 : DO i = 1, nrow_local_send
1798 : ! Use the send_dist%row_indices() here (we are looping over the local rows we will send)
1799 10114300 : dest_p(i) = MOD(((send_row_indices(i) - 1)/nrow_block_dest), dest_num_pe(1))
1800 : END DO
1801 16962651 : DO j = 1, ncol_local_send
1802 16962651 : dest_q(j) = MOD(((send_col_indices(j) - 1)/ncol_block_dest), dest_num_pe(2))
1803 : END DO
1804 : ! dest_p/q now contain the process row/column ID that will receive data from that row/column
1805 :
1806 1157726 : DO q = 0, dest_num_pe(2) - 1
1807 16962651 : ncols = COUNT(dest_q == q)
1808 1978920 : DO p = 0, dest_num_pe(1) - 1
1809 15139984 : nrows = COUNT(dest_p == p)
1810 1400057 : send_count(dest_blacs2mpi(p, q)) = nrows*ncols
1811 : END DO
1812 : END DO
1813 578863 : DEALLOCATE (dest_p, dest_q)
1814 :
1815 : ! Allocate the send buffer using send_count -- and calculate the offset into the buffer for each process
1816 2557783 : ALLOCATE (info%send_buf(SUM(send_count(:))))
1817 578863 : send_disp(0) = 0
1818 821194 : DO k = 1, recv_size - 1
1819 821194 : send_disp(k) = send_disp(k - 1) + send_count(k - 1)
1820 : END DO
1821 :
1822 : ! Loop over the smat, pack the send buffers
1823 578863 : send_count(:) = 0
1824 16962651 : DO j = 1, ncol_local_send
1825 : ! Use send_col_indices and row_indices here, as we are looking up the global row/column number of local rows.
1826 16383788 : dest_q_j = MOD(((send_col_indices(j) - 1)/ncol_block_dest), dest_num_pe(2))
1827 540934331 : DO i = 1, nrow_local_send
1828 523971680 : dest_p_i = MOD(((send_row_indices(i) - 1)/nrow_block_dest), dest_num_pe(1))
1829 523971680 : mpi_rank = dest_blacs2mpi(dest_p_i, dest_q_j)
1830 523971680 : send_count(mpi_rank) = send_count(mpi_rank) + 1
1831 540355468 : info%send_buf(send_disp(mpi_rank) + send_count(mpi_rank)) = source%local_data(i, j)
1832 : END DO
1833 : END DO
1834 :
1835 : ! For each non-zero send_count, call mpi_isend
1836 1400057 : DO k = 0, recv_size - 1
1837 1400057 : IF (send_count(k) > 0) THEN
1838 : CALL para_env%isend(info%send_buf(send_disp(k) + 1:send_disp(k) + send_count(k)), &
1839 664094 : dest2global(k), info%send_request(k))
1840 : END IF
1841 : END DO
1842 578863 : DEALLOCATE (send_count, send_disp, dest2global)
1843 : END IF ! ASSOCIATED(send_dist)
1844 663532 : DEALLOCATE (dest_blacs2mpi)
1845 :
1846 : END IF !IF (.NOT. cp2k_is_parallel)
1847 :
1848 663532 : CALL timestop(handle)
1849 :
1850 2654128 : END SUBROUTINE cp_fm_start_copy_general
1851 :
1852 : ! **************************************************************************************************
1853 : !> \brief Completes the copy operation: wait for comms, unpack, clean up MPI state
1854 : !> \param destination output fm matrix
1855 : !> \param info all of the data that will be needed to complete the copy operation
1856 : ! **************************************************************************************************
1857 495266 : SUBROUTINE cp_fm_finish_copy_general(destination, info)
1858 : TYPE(cp_fm_type), INTENT(IN) :: destination
1859 : TYPE(copy_info_type), INTENT(INOUT) :: info
1860 :
1861 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_finish_copy_general'
1862 :
1863 : INTEGER :: handle, i, j, k, mpi_rank, send_size, &
1864 : src_p_i, src_q_j
1865 495266 : INTEGER, ALLOCATABLE, DIMENSION(:) :: recv_count
1866 : INTEGER, DIMENSION(2) :: nblock_src, nlocal_recv, src_num_pe
1867 495266 : INTEGER, DIMENSION(:), POINTER :: recv_col_indices, recv_row_indices
1868 :
1869 495266 : CALL timeset(routineN, handle)
1870 :
1871 : IF (.NOT. cp2k_is_parallel) THEN
1872 : ! Now unpack the data from the 'send buffer'
1873 : k = 0
1874 : DO j = 1, SIZE(destination%local_data, 2)
1875 : DO i = 1, SIZE(destination%local_data, 1)
1876 : k = k + 1
1877 : destination%local_data(i, j) = info%send_buf(k)
1878 : END DO
1879 : END DO
1880 : DEALLOCATE (info%send_buf)
1881 : ELSE
1882 : ! Set up local variables ...
1883 495266 : send_size = info%send_size
1884 1485798 : nlocal_recv(1:2) = info%nlocal_recv(:)
1885 1485798 : nblock_src(1:2) = info%nblock_src(:)
1886 1485798 : src_num_pe(1:2) = info%src_num_pe(:)
1887 495266 : recv_col_indices => info%recv_col_indices
1888 495266 : recv_row_indices => info%recv_row_indices
1889 :
1890 : ! ... use the local variables to do the work
1891 : ! DEST_2
1892 495266 : CALL mp_waitall(info%recv_request(:))
1893 1485798 : ALLOCATE (recv_count(0:send_size - 1))
1894 : ! Loop over the rmat, filling it in with data from the recv buffers
1895 : ! (here the block sizes, num_pes refer to the distribution of the source matrix)
1896 495266 : recv_count(:) = 0
1897 14479942 : DO j = 1, nlocal_recv(2)
1898 13984676 : src_q_j = MOD(((recv_col_indices(j) - 1)/nblock_src(2)), src_num_pe(2))
1899 538451622 : DO i = 1, nlocal_recv(1)
1900 523971680 : src_p_i = MOD(((recv_row_indices(i) - 1)/nblock_src(1)), src_num_pe(1))
1901 523971680 : mpi_rank = info%src_blacs2mpi(src_p_i, src_q_j)
1902 523971680 : recv_count(mpi_rank) = recv_count(mpi_rank) + 1
1903 537956356 : destination%local_data(i, j) = info%recv_buf(info%recv_disp(mpi_rank) + recv_count(mpi_rank))
1904 : END DO
1905 : END DO
1906 495266 : DEALLOCATE (recv_count, info%recv_disp, info%recv_request, info%recv_buf, info%src_blacs2mpi)
1907 : ! Invalidate the stored state
1908 : NULLIFY (info%recv_col_indices, &
1909 495266 : info%recv_row_indices)
1910 :
1911 : END IF
1912 :
1913 495266 : CALL timestop(handle)
1914 :
1915 495266 : END SUBROUTINE cp_fm_finish_copy_general
1916 :
1917 : ! **************************************************************************************************
1918 : !> \brief Completes the copy operation: wait for comms clean up MPI state
1919 : !> \param info all of the data that will be needed to complete the copy operation
1920 : ! **************************************************************************************************
1921 578167 : SUBROUTINE cp_fm_cleanup_copy_general(info)
1922 : TYPE(copy_info_type), INTENT(INOUT) :: info
1923 :
1924 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_cleanup_copy_general'
1925 :
1926 : INTEGER :: handle
1927 :
1928 578167 : CALL timeset(routineN, handle)
1929 :
1930 : IF (.NOT. cp2k_is_parallel) THEN
1931 : ! Don't do anything - no MPI state for the serial case
1932 : ELSE
1933 : ! SRC_2
1934 : ! If this process is also in the destination decomposition, this deallocate
1935 : ! Was already done in cp_fm_finish_copy_general
1936 578167 : IF (ALLOCATED(info%src_blacs2mpi)) THEN
1937 167570 : DEALLOCATE (info%src_blacs2mpi)
1938 : END IF
1939 578167 : CALL mp_waitall(info%send_request)
1940 578167 : DEALLOCATE (info%send_request, info%send_buf)
1941 :
1942 : END IF
1943 :
1944 578167 : CALL timestop(handle)
1945 :
1946 578167 : END SUBROUTINE cp_fm_cleanup_copy_general
1947 :
1948 : ! **************************************************************************************************
1949 : !> \brief General copy of a submatrix of fm matrix to a submatrix of another fm matrix.
1950 : !> The two matrices can have different contexts.
1951 : !>
1952 : !> Summary of distribution routines for dense matrices
1953 : !> The following will copy A(iA:iA+M-1,jA:jA+N-1) to B(iB:iB+M-1,jB:jB+N-1):
1954 : !>
1955 : !> call pdgemr2d(M,N,Aloc,iA,jA,descA,Bloc,iB,jB,descB,context)
1956 : !>
1957 : !> A process that is not a part of the context of A should set descA(2)
1958 : !> to -1, and similarly for B.
1959 : !>
1960 : !> \param source input fm matrix
1961 : !> \param destination output fm matrix
1962 : !> \param nrows number of rows of sub matrix to be copied
1963 : !> \param ncols number of cols of sub matrix to be copied
1964 : !> \param s_firstrow starting global row index of sub matrix in source
1965 : !> \param s_firstcol starting global col index of sub matrix in source
1966 : !> \param d_firstrow starting global row index of sub matrix in destination
1967 : !> \param d_firstcol starting global col index of sub matrix in destination
1968 : !> \param global_context process grid that covers all parts of either A or B.
1969 : ! **************************************************************************************************
1970 11579 : SUBROUTINE cp_fm_to_fm_submat_general(source, &
1971 : destination, &
1972 : nrows, &
1973 : ncols, &
1974 : s_firstrow, &
1975 : s_firstcol, &
1976 : d_firstrow, &
1977 : d_firstcol, &
1978 : global_context)
1979 :
1980 : TYPE(cp_fm_type), INTENT(IN) :: source, destination
1981 : INTEGER, INTENT(IN) :: nrows, ncols, s_firstrow, s_firstcol, &
1982 : d_firstrow, d_firstcol
1983 :
1984 : CLASS(cp_blacs_type), INTENT(IN) :: global_context
1985 :
1986 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_to_fm_submat_general'
1987 :
1988 : LOGICAL :: debug
1989 : INTEGER :: handle
1990 : #if defined(__parallel)
1991 : INTEGER, DIMENSION(9) :: desca, descb
1992 : REAL(KIND=dp), DIMENSION(1, 1), TARGET :: dummy
1993 11579 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: smat, dmat
1994 : #endif
1995 :
1996 11579 : CALL timeset(routineN, handle)
1997 :
1998 11579 : debug = debug_this_module
1999 :
2000 : IF (.NOT. cp2k_is_parallel) THEN
2001 : CALL cp_fm_to_fm_submat(source, &
2002 : destination, &
2003 : nrows, &
2004 : ncols, &
2005 : s_firstrow, &
2006 : s_firstcol, &
2007 : d_firstrow, &
2008 : d_firstcol)
2009 : ELSE
2010 : #ifdef __parallel
2011 : NULLIFY (smat, dmat)
2012 : ! check whether source is available on this process
2013 11579 : IF (ASSOCIATED(source%matrix_struct)) THEN
2014 115790 : desca = source%matrix_struct%descriptor
2015 11579 : IF (nrows > source%matrix_struct%nrow_global) &
2016 0 : CPABORT("nrows is greater than nrow_global of source")
2017 11579 : IF (ncols > source%matrix_struct%ncol_global) &
2018 0 : CPABORT("ncols is greater than ncol_global of source")
2019 11579 : smat => source%local_data
2020 : ELSE
2021 0 : desca = -1
2022 0 : smat => dummy
2023 : END IF
2024 : ! check destination is available on this process
2025 11579 : IF (ASSOCIATED(destination%matrix_struct)) THEN
2026 115790 : descb = destination%matrix_struct%descriptor
2027 11579 : IF (nrows > destination%matrix_struct%nrow_global) &
2028 0 : CPABORT("nrows is greater than nrow_global of destination")
2029 11579 : IF (ncols > destination%matrix_struct%ncol_global) &
2030 0 : CPABORT("ncols is greater than ncol_global of destination")
2031 11579 : dmat => destination%local_data
2032 : ELSE
2033 0 : descb = -1
2034 0 : dmat => dummy
2035 : END IF
2036 : ! do copy
2037 :
2038 : CALL pdgemr2d(nrows, &
2039 : ncols, &
2040 : smat, &
2041 : s_firstrow, &
2042 : s_firstcol, &
2043 : desca, &
2044 : dmat, &
2045 : d_firstrow, &
2046 : d_firstcol, &
2047 : descb, &
2048 11579 : global_context%get_handle())
2049 : #else
2050 : MARK_USED(global_context)
2051 : CPABORT("this subroutine only supports SCALAPACK")
2052 : #endif
2053 : END IF
2054 :
2055 11579 : CALL timestop(handle)
2056 :
2057 11579 : END SUBROUTINE cp_fm_to_fm_submat_general
2058 :
2059 : ! **************************************************************************************************
2060 : !> \brief ...
2061 : !> \param matrix ...
2062 : !> \param irow_global ...
2063 : !> \param icol_global ...
2064 : !> \param alpha ...
2065 : ! **************************************************************************************************
2066 240 : SUBROUTINE cp_fm_add_to_element(matrix, irow_global, icol_global, alpha)
2067 :
2068 : ! Add alpha to the matrix element specified by the global indices
2069 : ! irow_global and icol_global
2070 :
2071 : ! - Creation (05.05.06,MK)
2072 :
2073 : TYPE(cp_fm_type), INTENT(IN) :: matrix
2074 : INTEGER, INTENT(IN) :: irow_global, icol_global
2075 : REAL(KIND=dp), INTENT(IN) :: alpha
2076 :
2077 : INTEGER :: mypcol, myprow, npcol, nprow
2078 240 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
2079 : TYPE(cp_blacs_env_type), POINTER :: context
2080 : #if defined(__parallel)
2081 : INTEGER :: icol_local, ipcol, iprow, &
2082 : irow_local
2083 : INTEGER, DIMENSION(9) :: desca
2084 : #endif
2085 :
2086 240 : context => matrix%matrix_struct%context
2087 :
2088 240 : myprow = context%mepos(1)
2089 240 : mypcol = context%mepos(2)
2090 :
2091 240 : nprow = context%num_pe(1)
2092 240 : npcol = context%num_pe(2)
2093 :
2094 240 : a => matrix%local_data
2095 :
2096 : #if defined(__parallel)
2097 :
2098 2400 : desca(:) = matrix%matrix_struct%descriptor(:)
2099 :
2100 : CALL infog2l(irow_global, icol_global, desca, nprow, npcol, myprow, mypcol, &
2101 240 : irow_local, icol_local, iprow, ipcol)
2102 :
2103 240 : IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
2104 120 : a(irow_local, icol_local) = a(irow_local, icol_local) + alpha
2105 : END IF
2106 :
2107 : #else
2108 :
2109 : a(irow_global, icol_global) = a(irow_global, icol_global) + alpha
2110 :
2111 : #endif
2112 :
2113 240 : END SUBROUTINE cp_fm_add_to_element
2114 :
2115 : ! **************************************************************************************************
2116 : !> \brief ...
2117 : !> \param fm ...
2118 : !> \param unit ...
2119 : ! **************************************************************************************************
2120 75035 : SUBROUTINE cp_fm_write_unformatted(fm, unit)
2121 : TYPE(cp_fm_type), INTENT(IN) :: fm
2122 : INTEGER, INTENT(IN) :: unit
2123 :
2124 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_write_unformatted'
2125 :
2126 : INTEGER :: handle, j, max_block, &
2127 : ncol_global, nrow_global
2128 : TYPE(mp_para_env_type), POINTER :: para_env
2129 : #if defined(__parallel)
2130 : INTEGER :: i, i_block, icol_local, &
2131 : in, info, ipcol, &
2132 : iprow, irow_local, &
2133 : mepos, &
2134 : num_pe, rb, tag
2135 : INTEGER, DIMENSION(9) :: desc
2136 75035 : REAL(KIND=dp), DIMENSION(:), POINTER :: vecbuf
2137 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: newdat
2138 : TYPE(cp_blacs_type) :: ictxt_loc
2139 : INTEGER, EXTERNAL :: numroc
2140 : #endif
2141 :
2142 75035 : CALL timeset(routineN, handle)
2143 : CALL cp_fm_get_info(fm, nrow_global=nrow_global, ncol_global=ncol_global, ncol_block=max_block, &
2144 75035 : para_env=para_env)
2145 :
2146 : #if defined(__parallel)
2147 75035 : num_pe = para_env%num_pe
2148 75035 : mepos = para_env%mepos
2149 75035 : rb = nrow_global
2150 75035 : tag = 0
2151 : ! get a new context
2152 75035 : CALL ictxt_loc%gridinit(para_env, 'R', 1, num_pe)
2153 75035 : CALL descinit(desc, nrow_global, ncol_global, rb, max_block, 0, 0, ictxt_loc%get_handle(), nrow_global, info)
2154 75035 : CPASSERT(info == 0)
2155 : ASSOCIATE (nprow => ictxt_loc%num_pe(1), npcol => ictxt_loc%num_pe(2), &
2156 : myprow => ictxt_loc%mepos(1), mypcol => ictxt_loc%mepos(2))
2157 150070 : in = numroc(ncol_global, max_block, mypcol, 0, npcol)
2158 :
2159 300140 : ALLOCATE (newdat(nrow_global, MAX(1, in)))
2160 :
2161 : ! do the actual scalapack to cols reordering
2162 : CALL pdgemr2d(nrow_global, ncol_global, fm%local_data, 1, 1, &
2163 : fm%matrix_struct%descriptor, &
2164 75035 : newdat, 1, 1, desc, ictxt_loc%get_handle())
2165 :
2166 225105 : ALLOCATE (vecbuf(nrow_global*max_block))
2167 29993577 : vecbuf = HUGE(1.0_dp) ! init for valgrind
2168 :
2169 323327 : DO i = 1, ncol_global, MAX(max_block, 1)
2170 173257 : i_block = MIN(max_block, ncol_global - i + 1)
2171 : CALL infog2l(1, i, desc, nprow, npcol, myprow, mypcol, &
2172 173257 : irow_local, icol_local, iprow, ipcol)
2173 173257 : IF (ipcol == mypcol) THEN
2174 661752 : DO j = 1, i_block
2175 76560060 : vecbuf((j - 1)*nrow_global + 1:nrow_global*j) = newdat(:, icol_local + j - 1)
2176 : END DO
2177 : END IF
2178 :
2179 173257 : IF (ipcol == 0) THEN
2180 : ! do nothing
2181 : ELSE
2182 58944 : IF (ipcol == mypcol) THEN
2183 16890600 : CALL para_env%send(vecbuf(:), 0, tag)
2184 : END IF
2185 58944 : IF (mypcol == 0) THEN
2186 33751728 : CALL para_env%recv(vecbuf(:), ipcol, tag)
2187 : END IF
2188 : END IF
2189 :
2190 248292 : IF (unit > 0) THEN
2191 659136 : DO j = 1, i_block
2192 38583974 : WRITE (unit) vecbuf((j - 1)*nrow_global + 1:nrow_global*j)
2193 : END DO
2194 : END IF
2195 :
2196 : END DO
2197 : END ASSOCIATE
2198 75035 : DEALLOCATE (vecbuf)
2199 :
2200 75035 : CALL ictxt_loc%gridexit()
2201 :
2202 75035 : DEALLOCATE (newdat)
2203 :
2204 : #else
2205 :
2206 : IF (unit > 0) THEN
2207 : DO j = 1, ncol_global
2208 : WRITE (unit) fm%local_data(:, j)
2209 : END DO
2210 : END IF
2211 :
2212 : #endif
2213 75035 : CALL timestop(handle)
2214 :
2215 375175 : END SUBROUTINE cp_fm_write_unformatted
2216 :
2217 : ! **************************************************************************************************
2218 : !> \brief Write out a full matrix in plain text.
2219 : !> \param fm the matrix to be outputted
2220 : !> \param unit the unit number for I/O
2221 : !> \param header optional header
2222 : !> \param value_format ...
2223 : ! **************************************************************************************************
2224 1378 : SUBROUTINE cp_fm_write_formatted(fm, unit, header, value_format)
2225 : TYPE(cp_fm_type), INTENT(IN) :: fm
2226 : INTEGER, INTENT(IN) :: unit
2227 : CHARACTER(LEN=*), INTENT(IN), OPTIONAL :: header, value_format
2228 :
2229 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_write_formatted'
2230 :
2231 : CHARACTER(LEN=21) :: my_value_format
2232 : INTEGER :: handle, i, j, max_block, &
2233 : ncol_global, nrow_global
2234 : TYPE(mp_para_env_type), POINTER :: para_env
2235 : #if defined(__parallel)
2236 : INTEGER :: i_block, icol_local, &
2237 : in, info, ipcol, &
2238 : iprow, irow_local, &
2239 : mepos, num_pe, rb, tag, k, &
2240 : icol, irow
2241 : INTEGER, DIMENSION(9) :: desc
2242 1378 : REAL(KIND=dp), DIMENSION(:), POINTER :: vecbuf
2243 1378 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: newdat
2244 : TYPE(cp_blacs_type) :: ictxt_loc
2245 : INTEGER, EXTERNAL :: numroc
2246 : #endif
2247 :
2248 1378 : CALL timeset(routineN, handle)
2249 : CALL cp_fm_get_info(fm, nrow_global=nrow_global, ncol_global=ncol_global, ncol_block=max_block, &
2250 1378 : para_env=para_env)
2251 :
2252 1378 : IF (PRESENT(value_format)) THEN
2253 0 : CPASSERT(LEN_TRIM(ADJUSTL(value_format)) < 11)
2254 0 : my_value_format = "(I10, I10, "//TRIM(ADJUSTL(value_format))//")"
2255 : ELSE
2256 1378 : my_value_format = "(I10, I10, ES24.12)"
2257 : END IF
2258 :
2259 1378 : IF (unit > 0) THEN
2260 11 : IF (PRESENT(header)) WRITE (unit, *) header
2261 11 : WRITE (unit, "(A2, A8, A10, A24)") "#", "Row", "Column", ADJUSTL("Value")
2262 : END IF
2263 :
2264 : #if defined(__parallel)
2265 1378 : num_pe = para_env%num_pe
2266 1378 : mepos = para_env%mepos
2267 1378 : rb = nrow_global
2268 1378 : tag = 0
2269 : ! get a new context
2270 1378 : CALL ictxt_loc%gridinit(para_env, 'R', 1, num_pe)
2271 1378 : CALL descinit(desc, nrow_global, ncol_global, rb, max_block, 0, 0, ictxt_loc%get_handle(), nrow_global, info)
2272 1378 : CPASSERT(info == 0)
2273 : ASSOCIATE (nprow => ictxt_loc%num_pe(1), npcol => ictxt_loc%num_pe(2), &
2274 : myprow => ictxt_loc%mepos(1), mypcol => ictxt_loc%mepos(2))
2275 2756 : in = numroc(ncol_global, max_block, mypcol, 0, npcol)
2276 :
2277 5512 : ALLOCATE (newdat(nrow_global, MAX(1, in)))
2278 :
2279 : ! do the actual scalapack to cols reordering
2280 : CALL pdgemr2d(nrow_global, ncol_global, fm%local_data, 1, 1, &
2281 : fm%matrix_struct%descriptor, &
2282 1378 : newdat, 1, 1, desc, ictxt_loc%get_handle())
2283 :
2284 4134 : ALLOCATE (vecbuf(nrow_global*max_block))
2285 14136 : vecbuf = HUGE(1.0_dp) ! init for valgrind
2286 1378 : irow = 1
2287 1378 : icol = 1
2288 :
2289 5510 : DO i = 1, ncol_global, MAX(max_block, 1)
2290 2754 : i_block = MIN(max_block, ncol_global - i + 1)
2291 : CALL infog2l(1, i, desc, nprow, npcol, myprow, mypcol, &
2292 2754 : irow_local, icol_local, iprow, ipcol)
2293 2754 : IF (ipcol == mypcol) THEN
2294 2922 : DO j = 1, i_block
2295 20802 : vecbuf((j - 1)*nrow_global + 1:nrow_global*j) = newdat(:, icol_local + j - 1)
2296 : END DO
2297 : END IF
2298 :
2299 2754 : IF (ipcol == 0) THEN
2300 : ! do nothing
2301 : ELSE
2302 1370 : IF (ipcol == mypcol) THEN
2303 5019 : CALL para_env%send(vecbuf(:), 0, tag)
2304 : END IF
2305 1370 : IF (mypcol == 0) THEN
2306 9353 : CALL para_env%recv(vecbuf(:), ipcol, tag)
2307 : END IF
2308 : END IF
2309 :
2310 4132 : IF (unit > 0) THEN
2311 210 : DO j = 1, i_block
2312 6438 : DO k = (j - 1)*nrow_global + 1, nrow_global*j
2313 6228 : WRITE (UNIT=unit, FMT=my_value_format) irow, icol, vecbuf(k)
2314 6228 : irow = irow + 1
2315 6417 : IF (irow > nrow_global) THEN
2316 189 : irow = 1
2317 189 : icol = icol + 1
2318 : END IF
2319 : END DO
2320 : END DO
2321 : END IF
2322 :
2323 : END DO
2324 : END ASSOCIATE
2325 1378 : DEALLOCATE (vecbuf)
2326 :
2327 1378 : CALL ictxt_loc%gridexit()
2328 :
2329 1378 : DEALLOCATE (newdat)
2330 :
2331 : #else
2332 :
2333 : IF (unit > 0) THEN
2334 : DO j = 1, ncol_global
2335 : DO i = 1, nrow_global
2336 : WRITE (UNIT=unit, FMT=my_value_format) i, j, fm%local_data(i, j)
2337 : END DO
2338 : END DO
2339 : END IF
2340 :
2341 : #endif
2342 1378 : CALL timestop(handle)
2343 :
2344 6890 : END SUBROUTINE cp_fm_write_formatted
2345 :
2346 : ! **************************************************************************************************
2347 : !> \brief ...
2348 : !> \param fm ...
2349 : !> \param unit ...
2350 : ! **************************************************************************************************
2351 1974 : SUBROUTINE cp_fm_read_unformatted(fm, unit)
2352 : TYPE(cp_fm_type), INTENT(INOUT) :: fm
2353 : INTEGER, INTENT(IN) :: unit
2354 :
2355 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_read_unformatted'
2356 :
2357 : INTEGER :: handle, j, max_block, &
2358 : ncol_global, nrow_global
2359 : TYPE(mp_para_env_type), POINTER :: para_env
2360 : #if defined(__parallel)
2361 : INTEGER :: k, n_cols
2362 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: vecbuf
2363 : #endif
2364 :
2365 1974 : CALL timeset(routineN, handle)
2366 :
2367 : CALL cp_fm_get_info(fm, nrow_global=nrow_global, ncol_global=ncol_global, ncol_block=max_block, &
2368 1974 : para_env=para_env)
2369 :
2370 : #if defined(__parallel)
2371 :
2372 : ! the parallel case could be made more efficient (see cp_fm_write_unformatted)
2373 :
2374 7896 : ALLOCATE (vecbuf(nrow_global, max_block))
2375 :
2376 6152 : DO j = 1, ncol_global, max_block
2377 :
2378 4178 : n_cols = MIN(max_block, ncol_global - j + 1)
2379 4178 : IF (para_env%mepos == 0) THEN
2380 16918 : DO k = 1, n_cols
2381 368077 : READ (unit) vecbuf(:, k)
2382 : END DO
2383 : END IF
2384 1482498 : CALL para_env%bcast(vecbuf, 0)
2385 6152 : CALL cp_fm_set_submatrix(fm, vecbuf, start_row=1, start_col=j, n_cols=n_cols)
2386 :
2387 : END DO
2388 :
2389 1974 : DEALLOCATE (vecbuf)
2390 :
2391 : #else
2392 :
2393 : DO j = 1, ncol_global
2394 : READ (unit) fm%local_data(:, j)
2395 : END DO
2396 :
2397 : #endif
2398 :
2399 1974 : CALL timestop(handle)
2400 :
2401 1974 : END SUBROUTINE cp_fm_read_unformatted
2402 :
2403 : ! **************************************************************************************************
2404 : !> \brief ...
2405 : !> \param mm_type ...
2406 : ! **************************************************************************************************
2407 10991 : SUBROUTINE cp_fm_setup(mm_type)
2408 : INTEGER, INTENT(IN) :: mm_type
2409 :
2410 10991 : cp_fm_mm_type = mm_type
2411 10991 : END SUBROUTINE cp_fm_setup
2412 :
2413 : ! **************************************************************************************************
2414 : !> \brief ...
2415 : !> \return ...
2416 : ! **************************************************************************************************
2417 1961671 : FUNCTION cp_fm_get_mm_type() RESULT(res)
2418 : INTEGER :: res
2419 :
2420 1961671 : res = cp_fm_mm_type
2421 1961671 : END FUNCTION cp_fm_get_mm_type
2422 :
2423 : ! **************************************************************************************************
2424 : !> \brief ...
2425 : !> \param ictxt ...
2426 : !> \param prec ...
2427 : !> \return ...
2428 : ! **************************************************************************************************
2429 10 : FUNCTION cp_fm_pilaenv(ictxt, prec) RESULT(res)
2430 : INTEGER :: ictxt
2431 : CHARACTER(LEN=1) :: prec
2432 : INTEGER :: res
2433 : #if defined(__parallel)
2434 : INTEGER :: pilaenv
2435 10 : res = pilaenv(ictxt, prec)
2436 : #else
2437 : MARK_USED(ictxt)
2438 : MARK_USED(prec)
2439 : res = -1
2440 : #endif
2441 :
2442 10 : END FUNCTION cp_fm_pilaenv
2443 :
2444 0 : END MODULE cp_fm_types
|