Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief represent a full matrix distributed on many processors
10 : !> \par History
11 : !> 3) separated structure object, removed globenv, renamed to full matrix
12 : !> many changes (fawzi 08.2002)
13 : !> \author Matthias Krack (22.05.2001)
14 : ! **************************************************************************************************
15 : MODULE cp_fm_types
16 : USE cp_blacs_env, ONLY: cp_blacs_env_type
17 : USE cp_blacs_types, ONLY: cp_blacs_type
18 : USE cp_fm_struct, ONLY: cp_fm_struct_equivalent,&
19 : cp_fm_struct_get,&
20 : cp_fm_struct_release,&
21 : cp_fm_struct_retain,&
22 : cp_fm_struct_type,&
23 : cp_fm_struct_write_info
24 : USE kinds, ONLY: dp,&
25 : sp
26 : USE message_passing, ONLY: cp2k_is_parallel,&
27 : mp_any_source,&
28 : mp_para_env_type,&
29 : mp_proc_null,&
30 : mp_request_null,&
31 : mp_request_type,&
32 : mp_waitall
33 : USE parallel_rng_types, ONLY: UNIFORM,&
34 : rng_stream_type
35 : #include "../base/base_uses.f90"
36 :
37 : IMPLICIT NONE
38 :
39 : PRIVATE
40 :
41 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_fm_types'
42 : LOGICAL, PARAMETER :: debug_this_module = .TRUE.
43 : INTEGER, PARAMETER :: src_tag = 3, dest_tag = 5, send_tag = 7, recv_tag = 11
44 :
45 : INTEGER, PRIVATE :: mm_type = 1
46 :
47 : PUBLIC :: cp_fm_type, &
48 : cp_fm_p_type, copy_info_type
49 :
50 : PUBLIC :: cp_fm_add_to_element, &
51 : cp_fm_create, &
52 : cp_fm_release, &
53 : cp_fm_get_info, &
54 : cp_fm_set_element, &
55 : cp_fm_get_element, &
56 : cp_fm_get_diag, & ! get diagonal
57 : cp_fm_set_all, & ! set all elements and diagonal
58 : cp_fm_set_submatrix, & ! set a submatrix to given values
59 : cp_fm_get_submatrix, & ! get a submatrix of given values
60 : cp_fm_init_random, &
61 : cp_fm_maxabsval, & ! find the maximum absolute value
62 : cp_fm_maxabsrownorm, & ! find the maximum of the sum of the abs of the elements of a row
63 : cp_fm_to_fm, & ! copy (parts of) a fm to a fm
64 : cp_fm_vectorsnorm, & ! compute the norm of the column-vectors
65 : cp_fm_vectorssum, & ! compute the sum of all elements of the column-vectors
66 : cp_fm_to_fm_submat, & ! copy (parts of) a fm to a fm
67 : cp_fm_to_fm_triangular, &
68 : cp_fm_copy_general, &
69 : cp_fm_start_copy_general, &
70 : cp_fm_finish_copy_general, &
71 : cp_fm_cleanup_copy_general, &
72 : cp_fm_write_unformatted, & ! writes a full matrix to an open unit
73 : cp_fm_write_formatted, & ! writes a full matrix to an open unit
74 : cp_fm_read_unformatted, & ! reads a full matrix from an open unit
75 : cp_fm_setup, & ! allows to set flags for fms
76 : cp_fm_get_mm_type, &
77 : cp_fm_write_info, &
78 : cp_fm_to_fm_submat_general ! copy matrix across different contexts
79 :
80 : PUBLIC :: cp_fm_indxg2p, &
81 : cp_fm_indxg2l, &
82 : cp_fm_indxl2g, &
83 : cp_fm_pilaenv
84 :
85 : INTERFACE cp_fm_to_fm
86 : MODULE PROCEDURE cp_fm_to_fm_matrix, & ! a full matrix
87 : cp_fm_to_fm_columns ! just a number of columns
88 : END INTERFACE
89 :
90 : INTERFACE cp_fm_release
91 : MODULE PROCEDURE cp_fm_release_aa0, &
92 : cp_fm_release_aa1, &
93 : cp_fm_release_aa2, &
94 : cp_fm_release_aa3, &
95 : cp_fm_release_ap1, &
96 : cp_fm_release_ap2, &
97 : cp_fm_release_pa1, &
98 : cp_fm_release_pa2, &
99 : cp_fm_release_pa3, &
100 : cp_fm_release_pp1, &
101 : cp_fm_release_pp2
102 : END INTERFACE
103 :
104 : ! **************************************************************************************************
105 : !> \brief represent a full matrix
106 : !> \param name the name of the matrix, used for printing
107 : !> \param matrix_struct structure of this matrix
108 : !> \param local_data array with the data of the matrix (its contents
109 : !> depend on the matrix type used: in parallel runs it will be
110 : !> in scalapack format, in sequential, it will simply contain
111 : !> the matrix)
112 : !> \par History
113 : !> 08.2002 created [fawzi]
114 : !> \author fawzi
115 : ! **************************************************************************************************
116 : TYPE cp_fm_type
117 : ! PRIVATE
118 : CHARACTER(LEN=60) :: name = ""
119 : LOGICAL :: use_sp = .FALSE.
120 : TYPE(cp_fm_struct_type), POINTER :: matrix_struct => NULL()
121 : REAL(KIND=dp), DIMENSION(:, :), POINTER, CONTIGUOUS :: local_data => NULL()
122 : REAL(KIND=sp), DIMENSION(:, :), POINTER, CONTIGUOUS :: local_data_sp => NULL()
123 : END TYPE cp_fm_type
124 :
125 : ! **************************************************************************************************
126 : !> \brief just to build arrays of pointers to matrices
127 : !> \param matrix the pointer to the matrix
128 : !> \par History
129 : !> 08.2002 created [fawzi]
130 : !> \author fawzi
131 : ! **************************************************************************************************
132 : TYPE cp_fm_p_type
133 : TYPE(cp_fm_type), POINTER :: matrix => NULL()
134 : END TYPE cp_fm_p_type
135 :
136 : ! **************************************************************************************************
137 : !> \brief Stores the state of a copy between cp_fm_start_copy_general
138 : !> and cp_fm_finish_copy_general
139 : !> \par History
140 : !> Jan 2017 [Mark T]
141 : ! **************************************************************************************************
142 : TYPE copy_info_type
143 : INTEGER :: send_size = -1
144 : INTEGER, DIMENSION(2) :: nlocal_recv = -1, nblock_src = -1, src_num_pe = -1 ! 1->row 2->col
145 : TYPE(mp_request_type), DIMENSION(:), ALLOCATABLE :: send_request, recv_request
146 : INTEGER, DIMENSION(:), ALLOCATABLE :: recv_disp
147 : INTEGER, DIMENSION(:), POINTER :: recv_col_indices => NULL(), recv_row_indices => NULL()
148 : INTEGER, DIMENSION(:, :), ALLOCATABLE :: src_blacs2mpi
149 : REAL(KIND=dp), DIMENSION(:), ALLOCATABLE :: recv_buf, send_buf
150 : END TYPE copy_info_type
151 :
152 : CONTAINS
153 :
154 : ! **************************************************************************************************
155 : !> \brief creates a new full matrix with the given structure
156 : !> \param matrix the matrix to be created
157 : !> \param matrix_struct the structure of matrix
158 : !> \param name ...
159 : !> \param use_sp ...
160 : !> \par History
161 : !> 08.2002 created [fawzi]
162 : !> \author Fawzi Mohamed
163 : !> \note
164 : !> preferred allocation routine
165 : ! **************************************************************************************************
166 1219022 : SUBROUTINE cp_fm_create(matrix, matrix_struct, name, use_sp)
167 : TYPE(cp_fm_type), INTENT(OUT) :: matrix
168 : TYPE(cp_fm_struct_type), INTENT(IN), TARGET :: matrix_struct
169 : CHARACTER(len=*), INTENT(in), OPTIONAL :: name
170 : LOGICAL, INTENT(in), OPTIONAL :: use_sp
171 :
172 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_create'
173 :
174 : INTEGER :: handle, ncol_local, npcol, nprow, &
175 : nrow_local
176 : TYPE(cp_blacs_env_type), POINTER :: context
177 :
178 1219022 : CALL timeset(routineN, handle)
179 :
180 : #if defined(__parallel) && ! defined(__SCALAPACK)
181 : CPABORT("full matrices need scalapack for parallel runs")
182 : #endif
183 :
184 1219022 : context => matrix_struct%context
185 1219022 : matrix%matrix_struct => matrix_struct
186 1219022 : CALL cp_fm_struct_retain(matrix%matrix_struct)
187 :
188 1219022 : matrix%use_sp = .FALSE.
189 1219022 : IF (PRESENT(use_sp)) matrix%use_sp = use_sp
190 :
191 1219022 : nprow = context%num_pe(1)
192 1219022 : npcol = context%num_pe(2)
193 1219022 : NULLIFY (matrix%local_data)
194 1219022 : NULLIFY (matrix%local_data_sp)
195 :
196 : ! OK, we allocate here at least a 1 x 1 matrix
197 : ! this must (and is) compatible with the descinit call
198 : ! in cp_fm_struct
199 1219022 : nrow_local = matrix_struct%local_leading_dimension
200 1219022 : ncol_local = MAX(1, matrix_struct%ncol_locals(context%mepos(2)))
201 1219022 : IF (matrix%use_sp) THEN
202 0 : ALLOCATE (matrix%local_data_sp(nrow_local, ncol_local))
203 : ELSE
204 4876088 : ALLOCATE (matrix%local_data(nrow_local, ncol_local))
205 : END IF
206 :
207 : ! JVDV we should remove this, as it is up to the user to zero afterwards
208 1219022 : IF (matrix%use_sp) THEN
209 0 : matrix%local_data_sp(1:nrow_local, 1:ncol_local) = 0.0_sp
210 : ELSE
211 612954651 : matrix%local_data(1:nrow_local, 1:ncol_local) = 0.0_dp
212 : END IF
213 :
214 1219022 : IF (PRESENT(name)) THEN
215 624927 : matrix%name = name
216 : ELSE
217 594095 : matrix%name = 'full matrix'
218 : END IF
219 :
220 1219022 : CALL timestop(handle)
221 :
222 1219022 : END SUBROUTINE cp_fm_create
223 :
224 : ! **************************************************************************************************
225 : !> \brief releases a full matrix
226 : !> \param matrix the matrix to release
227 : !> \par History
228 : !> 08.2002 created [fawzi]
229 : !> \author Fawzi Mohamed
230 : ! **************************************************************************************************
231 1229574 : SUBROUTINE cp_fm_release_aa0(matrix)
232 : TYPE(cp_fm_type), INTENT(INOUT) :: matrix
233 :
234 1229574 : IF (ASSOCIATED(matrix%local_data)) THEN
235 1218334 : DEALLOCATE (matrix%local_data)
236 : NULLIFY (matrix%local_data)
237 : END IF
238 1229574 : IF (ASSOCIATED(matrix%local_data_sp)) THEN
239 0 : DEALLOCATE (matrix%local_data_sp)
240 : NULLIFY (matrix%local_data_sp)
241 : END IF
242 1229574 : matrix%name = ""
243 1229574 : CALL cp_fm_struct_release(matrix%matrix_struct)
244 :
245 1229574 : END SUBROUTINE cp_fm_release_aa0
246 :
247 : ! **************************************************************************************************
248 : !> \brief ...
249 : !> \param matrices ...
250 : ! **************************************************************************************************
251 71126 : SUBROUTINE cp_fm_release_aa1(matrices)
252 : TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:) :: matrices
253 :
254 : INTEGER :: i
255 :
256 71126 : IF (ALLOCATED(matrices)) THEN
257 192423 : DO i = 1, SIZE(matrices)
258 192423 : CALL cp_fm_release(matrices(i))
259 : END DO
260 70042 : DEALLOCATE (matrices)
261 : END IF
262 71126 : END SUBROUTINE cp_fm_release_aa1
263 :
264 : ! **************************************************************************************************
265 : !> \brief ...
266 : !> \param matrices ...
267 : ! **************************************************************************************************
268 7928 : SUBROUTINE cp_fm_release_aa2(matrices)
269 : TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:, :) :: matrices
270 :
271 : INTEGER :: i, j
272 :
273 7928 : IF (ALLOCATED(matrices)) THEN
274 19148 : DO i = 1, SIZE(matrices, 1)
275 51288 : DO j = 1, SIZE(matrices, 2)
276 45628 : CALL cp_fm_release(matrices(i, j))
277 : END DO
278 : END DO
279 5660 : DEALLOCATE (matrices)
280 : END IF
281 7928 : END SUBROUTINE cp_fm_release_aa2
282 :
283 : ! **************************************************************************************************
284 : !> \brief ...
285 : !> \param matrices ...
286 : ! **************************************************************************************************
287 32 : SUBROUTINE cp_fm_release_aa3(matrices)
288 : TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:, :, :) :: matrices
289 :
290 : INTEGER :: i, j, k
291 :
292 32 : IF (ALLOCATED(matrices)) THEN
293 232 : DO i = 1, SIZE(matrices, 1)
294 660 : DO j = 1, SIZE(matrices, 2)
295 1176 : DO k = 1, SIZE(matrices, 3)
296 976 : CALL cp_fm_release(matrices(i, j, k))
297 : END DO
298 : END DO
299 : END DO
300 32 : DEALLOCATE (matrices)
301 : END IF
302 32 : END SUBROUTINE cp_fm_release_aa3
303 :
304 : ! **************************************************************************************************
305 : !> \brief ...
306 : !> \param matrices ...
307 : ! **************************************************************************************************
308 125694 : SUBROUTINE cp_fm_release_pa1(matrices)
309 : TYPE(cp_fm_type), DIMENSION(:), POINTER :: matrices
310 :
311 : INTEGER :: i
312 :
313 125694 : IF (ASSOCIATED(matrices)) THEN
314 115359 : DO i = 1, SIZE(matrices)
315 115359 : CALL cp_fm_release(matrices(i))
316 : END DO
317 49274 : DEALLOCATE (matrices)
318 : NULLIFY (matrices)
319 : END IF
320 125694 : END SUBROUTINE cp_fm_release_pa1
321 :
322 : ! **************************************************************************************************
323 : !> \brief ...
324 : !> \param matrices ...
325 : ! **************************************************************************************************
326 19266 : SUBROUTINE cp_fm_release_pa2(matrices)
327 : TYPE(cp_fm_type), DIMENSION(:, :), POINTER :: matrices
328 :
329 : INTEGER :: i, j
330 :
331 19266 : IF (ASSOCIATED(matrices)) THEN
332 43646 : DO i = 1, SIZE(matrices, 1)
333 85888 : DO j = 1, SIZE(matrices, 2)
334 75124 : CALL cp_fm_release(matrices(i, j))
335 : END DO
336 : END DO
337 10764 : DEALLOCATE (matrices)
338 : NULLIFY (matrices)
339 : END IF
340 19266 : END SUBROUTINE cp_fm_release_pa2
341 :
342 : ! **************************************************************************************************
343 : !> \brief ...
344 : !> \param matrices ...
345 : ! **************************************************************************************************
346 0 : SUBROUTINE cp_fm_release_pa3(matrices)
347 : TYPE(cp_fm_type), DIMENSION(:, :, :), POINTER :: matrices
348 :
349 : INTEGER :: i, j, k
350 :
351 0 : IF (ASSOCIATED(matrices)) THEN
352 0 : DO i = 1, SIZE(matrices, 1)
353 0 : DO j = 1, SIZE(matrices, 2)
354 0 : DO k = 1, SIZE(matrices, 3)
355 0 : CALL cp_fm_release(matrices(i, j, k))
356 : END DO
357 : END DO
358 : END DO
359 0 : DEALLOCATE (matrices)
360 : NULLIFY (matrices)
361 : END IF
362 0 : END SUBROUTINE cp_fm_release_pa3
363 :
364 : ! **************************************************************************************************
365 : !> \brief ...
366 : !> \param matrices ...
367 : ! **************************************************************************************************
368 0 : SUBROUTINE cp_fm_release_ap1(matrices)
369 : TYPE(cp_fm_p_type), ALLOCATABLE, DIMENSION(:) :: matrices
370 :
371 : INTEGER :: i
372 :
373 0 : IF (ALLOCATED(matrices)) THEN
374 0 : DO i = 1, SIZE(matrices)
375 0 : CALL cp_fm_release(matrices(i)%matrix)
376 0 : DEALLOCATE (matrices(i)%matrix)
377 : END DO
378 0 : DEALLOCATE (matrices)
379 : END IF
380 0 : END SUBROUTINE cp_fm_release_ap1
381 :
382 : ! **************************************************************************************************
383 : !> \brief ...
384 : !> \param matrices ...
385 : ! **************************************************************************************************
386 0 : SUBROUTINE cp_fm_release_ap2(matrices)
387 : TYPE(cp_fm_p_type), ALLOCATABLE, DIMENSION(:, :) :: matrices
388 :
389 : INTEGER :: i, j
390 :
391 0 : IF (ALLOCATED(matrices)) THEN
392 0 : DO i = 1, SIZE(matrices, 1)
393 0 : DO j = 1, SIZE(matrices, 2)
394 0 : CALL cp_fm_release(matrices(i, j)%matrix)
395 0 : DEALLOCATE (matrices(i, j)%matrix)
396 : END DO
397 : END DO
398 0 : DEALLOCATE (matrices)
399 : END IF
400 0 : END SUBROUTINE cp_fm_release_ap2
401 :
402 : ! **************************************************************************************************
403 : !> \brief ...
404 : !> \param matrices ...
405 : ! **************************************************************************************************
406 0 : SUBROUTINE cp_fm_release_pp1(matrices)
407 : TYPE(cp_fm_p_type), DIMENSION(:), POINTER :: matrices
408 :
409 : INTEGER :: i
410 :
411 0 : IF (ASSOCIATED(matrices)) THEN
412 0 : DO i = 1, SIZE(matrices)
413 0 : CALL cp_fm_release(matrices(i)%matrix)
414 0 : DEALLOCATE (matrices(i)%matrix)
415 : END DO
416 0 : DEALLOCATE (matrices)
417 : NULLIFY (matrices)
418 : END IF
419 0 : END SUBROUTINE cp_fm_release_pp1
420 :
421 : ! **************************************************************************************************
422 : !> \brief ...
423 : !> \param matrices ...
424 : ! **************************************************************************************************
425 0 : SUBROUTINE cp_fm_release_pp2(matrices)
426 : TYPE(cp_fm_p_type), DIMENSION(:, :), POINTER :: matrices
427 :
428 : INTEGER :: i, j
429 :
430 0 : IF (ASSOCIATED(matrices)) THEN
431 0 : DO i = 1, SIZE(matrices, 1)
432 0 : DO j = 1, SIZE(matrices, 2)
433 0 : CALL cp_fm_release(matrices(i, j)%matrix)
434 0 : DEALLOCATE (matrices(i, j)%matrix)
435 : END DO
436 : END DO
437 0 : DEALLOCATE (matrices)
438 : NULLIFY (matrices)
439 : END IF
440 0 : END SUBROUTINE cp_fm_release_pp2
441 :
442 : ! **************************************************************************************************
443 : !> \brief fills a matrix with random numbers
444 : !> \param matrix : to be initialized
445 : !> \param ncol : numbers of cols to fill
446 : !> \param start_col : starting at coll number
447 : !> \author Joost VandeVondele
448 : !> \note
449 : !> the value of a_ij is independent of the number of cpus
450 : ! **************************************************************************************************
451 2612 : SUBROUTINE cp_fm_init_random(matrix, ncol, start_col)
452 : TYPE(cp_fm_type), INTENT(IN) :: matrix
453 : INTEGER, INTENT(IN), OPTIONAL :: ncol, start_col
454 :
455 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_init_random'
456 :
457 : INTEGER :: handle, icol_global, icol_local, irow_local, my_ncol, my_start_col, ncol_global, &
458 : ncol_local, nrow_global, nrow_local
459 5224 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
460 2612 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: buff
461 : REAL(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
462 2612 : POINTER :: local_data
463 : REAL(KIND=dp), DIMENSION(3, 2), SAVE :: &
464 : seed = RESHAPE((/1.0_dp, 2.0_dp, 3.0_dp, 4.0_dp, 5.0_dp, 6.0_dp/), (/3, 2/))
465 : TYPE(rng_stream_type) :: rng
466 :
467 2612 : CALL timeset(routineN, handle)
468 :
469 : ! guarantee same seed over all tasks
470 2612 : CALL matrix%matrix_struct%para_env%bcast(seed, 0)
471 :
472 : rng = rng_stream_type("cp_fm_init_random_stream", distribution_type=UNIFORM, &
473 2612 : extended_precision=.TRUE., seed=seed)
474 :
475 2612 : CPASSERT(.NOT. matrix%use_sp)
476 :
477 : CALL cp_fm_get_info(matrix, nrow_global=nrow_global, ncol_global=ncol_global, &
478 : nrow_local=nrow_local, ncol_local=ncol_local, &
479 : local_data=local_data, &
480 2612 : row_indices=row_indices, col_indices=col_indices)
481 :
482 2612 : my_start_col = 1
483 2612 : IF (PRESENT(start_col)) my_start_col = start_col
484 2612 : my_ncol = matrix%matrix_struct%ncol_global
485 2612 : IF (PRESENT(ncol)) my_ncol = ncol
486 :
487 2612 : IF (ncol_global < (my_start_col + my_ncol - 1)) &
488 0 : CPABORT("ncol_global>=(my_start_col+my_ncol-1)")
489 :
490 7836 : ALLOCATE (buff(nrow_global))
491 :
492 : ! each global row has its own substream, in order to reach the stream for the local col,
493 : ! we just reset to the next substream
494 : ! following this, we fill the full buff with random numbers, and pick those we need
495 2612 : icol_global = 0
496 21375 : DO icol_local = 1, ncol_local
497 18763 : CPASSERT(col_indices(icol_local) > icol_global)
498 : DO
499 18763 : CALL rng%reset_to_next_substream()
500 18763 : icol_global = icol_global + 1
501 18763 : IF (icol_global == col_indices(icol_local)) EXIT
502 : END DO
503 18763 : CALL rng%fill(buff)
504 785317 : DO irow_local = 1, nrow_local
505 782705 : local_data(irow_local, icol_local) = buff(row_indices(irow_local))
506 : END DO
507 : END DO
508 :
509 2612 : DEALLOCATE (buff)
510 :
511 : ! store seed before deletion (unclear if this is the proper seed)
512 :
513 : ! Note that, the initial state (rng%ig) instead of the current state (rng%cg) is stored in the
514 : ! seed variable. As a consequence, each invocation of cp_fm_init_random uses exactly the same
515 : ! stream of random numbers. While this seems odd and contrary to the original design,
516 : ! it was probably introduced to improve reproducibility.
517 : ! See also https://github.com/cp2k/cp2k/pull/506
518 2612 : CALL rng%get(ig=seed)
519 :
520 2612 : CALL timestop(handle)
521 :
522 73136 : END SUBROUTINE cp_fm_init_random
523 :
524 : ! **************************************************************************************************
525 : !> \brief set all elements of a matrix to the same value,
526 : !> and optionally the diagonal to a different one
527 : !> \param matrix input matrix
528 : !> \param alpha scalar used to set all elements of the matrix
529 : !> \param beta scalar used to set diagonal of the matrix
530 : !> \note
531 : !> can be used to zero a matrix
532 : !> can be used to create a unit matrix (I-matrix) alpha=0.0_dp beta=1.0_dp
533 : ! **************************************************************************************************
534 299169 : SUBROUTINE cp_fm_set_all(matrix, alpha, beta)
535 :
536 : TYPE(cp_fm_type), INTENT(IN) :: matrix
537 : REAL(KIND=dp), INTENT(IN) :: alpha
538 : REAL(KIND=dp), INTENT(IN), OPTIONAL :: beta
539 :
540 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_set_all'
541 :
542 : INTEGER :: handle, i, n
543 :
544 299169 : CALL timeset(routineN, handle)
545 :
546 299169 : IF (matrix%use_sp) THEN
547 0 : matrix%local_data_sp(:, :) = REAL(alpha, sp)
548 : ELSE
549 152194503 : matrix%local_data(:, :) = alpha
550 : END IF
551 :
552 299169 : IF (PRESENT(beta)) THEN
553 47430 : CPASSERT(.NOT. matrix%use_sp)
554 47430 : n = MIN(matrix%matrix_struct%nrow_global, matrix%matrix_struct%ncol_global)
555 459162 : DO i = 1, n
556 459162 : CALL cp_fm_set_element(matrix, i, i, beta)
557 : END DO
558 : END IF
559 :
560 299169 : CALL timestop(handle)
561 :
562 299169 : END SUBROUTINE cp_fm_set_all
563 :
564 : ! **************************************************************************************************
565 : !> \brief returns the diagonal elements of a fm
566 : !> \param matrix ...
567 : !> \param diag ...
568 : ! **************************************************************************************************
569 10666 : SUBROUTINE cp_fm_get_diag(matrix, diag)
570 :
571 : ! arguments
572 : TYPE(cp_fm_type), INTENT(IN) :: matrix
573 : REAL(KIND=dp), DIMENSION(:), INTENT(OUT) :: diag
574 :
575 : ! locals
576 : INTEGER :: i, nrow_global
577 :
578 : #if defined(__SCALAPACK)
579 : INTEGER, DIMENSION(9) :: desca
580 : TYPE(cp_blacs_env_type), POINTER :: context
581 : INTEGER :: icol_local, ipcol, iprow, irow_local, mypcol, myprow, npcol, &
582 : nprow
583 10666 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
584 10666 : REAL(KIND=sp), DIMENSION(:, :), POINTER :: a_sp
585 : #endif
586 :
587 10666 : CALL cp_fm_get_info(matrix, nrow_global=nrow_global)
588 :
589 : #if defined(__SCALAPACK)
590 156202 : diag = 0.0_dp
591 10666 : context => matrix%matrix_struct%context
592 10666 : myprow = context%mepos(1)
593 10666 : mypcol = context%mepos(2)
594 10666 : nprow = context%num_pe(1)
595 10666 : npcol = context%num_pe(2)
596 :
597 10666 : a => matrix%local_data
598 10666 : a_sp => matrix%local_data_sp
599 106660 : desca(:) = matrix%matrix_struct%descriptor(:)
600 :
601 156202 : DO i = 1, nrow_global
602 : CALL infog2l(i, i, desca, nprow, npcol, myprow, mypcol, &
603 145536 : irow_local, icol_local, iprow, ipcol)
604 156202 : IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
605 72852 : IF (matrix%use_sp) THEN
606 0 : diag(i) = REAL(a_sp(irow_local, icol_local), dp)
607 : ELSE
608 72852 : diag(i) = a(irow_local, icol_local)
609 : END IF
610 : END IF
611 : END DO
612 : #else
613 : DO i = 1, nrow_global
614 : IF (matrix%use_sp) THEN
615 : diag(i) = REAL(matrix%local_data_sp(i, i), dp)
616 : ELSE
617 : diag(i) = matrix%local_data(i, i)
618 : END IF
619 : END DO
620 : #endif
621 301738 : CALL matrix%matrix_struct%para_env%sum(diag)
622 :
623 10666 : END SUBROUTINE cp_fm_get_diag
624 :
625 : ! **************************************************************************************************
626 : !> \brief returns an element of a fm
627 : !> this value is valid on every cpu
628 : !> using this call is expensive
629 : !> \param matrix the matrix to read
630 : !> \param irow_global the row
631 : !> \param icol_global the col
632 : !> \param alpha the value of matrix(irow_global, icol_global)
633 : !> \param local true if the element is on this cpu, false otherwise
634 : !> \note
635 : !> - modified semantics. now this function always returns the value
636 : !> previously the value was zero on cpus that didn't own the relevant
637 : !> part of the matrix (Joost VandeVondele, May 2003)
638 : !> - usage of the function should be avoided, as it is likely to rather slow
639 : !> using row_indices/col_indices/local_data + some smart scheme normally
640 : !> yields a real parallel code
641 : ! **************************************************************************************************
642 2551668 : SUBROUTINE cp_fm_get_element(matrix, irow_global, icol_global, alpha, local)
643 :
644 : ! arguments
645 : TYPE(cp_fm_type), INTENT(IN) :: matrix
646 : REAL(KIND=dp), INTENT(OUT) :: alpha
647 : INTEGER, INTENT(IN) :: icol_global, &
648 : irow_global
649 : LOGICAL, INTENT(OUT), OPTIONAL :: local
650 :
651 : ! locals
652 : #if defined(__SCALAPACK)
653 : INTEGER, DIMENSION(9) :: desca
654 : TYPE(cp_blacs_env_type), POINTER :: context
655 : INTEGER :: icol_local, ipcol, iprow, irow_local, mypcol, myprow, npcol, &
656 : nprow
657 2551668 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
658 : #endif
659 :
660 : #if defined(__SCALAPACK)
661 2551668 : context => matrix%matrix_struct%context
662 2551668 : myprow = context%mepos(1)
663 2551668 : mypcol = context%mepos(2)
664 2551668 : nprow = context%num_pe(1)
665 2551668 : npcol = context%num_pe(2)
666 :
667 2551668 : a => matrix%local_data
668 25516680 : desca(:) = matrix%matrix_struct%descriptor(:)
669 :
670 : CALL infog2l(irow_global, icol_global, desca, nprow, npcol, myprow, mypcol, &
671 2551668 : irow_local, icol_local, iprow, ipcol)
672 :
673 2551668 : IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
674 1275834 : alpha = a(irow_local, icol_local)
675 1275834 : CALL context%dgebs2d('All', ' ', 1, 1, alpha, 1)
676 1275834 : IF (PRESENT(local)) local = .TRUE.
677 : ELSE
678 1275834 : CALL context%dgebr2d('All', ' ', 1, 1, alpha, 1, iprow, ipcol)
679 1275834 : IF (PRESENT(local)) local = .FALSE.
680 : END IF
681 :
682 : #else
683 : IF (PRESENT(local)) local = .TRUE.
684 : alpha = matrix%local_data(irow_global, icol_global)
685 : #endif
686 :
687 2551668 : END SUBROUTINE cp_fm_get_element
688 :
689 : ! **************************************************************************************************
690 : !> \brief sets an element of a matrix
691 : !> \param matrix ...
692 : !> \param irow_global ...
693 : !> \param icol_global ...
694 : !> \param alpha ...
695 : !> \note
696 : !> we expect all cpus to have the same arguments in the call to this function
697 : !> (otherwise one should use local_data tricks)
698 : ! **************************************************************************************************
699 816070 : SUBROUTINE cp_fm_set_element(matrix, irow_global, icol_global, alpha)
700 : TYPE(cp_fm_type), INTENT(IN) :: matrix
701 : INTEGER, INTENT(IN) :: irow_global, icol_global
702 : REAL(KIND=dp), INTENT(IN) :: alpha
703 :
704 : INTEGER :: mypcol, myprow, npcol, nprow
705 : TYPE(cp_blacs_env_type), POINTER :: context
706 : #if defined(__SCALAPACK)
707 : INTEGER :: icol_local, ipcol, iprow, &
708 : irow_local
709 : INTEGER, DIMENSION(9) :: desca
710 816070 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
711 : #endif
712 :
713 816070 : context => matrix%matrix_struct%context
714 816070 : myprow = context%mepos(1)
715 816070 : mypcol = context%mepos(2)
716 816070 : nprow = context%num_pe(1)
717 816070 : npcol = context%num_pe(2)
718 :
719 0 : CPASSERT(.NOT. matrix%use_sp)
720 :
721 : #if defined(__SCALAPACK)
722 :
723 816070 : a => matrix%local_data
724 :
725 8160700 : desca(:) = matrix%matrix_struct%descriptor(:)
726 :
727 : CALL infog2l(irow_global, icol_global, desca, nprow, npcol, myprow, mypcol, &
728 816070 : irow_local, icol_local, iprow, ipcol)
729 :
730 816070 : IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
731 412370 : a(irow_local, icol_local) = alpha
732 : END IF
733 :
734 : #else
735 :
736 : matrix%local_data(irow_global, icol_global) = alpha
737 :
738 : #endif
739 816070 : END SUBROUTINE cp_fm_set_element
740 :
741 : ! **************************************************************************************************
742 : !> \brief sets a submatrix of a full matrix
743 : !> fm(start_row:start_row+n_rows,start_col:start_col+n_cols)
744 : !> = alpha*op(new_values)(1:n_rows,1:n_cols)+ beta
745 : !> * fm(start_row:start_row+n_rows,start_col:start_col+n_cols)
746 : !> \param fm the full to change
747 : !> \param new_values a replicated full matrix with the new values
748 : !> \param start_row the starting row of b_matrix (defaults to 1)
749 : !> \param start_col the starting col of b_matrix (defaults to 1)
750 : !> \param n_rows the number of row to change in b (defaults to
751 : !> size(op(new_values),1))
752 : !> \param n_cols the number of columns to change in b (defaults to
753 : !> size(op(new_values),2))
754 : !> \param alpha rescaling factor for the new values (defaults to 1.0)
755 : !> \param beta rescaling factor for the old values (defaults to 0.0)
756 : !> \param transpose if new_values should be transposed: if true
757 : !> op(new_values)=new_values^T, else op(new_values)=new_values
758 : !> (defaults to false)
759 : !> \par History
760 : !> 07.2002 created borrowing from Joost's blacs_replicated_copy [fawzi]
761 : !> \author Fawzi Mohamed
762 : !> \note
763 : !> optimized for full column updates and alpha=1.0, beta=0.0
764 : !> the new_values need to be valid on all cpus
765 : ! **************************************************************************************************
766 61251 : SUBROUTINE cp_fm_set_submatrix(fm, new_values, start_row, &
767 : start_col, n_rows, n_cols, alpha, beta, transpose)
768 : TYPE(cp_fm_type), INTENT(IN) :: fm
769 : REAL(KIND=dp), DIMENSION(:, :), INTENT(in) :: new_values
770 : INTEGER, INTENT(in), OPTIONAL :: start_row, start_col, n_rows, n_cols
771 : REAL(KIND=dp), INTENT(in), OPTIONAL :: alpha, beta
772 : LOGICAL, INTENT(in), OPTIONAL :: transpose
773 :
774 : INTEGER :: i, i0, j, j0, ncol, ncol_block, &
775 : ncol_global, ncol_local, nrow, &
776 : nrow_block, nrow_global, nrow_local, &
777 : this_col, this_row
778 61251 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
779 : LOGICAL :: tr_a
780 : REAL(KIND=dp) :: al, be
781 61251 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: full_block
782 :
783 61251 : al = 1.0_dp; be = 0.0_dp; i0 = 1; j0 = 1; tr_a = .FALSE.
784 : ! can be called too many times, making it a bit useless
785 : ! CALL timeset(routineN//','//moduleN,handle)
786 :
787 0 : CPASSERT(.NOT. fm%use_sp)
788 :
789 61251 : IF (PRESENT(alpha)) al = alpha
790 61251 : IF (PRESENT(beta)) be = beta
791 61251 : IF (PRESENT(start_row)) i0 = start_row
792 61251 : IF (PRESENT(start_col)) j0 = start_col
793 61251 : IF (PRESENT(transpose)) tr_a = transpose
794 15695 : IF (tr_a) THEN
795 15603 : nrow = SIZE(new_values, 2)
796 15603 : ncol = SIZE(new_values, 1)
797 : ELSE
798 45648 : nrow = SIZE(new_values, 1)
799 45648 : ncol = SIZE(new_values, 2)
800 : END IF
801 61251 : IF (PRESENT(n_rows)) nrow = n_rows
802 61251 : IF (PRESENT(n_cols)) ncol = n_cols
803 :
804 61251 : full_block => fm%local_data
805 :
806 : CALL cp_fm_get_info(matrix=fm, &
807 : nrow_global=nrow_global, ncol_global=ncol_global, &
808 : nrow_block=nrow_block, ncol_block=ncol_block, &
809 : nrow_local=nrow_local, ncol_local=ncol_local, &
810 61251 : row_indices=row_indices, col_indices=col_indices)
811 :
812 61251 : IF (al == 1.0 .AND. be == 0.0) THEN
813 1289094 : DO j = 1, ncol_local
814 1236981 : this_col = col_indices(j) - j0 + 1
815 1289094 : IF (this_col .GE. 1 .AND. this_col .LE. ncol) THEN
816 283421 : IF (tr_a) THEN
817 6465 : IF (i0 == 1 .AND. nrow_global == nrow) THEN
818 157692 : DO i = 1, nrow_local
819 157692 : full_block(i, j) = new_values(this_col, row_indices(i))
820 : END DO
821 : ELSE
822 594 : DO i = 1, nrow_local
823 510 : this_row = row_indices(i) - i0 + 1
824 594 : IF (this_row >= 1 .AND. this_row <= nrow) THEN
825 255 : full_block(i, j) = new_values(this_col, this_row)
826 : END IF
827 : END DO
828 : END IF
829 : ELSE
830 276956 : IF (i0 == 1 .AND. nrow_global == nrow) THEN
831 8894821 : DO i = 1, nrow_local
832 8894821 : full_block(i, j) = new_values(row_indices(i), this_col)
833 : END DO
834 : ELSE
835 4662 : DO i = 1, nrow_local
836 3990 : this_row = row_indices(i) - i0 + 1
837 4662 : IF (this_row >= 1 .AND. this_row <= nrow) THEN
838 476 : full_block(i, j) = new_values(this_row, this_col)
839 : END IF
840 : END DO
841 : END IF
842 : END IF
843 : END IF
844 : END DO
845 : ELSE
846 831608 : DO j = 1, ncol_local
847 822470 : this_col = col_indices(j) - j0 + 1
848 831608 : IF (this_col .GE. 1 .AND. this_col .LE. ncol) THEN
849 822470 : IF (tr_a) THEN
850 88223375 : DO i = 1, nrow_local
851 87400905 : this_row = row_indices(i) - i0 + 1
852 88223375 : IF (this_row >= 1 .AND. this_row <= nrow) THEN
853 : full_block(i, j) = al*new_values(this_col, this_row) + &
854 411235 : be*full_block(i, j)
855 : END IF
856 : END DO
857 : ELSE
858 0 : DO i = 1, nrow_local
859 0 : this_row = row_indices(i) - i0 + 1
860 0 : IF (this_row >= 1 .AND. this_row <= nrow) THEN
861 : full_block(i, j) = al*new_values(this_row, this_col) + &
862 0 : be*full_block(i, j)
863 : END IF
864 : END DO
865 : END IF
866 : END IF
867 : END DO
868 : END IF
869 :
870 : ! CALL timestop(handle)
871 :
872 61251 : END SUBROUTINE cp_fm_set_submatrix
873 :
874 : ! **************************************************************************************************
875 : !> \brief gets a submatrix of a full matrix
876 : !> op(target_m)(1:n_rows,1:n_cols)
877 : !> =fm(start_row:start_row+n_rows,start_col:start_col+n_cols)
878 : !> target_m is replicated on all cpus
879 : !> using this call is expensive
880 : !> \param fm the full you want to get the info from
881 : !> \param target_m a replicated full matrix that will contain the result
882 : !> \param start_row the starting row of b_matrix (defaults to 1)
883 : !> \param start_col the starting col of b_matrix (defaults to 1)
884 : !> \param n_rows the number of row to change in b (defaults to
885 : !> size(op(new_values),1))
886 : !> \param n_cols the number of columns to change in b (defaults to
887 : !> size(op(new_values),2))
888 : !> \param transpose if target_m should be transposed: if true
889 : !> op(target_m)=target_m^T, else op(target_m)=target_m
890 : !> (defaults to false)
891 : !> \par History
892 : !> 07.2002 created borrowing from Joost's blacs_replicated_copy [fawzi]
893 : !> \author Fawzi Mohamed
894 : !> \note
895 : !> optimized for full column updates. Zeros out a little too much
896 : !> of target_m
897 : !> the target_m is replicated and valid on all cpus
898 : ! **************************************************************************************************
899 70848 : SUBROUTINE cp_fm_get_submatrix(fm, target_m, start_row, &
900 : start_col, n_rows, n_cols, transpose)
901 : TYPE(cp_fm_type), INTENT(IN) :: fm
902 : REAL(KIND=dp), DIMENSION(:, :), INTENT(out) :: target_m
903 : INTEGER, INTENT(in), OPTIONAL :: start_row, start_col, n_rows, n_cols
904 : LOGICAL, INTENT(in), OPTIONAL :: transpose
905 :
906 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_get_submatrix'
907 :
908 : INTEGER :: handle, i, i0, j, j0, ncol, ncol_global, &
909 : ncol_local, nrow, nrow_global, &
910 : nrow_local, this_col, this_row
911 70848 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
912 : LOGICAL :: tr_a
913 70848 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: full_block
914 : TYPE(mp_para_env_type), POINTER :: para_env
915 :
916 70848 : CALL timeset(routineN, handle)
917 :
918 70848 : i0 = 1; j0 = 1; tr_a = .FALSE.
919 :
920 70848 : CPASSERT(.NOT. fm%use_sp)
921 :
922 70848 : IF (PRESENT(start_row)) i0 = start_row
923 70848 : IF (PRESENT(start_col)) j0 = start_col
924 70848 : IF (PRESENT(transpose)) tr_a = transpose
925 6240 : IF (tr_a) THEN
926 2362 : nrow = SIZE(target_m, 2)
927 2362 : ncol = SIZE(target_m, 1)
928 : ELSE
929 68486 : nrow = SIZE(target_m, 1)
930 68486 : ncol = SIZE(target_m, 2)
931 : END IF
932 70848 : IF (PRESENT(n_rows)) nrow = n_rows
933 70848 : IF (PRESENT(n_cols)) ncol = n_cols
934 :
935 70848 : para_env => fm%matrix_struct%para_env
936 :
937 70848 : full_block => fm%local_data
938 : #if defined(__SCALAPACK)
939 : ! zero-out whole target_m
940 70848 : IF (SIZE(target_m, 1)*SIZE(target_m, 2) /= 0) THEN
941 70798 : CALL dcopy(SIZE(target_m, 1)*SIZE(target_m, 2), [0.0_dp], 0, target_m, 1)
942 : END IF
943 : #endif
944 :
945 : CALL cp_fm_get_info(matrix=fm, &
946 : nrow_global=nrow_global, ncol_global=ncol_global, &
947 : nrow_local=nrow_local, ncol_local=ncol_local, &
948 70848 : row_indices=row_indices, col_indices=col_indices)
949 :
950 447704 : DO j = 1, ncol_local
951 376856 : this_col = col_indices(j) - j0 + 1
952 447704 : IF (this_col .GE. 1 .AND. this_col .LE. ncol) THEN
953 268986 : IF (tr_a) THEN
954 2362 : IF (i0 == 1 .AND. nrow_global == nrow) THEN
955 81863 : DO i = 1, nrow_local
956 81863 : target_m(this_col, row_indices(i)) = full_block(i, j)
957 : END DO
958 : ELSE
959 0 : DO i = 1, nrow_local
960 0 : this_row = row_indices(i) - i0 + 1
961 0 : IF (this_row >= 1 .AND. this_row <= nrow) THEN
962 0 : target_m(this_col, this_row) = full_block(i, j)
963 : END IF
964 : END DO
965 : END IF
966 : ELSE
967 266624 : IF (i0 == 1 .AND. nrow_global == nrow) THEN
968 5988839 : DO i = 1, nrow_local
969 5988839 : target_m(row_indices(i), this_col) = full_block(i, j)
970 : END DO
971 : ELSE
972 1168384 : DO i = 1, nrow_local
973 1141908 : this_row = row_indices(i) - i0 + 1
974 1168384 : IF (this_row >= 1 .AND. this_row <= nrow) THEN
975 24612 : target_m(this_row, this_col) = full_block(i, j)
976 : END IF
977 : END DO
978 : END IF
979 : END IF
980 : END IF
981 : END DO
982 :
983 15453868 : CALL para_env%sum(target_m)
984 :
985 70848 : CALL timestop(handle)
986 :
987 70848 : END SUBROUTINE cp_fm_get_submatrix
988 :
989 : ! **************************************************************************************************
990 : !> \brief returns all kind of information about the full matrix
991 : !> \param matrix ...
992 : !> \param name ...
993 : !> \param nrow_global ...
994 : !> \param ncol_global ...
995 : !> \param nrow_block ...
996 : !> \param ncol_block ...
997 : !> \param nrow_local ...
998 : !> \param ncol_local ...
999 : !> \param row_indices ...
1000 : !> \param col_indices ...
1001 : !> \param local_data ...
1002 : !> \param context ...
1003 : !> \param nrow_locals ...
1004 : !> \param ncol_locals ...
1005 : !> \param matrix_struct ...
1006 : !> \param para_env ...
1007 : !> \note
1008 : !> see also cp_fm_struct for explanation
1009 : !> - nrow_local, ncol_local, row_indices, col_indices, local_data are hooks for efficient
1010 : !> access to the local blacs block
1011 : ! **************************************************************************************************
1012 5315253 : SUBROUTINE cp_fm_get_info(matrix, name, nrow_global, ncol_global, &
1013 : nrow_block, ncol_block, nrow_local, ncol_local, &
1014 : row_indices, col_indices, local_data, context, &
1015 : nrow_locals, ncol_locals, matrix_struct, para_env)
1016 :
1017 : TYPE(cp_fm_type), INTENT(IN) :: matrix
1018 : CHARACTER(LEN=*), INTENT(OUT), OPTIONAL :: name
1019 : INTEGER, INTENT(OUT), OPTIONAL :: nrow_global, ncol_global, nrow_block, &
1020 : ncol_block, nrow_local, ncol_local
1021 : INTEGER, DIMENSION(:), OPTIONAL, POINTER :: row_indices, col_indices
1022 : REAL(KIND=dp), CONTIGUOUS, DIMENSION(:, :), &
1023 : OPTIONAL, POINTER :: local_data
1024 : TYPE(cp_blacs_env_type), OPTIONAL, POINTER :: context
1025 : INTEGER, DIMENSION(:), OPTIONAL, POINTER :: nrow_locals, ncol_locals
1026 : TYPE(cp_fm_struct_type), OPTIONAL, POINTER :: matrix_struct
1027 : TYPE(mp_para_env_type), OPTIONAL, POINTER :: para_env
1028 :
1029 8 : IF (PRESENT(name)) name = matrix%name
1030 5315253 : IF (PRESENT(matrix_struct)) matrix_struct => matrix%matrix_struct
1031 5315253 : IF (PRESENT(local_data)) local_data => matrix%local_data ! not hiding things anymore :-(
1032 :
1033 : CALL cp_fm_struct_get(matrix%matrix_struct, nrow_local=nrow_local, &
1034 : ncol_local=ncol_local, nrow_global=nrow_global, &
1035 : ncol_global=ncol_global, nrow_block=nrow_block, &
1036 : ncol_block=ncol_block, row_indices=row_indices, &
1037 : col_indices=col_indices, nrow_locals=nrow_locals, &
1038 25839921 : ncol_locals=ncol_locals, context=context, para_env=para_env)
1039 :
1040 5315253 : END SUBROUTINE cp_fm_get_info
1041 :
1042 : ! **************************************************************************************************
1043 : !> \brief Write nicely formatted info about the FM to the given I/O unit (including the underlying FM struct)
1044 : !> \param matrix a cp_fm_type instance
1045 : !> \param io_unit the I/O unit to use for writing
1046 : ! **************************************************************************************************
1047 3 : SUBROUTINE cp_fm_write_info(matrix, io_unit)
1048 : TYPE(cp_fm_type), INTENT(IN) :: matrix
1049 : INTEGER, INTENT(IN) :: io_unit
1050 :
1051 3 : WRITE (io_unit, '(/,A,A12)') "CP_FM | Name: ", matrix%name
1052 3 : CALL cp_fm_struct_write_info(matrix%matrix_struct, io_unit)
1053 3 : END SUBROUTINE cp_fm_write_info
1054 :
1055 : ! **************************************************************************************************
1056 : !> \brief find the maximum absolute value of the matrix element
1057 : !> maxval(abs(matrix))
1058 : !> \param matrix ...
1059 : !> \param a_max ...
1060 : !> \param ir_max ...
1061 : !> \param ic_max ...
1062 : ! **************************************************************************************************
1063 89625 : SUBROUTINE cp_fm_maxabsval(matrix, a_max, ir_max, ic_max)
1064 : TYPE(cp_fm_type), INTENT(IN) :: matrix
1065 : REAL(KIND=dp), INTENT(OUT) :: a_max
1066 : INTEGER, INTENT(OUT), OPTIONAL :: ir_max, ic_max
1067 :
1068 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_maxabsval'
1069 :
1070 : INTEGER :: handle, i, ic_max_local, ir_max_local, &
1071 : j, mepos, ncol_local, nrow_local, &
1072 : num_pe
1073 89625 : INTEGER, ALLOCATABLE, DIMENSION(:) :: ic_max_vec, ir_max_vec
1074 89625 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1075 : REAL(dp) :: my_max
1076 89625 : REAL(dp), ALLOCATABLE, DIMENSION(:) :: a_max_vec
1077 89625 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: my_block
1078 89625 : REAL(KIND=sp), DIMENSION(:, :), POINTER :: my_block_sp
1079 :
1080 89625 : CALL timeset(routineN, handle)
1081 :
1082 89625 : my_block => matrix%local_data
1083 89625 : my_block_sp => matrix%local_data_sp
1084 :
1085 : CALL cp_fm_get_info(matrix, nrow_local=nrow_local, ncol_local=ncol_local, &
1086 89625 : row_indices=row_indices, col_indices=col_indices)
1087 :
1088 89625 : IF (matrix%use_sp) THEN
1089 0 : a_max = REAL(MAXVAL(ABS(my_block_sp(1:nrow_local, 1:ncol_local))), dp)
1090 : ELSE
1091 55194339 : a_max = MAXVAL(ABS(my_block(1:nrow_local, 1:ncol_local)))
1092 : END IF
1093 :
1094 89625 : IF (PRESENT(ir_max)) THEN
1095 0 : num_pe = matrix%matrix_struct%para_env%num_pe
1096 0 : mepos = matrix%matrix_struct%para_env%mepos
1097 0 : ALLOCATE (ir_max_vec(0:num_pe - 1))
1098 0 : ir_max_vec(0:num_pe - 1) = 0
1099 0 : ALLOCATE (ic_max_vec(0:num_pe - 1))
1100 0 : ic_max_vec(0:num_pe - 1) = 0
1101 0 : ALLOCATE (a_max_vec(0:num_pe - 1))
1102 0 : a_max_vec(0:num_pe - 1) = 0.0_dp
1103 0 : my_max = 0.0_dp
1104 :
1105 0 : IF ((ncol_local > 0) .AND. (nrow_local > 0)) THEN
1106 0 : DO i = 1, ncol_local
1107 0 : DO j = 1, nrow_local
1108 0 : IF (matrix%use_sp) THEN
1109 0 : IF (ABS(my_block_sp(j, i)) > my_max) THEN
1110 0 : my_max = REAL(my_block_sp(j, i), dp)
1111 0 : ir_max_local = j
1112 0 : ic_max_local = i
1113 : END IF
1114 : ELSE
1115 0 : IF (ABS(my_block(j, i)) > my_max) THEN
1116 0 : my_max = my_block(j, i)
1117 0 : ir_max_local = j
1118 0 : ic_max_local = i
1119 : END IF
1120 : END IF
1121 : END DO
1122 : END DO
1123 :
1124 0 : a_max_vec(mepos) = my_max
1125 0 : ir_max_vec(mepos) = row_indices(ir_max_local)
1126 0 : ic_max_vec(mepos) = col_indices(ic_max_local)
1127 :
1128 : END IF
1129 :
1130 0 : CALL matrix%matrix_struct%para_env%sum(a_max_vec)
1131 0 : CALL matrix%matrix_struct%para_env%sum(ir_max_vec)
1132 0 : CALL matrix%matrix_struct%para_env%sum(ic_max_vec)
1133 :
1134 0 : my_max = 0.0_dp
1135 0 : DO i = 0, num_pe - 1
1136 0 : IF (a_max_vec(i) > my_max) THEN
1137 0 : ir_max = ir_max_vec(i)
1138 0 : ic_max = ic_max_vec(i)
1139 : END IF
1140 : END DO
1141 :
1142 0 : DEALLOCATE (ir_max_vec, ic_max_vec, a_max_vec)
1143 0 : CPASSERT(ic_max > 0)
1144 0 : CPASSERT(ir_max > 0)
1145 :
1146 : END IF
1147 :
1148 89625 : CALL matrix%matrix_struct%para_env%max(a_max)
1149 :
1150 89625 : CALL timestop(handle)
1151 :
1152 179250 : END SUBROUTINE cp_fm_maxabsval
1153 :
1154 : ! **************************************************************************************************
1155 : !> \brief find the maximum over the rows of the sum of the absolute values of the elements of a given row
1156 : !> = || A ||_infinity
1157 : !> \param matrix ...
1158 : !> \param a_max ...
1159 : !> \note
1160 : !> for a real symmetric matrix it holds that || A ||_2 = |lambda_max| < || A ||_infinity
1161 : !> Hence this can be used to estimate an upper bound for the eigenvalues of a matrix
1162 : !> http://mathworld.wolfram.com/MatrixNorm.html
1163 : !> (but the bound is not so tight in the general case)
1164 : ! **************************************************************************************************
1165 4294 : SUBROUTINE cp_fm_maxabsrownorm(matrix, a_max)
1166 : TYPE(cp_fm_type), INTENT(IN) :: matrix
1167 : REAL(KIND=dp), INTENT(OUT) :: a_max
1168 :
1169 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_maxabsrownorm'
1170 :
1171 : INTEGER :: handle, i, j, ncol_local, nrow_global, &
1172 : nrow_local
1173 4294 : INTEGER, DIMENSION(:), POINTER :: row_indices
1174 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: values
1175 4294 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: my_block
1176 :
1177 4294 : CALL timeset(routineN, handle)
1178 :
1179 4294 : my_block => matrix%local_data
1180 :
1181 4294 : CPASSERT(.NOT. matrix%use_sp)
1182 :
1183 : CALL cp_fm_get_info(matrix, row_indices=row_indices, nrow_global=nrow_global, &
1184 4294 : nrow_local=nrow_local, ncol_local=ncol_local)
1185 :
1186 : ! the efficiency could be improved by making use of the row-col distribution of scalapack
1187 12882 : ALLOCATE (values(nrow_global))
1188 62846 : values = 0.0_dp
1189 62846 : DO j = 1, ncol_local
1190 530930 : DO i = 1, nrow_local
1191 526636 : values(row_indices(i)) = values(row_indices(i)) + ABS(my_block(i, j))
1192 : END DO
1193 : END DO
1194 4294 : CALL matrix%matrix_struct%para_env%sum(values)
1195 67140 : a_max = MAXVAL(values)
1196 4294 : DEALLOCATE (values)
1197 :
1198 4294 : CALL timestop(handle)
1199 8588 : END SUBROUTINE
1200 :
1201 : ! **************************************************************************************************
1202 : !> \brief find the inorm of each column norm_{j}= sqrt( \sum_{i} A_{ij}*A_{ij} )
1203 : !> \param matrix ...
1204 : !> \param norm_array ...
1205 : ! **************************************************************************************************
1206 1282 : SUBROUTINE cp_fm_vectorsnorm(matrix, norm_array)
1207 : TYPE(cp_fm_type), INTENT(IN) :: matrix
1208 : REAL(KIND=dp), DIMENSION(:), INTENT(OUT) :: norm_array
1209 :
1210 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_vectorsnorm'
1211 :
1212 : INTEGER :: handle, i, j, ncol_global, ncol_local, &
1213 : nrow_local
1214 1282 : INTEGER, DIMENSION(:), POINTER :: col_indices
1215 1282 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: my_block
1216 :
1217 1282 : CALL timeset(routineN, handle)
1218 :
1219 1282 : my_block => matrix%local_data
1220 :
1221 1282 : CPASSERT(.NOT. matrix%use_sp)
1222 :
1223 : CALL cp_fm_get_info(matrix, col_indices=col_indices, ncol_global=ncol_global, &
1224 1282 : nrow_local=nrow_local, ncol_local=ncol_local)
1225 :
1226 : ! the efficiency could be improved by making use of the row-col distribution of scalapack
1227 29746 : norm_array = 0.0_dp
1228 29746 : DO j = 1, ncol_local
1229 1261016 : DO i = 1, nrow_local
1230 1259734 : norm_array(col_indices(j)) = norm_array(col_indices(j)) + my_block(i, j)*my_block(i, j)
1231 : END DO
1232 : END DO
1233 58210 : CALL matrix%matrix_struct%para_env%sum(norm_array)
1234 29746 : norm_array = SQRT(norm_array)
1235 :
1236 1282 : CALL timestop(handle)
1237 1282 : END SUBROUTINE
1238 :
1239 : ! **************************************************************************************************
1240 : !> \brief summing up all the elements along the matrix's i-th index
1241 : !> \f$ \mathrm{sum}_{j} = \sum_{i} A_{ij} \f$
1242 : !> or
1243 : !> \f$ \mathrm{sum}_{i} = \sum_{j} A_{ij} \f$
1244 : !> \param matrix an input matrix A
1245 : !> \param sum_array sums of elements in each column/row
1246 : !> \param dir ...
1247 : !> \note forked from cp_fm_vectorsnorm() to be used with
1248 : !> the maximum overlap method
1249 : !> added row variation
1250 : ! **************************************************************************************************
1251 7760 : SUBROUTINE cp_fm_vectorssum(matrix, sum_array, dir)
1252 : TYPE(cp_fm_type), INTENT(IN) :: matrix
1253 : REAL(KIND=dp), DIMENSION(:), INTENT(OUT) :: sum_array
1254 : CHARACTER(LEN=1), INTENT(IN), OPTIONAL :: dir
1255 :
1256 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_vectorssum'
1257 :
1258 : INTEGER :: handle, i, j, ncol_local, nrow_local
1259 7760 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1260 : LOGICAL :: docol
1261 7760 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: my_block
1262 :
1263 7760 : CALL timeset(routineN, handle)
1264 :
1265 7760 : IF (PRESENT(dir)) THEN
1266 7720 : IF (dir == 'c' .OR. dir == 'C') THEN
1267 : docol = .TRUE.
1268 7720 : ELSEIF (dir == 'r' .OR. dir == 'R') THEN
1269 : docol = .FALSE.
1270 : ELSE
1271 0 : CPABORT('Wrong argument DIR')
1272 : END IF
1273 : ELSE
1274 : docol = .TRUE.
1275 : END IF
1276 :
1277 7760 : my_block => matrix%local_data
1278 :
1279 7760 : CPASSERT(.NOT. matrix%use_sp)
1280 :
1281 : CALL cp_fm_get_info(matrix, col_indices=col_indices, row_indices=row_indices, &
1282 7760 : nrow_local=nrow_local, ncol_local=ncol_local)
1283 :
1284 : ! the efficiency could be improved by making use of the row-col distribution of scalapack
1285 240690 : sum_array(:) = 0.0_dp
1286 7760 : IF (docol) THEN
1287 448 : DO j = 1, ncol_local
1288 3628 : DO i = 1, nrow_local
1289 3588 : sum_array(col_indices(j)) = sum_array(col_indices(j)) + my_block(i, j)
1290 : END DO
1291 : END DO
1292 : ELSE
1293 86224 : DO j = 1, ncol_local
1294 6633258 : DO i = 1, nrow_local
1295 6625538 : sum_array(row_indices(i)) = sum_array(row_indices(i)) + my_block(i, j)
1296 : END DO
1297 : END DO
1298 : END IF
1299 473620 : CALL matrix%matrix_struct%para_env%sum(sum_array)
1300 :
1301 7760 : CALL timestop(handle)
1302 7760 : END SUBROUTINE
1303 :
1304 : ! **************************************************************************************************
1305 : !> \brief copy one identically sized matrix in the other
1306 : !> \param source ...
1307 : !> \param destination ...
1308 : !> \note
1309 : !> see also cp_fm_to_fm_columns
1310 : ! **************************************************************************************************
1311 359180 : SUBROUTINE cp_fm_to_fm_matrix(source, destination)
1312 :
1313 : TYPE(cp_fm_type), INTENT(IN) :: source, destination
1314 :
1315 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_to_fm_matrix'
1316 :
1317 : INTEGER :: handle, npcol, nprow
1318 :
1319 359180 : CALL timeset(routineN, handle)
1320 :
1321 359180 : nprow = source%matrix_struct%context%num_pe(1)
1322 359180 : npcol = source%matrix_struct%context%num_pe(2)
1323 :
1324 359180 : IF ((.NOT. cp2k_is_parallel) .OR. &
1325 : cp_fm_struct_equivalent(source%matrix_struct, &
1326 : destination%matrix_struct)) THEN
1327 359180 : IF (source%use_sp .AND. destination%use_sp) THEN
1328 0 : IF (SIZE(source%local_data_sp, 1) /= SIZE(destination%local_data_sp, 1) .OR. &
1329 : SIZE(source%local_data_sp, 2) /= SIZE(destination%local_data_sp, 2)) &
1330 : CALL cp_abort(__LOCATION__, &
1331 : "Cannot copy full matrix <"//TRIM(source%name)// &
1332 : "> to full matrix <"//TRIM(destination%name)// &
1333 0 : ">. The local_data blocks have different sizes.")
1334 : CALL scopy(SIZE(source%local_data_sp, 1)*SIZE(source%local_data_sp, 2), &
1335 0 : source%local_data_sp, 1, destination%local_data_sp, 1)
1336 359180 : ELSE IF (source%use_sp .AND. .NOT. destination%use_sp) THEN
1337 0 : IF (SIZE(source%local_data_sp, 1) /= SIZE(destination%local_data, 1) .OR. &
1338 : SIZE(source%local_data_sp, 2) /= SIZE(destination%local_data, 2)) &
1339 : CALL cp_abort(__LOCATION__, &
1340 : "Cannot copy full matrix <"//TRIM(source%name)// &
1341 : "> to full matrix <"//TRIM(destination%name)// &
1342 0 : ">. The local_data blocks have different sizes.")
1343 0 : destination%local_data = REAL(source%local_data_sp, dp)
1344 359180 : ELSE IF (.NOT. source%use_sp .AND. destination%use_sp) THEN
1345 0 : IF (SIZE(source%local_data, 1) /= SIZE(destination%local_data_sp, 1) .OR. &
1346 : SIZE(source%local_data, 2) /= SIZE(destination%local_data_sp, 2)) &
1347 : CALL cp_abort(__LOCATION__, &
1348 : "Cannot copy full matrix <"//TRIM(source%name)// &
1349 : "> to full matrix <"//TRIM(destination%name)// &
1350 0 : ">. The local_data blocks have different sizes.")
1351 0 : destination%local_data_sp = REAL(source%local_data, sp)
1352 : ELSE
1353 359180 : IF (SIZE(source%local_data, 1) /= SIZE(destination%local_data, 1) .OR. &
1354 : SIZE(source%local_data, 2) /= SIZE(destination%local_data, 2)) &
1355 : CALL cp_abort(__LOCATION__, &
1356 : "Cannot copy full matrix <"//TRIM(source%name)// &
1357 : "> to full matrix <"//TRIM(destination%name)// &
1358 0 : ">. The local_data blocks have different sizes.")
1359 : CALL dcopy(SIZE(source%local_data, 1)*SIZE(source%local_data, 2), &
1360 359180 : source%local_data, 1, destination%local_data, 1)
1361 : END IF
1362 : ELSE
1363 0 : CPABORT("Data structures of source and target full matrix are not equivalent")
1364 : END IF
1365 :
1366 359180 : CALL timestop(handle)
1367 :
1368 359180 : END SUBROUTINE cp_fm_to_fm_matrix
1369 :
1370 : ! **************************************************************************************************
1371 : !> \brief copy just a subset of columns of a fm to a fm
1372 : !> \param msource ...
1373 : !> \param mtarget ...
1374 : !> \param ncol ...
1375 : !> \param source_start ...
1376 : !> \param target_start ...
1377 : ! **************************************************************************************************
1378 162914 : SUBROUTINE cp_fm_to_fm_columns(msource, mtarget, ncol, source_start, &
1379 : target_start)
1380 :
1381 : TYPE(cp_fm_type), INTENT(IN) :: msource, mtarget
1382 : INTEGER, INTENT(IN) :: ncol
1383 : INTEGER, INTENT(IN), OPTIONAL :: source_start, target_start
1384 :
1385 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_to_fm_columns'
1386 :
1387 : INTEGER :: handle, n, ss, ts
1388 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, b
1389 : #if defined(__SCALAPACK)
1390 : INTEGER :: i
1391 : INTEGER, DIMENSION(9) :: desca, descb
1392 : #endif
1393 :
1394 162914 : CALL timeset(routineN, handle)
1395 :
1396 162914 : ss = 1
1397 162914 : ts = 1
1398 :
1399 162914 : IF (PRESENT(source_start)) ss = source_start
1400 162914 : IF (PRESENT(target_start)) ts = target_start
1401 :
1402 162914 : n = msource%matrix_struct%nrow_global
1403 :
1404 162914 : a => msource%local_data
1405 162914 : b => mtarget%local_data
1406 :
1407 : #if defined(__SCALAPACK)
1408 1629140 : desca(:) = msource%matrix_struct%descriptor(:)
1409 1629140 : descb(:) = mtarget%matrix_struct%descriptor(:)
1410 643018 : DO i = 0, ncol - 1
1411 643018 : CALL pdcopy(n, a, 1, ss + i, desca, 1, b, 1, ts + i, descb, 1)
1412 : END DO
1413 : #else
1414 : CALL dcopy(ncol*n, a(:, ss), 1, b(:, ts), 1)
1415 : #endif
1416 :
1417 162914 : CALL timestop(handle)
1418 :
1419 162914 : END SUBROUTINE cp_fm_to_fm_columns
1420 :
1421 : ! **************************************************************************************************
1422 : !> \brief copy just a triangular matrix
1423 : !> \param msource ...
1424 : !> \param mtarget ...
1425 : !> \param uplo ...
1426 : ! **************************************************************************************************
1427 370 : SUBROUTINE cp_fm_to_fm_triangular(msource, mtarget, uplo)
1428 :
1429 : TYPE(cp_fm_type), INTENT(IN) :: msource, mtarget
1430 : CHARACTER(LEN=*), INTENT(IN) :: uplo
1431 :
1432 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_to_fm_triangular'
1433 :
1434 : INTEGER :: handle, ncol, nrow
1435 370 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, b
1436 : #if defined(__SCALAPACK)
1437 : INTEGER, DIMENSION(9) :: desca, descb
1438 : #endif
1439 :
1440 370 : CALL timeset(routineN, handle)
1441 :
1442 370 : nrow = msource%matrix_struct%nrow_global
1443 370 : ncol = msource%matrix_struct%ncol_global
1444 :
1445 370 : a => msource%local_data
1446 370 : b => mtarget%local_data
1447 :
1448 : #if defined(__SCALAPACK)
1449 3700 : desca(:) = msource%matrix_struct%descriptor(:)
1450 3700 : descb(:) = mtarget%matrix_struct%descriptor(:)
1451 370 : CALL pdlacpy(uplo, nrow, ncol, a(1, 1), 1, 1, desca, b(1, 1), 1, 1, descb)
1452 : #else
1453 : CALL dlacpy(uplo, nrow, ncol, a(1, 1), nrow, b(1, 1), nrow)
1454 : #endif
1455 :
1456 370 : CALL timestop(handle)
1457 :
1458 370 : END SUBROUTINE cp_fm_to_fm_triangular
1459 :
1460 : ! **************************************************************************************************
1461 : !> \brief copy just a part ot the matrix
1462 : !> \param msource ...
1463 : !> \param mtarget ...
1464 : !> \param nrow ...
1465 : !> \param ncol ...
1466 : !> \param s_firstrow ...
1467 : !> \param s_firstcol ...
1468 : !> \param t_firstrow ...
1469 : !> \param t_firstcol ...
1470 : ! **************************************************************************************************
1471 :
1472 6806 : SUBROUTINE cp_fm_to_fm_submat(msource, mtarget, nrow, ncol, s_firstrow, s_firstcol, t_firstrow, t_firstcol)
1473 :
1474 : TYPE(cp_fm_type), INTENT(IN) :: msource, mtarget
1475 : INTEGER, INTENT(IN) :: nrow, ncol, s_firstrow, &
1476 : s_firstcol, t_firstrow, &
1477 : t_firstcol
1478 :
1479 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_to_fm_submat'
1480 :
1481 : INTEGER :: handle, i, na, nb, ss, ts
1482 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, b
1483 : #if defined(__SCALAPACK)
1484 : INTEGER, DIMENSION(9) :: desca, descb
1485 : #endif
1486 :
1487 6806 : CALL timeset(routineN, handle)
1488 :
1489 6806 : a => msource%local_data
1490 6806 : b => mtarget%local_data
1491 :
1492 6806 : na = msource%matrix_struct%nrow_global
1493 6806 : nb = mtarget%matrix_struct%nrow_global
1494 : ! nrow must be <= na and nb
1495 6806 : IF (nrow > na) &
1496 0 : CPABORT("cannot copy because nrow > number of rows of source matrix")
1497 6806 : IF (nrow > nb) &
1498 0 : CPABORT("cannot copy because nrow > number of rows of target matrix")
1499 6806 : na = msource%matrix_struct%ncol_global
1500 6806 : nb = mtarget%matrix_struct%ncol_global
1501 : ! ncol must be <= na_col and nb_col
1502 6806 : IF (ncol > na) &
1503 0 : CPABORT("cannot copy because nrow > number of rows of source matrix")
1504 6806 : IF (ncol > nb) &
1505 0 : CPABORT("cannot copy because nrow > number of rows of target matrix")
1506 :
1507 : #if defined(__SCALAPACK)
1508 68060 : desca(:) = msource%matrix_struct%descriptor(:)
1509 68060 : descb(:) = mtarget%matrix_struct%descriptor(:)
1510 101378 : DO i = 0, ncol - 1
1511 94572 : ss = s_firstcol + i
1512 94572 : ts = t_firstcol + i
1513 101378 : CALL pdcopy(nrow, a, s_firstrow, ss, desca, 1, b, t_firstrow, ts, descb, 1)
1514 : END DO
1515 : #else
1516 : DO i = 0, ncol - 1
1517 : ss = s_firstcol + i
1518 : ts = t_firstcol + i
1519 : CALL dcopy(nrow, a(s_firstrow:, ss), 1, b(t_firstrow:, ts), 1)
1520 : END DO
1521 : #endif
1522 :
1523 6806 : CALL timestop(handle)
1524 6806 : END SUBROUTINE cp_fm_to_fm_submat
1525 :
1526 : ! **************************************************************************************************
1527 : !> \brief General copy of a fm matrix to another fm matrix.
1528 : !> Uses non-blocking MPI rather than ScaLAPACK.
1529 : !>
1530 : !> \param source input fm matrix
1531 : !> \param destination output fm matrix
1532 : !> \param para_env parallel environment corresponding to the BLACS env that covers all parts
1533 : !> of the input and output matrices
1534 : !> \par History
1535 : !> 31-Jan-2017 : Re-implemented using non-blocking MPI [IainB, MarkT]
1536 : ! **************************************************************************************************
1537 8934 : SUBROUTINE cp_fm_copy_general(source, destination, para_env)
1538 : TYPE(cp_fm_type), INTENT(IN) :: source, destination
1539 : TYPE(mp_para_env_type), INTENT(IN) :: para_env
1540 :
1541 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_copy_general'
1542 :
1543 : INTEGER :: handle
1544 80406 : TYPE(copy_info_type) :: info
1545 :
1546 8934 : CALL timeset(routineN, handle)
1547 :
1548 8934 : CALL cp_fm_start_copy_general(source, destination, para_env, info)
1549 8934 : IF (ASSOCIATED(destination%matrix_struct)) THEN
1550 8920 : CALL cp_fm_finish_copy_general(destination, info)
1551 : END IF
1552 8934 : IF (ASSOCIATED(source%matrix_struct)) THEN
1553 8780 : CALL cp_fm_cleanup_copy_general(info)
1554 : END IF
1555 :
1556 8934 : CALL timestop(handle)
1557 8934 : END SUBROUTINE cp_fm_copy_general
1558 :
1559 : ! **************************************************************************************************
1560 : !> \brief Initiates the copy operation: get distribution data, post MPI isend and irecvs
1561 : !> \param source input fm matrix
1562 : !> \param destination output fm matrix
1563 : !> \param para_env parallel environment corresponding to the BLACS env that covers all parts
1564 : !> of the input and output matrices
1565 : !> \param info all of the data that will be needed to complete the copy operation
1566 : ! **************************************************************************************************
1567 1417086 : SUBROUTINE cp_fm_start_copy_general(source, destination, para_env, info)
1568 : TYPE(cp_fm_type), INTENT(IN) :: source, destination
1569 : TYPE(mp_para_env_type), INTENT(IN) :: para_env
1570 : TYPE(copy_info_type), INTENT(OUT) :: info
1571 :
1572 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_start_copy_general'
1573 :
1574 : INTEGER :: dest_p_i, dest_q_j, global_rank, global_size, handle, i, j, k, mpi_rank, &
1575 : ncol_block_dest, ncol_block_src, ncol_local_recv, ncol_local_send, ncols, &
1576 : nrow_block_dest, nrow_block_src, nrow_local_recv, nrow_local_send, nrows, p, q, &
1577 : recv_rank, recv_size, send_rank, send_size
1578 157454 : INTEGER, ALLOCATABLE, DIMENSION(:) :: all_ranks, dest2global, dest_p, dest_q, &
1579 314908 : recv_count, send_count, send_disp, &
1580 157454 : source2global, src_p, src_q
1581 157454 : INTEGER, ALLOCATABLE, DIMENSION(:, :) :: dest_blacs2mpi
1582 : INTEGER, DIMENSION(2) :: dest_block, dest_block_tmp, dest_num_pe, &
1583 : src_block, src_block_tmp, src_num_pe
1584 314908 : INTEGER, DIMENSION(:), POINTER :: recv_col_indices, recv_row_indices, &
1585 314908 : send_col_indices, send_row_indices
1586 : TYPE(cp_fm_struct_type), POINTER :: recv_dist, send_dist
1587 2204356 : TYPE(mp_request_type), DIMENSION(6) :: recv_req, send_req
1588 :
1589 157454 : CALL timeset(routineN, handle)
1590 :
1591 : IF (.NOT. cp2k_is_parallel) THEN
1592 : ! Just copy all of the matrix data into a 'send buffer', to be unpacked later
1593 : nrow_local_send = SIZE(source%local_data, 1)
1594 : ncol_local_send = SIZE(source%local_data, 2)
1595 : ALLOCATE (info%send_buf(nrow_local_send*ncol_local_send))
1596 : k = 0
1597 : DO j = 1, ncol_local_send
1598 : DO i = 1, nrow_local_send
1599 : k = k + 1
1600 : info%send_buf(k) = source%local_data(i, j)
1601 : END DO
1602 : END DO
1603 : ELSE
1604 : ! assumption of double precision data is carried forward from ScaLAPACK version
1605 157454 : IF (source%use_sp) CPABORT("only DP kind implemented")
1606 157454 : IF (destination%use_sp) CPABORT("only DP kind implemented")
1607 :
1608 157454 : NULLIFY (recv_dist, send_dist)
1609 157454 : NULLIFY (recv_col_indices, recv_row_indices, send_col_indices, send_row_indices)
1610 :
1611 : ! The 'global' communicator contains both the source and destination decompositions
1612 157454 : global_size = para_env%num_pe
1613 157454 : global_rank = para_env%mepos
1614 :
1615 : ! The source/send decomposition and destination/recv decompositions may only exist on
1616 : ! on a subset of the processes involved in the communication
1617 : ! Check if the source and/or destination arguments are .not. ASSOCIATED():
1618 : ! if so, skip the send / recv parts (since these processes do not participate in the sending/receiving distribution)
1619 157454 : IF (ASSOCIATED(destination%matrix_struct)) THEN
1620 126116 : recv_dist => destination%matrix_struct
1621 126116 : recv_rank = recv_dist%para_env%mepos
1622 : ELSE
1623 31338 : recv_rank = mp_proc_null
1624 : END IF
1625 :
1626 157454 : IF (ASSOCIATED(source%matrix_struct)) THEN
1627 138864 : send_dist => source%matrix_struct
1628 138864 : send_rank = send_dist%para_env%mepos
1629 : ELSE
1630 18590 : send_rank = mp_proc_null
1631 : END IF
1632 :
1633 : ! Map the rank in the source/dest communicator to the global rank
1634 472362 : ALLOCATE (all_ranks(0:global_size - 1))
1635 :
1636 157454 : CALL para_env%allgather(send_rank, all_ranks)
1637 157454 : IF (ASSOCIATED(recv_dist)) THEN
1638 630580 : ALLOCATE (source2global(0:COUNT(all_ranks .NE. mp_proc_null) - 1))
1639 378348 : DO i = 0, global_size - 1
1640 378348 : IF (all_ranks(i) .NE. mp_proc_null) THEN
1641 215052 : source2global(all_ranks(i)) = i
1642 : END IF
1643 : END DO
1644 : END IF
1645 :
1646 157454 : CALL para_env%allgather(recv_rank, all_ranks)
1647 157454 : IF (ASSOCIATED(send_dist)) THEN
1648 694320 : ALLOCATE (dest2global(0:COUNT(all_ranks .NE. mp_proc_null) - 1))
1649 416592 : DO i = 0, global_size - 1
1650 416592 : IF (all_ranks(i) .NE. mp_proc_null) THEN
1651 215052 : dest2global(all_ranks(i)) = i
1652 : END IF
1653 : END DO
1654 : END IF
1655 157454 : DEALLOCATE (all_ranks)
1656 :
1657 : ! Some data from the two decompositions will be needed by all processes in the global group :
1658 : ! process grid shape, block size, and the BLACS-to-MPI mapping
1659 :
1660 : ! The global root process will receive the data (from the root process in each decomposition)
1661 1102178 : send_req(:) = mp_request_null
1662 157454 : IF (global_rank == 0) THEN
1663 551089 : recv_req(:) = mp_request_null
1664 78727 : CALL para_env%irecv(src_block, mp_any_source, recv_req(1), tag=src_tag)
1665 78727 : CALL para_env%irecv(dest_block, mp_any_source, recv_req(2), tag=dest_tag)
1666 78727 : CALL para_env%irecv(src_num_pe, mp_any_source, recv_req(3), tag=src_tag)
1667 78727 : CALL para_env%irecv(dest_num_pe, mp_any_source, recv_req(4), tag=dest_tag)
1668 : END IF
1669 :
1670 157454 : IF (ASSOCIATED(send_dist)) THEN
1671 138864 : IF ((send_rank .EQ. 0)) THEN
1672 : ! need to use separate buffers here in case this is actually global rank 0
1673 236181 : src_block_tmp = (/send_dist%nrow_block, send_dist%ncol_block/)
1674 78727 : CALL para_env%isend(src_block_tmp, 0, send_req(1), tag=src_tag)
1675 78727 : CALL para_env%isend(send_dist%context%num_pe, 0, send_req(2), tag=src_tag)
1676 : END IF
1677 : END IF
1678 :
1679 157454 : IF (ASSOCIATED(recv_dist)) THEN
1680 126116 : IF ((recv_rank .EQ. 0)) THEN
1681 236181 : dest_block_tmp = (/recv_dist%nrow_block, recv_dist%ncol_block/)
1682 78727 : CALL para_env%isend(dest_block_tmp, 0, send_req(3), tag=dest_tag)
1683 78727 : CALL para_env%isend(recv_dist%context%num_pe, 0, send_req(4), tag=dest_tag)
1684 : END IF
1685 : END IF
1686 :
1687 157454 : IF (global_rank == 0) THEN
1688 78727 : CALL mp_waitall(recv_req(1:4))
1689 : ! Now we know the process decomposition, we can allocate the arrays to hold the blacs2mpi mapping
1690 0 : ALLOCATE (info%src_blacs2mpi(0:src_num_pe(1) - 1, 0:src_num_pe(2) - 1), &
1691 : dest_blacs2mpi(0:dest_num_pe(1) - 1, 0:dest_num_pe(2) - 1) &
1692 551089 : )
1693 78727 : CALL para_env%irecv(info%src_blacs2mpi, mp_any_source, recv_req(5), tag=src_tag)
1694 78727 : CALL para_env%irecv(dest_blacs2mpi, mp_any_source, recv_req(6), tag=dest_tag)
1695 : END IF
1696 :
1697 157454 : IF (ASSOCIATED(send_dist)) THEN
1698 138864 : IF ((send_rank .EQ. 0)) THEN
1699 78727 : CALL para_env%isend(send_dist%context%blacs2mpi(:, :), 0, send_req(5), tag=src_tag)
1700 : END IF
1701 : END IF
1702 :
1703 157454 : IF (ASSOCIATED(recv_dist)) THEN
1704 126116 : IF ((recv_rank .EQ. 0)) THEN
1705 78727 : CALL para_env%isend(recv_dist%context%blacs2mpi(:, :), 0, send_req(6), tag=dest_tag)
1706 : END IF
1707 : END IF
1708 :
1709 157454 : IF (global_rank == 0) THEN
1710 78727 : CALL mp_waitall(recv_req(5:6))
1711 : END IF
1712 :
1713 : ! Finally, broadcast the data to all processes in the global communicator
1714 157454 : CALL para_env%bcast(src_block, 0)
1715 157454 : CALL para_env%bcast(dest_block, 0)
1716 157454 : CALL para_env%bcast(src_num_pe, 0)
1717 157454 : CALL para_env%bcast(dest_num_pe, 0)
1718 472362 : info%src_num_pe(1:2) = src_num_pe(1:2)
1719 472362 : info%nblock_src(1:2) = src_block(1:2)
1720 157454 : IF (global_rank .NE. 0) THEN
1721 0 : ALLOCATE (info%src_blacs2mpi(0:src_num_pe(1) - 1, 0:src_num_pe(2) - 1), &
1722 0 : dest_blacs2mpi(0:dest_num_pe(1) - 1, 0:dest_num_pe(2) - 1) &
1723 551089 : )
1724 : END IF
1725 157454 : CALL para_env%bcast(info%src_blacs2mpi, 0)
1726 157454 : CALL para_env%bcast(dest_blacs2mpi, 0)
1727 :
1728 157454 : recv_size = dest_num_pe(1)*dest_num_pe(2)
1729 157454 : send_size = src_num_pe(1)*src_num_pe(2)
1730 157454 : info%send_size = send_size
1731 157454 : CALL mp_waitall(send_req(:))
1732 :
1733 : ! Setup is now complete, we can start the actual communication here.
1734 : ! The order implemented here is:
1735 : ! DEST_1
1736 : ! compute recv sizes
1737 : ! call irecv
1738 : ! SRC_1
1739 : ! compute send sizes
1740 : ! pack send buffers
1741 : ! call isend
1742 : ! DEST_2
1743 : ! wait for the recvs and unpack buffers (this part eventually will go into another routine to allow comms to run concurrently)
1744 : ! SRC_2
1745 : ! wait for the sends
1746 :
1747 : ! DEST_1
1748 157454 : IF (ASSOCIATED(recv_dist)) THEN
1749 : CALL cp_fm_struct_get(recv_dist, row_indices=recv_row_indices, &
1750 : col_indices=recv_col_indices &
1751 126116 : )
1752 126116 : info%recv_col_indices => recv_col_indices
1753 126116 : info%recv_row_indices => recv_row_indices
1754 126116 : nrow_block_src = src_block(1)
1755 126116 : ncol_block_src = src_block(2)
1756 1097864 : ALLOCATE (recv_count(0:send_size - 1), info%recv_disp(0:send_size - 1), info%recv_request(0:send_size - 1))
1757 :
1758 : ! Determine the recv counts, allocate the receive buffers, call mpi_irecv for all the non-zero sized receives
1759 126116 : nrow_local_recv = recv_dist%nrow_locals(recv_dist%context%mepos(1))
1760 126116 : ncol_local_recv = recv_dist%ncol_locals(recv_dist%context%mepos(2))
1761 126116 : info%nlocal_recv(1) = nrow_local_recv
1762 126116 : info%nlocal_recv(2) = ncol_local_recv
1763 : ! Initialise src_p, src_q arrays (sized using number of rows/cols in the receiving distribution)
1764 630580 : ALLOCATE (src_p(nrow_local_recv), src_q(ncol_local_recv))
1765 2243920 : DO i = 1, nrow_local_recv
1766 : ! For each local row we will receive, we look up its global row (in recv_row_indices),
1767 : ! then work out which row block it comes from, and which process row that row block comes from.
1768 2243920 : src_p(i) = MOD(((recv_row_indices(i) - 1)/nrow_block_src), src_num_pe(1))
1769 : END DO
1770 3714150 : DO j = 1, ncol_local_recv
1771 : ! Similarly for the columns
1772 3714150 : src_q(j) = MOD(((recv_col_indices(j) - 1)/ncol_block_src), src_num_pe(2))
1773 : END DO
1774 : ! src_p/q now contains the process row/column ID that will send data to that row/column
1775 :
1776 252232 : DO q = 0, src_num_pe(2) - 1
1777 3714150 : ncols = COUNT(src_q .EQ. q)
1778 467284 : DO p = 0, src_num_pe(1) - 1
1779 4067200 : nrows = COUNT(src_p .EQ. p)
1780 : ! Use the send_dist here as we are looking up the processes where the data comes from
1781 341168 : recv_count(info%src_blacs2mpi(p, q)) = nrows*ncols
1782 : END DO
1783 : END DO
1784 126116 : DEALLOCATE (src_p, src_q)
1785 :
1786 : ! Use one long buffer (and displacements into that buffer)
1787 : ! this prevents the need for a rectangular array where not all elements will be populated
1788 593400 : ALLOCATE (info%recv_buf(SUM(recv_count(:))))
1789 126116 : info%recv_disp(0) = 0
1790 215052 : DO i = 1, send_size - 1
1791 215052 : info%recv_disp(i) = info%recv_disp(i - 1) + recv_count(i - 1)
1792 : END DO
1793 :
1794 : ! Issue receive calls on ranks which expect data
1795 341168 : DO k = 0, send_size - 1
1796 341168 : IF (recv_count(k) .GT. 0) THEN
1797 : CALL para_env%irecv(info%recv_buf(info%recv_disp(k) + 1:info%recv_disp(k) + recv_count(k)), &
1798 157868 : source2global(k), info%recv_request(k))
1799 : END IF
1800 : END DO
1801 126116 : DEALLOCATE (source2global)
1802 : END IF ! ASSOCIATED(recv_dist)
1803 :
1804 : ! SRC_1
1805 157454 : IF (ASSOCIATED(send_dist)) THEN
1806 : CALL cp_fm_struct_get(send_dist, row_indices=send_row_indices, &
1807 : col_indices=send_col_indices &
1808 138864 : )
1809 138864 : nrow_block_dest = dest_block(1)
1810 138864 : ncol_block_dest = dest_block(2)
1811 1187100 : ALLOCATE (send_count(0:recv_size - 1), send_disp(0:recv_size - 1), info%send_request(0:recv_size - 1))
1812 :
1813 : ! Determine the send counts, allocate the send buffers
1814 138864 : nrow_local_send = send_dist%nrow_locals(send_dist%context%mepos(1))
1815 138864 : ncol_local_send = send_dist%ncol_locals(send_dist%context%mepos(2))
1816 :
1817 : ! Initialise dest_p, dest_q arrays (sized nrow_local, ncol_local)
1818 : ! i.e. number of rows,cols in the sending distribution
1819 694320 : ALLOCATE (dest_p(nrow_local_send), dest_q(ncol_local_send))
1820 :
1821 2256668 : DO i = 1, nrow_local_send
1822 : ! Use the send_dist%row_indices() here (we are looping over the local rows we will send)
1823 2256668 : dest_p(i) = MOD(((send_row_indices(i) - 1)/nrow_block_dest), dest_num_pe(1))
1824 : END DO
1825 3986302 : DO j = 1, ncol_local_send
1826 3986302 : dest_q(j) = MOD(((send_col_indices(j) - 1)/ncol_block_dest), dest_num_pe(2))
1827 : END DO
1828 : ! dest_p/q now contain the process row/column ID that will receive data from that row/column
1829 :
1830 277728 : DO q = 0, dest_num_pe(2) - 1
1831 3986302 : ncols = COUNT(dest_q .EQ. q)
1832 492780 : DO p = 0, dest_num_pe(1) - 1
1833 3808300 : nrows = COUNT(dest_p .EQ. p)
1834 353916 : send_count(dest_blacs2mpi(p, q)) = nrows*ncols
1835 : END DO
1836 : END DO
1837 138864 : DEALLOCATE (dest_p, dest_q)
1838 :
1839 : ! Allocate the send buffer using send_count -- and calculate the offset into the buffer for each process
1840 631644 : ALLOCATE (info%send_buf(SUM(send_count(:))))
1841 138864 : send_disp(0) = 0
1842 215052 : DO k = 1, recv_size - 1
1843 215052 : send_disp(k) = send_disp(k - 1) + send_count(k - 1)
1844 : END DO
1845 :
1846 : ! Loop over the smat, pack the send buffers
1847 353916 : send_count(:) = 0
1848 3986302 : DO j = 1, ncol_local_send
1849 : ! Use send_col_indices and row_indices here, as we are looking up the global row/column number of local rows.
1850 3847438 : dest_q_j = MOD(((send_col_indices(j) - 1)/ncol_block_dest), dest_num_pe(2))
1851 120937210 : DO i = 1, nrow_local_send
1852 116950908 : dest_p_i = MOD(((send_row_indices(i) - 1)/nrow_block_dest), dest_num_pe(1))
1853 116950908 : mpi_rank = dest_blacs2mpi(dest_p_i, dest_q_j)
1854 116950908 : send_count(mpi_rank) = send_count(mpi_rank) + 1
1855 120798346 : info%send_buf(send_disp(mpi_rank) + send_count(mpi_rank)) = source%local_data(i, j)
1856 : END DO
1857 : END DO
1858 :
1859 : ! For each non-zero send_count, call mpi_isend
1860 353916 : DO k = 0, recv_size - 1
1861 353916 : IF (send_count(k) .GT. 0) THEN
1862 : CALL para_env%isend(info%send_buf(send_disp(k) + 1:send_disp(k) + send_count(k)), &
1863 157868 : dest2global(k), info%send_request(k))
1864 : END IF
1865 : END DO
1866 138864 : DEALLOCATE (send_count, send_disp, dest2global)
1867 : END IF ! ASSOCIATED(send_dist)
1868 157454 : DEALLOCATE (dest_blacs2mpi)
1869 :
1870 : END IF !IF (.NOT. cp2k_is_parallel)
1871 :
1872 157454 : CALL timestop(handle)
1873 :
1874 629816 : END SUBROUTINE cp_fm_start_copy_general
1875 :
1876 : ! **************************************************************************************************
1877 : !> \brief Completes the copy operation: wait for comms, unpack, clean up MPI state
1878 : !> \param destination output fm matrix
1879 : !> \param info all of the data that will be needed to complete the copy operation
1880 : ! **************************************************************************************************
1881 126116 : SUBROUTINE cp_fm_finish_copy_general(destination, info)
1882 : TYPE(cp_fm_type), INTENT(IN) :: destination
1883 : TYPE(copy_info_type), INTENT(INOUT) :: info
1884 :
1885 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_finish_copy_general'
1886 :
1887 : INTEGER :: handle, i, j, k, mpi_rank, send_size, &
1888 : src_p_i, src_q_j
1889 126116 : INTEGER, ALLOCATABLE, DIMENSION(:) :: recv_count
1890 : INTEGER, DIMENSION(2) :: nblock_src, nlocal_recv, src_num_pe
1891 126116 : INTEGER, DIMENSION(:), POINTER :: recv_col_indices, recv_row_indices
1892 :
1893 126116 : CALL timeset(routineN, handle)
1894 :
1895 : IF (.NOT. cp2k_is_parallel) THEN
1896 : ! Now unpack the data from the 'send buffer'
1897 : k = 0
1898 : DO j = 1, SIZE(destination%local_data, 2)
1899 : DO i = 1, SIZE(destination%local_data, 1)
1900 : k = k + 1
1901 : destination%local_data(i, j) = info%send_buf(k)
1902 : END DO
1903 : END DO
1904 : DEALLOCATE (info%send_buf)
1905 : ELSE
1906 : ! Set up local variables ...
1907 126116 : send_size = info%send_size
1908 378348 : nlocal_recv(1:2) = info%nlocal_recv(:)
1909 378348 : nblock_src(1:2) = info%nblock_src(:)
1910 378348 : src_num_pe(1:2) = info%src_num_pe(:)
1911 126116 : recv_col_indices => info%recv_col_indices
1912 126116 : recv_row_indices => info%recv_row_indices
1913 :
1914 : ! ... use the local variables to do the work
1915 : ! DEST_2
1916 126116 : CALL mp_waitall(info%recv_request(:))
1917 378348 : ALLOCATE (recv_count(0:send_size - 1))
1918 : ! Loop over the rmat, filling it in with data from the recv buffers
1919 : ! (here the block sizes, num_pes refer to the distribution of the source matrix)
1920 341168 : recv_count(:) = 0
1921 3714150 : DO j = 1, nlocal_recv(2)
1922 3588034 : src_q_j = MOD(((recv_col_indices(j) - 1)/nblock_src(2)), src_num_pe(2))
1923 120665058 : DO i = 1, nlocal_recv(1)
1924 116950908 : src_p_i = MOD(((recv_row_indices(i) - 1)/nblock_src(1)), src_num_pe(1))
1925 116950908 : mpi_rank = info%src_blacs2mpi(src_p_i, src_q_j)
1926 116950908 : recv_count(mpi_rank) = recv_count(mpi_rank) + 1
1927 120538942 : destination%local_data(i, j) = info%recv_buf(info%recv_disp(mpi_rank) + recv_count(mpi_rank))
1928 : END DO
1929 : END DO
1930 126116 : DEALLOCATE (recv_count, info%recv_disp, info%recv_request, info%recv_buf, info%src_blacs2mpi)
1931 : ! Invalidate the stored state
1932 : NULLIFY (info%recv_col_indices, &
1933 126116 : info%recv_row_indices)
1934 :
1935 : END IF
1936 :
1937 126116 : CALL timestop(handle)
1938 :
1939 126116 : END SUBROUTINE cp_fm_finish_copy_general
1940 :
1941 : ! **************************************************************************************************
1942 : !> \brief Completes the copy operation: wait for comms clean up MPI state
1943 : !> \param info all of the data that will be needed to complete the copy operation
1944 : ! **************************************************************************************************
1945 138200 : SUBROUTINE cp_fm_cleanup_copy_general(info)
1946 : TYPE(copy_info_type), INTENT(INOUT) :: info
1947 :
1948 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_cleanup_copy_general'
1949 :
1950 : INTEGER :: handle
1951 :
1952 138200 : CALL timeset(routineN, handle)
1953 :
1954 : IF (.NOT. cp2k_is_parallel) THEN
1955 : ! Don't do anything - no MPI state for the serial case
1956 : ELSE
1957 : ! SRC_2
1958 : ! If this process is also in the destination decomposition, this deallocate
1959 : ! Was already done in cp_fm_finish_copy_general
1960 138200 : IF (ALLOCATED(info%src_blacs2mpi)) THEN
1961 30674 : DEALLOCATE (info%src_blacs2mpi)
1962 : END IF
1963 138200 : CALL mp_waitall(info%send_request)
1964 138200 : DEALLOCATE (info%send_request, info%send_buf)
1965 :
1966 : END IF
1967 :
1968 138200 : CALL timestop(handle)
1969 :
1970 138200 : END SUBROUTINE cp_fm_cleanup_copy_general
1971 :
1972 : ! **************************************************************************************************
1973 : !> \brief General copy of a submatrix of fm matrix to a submatrix of another fm matrix.
1974 : !> The two matrices can have different contexts.
1975 : !>
1976 : !> Summary of distribution routines for dense matrices
1977 : !> The following will copy A(iA:iA+M-1,jA:jA+N-1) to B(iB:iB+M-1,jB:jB+N-1):
1978 : !>
1979 : !> call pdgemr2d(M,N,Aloc,iA,jA,descA,Bloc,iB,jB,descB,context)
1980 : !>
1981 : !> A process that is not a part of the context of A should set descA(2)
1982 : !> to -1, and similarly for B.
1983 : !>
1984 : !> \param source input fm matrix
1985 : !> \param destination output fm matrix
1986 : !> \param nrows number of rows of sub matrix to be copied
1987 : !> \param ncols number of cols of sub matrix to be copied
1988 : !> \param s_firstrow starting global row index of sub matrix in source
1989 : !> \param s_firstcol starting global col index of sub matrix in source
1990 : !> \param d_firstrow starting global row index of sub matrix in destination
1991 : !> \param d_firstcol starting global col index of sub matrix in destination
1992 : !> \param global_context process grid that covers all parts of either A or B.
1993 : ! **************************************************************************************************
1994 1536 : SUBROUTINE cp_fm_to_fm_submat_general(source, &
1995 : destination, &
1996 : nrows, &
1997 : ncols, &
1998 : s_firstrow, &
1999 : s_firstcol, &
2000 : d_firstrow, &
2001 : d_firstcol, &
2002 : global_context)
2003 :
2004 : TYPE(cp_fm_type), INTENT(IN) :: source, destination
2005 : INTEGER, INTENT(IN) :: nrows, ncols, s_firstrow, s_firstcol, &
2006 : d_firstrow, d_firstcol
2007 :
2008 : CLASS(cp_blacs_type), INTENT(IN) :: global_context
2009 :
2010 : CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_to_fm_submat_general'
2011 :
2012 : LOGICAL :: debug
2013 : INTEGER :: handle
2014 : #if defined(__SCALAPACK)
2015 : INTEGER, DIMENSION(9) :: desca, descb
2016 : REAL(KIND=dp), DIMENSION(1, 1), TARGET :: dummy
2017 1536 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: smat, dmat
2018 : #endif
2019 :
2020 1536 : CALL timeset(routineN, handle)
2021 :
2022 1536 : debug = debug_this_module
2023 :
2024 : IF (.NOT. cp2k_is_parallel) THEN
2025 : CALL cp_fm_to_fm_submat(source, &
2026 : destination, &
2027 : nrows, &
2028 : ncols, &
2029 : s_firstrow, &
2030 : s_firstcol, &
2031 : d_firstrow, &
2032 : d_firstcol)
2033 : ELSE
2034 : #ifdef __SCALAPACK
2035 1536 : NULLIFY (smat, dmat)
2036 : ! check whether source is available on this process
2037 1536 : IF (ASSOCIATED(source%matrix_struct)) THEN
2038 15360 : desca = source%matrix_struct%descriptor
2039 1536 : IF (source%use_sp) CPABORT("only DP kind implemented")
2040 1536 : IF (nrows .GT. source%matrix_struct%nrow_global) &
2041 0 : CPABORT("nrows is greater than nrow_global of source")
2042 1536 : IF (ncols .GT. source%matrix_struct%ncol_global) &
2043 0 : CPABORT("ncols is greater than ncol_global of source")
2044 1536 : smat => source%local_data
2045 : ELSE
2046 0 : desca = -1
2047 0 : smat => dummy
2048 : END IF
2049 : ! check destination is available on this process
2050 1536 : IF (ASSOCIATED(destination%matrix_struct)) THEN
2051 15360 : descb = destination%matrix_struct%descriptor
2052 1536 : IF (destination%use_sp) CPABORT("only DP kind implemented")
2053 1536 : IF (nrows .GT. destination%matrix_struct%nrow_global) &
2054 0 : CPABORT("nrows is greater than nrow_global of destination")
2055 1536 : IF (ncols .GT. destination%matrix_struct%ncol_global) &
2056 0 : CPABORT("ncols is greater than ncol_global of destination")
2057 1536 : dmat => destination%local_data
2058 : ELSE
2059 0 : descb = -1
2060 0 : dmat => dummy
2061 : END IF
2062 : ! do copy
2063 :
2064 : CALL pdgemr2d(nrows, &
2065 : ncols, &
2066 : smat, &
2067 : s_firstrow, &
2068 : s_firstcol, &
2069 : desca, &
2070 : dmat, &
2071 : d_firstrow, &
2072 : d_firstcol, &
2073 : descb, &
2074 1536 : global_context%get_handle())
2075 : #else
2076 : MARK_USED(global_context)
2077 : CPABORT("this subroutine only supports SCALAPACK")
2078 : #endif
2079 : END IF
2080 :
2081 1536 : CALL timestop(handle)
2082 :
2083 1536 : END SUBROUTINE cp_fm_to_fm_submat_general
2084 :
2085 : ! **************************************************************************************************
2086 : !> \brief ...
2087 : !> \param matrix ...
2088 : !> \param irow_global ...
2089 : !> \param icol_global ...
2090 : !> \param alpha ...
2091 : ! **************************************************************************************************
2092 240 : SUBROUTINE cp_fm_add_to_element(matrix, irow_global, icol_global, alpha)
2093 :
2094 : ! Add alpha to the matrix element specified by the global indices
2095 : ! irow_global and icol_global
2096 :
2097 : ! - Creation (05.05.06,MK)
2098 :
2099 : TYPE(cp_fm_type), INTENT(IN) :: matrix
2100 : INTEGER, INTENT(IN) :: irow_global, icol_global
2101 : REAL(KIND=dp), INTENT(IN) :: alpha
2102 :
2103 : INTEGER :: mypcol, myprow, npcol, nprow
2104 240 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: a
2105 : TYPE(cp_blacs_env_type), POINTER :: context
2106 : #if defined(__SCALAPACK)
2107 : INTEGER :: icol_local, ipcol, iprow, &
2108 : irow_local
2109 : INTEGER, DIMENSION(9) :: desca
2110 : #endif
2111 :
2112 240 : context => matrix%matrix_struct%context
2113 :
2114 240 : myprow = context%mepos(1)
2115 240 : mypcol = context%mepos(2)
2116 :
2117 240 : nprow = context%num_pe(1)
2118 240 : npcol = context%num_pe(2)
2119 :
2120 240 : a => matrix%local_data
2121 :
2122 : #if defined(__SCALAPACK)
2123 :
2124 2400 : desca(:) = matrix%matrix_struct%descriptor(:)
2125 :
2126 : CALL infog2l(irow_global, icol_global, desca, nprow, npcol, myprow, mypcol, &
2127 240 : irow_local, icol_local, iprow, ipcol)
2128 :
2129 240 : IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
2130 120 : a(irow_local, icol_local) = a(irow_local, icol_local) + alpha
2131 : END IF
2132 :
2133 : #else
2134 :
2135 : a(irow_global, icol_global) = a(irow_global, icol_global) + alpha
2136 :
2137 : #endif
2138 :
2139 240 : END SUBROUTINE cp_fm_add_to_element
2140 :
2141 : ! **************************************************************************************************
2142 : !> \brief ...
2143 : !> \param fm ...
2144 : !> \param unit ...
2145 : ! **************************************************************************************************
2146 78684 : SUBROUTINE cp_fm_write_unformatted(fm, unit)
2147 : TYPE(cp_fm_type), INTENT(IN) :: fm
2148 : INTEGER, INTENT(IN) :: unit
2149 :
2150 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_write_unformatted'
2151 :
2152 : INTEGER :: handle, j, max_block, &
2153 : ncol_global, nrow_global
2154 : TYPE(mp_para_env_type), POINTER :: para_env
2155 : #if defined(__SCALAPACK)
2156 : INTEGER :: i, i_block, icol_local, &
2157 : in, info, ipcol, &
2158 : iprow, irow_local, &
2159 : mepos, &
2160 : num_pe, rb, tag
2161 : INTEGER, DIMENSION(9) :: desc
2162 78684 : REAL(KIND=dp), DIMENSION(:), POINTER :: vecbuf
2163 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: newdat
2164 : TYPE(cp_blacs_type) :: ictxt_loc
2165 : INTEGER, EXTERNAL :: numroc
2166 : #endif
2167 :
2168 78684 : CALL timeset(routineN, handle)
2169 : CALL cp_fm_get_info(fm, nrow_global=nrow_global, ncol_global=ncol_global, ncol_block=max_block, &
2170 78684 : para_env=para_env)
2171 :
2172 : #if defined(__SCALAPACK)
2173 78684 : num_pe = para_env%num_pe
2174 78684 : mepos = para_env%mepos
2175 78684 : rb = nrow_global
2176 78684 : tag = 0
2177 : ! get a new context
2178 78684 : CALL ictxt_loc%gridinit(para_env, 'R', 1, num_pe)
2179 78684 : CALL descinit(desc, nrow_global, ncol_global, rb, max_block, 0, 0, ictxt_loc%get_handle(), nrow_global, info)
2180 78684 : CPASSERT(info == 0)
2181 : ASSOCIATE (nprow => ictxt_loc%num_pe(1), npcol => ictxt_loc%num_pe(2), &
2182 : myprow => ictxt_loc%mepos(1), mypcol => ictxt_loc%mepos(2))
2183 157368 : in = numroc(ncol_global, max_block, mypcol, 0, npcol)
2184 :
2185 314736 : ALLOCATE (newdat(nrow_global, MAX(1, in)))
2186 :
2187 : ! do the actual scalapack to cols reordering
2188 : CALL pdgemr2d(nrow_global, ncol_global, fm%local_data, 1, 1, &
2189 : fm%matrix_struct%descriptor, &
2190 78684 : newdat, 1, 1, desc, ictxt_loc%get_handle())
2191 :
2192 236052 : ALLOCATE (vecbuf(nrow_global*max_block))
2193 62359655 : vecbuf = HUGE(1.0_dp) ! init for valgrind
2194 :
2195 293914 : DO i = 1, ncol_global, MAX(max_block, 1)
2196 136546 : i_block = MIN(max_block, ncol_global - i + 1)
2197 : CALL infog2l(1, i, desc, nprow, npcol, myprow, mypcol, &
2198 136546 : irow_local, icol_local, iprow, ipcol)
2199 136546 : IF (ipcol == mypcol) THEN
2200 984905 : DO j = 1, i_block
2201 158058191 : vecbuf((j - 1)*nrow_global + 1:nrow_global*j) = newdat(:, icol_local + j - 1)
2202 : END DO
2203 : END IF
2204 :
2205 136546 : IF (ipcol == 0) THEN
2206 : ! do nothing
2207 : ELSE
2208 56164 : IF (ipcol == mypcol) THEN
2209 38194718 : CALL para_env%send(vecbuf(:), 0, tag)
2210 : END IF
2211 56164 : IF (mypcol == 0) THEN
2212 76361354 : CALL para_env%recv(vecbuf(:), ipcol, tag)
2213 : END IF
2214 : END IF
2215 :
2216 215230 : IF (unit > 0) THEN
2217 954629 : DO j = 1, i_block
2218 954629 : WRITE (unit) vecbuf((j - 1)*nrow_global + 1:nrow_global*j)
2219 : END DO
2220 : END IF
2221 :
2222 : END DO
2223 : END ASSOCIATE
2224 78684 : DEALLOCATE (vecbuf)
2225 :
2226 78684 : CALL ictxt_loc%gridexit()
2227 :
2228 78684 : DEALLOCATE (newdat)
2229 :
2230 : #else
2231 :
2232 : IF (unit > 0) THEN
2233 : DO j = 1, ncol_global
2234 : WRITE (unit) fm%local_data(:, j)
2235 : END DO
2236 : END IF
2237 :
2238 : #endif
2239 78684 : CALL timestop(handle)
2240 :
2241 393420 : END SUBROUTINE cp_fm_write_unformatted
2242 :
2243 : ! **************************************************************************************************
2244 : !> \brief Write out a full matrix in plain text.
2245 : !> \param fm the matrix to be outputted
2246 : !> \param unit the unit number for I/O
2247 : !> \param header optional header
2248 : !> \param value_format ...
2249 : ! **************************************************************************************************
2250 10 : SUBROUTINE cp_fm_write_formatted(fm, unit, header, value_format)
2251 : TYPE(cp_fm_type), INTENT(IN) :: fm
2252 : INTEGER, INTENT(IN) :: unit
2253 : CHARACTER(LEN=*), INTENT(IN), OPTIONAL :: header, value_format
2254 :
2255 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_write_formatted'
2256 :
2257 : CHARACTER(LEN=21) :: my_value_format
2258 : INTEGER :: handle, i, j, max_block, &
2259 : ncol_global, nrow_global, &
2260 : nrow_block
2261 : TYPE(mp_para_env_type), POINTER :: para_env
2262 : #if defined(__SCALAPACK)
2263 : INTEGER :: i_block, icol_local, &
2264 : in, info, ipcol, &
2265 : iprow, irow_local, &
2266 : mepos, num_pe, rb, tag, k, &
2267 : icol, irow
2268 : INTEGER, DIMENSION(9) :: desc
2269 10 : REAL(KIND=dp), DIMENSION(:), POINTER :: vecbuf
2270 10 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: newdat
2271 : TYPE(cp_blacs_type) :: ictxt_loc
2272 : INTEGER, EXTERNAL :: numroc
2273 : #endif
2274 :
2275 10 : CALL timeset(routineN, handle)
2276 : CALL cp_fm_get_info(fm, nrow_global=nrow_global, ncol_global=ncol_global, ncol_block=max_block, &
2277 10 : nrow_block=nrow_block, para_env=para_env)
2278 :
2279 10 : IF (PRESENT(value_format)) THEN
2280 0 : CPASSERT(LEN_TRIM(ADJUSTL(value_format)) < 11)
2281 0 : my_value_format = "(I10, I10, "//TRIM(ADJUSTL(value_format))//")"
2282 : ELSE
2283 10 : my_value_format = "(I10, I10, ES24.12)"
2284 : END IF
2285 :
2286 10 : IF (unit > 0) THEN
2287 5 : IF (PRESENT(header)) WRITE (unit, *) header
2288 5 : WRITE (unit, "(A2, A8, A10, A24)") "#", "Row", "Column", ADJUSTL("Value")
2289 : END IF
2290 :
2291 : #if defined(__SCALAPACK)
2292 10 : num_pe = para_env%num_pe
2293 10 : mepos = para_env%mepos
2294 10 : rb = nrow_global
2295 10 : tag = 0
2296 : ! get a new context
2297 10 : CALL ictxt_loc%gridinit(para_env, 'R', 1, num_pe)
2298 10 : CALL descinit(desc, nrow_global, ncol_global, rb, max_block, 0, 0, ictxt_loc%get_handle(), nrow_global, info)
2299 10 : CPASSERT(info == 0)
2300 : ASSOCIATE (nprow => ictxt_loc%num_pe(1), npcol => ictxt_loc%num_pe(2), &
2301 : myprow => ictxt_loc%mepos(1), mypcol => ictxt_loc%mepos(2))
2302 20 : in = numroc(ncol_global, max_block, mypcol, 0, npcol)
2303 :
2304 40 : ALLOCATE (newdat(nrow_global, MAX(1, in)))
2305 :
2306 : ! do the actual scalapack to cols reordering
2307 : CALL pdgemr2d(nrow_global, ncol_global, fm%local_data, 1, 1, &
2308 : fm%matrix_struct%descriptor, &
2309 10 : newdat, 1, 1, desc, ictxt_loc%get_handle())
2310 :
2311 30 : ALLOCATE (vecbuf(nrow_global*max_block))
2312 1226 : vecbuf = HUGE(1.0_dp) ! init for valgrind
2313 10 : irow = 1
2314 10 : icol = 1
2315 :
2316 32 : DO i = 1, ncol_global, MAX(max_block, 1)
2317 12 : i_block = MIN(max_block, ncol_global - i + 1)
2318 : CALL infog2l(1, i, desc, nprow, npcol, myprow, mypcol, &
2319 12 : irow_local, icol_local, iprow, ipcol)
2320 12 : IF (ipcol == mypcol) THEN
2321 36 : DO j = 1, i_block
2322 1256 : vecbuf((j - 1)*nrow_global + 1:nrow_global*j) = newdat(:, icol_local + j - 1)
2323 : END DO
2324 : END IF
2325 :
2326 12 : IF (ipcol == 0) THEN
2327 : ! do nothing
2328 : ELSE
2329 2 : IF (ipcol == mypcol) THEN
2330 3 : CALL para_env%send(vecbuf(:), 0, tag)
2331 : END IF
2332 2 : IF (mypcol == 0) THEN
2333 5 : CALL para_env%recv(vecbuf(:), ipcol, tag)
2334 : END IF
2335 : END IF
2336 :
2337 22 : IF (unit > 0) THEN
2338 36 : DO j = 1, i_block
2339 646 : DO k = (j - 1)*nrow_global + 1, nrow_global*j
2340 610 : WRITE (UNIT=unit, FMT=my_value_format) irow, icol, vecbuf(k)
2341 610 : irow = irow + 1
2342 640 : IF (irow > nrow_global) THEN
2343 30 : irow = 1
2344 30 : icol = icol + 1
2345 : END IF
2346 : END DO
2347 : END DO
2348 : END IF
2349 :
2350 : END DO
2351 : END ASSOCIATE
2352 10 : DEALLOCATE (vecbuf)
2353 :
2354 10 : CALL ictxt_loc%gridexit()
2355 :
2356 10 : DEALLOCATE (newdat)
2357 :
2358 : #else
2359 :
2360 : IF (unit > 0) THEN
2361 : DO j = 1, ncol_global
2362 : DO i = 1, nrow_global
2363 : WRITE (UNIT=unit, FMT=my_value_format) i, j, fm%local_data(i, j)
2364 : END DO
2365 : END DO
2366 : END IF
2367 :
2368 : #endif
2369 10 : CALL timestop(handle)
2370 :
2371 50 : END SUBROUTINE cp_fm_write_formatted
2372 :
2373 : ! **************************************************************************************************
2374 : !> \brief ...
2375 : !> \param fm ...
2376 : !> \param unit ...
2377 : ! **************************************************************************************************
2378 1032 : SUBROUTINE cp_fm_read_unformatted(fm, unit)
2379 : TYPE(cp_fm_type), INTENT(INOUT) :: fm
2380 : INTEGER, INTENT(IN) :: unit
2381 :
2382 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_read_unformatted'
2383 :
2384 : INTEGER :: handle, j, max_block, &
2385 : ncol_global, nrow_global
2386 : TYPE(mp_para_env_type), POINTER :: para_env
2387 : #if defined(__SCALAPACK)
2388 : INTEGER :: k, n_cols
2389 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: vecbuf
2390 : #endif
2391 :
2392 1032 : CALL timeset(routineN, handle)
2393 :
2394 : CALL cp_fm_get_info(fm, nrow_global=nrow_global, ncol_global=ncol_global, ncol_block=max_block, &
2395 1032 : para_env=para_env)
2396 :
2397 : #if defined(__SCALAPACK)
2398 :
2399 : ! the parallel case could be made more efficient (see cp_fm_write_unformatted)
2400 :
2401 4128 : ALLOCATE (vecbuf(nrow_global, max_block))
2402 :
2403 3080 : DO j = 1, ncol_global, max_block
2404 :
2405 2048 : n_cols = MIN(max_block, ncol_global - j + 1)
2406 2048 : IF (para_env%mepos == 0) THEN
2407 30305 : DO k = 1, n_cols
2408 30305 : READ (unit) vecbuf(:, k)
2409 : END DO
2410 : END IF
2411 11455440 : CALL para_env%bcast(vecbuf, 0)
2412 3080 : CALL cp_fm_set_submatrix(fm, vecbuf, start_row=1, start_col=j, n_cols=n_cols)
2413 :
2414 : END DO
2415 :
2416 1032 : DEALLOCATE (vecbuf)
2417 :
2418 : #else
2419 :
2420 : DO j = 1, ncol_global
2421 : READ (unit) fm%local_data(:, j)
2422 : END DO
2423 :
2424 : #endif
2425 :
2426 1032 : CALL timestop(handle)
2427 :
2428 1032 : END SUBROUTINE cp_fm_read_unformatted
2429 :
2430 : ! **************************************************************************************************
2431 : !> \brief wrapper to scalapack function INDXG2P that computes the process
2432 : !> coordinate which possesses the entry of a distributed matrix specified
2433 : !> by a global index INDXGLOB.
2434 : !>
2435 : !> Arguments
2436 : !> =========
2437 : !>
2438 : !> INDXGLOB (global input) INTEGER
2439 : !> The global index of the element.
2440 : !>
2441 : !> NB (global input) INTEGER
2442 : !> Block size, size of the blocks the distributed matrix is
2443 : !> split into.
2444 : !>
2445 : !> IPROC (local dummy) INTEGER
2446 : !> Dummy argument in this case in order to unify the calling
2447 : !> sequence of the tool-routines.
2448 : !>
2449 : !> ISRCPROC (global input) INTEGER
2450 : !> The coordinate of the process that possesses the first
2451 : !> row/column of the distributed matrix.
2452 : !>
2453 : !> NPROCS (global input) INTEGER
2454 : !> The total number processes over which the matrix is
2455 : !> distributed.
2456 : !>
2457 : !> \param INDXGLOB ...
2458 : !> \param NB ...
2459 : !> \param IPROC ...
2460 : !> \param ISRCPROC ...
2461 : !> \param NPROCS ...
2462 : !> \return ...
2463 : !> \author Mauro Del Ben [MDB] - 12.2012
2464 : ! **************************************************************************************************
2465 12628693 : FUNCTION cp_fm_indxg2p(INDXGLOB, NB, IPROC, ISRCPROC, NPROCS) RESULT(G2P)
2466 : INTEGER, INTENT(IN) :: INDXGLOB, NB, IPROC, ISRCPROC, NPROCS
2467 : INTEGER :: G2P
2468 :
2469 : #if defined(__SCALAPACK)
2470 : INTEGER, EXTERNAL :: indxg2p
2471 : #endif
2472 :
2473 : #if defined(__SCALAPACK)
2474 :
2475 12628693 : G2P = indxg2p(INDXGLOB, NB, IPROC, ISRCPROC, NPROCS)
2476 :
2477 : #else
2478 : MARK_USED(indxglob)
2479 : MARK_USED(iproc)
2480 : MARK_USED(isrcproc)
2481 : MARK_USED(nb)
2482 : MARK_USED(nprocs)
2483 :
2484 : G2P = 0
2485 :
2486 : #endif
2487 :
2488 12628693 : END FUNCTION cp_fm_indxg2p
2489 :
2490 : ! **************************************************************************************************
2491 : !> \brief wrapper to scalapack function INDXG2L that computes the local index
2492 : !> of a distributed matrix entry pointed to by the global index INDXGLOB.
2493 : !>
2494 : !> Arguments
2495 : !> =========
2496 : !>
2497 : !> INDXGLOB (global input) INTEGER
2498 : !> The global index of the distributed matrix entry.
2499 : !>
2500 : !> NB (global input) INTEGER
2501 : !> Block size, size of the blocks the distributed matrix is
2502 : !> split into.
2503 : !>
2504 : !> IPROC (local dummy) INTEGER
2505 : !> Dummy argument in this case in order to unify the calling
2506 : !> sequence of the tool-routines.
2507 : !>
2508 : !> ISRCPROC (local dummy) INTEGER
2509 : !> Dummy argument in this case in order to unify the calling
2510 : !> sequence of the tool-routines.
2511 : !>
2512 : !> NPROCS (global input) INTEGER
2513 : !> The total number processes over which the distributed
2514 : !> matrix is distributed.
2515 : !>
2516 : !> \param INDXGLOB ...
2517 : !> \param NB ...
2518 : !> \param IPROC ...
2519 : !> \param ISRCPROC ...
2520 : !> \param NPROCS ...
2521 : !> \return ...
2522 : !> \author Mauro Del Ben [MDB] - 12.2012
2523 : ! **************************************************************************************************
2524 221218 : FUNCTION cp_fm_indxg2l(INDXGLOB, NB, IPROC, ISRCPROC, NPROCS) RESULT(G2L)
2525 : INTEGER, INTENT(IN) :: INDXGLOB, NB, IPROC, ISRCPROC, NPROCS
2526 : INTEGER :: G2L
2527 :
2528 : #if defined(__SCALAPACK)
2529 : INTEGER, EXTERNAL :: indxg2l
2530 : #endif
2531 :
2532 : #if defined(__SCALAPACK)
2533 :
2534 221218 : G2L = indxg2l(INDXGLOB, NB, IPROC, ISRCPROC, NPROCS)
2535 :
2536 : #else
2537 : MARK_USED(iproc)
2538 : MARK_USED(isrcproc)
2539 : MARK_USED(nb)
2540 : MARK_USED(nprocs)
2541 : MARK_USED(indxglob)
2542 :
2543 : G2L = INDXGLOB
2544 :
2545 : #endif
2546 :
2547 221218 : END FUNCTION cp_fm_indxg2l
2548 :
2549 : ! **************************************************************************************************
2550 : !> \brief wrapper to scalapack function INDXL2G that computes the global index
2551 : !> of a distributed matrix entry pointed to by the local index INDXLOC
2552 : !> of the process indicated by IPROC.
2553 : !>
2554 : !> Arguments
2555 : !> =========
2556 : !>
2557 : !> INDXLOC (global input) INTEGER
2558 : !> The local index of the distributed matrix entry.
2559 : !>
2560 : !> NB (global input) INTEGER
2561 : !> Block size, size of the blocks the distributed matrix is
2562 : !> split into.
2563 : !>
2564 : !> IPROC (local input) INTEGER
2565 : !> The coordinate of the process whose local array row or
2566 : !> column is to be determined.
2567 : !>
2568 : !> ISRCPROC (global input) INTEGER
2569 : !> The coordinate of the process that possesses the first
2570 : !> row/column of the distributed matrix.
2571 : !>
2572 : !> NPROCS (global input) INTEGER
2573 : !> The total number processes over which the distributed
2574 : !> matrix is distributed.
2575 : !>
2576 : !> \param INDXLOC ...
2577 : !> \param NB ...
2578 : !> \param IPROC ...
2579 : !> \param ISRCPROC ...
2580 : !> \param NPROCS ...
2581 : !> \return ...
2582 : !> \author Mauro Del Ben [MDB] - 12.2012
2583 : ! **************************************************************************************************
2584 210841 : FUNCTION cp_fm_indxl2g(INDXLOC, NB, IPROC, ISRCPROC, NPROCS) RESULT(L2G)
2585 : INTEGER, INTENT(IN) :: INDXLOC, NB, IPROC, ISRCPROC, NPROCS
2586 : INTEGER :: L2G
2587 :
2588 : #if defined(__SCALAPACK)
2589 : INTEGER, EXTERNAL :: indxl2g
2590 : #endif
2591 :
2592 : #if defined(__SCALAPACK)
2593 :
2594 210841 : L2G = indxl2g(INDXLOC, NB, IPROC, ISRCPROC, NPROCS)
2595 :
2596 : #else
2597 : MARK_USED(iproc)
2598 : MARK_USED(isrcproc)
2599 : MARK_USED(nb)
2600 : MARK_USED(nprocs)
2601 :
2602 : L2G = INDXLOC
2603 :
2604 : #endif
2605 :
2606 210841 : END FUNCTION cp_fm_indxl2g
2607 :
2608 : ! **************************************************************************************************
2609 : !> \brief ...
2610 : !> \param mult_type ...
2611 : ! **************************************************************************************************
2612 8989 : SUBROUTINE cp_fm_setup(mult_type)
2613 : INTEGER, INTENT(IN) :: mult_type
2614 :
2615 8989 : mm_type = mult_type
2616 8989 : END SUBROUTINE cp_fm_setup
2617 :
2618 : ! **************************************************************************************************
2619 : !> \brief ...
2620 : !> \return ...
2621 : ! **************************************************************************************************
2622 1323540 : FUNCTION cp_fm_get_mm_type() RESULT(res)
2623 : INTEGER :: res
2624 :
2625 1323540 : res = mm_type
2626 1323540 : END FUNCTION
2627 :
2628 : ! **************************************************************************************************
2629 : !> \brief ...
2630 : !> \param ictxt ...
2631 : !> \param prec ...
2632 : !> \return ...
2633 : ! **************************************************************************************************
2634 10 : FUNCTION cp_fm_pilaenv(ictxt, prec) RESULT(res)
2635 : INTEGER :: ictxt
2636 : CHARACTER(LEN=1) :: prec
2637 : INTEGER :: res
2638 : #if defined(__SCALAPACK)
2639 : INTEGER :: pilaenv
2640 10 : res = pilaenv(ictxt, prec)
2641 : #else
2642 : MARK_USED(ictxt)
2643 : MARK_USED(prec)
2644 : res = -1
2645 : #endif
2646 :
2647 10 : END FUNCTION
2648 :
2649 0 : END MODULE cp_fm_types
|