Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief Represents a complex full matrix distributed on many processors.
10 : !> \author Joost VandeVondele, based on Fawzi's cp_fm_* routines
11 : ! **************************************************************************************************
12 : MODULE cp_cfm_types
13 : USE cp_blacs_env, ONLY: cp_blacs_env_type
14 : USE cp_fm_struct, ONLY: cp_fm_struct_equivalent,&
15 : cp_fm_struct_get,&
16 : cp_fm_struct_release,&
17 : cp_fm_struct_retain,&
18 : cp_fm_struct_type
19 : USE cp_fm_types, ONLY: cp_fm_type
20 : USE kinds, ONLY: dp
21 : USE mathconstants, ONLY: z_one,&
22 : z_zero
23 : USE message_passing, ONLY: cp2k_is_parallel,&
24 : mp_any_source,&
25 : mp_para_env_type,&
26 : mp_proc_null,&
27 : mp_request_null,&
28 : mp_request_type,&
29 : mp_waitall
30 : #include "../base/base_uses.f90"
31 :
32 : IMPLICIT NONE
33 : PRIVATE
34 :
35 : LOGICAL, PRIVATE, PARAMETER :: debug_this_module = .TRUE.
36 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_cfm_types'
37 : INTEGER, PARAMETER, PRIVATE :: src_tag = 3, dest_tag = 5, send_tag = 7, recv_tag = 11
38 :
39 : PUBLIC :: cp_cfm_type, cp_cfm_p_type, copy_cfm_info_type
40 : PUBLIC :: cp_cfm_cleanup_copy_general, &
41 : cp_cfm_create, &
42 : cp_cfm_finish_copy_general, &
43 : cp_cfm_get_element, &
44 : cp_cfm_get_info, &
45 : cp_cfm_get_submatrix, &
46 : cp_cfm_release, &
47 : cp_cfm_set_all, &
48 : cp_cfm_set_element, &
49 : cp_cfm_set_submatrix, &
50 : cp_cfm_start_copy_general, &
51 : cp_cfm_to_cfm, &
52 : cp_cfm_to_fm, &
53 : cp_fm_to_cfm
54 :
55 : INTERFACE cp_cfm_to_cfm
56 : MODULE PROCEDURE cp_cfm_to_cfm_matrix, & ! a full matrix
57 : cp_cfm_to_cfm_columns ! just a number of columns
58 : END INTERFACE
59 :
60 : ! **************************************************************************************************
61 : !> \brief Represent a complex full matrix.
62 : !> \param name the name of the matrix, used for printing
63 : !> \param matrix_struct structure of this matrix
64 : !> \param local_data array with the data of the matrix (its content depends on
65 : !> the matrix type used: in parallel run it will be in
66 : !> ScaLAPACK format, in sequential run it will simply contain the matrix)
67 : ! **************************************************************************************************
68 : TYPE cp_cfm_type
69 : CHARACTER(len=60) :: name = ""
70 : TYPE(cp_fm_struct_type), POINTER :: matrix_struct => NULL()
71 : COMPLEX(kind=dp), DIMENSION(:, :), POINTER, CONTIGUOUS :: local_data => NULL()
72 : END TYPE cp_cfm_type
73 :
74 : ! **************************************************************************************************
75 : !> \brief Just to build arrays of pointers to matrices.
76 : !> \param matrix the pointer to the matrix
77 : ! **************************************************************************************************
78 : TYPE cp_cfm_p_type
79 : TYPE(cp_cfm_type), POINTER :: matrix => NULL()
80 : END TYPE cp_cfm_p_type
81 :
82 : ! **************************************************************************************************
83 : !> \brief Stores the state of a copy between cp_cfm_start_copy_general
84 : !> and cp_cfm_finish_copy_general.
85 : !> \par History
86 : !> Jan 2017 derived type 'copy_info_type' has been created [Mark T]
87 : !> Jan 2018 the type 'copy_info_type' has been adapted for complex matrices [Sergey Chulkov]
88 : ! **************************************************************************************************
89 : TYPE copy_cfm_info_type
90 : !> number of MPI processes that send data
91 : INTEGER :: send_size = -1
92 : !> number of locally stored rows (1) and columns (2) of the destination matrix
93 : INTEGER, DIMENSION(2) :: nlocal_recv = -1
94 : !> number of rows (1) and columns (2) of the ScaLAPACK block of the source matrix
95 : INTEGER, DIMENSION(2) :: nblock_src = -1
96 : !> BLACS process grid shape of the source matrix: (1) nproc_row, (2) nproc_col
97 : INTEGER, DIMENSION(2) :: src_num_pe = -1
98 : !> displacements into recv_buf
99 : INTEGER, ALLOCATABLE, DIMENSION(:) :: recv_disp
100 : !> MPI requests for non-blocking receive and send operations
101 : TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:) :: recv_request, send_request
102 : !> global column and row indices of locally stored elements of the destination matrix
103 : INTEGER, DIMENSION(:), POINTER :: recv_col_indices => NULL(), recv_row_indices => NULL()
104 : !> rank of MPI process with BLACS coordinates (prow, pcol)
105 : INTEGER, ALLOCATABLE, DIMENSION(:, :) :: src_blacs2mpi
106 : !> receiving and sending buffers for non-blocking MPI communication
107 : COMPLEX(kind=dp), ALLOCATABLE, DIMENSION(:) :: recv_buf, send_buf
108 : END TYPE copy_cfm_info_type
109 :
110 : CONTAINS
111 :
112 : ! **************************************************************************************************
113 : !> \brief Creates a new full matrix with the given structure.
114 : !> \param matrix matrix to be created
115 : !> \param matrix_struct structure of the matrix
116 : !> \param name name of the matrix
117 : !> \note
118 : !> preferred allocation routine
119 : ! **************************************************************************************************
120 149597 : SUBROUTINE cp_cfm_create(matrix, matrix_struct, name)
121 : TYPE(cp_cfm_type), INTENT(OUT) :: matrix
122 : TYPE(cp_fm_struct_type), TARGET, INTENT(IN) :: matrix_struct
123 : CHARACTER(len=*), INTENT(in), OPTIONAL :: name
124 :
125 : INTEGER :: ncol_local, npcol, nprow, nrow_local
126 : TYPE(cp_blacs_env_type), POINTER :: context
127 :
128 : #if defined(__parallel) && ! defined(__SCALAPACK)
129 : CPABORT("full matrices need ScaLAPACK for parallel run")
130 : #endif
131 :
132 149597 : context => matrix_struct%context
133 149597 : matrix%matrix_struct => matrix_struct
134 149597 : CALL cp_fm_struct_retain(matrix%matrix_struct)
135 :
136 149597 : nprow = context%num_pe(1)
137 149597 : npcol = context%num_pe(2)
138 149597 : NULLIFY (matrix%local_data)
139 :
140 149597 : nrow_local = matrix_struct%local_leading_dimension
141 149597 : ncol_local = MAX(1, matrix_struct%ncol_locals(context%mepos(2)))
142 598388 : ALLOCATE (matrix%local_data(nrow_local, ncol_local))
143 :
144 : ! do not initialise created matrix as it is up to the user to do so
145 : !CALL zcopy(nrow_local*ncol_local, z_zero, 0, matrix%local_data, 1)
146 :
147 149597 : IF (PRESENT(name)) THEN
148 17416 : matrix%name = name
149 : ELSE
150 132181 : matrix%name = 'full complex matrix'
151 : END IF
152 149597 : END SUBROUTINE cp_cfm_create
153 :
154 : ! **************************************************************************************************
155 : !> \brief Releases a full matrix.
156 : !> \param matrix the matrix to release
157 : ! **************************************************************************************************
158 165530 : SUBROUTINE cp_cfm_release(matrix)
159 : TYPE(cp_cfm_type), INTENT(INOUT) :: matrix
160 :
161 165530 : IF (ASSOCIATED(matrix%local_data)) THEN
162 149597 : DEALLOCATE (matrix%local_data)
163 : END IF
164 165530 : matrix%name = ""
165 165530 : CALL cp_fm_struct_release(matrix%matrix_struct)
166 165530 : END SUBROUTINE cp_cfm_release
167 :
168 : ! **************************************************************************************************
169 : !> \brief Set all elements of the full matrix to alpha. Besides, set all
170 : !> diagonal matrix elements to beta (if given explicitly).
171 : !> \param matrix matrix to initialise
172 : !> \param alpha value of off-diagonal matrix elements
173 : !> \param beta value of diagonal matrix elements (equal to alpha if absent)
174 : !> \date 12.06.2001
175 : !> \author Matthias Krack
176 : !> \version 1.0
177 : ! **************************************************************************************************
178 44114 : SUBROUTINE cp_cfm_set_all(matrix, alpha, beta)
179 : TYPE(cp_cfm_type), INTENT(IN) :: matrix
180 : COMPLEX(kind=dp), INTENT(in) :: alpha
181 : COMPLEX(kind=dp), INTENT(in), OPTIONAL :: beta
182 :
183 : INTEGER :: irow_local, nrow_local
184 : #if defined(__SCALAPACK)
185 : INTEGER :: icol_local, ncol_local
186 44114 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
187 : #endif
188 :
189 44114 : CALL zcopy(SIZE(matrix%local_data), alpha, 0, matrix%local_data(1, 1), 1)
190 :
191 44114 : IF (PRESENT(beta)) THEN
192 : #if defined(__SCALAPACK)
193 : CALL cp_cfm_get_info(matrix, nrow_local=nrow_local, ncol_local=ncol_local, &
194 8156 : row_indices=row_indices, col_indices=col_indices)
195 :
196 8156 : icol_local = 1
197 8156 : irow_local = 1
198 :
199 50613 : DO WHILE (irow_local <= nrow_local .AND. icol_local <= ncol_local)
200 50613 : IF (row_indices(irow_local) < col_indices(icol_local)) THEN
201 0 : irow_local = irow_local + 1
202 42457 : ELSE IF (row_indices(irow_local) > col_indices(icol_local)) THEN
203 14237 : icol_local = icol_local + 1
204 : ELSE
205 28220 : matrix%local_data(irow_local, icol_local) = beta
206 28220 : irow_local = irow_local + 1
207 28220 : icol_local = icol_local + 1
208 : END IF
209 : END DO
210 : #else
211 : nrow_local = MIN(matrix%matrix_struct%nrow_global, matrix%matrix_struct%ncol_global)
212 :
213 : DO irow_local = 1, nrow_local
214 : matrix%local_data(irow_local, irow_local) = beta
215 : END DO
216 : #endif
217 : END IF
218 :
219 44114 : END SUBROUTINE cp_cfm_set_all
220 :
221 : ! **************************************************************************************************
222 : !> \brief Get the matrix element by its global index.
223 : !> \param matrix full matrix
224 : !> \param irow_global global row index
225 : !> \param icol_global global column index
226 : !> \param alpha matrix element
227 : !> \par History
228 : !> , TCH, created
229 : !> always return the answer
230 : ! **************************************************************************************************
231 9282 : SUBROUTINE cp_cfm_get_element(matrix, irow_global, icol_global, alpha)
232 : TYPE(cp_cfm_type), INTENT(IN) :: matrix
233 : INTEGER, INTENT(in) :: irow_global, icol_global
234 : COMPLEX(kind=dp), INTENT(out) :: alpha
235 :
236 : #if defined(__SCALAPACK)
237 : INTEGER :: icol_local, ipcol, iprow, irow_local, &
238 : mypcol, myprow, npcol, nprow
239 : INTEGER, DIMENSION(9) :: desca
240 : TYPE(cp_blacs_env_type), POINTER :: context
241 : #endif
242 :
243 : #if defined(__SCALAPACK)
244 9282 : context => matrix%matrix_struct%context
245 9282 : myprow = context%mepos(1)
246 9282 : mypcol = context%mepos(2)
247 9282 : nprow = context%num_pe(1)
248 9282 : npcol = context%num_pe(2)
249 :
250 92820 : desca(:) = matrix%matrix_struct%descriptor(:)
251 :
252 : CALL infog2l(irow_global, icol_global, desca, nprow, npcol, myprow, mypcol, &
253 9282 : irow_local, icol_local, iprow, ipcol)
254 :
255 9282 : IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
256 4641 : alpha = matrix%local_data(irow_local, icol_local)
257 4641 : CALL context%ZGEBS2D('All', ' ', 1, 1, alpha, 1)
258 : ELSE
259 4641 : CALL context%ZGEBR2D('All', ' ', 1, 1, alpha, 1, iprow, ipcol)
260 : END IF
261 :
262 : #else
263 : alpha = matrix%local_data(irow_global, icol_global)
264 : #endif
265 :
266 9282 : END SUBROUTINE cp_cfm_get_element
267 :
268 : ! **************************************************************************************************
269 : !> \brief Set the matrix element (irow_global,icol_global) of the full matrix to alpha.
270 : !> \param matrix full matrix
271 : !> \param irow_global global row index
272 : !> \param icol_global global column index
273 : !> \param alpha value of the matrix element
274 : !> \date 12.06.2001
275 : !> \author Matthias Krack
276 : !> \version 1.0
277 : ! **************************************************************************************************
278 79956 : SUBROUTINE cp_cfm_set_element(matrix, irow_global, icol_global, alpha)
279 : TYPE(cp_cfm_type), INTENT(IN) :: matrix
280 : INTEGER, INTENT(in) :: irow_global, icol_global
281 : COMPLEX(kind=dp), INTENT(in) :: alpha
282 :
283 : #if defined(__SCALAPACK)
284 : INTEGER :: icol_local, ipcol, iprow, irow_local, &
285 : mypcol, myprow, npcol, nprow
286 : INTEGER, DIMENSION(9) :: desca
287 : TYPE(cp_blacs_env_type), POINTER :: context
288 : #endif
289 :
290 : #if defined(__SCALAPACK)
291 79956 : context => matrix%matrix_struct%context
292 79956 : myprow = context%mepos(1)
293 79956 : mypcol = context%mepos(2)
294 79956 : nprow = context%num_pe(1)
295 79956 : npcol = context%num_pe(2)
296 :
297 799560 : desca(:) = matrix%matrix_struct%descriptor(:)
298 :
299 : CALL infog2l(irow_global, icol_global, desca, nprow, npcol, myprow, mypcol, &
300 79956 : irow_local, icol_local, iprow, ipcol)
301 :
302 79956 : IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
303 39978 : matrix%local_data(irow_local, icol_local) = alpha
304 : END IF
305 :
306 : #else
307 : matrix%local_data(irow_global, icol_global) = alpha
308 : #endif
309 :
310 79956 : END SUBROUTINE cp_cfm_set_element
311 :
312 : ! **************************************************************************************************
313 : !> \brief Extract a sub-matrix from the full matrix:
314 : !> op(target_m)(1:n_rows,1:n_cols) = fm(start_row:start_row+n_rows,start_col:start_col+n_cols).
315 : !> Sub-matrix 'target_m' is replicated on each CPU. Using this call is expensive.
316 : !> \param fm full matrix you want to get the elements from
317 : !> \param target_m 2-D array to store the extracted sub-matrix
318 : !> \param start_row global row index of the matrix element target_m(1,1) (defaults to 1)
319 : !> \param start_col global column index of the matrix element target_m(1,1) (defaults to 1)
320 : !> \param n_rows number of rows to extract (defaults to size(op(target_m),1))
321 : !> \param n_cols number of columns to extract (defaults to size(op(target_m),2))
322 : !> \param transpose indicates that the extracted sub-matrix target_m should be transposed:
323 : !> op(target_m) = target_m^T if .TRUE.,
324 : !> op(target_m) = target_m if .FALSE. (defaults to false)
325 : !> \par History
326 : !> * 04.2016 created borrowing from Fawzi's cp_fm_get_submatrix [Lianheng Tong]
327 : !> * 01.2018 drop innermost conditional branching [Sergey Chulkov]
328 : !> \author Lianheng Tong
329 : !> \note
330 : !> Optimized for full column updates. The matrix target_m is replicated and valid on all CPUs.
331 : ! **************************************************************************************************
332 392 : SUBROUTINE cp_cfm_get_submatrix(fm, target_m, start_row, start_col, n_rows, n_cols, transpose)
333 : TYPE(cp_cfm_type), INTENT(IN) :: fm
334 : COMPLEX(kind=dp), DIMENSION(:, :), INTENT(out) :: target_m
335 : INTEGER, INTENT(in), OPTIONAL :: start_row, start_col, n_rows, n_cols
336 : LOGICAL, INTENT(in), OPTIONAL :: transpose
337 :
338 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_cfm_get_submatrix'
339 :
340 392 : COMPLEX(kind=dp), DIMENSION(:, :), POINTER :: local_data
341 : INTEGER :: end_col_global, end_col_local, end_row_global, end_row_local, handle, i, j, &
342 : ncol_global, ncol_local, nrow_global, nrow_local, start_col_global, start_col_local, &
343 : start_row_global, start_row_local, this_col
344 392 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
345 : LOGICAL :: do_zero, tr_a
346 : TYPE(mp_para_env_type), POINTER :: para_env
347 :
348 392 : CALL timeset(routineN, handle)
349 :
350 392 : IF (SIZE(target_m) /= 0) THEN
351 : #if defined(__SCALAPACK)
352 392 : do_zero = .TRUE.
353 : #else
354 : do_zero = .FALSE.
355 : #endif
356 :
357 392 : tr_a = .FALSE.
358 392 : IF (PRESENT(transpose)) tr_a = transpose
359 :
360 : ! find out the first and last global row/column indices
361 392 : start_row_global = 1
362 392 : start_col_global = 1
363 392 : IF (PRESENT(start_row)) start_row_global = start_row
364 392 : IF (PRESENT(start_col)) start_col_global = start_col
365 :
366 392 : IF (tr_a) THEN
367 0 : end_row_global = SIZE(target_m, 2)
368 0 : end_col_global = SIZE(target_m, 1)
369 : ELSE
370 392 : end_row_global = SIZE(target_m, 1)
371 392 : end_col_global = SIZE(target_m, 2)
372 : END IF
373 392 : IF (PRESENT(n_rows)) end_row_global = n_rows
374 392 : IF (PRESENT(n_cols)) end_col_global = n_cols
375 :
376 392 : end_row_global = end_row_global + start_row_global - 1
377 392 : end_col_global = end_col_global + start_col_global - 1
378 :
379 : CALL cp_cfm_get_info(matrix=fm, &
380 : nrow_global=nrow_global, ncol_global=ncol_global, &
381 : nrow_local=nrow_local, ncol_local=ncol_local, &
382 392 : row_indices=row_indices, col_indices=col_indices)
383 392 : IF (end_row_global > nrow_global) THEN
384 : end_row_global = nrow_global
385 : do_zero = .TRUE.
386 : END IF
387 392 : IF (end_col_global > ncol_global) THEN
388 : end_col_global = ncol_global
389 : do_zero = .TRUE.
390 : END IF
391 :
392 : ! find out row/column indices of locally stored matrix elements that needs to be copied.
393 : ! Arrays row_indices and col_indices are assumed to be sorted in ascending order
394 1868 : DO start_row_local = 1, nrow_local
395 1868 : IF (row_indices(start_row_local) >= start_row_global) EXIT
396 : END DO
397 :
398 6237 : DO end_row_local = start_row_local, nrow_local
399 6237 : IF (row_indices(end_row_local) > end_row_global) EXIT
400 : END DO
401 392 : end_row_local = end_row_local - 1
402 :
403 3898 : DO start_col_local = 1, ncol_local
404 3898 : IF (col_indices(start_col_local) >= start_col_global) EXIT
405 : END DO
406 :
407 9734 : DO end_col_local = start_col_local, ncol_local
408 9734 : IF (col_indices(end_col_local) > end_col_global) EXIT
409 : END DO
410 392 : end_col_local = end_col_local - 1
411 :
412 392 : para_env => fm%matrix_struct%para_env
413 392 : local_data => fm%local_data
414 :
415 : ! wipe the content of the target matrix if:
416 : ! * the source matrix is distributed across a number of processes, or
417 : ! * not all elements of the target matrix will be assigned, e.g.
418 : ! when the target matrix is larger then the source matrix
419 : IF (do_zero) &
420 392 : CALL zcopy(SIZE(target_m), z_zero, 0, target_m(1, 1), 1)
421 :
422 392 : IF (tr_a) THEN
423 0 : DO j = start_col_local, end_col_local
424 0 : this_col = col_indices(j) - start_col_global + 1
425 0 : DO i = start_row_local, end_row_local
426 0 : target_m(this_col, row_indices(i) - start_row_global + 1) = local_data(i, j)
427 : END DO
428 : END DO
429 : ELSE
430 9734 : DO j = start_col_local, end_col_local
431 9342 : this_col = col_indices(j) - start_col_global + 1
432 147316 : DO i = start_row_local, end_row_local
433 146924 : target_m(row_indices(i) - start_row_global + 1, this_col) = local_data(i, j)
434 : END DO
435 : END DO
436 : END IF
437 :
438 577852 : CALL para_env%sum(target_m)
439 : END IF
440 :
441 392 : CALL timestop(handle)
442 392 : END SUBROUTINE cp_cfm_get_submatrix
443 :
444 : ! **************************************************************************************************
445 : !> \brief Set a sub-matrix of the full matrix:
446 : !> matrix(start_row:start_row+n_rows,start_col:start_col+n_cols)
447 : !> = alpha*op(new_values)(1:n_rows,1:n_cols) +
448 : !> beta*matrix(start_row:start_row+n_rows,start_col:start_col+n_cols)
449 : !> \param matrix full to update
450 : !> \param new_values replicated 2-D array that holds new elements of the updated sub-matrix
451 : !> \param start_row global row index of the matrix element new_values(1,1) (defaults to 1)
452 : !> \param start_col global column index of the matrix element new_values(1,1) (defaults to 1)
453 : !> \param n_rows number of rows to update (defaults to size(op(new_values),1))
454 : !> \param n_cols number of columns to update (defaults to size(op(new_values),2))
455 : !> \param alpha scale factor for the new values (defaults to (1.0,0.0))
456 : !> \param beta scale factor for the old values (defaults to (0.0,0.0))
457 : !> \param transpose indicates that the matrix new_values should be transposed:
458 : !> op(new_values) = new_values^T if .TRUE.,
459 : !> op(new_values) = new_values if .FALSE. (defaults to false)
460 : !> \par History
461 : !> * 04.2016 created borrowing from Fawzi's cp_fm_set_submatrix [Lianheng Tong]
462 : !> * 01.2018 drop innermost conditional branching [Sergey Chulkov]
463 : !> \author Lianheng Tong
464 : !> \note
465 : !> Optimized for alpha=(1.0,0.0), beta=(0.0,0.0)
466 : !> All matrix elements 'new_values' need to be valid on all CPUs
467 : ! **************************************************************************************************
468 240 : SUBROUTINE cp_cfm_set_submatrix(matrix, new_values, start_row, &
469 : start_col, n_rows, n_cols, alpha, beta, transpose)
470 : TYPE(cp_cfm_type), INTENT(IN) :: matrix
471 : COMPLEX(kind=dp), DIMENSION(:, :), INTENT(in) :: new_values
472 : INTEGER, INTENT(in), OPTIONAL :: start_row, start_col, n_rows, n_cols
473 : COMPLEX(kind=dp), INTENT(in), OPTIONAL :: alpha, beta
474 : LOGICAL, INTENT(in), OPTIONAL :: transpose
475 :
476 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_cfm_set_submatrix'
477 :
478 : COMPLEX(kind=dp) :: al, be
479 240 : COMPLEX(kind=dp), DIMENSION(:, :), POINTER :: local_data
480 : INTEGER :: end_col_global, end_col_local, end_row_global, end_row_local, handle, i, j, &
481 : ncol_global, ncol_local, nrow_global, nrow_local, start_col_global, start_col_local, &
482 : start_row_global, start_row_local, this_col
483 240 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
484 : LOGICAL :: tr_a
485 :
486 240 : CALL timeset(routineN, handle)
487 :
488 240 : al = z_one
489 240 : be = z_zero
490 240 : IF (PRESENT(alpha)) al = alpha
491 240 : IF (PRESENT(beta)) be = beta
492 :
493 : ! find out the first and last global row/column indices
494 240 : start_row_global = 1
495 240 : start_col_global = 1
496 240 : IF (PRESENT(start_row)) start_row_global = start_row
497 240 : IF (PRESENT(start_col)) start_col_global = start_col
498 :
499 240 : tr_a = .FALSE.
500 240 : IF (PRESENT(transpose)) tr_a = transpose
501 :
502 0 : IF (tr_a) THEN
503 0 : end_row_global = SIZE(new_values, 2)
504 0 : end_col_global = SIZE(new_values, 1)
505 : ELSE
506 240 : end_row_global = SIZE(new_values, 1)
507 240 : end_col_global = SIZE(new_values, 2)
508 : END IF
509 240 : IF (PRESENT(n_rows)) end_row_global = n_rows
510 240 : IF (PRESENT(n_cols)) end_col_global = n_cols
511 :
512 240 : end_row_global = end_row_global + start_row_global - 1
513 240 : end_col_global = end_col_global + start_col_global - 1
514 :
515 : CALL cp_cfm_get_info(matrix=matrix, &
516 : nrow_global=nrow_global, ncol_global=ncol_global, &
517 : nrow_local=nrow_local, ncol_local=ncol_local, &
518 240 : row_indices=row_indices, col_indices=col_indices)
519 240 : IF (end_row_global > nrow_global) end_row_global = nrow_global
520 240 : IF (end_col_global > ncol_global) end_col_global = ncol_global
521 :
522 : ! find out row/column indices of locally stored matrix elements that needs to be set.
523 : ! Arrays row_indices and col_indices are assumed to be sorted in ascending order
524 978 : DO start_row_local = 1, nrow_local
525 978 : IF (row_indices(start_row_local) >= start_row_global) EXIT
526 : END DO
527 :
528 2919 : DO end_row_local = start_row_local, nrow_local
529 2919 : IF (row_indices(end_row_local) > end_row_global) EXIT
530 : END DO
531 240 : end_row_local = end_row_local - 1
532 :
533 944 : DO start_col_local = 1, ncol_local
534 944 : IF (col_indices(start_col_local) >= start_col_global) EXIT
535 : END DO
536 :
537 6698 : DO end_col_local = start_col_local, ncol_local
538 6698 : IF (col_indices(end_col_local) > end_col_global) EXIT
539 : END DO
540 240 : end_col_local = end_col_local - 1
541 :
542 240 : local_data => matrix%local_data
543 :
544 240 : IF (al == z_one .AND. be == z_zero) THEN
545 240 : IF (tr_a) THEN
546 0 : DO j = start_col_local, end_col_local
547 0 : this_col = col_indices(j) - start_col_global + 1
548 0 : DO i = start_row_local, end_row_local
549 0 : local_data(i, j) = new_values(this_col, row_indices(i) - start_row_global + 1)
550 : END DO
551 : END DO
552 : ELSE
553 6698 : DO j = start_col_local, end_col_local
554 6458 : this_col = col_indices(j) - start_col_global + 1
555 84098 : DO i = start_row_local, end_row_local
556 83858 : local_data(i, j) = new_values(row_indices(i) - start_row_global + 1, this_col)
557 : END DO
558 : END DO
559 : END IF
560 : ELSE
561 0 : IF (tr_a) THEN
562 0 : DO j = start_col_local, end_col_local
563 0 : this_col = col_indices(j) - start_col_global + 1
564 0 : DO i = start_row_local, end_row_local
565 : local_data(i, j) = al*new_values(this_col, row_indices(i) - start_row_global + 1) + &
566 0 : be*local_data(i, j)
567 : END DO
568 : END DO
569 : ELSE
570 0 : DO j = start_col_local, end_col_local
571 0 : this_col = col_indices(j) - start_col_global + 1
572 0 : DO i = start_row_local, end_row_local
573 : local_data(i, j) = al*new_values(row_indices(i) - start_row_global + 1, this_col) + &
574 0 : be*local_data(i, j)
575 : END DO
576 : END DO
577 : END IF
578 : END IF
579 :
580 240 : CALL timestop(handle)
581 240 : END SUBROUTINE cp_cfm_set_submatrix
582 :
583 : ! **************************************************************************************************
584 : !> \brief Returns information about a full matrix.
585 : !> \param matrix matrix
586 : !> \param name name of the matrix
587 : !> \param nrow_global total number of rows
588 : !> \param ncol_global total number of columns
589 : !> \param nrow_block number of rows per ScaLAPACK block
590 : !> \param ncol_block number of columns per ScaLAPACK block
591 : !> \param nrow_local number of locally stored rows
592 : !> \param ncol_local number of locally stored columns
593 : !> \param row_indices global indices of locally stored rows
594 : !> \param col_indices global indices of locally stored columns
595 : !> \param local_data locally stored matrix elements
596 : !> \param context BLACS context
597 : !> \param matrix_struct matrix structure
598 : !> \param para_env parallel environment
599 : !> \date 12.06.2001
600 : !> \author Matthias Krack
601 : !> \version 1.0
602 : ! **************************************************************************************************
603 229222 : SUBROUTINE cp_cfm_get_info(matrix, name, nrow_global, ncol_global, &
604 : nrow_block, ncol_block, nrow_local, ncol_local, &
605 : row_indices, col_indices, local_data, context, &
606 : matrix_struct, para_env)
607 : TYPE(cp_cfm_type), INTENT(IN) :: matrix
608 : CHARACTER(len=*), INTENT(OUT), OPTIONAL :: name
609 : INTEGER, INTENT(OUT), OPTIONAL :: nrow_global, ncol_global, nrow_block, &
610 : ncol_block, nrow_local, ncol_local
611 : INTEGER, DIMENSION(:), OPTIONAL, POINTER :: row_indices, col_indices
612 : COMPLEX(kind=dp), CONTIGUOUS, DIMENSION(:, :), &
613 : OPTIONAL, POINTER :: local_data
614 : TYPE(cp_blacs_env_type), OPTIONAL, POINTER :: context
615 : TYPE(cp_fm_struct_type), OPTIONAL, POINTER :: matrix_struct
616 : TYPE(mp_para_env_type), OPTIONAL, POINTER :: para_env
617 :
618 0 : IF (PRESENT(name)) name = matrix%name
619 229222 : IF (PRESENT(matrix_struct)) matrix_struct => matrix%matrix_struct
620 229222 : IF (PRESENT(local_data)) local_data => matrix%local_data ! not hiding things anymore :-(
621 :
622 : CALL cp_fm_struct_get(matrix%matrix_struct, nrow_local=nrow_local, &
623 : ncol_local=ncol_local, nrow_global=nrow_global, &
624 : ncol_global=ncol_global, nrow_block=nrow_block, &
625 : ncol_block=ncol_block, context=context, &
626 595964 : row_indices=row_indices, col_indices=col_indices, para_env=para_env)
627 :
628 229222 : END SUBROUTINE cp_cfm_get_info
629 :
630 : ! **************************************************************************************************
631 : !> \brief Copy content of a full matrix into another full matrix of the same size.
632 : !> \param source source matrix
633 : !> \param destination destination matrix
634 : !> \author Joost VandeVondele
635 : ! **************************************************************************************************
636 169710 : SUBROUTINE cp_cfm_to_cfm_matrix(source, destination)
637 : TYPE(cp_cfm_type), INTENT(IN) :: source, destination
638 :
639 : INTEGER :: npcol, nprow
640 :
641 169710 : nprow = source%matrix_struct%context%num_pe(1)
642 169710 : npcol = source%matrix_struct%context%num_pe(2)
643 :
644 169710 : IF (.NOT. cp2k_is_parallel .OR. &
645 : cp_fm_struct_equivalent(source%matrix_struct, &
646 : destination%matrix_struct)) THEN
647 169710 : IF (SIZE(source%local_data, 1) /= SIZE(destination%local_data, 1) .OR. &
648 : SIZE(source%local_data, 2) /= SIZE(destination%local_data, 2)) &
649 0 : CPABORT("internal local_data has different sizes")
650 169710 : CALL zcopy(SIZE(source%local_data), source%local_data(1, 1), 1, destination%local_data(1, 1), 1)
651 : ELSE
652 0 : IF (source%matrix_struct%nrow_global /= destination%matrix_struct%nrow_global) &
653 0 : CPABORT("cannot copy between full matrixes of differen sizes")
654 0 : IF (source%matrix_struct%ncol_global /= destination%matrix_struct%ncol_global) &
655 0 : CPABORT("cannot copy between full matrixes of differen sizes")
656 : #if defined(__SCALAPACK)
657 : CALL pzcopy(source%matrix_struct%nrow_global* &
658 : source%matrix_struct%ncol_global, &
659 : source%local_data(1, 1), 1, 1, source%matrix_struct%descriptor, 1, &
660 0 : destination%local_data(1, 1), 1, 1, destination%matrix_struct%descriptor, 1)
661 : #else
662 : CPABORT("")
663 : #endif
664 : END IF
665 169710 : END SUBROUTINE cp_cfm_to_cfm_matrix
666 :
667 : ! **************************************************************************************************
668 : !> \brief Copy a number of sequential columns of a full matrix into another full matrix.
669 : !> \param msource source matrix
670 : !> \param mtarget destination matrix
671 : !> \param ncol number of columns to copy
672 : !> \param source_start global index of the first column to copy within the source matrix
673 : !> \param target_start global index of the first column to copy within the destination matrix
674 : ! **************************************************************************************************
675 15128 : SUBROUTINE cp_cfm_to_cfm_columns(msource, mtarget, ncol, source_start, &
676 : target_start)
677 :
678 : TYPE(cp_cfm_type), INTENT(IN) :: msource, mtarget
679 : INTEGER, INTENT(IN) :: ncol
680 : INTEGER, INTENT(IN), OPTIONAL :: source_start, target_start
681 :
682 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_cfm_to_cfm_columns'
683 :
684 15128 : COMPLEX(kind=dp), DIMENSION(:, :), POINTER :: a, b
685 : INTEGER :: handle, n, ss, ts
686 : #if defined(__SCALAPACK)
687 : INTEGER :: i
688 : INTEGER, DIMENSION(9) :: desca, descb
689 : #endif
690 :
691 15128 : CALL timeset(routineN, handle)
692 :
693 15128 : ss = 1
694 15128 : ts = 1
695 :
696 15128 : IF (PRESENT(source_start)) ss = source_start
697 15128 : IF (PRESENT(target_start)) ts = target_start
698 :
699 15128 : n = msource%matrix_struct%nrow_global
700 :
701 15128 : a => msource%local_data
702 15128 : b => mtarget%local_data
703 :
704 : #if defined(__SCALAPACK)
705 151280 : desca(:) = msource%matrix_struct%descriptor(:)
706 151280 : descb(:) = mtarget%matrix_struct%descriptor(:)
707 236198 : DO i = 0, ncol - 1
708 236198 : CALL pzcopy(n, a(1, 1), 1, ss + i, desca, 1, b(1, 1), 1, ts + i, descb, 1)
709 : END DO
710 : #else
711 : CALL zcopy(ncol*n, a(1, ss), 1, b(1, ts), 1)
712 : #endif
713 :
714 15128 : CALL timestop(handle)
715 :
716 15128 : END SUBROUTINE cp_cfm_to_cfm_columns
717 :
718 : ! **************************************************************************************************
719 : !> \brief Copy just a triangular matrix.
720 : !> \param msource source matrix
721 : !> \param mtarget target matrix
722 : !> \param uplo 'U' for upper triangular, 'L' for lower triangular
723 : ! **************************************************************************************************
724 0 : SUBROUTINE cp_cfm_to_cfm_triangular(msource, mtarget, uplo)
725 : TYPE(cp_cfm_type), INTENT(IN) :: msource, mtarget
726 : CHARACTER(len=*), INTENT(IN) :: uplo
727 :
728 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_cfm_to_cfm_triangular'
729 :
730 0 : COMPLEX(kind=dp), DIMENSION(:, :), POINTER :: aa, bb
731 : INTEGER :: handle, ncol, nrow
732 : #if defined(__SCALAPACK)
733 : INTEGER, DIMENSION(9) :: desca, descb
734 : #endif
735 :
736 0 : CALL timeset(routineN, handle)
737 :
738 0 : nrow = msource%matrix_struct%nrow_global
739 0 : ncol = msource%matrix_struct%ncol_global
740 :
741 0 : aa => msource%local_data
742 0 : bb => mtarget%local_data
743 :
744 : #if defined(__SCALAPACK)
745 0 : desca(:) = msource%matrix_struct%descriptor(:)
746 0 : descb(:) = mtarget%matrix_struct%descriptor(:)
747 0 : CALL pzlacpy(uplo, nrow, ncol, aa(1, 1), 1, 1, desca, bb(1, 1), 1, 1, descb)
748 : #else
749 : CALL zlacpy(uplo, nrow, ncol, aa(1, 1), nrow, bb(1, 1), nrow)
750 : #endif
751 :
752 0 : CALL timestop(handle)
753 0 : END SUBROUTINE cp_cfm_to_cfm_triangular
754 :
755 : ! **************************************************************************************************
756 : !> \brief Copy real and imaginary parts of a complex full matrix into
757 : !> separate real-value full matrices.
758 : !> \param msource complex matrix
759 : !> \param mtargetr (optional) real part of the source matrix
760 : !> \param mtargeti (optional) imaginary part of the source matrix
761 : !> \note
762 : !> Matrix structures are assumed to be equivalent.
763 : ! **************************************************************************************************
764 52016 : SUBROUTINE cp_cfm_to_fm(msource, mtargetr, mtargeti)
765 :
766 : TYPE(cp_cfm_type), INTENT(IN) :: msource
767 : TYPE(cp_fm_type), INTENT(IN), OPTIONAL :: mtargetr, mtargeti
768 :
769 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_cfm_to_fm'
770 :
771 52016 : COMPLEX(kind=dp), DIMENSION(:, :), POINTER :: zmat
772 : INTEGER :: handle
773 52016 : REAL(kind=dp), DIMENSION(:, :), POINTER :: imat, rmat
774 :
775 52016 : CALL timeset(routineN, handle)
776 :
777 52016 : zmat => msource%local_data
778 52016 : IF (PRESENT(mtargetr)) THEN
779 50440 : rmat => mtargetr%local_data
780 : IF ((.NOT. cp_fm_struct_equivalent(mtargetr%matrix_struct, msource%matrix_struct)) .OR. &
781 50440 : (SIZE(rmat, 1) .NE. SIZE(zmat, 1)) .OR. &
782 : (SIZE(rmat, 2) .NE. SIZE(zmat, 2))) THEN
783 0 : CPABORT("size of local_data of mtargetr differ to msource")
784 : END IF
785 : ! copy local data
786 28606341 : rmat = REAL(zmat, kind=dp)
787 : ELSE
788 52016 : NULLIFY (rmat)
789 : END IF
790 52016 : IF (PRESENT(mtargeti)) THEN
791 47818 : imat => mtargeti%local_data
792 : IF ((.NOT. cp_fm_struct_equivalent(mtargeti%matrix_struct, msource%matrix_struct)) .OR. &
793 47818 : (SIZE(imat, 1) .NE. SIZE(zmat, 1)) .OR. &
794 : (SIZE(imat, 2) .NE. SIZE(zmat, 2))) THEN
795 0 : CPABORT("size of local_data of mtargeti differ to msource")
796 : END IF
797 : ! copy local data
798 28258995 : imat = REAL(AIMAG(zmat), kind=dp)
799 : ELSE
800 52016 : NULLIFY (imat)
801 : END IF
802 :
803 52016 : CALL timestop(handle)
804 :
805 52016 : END SUBROUTINE cp_cfm_to_fm
806 :
807 : ! **************************************************************************************************
808 : !> \brief Construct a complex full matrix by taking its real and imaginary parts from
809 : !> two separate real-value full matrices.
810 : !> \param msourcer (optional) real part of the complex matrix (defaults to 0.0)
811 : !> \param msourcei (optional) imaginary part of the complex matrix (defaults to 0.0)
812 : !> \param mtarget resulting complex matrix
813 : !> \note
814 : !> Matrix structures are assumed to be equivalent.
815 : ! **************************************************************************************************
816 77651 : SUBROUTINE cp_fm_to_cfm(msourcer, msourcei, mtarget)
817 : TYPE(cp_fm_type), INTENT(IN), OPTIONAL :: msourcer, msourcei
818 : TYPE(cp_cfm_type), INTENT(IN) :: mtarget
819 :
820 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_to_cfm'
821 :
822 77651 : COMPLEX(kind=dp), DIMENSION(:, :), POINTER :: zmat
823 : INTEGER :: handle, mode
824 77651 : REAL(kind=dp), DIMENSION(:, :), POINTER :: imat, rmat
825 :
826 77651 : CALL timeset(routineN, handle)
827 :
828 77651 : mode = 0
829 77651 : zmat => mtarget%local_data
830 77651 : IF (PRESENT(msourcer)) THEN
831 77651 : rmat => msourcer%local_data
832 : IF ((.NOT. cp_fm_struct_equivalent(msourcer%matrix_struct, mtarget%matrix_struct)) .OR. &
833 77651 : (SIZE(rmat, 1) .NE. SIZE(zmat, 1)) .OR. &
834 : (SIZE(rmat, 2) .NE. SIZE(zmat, 2))) THEN
835 0 : CPABORT("size of local_data of msourcer differ to mtarget")
836 : END IF
837 : mode = mode + 1
838 : ELSE
839 : NULLIFY (rmat)
840 : END IF
841 77651 : IF (PRESENT(msourcei)) THEN
842 47056 : imat => msourcei%local_data
843 : IF ((.NOT. cp_fm_struct_equivalent(msourcei%matrix_struct, mtarget%matrix_struct)) .OR. &
844 47056 : (SIZE(imat, 1) .NE. SIZE(zmat, 1)) .OR. &
845 : (SIZE(imat, 2) .NE. SIZE(zmat, 2))) THEN
846 0 : CPABORT("size of local_data of msourcei differ to mtarget")
847 : END IF
848 47056 : mode = mode + 2
849 : ELSE
850 : NULLIFY (imat)
851 : END IF
852 : ! copy local data
853 : SELECT CASE (mode)
854 : CASE (0)
855 0 : zmat(:, :) = z_zero
856 : CASE (1)
857 5777868 : zmat(:, :) = CMPLX(rmat(:, :), 0.0_dp, kind=dp)
858 : CASE (2)
859 0 : zmat(:, :) = CMPLX(0.0_dp, imat(:, :), kind=dp)
860 : CASE (3)
861 21810177 : zmat(:, :) = CMPLX(rmat(:, :), imat(:, :), kind=dp)
862 : END SELECT
863 :
864 77651 : CALL timestop(handle)
865 :
866 77651 : END SUBROUTINE cp_fm_to_cfm
867 :
868 : ! **************************************************************************************************
869 : !> \brief Initiate the copy operation: get distribution data, post MPI isend and irecvs.
870 : !> \param source input complex-valued fm matrix
871 : !> \param destination output complex-valued fm matrix
872 : !> \param para_env parallel environment corresponding to the BLACS env that covers all parts
873 : !> of the input and output matrices
874 : !> \param info all of the data that will be needed to complete the copy operation
875 : !> \note a slightly modified version of the subroutine cp_fm_start_copy_general() that uses
876 : !> allocatable arrays instead of pointers wherever possible.
877 : ! **************************************************************************************************
878 81900 : SUBROUTINE cp_cfm_start_copy_general(source, destination, para_env, info)
879 : TYPE(cp_cfm_type), INTENT(IN) :: source, destination
880 : TYPE(mp_para_env_type), INTENT(IN), POINTER :: para_env
881 : TYPE(copy_cfm_info_type), INTENT(out) :: info
882 :
883 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_cfm_start_copy_general'
884 :
885 : INTEGER :: dest_p_i, dest_q_j, global_rank, global_size, handle, i, j, k, mpi_rank, &
886 : ncol_block_dest, ncol_block_src, ncol_local_recv, ncol_local_send, ncols, &
887 : nrow_block_dest, nrow_block_src, nrow_local_recv, nrow_local_send, nrows, p, q, &
888 : recv_rank, recv_size, send_rank, send_size
889 9100 : INTEGER, ALLOCATABLE, DIMENSION(:) :: all_ranks, dest2global, dest_p, dest_q, &
890 18200 : recv_count, send_count, send_disp, &
891 9100 : source2global, src_p, src_q
892 9100 : INTEGER, ALLOCATABLE, DIMENSION(:, :) :: dest_blacs2mpi
893 : INTEGER, DIMENSION(2) :: dest_block, dest_block_tmp, dest_num_pe, &
894 : src_block, src_block_tmp, src_num_pe
895 18200 : INTEGER, DIMENSION(:), POINTER :: recv_col_indices, recv_row_indices, &
896 18200 : send_col_indices, send_row_indices
897 : TYPE(cp_fm_struct_type), POINTER :: recv_dist, send_dist
898 127400 : TYPE(mp_request_type), DIMENSION(6) :: recv_req, send_req
899 :
900 9100 : CALL timeset(routineN, handle)
901 :
902 : IF (.NOT. cp2k_is_parallel) THEN
903 : ! Just copy all of the matrix data into a 'send buffer', to be unpacked later
904 : nrow_local_send = SIZE(source%local_data, 1)
905 : ncol_local_send = SIZE(source%local_data, 2)
906 : ALLOCATE (info%send_buf(nrow_local_send*ncol_local_send))
907 : k = 0
908 : DO j = 1, ncol_local_send
909 : DO i = 1, nrow_local_send
910 : k = k + 1
911 : info%send_buf(k) = source%local_data(i, j)
912 : END DO
913 : END DO
914 : ELSE
915 9100 : NULLIFY (recv_dist, send_dist)
916 9100 : NULLIFY (recv_col_indices, recv_row_indices, send_col_indices, send_row_indices)
917 :
918 : ! The 'global' communicator contains both the source and destination decompositions
919 9100 : global_size = para_env%num_pe
920 9100 : global_rank = para_env%mepos
921 :
922 : ! The source/send decomposition and destination/recv decompositions may only exist on
923 : ! on a subset of the processes involved in the communication
924 : ! Check if the source and/or destination arguments are .not. ASSOCIATED():
925 : ! if so, skip the send / recv parts (since these processes do not participate in the sending/receiving distribution)
926 9100 : IF (ASSOCIATED(destination%matrix_struct)) THEN
927 9100 : recv_dist => destination%matrix_struct
928 9100 : recv_rank = recv_dist%para_env%mepos
929 : ELSE
930 0 : recv_rank = mp_proc_null
931 : END IF
932 :
933 9100 : IF (ASSOCIATED(source%matrix_struct)) THEN
934 4550 : send_dist => source%matrix_struct
935 4550 : send_rank = send_dist%para_env%mepos
936 : ELSE
937 4550 : send_rank = mp_proc_null
938 : END IF
939 :
940 : ! Map the rank in the source/dest communicator to the global rank
941 27300 : ALLOCATE (all_ranks(0:global_size - 1))
942 :
943 9100 : CALL para_env%allgather(send_rank, all_ranks)
944 9100 : IF (ASSOCIATED(destination%matrix_struct)) THEN
945 45500 : ALLOCATE (source2global(0:COUNT(all_ranks .NE. mp_proc_null) - 1))
946 27300 : DO i = 0, global_size - 1
947 27300 : IF (all_ranks(i) .NE. mp_proc_null) THEN
948 9100 : source2global(all_ranks(i)) = i
949 : END IF
950 : END DO
951 : END IF
952 :
953 9100 : CALL para_env%allgather(recv_rank, all_ranks)
954 9100 : IF (ASSOCIATED(source%matrix_struct)) THEN
955 22750 : ALLOCATE (dest2global(0:COUNT(all_ranks .NE. mp_proc_null) - 1))
956 13650 : DO i = 0, global_size - 1
957 13650 : IF (all_ranks(i) .NE. mp_proc_null) THEN
958 9100 : dest2global(all_ranks(i)) = i
959 : END IF
960 : END DO
961 : END IF
962 9100 : DEALLOCATE (all_ranks)
963 :
964 : ! Some data from the two decompositions will be needed by all processes in the global group :
965 : ! process grid shape, block size, and the BLACS-to-MPI mapping
966 :
967 : ! The global root process will receive the data (from the root process in each decomposition)
968 63700 : send_req(:) = mp_request_null
969 9100 : IF (global_rank == 0) THEN
970 31850 : recv_req(:) = mp_request_null
971 4550 : CALL para_env%irecv(src_block, mp_any_source, recv_req(1), tag=src_tag)
972 4550 : CALL para_env%irecv(dest_block, mp_any_source, recv_req(2), tag=dest_tag)
973 4550 : CALL para_env%irecv(src_num_pe, mp_any_source, recv_req(3), tag=src_tag)
974 4550 : CALL para_env%irecv(dest_num_pe, mp_any_source, recv_req(4), tag=dest_tag)
975 : END IF
976 :
977 9100 : IF (ASSOCIATED(source%matrix_struct)) THEN
978 4550 : IF ((send_rank == 0)) THEN
979 : ! need to use separate buffers here in case this is actually global rank 0
980 13650 : src_block_tmp = (/send_dist%nrow_block, send_dist%ncol_block/)
981 4550 : CALL para_env%isend(src_block_tmp, 0, send_req(1), tag=src_tag)
982 4550 : CALL para_env%isend(send_dist%context%num_pe, 0, send_req(2), tag=src_tag)
983 : END IF
984 : END IF
985 :
986 9100 : IF (ASSOCIATED(destination%matrix_struct)) THEN
987 9100 : IF ((recv_rank == 0)) THEN
988 13650 : dest_block_tmp = (/recv_dist%nrow_block, recv_dist%ncol_block/)
989 4550 : CALL para_env%isend(dest_block_tmp, 0, send_req(3), tag=dest_tag)
990 4550 : CALL para_env%isend(recv_dist%context%num_pe, 0, send_req(4), tag=dest_tag)
991 : END IF
992 : END IF
993 :
994 9100 : IF (global_rank == 0) THEN
995 4550 : CALL mp_waitall(recv_req(1:4))
996 : ! Now we know the process decomposition, we can allocate the arrays to hold the blacs2mpi mapping
997 0 : ALLOCATE (info%src_blacs2mpi(0:src_num_pe(1) - 1, 0:src_num_pe(2) - 1), &
998 31850 : dest_blacs2mpi(0:dest_num_pe(1) - 1, 0:dest_num_pe(2) - 1))
999 4550 : CALL para_env%irecv(info%src_blacs2mpi, mp_any_source, recv_req(5), tag=src_tag)
1000 4550 : CALL para_env%irecv(dest_blacs2mpi, mp_any_source, recv_req(6), tag=dest_tag)
1001 : END IF
1002 :
1003 9100 : IF (ASSOCIATED(source%matrix_struct)) THEN
1004 4550 : IF ((send_rank == 0)) THEN
1005 4550 : CALL para_env%isend(send_dist%context%blacs2mpi(:, :), 0, send_req(5), tag=src_tag)
1006 : END IF
1007 : END IF
1008 :
1009 9100 : IF (ASSOCIATED(destination%matrix_struct)) THEN
1010 9100 : IF ((recv_rank == 0)) THEN
1011 4550 : CALL para_env%isend(recv_dist%context%blacs2mpi(:, :), 0, send_req(6), tag=dest_tag)
1012 : END IF
1013 : END IF
1014 :
1015 9100 : IF (global_rank == 0) THEN
1016 4550 : CALL mp_waitall(recv_req(5:6))
1017 : END IF
1018 :
1019 : ! Finally, broadcast the data to all processes in the global communicator
1020 9100 : CALL para_env%bcast(src_block, 0)
1021 9100 : CALL para_env%bcast(dest_block, 0)
1022 9100 : CALL para_env%bcast(src_num_pe, 0)
1023 9100 : CALL para_env%bcast(dest_num_pe, 0)
1024 27300 : info%src_num_pe(1:2) = src_num_pe(1:2)
1025 27300 : info%nblock_src(1:2) = src_block(1:2)
1026 9100 : IF (global_rank /= 0) THEN
1027 0 : ALLOCATE (info%src_blacs2mpi(0:src_num_pe(1) - 1, 0:src_num_pe(2) - 1), &
1028 31850 : dest_blacs2mpi(0:dest_num_pe(1) - 1, 0:dest_num_pe(2) - 1))
1029 : END IF
1030 9100 : CALL para_env%bcast(info%src_blacs2mpi, 0)
1031 9100 : CALL para_env%bcast(dest_blacs2mpi, 0)
1032 :
1033 9100 : recv_size = dest_num_pe(1)*dest_num_pe(2)
1034 9100 : send_size = src_num_pe(1)*src_num_pe(2)
1035 9100 : info%send_size = send_size
1036 9100 : CALL mp_waitall(send_req(:))
1037 :
1038 : ! Setup is now complete, we can start the actual communication here.
1039 : ! The order implemented here is:
1040 : ! DEST_1
1041 : ! compute recv sizes
1042 : ! call irecv
1043 : ! SRC_1
1044 : ! compute send sizes
1045 : ! pack send buffers
1046 : ! call isend
1047 : ! DEST_2
1048 : ! wait for the recvs and unpack buffers (this part eventually will go into another routine to allow comms to run concurrently)
1049 : ! SRC_2
1050 : ! wait for the sends
1051 :
1052 : ! DEST_1
1053 9100 : IF (ASSOCIATED(destination%matrix_struct)) THEN
1054 : CALL cp_fm_struct_get(recv_dist, row_indices=recv_row_indices, &
1055 9100 : col_indices=recv_col_indices)
1056 9100 : info%recv_col_indices => recv_col_indices
1057 9100 : info%recv_row_indices => recv_row_indices
1058 9100 : nrow_block_src = src_block(1)
1059 9100 : ncol_block_src = src_block(2)
1060 72800 : ALLOCATE (recv_count(0:send_size - 1), info%recv_disp(0:send_size - 1), info%recv_request(0:send_size - 1))
1061 :
1062 : ! Determine the recv counts, allocate the receive buffers, call mpi_irecv for all the non-zero sized receives
1063 9100 : nrow_local_recv = recv_dist%nrow_locals(recv_dist%context%mepos(1))
1064 9100 : ncol_local_recv = recv_dist%ncol_locals(recv_dist%context%mepos(2))
1065 9100 : info%nlocal_recv(1) = nrow_local_recv
1066 9100 : info%nlocal_recv(2) = ncol_local_recv
1067 : ! Initialise src_p, src_q arrays (sized using number of rows/cols in the receiving distribution)
1068 45500 : ALLOCATE (src_p(nrow_local_recv), src_q(ncol_local_recv))
1069 98140 : DO i = 1, nrow_local_recv
1070 : ! For each local row we will receive, we look up its global row (in recv_row_indices),
1071 : ! then work out which row block it comes from, and which process row that row block comes from.
1072 98140 : src_p(i) = MOD(((recv_row_indices(i) - 1)/nrow_block_src), src_num_pe(1))
1073 : END DO
1074 187180 : DO j = 1, ncol_local_recv
1075 : ! Similarly for the columns
1076 187180 : src_q(j) = MOD(((recv_col_indices(j) - 1)/ncol_block_src), src_num_pe(2))
1077 : END DO
1078 : ! src_p/q now contains the process row/column ID that will send data to that row/column
1079 :
1080 18200 : DO q = 0, src_num_pe(2) - 1
1081 187180 : ncols = COUNT(src_q .EQ. q)
1082 27300 : DO p = 0, src_num_pe(1) - 1
1083 98140 : nrows = COUNT(src_p .EQ. p)
1084 : ! Use the send_dist here as we are looking up the processes where the data comes from
1085 18200 : recv_count(info%src_blacs2mpi(p, q)) = nrows*ncols
1086 : END DO
1087 : END DO
1088 9100 : DEALLOCATE (src_p, src_q)
1089 :
1090 : ! Use one long buffer (and displacements into that buffer)
1091 : ! this prevents the need for a rectangular array where not all elements will be populated
1092 36400 : ALLOCATE (info%recv_buf(SUM(recv_count(:))))
1093 9100 : info%recv_disp(0) = 0
1094 9100 : DO i = 1, send_size - 1
1095 9100 : info%recv_disp(i) = info%recv_disp(i - 1) + recv_count(i - 1)
1096 : END DO
1097 :
1098 : ! Issue receive calls on ranks which expect data
1099 18200 : DO k = 0, send_size - 1
1100 18200 : IF (recv_count(k) .GT. 0) THEN
1101 : CALL para_env%irecv(info%recv_buf(info%recv_disp(k) + 1:info%recv_disp(k) + recv_count(k)), &
1102 9100 : source2global(k), info%recv_request(k))
1103 : ELSE
1104 0 : info%recv_request(k) = mp_request_null
1105 : END IF
1106 : END DO
1107 9100 : DEALLOCATE (source2global)
1108 : END IF ! ASSOCIATED(destination)
1109 :
1110 : ! SRC_1
1111 9100 : IF (ASSOCIATED(source%matrix_struct)) THEN
1112 : CALL cp_fm_struct_get(send_dist, row_indices=send_row_indices, &
1113 4550 : col_indices=send_col_indices)
1114 4550 : nrow_block_dest = dest_block(1)
1115 4550 : ncol_block_dest = dest_block(2)
1116 40950 : ALLOCATE (send_count(0:recv_size - 1), send_disp(0:recv_size - 1), info%send_request(0:recv_size - 1))
1117 :
1118 : ! Determine the send counts, allocate the send buffers
1119 4550 : nrow_local_send = send_dist%nrow_locals(send_dist%context%mepos(1))
1120 4550 : ncol_local_send = send_dist%ncol_locals(send_dist%context%mepos(2))
1121 :
1122 : ! Initialise dest_p, dest_q arrays (sized nrow_local, ncol_local)
1123 : ! i.e. number of rows,cols in the sending distribution
1124 22750 : ALLOCATE (dest_p(nrow_local_send), dest_q(ncol_local_send))
1125 :
1126 93590 : DO i = 1, nrow_local_send
1127 : ! Use the send_dist%row_indices() here (we are looping over the local rows we will send)
1128 93590 : dest_p(i) = MOD(((send_row_indices(i) - 1)/nrow_block_dest), dest_num_pe(1))
1129 : END DO
1130 93590 : DO j = 1, ncol_local_send
1131 93590 : dest_q(j) = MOD(((send_col_indices(j) - 1)/ncol_block_dest), dest_num_pe(2))
1132 : END DO
1133 : ! dest_p/q now contain the process row/column ID that will receive data from that row/column
1134 :
1135 9100 : DO q = 0, dest_num_pe(2) - 1
1136 93590 : ncols = COUNT(dest_q .EQ. q)
1137 18200 : DO p = 0, dest_num_pe(1) - 1
1138 187180 : nrows = COUNT(dest_p .EQ. p)
1139 13650 : send_count(dest_blacs2mpi(p, q)) = nrows*ncols
1140 : END DO
1141 : END DO
1142 4550 : DEALLOCATE (dest_p, dest_q)
1143 :
1144 : ! Allocate the send buffer using send_count -- and calculate the offset into the buffer for each process
1145 22750 : ALLOCATE (info%send_buf(SUM(send_count(:))))
1146 4550 : send_disp(0) = 0
1147 9100 : DO k = 1, recv_size - 1
1148 9100 : send_disp(k) = send_disp(k - 1) + send_count(k - 1)
1149 : END DO
1150 :
1151 : ! Loop over the smat, pack the send buffers
1152 13650 : send_count(:) = 0
1153 93590 : DO j = 1, ncol_local_send
1154 : ! Use send_col_indices and row_indices here, as we are looking up the global row/column number of local rows.
1155 89040 : dest_q_j = MOD(((send_col_indices(j) - 1)/ncol_block_dest), dest_num_pe(2))
1156 2069270 : DO i = 1, nrow_local_send
1157 1975680 : dest_p_i = MOD(((send_row_indices(i) - 1)/nrow_block_dest), dest_num_pe(1))
1158 1975680 : mpi_rank = dest_blacs2mpi(dest_p_i, dest_q_j)
1159 1975680 : send_count(mpi_rank) = send_count(mpi_rank) + 1
1160 2064720 : info%send_buf(send_disp(mpi_rank) + send_count(mpi_rank)) = source%local_data(i, j)
1161 : END DO
1162 : END DO
1163 :
1164 : ! For each non-zero send_count, call mpi_isend
1165 13650 : DO k = 0, recv_size - 1
1166 13650 : IF (send_count(k) .GT. 0) THEN
1167 : CALL para_env%isend(info%send_buf(send_disp(k) + 1:send_disp(k) + send_count(k)), &
1168 9100 : dest2global(k), info%send_request(k))
1169 : ELSE
1170 0 : info%send_request(k) = mp_request_null
1171 : END IF
1172 : END DO
1173 4550 : DEALLOCATE (send_count, send_disp, dest2global)
1174 : END IF ! ASSOCIATED(source)
1175 9100 : DEALLOCATE (dest_blacs2mpi)
1176 :
1177 : END IF !IF (.NOT. cp2k_is_parallel)
1178 :
1179 9100 : CALL timestop(handle)
1180 :
1181 36400 : END SUBROUTINE cp_cfm_start_copy_general
1182 :
1183 : ! **************************************************************************************************
1184 : !> \brief Complete the copy operation: wait for comms, unpack, clean up MPI state.
1185 : !> \param destination output cfm matrix
1186 : !> \param info all of the data that will be needed to complete the copy operation
1187 : !> \note a slightly modified version of the subroutine cp_fm_finish_copy_general() that uses
1188 : !> allocatable arrays instead of pointers wherever possible.
1189 : ! **************************************************************************************************
1190 9100 : SUBROUTINE cp_cfm_finish_copy_general(destination, info)
1191 : TYPE(cp_cfm_type), INTENT(IN) :: destination
1192 : TYPE(copy_cfm_info_type), INTENT(inout) :: info
1193 :
1194 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_cfm_finish_copy_general'
1195 :
1196 : INTEGER :: handle, i, j, k, mpi_rank, ni, nj, &
1197 : src_q_j
1198 9100 : INTEGER, ALLOCATABLE, DIMENSION(:) :: recv_count, src_p_i
1199 9100 : INTEGER, DIMENSION(:), POINTER :: recv_col_indices, recv_row_indices
1200 :
1201 9100 : CALL timeset(routineN, handle)
1202 :
1203 : IF (.NOT. cp2k_is_parallel) THEN
1204 : ! Now unpack the data from the 'send buffer'
1205 : k = 0
1206 : DO j = 1, SIZE(destination%local_data, 2)
1207 : DO i = 1, SIZE(destination%local_data, 1)
1208 : k = k + 1
1209 : destination%local_data(i, j) = info%send_buf(k)
1210 : END DO
1211 : END DO
1212 : DEALLOCATE (info%send_buf)
1213 : ELSE
1214 : ! Set up local variables ...
1215 9100 : recv_col_indices => info%recv_col_indices
1216 9100 : recv_row_indices => info%recv_row_indices
1217 :
1218 : ! ... use the local variables to do the work
1219 : ! DEST_2
1220 9100 : CALL mp_waitall(info%recv_request(:))
1221 :
1222 9100 : nj = info%nlocal_recv(2)
1223 9100 : ni = info%nlocal_recv(1)
1224 45500 : ALLOCATE (recv_count(0:info%send_size - 1), src_p_i(ni))
1225 : ! Loop over the rmat, filling it in with data from the recv buffers
1226 : ! (here the block sizes, num_pes refer to the distribution of the source matrix)
1227 18200 : recv_count(:) = 0
1228 98140 : DO i = 1, ni
1229 98140 : src_p_i(i) = MOD(((recv_row_indices(i) - 1)/info%nblock_src(1)), info%src_num_pe(1))
1230 : END DO
1231 :
1232 187180 : DO j = 1, nj
1233 178080 : src_q_j = MOD(((recv_col_indices(j) - 1)/info%nblock_src(2)), info%src_num_pe(2))
1234 2162860 : DO i = 1, ni
1235 1975680 : mpi_rank = info%src_blacs2mpi(src_p_i(i), src_q_j)
1236 1975680 : recv_count(mpi_rank) = recv_count(mpi_rank) + 1
1237 2153760 : destination%local_data(i, j) = info%recv_buf(info%recv_disp(mpi_rank) + recv_count(mpi_rank))
1238 : END DO
1239 : END DO
1240 :
1241 9100 : DEALLOCATE (recv_count, src_p_i)
1242 : ! Invalidate the stored state
1243 9100 : NULLIFY (info%recv_col_indices, info%recv_row_indices)
1244 9100 : DEALLOCATE (info%recv_disp, info%recv_request, info%recv_buf, info%src_blacs2mpi)
1245 : END IF
1246 :
1247 9100 : CALL timestop(handle)
1248 :
1249 9100 : END SUBROUTINE cp_cfm_finish_copy_general
1250 :
1251 : ! **************************************************************************************************
1252 : !> \brief Complete the copy operation: wait for comms clean up MPI state.
1253 : !> \param info all of the data that will be needed to complete the copy operation
1254 : !> \note a slightly modified version of the subroutine cp_fm_cleanup_copy_general() that uses
1255 : !> allocatable arrays instead of pointers wherever possible.
1256 : ! **************************************************************************************************
1257 4550 : SUBROUTINE cp_cfm_cleanup_copy_general(info)
1258 : TYPE(copy_cfm_info_type), INTENT(inout) :: info
1259 :
1260 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_cfm_cleanup_copy_general'
1261 :
1262 : INTEGER :: handle
1263 :
1264 4550 : CALL timeset(routineN, handle)
1265 :
1266 : IF (cp2k_is_parallel) THEN
1267 : ! SRC_2
1268 : ! If this process is also in the destination decomposition, this deallocate
1269 : ! Was already done in cp_fm_finish_copy_general
1270 4550 : IF (ALLOCATED(info%src_blacs2mpi)) DEALLOCATE (info%src_blacs2mpi)
1271 4550 : CALL mp_waitall(info%send_request(:))
1272 4550 : DEALLOCATE (info%send_request, info%send_buf)
1273 : END IF
1274 :
1275 4550 : CALL timestop(handle)
1276 4550 : END SUBROUTINE cp_cfm_cleanup_copy_general
1277 0 : END MODULE cp_cfm_types
|