Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2025 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief Represents a complex full matrix distributed on many processors.
10 : !> \author Joost VandeVondele, based on Fawzi's cp_fm_* routines
11 : ! **************************************************************************************************
12 : MODULE cp_cfm_types
13 : USE cp_blacs_env, ONLY: cp_blacs_env_type
14 : USE cp_fm_struct, ONLY: cp_fm_struct_equivalent,&
15 : cp_fm_struct_get,&
16 : cp_fm_struct_release,&
17 : cp_fm_struct_retain,&
18 : cp_fm_struct_type
19 : USE cp_fm_types, ONLY: cp_fm_type
20 : USE kinds, ONLY: dp
21 : USE mathconstants, ONLY: z_one,&
22 : z_zero
23 : USE message_passing, ONLY: cp2k_is_parallel,&
24 : mp_any_source,&
25 : mp_para_env_type,&
26 : mp_proc_null,&
27 : mp_request_null,&
28 : mp_request_type,&
29 : mp_waitall
30 : #include "../base/base_uses.f90"
31 :
32 : IMPLICIT NONE
33 : PRIVATE
34 :
35 : LOGICAL, PRIVATE, PARAMETER :: debug_this_module = .TRUE.
36 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_cfm_types'
37 : INTEGER, PARAMETER, PRIVATE :: src_tag = 3, dest_tag = 5, send_tag = 7, recv_tag = 11
38 :
39 : PUBLIC :: cp_cfm_type, cp_cfm_p_type, copy_cfm_info_type
40 : PUBLIC :: cp_cfm_cleanup_copy_general, &
41 : cp_cfm_create, &
42 : cp_cfm_finish_copy_general, &
43 : cp_cfm_get_element, &
44 : cp_cfm_get_info, &
45 : cp_cfm_get_submatrix, &
46 : cp_cfm_release, &
47 : cp_cfm_set_all, &
48 : cp_cfm_set_element, &
49 : cp_cfm_set_submatrix, &
50 : cp_cfm_start_copy_general, &
51 : cp_cfm_to_cfm, &
52 : cp_cfm_to_fm, &
53 : cp_fm_to_cfm
54 :
55 : INTERFACE cp_cfm_to_cfm
56 : MODULE PROCEDURE cp_cfm_to_cfm_matrix, & ! a full matrix
57 : cp_cfm_to_cfm_columns ! just a number of columns
58 : END INTERFACE
59 :
60 : ! **************************************************************************************************
61 : !> \brief Represent a complex full matrix.
62 : !> \param name the name of the matrix, used for printing
63 : !> \param matrix_struct structure of this matrix
64 : !> \param local_data array with the data of the matrix (its content depends on
65 : !> the matrix type used: in parallel run it will be in
66 : !> ScaLAPACK format, in sequential run it will simply contain the matrix)
67 : ! **************************************************************************************************
68 : TYPE cp_cfm_type
69 : CHARACTER(len=60) :: name = ""
70 : TYPE(cp_fm_struct_type), POINTER :: matrix_struct => NULL()
71 : COMPLEX(kind=dp), DIMENSION(:, :), POINTER, CONTIGUOUS :: local_data => NULL()
72 : END TYPE cp_cfm_type
73 :
74 : ! **************************************************************************************************
75 : !> \brief Just to build arrays of pointers to matrices.
76 : !> \param matrix the pointer to the matrix
77 : ! **************************************************************************************************
78 : TYPE cp_cfm_p_type
79 : TYPE(cp_cfm_type), POINTER :: matrix => NULL()
80 : END TYPE cp_cfm_p_type
81 :
82 : ! **************************************************************************************************
83 : !> \brief Stores the state of a copy between cp_cfm_start_copy_general
84 : !> and cp_cfm_finish_copy_general.
85 : !> \par History
86 : !> Jan 2017 derived type 'copy_info_type' has been created [Mark T]
87 : !> Jan 2018 the type 'copy_info_type' has been adapted for complex matrices [Sergey Chulkov]
88 : ! **************************************************************************************************
89 : TYPE copy_cfm_info_type
90 : !> number of MPI processes that send data
91 : INTEGER :: send_size = -1
92 : !> number of locally stored rows (1) and columns (2) of the destination matrix
93 : INTEGER, DIMENSION(2) :: nlocal_recv = -1
94 : !> number of rows (1) and columns (2) of the ScaLAPACK block of the source matrix
95 : INTEGER, DIMENSION(2) :: nblock_src = -1
96 : !> BLACS process grid shape of the source matrix: (1) nproc_row, (2) nproc_col
97 : INTEGER, DIMENSION(2) :: src_num_pe = -1
98 : !> displacements into recv_buf
99 : INTEGER, ALLOCATABLE, DIMENSION(:) :: recv_disp
100 : !> MPI requests for non-blocking receive and send operations
101 : TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:) :: recv_request, send_request
102 : !> global column and row indices of locally stored elements of the destination matrix
103 : INTEGER, DIMENSION(:), POINTER :: recv_col_indices => NULL(), recv_row_indices => NULL()
104 : !> rank of MPI process with BLACS coordinates (prow, pcol)
105 : INTEGER, ALLOCATABLE, DIMENSION(:, :) :: src_blacs2mpi
106 : !> receiving and sending buffers for non-blocking MPI communication
107 : COMPLEX(kind=dp), ALLOCATABLE, DIMENSION(:) :: recv_buf, send_buf
108 : END TYPE copy_cfm_info_type
109 :
110 : CONTAINS
111 :
112 : ! **************************************************************************************************
113 : !> \brief Creates a new full matrix with the given structure.
114 : !> \param matrix matrix to be created
115 : !> \param matrix_struct structure of the matrix
116 : !> \param name name of the matrix
117 : !> \param set_zero ...
118 : !> \note
119 : !> preferred allocation routine
120 : ! **************************************************************************************************
121 158142 : SUBROUTINE cp_cfm_create(matrix, matrix_struct, name, set_zero)
122 : TYPE(cp_cfm_type), INTENT(OUT) :: matrix
123 : TYPE(cp_fm_struct_type), INTENT(IN), TARGET :: matrix_struct
124 : CHARACTER(len=*), INTENT(in), OPTIONAL :: name
125 : LOGICAL, INTENT(in), OPTIONAL :: set_zero
126 :
127 : INTEGER :: ncol_local, npcol, nprow, nrow_local
128 : TYPE(cp_blacs_env_type), POINTER :: context
129 :
130 158142 : context => matrix_struct%context
131 158142 : matrix%matrix_struct => matrix_struct
132 158142 : CALL cp_fm_struct_retain(matrix%matrix_struct)
133 :
134 158142 : nprow = context%num_pe(1)
135 158142 : npcol = context%num_pe(2)
136 158142 : NULLIFY (matrix%local_data)
137 :
138 158142 : nrow_local = matrix_struct%local_leading_dimension
139 158142 : ncol_local = MAX(1, matrix_struct%ncol_locals(context%mepos(2)))
140 632568 : ALLOCATE (matrix%local_data(nrow_local, ncol_local))
141 :
142 158142 : IF (PRESENT(set_zero)) THEN
143 36 : IF (set_zero) THEN
144 7886 : matrix%local_data(1:nrow_local, 1:ncol_local) = z_zero
145 : END IF
146 : END IF
147 :
148 158142 : IF (PRESENT(name)) THEN
149 18320 : matrix%name = name
150 : ELSE
151 139822 : matrix%name = 'full complex matrix'
152 : END IF
153 158142 : END SUBROUTINE cp_cfm_create
154 :
155 : ! **************************************************************************************************
156 : !> \brief Releases a full matrix.
157 : !> \param matrix the matrix to release
158 : ! **************************************************************************************************
159 167738 : SUBROUTINE cp_cfm_release(matrix)
160 : TYPE(cp_cfm_type), INTENT(INOUT) :: matrix
161 :
162 167738 : IF (ASSOCIATED(matrix%local_data)) THEN
163 158142 : DEALLOCATE (matrix%local_data)
164 : END IF
165 167738 : matrix%name = ""
166 167738 : CALL cp_fm_struct_release(matrix%matrix_struct)
167 167738 : END SUBROUTINE cp_cfm_release
168 :
169 : ! **************************************************************************************************
170 : !> \brief Set all elements of the full matrix to alpha. Besides, set all
171 : !> diagonal matrix elements to beta (if given explicitly).
172 : !> \param matrix matrix to initialise
173 : !> \param alpha value of off-diagonal matrix elements
174 : !> \param beta value of diagonal matrix elements (equal to alpha if absent)
175 : !> \date 12.06.2001
176 : !> \author Matthias Krack
177 : !> \version 1.0
178 : ! **************************************************************************************************
179 50250 : SUBROUTINE cp_cfm_set_all(matrix, alpha, beta)
180 : TYPE(cp_cfm_type), INTENT(IN) :: matrix
181 : COMPLEX(kind=dp), INTENT(in) :: alpha
182 : COMPLEX(kind=dp), INTENT(in), OPTIONAL :: beta
183 :
184 : INTEGER :: irow_local, nrow_local
185 : #if defined(__parallel)
186 : INTEGER :: icol_local, ncol_local
187 50250 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
188 : #endif
189 :
190 150750 : CALL zcopy(SIZE(matrix%local_data), alpha, 0, matrix%local_data(1, 1), 1)
191 :
192 50250 : IF (PRESENT(beta)) THEN
193 : #if defined(__parallel)
194 : CALL cp_cfm_get_info(matrix, nrow_local=nrow_local, ncol_local=ncol_local, &
195 9274 : row_indices=row_indices, col_indices=col_indices)
196 :
197 9274 : icol_local = 1
198 9274 : irow_local = 1
199 :
200 60894 : DO WHILE (irow_local <= nrow_local .AND. icol_local <= ncol_local)
201 60894 : IF (row_indices(irow_local) < col_indices(icol_local)) THEN
202 0 : irow_local = irow_local + 1
203 51620 : ELSE IF (row_indices(irow_local) > col_indices(icol_local)) THEN
204 21116 : icol_local = icol_local + 1
205 : ELSE
206 30504 : matrix%local_data(irow_local, icol_local) = beta
207 30504 : irow_local = irow_local + 1
208 30504 : icol_local = icol_local + 1
209 : END IF
210 : END DO
211 : #else
212 : nrow_local = MIN(matrix%matrix_struct%nrow_global, matrix%matrix_struct%ncol_global)
213 :
214 : DO irow_local = 1, nrow_local
215 : matrix%local_data(irow_local, irow_local) = beta
216 : END DO
217 : #endif
218 : END IF
219 :
220 50250 : END SUBROUTINE cp_cfm_set_all
221 :
222 : ! **************************************************************************************************
223 : !> \brief Get the matrix element by its global index.
224 : !> \param matrix full matrix
225 : !> \param irow_global global row index
226 : !> \param icol_global global column index
227 : !> \param alpha matrix element
228 : !> \par History
229 : !> , TCH, created
230 : !> always return the answer
231 : ! **************************************************************************************************
232 9282 : SUBROUTINE cp_cfm_get_element(matrix, irow_global, icol_global, alpha)
233 : TYPE(cp_cfm_type), INTENT(IN) :: matrix
234 : INTEGER, INTENT(in) :: irow_global, icol_global
235 : COMPLEX(kind=dp), INTENT(out) :: alpha
236 :
237 : #if defined(__parallel)
238 : INTEGER :: icol_local, ipcol, iprow, irow_local, &
239 : mypcol, myprow, npcol, nprow
240 : INTEGER, DIMENSION(9) :: desca
241 : TYPE(cp_blacs_env_type), POINTER :: context
242 : #endif
243 :
244 : #if defined(__parallel)
245 9282 : context => matrix%matrix_struct%context
246 9282 : myprow = context%mepos(1)
247 9282 : mypcol = context%mepos(2)
248 9282 : nprow = context%num_pe(1)
249 9282 : npcol = context%num_pe(2)
250 :
251 92820 : desca(:) = matrix%matrix_struct%descriptor(:)
252 :
253 : CALL infog2l(irow_global, icol_global, desca, nprow, npcol, myprow, mypcol, &
254 9282 : irow_local, icol_local, iprow, ipcol)
255 :
256 9282 : IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
257 4641 : alpha = matrix%local_data(irow_local, icol_local)
258 4641 : CALL context%ZGEBS2D('All', ' ', 1, 1, alpha, 1)
259 : ELSE
260 4641 : CALL context%ZGEBR2D('All', ' ', 1, 1, alpha, 1, iprow, ipcol)
261 : END IF
262 :
263 : #else
264 : alpha = matrix%local_data(irow_global, icol_global)
265 : #endif
266 :
267 9282 : END SUBROUTINE cp_cfm_get_element
268 :
269 : ! **************************************************************************************************
270 : !> \brief Set the matrix element (irow_global,icol_global) of the full matrix to alpha.
271 : !> \param matrix full matrix
272 : !> \param irow_global global row index
273 : !> \param icol_global global column index
274 : !> \param alpha value of the matrix element
275 : !> \date 12.06.2001
276 : !> \author Matthias Krack
277 : !> \version 1.0
278 : ! **************************************************************************************************
279 79956 : SUBROUTINE cp_cfm_set_element(matrix, irow_global, icol_global, alpha)
280 : TYPE(cp_cfm_type), INTENT(IN) :: matrix
281 : INTEGER, INTENT(in) :: irow_global, icol_global
282 : COMPLEX(kind=dp), INTENT(in) :: alpha
283 :
284 : #if defined(__parallel)
285 : INTEGER :: icol_local, ipcol, iprow, irow_local, &
286 : mypcol, myprow, npcol, nprow
287 : INTEGER, DIMENSION(9) :: desca
288 : TYPE(cp_blacs_env_type), POINTER :: context
289 : #endif
290 :
291 : #if defined(__parallel)
292 79956 : context => matrix%matrix_struct%context
293 79956 : myprow = context%mepos(1)
294 79956 : mypcol = context%mepos(2)
295 79956 : nprow = context%num_pe(1)
296 79956 : npcol = context%num_pe(2)
297 :
298 799560 : desca(:) = matrix%matrix_struct%descriptor(:)
299 :
300 : CALL infog2l(irow_global, icol_global, desca, nprow, npcol, myprow, mypcol, &
301 79956 : irow_local, icol_local, iprow, ipcol)
302 :
303 79956 : IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
304 39978 : matrix%local_data(irow_local, icol_local) = alpha
305 : END IF
306 :
307 : #else
308 : matrix%local_data(irow_global, icol_global) = alpha
309 : #endif
310 :
311 79956 : END SUBROUTINE cp_cfm_set_element
312 :
313 : ! **************************************************************************************************
314 : !> \brief Extract a sub-matrix from the full matrix:
315 : !> op(target_m)(1:n_rows,1:n_cols) = fm(start_row:start_row+n_rows,start_col:start_col+n_cols).
316 : !> Sub-matrix 'target_m' is replicated on each CPU. Using this call is expensive.
317 : !> \param fm full matrix you want to get the elements from
318 : !> \param target_m 2-D array to store the extracted sub-matrix
319 : !> \param start_row global row index of the matrix element target_m(1,1) (defaults to 1)
320 : !> \param start_col global column index of the matrix element target_m(1,1) (defaults to 1)
321 : !> \param n_rows number of rows to extract (defaults to size(op(target_m),1))
322 : !> \param n_cols number of columns to extract (defaults to size(op(target_m),2))
323 : !> \param transpose indicates that the extracted sub-matrix target_m should be transposed:
324 : !> op(target_m) = target_m^T if .TRUE.,
325 : !> op(target_m) = target_m if .FALSE. (defaults to false)
326 : !> \par History
327 : !> * 04.2016 created borrowing from Fawzi's cp_fm_get_submatrix [Lianheng Tong]
328 : !> * 01.2018 drop innermost conditional branching [Sergey Chulkov]
329 : !> \author Lianheng Tong
330 : !> \note
331 : !> Optimized for full column updates. The matrix target_m is replicated and valid on all CPUs.
332 : ! **************************************************************************************************
333 390 : SUBROUTINE cp_cfm_get_submatrix(fm, target_m, start_row, start_col, n_rows, n_cols, transpose)
334 : TYPE(cp_cfm_type), INTENT(IN) :: fm
335 : COMPLEX(kind=dp), DIMENSION(:, :), INTENT(out) :: target_m
336 : INTEGER, INTENT(in), OPTIONAL :: start_row, start_col, n_rows, n_cols
337 : LOGICAL, INTENT(in), OPTIONAL :: transpose
338 :
339 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_cfm_get_submatrix'
340 :
341 390 : COMPLEX(kind=dp), DIMENSION(:, :), POINTER :: local_data
342 : INTEGER :: end_col_global, end_col_local, end_row_global, end_row_local, handle, i, j, &
343 : ncol_global, ncol_local, nrow_global, nrow_local, start_col_global, start_col_local, &
344 : start_row_global, start_row_local, this_col
345 390 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
346 : LOGICAL :: do_zero, tr_a
347 : TYPE(mp_para_env_type), POINTER :: para_env
348 :
349 390 : CALL timeset(routineN, handle)
350 :
351 1170 : IF (SIZE(target_m) /= 0) THEN
352 : #if defined(__parallel)
353 390 : do_zero = .TRUE.
354 : #else
355 : do_zero = .FALSE.
356 : #endif
357 :
358 390 : tr_a = .FALSE.
359 390 : IF (PRESENT(transpose)) tr_a = transpose
360 :
361 : ! find out the first and last global row/column indices
362 390 : start_row_global = 1
363 390 : start_col_global = 1
364 390 : IF (PRESENT(start_row)) start_row_global = start_row
365 390 : IF (PRESENT(start_col)) start_col_global = start_col
366 :
367 390 : IF (tr_a) THEN
368 0 : end_row_global = SIZE(target_m, 2)
369 0 : end_col_global = SIZE(target_m, 1)
370 : ELSE
371 390 : end_row_global = SIZE(target_m, 1)
372 390 : end_col_global = SIZE(target_m, 2)
373 : END IF
374 390 : IF (PRESENT(n_rows)) end_row_global = n_rows
375 390 : IF (PRESENT(n_cols)) end_col_global = n_cols
376 :
377 390 : end_row_global = end_row_global + start_row_global - 1
378 390 : end_col_global = end_col_global + start_col_global - 1
379 :
380 : CALL cp_cfm_get_info(matrix=fm, &
381 : nrow_global=nrow_global, ncol_global=ncol_global, &
382 : nrow_local=nrow_local, ncol_local=ncol_local, &
383 390 : row_indices=row_indices, col_indices=col_indices)
384 390 : IF (end_row_global > nrow_global) THEN
385 : end_row_global = nrow_global
386 : do_zero = .TRUE.
387 : END IF
388 390 : IF (end_col_global > ncol_global) THEN
389 : end_col_global = ncol_global
390 : do_zero = .TRUE.
391 : END IF
392 :
393 : ! find out row/column indices of locally stored matrix elements that needs to be copied.
394 : ! Arrays row_indices and col_indices are assumed to be sorted in ascending order
395 1866 : DO start_row_local = 1, nrow_local
396 1866 : IF (row_indices(start_row_local) >= start_row_global) EXIT
397 : END DO
398 :
399 6234 : DO end_row_local = start_row_local, nrow_local
400 6234 : IF (row_indices(end_row_local) > end_row_global) EXIT
401 : END DO
402 390 : end_row_local = end_row_local - 1
403 :
404 3896 : DO start_col_local = 1, ncol_local
405 3896 : IF (col_indices(start_col_local) >= start_col_global) EXIT
406 : END DO
407 :
408 9730 : DO end_col_local = start_col_local, ncol_local
409 9730 : IF (col_indices(end_col_local) > end_col_global) EXIT
410 : END DO
411 390 : end_col_local = end_col_local - 1
412 :
413 390 : para_env => fm%matrix_struct%para_env
414 390 : local_data => fm%local_data
415 :
416 : ! wipe the content of the target matrix if:
417 : ! * the source matrix is distributed across a number of processes, or
418 : ! * not all elements of the target matrix will be assigned, e.g.
419 : ! when the target matrix is larger then the source matrix
420 : IF (do_zero) &
421 1170 : CALL zcopy(SIZE(target_m), z_zero, 0, target_m(1, 1), 1)
422 :
423 390 : IF (tr_a) THEN
424 0 : DO j = start_col_local, end_col_local
425 0 : this_col = col_indices(j) - start_col_global + 1
426 0 : DO i = start_row_local, end_row_local
427 0 : target_m(this_col, row_indices(i) - start_row_global + 1) = local_data(i, j)
428 : END DO
429 : END DO
430 : ELSE
431 9730 : DO j = start_col_local, end_col_local
432 9340 : this_col = col_indices(j) - start_col_global + 1
433 147311 : DO i = start_row_local, end_row_local
434 146921 : target_m(row_indices(i) - start_row_global + 1, this_col) = local_data(i, j)
435 : END DO
436 : END DO
437 : END IF
438 :
439 577842 : CALL para_env%sum(target_m)
440 : END IF
441 :
442 390 : CALL timestop(handle)
443 390 : END SUBROUTINE cp_cfm_get_submatrix
444 :
445 : ! **************************************************************************************************
446 : !> \brief Set a sub-matrix of the full matrix:
447 : !> matrix(start_row:start_row+n_rows,start_col:start_col+n_cols)
448 : !> = alpha*op(new_values)(1:n_rows,1:n_cols) +
449 : !> beta*matrix(start_row:start_row+n_rows,start_col:start_col+n_cols)
450 : !> \param matrix full to update
451 : !> \param new_values replicated 2-D array that holds new elements of the updated sub-matrix
452 : !> \param start_row global row index of the matrix element new_values(1,1) (defaults to 1)
453 : !> \param start_col global column index of the matrix element new_values(1,1) (defaults to 1)
454 : !> \param n_rows number of rows to update (defaults to size(op(new_values),1))
455 : !> \param n_cols number of columns to update (defaults to size(op(new_values),2))
456 : !> \param alpha scale factor for the new values (defaults to (1.0,0.0))
457 : !> \param beta scale factor for the old values (defaults to (0.0,0.0))
458 : !> \param transpose indicates that the matrix new_values should be transposed:
459 : !> op(new_values) = new_values^T if .TRUE.,
460 : !> op(new_values) = new_values if .FALSE. (defaults to false)
461 : !> \par History
462 : !> * 04.2016 created borrowing from Fawzi's cp_fm_set_submatrix [Lianheng Tong]
463 : !> * 01.2018 drop innermost conditional branching [Sergey Chulkov]
464 : !> \author Lianheng Tong
465 : !> \note
466 : !> Optimized for alpha=(1.0,0.0), beta=(0.0,0.0)
467 : !> All matrix elements 'new_values' need to be valid on all CPUs
468 : ! **************************************************************************************************
469 214 : SUBROUTINE cp_cfm_set_submatrix(matrix, new_values, start_row, &
470 : start_col, n_rows, n_cols, alpha, beta, transpose)
471 : TYPE(cp_cfm_type), INTENT(IN) :: matrix
472 : COMPLEX(kind=dp), DIMENSION(:, :), INTENT(in) :: new_values
473 : INTEGER, INTENT(in), OPTIONAL :: start_row, start_col, n_rows, n_cols
474 : COMPLEX(kind=dp), INTENT(in), OPTIONAL :: alpha, beta
475 : LOGICAL, INTENT(in), OPTIONAL :: transpose
476 :
477 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_cfm_set_submatrix'
478 :
479 : COMPLEX(kind=dp) :: al, be
480 214 : COMPLEX(kind=dp), DIMENSION(:, :), POINTER :: local_data
481 : INTEGER :: end_col_global, end_col_local, end_row_global, end_row_local, handle, i, j, &
482 : ncol_global, ncol_local, nrow_global, nrow_local, start_col_global, start_col_local, &
483 : start_row_global, start_row_local, this_col
484 214 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
485 : LOGICAL :: tr_a
486 :
487 214 : CALL timeset(routineN, handle)
488 :
489 214 : al = z_one
490 214 : be = z_zero
491 214 : IF (PRESENT(alpha)) al = alpha
492 214 : IF (PRESENT(beta)) be = beta
493 :
494 : ! find out the first and last global row/column indices
495 214 : start_row_global = 1
496 214 : start_col_global = 1
497 214 : IF (PRESENT(start_row)) start_row_global = start_row
498 214 : IF (PRESENT(start_col)) start_col_global = start_col
499 :
500 214 : tr_a = .FALSE.
501 214 : IF (PRESENT(transpose)) tr_a = transpose
502 :
503 0 : IF (tr_a) THEN
504 0 : end_row_global = SIZE(new_values, 2)
505 0 : end_col_global = SIZE(new_values, 1)
506 : ELSE
507 214 : end_row_global = SIZE(new_values, 1)
508 214 : end_col_global = SIZE(new_values, 2)
509 : END IF
510 214 : IF (PRESENT(n_rows)) end_row_global = n_rows
511 214 : IF (PRESENT(n_cols)) end_col_global = n_cols
512 :
513 214 : end_row_global = end_row_global + start_row_global - 1
514 214 : end_col_global = end_col_global + start_col_global - 1
515 :
516 : CALL cp_cfm_get_info(matrix=matrix, &
517 : nrow_global=nrow_global, ncol_global=ncol_global, &
518 : nrow_local=nrow_local, ncol_local=ncol_local, &
519 214 : row_indices=row_indices, col_indices=col_indices)
520 214 : IF (end_row_global > nrow_global) end_row_global = nrow_global
521 214 : IF (end_col_global > ncol_global) end_col_global = ncol_global
522 :
523 : ! find out row/column indices of locally stored matrix elements that needs to be set.
524 : ! Arrays row_indices and col_indices are assumed to be sorted in ascending order
525 952 : DO start_row_local = 1, nrow_local
526 952 : IF (row_indices(start_row_local) >= start_row_global) EXIT
527 : END DO
528 :
529 2880 : DO end_row_local = start_row_local, nrow_local
530 2880 : IF (row_indices(end_row_local) > end_row_global) EXIT
531 : END DO
532 214 : end_row_local = end_row_local - 1
533 :
534 918 : DO start_col_local = 1, ncol_local
535 918 : IF (col_indices(start_col_local) >= start_col_global) EXIT
536 : END DO
537 :
538 6646 : DO end_col_local = start_col_local, ncol_local
539 6646 : IF (col_indices(end_col_local) > end_col_global) EXIT
540 : END DO
541 214 : end_col_local = end_col_local - 1
542 :
543 214 : local_data => matrix%local_data
544 :
545 214 : IF (al == z_one .AND. be == z_zero) THEN
546 214 : IF (tr_a) THEN
547 0 : DO j = start_col_local, end_col_local
548 0 : this_col = col_indices(j) - start_col_global + 1
549 0 : DO i = start_row_local, end_row_local
550 0 : local_data(i, j) = new_values(this_col, row_indices(i) - start_row_global + 1)
551 : END DO
552 : END DO
553 : ELSE
554 6646 : DO j = start_col_local, end_col_local
555 6432 : this_col = col_indices(j) - start_col_global + 1
556 84033 : DO i = start_row_local, end_row_local
557 83819 : local_data(i, j) = new_values(row_indices(i) - start_row_global + 1, this_col)
558 : END DO
559 : END DO
560 : END IF
561 : ELSE
562 0 : IF (tr_a) THEN
563 0 : DO j = start_col_local, end_col_local
564 0 : this_col = col_indices(j) - start_col_global + 1
565 0 : DO i = start_row_local, end_row_local
566 : local_data(i, j) = al*new_values(this_col, row_indices(i) - start_row_global + 1) + &
567 0 : be*local_data(i, j)
568 : END DO
569 : END DO
570 : ELSE
571 0 : DO j = start_col_local, end_col_local
572 0 : this_col = col_indices(j) - start_col_global + 1
573 0 : DO i = start_row_local, end_row_local
574 : local_data(i, j) = al*new_values(row_indices(i) - start_row_global + 1, this_col) + &
575 0 : be*local_data(i, j)
576 : END DO
577 : END DO
578 : END IF
579 : END IF
580 :
581 214 : CALL timestop(handle)
582 214 : END SUBROUTINE cp_cfm_set_submatrix
583 :
584 : ! **************************************************************************************************
585 : !> \brief Returns information about a full matrix.
586 : !> \param matrix matrix
587 : !> \param name name of the matrix
588 : !> \param nrow_global total number of rows
589 : !> \param ncol_global total number of columns
590 : !> \param nrow_block number of rows per ScaLAPACK block
591 : !> \param ncol_block number of columns per ScaLAPACK block
592 : !> \param nrow_local number of locally stored rows
593 : !> \param ncol_local number of locally stored columns
594 : !> \param row_indices global indices of locally stored rows
595 : !> \param col_indices global indices of locally stored columns
596 : !> \param local_data locally stored matrix elements
597 : !> \param context BLACS context
598 : !> \param matrix_struct matrix structure
599 : !> \param para_env parallel environment
600 : !> \date 12.06.2001
601 : !> \author Matthias Krack
602 : !> \version 1.0
603 : ! **************************************************************************************************
604 240128 : SUBROUTINE cp_cfm_get_info(matrix, name, nrow_global, ncol_global, &
605 : nrow_block, ncol_block, nrow_local, ncol_local, &
606 : row_indices, col_indices, local_data, context, &
607 : matrix_struct, para_env)
608 : TYPE(cp_cfm_type), INTENT(IN) :: matrix
609 : CHARACTER(len=*), INTENT(OUT), OPTIONAL :: name
610 : INTEGER, INTENT(OUT), OPTIONAL :: nrow_global, ncol_global, nrow_block, &
611 : ncol_block, nrow_local, ncol_local
612 : INTEGER, DIMENSION(:), OPTIONAL, POINTER :: row_indices, col_indices
613 : COMPLEX(kind=dp), CONTIGUOUS, DIMENSION(:, :), &
614 : OPTIONAL, POINTER :: local_data
615 : TYPE(cp_blacs_env_type), OPTIONAL, POINTER :: context
616 : TYPE(cp_fm_struct_type), OPTIONAL, POINTER :: matrix_struct
617 : TYPE(mp_para_env_type), OPTIONAL, POINTER :: para_env
618 :
619 0 : IF (PRESENT(name)) name = matrix%name
620 240128 : IF (PRESENT(matrix_struct)) matrix_struct => matrix%matrix_struct
621 240128 : IF (PRESENT(local_data)) local_data => matrix%local_data ! not hiding things anymore :-(
622 :
623 : CALL cp_fm_struct_get(matrix%matrix_struct, nrow_local=nrow_local, &
624 : ncol_local=ncol_local, nrow_global=nrow_global, &
625 : ncol_global=ncol_global, nrow_block=nrow_block, &
626 : ncol_block=ncol_block, context=context, &
627 240128 : row_indices=row_indices, col_indices=col_indices, para_env=para_env)
628 :
629 240128 : END SUBROUTINE cp_cfm_get_info
630 :
631 : ! **************************************************************************************************
632 : !> \brief Copy content of a full matrix into another full matrix of the same size.
633 : !> \param source source matrix
634 : !> \param destination destination matrix
635 : !> \author Joost VandeVondele
636 : ! **************************************************************************************************
637 206846 : SUBROUTINE cp_cfm_to_cfm_matrix(source, destination)
638 : TYPE(cp_cfm_type), INTENT(IN) :: source, destination
639 :
640 : INTEGER :: npcol, nprow
641 :
642 206846 : nprow = source%matrix_struct%context%num_pe(1)
643 206846 : npcol = source%matrix_struct%context%num_pe(2)
644 :
645 206846 : IF (.NOT. cp2k_is_parallel .OR. &
646 : cp_fm_struct_equivalent(source%matrix_struct, &
647 : destination%matrix_struct)) THEN
648 206846 : IF (SIZE(source%local_data, 1) /= SIZE(destination%local_data, 1) .OR. &
649 : SIZE(source%local_data, 2) /= SIZE(destination%local_data, 2)) &
650 0 : CPABORT("internal local_data has different sizes")
651 620538 : CALL zcopy(SIZE(source%local_data), source%local_data(1, 1), 1, destination%local_data(1, 1), 1)
652 : ELSE
653 0 : IF (source%matrix_struct%nrow_global /= destination%matrix_struct%nrow_global) &
654 0 : CPABORT("cannot copy between full matrixes of differen sizes")
655 0 : IF (source%matrix_struct%ncol_global /= destination%matrix_struct%ncol_global) &
656 0 : CPABORT("cannot copy between full matrixes of differen sizes")
657 : #if defined(__parallel)
658 : CALL pzcopy(source%matrix_struct%nrow_global* &
659 : source%matrix_struct%ncol_global, &
660 : source%local_data(1, 1), 1, 1, source%matrix_struct%descriptor, 1, &
661 0 : destination%local_data(1, 1), 1, 1, destination%matrix_struct%descriptor, 1)
662 : #else
663 : CPABORT("")
664 : #endif
665 : END IF
666 206846 : END SUBROUTINE cp_cfm_to_cfm_matrix
667 :
668 : ! **************************************************************************************************
669 : !> \brief Copy a number of sequential columns of a full matrix into another full matrix.
670 : !> \param msource source matrix
671 : !> \param mtarget destination matrix
672 : !> \param ncol number of columns to copy
673 : !> \param source_start global index of the first column to copy within the source matrix
674 : !> \param target_start global index of the first column to copy within the destination matrix
675 : ! **************************************************************************************************
676 16902 : SUBROUTINE cp_cfm_to_cfm_columns(msource, mtarget, ncol, source_start, &
677 : target_start)
678 :
679 : TYPE(cp_cfm_type), INTENT(IN) :: msource, mtarget
680 : INTEGER, INTENT(IN) :: ncol
681 : INTEGER, INTENT(IN), OPTIONAL :: source_start, target_start
682 :
683 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_cfm_to_cfm_columns'
684 :
685 16902 : COMPLEX(kind=dp), DIMENSION(:, :), POINTER :: a, b
686 : INTEGER :: handle, n, ss, ts
687 : #if defined(__parallel)
688 : INTEGER :: i
689 : INTEGER, DIMENSION(9) :: desca, descb
690 : #endif
691 :
692 16902 : CALL timeset(routineN, handle)
693 :
694 16902 : ss = 1
695 16902 : ts = 1
696 :
697 16902 : IF (PRESENT(source_start)) ss = source_start
698 16902 : IF (PRESENT(target_start)) ts = target_start
699 :
700 16902 : n = msource%matrix_struct%nrow_global
701 :
702 16902 : a => msource%local_data
703 16902 : b => mtarget%local_data
704 :
705 : #if defined(__parallel)
706 169020 : desca(:) = msource%matrix_struct%descriptor(:)
707 169020 : descb(:) = mtarget%matrix_struct%descriptor(:)
708 333350 : DO i = 0, ncol - 1
709 333350 : CALL pzcopy(n, a(1, 1), 1, ss + i, desca, 1, b(1, 1), 1, ts + i, descb, 1)
710 : END DO
711 : #else
712 : CALL zcopy(ncol*n, a(1, ss), 1, b(1, ts), 1)
713 : #endif
714 :
715 16902 : CALL timestop(handle)
716 :
717 16902 : END SUBROUTINE cp_cfm_to_cfm_columns
718 :
719 : ! **************************************************************************************************
720 : !> \brief Copy just a triangular matrix.
721 : !> \param msource source matrix
722 : !> \param mtarget target matrix
723 : !> \param uplo 'U' for upper triangular, 'L' for lower triangular
724 : ! **************************************************************************************************
725 0 : SUBROUTINE cp_cfm_to_cfm_triangular(msource, mtarget, uplo)
726 : TYPE(cp_cfm_type), INTENT(IN) :: msource, mtarget
727 : CHARACTER(len=*), INTENT(IN) :: uplo
728 :
729 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_cfm_to_cfm_triangular'
730 :
731 0 : COMPLEX(kind=dp), DIMENSION(:, :), POINTER :: aa, bb
732 : INTEGER :: handle, ncol, nrow
733 : #if defined(__parallel)
734 : INTEGER, DIMENSION(9) :: desca, descb
735 : #endif
736 :
737 0 : CALL timeset(routineN, handle)
738 :
739 0 : nrow = msource%matrix_struct%nrow_global
740 0 : ncol = msource%matrix_struct%ncol_global
741 :
742 0 : aa => msource%local_data
743 0 : bb => mtarget%local_data
744 :
745 : #if defined(__parallel)
746 0 : desca(:) = msource%matrix_struct%descriptor(:)
747 0 : descb(:) = mtarget%matrix_struct%descriptor(:)
748 0 : CALL pzlacpy(uplo, nrow, ncol, aa(1, 1), 1, 1, desca, bb(1, 1), 1, 1, descb)
749 : #else
750 : CALL zlacpy(uplo, nrow, ncol, aa(1, 1), nrow, bb(1, 1), nrow)
751 : #endif
752 :
753 0 : CALL timestop(handle)
754 0 : END SUBROUTINE cp_cfm_to_cfm_triangular
755 :
756 : ! **************************************************************************************************
757 : !> \brief Copy real and imaginary parts of a complex full matrix into
758 : !> separate real-value full matrices.
759 : !> \param msource complex matrix
760 : !> \param mtargetr (optional) real part of the source matrix
761 : !> \param mtargeti (optional) imaginary part of the source matrix
762 : !> \note
763 : !> Matrix structures are assumed to be equivalent.
764 : ! **************************************************************************************************
765 83856 : SUBROUTINE cp_cfm_to_fm(msource, mtargetr, mtargeti)
766 :
767 : TYPE(cp_cfm_type), INTENT(IN) :: msource
768 : TYPE(cp_fm_type), INTENT(IN), OPTIONAL :: mtargetr, mtargeti
769 :
770 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_cfm_to_fm'
771 :
772 83856 : COMPLEX(kind=dp), DIMENSION(:, :), POINTER :: zmat
773 : INTEGER :: handle
774 83856 : REAL(kind=dp), DIMENSION(:, :), POINTER :: imat, rmat
775 :
776 83856 : CALL timeset(routineN, handle)
777 :
778 83856 : zmat => msource%local_data
779 83856 : IF (PRESENT(mtargetr)) THEN
780 75268 : rmat => mtargetr%local_data
781 : IF ((.NOT. cp_fm_struct_equivalent(mtargetr%matrix_struct, msource%matrix_struct)) .OR. &
782 75268 : (SIZE(rmat, 1) /= SIZE(zmat, 1)) .OR. &
783 : (SIZE(rmat, 2) /= SIZE(zmat, 2))) THEN
784 0 : CPABORT("size of local_data of mtargetr differ to msource")
785 : END IF
786 : ! copy local data
787 40831681 : rmat = REAL(zmat, kind=dp)
788 : ELSE
789 83856 : NULLIFY (rmat)
790 : END IF
791 83856 : IF (PRESENT(mtargeti)) THEN
792 68124 : imat => mtargeti%local_data
793 : IF ((.NOT. cp_fm_struct_equivalent(mtargeti%matrix_struct, msource%matrix_struct)) .OR. &
794 68124 : (SIZE(imat, 1) /= SIZE(zmat, 1)) .OR. &
795 : (SIZE(imat, 2) /= SIZE(zmat, 2))) THEN
796 0 : CPABORT("size of local_data of mtargeti differ to msource")
797 : END IF
798 : ! copy local data
799 40241849 : imat = REAL(AIMAG(zmat), kind=dp)
800 : ELSE
801 83856 : NULLIFY (imat)
802 : END IF
803 :
804 83856 : CALL timestop(handle)
805 :
806 83856 : END SUBROUTINE cp_cfm_to_fm
807 :
808 : ! **************************************************************************************************
809 : !> \brief Construct a complex full matrix by taking its real and imaginary parts from
810 : !> two separate real-value full matrices.
811 : !> \param msourcer (optional) real part of the complex matrix (defaults to 0.0)
812 : !> \param msourcei (optional) imaginary part of the complex matrix (defaults to 0.0)
813 : !> \param mtarget resulting complex matrix
814 : !> \note
815 : !> Matrix structures are assumed to be equivalent.
816 : ! **************************************************************************************************
817 97586 : SUBROUTINE cp_fm_to_cfm(msourcer, msourcei, mtarget)
818 : TYPE(cp_fm_type), INTENT(IN), OPTIONAL :: msourcer, msourcei
819 : TYPE(cp_cfm_type), INTENT(IN) :: mtarget
820 :
821 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_to_cfm'
822 :
823 97586 : COMPLEX(kind=dp), DIMENSION(:, :), POINTER :: zmat
824 : INTEGER :: handle, mode
825 97586 : REAL(kind=dp), DIMENSION(:, :), POINTER :: imat, rmat
826 :
827 97586 : CALL timeset(routineN, handle)
828 :
829 97586 : mode = 0
830 97586 : zmat => mtarget%local_data
831 97586 : IF (PRESENT(msourcer)) THEN
832 92350 : rmat => msourcer%local_data
833 : IF ((.NOT. cp_fm_struct_equivalent(msourcer%matrix_struct, mtarget%matrix_struct)) .OR. &
834 92350 : (SIZE(rmat, 1) /= SIZE(zmat, 1)) .OR. &
835 : (SIZE(rmat, 2) /= SIZE(zmat, 2))) THEN
836 0 : CPABORT("size of local_data of msourcer differ to mtarget")
837 : END IF
838 : mode = mode + 1
839 : ELSE
840 : NULLIFY (rmat)
841 : END IF
842 97586 : IF (PRESENT(msourcei)) THEN
843 65794 : imat => msourcei%local_data
844 : IF ((.NOT. cp_fm_struct_equivalent(msourcei%matrix_struct, mtarget%matrix_struct)) .OR. &
845 65794 : (SIZE(imat, 1) /= SIZE(zmat, 1)) .OR. &
846 : (SIZE(imat, 2) /= SIZE(zmat, 2))) THEN
847 0 : CPABORT("size of local_data of msourcei differ to mtarget")
848 : END IF
849 65794 : mode = mode + 2
850 : ELSE
851 : NULLIFY (imat)
852 : END IF
853 : ! copy local data
854 : SELECT CASE (mode)
855 : CASE (0)
856 0 : zmat(:, :) = z_zero
857 : CASE (1)
858 1028841 : zmat(:, :) = CMPLX(rmat(:, :), 0.0_dp, kind=dp)
859 : CASE (2)
860 67108 : zmat(:, :) = CMPLX(0.0_dp, imat(:, :), kind=dp)
861 : CASE (3)
862 21002154 : zmat(:, :) = CMPLX(rmat(:, :), imat(:, :), kind=dp)
863 : END SELECT
864 :
865 97586 : CALL timestop(handle)
866 :
867 97586 : END SUBROUTINE cp_fm_to_cfm
868 :
869 : ! **************************************************************************************************
870 : !> \brief Initiate the copy operation: get distribution data, post MPI isend and irecvs.
871 : !> \param source input complex-valued fm matrix
872 : !> \param destination output complex-valued fm matrix
873 : !> \param para_env parallel environment corresponding to the BLACS env that covers all parts
874 : !> of the input and output matrices
875 : !> \param info all of the data that will be needed to complete the copy operation
876 : !> \note a slightly modified version of the subroutine cp_fm_start_copy_general() that uses
877 : !> allocatable arrays instead of pointers wherever possible.
878 : ! **************************************************************************************************
879 27480 : SUBROUTINE cp_cfm_start_copy_general(source, destination, para_env, info)
880 : TYPE(cp_cfm_type), INTENT(IN) :: source, destination
881 : TYPE(mp_para_env_type), INTENT(IN), POINTER :: para_env
882 : TYPE(copy_cfm_info_type), INTENT(out) :: info
883 :
884 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_cfm_start_copy_general'
885 :
886 : INTEGER :: dest_p_i, dest_q_j, global_rank, global_size, handle, i, j, k, mpi_rank, &
887 : ncol_block_dest, ncol_block_src, ncol_local_recv, ncol_local_send, ncols, &
888 : nrow_block_dest, nrow_block_src, nrow_local_recv, nrow_local_send, nrows, p, q, &
889 : recv_rank, recv_size, send_rank, send_size
890 2748 : INTEGER, ALLOCATABLE, DIMENSION(:) :: all_ranks, dest2global, dest_p, dest_q, &
891 5496 : recv_count, send_count, send_disp, &
892 2748 : source2global, src_p, src_q
893 2748 : INTEGER, ALLOCATABLE, DIMENSION(:, :) :: dest_blacs2mpi
894 : INTEGER, DIMENSION(2) :: dest_block, dest_block_tmp, dest_num_pe, &
895 : src_block, src_block_tmp, src_num_pe
896 5496 : INTEGER, DIMENSION(:), POINTER :: recv_col_indices, recv_row_indices, &
897 5496 : send_col_indices, send_row_indices
898 : TYPE(cp_fm_struct_type), POINTER :: recv_dist, send_dist
899 38472 : TYPE(mp_request_type), DIMENSION(6) :: recv_req, send_req
900 :
901 2748 : CALL timeset(routineN, handle)
902 :
903 : IF (.NOT. cp2k_is_parallel) THEN
904 : ! Just copy all of the matrix data into a 'send buffer', to be unpacked later
905 : nrow_local_send = SIZE(source%local_data, 1)
906 : ncol_local_send = SIZE(source%local_data, 2)
907 : ALLOCATE (info%send_buf(nrow_local_send*ncol_local_send))
908 : k = 0
909 : DO j = 1, ncol_local_send
910 : DO i = 1, nrow_local_send
911 : k = k + 1
912 : info%send_buf(k) = source%local_data(i, j)
913 : END DO
914 : END DO
915 : ELSE
916 2748 : NULLIFY (recv_dist, send_dist)
917 2748 : NULLIFY (recv_col_indices, recv_row_indices, send_col_indices, send_row_indices)
918 :
919 : ! The 'global' communicator contains both the source and destination decompositions
920 2748 : global_size = para_env%num_pe
921 2748 : global_rank = para_env%mepos
922 :
923 : ! The source/send decomposition and destination/recv decompositions may only exist on
924 : ! on a subset of the processes involved in the communication
925 : ! Check if the source and/or destination arguments are .not. ASSOCIATED():
926 : ! if so, skip the send / recv parts (since these processes do not participate in the sending/receiving distribution)
927 2748 : IF (ASSOCIATED(destination%matrix_struct)) THEN
928 2748 : recv_dist => destination%matrix_struct
929 2748 : recv_rank = recv_dist%para_env%mepos
930 : ELSE
931 0 : recv_rank = mp_proc_null
932 : END IF
933 :
934 2748 : IF (ASSOCIATED(source%matrix_struct)) THEN
935 1374 : send_dist => source%matrix_struct
936 1374 : send_rank = send_dist%para_env%mepos
937 : ELSE
938 1374 : send_rank = mp_proc_null
939 : END IF
940 :
941 : ! Map the rank in the source/dest communicator to the global rank
942 8244 : ALLOCATE (all_ranks(0:global_size - 1))
943 :
944 2748 : CALL para_env%allgather(send_rank, all_ranks)
945 2748 : IF (ASSOCIATED(destination%matrix_struct)) THEN
946 13740 : ALLOCATE (source2global(0:COUNT(all_ranks /= mp_proc_null) - 1))
947 8244 : DO i = 0, global_size - 1
948 8244 : IF (all_ranks(i) /= mp_proc_null) THEN
949 2748 : source2global(all_ranks(i)) = i
950 : END IF
951 : END DO
952 : END IF
953 :
954 2748 : CALL para_env%allgather(recv_rank, all_ranks)
955 2748 : IF (ASSOCIATED(source%matrix_struct)) THEN
956 6870 : ALLOCATE (dest2global(0:COUNT(all_ranks /= mp_proc_null) - 1))
957 4122 : DO i = 0, global_size - 1
958 4122 : IF (all_ranks(i) /= mp_proc_null) THEN
959 2748 : dest2global(all_ranks(i)) = i
960 : END IF
961 : END DO
962 : END IF
963 2748 : DEALLOCATE (all_ranks)
964 :
965 : ! Some data from the two decompositions will be needed by all processes in the global group :
966 : ! process grid shape, block size, and the BLACS-to-MPI mapping
967 :
968 : ! The global root process will receive the data (from the root process in each decomposition)
969 19236 : send_req(:) = mp_request_null
970 2748 : IF (global_rank == 0) THEN
971 9618 : recv_req(:) = mp_request_null
972 1374 : CALL para_env%irecv(src_block, mp_any_source, recv_req(1), tag=src_tag)
973 1374 : CALL para_env%irecv(dest_block, mp_any_source, recv_req(2), tag=dest_tag)
974 1374 : CALL para_env%irecv(src_num_pe, mp_any_source, recv_req(3), tag=src_tag)
975 1374 : CALL para_env%irecv(dest_num_pe, mp_any_source, recv_req(4), tag=dest_tag)
976 : END IF
977 :
978 2748 : IF (ASSOCIATED(source%matrix_struct)) THEN
979 1374 : IF ((send_rank == 0)) THEN
980 : ! need to use separate buffers here in case this is actually global rank 0
981 4122 : src_block_tmp = [send_dist%nrow_block, send_dist%ncol_block]
982 1374 : CALL para_env%isend(src_block_tmp, 0, send_req(1), tag=src_tag)
983 1374 : CALL para_env%isend(send_dist%context%num_pe, 0, send_req(2), tag=src_tag)
984 : END IF
985 : END IF
986 :
987 2748 : IF (ASSOCIATED(destination%matrix_struct)) THEN
988 2748 : IF ((recv_rank == 0)) THEN
989 4122 : dest_block_tmp = [recv_dist%nrow_block, recv_dist%ncol_block]
990 1374 : CALL para_env%isend(dest_block_tmp, 0, send_req(3), tag=dest_tag)
991 1374 : CALL para_env%isend(recv_dist%context%num_pe, 0, send_req(4), tag=dest_tag)
992 : END IF
993 : END IF
994 :
995 2748 : IF (global_rank == 0) THEN
996 1374 : CALL mp_waitall(recv_req(1:4))
997 : ! Now we know the process decomposition, we can allocate the arrays to hold the blacs2mpi mapping
998 0 : ALLOCATE (info%src_blacs2mpi(0:src_num_pe(1) - 1, 0:src_num_pe(2) - 1), &
999 9618 : dest_blacs2mpi(0:dest_num_pe(1) - 1, 0:dest_num_pe(2) - 1))
1000 1374 : CALL para_env%irecv(info%src_blacs2mpi, mp_any_source, recv_req(5), tag=src_tag)
1001 1374 : CALL para_env%irecv(dest_blacs2mpi, mp_any_source, recv_req(6), tag=dest_tag)
1002 : END IF
1003 :
1004 2748 : IF (ASSOCIATED(source%matrix_struct)) THEN
1005 1374 : IF ((send_rank == 0)) THEN
1006 1374 : CALL para_env%isend(send_dist%context%blacs2mpi(:, :), 0, send_req(5), tag=src_tag)
1007 : END IF
1008 : END IF
1009 :
1010 2748 : IF (ASSOCIATED(destination%matrix_struct)) THEN
1011 2748 : IF ((recv_rank == 0)) THEN
1012 1374 : CALL para_env%isend(recv_dist%context%blacs2mpi(:, :), 0, send_req(6), tag=dest_tag)
1013 : END IF
1014 : END IF
1015 :
1016 2748 : IF (global_rank == 0) THEN
1017 1374 : CALL mp_waitall(recv_req(5:6))
1018 : END IF
1019 :
1020 : ! Finally, broadcast the data to all processes in the global communicator
1021 2748 : CALL para_env%bcast(src_block, 0)
1022 2748 : CALL para_env%bcast(dest_block, 0)
1023 2748 : CALL para_env%bcast(src_num_pe, 0)
1024 2748 : CALL para_env%bcast(dest_num_pe, 0)
1025 8244 : info%src_num_pe(1:2) = src_num_pe(1:2)
1026 8244 : info%nblock_src(1:2) = src_block(1:2)
1027 2748 : IF (global_rank /= 0) THEN
1028 0 : ALLOCATE (info%src_blacs2mpi(0:src_num_pe(1) - 1, 0:src_num_pe(2) - 1), &
1029 9618 : dest_blacs2mpi(0:dest_num_pe(1) - 1, 0:dest_num_pe(2) - 1))
1030 : END IF
1031 2748 : CALL para_env%bcast(info%src_blacs2mpi, 0)
1032 2748 : CALL para_env%bcast(dest_blacs2mpi, 0)
1033 :
1034 2748 : recv_size = dest_num_pe(1)*dest_num_pe(2)
1035 2748 : send_size = src_num_pe(1)*src_num_pe(2)
1036 2748 : info%send_size = send_size
1037 2748 : CALL mp_waitall(send_req(:))
1038 :
1039 : ! Setup is now complete, we can start the actual communication here.
1040 : ! The order implemented here is:
1041 : ! DEST_1
1042 : ! compute recv sizes
1043 : ! call irecv
1044 : ! SRC_1
1045 : ! compute send sizes
1046 : ! pack send buffers
1047 : ! call isend
1048 : ! DEST_2
1049 : ! wait for the recvs and unpack buffers (this part eventually will go into another routine to allow comms to run concurrently)
1050 : ! SRC_2
1051 : ! wait for the sends
1052 :
1053 : ! DEST_1
1054 2748 : IF (ASSOCIATED(destination%matrix_struct)) THEN
1055 : CALL cp_fm_struct_get(recv_dist, row_indices=recv_row_indices, &
1056 2748 : col_indices=recv_col_indices)
1057 2748 : info%recv_col_indices => recv_col_indices
1058 2748 : info%recv_row_indices => recv_row_indices
1059 2748 : nrow_block_src = src_block(1)
1060 2748 : ncol_block_src = src_block(2)
1061 16488 : ALLOCATE (recv_count(0:send_size - 1), info%recv_disp(0:send_size - 1), info%recv_request(0:send_size - 1))
1062 :
1063 : ! Determine the recv counts, allocate the receive buffers, call mpi_irecv for all the non-zero sized receives
1064 2748 : nrow_local_recv = recv_dist%nrow_locals(recv_dist%context%mepos(1))
1065 2748 : ncol_local_recv = recv_dist%ncol_locals(recv_dist%context%mepos(2))
1066 2748 : info%nlocal_recv(1) = nrow_local_recv
1067 2748 : info%nlocal_recv(2) = ncol_local_recv
1068 : ! Initialise src_p, src_q arrays (sized using number of rows/cols in the receiving distribution)
1069 13740 : ALLOCATE (src_p(nrow_local_recv), src_q(ncol_local_recv))
1070 17604 : DO i = 1, nrow_local_recv
1071 : ! For each local row we will receive, we look up its global row (in recv_row_indices),
1072 : ! then work out which row block it comes from, and which process row that row block comes from.
1073 17604 : src_p(i) = MOD(((recv_row_indices(i) - 1)/nrow_block_src), src_num_pe(1))
1074 : END DO
1075 32460 : DO j = 1, ncol_local_recv
1076 : ! Similarly for the columns
1077 32460 : src_q(j) = MOD(((recv_col_indices(j) - 1)/ncol_block_src), src_num_pe(2))
1078 : END DO
1079 : ! src_p/q now contains the process row/column ID that will send data to that row/column
1080 :
1081 5496 : DO q = 0, src_num_pe(2) - 1
1082 32460 : ncols = COUNT(src_q == q)
1083 8244 : DO p = 0, src_num_pe(1) - 1
1084 17604 : nrows = COUNT(src_p == p)
1085 : ! Use the send_dist here as we are looking up the processes where the data comes from
1086 5496 : recv_count(info%src_blacs2mpi(p, q)) = nrows*ncols
1087 : END DO
1088 : END DO
1089 2748 : DEALLOCATE (src_p, src_q)
1090 :
1091 : ! Use one long buffer (and displacements into that buffer)
1092 : ! this prevents the need for a rectangular array where not all elements will be populated
1093 10992 : ALLOCATE (info%recv_buf(SUM(recv_count(:))))
1094 2748 : info%recv_disp(0) = 0
1095 2748 : DO i = 1, send_size - 1
1096 2748 : info%recv_disp(i) = info%recv_disp(i - 1) + recv_count(i - 1)
1097 : END DO
1098 :
1099 : ! Issue receive calls on ranks which expect data
1100 5496 : DO k = 0, send_size - 1
1101 5496 : IF (recv_count(k) > 0) THEN
1102 : CALL para_env%irecv(info%recv_buf(info%recv_disp(k) + 1:info%recv_disp(k) + recv_count(k)), &
1103 2748 : source2global(k), info%recv_request(k))
1104 : ELSE
1105 0 : info%recv_request(k) = mp_request_null
1106 : END IF
1107 : END DO
1108 2748 : DEALLOCATE (source2global)
1109 : END IF ! ASSOCIATED(destination)
1110 :
1111 : ! SRC_1
1112 2748 : IF (ASSOCIATED(source%matrix_struct)) THEN
1113 : CALL cp_fm_struct_get(send_dist, row_indices=send_row_indices, &
1114 1374 : col_indices=send_col_indices)
1115 1374 : nrow_block_dest = dest_block(1)
1116 1374 : ncol_block_dest = dest_block(2)
1117 9618 : ALLOCATE (send_count(0:recv_size - 1), send_disp(0:recv_size - 1), info%send_request(0:recv_size - 1))
1118 :
1119 : ! Determine the send counts, allocate the send buffers
1120 1374 : nrow_local_send = send_dist%nrow_locals(send_dist%context%mepos(1))
1121 1374 : ncol_local_send = send_dist%ncol_locals(send_dist%context%mepos(2))
1122 :
1123 : ! Initialise dest_p, dest_q arrays (sized nrow_local, ncol_local)
1124 : ! i.e. number of rows,cols in the sending distribution
1125 6870 : ALLOCATE (dest_p(nrow_local_send), dest_q(ncol_local_send))
1126 :
1127 16230 : DO i = 1, nrow_local_send
1128 : ! Use the send_dist%row_indices() here (we are looping over the local rows we will send)
1129 16230 : dest_p(i) = MOD(((send_row_indices(i) - 1)/nrow_block_dest), dest_num_pe(1))
1130 : END DO
1131 16230 : DO j = 1, ncol_local_send
1132 16230 : dest_q(j) = MOD(((send_col_indices(j) - 1)/ncol_block_dest), dest_num_pe(2))
1133 : END DO
1134 : ! dest_p/q now contain the process row/column ID that will receive data from that row/column
1135 :
1136 2748 : DO q = 0, dest_num_pe(2) - 1
1137 16230 : ncols = COUNT(dest_q == q)
1138 5496 : DO p = 0, dest_num_pe(1) - 1
1139 32460 : nrows = COUNT(dest_p == p)
1140 4122 : send_count(dest_blacs2mpi(p, q)) = nrows*ncols
1141 : END DO
1142 : END DO
1143 1374 : DEALLOCATE (dest_p, dest_q)
1144 :
1145 : ! Allocate the send buffer using send_count -- and calculate the offset into the buffer for each process
1146 6870 : ALLOCATE (info%send_buf(SUM(send_count(:))))
1147 1374 : send_disp(0) = 0
1148 2748 : DO k = 1, recv_size - 1
1149 2748 : send_disp(k) = send_disp(k - 1) + send_count(k - 1)
1150 : END DO
1151 :
1152 : ! Loop over the smat, pack the send buffers
1153 4122 : send_count(:) = 0
1154 16230 : DO j = 1, ncol_local_send
1155 : ! Use send_col_indices and row_indices here, as we are looking up the global row/column number of local rows.
1156 14856 : dest_q_j = MOD(((send_col_indices(j) - 1)/ncol_block_dest), dest_num_pe(2))
1157 187974 : DO i = 1, nrow_local_send
1158 171744 : dest_p_i = MOD(((send_row_indices(i) - 1)/nrow_block_dest), dest_num_pe(1))
1159 171744 : mpi_rank = dest_blacs2mpi(dest_p_i, dest_q_j)
1160 171744 : send_count(mpi_rank) = send_count(mpi_rank) + 1
1161 186600 : info%send_buf(send_disp(mpi_rank) + send_count(mpi_rank)) = source%local_data(i, j)
1162 : END DO
1163 : END DO
1164 :
1165 : ! For each non-zero send_count, call mpi_isend
1166 4122 : DO k = 0, recv_size - 1
1167 4122 : IF (send_count(k) > 0) THEN
1168 : CALL para_env%isend(info%send_buf(send_disp(k) + 1:send_disp(k) + send_count(k)), &
1169 2748 : dest2global(k), info%send_request(k))
1170 : ELSE
1171 0 : info%send_request(k) = mp_request_null
1172 : END IF
1173 : END DO
1174 1374 : DEALLOCATE (send_count, send_disp, dest2global)
1175 : END IF ! ASSOCIATED(source)
1176 2748 : DEALLOCATE (dest_blacs2mpi)
1177 :
1178 : END IF !IF (.NOT. cp2k_is_parallel)
1179 :
1180 2748 : CALL timestop(handle)
1181 :
1182 10992 : END SUBROUTINE cp_cfm_start_copy_general
1183 :
1184 : ! **************************************************************************************************
1185 : !> \brief Complete the copy operation: wait for comms, unpack, clean up MPI state.
1186 : !> \param destination output cfm matrix
1187 : !> \param info all of the data that will be needed to complete the copy operation
1188 : !> \note a slightly modified version of the subroutine cp_fm_finish_copy_general() that uses
1189 : !> allocatable arrays instead of pointers wherever possible.
1190 : ! **************************************************************************************************
1191 2748 : SUBROUTINE cp_cfm_finish_copy_general(destination, info)
1192 : TYPE(cp_cfm_type), INTENT(IN) :: destination
1193 : TYPE(copy_cfm_info_type), INTENT(inout) :: info
1194 :
1195 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_cfm_finish_copy_general'
1196 :
1197 : INTEGER :: handle, i, j, k, mpi_rank, ni, nj, &
1198 : src_q_j
1199 2748 : INTEGER, ALLOCATABLE, DIMENSION(:) :: recv_count, src_p_i
1200 2748 : INTEGER, DIMENSION(:), POINTER :: recv_col_indices, recv_row_indices
1201 :
1202 2748 : CALL timeset(routineN, handle)
1203 :
1204 : IF (.NOT. cp2k_is_parallel) THEN
1205 : ! Now unpack the data from the 'send buffer'
1206 : k = 0
1207 : DO j = 1, SIZE(destination%local_data, 2)
1208 : DO i = 1, SIZE(destination%local_data, 1)
1209 : k = k + 1
1210 : destination%local_data(i, j) = info%send_buf(k)
1211 : END DO
1212 : END DO
1213 : DEALLOCATE (info%send_buf)
1214 : ELSE
1215 : ! Set up local variables ...
1216 2748 : recv_col_indices => info%recv_col_indices
1217 2748 : recv_row_indices => info%recv_row_indices
1218 :
1219 : ! ... use the local variables to do the work
1220 : ! DEST_2
1221 2748 : CALL mp_waitall(info%recv_request(:))
1222 :
1223 2748 : nj = info%nlocal_recv(2)
1224 2748 : ni = info%nlocal_recv(1)
1225 13740 : ALLOCATE (recv_count(0:info%send_size - 1), src_p_i(ni))
1226 : ! Loop over the rmat, filling it in with data from the recv buffers
1227 : ! (here the block sizes, num_pes refer to the distribution of the source matrix)
1228 5496 : recv_count(:) = 0
1229 17604 : DO i = 1, ni
1230 17604 : src_p_i(i) = MOD(((recv_row_indices(i) - 1)/info%nblock_src(1)), info%src_num_pe(1))
1231 : END DO
1232 :
1233 32460 : DO j = 1, nj
1234 29712 : src_q_j = MOD(((recv_col_indices(j) - 1)/info%nblock_src(2)), info%src_num_pe(2))
1235 204204 : DO i = 1, ni
1236 171744 : mpi_rank = info%src_blacs2mpi(src_p_i(i), src_q_j)
1237 171744 : recv_count(mpi_rank) = recv_count(mpi_rank) + 1
1238 201456 : destination%local_data(i, j) = info%recv_buf(info%recv_disp(mpi_rank) + recv_count(mpi_rank))
1239 : END DO
1240 : END DO
1241 :
1242 2748 : DEALLOCATE (recv_count, src_p_i)
1243 : ! Invalidate the stored state
1244 2748 : NULLIFY (info%recv_col_indices, info%recv_row_indices)
1245 2748 : DEALLOCATE (info%recv_disp, info%recv_request, info%recv_buf, info%src_blacs2mpi)
1246 : END IF
1247 :
1248 2748 : CALL timestop(handle)
1249 :
1250 2748 : END SUBROUTINE cp_cfm_finish_copy_general
1251 :
1252 : ! **************************************************************************************************
1253 : !> \brief Complete the copy operation: wait for comms clean up MPI state.
1254 : !> \param info all of the data that will be needed to complete the copy operation
1255 : !> \note a slightly modified version of the subroutine cp_fm_cleanup_copy_general() that uses
1256 : !> allocatable arrays instead of pointers wherever possible.
1257 : ! **************************************************************************************************
1258 1374 : SUBROUTINE cp_cfm_cleanup_copy_general(info)
1259 : TYPE(copy_cfm_info_type), INTENT(inout) :: info
1260 :
1261 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_cfm_cleanup_copy_general'
1262 :
1263 : INTEGER :: handle
1264 :
1265 1374 : CALL timeset(routineN, handle)
1266 :
1267 : IF (cp2k_is_parallel) THEN
1268 : ! SRC_2
1269 : ! If this process is also in the destination decomposition, this deallocate
1270 : ! Was already done in cp_fm_finish_copy_general
1271 1374 : IF (ALLOCATED(info%src_blacs2mpi)) DEALLOCATE (info%src_blacs2mpi)
1272 1374 : CALL mp_waitall(info%send_request(:))
1273 1374 : DEALLOCATE (info%send_request, info%send_buf)
1274 : END IF
1275 :
1276 1374 : CALL timestop(handle)
1277 1374 : END SUBROUTINE cp_cfm_cleanup_copy_general
1278 0 : END MODULE cp_cfm_types
|