Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2026 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief Represents a complex full matrix distributed on many processors.
10 : !> \author Joost VandeVondele, based on Fawzi's cp_fm_* routines
11 : ! **************************************************************************************************
12 : MODULE cp_cfm_types
13 : USE cp_blacs_env, ONLY: cp_blacs_env_type
14 : USE cp_fm_struct, ONLY: cp_fm_struct_create,&
15 : cp_fm_struct_equivalent,&
16 : cp_fm_struct_get,&
17 : cp_fm_struct_release,&
18 : cp_fm_struct_retain,&
19 : cp_fm_struct_type
20 : USE cp_fm_types, ONLY: cp_fm_type
21 : USE kinds, ONLY: dp
22 : USE mathconstants, ONLY: z_one,&
23 : z_zero
24 : USE message_passing, ONLY: cp2k_is_parallel,&
25 : mp_any_source,&
26 : mp_para_env_type,&
27 : mp_proc_null,&
28 : mp_request_null,&
29 : mp_request_type,&
30 : mp_waitall
31 : #include "../base/base_uses.f90"
32 :
33 : IMPLICIT NONE
34 : PRIVATE
35 :
36 : LOGICAL, PRIVATE, PARAMETER :: debug_this_module = .TRUE.
37 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_cfm_types'
38 : INTEGER, PARAMETER, PRIVATE :: src_tag = 3, dest_tag = 5, send_tag = 7, recv_tag = 11
39 :
40 : PUBLIC :: cp_cfm_type, cp_cfm_p_type, copy_cfm_info_type
41 : PUBLIC :: cp_cfm_cleanup_copy_general, &
42 : cp_cfm_create, &
43 : cp_cfm_finish_copy_general, &
44 : cp_cfm_get_element, &
45 : cp_cfm_get_info, &
46 : cp_cfm_get_submatrix, &
47 : cp_cfm_release, &
48 : cp_cfm_set_all, &
49 : cp_cfm_set_element, &
50 : cp_cfm_set_submatrix, &
51 : cp_cfm_start_copy_general, &
52 : cp_cfm_to_cfm, &
53 : cp_cfm_to_fm, &
54 : cp_fm_to_cfm
55 :
56 : INTERFACE cp_cfm_to_cfm
57 : MODULE PROCEDURE cp_cfm_to_cfm_matrix, & ! a full matrix
58 : cp_cfm_to_cfm_columns ! just a number of columns
59 : END INTERFACE
60 :
61 : ! **************************************************************************************************
62 : !> \brief Represent a complex full matrix.
63 : !> \param name the name of the matrix, used for printing
64 : !> \param matrix_struct structure of this matrix
65 : !> \param local_data array with the data of the matrix (its content depends on
66 : !> the matrix type used: in parallel run it will be in
67 : !> ScaLAPACK format, in sequential run it will simply contain the matrix)
68 : ! **************************************************************************************************
69 : TYPE cp_cfm_type
70 : CHARACTER(len=60) :: name = ""
71 : TYPE(cp_fm_struct_type), POINTER :: matrix_struct => NULL()
72 : COMPLEX(kind=dp), DIMENSION(:, :), POINTER, CONTIGUOUS :: local_data => NULL()
73 : END TYPE cp_cfm_type
74 :
75 : ! **************************************************************************************************
76 : !> \brief Just to build arrays of pointers to matrices.
77 : !> \param matrix the pointer to the matrix
78 : ! **************************************************************************************************
79 : TYPE cp_cfm_p_type
80 : TYPE(cp_cfm_type), POINTER :: matrix => NULL()
81 : END TYPE cp_cfm_p_type
82 :
83 : ! **************************************************************************************************
84 : !> \brief Stores the state of a copy between cp_cfm_start_copy_general
85 : !> and cp_cfm_finish_copy_general.
86 : !> \par History
87 : !> Jan 2017 derived type 'copy_info_type' has been created [Mark T]
88 : !> Jan 2018 the type 'copy_info_type' has been adapted for complex matrices [Sergey Chulkov]
89 : ! **************************************************************************************************
90 : TYPE copy_cfm_info_type
91 : !> number of MPI processes that send data
92 : INTEGER :: send_size = -1
93 : !> number of locally stored rows (1) and columns (2) of the destination matrix
94 : INTEGER, DIMENSION(2) :: nlocal_recv = -1
95 : !> number of rows (1) and columns (2) of the ScaLAPACK block of the source matrix
96 : INTEGER, DIMENSION(2) :: nblock_src = -1
97 : !> BLACS process grid shape of the source matrix: (1) nproc_row, (2) nproc_col
98 : INTEGER, DIMENSION(2) :: src_num_pe = -1
99 : !> displacements into recv_buf
100 : INTEGER, ALLOCATABLE, DIMENSION(:) :: recv_disp
101 : !> MPI requests for non-blocking receive and send operations
102 : TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:) :: recv_request, send_request
103 : !> global column and row indices of locally stored elements of the destination matrix
104 : INTEGER, DIMENSION(:), POINTER :: recv_col_indices => NULL(), recv_row_indices => NULL()
105 : !> rank of MPI process with BLACS coordinates (prow, pcol)
106 : INTEGER, ALLOCATABLE, DIMENSION(:, :) :: src_blacs2mpi
107 : !> receiving and sending buffers for non-blocking MPI communication
108 : COMPLEX(kind=dp), ALLOCATABLE, DIMENSION(:) :: recv_buf, send_buf
109 : END TYPE copy_cfm_info_type
110 :
111 : CONTAINS
112 :
113 : ! **************************************************************************************************
114 : !> \brief Creates a new full matrix with the given structure.
115 : !> \param matrix matrix to be created
116 : !> \param matrix_struct structure of the matrix
117 : !> \param name name of the matrix
118 : !> \param nrow ...
119 : !> \param ncol ...
120 : !> \param set_zero ...
121 : !> \note
122 : !> preferred allocation routine
123 : ! **************************************************************************************************
124 158376 : SUBROUTINE cp_cfm_create(matrix, matrix_struct, name, nrow, ncol, set_zero)
125 : TYPE(cp_cfm_type), INTENT(OUT) :: matrix
126 : TYPE(cp_fm_struct_type), INTENT(IN), TARGET :: matrix_struct
127 : CHARACTER(len=*), INTENT(in), OPTIONAL :: name
128 : INTEGER, INTENT(IN), OPTIONAL :: nrow, ncol
129 : LOGICAL, INTENT(IN), OPTIONAL :: set_zero
130 :
131 : INTEGER :: ncol_global, ncol_local, nrow_global, &
132 : nrow_local
133 : TYPE(cp_blacs_env_type), POINTER :: context
134 : TYPE(cp_fm_struct_type), POINTER :: cfm_struct
135 :
136 158376 : IF (PRESENT(nrow) .OR. PRESENT(ncol)) THEN
137 0 : CALL cp_fm_struct_get(matrix_struct, nrow_global=nrow_global, ncol_global=ncol_global)
138 0 : IF (PRESENT(nrow)) nrow_global = nrow
139 0 : IF (PRESENT(ncol)) ncol_global = ncol
140 : CALL cp_fm_struct_create(cfm_struct, template_fmstruct=matrix_struct, &
141 0 : nrow_global=nrow_global, ncol_global=ncol_global)
142 :
143 0 : context => cfm_struct%context
144 0 : matrix%matrix_struct => cfm_struct
145 0 : CALL cp_fm_struct_retain(matrix%matrix_struct)
146 :
147 0 : nrow_local = cfm_struct%local_leading_dimension
148 0 : ncol_local = MAX(1, cfm_struct%ncol_locals(context%mepos(2)))
149 :
150 0 : CALL cp_fm_struct_release(cfm_struct)
151 : ELSE
152 158376 : context => matrix_struct%context
153 158376 : matrix%matrix_struct => matrix_struct
154 158376 : CALL cp_fm_struct_retain(matrix%matrix_struct)
155 :
156 158376 : nrow_local = matrix_struct%local_leading_dimension
157 158376 : ncol_local = MAX(1, matrix_struct%ncol_locals(context%mepos(2)))
158 : END IF
159 :
160 158376 : NULLIFY (matrix%local_data)
161 633504 : ALLOCATE (matrix%local_data(nrow_local, ncol_local))
162 :
163 158376 : IF (PRESENT(set_zero)) THEN
164 42 : IF (set_zero) THEN
165 8297 : matrix%local_data(1:nrow_local, 1:ncol_local) = z_zero
166 : END IF
167 : END IF
168 :
169 158376 : IF (PRESENT(name)) THEN
170 18320 : matrix%name = name
171 : ELSE
172 140056 : matrix%name = 'full complex matrix'
173 : END IF
174 :
175 158376 : END SUBROUTINE cp_cfm_create
176 :
177 : ! **************************************************************************************************
178 : !> \brief Releases a full matrix.
179 : !> \param matrix the matrix to release
180 : ! **************************************************************************************************
181 167972 : SUBROUTINE cp_cfm_release(matrix)
182 : TYPE(cp_cfm_type), INTENT(INOUT) :: matrix
183 :
184 167972 : IF (ASSOCIATED(matrix%local_data)) THEN
185 158376 : DEALLOCATE (matrix%local_data)
186 : END IF
187 167972 : matrix%name = ""
188 167972 : CALL cp_fm_struct_release(matrix%matrix_struct)
189 167972 : END SUBROUTINE cp_cfm_release
190 :
191 : ! **************************************************************************************************
192 : !> \brief Set all elements of the full matrix to alpha. Besides, set all
193 : !> diagonal matrix elements to beta (if given explicitly).
194 : !> \param matrix matrix to initialise
195 : !> \param alpha value of off-diagonal matrix elements
196 : !> \param beta value of diagonal matrix elements (equal to alpha if absent)
197 : !> \date 12.06.2001
198 : !> \author Matthias Krack
199 : !> \version 1.0
200 : ! **************************************************************************************************
201 50250 : SUBROUTINE cp_cfm_set_all(matrix, alpha, beta)
202 : TYPE(cp_cfm_type), INTENT(IN) :: matrix
203 : COMPLEX(kind=dp), INTENT(in) :: alpha
204 : COMPLEX(kind=dp), INTENT(in), OPTIONAL :: beta
205 :
206 : INTEGER :: irow_local, nrow_local
207 : #if defined(__parallel)
208 : INTEGER :: icol_local, ncol_local
209 50250 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
210 : #endif
211 :
212 150750 : CALL zcopy(SIZE(matrix%local_data), alpha, 0, matrix%local_data(1, 1), 1)
213 :
214 50250 : IF (PRESENT(beta)) THEN
215 : #if defined(__parallel)
216 : CALL cp_cfm_get_info(matrix, nrow_local=nrow_local, ncol_local=ncol_local, &
217 9274 : row_indices=row_indices, col_indices=col_indices)
218 :
219 9274 : icol_local = 1
220 9274 : irow_local = 1
221 :
222 60894 : DO WHILE (irow_local <= nrow_local .AND. icol_local <= ncol_local)
223 60894 : IF (row_indices(irow_local) < col_indices(icol_local)) THEN
224 0 : irow_local = irow_local + 1
225 51620 : ELSE IF (row_indices(irow_local) > col_indices(icol_local)) THEN
226 21116 : icol_local = icol_local + 1
227 : ELSE
228 30504 : matrix%local_data(irow_local, icol_local) = beta
229 30504 : irow_local = irow_local + 1
230 30504 : icol_local = icol_local + 1
231 : END IF
232 : END DO
233 : #else
234 : nrow_local = MIN(matrix%matrix_struct%nrow_global, matrix%matrix_struct%ncol_global)
235 :
236 : DO irow_local = 1, nrow_local
237 : matrix%local_data(irow_local, irow_local) = beta
238 : END DO
239 : #endif
240 : END IF
241 :
242 50250 : END SUBROUTINE cp_cfm_set_all
243 :
244 : ! **************************************************************************************************
245 : !> \brief Get the matrix element by its global index.
246 : !> \param matrix full matrix
247 : !> \param irow_global global row index
248 : !> \param icol_global global column index
249 : !> \param alpha matrix element
250 : !> \par History
251 : !> , TCH, created
252 : !> always return the answer
253 : ! **************************************************************************************************
254 9282 : SUBROUTINE cp_cfm_get_element(matrix, irow_global, icol_global, alpha)
255 : TYPE(cp_cfm_type), INTENT(IN) :: matrix
256 : INTEGER, INTENT(in) :: irow_global, icol_global
257 : COMPLEX(kind=dp), INTENT(out) :: alpha
258 :
259 : #if defined(__parallel)
260 : INTEGER :: icol_local, ipcol, iprow, irow_local, &
261 : mypcol, myprow, npcol, nprow
262 : INTEGER, DIMENSION(9) :: desca
263 : TYPE(cp_blacs_env_type), POINTER :: context
264 : #endif
265 :
266 : #if defined(__parallel)
267 9282 : context => matrix%matrix_struct%context
268 9282 : myprow = context%mepos(1)
269 9282 : mypcol = context%mepos(2)
270 9282 : nprow = context%num_pe(1)
271 9282 : npcol = context%num_pe(2)
272 :
273 92820 : desca(:) = matrix%matrix_struct%descriptor(:)
274 :
275 : CALL infog2l(irow_global, icol_global, desca, nprow, npcol, myprow, mypcol, &
276 9282 : irow_local, icol_local, iprow, ipcol)
277 :
278 9282 : IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
279 4641 : alpha = matrix%local_data(irow_local, icol_local)
280 4641 : CALL context%ZGEBS2D('All', ' ', 1, 1, alpha, 1)
281 : ELSE
282 4641 : CALL context%ZGEBR2D('All', ' ', 1, 1, alpha, 1, iprow, ipcol)
283 : END IF
284 :
285 : #else
286 : alpha = matrix%local_data(irow_global, icol_global)
287 : #endif
288 :
289 9282 : END SUBROUTINE cp_cfm_get_element
290 :
291 : ! **************************************************************************************************
292 : !> \brief Set the matrix element (irow_global,icol_global) of the full matrix to alpha.
293 : !> \param matrix full matrix
294 : !> \param irow_global global row index
295 : !> \param icol_global global column index
296 : !> \param alpha value of the matrix element
297 : !> \date 12.06.2001
298 : !> \author Matthias Krack
299 : !> \version 1.0
300 : ! **************************************************************************************************
301 79956 : SUBROUTINE cp_cfm_set_element(matrix, irow_global, icol_global, alpha)
302 : TYPE(cp_cfm_type), INTENT(IN) :: matrix
303 : INTEGER, INTENT(in) :: irow_global, icol_global
304 : COMPLEX(kind=dp), INTENT(in) :: alpha
305 :
306 : #if defined(__parallel)
307 : INTEGER :: icol_local, ipcol, iprow, irow_local, &
308 : mypcol, myprow, npcol, nprow
309 : INTEGER, DIMENSION(9) :: desca
310 : TYPE(cp_blacs_env_type), POINTER :: context
311 : #endif
312 :
313 : #if defined(__parallel)
314 79956 : context => matrix%matrix_struct%context
315 79956 : myprow = context%mepos(1)
316 79956 : mypcol = context%mepos(2)
317 79956 : nprow = context%num_pe(1)
318 79956 : npcol = context%num_pe(2)
319 :
320 799560 : desca(:) = matrix%matrix_struct%descriptor(:)
321 :
322 : CALL infog2l(irow_global, icol_global, desca, nprow, npcol, myprow, mypcol, &
323 79956 : irow_local, icol_local, iprow, ipcol)
324 :
325 79956 : IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
326 39978 : matrix%local_data(irow_local, icol_local) = alpha
327 : END IF
328 :
329 : #else
330 : matrix%local_data(irow_global, icol_global) = alpha
331 : #endif
332 :
333 79956 : END SUBROUTINE cp_cfm_set_element
334 :
335 : ! **************************************************************************************************
336 : !> \brief Extract a sub-matrix from the full matrix:
337 : !> op(target_m)(1:n_rows,1:n_cols) = fm(start_row:start_row+n_rows,start_col:start_col+n_cols).
338 : !> Sub-matrix 'target_m' is replicated on each CPU. Using this call is expensive.
339 : !> \param fm full matrix you want to get the elements from
340 : !> \param target_m 2-D array to store the extracted sub-matrix
341 : !> \param start_row global row index of the matrix element target_m(1,1) (defaults to 1)
342 : !> \param start_col global column index of the matrix element target_m(1,1) (defaults to 1)
343 : !> \param n_rows number of rows to extract (defaults to size(op(target_m),1))
344 : !> \param n_cols number of columns to extract (defaults to size(op(target_m),2))
345 : !> \param transpose indicates that the extracted sub-matrix target_m should be transposed:
346 : !> op(target_m) = target_m^T if .TRUE.,
347 : !> op(target_m) = target_m if .FALSE. (defaults to false)
348 : !> \par History
349 : !> * 04.2016 created borrowing from Fawzi's cp_fm_get_submatrix [Lianheng Tong]
350 : !> * 01.2018 drop innermost conditional branching [Sergey Chulkov]
351 : !> \author Lianheng Tong
352 : !> \note
353 : !> Optimized for full column updates. The matrix target_m is replicated and valid on all CPUs.
354 : ! **************************************************************************************************
355 396 : SUBROUTINE cp_cfm_get_submatrix(fm, target_m, start_row, start_col, n_rows, n_cols, transpose)
356 : TYPE(cp_cfm_type), INTENT(IN) :: fm
357 : COMPLEX(kind=dp), DIMENSION(:, :), INTENT(out) :: target_m
358 : INTEGER, INTENT(in), OPTIONAL :: start_row, start_col, n_rows, n_cols
359 : LOGICAL, INTENT(in), OPTIONAL :: transpose
360 :
361 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_cfm_get_submatrix'
362 :
363 396 : COMPLEX(kind=dp), DIMENSION(:, :), POINTER :: local_data
364 : INTEGER :: end_col_global, end_col_local, end_row_global, end_row_local, handle, i, j, &
365 : ncol_global, ncol_local, nrow_global, nrow_local, start_col_global, start_col_local, &
366 : start_row_global, start_row_local, this_col
367 396 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
368 : LOGICAL :: do_zero, tr_a
369 : TYPE(mp_para_env_type), POINTER :: para_env
370 :
371 396 : CALL timeset(routineN, handle)
372 :
373 1188 : IF (SIZE(target_m) /= 0) THEN
374 : #if defined(__parallel)
375 396 : do_zero = .TRUE.
376 : #else
377 : do_zero = .FALSE.
378 : #endif
379 :
380 396 : tr_a = .FALSE.
381 396 : IF (PRESENT(transpose)) tr_a = transpose
382 :
383 : ! find out the first and last global row/column indices
384 396 : start_row_global = 1
385 396 : start_col_global = 1
386 396 : IF (PRESENT(start_row)) start_row_global = start_row
387 396 : IF (PRESENT(start_col)) start_col_global = start_col
388 :
389 396 : IF (tr_a) THEN
390 0 : end_row_global = SIZE(target_m, 2)
391 0 : end_col_global = SIZE(target_m, 1)
392 : ELSE
393 396 : end_row_global = SIZE(target_m, 1)
394 396 : end_col_global = SIZE(target_m, 2)
395 : END IF
396 396 : IF (PRESENT(n_rows)) end_row_global = n_rows
397 396 : IF (PRESENT(n_cols)) end_col_global = n_cols
398 :
399 396 : end_row_global = end_row_global + start_row_global - 1
400 396 : end_col_global = end_col_global + start_col_global - 1
401 :
402 : CALL cp_cfm_get_info(matrix=fm, &
403 : nrow_global=nrow_global, ncol_global=ncol_global, &
404 : nrow_local=nrow_local, ncol_local=ncol_local, &
405 396 : row_indices=row_indices, col_indices=col_indices)
406 396 : IF (end_row_global > nrow_global) THEN
407 : end_row_global = nrow_global
408 : do_zero = .TRUE.
409 : END IF
410 396 : IF (end_col_global > ncol_global) THEN
411 : end_col_global = ncol_global
412 : do_zero = .TRUE.
413 : END IF
414 :
415 : ! find out row/column indices of locally stored matrix elements that needs to be copied.
416 : ! Arrays row_indices and col_indices are assumed to be sorted in ascending order
417 1872 : DO start_row_local = 1, nrow_local
418 1872 : IF (row_indices(start_row_local) >= start_row_global) EXIT
419 : END DO
420 :
421 6279 : DO end_row_local = start_row_local, nrow_local
422 6279 : IF (row_indices(end_row_local) > end_row_global) EXIT
423 : END DO
424 396 : end_row_local = end_row_local - 1
425 :
426 3902 : DO start_col_local = 1, ncol_local
427 3902 : IF (col_indices(start_col_local) >= start_col_global) EXIT
428 : END DO
429 :
430 9742 : DO end_col_local = start_col_local, ncol_local
431 9742 : IF (col_indices(end_col_local) > end_col_global) EXIT
432 : END DO
433 396 : end_col_local = end_col_local - 1
434 :
435 396 : para_env => fm%matrix_struct%para_env
436 396 : local_data => fm%local_data
437 :
438 : ! wipe the content of the target matrix if:
439 : ! * the source matrix is distributed across a number of processes, or
440 : ! * not all elements of the target matrix will be assigned, e.g.
441 : ! when the target matrix is larger then the source matrix
442 : IF (do_zero) &
443 1188 : CALL zcopy(SIZE(target_m), z_zero, 0, target_m(1, 1), 1)
444 :
445 396 : IF (tr_a) THEN
446 0 : DO j = start_col_local, end_col_local
447 0 : this_col = col_indices(j) - start_col_global + 1
448 0 : DO i = start_row_local, end_row_local
449 0 : target_m(this_col, row_indices(i) - start_row_global + 1) = local_data(i, j)
450 : END DO
451 : END DO
452 : ELSE
453 9742 : DO j = start_col_local, end_col_local
454 9346 : this_col = col_indices(j) - start_col_global + 1
455 147362 : DO i = start_row_local, end_row_local
456 146966 : target_m(row_indices(i) - start_row_global + 1, this_col) = local_data(i, j)
457 : END DO
458 : END DO
459 : END IF
460 :
461 578016 : CALL para_env%sum(target_m)
462 : END IF
463 :
464 396 : CALL timestop(handle)
465 396 : END SUBROUTINE cp_cfm_get_submatrix
466 :
467 : ! **************************************************************************************************
468 : !> \brief Set a sub-matrix of the full matrix:
469 : !> matrix(start_row:start_row+n_rows,start_col:start_col+n_cols)
470 : !> = alpha*op(new_values)(1:n_rows,1:n_cols) +
471 : !> beta*matrix(start_row:start_row+n_rows,start_col:start_col+n_cols)
472 : !> \param matrix full to update
473 : !> \param new_values replicated 2-D array that holds new elements of the updated sub-matrix
474 : !> \param start_row global row index of the matrix element new_values(1,1) (defaults to 1)
475 : !> \param start_col global column index of the matrix element new_values(1,1) (defaults to 1)
476 : !> \param n_rows number of rows to update (defaults to size(op(new_values),1))
477 : !> \param n_cols number of columns to update (defaults to size(op(new_values),2))
478 : !> \param alpha scale factor for the new values (defaults to (1.0,0.0))
479 : !> \param beta scale factor for the old values (defaults to (0.0,0.0))
480 : !> \param transpose indicates that the matrix new_values should be transposed:
481 : !> op(new_values) = new_values^T if .TRUE.,
482 : !> op(new_values) = new_values if .FALSE. (defaults to false)
483 : !> \par History
484 : !> * 04.2016 created borrowing from Fawzi's cp_fm_set_submatrix [Lianheng Tong]
485 : !> * 01.2018 drop innermost conditional branching [Sergey Chulkov]
486 : !> \author Lianheng Tong
487 : !> \note
488 : !> Optimized for alpha=(1.0,0.0), beta=(0.0,0.0)
489 : !> All matrix elements 'new_values' need to be valid on all CPUs
490 : ! **************************************************************************************************
491 214 : SUBROUTINE cp_cfm_set_submatrix(matrix, new_values, start_row, &
492 : start_col, n_rows, n_cols, alpha, beta, transpose)
493 : TYPE(cp_cfm_type), INTENT(IN) :: matrix
494 : COMPLEX(kind=dp), DIMENSION(:, :), INTENT(in) :: new_values
495 : INTEGER, INTENT(in), OPTIONAL :: start_row, start_col, n_rows, n_cols
496 : COMPLEX(kind=dp), INTENT(in), OPTIONAL :: alpha, beta
497 : LOGICAL, INTENT(in), OPTIONAL :: transpose
498 :
499 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_cfm_set_submatrix'
500 :
501 : COMPLEX(kind=dp) :: al, be
502 214 : COMPLEX(kind=dp), DIMENSION(:, :), POINTER :: local_data
503 : INTEGER :: end_col_global, end_col_local, end_row_global, end_row_local, handle, i, j, &
504 : ncol_global, ncol_local, nrow_global, nrow_local, start_col_global, start_col_local, &
505 : start_row_global, start_row_local, this_col
506 214 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
507 : LOGICAL :: tr_a
508 :
509 214 : CALL timeset(routineN, handle)
510 :
511 214 : al = z_one
512 214 : be = z_zero
513 214 : IF (PRESENT(alpha)) al = alpha
514 214 : IF (PRESENT(beta)) be = beta
515 :
516 : ! find out the first and last global row/column indices
517 214 : start_row_global = 1
518 214 : start_col_global = 1
519 214 : IF (PRESENT(start_row)) start_row_global = start_row
520 214 : IF (PRESENT(start_col)) start_col_global = start_col
521 :
522 214 : tr_a = .FALSE.
523 214 : IF (PRESENT(transpose)) tr_a = transpose
524 :
525 0 : IF (tr_a) THEN
526 0 : end_row_global = SIZE(new_values, 2)
527 0 : end_col_global = SIZE(new_values, 1)
528 : ELSE
529 214 : end_row_global = SIZE(new_values, 1)
530 214 : end_col_global = SIZE(new_values, 2)
531 : END IF
532 214 : IF (PRESENT(n_rows)) end_row_global = n_rows
533 214 : IF (PRESENT(n_cols)) end_col_global = n_cols
534 :
535 214 : end_row_global = end_row_global + start_row_global - 1
536 214 : end_col_global = end_col_global + start_col_global - 1
537 :
538 : CALL cp_cfm_get_info(matrix=matrix, &
539 : nrow_global=nrow_global, ncol_global=ncol_global, &
540 : nrow_local=nrow_local, ncol_local=ncol_local, &
541 214 : row_indices=row_indices, col_indices=col_indices)
542 214 : IF (end_row_global > nrow_global) end_row_global = nrow_global
543 214 : IF (end_col_global > ncol_global) end_col_global = ncol_global
544 :
545 : ! find out row/column indices of locally stored matrix elements that needs to be set.
546 : ! Arrays row_indices and col_indices are assumed to be sorted in ascending order
547 952 : DO start_row_local = 1, nrow_local
548 952 : IF (row_indices(start_row_local) >= start_row_global) EXIT
549 : END DO
550 :
551 2880 : DO end_row_local = start_row_local, nrow_local
552 2880 : IF (row_indices(end_row_local) > end_row_global) EXIT
553 : END DO
554 214 : end_row_local = end_row_local - 1
555 :
556 918 : DO start_col_local = 1, ncol_local
557 918 : IF (col_indices(start_col_local) >= start_col_global) EXIT
558 : END DO
559 :
560 6646 : DO end_col_local = start_col_local, ncol_local
561 6646 : IF (col_indices(end_col_local) > end_col_global) EXIT
562 : END DO
563 214 : end_col_local = end_col_local - 1
564 :
565 214 : local_data => matrix%local_data
566 :
567 214 : IF (al == z_one .AND. be == z_zero) THEN
568 214 : IF (tr_a) THEN
569 0 : DO j = start_col_local, end_col_local
570 0 : this_col = col_indices(j) - start_col_global + 1
571 0 : DO i = start_row_local, end_row_local
572 0 : local_data(i, j) = new_values(this_col, row_indices(i) - start_row_global + 1)
573 : END DO
574 : END DO
575 : ELSE
576 6646 : DO j = start_col_local, end_col_local
577 6432 : this_col = col_indices(j) - start_col_global + 1
578 84033 : DO i = start_row_local, end_row_local
579 83819 : local_data(i, j) = new_values(row_indices(i) - start_row_global + 1, this_col)
580 : END DO
581 : END DO
582 : END IF
583 : ELSE
584 0 : IF (tr_a) THEN
585 0 : DO j = start_col_local, end_col_local
586 0 : this_col = col_indices(j) - start_col_global + 1
587 0 : DO i = start_row_local, end_row_local
588 : local_data(i, j) = al*new_values(this_col, row_indices(i) - start_row_global + 1) + &
589 0 : be*local_data(i, j)
590 : END DO
591 : END DO
592 : ELSE
593 0 : DO j = start_col_local, end_col_local
594 0 : this_col = col_indices(j) - start_col_global + 1
595 0 : DO i = start_row_local, end_row_local
596 : local_data(i, j) = al*new_values(row_indices(i) - start_row_global + 1, this_col) + &
597 0 : be*local_data(i, j)
598 : END DO
599 : END DO
600 : END IF
601 : END IF
602 :
603 214 : CALL timestop(handle)
604 214 : END SUBROUTINE cp_cfm_set_submatrix
605 :
606 : ! **************************************************************************************************
607 : !> \brief Returns information about a full matrix.
608 : !> \param matrix matrix
609 : !> \param name name of the matrix
610 : !> \param nrow_global total number of rows
611 : !> \param ncol_global total number of columns
612 : !> \param nrow_block number of rows per ScaLAPACK block
613 : !> \param ncol_block number of columns per ScaLAPACK block
614 : !> \param nrow_local number of locally stored rows
615 : !> \param ncol_local number of locally stored columns
616 : !> \param row_indices global indices of locally stored rows
617 : !> \param col_indices global indices of locally stored columns
618 : !> \param local_data locally stored matrix elements
619 : !> \param context BLACS context
620 : !> \param matrix_struct matrix structure
621 : !> \param para_env parallel environment
622 : !> \date 12.06.2001
623 : !> \author Matthias Krack
624 : !> \version 1.0
625 : ! **************************************************************************************************
626 241928 : SUBROUTINE cp_cfm_get_info(matrix, name, nrow_global, ncol_global, &
627 : nrow_block, ncol_block, nrow_local, ncol_local, &
628 : row_indices, col_indices, local_data, context, &
629 : matrix_struct, para_env)
630 : TYPE(cp_cfm_type), INTENT(IN) :: matrix
631 : CHARACTER(len=*), INTENT(OUT), OPTIONAL :: name
632 : INTEGER, INTENT(OUT), OPTIONAL :: nrow_global, ncol_global, nrow_block, &
633 : ncol_block, nrow_local, ncol_local
634 : INTEGER, DIMENSION(:), OPTIONAL, POINTER :: row_indices, col_indices
635 : COMPLEX(kind=dp), CONTIGUOUS, DIMENSION(:, :), &
636 : OPTIONAL, POINTER :: local_data
637 : TYPE(cp_blacs_env_type), OPTIONAL, POINTER :: context
638 : TYPE(cp_fm_struct_type), OPTIONAL, POINTER :: matrix_struct
639 : TYPE(mp_para_env_type), OPTIONAL, POINTER :: para_env
640 :
641 0 : IF (PRESENT(name)) name = matrix%name
642 241928 : IF (PRESENT(matrix_struct)) matrix_struct => matrix%matrix_struct
643 241928 : IF (PRESENT(local_data)) local_data => matrix%local_data ! not hiding things anymore :-(
644 :
645 : CALL cp_fm_struct_get(matrix%matrix_struct, nrow_local=nrow_local, &
646 : ncol_local=ncol_local, nrow_global=nrow_global, &
647 : ncol_global=ncol_global, nrow_block=nrow_block, &
648 : ncol_block=ncol_block, context=context, &
649 241928 : row_indices=row_indices, col_indices=col_indices, para_env=para_env)
650 :
651 241928 : END SUBROUTINE cp_cfm_get_info
652 :
653 : ! **************************************************************************************************
654 : !> \brief Copy content of a full matrix into another full matrix of the same size.
655 : !> \param source source matrix
656 : !> \param destination destination matrix
657 : !> \author Joost VandeVondele
658 : ! **************************************************************************************************
659 206846 : SUBROUTINE cp_cfm_to_cfm_matrix(source, destination)
660 : TYPE(cp_cfm_type), INTENT(IN) :: source, destination
661 :
662 : INTEGER :: npcol, nprow
663 :
664 206846 : nprow = source%matrix_struct%context%num_pe(1)
665 206846 : npcol = source%matrix_struct%context%num_pe(2)
666 :
667 206846 : IF (.NOT. cp2k_is_parallel .OR. &
668 : cp_fm_struct_equivalent(source%matrix_struct, &
669 : destination%matrix_struct)) THEN
670 206846 : IF (SIZE(source%local_data, 1) /= SIZE(destination%local_data, 1) .OR. &
671 : SIZE(source%local_data, 2) /= SIZE(destination%local_data, 2)) &
672 0 : CPABORT("internal local_data has different sizes")
673 620538 : CALL zcopy(SIZE(source%local_data), source%local_data(1, 1), 1, destination%local_data(1, 1), 1)
674 : ELSE
675 0 : IF (source%matrix_struct%nrow_global /= destination%matrix_struct%nrow_global) &
676 0 : CPABORT("cannot copy between full matrixes of differen sizes")
677 0 : IF (source%matrix_struct%ncol_global /= destination%matrix_struct%ncol_global) &
678 0 : CPABORT("cannot copy between full matrixes of differen sizes")
679 : #if defined(__parallel)
680 : CALL pzcopy(source%matrix_struct%nrow_global* &
681 : source%matrix_struct%ncol_global, &
682 : source%local_data(1, 1), 1, 1, source%matrix_struct%descriptor, 1, &
683 0 : destination%local_data(1, 1), 1, 1, destination%matrix_struct%descriptor, 1)
684 : #else
685 : CPABORT("")
686 : #endif
687 : END IF
688 206846 : END SUBROUTINE cp_cfm_to_cfm_matrix
689 :
690 : ! **************************************************************************************************
691 : !> \brief Copy a number of sequential columns of a full matrix into another full matrix.
692 : !> \param msource source matrix
693 : !> \param mtarget destination matrix
694 : !> \param ncol number of columns to copy
695 : !> \param source_start global index of the first column to copy within the source matrix
696 : !> \param target_start global index of the first column to copy within the destination matrix
697 : ! **************************************************************************************************
698 17350 : SUBROUTINE cp_cfm_to_cfm_columns(msource, mtarget, ncol, source_start, &
699 : target_start)
700 :
701 : TYPE(cp_cfm_type), INTENT(IN) :: msource, mtarget
702 : INTEGER, INTENT(IN) :: ncol
703 : INTEGER, INTENT(IN), OPTIONAL :: source_start, target_start
704 :
705 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_cfm_to_cfm_columns'
706 :
707 17350 : COMPLEX(kind=dp), DIMENSION(:, :), POINTER :: a, b
708 : INTEGER :: handle, n, ss, ts
709 : #if defined(__parallel)
710 : INTEGER :: i
711 : INTEGER, DIMENSION(9) :: desca, descb
712 : #endif
713 :
714 17350 : CALL timeset(routineN, handle)
715 :
716 17350 : ss = 1
717 17350 : ts = 1
718 :
719 17350 : IF (PRESENT(source_start)) ss = source_start
720 17350 : IF (PRESENT(target_start)) ts = target_start
721 :
722 17350 : n = msource%matrix_struct%nrow_global
723 :
724 17350 : a => msource%local_data
725 17350 : b => mtarget%local_data
726 :
727 : #if defined(__parallel)
728 173500 : desca(:) = msource%matrix_struct%descriptor(:)
729 173500 : descb(:) = mtarget%matrix_struct%descriptor(:)
730 336486 : DO i = 0, ncol - 1
731 336486 : CALL pzcopy(n, a(1, 1), 1, ss + i, desca, 1, b(1, 1), 1, ts + i, descb, 1)
732 : END DO
733 : #else
734 : CALL zcopy(ncol*n, a(1, ss), 1, b(1, ts), 1)
735 : #endif
736 :
737 17350 : CALL timestop(handle)
738 :
739 17350 : END SUBROUTINE cp_cfm_to_cfm_columns
740 :
741 : ! **************************************************************************************************
742 : !> \brief Copy just a triangular matrix.
743 : !> \param msource source matrix
744 : !> \param mtarget target matrix
745 : !> \param uplo 'U' for upper triangular, 'L' for lower triangular
746 : ! **************************************************************************************************
747 0 : SUBROUTINE cp_cfm_to_cfm_triangular(msource, mtarget, uplo)
748 : TYPE(cp_cfm_type), INTENT(IN) :: msource, mtarget
749 : CHARACTER(len=*), INTENT(IN) :: uplo
750 :
751 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_cfm_to_cfm_triangular'
752 :
753 0 : COMPLEX(kind=dp), DIMENSION(:, :), POINTER :: aa, bb
754 : INTEGER :: handle, ncol, nrow
755 : #if defined(__parallel)
756 : INTEGER, DIMENSION(9) :: desca, descb
757 : #endif
758 :
759 0 : CALL timeset(routineN, handle)
760 :
761 0 : nrow = msource%matrix_struct%nrow_global
762 0 : ncol = msource%matrix_struct%ncol_global
763 :
764 0 : aa => msource%local_data
765 0 : bb => mtarget%local_data
766 :
767 : #if defined(__parallel)
768 0 : desca(:) = msource%matrix_struct%descriptor(:)
769 0 : descb(:) = mtarget%matrix_struct%descriptor(:)
770 0 : CALL pzlacpy(uplo, nrow, ncol, aa(1, 1), 1, 1, desca, bb(1, 1), 1, 1, descb)
771 : #else
772 : CALL zlacpy(uplo, nrow, ncol, aa(1, 1), nrow, bb(1, 1), nrow)
773 : #endif
774 :
775 0 : CALL timestop(handle)
776 0 : END SUBROUTINE cp_cfm_to_cfm_triangular
777 :
778 : ! **************************************************************************************************
779 : !> \brief Copy real and imaginary parts of a complex full matrix into
780 : !> separate real-value full matrices.
781 : !> \param msource complex matrix
782 : !> \param mtargetr (optional) real part of the source matrix
783 : !> \param mtargeti (optional) imaginary part of the source matrix
784 : !> \note
785 : !> Matrix structures are assumed to be equivalent.
786 : ! **************************************************************************************************
787 84304 : SUBROUTINE cp_cfm_to_fm(msource, mtargetr, mtargeti)
788 :
789 : TYPE(cp_cfm_type), INTENT(IN) :: msource
790 : TYPE(cp_fm_type), INTENT(IN), OPTIONAL :: mtargetr, mtargeti
791 :
792 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_cfm_to_fm'
793 :
794 84304 : COMPLEX(kind=dp), DIMENSION(:, :), POINTER :: zmat
795 : INTEGER :: handle
796 84304 : REAL(kind=dp), DIMENSION(:, :), POINTER :: imat, rmat
797 :
798 84304 : CALL timeset(routineN, handle)
799 :
800 84304 : zmat => msource%local_data
801 84304 : IF (PRESENT(mtargetr)) THEN
802 75716 : rmat => mtargetr%local_data
803 : IF ((.NOT. cp_fm_struct_equivalent(mtargetr%matrix_struct, msource%matrix_struct)) .OR. &
804 75716 : (SIZE(rmat, 1) /= SIZE(zmat, 1)) .OR. &
805 : (SIZE(rmat, 2) /= SIZE(zmat, 2))) THEN
806 0 : CPABORT("size of local_data of mtargetr differ to msource")
807 : END IF
808 : ! copy local data
809 40856321 : rmat = REAL(zmat, kind=dp)
810 : ELSE
811 84304 : NULLIFY (rmat)
812 : END IF
813 84304 : IF (PRESENT(mtargeti)) THEN
814 68572 : imat => mtargeti%local_data
815 : IF ((.NOT. cp_fm_struct_equivalent(mtargeti%matrix_struct, msource%matrix_struct)) .OR. &
816 68572 : (SIZE(imat, 1) /= SIZE(zmat, 1)) .OR. &
817 : (SIZE(imat, 2) /= SIZE(zmat, 2))) THEN
818 0 : CPABORT("size of local_data of mtargeti differ to msource")
819 : END IF
820 : ! copy local data
821 40266489 : imat = REAL(AIMAG(zmat), kind=dp)
822 : ELSE
823 84304 : NULLIFY (imat)
824 : END IF
825 :
826 84304 : CALL timestop(handle)
827 :
828 84304 : END SUBROUTINE cp_cfm_to_fm
829 :
830 : ! **************************************************************************************************
831 : !> \brief Construct a complex full matrix by taking its real and imaginary parts from
832 : !> two separate real-value full matrices.
833 : !> \param msourcer (optional) real part of the complex matrix (defaults to 0.0)
834 : !> \param msourcei (optional) imaginary part of the complex matrix (defaults to 0.0)
835 : !> \param mtarget resulting complex matrix
836 : !> \note
837 : !> Matrix structures are assumed to be equivalent.
838 : ! **************************************************************************************************
839 97594 : SUBROUTINE cp_fm_to_cfm(msourcer, msourcei, mtarget)
840 : TYPE(cp_fm_type), INTENT(IN), OPTIONAL :: msourcer, msourcei
841 : TYPE(cp_cfm_type), INTENT(IN) :: mtarget
842 :
843 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_to_cfm'
844 :
845 97594 : COMPLEX(kind=dp), DIMENSION(:, :), POINTER :: zmat
846 : INTEGER :: handle, mode
847 97594 : REAL(kind=dp), DIMENSION(:, :), POINTER :: imat, rmat
848 :
849 97594 : CALL timeset(routineN, handle)
850 :
851 97594 : mode = 0
852 97594 : zmat => mtarget%local_data
853 97594 : IF (PRESENT(msourcer)) THEN
854 92358 : rmat => msourcer%local_data
855 : IF ((.NOT. cp_fm_struct_equivalent(msourcer%matrix_struct, mtarget%matrix_struct)) .OR. &
856 92358 : (SIZE(rmat, 1) /= SIZE(zmat, 1)) .OR. &
857 : (SIZE(rmat, 2) /= SIZE(zmat, 2))) THEN
858 0 : CPABORT("size of local_data of msourcer differ to mtarget")
859 : END IF
860 : mode = mode + 1
861 : ELSE
862 : NULLIFY (rmat)
863 : END IF
864 97594 : IF (PRESENT(msourcei)) THEN
865 65796 : imat => msourcei%local_data
866 : IF ((.NOT. cp_fm_struct_equivalent(msourcei%matrix_struct, mtarget%matrix_struct)) .OR. &
867 65796 : (SIZE(imat, 1) /= SIZE(zmat, 1)) .OR. &
868 : (SIZE(imat, 2) /= SIZE(zmat, 2))) THEN
869 0 : CPABORT("size of local_data of msourcei differ to mtarget")
870 : END IF
871 65796 : mode = mode + 2
872 : ELSE
873 : NULLIFY (imat)
874 : END IF
875 : ! copy local data
876 : SELECT CASE (mode)
877 : CASE (0)
878 0 : zmat(:, :) = z_zero
879 : CASE (1)
880 1029432 : zmat(:, :) = CMPLX(rmat(:, :), 0.0_dp, kind=dp)
881 : CASE (2)
882 67108 : zmat(:, :) = CMPLX(0.0_dp, imat(:, :), kind=dp)
883 : CASE (3)
884 21002357 : zmat(:, :) = CMPLX(rmat(:, :), imat(:, :), kind=dp)
885 : END SELECT
886 :
887 97594 : CALL timestop(handle)
888 :
889 97594 : END SUBROUTINE cp_fm_to_cfm
890 :
891 : ! **************************************************************************************************
892 : !> \brief Initiate the copy operation: get distribution data, post MPI isend and irecvs.
893 : !> \param source input complex-valued fm matrix
894 : !> \param destination output complex-valued fm matrix
895 : !> \param para_env parallel environment corresponding to the BLACS env that covers all parts
896 : !> of the input and output matrices
897 : !> \param info all of the data that will be needed to complete the copy operation
898 : !> \note a slightly modified version of the subroutine cp_fm_start_copy_general() that uses
899 : !> allocatable arrays instead of pointers wherever possible.
900 : ! **************************************************************************************************
901 27480 : SUBROUTINE cp_cfm_start_copy_general(source, destination, para_env, info)
902 : TYPE(cp_cfm_type), INTENT(IN) :: source, destination
903 : TYPE(mp_para_env_type), INTENT(IN), POINTER :: para_env
904 : TYPE(copy_cfm_info_type), INTENT(out) :: info
905 :
906 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_cfm_start_copy_general'
907 :
908 : INTEGER :: dest_p_i, dest_q_j, global_rank, global_size, handle, i, j, k, mpi_rank, &
909 : ncol_block_dest, ncol_block_src, ncol_local_recv, ncol_local_send, ncols, &
910 : nrow_block_dest, nrow_block_src, nrow_local_recv, nrow_local_send, nrows, p, q, &
911 : recv_rank, recv_size, send_rank, send_size
912 2748 : INTEGER, ALLOCATABLE, DIMENSION(:) :: all_ranks, dest2global, dest_p, dest_q, &
913 5496 : recv_count, send_count, send_disp, &
914 2748 : source2global, src_p, src_q
915 2748 : INTEGER, ALLOCATABLE, DIMENSION(:, :) :: dest_blacs2mpi
916 : INTEGER, DIMENSION(2) :: dest_block, dest_block_tmp, dest_num_pe, &
917 : src_block, src_block_tmp, src_num_pe
918 5496 : INTEGER, DIMENSION(:), POINTER :: recv_col_indices, recv_row_indices, &
919 5496 : send_col_indices, send_row_indices
920 : TYPE(cp_fm_struct_type), POINTER :: recv_dist, send_dist
921 38472 : TYPE(mp_request_type), DIMENSION(6) :: recv_req, send_req
922 :
923 2748 : CALL timeset(routineN, handle)
924 :
925 : IF (.NOT. cp2k_is_parallel) THEN
926 : ! Just copy all of the matrix data into a 'send buffer', to be unpacked later
927 : nrow_local_send = SIZE(source%local_data, 1)
928 : ncol_local_send = SIZE(source%local_data, 2)
929 : ALLOCATE (info%send_buf(nrow_local_send*ncol_local_send))
930 : k = 0
931 : DO j = 1, ncol_local_send
932 : DO i = 1, nrow_local_send
933 : k = k + 1
934 : info%send_buf(k) = source%local_data(i, j)
935 : END DO
936 : END DO
937 : ELSE
938 2748 : NULLIFY (recv_dist, send_dist)
939 2748 : NULLIFY (recv_col_indices, recv_row_indices, send_col_indices, send_row_indices)
940 :
941 : ! The 'global' communicator contains both the source and destination decompositions
942 2748 : global_size = para_env%num_pe
943 2748 : global_rank = para_env%mepos
944 :
945 : ! The source/send decomposition and destination/recv decompositions may only exist on
946 : ! on a subset of the processes involved in the communication
947 : ! Check if the source and/or destination arguments are .not. ASSOCIATED():
948 : ! if so, skip the send / recv parts (since these processes do not participate in the sending/receiving distribution)
949 2748 : IF (ASSOCIATED(destination%matrix_struct)) THEN
950 2748 : recv_dist => destination%matrix_struct
951 2748 : recv_rank = recv_dist%para_env%mepos
952 : ELSE
953 0 : recv_rank = mp_proc_null
954 : END IF
955 :
956 2748 : IF (ASSOCIATED(source%matrix_struct)) THEN
957 1374 : send_dist => source%matrix_struct
958 1374 : send_rank = send_dist%para_env%mepos
959 : ELSE
960 1374 : send_rank = mp_proc_null
961 : END IF
962 :
963 : ! Map the rank in the source/dest communicator to the global rank
964 8244 : ALLOCATE (all_ranks(0:global_size - 1))
965 :
966 2748 : CALL para_env%allgather(send_rank, all_ranks)
967 2748 : IF (ASSOCIATED(destination%matrix_struct)) THEN
968 13740 : ALLOCATE (source2global(0:COUNT(all_ranks /= mp_proc_null) - 1))
969 8244 : DO i = 0, global_size - 1
970 8244 : IF (all_ranks(i) /= mp_proc_null) THEN
971 2748 : source2global(all_ranks(i)) = i
972 : END IF
973 : END DO
974 : END IF
975 :
976 2748 : CALL para_env%allgather(recv_rank, all_ranks)
977 2748 : IF (ASSOCIATED(source%matrix_struct)) THEN
978 6870 : ALLOCATE (dest2global(0:COUNT(all_ranks /= mp_proc_null) - 1))
979 4122 : DO i = 0, global_size - 1
980 4122 : IF (all_ranks(i) /= mp_proc_null) THEN
981 2748 : dest2global(all_ranks(i)) = i
982 : END IF
983 : END DO
984 : END IF
985 2748 : DEALLOCATE (all_ranks)
986 :
987 : ! Some data from the two decompositions will be needed by all processes in the global group :
988 : ! process grid shape, block size, and the BLACS-to-MPI mapping
989 :
990 : ! The global root process will receive the data (from the root process in each decomposition)
991 19236 : send_req(:) = mp_request_null
992 2748 : IF (global_rank == 0) THEN
993 9618 : recv_req(:) = mp_request_null
994 1374 : CALL para_env%irecv(src_block, mp_any_source, recv_req(1), tag=src_tag)
995 1374 : CALL para_env%irecv(dest_block, mp_any_source, recv_req(2), tag=dest_tag)
996 1374 : CALL para_env%irecv(src_num_pe, mp_any_source, recv_req(3), tag=src_tag)
997 1374 : CALL para_env%irecv(dest_num_pe, mp_any_source, recv_req(4), tag=dest_tag)
998 : END IF
999 :
1000 2748 : IF (ASSOCIATED(source%matrix_struct)) THEN
1001 1374 : IF ((send_rank == 0)) THEN
1002 : ! need to use separate buffers here in case this is actually global rank 0
1003 4122 : src_block_tmp = [send_dist%nrow_block, send_dist%ncol_block]
1004 1374 : CALL para_env%isend(src_block_tmp, 0, send_req(1), tag=src_tag)
1005 1374 : CALL para_env%isend(send_dist%context%num_pe, 0, send_req(2), tag=src_tag)
1006 : END IF
1007 : END IF
1008 :
1009 2748 : IF (ASSOCIATED(destination%matrix_struct)) THEN
1010 2748 : IF ((recv_rank == 0)) THEN
1011 4122 : dest_block_tmp = [recv_dist%nrow_block, recv_dist%ncol_block]
1012 1374 : CALL para_env%isend(dest_block_tmp, 0, send_req(3), tag=dest_tag)
1013 1374 : CALL para_env%isend(recv_dist%context%num_pe, 0, send_req(4), tag=dest_tag)
1014 : END IF
1015 : END IF
1016 :
1017 2748 : IF (global_rank == 0) THEN
1018 1374 : CALL mp_waitall(recv_req(1:4))
1019 : ! Now we know the process decomposition, we can allocate the arrays to hold the blacs2mpi mapping
1020 0 : ALLOCATE (info%src_blacs2mpi(0:src_num_pe(1) - 1, 0:src_num_pe(2) - 1), &
1021 9618 : dest_blacs2mpi(0:dest_num_pe(1) - 1, 0:dest_num_pe(2) - 1))
1022 1374 : CALL para_env%irecv(info%src_blacs2mpi, mp_any_source, recv_req(5), tag=src_tag)
1023 1374 : CALL para_env%irecv(dest_blacs2mpi, mp_any_source, recv_req(6), tag=dest_tag)
1024 : END IF
1025 :
1026 2748 : IF (ASSOCIATED(source%matrix_struct)) THEN
1027 1374 : IF ((send_rank == 0)) THEN
1028 1374 : CALL para_env%isend(send_dist%context%blacs2mpi(:, :), 0, send_req(5), tag=src_tag)
1029 : END IF
1030 : END IF
1031 :
1032 2748 : IF (ASSOCIATED(destination%matrix_struct)) THEN
1033 2748 : IF ((recv_rank == 0)) THEN
1034 1374 : CALL para_env%isend(recv_dist%context%blacs2mpi(:, :), 0, send_req(6), tag=dest_tag)
1035 : END IF
1036 : END IF
1037 :
1038 2748 : IF (global_rank == 0) THEN
1039 1374 : CALL mp_waitall(recv_req(5:6))
1040 : END IF
1041 :
1042 : ! Finally, broadcast the data to all processes in the global communicator
1043 2748 : CALL para_env%bcast(src_block, 0)
1044 2748 : CALL para_env%bcast(dest_block, 0)
1045 2748 : CALL para_env%bcast(src_num_pe, 0)
1046 2748 : CALL para_env%bcast(dest_num_pe, 0)
1047 8244 : info%src_num_pe(1:2) = src_num_pe(1:2)
1048 8244 : info%nblock_src(1:2) = src_block(1:2)
1049 2748 : IF (global_rank /= 0) THEN
1050 0 : ALLOCATE (info%src_blacs2mpi(0:src_num_pe(1) - 1, 0:src_num_pe(2) - 1), &
1051 9618 : dest_blacs2mpi(0:dest_num_pe(1) - 1, 0:dest_num_pe(2) - 1))
1052 : END IF
1053 2748 : CALL para_env%bcast(info%src_blacs2mpi, 0)
1054 2748 : CALL para_env%bcast(dest_blacs2mpi, 0)
1055 :
1056 2748 : recv_size = dest_num_pe(1)*dest_num_pe(2)
1057 2748 : send_size = src_num_pe(1)*src_num_pe(2)
1058 2748 : info%send_size = send_size
1059 2748 : CALL mp_waitall(send_req(:))
1060 :
1061 : ! Setup is now complete, we can start the actual communication here.
1062 : ! The order implemented here is:
1063 : ! DEST_1
1064 : ! compute recv sizes
1065 : ! call irecv
1066 : ! SRC_1
1067 : ! compute send sizes
1068 : ! pack send buffers
1069 : ! call isend
1070 : ! DEST_2
1071 : ! wait for the recvs and unpack buffers (this part eventually will go into another routine to allow comms to run concurrently)
1072 : ! SRC_2
1073 : ! wait for the sends
1074 :
1075 : ! DEST_1
1076 2748 : IF (ASSOCIATED(destination%matrix_struct)) THEN
1077 : CALL cp_fm_struct_get(recv_dist, row_indices=recv_row_indices, &
1078 2748 : col_indices=recv_col_indices)
1079 2748 : info%recv_col_indices => recv_col_indices
1080 2748 : info%recv_row_indices => recv_row_indices
1081 2748 : nrow_block_src = src_block(1)
1082 2748 : ncol_block_src = src_block(2)
1083 16488 : ALLOCATE (recv_count(0:send_size - 1), info%recv_disp(0:send_size - 1), info%recv_request(0:send_size - 1))
1084 :
1085 : ! Determine the recv counts, allocate the receive buffers, call mpi_irecv for all the non-zero sized receives
1086 2748 : nrow_local_recv = recv_dist%nrow_locals(recv_dist%context%mepos(1))
1087 2748 : ncol_local_recv = recv_dist%ncol_locals(recv_dist%context%mepos(2))
1088 2748 : info%nlocal_recv(1) = nrow_local_recv
1089 2748 : info%nlocal_recv(2) = ncol_local_recv
1090 : ! Initialise src_p, src_q arrays (sized using number of rows/cols in the receiving distribution)
1091 13740 : ALLOCATE (src_p(nrow_local_recv), src_q(ncol_local_recv))
1092 17604 : DO i = 1, nrow_local_recv
1093 : ! For each local row we will receive, we look up its global row (in recv_row_indices),
1094 : ! then work out which row block it comes from, and which process row that row block comes from.
1095 17604 : src_p(i) = MOD(((recv_row_indices(i) - 1)/nrow_block_src), src_num_pe(1))
1096 : END DO
1097 32460 : DO j = 1, ncol_local_recv
1098 : ! Similarly for the columns
1099 32460 : src_q(j) = MOD(((recv_col_indices(j) - 1)/ncol_block_src), src_num_pe(2))
1100 : END DO
1101 : ! src_p/q now contains the process row/column ID that will send data to that row/column
1102 :
1103 5496 : DO q = 0, src_num_pe(2) - 1
1104 32460 : ncols = COUNT(src_q == q)
1105 8244 : DO p = 0, src_num_pe(1) - 1
1106 17604 : nrows = COUNT(src_p == p)
1107 : ! Use the send_dist here as we are looking up the processes where the data comes from
1108 5496 : recv_count(info%src_blacs2mpi(p, q)) = nrows*ncols
1109 : END DO
1110 : END DO
1111 2748 : DEALLOCATE (src_p, src_q)
1112 :
1113 : ! Use one long buffer (and displacements into that buffer)
1114 : ! this prevents the need for a rectangular array where not all elements will be populated
1115 10992 : ALLOCATE (info%recv_buf(SUM(recv_count(:))))
1116 2748 : info%recv_disp(0) = 0
1117 2748 : DO i = 1, send_size - 1
1118 2748 : info%recv_disp(i) = info%recv_disp(i - 1) + recv_count(i - 1)
1119 : END DO
1120 :
1121 : ! Issue receive calls on ranks which expect data
1122 5496 : DO k = 0, send_size - 1
1123 5496 : IF (recv_count(k) > 0) THEN
1124 : CALL para_env%irecv(info%recv_buf(info%recv_disp(k) + 1:info%recv_disp(k) + recv_count(k)), &
1125 2748 : source2global(k), info%recv_request(k))
1126 : ELSE
1127 0 : info%recv_request(k) = mp_request_null
1128 : END IF
1129 : END DO
1130 2748 : DEALLOCATE (source2global)
1131 : END IF ! ASSOCIATED(destination)
1132 :
1133 : ! SRC_1
1134 2748 : IF (ASSOCIATED(source%matrix_struct)) THEN
1135 : CALL cp_fm_struct_get(send_dist, row_indices=send_row_indices, &
1136 1374 : col_indices=send_col_indices)
1137 1374 : nrow_block_dest = dest_block(1)
1138 1374 : ncol_block_dest = dest_block(2)
1139 9618 : ALLOCATE (send_count(0:recv_size - 1), send_disp(0:recv_size - 1), info%send_request(0:recv_size - 1))
1140 :
1141 : ! Determine the send counts, allocate the send buffers
1142 1374 : nrow_local_send = send_dist%nrow_locals(send_dist%context%mepos(1))
1143 1374 : ncol_local_send = send_dist%ncol_locals(send_dist%context%mepos(2))
1144 :
1145 : ! Initialise dest_p, dest_q arrays (sized nrow_local, ncol_local)
1146 : ! i.e. number of rows,cols in the sending distribution
1147 6870 : ALLOCATE (dest_p(nrow_local_send), dest_q(ncol_local_send))
1148 :
1149 16230 : DO i = 1, nrow_local_send
1150 : ! Use the send_dist%row_indices() here (we are looping over the local rows we will send)
1151 16230 : dest_p(i) = MOD(((send_row_indices(i) - 1)/nrow_block_dest), dest_num_pe(1))
1152 : END DO
1153 16230 : DO j = 1, ncol_local_send
1154 16230 : dest_q(j) = MOD(((send_col_indices(j) - 1)/ncol_block_dest), dest_num_pe(2))
1155 : END DO
1156 : ! dest_p/q now contain the process row/column ID that will receive data from that row/column
1157 :
1158 2748 : DO q = 0, dest_num_pe(2) - 1
1159 16230 : ncols = COUNT(dest_q == q)
1160 5496 : DO p = 0, dest_num_pe(1) - 1
1161 32460 : nrows = COUNT(dest_p == p)
1162 4122 : send_count(dest_blacs2mpi(p, q)) = nrows*ncols
1163 : END DO
1164 : END DO
1165 1374 : DEALLOCATE (dest_p, dest_q)
1166 :
1167 : ! Allocate the send buffer using send_count -- and calculate the offset into the buffer for each process
1168 6870 : ALLOCATE (info%send_buf(SUM(send_count(:))))
1169 1374 : send_disp(0) = 0
1170 2748 : DO k = 1, recv_size - 1
1171 2748 : send_disp(k) = send_disp(k - 1) + send_count(k - 1)
1172 : END DO
1173 :
1174 : ! Loop over the smat, pack the send buffers
1175 4122 : send_count(:) = 0
1176 16230 : DO j = 1, ncol_local_send
1177 : ! Use send_col_indices and row_indices here, as we are looking up the global row/column number of local rows.
1178 14856 : dest_q_j = MOD(((send_col_indices(j) - 1)/ncol_block_dest), dest_num_pe(2))
1179 187974 : DO i = 1, nrow_local_send
1180 171744 : dest_p_i = MOD(((send_row_indices(i) - 1)/nrow_block_dest), dest_num_pe(1))
1181 171744 : mpi_rank = dest_blacs2mpi(dest_p_i, dest_q_j)
1182 171744 : send_count(mpi_rank) = send_count(mpi_rank) + 1
1183 186600 : info%send_buf(send_disp(mpi_rank) + send_count(mpi_rank)) = source%local_data(i, j)
1184 : END DO
1185 : END DO
1186 :
1187 : ! For each non-zero send_count, call mpi_isend
1188 4122 : DO k = 0, recv_size - 1
1189 4122 : IF (send_count(k) > 0) THEN
1190 : CALL para_env%isend(info%send_buf(send_disp(k) + 1:send_disp(k) + send_count(k)), &
1191 2748 : dest2global(k), info%send_request(k))
1192 : ELSE
1193 0 : info%send_request(k) = mp_request_null
1194 : END IF
1195 : END DO
1196 1374 : DEALLOCATE (send_count, send_disp, dest2global)
1197 : END IF ! ASSOCIATED(source)
1198 2748 : DEALLOCATE (dest_blacs2mpi)
1199 :
1200 : END IF !IF (.NOT. cp2k_is_parallel)
1201 :
1202 2748 : CALL timestop(handle)
1203 :
1204 10992 : END SUBROUTINE cp_cfm_start_copy_general
1205 :
1206 : ! **************************************************************************************************
1207 : !> \brief Complete the copy operation: wait for comms, unpack, clean up MPI state.
1208 : !> \param destination output cfm matrix
1209 : !> \param info all of the data that will be needed to complete the copy operation
1210 : !> \note a slightly modified version of the subroutine cp_fm_finish_copy_general() that uses
1211 : !> allocatable arrays instead of pointers wherever possible.
1212 : ! **************************************************************************************************
1213 2748 : SUBROUTINE cp_cfm_finish_copy_general(destination, info)
1214 : TYPE(cp_cfm_type), INTENT(IN) :: destination
1215 : TYPE(copy_cfm_info_type), INTENT(inout) :: info
1216 :
1217 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_cfm_finish_copy_general'
1218 :
1219 : INTEGER :: handle, i, j, k, mpi_rank, ni, nj, &
1220 : src_q_j
1221 2748 : INTEGER, ALLOCATABLE, DIMENSION(:) :: recv_count, src_p_i
1222 2748 : INTEGER, DIMENSION(:), POINTER :: recv_col_indices, recv_row_indices
1223 :
1224 2748 : CALL timeset(routineN, handle)
1225 :
1226 : IF (.NOT. cp2k_is_parallel) THEN
1227 : ! Now unpack the data from the 'send buffer'
1228 : k = 0
1229 : DO j = 1, SIZE(destination%local_data, 2)
1230 : DO i = 1, SIZE(destination%local_data, 1)
1231 : k = k + 1
1232 : destination%local_data(i, j) = info%send_buf(k)
1233 : END DO
1234 : END DO
1235 : DEALLOCATE (info%send_buf)
1236 : ELSE
1237 : ! Set up local variables ...
1238 2748 : recv_col_indices => info%recv_col_indices
1239 2748 : recv_row_indices => info%recv_row_indices
1240 :
1241 : ! ... use the local variables to do the work
1242 : ! DEST_2
1243 2748 : CALL mp_waitall(info%recv_request(:))
1244 :
1245 2748 : nj = info%nlocal_recv(2)
1246 2748 : ni = info%nlocal_recv(1)
1247 13740 : ALLOCATE (recv_count(0:info%send_size - 1), src_p_i(ni))
1248 : ! Loop over the rmat, filling it in with data from the recv buffers
1249 : ! (here the block sizes, num_pes refer to the distribution of the source matrix)
1250 5496 : recv_count(:) = 0
1251 17604 : DO i = 1, ni
1252 17604 : src_p_i(i) = MOD(((recv_row_indices(i) - 1)/info%nblock_src(1)), info%src_num_pe(1))
1253 : END DO
1254 :
1255 32460 : DO j = 1, nj
1256 29712 : src_q_j = MOD(((recv_col_indices(j) - 1)/info%nblock_src(2)), info%src_num_pe(2))
1257 204204 : DO i = 1, ni
1258 171744 : mpi_rank = info%src_blacs2mpi(src_p_i(i), src_q_j)
1259 171744 : recv_count(mpi_rank) = recv_count(mpi_rank) + 1
1260 201456 : destination%local_data(i, j) = info%recv_buf(info%recv_disp(mpi_rank) + recv_count(mpi_rank))
1261 : END DO
1262 : END DO
1263 :
1264 2748 : DEALLOCATE (recv_count, src_p_i)
1265 : ! Invalidate the stored state
1266 2748 : NULLIFY (info%recv_col_indices, info%recv_row_indices)
1267 2748 : DEALLOCATE (info%recv_disp, info%recv_request, info%recv_buf, info%src_blacs2mpi)
1268 : END IF
1269 :
1270 2748 : CALL timestop(handle)
1271 :
1272 2748 : END SUBROUTINE cp_cfm_finish_copy_general
1273 :
1274 : ! **************************************************************************************************
1275 : !> \brief Complete the copy operation: wait for comms clean up MPI state.
1276 : !> \param info all of the data that will be needed to complete the copy operation
1277 : !> \note a slightly modified version of the subroutine cp_fm_cleanup_copy_general() that uses
1278 : !> allocatable arrays instead of pointers wherever possible.
1279 : ! **************************************************************************************************
1280 1374 : SUBROUTINE cp_cfm_cleanup_copy_general(info)
1281 : TYPE(copy_cfm_info_type), INTENT(inout) :: info
1282 :
1283 : CHARACTER(len=*), PARAMETER :: routineN = 'cp_cfm_cleanup_copy_general'
1284 :
1285 : INTEGER :: handle
1286 :
1287 1374 : CALL timeset(routineN, handle)
1288 :
1289 : IF (cp2k_is_parallel) THEN
1290 : ! SRC_2
1291 : ! If this process is also in the destination decomposition, this deallocate
1292 : ! Was already done in cp_fm_finish_copy_general
1293 1374 : IF (ALLOCATED(info%src_blacs2mpi)) DEALLOCATE (info%src_blacs2mpi)
1294 1374 : CALL mp_waitall(info%send_request(:))
1295 1374 : DEALLOCATE (info%send_request, info%send_buf)
1296 : END IF
1297 :
1298 1374 : CALL timestop(handle)
1299 1374 : END SUBROUTINE cp_cfm_cleanup_copy_general
1300 0 : END MODULE cp_cfm_types
|