Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2026 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief Routines for data exchange between MPI processes
10 : !> \par History
11 : !> 04.2008 created [Manuel Guidon]
12 : !> \author Manuel Guidon
13 : ! **************************************************************************************************
14 : MODULE hfx_communication
15 : USE atomic_kind_types, ONLY: atomic_kind_type,&
16 : get_atomic_kind_set
17 : USE cp_control_types, ONLY: dft_control_type
18 : USE cp_dbcsr_api, ONLY: dbcsr_get_block_p,&
19 : dbcsr_iterator_blocks_left,&
20 : dbcsr_iterator_next_block,&
21 : dbcsr_iterator_start,&
22 : dbcsr_iterator_stop,&
23 : dbcsr_iterator_type,&
24 : dbcsr_p_type,&
25 : dbcsr_type
26 : USE hfx_types, ONLY: hfx_2D_map,&
27 : hfx_basis_type,&
28 : hfx_type
29 : USE kinds, ONLY: dp,&
30 : int_8
31 : USE message_passing, ONLY: mp_para_env_type,&
32 : mp_request_type,&
33 : mp_waitall
34 : USE particle_types, ONLY: particle_type
35 : USE qs_environment_types, ONLY: get_qs_env,&
36 : qs_environment_type
37 : #include "./base/base_uses.f90"
38 :
39 : IMPLICIT NONE
40 : PRIVATE
41 :
42 : PUBLIC :: get_full_density, &
43 : distribute_ks_matrix, &
44 : scale_and_add_fock_to_ks_matrix, &
45 : get_atomic_block_maps
46 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'hfx_communication'
47 :
48 : !***
49 :
50 : CONTAINS
51 :
52 : ! **************************************************************************************************
53 : !> \brief - Collects full density matrix from all CPUs
54 : !> \param para_env ...
55 : !> \param full_density The full Density matrix
56 : !> \param rho Distributed density
57 : !> \param number_of_p_entries Maximal buffer size
58 : !> \param block_offset ...
59 : !> \param kind_of ...
60 : !> \param basis_parameter ...
61 : !> \param get_max_vals_spin ...
62 : !> \param rho_beta ...
63 : !> \param antisymmetric ...
64 : !> \par History
65 : !> 11.2007 created [Manuel Guidon]
66 : !> \author Manuel Guidon
67 : !> \note
68 : !> - Communication with left/right node only
69 : !> added a mpi_sync before and after the ring of isendrecv. This *speed up* the
70 : !> communication, and might protect against idle neighbors flooding a busy node
71 : !> with messages [Joost]
72 : ! **************************************************************************************************
73 49039 : SUBROUTINE get_full_density(para_env, full_density, rho, number_of_p_entries, &
74 : block_offset, kind_of, basis_parameter, &
75 : get_max_vals_spin, rho_beta, antisymmetric)
76 :
77 : TYPE(mp_para_env_type), POINTER :: para_env
78 : REAL(dp), DIMENSION(:) :: full_density
79 : TYPE(dbcsr_type), POINTER :: rho
80 : INTEGER, INTENT(IN) :: number_of_p_entries
81 : INTEGER, DIMENSION(:), POINTER :: block_offset
82 : INTEGER :: kind_of(*)
83 : TYPE(hfx_basis_type), DIMENSION(:), POINTER :: basis_parameter
84 : LOGICAL, INTENT(IN) :: get_max_vals_spin
85 : TYPE(dbcsr_type), OPTIONAL, POINTER :: rho_beta
86 : LOGICAL, INTENT(IN) :: antisymmetric
87 :
88 : INTEGER :: block_size, data_from, dest, i, iatom, icpu, ikind, iset, jatom, jkind, jset, &
89 : mepos, ncpu, nseta, nsetb, pa, pa1, pb, pb1, source, source_cpu
90 49039 : INTEGER, DIMENSION(:), POINTER :: nsgfa, nsgfb
91 : LOGICAL :: found
92 : REAL(dp) :: symmfac
93 49039 : REAL(dp), DIMENSION(:), POINTER :: recbuffer, sendbuffer, swapbuffer
94 49039 : REAL(dp), DIMENSION(:, :), POINTER :: sparse_block, sparse_block_beta
95 : TYPE(dbcsr_iterator_type) :: iter
96 147117 : TYPE(mp_request_type), DIMENSION(2) :: req
97 :
98 14666846 : full_density = 0.0_dp
99 147117 : ALLOCATE (sendbuffer(number_of_p_entries))
100 98078 : ALLOCATE (recbuffer(number_of_p_entries))
101 9916303 : sendbuffer = 0.0_dp
102 9916303 : recbuffer = 0.0_dp
103 :
104 49039 : i = 1
105 49039 : CALL dbcsr_iterator_start(iter, rho, shared=.FALSE.)
106 205072 : DO WHILE (dbcsr_iterator_blocks_left(iter))
107 156033 : CALL dbcsr_iterator_next_block(iter, iatom, jatom, sparse_block)
108 : ! the resulting vector will be only the upper triangle.
109 : ! in case of antisymmetry take care to change signs if a lower block gets copied
110 156033 : symmfac = 1.0_dp
111 156033 : IF (antisymmetric .AND. iatom > jatom) symmfac = -1.0_dp
112 156033 : ikind = kind_of(iatom)
113 156033 : nseta = basis_parameter(ikind)%nset
114 156033 : nsgfa => basis_parameter(ikind)%nsgf
115 156033 : jkind = kind_of(jatom)
116 156033 : nsetb = basis_parameter(jkind)%nset
117 156033 : nsgfb => basis_parameter(jkind)%nsgf
118 205072 : IF (get_max_vals_spin) THEN
119 : CALL dbcsr_get_block_p(rho_beta, &
120 0 : row=iatom, col=jatom, BLOCK=sparse_block_beta, found=found)
121 0 : pa = 0
122 0 : DO iset = 1, nseta
123 : pb = 0
124 0 : DO jset = 1, nsetb
125 0 : DO pa1 = pa + 1, pa + nsgfa(iset)
126 0 : DO pb1 = pb + 1, pb + nsgfb(jset)
127 0 : sendbuffer(i) = MAX(ABS(sparse_block(pa1, pb1)), ABS(sparse_block_beta(pa1, pb1)))
128 0 : i = i + 1
129 : END DO
130 : END DO
131 0 : pb = pb + nsgfb(jset)
132 : END DO
133 0 : pa = pa + nsgfa(iset)
134 : END DO
135 : ELSE
136 : pa = 0
137 598250 : DO iset = 1, nseta
138 : pb = 0
139 1819335 : DO jset = 1, nsetb
140 4573873 : DO pa1 = pa + 1, pa + nsgfa(iset)
141 11869988 : DO pb1 = pb + 1, pb + nsgfb(jset)
142 7296115 : sendbuffer(i) = sparse_block(pa1, pb1)*symmfac
143 10492870 : i = i + 1
144 : END DO
145 : END DO
146 1819335 : pb = pb + nsgfb(jset)
147 : END DO
148 598250 : pa = pa + nsgfa(iset)
149 : END DO
150 : END IF
151 : END DO
152 49039 : CALL dbcsr_iterator_stop(iter)
153 :
154 : ! sync before/after a ring of isendrecv
155 49039 : CALL para_env%sync()
156 49039 : ncpu = para_env%num_pe
157 49039 : mepos = para_env%mepos
158 49039 : dest = MODULO(mepos + 1, ncpu)
159 49039 : source = MODULO(mepos - 1, ncpu)
160 147034 : DO icpu = 0, ncpu - 1
161 97995 : IF (icpu /= ncpu - 1) THEN
162 : CALL para_env%isendrecv(sendbuffer, dest, recbuffer, source, &
163 48956 : req(1), req(2), 13)
164 : END IF
165 97995 : data_from = MODULO(mepos - icpu, ncpu)
166 97995 : source_cpu = MODULO(data_from, ncpu) + 1
167 97995 : block_size = block_offset(source_cpu + 1) - block_offset(source_cpu)
168 14666763 : full_density(block_offset(source_cpu):block_offset(source_cpu) + block_size - 1) = sendbuffer(1:block_size)
169 :
170 97995 : IF (icpu /= ncpu - 1) THEN
171 48956 : CALL mp_waitall(req)
172 : END IF
173 97995 : swapbuffer => sendbuffer
174 97995 : sendbuffer => recbuffer
175 147034 : recbuffer => swapbuffer
176 : END DO
177 49039 : DEALLOCATE (sendbuffer, recbuffer)
178 : ! sync before/after a ring of isendrecv
179 49039 : CALL para_env%sync()
180 :
181 98078 : END SUBROUTINE get_full_density
182 :
183 : ! **************************************************************************************************
184 : !> \brief - Distributes the local full Kohn-Sham matrix to all CPUS
185 : !> \param para_env ...
186 : !> \param full_ks The full Kohn-Sham matrix
187 : !> \param ks_matrix Distributed Kohn-Sham matrix
188 : !> \param number_of_p_entries Maximal buffer size
189 : !> \param block_offset ...
190 : !> \param kind_of ...
191 : !> \param basis_parameter ...
192 : !> \param off_diag_fac ...
193 : !> \param diag_fac ...
194 : !> \par History
195 : !> 11.2007 created [Manuel Guidon]
196 : !> \author Manuel Guidon
197 : !> \note
198 : !> - Communication with left/right node only
199 : ! **************************************************************************************************
200 46037 : SUBROUTINE distribute_ks_matrix(para_env, full_ks, ks_matrix, number_of_p_entries, &
201 : block_offset, kind_of, basis_parameter, &
202 : off_diag_fac, diag_fac)
203 :
204 : TYPE(mp_para_env_type), POINTER :: para_env
205 : REAL(dp), DIMENSION(:) :: full_ks
206 : TYPE(dbcsr_type), POINTER :: ks_matrix
207 : INTEGER, INTENT(IN) :: number_of_p_entries
208 : INTEGER, DIMENSION(:), POINTER :: block_offset
209 : INTEGER :: kind_of(*)
210 : TYPE(hfx_basis_type), DIMENSION(:), POINTER :: basis_parameter
211 : REAL(dp), INTENT(IN), OPTIONAL :: off_diag_fac, diag_fac
212 :
213 : INTEGER :: block_size, data_to, dest, dest_cpu, i, iatom, icpu, ikind, iset, jatom, jkind, &
214 : jset, mepos, ncpu, nseta, nsetb, pa, pa1, pb, pb1, source
215 46037 : INTEGER, DIMENSION(:), POINTER :: nsgfa, nsgfb
216 : REAL(dp) :: my_fac, myd_fac
217 46037 : REAL(dp), DIMENSION(:), POINTER :: recbuffer, sendbuffer, swapbuffer
218 46037 : REAL(dp), DIMENSION(:, :), POINTER :: sparse_block
219 : TYPE(dbcsr_iterator_type) :: iter
220 138111 : TYPE(mp_request_type), DIMENSION(2) :: req
221 :
222 46037 : my_fac = 1.0_dp; myd_fac = 1.0_dp
223 46037 : IF (PRESENT(off_diag_fac)) my_fac = off_diag_fac
224 46037 : IF (PRESENT(diag_fac)) myd_fac = diag_fac
225 :
226 138111 : ALLOCATE (sendbuffer(number_of_p_entries))
227 9331397 : sendbuffer = 0.0_dp
228 92074 : ALLOCATE (recbuffer(number_of_p_entries))
229 9331397 : recbuffer = 0.0_dp
230 :
231 46037 : ncpu = para_env%num_pe
232 46037 : mepos = para_env%mepos
233 46037 : dest = MODULO(mepos + 1, ncpu)
234 46037 : source = MODULO(mepos - 1, ncpu)
235 :
236 : ! sync before/after a ring of isendrecv
237 46037 : CALL para_env%sync()
238 91991 : DO icpu = 1, ncpu
239 91991 : i = 1
240 91991 : data_to = mepos - icpu
241 91991 : dest_cpu = MODULO(data_to, ncpu) + 1
242 91991 : block_size = block_offset(dest_cpu + 1) - block_offset(dest_cpu)
243 13818879 : sendbuffer(1:block_size) = sendbuffer(1:block_size) + full_ks(block_offset(dest_cpu):block_offset(dest_cpu) + block_size - 1)
244 91991 : IF (icpu == ncpu) EXIT
245 : CALL para_env%isendrecv(sendbuffer, dest, recbuffer, source, &
246 45954 : req(1), req(2), 13)
247 :
248 45954 : CALL mp_waitall(req)
249 45954 : swapbuffer => sendbuffer
250 45954 : sendbuffer => recbuffer
251 91991 : recbuffer => swapbuffer
252 : END DO
253 : ! sync before/after a ring of isendrecv
254 46037 : CALL para_env%sync()
255 :
256 46037 : i = 1
257 46037 : CALL dbcsr_iterator_start(iter, ks_matrix, shared=.FALSE.)
258 193267 : DO WHILE (dbcsr_iterator_blocks_left(iter))
259 147230 : CALL dbcsr_iterator_next_block(iter, iatom, jatom, sparse_block)
260 :
261 147230 : ikind = kind_of(iatom)
262 147230 : nseta = basis_parameter(ikind)%nset
263 147230 : nsgfa => basis_parameter(ikind)%nsgf
264 147230 : jkind = kind_of(jatom)
265 147230 : nsetb = basis_parameter(jkind)%nset
266 147230 : nsgfb => basis_parameter(jkind)%nsgf
267 147230 : pa = 0
268 611011 : DO iset = 1, nseta
269 : pb = 0
270 1721159 : DO jset = 1, nsetb
271 4323786 : DO pa1 = pa + 1, pa + nsgfa(iset)
272 11202751 : DO pb1 = pb + 1, pb + nsgfb(jset)
273 6878965 : IF (iatom == jatom .AND. pa1 == pb1) THEN
274 430264 : sparse_block(pa1, pb1) = sendbuffer(i)*myd_fac + sparse_block(pa1, pb1)
275 : ELSE
276 6448701 : sparse_block(pa1, pb1) = sendbuffer(i)*my_fac + sparse_block(pa1, pb1)
277 : END IF
278 9899336 : i = i + 1
279 : END DO
280 : END DO
281 1721159 : pb = pb + nsgfb(jset)
282 : END DO
283 564974 : pa = pa + nsgfa(iset)
284 : END DO
285 : END DO
286 46037 : CALL dbcsr_iterator_stop(iter)
287 :
288 46037 : DEALLOCATE (sendbuffer, recbuffer)
289 :
290 92074 : END SUBROUTINE distribute_ks_matrix
291 :
292 : ! **************************************************************************************************
293 : !> \brief - Distributes the local full Kohn-Sham matrix to all CPUS. Is called in
294 : !> case of adiabatic rescaling. This is just a refactored version of
295 : !> distribute_ks_matrix
296 : !> \param para_env ...
297 : !> \param qs_env ...
298 : !> \param ks_matrix Distributed Kohn-Sham matrix
299 : !> \param irep ...
300 : !> \param scaling_factor ...
301 : !> \par History
302 : !> 11.2007 created [Manuel Guidon]
303 : !> \author Manuel Guidon
304 : !> \note
305 : !> - Communication with left/right node only
306 : ! **************************************************************************************************
307 88 : SUBROUTINE scale_and_add_fock_to_ks_matrix(para_env, qs_env, ks_matrix, irep, &
308 : scaling_factor)
309 :
310 : TYPE(mp_para_env_type), POINTER :: para_env
311 : TYPE(qs_environment_type), POINTER :: qs_env
312 : TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: ks_matrix
313 : INTEGER, INTENT(IN) :: irep
314 : REAL(dp), INTENT(IN) :: scaling_factor
315 :
316 : INTEGER :: iatom, ikind, img, natom, nimages, nspins
317 88 : INTEGER, ALLOCATABLE, DIMENSION(:) :: kind_of, last_sgf_global
318 88 : REAL(dp), DIMENSION(:, :), POINTER :: full_ks
319 88 : TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
320 : TYPE(dft_control_type), POINTER :: dft_control
321 : TYPE(hfx_basis_type), DIMENSION(:), POINTER :: basis_parameter
322 : TYPE(hfx_type), POINTER :: actual_x_data
323 88 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
324 :
325 : !! All shared data is saved in i_thread = 1!
326 :
327 88 : NULLIFY (dft_control)
328 88 : actual_x_data => qs_env%x_data(irep, 1)
329 88 : basis_parameter => actual_x_data%basis_parameter
330 :
331 : CALL get_qs_env(qs_env=qs_env, &
332 : atomic_kind_set=atomic_kind_set, &
333 : particle_set=particle_set, &
334 88 : dft_control=dft_control)
335 :
336 88 : nspins = dft_control%nspins
337 88 : nimages = dft_control%nimages
338 88 : CPASSERT(nimages == 1)
339 :
340 88 : CALL get_atomic_kind_set(atomic_kind_set=atomic_kind_set, kind_of=kind_of)
341 :
342 88 : natom = SIZE(particle_set, 1)
343 264 : ALLOCATE (last_sgf_global(0:natom))
344 88 : last_sgf_global(0) = 0
345 176 : DO iatom = 1, natom
346 88 : ikind = kind_of(iatom)
347 176 : last_sgf_global(iatom) = last_sgf_global(iatom - 1) + basis_parameter(ikind)%nsgf_total
348 : END DO
349 88 : full_ks => actual_x_data%full_ks_alpha
350 88 : IF (scaling_factor /= 1.0_dp) THEN
351 17512 : full_ks = full_ks*scaling_factor
352 : END IF
353 176 : DO img = 1, nimages
354 : CALL distribute_ks_matrix(para_env, full_ks(:, img), ks_matrix(1, img)%matrix, actual_x_data%number_of_p_entries, &
355 : actual_x_data%block_offset, kind_of, basis_parameter, &
356 176 : off_diag_fac=0.5_dp)
357 : END DO
358 88 : DEALLOCATE (actual_x_data%full_ks_alpha)
359 :
360 88 : IF (nspins == 2) THEN
361 52 : full_ks => actual_x_data%full_ks_beta
362 52 : IF (scaling_factor /= 1.0_dp) THEN
363 10348 : full_ks = full_ks*scaling_factor
364 : END IF
365 104 : DO img = 1, nimages
366 : CALL distribute_ks_matrix(para_env, full_ks(:, img), ks_matrix(2, img)%matrix, actual_x_data%number_of_p_entries, &
367 : actual_x_data%block_offset, kind_of, basis_parameter, &
368 104 : off_diag_fac=0.5_dp)
369 : END DO
370 52 : DEALLOCATE (actual_x_data%full_ks_beta)
371 : END IF
372 :
373 88 : DEALLOCATE (last_sgf_global)
374 :
375 176 : END SUBROUTINE scale_and_add_fock_to_ks_matrix
376 :
377 : ! **************************************************************************************************
378 : !> \brief Given a 2d index pair, this function returns a 1d index pair for
379 : !> a symmetric upper triangle NxN matrix
380 : !> The compiler should inline this function, therefore it appears in
381 : !> several modules
382 : !> \param i 2d index
383 : !> \param j 2d index
384 : !> \param N matrix size
385 : !> \return ...
386 : !> \par History
387 : !> 03.2009 created [Manuel Guidon]
388 : !> \author Manuel Guidon
389 : ! **************************************************************************************************
390 0 : PURE FUNCTION get_1D_idx(i, j, N)
391 : INTEGER, INTENT(IN) :: i, j
392 : INTEGER(int_8), INTENT(IN) :: N
393 : INTEGER(int_8) :: get_1D_idx
394 :
395 : INTEGER(int_8) :: min_ij
396 :
397 0 : min_ij = MIN(i, j)
398 0 : get_1D_idx = min_ij*N + MAX(i, j) - (min_ij - 1)*min_ij/2 - N
399 :
400 0 : END FUNCTION get_1D_idx
401 :
402 : ! **************************************************************************************************
403 : !> \brief create a several maps array that reflects the ks matrix sparsity
404 : !> \param matrix ...
405 : !> \param basis_parameter ...
406 : !> \param kind_of ...
407 : !> \param is_assoc_atomic_block ...
408 : !> \param number_of_p_entries ...
409 : !> \param para_env ...
410 : !> \param atomic_block_offset ...
411 : !> \param set_offset ...
412 : !> \param block_offset ...
413 : !> \param map_atoms_to_cpus ...
414 : !> \param nkind ...
415 : !> \par History
416 : !> 11.2007 refactored [Joost VandeVondele]
417 : !> 07.2009 add new maps
418 : !> \author Manuel Guidon
419 : !> \notes
420 : !> is_assoc_atomic_block returns the mpi rank + 1 for associated blocks,
421 : !> zero for unassiated blocks
422 : ! **************************************************************************************************
423 2344 : SUBROUTINE get_atomic_block_maps(matrix, basis_parameter, kind_of, &
424 2344 : is_assoc_atomic_block, number_of_p_entries, &
425 : para_env, atomic_block_offset, set_offset, &
426 : block_offset, map_atoms_to_cpus, nkind)
427 :
428 : TYPE(dbcsr_type), POINTER :: matrix
429 : TYPE(hfx_basis_type), DIMENSION(:) :: basis_parameter
430 : INTEGER, DIMENSION(:) :: kind_of
431 : INTEGER, DIMENSION(:, :), INTENT(OUT) :: is_assoc_atomic_block
432 : INTEGER, INTENT(OUT) :: number_of_p_entries
433 : TYPE(mp_para_env_type), POINTER :: para_env
434 : INTEGER, DIMENSION(:, :), POINTER :: atomic_block_offset
435 : INTEGER, DIMENSION(:, :, :, :), POINTER :: set_offset
436 : INTEGER, DIMENSION(:), POINTER :: block_offset
437 : TYPE(hfx_2D_map), DIMENSION(:), POINTER :: map_atoms_to_cpus
438 : INTEGER :: nkind
439 :
440 : CHARACTER(LEN=*), PARAMETER :: routineN = 'get_atomic_block_maps'
441 :
442 : INTEGER :: handle, iatom, ibuf, icpu, ikind, ilist, iset, itask, jatom, jkind, jset, natom, &
443 : ncpu, nseta, nsetb, number_of_p_blocks, offset, tmp(2)
444 2344 : INTEGER, ALLOCATABLE, DIMENSION(:) :: buffer_in, buffer_out, counter, rcount, &
445 2344 : rdispl
446 2344 : INTEGER, DIMENSION(:), POINTER :: iatom_list, jatom_list, nsgfa, nsgfb
447 2344 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: sparse_block
448 : TYPE(dbcsr_iterator_type) :: iter
449 :
450 2344 : CALL timeset(routineN, handle)
451 :
452 34432 : is_assoc_atomic_block = 0
453 2344 : number_of_p_entries = 0
454 2344 : number_of_p_blocks = 0
455 :
456 : !
457 : ! count number_of_p_blocks and number_of_p_entries
458 : !
459 2344 : CALL dbcsr_iterator_start(iter, matrix, shared=.FALSE.)
460 10323 : DO WHILE (dbcsr_iterator_blocks_left(iter))
461 7979 : CALL dbcsr_iterator_next_block(iter, iatom, jatom, sparse_block)
462 7979 : ikind = kind_of(iatom)
463 7979 : jkind = kind_of(jatom)
464 7979 : number_of_p_blocks = number_of_p_blocks + 1
465 : number_of_p_entries = number_of_p_entries + &
466 7979 : basis_parameter(ikind)%nsgf_total*basis_parameter(jkind)%nsgf_total
467 : END DO
468 2344 : CALL dbcsr_iterator_stop(iter)
469 :
470 7032 : tmp = [number_of_p_entries, number_of_p_blocks]
471 2344 : CALL para_env%max(tmp)
472 2344 : number_of_p_entries = tmp(1)
473 2344 : number_of_p_blocks = tmp(2)
474 : !
475 : ! send this info around, so we can construct is_assoc_atomic_block
476 : ! pack all data in buffers and use allgatherv
477 : !
478 7032 : ALLOCATE (buffer_in(3*number_of_p_blocks))
479 7032 : ALLOCATE (buffer_out(3*number_of_p_blocks*para_env%num_pe))
480 9376 : ALLOCATE (rcount(para_env%num_pe), rdispl(para_env%num_pe))
481 :
482 32836 : buffer_in = 0
483 2344 : ibuf = 0
484 :
485 2344 : CALL dbcsr_iterator_start(iter, matrix, shared=.FALSE.)
486 10323 : DO WHILE (dbcsr_iterator_blocks_left(iter))
487 7979 : CALL dbcsr_iterator_next_block(iter, iatom, jatom, sparse_block)
488 :
489 7979 : buffer_in(ibuf + 1) = iatom
490 7979 : buffer_in(ibuf + 2) = jatom
491 7979 : buffer_in(ibuf + 3) = para_env%mepos + 1
492 7979 : ibuf = ibuf + 3
493 : END DO
494 2344 : CALL dbcsr_iterator_stop(iter)
495 :
496 7030 : rcount = SIZE(buffer_in)
497 2344 : rdispl(1) = 0
498 4686 : DO icpu = 2, para_env%num_pe
499 4686 : rdispl(icpu) = rdispl(icpu - 1) + rcount(icpu - 1)
500 : END DO
501 2344 : CALL para_env%allgatherv(buffer_in, buffer_out, rcount, rdispl)
502 :
503 22660 : DO ibuf = 0, number_of_p_blocks*para_env%num_pe*3 - 3, 3
504 20316 : itask = buffer_out(ibuf + 3)
505 : ! buffer_out can be 0 if buffer_in contained less elements than the max number of atom pairs
506 : ! is_assoc_atomic_block is a map for atom pairs to a processor (assumes symmetry, i,j on the ame as j,i)
507 22660 : IF (itask /= 0) THEN
508 15946 : iatom = buffer_out(ibuf + 1)
509 15946 : jatom = buffer_out(ibuf + 2)
510 15946 : is_assoc_atomic_block(iatom, jatom) = itask
511 15946 : is_assoc_atomic_block(jatom, iatom) = itask
512 : END IF
513 : END DO
514 :
515 2344 : IF (ASSOCIATED(map_atoms_to_cpus)) THEN
516 3138 : DO icpu = 1, para_env%num_pe
517 2092 : DEALLOCATE (map_atoms_to_cpus(icpu)%iatom_list)
518 3138 : DEALLOCATE (map_atoms_to_cpus(icpu)%jatom_list)
519 : END DO
520 1046 : DEALLOCATE (map_atoms_to_cpus)
521 : END IF
522 :
523 2344 : natom = SIZE(is_assoc_atomic_block, 1)
524 11718 : ALLOCATE (map_atoms_to_cpus(para_env%num_pe))
525 7032 : ALLOCATE (counter(para_env%num_pe))
526 7030 : counter = 0
527 :
528 9334 : DO iatom = 1, natom
529 25378 : DO jatom = iatom, natom
530 16044 : icpu = is_assoc_atomic_block(jatom, iatom)
531 23034 : IF (icpu > 0) counter(icpu) = counter(icpu) + 1
532 : END DO
533 : END DO
534 7030 : DO icpu = 1, para_env%num_pe
535 14002 : ALLOCATE (map_atoms_to_cpus(icpu)%iatom_list(counter(icpu)))
536 11660 : ALLOCATE (map_atoms_to_cpus(icpu)%jatom_list(counter(icpu)))
537 : END DO
538 7030 : counter = 0
539 9334 : DO iatom = 1, natom
540 25378 : DO jatom = iatom, natom
541 16044 : icpu = is_assoc_atomic_block(jatom, iatom)
542 23034 : IF (icpu > 0) THEN
543 15946 : counter(icpu) = counter(icpu) + 1
544 15946 : map_atoms_to_cpus(icpu)%jatom_list(counter(icpu)) = jatom
545 15946 : map_atoms_to_cpus(icpu)%iatom_list(counter(icpu)) = iatom
546 : END IF
547 : END DO
548 : END DO
549 :
550 2344 : DEALLOCATE (counter)
551 :
552 2344 : ncpu = para_env%num_pe
553 2344 : offset = 1
554 34432 : atomic_block_offset = 0
555 9374 : block_offset = 0
556 7030 : DO icpu = 1, ncpu
557 4686 : iatom_list => map_atoms_to_cpus(icpu)%iatom_list
558 4686 : jatom_list => map_atoms_to_cpus(icpu)%jatom_list
559 4686 : block_offset(icpu) = offset
560 22976 : DO ilist = 1, SIZE(iatom_list)
561 15946 : iatom = iatom_list(ilist)
562 15946 : ikind = kind_of(iatom)
563 15946 : jatom = jatom_list(ilist)
564 15946 : jkind = kind_of(jatom)
565 15946 : atomic_block_offset(iatom, jatom) = offset
566 15946 : atomic_block_offset(jatom, iatom) = offset
567 20632 : offset = offset + basis_parameter(ikind)%nsgf_total*basis_parameter(jkind)%nsgf_total
568 : END DO
569 : END DO
570 2344 : block_offset(ncpu + 1) = offset
571 166248 : set_offset = 0
572 6584 : DO ikind = 1, nkind
573 4240 : nseta = basis_parameter(ikind)%nset
574 4240 : nsgfa => basis_parameter(ikind)%nsgf
575 15876 : DO jkind = 1, nkind
576 9292 : nsetb = basis_parameter(jkind)%nset
577 9292 : nsgfb => basis_parameter(jkind)%nsgf
578 9292 : offset = 1
579 38446 : DO iset = 1, nseta
580 119368 : DO jset = 1, nsetb
581 85162 : set_offset(jset, iset, jkind, ikind) = offset
582 110076 : offset = offset + nsgfa(iset)*nsgfb(jset)
583 : END DO
584 : END DO
585 : END DO
586 : END DO
587 :
588 2344 : CALL timestop(handle)
589 :
590 4688 : END SUBROUTINE get_atomic_block_maps
591 :
592 : END MODULE hfx_communication
|