Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief Routines for calculating RI-MP2 gradients
10 : !> \par History
11 : !> 10.2013 created [Mauro Del Ben]
12 : ! **************************************************************************************************
13 : MODULE mp2_ri_grad_util
14 : !
15 : USE cp_blacs_env, ONLY: cp_blacs_env_create,&
16 : cp_blacs_env_release,&
17 : cp_blacs_env_type
18 : USE cp_dbcsr_operations, ONLY: copy_fm_to_dbcsr,&
19 : cp_dbcsr_m_by_n_from_template
20 : USE cp_fm_basic_linalg, ONLY: cp_fm_geadd
21 : USE cp_fm_struct, ONLY: cp_fm_struct_create,&
22 : cp_fm_struct_get,&
23 : cp_fm_struct_release,&
24 : cp_fm_struct_type
25 : USE cp_fm_types, ONLY: &
26 : cp_fm_create, cp_fm_get_info, cp_fm_indxg2l, cp_fm_indxg2p, cp_fm_indxl2g, cp_fm_release, &
27 : cp_fm_set_all, cp_fm_to_fm, cp_fm_type
28 : USE dbcsr_api, ONLY: dbcsr_p_type,&
29 : dbcsr_type,&
30 : dbcsr_type_no_symmetry
31 : USE group_dist_types, ONLY: create_group_dist,&
32 : get_group_dist,&
33 : group_dist_d1_type,&
34 : release_group_dist
35 : USE kinds, ONLY: dp
36 : USE libint_2c_3c, ONLY: compare_potential_types
37 : USE message_passing, ONLY: mp_para_env_type,&
38 : mp_request_null,&
39 : mp_request_type,&
40 : mp_waitall
41 : USE mp2_types, ONLY: integ_mat_buffer_type,&
42 : mp2_type
43 : USE parallel_gemm_api, ONLY: parallel_gemm
44 : USE util, ONLY: get_limit
45 : #include "./base/base_uses.f90"
46 :
47 : IMPLICIT NONE
48 :
49 : PRIVATE
50 :
51 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'mp2_ri_grad_util'
52 :
53 : PUBLIC :: complete_gamma, array2fm, fm2array, prepare_redistribution, create_dbcsr_gamma
54 :
55 : TYPE index_map
56 : INTEGER, DIMENSION(:, :), ALLOCATABLE :: map
57 : END TYPE
58 :
59 : CONTAINS
60 :
61 : ! **************************************************************************************************
62 : !> \brief complete the calculation of the Gamma matrices
63 : !> \param mp2_env ...
64 : !> \param B_ia_Q ...
65 : !> \param dimen_RI ...
66 : !> \param homo ...
67 : !> \param virtual ...
68 : !> \param para_env ...
69 : !> \param para_env_sub ...
70 : !> \param ngroup ...
71 : !> \param my_group_L_size ...
72 : !> \param my_group_L_start ...
73 : !> \param my_group_L_end ...
74 : !> \param my_B_size ...
75 : !> \param my_B_virtual_start ...
76 : !> \param gd_array ...
77 : !> \param gd_B_virtual ...
78 : !> \param kspin ...
79 : !> \author Mauro Del Ben
80 : ! **************************************************************************************************
81 296 : SUBROUTINE complete_gamma(mp2_env, B_ia_Q, dimen_RI, homo, virtual, para_env, para_env_sub, ngroup, &
82 : my_group_L_size, my_group_L_start, my_group_L_end, &
83 : my_B_size, my_B_virtual_start, gd_array, gd_B_virtual, kspin)
84 :
85 : TYPE(mp2_type) :: mp2_env
86 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :), &
87 : INTENT(INOUT) :: B_ia_Q
88 : INTEGER, INTENT(IN) :: dimen_RI, homo, virtual
89 : TYPE(mp_para_env_type), INTENT(IN), POINTER :: para_env, para_env_sub
90 : INTEGER, INTENT(IN) :: ngroup, my_group_L_size, &
91 : my_group_L_start, my_group_L_end, &
92 : my_B_size, my_B_virtual_start
93 : TYPE(group_dist_d1_type), INTENT(IN) :: gd_array, gd_B_virtual
94 : INTEGER, INTENT(IN) :: kspin
95 :
96 : CHARACTER(LEN=*), PARAMETER :: routineN = 'complete_gamma'
97 :
98 : INTEGER :: dimen_ia, handle, i, ispin, kkB, my_ia_end, my_ia_size, my_ia_start, my_P_end, &
99 : my_P_size, my_P_start, nspins, pos_group, pos_sub
100 296 : INTEGER, ALLOCATABLE, DIMENSION(:) :: pos_info
101 296 : INTEGER, ALLOCATABLE, DIMENSION(:, :) :: group_grid_2_mepos, mepos_2_grid_group
102 296 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: BIb_C_2D, Gamma_2D, Gamma_PQ
103 : TYPE(cp_blacs_env_type), POINTER :: blacs_env
104 : TYPE(cp_fm_struct_type), POINTER :: fm_struct_ia, fm_struct_RI
105 : TYPE(cp_fm_type) :: fm_Gamma, fm_Gamma_PQ, fm_Gamma_PQ_2, fm_Gamma_PQ_temp, &
106 : fm_Gamma_PQ_temp_2, fm_ia_P, fm_Y, operator_half, PQ_half
107 296 : TYPE(group_dist_d1_type) :: gd_array_new, gd_ia, gd_ia_new, gd_P, &
108 296 : gd_P_new
109 :
110 296 : CALL timeset(routineN, handle)
111 :
112 : ! reshape the data into a 2D array
113 : ! reorder the local data in such a way to help the next stage of matrix creation
114 : ! now the data inside the group are divided into a ia x K matrix
115 296 : dimen_ia = homo*virtual
116 296 : CALL create_group_dist(gd_ia, para_env_sub%num_pe, dimen_ia)
117 296 : CALL get_group_dist(gd_ia, para_env_sub%mepos, my_ia_start, my_ia_end, my_ia_size)
118 :
119 : CALL mat_3d_to_2d(B_ia_Q, BIb_C_2D, homo, my_B_size, virtual, my_B_virtual_start, &
120 : my_ia_start, my_ia_end, my_ia_size, my_group_L_size, para_env_sub, gd_B_virtual)
121 :
122 : CALL mat_3d_to_2d_gamma(mp2_env%ri_grad%Gamma_P_ia(kspin)%array, Gamma_2D, homo, my_B_size, virtual, my_B_virtual_start, &
123 296 : my_ia_start, my_ia_end, my_ia_size, my_group_L_size, para_env_sub, gd_B_virtual)
124 :
125 : ! create the processor map and size arrays
126 296 : CALL create_group_dist(gd_ia_new, para_env%num_pe)
127 296 : CALL create_group_dist(gd_array_new, para_env%num_pe)
128 296 : CALL create_group_dist(gd_P, para_env_sub%num_pe, dimen_RI)
129 296 : CALL create_group_dist(gd_P_new, para_env%num_pe)
130 :
131 296 : CALL get_group_dist(gd_P, para_env_sub%mepos, my_P_start, my_P_end, my_P_size)
132 :
133 : CALL prepare_redistribution(para_env, para_env_sub, ngroup, &
134 : group_grid_2_mepos, mepos_2_grid_group, pos_info=pos_info)
135 :
136 888 : DO i = 0, para_env%num_pe - 1
137 : ! calculate position of the group
138 592 : pos_group = i/para_env_sub%num_pe
139 : ! calculate position in the subgroup
140 592 : pos_sub = pos_info(i)
141 : ! 1 -> rows, 2 -> cols
142 592 : CALL get_group_dist(gd_ia, pos_sub, gd_ia_new, i)
143 592 : CALL get_group_dist(gd_array, pos_group, gd_array_new, i)
144 888 : CALL get_group_dist(gd_P, pos_sub, gd_P_new, i)
145 : END DO
146 :
147 : ! create the blacs env
148 296 : NULLIFY (blacs_env)
149 296 : CALL cp_blacs_env_create(blacs_env=blacs_env, para_env=para_env)
150 :
151 296 : NULLIFY (fm_struct_ia)
152 : CALL cp_fm_struct_create(fm_struct_ia, context=blacs_env, nrow_global=dimen_ia, &
153 296 : ncol_global=dimen_RI, para_env=para_env)
154 :
155 : ! create the fm matrix Gamma
156 : CALL array2fm(Gamma_2D, fm_struct_ia, my_ia_start, my_ia_end, &
157 : my_group_L_start, my_group_L_end, &
158 : gd_ia_new, gd_array_new, &
159 : group_grid_2_mepos, para_env_sub%num_pe, ngroup, &
160 296 : fm_Y)
161 : ! create the fm matrix B_ia_P
162 : CALL array2fm(BIb_C_2D, fm_struct_ia, my_ia_start, my_ia_end, &
163 : my_group_L_start, my_group_L_end, &
164 : gd_ia_new, gd_array_new, &
165 : group_grid_2_mepos, para_env_sub%num_pe, ngroup, &
166 296 : fm_ia_P)
167 :
168 296 : NULLIFY (fm_struct_RI)
169 : CALL cp_fm_struct_create(fm_struct_RI, context=blacs_env, nrow_global=dimen_RI, &
170 296 : ncol_global=dimen_RI, para_env=para_env)
171 :
172 : ! since we will need (P|Q)^(-1/2) in the future, make a copy
173 : CALL array2fm(mp2_env%ri_grad%PQ_half, fm_struct_RI, my_P_start, my_P_end, &
174 : my_group_L_start, my_group_L_end, &
175 : gd_P_new, gd_array_new, &
176 : group_grid_2_mepos, para_env_sub%num_pe, ngroup, &
177 296 : PQ_half, do_release_mat=.FALSE.)
178 :
179 296 : IF (.NOT. compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter)) THEN
180 : CALL array2fm(mp2_env%ri_grad%operator_half, fm_struct_RI, my_P_start, my_P_end, &
181 : my_group_L_start, my_group_L_end, &
182 : gd_P_new, gd_array_new, &
183 : group_grid_2_mepos, para_env_sub%num_pe, ngroup, &
184 8 : operator_half, do_release_mat=.FALSE.)
185 : END IF
186 :
187 296 : CALL release_group_dist(gd_P_new)
188 296 : CALL release_group_dist(gd_ia_new)
189 296 : CALL release_group_dist(gd_array_new)
190 :
191 : ! complete the gamma matrix Gamma_ia^P = sum_Q (Y_ia^Q * (Q|P)^(-1/2))
192 296 : IF (compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter)) THEN
193 288 : CALL cp_fm_create(fm_Gamma, fm_struct_ia, name="fm_Gamma")
194 288 : CALL cp_fm_struct_release(fm_struct_ia)
195 : ! perform the matrix multiplication
196 : CALL parallel_gemm(transa="N", transb="T", m=dimen_ia, n=dimen_RI, k=dimen_RI, alpha=1.0_dp, &
197 : matrix_a=fm_Y, matrix_b=PQ_half, beta=0.0_dp, &
198 288 : matrix_c=fm_Gamma)
199 : ! release the Y matrix
200 288 : CALL cp_fm_release(fm_Y)
201 :
202 : ! complete gamma small (fm_Gamma_PQ)
203 : ! create temp matrix
204 288 : CALL cp_fm_create(fm_Gamma_PQ_temp, fm_struct_RI, name="fm_Gamma_PQ_temp")
205 : CALL parallel_gemm(transa="T", transb="N", m=dimen_RI, n=dimen_RI, k=dimen_ia, alpha=1.0_dp, &
206 : matrix_a=fm_Gamma, matrix_b=fm_ia_P, beta=0.0_dp, &
207 288 : matrix_c=fm_Gamma_PQ_temp)
208 288 : CALL cp_fm_release(fm_ia_P)
209 : ! create fm_Gamma_PQ matrix
210 288 : CALL cp_fm_create(fm_Gamma_PQ, fm_struct_RI, name="fm_Gamma_PQ")
211 288 : CALL cp_fm_struct_release(fm_struct_RI)
212 : ! perform matrix multiplication
213 : CALL parallel_gemm(transa="N", transb="T", m=dimen_RI, n=dimen_RI, k=dimen_RI, alpha=1.0_dp, &
214 : matrix_a=fm_Gamma_PQ_temp, matrix_b=PQ_half, beta=0.0_dp, &
215 288 : matrix_c=fm_Gamma_PQ)
216 288 : CALL cp_fm_release(fm_Gamma_PQ_temp)
217 288 : CALL cp_fm_release(PQ_half)
218 :
219 : ELSE
220 : ! create temp matrix
221 8 : CALL cp_fm_create(fm_Gamma_PQ_temp, fm_struct_RI, name="fm_Gamma_PQ_temp")
222 : CALL parallel_gemm(transa="T", transb="N", m=dimen_RI, n=dimen_RI, k=dimen_ia, alpha=1.0_dp, &
223 : matrix_a=fm_Y, matrix_b=fm_ia_P, beta=0.0_dp, &
224 8 : matrix_c=fm_Gamma_PQ_temp)
225 8 : CALL cp_fm_release(fm_ia_P)
226 :
227 8 : CALL cp_fm_create(fm_Gamma, fm_struct_ia, name="fm_Gamma")
228 8 : CALL cp_fm_struct_release(fm_struct_ia)
229 : ! perform the matrix multiplication
230 : CALL parallel_gemm(transa="N", transb="T", m=dimen_ia, n=dimen_RI, k=dimen_RI, alpha=1.0_dp, &
231 : matrix_a=fm_Y, matrix_b=PQ_half, beta=0.0_dp, &
232 8 : matrix_c=fm_Gamma)
233 8 : CALL cp_fm_release(fm_Y)
234 :
235 8 : CALL cp_fm_create(fm_Gamma_PQ_temp_2, fm_struct_RI, name="fm_Gamma_PQ_temp_2")
236 : CALL parallel_gemm(transa="N", transb="T", m=dimen_RI, n=dimen_RI, k=dimen_RI, alpha=1.0_dp, &
237 : matrix_a=fm_Gamma_PQ_temp, matrix_b=operator_half, beta=0.0_dp, &
238 8 : matrix_c=fm_Gamma_PQ_temp_2)
239 :
240 8 : CALL cp_fm_create(fm_Gamma_PQ_2, fm_struct_RI, name="fm_Gamma_PQ_2")
241 : CALL parallel_gemm(transa="N", transb="N", m=dimen_RI, n=dimen_RI, k=dimen_RI, alpha=1.0_dp, &
242 : matrix_a=PQ_half, matrix_b=fm_Gamma_PQ_temp_2, beta=0.0_dp, &
243 8 : matrix_c=fm_Gamma_PQ_temp)
244 8 : CALL cp_fm_to_fm(fm_Gamma_PQ_temp, fm_Gamma_PQ_2)
245 8 : CALL cp_fm_geadd(1.0_dp, "T", fm_Gamma_PQ_temp, 1.0_dp, fm_Gamma_PQ_2)
246 8 : CALL cp_fm_release(fm_Gamma_PQ_temp)
247 8 : CALL cp_fm_release(PQ_half)
248 :
249 8 : CALL cp_fm_create(fm_Gamma_PQ, fm_struct_RI)
250 8 : CALL cp_fm_struct_release(fm_struct_RI)
251 : CALL parallel_gemm(transa="N", transb="N", m=dimen_RI, n=dimen_RI, k=dimen_RI, alpha=-1.0_dp, &
252 : matrix_a=operator_half, matrix_b=fm_Gamma_PQ_temp_2, beta=0.0_dp, &
253 8 : matrix_c=fm_Gamma_PQ)
254 8 : CALL cp_fm_release(operator_half)
255 8 : CALL cp_fm_release(fm_Gamma_PQ_temp_2)
256 : END IF
257 :
258 : ! now redistribute the data, in this case we go back
259 : ! to the original way the integrals were distributed
260 : CALL fm2array(Gamma_2D, my_ia_size, my_ia_start, my_ia_end, &
261 : my_group_L_size, my_group_L_start, my_group_L_end, &
262 : group_grid_2_mepos, mepos_2_grid_group, &
263 : para_env_sub%num_pe, ngroup, &
264 296 : fm_Gamma)
265 :
266 1184 : ALLOCATE (Gamma_PQ(my_P_size, my_group_L_size))
267 : CALL fm2array(Gamma_PQ, my_P_size, my_P_start, my_P_end, &
268 : my_group_L_size, my_group_L_start, my_group_L_end, &
269 : group_grid_2_mepos, mepos_2_grid_group, &
270 : para_env_sub%num_pe, ngroup, &
271 296 : fm_Gamma_PQ)
272 296 : IF (.NOT. ALLOCATED(mp2_env%ri_grad%Gamma_PQ)) THEN
273 220 : CALL MOVE_ALLOC(Gamma_PQ, mp2_env%ri_grad%Gamma_PQ)
274 : ELSE
275 303670 : mp2_env%ri_grad%Gamma_PQ(:, :) = mp2_env%ri_grad%Gamma_PQ + Gamma_PQ
276 76 : DEALLOCATE (Gamma_PQ)
277 : END IF
278 :
279 296 : IF (.NOT. compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter)) THEN
280 32 : ALLOCATE (Gamma_PQ(my_P_size, my_group_L_size))
281 : CALL fm2array(Gamma_PQ, my_P_size, my_P_start, my_P_end, &
282 : my_group_L_size, my_group_L_start, my_group_L_end, &
283 : group_grid_2_mepos, mepos_2_grid_group, &
284 : para_env_sub%num_pe, ngroup, &
285 8 : fm_Gamma_PQ_2)
286 8 : IF (.NOT. ALLOCATED(mp2_env%ri_grad%Gamma_PQ_2)) THEN
287 8 : CALL MOVE_ALLOC(Gamma_PQ, mp2_env%ri_grad%Gamma_PQ_2)
288 : ELSE
289 0 : mp2_env%ri_grad%Gamma_PQ_2(:, :) = mp2_env%ri_grad%Gamma_PQ_2 + Gamma_PQ
290 0 : DEALLOCATE (Gamma_PQ)
291 : END IF
292 : END IF
293 :
294 : ! allocate G_P_ia (DBCSR)
295 296 : IF (.NOT. ALLOCATED(mp2_env%ri_grad%G_P_ia)) THEN
296 220 : nspins = SIZE(mp2_env%ri_grad%mo_coeff_o)
297 13971 : ALLOCATE (mp2_env%ri_grad%G_P_ia(my_group_L_size, nspins))
298 516 : DO ispin = 1, nspins
299 13311 : DO kkB = 1, my_group_L_size
300 13091 : NULLIFY (mp2_env%ri_grad%G_P_ia(kkB, ispin)%matrix)
301 : END DO
302 : END DO
303 : END IF
304 :
305 : ! create the Gamma_ia_P in DBCSR style
306 : CALL create_dbcsr_gamma(Gamma_2D, homo, virtual, dimen_ia, para_env_sub, &
307 : my_ia_start, my_ia_end, my_group_L_size, gd_ia, &
308 296 : mp2_env%ri_grad%G_P_ia(:, kspin), mp2_env%ri_grad%mo_coeff_o(1)%matrix)
309 :
310 296 : DEALLOCATE (pos_info)
311 296 : DEALLOCATE (group_grid_2_mepos, mepos_2_grid_group)
312 296 : CALL release_group_dist(gd_ia)
313 296 : CALL release_group_dist(gd_P)
314 :
315 : ! release blacs_env
316 296 : CALL cp_blacs_env_release(blacs_env)
317 :
318 296 : CALL timestop(handle)
319 :
320 1776 : END SUBROUTINE complete_gamma
321 :
322 : ! **************************************************************************************************
323 : !> \brief Redistribute a 3d matrix to a 2d matrix
324 : !> \param B_ia_Q ...
325 : !> \param BIb_C_2D ...
326 : !> \param homo ...
327 : !> \param my_B_size ...
328 : !> \param virtual ...
329 : !> \param my_B_virtual_start ...
330 : !> \param my_ia_start ...
331 : !> \param my_ia_end ...
332 : !> \param my_ia_size ...
333 : !> \param my_group_L_size ...
334 : !> \param para_env_sub ...
335 : !> \param gd_B_virtual ...
336 : ! **************************************************************************************************
337 296 : SUBROUTINE mat_3d_to_2d(B_ia_Q, BIb_C_2D, homo, my_B_size, virtual, my_B_virtual_start, &
338 : my_ia_start, my_ia_end, my_ia_size, my_group_L_size, para_env_sub, gd_B_virtual)
339 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :), &
340 : INTENT(INOUT) :: B_ia_Q
341 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :), &
342 : INTENT(OUT) :: BIb_C_2D
343 : INTEGER, INTENT(IN) :: homo, my_B_size, virtual, &
344 : my_B_virtual_start, my_ia_start, &
345 : my_ia_end, my_ia_size, my_group_L_size
346 : TYPE(mp_para_env_type), INTENT(IN) :: para_env_sub
347 : TYPE(group_dist_d1_type), INTENT(IN) :: gd_B_virtual
348 :
349 : CHARACTER(LEN=*), PARAMETER :: routineN = 'mat_3d_to_2d'
350 :
351 : INTEGER :: handle, ia_global, iiB, jjB, proc_receive, proc_send, proc_shift, rec_B_size, &
352 : rec_B_virtual_end, rec_B_virtual_start
353 296 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :) :: BIb_C_rec
354 :
355 296 : CALL timeset(routineN, handle)
356 :
357 1184 : ALLOCATE (BIb_C_2D(my_ia_size, my_group_L_size))
358 705779 : BIb_C_2D = 0.0_dp
359 :
360 1360 : DO iiB = 1, homo
361 16940 : DO jjB = 1, my_B_size
362 15580 : ia_global = (iiB - 1)*virtual + my_B_virtual_start + jjB - 1
363 16644 : IF (ia_global >= my_ia_start .AND. ia_global <= my_ia_end) THEN
364 690895 : BIb_C_2D(ia_global - my_ia_start + 1, :) = B_ia_Q(iiB, jjB, 1:my_group_L_size)
365 : END IF
366 : END DO
367 : END DO
368 :
369 304 : DO proc_shift = 1, para_env_sub%num_pe - 1
370 8 : proc_send = MODULO(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
371 8 : proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
372 :
373 8 : CALL get_group_dist(gd_B_virtual, proc_receive, rec_B_virtual_start, rec_B_virtual_end, rec_B_size)
374 :
375 40 : ALLOCATE (BIb_C_rec(homo, rec_B_size, my_group_L_size))
376 47130 : BIb_C_rec = 0.0_dp
377 :
378 : CALL para_env_sub%sendrecv(B_ia_Q, proc_send, &
379 8 : BIb_C_rec, proc_receive)
380 :
381 48 : DO iiB = 1, homo
382 438 : DO jjB = 1, rec_B_size
383 390 : ia_global = (iiB - 1)*virtual + rec_B_virtual_start + jjB - 1
384 430 : IF (ia_global >= my_ia_start .AND. ia_global <= my_ia_end) THEN
385 17373 : BIb_C_2D(ia_global - my_ia_start + 1, :) = BIb_C_rec(iiB, jjB, 1:my_group_L_size)
386 : END IF
387 : END DO
388 : END DO
389 :
390 312 : DEALLOCATE (BIb_C_rec)
391 : END DO
392 296 : DEALLOCATE (B_ia_Q)
393 :
394 296 : CALL timestop(handle)
395 592 : END SUBROUTINE mat_3d_to_2d
396 :
397 : ! **************************************************************************************************
398 : !> \brief Redistribute a 3d matrix to a 2d matrix, specialized for Gamma_P_ia to account for a different order of indices
399 : !> \param B_ia_Q ...
400 : !> \param BIb_C_2D ...
401 : !> \param homo ...
402 : !> \param my_B_size ...
403 : !> \param virtual ...
404 : !> \param my_B_virtual_start ...
405 : !> \param my_ia_start ...
406 : !> \param my_ia_end ...
407 : !> \param my_ia_size ...
408 : !> \param my_group_L_size ...
409 : !> \param para_env_sub ...
410 : !> \param gd_B_virtual ...
411 : ! **************************************************************************************************
412 296 : SUBROUTINE mat_3d_to_2d_gamma(B_ia_Q, BIb_C_2D, homo, my_B_size, virtual, my_B_virtual_start, &
413 : my_ia_start, my_ia_end, my_ia_size, my_group_L_size, para_env_sub, gd_B_virtual)
414 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :), &
415 : INTENT(INOUT) :: B_ia_Q
416 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :), &
417 : INTENT(OUT) :: BIb_C_2D
418 : INTEGER, INTENT(IN) :: homo, my_B_size, virtual, &
419 : my_B_virtual_start, my_ia_start, &
420 : my_ia_end, my_ia_size, my_group_L_size
421 : TYPE(mp_para_env_type), INTENT(IN) :: para_env_sub
422 : TYPE(group_dist_d1_type), INTENT(IN) :: gd_B_virtual
423 :
424 : CHARACTER(LEN=*), PARAMETER :: routineN = 'mat_3d_to_2d_gamma'
425 :
426 : INTEGER :: handle, ia_global, iiB, jjB, proc_receive, proc_send, proc_shift, rec_B_size, &
427 : rec_B_virtual_end, rec_B_virtual_start
428 296 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :) :: BIb_C_rec
429 :
430 296 : CALL timeset(routineN, handle)
431 :
432 1184 : ALLOCATE (BIb_C_2D(my_ia_size, my_group_L_size))
433 705779 : BIb_C_2D = 0.0_dp
434 :
435 1360 : DO iiB = 1, homo
436 16940 : DO jjB = 1, my_B_size
437 15580 : ia_global = (iiB - 1)*virtual + my_B_virtual_start + jjB - 1
438 16644 : IF (ia_global >= my_ia_start .AND. ia_global <= my_ia_end) THEN
439 690895 : BIb_C_2D(ia_global - my_ia_start + 1, :) = B_ia_Q(jjB, iiB, 1:my_group_L_size)
440 : END IF
441 : END DO
442 : END DO
443 :
444 304 : DO proc_shift = 1, para_env_sub%num_pe - 1
445 8 : proc_send = MODULO(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
446 8 : proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
447 :
448 8 : CALL get_group_dist(gd_B_virtual, proc_receive, rec_B_virtual_start, rec_B_virtual_end, rec_B_size)
449 :
450 40 : ALLOCATE (BIb_C_rec(rec_B_size, homo, my_group_L_size))
451 43544 : BIb_C_rec = 0.0_dp
452 :
453 : CALL para_env_sub%sendrecv(B_ia_Q, proc_send, &
454 8 : BIb_C_rec, proc_receive)
455 :
456 48 : DO iiB = 1, homo
457 438 : DO jjB = 1, rec_B_size
458 390 : ia_global = (iiB - 1)*virtual + rec_B_virtual_start + jjB - 1
459 430 : IF (ia_global >= my_ia_start .AND. ia_global <= my_ia_end) THEN
460 17373 : BIb_C_2D(ia_global - my_ia_start + 1, :) = BIb_C_rec(jjB, iiB, 1:my_group_L_size)
461 : END IF
462 : END DO
463 : END DO
464 :
465 312 : DEALLOCATE (BIb_C_rec)
466 : END DO
467 296 : DEALLOCATE (B_ia_Q)
468 :
469 296 : CALL timestop(handle)
470 592 : END SUBROUTINE mat_3d_to_2d_gamma
471 :
472 : ! **************************************************************************************************
473 : !> \brief redistribute local part of array to fm
474 : !> \param mat2D ...
475 : !> \param fm_struct ...
476 : !> \param my_start_row ...
477 : !> \param my_end_row ...
478 : !> \param my_start_col ...
479 : !> \param my_end_col ...
480 : !> \param gd_row ...
481 : !> \param gd_col ...
482 : !> \param group_grid_2_mepos ...
483 : !> \param ngroup_row ...
484 : !> \param ngroup_col ...
485 : !> \param fm_mat ...
486 : !> \param integ_group_size ...
487 : !> \param color_group ...
488 : !> \param do_release_mat whether to release the array (default: yes)
489 : ! **************************************************************************************************
490 1112 : SUBROUTINE array2fm(mat2D, fm_struct, my_start_row, my_end_row, &
491 : my_start_col, my_end_col, &
492 : gd_row, gd_col, &
493 : group_grid_2_mepos, ngroup_row, ngroup_col, &
494 : fm_mat, integ_group_size, color_group, do_release_mat)
495 :
496 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :), &
497 : INTENT(INOUT) :: mat2D
498 : TYPE(cp_fm_struct_type), POINTER :: fm_struct
499 : INTEGER, INTENT(IN) :: my_start_row, my_end_row, my_start_col, &
500 : my_end_col
501 : TYPE(group_dist_d1_type), INTENT(IN) :: gd_row, gd_col
502 : INTEGER, ALLOCATABLE, DIMENSION(:, :), INTENT(IN) :: group_grid_2_mepos
503 : INTEGER, INTENT(IN) :: ngroup_row, ngroup_col
504 : TYPE(cp_fm_type), INTENT(OUT) :: fm_mat
505 : INTEGER, INTENT(IN), OPTIONAL :: integ_group_size, color_group
506 : LOGICAL, INTENT(IN), OPTIONAL :: do_release_mat
507 :
508 : CHARACTER(LEN=*), PARAMETER :: routineN = 'array2fm'
509 :
510 : INTEGER :: dummy_proc, end_col_block, end_row_block, handle, handle2, i_global, i_local, &
511 : i_sub, iiB, iii, itmp(2), j_global, j_local, j_sub, jjB, my_num_col_blocks, &
512 : my_num_row_blocks, mypcol, myprow, ncol_block, ncol_local, npcol, nprow, nrow_block, &
513 : nrow_local, num_cols, num_rec_cols, num_rows, number_of_rec, number_of_send, &
514 : proc_receive, proc_send, proc_shift, rec_col_end, rec_col_size, rec_col_start, &
515 : rec_counter, rec_pcol, rec_prow, rec_row_end, rec_row_size, rec_row_start, ref_send_pcol, &
516 : ref_send_prow, send_counter, send_pcol, send_prow, size_rec_buffer, size_send_buffer
517 : INTEGER :: start_col_block, start_row_block
518 1112 : INTEGER, ALLOCATABLE, DIMENSION(:) :: iii_vet, index_col_rec, map_rec_size, &
519 1112 : map_send_size
520 1112 : INTEGER, ALLOCATABLE, DIMENSION(:, :) :: blocks_ranges_col, blocks_ranges_row, &
521 1112 : grid_2_mepos, grid_ref_2_send_pos, &
522 1112 : mepos_2_grid
523 1112 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
524 : LOGICAL :: convert_pos, my_do_release_mat
525 : REAL(KIND=dp) :: part_col, part_row
526 : TYPE(integ_mat_buffer_type), ALLOCATABLE, &
527 1112 : DIMENSION(:) :: buffer_rec, buffer_send
528 : TYPE(mp_para_env_type), POINTER :: para_env
529 1112 : TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:) :: req_send
530 :
531 1112 : CALL timeset(routineN, handle)
532 :
533 1112 : my_do_release_mat = .TRUE.
534 1112 : IF (PRESENT(do_release_mat)) my_do_release_mat = do_release_mat
535 :
536 1112 : CALL cp_fm_struct_get(fm_struct, para_env=para_env, nrow_global=num_rows, ncol_global=num_cols)
537 :
538 : ! create the full matrix, (num_rows x num_cols)
539 1112 : CALL cp_fm_create(fm_mat, fm_struct, name="fm_mat")
540 1112 : CALL cp_fm_set_all(matrix=fm_mat, alpha=0.0_dp)
541 :
542 : ! start filling procedure
543 : ! fill the matrix
544 : CALL cp_fm_get_info(matrix=fm_mat, &
545 : nrow_local=nrow_local, &
546 : ncol_local=ncol_local, &
547 : row_indices=row_indices, &
548 : col_indices=col_indices, &
549 : nrow_block=nrow_block, &
550 1112 : ncol_block=ncol_block)
551 1112 : myprow = fm_mat%matrix_struct%context%mepos(1)
552 1112 : mypcol = fm_mat%matrix_struct%context%mepos(2)
553 1112 : nprow = fm_mat%matrix_struct%context%num_pe(1)
554 1112 : npcol = fm_mat%matrix_struct%context%num_pe(2)
555 :
556 : ! start the communication part
557 : ! 0) create array containing the processes position
558 : ! and supporting infos
559 1112 : CALL timeset(routineN//"_info", handle2)
560 4448 : ALLOCATE (grid_2_mepos(0:nprow - 1, 0:npcol - 1))
561 4330 : grid_2_mepos = 0
562 3336 : ALLOCATE (mepos_2_grid(2, 0:para_env%num_pe - 1))
563 1112 : grid_2_mepos(myprow, mypcol) = para_env%mepos
564 : ! sum infos
565 1112 : CALL para_env%sum(grid_2_mepos)
566 3336 : CALL para_env%allgather([myprow, mypcol], mepos_2_grid)
567 :
568 : ! 1) loop over my local data and define a map for the proc to send data
569 3336 : ALLOCATE (map_send_size(0:para_env%num_pe - 1))
570 3218 : map_send_size = 0
571 1112 : dummy_proc = 0
572 63345 : DO jjB = my_start_col, my_end_col
573 : send_pcol = cp_fm_indxg2p(jjB, ncol_block, dummy_proc, &
574 62233 : fm_mat%matrix_struct%first_p_pos(2), npcol)
575 3601117 : DO iiB = my_start_row, my_end_row
576 : send_prow = cp_fm_indxg2p(iiB, nrow_block, dummy_proc, &
577 3537772 : fm_mat%matrix_struct%first_p_pos(1), nprow)
578 3537772 : proc_send = grid_2_mepos(send_prow, send_pcol)
579 3600005 : map_send_size(proc_send) = map_send_size(proc_send) + 1
580 : END DO
581 : END DO
582 :
583 : ! 2) loop over my local data of fm_mat and define a map for the proc from which rec data
584 3336 : ALLOCATE (map_rec_size(0:para_env%num_pe - 1))
585 3218 : map_rec_size = 0
586 1112 : part_row = REAL(num_rows, KIND=dp)/REAL(ngroup_row, KIND=dp)
587 1112 : part_col = REAL(num_cols, KIND=dp)/REAL(ngroup_col, KIND=dp)
588 :
589 : ! In some cases we have to convert global positions to positions in a subgroup (RPA)
590 1112 : convert_pos = .FALSE.
591 1112 : IF (PRESENT(integ_group_size) .AND. PRESENT(color_group)) convert_pos = .TRUE.
592 :
593 43277 : DO jjB = 1, nrow_local
594 42165 : j_global = row_indices(jjB)
595 : ! check the group holding this element
596 : ! dirty way, if someone has a better idea ...
597 42165 : rec_prow = INT(REAL(j_global - 1, KIND=dp)/part_row)
598 42165 : rec_prow = MAX(0, rec_prow)
599 42165 : rec_prow = MIN(rec_prow, ngroup_row - 1)
600 : DO
601 42165 : itmp = get_limit(num_rows, ngroup_row, rec_prow)
602 42165 : IF (j_global >= itmp(1) .AND. j_global <= itmp(2)) EXIT
603 0 : IF (j_global < itmp(1)) rec_prow = rec_prow - 1
604 0 : IF (j_global > itmp(2)) rec_prow = rec_prow + 1
605 : END DO
606 :
607 42165 : IF (convert_pos) THEN
608 : ! if the group is not in the same RPA group cycle
609 11572 : IF ((rec_prow/integ_group_size) .NE. color_group) CYCLE
610 : ! convert global position to position into the RPA group
611 6918 : rec_prow = MOD(rec_prow, integ_group_size)
612 : END IF
613 :
614 3576395 : DO iiB = 1, ncol_local
615 3537772 : i_global = col_indices(iiB)
616 : ! check the process in the group holding this element
617 3537772 : rec_pcol = INT(REAL(i_global - 1, KIND=dp)/part_col)
618 3537772 : rec_pcol = MAX(0, rec_pcol)
619 3537772 : rec_pcol = MIN(rec_pcol, ngroup_col - 1)
620 : DO
621 3537772 : itmp = get_limit(num_cols, ngroup_col, rec_pcol)
622 3537772 : IF (i_global >= itmp(1) .AND. i_global <= itmp(2)) EXIT
623 0 : IF (i_global < itmp(1)) rec_pcol = rec_pcol - 1
624 0 : IF (i_global > itmp(2)) rec_pcol = rec_pcol + 1
625 : END DO
626 :
627 3537772 : proc_receive = group_grid_2_mepos(rec_prow, rec_pcol)
628 :
629 3579937 : map_rec_size(proc_receive) = map_rec_size(proc_receive) + 1
630 :
631 : END DO ! i_global
632 : END DO ! j_global
633 :
634 : ! 3) check if the local data has to be stored in the new fm_mat
635 1112 : IF (map_rec_size(para_env%mepos) > 0) THEN
636 101674 : DO jjB = 1, ncol_local
637 100564 : j_global = col_indices(jjB)
638 101674 : IF (j_global >= my_start_col .AND. j_global <= my_end_col) THEN
639 2893662 : DO iiB = 1, nrow_local
640 2831429 : i_global = row_indices(iiB)
641 2893662 : IF (i_global >= my_start_row .AND. i_global <= my_end_row) THEN
642 2218008 : fm_mat%local_data(iiB, jjB) = mat2D(i_global - my_start_row + 1, j_global - my_start_col + 1)
643 : END IF
644 : END DO
645 : END IF
646 : END DO
647 : END IF
648 1112 : CALL timestop(handle2)
649 :
650 : ! 4) reorder data in the send_buffer
651 1112 : CALL timeset(routineN//"_buffer_send", handle2)
652 : ! count the number of messages to send
653 1112 : number_of_send = 0
654 2106 : DO proc_shift = 1, para_env%num_pe - 1
655 994 : proc_send = MODULO(para_env%mepos + proc_shift, para_env%num_pe)
656 2106 : IF (map_send_size(proc_send) > 0) THEN
657 952 : number_of_send = number_of_send + 1
658 : END IF
659 : END DO
660 : ! allocate the structure that will hold the messages to be sent
661 4128 : ALLOCATE (buffer_send(number_of_send))
662 : ! grid_ref_2_send_pos is an array (map) that given a pair
663 : ! (ref_send_prow,ref_send_pcol) returns
664 : ! the position in the buffer_send associated to that process
665 4448 : ALLOCATE (grid_ref_2_send_pos(0:nprow - 1, 0:npcol - 1))
666 4330 : grid_ref_2_send_pos = 0
667 : ! finalize the allocation of buffer_send with the actual size
668 : ! of each message (actual size is size_send_buffer)
669 1112 : send_counter = 0
670 2106 : DO proc_shift = 1, para_env%num_pe - 1
671 994 : proc_send = MODULO(para_env%mepos + proc_shift, para_env%num_pe)
672 994 : size_send_buffer = map_send_size(proc_send)
673 2106 : IF (map_send_size(proc_send) > 0) THEN
674 952 : send_counter = send_counter + 1
675 : ! allocate the sending buffer (msg)
676 2856 : ALLOCATE (buffer_send(send_counter)%msg(size_send_buffer))
677 1320716 : buffer_send(send_counter)%msg = 0.0_dp
678 952 : buffer_send(send_counter)%proc = proc_send
679 : ! get the pointer to prow, pcol of the process that has
680 : ! to receive this message
681 952 : ref_send_prow = mepos_2_grid(1, proc_send)
682 952 : ref_send_pcol = mepos_2_grid(2, proc_send)
683 : ! save the rank of the process that has to receive this message
684 952 : grid_ref_2_send_pos(ref_send_prow, ref_send_pcol) = send_counter
685 : END IF
686 : END DO
687 :
688 : ! loop over the locally held data and fill the buffer_send
689 : ! for doing that we need an array that keep track if the
690 : ! sequential increase of the index for each message
691 3176 : ALLOCATE (iii_vet(number_of_send))
692 2064 : iii_vet = 0
693 68133 : DO iiB = my_start_row, my_end_row
694 : send_prow = cp_fm_indxg2p(iiB, nrow_block, dummy_proc, &
695 67021 : fm_mat%matrix_struct%first_p_pos(1), nprow)
696 3605905 : DO jjB = my_start_col, my_end_col
697 : send_pcol = cp_fm_indxg2p(jjB, ncol_block, dummy_proc, &
698 3537772 : fm_mat%matrix_struct%first_p_pos(2), npcol)
699 : ! we don't need to send to ourselves
700 3537772 : IF (grid_2_mepos(send_prow, send_pcol) == para_env%mepos) CYCLE
701 :
702 1319764 : send_counter = grid_ref_2_send_pos(send_prow, send_pcol)
703 1319764 : iii_vet(send_counter) = iii_vet(send_counter) + 1
704 1319764 : iii = iii_vet(send_counter)
705 3604793 : buffer_send(send_counter)%msg(iii) = mat2D(iiB - my_start_row + 1, jjB - my_start_col + 1)
706 : END DO
707 : END DO
708 :
709 1112 : DEALLOCATE (iii_vet)
710 1112 : DEALLOCATE (grid_ref_2_send_pos)
711 1112 : IF (my_do_release_mat) DEALLOCATE (mat2D)
712 1112 : CALL timestop(handle2)
713 :
714 : ! 5) similarly to what done for the buffer_send
715 : ! create the buffer for receive, post the message with irecv
716 : ! and send the messages non-blocking
717 1112 : CALL timeset(routineN//"_isendrecv", handle2)
718 : ! count the number of messages to be received
719 1112 : number_of_rec = 0
720 2106 : DO proc_shift = 1, para_env%num_pe - 1
721 994 : proc_receive = MODULO(para_env%mepos - proc_shift, para_env%num_pe)
722 2106 : IF (map_rec_size(proc_receive) > 0) THEN
723 952 : number_of_rec = number_of_rec + 1
724 : END IF
725 : END DO
726 :
727 4128 : ALLOCATE (buffer_rec(number_of_rec))
728 :
729 1112 : rec_counter = 0
730 2106 : DO proc_shift = 1, para_env%num_pe - 1
731 994 : proc_receive = MODULO(para_env%mepos - proc_shift, para_env%num_pe)
732 994 : size_rec_buffer = map_rec_size(proc_receive)
733 2106 : IF (map_rec_size(proc_receive) > 0) THEN
734 952 : rec_counter = rec_counter + 1
735 : ! prepare the buffer for receive
736 2856 : ALLOCATE (buffer_rec(rec_counter)%msg(size_rec_buffer))
737 1320716 : buffer_rec(rec_counter)%msg = 0.0_dp
738 952 : buffer_rec(rec_counter)%proc = proc_receive
739 : ! post the message to be received
740 : CALL para_env%irecv(buffer_rec(rec_counter)%msg, proc_receive, &
741 952 : buffer_rec(rec_counter)%msg_req)
742 : END IF
743 : END DO
744 :
745 : ! send messages
746 5240 : ALLOCATE (req_send(number_of_send))
747 1112 : send_counter = 0
748 2106 : DO proc_shift = 1, para_env%num_pe - 1
749 994 : proc_send = MODULO(para_env%mepos + proc_shift, para_env%num_pe)
750 2106 : IF (map_send_size(proc_send) > 0) THEN
751 952 : send_counter = send_counter + 1
752 : CALL para_env%isend(buffer_send(send_counter)%msg, proc_send, &
753 952 : buffer_send(send_counter)%msg_req)
754 952 : req_send(send_counter) = buffer_send(send_counter)%msg_req
755 : END IF
756 : END DO
757 1112 : CALL timestop(handle2)
758 :
759 : ! 6) fill the fm_mat matrix with the messages received from the
760 : ! other processes
761 1112 : CALL timeset(routineN//"_fill", handle2)
762 : ! In order to perform this step efficiently first we have to know the
763 : ! ranges of the blocks over which a given process hold its local data.
764 : ! Start with the rows ...
765 1112 : my_num_row_blocks = 1
766 42165 : DO iiB = 1, nrow_local - 1
767 41053 : IF (ABS(row_indices(iiB + 1) - row_indices(iiB)) == 1) CYCLE
768 42165 : my_num_row_blocks = my_num_row_blocks + 1
769 : END DO
770 3336 : ALLOCATE (blocks_ranges_row(2, my_num_row_blocks))
771 4646 : blocks_ranges_row = 0
772 1112 : blocks_ranges_row(1, 1) = row_indices(1)
773 1112 : iii = 1
774 42165 : DO iiB = 1, nrow_local - 1
775 41053 : IF (ABS(row_indices(iiB + 1) - row_indices(iiB)) == 1) CYCLE
776 66 : iii = iii + 1
777 66 : blocks_ranges_row(2, iii - 1) = row_indices(iiB)
778 42165 : blocks_ranges_row(1, iii) = row_indices(iiB + 1)
779 : END DO
780 1112 : blocks_ranges_row(2, my_num_row_blocks) = row_indices(MAX(nrow_local, 1))
781 : ! ... and columns
782 1112 : my_num_col_blocks = 1
783 100566 : DO jjB = 1, ncol_local - 1
784 99454 : IF (ABS(col_indices(jjB + 1) - col_indices(jjB)) == 1) CYCLE
785 100566 : my_num_col_blocks = my_num_col_blocks + 1
786 : END DO
787 3336 : ALLOCATE (blocks_ranges_col(2, my_num_col_blocks))
788 4448 : blocks_ranges_col = 0
789 1112 : blocks_ranges_col(1, 1) = col_indices(1)
790 1112 : iii = 1
791 100566 : DO jjB = 1, ncol_local - 1
792 99454 : IF (ABS(col_indices(jjB + 1) - col_indices(jjB)) == 1) CYCLE
793 0 : iii = iii + 1
794 0 : blocks_ranges_col(2, iii - 1) = col_indices(jjB)
795 100566 : blocks_ranges_col(1, iii) = col_indices(jjB + 1)
796 : END DO
797 1112 : blocks_ranges_col(2, my_num_col_blocks) = col_indices(MAX(ncol_local, 1))
798 :
799 1112 : rec_counter = 0
800 2106 : DO proc_shift = 1, para_env%num_pe - 1
801 994 : proc_receive = MODULO(para_env%mepos - proc_shift, para_env%num_pe)
802 994 : size_rec_buffer = map_rec_size(proc_receive)
803 :
804 2106 : IF (map_rec_size(proc_receive) > 0) THEN
805 952 : rec_counter = rec_counter + 1
806 :
807 952 : CALL get_group_dist(gd_col, proc_receive, rec_col_start, rec_col_end, rec_col_size)
808 :
809 : ! precompute the number of received columns and relative index
810 952 : num_rec_cols = 0
811 1904 : DO jjB = 1, my_num_col_blocks
812 952 : start_col_block = MAX(blocks_ranges_col(1, jjB), rec_col_start)
813 952 : end_col_block = MIN(blocks_ranges_col(2, jjB), rec_col_end)
814 43175 : DO j_sub = start_col_block, end_col_block
815 42223 : num_rec_cols = num_rec_cols + 1
816 : END DO
817 : END DO
818 2856 : ALLOCATE (index_col_rec(num_rec_cols))
819 42223 : index_col_rec = 0
820 952 : iii = 0
821 1904 : DO jjB = 1, my_num_col_blocks
822 952 : start_col_block = MAX(blocks_ranges_col(1, jjB), rec_col_start)
823 952 : end_col_block = MIN(blocks_ranges_col(2, jjB), rec_col_end)
824 43175 : DO j_sub = start_col_block, end_col_block
825 41271 : iii = iii + 1
826 : j_local = cp_fm_indxg2l(j_sub, ncol_block, dummy_proc, &
827 41271 : fm_mat%matrix_struct%first_p_pos(2), npcol)
828 42223 : index_col_rec(iii) = j_local
829 : END DO
830 : END DO
831 :
832 952 : CALL get_group_dist(gd_row, proc_receive, rec_row_start, rec_row_end, rec_row_size)
833 :
834 : ! wait for the message
835 952 : CALL buffer_rec(rec_counter)%msg_req%wait()
836 :
837 : ! fill the local data in fm_mat
838 952 : iii = 0
839 1970 : DO iiB = 1, my_num_row_blocks
840 1018 : start_row_block = MAX(blocks_ranges_row(1, iiB), rec_row_start)
841 1018 : end_row_block = MIN(blocks_ranges_row(2, iiB), rec_row_end)
842 32123 : DO i_sub = start_row_block, end_row_block
843 : i_local = cp_fm_indxg2l(i_sub, nrow_block, dummy_proc, &
844 30153 : fm_mat%matrix_struct%first_p_pos(1), nprow)
845 1350935 : DO jjB = 1, num_rec_cols
846 1319764 : iii = iii + 1
847 1319764 : j_local = index_col_rec(jjB)
848 1349917 : fm_mat%local_data(i_local, j_local) = buffer_rec(rec_counter)%msg(iii)
849 : END DO
850 : END DO
851 : END DO
852 :
853 952 : DEALLOCATE (buffer_rec(rec_counter)%msg)
854 2856 : DEALLOCATE (index_col_rec)
855 : END IF
856 : END DO
857 2064 : DEALLOCATE (buffer_rec)
858 :
859 1112 : DEALLOCATE (blocks_ranges_row)
860 1112 : DEALLOCATE (blocks_ranges_col)
861 :
862 1112 : CALL timestop(handle2)
863 :
864 : ! 7) Finally wait for all messeges to be sent
865 1112 : CALL timeset(routineN//"_waitall", handle2)
866 1112 : CALL mp_waitall(req_send(:))
867 2064 : DO send_counter = 1, number_of_send
868 2064 : DEALLOCATE (buffer_send(send_counter)%msg)
869 : END DO
870 2064 : DEALLOCATE (buffer_send)
871 1112 : CALL timestop(handle2)
872 :
873 1112 : DEALLOCATE (map_send_size)
874 1112 : DEALLOCATE (map_rec_size)
875 1112 : DEALLOCATE (grid_2_mepos)
876 1112 : DEALLOCATE (mepos_2_grid)
877 :
878 1112 : CALL timestop(handle)
879 :
880 8896 : END SUBROUTINE array2fm
881 :
882 : ! **************************************************************************************************
883 : !> \brief redistribute fm to local part of array
884 : !> \param mat2D ...
885 : !> \param my_rows ...
886 : !> \param my_start_row ...
887 : !> \param my_end_row ...
888 : !> \param my_cols ...
889 : !> \param my_start_col ...
890 : !> \param my_end_col ...
891 : !> \param group_grid_2_mepos ...
892 : !> \param mepos_2_grid_group ...
893 : !> \param ngroup_row ...
894 : !> \param ngroup_col ...
895 : !> \param fm_mat ...
896 : ! **************************************************************************************************
897 706 : SUBROUTINE fm2array(mat2D, my_rows, my_start_row, my_end_row, &
898 : my_cols, my_start_col, my_end_col, &
899 : group_grid_2_mepos, mepos_2_grid_group, &
900 : ngroup_row, ngroup_col, &
901 : fm_mat)
902 :
903 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :), &
904 : INTENT(OUT) :: mat2D
905 : INTEGER, INTENT(IN) :: my_rows, my_start_row, my_end_row, &
906 : my_cols, my_start_col, my_end_col
907 : INTEGER, ALLOCATABLE, DIMENSION(:, :), INTENT(IN) :: group_grid_2_mepos, mepos_2_grid_group
908 : INTEGER, INTENT(IN) :: ngroup_row, ngroup_col
909 : TYPE(cp_fm_type), INTENT(INOUT) :: fm_mat
910 :
911 : CHARACTER(LEN=*), PARAMETER :: routineN = 'fm2array'
912 :
913 : INTEGER :: dummy_proc, handle, handle2, i_global, iiB, iii, itmp(2), j_global, jjB, mypcol, &
914 : myprow, ncol_block, ncol_local, npcol, nprow, nrow_block, nrow_local, num_cols, &
915 : num_rec_rows, num_rows, number_of_rec, number_of_send, proc_receive, proc_send, &
916 : proc_shift, rec_col_size, rec_counter, rec_pcol, rec_prow, rec_row_size, ref_send_pcol, &
917 : ref_send_prow, send_counter, send_pcol, send_prow, size_rec_buffer, size_send_buffer
918 706 : INTEGER, ALLOCATABLE, DIMENSION(:) :: iii_vet, index_row_rec, map_rec_size, &
919 706 : map_send_size
920 706 : INTEGER, ALLOCATABLE, DIMENSION(:, :) :: grid_2_mepos, grid_ref_2_send_pos, &
921 706 : mepos_2_grid, sizes
922 706 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
923 : REAL(KIND=dp) :: part_col, part_row
924 : TYPE(integ_mat_buffer_type), ALLOCATABLE, &
925 706 : DIMENSION(:) :: buffer_rec, buffer_send
926 : TYPE(mp_para_env_type), POINTER :: para_env
927 706 : TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:) :: req_send
928 :
929 706 : CALL timeset(routineN, handle)
930 :
931 : ! allocate the array
932 2824 : ALLOCATE (mat2D(my_rows, my_cols))
933 2183909 : mat2D = 0.0_dp
934 :
935 : ! start procedure
936 : ! get information of from the full matrix
937 : CALL cp_fm_get_info(matrix=fm_mat, &
938 : nrow_local=nrow_local, &
939 : ncol_local=ncol_local, &
940 : row_indices=row_indices, &
941 : col_indices=col_indices, &
942 : nrow_block=nrow_block, &
943 : ncol_block=ncol_block, &
944 : nrow_global=num_rows, &
945 : ncol_global=num_cols, &
946 706 : para_env=para_env)
947 706 : myprow = fm_mat%matrix_struct%context%mepos(1)
948 706 : mypcol = fm_mat%matrix_struct%context%mepos(2)
949 706 : nprow = fm_mat%matrix_struct%context%num_pe(1)
950 706 : npcol = fm_mat%matrix_struct%context%num_pe(2)
951 :
952 : ! start the communication part
953 : ! 0) create array containing the processes position
954 : ! and supporting infos
955 706 : CALL timeset(routineN//"_info", handle2)
956 2824 : ALLOCATE (grid_2_mepos(0:nprow - 1, 0:npcol - 1))
957 2824 : grid_2_mepos = 0
958 2118 : ALLOCATE (mepos_2_grid(2, 0:para_env%num_pe - 1))
959 :
960 : ! fill the info array
961 706 : grid_2_mepos(myprow, mypcol) = para_env%mepos
962 :
963 : ! sum infos
964 706 : CALL para_env%sum(grid_2_mepos)
965 2118 : CALL para_env%allgather([myprow, mypcol], mepos_2_grid)
966 :
967 : ! 1) loop over my local data and define a map for the proc to send data
968 2118 : ALLOCATE (map_send_size(0:para_env%num_pe - 1))
969 2118 : map_send_size = 0
970 706 : part_row = REAL(num_rows, KIND=dp)/REAL(ngroup_row, KIND=dp)
971 706 : part_col = REAL(num_cols, KIND=dp)/REAL(ngroup_col, KIND=dp)
972 :
973 59890 : DO jjB = 1, ncol_local
974 59184 : j_global = col_indices(jjB)
975 : ! check the group holding this element
976 : ! dirty way, if someone has a better idea ...
977 59184 : send_pcol = INT(REAL(j_global - 1, KIND=dp)/part_col)
978 59184 : send_pcol = MAX(0, send_pcol)
979 59184 : send_pcol = MIN(send_pcol, ngroup_col - 1)
980 : DO
981 59184 : itmp = get_limit(num_cols, ngroup_col, send_pcol)
982 59184 : IF (j_global >= itmp(1) .AND. j_global <= itmp(2)) EXIT
983 0 : IF (j_global < itmp(1)) send_pcol = send_pcol - 1
984 0 : IF (j_global > itmp(2)) send_pcol = send_pcol + 1
985 : END DO
986 :
987 2212065 : DO iiB = 1, nrow_local
988 2152175 : i_global = row_indices(iiB)
989 : ! check the process in the group holding this element
990 2152175 : send_prow = INT(REAL(i_global - 1, KIND=dp)/part_row)
991 2152175 : send_prow = MAX(0, send_prow)
992 2152175 : send_prow = MIN(send_prow, ngroup_row - 1)
993 : DO
994 2152175 : itmp = get_limit(num_rows, ngroup_row, send_prow)
995 2152175 : IF (i_global >= itmp(1) .AND. i_global <= itmp(2)) EXIT
996 0 : IF (i_global < itmp(1)) send_prow = send_prow - 1
997 0 : IF (i_global > itmp(2)) send_prow = send_prow + 1
998 : END DO
999 :
1000 2152175 : proc_send = group_grid_2_mepos(send_prow, send_pcol)
1001 :
1002 2211359 : map_send_size(proc_send) = map_send_size(proc_send) + 1
1003 :
1004 : END DO ! i_global
1005 : END DO ! j_global
1006 :
1007 : ! 2) loop over my local data of the array and define a map for the proc from which rec data
1008 2118 : ALLOCATE (map_rec_size(0:para_env%num_pe - 1))
1009 2118 : map_rec_size = 0
1010 706 : dummy_proc = 0
1011 31734 : DO jjB = my_start_col, my_end_col
1012 : rec_pcol = cp_fm_indxg2p(jjB, ncol_block, dummy_proc, &
1013 31028 : fm_mat%matrix_struct%first_p_pos(2), npcol)
1014 2183909 : DO iiB = my_start_row, my_end_row
1015 : rec_prow = cp_fm_indxg2p(iiB, nrow_block, dummy_proc, &
1016 2152175 : fm_mat%matrix_struct%first_p_pos(1), nprow)
1017 2152175 : proc_receive = grid_2_mepos(rec_prow, rec_pcol)
1018 2183203 : map_rec_size(proc_receive) = map_rec_size(proc_receive) + 1
1019 : END DO
1020 : END DO
1021 :
1022 : ! 3) check if the local data in the fm_mat has to be stored in the new array
1023 706 : IF (map_rec_size(para_env%mepos) > 0) THEN
1024 59890 : DO jjB = 1, ncol_local
1025 59184 : j_global = col_indices(jjB)
1026 59890 : IF (j_global >= my_start_col .AND. j_global <= my_end_col) THEN
1027 1171990 : DO iiB = 1, nrow_local
1028 1140962 : i_global = row_indices(iiB)
1029 1171990 : IF (i_global >= my_start_row .AND. i_global <= my_end_row) THEN
1030 1140082 : mat2D(i_global - my_start_row + 1, j_global - my_start_col + 1) = fm_mat%local_data(iiB, jjB)
1031 : END IF
1032 : END DO
1033 : END IF
1034 : END DO
1035 : END IF
1036 706 : CALL timestop(handle2)
1037 :
1038 : ! 4) reorder data in the send_buffer
1039 706 : CALL timeset(routineN//"_buffer_send", handle2)
1040 : ! count the number of messages to send
1041 706 : number_of_send = 0
1042 1412 : DO proc_shift = 1, para_env%num_pe - 1
1043 706 : proc_send = MODULO(para_env%mepos + proc_shift, para_env%num_pe)
1044 1412 : IF (map_send_size(proc_send) > 0) THEN
1045 676 : number_of_send = number_of_send + 1
1046 : END IF
1047 : END DO
1048 : ! allocate the structure that will hold the messages to be sent
1049 2764 : ALLOCATE (buffer_send(number_of_send))
1050 : ! grid_ref_2_send_pos is an array (map) that given a pair
1051 : ! (ref_send_prow,ref_send_pcol) returns
1052 : ! the position in the buffer_send associated to that process
1053 :
1054 2824 : ALLOCATE (grid_ref_2_send_pos(0:ngroup_row - 1, 0:ngroup_col - 1))
1055 3498 : grid_ref_2_send_pos = 0
1056 :
1057 : ! finalize the allocation of buffer_send with the actual size
1058 : ! of each message (actual size is size_send_buffer)
1059 706 : send_counter = 0
1060 1412 : DO proc_shift = 1, para_env%num_pe - 1
1061 706 : proc_send = MODULO(para_env%mepos + proc_shift, para_env%num_pe)
1062 706 : size_send_buffer = map_send_size(proc_send)
1063 1412 : IF (map_send_size(proc_send) > 0) THEN
1064 676 : send_counter = send_counter + 1
1065 : ! allocate the sending buffer (msg)
1066 2028 : ALLOCATE (buffer_send(send_counter)%msg(size_send_buffer))
1067 1012769 : buffer_send(send_counter)%msg = 0.0_dp
1068 676 : buffer_send(send_counter)%proc = proc_send
1069 : ! get the pointer to prow, pcol of the process that has
1070 : ! to receive this message
1071 676 : ref_send_prow = mepos_2_grid_group(1, proc_send)
1072 676 : ref_send_pcol = mepos_2_grid_group(2, proc_send)
1073 : ! save the rank of the process that has to receive this message
1074 676 : grid_ref_2_send_pos(ref_send_prow, ref_send_pcol) = send_counter
1075 : END IF
1076 : END DO
1077 :
1078 : ! loop over the locally held data and fill the buffer_send
1079 : ! for doing that we need an array that keep track if the
1080 : ! sequential increase of the index for each message
1081 2088 : ALLOCATE (iii_vet(number_of_send))
1082 1382 : iii_vet = 0
1083 59890 : DO jjB = 1, ncol_local
1084 59184 : j_global = col_indices(jjB)
1085 : ! check the group holding this element
1086 : ! dirty way, if someone has a better idea ...
1087 59184 : send_pcol = INT(REAL(j_global - 1, KIND=dp)/part_col)
1088 59184 : send_pcol = MAX(0, send_pcol)
1089 59184 : send_pcol = MIN(send_pcol, ngroup_col - 1)
1090 : DO
1091 59184 : itmp = get_limit(num_cols, ngroup_col, send_pcol)
1092 59184 : IF (j_global >= itmp(1) .AND. j_global <= itmp(2)) EXIT
1093 0 : IF (j_global < itmp(1)) send_pcol = send_pcol - 1
1094 0 : IF (j_global > itmp(2)) send_pcol = send_pcol + 1
1095 : END DO
1096 :
1097 2212065 : DO iiB = 1, nrow_local
1098 2152175 : i_global = row_indices(iiB)
1099 : ! check the process in the group holding this element
1100 2152175 : send_prow = INT(REAL(i_global - 1, KIND=dp)/part_row)
1101 2152175 : send_prow = MAX(0, send_prow)
1102 2152175 : send_prow = MIN(send_prow, ngroup_row - 1)
1103 : DO
1104 2152175 : itmp = get_limit(num_rows, ngroup_row, send_prow)
1105 2152175 : IF (i_global >= itmp(1) .AND. i_global <= itmp(2)) EXIT
1106 0 : IF (i_global < itmp(1)) send_prow = send_prow - 1
1107 0 : IF (i_global > itmp(2)) send_prow = send_prow + 1
1108 : END DO
1109 : ! we don't need to send to ourselves
1110 2152175 : IF (group_grid_2_mepos(send_prow, send_pcol) == para_env%mepos) CYCLE
1111 :
1112 1012093 : send_counter = grid_ref_2_send_pos(send_prow, send_pcol)
1113 1012093 : iii_vet(send_counter) = iii_vet(send_counter) + 1
1114 1012093 : iii = iii_vet(send_counter)
1115 2211359 : buffer_send(send_counter)%msg(iii) = fm_mat%local_data(iiB, jjB)
1116 : END DO
1117 : END DO
1118 :
1119 706 : DEALLOCATE (iii_vet)
1120 706 : DEALLOCATE (grid_ref_2_send_pos)
1121 706 : CALL timestop(handle2)
1122 :
1123 : ! 5) similarly to what done for the buffer_send
1124 : ! create the buffer for receive, post the message with irecv
1125 : ! and send the messages with non-blocking
1126 706 : CALL timeset(routineN//"_isendrecv", handle2)
1127 : ! count the number of messages to be received
1128 706 : number_of_rec = 0
1129 1412 : DO proc_shift = 1, para_env%num_pe - 1
1130 706 : proc_receive = MODULO(para_env%mepos - proc_shift, para_env%num_pe)
1131 1412 : IF (map_rec_size(proc_receive) > 0) THEN
1132 676 : number_of_rec = number_of_rec + 1
1133 : END IF
1134 : END DO
1135 :
1136 2764 : ALLOCATE (buffer_rec(number_of_rec))
1137 :
1138 706 : rec_counter = 0
1139 1412 : DO proc_shift = 1, para_env%num_pe - 1
1140 706 : proc_receive = MODULO(para_env%mepos - proc_shift, para_env%num_pe)
1141 706 : size_rec_buffer = map_rec_size(proc_receive)
1142 1412 : IF (map_rec_size(proc_receive) > 0) THEN
1143 676 : rec_counter = rec_counter + 1
1144 : ! prepare the buffer for receive
1145 2028 : ALLOCATE (buffer_rec(rec_counter)%msg(size_rec_buffer))
1146 1012769 : buffer_rec(rec_counter)%msg = 0.0_dp
1147 676 : buffer_rec(rec_counter)%proc = proc_receive
1148 : ! post the message to be received
1149 : CALL para_env%irecv(buffer_rec(rec_counter)%msg, proc_receive, &
1150 676 : buffer_rec(rec_counter)%msg_req)
1151 : END IF
1152 : END DO
1153 :
1154 : ! send messages
1155 3470 : ALLOCATE (req_send(number_of_send))
1156 706 : send_counter = 0
1157 1412 : DO proc_shift = 1, para_env%num_pe - 1
1158 706 : proc_send = MODULO(para_env%mepos + proc_shift, para_env%num_pe)
1159 1412 : IF (map_send_size(proc_send) > 0) THEN
1160 676 : send_counter = send_counter + 1
1161 : CALL para_env%isend(buffer_send(send_counter)%msg, proc_send, &
1162 676 : buffer_send(send_counter)%msg_req)
1163 676 : req_send(send_counter) = buffer_send(send_counter)%msg_req
1164 : END IF
1165 : END DO
1166 706 : CALL timestop(handle2)
1167 :
1168 : ! 6) fill the fm_mat matrix with the messages received from the
1169 : ! other processes
1170 706 : CALL timeset(routineN//"_fill", handle2)
1171 : CALL cp_fm_get_info(matrix=fm_mat, &
1172 : nrow_local=nrow_local, &
1173 706 : ncol_local=ncol_local)
1174 2118 : ALLOCATE (sizes(2, 0:para_env%num_pe - 1))
1175 2118 : CALL para_env%allgather([nrow_local, ncol_local], sizes)
1176 2118 : iiB = MAXVAL(sizes(1, :))
1177 2118 : ALLOCATE (index_row_rec(iiB))
1178 26000 : index_row_rec = 0
1179 706 : rec_counter = 0
1180 1412 : DO proc_shift = 1, para_env%num_pe - 1
1181 706 : proc_receive = MODULO(para_env%mepos - proc_shift, para_env%num_pe)
1182 706 : size_rec_buffer = map_rec_size(proc_receive)
1183 :
1184 1412 : IF (map_rec_size(proc_receive) > 0) THEN
1185 676 : rec_counter = rec_counter + 1
1186 :
1187 676 : rec_col_size = sizes(2, proc_receive)
1188 676 : rec_row_size = sizes(1, proc_receive)
1189 :
1190 : ! precompute the indeces of the rows
1191 676 : num_rec_rows = 0
1192 24437 : DO iiB = 1, rec_row_size
1193 : i_global = cp_fm_indxl2g(iiB, nrow_block, mepos_2_grid(1, proc_receive), &
1194 23761 : fm_mat%matrix_struct%first_p_pos(1), nprow)
1195 24437 : IF (i_global >= my_start_row .AND. i_global <= my_end_row) THEN
1196 23636 : num_rec_rows = num_rec_rows + 1
1197 23636 : index_row_rec(num_rec_rows) = i_global
1198 : END IF
1199 : END DO
1200 :
1201 676 : CALL buffer_rec(rec_counter)%msg_req%wait()
1202 :
1203 676 : iii = 0
1204 57208 : DO jjB = 1, rec_col_size
1205 : j_global = cp_fm_indxl2g(jjB, ncol_block, mepos_2_grid(2, proc_receive), &
1206 56532 : fm_mat%matrix_struct%first_p_pos(2), npcol)
1207 57208 : IF (j_global >= my_start_col .AND. j_global <= my_end_col) THEN
1208 1040469 : DO iiB = 1, num_rec_rows
1209 1012093 : i_global = index_row_rec(iiB)
1210 1012093 : iii = iii + 1
1211 1040469 : mat2D(i_global - my_start_row + 1, j_global - my_start_col + 1) = buffer_rec(rec_counter)%msg(iii)
1212 : END DO
1213 : END IF
1214 : END DO
1215 :
1216 676 : DEALLOCATE (buffer_rec(rec_counter)%msg)
1217 : END IF
1218 : END DO
1219 1382 : DEALLOCATE (buffer_rec)
1220 706 : DEALLOCATE (index_row_rec)
1221 706 : CALL cp_fm_release(fm_mat)
1222 706 : CALL timestop(handle2)
1223 :
1224 : ! 7) Finally wait for all messeges to be sent
1225 706 : CALL timeset(routineN//"_waitall", handle2)
1226 706 : CALL mp_waitall(req_send(:))
1227 1382 : DO send_counter = 1, number_of_send
1228 1382 : DEALLOCATE (buffer_send(send_counter)%msg)
1229 : END DO
1230 1382 : DEALLOCATE (buffer_send)
1231 706 : CALL timestop(handle2)
1232 :
1233 706 : CALL timestop(handle)
1234 :
1235 4942 : END SUBROUTINE fm2array
1236 :
1237 : ! **************************************************************************************************
1238 : !> \brief redistribute 2D representation of 3d tensor to a set of dbcsr matrices
1239 : !> \param Gamma_2D ...
1240 : !> \param homo ...
1241 : !> \param virtual ...
1242 : !> \param dimen_ia ...
1243 : !> \param para_env_sub ...
1244 : !> \param my_ia_start ...
1245 : !> \param my_ia_end ...
1246 : !> \param my_group_L_size ...
1247 : !> \param gd_ia ...
1248 : !> \param dbcsr_Gamma ...
1249 : !> \param mo_coeff_o ...
1250 : ! **************************************************************************************************
1251 344 : SUBROUTINE create_dbcsr_gamma(Gamma_2D, homo, virtual, dimen_ia, para_env_sub, &
1252 : my_ia_start, my_ia_end, my_group_L_size, &
1253 344 : gd_ia, dbcsr_Gamma, mo_coeff_o)
1254 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: Gamma_2D
1255 : INTEGER, INTENT(IN) :: homo, virtual, dimen_ia
1256 : TYPE(mp_para_env_type), INTENT(IN), POINTER :: para_env_sub
1257 : INTEGER, INTENT(IN) :: my_ia_start, my_ia_end, my_group_L_size
1258 : TYPE(group_dist_d1_type), INTENT(IN) :: gd_ia
1259 : TYPE(dbcsr_p_type), DIMENSION(:), INTENT(INOUT) :: dbcsr_Gamma
1260 : TYPE(dbcsr_type), INTENT(INOUT) :: mo_coeff_o
1261 :
1262 : CHARACTER(LEN=*), PARAMETER :: routineN = 'create_dbcsr_gamma'
1263 :
1264 : INTEGER :: dummy_proc, handle, i_global, i_local, iaia, iiB, iii, itmp(2), j_global, &
1265 : j_local, jjB, jjj, kkB, mypcol, myprow, ncol_block, ncol_local, npcol, nprow, nrow_block, &
1266 : nrow_local, number_of_rec, number_of_send, proc_receive, proc_send, proc_shift, &
1267 : rec_counter, rec_iaia_end, rec_iaia_size, rec_iaia_start, rec_pcol, rec_prow, &
1268 : ref_send_pcol, ref_send_prow, send_counter, send_pcol, send_prow, size_rec_buffer, &
1269 : size_send_buffer
1270 344 : INTEGER, ALLOCATABLE, DIMENSION(:) :: iii_vet, map_rec_size, map_send_size
1271 344 : INTEGER, ALLOCATABLE, DIMENSION(:, :) :: grid_2_mepos, grid_ref_2_send_pos, &
1272 344 : indeces_map_my, mepos_2_grid
1273 344 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1274 : REAL(KIND=dp) :: part_ia
1275 : TYPE(cp_blacs_env_type), POINTER :: blacs_env
1276 : TYPE(cp_fm_struct_type), POINTER :: fm_struct
1277 : TYPE(cp_fm_type) :: fm_ia
1278 344 : TYPE(index_map), ALLOCATABLE, DIMENSION(:) :: indeces_rec
1279 : TYPE(integ_mat_buffer_type), ALLOCATABLE, &
1280 344 : DIMENSION(:) :: buffer_rec, buffer_send
1281 344 : TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:) :: req_send
1282 :
1283 344 : CALL timeset(routineN, handle)
1284 :
1285 : ! create sub blacs env
1286 344 : NULLIFY (blacs_env)
1287 344 : CALL cp_blacs_env_create(blacs_env=blacs_env, para_env=para_env_sub)
1288 :
1289 : ! create the fm_ia buffer matrix
1290 344 : NULLIFY (fm_struct)
1291 : CALL cp_fm_struct_create(fm_struct, context=blacs_env, nrow_global=homo, &
1292 344 : ncol_global=virtual, para_env=para_env_sub)
1293 344 : CALL cp_fm_create(fm_ia, fm_struct, name="fm_ia")
1294 : ! release structure
1295 344 : CALL cp_fm_struct_release(fm_struct)
1296 : ! release blacs_env
1297 344 : CALL cp_blacs_env_release(blacs_env)
1298 :
1299 : ! get array information
1300 : CALL cp_fm_get_info(matrix=fm_ia, &
1301 : nrow_local=nrow_local, &
1302 : ncol_local=ncol_local, &
1303 : row_indices=row_indices, &
1304 : col_indices=col_indices, &
1305 : nrow_block=nrow_block, &
1306 344 : ncol_block=ncol_block)
1307 344 : myprow = fm_ia%matrix_struct%context%mepos(1)
1308 344 : mypcol = fm_ia%matrix_struct%context%mepos(2)
1309 344 : nprow = fm_ia%matrix_struct%context%num_pe(1)
1310 344 : npcol = fm_ia%matrix_struct%context%num_pe(2)
1311 :
1312 : ! 0) create array containing the processes position and supporting infos
1313 1376 : ALLOCATE (grid_2_mepos(0:nprow - 1, 0:npcol - 1))
1314 1048 : grid_2_mepos = 0
1315 1032 : ALLOCATE (mepos_2_grid(2, 0:para_env_sub%num_pe - 1))
1316 : ! fill the info array
1317 344 : grid_2_mepos(myprow, mypcol) = para_env_sub%mepos
1318 : ! sum infos
1319 344 : CALL para_env_sub%sum(grid_2_mepos)
1320 1032 : CALL para_env_sub%allgather([myprow, mypcol], mepos_2_grid)
1321 :
1322 : ! loop over local index range and define the sending map
1323 1032 : ALLOCATE (map_send_size(0:para_env_sub%num_pe - 1))
1324 704 : map_send_size = 0
1325 344 : dummy_proc = 0
1326 19420 : DO iaia = my_ia_start, my_ia_end
1327 19076 : i_global = (iaia - 1)/virtual + 1
1328 19076 : j_global = MOD(iaia - 1, virtual) + 1
1329 : send_prow = cp_fm_indxg2p(i_global, nrow_block, dummy_proc, &
1330 19076 : fm_ia%matrix_struct%first_p_pos(1), nprow)
1331 : send_pcol = cp_fm_indxg2p(j_global, ncol_block, dummy_proc, &
1332 19076 : fm_ia%matrix_struct%first_p_pos(2), npcol)
1333 19076 : proc_send = grid_2_mepos(send_prow, send_pcol)
1334 19420 : map_send_size(proc_send) = map_send_size(proc_send) + 1
1335 : END DO
1336 :
1337 : ! loop over local data of fm_ia and define the receiving map
1338 1032 : ALLOCATE (map_rec_size(0:para_env_sub%num_pe - 1))
1339 704 : map_rec_size = 0
1340 344 : part_ia = REAL(dimen_ia, KIND=dp)/REAL(para_env_sub%num_pe, KIND=dp)
1341 :
1342 1556 : DO iiB = 1, nrow_local
1343 1212 : i_global = row_indices(iiB)
1344 20632 : DO jjB = 1, ncol_local
1345 19076 : j_global = col_indices(jjB)
1346 19076 : iaia = (i_global - 1)*virtual + j_global
1347 19076 : proc_receive = INT(REAL(iaia - 1, KIND=dp)/part_ia)
1348 19076 : proc_receive = MAX(0, proc_receive)
1349 19076 : proc_receive = MIN(proc_receive, para_env_sub%num_pe - 1)
1350 : DO
1351 19076 : itmp = get_limit(dimen_ia, para_env_sub%num_pe, proc_receive)
1352 19076 : IF (iaia >= itmp(1) .AND. iaia <= itmp(2)) EXIT
1353 0 : IF (iaia < itmp(1)) proc_receive = proc_receive - 1
1354 0 : IF (iaia > itmp(2)) proc_receive = proc_receive + 1
1355 : END DO
1356 20288 : map_rec_size(proc_receive) = map_rec_size(proc_receive) + 1
1357 : END DO
1358 : END DO
1359 :
1360 : ! allocate the buffer for sending data
1361 344 : number_of_send = 0
1362 360 : DO proc_shift = 1, para_env_sub%num_pe - 1
1363 16 : proc_send = MODULO(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
1364 360 : IF (map_send_size(proc_send) > 0) THEN
1365 2 : number_of_send = number_of_send + 1
1366 : END IF
1367 : END DO
1368 : ! allocate the structure that will hold the messages to be sent
1369 692 : ALLOCATE (buffer_send(number_of_send))
1370 : ! and the map from the grid of processess to the message position
1371 1376 : ALLOCATE (grid_ref_2_send_pos(0:nprow - 1, 0:npcol - 1))
1372 1048 : grid_ref_2_send_pos = 0
1373 : ! finally allocate each message
1374 344 : send_counter = 0
1375 360 : DO proc_shift = 1, para_env_sub%num_pe - 1
1376 16 : proc_send = MODULO(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
1377 16 : size_send_buffer = map_send_size(proc_send)
1378 360 : IF (map_send_size(proc_send) > 0) THEN
1379 2 : send_counter = send_counter + 1
1380 : ! allocate the sending buffer (msg)
1381 6 : ALLOCATE (buffer_send(send_counter)%msg(size_send_buffer))
1382 2 : buffer_send(send_counter)%proc = proc_send
1383 : ! get the pointer to prow, pcol of the process that has
1384 : ! to receive this message
1385 2 : ref_send_prow = mepos_2_grid(1, proc_send)
1386 2 : ref_send_pcol = mepos_2_grid(2, proc_send)
1387 : ! save the rank of the process that has to receive this message
1388 2 : grid_ref_2_send_pos(ref_send_prow, ref_send_pcol) = send_counter
1389 : END IF
1390 : END DO
1391 :
1392 : ! allocate the buffer for receiving data
1393 : number_of_rec = 0
1394 360 : DO proc_shift = 1, para_env_sub%num_pe - 1
1395 16 : proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
1396 360 : IF (map_rec_size(proc_receive) > 0) THEN
1397 2 : number_of_rec = number_of_rec + 1
1398 : END IF
1399 : END DO
1400 : ! allocate the structure that will hold the messages to be received
1401 : ! and relative indeces
1402 692 : ALLOCATE (buffer_rec(number_of_rec))
1403 692 : ALLOCATE (indeces_rec(number_of_rec))
1404 : ! finally allocate each message and fill the array of indeces
1405 344 : rec_counter = 0
1406 360 : DO proc_shift = 1, para_env_sub%num_pe - 1
1407 16 : proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
1408 16 : size_rec_buffer = map_rec_size(proc_receive)
1409 360 : IF (map_rec_size(proc_receive) > 0) THEN
1410 2 : rec_counter = rec_counter + 1
1411 : ! prepare the buffer for receive
1412 6 : ALLOCATE (buffer_rec(rec_counter)%msg(size_rec_buffer))
1413 2 : buffer_rec(rec_counter)%proc = proc_receive
1414 : ! create the indeces array
1415 6 : ALLOCATE (indeces_rec(rec_counter)%map(2, size_rec_buffer))
1416 59 : indeces_rec(rec_counter)%map = 0
1417 2 : CALL get_group_dist(gd_ia, proc_receive, rec_iaia_start, rec_iaia_end, rec_iaia_size)
1418 2 : iii = 0
1419 120 : DO iaia = rec_iaia_start, rec_iaia_end
1420 118 : i_global = (iaia - 1)/virtual + 1
1421 118 : j_global = MOD(iaia - 1, virtual) + 1
1422 : rec_prow = cp_fm_indxg2p(i_global, nrow_block, dummy_proc, &
1423 118 : fm_ia%matrix_struct%first_p_pos(1), nprow)
1424 : rec_pcol = cp_fm_indxg2p(j_global, ncol_block, dummy_proc, &
1425 118 : fm_ia%matrix_struct%first_p_pos(2), npcol)
1426 118 : IF (grid_2_mepos(rec_prow, rec_pcol) /= para_env_sub%mepos) CYCLE
1427 19 : iii = iii + 1
1428 : i_local = cp_fm_indxg2l(i_global, nrow_block, dummy_proc, &
1429 19 : fm_ia%matrix_struct%first_p_pos(1), nprow)
1430 : j_local = cp_fm_indxg2l(j_global, ncol_block, dummy_proc, &
1431 19 : fm_ia%matrix_struct%first_p_pos(2), npcol)
1432 19 : indeces_rec(rec_counter)%map(1, iii) = i_local
1433 120 : indeces_rec(rec_counter)%map(2, iii) = j_local
1434 : END DO
1435 : END IF
1436 : END DO
1437 : ! and create the index map for my local data
1438 344 : IF (map_rec_size(para_env_sub%mepos) > 0) THEN
1439 344 : size_rec_buffer = map_rec_size(para_env_sub%mepos)
1440 1032 : ALLOCATE (indeces_map_my(2, size_rec_buffer))
1441 57515 : indeces_map_my = 0
1442 : iii = 0
1443 19420 : DO iaia = my_ia_start, my_ia_end
1444 19076 : i_global = (iaia - 1)/virtual + 1
1445 19076 : j_global = MOD(iaia - 1, virtual) + 1
1446 : rec_prow = cp_fm_indxg2p(i_global, nrow_block, dummy_proc, &
1447 19076 : fm_ia%matrix_struct%first_p_pos(1), nprow)
1448 : rec_pcol = cp_fm_indxg2p(j_global, ncol_block, dummy_proc, &
1449 19076 : fm_ia%matrix_struct%first_p_pos(2), npcol)
1450 19076 : IF (grid_2_mepos(rec_prow, rec_pcol) /= para_env_sub%mepos) CYCLE
1451 19057 : iii = iii + 1
1452 : i_local = cp_fm_indxg2l(i_global, nrow_block, dummy_proc, &
1453 19057 : fm_ia%matrix_struct%first_p_pos(1), nprow)
1454 : j_local = cp_fm_indxg2l(j_global, ncol_block, dummy_proc, &
1455 19057 : fm_ia%matrix_struct%first_p_pos(2), npcol)
1456 19057 : indeces_map_my(1, iii) = i_local
1457 19420 : indeces_map_my(2, iii) = j_local
1458 : END DO
1459 : END IF
1460 :
1461 : ! auxiliary vector of indeces for the send buffer
1462 690 : ALLOCATE (iii_vet(number_of_send))
1463 : ! vector for the send requests
1464 1036 : ALLOCATE (req_send(number_of_send))
1465 : ! loop over auxiliary basis function and redistribute into a fm
1466 : ! and then compy the fm into a dbcsr matrix
1467 15575 : DO kkB = 1, my_group_L_size
1468 : ! zero the matries of the buffers and post the messages to be received
1469 15231 : CALL cp_fm_set_all(matrix=fm_ia, alpha=0.0_dp)
1470 15231 : rec_counter = 0
1471 16667 : DO proc_shift = 1, para_env_sub%num_pe - 1
1472 1436 : proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
1473 16667 : IF (map_rec_size(proc_receive) > 0) THEN
1474 220 : rec_counter = rec_counter + 1
1475 2310 : buffer_rec(rec_counter)%msg = 0.0_dp
1476 : CALL para_env_sub%irecv(buffer_rec(rec_counter)%msg, proc_receive, &
1477 220 : buffer_rec(rec_counter)%msg_req)
1478 : END IF
1479 : END DO
1480 : ! fill the sending buffer and send the messages
1481 15451 : DO send_counter = 1, number_of_send
1482 17541 : buffer_send(send_counter)%msg = 0.0_dp
1483 : END DO
1484 15451 : iii_vet = 0
1485 : jjj = 0
1486 875195 : DO iaia = my_ia_start, my_ia_end
1487 859964 : i_global = (iaia - 1)/virtual + 1
1488 859964 : j_global = MOD(iaia - 1, virtual) + 1
1489 : send_prow = cp_fm_indxg2p(i_global, nrow_block, dummy_proc, &
1490 859964 : fm_ia%matrix_struct%first_p_pos(1), nprow)
1491 : send_pcol = cp_fm_indxg2p(j_global, ncol_block, dummy_proc, &
1492 859964 : fm_ia%matrix_struct%first_p_pos(2), npcol)
1493 859964 : proc_send = grid_2_mepos(send_prow, send_pcol)
1494 : ! we don't need to send to ourselves
1495 875195 : IF (grid_2_mepos(send_prow, send_pcol) == para_env_sub%mepos) THEN
1496 : ! filling fm_ia with local data
1497 857874 : jjj = jjj + 1
1498 857874 : i_local = indeces_map_my(1, jjj)
1499 857874 : j_local = indeces_map_my(2, jjj)
1500 857874 : fm_ia%local_data(i_local, j_local) = Gamma_2D(iaia - my_ia_start + 1, kkB)
1501 : ELSE
1502 2090 : send_counter = grid_ref_2_send_pos(send_prow, send_pcol)
1503 2090 : iii_vet(send_counter) = iii_vet(send_counter) + 1
1504 2090 : iii = iii_vet(send_counter)
1505 2090 : buffer_send(send_counter)%msg(iii) = Gamma_2D(iaia - my_ia_start + 1, kkB)
1506 : END IF
1507 : END DO
1508 15451 : req_send = mp_request_null
1509 15231 : send_counter = 0
1510 16667 : DO proc_shift = 1, para_env_sub%num_pe - 1
1511 1436 : proc_send = MODULO(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
1512 16667 : IF (map_send_size(proc_send) > 0) THEN
1513 220 : send_counter = send_counter + 1
1514 : CALL para_env_sub%isend(buffer_send(send_counter)%msg, proc_send, &
1515 220 : buffer_send(send_counter)%msg_req)
1516 220 : req_send(send_counter) = buffer_send(send_counter)%msg_req
1517 : END IF
1518 : END DO
1519 :
1520 : ! receive the massages and fill the fm_ia
1521 15231 : rec_counter = 0
1522 16667 : DO proc_shift = 1, para_env_sub%num_pe - 1
1523 1436 : proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
1524 1436 : size_rec_buffer = map_rec_size(proc_receive)
1525 16667 : IF (map_rec_size(proc_receive) > 0) THEN
1526 220 : rec_counter = rec_counter + 1
1527 : ! wait for the message
1528 220 : CALL buffer_rec(rec_counter)%msg_req%wait()
1529 2310 : DO iii = 1, size_rec_buffer
1530 2090 : i_local = indeces_rec(rec_counter)%map(1, iii)
1531 2090 : j_local = indeces_rec(rec_counter)%map(2, iii)
1532 2310 : fm_ia%local_data(i_local, j_local) = buffer_rec(rec_counter)%msg(iii)
1533 : END DO
1534 : END IF
1535 : END DO
1536 :
1537 : ! wait all
1538 15231 : CALL mp_waitall(req_send(:))
1539 :
1540 : ! now create the DBCSR matrix and copy fm_ia into it
1541 15231 : ALLOCATE (dbcsr_Gamma(kkB)%matrix)
1542 : CALL cp_dbcsr_m_by_n_from_template(dbcsr_Gamma(kkB)%matrix, &
1543 : template=mo_coeff_o, &
1544 15231 : m=homo, n=virtual, sym=dbcsr_type_no_symmetry)
1545 15575 : CALL copy_fm_to_dbcsr(fm_ia, dbcsr_Gamma(kkB)%matrix, keep_sparsity=.FALSE.)
1546 :
1547 : END DO
1548 :
1549 : ! deallocate stuff
1550 344 : DEALLOCATE (Gamma_2D)
1551 344 : DEALLOCATE (iii_vet)
1552 344 : DEALLOCATE (req_send)
1553 344 : IF (map_rec_size(para_env_sub%mepos) > 0) THEN
1554 344 : DEALLOCATE (indeces_map_my)
1555 : END IF
1556 346 : DO rec_counter = 1, number_of_rec
1557 2 : DEALLOCATE (indeces_rec(rec_counter)%map)
1558 346 : DEALLOCATE (buffer_rec(rec_counter)%msg)
1559 : END DO
1560 346 : DEALLOCATE (indeces_rec)
1561 346 : DEALLOCATE (buffer_rec)
1562 346 : DO send_counter = 1, number_of_send
1563 346 : DEALLOCATE (buffer_send(send_counter)%msg)
1564 : END DO
1565 346 : DEALLOCATE (buffer_send)
1566 344 : DEALLOCATE (map_send_size)
1567 344 : DEALLOCATE (map_rec_size)
1568 344 : DEALLOCATE (grid_2_mepos)
1569 344 : DEALLOCATE (mepos_2_grid)
1570 :
1571 : ! release buffer matrix
1572 344 : CALL cp_fm_release(fm_ia)
1573 :
1574 344 : CALL timestop(handle)
1575 :
1576 1032 : END SUBROUTINE create_dbcsr_gamma
1577 :
1578 : ! **************************************************************************************************
1579 : !> \brief prepare array for redistribution
1580 : !> \param para_env ...
1581 : !> \param para_env_sub ...
1582 : !> \param ngroup ...
1583 : !> \param group_grid_2_mepos ...
1584 : !> \param mepos_2_grid_group ...
1585 : !> \param pos_info ...
1586 : ! **************************************************************************************************
1587 350 : SUBROUTINE prepare_redistribution(para_env, para_env_sub, ngroup, &
1588 : group_grid_2_mepos, mepos_2_grid_group, &
1589 : pos_info)
1590 : TYPE(mp_para_env_type), INTENT(IN) :: para_env, para_env_sub
1591 : INTEGER, INTENT(IN) :: ngroup
1592 : INTEGER, ALLOCATABLE, DIMENSION(:, :), INTENT(OUT) :: group_grid_2_mepos, mepos_2_grid_group
1593 : INTEGER, ALLOCATABLE, DIMENSION(:), INTENT(OUT), &
1594 : OPTIONAL :: pos_info
1595 :
1596 : INTEGER :: i, pos_group, pos_sub
1597 350 : INTEGER, ALLOCATABLE, DIMENSION(:) :: my_pos_info
1598 :
1599 1050 : ALLOCATE (my_pos_info(0:para_env%num_pe - 1))
1600 350 : CALL para_env%allgather(para_env_sub%mepos, my_pos_info)
1601 :
1602 1400 : ALLOCATE (group_grid_2_mepos(0:para_env_sub%num_pe - 1, 0:ngroup - 1))
1603 1734 : group_grid_2_mepos = 0
1604 1050 : ALLOCATE (mepos_2_grid_group(2, 0:para_env%num_pe - 1))
1605 2450 : mepos_2_grid_group = 0
1606 :
1607 1050 : DO i = 0, para_env%num_pe - 1
1608 : ! calculate position of the group
1609 700 : pos_group = i/para_env_sub%num_pe
1610 : ! calculate position in the subgroup
1611 700 : pos_sub = my_pos_info(i)
1612 : ! fill the map from the grid of groups to process
1613 700 : group_grid_2_mepos(pos_sub, pos_group) = i
1614 : ! and the opposite, from the global pos to the grid pos
1615 700 : mepos_2_grid_group(1, i) = pos_sub
1616 1050 : mepos_2_grid_group(2, i) = pos_group
1617 : END DO
1618 :
1619 350 : IF (PRESENT(pos_info)) CALL move_alloc(my_pos_info, pos_info)
1620 :
1621 350 : END SUBROUTINE prepare_redistribution
1622 :
1623 0 : END MODULE mp2_ri_grad_util
|