Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2025 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief Routines to calculate gradients of RI-GPW-MP2 energy using pw
10 : !> \par History
11 : !> 10.2013 created [Mauro Del Ben]
12 : ! **************************************************************************************************
13 : MODULE mp2_ri_grad
14 : USE atomic_kind_types, ONLY: atomic_kind_type,&
15 : get_atomic_kind_set
16 : USE cell_types, ONLY: cell_type
17 : USE cp_blacs_env, ONLY: cp_blacs_env_type
18 : USE cp_control_types, ONLY: dft_control_type
19 : USE cp_dbcsr_api, ONLY: &
20 : dbcsr_add, dbcsr_copy, dbcsr_create, dbcsr_get_info, dbcsr_multiply, dbcsr_p_type, &
21 : dbcsr_release, dbcsr_set, dbcsr_transposed, dbcsr_type, dbcsr_type_no_symmetry, &
22 : dbcsr_type_symmetric
23 : USE cp_dbcsr_operations, ONLY: copy_dbcsr_to_fm,&
24 : dbcsr_deallocate_matrix_set
25 : USE cp_eri_mme_interface, ONLY: cp_eri_mme_param
26 : USE cp_fm_struct, ONLY: cp_fm_struct_create,&
27 : cp_fm_struct_release,&
28 : cp_fm_struct_type
29 : USE cp_fm_types, ONLY: cp_fm_create,&
30 : cp_fm_get_info,&
31 : cp_fm_release,&
32 : cp_fm_set_all,&
33 : cp_fm_to_fm_submat,&
34 : cp_fm_type
35 : USE input_constants, ONLY: do_eri_gpw,&
36 : do_eri_mme,&
37 : ri_mp2_laplace,&
38 : ri_mp2_method_gpw,&
39 : ri_rpa_method_gpw
40 : USE kinds, ONLY: dp
41 : USE libint_2c_3c, ONLY: compare_potential_types
42 : USE message_passing, ONLY: mp_para_env_release,&
43 : mp_para_env_type,&
44 : mp_request_null,&
45 : mp_request_type,&
46 : mp_waitall
47 : USE mp2_eri, ONLY: mp2_eri_2c_integrate,&
48 : mp2_eri_3c_integrate,&
49 : mp2_eri_deallocate_forces,&
50 : mp2_eri_force
51 : USE mp2_eri_gpw, ONLY: cleanup_gpw,&
52 : integrate_potential_forces_2c,&
53 : integrate_potential_forces_3c_1c,&
54 : integrate_potential_forces_3c_2c,&
55 : prepare_gpw
56 : USE mp2_types, ONLY: integ_mat_buffer_type,&
57 : integ_mat_buffer_type_2D,&
58 : mp2_type
59 : USE parallel_gemm_api, ONLY: parallel_gemm
60 : USE particle_types, ONLY: particle_type
61 : USE pw_env_types, ONLY: pw_env_type
62 : USE pw_poisson_types, ONLY: pw_poisson_type
63 : USE pw_pool_types, ONLY: pw_pool_type
64 : USE pw_types, ONLY: pw_c1d_gs_type,&
65 : pw_r3d_rs_type
66 : USE qs_environment_types, ONLY: get_qs_env,&
67 : qs_environment_type
68 : USE qs_force_types, ONLY: allocate_qs_force,&
69 : qs_force_type,&
70 : zero_qs_force
71 : USE qs_kind_types, ONLY: qs_kind_type
72 : USE qs_ks_types, ONLY: qs_ks_env_type
73 : USE qs_neighbor_list_types, ONLY: neighbor_list_set_p_type
74 : USE task_list_types, ONLY: task_list_type
75 : USE util, ONLY: get_limit
76 : USE virial_types, ONLY: virial_type
77 : #include "./base/base_uses.f90"
78 :
79 : IMPLICIT NONE
80 :
81 : PRIVATE
82 :
83 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'mp2_ri_grad'
84 :
85 : PUBLIC :: calc_ri_mp2_nonsep
86 :
87 : CONTAINS
88 :
89 : ! **************************************************************************************************
90 : !> \brief Calculate the non-separable part of the gradients and update the
91 : !> Lagrangian
92 : !> \param qs_env ...
93 : !> \param mp2_env ...
94 : !> \param para_env ...
95 : !> \param para_env_sub ...
96 : !> \param cell ...
97 : !> \param particle_set ...
98 : !> \param atomic_kind_set ...
99 : !> \param qs_kind_set ...
100 : !> \param mo_coeff ...
101 : !> \param dimen_RI ...
102 : !> \param Eigenval ...
103 : !> \param my_group_L_start ...
104 : !> \param my_group_L_end ...
105 : !> \param my_group_L_size ...
106 : !> \param sab_orb_sub ...
107 : !> \param mat_munu ...
108 : !> \param blacs_env_sub ...
109 : !> \author Mauro Del Ben
110 : ! **************************************************************************************************
111 272 : SUBROUTINE calc_ri_mp2_nonsep(qs_env, mp2_env, para_env, para_env_sub, cell, particle_set, &
112 272 : atomic_kind_set, qs_kind_set, mo_coeff, dimen_RI, Eigenval, &
113 : my_group_L_start, my_group_L_end, my_group_L_size, sab_orb_sub, mat_munu, &
114 : blacs_env_sub)
115 : TYPE(qs_environment_type), POINTER :: qs_env
116 : TYPE(mp2_type) :: mp2_env
117 : TYPE(mp_para_env_type), POINTER :: para_env, para_env_sub
118 : TYPE(cell_type), POINTER :: cell
119 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
120 : TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
121 : TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
122 : TYPE(cp_fm_type), DIMENSION(:), INTENT(IN) :: mo_coeff
123 : INTEGER, INTENT(IN) :: dimen_RI
124 : REAL(KIND=dp), DIMENSION(:, :), INTENT(IN) :: Eigenval
125 : INTEGER, INTENT(IN) :: my_group_L_start, my_group_L_end, &
126 : my_group_L_size
127 : TYPE(neighbor_list_set_p_type), DIMENSION(:), &
128 : POINTER :: sab_orb_sub
129 : TYPE(dbcsr_p_type), INTENT(INOUT) :: mat_munu
130 : TYPE(cp_blacs_env_type), POINTER :: blacs_env_sub
131 :
132 : CHARACTER(LEN=*), PARAMETER :: routineN = 'calc_ri_mp2_nonsep'
133 :
134 : INTEGER :: eri_method, handle, handle2, i, ikind, &
135 : ispin, itmp(2), L_counter, LLL, &
136 : my_P_end, my_P_size, my_P_start, nao, &
137 : nspins
138 272 : INTEGER, ALLOCATABLE, DIMENSION(:) :: atom_of_kind, homo, kind_of, &
139 272 : natom_of_kind, virtual
140 : LOGICAL :: alpha_beta, use_virial
141 : REAL(KIND=dp) :: cutoff_old, eps_filter, factor, &
142 : factor_2c, relative_cutoff_old
143 272 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: e_cutoff_old
144 272 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: G_PQ_local, G_PQ_local_2
145 : REAL(KIND=dp), DIMENSION(3, 3) :: h_stress, pv_virial
146 272 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: I_tmp2
147 : TYPE(cp_eri_mme_param), POINTER :: eri_param
148 : TYPE(cp_fm_struct_type), POINTER :: fm_struct_tmp
149 272 : TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:) :: L1_mu_i, L2_nu_a
150 : TYPE(dbcsr_p_type) :: matrix_P_munu
151 : TYPE(dbcsr_p_type), ALLOCATABLE, DIMENSION(:) :: mo_coeff_o, mo_coeff_v
152 272 : TYPE(dbcsr_p_type), ALLOCATABLE, DIMENSION(:, :) :: G_P_ia
153 272 : TYPE(dbcsr_p_type), DIMENSION(:), POINTER :: mat_munu_local, matrix_P_munu_local
154 : TYPE(dbcsr_type) :: matrix_P_munu_nosym
155 272 : TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:) :: Lag_mu_i_1, Lag_nu_a_2, matrix_P_inu
156 : TYPE(dft_control_type), POINTER :: dft_control
157 272 : TYPE(mp2_eri_force), ALLOCATABLE, DIMENSION(:) :: force_2c, force_2c_RI, force_3c_aux, &
158 272 : force_3c_orb_mu, force_3c_orb_nu
159 1088 : TYPE(pw_c1d_gs_type) :: dvg(3), pot_g, rho_g, rho_g_copy
160 : TYPE(pw_env_type), POINTER :: pw_env_sub
161 : TYPE(pw_poisson_type), POINTER :: poisson_env
162 : TYPE(pw_pool_type), POINTER :: auxbas_pw_pool
163 : TYPE(pw_r3d_rs_type) :: psi_L, rho_r
164 272 : TYPE(qs_force_type), DIMENSION(:), POINTER :: force, mp2_force
165 : TYPE(qs_ks_env_type), POINTER :: ks_env
166 : TYPE(task_list_type), POINTER :: task_list_sub
167 : TYPE(virial_type), POINTER :: virial
168 :
169 272 : CALL timeset(routineN, handle)
170 :
171 272 : eri_method = mp2_env%eri_method
172 272 : eri_param => mp2_env%eri_mme_param
173 :
174 : ! Find out whether we have a closed or open shell
175 272 : nspins = SIZE(Eigenval, 2)
176 272 : alpha_beta = (nspins == 2)
177 :
178 1088 : ALLOCATE (virtual(nspins), homo(nspins))
179 272 : eps_filter = mp2_env%mp2_gpw%eps_filter
180 30372 : ALLOCATE (mo_coeff_o(nspins), mo_coeff_v(nspins), G_P_ia(nspins, my_group_L_size))
181 630 : DO ispin = 1, nspins
182 358 : mo_coeff_o(ispin)%matrix => mp2_env%ri_grad%mo_coeff_o(ispin)%matrix
183 358 : mo_coeff_v(ispin)%matrix => mp2_env%ri_grad%mo_coeff_v(ispin)%matrix
184 : CALL dbcsr_get_info(mo_coeff_o(ispin)%matrix, &
185 : nfullrows_total=nao, &
186 358 : nfullcols_total=homo(ispin))
187 : CALL dbcsr_get_info(mo_coeff_v(ispin)%matrix, &
188 : nfullrows_total=nao, &
189 358 : nfullcols_total=virtual(ispin))
190 16470 : DO LLL = 1, my_group_L_size
191 16198 : G_P_ia(ispin, LLL)%matrix => mp2_env%ri_grad%G_P_ia(LLL, ispin)%matrix
192 : END DO
193 : END DO
194 272 : DEALLOCATE (mp2_env%ri_grad%G_P_ia)
195 :
196 272 : itmp = get_limit(dimen_RI, para_env_sub%num_pe, para_env_sub%mepos)
197 272 : my_P_start = itmp(1)
198 272 : my_P_end = itmp(2)
199 272 : my_P_size = itmp(2) - itmp(1) + 1
200 :
201 1088 : ALLOCATE (G_PQ_local(dimen_RI, my_group_L_size))
202 1025542 : G_PQ_local = 0.0_dp
203 972108 : G_PQ_local(my_P_start:my_P_end, :) = mp2_env%ri_grad%Gamma_PQ
204 272 : DEALLOCATE (mp2_env%ri_grad%Gamma_PQ)
205 972108 : G_PQ_local(my_P_start:my_P_end, :) = G_PQ_local(my_P_start:my_P_end, :)/REAL(nspins, dp)
206 272 : CALL para_env_sub%sum(G_PQ_local)
207 272 : IF (.NOT. compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter)) THEN
208 36 : ALLOCATE (G_PQ_local_2(dimen_RI, my_group_L_size))
209 41844 : G_PQ_local_2 = 0.0_dp
210 41844 : G_PQ_local_2(my_P_start:my_P_end, :) = mp2_env%ri_grad%Gamma_PQ_2
211 12 : DEALLOCATE (mp2_env%ri_grad%Gamma_PQ_2)
212 41844 : G_PQ_local_2(my_P_start:my_P_end, :) = G_PQ_local_2(my_P_start:my_P_end, :)/REAL(nspins, dp)
213 12 : CALL para_env_sub%sum(G_PQ_local_2)
214 : END IF
215 :
216 : ! create matrix holding the back transformation (G_P_inu)
217 1174 : ALLOCATE (matrix_P_inu(nspins))
218 630 : DO ispin = 1, nspins
219 630 : CALL dbcsr_create(matrix_P_inu(ispin), template=mo_coeff_o(ispin)%matrix)
220 : END DO
221 :
222 : ! non symmetric matrix
223 : CALL dbcsr_create(matrix_P_munu_nosym, template=mat_munu%matrix, &
224 272 : matrix_type=dbcsr_type_no_symmetry)
225 :
226 : ! create Lagrangian matrices in mixed AO/MO formalism
227 902 : ALLOCATE (Lag_mu_i_1(nspins))
228 630 : DO ispin = 1, nspins
229 358 : CALL dbcsr_create(Lag_mu_i_1(ispin), template=mo_coeff_o(ispin)%matrix)
230 630 : CALL dbcsr_set(Lag_mu_i_1(ispin), 0.0_dp)
231 : END DO
232 :
233 902 : ALLOCATE (Lag_nu_a_2(nspins))
234 630 : DO ispin = 1, nspins
235 358 : CALL dbcsr_create(Lag_nu_a_2(ispin), template=mo_coeff_v(ispin)%matrix)
236 630 : CALL dbcsr_set(Lag_nu_a_2(ispin), 0.0_dp)
237 : END DO
238 :
239 : ! get forces
240 272 : NULLIFY (force, virial)
241 272 : CALL get_qs_env(qs_env=qs_env, force=force, virial=virial)
242 :
243 : ! check if we want to calculate the virial
244 272 : use_virial = virial%pv_availability .AND. (.NOT. virial%pv_numer)
245 :
246 272 : CALL get_atomic_kind_set(atomic_kind_set=atomic_kind_set, natom_of_kind=natom_of_kind)
247 272 : NULLIFY (mp2_force)
248 272 : CALL allocate_qs_force(mp2_force, natom_of_kind)
249 272 : DEALLOCATE (natom_of_kind)
250 272 : CALL zero_qs_force(mp2_force)
251 272 : mp2_env%ri_grad%mp2_force => mp2_force
252 :
253 272 : factor_2c = -4.0_dp
254 272 : IF (mp2_env%method == ri_rpa_method_gpw) THEN
255 24 : factor_2c = -1.0_dp
256 24 : IF (alpha_beta) factor_2c = -2.0_dp
257 : END IF
258 :
259 : ! prepare integral derivatives with mme method
260 272 : IF (eri_method == do_eri_mme) THEN
261 1184 : ALLOCATE (matrix_P_munu_local(my_group_L_size))
262 1158 : ALLOCATE (mat_munu_local(my_group_L_size))
263 26 : L_counter = 0
264 1132 : DO LLL = my_group_L_start, my_group_L_end
265 1106 : L_counter = L_counter + 1
266 1106 : ALLOCATE (mat_munu_local(L_counter)%matrix)
267 : CALL dbcsr_create(mat_munu_local(L_counter)%matrix, template=mat_munu%matrix, &
268 1106 : matrix_type=dbcsr_type_symmetric)
269 1106 : CALL dbcsr_copy(mat_munu_local(L_counter)%matrix, mat_munu%matrix)
270 1106 : CALL dbcsr_set(mat_munu_local(L_counter)%matrix, 0.0_dp)
271 :
272 : CALL G_P_transform_MO_to_AO(matrix_P_munu_local(L_counter)%matrix, matrix_P_munu_nosym, mat_munu%matrix, &
273 : G_P_ia(:, L_counter), matrix_P_inu, &
274 1132 : mo_coeff_v, mo_coeff_o, eps_filter)
275 : END DO
276 :
277 78 : ALLOCATE (I_tmp2(dimen_RI, my_group_L_size))
278 95790 : I_tmp2(:, :) = 0.0_dp
279 : CALL mp2_eri_2c_integrate(eri_param, mp2_env%potential_parameter, para_env_sub, qs_env, &
280 : basis_type_a="RI_AUX", basis_type_b="RI_AUX", &
281 : hab=I_tmp2, first_b=my_group_L_start, last_b=my_group_L_end, &
282 26 : eri_method=eri_method, pab=G_PQ_local, force_a=force_2c)
283 26 : IF (.NOT. compare_potential_types(mp2_env%potential_parameter, mp2_env%ri_metric)) THEN
284 0 : I_tmp2(:, :) = 0.0_dp
285 : CALL mp2_eri_2c_integrate(eri_param, mp2_env%ri_metric, para_env_sub, qs_env, &
286 : basis_type_a="RI_AUX", basis_type_b="RI_AUX", &
287 : hab=I_tmp2, first_b=my_group_L_start, last_b=my_group_L_end, &
288 0 : eri_method=eri_method, pab=G_PQ_local_2, force_a=force_2c_RI)
289 : END IF
290 26 : DEALLOCATE (I_tmp2)
291 :
292 : CALL mp2_eri_3c_integrate(eri_param, mp2_env%ri_metric, para_env_sub, qs_env, &
293 : first_c=my_group_L_start, last_c=my_group_L_end, mat_ab=mat_munu_local, &
294 : basis_type_a="ORB", basis_type_b="ORB", basis_type_c="RI_AUX", &
295 : sab_nl=sab_orb_sub, eri_method=eri_method, &
296 : pabc=matrix_P_munu_local, &
297 26 : force_a=force_3c_orb_mu, force_b=force_3c_orb_nu, force_c=force_3c_aux)
298 :
299 26 : L_counter = 0
300 1132 : DO LLL = my_group_L_start, my_group_L_end
301 1106 : L_counter = L_counter + 1
302 2432 : DO ispin = 1, nspins
303 : CALL dbcsr_multiply("N", "T", 1.0_dp, mo_coeff_v(ispin)%matrix, G_P_ia(ispin, L_counter)%matrix, &
304 2432 : 0.0_dp, matrix_P_inu(ispin), filter_eps=eps_filter)
305 : END DO
306 :
307 : ! The matrices of G_P_ia are deallocated here
308 : CALL update_lagrangian(mat_munu_local(L_counter)%matrix, matrix_P_inu, Lag_mu_i_1, &
309 : G_P_ia(:, L_counter), mo_coeff_o, Lag_nu_a_2, &
310 1132 : eps_filter)
311 : END DO
312 :
313 72 : DO ikind = 1, SIZE(force)
314 : mp2_force(ikind)%mp2_non_sep(:, :) = factor_2c*force_2c(ikind)%forces(:, :) + &
315 : force_3c_orb_mu(ikind)%forces(:, :) + &
316 : force_3c_orb_nu(ikind)%forces(:, :) + &
317 334 : force_3c_aux(ikind)%forces(:, :)
318 :
319 72 : IF (.NOT. compare_potential_types(mp2_env%potential_parameter, mp2_env%ri_metric)) THEN
320 0 : mp2_force(ikind)%mp2_non_sep(:, :) = mp2_force(ikind)%mp2_non_sep(:, :) + factor_2c*force_2c_RI(ikind)%forces
321 : END IF
322 : END DO
323 :
324 26 : CALL mp2_eri_deallocate_forces(force_2c)
325 26 : IF (.NOT. compare_potential_types(mp2_env%potential_parameter, mp2_env%ri_metric)) THEN
326 0 : CALL mp2_eri_deallocate_forces(force_2c_RI)
327 : END IF
328 26 : CALL mp2_eri_deallocate_forces(force_3c_aux)
329 26 : CALL mp2_eri_deallocate_forces(force_3c_orb_mu)
330 26 : CALL mp2_eri_deallocate_forces(force_3c_orb_nu)
331 26 : CALL dbcsr_deallocate_matrix_set(matrix_P_munu_local)
332 26 : CALL dbcsr_deallocate_matrix_set(mat_munu_local)
333 :
334 246 : ELSEIF (eri_method == do_eri_gpw) THEN
335 246 : CALL get_qs_env(qs_env, ks_env=ks_env)
336 :
337 246 : CALL get_atomic_kind_set(atomic_kind_set, kind_of=kind_of, atom_of_kind=atom_of_kind)
338 :
339 : ! Supporting stuff for GPW
340 : CALL prepare_gpw(qs_env, dft_control, e_cutoff_old, cutoff_old, relative_cutoff_old, para_env_sub, pw_env_sub, &
341 246 : auxbas_pw_pool, poisson_env, task_list_sub, rho_r, rho_g, pot_g, psi_L, sab_orb_sub)
342 :
343 : ! in case virial is required we need auxiliary pw
344 : ! for calculate the MP2-volume contribution to the virial
345 : ! (hartree potential derivatives)
346 246 : IF (use_virial) THEN
347 50 : CALL auxbas_pw_pool%create_pw(rho_g_copy)
348 200 : DO i = 1, 3
349 200 : CALL auxbas_pw_pool%create_pw(dvg(i))
350 : END DO
351 : END IF
352 :
353 : ! start main loop over auxiliary basis functions
354 246 : CALL timeset(routineN//"_loop", handle2)
355 :
356 246 : IF (use_virial) h_stress = 0.0_dp
357 :
358 246 : L_counter = 0
359 11052 : DO LLL = my_group_L_start, my_group_L_end
360 10806 : L_counter = L_counter + 1
361 :
362 : CALL G_P_transform_MO_to_AO(matrix_P_munu%matrix, matrix_P_munu_nosym, mat_munu%matrix, &
363 : G_P_ia(:, L_counter), matrix_P_inu, &
364 10806 : mo_coeff_v, mo_coeff_o, eps_filter)
365 :
366 : CALL integrate_potential_forces_2c(rho_r, LLL, rho_g, atomic_kind_set, &
367 : qs_kind_set, particle_set, cell, pw_env_sub, poisson_env, &
368 : pot_g, mp2_env%potential_parameter, use_virial, &
369 : rho_g_copy, dvg, kind_of, atom_of_kind, G_PQ_local(:, L_counter), &
370 10806 : mp2_force, h_stress, para_env_sub, dft_control, psi_L, factor_2c)
371 :
372 10806 : IF (.NOT. compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter)) THEN
373 :
374 : CALL integrate_potential_forces_2c(rho_r, LLL, rho_g, atomic_kind_set, &
375 : qs_kind_set, particle_set, cell, pw_env_sub, poisson_env, &
376 : pot_g, mp2_env%ri_metric, use_virial, &
377 : rho_g_copy, dvg, kind_of, atom_of_kind, G_PQ_local_2(:, L_counter), &
378 498 : mp2_force, h_stress, para_env_sub, dft_control, psi_L, factor_2c)
379 : END IF
380 :
381 36714 : IF (use_virial) pv_virial = virial%pv_virial
382 : CALL integrate_potential_forces_3c_1c(mat_munu, rho_r, matrix_P_munu, qs_env, pw_env_sub, &
383 10806 : task_list_sub)
384 10806 : IF (use_virial) THEN
385 28067 : h_stress = h_stress + (virial%pv_virial - pv_virial)
386 28067 : virial%pv_virial = pv_virial
387 : END IF
388 :
389 : ! The matrices of G_P_ia are deallocated here
390 : CALL update_lagrangian(mat_munu%matrix, matrix_P_inu, Lag_mu_i_1, &
391 : G_P_ia(:, L_counter), mo_coeff_o, Lag_nu_a_2, &
392 10806 : eps_filter)
393 :
394 : CALL integrate_potential_forces_3c_2c(matrix_P_munu, rho_r, rho_g, task_list_sub, pw_env_sub, &
395 : mp2_env%ri_metric, &
396 : ks_env, poisson_env, pot_g, use_virial, rho_g_copy, dvg, &
397 : h_stress, para_env_sub, kind_of, atom_of_kind, &
398 11052 : qs_kind_set, particle_set, cell, LLL, mp2_force, dft_control)
399 : END DO
400 :
401 246 : CALL timestop(handle2)
402 :
403 246 : DEALLOCATE (kind_of)
404 246 : DEALLOCATE (atom_of_kind)
405 :
406 246 : IF (use_virial) THEN
407 50 : CALL auxbas_pw_pool%give_back_pw(rho_g_copy)
408 200 : DO i = 1, 3
409 200 : CALL auxbas_pw_pool%give_back_pw(dvg(i))
410 : END DO
411 : END IF
412 :
413 : CALL cleanup_gpw(qs_env, e_cutoff_old, cutoff_old, relative_cutoff_old, para_env_sub, pw_env_sub, &
414 246 : task_list_sub, auxbas_pw_pool, rho_r, rho_g, pot_g, psi_L)
415 :
416 246 : CALL dbcsr_release(matrix_P_munu%matrix)
417 246 : DEALLOCATE (matrix_P_munu%matrix)
418 :
419 : END IF
420 :
421 872 : IF (use_virial) mp2_env%ri_grad%mp2_virial = h_stress
422 :
423 272 : DEALLOCATE (G_PQ_local)
424 272 : IF (.NOT. compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter)) DEALLOCATE (G_PQ_local_2)
425 :
426 272 : CALL dbcsr_release(matrix_P_munu_nosym)
427 :
428 630 : DO ispin = 1, nspins
429 630 : CALL dbcsr_release(matrix_P_inu(ispin))
430 : END DO
431 272 : DEALLOCATE (matrix_P_inu, G_P_ia)
432 :
433 : ! move the forces in the correct place
434 272 : IF (eri_method == do_eri_gpw) THEN
435 728 : DO ikind = 1, SIZE(mp2_force)
436 6546 : mp2_force(ikind)%mp2_non_sep(:, :) = force(ikind)%rho_elec(:, :)
437 3760 : force(ikind)%rho_elec(:, :) = 0.0_dp
438 : END DO
439 : END IF
440 :
441 : ! Now we move from the local matrices to the global ones
442 : ! defined over all MPI tasks
443 : ! Start with moving from the DBCSR to FM for the lagrangians
444 :
445 1804 : ALLOCATE (L1_mu_i(nspins), L2_nu_a(nspins))
446 630 : DO ispin = 1, nspins
447 : ! Now we move from the local matrices to the global ones
448 : ! defined over all MPI tasks
449 : ! Start with moving from the DBCSR to FM for the lagrangians
450 358 : NULLIFY (fm_struct_tmp)
451 : CALL cp_fm_struct_create(fm_struct_tmp, para_env=para_env_sub, context=blacs_env_sub, &
452 358 : nrow_global=nao, ncol_global=homo(ispin))
453 358 : CALL cp_fm_create(L1_mu_i(ispin), fm_struct_tmp, name="Lag_mu_i")
454 358 : CALL cp_fm_struct_release(fm_struct_tmp)
455 358 : CALL cp_fm_set_all(L1_mu_i(ispin), 0.0_dp)
456 358 : CALL copy_dbcsr_to_fm(matrix=Lag_mu_i_1(ispin), fm=L1_mu_i(ispin))
457 :
458 : ! release Lag_mu_i_1
459 358 : CALL dbcsr_release(Lag_mu_i_1(ispin))
460 :
461 358 : NULLIFY (fm_struct_tmp)
462 : CALL cp_fm_struct_create(fm_struct_tmp, para_env=para_env_sub, context=blacs_env_sub, &
463 358 : nrow_global=nao, ncol_global=virtual(ispin))
464 358 : CALL cp_fm_create(L2_nu_a(ispin), fm_struct_tmp, name="Lag_nu_a")
465 358 : CALL cp_fm_struct_release(fm_struct_tmp)
466 358 : CALL cp_fm_set_all(L2_nu_a(ispin), 0.0_dp)
467 358 : CALL copy_dbcsr_to_fm(matrix=Lag_nu_a_2(ispin), fm=L2_nu_a(ispin))
468 :
469 : ! release Lag_nu_a_2
470 630 : CALL dbcsr_release(Lag_nu_a_2(ispin))
471 : END DO
472 272 : DEALLOCATE (Lag_mu_i_1, Lag_nu_a_2)
473 :
474 : ! Set the factor to multiply P_ij (depends on the open or closed shell)
475 272 : factor = 1.0_dp
476 272 : IF (alpha_beta) factor = 0.50_dp
477 :
478 630 : DO ispin = 1, nspins
479 : CALL create_W_P(qs_env, mp2_env, mo_coeff(ispin), homo(ispin), virtual(ispin), para_env, &
480 : para_env_sub, Eigenval(:, ispin), L1_mu_i(ispin), L2_nu_a(ispin), &
481 630 : factor, ispin)
482 : END DO
483 272 : DEALLOCATE (L1_mu_i, L2_nu_a)
484 :
485 272 : CALL timestop(handle)
486 :
487 544 : END SUBROUTINE calc_ri_mp2_nonsep
488 :
489 : ! **************************************************************************************************
490 : !> \brief Transforms G_P_ia to G_P_munu
491 : !> \param G_P_munu The container for G_P_munu, will be allocated and created if not allocated on entry
492 : !> \param G_P_munu_nosym ...
493 : !> \param mat_munu ...
494 : !> \param G_P_ia ...
495 : !> \param G_P_inu ...
496 : !> \param mo_coeff_v ...
497 : !> \param mo_coeff_o ...
498 : !> \param eps_filter ...
499 : ! **************************************************************************************************
500 11912 : SUBROUTINE G_P_transform_MO_to_AO(G_P_munu, G_P_munu_nosym, mat_munu, G_P_ia, G_P_inu, &
501 11912 : mo_coeff_v, mo_coeff_o, eps_filter)
502 : TYPE(dbcsr_type), POINTER :: G_P_munu
503 : TYPE(dbcsr_type), INTENT(INOUT) :: G_P_munu_nosym, mat_munu
504 : TYPE(dbcsr_p_type), DIMENSION(:), INTENT(IN) :: G_P_ia
505 : TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT) :: G_P_inu
506 : TYPE(dbcsr_p_type), DIMENSION(:), INTENT(IN) :: mo_coeff_v, mo_coeff_o
507 : REAL(KIND=dp), INTENT(IN) :: eps_filter
508 :
509 : CHARACTER(LEN=*), PARAMETER :: routineN = 'G_P_transform_MO_to_AO'
510 :
511 : INTEGER :: handle
512 :
513 11912 : IF (.NOT. ASSOCIATED(G_P_munu)) THEN
514 1352 : ALLOCATE (G_P_munu)
515 : CALL dbcsr_create(G_P_munu, template=mat_munu, &
516 1352 : matrix_type=dbcsr_type_symmetric)
517 : END IF
518 :
519 11912 : CALL G_P_transform_alpha_beta(G_P_ia, G_P_inu, G_P_munu_nosym, mo_coeff_v, mo_coeff_o, eps_filter)
520 :
521 : ! symmetrize
522 11912 : CALL timeset(routineN//"_symmetrize", handle)
523 11912 : CALL dbcsr_set(G_P_munu, 0.0_dp)
524 11912 : CALL dbcsr_transposed(G_P_munu, G_P_munu_nosym)
525 : CALL dbcsr_add(G_P_munu, G_P_munu_nosym, &
526 11912 : alpha_scalar=2.0_dp, beta_scalar=2.0_dp)
527 : ! this is a trick to avoid that integrate_v_rspace starts to cry
528 11912 : CALL dbcsr_copy(mat_munu, G_P_munu, keep_sparsity=.TRUE.)
529 11912 : CALL dbcsr_copy(G_P_munu, mat_munu)
530 :
531 11912 : CALL timestop(handle)
532 :
533 11912 : END SUBROUTINE G_P_transform_MO_to_AO
534 :
535 : ! **************************************************************************************************
536 : !> \brief ...
537 : !> \param G_P_ia ...
538 : !> \param G_P_inu ...
539 : !> \param G_P_munu ...
540 : !> \param mo_coeff_v ...
541 : !> \param mo_coeff_o ...
542 : !> \param eps_filter ...
543 : ! **************************************************************************************************
544 11912 : SUBROUTINE G_P_transform_alpha_beta(G_P_ia, G_P_inu, G_P_munu, mo_coeff_v, mo_coeff_o, eps_filter)
545 : TYPE(dbcsr_p_type), DIMENSION(:), INTENT(IN) :: G_P_ia
546 : TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT) :: G_P_inu
547 : TYPE(dbcsr_type), INTENT(INOUT) :: G_P_munu
548 : TYPE(dbcsr_p_type), DIMENSION(:), INTENT(IN) :: mo_coeff_v, mo_coeff_o
549 : REAL(KIND=dp), INTENT(IN) :: eps_filter
550 :
551 : CHARACTER(LEN=*), PARAMETER :: routineN = 'G_P_transform_alpha_beta'
552 :
553 : INTEGER :: handle, ispin
554 : REAL(KIND=dp) :: factor
555 :
556 11912 : CALL timeset(routineN, handle)
557 :
558 11912 : factor = 1.0_dp/REAL(SIZE(G_P_ia), dp)
559 :
560 11912 : CALL dbcsr_set(G_P_munu, 0.0_dp)
561 :
562 27752 : DO ispin = 1, SIZE(G_P_ia)
563 : ! first back-transformation a->nu
564 : CALL dbcsr_multiply("N", "T", 1.0_dp, mo_coeff_v(ispin)%matrix, G_P_ia(ispin)%matrix, &
565 15840 : 0.0_dp, G_P_inu(ispin), filter_eps=eps_filter)
566 :
567 : ! second back-transformation i->mu
568 : CALL dbcsr_multiply("N", "T", factor, G_P_inu(ispin), mo_coeff_o(ispin)%matrix, &
569 27752 : 1.0_dp, G_P_munu, filter_eps=eps_filter)
570 : END DO
571 :
572 11912 : CALL timestop(handle)
573 :
574 11912 : END SUBROUTINE G_P_transform_alpha_beta
575 :
576 : ! **************************************************************************************************
577 : !> \brief ...
578 : !> \param mat_munu ...
579 : !> \param matrix_P_inu ...
580 : !> \param Lag_mu_i_1 ...
581 : !> \param G_P_ia ...
582 : !> \param mo_coeff_o ...
583 : !> \param Lag_nu_a_2 ...
584 : !> \param eps_filter ...
585 : ! **************************************************************************************************
586 11912 : SUBROUTINE update_lagrangian(mat_munu, matrix_P_inu, Lag_mu_i_1, &
587 23824 : G_P_ia, mo_coeff_o, Lag_nu_a_2, &
588 : eps_filter)
589 : TYPE(dbcsr_type), INTENT(IN) :: mat_munu
590 : TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT) :: matrix_P_inu, Lag_mu_i_1
591 : TYPE(dbcsr_p_type), DIMENSION(:), INTENT(INOUT) :: G_P_ia
592 : TYPE(dbcsr_p_type), DIMENSION(:), INTENT(IN) :: mo_coeff_o
593 : TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT) :: Lag_nu_a_2
594 : REAL(KIND=dp), INTENT(IN) :: eps_filter
595 :
596 : CHARACTER(LEN=*), PARAMETER :: routineN = 'update_lagrangian'
597 :
598 : INTEGER :: handle, ispin
599 :
600 : ! update lagrangian
601 11912 : CALL timeset(routineN, handle)
602 :
603 27752 : DO ispin = 1, SIZE(G_P_ia)
604 : ! first contract mat_munu with the half back transformed Gamma_i_nu
605 : ! in order to update Lag_mu_i_1
606 : CALL dbcsr_multiply("N", "N", 1.0_dp, mat_munu, matrix_P_inu(ispin), &
607 15840 : 1.0_dp, Lag_mu_i_1(ispin), filter_eps=eps_filter)
608 :
609 : ! transform first index of mat_munu and store the result into matrix_P_inu
610 15840 : CALL dbcsr_set(matrix_P_inu(ispin), 0.0_dp)
611 : CALL dbcsr_multiply("N", "N", 1.0_dp, mat_munu, mo_coeff_o(ispin)%matrix, &
612 15840 : 0.0_dp, matrix_P_inu(ispin), filter_eps=eps_filter)
613 :
614 : ! contract the transformend matrix_P_inu with the untransformend Gamma_i_a
615 : ! in order to update Lag_nu_a_2
616 : CALL dbcsr_multiply("N", "N", -1.0_dp, matrix_P_inu(ispin), G_P_ia(ispin)%matrix, &
617 15840 : 1.0_dp, Lag_nu_a_2(ispin), filter_eps=eps_filter)
618 :
619 : ! release the actual gamma_P_ia
620 15840 : CALL dbcsr_release(G_P_ia(ispin)%matrix)
621 27752 : DEALLOCATE (G_P_ia(ispin)%matrix)
622 : END DO
623 :
624 11912 : CALL timestop(handle)
625 :
626 11912 : END SUBROUTINE update_lagrangian
627 :
628 : ! **************************************************************************************************
629 : !> \brief ...
630 : !> \param qs_env ...
631 : !> \param mp2_env ...
632 : !> \param mo_coeff ...
633 : !> \param homo ...
634 : !> \param virtual ...
635 : !> \param para_env ...
636 : !> \param para_env_sub ...
637 : !> \param Eigenval ...
638 : !> \param L1_mu_i ...
639 : !> \param L2_nu_a ...
640 : !> \param factor ...
641 : !> \param kspin ...
642 : ! **************************************************************************************************
643 358 : SUBROUTINE create_W_P(qs_env, mp2_env, mo_coeff, homo, virtual, para_env, para_env_sub, &
644 358 : Eigenval, L1_mu_i, L2_nu_a, factor, kspin)
645 : TYPE(qs_environment_type), POINTER :: qs_env
646 : TYPE(mp2_type) :: mp2_env
647 : TYPE(cp_fm_type), INTENT(IN) :: mo_coeff
648 : INTEGER, INTENT(IN) :: homo, virtual
649 : TYPE(mp_para_env_type), POINTER :: para_env, para_env_sub
650 : REAL(KIND=dp), DIMENSION(:), INTENT(IN) :: Eigenval
651 : TYPE(cp_fm_type), INTENT(INOUT) :: L1_mu_i, L2_nu_a
652 : REAL(KIND=dp), INTENT(IN) :: factor
653 : INTEGER, INTENT(IN) :: kspin
654 :
655 : CHARACTER(LEN=*), PARAMETER :: routineN = 'create_W_P'
656 :
657 : INTEGER :: color_exchange, dummy_proc, handle, handle2, handle3, i_global, i_local, iiB, &
658 : iii, iproc, itmp(2), j_global, j_local, jjB, max_col_size, max_row_size, &
659 : my_B_virtual_end, my_B_virtual_start, mypcol, myprow, nao, ncol_local, ncol_local_1i, &
660 : ncol_local_2a, nmo, npcol, nprow, nrow_local, nrow_local_1i, nrow_local_2a, &
661 : number_of_rec, number_of_send, proc_receive, proc_receive_static, proc_send, &
662 : proc_send_ex, proc_send_static, proc_send_sub, proc_shift, rec_col_size, rec_counter, &
663 : rec_row_size, send_col_size, send_counter, send_pcol, send_prow, send_row_size, &
664 : size_rec_buffer
665 : INTEGER :: size_send_buffer
666 358 : INTEGER, ALLOCATABLE, DIMENSION(:) :: iii_vet, map_rec_size, map_send_size, &
667 358 : pos_info, pos_info_ex, proc_2_send_pos
668 358 : INTEGER, ALLOCATABLE, DIMENSION(:, :) :: grid_2_mepos, mepos_2_grid, my_col_indeces_info_1i, &
669 358 : my_col_indeces_info_2a, my_row_indeces_info_1i, my_row_indeces_info_2a, sizes, sizes_1i, &
670 358 : sizes_2a
671 358 : INTEGER, ALLOCATABLE, DIMENSION(:, :, :) :: col_indeces_info_1i, &
672 358 : col_indeces_info_2a, &
673 358 : row_indeces_info_1i, &
674 358 : row_indeces_info_2a
675 358 : INTEGER, DIMENSION(:), POINTER :: col_indices, col_indices_1i, &
676 358 : col_indices_2a, row_indices, &
677 358 : row_indices_1i, row_indices_2a
678 358 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: ab_rec, ab_send, mat_rec, mat_send
679 : TYPE(cp_blacs_env_type), POINTER :: blacs_env
680 : TYPE(cp_fm_struct_type), POINTER :: fm_struct_tmp
681 : TYPE(cp_fm_type) :: fm_P_ij, L_mu_q
682 : TYPE(integ_mat_buffer_type), ALLOCATABLE, &
683 358 : DIMENSION(:) :: buffer_rec, buffer_send
684 : TYPE(integ_mat_buffer_type_2D), ALLOCATABLE, &
685 358 : DIMENSION(:) :: buffer_cyclic
686 : TYPE(mp_para_env_type), POINTER :: para_env_exchange
687 358 : TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:) :: req_send
688 :
689 358 : CALL timeset(routineN, handle)
690 :
691 : ! create the globally distributed mixed lagrangian
692 358 : NULLIFY (blacs_env)
693 358 : CALL get_qs_env(qs_env, blacs_env=blacs_env)
694 :
695 358 : CALL cp_fm_get_info(mo_coeff, nrow_global=nao, ncol_global=nmo)
696 :
697 358 : NULLIFY (fm_struct_tmp)
698 : CALL cp_fm_struct_create(fm_struct_tmp, para_env=para_env, context=blacs_env, &
699 358 : nrow_global=nao, ncol_global=nmo)
700 358 : CALL cp_fm_create(L_mu_q, fm_struct_tmp, name="Lag_mu_q")
701 358 : CALL cp_fm_struct_release(fm_struct_tmp)
702 358 : CALL cp_fm_set_all(L_mu_q, 0.0_dp)
703 :
704 : ! create all information array
705 1074 : ALLOCATE (pos_info(0:para_env%num_pe - 1))
706 358 : CALL para_env%allgather(para_env_sub%mepos, pos_info)
707 :
708 : ! get matrix information for the global
709 : CALL cp_fm_get_info(matrix=L_mu_q, &
710 : nrow_local=nrow_local, &
711 : ncol_local=ncol_local, &
712 : row_indices=row_indices, &
713 358 : col_indices=col_indices)
714 358 : myprow = L_mu_q%matrix_struct%context%mepos(1)
715 358 : mypcol = L_mu_q%matrix_struct%context%mepos(2)
716 358 : nprow = L_mu_q%matrix_struct%context%num_pe(1)
717 358 : npcol = L_mu_q%matrix_struct%context%num_pe(2)
718 :
719 1432 : ALLOCATE (grid_2_mepos(0:nprow - 1, 0:npcol - 1))
720 1432 : grid_2_mepos = 0
721 358 : grid_2_mepos(myprow, mypcol) = para_env%mepos
722 358 : CALL para_env%sum(grid_2_mepos)
723 :
724 : ! get matrix information for L1_mu_i
725 : CALL cp_fm_get_info(matrix=L1_mu_i, &
726 : nrow_local=nrow_local_1i, &
727 : ncol_local=ncol_local_1i, &
728 : row_indices=row_indices_1i, &
729 358 : col_indices=col_indices_1i)
730 :
731 1074 : ALLOCATE (sizes_1i(2, 0:para_env_sub%num_pe - 1))
732 1074 : CALL para_env_sub%allgather([nrow_local_1i, ncol_local_1i], sizes_1i)
733 :
734 : ! get matrix information for L2_nu_a
735 : CALL cp_fm_get_info(matrix=L2_nu_a, &
736 : nrow_local=nrow_local_2a, &
737 : ncol_local=ncol_local_2a, &
738 : row_indices=row_indices_2a, &
739 358 : col_indices=col_indices_2a)
740 :
741 1074 : ALLOCATE (sizes_2a(2, 0:para_env_sub%num_pe - 1))
742 1074 : CALL para_env_sub%allgather([nrow_local_2a, ncol_local_2a], sizes_2a)
743 :
744 : ! Here we perform a ring communication scheme taking into account
745 : ! for the sub-group distribution of the source matrices.
746 : ! as a first step we need to redistribute the data within
747 : ! the subgroup.
748 : ! In order to do so we have to allocate the structure
749 : ! that will hold the local data involved in communication, this
750 : ! structure will be the same for processes in different subgroups
751 : ! sharing the same position in the subgroup.
752 : ! -1) create the exchange para_env
753 358 : color_exchange = para_env_sub%mepos
754 358 : ALLOCATE (para_env_exchange)
755 358 : CALL para_env_exchange%from_split(para_env, color_exchange)
756 1074 : ALLOCATE (pos_info_ex(0:para_env%num_pe - 1))
757 358 : CALL para_env%allgather(para_env_exchange%mepos, pos_info_ex)
758 1074 : ALLOCATE (sizes(2, 0:para_env_exchange%num_pe - 1))
759 1074 : CALL para_env_exchange%allgather([nrow_local, ncol_local], sizes)
760 :
761 : ! 0) store some info about indeces of the fm matrices (subgroup)
762 358 : CALL timeset(routineN//"_inx", handle2)
763 : ! matrix L1_mu_i
764 732 : max_row_size = MAXVAL(sizes_1i(1, :))
765 732 : max_col_size = MAXVAL(sizes_1i(2, :))
766 1432 : ALLOCATE (row_indeces_info_1i(2, max_row_size, 0:para_env_sub%num_pe - 1))
767 1432 : ALLOCATE (col_indeces_info_1i(2, max_col_size, 0:para_env_sub%num_pe - 1))
768 1074 : ALLOCATE (my_row_indeces_info_1i(2, max_row_size))
769 1074 : ALLOCATE (my_col_indeces_info_1i(2, max_col_size))
770 21150 : row_indeces_info_1i = 0
771 4854 : col_indeces_info_1i = 0
772 358 : dummy_proc = 0
773 : ! row
774 6954 : DO iiB = 1, nrow_local_1i
775 6596 : i_global = row_indices_1i(iiB)
776 6596 : send_prow = L_mu_q%matrix_struct%g2p_row(i_global)
777 6596 : i_local = L_mu_q%matrix_struct%g2l_row(i_global)
778 6596 : my_row_indeces_info_1i(1, iiB) = send_prow
779 6954 : my_row_indeces_info_1i(2, iiB) = i_local
780 : END DO
781 : ! col
782 1660 : DO jjB = 1, ncol_local_1i
783 1302 : j_global = col_indices_1i(jjB)
784 1302 : send_pcol = L_mu_q%matrix_struct%g2p_col(j_global)
785 1302 : j_local = L_mu_q%matrix_struct%g2l_col(j_global)
786 1302 : my_col_indeces_info_1i(1, jjB) = send_pcol
787 1660 : my_col_indeces_info_1i(2, jjB) = j_local
788 : END DO
789 358 : CALL para_env_sub%allgather(my_row_indeces_info_1i, row_indeces_info_1i)
790 358 : CALL para_env_sub%allgather(my_col_indeces_info_1i, col_indeces_info_1i)
791 358 : DEALLOCATE (my_row_indeces_info_1i, my_col_indeces_info_1i)
792 :
793 : ! matrix L2_nu_a
794 732 : max_row_size = MAXVAL(sizes_2a(1, :))
795 732 : max_col_size = MAXVAL(sizes_2a(2, :))
796 1432 : ALLOCATE (row_indeces_info_2a(2, max_row_size, 0:para_env_sub%num_pe - 1))
797 1432 : ALLOCATE (col_indeces_info_2a(2, max_col_size, 0:para_env_sub%num_pe - 1))
798 1074 : ALLOCATE (my_row_indeces_info_2a(2, max_row_size))
799 1074 : ALLOCATE (my_col_indeces_info_2a(2, max_col_size))
800 21150 : row_indeces_info_2a = 0
801 18066 : col_indeces_info_2a = 0
802 : ! row
803 6954 : DO iiB = 1, nrow_local_2a
804 6596 : i_global = row_indices_2a(iiB)
805 6596 : send_prow = L_mu_q%matrix_struct%g2p_row(i_global)
806 6596 : i_local = L_mu_q%matrix_struct%g2l_row(i_global)
807 6596 : my_row_indeces_info_2a(1, iiB) = send_prow
808 6954 : my_row_indeces_info_2a(2, iiB) = i_local
809 : END DO
810 : ! col
811 5828 : DO jjB = 1, ncol_local_2a
812 5470 : j_global = col_indices_2a(jjB) + homo
813 5470 : send_pcol = L_mu_q%matrix_struct%g2p_col(j_global)
814 5470 : j_local = L_mu_q%matrix_struct%g2l_col(j_global)
815 5470 : my_col_indeces_info_2a(1, jjB) = send_pcol
816 5828 : my_col_indeces_info_2a(2, jjB) = j_local
817 : END DO
818 358 : CALL para_env_sub%allgather(my_row_indeces_info_2a, row_indeces_info_2a)
819 358 : CALL para_env_sub%allgather(my_col_indeces_info_2a, col_indeces_info_2a)
820 358 : DEALLOCATE (my_row_indeces_info_2a, my_col_indeces_info_2a)
821 358 : CALL timestop(handle2)
822 :
823 : ! 1) define the map for sending data in the subgroup starting with L1_mu_i
824 358 : CALL timeset(routineN//"_subinfo", handle2)
825 1074 : ALLOCATE (map_send_size(0:para_env_sub%num_pe - 1))
826 732 : map_send_size = 0
827 1660 : DO jjB = 1, ncol_local_1i
828 1302 : send_pcol = col_indeces_info_1i(1, jjB, para_env_sub%mepos)
829 26236 : DO iiB = 1, nrow_local_1i
830 24576 : send_prow = row_indeces_info_1i(1, iiB, para_env_sub%mepos)
831 24576 : proc_send = grid_2_mepos(send_prow, send_pcol)
832 24576 : proc_send_sub = pos_info(proc_send)
833 25878 : map_send_size(proc_send_sub) = map_send_size(proc_send_sub) + 1
834 : END DO
835 : END DO
836 : ! and the same for L2_nu_a
837 5828 : DO jjB = 1, ncol_local_2a
838 5470 : send_pcol = col_indeces_info_2a(1, jjB, para_env_sub%mepos)
839 126994 : DO iiB = 1, nrow_local_2a
840 121166 : send_prow = row_indeces_info_2a(1, iiB, para_env_sub%mepos)
841 121166 : proc_send = grid_2_mepos(send_prow, send_pcol)
842 121166 : proc_send_sub = pos_info(proc_send)
843 126636 : map_send_size(proc_send_sub) = map_send_size(proc_send_sub) + 1
844 : END DO
845 : END DO
846 : ! and exchange data in order to create map_rec_size
847 1074 : ALLOCATE (map_rec_size(0:para_env_sub%num_pe - 1))
848 732 : map_rec_size = 0
849 358 : CALL para_env_sub%alltoall(map_send_size, map_rec_size, 1)
850 358 : CALL timestop(handle2)
851 :
852 : ! 2) reorder data in sending buffer
853 358 : CALL timeset(routineN//"_sub_Bsend", handle2)
854 : ! count the number of messages (include myself)
855 358 : number_of_send = 0
856 732 : DO proc_shift = 0, para_env_sub%num_pe - 1
857 374 : proc_send = MODULO(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
858 732 : IF (map_send_size(proc_send) > 0) THEN
859 358 : number_of_send = number_of_send + 1
860 : END IF
861 : END DO
862 : ! allocate the structure that will hold the messages to be sent
863 1432 : ALLOCATE (buffer_send(number_of_send))
864 358 : send_counter = 0
865 1074 : ALLOCATE (proc_2_send_pos(0:para_env_sub%num_pe - 1))
866 732 : proc_2_send_pos = 0
867 732 : DO proc_shift = 0, para_env_sub%num_pe - 1
868 374 : proc_send = MODULO(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
869 374 : size_send_buffer = map_send_size(proc_send)
870 732 : IF (map_send_size(proc_send) > 0) THEN
871 358 : send_counter = send_counter + 1
872 : ! allocate the sending buffer (msg)
873 1074 : ALLOCATE (buffer_send(send_counter)%msg(size_send_buffer))
874 146100 : buffer_send(send_counter)%msg = 0.0_dp
875 358 : buffer_send(send_counter)%proc = proc_send
876 358 : proc_2_send_pos(proc_send) = send_counter
877 : END IF
878 : END DO
879 : ! loop over the locally held data and fill the buffer_send
880 : ! for doing that we need an array that keep track if the
881 : ! sequential increase of the index for each message
882 1074 : ALLOCATE (iii_vet(number_of_send))
883 716 : iii_vet = 0
884 1660 : DO jjB = 1, ncol_local_1i
885 1302 : send_pcol = col_indeces_info_1i(1, jjB, para_env_sub%mepos)
886 26236 : DO iiB = 1, nrow_local_1i
887 24576 : send_prow = row_indeces_info_1i(1, iiB, para_env_sub%mepos)
888 24576 : proc_send = grid_2_mepos(send_prow, send_pcol)
889 24576 : proc_send_sub = pos_info(proc_send)
890 24576 : send_counter = proc_2_send_pos(proc_send_sub)
891 24576 : iii_vet(send_counter) = iii_vet(send_counter) + 1
892 24576 : iii = iii_vet(send_counter)
893 25878 : buffer_send(send_counter)%msg(iii) = L1_mu_i%local_data(iiB, jjB)
894 : END DO
895 : END DO
896 : ! release the local data of L1_mu_i
897 358 : DEALLOCATE (L1_mu_i%local_data)
898 : ! and the same for L2_nu_a
899 5828 : DO jjB = 1, ncol_local_2a
900 5470 : send_pcol = col_indeces_info_2a(1, jjB, para_env_sub%mepos)
901 126994 : DO iiB = 1, nrow_local_2a
902 121166 : send_prow = row_indeces_info_2a(1, iiB, para_env_sub%mepos)
903 121166 : proc_send = grid_2_mepos(send_prow, send_pcol)
904 121166 : proc_send_sub = pos_info(proc_send)
905 121166 : send_counter = proc_2_send_pos(proc_send_sub)
906 121166 : iii_vet(send_counter) = iii_vet(send_counter) + 1
907 121166 : iii = iii_vet(send_counter)
908 126636 : buffer_send(send_counter)%msg(iii) = L2_nu_a%local_data(iiB, jjB)
909 : END DO
910 : END DO
911 358 : DEALLOCATE (L2_nu_a%local_data)
912 358 : DEALLOCATE (proc_2_send_pos)
913 358 : DEALLOCATE (iii_vet)
914 358 : CALL timestop(handle2)
915 :
916 : ! 3) create the buffer for receive, post the message with irecv
917 : ! and send the messages non-blocking
918 358 : CALL timeset(routineN//"_sub_isendrecv", handle2)
919 : ! count the number of messages to be received
920 358 : number_of_rec = 0
921 732 : DO proc_shift = 0, para_env_sub%num_pe - 1
922 374 : proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
923 732 : IF (map_rec_size(proc_receive) > 0) THEN
924 358 : number_of_rec = number_of_rec + 1
925 : END IF
926 : END DO
927 1432 : ALLOCATE (buffer_rec(number_of_rec))
928 358 : rec_counter = 0
929 732 : DO proc_shift = 0, para_env_sub%num_pe - 1
930 374 : proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
931 374 : size_rec_buffer = map_rec_size(proc_receive)
932 732 : IF (map_rec_size(proc_receive) > 0) THEN
933 358 : rec_counter = rec_counter + 1
934 : ! prepare the buffer for receive
935 1074 : ALLOCATE (buffer_rec(rec_counter)%msg(size_rec_buffer))
936 146100 : buffer_rec(rec_counter)%msg = 0.0_dp
937 358 : buffer_rec(rec_counter)%proc = proc_receive
938 : ! post the message to be received (not need to send to myself)
939 358 : IF (proc_receive /= para_env_sub%mepos) THEN
940 : CALL para_env_sub%irecv(buffer_rec(rec_counter)%msg, proc_receive, &
941 0 : buffer_rec(rec_counter)%msg_req)
942 : END IF
943 : END IF
944 : END DO
945 : ! send messages
946 1432 : ALLOCATE (req_send(number_of_send))
947 716 : req_send = mp_request_null
948 358 : send_counter = 0
949 732 : DO proc_shift = 0, para_env_sub%num_pe - 1
950 374 : proc_send = MODULO(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
951 732 : IF (map_send_size(proc_send) > 0) THEN
952 358 : send_counter = send_counter + 1
953 358 : IF (proc_send == para_env_sub%mepos) THEN
954 146100 : buffer_rec(send_counter)%msg(:) = buffer_send(send_counter)%msg
955 : ELSE
956 : CALL para_env_sub%isend(buffer_send(send_counter)%msg, proc_send, &
957 0 : buffer_send(send_counter)%msg_req)
958 0 : req_send(send_counter) = buffer_send(send_counter)%msg_req
959 : END IF
960 : END IF
961 : END DO
962 358 : DEALLOCATE (map_send_size)
963 358 : CALL timestop(handle2)
964 :
965 : ! 4) (if memory is a problem we should move this part after point 5)
966 : ! Here we create the new buffer for cyclic(ring) communication and
967 : ! we fill it with the data received from the other member of the
968 : ! subgroup
969 358 : CALL timeset(routineN//"_Bcyclic", handle2)
970 : ! first allocata new structure
971 1774 : ALLOCATE (buffer_cyclic(0:para_env_exchange%num_pe - 1))
972 1058 : DO iproc = 0, para_env_exchange%num_pe - 1
973 700 : rec_row_size = sizes(1, iproc)
974 700 : rec_col_size = sizes(2, iproc)
975 2800 : ALLOCATE (buffer_cyclic(iproc)%msg(rec_row_size, rec_col_size))
976 159964 : buffer_cyclic(iproc)%msg = 0.0_dp
977 : END DO
978 : ! now collect data from other member of the subgroup and fill
979 : ! buffer_cyclic
980 358 : rec_counter = 0
981 732 : DO proc_shift = 0, para_env_sub%num_pe - 1
982 374 : proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
983 374 : size_rec_buffer = map_rec_size(proc_receive)
984 732 : IF (map_rec_size(proc_receive) > 0) THEN
985 358 : rec_counter = rec_counter + 1
986 :
987 : ! wait for the message
988 358 : IF (proc_receive /= para_env_sub%mepos) CALL buffer_rec(rec_counter)%msg_req%wait()
989 :
990 358 : CALL timeset(routineN//"_fill", handle3)
991 358 : iii = 0
992 1660 : DO jjB = 1, sizes_1i(2, proc_receive)
993 1302 : send_pcol = col_indeces_info_1i(1, jjB, proc_receive)
994 1302 : j_local = col_indeces_info_1i(2, jjB, proc_receive)
995 26236 : DO iiB = 1, sizes_1i(1, proc_receive)
996 24576 : send_prow = row_indeces_info_1i(1, iiB, proc_receive)
997 24576 : proc_send = grid_2_mepos(send_prow, send_pcol)
998 24576 : proc_send_sub = pos_info(proc_send)
999 24576 : IF (proc_send_sub /= para_env_sub%mepos) CYCLE
1000 24576 : iii = iii + 1
1001 24576 : i_local = row_indeces_info_1i(2, iiB, proc_receive)
1002 24576 : proc_send_ex = pos_info_ex(proc_send)
1003 25878 : buffer_cyclic(proc_send_ex)%msg(i_local, j_local) = buffer_rec(rec_counter)%msg(iii)
1004 : END DO
1005 : END DO
1006 : ! and the same for L2_nu_a
1007 5828 : DO jjB = 1, sizes_2a(2, proc_receive)
1008 5470 : send_pcol = col_indeces_info_2a(1, jjB, proc_receive)
1009 5470 : j_local = col_indeces_info_2a(2, jjB, proc_receive)
1010 126994 : DO iiB = 1, sizes_2a(1, proc_receive)
1011 121166 : send_prow = row_indeces_info_2a(1, iiB, proc_receive)
1012 121166 : proc_send = grid_2_mepos(send_prow, send_pcol)
1013 121166 : proc_send_sub = pos_info(proc_send)
1014 121166 : IF (proc_send_sub /= para_env_sub%mepos) CYCLE
1015 121166 : iii = iii + 1
1016 121166 : i_local = row_indeces_info_2a(2, iiB, proc_receive)
1017 121166 : proc_send_ex = pos_info_ex(proc_send)
1018 126636 : buffer_cyclic(proc_send_ex)%msg(i_local, j_local) = buffer_rec(rec_counter)%msg(iii)
1019 : END DO
1020 : END DO
1021 358 : CALL timestop(handle3)
1022 :
1023 : ! deallocate the received message
1024 358 : DEALLOCATE (buffer_rec(rec_counter)%msg)
1025 : END IF
1026 : END DO
1027 358 : DEALLOCATE (row_indeces_info_1i)
1028 358 : DEALLOCATE (col_indeces_info_1i)
1029 358 : DEALLOCATE (row_indeces_info_2a)
1030 358 : DEALLOCATE (col_indeces_info_2a)
1031 716 : DEALLOCATE (buffer_rec)
1032 358 : DEALLOCATE (map_rec_size)
1033 358 : CALL timestop(handle2)
1034 :
1035 : ! 5) Wait for all messeges to be sent in the subgroup
1036 358 : CALL timeset(routineN//"_sub_waitall", handle2)
1037 358 : CALL mp_waitall(req_send(:))
1038 716 : DO send_counter = 1, number_of_send
1039 716 : DEALLOCATE (buffer_send(send_counter)%msg)
1040 : END DO
1041 716 : DEALLOCATE (buffer_send)
1042 358 : DEALLOCATE (req_send)
1043 358 : CALL timestop(handle2)
1044 :
1045 : ! 6) Start with ring communication
1046 358 : CALL timeset(routineN//"_ring", handle2)
1047 358 : proc_send_static = MODULO(para_env_exchange%mepos + 1, para_env_exchange%num_pe)
1048 358 : proc_receive_static = MODULO(para_env_exchange%mepos - 1, para_env_exchange%num_pe)
1049 1058 : max_row_size = MAXVAL(sizes(1, :))
1050 1058 : max_col_size = MAXVAL(sizes(2, :))
1051 1432 : ALLOCATE (mat_send(max_row_size, max_col_size))
1052 1074 : ALLOCATE (mat_rec(max_row_size, max_col_size))
1053 88168 : mat_send = 0.0_dp
1054 82264 : mat_send(1:nrow_local, 1:ncol_local) = buffer_cyclic(para_env_exchange%mepos)%msg(:, :)
1055 358 : DEALLOCATE (buffer_cyclic(para_env_exchange%mepos)%msg)
1056 700 : DO proc_shift = 1, para_env_exchange%num_pe - 1
1057 342 : proc_receive = MODULO(para_env_exchange%mepos - proc_shift, para_env_exchange%num_pe)
1058 :
1059 342 : rec_row_size = sizes(1, proc_receive)
1060 342 : rec_col_size = sizes(2, proc_receive)
1061 :
1062 83246 : mat_rec = 0.0_dp
1063 : CALL para_env_exchange%sendrecv(mat_send, proc_send_static, &
1064 342 : mat_rec, proc_receive_static)
1065 :
1066 83246 : mat_send = 0.0_dp
1067 : mat_send(1:rec_row_size, 1:rec_col_size) = mat_rec(1:rec_row_size, 1:rec_col_size) + &
1068 77342 : buffer_cyclic(proc_receive)%msg(:, :)
1069 :
1070 700 : DEALLOCATE (buffer_cyclic(proc_receive)%msg)
1071 : END DO
1072 : ! and finally
1073 : CALL para_env_exchange%sendrecv(mat_send, proc_send_static, &
1074 358 : mat_rec, proc_receive_static)
1075 82264 : L_mu_q%local_data(1:nrow_local, 1:ncol_local) = mat_rec(1:nrow_local, 1:ncol_local)
1076 1416 : DEALLOCATE (buffer_cyclic)
1077 358 : DEALLOCATE (mat_send)
1078 358 : DEALLOCATE (mat_rec)
1079 358 : CALL timestop(handle2)
1080 :
1081 : ! release para_env_exchange
1082 358 : CALL mp_para_env_release(para_env_exchange)
1083 :
1084 358 : CALL cp_fm_release(L1_mu_i)
1085 358 : CALL cp_fm_release(L2_nu_a)
1086 358 : DEALLOCATE (pos_info_ex)
1087 358 : DEALLOCATE (grid_2_mepos)
1088 358 : DEALLOCATE (sizes)
1089 358 : DEALLOCATE (sizes_1i)
1090 358 : DEALLOCATE (sizes_2a)
1091 :
1092 : ! update the P_ij block of P_MP2 with the
1093 : ! non-singular ij pairs
1094 358 : CALL timeset(routineN//"_Pij", handle2)
1095 358 : NULLIFY (fm_struct_tmp)
1096 : CALL cp_fm_struct_create(fm_struct_tmp, para_env=para_env, context=blacs_env, &
1097 358 : nrow_global=homo, ncol_global=homo)
1098 358 : CALL cp_fm_create(fm_P_ij, fm_struct_tmp, name="fm_P_ij")
1099 358 : CALL cp_fm_struct_release(fm_struct_tmp)
1100 358 : CALL cp_fm_set_all(fm_P_ij, 0.0_dp)
1101 :
1102 : ! we have it, update P_ij local
1103 : CALL cp_fm_get_info(matrix=fm_P_ij, &
1104 : nrow_local=nrow_local, &
1105 : ncol_local=ncol_local, &
1106 : row_indices=row_indices, &
1107 358 : col_indices=col_indices)
1108 :
1109 358 : IF (.NOT. mp2_env%method == ri_rpa_method_gpw) THEN
1110 : CALL parallel_gemm('T', 'N', homo, homo, nao, 1.0_dp, &
1111 : mo_coeff, L_mu_q, 0.0_dp, fm_P_ij, &
1112 : a_first_col=1, &
1113 : a_first_row=1, &
1114 : b_first_col=1, &
1115 : b_first_row=1, &
1116 : c_first_col=1, &
1117 330 : c_first_row=1)
1118 : CALL parallel_gemm('T', 'N', homo, homo, nao, -2.0_dp, &
1119 : L_mu_q, mo_coeff, 2.0_dp, fm_P_ij, &
1120 : a_first_col=1, &
1121 : a_first_row=1, &
1122 : b_first_col=1, &
1123 : b_first_row=1, &
1124 : c_first_col=1, &
1125 330 : c_first_row=1)
1126 :
1127 1524 : DO jjB = 1, ncol_local
1128 1194 : j_global = col_indices(jjB)
1129 3813 : DO iiB = 1, nrow_local
1130 2289 : i_global = row_indices(iiB)
1131 : ! diagonal elements and nearly degenerate ij pairs already updated
1132 3483 : IF (ABS(Eigenval(j_global) - Eigenval(i_global)) < mp2_env%ri_grad%eps_canonical) THEN
1133 679 : fm_P_ij%local_data(iiB, jjB) = mp2_env%ri_grad%P_ij(kspin)%array(i_global, j_global)
1134 : ELSE
1135 : fm_P_ij%local_data(iiB, jjB) = &
1136 1610 : factor*fm_P_ij%local_data(iiB, jjB)/(Eigenval(j_global) - Eigenval(i_global))
1137 : END IF
1138 : END DO
1139 : END DO
1140 : ELSE
1141 136 : DO jjB = 1, ncol_local
1142 108 : j_global = col_indices(jjB)
1143 346 : DO iiB = 1, nrow_local
1144 210 : i_global = row_indices(iiB)
1145 318 : fm_P_ij%local_data(iiB, jjB) = mp2_env%ri_grad%P_ij(kspin)%array(i_global, j_global)
1146 : END DO
1147 : END DO
1148 : END IF
1149 : ! deallocate the local P_ij
1150 358 : DEALLOCATE (mp2_env%ri_grad%P_ij(kspin)%array)
1151 358 : CALL timestop(handle2)
1152 :
1153 : ! Now create and fill the P matrix (MO)
1154 : ! FOR NOW WE ASSUME P_ab AND P_ij ARE REPLICATED OVER EACH MPI RANK
1155 358 : IF (.NOT. ALLOCATED(mp2_env%ri_grad%P_mo)) THEN
1156 1174 : ALLOCATE (mp2_env%ri_grad%P_mo(SIZE(mp2_env%ri_grad%mo_coeff_o)))
1157 : END IF
1158 :
1159 358 : CALL timeset(routineN//"_PMO", handle2)
1160 358 : NULLIFY (fm_struct_tmp)
1161 : CALL cp_fm_struct_create(fm_struct_tmp, para_env=para_env, context=blacs_env, &
1162 358 : nrow_global=nmo, ncol_global=nmo)
1163 358 : CALL cp_fm_create(mp2_env%ri_grad%P_mo(kspin), fm_struct_tmp, name="P_MP2_MO")
1164 358 : CALL cp_fm_set_all(mp2_env%ri_grad%P_mo(kspin), 0.0_dp)
1165 :
1166 : ! start with the (easy) occ-occ block and locally held P_ab elements
1167 358 : itmp = get_limit(virtual, para_env_sub%num_pe, para_env_sub%mepos)
1168 358 : my_B_virtual_start = itmp(1)
1169 358 : my_B_virtual_end = itmp(2)
1170 :
1171 : ! Fill occ-occ block
1172 358 : CALL cp_fm_to_fm_submat(fm_P_ij, mp2_env%ri_grad%P_mo(kspin), homo, homo, 1, 1, 1, 1)
1173 358 : CALL cp_fm_release(fm_P_ij)
1174 :
1175 : CALL cp_fm_get_info(mp2_env%ri_grad%P_mo(kspin), &
1176 : nrow_local=nrow_local, &
1177 : ncol_local=ncol_local, &
1178 : row_indices=row_indices, &
1179 358 : col_indices=col_indices)
1180 :
1181 358 : IF (mp2_env%method == ri_mp2_laplace) THEN
1182 : CALL parallel_gemm('T', 'N', virtual, virtual, nao, 1.0_dp, &
1183 : mo_coeff, L_mu_q, 0.0_dp, mp2_env%ri_grad%P_mo(kspin), &
1184 : a_first_col=homo + 1, &
1185 : a_first_row=1, &
1186 : b_first_col=homo + 1, &
1187 : b_first_row=1, &
1188 : c_first_col=homo + 1, &
1189 24 : c_first_row=homo + 1)
1190 : CALL parallel_gemm('T', 'N', virtual, virtual, nao, -2.0_dp, &
1191 : L_mu_q, mo_coeff, 2.0_dp, mp2_env%ri_grad%P_mo(kspin), &
1192 : a_first_col=homo + 1, &
1193 : a_first_row=1, &
1194 : b_first_col=homo + 1, &
1195 : b_first_row=1, &
1196 : c_first_col=homo + 1, &
1197 24 : c_first_row=homo + 1)
1198 : END IF
1199 :
1200 358 : IF (mp2_env%method == ri_mp2_method_gpw .OR. mp2_env%method == ri_rpa_method_gpw) THEN
1201 : ! With MP2 and RPA, we have already calculated the density matrix elements
1202 6514 : DO jjB = 1, ncol_local
1203 6180 : j_global = col_indices(jjB)
1204 6180 : IF (j_global <= homo) CYCLE
1205 61358 : DO iiB = 1, nrow_local
1206 56054 : i_global = row_indices(iiB)
1207 62234 : IF (my_B_virtual_start <= i_global - homo .AND. i_global - homo <= my_B_virtual_end) THEN
1208 : mp2_env%ri_grad%P_mo(kspin)%local_data(iiB, jjB) = &
1209 46400 : mp2_env%ri_grad%P_ab(kspin)%array(i_global - homo - my_B_virtual_start + 1, j_global - homo)
1210 : END IF
1211 : END DO
1212 : END DO
1213 24 : ELSE IF (mp2_env%method == ri_mp2_laplace) THEN
1214 : ! With Laplace-SOS-MP2, we still have to calculate the matrix elements of the non-degenerate pairs
1215 616 : DO jjB = 1, ncol_local
1216 592 : j_global = col_indices(jjB)
1217 592 : IF (j_global <= homo) CYCLE
1218 6764 : DO iiB = 1, nrow_local
1219 6240 : i_global = row_indices(iiB)
1220 6832 : IF (ABS(Eigenval(i_global) - Eigenval(j_global)) < mp2_env%ri_grad%eps_canonical) THEN
1221 565 : IF (my_B_virtual_start <= i_global - homo .AND. i_global - homo <= my_B_virtual_end) THEN
1222 : mp2_env%ri_grad%P_mo(kspin)%local_data(iiB, jjB) = &
1223 552 : mp2_env%ri_grad%P_ab(kspin)%array(i_global - homo - my_B_virtual_start + 1, j_global - homo)
1224 : ELSE
1225 13 : mp2_env%ri_grad%P_mo(kspin)%local_data(iiB, jjB) = 0.0_dp
1226 : END IF
1227 : ELSE
1228 : mp2_env%ri_grad%P_mo(kspin)%local_data(iiB, jjB) = &
1229 : factor*mp2_env%ri_grad%P_mo(kspin)%local_data(iiB, jjB)/ &
1230 5675 : (Eigenval(i_global) - Eigenval(j_global))
1231 : END IF
1232 : END DO
1233 : END DO
1234 : ELSE
1235 0 : CPABORT("Calculation of virt-virt block of density matrix is dealt with elsewhere!")
1236 : END IF
1237 :
1238 : ! send around the sub_group the local data and check if we
1239 : ! have to update our block with external elements
1240 1074 : ALLOCATE (mepos_2_grid(2, 0:para_env_sub%num_pe - 1))
1241 1074 : CALL para_env_sub%allgather([myprow, mypcol], mepos_2_grid)
1242 :
1243 1074 : ALLOCATE (sizes(2, 0:para_env_sub%num_pe - 1))
1244 1074 : CALL para_env_sub%allgather([nrow_local, ncol_local], sizes)
1245 :
1246 1432 : ALLOCATE (ab_rec(nrow_local, ncol_local))
1247 374 : DO proc_shift = 1, para_env_sub%num_pe - 1
1248 16 : proc_send = MODULO(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
1249 16 : proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
1250 :
1251 16 : send_prow = mepos_2_grid(1, proc_send)
1252 16 : send_pcol = mepos_2_grid(2, proc_send)
1253 :
1254 16 : send_row_size = sizes(1, proc_send)
1255 16 : send_col_size = sizes(2, proc_send)
1256 :
1257 64 : ALLOCATE (ab_send(send_row_size, send_col_size))
1258 4922 : ab_send = 0.0_dp
1259 :
1260 : ! first loop over row since in this way we can cycle
1261 206 : DO iiB = 1, send_row_size
1262 190 : i_global = mp2_env%ri_grad%P_mo(kspin)%matrix_struct%l2g_row(iiB, send_prow)
1263 190 : IF (i_global <= homo) CYCLE
1264 154 : i_global = i_global - homo
1265 154 : IF (.NOT. (my_B_virtual_start <= i_global .AND. i_global <= my_B_virtual_end)) CYCLE
1266 655 : DO jjB = 1, send_col_size
1267 614 : j_global = mp2_env%ri_grad%P_mo(kspin)%matrix_struct%l2g_col(jjB, send_pcol)
1268 614 : IF (j_global <= homo) CYCLE
1269 487 : j_global = j_global - homo
1270 804 : ab_send(iiB, jjB) = mp2_env%ri_grad%P_ab(kspin)%array(i_global - my_B_virtual_start + 1, j_global)
1271 : END DO
1272 : END DO
1273 :
1274 4922 : ab_rec = 0.0_dp
1275 : CALL para_env_sub%sendrecv(ab_send, proc_send, &
1276 16 : ab_rec, proc_receive)
1277 : mp2_env%ri_grad%P_mo(kspin)%local_data(1:nrow_local, 1:ncol_local) = &
1278 : mp2_env%ri_grad%P_mo(kspin)%local_data(1:nrow_local, 1:ncol_local) + &
1279 4922 : ab_rec(1:nrow_local, 1:ncol_local)
1280 :
1281 374 : DEALLOCATE (ab_send)
1282 : END DO
1283 358 : DEALLOCATE (ab_rec)
1284 358 : DEALLOCATE (mepos_2_grid)
1285 358 : DEALLOCATE (sizes)
1286 :
1287 : ! deallocate the local P_ab
1288 358 : DEALLOCATE (mp2_env%ri_grad%P_ab(kspin)%array)
1289 358 : CALL timestop(handle2)
1290 :
1291 : ! create also W_MP2_MO
1292 358 : CALL timeset(routineN//"_WMO", handle2)
1293 358 : IF (.NOT. ALLOCATED(mp2_env%ri_grad%W_mo)) THEN
1294 1174 : ALLOCATE (mp2_env%ri_grad%W_mo(SIZE(mp2_env%ri_grad%mo_coeff_o)))
1295 : END IF
1296 :
1297 358 : CALL cp_fm_create(mp2_env%ri_grad%W_mo(kspin), fm_struct_tmp, name="W_MP2_MO")
1298 358 : CALL cp_fm_struct_release(fm_struct_tmp)
1299 :
1300 : ! all block
1301 : CALL parallel_gemm('T', 'N', nmo, nmo, nao, 2.0_dp*factor, &
1302 : L_mu_q, mo_coeff, 0.0_dp, mp2_env%ri_grad%W_mo(kspin), &
1303 : a_first_col=1, &
1304 : a_first_row=1, &
1305 : b_first_col=1, &
1306 : b_first_row=1, &
1307 : c_first_col=1, &
1308 358 : c_first_row=1)
1309 :
1310 : ! occ-occ block
1311 : CALL parallel_gemm('T', 'N', homo, homo, nao, -2.0_dp*factor, &
1312 : L_mu_q, mo_coeff, 0.0_dp, mp2_env%ri_grad%W_mo(kspin), &
1313 : a_first_col=1, &
1314 : a_first_row=1, &
1315 : b_first_col=1, &
1316 : b_first_row=1, &
1317 : c_first_col=1, &
1318 358 : c_first_row=1)
1319 :
1320 : ! occ-virt block
1321 : CALL parallel_gemm('T', 'N', homo, virtual, nao, 2.0_dp*factor, &
1322 : mo_coeff, L_mu_q, 0.0_dp, mp2_env%ri_grad%W_mo(kspin), &
1323 : a_first_col=1, &
1324 : a_first_row=1, &
1325 : b_first_col=homo + 1, &
1326 : b_first_row=1, &
1327 : c_first_col=homo + 1, &
1328 358 : c_first_row=1)
1329 358 : CALL timestop(handle2)
1330 :
1331 : ! Calculate occ-virt block of the lagrangian in MO
1332 358 : CALL timeset(routineN//"_Ljb", handle2)
1333 358 : IF (.NOT. ALLOCATED(mp2_env%ri_grad%L_jb)) THEN
1334 1174 : ALLOCATE (mp2_env%ri_grad%L_jb(SIZE(mp2_env%ri_grad%mo_coeff_o)))
1335 : END IF
1336 :
1337 : CALL cp_fm_struct_create(fm_struct_tmp, para_env=para_env, context=blacs_env, &
1338 358 : nrow_global=homo, ncol_global=virtual)
1339 358 : CALL cp_fm_create(mp2_env%ri_grad%L_jb(kspin), fm_struct_tmp, name="fm_L_jb", set_zero=.TRUE.)
1340 358 : CALL cp_fm_struct_release(fm_struct_tmp)
1341 :
1342 : ! first Virtual
1343 : CALL parallel_gemm('T', 'N', homo, virtual, nao, 2.0_dp*factor, &
1344 : L_mu_q, mo_coeff, 0.0_dp, mp2_env%ri_grad%L_jb(kspin), &
1345 : a_first_col=1, &
1346 : a_first_row=1, &
1347 : b_first_col=homo + 1, &
1348 : b_first_row=1, &
1349 : c_first_col=1, &
1350 358 : c_first_row=1)
1351 : ! then occupied
1352 : CALL parallel_gemm('T', 'N', homo, virtual, nao, 2.0_dp*factor, &
1353 : mo_coeff, L_mu_q, 1.0_dp, mp2_env%ri_grad%L_jb(kspin), &
1354 : a_first_col=1, &
1355 : a_first_row=1, &
1356 : b_first_col=homo + 1, &
1357 : b_first_row=1, &
1358 : c_first_col=1, &
1359 358 : c_first_row=1)
1360 :
1361 : ! finally release L_mu_q
1362 358 : CALL cp_fm_release(L_mu_q)
1363 358 : CALL timestop(handle2)
1364 :
1365 : ! here we should be done next CPHF
1366 :
1367 358 : DEALLOCATE (pos_info)
1368 :
1369 358 : CALL timestop(handle)
1370 :
1371 7518 : END SUBROUTINE create_W_P
1372 :
1373 : END MODULE mp2_ri_grad
|