Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief Routines to calculate gradients of RI-GPW-MP2 energy using pw
10 : !> \par History
11 : !> 10.2013 created [Mauro Del Ben]
12 : ! **************************************************************************************************
13 : MODULE mp2_ri_grad
14 : USE atomic_kind_types, ONLY: atomic_kind_type,&
15 : get_atomic_kind_set
16 : USE cell_types, ONLY: cell_type
17 : USE cp_blacs_env, ONLY: cp_blacs_env_type
18 : USE cp_control_types, ONLY: dft_control_type
19 : USE cp_dbcsr_operations, ONLY: copy_dbcsr_to_fm,&
20 : dbcsr_deallocate_matrix_set
21 : USE cp_eri_mme_interface, ONLY: cp_eri_mme_param
22 : USE cp_fm_struct, ONLY: cp_fm_struct_create,&
23 : cp_fm_struct_release,&
24 : cp_fm_struct_type
25 : USE cp_fm_types, ONLY: &
26 : cp_fm_create, cp_fm_get_info, cp_fm_indxg2l, cp_fm_indxg2p, cp_fm_indxl2g, cp_fm_release, &
27 : cp_fm_set_all, cp_fm_to_fm_submat, cp_fm_type
28 : USE dbcsr_api, ONLY: &
29 : dbcsr_add, dbcsr_copy, dbcsr_copy_into_existing, dbcsr_create, dbcsr_multiply, &
30 : dbcsr_p_type, dbcsr_release, dbcsr_set, dbcsr_transposed, dbcsr_type, &
31 : dbcsr_type_no_symmetry, dbcsr_type_symmetric
32 : USE input_constants, ONLY: do_eri_gpw,&
33 : do_eri_mme,&
34 : ri_mp2_laplace,&
35 : ri_mp2_method_gpw,&
36 : ri_rpa_method_gpw
37 : USE kinds, ONLY: dp
38 : USE libint_2c_3c, ONLY: compare_potential_types
39 : USE message_passing, ONLY: mp_para_env_release,&
40 : mp_para_env_type,&
41 : mp_request_null,&
42 : mp_request_type,&
43 : mp_waitall
44 : USE mp2_eri, ONLY: mp2_eri_2c_integrate,&
45 : mp2_eri_3c_integrate,&
46 : mp2_eri_deallocate_forces,&
47 : mp2_eri_force
48 : USE mp2_eri_gpw, ONLY: cleanup_gpw,&
49 : integrate_potential_forces_2c,&
50 : integrate_potential_forces_3c_1c,&
51 : integrate_potential_forces_3c_2c,&
52 : prepare_gpw
53 : USE mp2_types, ONLY: integ_mat_buffer_type,&
54 : integ_mat_buffer_type_2D,&
55 : mp2_type
56 : USE parallel_gemm_api, ONLY: parallel_gemm
57 : USE particle_types, ONLY: particle_type
58 : USE pw_env_types, ONLY: pw_env_type
59 : USE pw_poisson_types, ONLY: pw_poisson_type
60 : USE pw_pool_types, ONLY: pw_pool_type
61 : USE pw_types, ONLY: pw_c1d_gs_type,&
62 : pw_r3d_rs_type
63 : USE qs_environment_types, ONLY: get_qs_env,&
64 : qs_environment_type
65 : USE qs_force_types, ONLY: allocate_qs_force,&
66 : qs_force_type,&
67 : zero_qs_force
68 : USE qs_kind_types, ONLY: qs_kind_type
69 : USE qs_ks_types, ONLY: qs_ks_env_type
70 : USE qs_neighbor_list_types, ONLY: neighbor_list_set_p_type
71 : USE task_list_types, ONLY: task_list_type
72 : USE util, ONLY: get_limit
73 : USE virial_types, ONLY: virial_type
74 : #include "./base/base_uses.f90"
75 :
76 : IMPLICIT NONE
77 :
78 : PRIVATE
79 :
80 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'mp2_ri_grad'
81 :
82 : PUBLIC :: calc_ri_mp2_nonsep
83 :
84 : CONTAINS
85 :
86 : ! **************************************************************************************************
87 : !> \brief Calculate the non-separable part of the gradients and update the
88 : !> Lagrangian
89 : !> \param qs_env ...
90 : !> \param mp2_env ...
91 : !> \param para_env ...
92 : !> \param para_env_sub ...
93 : !> \param cell ...
94 : !> \param particle_set ...
95 : !> \param atomic_kind_set ...
96 : !> \param qs_kind_set ...
97 : !> \param mo_coeff ...
98 : !> \param nmo ...
99 : !> \param homo ...
100 : !> \param dimen_RI ...
101 : !> \param Eigenval ...
102 : !> \param my_group_L_start ...
103 : !> \param my_group_L_end ...
104 : !> \param my_group_L_size ...
105 : !> \param sab_orb_sub ...
106 : !> \param mat_munu ...
107 : !> \param blacs_env_sub ...
108 : !> \author Mauro Del Ben
109 : ! **************************************************************************************************
110 260 : SUBROUTINE calc_ri_mp2_nonsep(qs_env, mp2_env, para_env, para_env_sub, cell, particle_set, &
111 260 : atomic_kind_set, qs_kind_set, mo_coeff, nmo, homo, dimen_RI, Eigenval, &
112 : my_group_L_start, my_group_L_end, my_group_L_size, sab_orb_sub, mat_munu, &
113 : blacs_env_sub)
114 : TYPE(qs_environment_type), POINTER :: qs_env
115 : TYPE(mp2_type) :: mp2_env
116 : TYPE(mp_para_env_type), POINTER :: para_env, para_env_sub
117 : TYPE(cell_type), POINTER :: cell
118 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
119 : TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
120 : TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
121 : TYPE(cp_fm_type), DIMENSION(:), INTENT(IN) :: mo_coeff
122 : INTEGER, INTENT(IN) :: nmo
123 : INTEGER, DIMENSION(:), INTENT(IN) :: homo
124 : INTEGER, INTENT(IN) :: dimen_RI
125 : REAL(KIND=dp), DIMENSION(:, :), INTENT(IN) :: Eigenval
126 : INTEGER, INTENT(IN) :: my_group_L_start, my_group_L_end, &
127 : my_group_L_size
128 : TYPE(neighbor_list_set_p_type), DIMENSION(:), &
129 : POINTER :: sab_orb_sub
130 : TYPE(dbcsr_p_type), INTENT(INOUT) :: mat_munu
131 : TYPE(cp_blacs_env_type), POINTER :: blacs_env_sub
132 :
133 : CHARACTER(LEN=*), PARAMETER :: routineN = 'calc_ri_mp2_nonsep'
134 :
135 : INTEGER :: dimen, eri_method, handle, handle2, i, &
136 : ikind, ispin, itmp(2), L_counter, LLL, &
137 : my_P_end, my_P_size, my_P_start, nspins
138 260 : INTEGER, ALLOCATABLE, DIMENSION(:) :: atom_of_kind, kind_of, natom_of_kind, &
139 260 : virtual
140 : LOGICAL :: alpha_beta, use_virial
141 : REAL(KIND=dp) :: cutoff_old, eps_filter, factor, &
142 : factor_2c, relative_cutoff_old
143 260 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: e_cutoff_old
144 260 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: G_PQ_local, G_PQ_local_2
145 : REAL(KIND=dp), DIMENSION(3, 3) :: h_stress, pv_virial
146 260 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: I_tmp2
147 : TYPE(cp_eri_mme_param), POINTER :: eri_param
148 : TYPE(cp_fm_struct_type), POINTER :: fm_struct_tmp
149 260 : TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:) :: L1_mu_i, L2_nu_a
150 : TYPE(dbcsr_p_type) :: matrix_P_munu
151 : TYPE(dbcsr_p_type), ALLOCATABLE, DIMENSION(:) :: mo_coeff_o, mo_coeff_v
152 260 : TYPE(dbcsr_p_type), ALLOCATABLE, DIMENSION(:, :) :: G_P_ia
153 260 : TYPE(dbcsr_p_type), DIMENSION(:), POINTER :: mat_munu_local, matrix_P_munu_local
154 : TYPE(dbcsr_type) :: matrix_P_munu_nosym
155 260 : TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:) :: Lag_mu_i_1, Lag_nu_a_2, matrix_P_inu
156 : TYPE(dft_control_type), POINTER :: dft_control
157 260 : TYPE(mp2_eri_force), ALLOCATABLE, DIMENSION(:) :: force_2c, force_2c_RI, force_3c_aux, &
158 260 : force_3c_orb_mu, force_3c_orb_nu
159 1040 : TYPE(pw_c1d_gs_type) :: dvg(3), pot_g, rho_g, rho_g_copy
160 : TYPE(pw_env_type), POINTER :: pw_env_sub
161 : TYPE(pw_poisson_type), POINTER :: poisson_env
162 : TYPE(pw_pool_type), POINTER :: auxbas_pw_pool
163 : TYPE(pw_r3d_rs_type) :: psi_L, rho_r
164 260 : TYPE(qs_force_type), DIMENSION(:), POINTER :: force, mp2_force
165 : TYPE(qs_ks_env_type), POINTER :: ks_env
166 : TYPE(task_list_type), POINTER :: task_list_sub
167 : TYPE(virial_type), POINTER :: virial
168 :
169 260 : CALL timeset(routineN, handle)
170 :
171 260 : eri_method = mp2_env%eri_method
172 260 : eri_param => mp2_env%eri_mme_param
173 :
174 : ! Find out whether we have a closed or open shell
175 260 : nspins = SIZE(homo)
176 260 : alpha_beta = (nspins == 2)
177 :
178 260 : dimen = nmo
179 780 : ALLOCATE (virtual(nspins))
180 604 : virtual(:) = dimen - homo(:)
181 260 : eps_filter = mp2_env%mp2_gpw%eps_filter
182 29399 : ALLOCATE (mo_coeff_o(nspins), mo_coeff_v(nspins), G_P_ia(nspins, my_group_L_size))
183 604 : DO ispin = 1, nspins
184 344 : mo_coeff_o(ispin)%matrix => mp2_env%ri_grad%mo_coeff_o(ispin)%matrix
185 344 : mo_coeff_v(ispin)%matrix => mp2_env%ri_grad%mo_coeff_v(ispin)%matrix
186 15835 : DO LLL = 1, my_group_L_size
187 15575 : G_P_ia(ispin, LLL)%matrix => mp2_env%ri_grad%G_P_ia(LLL, ispin)%matrix
188 : END DO
189 : END DO
190 260 : DEALLOCATE (mp2_env%ri_grad%G_P_ia)
191 :
192 260 : itmp = get_limit(dimen_RI, para_env_sub%num_pe, para_env_sub%mepos)
193 260 : my_P_start = itmp(1)
194 260 : my_P_end = itmp(2)
195 260 : my_P_size = itmp(2) - itmp(1) + 1
196 :
197 1040 : ALLOCATE (G_PQ_local(dimen_RI, my_group_L_size))
198 981164 : G_PQ_local = 0.0_dp
199 927730 : G_PQ_local(my_P_start:my_P_end, :) = mp2_env%ri_grad%Gamma_PQ
200 260 : DEALLOCATE (mp2_env%ri_grad%Gamma_PQ)
201 927730 : G_PQ_local(my_P_start:my_P_end, :) = G_PQ_local(my_P_start:my_P_end, :)/REAL(nspins, dp)
202 260 : CALL para_env_sub%sum(G_PQ_local)
203 260 : IF (.NOT. compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter)) THEN
204 48 : ALLOCATE (G_PQ_local_2(dimen_RI, my_group_L_size))
205 41844 : G_PQ_local_2 = 0.0_dp
206 41844 : G_PQ_local_2(my_P_start:my_P_end, :) = mp2_env%ri_grad%Gamma_PQ_2
207 12 : DEALLOCATE (mp2_env%ri_grad%Gamma_PQ_2)
208 41844 : G_PQ_local_2(my_P_start:my_P_end, :) = G_PQ_local_2(my_P_start:my_P_end, :)/REAL(nspins, dp)
209 12 : CALL para_env_sub%sum(G_PQ_local_2)
210 : END IF
211 :
212 : ! create matrix holding the back transformation (G_P_inu)
213 1124 : ALLOCATE (matrix_P_inu(nspins))
214 604 : DO ispin = 1, nspins
215 604 : CALL dbcsr_create(matrix_P_inu(ispin), template=mo_coeff_o(ispin)%matrix)
216 : END DO
217 :
218 : ! non symmetric matrix
219 : CALL dbcsr_create(matrix_P_munu_nosym, template=mat_munu%matrix, &
220 260 : matrix_type=dbcsr_type_no_symmetry)
221 :
222 : ! create Lagrangian matrices in mixed AO/MO formalism
223 1124 : ALLOCATE (Lag_mu_i_1(nspins))
224 604 : DO ispin = 1, nspins
225 344 : CALL dbcsr_create(Lag_mu_i_1(ispin), template=mo_coeff_o(ispin)%matrix)
226 604 : CALL dbcsr_set(Lag_mu_i_1(ispin), 0.0_dp)
227 : END DO
228 :
229 1124 : ALLOCATE (Lag_nu_a_2(nspins))
230 604 : DO ispin = 1, nspins
231 344 : CALL dbcsr_create(Lag_nu_a_2(ispin), template=mo_coeff_v(ispin)%matrix)
232 604 : CALL dbcsr_set(Lag_nu_a_2(ispin), 0.0_dp)
233 : END DO
234 :
235 : ! get forces
236 260 : NULLIFY (force, virial)
237 260 : CALL get_qs_env(qs_env=qs_env, force=force, virial=virial)
238 :
239 : ! check if we want to calculate the virial
240 260 : use_virial = virial%pv_availability .AND. (.NOT. virial%pv_numer)
241 :
242 260 : CALL get_atomic_kind_set(atomic_kind_set=atomic_kind_set, natom_of_kind=natom_of_kind)
243 260 : NULLIFY (mp2_force)
244 260 : CALL allocate_qs_force(mp2_force, natom_of_kind)
245 260 : DEALLOCATE (natom_of_kind)
246 260 : CALL zero_qs_force(mp2_force)
247 260 : mp2_env%ri_grad%mp2_force => mp2_force
248 :
249 260 : factor_2c = -4.0_dp
250 260 : IF (mp2_env%method == ri_rpa_method_gpw) THEN
251 20 : factor_2c = -1.0_dp
252 20 : IF (alpha_beta) factor_2c = -2.0_dp
253 : END IF
254 :
255 : ! prepare integral derivatives with mme method
256 260 : IF (eri_method .EQ. do_eri_mme) THEN
257 1184 : ALLOCATE (matrix_P_munu_local(my_group_L_size))
258 1184 : ALLOCATE (mat_munu_local(my_group_L_size))
259 26 : L_counter = 0
260 1132 : DO LLL = my_group_L_start, my_group_L_end
261 1106 : L_counter = L_counter + 1
262 1106 : ALLOCATE (mat_munu_local(L_counter)%matrix)
263 : CALL dbcsr_create(mat_munu_local(L_counter)%matrix, template=mat_munu%matrix, &
264 1106 : matrix_type=dbcsr_type_symmetric)
265 1106 : CALL dbcsr_copy(mat_munu_local(L_counter)%matrix, mat_munu%matrix)
266 1106 : CALL dbcsr_set(mat_munu_local(L_counter)%matrix, 0.0_dp)
267 :
268 : CALL G_P_transform_MO_to_AO(matrix_P_munu_local(L_counter)%matrix, matrix_P_munu_nosym, mat_munu%matrix, &
269 : G_P_ia(:, L_counter), matrix_P_inu, &
270 1132 : mo_coeff_v, mo_coeff_o, eps_filter)
271 : END DO
272 :
273 104 : ALLOCATE (I_tmp2(dimen_RI, my_group_L_size))
274 95790 : I_tmp2(:, :) = 0.0_dp
275 : CALL mp2_eri_2c_integrate(eri_param, mp2_env%potential_parameter, para_env_sub, qs_env, &
276 : basis_type_a="RI_AUX", basis_type_b="RI_AUX", &
277 : hab=I_tmp2, first_b=my_group_L_start, last_b=my_group_L_end, &
278 26 : eri_method=eri_method, pab=G_PQ_local, force_a=force_2c)
279 26 : IF (.NOT. compare_potential_types(mp2_env%potential_parameter, mp2_env%ri_metric)) THEN
280 0 : I_tmp2(:, :) = 0.0_dp
281 : CALL mp2_eri_2c_integrate(eri_param, mp2_env%ri_metric, para_env_sub, qs_env, &
282 : basis_type_a="RI_AUX", basis_type_b="RI_AUX", &
283 : hab=I_tmp2, first_b=my_group_L_start, last_b=my_group_L_end, &
284 0 : eri_method=eri_method, pab=G_PQ_local_2, force_a=force_2c_RI)
285 : END IF
286 26 : DEALLOCATE (I_tmp2)
287 :
288 : CALL mp2_eri_3c_integrate(eri_param, mp2_env%ri_metric, para_env_sub, qs_env, &
289 : first_c=my_group_L_start, last_c=my_group_L_end, mat_ab=mat_munu_local, &
290 : basis_type_a="ORB", basis_type_b="ORB", basis_type_c="RI_AUX", &
291 : sab_nl=sab_orb_sub, eri_method=eri_method, &
292 : pabc=matrix_P_munu_local, &
293 26 : force_a=force_3c_orb_mu, force_b=force_3c_orb_nu, force_c=force_3c_aux)
294 :
295 26 : L_counter = 0
296 1132 : DO LLL = my_group_L_start, my_group_L_end
297 1106 : L_counter = L_counter + 1
298 2432 : DO ispin = 1, nspins
299 : CALL dbcsr_multiply("N", "T", 1.0_dp, mo_coeff_v(ispin)%matrix, G_P_ia(ispin, L_counter)%matrix, &
300 2432 : 0.0_dp, matrix_P_inu(ispin), filter_eps=eps_filter)
301 : END DO
302 :
303 : ! The matrices of G_P_ia are deallocated here
304 : CALL update_lagrangian(mat_munu_local(L_counter)%matrix, matrix_P_inu, Lag_mu_i_1, &
305 : G_P_ia(:, L_counter), mo_coeff_o, Lag_nu_a_2, &
306 1132 : eps_filter)
307 : END DO
308 :
309 72 : DO ikind = 1, SIZE(force)
310 : mp2_force(ikind)%mp2_non_sep(:, :) = factor_2c*force_2c(ikind)%forces(:, :) + &
311 : force_3c_orb_mu(ikind)%forces(:, :) + &
312 : force_3c_orb_nu(ikind)%forces(:, :) + &
313 334 : force_3c_aux(ikind)%forces(:, :)
314 :
315 72 : IF (.NOT. compare_potential_types(mp2_env%potential_parameter, mp2_env%ri_metric)) THEN
316 0 : mp2_force(ikind)%mp2_non_sep(:, :) = mp2_force(ikind)%mp2_non_sep(:, :) + factor_2c*force_2c_RI(ikind)%forces
317 : END IF
318 : END DO
319 :
320 26 : CALL mp2_eri_deallocate_forces(force_2c)
321 26 : IF (.NOT. compare_potential_types(mp2_env%potential_parameter, mp2_env%ri_metric)) THEN
322 0 : CALL mp2_eri_deallocate_forces(force_2c_RI)
323 : END IF
324 26 : CALL mp2_eri_deallocate_forces(force_3c_aux)
325 26 : CALL mp2_eri_deallocate_forces(force_3c_orb_mu)
326 26 : CALL mp2_eri_deallocate_forces(force_3c_orb_nu)
327 26 : CALL dbcsr_deallocate_matrix_set(matrix_P_munu_local)
328 26 : CALL dbcsr_deallocate_matrix_set(mat_munu_local)
329 :
330 234 : ELSEIF (eri_method == do_eri_gpw) THEN
331 234 : CALL get_qs_env(qs_env, ks_env=ks_env)
332 :
333 234 : CALL get_atomic_kind_set(atomic_kind_set, kind_of=kind_of, atom_of_kind=atom_of_kind)
334 :
335 : ! Supporting stuff for GPW
336 : CALL prepare_gpw(qs_env, dft_control, e_cutoff_old, cutoff_old, relative_cutoff_old, para_env_sub, pw_env_sub, &
337 234 : auxbas_pw_pool, poisson_env, task_list_sub, rho_r, rho_g, pot_g, psi_L, sab_orb_sub)
338 :
339 : ! in case virial is required we need auxiliary pw
340 : ! for calculate the MP2-volume contribution to the virial
341 : ! (hartree potential derivatives)
342 234 : IF (use_virial) THEN
343 46 : CALL auxbas_pw_pool%create_pw(rho_g_copy)
344 184 : DO i = 1, 3
345 184 : CALL auxbas_pw_pool%create_pw(dvg(i))
346 : END DO
347 : END IF
348 :
349 : ! start main loop over auxiliary basis functions
350 234 : CALL timeset(routineN//"_loop", handle2)
351 :
352 234 : IF (use_virial) h_stress = 0.0_dp
353 :
354 234 : L_counter = 0
355 10528 : DO LLL = my_group_L_start, my_group_L_end
356 10294 : L_counter = L_counter + 1
357 :
358 : CALL G_P_transform_MO_to_AO(matrix_P_munu%matrix, matrix_P_munu_nosym, mat_munu%matrix, &
359 : G_P_ia(:, L_counter), matrix_P_inu, &
360 10294 : mo_coeff_v, mo_coeff_o, eps_filter)
361 :
362 : CALL integrate_potential_forces_2c(rho_r, LLL, mo_coeff(1), rho_g, atomic_kind_set, &
363 : qs_kind_set, particle_set, cell, pw_env_sub, poisson_env, &
364 : pot_g, mp2_env%potential_parameter, use_virial, &
365 : rho_g_copy, dvg, kind_of, atom_of_kind, G_PQ_local(:, L_counter), &
366 10294 : mp2_force, h_stress, para_env_sub, dft_control, psi_L, factor_2c)
367 :
368 10294 : IF (.NOT. compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter)) THEN
369 :
370 : CALL integrate_potential_forces_2c(rho_r, LLL, mo_coeff(1), rho_g, atomic_kind_set, &
371 : qs_kind_set, particle_set, cell, pw_env_sub, poisson_env, &
372 : pot_g, mp2_env%ri_metric, use_virial, &
373 : rho_g_copy, dvg, kind_of, atom_of_kind, G_PQ_local_2(:, L_counter), &
374 498 : mp2_force, h_stress, para_env_sub, dft_control, psi_L, factor_2c)
375 : END IF
376 :
377 34042 : IF (use_virial) pv_virial = virial%pv_virial
378 : CALL integrate_potential_forces_3c_1c(mat_munu, rho_r, matrix_P_munu, qs_env, pw_env_sub, &
379 10294 : task_list_sub)
380 10294 : IF (use_virial) THEN
381 25727 : h_stress = h_stress + (virial%pv_virial - pv_virial)
382 25727 : virial%pv_virial = pv_virial
383 : END IF
384 :
385 : ! The matrices of G_P_ia are deallocated here
386 : CALL update_lagrangian(mat_munu%matrix, matrix_P_inu, Lag_mu_i_1, &
387 : G_P_ia(:, L_counter), mo_coeff_o, Lag_nu_a_2, &
388 10294 : eps_filter)
389 :
390 : CALL integrate_potential_forces_3c_2c(matrix_P_munu, rho_r, rho_g, task_list_sub, pw_env_sub, &
391 : mp2_env%ri_metric, &
392 : ks_env, poisson_env, pot_g, use_virial, rho_g_copy, dvg, &
393 : h_stress, para_env_sub, kind_of, atom_of_kind, &
394 10528 : qs_kind_set, particle_set, cell, LLL, mp2_force, dft_control)
395 : END DO
396 :
397 234 : CALL timestop(handle2)
398 :
399 234 : DEALLOCATE (kind_of)
400 234 : DEALLOCATE (atom_of_kind)
401 :
402 234 : IF (use_virial) THEN
403 46 : CALL auxbas_pw_pool%give_back_pw(rho_g_copy)
404 184 : DO i = 1, 3
405 184 : CALL auxbas_pw_pool%give_back_pw(dvg(i))
406 : END DO
407 : END IF
408 :
409 : CALL cleanup_gpw(qs_env, e_cutoff_old, cutoff_old, relative_cutoff_old, para_env_sub, pw_env_sub, &
410 234 : task_list_sub, auxbas_pw_pool, rho_r, rho_g, pot_g, psi_L)
411 :
412 234 : CALL dbcsr_release(matrix_P_munu%matrix)
413 234 : DEALLOCATE (matrix_P_munu%matrix)
414 :
415 : END IF
416 :
417 812 : IF (use_virial) mp2_env%ri_grad%mp2_virial = h_stress
418 :
419 260 : DEALLOCATE (G_PQ_local)
420 260 : IF (.NOT. compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter)) DEALLOCATE (G_PQ_local_2)
421 :
422 260 : CALL dbcsr_release(matrix_P_munu_nosym)
423 :
424 604 : DO ispin = 1, nspins
425 604 : CALL dbcsr_release(matrix_P_inu(ispin))
426 : END DO
427 260 : DEALLOCATE (matrix_P_inu, G_P_ia)
428 :
429 : ! move the forces in the correct place
430 260 : IF (eri_method .EQ. do_eri_gpw) THEN
431 692 : DO ikind = 1, SIZE(mp2_force)
432 6218 : mp2_force(ikind)%mp2_non_sep(:, :) = force(ikind)%rho_elec(:, :)
433 3572 : force(ikind)%rho_elec(:, :) = 0.0_dp
434 : END DO
435 : END IF
436 :
437 : ! Now we move from the local matrices to the global ones
438 : ! defined over all MPI tasks
439 : ! Start with moving from the DBCSR to FM for the lagrangians
440 :
441 1988 : ALLOCATE (L1_mu_i(nspins), L2_nu_a(nspins))
442 604 : DO ispin = 1, nspins
443 : ! Now we move from the local matrices to the global ones
444 : ! defined over all MPI tasks
445 : ! Start with moving from the DBCSR to FM for the lagrangians
446 344 : NULLIFY (fm_struct_tmp)
447 : CALL cp_fm_struct_create(fm_struct_tmp, para_env=para_env_sub, context=blacs_env_sub, &
448 344 : nrow_global=dimen, ncol_global=homo(ispin))
449 344 : CALL cp_fm_create(L1_mu_i(ispin), fm_struct_tmp, name="Lag_mu_i")
450 344 : CALL cp_fm_struct_release(fm_struct_tmp)
451 344 : CALL cp_fm_set_all(L1_mu_i(ispin), 0.0_dp)
452 344 : CALL copy_dbcsr_to_fm(matrix=Lag_mu_i_1(ispin), fm=L1_mu_i(ispin))
453 :
454 : ! release Lag_mu_i_1
455 344 : CALL dbcsr_release(Lag_mu_i_1(ispin))
456 :
457 344 : NULLIFY (fm_struct_tmp)
458 : CALL cp_fm_struct_create(fm_struct_tmp, para_env=para_env_sub, context=blacs_env_sub, &
459 344 : nrow_global=dimen, ncol_global=virtual(ispin))
460 344 : CALL cp_fm_create(L2_nu_a(ispin), fm_struct_tmp, name="Lag_nu_a")
461 344 : CALL cp_fm_struct_release(fm_struct_tmp)
462 344 : CALL cp_fm_set_all(L2_nu_a(ispin), 0.0_dp)
463 344 : CALL copy_dbcsr_to_fm(matrix=Lag_nu_a_2(ispin), fm=L2_nu_a(ispin))
464 :
465 : ! release Lag_nu_a_2
466 604 : CALL dbcsr_release(Lag_nu_a_2(ispin))
467 : END DO
468 260 : DEALLOCATE (Lag_mu_i_1, Lag_nu_a_2)
469 :
470 : ! Set the factor to multiply P_ij (depends on the open or closed shell)
471 260 : factor = 1.0_dp
472 260 : IF (alpha_beta) factor = 0.50_dp
473 :
474 604 : DO ispin = 1, nspins
475 : CALL create_W_P(qs_env, mp2_env, mo_coeff(ispin), homo(ispin), virtual(ispin), dimen, para_env, &
476 : para_env_sub, Eigenval(:, ispin), L1_mu_i(ispin), L2_nu_a(ispin), &
477 604 : factor, ispin)
478 : END DO
479 260 : DEALLOCATE (L1_mu_i, L2_nu_a)
480 :
481 260 : CALL timestop(handle)
482 :
483 520 : END SUBROUTINE calc_ri_mp2_nonsep
484 :
485 : ! **************************************************************************************************
486 : !> \brief Transforms G_P_ia to G_P_munu
487 : !> \param G_P_munu The container for G_P_munu, will be allocated and created if not allocated on entry
488 : !> \param G_P_munu_nosym ...
489 : !> \param mat_munu ...
490 : !> \param G_P_ia ...
491 : !> \param G_P_inu ...
492 : !> \param mo_coeff_v ...
493 : !> \param mo_coeff_o ...
494 : !> \param eps_filter ...
495 : ! **************************************************************************************************
496 11400 : SUBROUTINE G_P_transform_MO_to_AO(G_P_munu, G_P_munu_nosym, mat_munu, G_P_ia, G_P_inu, &
497 11400 : mo_coeff_v, mo_coeff_o, eps_filter)
498 : TYPE(dbcsr_type), POINTER :: G_P_munu
499 : TYPE(dbcsr_type), INTENT(INOUT) :: G_P_munu_nosym, mat_munu
500 : TYPE(dbcsr_p_type), DIMENSION(:), INTENT(IN) :: G_P_ia
501 : TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT) :: G_P_inu
502 : TYPE(dbcsr_p_type), DIMENSION(:), INTENT(IN) :: mo_coeff_v, mo_coeff_o
503 : REAL(KIND=dp), INTENT(IN) :: eps_filter
504 :
505 : CHARACTER(LEN=*), PARAMETER :: routineN = 'G_P_transform_MO_to_AO'
506 :
507 : INTEGER :: handle
508 :
509 11400 : IF (.NOT. ASSOCIATED(G_P_munu)) THEN
510 1340 : ALLOCATE (G_P_munu)
511 : CALL dbcsr_create(G_P_munu, template=mat_munu, &
512 1340 : matrix_type=dbcsr_type_symmetric)
513 : END IF
514 :
515 11400 : CALL G_P_transform_alpha_beta(G_P_ia, G_P_inu, G_P_munu_nosym, mo_coeff_v, mo_coeff_o, eps_filter)
516 :
517 : ! symmetrize
518 11400 : CALL timeset(routineN//"_symmetrize", handle)
519 11400 : CALL dbcsr_set(G_P_munu, 0.0_dp)
520 11400 : CALL dbcsr_transposed(G_P_munu, G_P_munu_nosym)
521 : CALL dbcsr_add(G_P_munu, G_P_munu_nosym, &
522 11400 : alpha_scalar=2.0_dp, beta_scalar=2.0_dp)
523 : ! this is a trick to avoid that integrate_v_rspace starts to cry
524 11400 : CALL dbcsr_copy_into_existing(mat_munu, G_P_munu)
525 11400 : CALL dbcsr_copy(G_P_munu, mat_munu)
526 :
527 11400 : CALL timestop(handle)
528 :
529 11400 : END SUBROUTINE G_P_transform_MO_to_AO
530 :
531 : ! **************************************************************************************************
532 : !> \brief ...
533 : !> \param G_P_ia ...
534 : !> \param G_P_inu ...
535 : !> \param G_P_munu ...
536 : !> \param mo_coeff_v ...
537 : !> \param mo_coeff_o ...
538 : !> \param eps_filter ...
539 : ! **************************************************************************************************
540 11400 : SUBROUTINE G_P_transform_alpha_beta(G_P_ia, G_P_inu, G_P_munu, mo_coeff_v, mo_coeff_o, eps_filter)
541 : TYPE(dbcsr_p_type), DIMENSION(:), INTENT(IN) :: G_P_ia
542 : TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT) :: G_P_inu
543 : TYPE(dbcsr_type), INTENT(INOUT) :: G_P_munu
544 : TYPE(dbcsr_p_type), DIMENSION(:), INTENT(IN) :: mo_coeff_v, mo_coeff_o
545 : REAL(KIND=dp), INTENT(IN) :: eps_filter
546 :
547 : CHARACTER(LEN=*), PARAMETER :: routineN = 'G_P_transform_alpha_beta'
548 :
549 : INTEGER :: handle, ispin
550 : REAL(KIND=dp) :: factor
551 :
552 11400 : CALL timeset(routineN, handle)
553 :
554 11400 : factor = 1.0_dp/REAL(SIZE(G_P_ia), dp)
555 :
556 11400 : CALL dbcsr_set(G_P_munu, 0.0_dp)
557 :
558 26631 : DO ispin = 1, SIZE(G_P_ia)
559 : ! first back-transformation a->nu
560 : CALL dbcsr_multiply("N", "T", 1.0_dp, mo_coeff_v(ispin)%matrix, G_P_ia(ispin)%matrix, &
561 15231 : 0.0_dp, G_P_inu(ispin), filter_eps=eps_filter)
562 :
563 : ! second back-transformation i->mu
564 : CALL dbcsr_multiply("N", "T", factor, G_P_inu(ispin), mo_coeff_o(ispin)%matrix, &
565 26631 : 1.0_dp, G_P_munu, filter_eps=eps_filter)
566 : END DO
567 :
568 11400 : CALL timestop(handle)
569 :
570 11400 : END SUBROUTINE G_P_transform_alpha_beta
571 :
572 : ! **************************************************************************************************
573 : !> \brief ...
574 : !> \param mat_munu ...
575 : !> \param matrix_P_inu ...
576 : !> \param Lag_mu_i_1 ...
577 : !> \param G_P_ia ...
578 : !> \param mo_coeff_o ...
579 : !> \param Lag_nu_a_2 ...
580 : !> \param eps_filter ...
581 : ! **************************************************************************************************
582 11400 : SUBROUTINE update_lagrangian(mat_munu, matrix_P_inu, Lag_mu_i_1, &
583 22800 : G_P_ia, mo_coeff_o, Lag_nu_a_2, &
584 : eps_filter)
585 : TYPE(dbcsr_type), INTENT(IN) :: mat_munu
586 : TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT) :: matrix_P_inu, Lag_mu_i_1
587 : TYPE(dbcsr_p_type), DIMENSION(:), INTENT(INOUT) :: G_P_ia
588 : TYPE(dbcsr_p_type), DIMENSION(:), INTENT(IN) :: mo_coeff_o
589 : TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT) :: Lag_nu_a_2
590 : REAL(KIND=dp), INTENT(IN) :: eps_filter
591 :
592 : CHARACTER(LEN=*), PARAMETER :: routineN = 'update_lagrangian'
593 :
594 : INTEGER :: handle, ispin
595 :
596 : ! update lagrangian
597 11400 : CALL timeset(routineN, handle)
598 :
599 26631 : DO ispin = 1, SIZE(G_P_ia)
600 : ! first contract mat_munu with the half back transformed Gamma_i_nu
601 : ! in order to update Lag_mu_i_1
602 : CALL dbcsr_multiply("N", "N", 1.0_dp, mat_munu, matrix_P_inu(ispin), &
603 15231 : 1.0_dp, Lag_mu_i_1(ispin), filter_eps=eps_filter)
604 :
605 : ! transform first index of mat_munu and store the result into matrix_P_inu
606 15231 : CALL dbcsr_set(matrix_P_inu(ispin), 0.0_dp)
607 : CALL dbcsr_multiply("N", "N", 1.0_dp, mat_munu, mo_coeff_o(ispin)%matrix, &
608 15231 : 0.0_dp, matrix_P_inu(ispin), filter_eps=eps_filter)
609 :
610 : ! contract the transformend matrix_P_inu with the untransformend Gamma_i_a
611 : ! in order to update Lag_nu_a_2
612 : CALL dbcsr_multiply("N", "N", -1.0_dp, matrix_P_inu(ispin), G_P_ia(ispin)%matrix, &
613 15231 : 1.0_dp, Lag_nu_a_2(ispin), filter_eps=eps_filter)
614 :
615 : ! release the actual gamma_P_ia
616 15231 : CALL dbcsr_release(G_P_ia(ispin)%matrix)
617 26631 : DEALLOCATE (G_P_ia(ispin)%matrix)
618 : END DO
619 :
620 11400 : CALL timestop(handle)
621 :
622 11400 : END SUBROUTINE update_lagrangian
623 :
624 : ! **************************************************************************************************
625 : !> \brief ...
626 : !> \param qs_env ...
627 : !> \param mp2_env ...
628 : !> \param mo_coeff ...
629 : !> \param homo ...
630 : !> \param virtual ...
631 : !> \param dimen ...
632 : !> \param para_env ...
633 : !> \param para_env_sub ...
634 : !> \param Eigenval ...
635 : !> \param L1_mu_i ...
636 : !> \param L2_nu_a ...
637 : !> \param factor ...
638 : !> \param kspin ...
639 : ! **************************************************************************************************
640 344 : SUBROUTINE create_W_P(qs_env, mp2_env, mo_coeff, homo, virtual, dimen, para_env, para_env_sub, &
641 344 : Eigenval, L1_mu_i, L2_nu_a, factor, kspin)
642 : TYPE(qs_environment_type), POINTER :: qs_env
643 : TYPE(mp2_type) :: mp2_env
644 : TYPE(cp_fm_type), INTENT(IN) :: mo_coeff
645 : INTEGER, INTENT(IN) :: homo, virtual, dimen
646 : TYPE(mp_para_env_type), POINTER :: para_env, para_env_sub
647 : REAL(KIND=dp), DIMENSION(:), INTENT(IN) :: Eigenval
648 : TYPE(cp_fm_type), INTENT(INOUT) :: L1_mu_i, L2_nu_a
649 : REAL(KIND=dp), INTENT(IN) :: factor
650 : INTEGER, INTENT(IN) :: kspin
651 :
652 : CHARACTER(LEN=*), PARAMETER :: routineN = 'create_W_P'
653 :
654 : INTEGER :: color_exchange, dummy_proc, handle, handle2, handle3, i_global, i_local, iiB, &
655 : iii, iproc, itmp(2), j_global, j_local, jjB, max_col_size, max_row_size, &
656 : my_B_virtual_end, my_B_virtual_start, mypcol, myprow, ncol_block, ncol_block_1i, &
657 : ncol_block_2a, ncol_local, ncol_local_1i, ncol_local_2a, npcol, npcol_1i, npcol_2a, &
658 : nprow, nprow_1i, nprow_2a, nrow_block, nrow_block_1i, nrow_block_2a, nrow_local, &
659 : nrow_local_1i, nrow_local_2a, number_of_rec, number_of_send, proc_receive, &
660 : proc_receive_static, proc_send, proc_send_ex, proc_send_static, proc_send_sub, &
661 : proc_shift, rec_col_size
662 : INTEGER :: rec_counter, rec_row_size, send_col_size, send_counter, send_pcol, send_prow, &
663 : send_row_size, size_rec_buffer, size_send_buffer
664 344 : INTEGER, ALLOCATABLE, DIMENSION(:) :: iii_vet, map_rec_size, map_send_size, &
665 344 : pos_info, pos_info_ex, proc_2_send_pos
666 344 : INTEGER, ALLOCATABLE, DIMENSION(:, :) :: grid_2_mepos, mepos_2_grid, my_col_indeces_info_1i, &
667 344 : my_col_indeces_info_2a, my_row_indeces_info_1i, my_row_indeces_info_2a, sizes, sizes_1i, &
668 344 : sizes_2a
669 344 : INTEGER, ALLOCATABLE, DIMENSION(:, :, :) :: col_indeces_info_1i, &
670 344 : col_indeces_info_2a, &
671 344 : row_indeces_info_1i, &
672 344 : row_indeces_info_2a
673 344 : INTEGER, DIMENSION(:), POINTER :: col_indices, col_indices_1i, &
674 344 : col_indices_2a, row_indices, &
675 344 : row_indices_1i, row_indices_2a
676 344 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: ab_rec, ab_send, mat_rec, mat_send
677 : TYPE(cp_blacs_env_type), POINTER :: blacs_env
678 : TYPE(cp_fm_struct_type), POINTER :: fm_struct_tmp
679 : TYPE(cp_fm_type) :: fm_P_ij, L_mu_q
680 : TYPE(integ_mat_buffer_type), ALLOCATABLE, &
681 344 : DIMENSION(:) :: buffer_rec, buffer_send
682 : TYPE(integ_mat_buffer_type_2D), ALLOCATABLE, &
683 344 : DIMENSION(:) :: buffer_cyclic
684 : TYPE(mp_para_env_type), POINTER :: para_env_exchange
685 344 : TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:) :: req_send
686 :
687 344 : CALL timeset(routineN, handle)
688 :
689 : ! create the globally distributed mixed lagrangian
690 344 : NULLIFY (blacs_env)
691 344 : CALL get_qs_env(qs_env, blacs_env=blacs_env)
692 :
693 344 : NULLIFY (fm_struct_tmp)
694 : CALL cp_fm_struct_create(fm_struct_tmp, para_env=para_env, context=blacs_env, &
695 344 : nrow_global=dimen, ncol_global=dimen)
696 344 : CALL cp_fm_create(L_mu_q, fm_struct_tmp, name="Lag_mu_q")
697 344 : CALL cp_fm_struct_release(fm_struct_tmp)
698 344 : CALL cp_fm_set_all(L_mu_q, 0.0_dp)
699 :
700 : ! create all information array
701 1032 : ALLOCATE (pos_info(0:para_env%num_pe - 1))
702 344 : CALL para_env%allgather(para_env_sub%mepos, pos_info)
703 :
704 : ! get matrix information for the global
705 : CALL cp_fm_get_info(matrix=L_mu_q, &
706 : nrow_local=nrow_local, &
707 : ncol_local=ncol_local, &
708 : row_indices=row_indices, &
709 : col_indices=col_indices, &
710 : nrow_block=nrow_block, &
711 344 : ncol_block=ncol_block)
712 344 : myprow = L_mu_q%matrix_struct%context%mepos(1)
713 344 : mypcol = L_mu_q%matrix_struct%context%mepos(2)
714 344 : nprow = L_mu_q%matrix_struct%context%num_pe(1)
715 344 : npcol = L_mu_q%matrix_struct%context%num_pe(2)
716 :
717 1376 : ALLOCATE (grid_2_mepos(0:nprow - 1, 0:npcol - 1))
718 1376 : grid_2_mepos = 0
719 344 : grid_2_mepos(myprow, mypcol) = para_env%mepos
720 344 : CALL para_env%sum(grid_2_mepos)
721 :
722 : ! get matrix information for L1_mu_i
723 : CALL cp_fm_get_info(matrix=L1_mu_i, &
724 : nrow_local=nrow_local_1i, &
725 : ncol_local=ncol_local_1i, &
726 : row_indices=row_indices_1i, &
727 : col_indices=col_indices_1i, &
728 : nrow_block=nrow_block_1i, &
729 344 : ncol_block=ncol_block_1i)
730 344 : nprow_1i = L1_mu_i%matrix_struct%context%num_pe(1)
731 344 : npcol_1i = L1_mu_i%matrix_struct%context%num_pe(2)
732 :
733 1032 : ALLOCATE (sizes_1i(2, 0:para_env_sub%num_pe - 1))
734 1032 : CALL para_env_sub%allgather([nrow_local_1i, ncol_local_1i], sizes_1i)
735 :
736 : ! get matrix information for L2_nu_a
737 : CALL cp_fm_get_info(matrix=L2_nu_a, &
738 : nrow_local=nrow_local_2a, &
739 : ncol_local=ncol_local_2a, &
740 : row_indices=row_indices_2a, &
741 : col_indices=col_indices_2a, &
742 : nrow_block=nrow_block_2a, &
743 344 : ncol_block=ncol_block_2a)
744 344 : nprow_2a = L2_nu_a%matrix_struct%context%num_pe(1)
745 344 : npcol_2a = L2_nu_a%matrix_struct%context%num_pe(2)
746 :
747 1032 : ALLOCATE (sizes_2a(2, 0:para_env_sub%num_pe - 1))
748 1032 : CALL para_env_sub%allgather([nrow_local_2a, ncol_local_2a], sizes_2a)
749 :
750 : ! Here we perform a ring communication scheme taking into account
751 : ! for the sub-group distribution of the source matrices.
752 : ! as a first step we need to redistribute the data within
753 : ! the subgroup.
754 : ! In order to do so we have to allocate the structure
755 : ! that will hold the local data involved in communication, this
756 : ! structure will be the same for processes in different subgroups
757 : ! sharing the same position in the subgroup.
758 : ! -1) create the exchange para_env
759 344 : color_exchange = para_env_sub%mepos
760 344 : ALLOCATE (para_env_exchange)
761 344 : CALL para_env_exchange%from_split(para_env, color_exchange)
762 1032 : ALLOCATE (pos_info_ex(0:para_env%num_pe - 1))
763 344 : CALL para_env%allgather(para_env_exchange%mepos, pos_info_ex)
764 1032 : ALLOCATE (sizes(2, 0:para_env_exchange%num_pe - 1))
765 1032 : CALL para_env_exchange%allgather([nrow_local, ncol_local], sizes)
766 :
767 : ! 0) store some info about indeces of the fm matrices (subgroup)
768 344 : CALL timeset(routineN//"_inx", handle2)
769 : ! matrix L1_mu_i
770 704 : max_row_size = MAXVAL(sizes_1i(1, :))
771 704 : max_col_size = MAXVAL(sizes_1i(2, :))
772 1376 : ALLOCATE (row_indeces_info_1i(2, max_row_size, 0:para_env_sub%num_pe - 1))
773 1376 : ALLOCATE (col_indeces_info_1i(2, max_col_size, 0:para_env_sub%num_pe - 1))
774 1032 : ALLOCATE (my_row_indeces_info_1i(2, max_row_size))
775 1032 : ALLOCATE (my_col_indeces_info_1i(2, max_col_size))
776 20426 : row_indeces_info_1i = 0
777 4664 : col_indeces_info_1i = 0
778 344 : dummy_proc = 0
779 : ! row
780 6716 : DO iiB = 1, nrow_local_1i
781 6372 : i_global = row_indices_1i(iiB)
782 : send_prow = cp_fm_indxg2p(i_global, nrow_block, dummy_proc, &
783 6372 : L_mu_q%matrix_struct%first_p_pos(1), nprow)
784 : i_local = cp_fm_indxg2l(i_global, nrow_block, dummy_proc, &
785 6372 : L_mu_q%matrix_struct%first_p_pos(1), nprow)
786 6372 : my_row_indeces_info_1i(1, iiB) = send_prow
787 6716 : my_row_indeces_info_1i(2, iiB) = i_local
788 : END DO
789 : ! col
790 1592 : DO jjB = 1, ncol_local_1i
791 1248 : j_global = col_indices_1i(jjB)
792 : send_pcol = cp_fm_indxg2p(j_global, ncol_block, dummy_proc, &
793 1248 : L_mu_q%matrix_struct%first_p_pos(2), npcol)
794 : j_local = cp_fm_indxg2l(j_global, ncol_block, dummy_proc, &
795 1248 : L_mu_q%matrix_struct%first_p_pos(2), npcol)
796 1248 : my_col_indeces_info_1i(1, jjB) = send_pcol
797 1592 : my_col_indeces_info_1i(2, jjB) = j_local
798 : END DO
799 344 : CALL para_env_sub%allgather(my_row_indeces_info_1i, row_indeces_info_1i)
800 344 : CALL para_env_sub%allgather(my_col_indeces_info_1i, col_indeces_info_1i)
801 344 : DEALLOCATE (my_row_indeces_info_1i, my_col_indeces_info_1i)
802 :
803 : ! matrix L2_nu_a
804 704 : max_row_size = MAXVAL(sizes_2a(1, :))
805 704 : max_col_size = MAXVAL(sizes_2a(2, :))
806 1376 : ALLOCATE (row_indeces_info_2a(2, max_row_size, 0:para_env_sub%num_pe - 1))
807 1376 : ALLOCATE (col_indeces_info_2a(2, max_col_size, 0:para_env_sub%num_pe - 1))
808 1032 : ALLOCATE (my_row_indeces_info_2a(2, max_row_size))
809 1032 : ALLOCATE (my_col_indeces_info_2a(2, max_col_size))
810 20426 : row_indeces_info_2a = 0
811 17570 : col_indeces_info_2a = 0
812 : ! row
813 6716 : DO iiB = 1, nrow_local_2a
814 6372 : i_global = row_indices_2a(iiB)
815 : send_prow = cp_fm_indxg2p(i_global, nrow_block, dummy_proc, &
816 6372 : L_mu_q%matrix_struct%first_p_pos(1), nprow)
817 : i_local = cp_fm_indxg2l(i_global, nrow_block, dummy_proc, &
818 6372 : L_mu_q%matrix_struct%first_p_pos(1), nprow)
819 6372 : my_row_indeces_info_2a(1, iiB) = send_prow
820 6716 : my_row_indeces_info_2a(2, iiB) = i_local
821 : END DO
822 : ! col
823 5658 : DO jjB = 1, ncol_local_2a
824 5314 : j_global = col_indices_2a(jjB) + homo
825 : send_pcol = cp_fm_indxg2p(j_global, ncol_block, dummy_proc, &
826 5314 : L_mu_q%matrix_struct%first_p_pos(2), npcol)
827 : j_local = cp_fm_indxg2l(j_global, ncol_block, dummy_proc, &
828 5314 : L_mu_q%matrix_struct%first_p_pos(2), npcol)
829 5314 : my_col_indeces_info_2a(1, jjB) = send_pcol
830 5658 : my_col_indeces_info_2a(2, jjB) = j_local
831 : END DO
832 344 : CALL para_env_sub%allgather(my_row_indeces_info_2a, row_indeces_info_2a)
833 344 : CALL para_env_sub%allgather(my_col_indeces_info_2a, col_indeces_info_2a)
834 344 : DEALLOCATE (my_row_indeces_info_2a, my_col_indeces_info_2a)
835 344 : CALL timestop(handle2)
836 :
837 : ! 1) define the map for sending data in the subgroup starting with L1_mu_i
838 344 : CALL timeset(routineN//"_subinfo", handle2)
839 1032 : ALLOCATE (map_send_size(0:para_env_sub%num_pe - 1))
840 704 : map_send_size = 0
841 1592 : DO jjB = 1, ncol_local_1i
842 1248 : send_pcol = col_indeces_info_1i(1, jjB, para_env_sub%mepos)
843 25286 : DO iiB = 1, nrow_local_1i
844 23694 : send_prow = row_indeces_info_1i(1, iiB, para_env_sub%mepos)
845 23694 : proc_send = grid_2_mepos(send_prow, send_pcol)
846 23694 : proc_send_sub = pos_info(proc_send)
847 24942 : map_send_size(proc_send_sub) = map_send_size(proc_send_sub) + 1
848 : END DO
849 : END DO
850 : ! and the same for L2_nu_a
851 5658 : DO jjB = 1, ncol_local_2a
852 5314 : send_pcol = col_indeces_info_2a(1, jjB, para_env_sub%mepos)
853 123528 : DO iiB = 1, nrow_local_2a
854 117870 : send_prow = row_indeces_info_2a(1, iiB, para_env_sub%mepos)
855 117870 : proc_send = grid_2_mepos(send_prow, send_pcol)
856 117870 : proc_send_sub = pos_info(proc_send)
857 123184 : map_send_size(proc_send_sub) = map_send_size(proc_send_sub) + 1
858 : END DO
859 : END DO
860 : ! and exchange data in order to create map_rec_size
861 1032 : ALLOCATE (map_rec_size(0:para_env_sub%num_pe - 1))
862 704 : map_rec_size = 0
863 344 : CALL para_env_sub%alltoall(map_send_size, map_rec_size, 1)
864 344 : CALL timestop(handle2)
865 :
866 : ! 2) reorder data in sending buffer
867 344 : CALL timeset(routineN//"_sub_Bsend", handle2)
868 : ! count the number of messages (include myself)
869 344 : number_of_send = 0
870 704 : DO proc_shift = 0, para_env_sub%num_pe - 1
871 360 : proc_send = MODULO(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
872 704 : IF (map_send_size(proc_send) > 0) THEN
873 344 : number_of_send = number_of_send + 1
874 : END IF
875 : END DO
876 : ! allocate the structure that will hold the messages to be sent
877 1376 : ALLOCATE (buffer_send(number_of_send))
878 344 : send_counter = 0
879 1032 : ALLOCATE (proc_2_send_pos(0:para_env_sub%num_pe - 1))
880 704 : proc_2_send_pos = 0
881 704 : DO proc_shift = 0, para_env_sub%num_pe - 1
882 360 : proc_send = MODULO(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
883 360 : size_send_buffer = map_send_size(proc_send)
884 704 : IF (map_send_size(proc_send) > 0) THEN
885 344 : send_counter = send_counter + 1
886 : ! allocate the sending buffer (msg)
887 1032 : ALLOCATE (buffer_send(send_counter)%msg(size_send_buffer))
888 141908 : buffer_send(send_counter)%msg = 0.0_dp
889 344 : buffer_send(send_counter)%proc = proc_send
890 344 : proc_2_send_pos(proc_send) = send_counter
891 : END IF
892 : END DO
893 : ! loop over the locally held data and fill the buffer_send
894 : ! for doing that we need an array that keep track if the
895 : ! sequential increase of the index for each message
896 1032 : ALLOCATE (iii_vet(number_of_send))
897 688 : iii_vet = 0
898 1592 : DO jjB = 1, ncol_local_1i
899 1248 : send_pcol = col_indeces_info_1i(1, jjB, para_env_sub%mepos)
900 25286 : DO iiB = 1, nrow_local_1i
901 23694 : send_prow = row_indeces_info_1i(1, iiB, para_env_sub%mepos)
902 23694 : proc_send = grid_2_mepos(send_prow, send_pcol)
903 23694 : proc_send_sub = pos_info(proc_send)
904 23694 : send_counter = proc_2_send_pos(proc_send_sub)
905 23694 : iii_vet(send_counter) = iii_vet(send_counter) + 1
906 23694 : iii = iii_vet(send_counter)
907 24942 : buffer_send(send_counter)%msg(iii) = L1_mu_i%local_data(iiB, jjB)
908 : END DO
909 : END DO
910 : ! release the local data of L1_mu_i
911 344 : DEALLOCATE (L1_mu_i%local_data)
912 : ! and the same for L2_nu_a
913 5658 : DO jjB = 1, ncol_local_2a
914 5314 : send_pcol = col_indeces_info_2a(1, jjB, para_env_sub%mepos)
915 123528 : DO iiB = 1, nrow_local_2a
916 117870 : send_prow = row_indeces_info_2a(1, iiB, para_env_sub%mepos)
917 117870 : proc_send = grid_2_mepos(send_prow, send_pcol)
918 117870 : proc_send_sub = pos_info(proc_send)
919 117870 : send_counter = proc_2_send_pos(proc_send_sub)
920 117870 : iii_vet(send_counter) = iii_vet(send_counter) + 1
921 117870 : iii = iii_vet(send_counter)
922 123184 : buffer_send(send_counter)%msg(iii) = L2_nu_a%local_data(iiB, jjB)
923 : END DO
924 : END DO
925 344 : DEALLOCATE (L2_nu_a%local_data)
926 344 : DEALLOCATE (proc_2_send_pos)
927 344 : DEALLOCATE (iii_vet)
928 344 : CALL timestop(handle2)
929 :
930 : ! 3) create the buffer for receive, post the message with irecv
931 : ! and send the messages non-blocking
932 344 : CALL timeset(routineN//"_sub_isendrecv", handle2)
933 : ! count the number of messages to be received
934 344 : number_of_rec = 0
935 704 : DO proc_shift = 0, para_env_sub%num_pe - 1
936 360 : proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
937 704 : IF (map_rec_size(proc_receive) > 0) THEN
938 344 : number_of_rec = number_of_rec + 1
939 : END IF
940 : END DO
941 1376 : ALLOCATE (buffer_rec(number_of_rec))
942 344 : rec_counter = 0
943 704 : DO proc_shift = 0, para_env_sub%num_pe - 1
944 360 : proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
945 360 : size_rec_buffer = map_rec_size(proc_receive)
946 704 : IF (map_rec_size(proc_receive) > 0) THEN
947 344 : rec_counter = rec_counter + 1
948 : ! prepare the buffer for receive
949 1032 : ALLOCATE (buffer_rec(rec_counter)%msg(size_rec_buffer))
950 141908 : buffer_rec(rec_counter)%msg = 0.0_dp
951 344 : buffer_rec(rec_counter)%proc = proc_receive
952 : ! post the message to be received (not need to send to myself)
953 344 : IF (proc_receive /= para_env_sub%mepos) THEN
954 : CALL para_env_sub%irecv(buffer_rec(rec_counter)%msg, proc_receive, &
955 0 : buffer_rec(rec_counter)%msg_req)
956 : END IF
957 : END IF
958 : END DO
959 : ! send messages
960 1720 : ALLOCATE (req_send(number_of_send))
961 688 : req_send = mp_request_null
962 344 : send_counter = 0
963 704 : DO proc_shift = 0, para_env_sub%num_pe - 1
964 360 : proc_send = MODULO(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
965 704 : IF (map_send_size(proc_send) > 0) THEN
966 344 : send_counter = send_counter + 1
967 344 : IF (proc_send == para_env_sub%mepos) THEN
968 141908 : buffer_rec(send_counter)%msg(:) = buffer_send(send_counter)%msg
969 : ELSE
970 : CALL para_env_sub%isend(buffer_send(send_counter)%msg, proc_send, &
971 0 : buffer_send(send_counter)%msg_req)
972 0 : req_send(send_counter) = buffer_send(send_counter)%msg_req
973 : END IF
974 : END IF
975 : END DO
976 344 : DEALLOCATE (map_send_size)
977 344 : CALL timestop(handle2)
978 :
979 : ! 4) (if memory is a problem we should move this part after point 5)
980 : ! Here we create the new buffer for cyclic(ring) communication and
981 : ! we fill it with the data received from the other member of the
982 : ! subgroup
983 344 : CALL timeset(routineN//"_Bcyclic", handle2)
984 : ! first allocata new structure
985 1704 : ALLOCATE (buffer_cyclic(0:para_env_exchange%num_pe - 1))
986 1016 : DO iproc = 0, para_env_exchange%num_pe - 1
987 672 : rec_row_size = sizes(1, iproc)
988 672 : rec_col_size = sizes(2, iproc)
989 2688 : ALLOCATE (buffer_cyclic(iproc)%msg(rec_row_size, rec_col_size))
990 155324 : buffer_cyclic(iproc)%msg = 0.0_dp
991 : END DO
992 : ! now collect data from other member of the subgroup and fill
993 : ! buffer_cyclic
994 344 : rec_counter = 0
995 704 : DO proc_shift = 0, para_env_sub%num_pe - 1
996 360 : proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
997 360 : size_rec_buffer = map_rec_size(proc_receive)
998 704 : IF (map_rec_size(proc_receive) > 0) THEN
999 344 : rec_counter = rec_counter + 1
1000 :
1001 : ! wait for the message
1002 344 : IF (proc_receive /= para_env_sub%mepos) CALL buffer_rec(rec_counter)%msg_req%wait()
1003 :
1004 344 : CALL timeset(routineN//"_fill", handle3)
1005 344 : iii = 0
1006 1592 : DO jjB = 1, sizes_1i(2, proc_receive)
1007 1248 : send_pcol = col_indeces_info_1i(1, jjB, proc_receive)
1008 1248 : j_local = col_indeces_info_1i(2, jjB, proc_receive)
1009 25286 : DO iiB = 1, sizes_1i(1, proc_receive)
1010 23694 : send_prow = row_indeces_info_1i(1, iiB, proc_receive)
1011 23694 : proc_send = grid_2_mepos(send_prow, send_pcol)
1012 23694 : proc_send_sub = pos_info(proc_send)
1013 23694 : IF (proc_send_sub /= para_env_sub%mepos) CYCLE
1014 23694 : iii = iii + 1
1015 23694 : i_local = row_indeces_info_1i(2, iiB, proc_receive)
1016 23694 : proc_send_ex = pos_info_ex(proc_send)
1017 24942 : buffer_cyclic(proc_send_ex)%msg(i_local, j_local) = buffer_rec(rec_counter)%msg(iii)
1018 : END DO
1019 : END DO
1020 : ! and the same for L2_nu_a
1021 5658 : DO jjB = 1, sizes_2a(2, proc_receive)
1022 5314 : send_pcol = col_indeces_info_2a(1, jjB, proc_receive)
1023 5314 : j_local = col_indeces_info_2a(2, jjB, proc_receive)
1024 123528 : DO iiB = 1, sizes_2a(1, proc_receive)
1025 117870 : send_prow = row_indeces_info_2a(1, iiB, proc_receive)
1026 117870 : proc_send = grid_2_mepos(send_prow, send_pcol)
1027 117870 : proc_send_sub = pos_info(proc_send)
1028 117870 : IF (proc_send_sub /= para_env_sub%mepos) CYCLE
1029 117870 : iii = iii + 1
1030 117870 : i_local = row_indeces_info_2a(2, iiB, proc_receive)
1031 117870 : proc_send_ex = pos_info_ex(proc_send)
1032 123184 : buffer_cyclic(proc_send_ex)%msg(i_local, j_local) = buffer_rec(rec_counter)%msg(iii)
1033 : END DO
1034 : END DO
1035 344 : CALL timestop(handle3)
1036 :
1037 : ! deallocate the received message
1038 344 : DEALLOCATE (buffer_rec(rec_counter)%msg)
1039 : END IF
1040 : END DO
1041 344 : DEALLOCATE (row_indeces_info_1i)
1042 344 : DEALLOCATE (col_indeces_info_1i)
1043 344 : DEALLOCATE (row_indeces_info_2a)
1044 344 : DEALLOCATE (col_indeces_info_2a)
1045 688 : DEALLOCATE (buffer_rec)
1046 344 : DEALLOCATE (map_rec_size)
1047 344 : CALL timestop(handle2)
1048 :
1049 : ! 5) Wait for all messeges to be sent in the subgroup
1050 344 : CALL timeset(routineN//"_sub_waitall", handle2)
1051 344 : CALL mp_waitall(req_send(:))
1052 688 : DO send_counter = 1, number_of_send
1053 688 : DEALLOCATE (buffer_send(send_counter)%msg)
1054 : END DO
1055 688 : DEALLOCATE (buffer_send)
1056 344 : DEALLOCATE (req_send)
1057 344 : CALL timestop(handle2)
1058 :
1059 : ! 6) Start with ring communication
1060 344 : CALL timeset(routineN//"_ring", handle2)
1061 344 : proc_send_static = MODULO(para_env_exchange%mepos + 1, para_env_exchange%num_pe)
1062 344 : proc_receive_static = MODULO(para_env_exchange%mepos - 1, para_env_exchange%num_pe)
1063 1016 : max_row_size = MAXVAL(sizes(1, :))
1064 1016 : max_col_size = MAXVAL(sizes(2, :))
1065 1376 : ALLOCATE (mat_send(max_row_size, max_col_size))
1066 1376 : ALLOCATE (mat_rec(max_row_size, max_col_size))
1067 81732 : mat_send = 0.0_dp
1068 79951 : mat_send(1:nrow_local, 1:ncol_local) = buffer_cyclic(para_env_exchange%mepos)%msg(:, :)
1069 344 : DEALLOCATE (buffer_cyclic(para_env_exchange%mepos)%msg)
1070 672 : DO proc_shift = 1, para_env_exchange%num_pe - 1
1071 328 : proc_receive = MODULO(para_env_exchange%mepos - proc_shift, para_env_exchange%num_pe)
1072 :
1073 328 : rec_row_size = sizes(1, proc_receive)
1074 328 : rec_col_size = sizes(2, proc_receive)
1075 :
1076 76810 : mat_rec = 0.0_dp
1077 : CALL para_env_exchange%sendrecv(mat_send, proc_send_static, &
1078 328 : mat_rec, proc_receive_static)
1079 :
1080 76810 : mat_send = 0.0_dp
1081 : mat_send(1:rec_row_size, 1:rec_col_size) = mat_rec(1:rec_row_size, 1:rec_col_size) + &
1082 75029 : buffer_cyclic(proc_receive)%msg(:, :)
1083 :
1084 672 : DEALLOCATE (buffer_cyclic(proc_receive)%msg)
1085 : END DO
1086 : ! and finally
1087 : CALL para_env_exchange%sendrecv(mat_send, proc_send_static, &
1088 344 : mat_rec, proc_receive_static)
1089 79951 : L_mu_q%local_data(1:nrow_local, 1:ncol_local) = mat_rec(1:nrow_local, 1:ncol_local)
1090 1360 : DEALLOCATE (buffer_cyclic)
1091 344 : DEALLOCATE (mat_send)
1092 344 : DEALLOCATE (mat_rec)
1093 344 : CALL timestop(handle2)
1094 :
1095 : ! release para_env_exchange
1096 344 : CALL mp_para_env_release(para_env_exchange)
1097 :
1098 344 : CALL cp_fm_release(L1_mu_i)
1099 344 : CALL cp_fm_release(L2_nu_a)
1100 344 : DEALLOCATE (pos_info_ex)
1101 344 : DEALLOCATE (grid_2_mepos)
1102 344 : DEALLOCATE (sizes)
1103 344 : DEALLOCATE (sizes_1i)
1104 344 : DEALLOCATE (sizes_2a)
1105 :
1106 : ! update the P_ij block of P_MP2 with the
1107 : ! non-singular ij pairs
1108 344 : CALL timeset(routineN//"_Pij", handle2)
1109 344 : NULLIFY (fm_struct_tmp)
1110 : CALL cp_fm_struct_create(fm_struct_tmp, para_env=para_env, context=blacs_env, &
1111 344 : nrow_global=homo, ncol_global=homo)
1112 344 : CALL cp_fm_create(fm_P_ij, fm_struct_tmp, name="fm_P_ij")
1113 344 : CALL cp_fm_struct_release(fm_struct_tmp)
1114 344 : CALL cp_fm_set_all(fm_P_ij, 0.0_dp)
1115 :
1116 : ! we have it, update P_ij local
1117 : CALL cp_fm_get_info(matrix=fm_P_ij, &
1118 : nrow_local=nrow_local, &
1119 : ncol_local=ncol_local, &
1120 : row_indices=row_indices, &
1121 344 : col_indices=col_indices)
1122 :
1123 344 : IF (.NOT. mp2_env%method == ri_rpa_method_gpw) THEN
1124 : CALL parallel_gemm('T', 'N', homo, homo, dimen, 1.0_dp, &
1125 : mo_coeff, L_mu_q, 0.0_dp, fm_P_ij, &
1126 : a_first_col=1, &
1127 : a_first_row=1, &
1128 : b_first_col=1, &
1129 : b_first_row=1, &
1130 : c_first_col=1, &
1131 320 : c_first_row=1)
1132 : CALL parallel_gemm('T', 'N', homo, homo, dimen, -2.0_dp, &
1133 : L_mu_q, mo_coeff, 2.0_dp, fm_P_ij, &
1134 : a_first_col=1, &
1135 : a_first_row=1, &
1136 : b_first_col=1, &
1137 : b_first_row=1, &
1138 : c_first_col=1, &
1139 320 : c_first_row=1)
1140 :
1141 1476 : DO jjB = 1, ncol_local
1142 1156 : j_global = col_indices(jjB)
1143 3692 : DO iiB = 1, nrow_local
1144 2216 : i_global = row_indices(iiB)
1145 : ! diagonal elements and nearly degenerate ij pairs already updated
1146 3372 : IF (ABS(Eigenval(j_global) - Eigenval(i_global)) < mp2_env%ri_grad%eps_canonical) THEN
1147 656 : fm_P_ij%local_data(iiB, jjB) = mp2_env%ri_grad%P_ij(kspin)%array(i_global, j_global)
1148 : ELSE
1149 : fm_P_ij%local_data(iiB, jjB) = &
1150 1560 : factor*fm_P_ij%local_data(iiB, jjB)/(Eigenval(j_global) - Eigenval(i_global))
1151 : END IF
1152 : END DO
1153 : END DO
1154 : ELSE
1155 116 : DO jjB = 1, ncol_local
1156 92 : j_global = col_indices(jjB)
1157 294 : DO iiB = 1, nrow_local
1158 178 : i_global = row_indices(iiB)
1159 270 : fm_P_ij%local_data(iiB, jjB) = mp2_env%ri_grad%P_ij(kspin)%array(i_global, j_global)
1160 : END DO
1161 : END DO
1162 : END IF
1163 : ! deallocate the local P_ij
1164 344 : DEALLOCATE (mp2_env%ri_grad%P_ij(kspin)%array)
1165 344 : CALL timestop(handle2)
1166 :
1167 : ! Now create and fill the P matrix (MO)
1168 : ! FOR NOW WE ASSUME P_ab AND P_ij ARE REPLICATED OVER EACH MPI RANK
1169 344 : IF (.NOT. ALLOCATED(mp2_env%ri_grad%P_mo)) THEN
1170 1124 : ALLOCATE (mp2_env%ri_grad%P_mo(SIZE(mp2_env%ri_grad%mo_coeff_o)))
1171 : END IF
1172 :
1173 344 : CALL timeset(routineN//"_PMO", handle2)
1174 344 : NULLIFY (fm_struct_tmp)
1175 : CALL cp_fm_struct_create(fm_struct_tmp, para_env=para_env, context=blacs_env, &
1176 344 : nrow_global=dimen, ncol_global=dimen)
1177 344 : CALL cp_fm_create(mp2_env%ri_grad%P_mo(kspin), fm_struct_tmp, name="P_MP2_MO")
1178 344 : CALL cp_fm_set_all(mp2_env%ri_grad%P_mo(kspin), 0.0_dp)
1179 :
1180 : ! start with the (easy) occ-occ block and locally held P_ab elements
1181 344 : itmp = get_limit(virtual, para_env_sub%num_pe, para_env_sub%mepos)
1182 344 : my_B_virtual_start = itmp(1)
1183 344 : my_B_virtual_end = itmp(2)
1184 :
1185 : ! Fill occ-occ block
1186 344 : CALL cp_fm_to_fm_submat(fm_P_ij, mp2_env%ri_grad%P_mo(kspin), homo, homo, 1, 1, 1, 1)
1187 344 : CALL cp_fm_release(fm_P_ij)
1188 :
1189 : CALL cp_fm_get_info(mp2_env%ri_grad%P_mo(kspin), &
1190 : nrow_local=nrow_local, &
1191 : ncol_local=ncol_local, &
1192 : row_indices=row_indices, &
1193 : col_indices=col_indices, &
1194 : nrow_block=nrow_block, &
1195 344 : ncol_block=ncol_block)
1196 344 : myprow = mp2_env%ri_grad%P_mo(kspin)%matrix_struct%context%mepos(1)
1197 344 : mypcol = mp2_env%ri_grad%P_mo(kspin)%matrix_struct%context%mepos(2)
1198 344 : nprow = mp2_env%ri_grad%P_mo(kspin)%matrix_struct%context%num_pe(1)
1199 344 : npcol = mp2_env%ri_grad%P_mo(kspin)%matrix_struct%context%num_pe(2)
1200 :
1201 344 : IF (mp2_env%method == ri_mp2_laplace) THEN
1202 : CALL parallel_gemm('T', 'N', virtual, virtual, dimen, 1.0_dp, &
1203 : mo_coeff, L_mu_q, 0.0_dp, mp2_env%ri_grad%P_mo(kspin), &
1204 : a_first_col=homo + 1, &
1205 : a_first_row=1, &
1206 : b_first_col=homo + 1, &
1207 : b_first_row=1, &
1208 : c_first_col=homo + 1, &
1209 24 : c_first_row=homo + 1)
1210 : CALL parallel_gemm('T', 'N', virtual, virtual, dimen, -2.0_dp, &
1211 : L_mu_q, mo_coeff, 2.0_dp, mp2_env%ri_grad%P_mo(kspin), &
1212 : a_first_col=homo + 1, &
1213 : a_first_row=1, &
1214 : b_first_col=homo + 1, &
1215 : b_first_row=1, &
1216 : c_first_col=homo + 1, &
1217 24 : c_first_row=homo + 1)
1218 : END IF
1219 :
1220 344 : IF (mp2_env%method == ri_mp2_method_gpw .OR. mp2_env%method == ri_rpa_method_gpw) THEN
1221 : ! With MP2 and RPA, we have already calculated the density matrix elements
1222 6290 : DO jjB = 1, ncol_local
1223 5970 : j_global = col_indices(jjB)
1224 5970 : IF (j_global <= homo) CYCLE
1225 59660 : DO iiB = 1, nrow_local
1226 54526 : i_global = row_indices(iiB)
1227 60496 : IF (my_B_virtual_start <= i_global - homo .AND. i_global - homo <= my_B_virtual_end) THEN
1228 : mp2_env%ri_grad%P_mo(kspin)%local_data(iiB, jjB) = &
1229 45300 : mp2_env%ri_grad%P_ab(kspin)%array(i_global - homo - my_B_virtual_start + 1, j_global - homo)
1230 : END IF
1231 : END DO
1232 : END DO
1233 24 : ELSE IF (mp2_env%method == ri_mp2_laplace) THEN
1234 : ! With Laplace-SOS-MP2, we still have to calculate the matrix elements of the non-degenerate pairs
1235 616 : DO jjB = 1, ncol_local
1236 592 : j_global = col_indices(jjB)
1237 592 : IF (j_global <= homo) CYCLE
1238 6764 : DO iiB = 1, nrow_local
1239 6240 : i_global = row_indices(iiB)
1240 6832 : IF (ABS(Eigenval(i_global) - Eigenval(j_global)) < mp2_env%ri_grad%eps_canonical) THEN
1241 565 : IF (my_B_virtual_start <= i_global - homo .AND. i_global - homo <= my_B_virtual_end) THEN
1242 : mp2_env%ri_grad%P_mo(kspin)%local_data(iiB, jjB) = &
1243 552 : mp2_env%ri_grad%P_ab(kspin)%array(i_global - homo - my_B_virtual_start + 1, j_global - homo)
1244 : ELSE
1245 13 : mp2_env%ri_grad%P_mo(kspin)%local_data(iiB, jjB) = 0.0_dp
1246 : END IF
1247 : ELSE
1248 : mp2_env%ri_grad%P_mo(kspin)%local_data(iiB, jjB) = &
1249 : factor*mp2_env%ri_grad%P_mo(kspin)%local_data(iiB, jjB)/ &
1250 5675 : (Eigenval(i_global) - Eigenval(j_global))
1251 : END IF
1252 : END DO
1253 : END DO
1254 : ELSE
1255 0 : CPABORT("Calculation of virt-virt block of density matrix is dealt with elsewhere!")
1256 : END IF
1257 :
1258 : ! send around the sub_group the local data and check if we
1259 : ! have to update our block with external elements
1260 1032 : ALLOCATE (mepos_2_grid(2, 0:para_env_sub%num_pe - 1))
1261 1032 : CALL para_env_sub%allgather([myprow, mypcol], mepos_2_grid)
1262 :
1263 1032 : ALLOCATE (sizes(2, 0:para_env_sub%num_pe - 1))
1264 1032 : CALL para_env_sub%allgather([nrow_local, ncol_local], sizes)
1265 :
1266 1376 : ALLOCATE (ab_rec(nrow_local, ncol_local))
1267 360 : DO proc_shift = 1, para_env_sub%num_pe - 1
1268 16 : proc_send = MODULO(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
1269 16 : proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
1270 :
1271 16 : send_prow = mepos_2_grid(1, proc_send)
1272 16 : send_pcol = mepos_2_grid(2, proc_send)
1273 :
1274 16 : send_row_size = sizes(1, proc_send)
1275 16 : send_col_size = sizes(2, proc_send)
1276 :
1277 64 : ALLOCATE (ab_send(send_row_size, send_col_size))
1278 4922 : ab_send = 0.0_dp
1279 :
1280 : ! first loop over row since in this way we can cycle
1281 206 : DO iiB = 1, send_row_size
1282 : i_global = cp_fm_indxl2g(iiB, nrow_block, send_prow, &
1283 190 : mp2_env%ri_grad%P_mo(kspin)%matrix_struct%first_p_pos(1), nprow)
1284 190 : IF (i_global <= homo) CYCLE
1285 154 : i_global = i_global - homo
1286 154 : IF (.NOT. (my_B_virtual_start <= i_global .AND. i_global <= my_B_virtual_end)) CYCLE
1287 493 : DO jjB = 1, send_col_size
1288 : j_global = cp_fm_indxl2g(jjB, ncol_block, send_pcol, &
1289 458 : mp2_env%ri_grad%P_mo(kspin)%matrix_struct%first_p_pos(2), npcol)
1290 458 : IF (j_global <= homo) CYCLE
1291 367 : j_global = j_global - homo
1292 648 : ab_send(iiB, jjB) = mp2_env%ri_grad%P_ab(kspin)%array(i_global - my_B_virtual_start + 1, j_global)
1293 : END DO
1294 : END DO
1295 :
1296 4922 : ab_rec = 0.0_dp
1297 : CALL para_env_sub%sendrecv(ab_send, proc_send, &
1298 16 : ab_rec, proc_receive)
1299 : mp2_env%ri_grad%P_mo(kspin)%local_data(1:nrow_local, 1:ncol_local) = &
1300 : mp2_env%ri_grad%P_mo(kspin)%local_data(1:nrow_local, 1:ncol_local) + &
1301 4922 : ab_rec(1:nrow_local, 1:ncol_local)
1302 :
1303 360 : DEALLOCATE (ab_send)
1304 : END DO
1305 344 : DEALLOCATE (ab_rec)
1306 344 : DEALLOCATE (mepos_2_grid)
1307 344 : DEALLOCATE (sizes)
1308 :
1309 : ! deallocate the local P_ab
1310 344 : DEALLOCATE (mp2_env%ri_grad%P_ab(kspin)%array)
1311 344 : CALL timestop(handle2)
1312 :
1313 : ! create also W_MP2_MO
1314 344 : CALL timeset(routineN//"_WMO", handle2)
1315 344 : IF (.NOT. ALLOCATED(mp2_env%ri_grad%W_mo)) THEN
1316 1124 : ALLOCATE (mp2_env%ri_grad%W_mo(SIZE(mp2_env%ri_grad%mo_coeff_o)))
1317 : END IF
1318 :
1319 344 : CALL cp_fm_create(mp2_env%ri_grad%W_mo(kspin), fm_struct_tmp, name="W_MP2_MO")
1320 344 : CALL cp_fm_struct_release(fm_struct_tmp)
1321 :
1322 : ! all block
1323 : CALL parallel_gemm('T', 'N', dimen, dimen, dimen, 2.0_dp*factor, &
1324 : L_mu_q, mo_coeff, 0.0_dp, mp2_env%ri_grad%W_mo(kspin), &
1325 : a_first_col=1, &
1326 : a_first_row=1, &
1327 : b_first_col=1, &
1328 : b_first_row=1, &
1329 : c_first_col=1, &
1330 344 : c_first_row=1)
1331 :
1332 : ! occ-occ block
1333 : CALL parallel_gemm('T', 'N', homo, homo, dimen, -2.0_dp*factor, &
1334 : L_mu_q, mo_coeff, 0.0_dp, mp2_env%ri_grad%W_mo(kspin), &
1335 : a_first_col=1, &
1336 : a_first_row=1, &
1337 : b_first_col=1, &
1338 : b_first_row=1, &
1339 : c_first_col=1, &
1340 344 : c_first_row=1)
1341 :
1342 : ! occ-virt block
1343 : CALL parallel_gemm('T', 'N', homo, virtual, dimen, 2.0_dp*factor, &
1344 : mo_coeff, L_mu_q, 0.0_dp, mp2_env%ri_grad%W_mo(kspin), &
1345 : a_first_col=1, &
1346 : a_first_row=1, &
1347 : b_first_col=homo + 1, &
1348 : b_first_row=1, &
1349 : c_first_col=homo + 1, &
1350 344 : c_first_row=1)
1351 344 : CALL timestop(handle2)
1352 :
1353 : ! Calculate occ-virt block of the lagrangian in MO
1354 344 : CALL timeset(routineN//"_Ljb", handle2)
1355 344 : IF (.NOT. ALLOCATED(mp2_env%ri_grad%L_jb)) THEN
1356 1124 : ALLOCATE (mp2_env%ri_grad%L_jb(SIZE(mp2_env%ri_grad%mo_coeff_o)))
1357 : END IF
1358 :
1359 : CALL cp_fm_struct_create(fm_struct_tmp, para_env=para_env, context=blacs_env, &
1360 344 : nrow_global=homo, ncol_global=virtual)
1361 344 : CALL cp_fm_create(mp2_env%ri_grad%L_jb(kspin), fm_struct_tmp, name="fm_L_jb")
1362 344 : CALL cp_fm_struct_release(fm_struct_tmp)
1363 :
1364 : ! first Virtual
1365 : CALL parallel_gemm('T', 'N', homo, virtual, dimen, 2.0_dp*factor, &
1366 : L_mu_q, mo_coeff, 0.0_dp, mp2_env%ri_grad%L_jb(kspin), &
1367 : a_first_col=1, &
1368 : a_first_row=1, &
1369 : b_first_col=homo + 1, &
1370 : b_first_row=1, &
1371 : c_first_col=1, &
1372 344 : c_first_row=1)
1373 : ! then occupied
1374 : CALL parallel_gemm('T', 'N', homo, virtual, dimen, 2.0_dp*factor, &
1375 : mo_coeff, L_mu_q, 1.0_dp, mp2_env%ri_grad%L_jb(kspin), &
1376 : a_first_col=1, &
1377 : a_first_row=1, &
1378 : b_first_col=homo + 1, &
1379 : b_first_row=1, &
1380 : c_first_col=1, &
1381 344 : c_first_row=1)
1382 :
1383 : ! finally release L_mu_q
1384 344 : CALL cp_fm_release(L_mu_q)
1385 344 : CALL timestop(handle2)
1386 :
1387 : ! here we should be done next CPHF
1388 :
1389 344 : DEALLOCATE (pos_info)
1390 :
1391 344 : CALL timestop(handle)
1392 :
1393 6192 : END SUBROUTINE create_W_P
1394 :
1395 : END MODULE mp2_ri_grad
|