Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief Routines to calculate MP2 energy using GPW method
10 : !> \par History
11 : !> 10.2011 created [Joost VandeVondele and Mauro Del Ben]
12 : ! **************************************************************************************************
13 : MODULE mp2_gpw_method
14 : USE atomic_kind_types, ONLY: atomic_kind_type
15 : USE cell_types, ONLY: cell_type
16 : USE cp_blacs_env, ONLY: cp_blacs_env_type
17 : USE cp_control_types, ONLY: dft_control_type
18 : USE cp_dbcsr_operations, ONLY: copy_dbcsr_to_fm,&
19 : cp_dbcsr_m_by_n_from_template
20 : USE cp_fm_struct, ONLY: cp_fm_struct_create,&
21 : cp_fm_struct_release,&
22 : cp_fm_struct_type
23 : USE cp_fm_types, ONLY: cp_fm_create,&
24 : cp_fm_get_info,&
25 : cp_fm_release,&
26 : cp_fm_type
27 : USE dbcsr_api, ONLY: &
28 : dbcsr_create, dbcsr_get_info, dbcsr_iterator_blocks_left, dbcsr_iterator_next_block, &
29 : dbcsr_iterator_start, dbcsr_iterator_stop, dbcsr_iterator_type, dbcsr_multiply, &
30 : dbcsr_p_type, dbcsr_release, dbcsr_set, dbcsr_type, dbcsr_type_no_symmetry
31 : USE group_dist_types, ONLY: create_group_dist,&
32 : get_group_dist,&
33 : group_dist_d1_type,&
34 : release_group_dist
35 : USE kinds, ONLY: dp,&
36 : int_8
37 : USE machine, ONLY: m_memory
38 : USE message_passing, ONLY: mp_comm_type,&
39 : mp_para_env_type
40 : USE mp2_eri_gpw, ONLY: calc_potential_gpw,&
41 : cleanup_gpw,&
42 : prepare_gpw
43 : USE particle_types, ONLY: particle_type
44 : USE pw_env_types, ONLY: pw_env_type
45 : USE pw_methods, ONLY: pw_multiply,&
46 : pw_transfer,&
47 : pw_zero
48 : USE pw_poisson_types, ONLY: pw_poisson_type
49 : USE pw_pool_types, ONLY: pw_pool_type
50 : USE pw_types, ONLY: pw_c1d_gs_type,&
51 : pw_r3d_rs_type
52 : USE qs_collocate_density, ONLY: calculate_wavefunction
53 : USE qs_environment_types, ONLY: qs_environment_type
54 : USE qs_integrate_potential, ONLY: integrate_v_rspace
55 : USE qs_kind_types, ONLY: qs_kind_type
56 : USE qs_neighbor_list_types, ONLY: neighbor_list_set_p_type
57 : USE task_list_types, ONLY: task_list_type
58 : #include "./base/base_uses.f90"
59 :
60 : IMPLICIT NONE
61 :
62 : PRIVATE
63 :
64 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'mp2_gpw_method'
65 :
66 : PUBLIC :: mp2_gpw_compute
67 :
68 : CONTAINS
69 :
70 : ! **************************************************************************************************
71 : !> \brief ...
72 : !> \param Emp2 ...
73 : !> \param Emp2_Cou ...
74 : !> \param Emp2_EX ...
75 : !> \param qs_env ...
76 : !> \param para_env ...
77 : !> \param para_env_sub ...
78 : !> \param color_sub ...
79 : !> \param cell ...
80 : !> \param particle_set ...
81 : !> \param atomic_kind_set ...
82 : !> \param qs_kind_set ...
83 : !> \param mo_coeff ...
84 : !> \param Eigenval ...
85 : !> \param nmo ...
86 : !> \param homo ...
87 : !> \param mat_munu ...
88 : !> \param sab_orb_sub ...
89 : !> \param mo_coeff_o ...
90 : !> \param mo_coeff_v ...
91 : !> \param eps_filter ...
92 : !> \param unit_nr ...
93 : !> \param mp2_memory ...
94 : !> \param calc_ex ...
95 : !> \param blacs_env_sub ...
96 : !> \param Emp2_AB ...
97 : ! **************************************************************************************************
98 16 : SUBROUTINE mp2_gpw_compute(Emp2, Emp2_Cou, Emp2_EX, qs_env, para_env, para_env_sub, color_sub, &
99 16 : cell, particle_set, atomic_kind_set, qs_kind_set, mo_coeff, Eigenval, nmo, homo, &
100 16 : mat_munu, sab_orb_sub, mo_coeff_o, mo_coeff_v, eps_filter, unit_nr, &
101 : mp2_memory, calc_ex, blacs_env_sub, Emp2_AB)
102 :
103 : REAL(KIND=dp), INTENT(OUT) :: Emp2, Emp2_Cou, Emp2_EX
104 : TYPE(qs_environment_type), POINTER :: qs_env
105 : TYPE(mp_para_env_type), POINTER :: para_env, para_env_sub
106 : INTEGER, INTENT(IN) :: color_sub
107 : TYPE(cell_type), POINTER :: cell
108 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
109 : TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
110 : TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
111 : TYPE(cp_fm_type), INTENT(IN) :: mo_coeff
112 : REAL(KIND=dp), DIMENSION(:, :), INTENT(IN) :: Eigenval
113 : INTEGER, INTENT(IN) :: nmo
114 : INTEGER, DIMENSION(:), INTENT(IN) :: homo
115 : TYPE(dbcsr_p_type), INTENT(INOUT) :: mat_munu
116 : TYPE(neighbor_list_set_p_type), DIMENSION(:), &
117 : POINTER :: sab_orb_sub
118 : TYPE(dbcsr_p_type), DIMENSION(:), INTENT(IN) :: mo_coeff_o, mo_coeff_v
119 : REAL(KIND=dp), INTENT(IN) :: eps_filter
120 : INTEGER, INTENT(IN) :: unit_nr
121 : REAL(KIND=dp), INTENT(IN) :: mp2_memory
122 : LOGICAL, INTENT(IN) :: calc_ex
123 : TYPE(cp_blacs_env_type), POINTER :: blacs_env_sub
124 : REAL(KIND=dp), INTENT(OUT), OPTIONAL :: Emp2_AB
125 :
126 : CHARACTER(LEN=*), PARAMETER :: routineN = 'mp2_gpw_compute'
127 :
128 : INTEGER :: a, a_group_counter, b, b_global, b_group_counter, blk, col, col_offset, col_size, &
129 : color_counter, EX_end, EX_end_send, EX_start, EX_start_send, group_counter, handle, &
130 : handle2, handle3, i, i_counter, i_group_counter, index_proc_shift, ispin, j, max_b_size, &
131 : max_batch_size_A, max_batch_size_I, max_row_col_local, my_A_batch_size, my_A_virtual_end, &
132 : my_A_virtual_start, my_B_size, my_B_virtual_end, my_B_virtual_start, my_I_batch_size, &
133 : my_I_occupied_end, my_I_occupied_start, my_q_position, ncol_local, nfullcols_total, &
134 : nfullrows_total, ngroup, nrow_local, nspins, p, p_best, proc_receive
135 : INTEGER :: proc_send, q, q_best, row, row_offset, row_size, size_EX, size_EX_send, &
136 : sub_sub_color, wfn_calc, wfn_calc_best
137 : INTEGER(KIND=int_8) :: mem
138 16 : INTEGER, ALLOCATABLE, DIMENSION(:) :: vector_B_sizes, &
139 16 : vector_batch_A_size_group, &
140 16 : vector_batch_I_size_group
141 16 : INTEGER, ALLOCATABLE, DIMENSION(:, :) :: color_array, local_col_row_info
142 16 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
143 32 : INTEGER, DIMENSION(SIZE(homo)) :: virtual
144 : LOGICAL :: do_alpha_beta
145 : REAL(KIND=dp) :: cutoff_old, mem_min, mem_real, mem_try, &
146 : relative_cutoff_old, wfn_size
147 16 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: e_cutoff_old
148 16 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: my_Cocc, my_Cvirt
149 16 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :) :: BIb_C, BIb_Ex, BIb_send
150 16 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: data_block
151 : TYPE(cp_fm_struct_type), POINTER :: fm_struct
152 : TYPE(cp_fm_type) :: fm_BIb_jb
153 : TYPE(dbcsr_iterator_type) :: iter
154 52 : TYPE(dbcsr_type), DIMENSION(SIZE(homo)) :: matrix_ia_jb, matrix_ia_jnu
155 : TYPE(dft_control_type), POINTER :: dft_control
156 16 : TYPE(group_dist_d1_type) :: gd_exchange
157 : TYPE(mp_comm_type) :: comm_exchange
158 : TYPE(pw_c1d_gs_type) :: pot_g, rho_g
159 : TYPE(pw_env_type), POINTER :: pw_env_sub
160 : TYPE(pw_poisson_type), POINTER :: poisson_env
161 : TYPE(pw_pool_type), POINTER :: auxbas_pw_pool
162 : TYPE(pw_r3d_rs_type) :: psi_a, rho_r
163 16 : TYPE(pw_r3d_rs_type), ALLOCATABLE, DIMENSION(:) :: psi_i
164 : TYPE(task_list_type), POINTER :: task_list_sub
165 :
166 16 : CALL timeset(routineN, handle)
167 :
168 16 : do_alpha_beta = .FALSE.
169 16 : IF (PRESENT(Emp2_AB)) do_alpha_beta = .TRUE.
170 :
171 16 : nspins = SIZE(homo)
172 34 : virtual = nmo - homo
173 :
174 34 : DO ispin = 1, nspins
175 : ! initialize and create the matrix (ia|jnu)
176 18 : CALL dbcsr_create(matrix_ia_jnu(ispin), template=mo_coeff_o(ispin)%matrix)
177 :
178 : ! Allocate Sparse matrices: (ia|jb)
179 : CALL cp_dbcsr_m_by_n_from_template(matrix_ia_jb(ispin), template=mo_coeff_o(ispin)%matrix, m=homo(ispin), n=nmo - homo(ispin), &
180 18 : sym=dbcsr_type_no_symmetry)
181 :
182 : ! set all to zero in such a way that the memory is actually allocated
183 18 : CALL dbcsr_set(matrix_ia_jnu(ispin), 0.0_dp)
184 34 : CALL dbcsr_set(matrix_ia_jb(ispin), 0.0_dp)
185 : END DO
186 16 : CALL dbcsr_set(mat_munu%matrix, 0.0_dp)
187 :
188 16 : IF (calc_ex) THEN
189 : ! create the analogous of matrix_ia_jb in fm type
190 16 : NULLIFY (fm_struct)
191 16 : CALL dbcsr_get_info(matrix_ia_jb(1), nfullrows_total=nfullrows_total, nfullcols_total=nfullcols_total)
192 : CALL cp_fm_struct_create(fm_struct, context=blacs_env_sub, nrow_global=nfullrows_total, &
193 16 : ncol_global=nfullcols_total, para_env=para_env_sub)
194 16 : CALL cp_fm_create(fm_BIb_jb, fm_struct, name="fm_BIb_jb")
195 :
196 16 : CALL copy_dbcsr_to_fm(matrix_ia_jb(1), fm_BIb_jb)
197 16 : CALL cp_fm_struct_release(fm_struct)
198 :
199 : CALL cp_fm_get_info(matrix=fm_BIb_jb, &
200 : nrow_local=nrow_local, &
201 : ncol_local=ncol_local, &
202 : row_indices=row_indices, &
203 16 : col_indices=col_indices)
204 :
205 16 : max_row_col_local = MAX(nrow_local, ncol_local)
206 16 : CALL para_env_sub%max(max_row_col_local)
207 :
208 64 : ALLOCATE (local_col_row_info(0:max_row_col_local, 2))
209 868 : local_col_row_info = 0
210 : ! 0,1 nrows
211 16 : local_col_row_info(0, 1) = nrow_local
212 78 : local_col_row_info(1:nrow_local, 1) = row_indices(1:nrow_local)
213 : ! 0,2 ncols
214 16 : local_col_row_info(0, 2) = ncol_local
215 442 : local_col_row_info(1:ncol_local, 2) = col_indices(1:ncol_local)
216 : END IF
217 :
218 : ! Get everything for GPW calculations
219 : CALL prepare_gpw(qs_env, dft_control, e_cutoff_old, cutoff_old, relative_cutoff_old, para_env_sub, pw_env_sub, &
220 16 : auxbas_pw_pool, poisson_env, task_list_sub, rho_r, rho_g, pot_g, psi_a, sab_orb_sub)
221 :
222 16 : wfn_size = REAL(SIZE(rho_r%array), KIND=dp)
223 16 : CALL para_env%max(wfn_size)
224 :
225 16 : ngroup = para_env%num_pe/para_env_sub%num_pe
226 :
227 : ! calculate the minimal memory required per MPI task (p=occupied division,q=virtual division)
228 16 : p_best = ngroup
229 16 : q_best = 1
230 16 : mem_min = HUGE(0)
231 48 : DO p = 1, ngroup
232 32 : q = ngroup/p
233 32 : IF (p*q .NE. ngroup) CYCLE
234 :
235 32 : CALL estimate_memory_usage(wfn_size, p, q, para_env_sub%num_pe, nmo, virtual(1), homo(1), calc_ex, mem_try)
236 :
237 48 : IF (mem_try <= mem_min) THEN
238 32 : mem_min = mem_try
239 32 : p_best = p
240 32 : q_best = q
241 : END IF
242 : END DO
243 24 : IF (unit_nr > 0) WRITE (unit_nr, '(T3,A,T69,F9.2,A3)') 'Minimum required memory per MPI process for MP2:', &
244 16 : mem_min, ' MB'
245 :
246 16 : CALL m_memory(mem)
247 16 : mem_real = (mem + 1024*1024 - 1)/(1024*1024)
248 16 : CALL para_env%min(mem_real)
249 :
250 16 : mem_real = mp2_memory - mem_real
251 16 : mem_real = MAX(mem_real, mem_min)
252 24 : IF (unit_nr > 0) WRITE (unit_nr, '(T3,A,T69,F9.2,A3)') 'Available memory per MPI process for MP2:', &
253 16 : mem_real, ' MB'
254 :
255 16 : wfn_calc_best = HUGE(wfn_calc_best)
256 48 : DO p = 1, ngroup
257 32 : q = ngroup/p
258 32 : IF (p*q .NE. ngroup) CYCLE
259 :
260 32 : CALL estimate_memory_usage(wfn_size, p, q, para_env_sub%num_pe, nmo, virtual(1), homo(1), calc_ex, mem_try)
261 :
262 32 : IF (mem_try > mem_real) CYCLE
263 26 : wfn_calc = ((homo(1) + p - 1)/p) + ((virtual(1) + q - 1)/q)
264 42 : IF (wfn_calc < wfn_calc_best) THEN
265 16 : wfn_calc_best = wfn_calc
266 16 : p_best = p
267 16 : q_best = q
268 : END IF
269 : END DO
270 :
271 16 : max_batch_size_I = (homo(1) + p_best - 1)/p_best
272 16 : max_batch_size_A = (virtual(1) + q_best - 1)/q_best
273 :
274 16 : IF (unit_nr > 0) THEN
275 : WRITE (UNIT=unit_nr, FMT="(T3,A,T77,i4)") &
276 8 : "MP2_GPW| max. batch size for the occupied states:", max_batch_size_I
277 : WRITE (UNIT=unit_nr, FMT="(T3,A,T77,i4)") &
278 8 : "MP2_GPW| max. batch size for the virtual states:", max_batch_size_A
279 : END IF
280 :
281 : CALL get_vector_batch(vector_batch_I_size_group, p_best, max_batch_size_I, homo(1))
282 : CALL get_vector_batch(vector_batch_A_size_group, q_best, max_batch_size_A, virtual(1))
283 :
284 : !XXXXXXXXXXXXX inverse group distribution
285 16 : group_counter = 0
286 16 : a_group_counter = 0
287 16 : my_A_virtual_start = 1
288 21 : DO j = 0, q_best - 1
289 21 : my_I_occupied_start = 1
290 21 : i_group_counter = 0
291 29 : DO i = 0, p_best - 1
292 24 : group_counter = group_counter + 1
293 24 : IF (color_sub == group_counter - 1) EXIT
294 8 : my_I_occupied_start = my_I_occupied_start + vector_batch_I_size_group(i)
295 29 : i_group_counter = i_group_counter + 1
296 : END DO
297 21 : my_q_position = j
298 21 : IF (color_sub == group_counter - 1) EXIT
299 5 : my_A_virtual_start = my_A_virtual_start + vector_batch_A_size_group(j)
300 21 : a_group_counter = a_group_counter + 1
301 : END DO
302 : !XXXXXXXXXXXXX inverse group distribution
303 :
304 16 : my_I_occupied_end = my_I_occupied_start + vector_batch_I_size_group(i_group_counter) - 1
305 16 : my_I_batch_size = vector_batch_I_size_group(i_group_counter)
306 16 : my_A_virtual_end = my_A_virtual_start + vector_batch_A_size_group(a_group_counter) - 1
307 16 : my_A_batch_size = vector_batch_A_size_group(a_group_counter)
308 :
309 16 : DEALLOCATE (vector_batch_I_size_group)
310 16 : DEALLOCATE (vector_batch_A_size_group)
311 :
312 : ! replicate on a local array on proc 0 the occupied and virtual wavevectior
313 : ! needed for the calculation of the WF's by calculate_wavefunction
314 : ! (external vector)
315 : CALL grep_occ_virt_wavefunc(para_env_sub, nmo, &
316 : my_I_occupied_start, my_I_occupied_end, my_I_batch_size, &
317 : my_A_virtual_start, my_A_virtual_end, my_A_batch_size, &
318 : mo_coeff_o(1)%matrix, mo_coeff_v(1)%matrix, my_Cocc, my_Cvirt)
319 :
320 : ! divide the b states in the sub_group in such a way to create
321 : ! b_start and b_end for each proc inside the sub_group
322 16 : max_b_size = (virtual(1) + para_env_sub%num_pe - 1)/para_env_sub%num_pe
323 : CALL get_vector_batch(vector_B_sizes, para_env_sub%num_pe, max_b_size, virtual(1))
324 :
325 : ! now give to each proc its b_start and b_end
326 16 : b_group_counter = 0
327 16 : my_B_virtual_start = 1
328 16 : DO j = 0, para_env_sub%num_pe - 1
329 16 : b_group_counter = b_group_counter + 1
330 16 : IF (b_group_counter - 1 == para_env_sub%mepos) EXIT
331 16 : my_B_virtual_start = my_B_virtual_start + vector_B_sizes(j)
332 : END DO
333 16 : my_B_virtual_end = my_B_virtual_start + vector_B_sizes(para_env_sub%mepos) - 1
334 16 : my_B_size = vector_B_sizes(para_env_sub%mepos)
335 :
336 16 : DEALLOCATE (vector_B_sizes)
337 :
338 : ! create an array containing a different "color" for each pair of
339 : ! A_start and B_start, communication will take place only among
340 : ! those proc that have the same A_start and B_start
341 64 : ALLOCATE (color_array(0:para_env_sub%num_pe - 1, 0:q_best - 1))
342 68 : color_array = 0
343 : color_counter = 0
344 42 : DO j = 0, q_best - 1
345 68 : DO i = 0, para_env_sub%num_pe - 1
346 26 : color_counter = color_counter + 1
347 52 : color_array(i, j) = color_counter
348 : END DO
349 : END DO
350 16 : sub_sub_color = color_array(para_env_sub%mepos, my_q_position)
351 :
352 16 : DEALLOCATE (color_array)
353 :
354 : ! now create a group that contains all the proc that have the same 2 virtual starting points
355 : ! in this way it is possible to sum the common integrals needed for the full MP2 energy
356 16 : CALL comm_exchange%from_split(para_env, sub_sub_color)
357 :
358 : ! create an array containing the information for communication
359 16 : CALL create_group_dist(gd_exchange, my_I_occupied_start, my_I_occupied_end, my_I_batch_size, comm_exchange)
360 :
361 98 : ALLOCATE (psi_i(my_I_occupied_start:my_I_occupied_end))
362 66 : DO i = my_I_occupied_start, my_I_occupied_end
363 50 : CALL auxbas_pw_pool%create_pw(psi_i(i))
364 : CALL calculate_wavefunction(mo_coeff, i, psi_i(i), rho_g, atomic_kind_set, &
365 : qs_kind_set, cell, dft_control, particle_set, &
366 66 : pw_env_sub, external_vector=my_Cocc(:, i - my_I_occupied_start + 1))
367 : END DO
368 :
369 16 : Emp2 = 0.0_dp
370 16 : Emp2_Cou = 0.0_dp
371 16 : Emp2_EX = 0.0_dp
372 16 : IF (do_alpha_beta) Emp2_AB = 0.0_dp
373 16 : IF (calc_ex) THEN
374 80 : ALLOCATE (BIb_C(my_B_size, homo(1), my_I_batch_size))
375 : END IF
376 :
377 16 : CALL timeset(routineN//"_loop", handle2)
378 270 : DO a = homo(1) + my_A_virtual_start, homo(1) + my_A_virtual_end
379 :
380 92659 : IF (calc_ex) BIb_C = 0.0_dp
381 :
382 : ! psi_a
383 : CALL calculate_wavefunction(mo_coeff, a, psi_a, rho_g, atomic_kind_set, &
384 : qs_kind_set, cell, dft_control, particle_set, &
385 254 : pw_env_sub, external_vector=my_Cvirt(:, a - (homo(1) + my_A_virtual_start) + 1))
386 254 : i_counter = 0
387 1017 : DO i = my_I_occupied_start, my_I_occupied_end
388 763 : i_counter = i_counter + 1
389 :
390 : ! potential
391 763 : CALL pw_zero(rho_r)
392 763 : CALL pw_multiply(rho_r, psi_i(i), psi_a)
393 763 : CALL pw_transfer(rho_r, rho_g)
394 763 : CALL calc_potential_gpw(rho_r, rho_g, poisson_env, pot_g, qs_env%mp2_env%potential_parameter)
395 :
396 : ! and finally (ia|munu)
397 763 : CALL timeset(routineN//"_int", handle3)
398 763 : CALL dbcsr_set(mat_munu%matrix, 0.0_dp)
399 : CALL integrate_v_rspace(rho_r, hmat=mat_munu, qs_env=qs_env, &
400 : calculate_forces=.FALSE., compute_tau=.FALSE., gapw=.FALSE., &
401 763 : pw_env_external=pw_env_sub, task_list_external=task_list_sub)
402 763 : CALL timestop(handle3)
403 :
404 : ! multiply and goooooooo ...
405 763 : CALL timeset(routineN//"_mult_o", handle3)
406 1622 : DO ispin = 1, nspins
407 : CALL dbcsr_multiply("N", "N", 1.0_dp, mat_munu%matrix, mo_coeff_o(ispin)%matrix, &
408 1622 : 0.0_dp, matrix_ia_jnu(ispin), filter_eps=eps_filter)
409 : END DO
410 763 : CALL timestop(handle3)
411 763 : CALL timeset(routineN//"_mult_v", handle3)
412 1622 : DO ispin = 1, nspins
413 : CALL dbcsr_multiply("T", "N", 1.0_dp, matrix_ia_jnu(ispin), mo_coeff_v(ispin)%matrix, &
414 1622 : 0.0_dp, matrix_ia_jb(ispin), filter_eps=eps_filter)
415 : END DO
416 763 : CALL timestop(handle3)
417 :
418 763 : CALL timeset(routineN//"_E_Cou", handle3)
419 763 : CALL dbcsr_iterator_start(iter, matrix_ia_jb(1))
420 1738 : DO WHILE (dbcsr_iterator_blocks_left(iter))
421 : CALL dbcsr_iterator_next_block(iter, row, col, data_block, blk, &
422 : row_size=row_size, col_size=col_size, &
423 975 : row_offset=row_offset, col_offset=col_offset)
424 24373 : DO b = 1, col_size
425 112275 : DO j = 1, row_size
426 : ! Compute the coulomb MP2 energy
427 : Emp2_Cou = Emp2_Cou - 2.0_dp*data_block(j, b)**2/ &
428 111300 : (Eigenval(a, 1) + Eigenval(homo(1) + col_offset + b - 1, 1) - Eigenval(i, 1) - Eigenval(row_offset + j - 1, 1))
429 : END DO
430 : END DO
431 : END DO
432 763 : CALL dbcsr_iterator_stop(iter)
433 763 : IF (do_alpha_beta) THEN
434 : ! Compute the coulomb only= SO = MP2 alpha-beta MP2 energy component
435 96 : CALL dbcsr_iterator_start(iter, matrix_ia_jb(2))
436 192 : DO WHILE (dbcsr_iterator_blocks_left(iter))
437 : CALL dbcsr_iterator_next_block(iter, row, col, data_block, blk, &
438 : row_size=row_size, col_size=col_size, &
439 96 : row_offset=row_offset, col_offset=col_offset)
440 2592 : DO b = 1, col_size
441 9696 : DO j = 1, row_size
442 : ! Compute the coulomb MP2 energy alpha beta case
443 : Emp2_AB = Emp2_AB - data_block(j, b)**2/ &
444 9600 : (Eigenval(a, 1) + Eigenval(homo(2) + col_offset + b - 1, 2) - Eigenval(i, 1) - Eigenval(row_offset + j - 1, 2))
445 : END DO
446 : END DO
447 : END DO
448 96 : CALL dbcsr_iterator_stop(iter)
449 : END IF
450 763 : CALL timestop(handle3)
451 :
452 : ! now collect my local data from all the other members of the group
453 : ! b_start, b_end
454 4069 : IF (calc_ex) THEN
455 763 : CALL timeset(routineN//"_E_Ex_1", handle3)
456 763 : CALL copy_dbcsr_to_fm(matrix_ia_jb(1), fm_BIb_jb)
457 : CALL grep_my_integrals(para_env_sub, fm_BIb_jb, BIb_C(1:my_B_size, 1:homo(1), i_counter), max_row_col_local, &
458 763 : local_col_row_info, my_B_virtual_end, my_B_virtual_start)
459 763 : CALL timestop(handle3)
460 : END IF
461 :
462 : END DO
463 :
464 270 : IF (calc_ex) THEN
465 :
466 254 : ASSOCIATE (mepos_in_EX_group => comm_exchange%mepos, size_of_exchange_group => comm_exchange%num_pe)
467 254 : CALL timeset(routineN//"_E_Ex_2", handle3)
468 : ! calculate the contribution to MP2 energy for my local data
469 1017 : DO i = 1, my_I_batch_size
470 3538 : DO j = my_I_occupied_start, my_I_occupied_end
471 83285 : DO b = 1, my_B_size
472 80001 : b_global = b - 1 + my_B_virtual_start
473 : Emp2_EX = Emp2_EX + BIb_C(b, j, i)*BIb_C(b, i + my_I_occupied_start - 1, j - my_I_occupied_start + 1) &
474 82522 : /(Eigenval(a, 1) + Eigenval(homo(1) + b_global, 1) - Eigenval(i + my_I_occupied_start - 1, 1) - Eigenval(j, 1))
475 : END DO
476 : END DO
477 : END DO
478 :
479 : ! start communicating and collecting exchange contributions from
480 : ! other processes in my exchange group
481 368 : DO index_proc_shift = 1, size_of_exchange_group - 1
482 114 : proc_send = MODULO(mepos_in_EX_group + index_proc_shift, size_of_exchange_group)
483 114 : proc_receive = MODULO(mepos_in_EX_group - index_proc_shift, size_of_exchange_group)
484 :
485 114 : CALL get_group_dist(gd_exchange, proc_receive, EX_start, EX_end, size_EX)
486 :
487 570 : ALLOCATE (BIb_EX(my_B_size, my_I_batch_size, size_EX))
488 9462 : BIb_EX = 0.0_dp
489 :
490 114 : CALL get_group_dist(gd_exchange, proc_send, EX_start_send, EX_end_send, size_EX_send)
491 :
492 570 : ALLOCATE (BIb_send(my_B_size, size_EX_send, my_I_batch_size))
493 : BIb_send(1:my_B_size, 1:size_EX_send, 1:my_I_batch_size) = &
494 9462 : BIb_C(1:my_B_size, EX_start_send:EX_end_send, 1:my_I_batch_size)
495 :
496 : ! send and receive the exchange array
497 114 : CALL comm_exchange%sendrecv(BIb_send, proc_send, BIb_EX, proc_receive)
498 :
499 342 : DO i = 1, my_I_batch_size
500 798 : DO j = 1, size_EX
501 9348 : DO b = 1, my_B_size
502 8664 : b_global = b - 1 + my_B_virtual_start
503 : Emp2_EX = Emp2_EX + BIb_C(b, j + EX_start - 1, i)*BIb_EX(b, i, j) &
504 : /(Eigenval(a, 1) + Eigenval(homo(1) + b_global, 1) - Eigenval(i + my_I_occupied_start - 1, 1) &
505 9120 : - Eigenval(j + EX_start - 1, 1))
506 : END DO
507 : END DO
508 : END DO
509 :
510 114 : DEALLOCATE (BIb_EX)
511 596 : DEALLOCATE (BIb_send)
512 :
513 : END DO
514 508 : CALL timestop(handle3)
515 : END ASSOCIATE
516 : END IF
517 :
518 : END DO
519 16 : CALL timestop(handle2)
520 :
521 16 : CALL para_env%sum(Emp2_Cou)
522 16 : CALL para_env%sum(Emp2_EX)
523 16 : Emp2 = Emp2_Cou + Emp2_EX
524 16 : IF (do_alpha_beta) CALL para_env%sum(Emp2_AB)
525 :
526 16 : DEALLOCATE (my_Cocc)
527 16 : DEALLOCATE (my_Cvirt)
528 :
529 16 : IF (calc_ex) THEN
530 16 : CALL cp_fm_release(fm_BIb_jb)
531 16 : DEALLOCATE (local_col_row_info)
532 16 : DEALLOCATE (BIb_C)
533 : END IF
534 16 : CALL release_group_dist(gd_exchange)
535 :
536 16 : CALL comm_exchange%free()
537 :
538 34 : DO ispin = 1, nspins
539 18 : CALL dbcsr_release(matrix_ia_jnu(ispin))
540 34 : CALL dbcsr_release(matrix_ia_jb(ispin))
541 : END DO
542 :
543 66 : DO i = my_I_occupied_start, my_I_occupied_end
544 66 : CALL psi_i(i)%release()
545 : END DO
546 16 : DEALLOCATE (psi_i)
547 :
548 : CALL cleanup_gpw(qs_env, e_cutoff_old, cutoff_old, relative_cutoff_old, para_env_sub, pw_env_sub, &
549 16 : task_list_sub, auxbas_pw_pool, rho_r, rho_g, pot_g, psi_a)
550 :
551 16 : CALL timestop(handle)
552 :
553 128 : END SUBROUTINE mp2_gpw_compute
554 :
555 : ! **************************************************************************************************
556 : !> \brief ...
557 : !> \param wfn_size ...
558 : !> \param p ...
559 : !> \param q ...
560 : !> \param num_w ...
561 : !> \param nmo ...
562 : !> \param virtual ...
563 : !> \param homo ...
564 : !> \param calc_ex ...
565 : !> \param mem_try ...
566 : ! **************************************************************************************************
567 64 : ELEMENTAL SUBROUTINE estimate_memory_usage(wfn_size, p, q, num_w, nmo, virtual, homo, calc_ex, mem_try)
568 : REAL(KIND=dp), INTENT(IN) :: wfn_size
569 : INTEGER, INTENT(IN) :: p, q, num_w, nmo, virtual, homo
570 : LOGICAL, INTENT(IN) :: calc_ex
571 : REAL(KIND=dp), INTENT(OUT) :: mem_try
572 :
573 : mem_try = 0.0_dp
574 : ! integrals
575 64 : mem_try = mem_try + virtual*REAL(homo, KIND=dp)**2/(p*num_w)
576 : ! array for the coefficient matrix and wave vectors
577 : mem_try = mem_try + REAL(homo, KIND=dp)*nmo/p + &
578 : REAL(virtual, KIND=dp)*nmo/q + &
579 64 : 2.0_dp*MAX(REAL(homo, KIND=dp)*nmo/p, REAL(virtual, KIND=dp)*nmo/q)
580 : ! temporary array for MO integrals and MO integrals to be exchanged
581 64 : IF (calc_ex) THEN
582 : mem_try = mem_try + 2.0_dp*MAX(virtual*REAL(homo, KIND=dp)*MIN(1, num_w - 1)/num_w, &
583 64 : virtual*REAL(homo, KIND=dp)**2/(p*p*num_w))
584 : ELSE
585 0 : mem_try = mem_try + 2.0_dp*virtual*REAL(homo, KIND=dp)
586 : END IF
587 : ! wfn
588 64 : mem_try = mem_try + ((homo + p - 1)/p)*wfn_size
589 : ! Mb
590 64 : mem_try = mem_try*8.0D+00/1024.0D+00**2
591 :
592 64 : END SUBROUTINE
593 :
594 : ! **************************************************************************************************
595 : !> \brief ...
596 : !> \param vector_batch_I_size_group ...
597 : !> \param p_best ...
598 : !> \param max_batch_size_I ...
599 : !> \param homo ...
600 : ! **************************************************************************************************
601 48 : PURE SUBROUTINE get_vector_batch(vector_batch_I_size_group, p_best, max_batch_size_I, homo)
602 : INTEGER, ALLOCATABLE, DIMENSION(:), INTENT(OUT) :: vector_batch_I_size_group
603 : INTEGER, INTENT(IN) :: p_best, max_batch_size_I, homo
604 :
605 : INTEGER :: i, one
606 :
607 144 : ALLOCATE (vector_batch_I_size_group(0:p_best - 1))
608 :
609 112 : vector_batch_I_size_group = max_batch_size_I
610 112 : IF (SUM(vector_batch_I_size_group) /= homo) THEN
611 24 : one = 1
612 24 : IF (SUM(vector_batch_I_size_group) > homo) one = -1
613 8 : i = -1
614 : DO
615 8 : i = i + 1
616 8 : vector_batch_I_size_group(i) = vector_batch_I_size_group(i) + one
617 24 : IF (SUM(vector_batch_I_size_group) == homo) EXIT
618 0 : IF (i == p_best - 1) i = -1
619 : END DO
620 : END IF
621 :
622 48 : END SUBROUTINE get_vector_batch
623 :
624 : ! **************************************************************************************************
625 : !> \brief ...
626 : !> \param para_env_sub ...
627 : !> \param fm_BIb_jb ...
628 : !> \param BIb_jb ...
629 : !> \param max_row_col_local ...
630 : !> \param local_col_row_info ...
631 : !> \param my_B_virtual_end ...
632 : !> \param my_B_virtual_start ...
633 : ! **************************************************************************************************
634 763 : SUBROUTINE grep_my_integrals(para_env_sub, fm_BIb_jb, BIb_jb, max_row_col_local, &
635 : local_col_row_info, &
636 : my_B_virtual_end, my_B_virtual_start)
637 : TYPE(mp_para_env_type), INTENT(IN) :: para_env_sub
638 : TYPE(cp_fm_type), INTENT(IN) :: fm_BIb_jb
639 : REAL(KIND=dp), DIMENSION(:, :), INTENT(OUT) :: BIb_jb
640 : INTEGER, INTENT(IN) :: max_row_col_local
641 : INTEGER, ALLOCATABLE, DIMENSION(:, :), INTENT(IN) :: local_col_row_info
642 : INTEGER, INTENT(IN) :: my_B_virtual_end, my_B_virtual_start
643 :
644 : INTEGER :: i_global, iiB, j_global, jjB, ncol_rec, &
645 : nrow_rec, proc_receive, proc_send, &
646 : proc_shift
647 763 : INTEGER, ALLOCATABLE, DIMENSION(:, :) :: rec_col_row_info
648 763 : INTEGER, DIMENSION(:), POINTER :: col_indices_rec, row_indices_rec
649 763 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: local_BI, rec_BI
650 :
651 3052 : ALLOCATE (rec_col_row_info(0:max_row_col_local, 2))
652 :
653 49085 : rec_col_row_info(:, :) = local_col_row_info
654 :
655 763 : nrow_rec = rec_col_row_info(0, 1)
656 763 : ncol_rec = rec_col_row_info(0, 2)
657 :
658 2289 : ALLOCATE (row_indices_rec(nrow_rec))
659 3740 : row_indices_rec = rec_col_row_info(1:nrow_rec, 1)
660 :
661 2289 : ALLOCATE (col_indices_rec(ncol_rec))
662 23398 : col_indices_rec = rec_col_row_info(1:ncol_rec, 2)
663 :
664 : ! accumulate data on BIb_jb buffer starting from myself
665 23398 : DO jjB = 1, ncol_rec
666 22635 : j_global = col_indices_rec(jjB)
667 23398 : IF (j_global >= my_B_virtual_start .AND. j_global <= my_B_virtual_end) THEN
668 111300 : DO iiB = 1, nrow_rec
669 88665 : i_global = row_indices_rec(iiB)
670 111300 : BIb_jb(j_global - my_B_virtual_start + 1, i_global) = fm_BIb_jb%local_data(iiB, jjB)
671 : END DO
672 : END IF
673 : END DO
674 :
675 763 : DEALLOCATE (row_indices_rec)
676 763 : DEALLOCATE (col_indices_rec)
677 :
678 763 : IF (para_env_sub%num_pe > 1) THEN
679 0 : ALLOCATE (local_BI(nrow_rec, ncol_rec))
680 0 : local_BI(1:nrow_rec, 1:ncol_rec) = fm_BIb_jb%local_data(1:nrow_rec, 1:ncol_rec)
681 :
682 0 : DO proc_shift = 1, para_env_sub%num_pe - 1
683 0 : proc_send = MODULO(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
684 0 : proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
685 :
686 : ! first exchange information on the local data
687 0 : rec_col_row_info = 0
688 0 : CALL para_env_sub%sendrecv(local_col_row_info, proc_send, rec_col_row_info, proc_receive)
689 0 : nrow_rec = rec_col_row_info(0, 1)
690 0 : ncol_rec = rec_col_row_info(0, 2)
691 :
692 0 : ALLOCATE (row_indices_rec(nrow_rec))
693 0 : row_indices_rec = rec_col_row_info(1:nrow_rec, 1)
694 :
695 0 : ALLOCATE (col_indices_rec(ncol_rec))
696 0 : col_indices_rec = rec_col_row_info(1:ncol_rec, 2)
697 :
698 0 : ALLOCATE (rec_BI(nrow_rec, ncol_rec))
699 0 : rec_BI = 0.0_dp
700 :
701 : ! then send and receive the real data
702 0 : CALL para_env_sub%sendrecv(local_BI, proc_send, rec_BI, proc_receive)
703 :
704 : ! accumulate the received data on BIb_jb buffer
705 0 : DO jjB = 1, ncol_rec
706 0 : j_global = col_indices_rec(jjB)
707 0 : IF (j_global >= my_B_virtual_start .AND. j_global <= my_B_virtual_end) THEN
708 0 : DO iiB = 1, nrow_rec
709 0 : i_global = row_indices_rec(iiB)
710 0 : BIb_jb(j_global - my_B_virtual_start + 1, i_global) = rec_BI(iiB, jjB)
711 : END DO
712 : END IF
713 : END DO
714 :
715 0 : DEALLOCATE (col_indices_rec)
716 0 : DEALLOCATE (row_indices_rec)
717 0 : DEALLOCATE (rec_BI)
718 : END DO
719 :
720 0 : DEALLOCATE (local_BI)
721 : END IF
722 :
723 763 : DEALLOCATE (rec_col_row_info)
724 :
725 763 : END SUBROUTINE grep_my_integrals
726 :
727 : ! **************************************************************************************************
728 : !> \brief ...
729 : !> \param para_env_sub ...
730 : !> \param dimen ...
731 : !> \param my_I_occupied_start ...
732 : !> \param my_I_occupied_end ...
733 : !> \param my_I_batch_size ...
734 : !> \param my_A_virtual_start ...
735 : !> \param my_A_virtual_end ...
736 : !> \param my_A_batch_size ...
737 : !> \param mo_coeff_o ...
738 : !> \param mo_coeff_v ...
739 : !> \param my_Cocc ...
740 : !> \param my_Cvirt ...
741 : ! **************************************************************************************************
742 16 : SUBROUTINE grep_occ_virt_wavefunc(para_env_sub, dimen, &
743 : my_I_occupied_start, my_I_occupied_end, my_I_batch_size, &
744 : my_A_virtual_start, my_A_virtual_end, my_A_batch_size, &
745 : mo_coeff_o, mo_coeff_v, my_Cocc, my_Cvirt)
746 :
747 : TYPE(mp_para_env_type), INTENT(IN) :: para_env_sub
748 : INTEGER, INTENT(IN) :: dimen, my_I_occupied_start, my_I_occupied_end, my_I_batch_size, &
749 : my_A_virtual_start, my_A_virtual_end, my_A_batch_size
750 : TYPE(dbcsr_type), POINTER :: mo_coeff_o, mo_coeff_v
751 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :), &
752 : INTENT(OUT) :: my_Cocc, my_Cvirt
753 :
754 : CHARACTER(LEN=*), PARAMETER :: routineN = 'grep_occ_virt_wavefunc'
755 :
756 : INTEGER :: blk, col, col_offset, col_size, handle, &
757 : i, i_global, j, j_global, row, &
758 : row_offset, row_size
759 16 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: data_block
760 : TYPE(dbcsr_iterator_type) :: iter
761 :
762 16 : CALL timeset(routineN, handle)
763 :
764 64 : ALLOCATE (my_Cocc(dimen, my_I_batch_size))
765 1558 : my_Cocc = 0.0_dp
766 :
767 64 : ALLOCATE (my_Cvirt(dimen, my_A_batch_size))
768 8159 : my_Cvirt = 0.0_dp
769 :
770 : ! accumulate data from mo_coeff_o into Cocc
771 16 : CALL dbcsr_iterator_start(iter, mo_coeff_o)
772 68 : DO WHILE (dbcsr_iterator_blocks_left(iter))
773 : CALL dbcsr_iterator_next_block(iter, row, col, data_block, blk, &
774 : row_size=row_size, col_size=col_size, &
775 52 : row_offset=row_offset, col_offset=col_offset)
776 268 : DO j = 1, col_size
777 200 : j_global = col_offset + j - 1
778 252 : IF (j_global >= my_I_occupied_start .AND. j_global <= my_I_occupied_end) THEN
779 1656 : DO i = 1, row_size
780 1492 : i_global = row_offset + i - 1
781 1656 : my_Cocc(i_global, j_global - my_I_occupied_start + 1) = data_block(i, j)
782 : END DO
783 : END IF
784 : END DO
785 : END DO
786 16 : CALL dbcsr_iterator_stop(iter)
787 :
788 16 : CALL para_env_sub%sum(my_Cocc)
789 :
790 : ! accumulate data from mo_coeff_o into Cocc
791 16 : CALL dbcsr_iterator_start(iter, mo_coeff_v)
792 74 : DO WHILE (dbcsr_iterator_blocks_left(iter))
793 : CALL dbcsr_iterator_next_block(iter, row, col, data_block, blk, &
794 : row_size=row_size, col_size=col_size, &
795 58 : row_offset=row_offset, col_offset=col_offset)
796 1354 : DO j = 1, col_size
797 1280 : j_global = col_offset + j - 1
798 1338 : IF (j_global >= my_A_virtual_start .AND. j_global <= my_A_virtual_end) THEN
799 8700 : DO i = 1, row_size
800 7889 : i_global = row_offset + i - 1
801 8700 : my_Cvirt(i_global, j_global - my_A_virtual_start + 1) = data_block(i, j)
802 : END DO
803 : END IF
804 : END DO
805 : END DO
806 16 : CALL dbcsr_iterator_stop(iter)
807 :
808 16 : CALL para_env_sub%sum(my_Cvirt)
809 :
810 16 : CALL timestop(handle)
811 :
812 16 : END SUBROUTINE grep_occ_virt_wavefunc
813 :
814 : END MODULE mp2_gpw_method
|