Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2025 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief RI-methods for HFX and K-points.
10 : !> \auhtor Augustin Bussy (01.2023)
11 : ! **************************************************************************************************
12 :
13 : MODULE hfx_ri_kp
14 : USE admm_types, ONLY: get_admm_env
15 : USE atomic_kind_types, ONLY: atomic_kind_type,&
16 : get_atomic_kind_set
17 : USE basis_set_types, ONLY: get_gto_basis_set,&
18 : gto_basis_set_p_type
19 : USE bibliography, ONLY: Bussy2024,&
20 : cite_reference
21 : USE cell_types, ONLY: cell_type,&
22 : pbc,&
23 : real_to_scaled,&
24 : scaled_to_real
25 : USE cp_array_utils, ONLY: cp_1d_logical_p_type,&
26 : cp_2d_r_p_type,&
27 : cp_3d_r_p_type
28 : USE cp_blacs_env, ONLY: cp_blacs_env_create,&
29 : cp_blacs_env_release,&
30 : cp_blacs_env_type
31 : USE cp_control_types, ONLY: dft_control_type
32 : USE cp_dbcsr_api, ONLY: &
33 : dbcsr_add, dbcsr_clear, dbcsr_copy, dbcsr_create, dbcsr_distribution_get, &
34 : dbcsr_distribution_new, dbcsr_distribution_release, dbcsr_distribution_type, dbcsr_filter, &
35 : dbcsr_finalize, dbcsr_get_block_p, dbcsr_get_info, dbcsr_iterator_blocks_left, &
36 : dbcsr_iterator_next_block, dbcsr_iterator_start, dbcsr_iterator_stop, dbcsr_iterator_type, &
37 : dbcsr_p_type, dbcsr_put_block, dbcsr_release, dbcsr_type, dbcsr_type_no_symmetry, &
38 : dbcsr_type_symmetric
39 : USE cp_dbcsr_cholesky, ONLY: cp_dbcsr_cholesky_decompose,&
40 : cp_dbcsr_cholesky_invert
41 : USE cp_dbcsr_contrib, ONLY: dbcsr_dot
42 : USE cp_dbcsr_cp2k_link, ONLY: cp_dbcsr_alloc_block_from_nbl
43 : USE cp_dbcsr_diag, ONLY: cp_dbcsr_power
44 : USE cp_dbcsr_operations, ONLY: cp_dbcsr_dist2d_to_dist
45 : USE dbt_api, ONLY: &
46 : dbt_batched_contract_finalize, dbt_batched_contract_init, dbt_clear, dbt_contract, &
47 : dbt_copy, dbt_copy_matrix_to_tensor, dbt_copy_tensor_to_matrix, dbt_create, dbt_destroy, &
48 : dbt_distribution_destroy, dbt_distribution_new, dbt_distribution_type, dbt_filter, &
49 : dbt_finalize, dbt_get_block, dbt_get_info, dbt_get_stored_coordinates, &
50 : dbt_iterator_blocks_left, dbt_iterator_next_block, dbt_iterator_start, dbt_iterator_stop, &
51 : dbt_iterator_type, dbt_mp_environ_pgrid, dbt_pgrid_create, dbt_pgrid_destroy, &
52 : dbt_pgrid_type, dbt_put_block, dbt_scale, dbt_type
53 : USE distribution_2d_types, ONLY: distribution_2d_release,&
54 : distribution_2d_type
55 : USE hfx_ri, ONLY: get_idx_to_atom,&
56 : hfx_ri_pre_scf_calc_tensors
57 : USE hfx_types, ONLY: hfx_ri_type
58 : USE input_constants, ONLY: do_potential_short,&
59 : hfx_ri_do_2c_cholesky,&
60 : hfx_ri_do_2c_diag,&
61 : hfx_ri_do_2c_iter
62 : USE input_cp2k_hfx, ONLY: ri_pmat
63 : USE input_section_types, ONLY: section_vals_get_subs_vals,&
64 : section_vals_type,&
65 : section_vals_val_get,&
66 : section_vals_val_set
67 : USE iterate_matrix, ONLY: invert_hotelling
68 : USE kinds, ONLY: default_string_length,&
69 : dp,&
70 : int_8
71 : USE kpoint_types, ONLY: get_kpoint_info,&
72 : kpoint_type
73 : USE libint_2c_3c, ONLY: cutoff_screen_factor
74 : USE machine, ONLY: m_flush,&
75 : m_memory,&
76 : m_walltime
77 : USE mathlib, ONLY: erfc_cutoff
78 : USE message_passing, ONLY: mp_cart_type,&
79 : mp_para_env_type,&
80 : mp_request_type,&
81 : mp_waitall
82 : USE particle_methods, ONLY: get_particle_set
83 : USE particle_types, ONLY: particle_type
84 : USE physcon, ONLY: angstrom
85 : USE qs_environment_types, ONLY: get_qs_env,&
86 : qs_environment_type
87 : USE qs_force_types, ONLY: qs_force_type
88 : USE qs_integral_utils, ONLY: basis_set_list_setup
89 : USE qs_interactions, ONLY: init_interaction_radii_orb_basis
90 : USE qs_kind_types, ONLY: qs_kind_type
91 : USE qs_neighbor_list_types, ONLY: get_iterator_info,&
92 : neighbor_list_iterate,&
93 : neighbor_list_iterator_create,&
94 : neighbor_list_iterator_p_type,&
95 : neighbor_list_iterator_release,&
96 : neighbor_list_set_p_type,&
97 : release_neighbor_list_sets
98 : USE qs_scf_types, ONLY: qs_scf_env_type
99 : USE qs_tensors, ONLY: &
100 : build_2c_derivatives, build_2c_neighbor_lists, build_3c_derivatives, &
101 : build_3c_neighbor_lists, get_3c_iterator_info, get_tensor_occupancy, &
102 : neighbor_list_3c_destroy, neighbor_list_3c_iterate, neighbor_list_3c_iterator_create, &
103 : neighbor_list_3c_iterator_destroy
104 : USE qs_tensors_types, ONLY: create_2c_tensor,&
105 : create_3c_tensor,&
106 : create_tensor_batches,&
107 : distribution_2d_create,&
108 : distribution_3d_create,&
109 : distribution_3d_type,&
110 : neighbor_list_3c_iterator_type,&
111 : neighbor_list_3c_type
112 : USE util, ONLY: get_limit
113 : USE virial_types, ONLY: virial_type
114 : #include "./base/base_uses.f90"
115 :
116 : !$ USE OMP_LIB, ONLY: omp_get_num_threads
117 :
118 : IMPLICIT NONE
119 : PRIVATE
120 :
121 : PUBLIC :: hfx_ri_update_ks_kp, hfx_ri_update_forces_kp
122 :
123 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'hfx_ri_kp'
124 : CONTAINS
125 :
126 : ! **************************************************************************************************
127 : !> \brief I_1nitialize the ri_data for K-point. For now, we take the normal, usual existing ri_data
128 : !> and we adapt it to our needs
129 : !> \param dbcsr_template ...
130 : !> \param ri_data ...
131 : !> \param qs_env ...
132 : ! **************************************************************************************************
133 80 : SUBROUTINE adapt_ri_data_to_kp(dbcsr_template, ri_data, qs_env)
134 : TYPE(dbcsr_type), INTENT(INOUT) :: dbcsr_template
135 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
136 : TYPE(qs_environment_type), POINTER :: qs_env
137 :
138 : INTEGER :: i_img, i_RI, i_spin, iatom, natom, &
139 : nblks_RI, nimg, nkind, nspins
140 80 : INTEGER, ALLOCATABLE, DIMENSION(:) :: bsizes_RI_ext, dist1, dist2, dist3
141 : TYPE(dft_control_type), POINTER :: dft_control
142 : TYPE(mp_para_env_type), POINTER :: para_env
143 :
144 80 : NULLIFY (dft_control, para_env)
145 :
146 : !The main thing that we need to do is to allocate more space for the integrals, such that there
147 : !is room for each periodic image. Note that we only go in 1D, i.e. we store (mu^0 sigma^a|P^0),
148 : !and (P^0|Q^a) => the RI basis is always in the main cell.
149 :
150 : !Get kpoint info
151 80 : CALL get_qs_env(qs_env, dft_control=dft_control, natom=natom, para_env=para_env, nkind=nkind)
152 80 : nimg = ri_data%nimg
153 :
154 : !Along the RI direction we have basis elements spread accross ncell_RI images.
155 80 : nblks_RI = SIZE(ri_data%bsizes_RI_split)
156 240 : ALLOCATE (bsizes_RI_ext(nblks_RI*ri_data%ncell_RI))
157 562 : DO i_RI = 1, ri_data%ncell_RI
158 2744 : bsizes_RI_ext((i_RI - 1)*nblks_RI + 1:i_RI*nblks_RI) = ri_data%bsizes_RI_split(:)
159 : END DO
160 :
161 4876 : ALLOCATE (ri_data%t_3c_int_ctr_1(1, nimg))
162 : CALL create_3c_tensor(ri_data%t_3c_int_ctr_1(1, 1), dist1, dist2, dist3, &
163 : ri_data%pgrid_1, ri_data%bsizes_AO_split, bsizes_RI_ext, &
164 80 : ri_data%bsizes_AO_split, [1, 2], [3], name="(AO RI | AO)")
165 :
166 1998 : DO i_img = 2, nimg
167 1998 : CALL dbt_create(ri_data%t_3c_int_ctr_1(1, 1), ri_data%t_3c_int_ctr_1(1, i_img))
168 : END DO
169 80 : DEALLOCATE (dist1, dist2, dist3)
170 :
171 880 : ALLOCATE (ri_data%t_3c_int_ctr_2(1, 1))
172 : CALL create_3c_tensor(ri_data%t_3c_int_ctr_2(1, 1), dist1, dist2, dist3, &
173 : ri_data%pgrid_1, ri_data%bsizes_AO_split, bsizes_RI_ext, &
174 80 : ri_data%bsizes_AO_split, [1], [2, 3], name="(AO RI | AO)")
175 80 : DEALLOCATE (dist1, dist2, dist3)
176 :
177 : !We use full block sizes for the 2c quantities
178 80 : DEALLOCATE (bsizes_RI_ext)
179 80 : nblks_RI = SIZE(ri_data%bsizes_RI)
180 240 : ALLOCATE (bsizes_RI_ext(nblks_RI*ri_data%ncell_RI))
181 562 : DO i_RI = 1, ri_data%ncell_RI
182 1526 : bsizes_RI_ext((i_RI - 1)*nblks_RI + 1:i_RI*nblks_RI) = ri_data%bsizes_RI(:)
183 : END DO
184 :
185 3440 : ALLOCATE (ri_data%t_2c_inv(1, natom), ri_data%t_2c_int(1, natom), ri_data%t_2c_pot(1, natom))
186 : CALL create_2c_tensor(ri_data%t_2c_inv(1, 1), dist1, dist2, ri_data%pgrid_2d, &
187 : bsizes_RI_ext, bsizes_RI_ext, &
188 80 : name="(RI | RI)")
189 80 : DEALLOCATE (dist1, dist2)
190 80 : CALL dbt_create(ri_data%t_2c_inv(1, 1), ri_data%t_2c_int(1, 1))
191 80 : CALL dbt_create(ri_data%t_2c_inv(1, 1), ri_data%t_2c_pot(1, 1))
192 160 : DO iatom = 2, natom
193 80 : CALL dbt_create(ri_data%t_2c_inv(1, 1), ri_data%t_2c_inv(1, iatom))
194 80 : CALL dbt_create(ri_data%t_2c_inv(1, 1), ri_data%t_2c_int(1, iatom))
195 160 : CALL dbt_create(ri_data%t_2c_inv(1, 1), ri_data%t_2c_pot(1, iatom))
196 : END DO
197 :
198 400 : ALLOCATE (ri_data%kp_cost(natom, natom, nimg))
199 14066 : ri_data%kp_cost = 0.0_dp
200 :
201 : !We store the density and KS matrix in tensor format
202 80 : nspins = dft_control%nspins
203 10604 : ALLOCATE (ri_data%rho_ao_t(nspins, nimg), ri_data%ks_t(nspins, nimg))
204 : CALL create_2c_tensor(ri_data%rho_ao_t(1, 1), dist1, dist2, ri_data%pgrid_2d, &
205 : ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
206 80 : name="(AO | AO)")
207 80 : DEALLOCATE (dist1, dist2)
208 :
209 80 : CALL dbt_create(dbcsr_template, ri_data%ks_t(1, 1))
210 :
211 80 : IF (nspins == 2) THEN
212 26 : CALL dbt_create(ri_data%rho_ao_t(1, 1), ri_data%rho_ao_t(2, 1))
213 26 : CALL dbt_create(ri_data%ks_t(1, 1), ri_data%ks_t(2, 1))
214 : END IF
215 1998 : DO i_img = 2, nimg
216 4276 : DO i_spin = 1, nspins
217 2278 : CALL dbt_create(ri_data%rho_ao_t(1, 1), ri_data%rho_ao_t(i_spin, i_img))
218 4196 : CALL dbt_create(ri_data%ks_t(1, 1), ri_data%ks_t(i_spin, i_img))
219 : END DO
220 : END DO
221 :
222 240 : END SUBROUTINE adapt_ri_data_to_kp
223 :
224 : ! **************************************************************************************************
225 : !> \brief The pre-scf steps for RI-HFX k-points calculation. Namely the calculation of the integrals
226 : !> \param dbcsr_template ...
227 : !> \param ri_data ...
228 : !> \param qs_env ...
229 : ! **************************************************************************************************
230 80 : SUBROUTINE hfx_ri_pre_scf_kp(dbcsr_template, ri_data, qs_env)
231 : TYPE(dbcsr_type), INTENT(INOUT) :: dbcsr_template
232 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
233 : TYPE(qs_environment_type), POINTER :: qs_env
234 :
235 : CHARACTER(LEN=*), PARAMETER :: routineN = 'hfx_ri_pre_scf_kp'
236 :
237 : INTEGER :: handle, i_img, iatom, natom, nimg, nkind
238 80 : TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:) :: t_2c_op_pot, t_2c_op_RI
239 80 : TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :) :: t_3c_int
240 : TYPE(dft_control_type), POINTER :: dft_control
241 :
242 80 : NULLIFY (dft_control)
243 :
244 80 : CALL timeset(routineN, handle)
245 :
246 80 : CALL get_qs_env(qs_env, dft_control=dft_control, natom=natom, nkind=nkind)
247 :
248 80 : CALL cleanup_kp(ri_data)
249 :
250 : !We do all the checks on what we allow in this initial implementation
251 80 : IF (ri_data%flavor .NE. ri_pmat) CPABORT("K-points RI-HFX only with RHO flavor")
252 80 : IF (ri_data%same_op) ri_data%same_op = .FALSE. !force the full calculation with RI metric
253 80 : IF (ABS(ri_data%eps_pgf_orb - dft_control%qs_control%eps_pgf_orb) > 1.0E-16_dp) &
254 0 : CPABORT("RI%EPS_PGF_ORB and QS%EPS_PGF_ORB must be identical for RI-HFX k-points")
255 :
256 80 : CALL get_kp_and_ri_images(ri_data, qs_env)
257 80 : nimg = ri_data%nimg
258 :
259 : !Calculate the integrals
260 4316 : ALLOCATE (t_2c_op_pot(nimg), t_2c_op_RI(nimg))
261 4876 : ALLOCATE (t_3c_int(1, nimg))
262 80 : CALL hfx_ri_pre_scf_calc_tensors(qs_env, ri_data, t_2c_op_RI, t_2c_op_pot, t_3c_int, do_kpoints=.TRUE.)
263 :
264 : !Make sure the internals have the k-point format
265 80 : CALL adapt_ri_data_to_kp(dbcsr_template, ri_data, qs_env)
266 :
267 : !For each atom i, we calculate the inverse RI metric (P^0 | Q^0)^-1 without external bumping yet
268 : !Also store the off-diagonal integrals of the RI metric in case of forces, bumped from the left
269 240 : DO iatom = 1, natom
270 : CALL get_ext_2c_int(ri_data%t_2c_inv(1, iatom), t_2c_op_RI, iatom, iatom, 1, ri_data, qs_env, &
271 160 : do_inverse=.TRUE.)
272 : !for the forces:
273 : !off-diagonl RI metric bumped from the left
274 : CALL get_ext_2c_int(ri_data%t_2c_int(1, iatom), t_2c_op_RI, iatom, iatom, 1, ri_data, &
275 160 : qs_env, off_diagonal=.TRUE.)
276 160 : CALL apply_bump(ri_data%t_2c_int(1, iatom), iatom, ri_data, qs_env, from_left=.TRUE., from_right=.FALSE.)
277 :
278 : !RI metric with bumped off-diagonal blocks (but not inverted), depumed from left and right
279 : CALL get_ext_2c_int(ri_data%t_2c_pot(1, iatom), t_2c_op_RI, iatom, iatom, 1, ri_data, qs_env, &
280 160 : do_inverse=.TRUE., skip_inverse=.TRUE.)
281 : CALL apply_bump(ri_data%t_2c_pot(1, iatom), iatom, ri_data, qs_env, from_left=.TRUE., &
282 240 : from_right=.TRUE., debump=.TRUE.)
283 :
284 : END DO
285 :
286 2078 : DO i_img = 1, nimg
287 2078 : CALL dbcsr_release(t_2c_op_RI(i_img))
288 : END DO
289 :
290 4156 : ALLOCATE (ri_data%kp_mat_2c_pot(1, nimg))
291 2078 : DO i_img = 1, nimg
292 1998 : CALL dbcsr_create(ri_data%kp_mat_2c_pot(1, i_img), template=t_2c_op_pot(i_img))
293 1998 : CALL dbcsr_copy(ri_data%kp_mat_2c_pot(1, i_img), t_2c_op_pot(i_img))
294 2078 : CALL dbcsr_release(t_2c_op_pot(i_img))
295 : END DO
296 :
297 : !reorder the 3c integrals such that empty images are bunched up together
298 80 : CALL reorder_3c_ints(t_3c_int(1, :), ri_data)
299 :
300 : !Pre-contract all 3c integrals with the bumped inverse RI metric (P^0|Q^0)^-1,
301 : !and store in ri_data%t_3c_int_ctr_1
302 80 : CALL precontract_3c_ints(t_3c_int, ri_data, qs_env)
303 :
304 80 : CALL timestop(handle)
305 :
306 2158 : END SUBROUTINE hfx_ri_pre_scf_kp
307 :
308 : ! **************************************************************************************************
309 : !> \brief clean-up the KP specific data from ri_data
310 : !> \param ri_data ...
311 : ! **************************************************************************************************
312 80 : SUBROUTINE cleanup_kp(ri_data)
313 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
314 :
315 : INTEGER :: i, j
316 :
317 80 : IF (ALLOCATED(ri_data%kp_cost)) DEALLOCATE (ri_data%kp_cost)
318 80 : IF (ALLOCATED(ri_data%idx_to_img)) DEALLOCATE (ri_data%idx_to_img)
319 80 : IF (ALLOCATED(ri_data%img_to_idx)) DEALLOCATE (ri_data%img_to_idx)
320 80 : IF (ALLOCATED(ri_data%present_images)) DEALLOCATE (ri_data%present_images)
321 80 : IF (ALLOCATED(ri_data%img_to_RI_cell)) DEALLOCATE (ri_data%img_to_RI_cell)
322 80 : IF (ALLOCATED(ri_data%RI_cell_to_img)) DEALLOCATE (ri_data%RI_cell_to_img)
323 :
324 80 : IF (ALLOCATED(ri_data%kp_mat_2c_pot)) THEN
325 716 : DO j = 1, SIZE(ri_data%kp_mat_2c_pot, 2)
326 1406 : DO i = 1, SIZE(ri_data%kp_mat_2c_pot, 1)
327 1380 : CALL dbcsr_release(ri_data%kp_mat_2c_pot(i, j))
328 : END DO
329 : END DO
330 26 : DEALLOCATE (ri_data%kp_mat_2c_pot)
331 : END IF
332 :
333 80 : IF (ALLOCATED(ri_data%kp_t_3c_int)) THEN
334 716 : DO i = 1, SIZE(ri_data%kp_t_3c_int)
335 716 : CALL dbt_destroy(ri_data%kp_t_3c_int(i))
336 : END DO
337 716 : DEALLOCATE (ri_data%kp_t_3c_int)
338 : END IF
339 :
340 80 : IF (ALLOCATED(ri_data%t_2c_inv)) THEN
341 186 : DO j = 1, SIZE(ri_data%t_2c_inv, 2)
342 292 : DO i = 1, SIZE(ri_data%t_2c_inv, 1)
343 212 : CALL dbt_destroy(ri_data%t_2c_inv(i, j))
344 : END DO
345 : END DO
346 186 : DEALLOCATE (ri_data%t_2c_inv)
347 : END IF
348 :
349 80 : IF (ALLOCATED(ri_data%t_2c_int)) THEN
350 186 : DO j = 1, SIZE(ri_data%t_2c_int, 2)
351 292 : DO i = 1, SIZE(ri_data%t_2c_int, 1)
352 212 : CALL dbt_destroy(ri_data%t_2c_int(i, j))
353 : END DO
354 : END DO
355 186 : DEALLOCATE (ri_data%t_2c_int)
356 : END IF
357 :
358 80 : IF (ALLOCATED(ri_data%t_2c_pot)) THEN
359 186 : DO j = 1, SIZE(ri_data%t_2c_pot, 2)
360 292 : DO i = 1, SIZE(ri_data%t_2c_pot, 1)
361 212 : CALL dbt_destroy(ri_data%t_2c_pot(i, j))
362 : END DO
363 : END DO
364 186 : DEALLOCATE (ri_data%t_2c_pot)
365 : END IF
366 :
367 80 : IF (ALLOCATED(ri_data%t_3c_int_ctr_1)) THEN
368 824 : DO j = 1, SIZE(ri_data%t_3c_int_ctr_1, 2)
369 1568 : DO i = 1, SIZE(ri_data%t_3c_int_ctr_1, 1)
370 1488 : CALL dbt_destroy(ri_data%t_3c_int_ctr_1(i, j))
371 : END DO
372 : END DO
373 824 : DEALLOCATE (ri_data%t_3c_int_ctr_1)
374 : END IF
375 :
376 80 : IF (ALLOCATED(ri_data%t_3c_int_ctr_2)) THEN
377 160 : DO j = 1, SIZE(ri_data%t_3c_int_ctr_2, 2)
378 240 : DO i = 1, SIZE(ri_data%t_3c_int_ctr_2, 1)
379 160 : CALL dbt_destroy(ri_data%t_3c_int_ctr_2(i, j))
380 : END DO
381 : END DO
382 160 : DEALLOCATE (ri_data%t_3c_int_ctr_2)
383 : END IF
384 :
385 80 : IF (ALLOCATED(ri_data%rho_ao_t)) THEN
386 824 : DO j = 1, SIZE(ri_data%rho_ao_t, 2)
387 1804 : DO i = 1, SIZE(ri_data%rho_ao_t, 1)
388 1724 : CALL dbt_destroy(ri_data%rho_ao_t(i, j))
389 : END DO
390 : END DO
391 1060 : DEALLOCATE (ri_data%rho_ao_t)
392 : END IF
393 :
394 80 : IF (ALLOCATED(ri_data%ks_t)) THEN
395 824 : DO j = 1, SIZE(ri_data%ks_t, 2)
396 1804 : DO i = 1, SIZE(ri_data%ks_t, 1)
397 1724 : CALL dbt_destroy(ri_data%ks_t(i, j))
398 : END DO
399 : END DO
400 1060 : DEALLOCATE (ri_data%ks_t)
401 : END IF
402 :
403 80 : END SUBROUTINE cleanup_kp
404 :
405 : ! **************************************************************************************************
406 : !> \brief Prints a progress bar for the k-point RI-HFX triple loop
407 : !> \param b_img ...
408 : !> \param nimg ...
409 : !> \param iprint ...
410 : !> \param ri_data ...
411 : ! **************************************************************************************************
412 0 : SUBROUTINE print_progress_bar(b_img, nimg, iprint, ri_data)
413 : INTEGER, INTENT(IN) :: b_img, nimg
414 : INTEGER, INTENT(INOUT) :: iprint
415 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
416 :
417 : CHARACTER(LEN=default_string_length) :: bar
418 : INTEGER :: rep
419 :
420 0 : IF (ri_data%unit_nr > 0) THEN
421 0 : IF (b_img == 1) THEN
422 0 : WRITE (ri_data%unit_nr, '(/T6,A)', advance="no") '[-'
423 0 : CALL m_flush(ri_data%unit_nr)
424 : END IF
425 0 : IF (b_img > iprint*nimg/71) THEN
426 0 : rep = MAX(1, 71/nimg)
427 0 : bar = REPEAT("-", rep)
428 0 : WRITE (ri_data%unit_nr, '(A)', advance="no") TRIM(bar)
429 0 : CALL m_flush(ri_data%unit_nr)
430 0 : iprint = iprint + 1
431 : END IF
432 0 : IF (b_img == nimg) THEN
433 0 : rep = MAX(0, 1 + 71 - iprint*rep)
434 0 : bar = REPEAT("-", rep)
435 0 : WRITE (ri_data%unit_nr, '(A,A)') TRIM(bar), '-]'
436 0 : CALL m_flush(ri_data%unit_nr)
437 : END IF
438 : END IF
439 :
440 0 : END SUBROUTINE print_progress_bar
441 :
442 : ! **************************************************************************************************
443 : !> \brief Update the KS matrices for each real-space image
444 : !> \param qs_env ...
445 : !> \param ri_data ...
446 : !> \param ks_matrix ...
447 : !> \param ehfx ...
448 : !> \param rho_ao ...
449 : !> \param geometry_did_change ...
450 : !> \param nspins ...
451 : !> \param hf_fraction ...
452 : ! **************************************************************************************************
453 248 : SUBROUTINE hfx_ri_update_ks_kp(qs_env, ri_data, ks_matrix, ehfx, rho_ao, &
454 : geometry_did_change, nspins, hf_fraction)
455 :
456 : TYPE(qs_environment_type), POINTER :: qs_env
457 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
458 : TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: ks_matrix
459 : REAL(KIND=dp), INTENT(OUT) :: ehfx
460 : TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: rho_ao
461 : LOGICAL, INTENT(IN) :: geometry_did_change
462 : INTEGER, INTENT(IN) :: nspins
463 : REAL(KIND=dp), INTENT(IN) :: hf_fraction
464 :
465 : CHARACTER(LEN=*), PARAMETER :: routineN = 'hfx_ri_update_ks_kp'
466 :
467 : INTEGER :: b_img, batch_size, group_size, handle, handle2, i_batch, i_img, i_spin, iatom, &
468 : iblk, igroup, iprint, jatom, mb_img, n_batch_nze, n_nze, natom, ngroups, nimg, nimg_nze
469 : INTEGER(int_8) :: mem, nflop, nze
470 248 : INTEGER, ALLOCATABLE, DIMENSION(:) :: batch_ranges_at, batch_ranges_nze, &
471 248 : idx_to_at_AO
472 248 : INTEGER, ALLOCATABLE, DIMENSION(:, :) :: iapc_pairs
473 248 : INTEGER, ALLOCATABLE, DIMENSION(:, :, :) :: sparsity_pattern
474 : LOGICAL :: estimate_mem, print_progress, use_delta_p
475 : REAL(dp) :: etmp, fac, occ, pfac, pref, t1, t2, t3, &
476 : t4
477 : TYPE(cp_blacs_env_type), POINTER :: blacs_env_sub
478 : TYPE(dbcsr_type) :: ks_desymm, rho_desymm, tmp
479 248 : TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:) :: mat_2c_pot
480 : TYPE(dbcsr_type), POINTER :: dbcsr_template
481 248 : TYPE(dbt_type), ALLOCATABLE, DIMENSION(:) :: ks_t_split, t_2c_ao_tmp, t_2c_work, &
482 248 : t_3c_int, t_3c_work_2, t_3c_work_3
483 248 : TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :) :: ks_t, ks_t_sub, t_3c_apc, t_3c_apc_sub
484 : TYPE(mp_para_env_type), POINTER :: para_env, para_env_sub
485 : TYPE(section_vals_type), POINTER :: hfx_section, print_section
486 :
487 248 : NULLIFY (para_env, para_env_sub, blacs_env_sub, hfx_section, dbcsr_template, print_section)
488 :
489 248 : CALL cite_reference(Bussy2024)
490 :
491 248 : CALL timeset(routineN, handle)
492 :
493 248 : CALL get_qs_env(qs_env, para_env=para_env, natom=natom)
494 :
495 248 : IF (nspins == 1) THEN
496 164 : fac = 0.5_dp*hf_fraction
497 : ELSE
498 84 : fac = 1.0_dp*hf_fraction
499 : END IF
500 :
501 248 : hfx_section => section_vals_get_subs_vals(qs_env%input, "DFT%XC%HF%RI")
502 248 : CALL section_vals_val_get(hfx_section, "KP_NGROUPS", i_val=ngroups)
503 248 : CALL section_vals_val_get(hfx_section, "KP_STACK_SIZE", i_val=batch_size)
504 248 : CALL section_vals_val_get(hfx_section, "KP_USE_DELTA_P", l_val=use_delta_p)
505 248 : ri_data%kp_stack_size = batch_size
506 248 : ri_data%kp_ngroups = ngroups
507 :
508 248 : IF (geometry_did_change) THEN
509 80 : CALL hfx_ri_pre_scf_kp(ks_matrix(1, 1)%matrix, ri_data, qs_env)
510 : END IF
511 248 : nimg = ri_data%nimg
512 248 : nimg_nze = ri_data%nimg_nze
513 :
514 : !We need to calculate the KS matrix for each periodic cell with index b: F_mu^0,nu^b
515 : !F_mu^0,nu^b = -0.5 sum_a,c P_sigma^0,lambda^c (mu^0, sigma^a| P^0) V_P^0,Q^b (Q^b| nu^b lambda^a+c)
516 : !with V_P^0,Q^b = (P^0|R^0)^-1 * (R^0|S^b) * (S^b|Q^b)^-1
517 :
518 : !We use a local RI basis set for each atom in the system, which inlcudes RI basis elements for
519 : !each neighboring atom standing within the KIND radius (decay of Gaussian with smallest exponent)
520 :
521 : !We also limit the number of periodic images we consider accorrding to the HFX potentail in the
522 : !RI basis, because if V_P^0,Q^b is zero everywhere, then image b can be ignored (RI basis less diffuse)
523 :
524 : !We manage to calculate each KS matrix doing a double loop on iamges, and a double loop on atoms
525 : !First, we pre-contract and store P_sigma^0,lambda^c (mu^0, sigma^a| P^0) (P^0|R^0)^-1 into T_mu^0,lambda^a+c,P^0
526 : !Then, we loop over b_img, iatom, jatom to get (R^0|S^b)
527 : !Finally, we do an additional loop over a+c images where we do (R^0|S^b) (S^b|Q^b)^-1 (Q^b| nu^b lambda^a+c)
528 : !and the final contraction with T_mu^0,lambda^a+c,P^0
529 :
530 : !Note that the 3-center integrals are pre-contracted with the RI metric, and that the same tensor can be used
531 : !(mu^0, sigma^a| P^0) (P^0|R^0) <===> (S^b|Q^b)^-1 (Q^b| nu^b lambda^a+c) by relabelling the images
532 :
533 : !By default, build the density tensor based on the difference of this SCF P and that of the prev. SCF
534 248 : pfac = -1.0_dp
535 248 : IF (.NOT. use_delta_p) pfac = 0.0_dp
536 248 : CALL get_pmat_images(ri_data%rho_ao_t, rho_ao, pfac, ri_data, qs_env)
537 :
538 248 : n_nze = 0
539 7034 : DO i_img = 1, nimg
540 15386 : DO i_spin = 1, nspins
541 8352 : CALL get_tensor_occupancy(ri_data%rho_ao_t(i_spin, i_img), nze, occ)
542 15138 : IF (nze > 0) THEN
543 6678 : n_nze = n_nze + 1
544 : END IF
545 : END DO
546 : END DO
547 248 : IF (n_nze == nspins) THEN
548 32 : CPWARN("It is highly recommended to restart from a converged GGA K-point calculations.")
549 : END IF
550 :
551 18114 : ALLOCATE (ks_t(nspins, nimg))
552 7034 : DO i_img = 1, nimg
553 15386 : DO i_spin = 1, nspins
554 15138 : CALL dbt_create(ri_data%ks_t(1, 1), ks_t(i_spin, i_img))
555 : END DO
556 : END DO
557 :
558 744 : ALLOCATE (idx_to_at_AO(SIZE(ri_data%bsizes_AO_split)))
559 248 : CALL get_idx_to_atom(idx_to_at_AO, ri_data%bsizes_AO_split, ri_data%bsizes_AO)
560 :
561 : !First we calculate and store T^1_mu^0,lambda^a+c,P = P_mu^0,lambda^c * (mu_0 sigma^a | P^0) (P^0|R^0)^-1
562 : !To avoid doing nimg**2 tiny contractions that do not scale well with a large number of CPUs,
563 : !we instead do a single loop over the a+c image index. For each a+c, we get a list of allowed
564 : !combination of a,c indices. Then we build TAS tensors P_mu^0,lambda^c with all concerned c's
565 : !and (mu^0 sigma^a | P^0)*(P^0|R^0)^-1 with all a's. Then we perform a single contraction with larger tensors,
566 : !were the sum over a,c is automatically taken care of
567 17866 : ALLOCATE (t_3c_apc(nspins, nimg))
568 7034 : DO i_img = 1, nimg
569 15386 : DO i_spin = 1, nspins
570 15138 : CALL dbt_create(ri_data%t_3c_int_ctr_2(1, 1), t_3c_apc(i_spin, i_img))
571 : END DO
572 : END DO
573 248 : CALL contract_pmat_3c(t_3c_apc, ri_data%rho_ao_t, ri_data, qs_env)
574 :
575 248 : IF (MOD(para_env%num_pe, ngroups) .NE. 0) THEN
576 0 : CPWARN("KP_NGROUPS must be an integer divisor of the total number of MPI ranks. It was set to 1.")
577 0 : ngroups = 1
578 0 : CALL section_vals_val_set(hfx_section, "KP_NGROUPS", i_val=ngroups)
579 : END IF
580 248 : IF ((MOD(ngroups, natom) .NE. 0) .AND. (MOD(natom, ngroups) .NE. 0) .AND. geometry_did_change) THEN
581 0 : IF (ngroups > 1) THEN
582 0 : CPWARN("Better load balancing is reached if NGROUPS is a multiple/divisor of the number of atoms")
583 : END IF
584 : END IF
585 248 : group_size = para_env%num_pe/ngroups
586 248 : igroup = para_env%mepos/group_size
587 :
588 248 : ALLOCATE (para_env_sub)
589 248 : CALL para_env_sub%from_split(para_env, igroup)
590 248 : CALL cp_blacs_env_create(blacs_env_sub, para_env_sub)
591 :
592 : ! The sparsity pattern of each iatom, jatom pair, on each b_img, and on which subgroup
593 1240 : ALLOCATE (sparsity_pattern(natom, natom, nimg))
594 248 : CALL get_sparsity_pattern(sparsity_pattern, ri_data, qs_env)
595 248 : CALL get_sub_dist(sparsity_pattern, ngroups, ri_data)
596 :
597 : !Get all the required tensors in the subgroups
598 32588 : ALLOCATE (mat_2c_pot(nimg), ks_t_sub(nspins, nimg), t_2c_ao_tmp(1), ks_t_split(2), t_2c_work(3))
599 : CALL get_subgroup_2c_tensors(mat_2c_pot, t_2c_work, t_2c_ao_tmp, ks_t_split, ks_t_sub, &
600 248 : group_size, ngroups, para_env, para_env_sub, ri_data)
601 :
602 32836 : ALLOCATE (t_3c_int(nimg), t_3c_apc_sub(nspins, nimg), t_3c_work_2(3), t_3c_work_3(3))
603 : CALL get_subgroup_3c_tensors(t_3c_int, t_3c_work_2, t_3c_work_3, t_3c_apc, t_3c_apc_sub, &
604 248 : group_size, ngroups, para_env, para_env_sub, ri_data)
605 :
606 : !We go atom by atom, therefore there is an automatic batching along that direction
607 : !Also, because we stack the 3c tensors nimg times, we naturally do some batching there too
608 744 : ALLOCATE (batch_ranges_at(natom + 1))
609 248 : batch_ranges_at(natom + 1) = SIZE(ri_data%bsizes_AO_split) + 1
610 248 : iatom = 0
611 1138 : DO iblk = 1, SIZE(ri_data%bsizes_AO_split)
612 1138 : IF (idx_to_at_AO(iblk) == iatom + 1) THEN
613 496 : iatom = iatom + 1
614 496 : batch_ranges_at(iatom) = iblk
615 : END IF
616 : END DO
617 :
618 248 : n_batch_nze = nimg_nze/batch_size
619 248 : IF (MODULO(nimg_nze, batch_size) .NE. 0) n_batch_nze = n_batch_nze + 1
620 744 : ALLOCATE (batch_ranges_nze(n_batch_nze + 1))
621 608 : DO i_batch = 1, n_batch_nze
622 608 : batch_ranges_nze(i_batch) = (i_batch - 1)*batch_size + 1
623 : END DO
624 248 : batch_ranges_nze(n_batch_nze + 1) = nimg_nze + 1
625 :
626 248 : print_section => section_vals_get_subs_vals(qs_env%input, "DFT%XC%HF%RI%PRINT")
627 248 : CALL section_vals_val_get(print_section, "KP_RI_PROGRESS_BAR", l_val=print_progress)
628 248 : CALL section_vals_val_get(print_section, "KP_RI_MEMORY_ESTIMATE", l_val=estimate_mem)
629 :
630 744 : ALLOCATE (iapc_pairs(nimg, 2))
631 248 : IF (estimate_mem .AND. geometry_did_change) THEN
632 : !Populate work tensors to simulate maximum usage
633 0 : CALL get_iapc_pairs(iapc_pairs, 1, ri_data, qs_env)
634 : CALL fill_3c_stack(t_3c_work_3(1), t_3c_int, iapc_pairs(:, 1), 3, ri_data, &
635 : filter_at=1, filter_dim=2, idx_to_at=idx_to_at_AO, &
636 0 : img_bounds=[batch_ranges_nze(1), batch_ranges_nze(2)])
637 : CALL fill_3c_stack(t_3c_work_3(2), t_3c_int, iapc_pairs(:, 1), 3, ri_data, &
638 : filter_at=1, filter_dim=2, idx_to_at=idx_to_at_AO, &
639 0 : img_bounds=[batch_ranges_nze(1), batch_ranges_nze(2)])
640 : CALL fill_3c_stack(t_3c_work_2(1), t_3c_apc_sub(1, :), iapc_pairs(:, 2), 3, &
641 : ri_data, filter_at=1, filter_dim=1, idx_to_at=idx_to_at_AO, &
642 0 : img_bounds=[batch_ranges_nze(1), batch_ranges_nze(2)])
643 : CALL fill_3c_stack(t_3c_work_2(2), t_3c_apc_sub(1, :), iapc_pairs(:, 2), 3, &
644 : ri_data, filter_at=1, filter_dim=1, idx_to_at=idx_to_at_AO, &
645 0 : img_bounds=[batch_ranges_nze(1), batch_ranges_nze(2)])
646 : CALL get_ext_2c_int(t_2c_work(1), mat_2c_pot, 1, 1, 1, ri_data, qs_env, &
647 : blacs_env_ext=blacs_env_sub, para_env_ext=para_env_sub, &
648 0 : dbcsr_template=dbcsr_template)
649 0 : CALL m_memory(mem)
650 0 : CALL para_env%max(mem)
651 0 : CALL dbt_clear(t_3c_work_2(1))
652 0 : CALL dbt_clear(t_3c_work_2(2))
653 0 : CALL dbt_clear(t_3c_work_3(1))
654 0 : CALL dbt_clear(t_3c_work_3(2))
655 0 : CALL dbt_clear(t_2c_work(1))
656 :
657 0 : IF (ri_data%unit_nr > 0) THEN
658 : WRITE (ri_data%unit_nr, FMT="(T3,A,I14)") &
659 0 : "KP-HFX_RI_INFO| Estimated peak memory usage per MPI rank (MiB):", mem/(1024*1024)
660 0 : CALL m_flush(ri_data%unit_nr)
661 : END IF
662 : END IF
663 :
664 248 : CALL dbt_batched_contract_init(t_3c_work_3(1), batch_range_2=batch_ranges_at)
665 248 : CALL dbt_batched_contract_init(t_3c_work_3(2), batch_range_2=batch_ranges_at)
666 248 : CALL dbt_batched_contract_init(t_3c_work_2(1), batch_range_1=batch_ranges_at)
667 248 : CALL dbt_batched_contract_init(t_3c_work_2(2), batch_range_1=batch_ranges_at)
668 :
669 248 : iprint = 1
670 248 : t1 = m_walltime()
671 47750 : ri_data%kp_cost(:, :, :) = 0.0_dp
672 7034 : DO b_img = 1, nimg
673 6786 : IF (print_progress) CALL print_progress_bar(b_img, nimg, iprint, ri_data)
674 6786 : CALL dbt_batched_contract_init(ks_t_split(1))
675 6786 : CALL dbt_batched_contract_init(ks_t_split(2))
676 20358 : DO jatom = 1, natom
677 47502 : DO iatom = 1, natom
678 27144 : IF (.NOT. sparsity_pattern(iatom, jatom, b_img) == igroup) CYCLE
679 4640 : pref = 1.0_dp
680 4640 : IF (iatom == jatom .AND. b_img == 1) pref = 0.5_dp
681 :
682 : !measure the cost of the given i, j, b configuration
683 4640 : t3 = m_walltime()
684 :
685 : !Get the proper HFX potential 2c integrals (R_i^0|S_j^b)
686 4640 : CALL timeset(routineN//"_2c", handle2)
687 : CALL get_ext_2c_int(t_2c_work(1), mat_2c_pot, iatom, jatom, b_img, ri_data, qs_env, &
688 : blacs_env_ext=blacs_env_sub, para_env_ext=para_env_sub, &
689 4640 : dbcsr_template=dbcsr_template)
690 4640 : CALL dbt_copy(t_2c_work(1), t_2c_work(2), move_data=.TRUE.) !move to split blocks
691 4640 : CALL dbt_filter(t_2c_work(2), ri_data%filter_eps)
692 4640 : CALL timestop(handle2)
693 :
694 4640 : CALL dbt_batched_contract_init(t_2c_work(2))
695 4640 : CALL get_iapc_pairs(iapc_pairs, b_img, ri_data, qs_env)
696 4640 : CALL timeset(routineN//"_3c", handle2)
697 :
698 : !Stack the (S^b|Q^b)^-1 * (Q^b| nu^b lambda^a+c) integrals over a+c and multiply by (R_i^0|S_j^b)
699 12696 : DO i_batch = 1, n_batch_nze
700 : CALL fill_3c_stack(t_3c_work_3(3), t_3c_int, iapc_pairs(:, 1), 3, ri_data, &
701 : filter_at=jatom, filter_dim=2, idx_to_at=idx_to_at_AO, &
702 24168 : img_bounds=[batch_ranges_nze(i_batch), batch_ranges_nze(i_batch + 1)])
703 8056 : CALL dbt_copy(t_3c_work_3(3), t_3c_work_3(1), move_data=.TRUE.)
704 :
705 : CALL dbt_contract(1.0_dp, t_2c_work(2), t_3c_work_3(1), &
706 : 0.0_dp, t_3c_work_3(2), map_1=[1], map_2=[2, 3], &
707 : contract_1=[2], notcontract_1=[1], &
708 : contract_2=[1], notcontract_2=[2, 3], &
709 8056 : filter_eps=ri_data%filter_eps, flop=nflop)
710 8056 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
711 8056 : CALL dbt_copy(t_3c_work_3(2), t_3c_work_2(2), order=[2, 1, 3], move_data=.TRUE.)
712 8056 : CALL dbt_copy(t_3c_work_3(3), t_3c_work_3(1))
713 :
714 : !Stack the P_sigma^a,lambda^a+c * (mu^0 sigma^a | P^0)*(P^0|R^0)^-1 integrals over a+c and contract
715 : !to get the final block of the KS matrix
716 22760 : DO i_spin = 1, nspins
717 : CALL fill_3c_stack(t_3c_work_2(3), t_3c_apc_sub(i_spin, :), iapc_pairs(:, 2), 3, &
718 : ri_data, filter_at=iatom, filter_dim=1, idx_to_at=idx_to_at_AO, &
719 30192 : img_bounds=[batch_ranges_nze(i_batch), batch_ranges_nze(i_batch + 1)])
720 10064 : CALL get_tensor_occupancy(t_3c_work_2(3), nze, occ)
721 :
722 10064 : IF (nze == 0) CYCLE
723 9809 : CALL dbt_copy(t_3c_work_2(3), t_3c_work_2(1), move_data=.TRUE.)
724 : CALL dbt_contract(-pref*fac, t_3c_work_2(1), t_3c_work_2(2), &
725 : 1.0_dp, ks_t_split(i_spin), map_1=[1], map_2=[2], &
726 : contract_1=[2, 3], notcontract_1=[1], &
727 : contract_2=[2, 3], notcontract_2=[1], &
728 : filter_eps=ri_data%filter_eps, &
729 9809 : move_data=i_spin == nspins, flop=nflop)
730 27929 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
731 : END DO
732 : END DO !i_batch
733 4640 : CALL timestop(handle2)
734 4640 : CALL dbt_batched_contract_finalize(t_2c_work(2))
735 :
736 4640 : t4 = m_walltime()
737 49996 : ri_data%kp_cost(iatom, jatom, b_img) = t4 - t3
738 : END DO !iatom
739 : END DO !jatom
740 6786 : CALL dbt_batched_contract_finalize(ks_t_split(1))
741 6786 : CALL dbt_batched_contract_finalize(ks_t_split(2))
742 :
743 15386 : DO i_spin = 1, nspins
744 8352 : CALL dbt_copy(ks_t_split(i_spin), t_2c_ao_tmp(1), move_data=.TRUE.)
745 15138 : CALL dbt_copy(t_2c_ao_tmp(1), ks_t_sub(i_spin, b_img), summation=.TRUE.)
746 : END DO
747 : END DO !b_img
748 248 : CALL dbt_batched_contract_finalize(t_3c_work_3(1))
749 248 : CALL dbt_batched_contract_finalize(t_3c_work_3(2))
750 248 : CALL dbt_batched_contract_finalize(t_3c_work_2(1))
751 248 : CALL dbt_batched_contract_finalize(t_3c_work_2(2))
752 248 : CALL para_env%sync()
753 248 : CALL para_env%sum(ri_data%dbcsr_nflop)
754 248 : CALL para_env%sum(ri_data%kp_cost)
755 248 : t2 = m_walltime()
756 248 : ri_data%dbcsr_time = ri_data%dbcsr_time + t2 - t1
757 :
758 : !transfer KS tensor from subgroup to main group
759 248 : CALL gather_ks_matrix(ks_t, ks_t_sub, group_size, sparsity_pattern, para_env, ri_data)
760 :
761 : !Keep the 3c integrals on the subgroups to avoid communication at next SCF step
762 7034 : DO i_img = 1, nimg
763 7034 : CALL dbt_copy(t_3c_int(i_img), ri_data%kp_t_3c_int(i_img), move_data=.TRUE.)
764 : END DO
765 :
766 : !clean-up subgroup tensors
767 248 : CALL dbt_destroy(t_2c_ao_tmp(1))
768 248 : CALL dbt_destroy(ks_t_split(1))
769 248 : CALL dbt_destroy(ks_t_split(2))
770 248 : CALL dbt_destroy(t_2c_work(1))
771 248 : CALL dbt_destroy(t_2c_work(2))
772 248 : CALL dbt_destroy(t_3c_work_2(1))
773 248 : CALL dbt_destroy(t_3c_work_2(2))
774 248 : CALL dbt_destroy(t_3c_work_2(3))
775 248 : CALL dbt_destroy(t_3c_work_3(1))
776 248 : CALL dbt_destroy(t_3c_work_3(2))
777 248 : CALL dbt_destroy(t_3c_work_3(3))
778 7034 : DO i_img = 1, nimg
779 6786 : CALL dbt_destroy(t_3c_int(i_img))
780 6786 : CALL dbcsr_release(mat_2c_pot(i_img))
781 15386 : DO i_spin = 1, nspins
782 8352 : CALL dbt_destroy(t_3c_apc_sub(i_spin, i_img))
783 15138 : CALL dbt_destroy(ks_t_sub(i_spin, i_img))
784 : END DO
785 : END DO
786 248 : IF (ASSOCIATED(dbcsr_template)) THEN
787 248 : CALL dbcsr_release(dbcsr_template)
788 248 : DEALLOCATE (dbcsr_template)
789 : END IF
790 :
791 : !End of subgroup parallelization
792 248 : CALL cp_blacs_env_release(blacs_env_sub)
793 248 : CALL para_env_sub%free()
794 248 : DEALLOCATE (para_env_sub)
795 :
796 : !Currently, rho_ao_t holds the density difference (wrt to pref SCF step).
797 : !ks_t also hold that diff, while only having half the blocks => need to add to prev ks_t and symmetrize
798 : !We need the full thing for the energy, on the next SCF step
799 248 : CALL get_pmat_images(ri_data%rho_ao_t, rho_ao, 0.0_dp, ri_data, qs_env)
800 580 : DO i_spin = 1, nspins
801 8932 : DO b_img = 1, nimg
802 8352 : CALL dbt_copy(ks_t(i_spin, b_img), ri_data%ks_t(i_spin, b_img), summation=.TRUE.)
803 :
804 : !desymmetrize
805 8352 : mb_img = get_opp_index(b_img, qs_env)
806 8684 : IF (mb_img > 0 .AND. mb_img .LE. nimg) THEN
807 7428 : CALL dbt_copy(ks_t(i_spin, mb_img), ri_data%ks_t(i_spin, b_img), order=[2, 1], summation=.TRUE.)
808 : END IF
809 : END DO
810 : END DO
811 7034 : DO b_img = 1, nimg
812 15386 : DO i_spin = 1, nspins
813 15138 : CALL dbt_destroy(ks_t(i_spin, b_img))
814 : END DO
815 : END DO
816 :
817 : !calculate the energy
818 248 : CALL dbt_create(ri_data%ks_t(1, 1), t_2c_ao_tmp(1))
819 248 : CALL dbcsr_create(tmp, template=ks_matrix(1, 1)%matrix, matrix_type=dbcsr_type_symmetric)
820 248 : CALL dbcsr_create(ks_desymm, template=ks_matrix(1, 1)%matrix, matrix_type=dbcsr_type_no_symmetry)
821 248 : CALL dbcsr_create(rho_desymm, template=ks_matrix(1, 1)%matrix, matrix_type=dbcsr_type_no_symmetry)
822 248 : ehfx = 0.0_dp
823 7034 : DO i_img = 1, nimg
824 15386 : DO i_spin = 1, nspins
825 8352 : CALL dbt_filter(ri_data%ks_t(i_spin, i_img), ri_data%filter_eps)
826 8352 : CALL dbt_copy(ri_data%ks_t(i_spin, i_img), t_2c_ao_tmp(1))
827 8352 : CALL dbt_copy_tensor_to_matrix(t_2c_ao_tmp(1), ks_desymm)
828 8352 : CALL dbt_copy_tensor_to_matrix(t_2c_ao_tmp(1), tmp)
829 8352 : CALL dbcsr_add(ks_matrix(i_spin, i_img)%matrix, tmp, 1.0_dp, 1.0_dp)
830 :
831 8352 : CALL dbt_copy(ri_data%rho_ao_t(i_spin, i_img), t_2c_ao_tmp(1))
832 8352 : CALL dbt_copy_tensor_to_matrix(t_2c_ao_tmp(1), rho_desymm)
833 :
834 8352 : CALL dbcsr_dot(ks_desymm, rho_desymm, etmp)
835 8352 : ehfx = ehfx + 0.5_dp*etmp
836 :
837 15138 : IF (.NOT. use_delta_p) CALL dbt_clear(ri_data%ks_t(i_spin, i_img))
838 : END DO
839 : END DO
840 248 : CALL dbcsr_release(rho_desymm)
841 248 : CALL dbcsr_release(ks_desymm)
842 248 : CALL dbcsr_release(tmp)
843 248 : CALL dbt_destroy(t_2c_ao_tmp(1))
844 :
845 248 : CALL timestop(handle)
846 :
847 44410 : END SUBROUTINE hfx_ri_update_ks_kp
848 :
849 : ! **************************************************************************************************
850 : !> \brief Update the K-points RI-HFX forces
851 : !> \param qs_env ...
852 : !> \param ri_data ...
853 : !> \param nspins ...
854 : !> \param hf_fraction ...
855 : !> \param rho_ao ...
856 : !> \param use_virial ...
857 : !> \note Because this routine uses stored quantities calculated in the energy calculation, they should
858 : !> always be called by pairs, and with the same input densities
859 : ! **************************************************************************************************
860 46 : SUBROUTINE hfx_ri_update_forces_kp(qs_env, ri_data, nspins, hf_fraction, rho_ao, use_virial)
861 :
862 : TYPE(qs_environment_type), POINTER :: qs_env
863 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
864 : INTEGER, INTENT(IN) :: nspins
865 : REAL(KIND=dp), INTENT(IN) :: hf_fraction
866 : TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: rho_ao
867 : LOGICAL, INTENT(IN), OPTIONAL :: use_virial
868 :
869 : CHARACTER(LEN=*), PARAMETER :: routineN = 'hfx_ri_update_forces_kp'
870 :
871 : INTEGER :: b_img, batch_size, group_size, handle, handle2, i_batch, i_img, i_loop, i_spin, &
872 : i_xyz, iatom, iblk, igroup, j_xyz, jatom, k_xyz, n_batch, natom, ngroups, nimg, nimg_nze
873 : INTEGER(int_8) :: nflop, nze
874 46 : INTEGER, ALLOCATABLE, DIMENSION(:) :: atom_of_kind, batch_ranges_at, &
875 46 : batch_ranges_nze, dist1, dist2, &
876 46 : i_images, idx_to_at_AO, idx_to_at_RI, &
877 46 : kind_of
878 46 : INTEGER, ALLOCATABLE, DIMENSION(:, :) :: iapc_pairs
879 46 : INTEGER, ALLOCATABLE, DIMENSION(:, :, :) :: force_pattern, sparsity_pattern
880 : INTEGER, DIMENSION(2, 1) :: bounds_iat, bounds_jat
881 : LOGICAL :: use_virial_prv
882 : REAL(dp) :: fac, occ, pref, t1, t2
883 : REAL(dp), DIMENSION(3, 3) :: work_virial
884 46 : TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
885 : TYPE(cell_type), POINTER :: cell
886 : TYPE(cp_blacs_env_type), POINTER :: blacs_env_sub
887 46 : TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:) :: mat_2c_pot
888 46 : TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:, :) :: mat_der_pot, mat_der_pot_sub
889 : TYPE(dbcsr_type), POINTER :: dbcsr_template
890 782 : TYPE(dbt_type) :: t_2c_R, t_2c_R_split
891 46 : TYPE(dbt_type), ALLOCATABLE, DIMENSION(:) :: t_2c_bint, t_2c_binv, t_2c_der_pot, &
892 92 : t_2c_inv, t_2c_metric, t_2c_work, &
893 46 : t_3c_der_stack, t_3c_work_2, &
894 46 : t_3c_work_3
895 46 : TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :) :: rho_ao_t, rho_ao_t_sub, t_2c_der_metric, &
896 92 : t_2c_der_metric_sub, t_3c_apc, t_3c_apc_sub, t_3c_der_AO, t_3c_der_AO_sub, t_3c_der_RI, &
897 46 : t_3c_der_RI_sub
898 : TYPE(mp_para_env_type), POINTER :: para_env, para_env_sub
899 46 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
900 46 : TYPE(qs_force_type), DIMENSION(:), POINTER :: force
901 : TYPE(section_vals_type), POINTER :: hfx_section
902 : TYPE(virial_type), POINTER :: virial
903 :
904 46 : NULLIFY (para_env, para_env_sub, hfx_section, blacs_env_sub, dbcsr_template, force, atomic_kind_set, &
905 46 : virial, particle_set, cell)
906 :
907 46 : CALL timeset(routineN, handle)
908 :
909 46 : use_virial_prv = .FALSE.
910 46 : IF (PRESENT(use_virial)) use_virial_prv = use_virial
911 :
912 46 : IF (nspins == 1) THEN
913 30 : fac = 0.5_dp*hf_fraction
914 : ELSE
915 16 : fac = 1.0_dp*hf_fraction
916 : END IF
917 :
918 : CALL get_qs_env(qs_env, natom=natom, para_env=para_env, force=force, cell=cell, virial=virial, &
919 46 : atomic_kind_set=atomic_kind_set, particle_set=particle_set)
920 46 : CALL get_atomic_kind_set(atomic_kind_set, kind_of=kind_of, atom_of_kind=atom_of_kind)
921 :
922 138 : ALLOCATE (idx_to_at_AO(SIZE(ri_data%bsizes_AO_split)))
923 46 : CALL get_idx_to_atom(idx_to_at_AO, ri_data%bsizes_AO_split, ri_data%bsizes_AO)
924 :
925 138 : ALLOCATE (idx_to_at_RI(SIZE(ri_data%bsizes_RI_split)))
926 46 : CALL get_idx_to_atom(idx_to_at_RI, ri_data%bsizes_RI_split, ri_data%bsizes_RI)
927 :
928 46 : nimg = ri_data%nimg
929 12218 : ALLOCATE (t_3c_der_RI(nimg, 3), t_3c_der_AO(nimg, 3), mat_der_pot(nimg, 3), t_2c_der_metric(natom, 3))
930 :
931 : !We assume that the integrals are available from the SCF
932 : !pre-calculate the derivs. 3c tensors as (P^0| sigma^a mu^0), with t_3c_der_AO holding deriv wrt mu^0
933 46 : CALL precalc_derivatives(t_3c_der_RI, t_3c_der_AO, mat_der_pot, t_2c_der_metric, ri_data, qs_env)
934 :
935 : !Calculate the density matrix at each image
936 2936 : ALLOCATE (rho_ao_t(nspins, nimg))
937 : CALL create_2c_tensor(rho_ao_t(1, 1), dist1, dist2, ri_data%pgrid_2d, &
938 : ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
939 46 : name="(AO | AO)")
940 46 : DEALLOCATE (dist1, dist2)
941 46 : IF (nspins == 2) CALL dbt_create(rho_ao_t(1, 1), rho_ao_t(2, 1))
942 1102 : DO i_img = 2, nimg
943 2322 : DO i_spin = 1, nspins
944 2276 : CALL dbt_create(rho_ao_t(1, 1), rho_ao_t(i_spin, i_img))
945 : END DO
946 : END DO
947 46 : CALL get_pmat_images(rho_ao_t, rho_ao, 0.0_dp, ri_data, qs_env)
948 :
949 : !Contract integrals with the density matrix
950 2936 : ALLOCATE (t_3c_apc(nspins, nimg))
951 1148 : DO i_img = 1, nimg
952 2430 : DO i_spin = 1, nspins
953 2384 : CALL dbt_create(ri_data%t_3c_int_ctr_2(1, 1), t_3c_apc(i_spin, i_img))
954 : END DO
955 : END DO
956 46 : CALL contract_pmat_3c(t_3c_apc, rho_ao_t, ri_data, qs_env)
957 :
958 : !Setup the subgroups
959 46 : hfx_section => section_vals_get_subs_vals(qs_env%input, "DFT%XC%HF%RI")
960 46 : CALL section_vals_val_get(hfx_section, "KP_NGROUPS", i_val=ngroups)
961 46 : group_size = para_env%num_pe/ngroups
962 46 : igroup = para_env%mepos/group_size
963 :
964 46 : ALLOCATE (para_env_sub)
965 46 : CALL para_env_sub%from_split(para_env, igroup)
966 46 : CALL cp_blacs_env_create(blacs_env_sub, para_env_sub)
967 :
968 : !Get the ususal sparsity pattern
969 230 : ALLOCATE (sparsity_pattern(natom, natom, nimg))
970 46 : CALL get_sparsity_pattern(sparsity_pattern, ri_data, qs_env)
971 46 : CALL get_sub_dist(sparsity_pattern, ngroups, ri_data)
972 :
973 : !Get the 2-center quantities in the subgroups (note: main group derivs are deleted wihtin)
974 0 : ALLOCATE (t_2c_inv(natom), mat_2c_pot(nimg), rho_ao_t_sub(nspins, nimg), t_2c_work(5), &
975 0 : t_2c_der_metric_sub(natom, 3), mat_der_pot_sub(nimg, 3), t_2c_bint(natom), &
976 11254 : t_2c_metric(natom), t_2c_binv(natom))
977 : CALL get_subgroup_2c_derivs(t_2c_inv, t_2c_bint, t_2c_metric, mat_2c_pot, t_2c_work, rho_ao_t, &
978 : rho_ao_t_sub, t_2c_der_metric, t_2c_der_metric_sub, mat_der_pot, &
979 46 : mat_der_pot_sub, group_size, ngroups, para_env, para_env_sub, ri_data)
980 46 : CALL dbt_create(t_2c_work(1), t_2c_R) !nRI x nRI
981 46 : CALL dbt_create(t_2c_work(5), t_2c_R_split) !nRI x nRI with split blocks
982 :
983 552 : ALLOCATE (t_2c_der_pot(3))
984 184 : DO i_xyz = 1, 3
985 184 : CALL dbt_create(t_2c_R, t_2c_der_pot(i_xyz))
986 : END DO
987 :
988 : !Get the 3-center quantities in the subgroups. The integrals and t_3c_apc already there
989 0 : ALLOCATE (t_3c_work_2(3), t_3c_work_3(4), t_3c_der_stack(6), t_3c_der_AO_sub(nimg, 3), &
990 12354 : t_3c_der_RI_sub(nimg, 3), t_3c_apc_sub(nspins, nimg))
991 : CALL get_subgroup_3c_derivs(t_3c_work_2, t_3c_work_3, t_3c_der_AO, t_3c_der_AO_sub, &
992 : t_3c_der_RI, t_3c_der_RI_sub, t_3c_apc, t_3c_apc_sub, t_3c_der_stack, &
993 46 : group_size, ngroups, para_env, para_env_sub, ri_data)
994 :
995 : !Set up batched contraction (go atom by atom)
996 138 : ALLOCATE (batch_ranges_at(natom + 1))
997 46 : batch_ranges_at(natom + 1) = SIZE(ri_data%bsizes_AO_split) + 1
998 46 : iatom = 0
999 232 : DO iblk = 1, SIZE(ri_data%bsizes_AO_split)
1000 232 : IF (idx_to_at_AO(iblk) == iatom + 1) THEN
1001 92 : iatom = iatom + 1
1002 92 : batch_ranges_at(iatom) = iblk
1003 : END IF
1004 : END DO
1005 :
1006 46 : CALL dbt_batched_contract_init(t_3c_work_3(1), batch_range_2=batch_ranges_at)
1007 46 : CALL dbt_batched_contract_init(t_3c_work_3(2), batch_range_2=batch_ranges_at)
1008 46 : CALL dbt_batched_contract_init(t_3c_work_3(3), batch_range_2=batch_ranges_at)
1009 46 : CALL dbt_batched_contract_init(t_3c_work_2(1), batch_range_1=batch_ranges_at)
1010 46 : CALL dbt_batched_contract_init(t_3c_work_2(2), batch_range_1=batch_ranges_at)
1011 :
1012 : !Preparing for the stacking of 3c tensors
1013 46 : nimg_nze = ri_data%nimg_nze
1014 46 : batch_size = ri_data%kp_stack_size
1015 46 : n_batch = nimg_nze/batch_size
1016 46 : IF (MODULO(nimg_nze, batch_size) .NE. 0) n_batch = n_batch + 1
1017 138 : ALLOCATE (batch_ranges_nze(n_batch + 1))
1018 112 : DO i_batch = 1, n_batch
1019 112 : batch_ranges_nze(i_batch) = (i_batch - 1)*batch_size + 1
1020 : END DO
1021 46 : batch_ranges_nze(n_batch + 1) = nimg_nze + 1
1022 :
1023 : !Applying the external bump to ((P|Q)_D + B*(P|Q)_OD*B)^-1 from left and right
1024 : !And keep the bump on LHS only version as well, with B*M^-1 = (M^-1*B)^T
1025 138 : DO iatom = 1, natom
1026 92 : CALL dbt_create(t_2c_inv(iatom), t_2c_binv(iatom))
1027 92 : CALL dbt_copy(t_2c_inv(iatom), t_2c_binv(iatom))
1028 92 : CALL apply_bump(t_2c_binv(iatom), iatom, ri_data, qs_env, from_left=.TRUE., from_right=.FALSE.)
1029 138 : CALL apply_bump(t_2c_inv(iatom), iatom, ri_data, qs_env, from_left=.TRUE., from_right=.TRUE.)
1030 : END DO
1031 :
1032 46 : t1 = m_walltime()
1033 46 : work_virial = 0.0_dp
1034 230 : ALLOCATE (iapc_pairs(nimg, 2), i_images(nimg))
1035 230 : ALLOCATE (force_pattern(natom, natom, nimg))
1036 7760 : force_pattern(:, :, :) = -1
1037 : !We proceed with 2 loops: one over the sparsity pattern from the SCF, one over the rest
1038 : !We use the SCF cost model for the first loop, while we calculate the cost of the upcoming loop
1039 138 : DO i_loop = 1, 2
1040 2296 : DO b_img = 1, nimg
1041 6704 : DO jatom = 1, natom
1042 15428 : DO iatom = 1, natom
1043 :
1044 8816 : pref = -0.5_dp*fac
1045 8816 : IF (i_loop == 1 .AND. (.NOT. sparsity_pattern(iatom, jatom, b_img) == igroup)) CYCLE
1046 5054 : IF (i_loop == 2 .AND. (.NOT. force_pattern(iatom, jatom, b_img) == igroup)) CYCLE
1047 :
1048 : !Get the proper HFX potential 2c integrals (R_i^0|S_j^b), times (S_j^b|Q_j^b)^-1
1049 1228 : CALL timeset(routineN//"_2c_1", handle2)
1050 : CALL get_ext_2c_int(t_2c_work(1), mat_2c_pot, iatom, jatom, b_img, ri_data, qs_env, &
1051 : blacs_env_ext=blacs_env_sub, para_env_ext=para_env_sub, &
1052 1228 : dbcsr_template=dbcsr_template)
1053 : CALL dbt_contract(1.0_dp, t_2c_work(1), t_2c_inv(jatom), &
1054 : 0.0_dp, t_2c_work(2), map_1=[1], map_2=[2], &
1055 : contract_1=[2], notcontract_1=[1], &
1056 : contract_2=[1], notcontract_2=[2], &
1057 1228 : filter_eps=ri_data%filter_eps, flop=nflop)
1058 1228 : CALL dbt_copy(t_2c_work(2), t_2c_work(5), move_data=.TRUE.) !move to split blocks
1059 1228 : CALL dbt_filter(t_2c_work(5), ri_data%filter_eps)
1060 1228 : CALL timestop(handle2)
1061 :
1062 1228 : CALL timeset(routineN//"_3c", handle2)
1063 6138 : bounds_iat(:, 1) = [SUM(ri_data%bsizes_AO(1:iatom - 1)) + 1, SUM(ri_data%bsizes_AO(1:iatom))]
1064 6102 : bounds_jat(:, 1) = [SUM(ri_data%bsizes_AO(1:jatom - 1)) + 1, SUM(ri_data%bsizes_AO(1:jatom))]
1065 1228 : CALL dbt_clear(t_2c_R_split)
1066 :
1067 2717 : DO i_spin = 1, nspins
1068 2717 : CALL dbt_batched_contract_init(rho_ao_t_sub(i_spin, b_img))
1069 : END DO
1070 :
1071 1228 : CALL get_iapc_pairs(iapc_pairs, b_img, ri_data, qs_env, i_images) !i = a+c-b
1072 3393 : DO i_batch = 1, n_batch
1073 :
1074 : !Stack the 3c derivatives to take the trace later on
1075 8660 : DO i_xyz = 1, 3
1076 6495 : CALL dbt_clear(t_3c_der_stack(i_xyz))
1077 : CALL fill_3c_stack(t_3c_der_stack(i_xyz), t_3c_der_RI_sub(:, i_xyz), &
1078 : iapc_pairs(:, 1), 3, ri_data, filter_at=jatom, &
1079 : filter_dim=2, idx_to_at=idx_to_at_AO, &
1080 19485 : img_bounds=[batch_ranges_nze(i_batch), batch_ranges_nze(i_batch + 1)])
1081 :
1082 6495 : CALL dbt_clear(t_3c_der_stack(3 + i_xyz))
1083 : CALL fill_3c_stack(t_3c_der_stack(3 + i_xyz), t_3c_der_AO_sub(:, i_xyz), &
1084 : iapc_pairs(:, 1), 3, ri_data, filter_at=jatom, &
1085 : filter_dim=2, idx_to_at=idx_to_at_AO, &
1086 21650 : img_bounds=[batch_ranges_nze(i_batch), batch_ranges_nze(i_batch + 1)])
1087 : END DO
1088 :
1089 5919 : DO i_spin = 1, nspins
1090 : !stack the t_3c_apc tensors
1091 2526 : CALL dbt_clear(t_3c_work_2(3))
1092 : CALL fill_3c_stack(t_3c_work_2(3), t_3c_apc_sub(i_spin, :), iapc_pairs(:, 2), 3, &
1093 : ri_data, filter_at=iatom, filter_dim=1, idx_to_at=idx_to_at_AO, &
1094 7578 : img_bounds=[batch_ranges_nze(i_batch), batch_ranges_nze(i_batch + 1)])
1095 2526 : CALL get_tensor_occupancy(t_3c_work_2(3), nze, occ)
1096 2526 : IF (nze == 0) CYCLE
1097 2516 : CALL dbt_copy(t_3c_work_2(3), t_3c_work_2(1), move_data=.TRUE.)
1098 :
1099 : !Contract with the second density matrix: P_mu^0,nu^b * t_3c_apc,
1100 : !where t_3c_apc = P_sigma^a,lambda^a+c (mu^0 P^0 sigma^a) *(P^0|R^0)^-1 (stacked along a+c)
1101 : CALL dbt_contract(1.0_dp, rho_ao_t_sub(i_spin, b_img), t_3c_work_2(1), &
1102 : 0.0_dp, t_3c_work_2(2), map_1=[1], map_2=[2, 3], &
1103 : contract_1=[1], notcontract_1=[2], &
1104 : contract_2=[1], notcontract_2=[2, 3], &
1105 : bounds_1=bounds_iat, bounds_2=bounds_jat, &
1106 2516 : filter_eps=ri_data%filter_eps, flop=nflop)
1107 2516 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1108 :
1109 2516 : CALL get_tensor_occupancy(t_3c_work_2(2), nze, occ)
1110 2516 : IF (nze == 0) CYCLE
1111 :
1112 : !Contract with V_PQ so that we can take the trace with (Q^b|nu^b lmabda^a+c)^(x)
1113 2228 : CALL dbt_copy(t_3c_work_2(2), t_3c_work_3(1), order=[2, 1, 3], move_data=.TRUE.)
1114 2228 : CALL dbt_batched_contract_init(t_2c_work(5))
1115 : CALL dbt_contract(1.0_dp, t_2c_work(5), t_3c_work_3(1), &
1116 : 0.0_dp, t_3c_work_3(2), map_1=[1], map_2=[2, 3], &
1117 : contract_1=[1], notcontract_1=[2], &
1118 : contract_2=[1], notcontract_2=[2, 3], &
1119 2228 : filter_eps=ri_data%filter_eps, flop=nflop)
1120 2228 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1121 2228 : CALL dbt_batched_contract_finalize(t_2c_work(5))
1122 :
1123 : !Contract with the 3c derivatives to get the force/virial
1124 2228 : CALL dbt_copy(t_3c_work_3(2), t_3c_work_3(4), move_data=.TRUE.)
1125 2228 : IF (use_virial_prv) THEN
1126 : CALL get_force_from_3c_trace(force, t_3c_work_3(4), t_3c_der_stack(1:3), &
1127 : t_3c_der_stack(4:6), atom_of_kind, kind_of, &
1128 : idx_to_at_RI, idx_to_at_AO, i_images, &
1129 : batch_ranges_nze(i_batch), 2.0_dp*pref, &
1130 460 : ri_data, qs_env, work_virial, cell, particle_set)
1131 : ELSE
1132 : CALL get_force_from_3c_trace(force, t_3c_work_3(4), t_3c_der_stack(1:3), &
1133 : t_3c_der_stack(4:6), atom_of_kind, kind_of, &
1134 : idx_to_at_RI, idx_to_at_AO, i_images, &
1135 : batch_ranges_nze(i_batch), 2.0_dp*pref, &
1136 1768 : ri_data, qs_env)
1137 : END IF
1138 2228 : CALL dbt_clear(t_3c_work_3(4))
1139 :
1140 : !Contract with the 3-center integrals in order to have a matrix R_PQ such that
1141 : !we can take the trace sum_PQ R_PQ (P^0|Q^b)^(x)
1142 2228 : IF (i_loop == 2) CYCLE
1143 :
1144 : !Stack the 3c integrals
1145 : CALL fill_3c_stack(t_3c_work_3(4), ri_data%kp_t_3c_int, iapc_pairs(:, 1), 3, ri_data, &
1146 : filter_at=jatom, filter_dim=2, idx_to_at=idx_to_at_AO, &
1147 3522 : img_bounds=[batch_ranges_nze(i_batch), batch_ranges_nze(i_batch + 1)])
1148 1174 : CALL dbt_copy(t_3c_work_3(4), t_3c_work_3(3), move_data=.TRUE.)
1149 :
1150 1174 : CALL dbt_batched_contract_init(t_2c_R_split)
1151 : CALL dbt_contract(1.0_dp, t_3c_work_3(1), t_3c_work_3(3), &
1152 : 1.0_dp, t_2c_R_split, map_1=[1], map_2=[2], &
1153 : contract_1=[2, 3], notcontract_1=[1], &
1154 : contract_2=[2, 3], notcontract_2=[1], &
1155 1174 : filter_eps=ri_data%filter_eps, flop=nflop)
1156 1174 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1157 1174 : CALL dbt_batched_contract_finalize(t_2c_R_split)
1158 8381 : CALL dbt_copy(t_3c_work_3(4), t_3c_work_3(1))
1159 : END DO
1160 : END DO
1161 2717 : DO i_spin = 1, nspins
1162 2717 : CALL dbt_batched_contract_finalize(rho_ao_t_sub(i_spin, b_img))
1163 : END DO
1164 1228 : CALL timestop(handle2)
1165 :
1166 1228 : IF (i_loop == 2) CYCLE
1167 646 : pref = 2.0_dp*pref
1168 646 : IF (iatom == jatom .AND. b_img == 1) pref = 0.5_dp*pref
1169 :
1170 646 : CALL timeset(routineN//"_2c_2", handle2)
1171 : !Note that the derivatives are in atomic block format (not split)
1172 646 : CALL dbt_copy(t_2c_R_split, t_2c_R, move_data=.TRUE.)
1173 :
1174 : CALL get_ext_2c_int(t_2c_work(1), mat_2c_pot, iatom, jatom, b_img, ri_data, qs_env, &
1175 : blacs_env_ext=blacs_env_sub, para_env_ext=para_env_sub, &
1176 646 : dbcsr_template=dbcsr_template)
1177 :
1178 : !We have to calculate: S^-1(iat) * R_PQ * S^-1(jat) to trace with HFX pot der
1179 : ! + R_PQ * S^-1(jat) * pot^T to trace with S^(x) (iat)
1180 : ! + pot^T * S^-1(iat) *R_PQ to trace with S^(x) (jat)
1181 :
1182 : !Because 3c tensors are all precontracted with the inverse RI metric,
1183 : !t_2c_R is currently implicitely multiplied by S^-1(iat) from the left
1184 : !and S^-1(jat) from the right, directly in the proper format for the trace
1185 : !with the HFX potential derivative
1186 :
1187 : !Trace with HFX pot deriv, that we need to build first
1188 2584 : DO i_xyz = 1, 3
1189 : CALL get_ext_2c_int(t_2c_der_pot(i_xyz), mat_der_pot_sub(:, i_xyz), iatom, jatom, &
1190 : b_img, ri_data, qs_env, blacs_env_ext=blacs_env_sub, &
1191 2584 : para_env_ext=para_env_sub, dbcsr_template=dbcsr_template)
1192 : END DO
1193 :
1194 646 : IF (use_virial_prv) THEN
1195 : CALL get_2c_der_force(force, t_2c_R, t_2c_der_pot, atom_of_kind, kind_of, &
1196 125 : b_img, pref, ri_data, qs_env, work_virial, cell, particle_set)
1197 : ELSE
1198 : CALL get_2c_der_force(force, t_2c_R, t_2c_der_pot, atom_of_kind, kind_of, &
1199 521 : b_img, pref, ri_data, qs_env)
1200 : END IF
1201 :
1202 2584 : DO i_xyz = 1, 3
1203 2584 : CALL dbt_clear(t_2c_der_pot(i_xyz))
1204 : END DO
1205 :
1206 : !R_PQ * S^-1(jat) * pot^T (=A)
1207 : CALL dbt_contract(1.0_dp, t_2c_metric(iatom), t_2c_R, & !get rid of implicit S^-1(iat)
1208 : 0.0_dp, t_2c_work(2), map_1=[1], map_2=[2], &
1209 : contract_1=[2], notcontract_1=[1], &
1210 : contract_2=[1], notcontract_2=[2], &
1211 646 : filter_eps=ri_data%filter_eps, flop=nflop)
1212 646 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1213 : CALL dbt_contract(1.0_dp, t_2c_work(2), t_2c_work(1), &
1214 : 0.0_dp, t_2c_work(3), map_1=[1], map_2=[2], &
1215 : contract_1=[2], notcontract_1=[1], &
1216 : contract_2=[2], notcontract_2=[1], &
1217 646 : filter_eps=ri_data%filter_eps, flop=nflop)
1218 646 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1219 :
1220 : !With the RI bump function, things get more complex. M = (S|P)_D + B*(S|P)_OD*B
1221 : !Calculate M^-1*B*A + A*B*M^-1 to contract with B^x. A is in t_2c_work(3)
1222 : CALL dbt_contract(1.0_dp, t_2c_work(3), t_2c_binv(iatom), &
1223 : 0.0_dp, t_2c_work(2), map_1=[1], map_2=[2], &
1224 : contract_1=[2], notcontract_1=[1], &
1225 : contract_2=[1], notcontract_2=[2], &
1226 646 : filter_eps=ri_data%filter_eps, flop=nflop)
1227 646 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1228 :
1229 : CALL dbt_contract(1.0_dp, t_2c_binv(iatom), t_2c_work(3), & !use transpose of B*M^-1 = M^-1*B
1230 : 0.0_dp, t_2c_work(4), map_1=[1], map_2=[2], &
1231 : contract_1=[1], notcontract_1=[2], &
1232 : contract_2=[1], notcontract_2=[2], &
1233 646 : filter_eps=ri_data%filter_eps, flop=nflop)
1234 646 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1235 :
1236 646 : CALL dbt_copy(t_2c_work(2), t_2c_work(4), summation=.TRUE.)
1237 : CALL get_2c_bump_forces(force, t_2c_work(4), iatom, atom_of_kind, kind_of, pref, &
1238 646 : ri_data, qs_env, work_virial)
1239 :
1240 : !Calculate -M^-1*B*A*B*M^-1 to contracte with diagonal RI metric deriv. t_2c_work(2) holds A*B*M^-1
1241 : CALL dbt_contract(1.0_dp, t_2c_binv(iatom), t_2c_work(2), &
1242 : 0.0_dp, t_2c_work(4), map_1=[1], map_2=[2], &
1243 : contract_1=[1], notcontract_1=[2], &
1244 : contract_2=[1], notcontract_2=[2], &
1245 646 : filter_eps=ri_data%filter_eps, flop=nflop)
1246 646 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1247 :
1248 646 : IF (use_virial_prv) THEN
1249 : CALL get_2c_der_force(force, t_2c_work(4), t_2c_der_metric_sub(iatom, :), atom_of_kind, &
1250 : kind_of, 1, -pref, ri_data, qs_env, work_virial, cell, particle_set, &
1251 125 : diag=.TRUE., offdiag=.FALSE.)
1252 : ELSE
1253 : CALL get_2c_der_force(force, t_2c_work(4), t_2c_der_metric_sub(iatom, :), atom_of_kind, &
1254 521 : kind_of, 1, -pref, ri_data, qs_env, diag=.TRUE., offdiag=.FALSE.)
1255 : END IF
1256 :
1257 : !Calculate -B*M^-1*B*A*B*M^-1*B to contract with off-diagonal RI metric derivs
1258 646 : CALL dbt_copy(t_2c_work(4), t_2c_work(2))
1259 646 : CALL apply_bump(t_2c_work(2), iatom, ri_data, qs_env, from_left=.TRUE., from_right=.TRUE.)
1260 :
1261 646 : IF (use_virial_prv) THEN
1262 : CALL get_2c_der_force(force, t_2c_work(2), t_2c_der_metric_sub(iatom, :), atom_of_kind, &
1263 : kind_of, 1, -pref, ri_data, qs_env, work_virial, cell, particle_set, &
1264 125 : diag=.FALSE., offdiag=.TRUE.)
1265 : ELSE
1266 : CALL get_2c_der_force(force, t_2c_work(2), t_2c_der_metric_sub(iatom, :), atom_of_kind, &
1267 521 : kind_of, 1, -pref, ri_data, qs_env, diag=.FALSE., offdiag=.TRUE.)
1268 : END IF
1269 :
1270 : !Calculate -O*B*M^-1*B*A*B*M^-1 - M^-1*B*A*B*M^-1*B*O, where O is off-diagonal integrals
1271 : !t_2c_work(4) holds M^-1*B*A*B*M^-1, and exploit transpose of B*O (stored in t_2c_bint)
1272 : CALL dbt_contract(1.0_dp, t_2c_work(4), t_2c_bint(iatom), &
1273 : 0.0_dp, t_2c_work(2), map_1=[1], map_2=[2], &
1274 : contract_1=[2], notcontract_1=[1], &
1275 : contract_2=[1], notcontract_2=[2], &
1276 646 : filter_eps=ri_data%filter_eps, flop=nflop)
1277 646 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1278 :
1279 : CALL dbt_contract(1.0_dp, t_2c_bint(iatom), t_2c_work(4), &
1280 : 1.0_dp, t_2c_work(2), map_1=[1], map_2=[2], &
1281 : contract_1=[1], notcontract_1=[2], &
1282 : contract_2=[1], notcontract_2=[2], &
1283 646 : filter_eps=ri_data%filter_eps, flop=nflop)
1284 646 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1285 :
1286 : CALL get_2c_bump_forces(force, t_2c_work(2), iatom, atom_of_kind, kind_of, -pref, &
1287 646 : ri_data, qs_env, work_virial)
1288 :
1289 : ! pot^T * S^-1(iat) * R_PQ (=A)
1290 : CALL dbt_contract(1.0_dp, t_2c_work(1), t_2c_R, &
1291 : 0.0_dp, t_2c_work(2), map_1=[1], map_2=[2], &
1292 : contract_1=[1], notcontract_1=[2], &
1293 : contract_2=[1], notcontract_2=[2], &
1294 646 : filter_eps=ri_data%filter_eps, flop=nflop)
1295 646 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1296 :
1297 : CALL dbt_contract(1.0_dp, t_2c_work(2), t_2c_metric(jatom), & !get rid of implicit S^-1(jat)
1298 : 0.0_dp, t_2c_work(3), map_1=[1], map_2=[2], &
1299 : contract_1=[2], notcontract_1=[1], &
1300 : contract_2=[1], notcontract_2=[2], &
1301 646 : filter_eps=ri_data%filter_eps, flop=nflop)
1302 646 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1303 :
1304 : !Do the same shenanigans with the S^(x) (jatom)
1305 : !Calculate M^-1*B*A + A*B*M^-1 to contract with B^x. A is in t_2c_work(3)
1306 : CALL dbt_contract(1.0_dp, t_2c_work(3), t_2c_binv(jatom), &
1307 : 0.0_dp, t_2c_work(2), map_1=[1], map_2=[2], &
1308 : contract_1=[2], notcontract_1=[1], &
1309 : contract_2=[1], notcontract_2=[2], &
1310 646 : filter_eps=ri_data%filter_eps, flop=nflop)
1311 646 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1312 :
1313 : CALL dbt_contract(1.0_dp, t_2c_binv(jatom), t_2c_work(3), & !use transpose of B*M^-1 = M^-1*B
1314 : 0.0_dp, t_2c_work(4), map_1=[1], map_2=[2], &
1315 : contract_1=[1], notcontract_1=[2], &
1316 : contract_2=[1], notcontract_2=[2], &
1317 646 : filter_eps=ri_data%filter_eps, flop=nflop)
1318 646 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1319 :
1320 646 : CALL dbt_copy(t_2c_work(2), t_2c_work(4), summation=.TRUE.)
1321 : CALL get_2c_bump_forces(force, t_2c_work(4), jatom, atom_of_kind, kind_of, pref, &
1322 646 : ri_data, qs_env, work_virial)
1323 :
1324 : !Calculate -M^-1*B*A*B*M^-1 to contracte with diagonal RI metric deriv. t_2c_work(2) holds A*B*M^-1
1325 : CALL dbt_contract(1.0_dp, t_2c_binv(jatom), t_2c_work(2), &
1326 : 0.0_dp, t_2c_work(4), map_1=[1], map_2=[2], &
1327 : contract_1=[1], notcontract_1=[2], &
1328 : contract_2=[1], notcontract_2=[2], &
1329 646 : filter_eps=ri_data%filter_eps, flop=nflop)
1330 646 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1331 :
1332 646 : IF (use_virial_prv) THEN
1333 : CALL get_2c_der_force(force, t_2c_work(4), t_2c_der_metric_sub(jatom, :), atom_of_kind, &
1334 : kind_of, 1, -pref, ri_data, qs_env, work_virial, cell, particle_set, &
1335 125 : diag=.TRUE., offdiag=.FALSE.)
1336 : ELSE
1337 : CALL get_2c_der_force(force, t_2c_work(4), t_2c_der_metric_sub(jatom, :), atom_of_kind, &
1338 521 : kind_of, 1, -pref, ri_data, qs_env, diag=.TRUE., offdiag=.FALSE.)
1339 : END IF
1340 :
1341 : !Calculate -B*M^-1*B*A*B*M^-1*B to contract with off-diagonal RI metric derivs
1342 646 : CALL dbt_copy(t_2c_work(4), t_2c_work(2))
1343 646 : CALL apply_bump(t_2c_work(2), jatom, ri_data, qs_env, from_left=.TRUE., from_right=.TRUE.)
1344 :
1345 646 : IF (use_virial_prv) THEN
1346 : CALL get_2c_der_force(force, t_2c_work(2), t_2c_der_metric_sub(jatom, :), atom_of_kind, &
1347 : kind_of, 1, -pref, ri_data, qs_env, work_virial, cell, particle_set, &
1348 125 : diag=.FALSE., offdiag=.TRUE.)
1349 : ELSE
1350 : CALL get_2c_der_force(force, t_2c_work(2), t_2c_der_metric_sub(jatom, :), atom_of_kind, &
1351 521 : kind_of, 1, -pref, ri_data, qs_env, diag=.FALSE., offdiag=.TRUE.)
1352 : END IF
1353 :
1354 : !Calculate -O*B*M^-1*B*A*B*M^-1 - M^-1*B*A*B*M^-1*B*O, where O is off-diagonal integrals
1355 : !t_2c_work(4) holds M^-1*B*A*B*M^-1, and exploit transpose of B*O (stored in t_2c_bint)
1356 : CALL dbt_contract(1.0_dp, t_2c_work(4), t_2c_bint(jatom), &
1357 : 0.0_dp, t_2c_work(2), map_1=[1], map_2=[2], &
1358 : contract_1=[2], notcontract_1=[1], &
1359 : contract_2=[1], notcontract_2=[2], &
1360 646 : filter_eps=ri_data%filter_eps, flop=nflop)
1361 646 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1362 :
1363 : CALL dbt_contract(1.0_dp, t_2c_bint(jatom), t_2c_work(4), &
1364 : 1.0_dp, t_2c_work(2), map_1=[1], map_2=[2], &
1365 : contract_1=[1], notcontract_1=[2], &
1366 : contract_2=[1], notcontract_2=[2], &
1367 646 : filter_eps=ri_data%filter_eps, flop=nflop)
1368 646 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1369 :
1370 : CALL get_2c_bump_forces(force, t_2c_work(2), jatom, atom_of_kind, kind_of, -pref, &
1371 646 : ri_data, qs_env, work_virial)
1372 :
1373 15744 : CALL timestop(handle2)
1374 : END DO !iatom
1375 : END DO !jatom
1376 : END DO !b_img
1377 :
1378 138 : IF (i_loop == 1) THEN
1379 46 : CALL update_pattern_to_forces(force_pattern, sparsity_pattern, ngroups, ri_data, qs_env)
1380 : END IF
1381 : END DO !i_loop
1382 :
1383 46 : CALL dbt_batched_contract_finalize(t_3c_work_3(1))
1384 46 : CALL dbt_batched_contract_finalize(t_3c_work_3(2))
1385 46 : CALL dbt_batched_contract_finalize(t_3c_work_3(3))
1386 46 : CALL dbt_batched_contract_finalize(t_3c_work_2(1))
1387 46 : CALL dbt_batched_contract_finalize(t_3c_work_2(2))
1388 :
1389 46 : IF (use_virial_prv) THEN
1390 40 : DO k_xyz = 1, 3
1391 130 : DO j_xyz = 1, 3
1392 390 : DO i_xyz = 1, 3
1393 : virial%pv_fock_4c(i_xyz, j_xyz) = virial%pv_fock_4c(i_xyz, j_xyz) &
1394 360 : + work_virial(i_xyz, k_xyz)*cell%hmat(j_xyz, k_xyz)
1395 : END DO
1396 : END DO
1397 : END DO
1398 : END IF
1399 :
1400 : !End of subgroup parallelization
1401 46 : CALL cp_blacs_env_release(blacs_env_sub)
1402 46 : CALL para_env_sub%free()
1403 46 : DEALLOCATE (para_env_sub)
1404 :
1405 46 : CALL para_env%sync()
1406 46 : t2 = m_walltime()
1407 46 : ri_data%dbcsr_time = ri_data%dbcsr_time + t2 - t1
1408 :
1409 : !clean-up
1410 46 : IF (ASSOCIATED(dbcsr_template)) THEN
1411 46 : CALL dbcsr_release(dbcsr_template)
1412 46 : DEALLOCATE (dbcsr_template)
1413 : END IF
1414 46 : CALL dbt_destroy(t_2c_R)
1415 46 : CALL dbt_destroy(t_2c_R_split)
1416 46 : CALL dbt_destroy(t_2c_work(1))
1417 46 : CALL dbt_destroy(t_2c_work(2))
1418 46 : CALL dbt_destroy(t_2c_work(3))
1419 46 : CALL dbt_destroy(t_2c_work(4))
1420 46 : CALL dbt_destroy(t_2c_work(5))
1421 46 : CALL dbt_destroy(t_3c_work_2(1))
1422 46 : CALL dbt_destroy(t_3c_work_2(2))
1423 46 : CALL dbt_destroy(t_3c_work_2(3))
1424 46 : CALL dbt_destroy(t_3c_work_3(1))
1425 46 : CALL dbt_destroy(t_3c_work_3(2))
1426 46 : CALL dbt_destroy(t_3c_work_3(3))
1427 46 : CALL dbt_destroy(t_3c_work_3(4))
1428 46 : CALL dbt_destroy(t_3c_der_stack(1))
1429 46 : CALL dbt_destroy(t_3c_der_stack(2))
1430 46 : CALL dbt_destroy(t_3c_der_stack(3))
1431 46 : CALL dbt_destroy(t_3c_der_stack(4))
1432 46 : CALL dbt_destroy(t_3c_der_stack(5))
1433 46 : CALL dbt_destroy(t_3c_der_stack(6))
1434 184 : DO i_xyz = 1, 3
1435 184 : CALL dbt_destroy(t_2c_der_pot(i_xyz))
1436 : END DO
1437 138 : DO iatom = 1, natom
1438 92 : CALL dbt_destroy(t_2c_inv(iatom))
1439 92 : CALL dbt_destroy(t_2c_binv(iatom))
1440 92 : CALL dbt_destroy(t_2c_bint(iatom))
1441 92 : CALL dbt_destroy(t_2c_metric(iatom))
1442 414 : DO i_xyz = 1, 3
1443 368 : CALL dbt_destroy(t_2c_der_metric_sub(iatom, i_xyz))
1444 : END DO
1445 : END DO
1446 1148 : DO i_img = 1, nimg
1447 1102 : CALL dbcsr_release(mat_2c_pot(i_img))
1448 2430 : DO i_spin = 1, nspins
1449 1282 : CALL dbt_destroy(rho_ao_t_sub(i_spin, i_img))
1450 2384 : CALL dbt_destroy(t_3c_apc_sub(i_spin, i_img))
1451 : END DO
1452 : END DO
1453 184 : DO i_xyz = 1, 3
1454 3490 : DO i_img = 1, nimg
1455 3306 : CALL dbt_destroy(t_3c_der_RI_sub(i_img, i_xyz))
1456 3306 : CALL dbt_destroy(t_3c_der_AO_sub(i_img, i_xyz))
1457 3444 : CALL dbcsr_release(mat_der_pot_sub(i_img, i_xyz))
1458 : END DO
1459 : END DO
1460 :
1461 46 : CALL timestop(handle)
1462 :
1463 20468 : END SUBROUTINE hfx_ri_update_forces_kp
1464 :
1465 : ! **************************************************************************************************
1466 : !> \brief A routine the applies the RI bump matrix from the left and/or the right, given an input
1467 : !> matrix and the central RI atom. We assume atomic block sizes
1468 : !> \param t_2c_inout ...
1469 : !> \param atom_i ...
1470 : !> \param ri_data ...
1471 : !> \param qs_env ...
1472 : !> \param from_left ...
1473 : !> \param from_right ...
1474 : !> \param debump ...
1475 : ! **************************************************************************************************
1476 1956 : SUBROUTINE apply_bump(t_2c_inout, atom_i, ri_data, qs_env, from_left, from_right, debump)
1477 : TYPE(dbt_type), INTENT(INOUT) :: t_2c_inout
1478 : INTEGER, INTENT(IN) :: atom_i
1479 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
1480 : TYPE(qs_environment_type), POINTER :: qs_env
1481 : LOGICAL, INTENT(IN), OPTIONAL :: from_left, from_right, debump
1482 :
1483 : INTEGER :: i_img, i_RI, iatom, ind(2), j_img, j_RI, &
1484 : jatom, natom, nblks(2), nimg, nkind
1485 1956 : INTEGER, DIMENSION(:, :), POINTER :: index_to_cell
1486 1956 : INTEGER, DIMENSION(:, :, :), POINTER :: cell_to_index
1487 : LOGICAL :: found, my_debump, my_left, my_right
1488 : REAL(dp) :: bval, r0, r1, ri(3), rj(3), rref(3), &
1489 : scoord(3)
1490 1956 : REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: blk
1491 : TYPE(cell_type), POINTER :: cell
1492 : TYPE(dbt_iterator_type) :: iter
1493 : TYPE(kpoint_type), POINTER :: kpoints
1494 1956 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
1495 1956 : TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
1496 :
1497 1956 : NULLIFY (qs_kind_set, particle_set, kpoints, index_to_cell, cell_to_index, cell)
1498 :
1499 : CALL get_qs_env(qs_env, natom=natom, nkind=nkind, qs_kind_set=qs_kind_set, cell=cell, &
1500 1956 : kpoints=kpoints, particle_set=particle_set)
1501 1956 : CALL get_kpoint_info(kpoints, cell_to_index=cell_to_index, index_to_cell=index_to_cell)
1502 :
1503 1956 : my_debump = .FALSE.
1504 1956 : IF (PRESENT(debump)) my_debump = debump
1505 :
1506 1956 : my_left = .FALSE.
1507 1956 : IF (PRESENT(from_left)) my_left = from_left
1508 :
1509 1956 : my_right = .FALSE.
1510 1956 : IF (PRESENT(from_right)) my_right = from_right
1511 1956 : CPASSERT(my_left .OR. my_right)
1512 :
1513 1956 : CALL dbt_get_info(t_2c_inout, nblks_total=nblks)
1514 1956 : CPASSERT(nblks(1) == ri_data%ncell_RI*natom)
1515 1956 : CPASSERT(nblks(2) == ri_data%ncell_RI*natom)
1516 :
1517 1956 : nimg = ri_data%nimg
1518 :
1519 : !Loop over the RI cells and atoms, and apply bump accordingly
1520 1956 : r1 = ri_data%kp_RI_range
1521 1956 : r0 = ri_data%kp_bump_rad
1522 1956 : rref = pbc(particle_set(atom_i)%r, cell)
1523 :
1524 : !$OMP PARALLEL DEFAULT(NONE) SHARED(t_2c_inout,natom,ri_data,cell,particle_set,index_to_cell,my_left, &
1525 : !$OMP my_right,r0,r1,rref,my_debump) &
1526 1956 : !$OMP PRIVATE(iter,ind,blk,found,i_RI,i_img,iatom,j_RI,j_img,jatom,scoord,ri,rj,bval)
1527 : CALL dbt_iterator_start(iter, t_2c_inout)
1528 : DO WHILE (dbt_iterator_blocks_left(iter))
1529 : CALL dbt_iterator_next_block(iter, ind)
1530 : CALL dbt_get_block(t_2c_inout, ind, blk, found)
1531 : IF (.NOT. found) CYCLE
1532 :
1533 : i_RI = (ind(1) - 1)/natom + 1
1534 : i_img = ri_data%RI_cell_to_img(i_RI)
1535 : iatom = ind(1) - (i_RI - 1)*natom
1536 :
1537 : CALL real_to_scaled(scoord, pbc(particle_set(iatom)%r, cell), cell)
1538 : CALL scaled_to_real(ri, scoord(:) + index_to_cell(:, i_img), cell)
1539 :
1540 : j_RI = (ind(2) - 1)/natom + 1
1541 : j_img = ri_data%RI_cell_to_img(j_RI)
1542 : jatom = ind(2) - (j_RI - 1)*natom
1543 :
1544 : CALL real_to_scaled(scoord, pbc(particle_set(jatom)%r, cell), cell)
1545 : CALL scaled_to_real(rj, scoord(:) + index_to_cell(:, j_img), cell)
1546 :
1547 : IF (.NOT. my_debump) THEN
1548 : IF (my_left) blk(:, :) = blk(:, :)*bump(NORM2(ri - rref), r0, r1)
1549 : IF (my_right) blk(:, :) = blk(:, :)*bump(NORM2(rj - rref), r0, r1)
1550 : ELSE
1551 : !Note: by construction, the bump function is never quite zero, as its range is the same
1552 : ! as that of the extended RI basis (but we are safe)
1553 : bval = bump(NORM2(ri - rref), r0, r1)
1554 : IF (my_left .AND. bval > EPSILON(1.0_dp)) blk(:, :) = blk(:, :)/bval
1555 : bval = bump(NORM2(rj - rref), r0, r1)
1556 : IF (my_right .AND. bval > EPSILON(1.0_dp)) blk(:, :) = blk(:, :)/bval
1557 : END IF
1558 :
1559 : CALL dbt_put_block(t_2c_inout, ind, SHAPE(blk), blk)
1560 :
1561 : DEALLOCATE (blk)
1562 : END DO
1563 : CALL dbt_iterator_stop(iter)
1564 : !$OMP END PARALLEL
1565 1956 : CALL dbt_filter(t_2c_inout, ri_data%filter_eps)
1566 :
1567 3912 : END SUBROUTINE apply_bump
1568 :
1569 : ! **************************************************************************************************
1570 : !> \brief A routine that calculates the forces due to the derivative of the bump function
1571 : !> \param force ...
1572 : !> \param t_2c_in ...
1573 : !> \param atom_i ...
1574 : !> \param atom_of_kind ...
1575 : !> \param kind_of ...
1576 : !> \param pref ...
1577 : !> \param ri_data ...
1578 : !> \param qs_env ...
1579 : !> \param work_virial ...
1580 : ! **************************************************************************************************
1581 2584 : SUBROUTINE get_2c_bump_forces(force, t_2c_in, atom_i, atom_of_kind, kind_of, pref, ri_data, &
1582 : qs_env, work_virial)
1583 : TYPE(qs_force_type), DIMENSION(:), POINTER :: force
1584 : TYPE(dbt_type), INTENT(INOUT) :: t_2c_in
1585 : INTEGER, INTENT(IN) :: atom_i
1586 : INTEGER, DIMENSION(:), INTENT(IN) :: atom_of_kind, kind_of
1587 : REAL(dp), INTENT(IN) :: pref
1588 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
1589 : TYPE(qs_environment_type), POINTER :: qs_env
1590 : REAL(dp), DIMENSION(3, 3), INTENT(INOUT) :: work_virial
1591 :
1592 : INTEGER :: i, i_img, i_RI, i_xyz, iat_of_kind, iatom, ikind, ind(2), j_img, j_RI, j_xyz, &
1593 : jat_of_kind, jatom, jkind, natom, nblks(2), nimg, nkind
1594 2584 : INTEGER, DIMENSION(:, :), POINTER :: index_to_cell
1595 2584 : INTEGER, DIMENSION(:, :, :), POINTER :: cell_to_index
1596 : LOGICAL :: found
1597 : REAL(dp) :: new_force, r0, r1, ri(3), rj(3), &
1598 : rref(3), scoord(3), x
1599 2584 : REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: blk
1600 : TYPE(cell_type), POINTER :: cell
1601 : TYPE(dbt_iterator_type) :: iter
1602 : TYPE(kpoint_type), POINTER :: kpoints
1603 2584 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
1604 2584 : TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
1605 :
1606 2584 : NULLIFY (qs_kind_set, particle_set, kpoints, index_to_cell, cell_to_index, cell)
1607 :
1608 : CALL get_qs_env(qs_env, natom=natom, nkind=nkind, qs_kind_set=qs_kind_set, cell=cell, &
1609 2584 : kpoints=kpoints, particle_set=particle_set)
1610 2584 : CALL get_kpoint_info(kpoints, cell_to_index=cell_to_index, index_to_cell=index_to_cell)
1611 :
1612 2584 : CALL dbt_get_info(t_2c_in, nblks_total=nblks)
1613 2584 : CPASSERT(nblks(1) == ri_data%ncell_RI*natom)
1614 2584 : CPASSERT(nblks(2) == ri_data%ncell_RI*natom)
1615 :
1616 2584 : nimg = ri_data%nimg
1617 :
1618 : !Loop over the RI cells and atoms, and apply bump accordingly
1619 2584 : r1 = ri_data%kp_RI_range
1620 2584 : r0 = ri_data%kp_bump_rad
1621 2584 : rref = pbc(particle_set(atom_i)%r, cell)
1622 :
1623 2584 : iat_of_kind = atom_of_kind(atom_i)
1624 2584 : ikind = kind_of(atom_i)
1625 :
1626 : !$OMP PARALLEL DEFAULT(NONE) SHARED(t_2c_in,natom,ri_data,cell,particle_set,index_to_cell,pref, &
1627 : !$OMP force,r0,r1,rref,atom_of_kind,kind_of,iat_of_kind,ikind,work_virial) &
1628 : !$OMP PRIVATE(iter,ind,blk,found,i_RI,i_img,iatom,j_RI,j_img,jatom,scoord,ri,rj,jkind,jat_of_kind, &
1629 2584 : !$OMP new_force,i_xyz,i,x,j_xyz)
1630 : CALL dbt_iterator_start(iter, t_2c_in)
1631 : DO WHILE (dbt_iterator_blocks_left(iter))
1632 : CALL dbt_iterator_next_block(iter, ind)
1633 : IF (ind(1) .NE. ind(2)) CYCLE !bump matrix is diagonal
1634 :
1635 : CALL dbt_get_block(t_2c_in, ind, blk, found)
1636 : IF (.NOT. found) CYCLE
1637 :
1638 : !bump is a function of x = SQRT((R - Rref)^2). We refer to R as jatom, and Rref as atom_i
1639 : j_RI = (ind(2) - 1)/natom + 1
1640 : j_img = ri_data%RI_cell_to_img(j_RI)
1641 : jatom = ind(2) - (j_RI - 1)*natom
1642 : jat_of_kind = atom_of_kind(jatom)
1643 : jkind = kind_of(jatom)
1644 :
1645 : CALL real_to_scaled(scoord, pbc(particle_set(jatom)%r, cell), cell)
1646 : CALL scaled_to_real(rj, scoord(:) + index_to_cell(:, j_img), cell)
1647 : x = NORM2(rj - rref)
1648 : IF (x < r0 .OR. x > r1) CYCLE
1649 :
1650 : new_force = 0.0_dp
1651 : DO i = 1, SIZE(blk, 1)
1652 : new_force = new_force + blk(i, i)
1653 : END DO
1654 : new_force = pref*new_force*dbump(x, r0, r1)
1655 :
1656 : !x = SQRT((R - Rref)^2), so we multiply by dx/dR and dx/dRref
1657 : DO i_xyz = 1, 3
1658 : !Force acting on second atom
1659 : !$OMP ATOMIC
1660 : force(jkind)%fock_4c(i_xyz, jat_of_kind) = force(jkind)%fock_4c(i_xyz, jat_of_kind) + &
1661 : new_force*(rj(i_xyz) - rref(i_xyz))/x
1662 :
1663 : !virial acting on second atom
1664 : CALL real_to_scaled(scoord, rj, cell)
1665 : DO j_xyz = 1, 3
1666 : !$OMP ATOMIC
1667 : work_virial(i_xyz, j_xyz) = work_virial(i_xyz, j_xyz) &
1668 : + new_force*scoord(j_xyz)*(rj(i_xyz) - rref(i_xyz))/x
1669 : END DO
1670 :
1671 : !Force acting on reference atom, defining the RI basis
1672 : !$OMP ATOMIC
1673 : force(ikind)%fock_4c(i_xyz, iat_of_kind) = force(ikind)%fock_4c(i_xyz, iat_of_kind) - &
1674 : new_force*(rj(i_xyz) - rref(i_xyz))/x
1675 :
1676 : !virial of ref atom
1677 : CALL real_to_scaled(scoord, rref, cell)
1678 : DO j_xyz = 1, 3
1679 : !$OMP ATOMIC
1680 : work_virial(i_xyz, j_xyz) = work_virial(i_xyz, j_xyz) &
1681 : - new_force*scoord(j_xyz)*(rj(i_xyz) - rref(i_xyz))/x
1682 : END DO
1683 : END DO !i_xyz
1684 :
1685 : DEALLOCATE (blk)
1686 : END DO
1687 : CALL dbt_iterator_stop(iter)
1688 : !$OMP END PARALLEL
1689 :
1690 5168 : END SUBROUTINE get_2c_bump_forces
1691 :
1692 : ! **************************************************************************************************
1693 : !> \brief The bumb function as defined by Juerg
1694 : !> \param x ...
1695 : !> \param r0 ...
1696 : !> \param r1 ...
1697 : !> \return ...
1698 : ! **************************************************************************************************
1699 27477 : FUNCTION bump(x, r0, r1) RESULT(b)
1700 : REAL(dp), INTENT(IN) :: x, r0, r1
1701 : REAL(dp) :: b
1702 :
1703 : REAL(dp) :: r
1704 :
1705 : !Head-Gordon
1706 : !b = 1.0_dp/(1.0_dp+EXP((r1-r0)/(r1-x)-(r1-r0)/(x-r0)))
1707 : !Juerg
1708 27477 : r = (x - r0)/(r1 - r0)
1709 27477 : b = -6.0_dp*r**5 + 15.0_dp*r**4 - 10.0_dp*r**3 + 1.0_dp
1710 27477 : IF (x .GE. r1) b = 0.0_dp
1711 27477 : IF (x .LE. r0) b = 1.0_dp
1712 :
1713 27477 : END FUNCTION bump
1714 :
1715 : ! **************************************************************************************************
1716 : !> \brief The derivative of the bump function
1717 : !> \param x ...
1718 : !> \param r0 ...
1719 : !> \param r1 ...
1720 : !> \return ...
1721 : ! **************************************************************************************************
1722 597 : FUNCTION dbump(x, r0, r1) RESULT(b)
1723 : REAL(dp), INTENT(IN) :: x, r0, r1
1724 : REAL(dp) :: b
1725 :
1726 : REAL(dp) :: r
1727 :
1728 597 : r = (x - r0)/(r1 - r0)
1729 597 : b = (-30.0_dp*r**4 + 60.0_dp*r**3 - 30.0_dp*r**2)/(r1 - r0)
1730 597 : IF (x .GE. r1) b = 0.0_dp
1731 597 : IF (x .LE. r0) b = 0.0_dp
1732 :
1733 597 : END FUNCTION dbump
1734 :
1735 : ! **************************************************************************************************
1736 : !> \brief return the cell index a+c corresponding to given cell index i and b, with i = a+c-b
1737 : !> \param i_index ...
1738 : !> \param b_index ...
1739 : !> \param qs_env ...
1740 : !> \return ...
1741 : ! **************************************************************************************************
1742 518709 : FUNCTION get_apc_index_from_ib(i_index, b_index, qs_env) RESULT(apc_index)
1743 : INTEGER, INTENT(IN) :: i_index, b_index
1744 : TYPE(qs_environment_type), POINTER :: qs_env
1745 : INTEGER :: apc_index
1746 :
1747 : INTEGER, DIMENSION(3) :: cell_apc
1748 518709 : INTEGER, DIMENSION(:, :), POINTER :: index_to_cell
1749 518709 : INTEGER, DIMENSION(:, :, :), POINTER :: cell_to_index
1750 : TYPE(kpoint_type), POINTER :: kpoints
1751 :
1752 518709 : CALL get_qs_env(qs_env, kpoints=kpoints)
1753 518709 : CALL get_kpoint_info(kpoints, cell_to_index=cell_to_index, index_to_cell=index_to_cell)
1754 :
1755 : !i = a+c-b => a+c = i+b
1756 2074836 : cell_apc(:) = index_to_cell(:, i_index) + index_to_cell(:, b_index)
1757 :
1758 3541648 : IF (ANY([cell_apc(1), cell_apc(2), cell_apc(3)] < LBOUND(cell_to_index)) .OR. &
1759 : ANY([cell_apc(1), cell_apc(2), cell_apc(3)] > UBOUND(cell_to_index))) THEN
1760 :
1761 : apc_index = 0
1762 : ELSE
1763 447335 : apc_index = cell_to_index(cell_apc(1), cell_apc(2), cell_apc(3))
1764 : END IF
1765 :
1766 518709 : END FUNCTION get_apc_index_from_ib
1767 :
1768 : ! **************************************************************************************************
1769 : !> \brief return the cell index i corresponding to the summ of cell_a and cell_c
1770 : !> \param a_index ...
1771 : !> \param c_index ...
1772 : !> \param qs_env ...
1773 : !> \return ...
1774 : ! **************************************************************************************************
1775 0 : FUNCTION get_apc_index(a_index, c_index, qs_env) RESULT(i_index)
1776 : INTEGER, INTENT(IN) :: a_index, c_index
1777 : TYPE(qs_environment_type), POINTER :: qs_env
1778 : INTEGER :: i_index
1779 :
1780 : INTEGER, DIMENSION(3) :: cell_i
1781 0 : INTEGER, DIMENSION(:, :), POINTER :: index_to_cell
1782 0 : INTEGER, DIMENSION(:, :, :), POINTER :: cell_to_index
1783 : TYPE(kpoint_type), POINTER :: kpoints
1784 :
1785 0 : CALL get_qs_env(qs_env, kpoints=kpoints)
1786 0 : CALL get_kpoint_info(kpoints, cell_to_index=cell_to_index, index_to_cell=index_to_cell)
1787 :
1788 0 : cell_i(:) = index_to_cell(:, a_index) + index_to_cell(:, c_index)
1789 :
1790 0 : IF (ANY([cell_i(1), cell_i(2), cell_i(3)] < LBOUND(cell_to_index)) .OR. &
1791 : ANY([cell_i(1), cell_i(2), cell_i(3)] > UBOUND(cell_to_index))) THEN
1792 :
1793 : i_index = 0
1794 : ELSE
1795 0 : i_index = cell_to_index(cell_i(1), cell_i(2), cell_i(3))
1796 : END IF
1797 :
1798 0 : END FUNCTION get_apc_index
1799 :
1800 : ! **************************************************************************************************
1801 : !> \brief return the cell index i corresponding to the summ of cell_a + cell_c - cell_b
1802 : !> \param apc_index ...
1803 : !> \param b_index ...
1804 : !> \param qs_env ...
1805 : !> \return ...
1806 : ! **************************************************************************************************
1807 714366 : FUNCTION get_i_index(apc_index, b_index, qs_env) RESULT(i_index)
1808 : INTEGER, INTENT(IN) :: apc_index, b_index
1809 : TYPE(qs_environment_type), POINTER :: qs_env
1810 : INTEGER :: i_index
1811 :
1812 : INTEGER, DIMENSION(3) :: cell_i
1813 714366 : INTEGER, DIMENSION(:, :), POINTER :: index_to_cell
1814 714366 : INTEGER, DIMENSION(:, :, :), POINTER :: cell_to_index
1815 : TYPE(kpoint_type), POINTER :: kpoints
1816 :
1817 714366 : CALL get_qs_env(qs_env, kpoints=kpoints)
1818 714366 : CALL get_kpoint_info(kpoints, cell_to_index=cell_to_index, index_to_cell=index_to_cell)
1819 :
1820 2857464 : cell_i(:) = index_to_cell(:, apc_index) - index_to_cell(:, b_index)
1821 :
1822 4880718 : IF (ANY([cell_i(1), cell_i(2), cell_i(3)] < LBOUND(cell_to_index)) .OR. &
1823 : ANY([cell_i(1), cell_i(2), cell_i(3)] > UBOUND(cell_to_index))) THEN
1824 :
1825 : i_index = 0
1826 : ELSE
1827 611682 : i_index = cell_to_index(cell_i(1), cell_i(2), cell_i(3))
1828 : END IF
1829 :
1830 714366 : END FUNCTION get_i_index
1831 :
1832 : ! **************************************************************************************************
1833 : !> \brief A routine that returns all allowed a,c pairs such that a+c images corresponds to the value
1834 : !> of the apc_index input. Takes into account that image a corresponds to 3c integrals, which
1835 : !> are ordered in their own way
1836 : !> \param ac_pairs ...
1837 : !> \param apc_index ...
1838 : !> \param ri_data ...
1839 : !> \param qs_env ...
1840 : ! **************************************************************************************************
1841 16994 : SUBROUTINE get_ac_pairs(ac_pairs, apc_index, ri_data, qs_env)
1842 : INTEGER, DIMENSION(:, :), INTENT(INOUT) :: ac_pairs
1843 : INTEGER, INTENT(IN) :: apc_index
1844 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
1845 : TYPE(qs_environment_type), POINTER :: qs_env
1846 :
1847 : INTEGER :: a_index, actual_img, c_index, nimg
1848 :
1849 16994 : nimg = SIZE(ac_pairs, 1)
1850 :
1851 1479714 : ac_pairs(:, :) = 0
1852 : !$OMP PARALLEL DO DEFAULT(NONE) SHARED(ac_pairs,nimg,ri_data,qs_env,apc_index) &
1853 16994 : !$OMP PRIVATE(a_index,actual_img,c_index)
1854 : DO a_index = 1, nimg
1855 : actual_img = ri_data%idx_to_img(a_index)
1856 : !c = a+c - a
1857 : c_index = get_i_index(apc_index, actual_img, qs_env)
1858 : ac_pairs(a_index, 1) = a_index
1859 : ac_pairs(a_index, 2) = c_index
1860 : END DO
1861 : !$OMP END PARALLEL DO
1862 :
1863 16994 : END SUBROUTINE get_ac_pairs
1864 :
1865 : ! **************************************************************************************************
1866 : !> \brief A routine that returns all allowed i,a+c pairs such that, for the given value of b, we have
1867 : !> i = a+c-b. Takes into account that image i corrsponds to the 3c ints, which are ordered in
1868 : !> their own way
1869 : !> \param iapc_pairs ...
1870 : !> \param b_index ...
1871 : !> \param ri_data ...
1872 : !> \param qs_env ...
1873 : !> \param actual_i_img ...
1874 : ! **************************************************************************************************
1875 13756 : SUBROUTINE get_iapc_pairs(iapc_pairs, b_index, ri_data, qs_env, actual_i_img)
1876 : INTEGER, DIMENSION(:, :), INTENT(INOUT) :: iapc_pairs
1877 : INTEGER, INTENT(IN) :: b_index
1878 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
1879 : TYPE(qs_environment_type), POINTER :: qs_env
1880 : INTEGER, DIMENSION(:), INTENT(INOUT), OPTIONAL :: actual_i_img
1881 :
1882 : INTEGER :: actual_img, apc_index, i_index, nimg
1883 :
1884 13756 : nimg = SIZE(iapc_pairs, 1)
1885 57318 : IF (PRESENT(actual_i_img)) actual_i_img(:) = 0
1886 :
1887 1078686 : iapc_pairs(:, :) = 0
1888 : !$OMP PARALLEL DO DEFAULT(NONE) SHARED(iapc_pairs,nimg,ri_data,qs_env,b_index,actual_i_img) &
1889 13756 : !$OMP PRIVATE(i_index,actual_img,apc_index)
1890 : DO i_index = 1, nimg
1891 : actual_img = ri_data%idx_to_img(i_index)
1892 : apc_index = get_apc_index_from_ib(actual_img, b_index, qs_env)
1893 : IF (apc_index == 0) CYCLE
1894 : iapc_pairs(i_index, 1) = i_index
1895 : iapc_pairs(i_index, 2) = apc_index
1896 : IF (PRESENT(actual_i_img)) actual_i_img(i_index) = actual_img
1897 : END DO
1898 :
1899 13756 : END SUBROUTINE get_iapc_pairs
1900 :
1901 : ! **************************************************************************************************
1902 : !> \brief A function that, given a cell index a, returun the index corresponding to -a, and zero if
1903 : !> if out of bounds
1904 : !> \param a_index ...
1905 : !> \param qs_env ...
1906 : !> \return ...
1907 : ! **************************************************************************************************
1908 83455 : FUNCTION get_opp_index(a_index, qs_env) RESULT(opp_index)
1909 : INTEGER, INTENT(IN) :: a_index
1910 : TYPE(qs_environment_type), POINTER :: qs_env
1911 : INTEGER :: opp_index
1912 :
1913 : INTEGER, DIMENSION(3) :: opp_cell
1914 83455 : INTEGER, DIMENSION(:, :), POINTER :: index_to_cell
1915 83455 : INTEGER, DIMENSION(:, :, :), POINTER :: cell_to_index
1916 : TYPE(kpoint_type), POINTER :: kpoints
1917 :
1918 83455 : NULLIFY (kpoints, cell_to_index, index_to_cell)
1919 :
1920 83455 : CALL get_qs_env(qs_env, kpoints=kpoints)
1921 83455 : CALL get_kpoint_info(kpoints, cell_to_index=cell_to_index, index_to_cell=index_to_cell)
1922 :
1923 333820 : opp_cell(:) = -index_to_cell(:, a_index)
1924 :
1925 584185 : IF (ANY([opp_cell(1), opp_cell(2), opp_cell(3)] < LBOUND(cell_to_index)) .OR. &
1926 : ANY([opp_cell(1), opp_cell(2), opp_cell(3)] > UBOUND(cell_to_index))) THEN
1927 :
1928 : opp_index = 0
1929 : ELSE
1930 83455 : opp_index = cell_to_index(opp_cell(1), opp_cell(2), opp_cell(3))
1931 : END IF
1932 :
1933 83455 : END FUNCTION get_opp_index
1934 :
1935 : ! **************************************************************************************************
1936 : !> \brief A routine that returns the actual non-symemtric density matrix for each image, by Fourier
1937 : !> transforming the kpoint density matrix
1938 : !> \param rho_ao_t ...
1939 : !> \param rho_ao ...
1940 : !> \param scale_prev_p ...
1941 : !> \param ri_data ...
1942 : !> \param qs_env ...
1943 : ! **************************************************************************************************
1944 542 : SUBROUTINE get_pmat_images(rho_ao_t, rho_ao, scale_prev_p, ri_data, qs_env)
1945 : TYPE(dbt_type), DIMENSION(:, :), INTENT(INOUT) :: rho_ao_t
1946 : TYPE(dbcsr_p_type), DIMENSION(:, :), INTENT(INOUT) :: rho_ao
1947 : REAL(dp), INTENT(IN) :: scale_prev_p
1948 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
1949 : TYPE(qs_environment_type), POINTER :: qs_env
1950 :
1951 : INTEGER :: cell_j(3), i_img, i_spin, iatom, icol, &
1952 : irow, j_img, jatom, mi_img, mj_img, &
1953 : nimg, nspins
1954 542 : INTEGER, DIMENSION(:, :, :), POINTER :: cell_to_index
1955 : LOGICAL :: found
1956 : REAL(dp) :: fac
1957 542 : REAL(dp), DIMENSION(:, :), POINTER :: pblock, pblock_desymm
1958 542 : TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: matrix_ks, rho_desymm
1959 4878 : TYPE(dbt_type) :: tmp
1960 : TYPE(dft_control_type), POINTER :: dft_control
1961 : TYPE(kpoint_type), POINTER :: kpoints
1962 : TYPE(neighbor_list_iterator_p_type), &
1963 542 : DIMENSION(:), POINTER :: nl_iterator
1964 : TYPE(neighbor_list_set_p_type), DIMENSION(:), &
1965 542 : POINTER :: sab_nl, sab_nl_nosym
1966 : TYPE(qs_scf_env_type), POINTER :: scf_env
1967 :
1968 542 : NULLIFY (rho_desymm, kpoints, sab_nl_nosym, scf_env, matrix_ks, dft_control, &
1969 542 : sab_nl, nl_iterator, cell_to_index, pblock, pblock_desymm)
1970 :
1971 542 : CALL get_qs_env(qs_env, kpoints=kpoints, scf_env=scf_env, matrix_ks_kp=matrix_ks, dft_control=dft_control)
1972 542 : CALL get_kpoint_info(kpoints, sab_nl_nosym=sab_nl_nosym, cell_to_index=cell_to_index, sab_nl=sab_nl)
1973 :
1974 542 : IF (dft_control%do_admm) THEN
1975 302 : CALL get_admm_env(qs_env%admm_env, matrix_ks_aux_fit_kp=matrix_ks)
1976 : END IF
1977 :
1978 542 : nspins = SIZE(matrix_ks, 1)
1979 542 : nimg = ri_data%nimg
1980 :
1981 34828 : ALLOCATE (rho_desymm(nspins, nimg))
1982 15216 : DO i_img = 1, nimg
1983 33202 : DO i_spin = 1, nspins
1984 17986 : ALLOCATE (rho_desymm(i_spin, i_img)%matrix)
1985 : CALL dbcsr_create(rho_desymm(i_spin, i_img)%matrix, template=matrix_ks(i_spin, i_img)%matrix, &
1986 17986 : matrix_type=dbcsr_type_no_symmetry)
1987 32660 : CALL cp_dbcsr_alloc_block_from_nbl(rho_desymm(i_spin, i_img)%matrix, sab_nl_nosym)
1988 : END DO
1989 : END DO
1990 542 : CALL dbt_create(rho_desymm(1, 1)%matrix, tmp)
1991 :
1992 : !We transfor the symmtric typed (but not actually symmetric: P_ab^i = P_ba^-i) real-spaced density
1993 : !matrix into proper non-symemtric ones (using the same nl for consistency)
1994 542 : CALL neighbor_list_iterator_create(nl_iterator, sab_nl)
1995 24035 : DO WHILE (neighbor_list_iterate(nl_iterator) == 0)
1996 23493 : CALL get_iterator_info(nl_iterator, iatom=iatom, jatom=jatom, cell=cell_j)
1997 23493 : j_img = cell_to_index(cell_j(1), cell_j(2), cell_j(3))
1998 23493 : IF (j_img > nimg .OR. j_img < 1) CYCLE
1999 :
2000 18111 : fac = 1.0_dp
2001 18111 : IF (iatom == jatom) fac = 0.5_dp
2002 18111 : mj_img = get_opp_index(j_img, qs_env)
2003 : !if no opposite image, then no sum of P^j + P^-j => need full diag
2004 18111 : IF (mj_img == 0) fac = 1.0_dp
2005 :
2006 18111 : irow = iatom
2007 18111 : icol = jatom
2008 18111 : IF (iatom > jatom) THEN
2009 : !because symmetric nl. Value for atom pair i,j is actually stored in j,i if i > j
2010 5935 : irow = jatom
2011 5935 : icol = iatom
2012 : END IF
2013 :
2014 41058 : DO i_spin = 1, nspins
2015 22405 : CALL dbcsr_get_block_p(rho_ao(i_spin, j_img)%matrix, irow, icol, pblock, found)
2016 22405 : IF (.NOT. found) CYCLE
2017 :
2018 : !distribution of symm and non-symm matrix match in that way
2019 22405 : CALL dbcsr_get_block_p(rho_desymm(i_spin, j_img)%matrix, iatom, jatom, pblock_desymm, found)
2020 22405 : IF (.NOT. found) CYCLE
2021 :
2022 90708 : IF (iatom > jatom) THEN
2023 723498 : pblock_desymm(:, :) = fac*TRANSPOSE(pblock(:, :))
2024 : ELSE
2025 1729360 : pblock_desymm(:, :) = fac*pblock(:, :)
2026 : END IF
2027 : END DO
2028 : END DO
2029 542 : CALL neighbor_list_iterator_release(nl_iterator)
2030 :
2031 15216 : DO i_img = 1, nimg
2032 33202 : DO i_spin = 1, nspins
2033 17986 : CALL dbt_scale(rho_ao_t(i_spin, i_img), scale_prev_p)
2034 :
2035 17986 : CALL dbt_copy_matrix_to_tensor(rho_desymm(i_spin, i_img)%matrix, tmp)
2036 17986 : CALL dbt_copy(tmp, rho_ao_t(i_spin, i_img), summation=.TRUE., move_data=.TRUE.)
2037 :
2038 : !symmetrize by addin transpose of opp img
2039 17986 : mi_img = get_opp_index(i_img, qs_env)
2040 17986 : IF (mi_img > 0 .AND. mi_img .LE. nimg) THEN
2041 15998 : CALL dbt_copy_matrix_to_tensor(rho_desymm(i_spin, mi_img)%matrix, tmp)
2042 15998 : CALL dbt_copy(tmp, rho_ao_t(i_spin, i_img), order=[2, 1], summation=.TRUE., move_data=.TRUE.)
2043 : END IF
2044 32660 : CALL dbt_filter(rho_ao_t(i_spin, i_img), ri_data%filter_eps)
2045 : END DO
2046 : END DO
2047 :
2048 15216 : DO i_img = 1, nimg
2049 33202 : DO i_spin = 1, nspins
2050 17986 : CALL dbcsr_release(rho_desymm(i_spin, i_img)%matrix)
2051 32660 : DEALLOCATE (rho_desymm(i_spin, i_img)%matrix)
2052 : END DO
2053 : END DO
2054 :
2055 542 : CALL dbt_destroy(tmp)
2056 542 : DEALLOCATE (rho_desymm)
2057 :
2058 1084 : END SUBROUTINE get_pmat_images
2059 :
2060 : ! **************************************************************************************************
2061 : !> \brief A routine that, given a cell index b and atom indices ij, returns a 2c tensor with the HFX
2062 : !> potential (P_i^0|Q_j^b), within the extended RI basis
2063 : !> \param t_2c_pot ...
2064 : !> \param mat_orig ...
2065 : !> \param atom_i ...
2066 : !> \param atom_j ...
2067 : !> \param img_b ...
2068 : !> \param ri_data ...
2069 : !> \param qs_env ...
2070 : !> \param do_inverse ...
2071 : !> \param para_env_ext ...
2072 : !> \param blacs_env_ext ...
2073 : !> \param dbcsr_template ...
2074 : !> \param off_diagonal ...
2075 : !> \param skip_inverse ...
2076 : ! **************************************************************************************************
2077 9208 : SUBROUTINE get_ext_2c_int(t_2c_pot, mat_orig, atom_i, atom_j, img_b, ri_data, qs_env, do_inverse, &
2078 : para_env_ext, blacs_env_ext, dbcsr_template, off_diagonal, skip_inverse)
2079 : TYPE(dbt_type), INTENT(INOUT) :: t_2c_pot
2080 : TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT) :: mat_orig
2081 : INTEGER, INTENT(IN) :: atom_i, atom_j, img_b
2082 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
2083 : TYPE(qs_environment_type), POINTER :: qs_env
2084 : LOGICAL, INTENT(IN), OPTIONAL :: do_inverse
2085 : TYPE(mp_para_env_type), OPTIONAL, POINTER :: para_env_ext
2086 : TYPE(cp_blacs_env_type), OPTIONAL, POINTER :: blacs_env_ext
2087 : TYPE(dbcsr_type), OPTIONAL, POINTER :: dbcsr_template
2088 : LOGICAL, INTENT(IN), OPTIONAL :: off_diagonal, skip_inverse
2089 :
2090 : CHARACTER(LEN=*), PARAMETER :: routineN = 'get_ext_2c_int'
2091 :
2092 : INTEGER :: group, handle, handle2, i_img, i_RI, iatom, iblk, ikind, img_tot, j_img, j_RI, &
2093 : jatom, jblk, jkind, n_dependent, natom, nblks_RI, nimg, nkind
2094 9208 : INTEGER, ALLOCATABLE, DIMENSION(:) :: dist1, dist2
2095 9208 : INTEGER, ALLOCATABLE, DIMENSION(:, :) :: present_atoms_i, present_atoms_j
2096 : INTEGER, DIMENSION(3) :: cell_b, cell_i, cell_j, cell_tot
2097 9208 : INTEGER, DIMENSION(:), POINTER :: col_dist, col_dist_ext, ri_blk_size_ext, &
2098 9208 : row_dist, row_dist_ext
2099 9208 : INTEGER, DIMENSION(:, :), POINTER :: index_to_cell, pgrid
2100 9208 : INTEGER, DIMENSION(:, :, :), POINTER :: cell_to_index
2101 : LOGICAL :: do_inverse_prv, found, my_offd, &
2102 : skip_inverse_prv, use_template
2103 : REAL(dp) :: bfac, dij, r0, r1, threshold
2104 : REAL(dp), DIMENSION(3) :: ri, rij, rj, rref, scoord
2105 9208 : REAL(dp), DIMENSION(:, :), POINTER :: pblock
2106 : TYPE(cell_type), POINTER :: cell
2107 : TYPE(cp_blacs_env_type), POINTER :: blacs_env
2108 : TYPE(dbcsr_distribution_type) :: dbcsr_dist, dbcsr_dist_ext
2109 : TYPE(dbcsr_iterator_type) :: dbcsr_iter
2110 : TYPE(dbcsr_type) :: work, work_tight, work_tight_inv
2111 64456 : TYPE(dbt_type) :: t_2c_tmp
2112 : TYPE(distribution_2d_type), POINTER :: dist_2d
2113 : TYPE(gto_basis_set_p_type), ALLOCATABLE, &
2114 9208 : DIMENSION(:), TARGET :: basis_set_RI
2115 : TYPE(kpoint_type), POINTER :: kpoints
2116 : TYPE(mp_para_env_type), POINTER :: para_env
2117 : TYPE(neighbor_list_iterator_p_type), &
2118 9208 : DIMENSION(:), POINTER :: nl_iterator
2119 : TYPE(neighbor_list_set_p_type), DIMENSION(:), &
2120 9208 : POINTER :: nl_2c
2121 9208 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
2122 9208 : TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
2123 :
2124 9208 : NULLIFY (qs_kind_set, nl_2c, nl_iterator, cell, kpoints, cell_to_index, index_to_cell, dist_2d, &
2125 9208 : para_env, pblock, blacs_env, particle_set, col_dist, row_dist, pgrid, &
2126 9208 : col_dist_ext, row_dist_ext)
2127 :
2128 9208 : CALL timeset(routineN, handle)
2129 :
2130 : !Idea: run over the neighbor list once for i and once for j, and record in which cell the MIC
2131 : ! atoms are. Then loop over the atoms and only take the pairs the we need
2132 :
2133 : CALL get_qs_env(qs_env, natom=natom, nkind=nkind, qs_kind_set=qs_kind_set, cell=cell, &
2134 9208 : kpoints=kpoints, para_env=para_env, blacs_env=blacs_env, particle_set=particle_set)
2135 9208 : CALL get_kpoint_info(kpoints, cell_to_index=cell_to_index, index_to_cell=index_to_cell)
2136 :
2137 9208 : do_inverse_prv = .FALSE.
2138 9208 : IF (PRESENT(do_inverse)) do_inverse_prv = do_inverse
2139 320 : IF (do_inverse_prv) THEN
2140 320 : CPASSERT(atom_i == atom_j)
2141 : END IF
2142 :
2143 9208 : skip_inverse_prv = .FALSE.
2144 9208 : IF (PRESENT(skip_inverse)) skip_inverse_prv = skip_inverse
2145 :
2146 9208 : my_offd = .FALSE.
2147 9208 : IF (PRESENT(off_diagonal)) my_offd = off_diagonal
2148 :
2149 9208 : IF (PRESENT(para_env_ext)) para_env => para_env_ext
2150 9208 : IF (PRESENT(blacs_env_ext)) blacs_env => blacs_env_ext
2151 :
2152 9208 : nimg = SIZE(mat_orig)
2153 :
2154 9208 : CALL timeset(routineN//"_nl_iter", handle2)
2155 :
2156 : !create our own dist_2d in the subgroup
2157 36832 : ALLOCATE (dist1(natom), dist2(natom))
2158 27624 : DO iatom = 1, natom
2159 18416 : dist1(iatom) = MOD(iatom, blacs_env%num_pe(1))
2160 27624 : dist2(iatom) = MOD(iatom, blacs_env%num_pe(2))
2161 : END DO
2162 9208 : CALL distribution_2d_create(dist_2d, dist1, dist2, nkind, particle_set, blacs_env_ext=blacs_env)
2163 :
2164 40679 : ALLOCATE (basis_set_RI(nkind))
2165 9208 : CALL basis_set_list_setup(basis_set_RI, ri_data%ri_basis_type, qs_kind_set)
2166 :
2167 : CALL build_2c_neighbor_lists(nl_2c, basis_set_RI, basis_set_RI, ri_data%ri_metric, &
2168 9208 : "HFX_2c_nl_RI", qs_env, sym_ij=.FALSE., dist_2d=dist_2d)
2169 :
2170 55248 : ALLOCATE (present_atoms_i(natom, nimg), present_atoms_j(natom, nimg))
2171 953167 : present_atoms_i = 0
2172 953167 : present_atoms_j = 0
2173 :
2174 9208 : CALL neighbor_list_iterator_create(nl_iterator, nl_2c)
2175 409876 : DO WHILE (neighbor_list_iterate(nl_iterator) == 0)
2176 : CALL get_iterator_info(nl_iterator, iatom=iatom, jatom=jatom, r=rij, cell=cell_j, &
2177 400668 : ikind=ikind, jkind=jkind)
2178 :
2179 1602672 : dij = NORM2(rij)
2180 :
2181 400668 : j_img = cell_to_index(cell_j(1), cell_j(2), cell_j(3))
2182 400668 : IF (j_img > nimg .OR. j_img < 1) CYCLE
2183 :
2184 398338 : IF (iatom == atom_i .AND. dij .LE. ri_data%kp_RI_range) present_atoms_i(jatom, j_img) = 1
2185 407546 : IF (iatom == atom_j .AND. dij .LE. ri_data%kp_RI_range) present_atoms_j(jatom, j_img) = 1
2186 : END DO
2187 9208 : CALL neighbor_list_iterator_release(nl_iterator)
2188 9208 : CALL release_neighbor_list_sets(nl_2c)
2189 9208 : CALL distribution_2d_release(dist_2d)
2190 9208 : CALL timestop(handle2)
2191 :
2192 9208 : CALL para_env%sum(present_atoms_i)
2193 9208 : CALL para_env%sum(present_atoms_j)
2194 :
2195 : !Need to build a work matrix with matching distribution to mat_orig
2196 : !If template is provided, use it. If not, we create it.
2197 9208 : use_template = .FALSE.
2198 9208 : IF (PRESENT(dbcsr_template)) THEN
2199 8452 : IF (ASSOCIATED(dbcsr_template)) use_template = .TRUE.
2200 : END IF
2201 :
2202 : IF (use_template) THEN
2203 8158 : CALL dbcsr_create(work, template=dbcsr_template)
2204 : ELSE
2205 1050 : CALL dbcsr_get_info(mat_orig(1), distribution=dbcsr_dist)
2206 1050 : CALL dbcsr_distribution_get(dbcsr_dist, row_dist=row_dist, col_dist=col_dist, group=group, pgrid=pgrid)
2207 4200 : ALLOCATE (row_dist_ext(ri_data%ncell_RI*natom), col_dist_ext(ri_data%ncell_RI*natom))
2208 2100 : ALLOCATE (ri_blk_size_ext(ri_data%ncell_RI*natom))
2209 7204 : DO i_RI = 1, ri_data%ncell_RI
2210 30770 : row_dist_ext((i_RI - 1)*natom + 1:i_RI*natom) = row_dist(:)
2211 30770 : col_dist_ext((i_RI - 1)*natom + 1:i_RI*natom) = col_dist(:)
2212 19512 : RI_blk_size_ext((i_RI - 1)*natom + 1:i_RI*natom) = ri_data%bsizes_RI(:)
2213 : END DO
2214 :
2215 : CALL dbcsr_distribution_new(dbcsr_dist_ext, group=group, pgrid=pgrid, &
2216 1050 : row_dist=row_dist_ext, col_dist=col_dist_ext)
2217 : CALL dbcsr_create(work, dist=dbcsr_dist_ext, name="RI_ext", matrix_type=dbcsr_type_no_symmetry, &
2218 1050 : row_blk_size=RI_blk_size_ext, col_blk_size=RI_blk_size_ext)
2219 1050 : CALL dbcsr_distribution_release(dbcsr_dist_ext)
2220 1050 : DEALLOCATE (col_dist_ext, row_dist_ext, RI_blk_size_ext)
2221 :
2222 3150 : IF (PRESENT(dbcsr_template)) THEN
2223 294 : ALLOCATE (dbcsr_template)
2224 294 : CALL dbcsr_create(dbcsr_template, template=work)
2225 : END IF
2226 : END IF !use_template
2227 :
2228 36832 : cell_b(:) = index_to_cell(:, img_b)
2229 323861 : DO i_img = 1, nimg
2230 314653 : i_RI = ri_data%img_to_RI_cell(i_img)
2231 314653 : IF (i_RI == 0) CYCLE
2232 221148 : cell_i(:) = index_to_cell(:, i_img)
2233 2283724 : DO j_img = 1, nimg
2234 2219229 : j_RI = ri_data%img_to_RI_cell(j_img)
2235 2219229 : IF (j_RI == 0) CYCLE
2236 1792812 : cell_j(:) = index_to_cell(:, j_img)
2237 1792812 : cell_tot = cell_j - cell_i + cell_b
2238 :
2239 3092996 : IF (ANY([cell_tot(1), cell_tot(2), cell_tot(3)] < LBOUND(cell_to_index)) .OR. &
2240 : ANY([cell_tot(1), cell_tot(2), cell_tot(3)] > UBOUND(cell_to_index))) CYCLE
2241 409584 : img_tot = cell_to_index(cell_tot(1), cell_tot(2), cell_tot(3))
2242 409584 : IF (img_tot > nimg .OR. img_tot < 1) CYCLE
2243 :
2244 280422 : CALL dbcsr_iterator_start(dbcsr_iter, mat_orig(img_tot))
2245 817406 : DO WHILE (dbcsr_iterator_blocks_left(dbcsr_iter))
2246 536984 : CALL dbcsr_iterator_next_block(dbcsr_iter, row=iatom, column=jatom)
2247 536984 : IF (present_atoms_i(iatom, i_img) == 0) CYCLE
2248 209075 : IF (present_atoms_j(jatom, j_img) == 0) CYCLE
2249 93781 : IF (my_offd .AND. (i_RI - 1)*natom + iatom == (j_RI - 1)*natom + jatom) CYCLE
2250 :
2251 93456 : CALL dbcsr_get_block_p(mat_orig(img_tot), iatom, jatom, pblock, found)
2252 93456 : IF (.NOT. found) CYCLE
2253 :
2254 817406 : CALL dbcsr_put_block(work, (i_RI - 1)*natom + iatom, (j_RI - 1)*natom + jatom, pblock)
2255 :
2256 : END DO
2257 2775685 : CALL dbcsr_iterator_stop(dbcsr_iter)
2258 :
2259 : END DO !j_img
2260 : END DO !i_img
2261 9208 : CALL dbcsr_finalize(work)
2262 :
2263 9208 : IF (do_inverse_prv) THEN
2264 :
2265 320 : r1 = ri_data%kp_RI_range
2266 320 : r0 = ri_data%kp_bump_rad
2267 :
2268 : !Because there are a lot of empty rows/cols in work, we need to get rid of them for inversion
2269 24296 : nblks_RI = SUM(present_atoms_i)
2270 1600 : ALLOCATE (col_dist_ext(nblks_RI), row_dist_ext(nblks_RI), RI_blk_size_ext(nblks_RI))
2271 320 : iblk = 0
2272 8312 : DO i_img = 1, nimg
2273 7992 : i_RI = ri_data%img_to_RI_cell(i_img)
2274 7992 : IF (i_RI == 0) CYCLE
2275 6104 : DO iatom = 1, natom
2276 3856 : IF (present_atoms_i(iatom, i_img) == 0) CYCLE
2277 1300 : iblk = iblk + 1
2278 1300 : col_dist_ext(iblk) = col_dist(iatom)
2279 1300 : row_dist_ext(iblk) = row_dist(iatom)
2280 11848 : RI_blk_size_ext(iblk) = ri_data%bsizes_RI(iatom)
2281 : END DO
2282 : END DO
2283 :
2284 : CALL dbcsr_distribution_new(dbcsr_dist_ext, group=group, pgrid=pgrid, &
2285 320 : row_dist=row_dist_ext, col_dist=col_dist_ext)
2286 : CALL dbcsr_create(work_tight, dist=dbcsr_dist_ext, name="RI_ext", matrix_type=dbcsr_type_no_symmetry, &
2287 320 : row_blk_size=RI_blk_size_ext, col_blk_size=RI_blk_size_ext)
2288 : CALL dbcsr_create(work_tight_inv, dist=dbcsr_dist_ext, name="RI_ext", matrix_type=dbcsr_type_no_symmetry, &
2289 320 : row_blk_size=RI_blk_size_ext, col_blk_size=RI_blk_size_ext)
2290 320 : CALL dbcsr_distribution_release(dbcsr_dist_ext)
2291 320 : DEALLOCATE (col_dist_ext, row_dist_ext, RI_blk_size_ext)
2292 :
2293 : !We apply a bump function to the RI metric inverse for smooth RI basis extension:
2294 : ! S^-1 = B * ((P|Q)_D + B*(P|Q)_OD*B)^-1 * B, with D block-diagonal blocks and OD off-diagonal
2295 320 : rref = pbc(particle_set(atom_i)%r, cell)
2296 :
2297 320 : iblk = 0
2298 8312 : DO i_img = 1, nimg
2299 7992 : i_RI = ri_data%img_to_RI_cell(i_img)
2300 7992 : IF (i_RI == 0) CYCLE
2301 6104 : DO iatom = 1, natom
2302 3856 : IF (present_atoms_i(iatom, i_img) == 0) CYCLE
2303 1300 : iblk = iblk + 1
2304 :
2305 1300 : CALL real_to_scaled(scoord, pbc(particle_set(iatom)%r, cell), cell)
2306 5200 : CALL scaled_to_real(ri, scoord(:) + index_to_cell(:, i_img), cell)
2307 :
2308 1300 : jblk = 0
2309 47668 : DO j_img = 1, nimg
2310 38376 : j_RI = ri_data%img_to_RI_cell(j_img)
2311 38376 : IF (j_RI == 0) CYCLE
2312 32032 : DO jatom = 1, natom
2313 18784 : IF (present_atoms_j(jatom, j_img) == 0) CYCLE
2314 6124 : jblk = jblk + 1
2315 :
2316 6124 : CALL real_to_scaled(scoord, pbc(particle_set(jatom)%r, cell), cell)
2317 24496 : CALL scaled_to_real(rj, scoord(:) + index_to_cell(:, j_img), cell)
2318 :
2319 6124 : CALL dbcsr_get_block_p(work, (i_RI - 1)*natom + iatom, (j_RI - 1)*natom + jatom, pblock, found)
2320 6124 : IF (.NOT. found) CYCLE
2321 :
2322 2732 : bfac = 1.0_dp
2323 15224 : IF (iblk .NE. jblk) bfac = bump(NORM2(ri - rref), r0, r1)*bump(NORM2(rj - rref), r0, r1)
2324 5601896 : CALL dbcsr_put_block(work_tight, iblk, jblk, bfac*pblock(:, :))
2325 : END DO
2326 : END DO
2327 : END DO
2328 : END DO
2329 320 : CALL dbcsr_finalize(work_tight)
2330 320 : CALL dbcsr_clear(work)
2331 :
2332 320 : IF (.NOT. skip_inverse_prv) THEN
2333 160 : SELECT CASE (ri_data%t2c_method)
2334 : CASE (hfx_ri_do_2c_iter)
2335 0 : threshold = MAX(ri_data%filter_eps, 1.0e-12_dp)
2336 0 : CALL invert_hotelling(work_tight_inv, work_tight, threshold=threshold, silent=.FALSE.)
2337 : CASE (hfx_ri_do_2c_cholesky)
2338 160 : CALL dbcsr_copy(work_tight_inv, work_tight)
2339 160 : CALL cp_dbcsr_cholesky_decompose(work_tight_inv, para_env=para_env, blacs_env=blacs_env)
2340 : CALL cp_dbcsr_cholesky_invert(work_tight_inv, para_env=para_env, blacs_env=blacs_env, &
2341 160 : uplo_to_full=.TRUE.)
2342 : CASE (hfx_ri_do_2c_diag)
2343 0 : CALL dbcsr_copy(work_tight_inv, work_tight)
2344 : CALL cp_dbcsr_power(work_tight_inv, -1.0_dp, ri_data%eps_eigval, n_dependent, &
2345 160 : para_env, blacs_env, verbose=ri_data%unit_nr_dbcsr > 0)
2346 : END SELECT
2347 : ELSE
2348 160 : CALL dbcsr_copy(work_tight_inv, work_tight)
2349 : END IF
2350 :
2351 : !move back data to standard extended RI pattern
2352 : !Note: we apply the external bump to ((P|Q)_D + B*(P|Q)_OD*B)^-1 later, because this matrix
2353 : ! is required for forces
2354 320 : iblk = 0
2355 8312 : DO i_img = 1, nimg
2356 7992 : i_RI = ri_data%img_to_RI_cell(i_img)
2357 7992 : IF (i_RI == 0) CYCLE
2358 6104 : DO iatom = 1, natom
2359 3856 : IF (present_atoms_i(iatom, i_img) == 0) CYCLE
2360 1300 : iblk = iblk + 1
2361 :
2362 1300 : jblk = 0
2363 47668 : DO j_img = 1, nimg
2364 38376 : j_RI = ri_data%img_to_RI_cell(j_img)
2365 38376 : IF (j_RI == 0) CYCLE
2366 32032 : DO jatom = 1, natom
2367 18784 : IF (present_atoms_j(jatom, j_img) == 0) CYCLE
2368 6124 : jblk = jblk + 1
2369 :
2370 6124 : CALL dbcsr_get_block_p(work_tight_inv, iblk, jblk, pblock, found)
2371 6124 : IF (.NOT. found) CYCLE
2372 :
2373 60057 : CALL dbcsr_put_block(work, (i_RI - 1)*natom + iatom, (j_RI - 1)*natom + jatom, pblock)
2374 : END DO
2375 : END DO
2376 : END DO
2377 : END DO
2378 320 : CALL dbcsr_finalize(work)
2379 :
2380 320 : CALL dbcsr_release(work_tight)
2381 640 : CALL dbcsr_release(work_tight_inv)
2382 : END IF
2383 :
2384 9208 : CALL dbt_create(work, t_2c_tmp)
2385 9208 : CALL dbt_copy_matrix_to_tensor(work, t_2c_tmp)
2386 9208 : CALL dbt_copy(t_2c_tmp, t_2c_pot, move_data=.TRUE.)
2387 9208 : CALL dbt_filter(t_2c_pot, ri_data%filter_eps)
2388 :
2389 9208 : CALL dbt_destroy(t_2c_tmp)
2390 9208 : CALL dbcsr_release(work)
2391 :
2392 9208 : CALL timestop(handle)
2393 :
2394 36832 : END SUBROUTINE get_ext_2c_int
2395 :
2396 : ! **************************************************************************************************
2397 : !> \brief Pre-contract the density matrices with the 3-center integrals:
2398 : !> P_sigma^a,lambda^a+c (mu^0 sigma^a| P^0)
2399 : !> \param t_3c_apc ...
2400 : !> \param rho_ao_t ...
2401 : !> \param ri_data ...
2402 : !> \param qs_env ...
2403 : ! **************************************************************************************************
2404 294 : SUBROUTINE contract_pmat_3c(t_3c_apc, rho_ao_t, ri_data, qs_env)
2405 : TYPE(dbt_type), DIMENSION(:, :), INTENT(INOUT) :: t_3c_apc, rho_ao_t
2406 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
2407 : TYPE(qs_environment_type), POINTER :: qs_env
2408 :
2409 : CHARACTER(len=*), PARAMETER :: routineN = 'contract_pmat_3c'
2410 :
2411 : INTEGER :: apc_img, b_img, batch_size, handle, &
2412 : i_batch, i_img, i_spin, idx, j_batch, &
2413 : n_batch_img, n_batch_nze, nimg, &
2414 : nimg_nze, nspins
2415 : INTEGER(int_8) :: nflop, nze
2416 294 : INTEGER, ALLOCATABLE, DIMENSION(:) :: apc_filter, batch_ranges_img, &
2417 294 : batch_ranges_nze, int_indices
2418 294 : INTEGER, ALLOCATABLE, DIMENSION(:, :) :: ac_pairs, iapc_pairs
2419 : REAL(dp) :: occ, t1, t2
2420 2646 : TYPE(dbt_type) :: t_3c_tmp
2421 294 : TYPE(dbt_type), ALLOCATABLE, DIMENSION(:) :: ints_stack, res_stack, rho_stack
2422 : TYPE(dft_control_type), POINTER :: dft_control
2423 :
2424 294 : CALL timeset(routineN, handle)
2425 :
2426 294 : CALL get_qs_env(qs_env, dft_control=dft_control)
2427 :
2428 294 : nimg = ri_data%nimg
2429 294 : nimg_nze = ri_data%nimg_nze
2430 294 : nspins = dft_control%nspins
2431 :
2432 294 : CALL dbt_create(t_3c_apc(1, 1), t_3c_tmp)
2433 :
2434 294 : batch_size = ri_data%kp_stack_size
2435 :
2436 1470 : ALLOCATE (apc_filter(nimg), iapc_pairs(nimg, 2))
2437 8182 : apc_filter = 0
2438 8182 : DO b_img = 1, nimg
2439 7888 : CALL get_iapc_pairs(iapc_pairs, b_img, ri_data, qs_env)
2440 217710 : DO i_img = 1, nimg_nze
2441 209528 : idx = iapc_pairs(i_img, 2)
2442 209528 : IF (idx < 1 .OR. idx > nimg) CYCLE
2443 217416 : apc_filter(idx) = 1
2444 : END DO
2445 : END DO
2446 :
2447 : !batching over all images
2448 294 : n_batch_img = nimg/batch_size
2449 294 : IF (MODULO(nimg, batch_size) .NE. 0) n_batch_img = n_batch_img + 1
2450 882 : ALLOCATE (batch_ranges_img(n_batch_img + 1))
2451 876 : DO i_batch = 1, n_batch_img
2452 876 : batch_ranges_img(i_batch) = (i_batch - 1)*batch_size + 1
2453 : END DO
2454 294 : batch_ranges_img(n_batch_img + 1) = nimg + 1
2455 :
2456 : !batching over images with non-zero 3c integrals
2457 294 : n_batch_nze = nimg_nze/batch_size
2458 294 : IF (MODULO(nimg_nze, batch_size) .NE. 0) n_batch_nze = n_batch_nze + 1
2459 882 : ALLOCATE (batch_ranges_nze(n_batch_nze + 1))
2460 720 : DO i_batch = 1, n_batch_nze
2461 720 : batch_ranges_nze(i_batch) = (i_batch - 1)*batch_size + 1
2462 : END DO
2463 294 : batch_ranges_nze(n_batch_nze + 1) = nimg_nze + 1
2464 :
2465 : !Create the stack tensors in the approriate distribution
2466 9114 : ALLOCATE (rho_stack(2), ints_stack(2), res_stack(2))
2467 : CALL get_stack_tensors(res_stack, rho_stack, ints_stack, rho_ao_t(1, 1), &
2468 294 : ri_data%t_3c_int_ctr_1(1, 1), batch_size, ri_data, qs_env)
2469 :
2470 1176 : ALLOCATE (ac_pairs(nimg, 2), int_indices(nimg_nze))
2471 6056 : DO i_img = 1, nimg_nze
2472 6056 : int_indices(i_img) = i_img
2473 : END DO
2474 :
2475 294 : t1 = m_walltime()
2476 720 : DO j_batch = 1, n_batch_nze
2477 : !First batch is over the integrals. They are always in the same order, consistent with get_ac_pairs
2478 : CALL fill_3c_stack(ints_stack(1), ri_data%t_3c_int_ctr_1(1, :), int_indices, 3, ri_data, &
2479 1278 : img_bounds=[batch_ranges_nze(j_batch), batch_ranges_nze(j_batch + 1)])
2480 426 : CALL dbt_copy(ints_stack(1), ints_stack(2), move_data=.TRUE.)
2481 :
2482 1280 : DO i_spin = 1, nspins
2483 2296 : DO i_batch = 1, n_batch_img
2484 : !Second batch is over the P matrix. Here we fill the stacked rho tensors col by col
2485 18304 : DO apc_img = batch_ranges_img(i_batch), batch_ranges_img(i_batch + 1) - 1
2486 16994 : IF (apc_filter(apc_img) == 0) CYCLE
2487 16994 : CALL get_ac_pairs(ac_pairs, apc_img, ri_data, qs_env)
2488 : CALL fill_2c_stack(rho_stack(1), rho_ao_t(i_spin, :), ac_pairs(:, 2), 1, ri_data, &
2489 : img_bounds=[batch_ranges_nze(j_batch), batch_ranges_nze(j_batch + 1)], &
2490 52292 : shift=apc_img - batch_ranges_img(i_batch) + 1)
2491 :
2492 : END DO !apc_img
2493 1310 : CALL get_tensor_occupancy(rho_stack(1), nze, occ)
2494 1310 : IF (nze == 0) CYCLE
2495 1290 : CALL dbt_copy(rho_stack(1), rho_stack(2), move_data=.TRUE.)
2496 :
2497 : !The actual contraction
2498 1290 : CALL dbt_batched_contract_init(rho_stack(2))
2499 : CALL dbt_contract(1.0_dp, ints_stack(2), rho_stack(2), &
2500 : 0.0_dp, res_stack(2), map_1=[1, 2], map_2=[3], &
2501 : contract_1=[3], notcontract_1=[1, 2], &
2502 : contract_2=[1], notcontract_2=[2], &
2503 1290 : filter_eps=ri_data%filter_eps, flop=nflop)
2504 1290 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2505 1290 : CALL dbt_batched_contract_finalize(rho_stack(2))
2506 1290 : CALL dbt_copy(res_stack(2), res_stack(1), move_data=.TRUE.)
2507 :
2508 19996 : DO apc_img = batch_ranges_img(i_batch), batch_ranges_img(i_batch + 1) - 1
2509 : !Destack the resulting tensor and put it in t_3c_apc with correct apc_img
2510 16836 : IF (apc_filter(apc_img) == 0) CYCLE
2511 16836 : CALL unstack_t_3c_apc(t_3c_tmp, res_stack(1), apc_img - batch_ranges_img(i_batch) + 1)
2512 18146 : CALL dbt_copy(t_3c_tmp, t_3c_apc(i_spin, apc_img), summation=.TRUE., move_data=.TRUE.)
2513 : END DO
2514 :
2515 : END DO !i_batch
2516 : END DO !i_spin
2517 : END DO !j_batch
2518 294 : DEALLOCATE (batch_ranges_img)
2519 294 : DEALLOCATE (batch_ranges_nze)
2520 294 : t2 = m_walltime()
2521 294 : ri_data%dbcsr_time = ri_data%dbcsr_time + t2 - t1
2522 :
2523 294 : CALL dbt_destroy(rho_stack(1))
2524 294 : CALL dbt_destroy(rho_stack(2))
2525 294 : CALL dbt_destroy(ints_stack(1))
2526 294 : CALL dbt_destroy(ints_stack(2))
2527 294 : CALL dbt_destroy(res_stack(1))
2528 294 : CALL dbt_destroy(res_stack(2))
2529 294 : CALL dbt_destroy(t_3c_tmp)
2530 :
2531 294 : CALL timestop(handle)
2532 :
2533 2940 : END SUBROUTINE contract_pmat_3c
2534 :
2535 : ! **************************************************************************************************
2536 : !> \brief Pre-contract 3-center integrals with the bumped invrse RI metric, for each atom
2537 : !> \param t_3c_int ...
2538 : !> \param ri_data ...
2539 : !> \param qs_env ...
2540 : ! **************************************************************************************************
2541 80 : SUBROUTINE precontract_3c_ints(t_3c_int, ri_data, qs_env)
2542 : TYPE(dbt_type), DIMENSION(:, :), INTENT(INOUT) :: t_3c_int
2543 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
2544 : TYPE(qs_environment_type), POINTER :: qs_env
2545 :
2546 : CHARACTER(len=*), PARAMETER :: routineN = 'precontract_3c_ints'
2547 :
2548 : INTEGER :: batch_size, handle, i_batch, i_img, &
2549 : i_RI, iatom, is, n_batch, natom, &
2550 : nblks, nblks_3c(3), nimg
2551 : INTEGER(int_8) :: nflop
2552 80 : INTEGER, ALLOCATABLE, DIMENSION(:) :: batch_ranges, bsizes_RI_ext, bsizes_RI_ext_split, &
2553 80 : bsizes_stack, dist1, dist2, dist3, dist_stack3, idx_to_at_AO, int_indices
2554 720 : TYPE(dbt_distribution_type) :: t_dist
2555 16720 : TYPE(dbt_type) :: t_2c_RI_tmp(2), t_3c_tmp(3)
2556 :
2557 80 : CALL timeset(routineN, handle)
2558 :
2559 80 : CALL get_qs_env(qs_env, natom=natom)
2560 :
2561 80 : nimg = ri_data%nimg
2562 240 : ALLOCATE (int_indices(nimg))
2563 2078 : DO i_img = 1, nimg
2564 2078 : int_indices(i_img) = i_img
2565 : END DO
2566 :
2567 240 : ALLOCATE (idx_to_at_AO(SIZE(ri_data%bsizes_AO_split)))
2568 80 : CALL get_idx_to_atom(idx_to_at_AO, ri_data%bsizes_AO_split, ri_data%bsizes_AO)
2569 :
2570 80 : nblks = SIZE(ri_data%bsizes_RI_split)
2571 240 : ALLOCATE (bsizes_RI_ext(ri_data%ncell_RI*natom))
2572 240 : ALLOCATE (bsizes_RI_ext_split(ri_data%ncell_RI*nblks))
2573 562 : DO i_RI = 1, ri_data%ncell_RI
2574 1446 : bsizes_RI_ext((i_RI - 1)*natom + 1:i_RI*natom) = ri_data%bsizes_RI(:)
2575 2744 : bsizes_RI_ext_split((i_RI - 1)*nblks + 1:i_RI*nblks) = ri_data%bsizes_RI_split(:)
2576 : END DO
2577 : CALL create_2c_tensor(t_2c_RI_tmp(1), dist1, dist2, ri_data%pgrid_2d, &
2578 : bsizes_RI_ext, bsizes_RI_ext, &
2579 : name="(RI | RI)")
2580 80 : DEALLOCATE (dist1, dist2)
2581 : CALL create_2c_tensor(t_2c_RI_tmp(2), dist1, dist2, ri_data%pgrid_2d, &
2582 : bsizes_RI_ext_split, bsizes_RI_ext_split, &
2583 : name="(RI | RI)")
2584 80 : DEALLOCATE (dist1, dist2)
2585 :
2586 : !For more efficiency, we stack multiple images of the 3-center integrals into a single tensor
2587 80 : batch_size = ri_data%kp_stack_size
2588 80 : n_batch = nimg/batch_size
2589 80 : IF (MODULO(nimg, batch_size) .NE. 0) n_batch = n_batch + 1
2590 240 : ALLOCATE (batch_ranges(n_batch + 1))
2591 234 : DO i_batch = 1, n_batch
2592 234 : batch_ranges(i_batch) = (i_batch - 1)*batch_size + 1
2593 : END DO
2594 80 : batch_ranges(n_batch + 1) = nimg + 1
2595 :
2596 80 : nblks = SIZE(ri_data%bsizes_AO_split)
2597 240 : ALLOCATE (bsizes_stack(batch_size*nblks))
2598 1456 : DO is = 1, batch_size
2599 6736 : bsizes_stack((is - 1)*nblks + 1:is*nblks) = ri_data%bsizes_AO_split(:)
2600 : END DO
2601 :
2602 80 : CALL dbt_get_info(t_3c_int(1, 1), nblks_total=nblks_3c)
2603 720 : ALLOCATE (dist1(nblks_3c(1)), dist2(nblks_3c(2)), dist3(nblks_3c(3)), dist_stack3(batch_size*nblks_3c(3)))
2604 80 : CALL dbt_get_info(t_3c_int(1, 1), proc_dist_1=dist1, proc_dist_2=dist2, proc_dist_3=dist3)
2605 1456 : DO is = 1, batch_size
2606 6736 : dist_stack3((is - 1)*nblks_3c(3) + 1:is*nblks_3c(3)) = dist3(:)
2607 : END DO
2608 :
2609 80 : CALL dbt_distribution_new(t_dist, ri_data%pgrid, dist1, dist2, dist_stack3)
2610 : CALL dbt_create(t_3c_tmp(1), "ints_stack", t_dist, [1], [2, 3], bsizes_RI_ext_split, &
2611 80 : ri_data%bsizes_AO_split, bsizes_stack)
2612 80 : CALL dbt_distribution_destroy(t_dist)
2613 80 : DEALLOCATE (dist1, dist2, dist3, dist_stack3)
2614 :
2615 80 : CALL dbt_create(t_3c_tmp(1), t_3c_tmp(2))
2616 80 : CALL dbt_create(t_3c_int(1, 1), t_3c_tmp(3))
2617 :
2618 240 : DO iatom = 1, natom
2619 160 : CALL dbt_copy(ri_data%t_2c_inv(1, iatom), t_2c_RI_tmp(1))
2620 160 : CALL apply_bump(t_2c_RI_tmp(1), iatom, ri_data, qs_env, from_left=.TRUE., from_right=.TRUE.)
2621 160 : CALL dbt_copy(t_2c_RI_tmp(1), t_2c_RI_tmp(2), move_data=.TRUE.)
2622 :
2623 160 : CALL dbt_batched_contract_init(t_2c_RI_tmp(2))
2624 468 : DO i_batch = 1, n_batch
2625 :
2626 : CALL fill_3c_stack(t_3c_tmp(1), t_3c_int(1, :), int_indices, 3, ri_data, &
2627 : img_bounds=[batch_ranges(i_batch), batch_ranges(i_batch + 1)], &
2628 924 : filter_at=iatom, filter_dim=2, idx_to_at=idx_to_at_AO)
2629 :
2630 : CALL dbt_contract(1.0_dp, t_2c_RI_tmp(2), t_3c_tmp(1), &
2631 : 0.0_dp, t_3c_tmp(2), map_1=[1], map_2=[2, 3], &
2632 : contract_1=[2], notcontract_1=[1], &
2633 : contract_2=[1], notcontract_2=[2, 3], &
2634 308 : filter_eps=ri_data%filter_eps, flop=nflop)
2635 308 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2636 :
2637 4304 : DO i_img = batch_ranges(i_batch), batch_ranges(i_batch + 1) - 1
2638 3996 : CALL unstack_t_3c_apc(t_3c_tmp(3), t_3c_tmp(2), i_img - batch_ranges(i_batch) + 1)
2639 : CALL dbt_copy(t_3c_tmp(3), ri_data%t_3c_int_ctr_1(1, i_img), summation=.TRUE., &
2640 4304 : order=[2, 1, 3], move_data=.TRUE.)
2641 : END DO
2642 468 : CALL dbt_clear(t_3c_tmp(1))
2643 : END DO
2644 240 : CALL dbt_batched_contract_finalize(t_2c_RI_tmp(2))
2645 :
2646 : END DO
2647 80 : CALL dbt_destroy(t_2c_RI_tmp(1))
2648 80 : CALL dbt_destroy(t_2c_RI_tmp(2))
2649 80 : CALL dbt_destroy(t_3c_tmp(1))
2650 80 : CALL dbt_destroy(t_3c_tmp(2))
2651 80 : CALL dbt_destroy(t_3c_tmp(3))
2652 :
2653 2078 : DO i_img = 1, nimg
2654 2078 : CALL dbt_destroy(t_3c_int(1, i_img))
2655 : END DO
2656 :
2657 80 : CALL timestop(handle)
2658 :
2659 400 : END SUBROUTINE precontract_3c_ints
2660 :
2661 : ! **************************************************************************************************
2662 : !> \brief Copy the data of a 2D tensor living in the main MPI group to a sub-group, given the proc
2663 : !> mapping from one to the other (e.g. for a proc idx in the subgroup, we get the idx in the main)
2664 : !> \param t2c_sub ...
2665 : !> \param t2c_main ...
2666 : !> \param group_size ...
2667 : !> \param ngroups ...
2668 : !> \param para_env ...
2669 : ! **************************************************************************************************
2670 9832 : SUBROUTINE copy_2c_to_subgroup(t2c_sub, t2c_main, group_size, ngroups, para_env)
2671 : TYPE(dbt_type), INTENT(INOUT) :: t2c_sub, t2c_main
2672 : INTEGER, INTENT(IN) :: group_size, ngroups
2673 : TYPE(mp_para_env_type), POINTER :: para_env
2674 :
2675 : INTEGER :: batch_size, i, i_batch, i_msg, iblk, &
2676 : igroup, iproc, ir, is, jblk, n_batch, &
2677 : nocc, tag
2678 9832 : INTEGER, ALLOCATABLE, DIMENSION(:) :: bsizes1, bsizes2
2679 9832 : INTEGER, ALLOCATABLE, DIMENSION(:, :) :: block_dest, block_source
2680 9832 : INTEGER, ALLOCATABLE, DIMENSION(:, :, :) :: current_dest
2681 : INTEGER, DIMENSION(2) :: ind, nblks
2682 : LOGICAL :: found
2683 9832 : REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: blk
2684 9832 : TYPE(cp_2d_r_p_type), ALLOCATABLE, DIMENSION(:) :: recv_buff, send_buff
2685 : TYPE(dbt_iterator_type) :: iter
2686 9832 : TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:) :: recv_req, send_req
2687 :
2688 : !Stategy: we loop over the main tensor, and send all the data. Then we loop over the sub tensor
2689 : ! and receive it. We do all of it with async MPI communication. The sub tensor needs
2690 : ! to have blocks pre-reserved though
2691 :
2692 9832 : CALL dbt_get_info(t2c_main, nblks_total=nblks)
2693 :
2694 : !Loop over the main tensor, count how many blocks are there, which ones, and on which proc
2695 39328 : ALLOCATE (block_source(nblks(1), nblks(2)))
2696 186060 : block_source = -1
2697 9832 : nocc = 0
2698 9832 : !$OMP PARALLEL DEFAULT(NONE) SHARED(t2c_main,para_env,nocc,block_source) PRIVATE(iter,ind,blk,found)
2699 : CALL dbt_iterator_start(iter, t2c_main)
2700 : DO WHILE (dbt_iterator_blocks_left(iter))
2701 : CALL dbt_iterator_next_block(iter, ind)
2702 : CALL dbt_get_block(t2c_main, ind, blk, found)
2703 : IF (.NOT. found) CYCLE
2704 :
2705 : block_source(ind(1), ind(2)) = para_env%mepos
2706 : !$OMP ATOMIC
2707 : nocc = nocc + 1
2708 : DEALLOCATE (blk)
2709 : END DO
2710 : CALL dbt_iterator_stop(iter)
2711 : !$OMP END PARALLEL
2712 :
2713 9832 : CALL para_env%sum(nocc)
2714 9832 : CALL para_env%sum(block_source)
2715 186060 : block_source = block_source + para_env%num_pe - 1
2716 9832 : IF (nocc == 0) RETURN
2717 :
2718 : !Loop over the sub tensor, get the block destination
2719 9652 : igroup = para_env%mepos/group_size
2720 28956 : ALLOCATE (block_dest(nblks(1), nblks(2)))
2721 184800 : block_dest = -1
2722 35832 : DO jblk = 1, nblks(2)
2723 184800 : DO iblk = 1, nblks(1)
2724 148968 : IF (block_source(iblk, jblk) == -1) CYCLE
2725 :
2726 121128 : CALL dbt_get_stored_coordinates(t2c_sub, [iblk, jblk], iproc)
2727 175148 : block_dest(iblk, jblk) = igroup*group_size + iproc !mapping of iproc in subgroup to main group idx
2728 : END DO
2729 : END DO
2730 :
2731 48260 : ALLOCATE (bsizes1(nblks(1)), bsizes2(nblks(2)))
2732 9652 : CALL dbt_get_info(t2c_main, blk_size_1=bsizes1, blk_size_2=bsizes2)
2733 :
2734 48260 : ALLOCATE (current_dest(nblks(1), nblks(2), 0:ngroups - 1))
2735 28956 : DO igroup = 0, ngroups - 1
2736 : !for a given subgroup, need to make the destination available to everyone in the main group
2737 369600 : current_dest(:, :, igroup) = block_dest(:, :)
2738 28956 : CALL para_env%bcast(current_dest(:, :, igroup), source=igroup*group_size) !bcast from first proc in sub-group
2739 : END DO
2740 :
2741 : !We go by batches, which cannot be larger than the maximum MPI tag value
2742 9652 : batch_size = MIN(para_env%get_tag_ub(), 128000, nocc*ngroups)
2743 9652 : n_batch = (nocc*ngroups)/batch_size
2744 9652 : IF (MODULO(nocc*ngroups, batch_size) .NE. 0) n_batch = n_batch + 1
2745 :
2746 19304 : DO i_batch = 1, n_batch
2747 : !Loop over groups, blocks and send/receive
2748 200112 : ALLOCATE (send_buff(batch_size), recv_buff(batch_size))
2749 200112 : ALLOCATE (send_req(batch_size), recv_req(batch_size))
2750 : ir = 0
2751 : is = 0
2752 : i_msg = 0
2753 35832 : DO jblk = 1, nblks(2)
2754 184800 : DO iblk = 1, nblks(1)
2755 473084 : DO igroup = 0, ngroups - 1
2756 297936 : IF (block_source(iblk, jblk) == -1) CYCLE
2757 :
2758 80752 : i_msg = i_msg + 1
2759 80752 : IF (i_msg < (i_batch - 1)*batch_size + 1 .OR. i_msg > i_batch*batch_size) CYCLE
2760 :
2761 : !a unique tag per block, within this batch
2762 80752 : tag = i_msg - (i_batch - 1)*batch_size
2763 :
2764 80752 : found = .FALSE.
2765 80752 : IF (para_env%mepos == block_source(iblk, jblk)) THEN
2766 121128 : CALL dbt_get_block(t2c_main, [iblk, jblk], blk, found)
2767 : END IF
2768 :
2769 : !If blocks live on same proc, simply copy. Else MPI send/recv
2770 80752 : IF (block_source(iblk, jblk) == current_dest(iblk, jblk, igroup)) THEN
2771 121128 : IF (found) CALL dbt_put_block(t2c_sub, [iblk, jblk], SHAPE(blk), blk)
2772 : ELSE
2773 40376 : IF (para_env%mepos == block_source(iblk, jblk) .AND. found) THEN
2774 80752 : ALLOCATE (send_buff(tag)%array(bsizes1(iblk), bsizes2(jblk)))
2775 22294244 : send_buff(tag)%array(:, :) = blk(:, :)
2776 20188 : is = is + 1
2777 : CALL para_env%isend(msgin=send_buff(tag)%array, dest=current_dest(iblk, jblk, igroup), &
2778 20188 : request=send_req(is), tag=tag)
2779 : END IF
2780 :
2781 40376 : IF (para_env%mepos == current_dest(iblk, jblk, igroup)) THEN
2782 80752 : ALLOCATE (recv_buff(tag)%array(bsizes1(iblk), bsizes2(jblk)))
2783 20188 : ir = ir + 1
2784 : CALL para_env%irecv(msgout=recv_buff(tag)%array, source=block_source(iblk, jblk), &
2785 20188 : request=recv_req(ir), tag=tag)
2786 : END IF
2787 : END IF
2788 :
2789 229720 : IF (found) DEALLOCATE (blk)
2790 : END DO
2791 : END DO
2792 : END DO
2793 :
2794 9652 : CALL mp_waitall(send_req(1:is))
2795 9652 : CALL mp_waitall(recv_req(1:ir))
2796 : !clean-up
2797 90404 : DO i = 1, batch_size
2798 90404 : IF (ASSOCIATED(send_buff(i)%array)) DEALLOCATE (send_buff(i)%array)
2799 : END DO
2800 :
2801 : !Finally copy the data from the buffer to the sub-tensor
2802 : i_msg = 0
2803 35832 : DO jblk = 1, nblks(2)
2804 184800 : DO iblk = 1, nblks(1)
2805 473084 : DO igroup = 0, ngroups - 1
2806 297936 : IF (block_source(iblk, jblk) == -1) CYCLE
2807 :
2808 80752 : i_msg = i_msg + 1
2809 80752 : IF (i_msg < (i_batch - 1)*batch_size + 1 .OR. i_msg > i_batch*batch_size) CYCLE
2810 :
2811 : !a unique tag per block, within this batch
2812 80752 : tag = i_msg - (i_batch - 1)*batch_size
2813 :
2814 80752 : IF (para_env%mepos == current_dest(iblk, jblk, igroup) .AND. &
2815 148968 : block_source(iblk, jblk) .NE. current_dest(iblk, jblk, igroup)) THEN
2816 :
2817 80752 : ALLOCATE (blk(bsizes1(iblk), bsizes2(jblk)))
2818 22294244 : blk(:, :) = recv_buff(tag)%array(:, :)
2819 100940 : CALL dbt_put_block(t2c_sub, [iblk, jblk], SHAPE(blk), blk)
2820 20188 : DEALLOCATE (blk)
2821 : END IF
2822 : END DO
2823 : END DO
2824 : END DO
2825 :
2826 : !clean-up
2827 90404 : DO i = 1, batch_size
2828 90404 : IF (ASSOCIATED(recv_buff(i)%array)) DEALLOCATE (recv_buff(i)%array)
2829 : END DO
2830 19304 : DEALLOCATE (send_buff, recv_buff, send_req, recv_req)
2831 : END DO !i_batch
2832 9652 : CALL dbt_finalize(t2c_sub)
2833 :
2834 19664 : END SUBROUTINE copy_2c_to_subgroup
2835 :
2836 : ! **************************************************************************************************
2837 : !> \brief Pre-compute the destination of the block of a 3D tensor in various subgroups
2838 : !> \param subgroup_dest ...
2839 : !> \param t3c_sub ...
2840 : !> \param t3c_main ...
2841 : !> \param group_size ...
2842 : !> \param ngroups ...
2843 : !> \param para_env ...
2844 : ! **************************************************************************************************
2845 588 : SUBROUTINE get_3c_subgroup_dest(subgroup_dest, t3c_sub, t3c_main, group_size, ngroups, para_env)
2846 : INTEGER, ALLOCATABLE, DIMENSION(:, :, :, :), &
2847 : INTENT(INOUT) :: subgroup_dest
2848 : TYPE(dbt_type), INTENT(INOUT) :: t3c_sub, t3c_main
2849 : INTEGER, INTENT(IN) :: group_size, ngroups
2850 : TYPE(mp_para_env_type), POINTER :: para_env
2851 :
2852 : INTEGER :: iblk, igroup, iproc, jblk, kblk
2853 588 : INTEGER, ALLOCATABLE, DIMENSION(:, :, :) :: block_dest
2854 : INTEGER, DIMENSION(3) :: nblks
2855 :
2856 588 : CALL dbt_get_info(t3c_main, nblks_total=nblks)
2857 :
2858 : !Loop over the sub tensor, get the block destination
2859 588 : igroup = para_env%mepos/group_size
2860 2940 : ALLOCATE (block_dest(nblks(1), nblks(2), nblks(3)))
2861 1764 : DO kblk = 1, nblks(3)
2862 9460 : DO jblk = 1, nblks(2)
2863 34952 : DO iblk = 1, nblks(1)
2864 104320 : CALL dbt_get_stored_coordinates(t3c_sub, [iblk, jblk, kblk], iproc)
2865 33776 : block_dest(iblk, jblk, kblk) = igroup*group_size + iproc !mapping of iproc in subgroup to main group idx
2866 : END DO
2867 : END DO
2868 : END DO
2869 :
2870 3528 : ALLOCATE (subgroup_dest(nblks(1), nblks(2), nblks(3), ngroups))
2871 1764 : DO igroup = 0, ngroups - 1
2872 : !for a given subgroup, need to make the destination available to everyone in the main group
2873 71080 : subgroup_dest(:, :, :, igroup + 1) = block_dest(:, :, :)
2874 1764 : CALL para_env%bcast(subgroup_dest(:, :, :, igroup + 1), source=igroup*group_size) !bcast from first proc in subgroup
2875 : END DO
2876 :
2877 588 : END SUBROUTINE get_3c_subgroup_dest
2878 :
2879 : ! **************************************************************************************************
2880 : !> \brief Copy the data of a 3D tensor living in the main MPI group to a sub-group, given the proc
2881 : !> mapping from one to the other (e.g. for a proc idx in the subgroup, we get the idx in the main)
2882 : !> \param t3c_sub ...
2883 : !> \param t3c_main ...
2884 : !> \param ngroups ...
2885 : !> \param para_env ...
2886 : !> \param subgroup_dest ...
2887 : !> \param iatom_to_subgroup ...
2888 : !> \param dim_at ...
2889 : !> \param idx_to_at ...
2890 : ! **************************************************************************************************
2891 14652 : SUBROUTINE copy_3c_to_subgroup(t3c_sub, t3c_main, ngroups, para_env, subgroup_dest, &
2892 14652 : iatom_to_subgroup, dim_at, idx_to_at)
2893 : TYPE(dbt_type), INTENT(INOUT) :: t3c_sub, t3c_main
2894 : INTEGER, INTENT(IN) :: ngroups
2895 : TYPE(mp_para_env_type), POINTER :: para_env
2896 : INTEGER, DIMENSION(:, :, :, :), INTENT(IN) :: subgroup_dest
2897 : TYPE(cp_1d_logical_p_type), DIMENSION(:), &
2898 : INTENT(INOUT), OPTIONAL :: iatom_to_subgroup
2899 : INTEGER, INTENT(IN), OPTIONAL :: dim_at
2900 : INTEGER, DIMENSION(:), OPTIONAL :: idx_to_at
2901 :
2902 : INTEGER :: batch_size, i, i_batch, i_msg, iatom, &
2903 : iblk, igroup, ir, is, isbuff, jblk, &
2904 : kblk, n_batch, nocc, tag
2905 14652 : INTEGER, ALLOCATABLE, DIMENSION(:) :: bsizes1, bsizes2, bsizes3
2906 14652 : INTEGER, ALLOCATABLE, DIMENSION(:, :, :) :: block_source
2907 : INTEGER, DIMENSION(3) :: ind, nblks
2908 : LOGICAL :: filter_at, found
2909 14652 : REAL(dp), ALLOCATABLE, DIMENSION(:, :, :) :: blk
2910 14652 : TYPE(cp_3d_r_p_type), ALLOCATABLE, DIMENSION(:) :: recv_buff, send_buff
2911 : TYPE(dbt_iterator_type) :: iter
2912 14652 : TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:) :: recv_req, send_req
2913 :
2914 : !Stategy: we loop over the main tensor, and send all the data. Then we loop over the sub tensor
2915 : ! and receive it. We do all of it with async MPI communication. The sub tensor needs
2916 : ! to have blocks pre-reserved though
2917 :
2918 14652 : CALL dbt_get_info(t3c_main, nblks_total=nblks)
2919 :
2920 : !in some cases, only copy a fraction of the 3c tensor to a given subgroup (corresponding to some atoms)
2921 14652 : filter_at = .FALSE.
2922 14652 : IF (PRESENT(iatom_to_subgroup) .AND. PRESENT(dim_at) .AND. PRESENT(idx_to_at)) THEN
2923 8620 : filter_at = .TRUE.
2924 8620 : CPASSERT(nblks(dim_at) == SIZE(idx_to_at))
2925 : END IF
2926 :
2927 : !Loop over the main tensor, count how many blocks are there, which ones, and on which proc
2928 73260 : ALLOCATE (block_source(nblks(1), nblks(2), nblks(3)))
2929 1036628 : block_source = -1
2930 14652 : nocc = 0
2931 14652 : !$OMP PARALLEL DEFAULT(NONE) SHARED(t3c_main,para_env,nocc,block_source) PRIVATE(iter,ind,blk,found)
2932 : CALL dbt_iterator_start(iter, t3c_main)
2933 : DO WHILE (dbt_iterator_blocks_left(iter))
2934 : CALL dbt_iterator_next_block(iter, ind)
2935 : CALL dbt_get_block(t3c_main, ind, blk, found)
2936 : IF (.NOT. found) CYCLE
2937 :
2938 : block_source(ind(1), ind(2), ind(3)) = para_env%mepos
2939 : !$OMP ATOMIC
2940 : nocc = nocc + 1
2941 : DEALLOCATE (blk)
2942 : END DO
2943 : CALL dbt_iterator_stop(iter)
2944 : !$OMP END PARALLEL
2945 :
2946 14652 : CALL para_env%sum(nocc)
2947 14652 : CALL para_env%sum(block_source)
2948 1036628 : block_source = block_source + para_env%num_pe - 1
2949 14652 : IF (nocc == 0) RETURN
2950 :
2951 102564 : ALLOCATE (bsizes1(nblks(1)), bsizes2(nblks(2)), bsizes3(nblks(3)))
2952 14652 : CALL dbt_get_info(t3c_main, blk_size_1=bsizes1, blk_size_2=bsizes2, blk_size_3=bsizes3)
2953 :
2954 : !We go by batches, which cannot be larger than the maximum MPI tag value
2955 14652 : batch_size = MIN(para_env%get_tag_ub(), 128000, nocc*ngroups)
2956 14652 : n_batch = (nocc*ngroups)/batch_size
2957 14652 : IF (MODULO(nocc*ngroups, batch_size) .NE. 0) n_batch = n_batch + 1
2958 :
2959 29304 : DO i_batch = 1, n_batch
2960 : !Loop over groups, blocks and send/receive
2961 716888 : ALLOCATE (send_buff(batch_size), recv_buff(batch_size))
2962 716888 : ALLOCATE (send_req(batch_size), recv_req(batch_size))
2963 : ir = 0
2964 : is = 0
2965 : i_msg = 0
2966 : isbuff = 0
2967 43956 : DO kblk = 1, nblks(3)
2968 277108 : DO jblk = 1, nblks(2)
2969 1021976 : DO iblk = 1, nblks(1)
2970 759520 : IF (block_source(iblk, jblk, kblk) == -1) CYCLE
2971 :
2972 164570 : found = .FALSE.
2973 164570 : IF (para_env%mepos == block_source(iblk, jblk, kblk)) THEN
2974 329140 : CALL dbt_get_block(t3c_main, [iblk, jblk, kblk], blk, found)
2975 82285 : IF (found) THEN
2976 82285 : isbuff = isbuff + 1
2977 411425 : ALLOCATE (send_buff(isbuff)%array(bsizes1(iblk), bsizes2(jblk), bsizes3(kblk)))
2978 : END IF
2979 : END IF
2980 :
2981 493710 : DO igroup = 0, ngroups - 1
2982 :
2983 329140 : i_msg = i_msg + 1
2984 329140 : IF (i_msg < (i_batch - 1)*batch_size + 1 .OR. i_msg > i_batch*batch_size) CYCLE
2985 :
2986 : !a unique tag per block, within this batch
2987 329140 : tag = i_msg - (i_batch - 1)*batch_size
2988 :
2989 329140 : IF (filter_at) THEN
2990 961520 : ind(:) = [iblk, jblk, kblk]
2991 240380 : iatom = idx_to_at(ind(dim_at))
2992 240380 : IF (.NOT. iatom_to_subgroup(iatom)%array(igroup + 1)) CYCLE
2993 : END IF
2994 :
2995 : !If blocks live on same proc, simply copy. Else MPI send/recv
2996 373520 : IF (block_source(iblk, jblk, kblk) == subgroup_dest(iblk, jblk, kblk, igroup + 1)) THEN
2997 479680 : IF (found) CALL dbt_put_block(t3c_sub, [iblk, jblk, kblk], SHAPE(blk), blk)
2998 : ELSE
2999 89030 : IF (para_env%mepos == block_source(iblk, jblk, kblk) .AND. found) THEN
3000 128638380 : send_buff(isbuff)%array(:, :, :) = blk(:, :, :)
3001 44515 : is = is + 1
3002 : CALL para_env%isend(msgin=send_buff(isbuff)%array, &
3003 : dest=subgroup_dest(iblk, jblk, kblk, igroup + 1), &
3004 44515 : request=send_req(is), tag=tag)
3005 : END IF
3006 :
3007 89030 : IF (para_env%mepos == subgroup_dest(iblk, jblk, kblk, igroup + 1)) THEN
3008 222575 : ALLOCATE (recv_buff(tag)%array(bsizes1(iblk), bsizes2(jblk), bsizes3(kblk)))
3009 44515 : ir = ir + 1
3010 : CALL para_env%irecv(msgout=recv_buff(tag)%array, source=block_source(iblk, jblk, kblk), &
3011 44515 : request=recv_req(ir), tag=tag)
3012 : END IF
3013 : END IF
3014 : END DO !igroup
3015 :
3016 397722 : IF (found) DEALLOCATE (blk)
3017 : END DO
3018 : END DO
3019 : END DO
3020 :
3021 : !Finally copy the data from the buffer to the sub-tensor
3022 : i_msg = 0
3023 : ir = 0
3024 43956 : DO kblk = 1, nblks(3)
3025 277108 : DO jblk = 1, nblks(2)
3026 1021976 : DO iblk = 1, nblks(1)
3027 2511712 : DO igroup = 0, ngroups - 1
3028 1519040 : IF (block_source(iblk, jblk, kblk) == -1) CYCLE
3029 :
3030 329140 : i_msg = i_msg + 1
3031 329140 : IF (i_msg < (i_batch - 1)*batch_size + 1 .OR. i_msg > i_batch*batch_size) CYCLE
3032 :
3033 : !a unique tag per block, within this batch
3034 329140 : tag = i_msg - (i_batch - 1)*batch_size
3035 :
3036 329140 : IF (filter_at) THEN
3037 961520 : ind(:) = [iblk, jblk, kblk]
3038 240380 : iatom = idx_to_at(ind(dim_at))
3039 240380 : IF (.NOT. iatom_to_subgroup(iatom)%array(igroup + 1)) CYCLE
3040 : END IF
3041 :
3042 208950 : IF (para_env%mepos == subgroup_dest(iblk, jblk, kblk, igroup + 1) .AND. &
3043 759520 : block_source(iblk, jblk, kblk) .NE. subgroup_dest(iblk, jblk, kblk, igroup + 1)) THEN
3044 :
3045 44515 : ir = ir + 1
3046 44515 : CALL mp_waitall(recv_req(ir:ir))
3047 311605 : CALL dbt_put_block(t3c_sub, [iblk, jblk, kblk], SHAPE(recv_buff(tag)%array), recv_buff(tag)%array)
3048 : END IF
3049 : END DO
3050 : END DO
3051 : END DO
3052 : END DO
3053 :
3054 : !clean-up
3055 14652 : CALL mp_waitall(send_req(1:is))
3056 343792 : DO i = 1, batch_size
3057 329140 : IF (ASSOCIATED(recv_buff(i)%array)) DEALLOCATE (recv_buff(i)%array)
3058 343792 : IF (ASSOCIATED(send_buff(i)%array)) DEALLOCATE (send_buff(i)%array)
3059 : END DO
3060 29304 : DEALLOCATE (send_buff, recv_buff, send_req, recv_req)
3061 : END DO !i_batch
3062 14652 : CALL dbt_finalize(t3c_sub)
3063 :
3064 29304 : END SUBROUTINE copy_3c_to_subgroup
3065 :
3066 : ! **************************************************************************************************
3067 : !> \brief A routine that gather the pieces of the KS matrix accross the subgroup and puts it in the
3068 : !> main group. Each b_img, iatom, jatom tuple is one a single CPU
3069 : !> \param ks_t ...
3070 : !> \param ks_t_sub ...
3071 : !> \param group_size ...
3072 : !> \param sparsity_pattern ...
3073 : !> \param para_env ...
3074 : !> \param ri_data ...
3075 : ! **************************************************************************************************
3076 248 : SUBROUTINE gather_ks_matrix(ks_t, ks_t_sub, group_size, sparsity_pattern, para_env, ri_data)
3077 : TYPE(dbt_type), DIMENSION(:, :), INTENT(INOUT) :: ks_t, ks_t_sub
3078 : INTEGER, INTENT(IN) :: group_size
3079 : INTEGER, DIMENSION(:, :, :), INTENT(IN) :: sparsity_pattern
3080 : TYPE(mp_para_env_type), POINTER :: para_env
3081 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
3082 :
3083 : CHARACTER(len=*), PARAMETER :: routineN = 'gather_ks_matrix'
3084 :
3085 : INTEGER :: b_img, dest, handle, i, i_spin, iatom, &
3086 : igroup, ir, is, jatom, n_mess, natom, &
3087 : nimg, nspins, source, tag
3088 : LOGICAL :: found
3089 248 : REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: blk
3090 248 : TYPE(cp_2d_r_p_type), ALLOCATABLE, DIMENSION(:) :: recv_buff, send_buff
3091 248 : TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:) :: recv_req, send_req
3092 :
3093 248 : CALL timeset(routineN, handle)
3094 :
3095 248 : nimg = SIZE(sparsity_pattern, 3)
3096 248 : natom = SIZE(sparsity_pattern, 2)
3097 248 : nspins = SIZE(ks_t, 1)
3098 :
3099 7034 : DO b_img = 1, nimg
3100 : n_mess = 0
3101 15138 : DO i_spin = 1, nspins
3102 31842 : DO jatom = 1, natom
3103 58464 : DO iatom = 1, natom
3104 50112 : IF (sparsity_pattern(iatom, jatom, b_img) > -1) n_mess = n_mess + 1
3105 : END DO
3106 : END DO
3107 : END DO
3108 :
3109 51326 : ALLOCATE (send_buff(n_mess), recv_buff(n_mess))
3110 58112 : ALLOCATE (send_req(n_mess), recv_req(n_mess))
3111 6786 : ir = 0
3112 6786 : is = 0
3113 6786 : n_mess = 0
3114 6786 : tag = 0
3115 :
3116 15138 : DO i_spin = 1, nspins
3117 31842 : DO jatom = 1, natom
3118 58464 : DO iatom = 1, natom
3119 33408 : IF (sparsity_pattern(iatom, jatom, b_img) < 0) CYCLE
3120 11664 : n_mess = n_mess + 1
3121 11664 : tag = tag + 1
3122 :
3123 : !sending the message
3124 34992 : CALL dbt_get_stored_coordinates(ks_t(i_spin, b_img), [iatom, jatom], dest)
3125 34992 : CALL dbt_get_stored_coordinates(ks_t_sub(i_spin, b_img), [iatom, jatom], source) !source within sub
3126 11664 : igroup = sparsity_pattern(iatom, jatom, b_img)
3127 11664 : source = source + igroup*group_size
3128 11664 : IF (para_env%mepos == source) THEN
3129 17496 : CALL dbt_get_block(ks_t_sub(i_spin, b_img), [iatom, jatom], blk, found)
3130 5832 : IF (source == dest) THEN
3131 3531 : IF (found) CALL dbt_put_block(ks_t(i_spin, b_img), [iatom, jatom], SHAPE(blk), blk)
3132 : ELSE
3133 19540 : ALLOCATE (send_buff(n_mess)%array(ri_data%bsizes_AO(iatom), ri_data%bsizes_AO(jatom)))
3134 316161 : send_buff(n_mess)%array(:, :) = 0.0_dp
3135 4885 : IF (found) THEN
3136 219507 : send_buff(n_mess)%array(:, :) = blk(:, :)
3137 : END IF
3138 4885 : is = is + 1
3139 : CALL para_env%isend(msgin=send_buff(n_mess)%array, dest=dest, &
3140 4885 : request=send_req(is), tag=tag)
3141 : END IF
3142 5832 : DEALLOCATE (blk)
3143 : END IF
3144 :
3145 : !receiving the message
3146 28368 : IF (para_env%mepos == dest .AND. source .NE. dest) THEN
3147 19540 : ALLOCATE (recv_buff(n_mess)%array(ri_data%bsizes_AO(iatom), ri_data%bsizes_AO(jatom)))
3148 4885 : ir = ir + 1
3149 : CALL para_env%irecv(msgout=recv_buff(n_mess)%array, source=source, &
3150 4885 : request=recv_req(ir), tag=tag)
3151 : END IF
3152 : END DO !iatom
3153 : END DO !jatom
3154 : END DO !ispin
3155 :
3156 6786 : CALL mp_waitall(send_req(1:is))
3157 6786 : CALL mp_waitall(recv_req(1:ir))
3158 :
3159 : !Copy the messages received into the KS matrix
3160 6786 : n_mess = 0
3161 15138 : DO i_spin = 1, nspins
3162 31842 : DO jatom = 1, natom
3163 58464 : DO iatom = 1, natom
3164 33408 : IF (sparsity_pattern(iatom, jatom, b_img) < 0) CYCLE
3165 11664 : n_mess = n_mess + 1
3166 :
3167 34992 : CALL dbt_get_stored_coordinates(ks_t(i_spin, b_img), [iatom, jatom], dest)
3168 28368 : IF (para_env%mepos == dest) THEN
3169 5832 : IF (.NOT. ASSOCIATED(recv_buff(n_mess)%array)) CYCLE
3170 19540 : ALLOCATE (blk(ri_data%bsizes_AO(iatom), ri_data%bsizes_AO(jatom)))
3171 316161 : blk(:, :) = recv_buff(n_mess)%array(:, :)
3172 24425 : CALL dbt_put_block(ks_t(i_spin, b_img), [iatom, jatom], SHAPE(blk), blk)
3173 4885 : DEALLOCATE (blk)
3174 : END IF
3175 : END DO
3176 : END DO
3177 : END DO
3178 :
3179 : !clean-up
3180 18450 : DO i = 1, n_mess
3181 11664 : IF (ASSOCIATED(send_buff(i)%array)) DEALLOCATE (send_buff(i)%array)
3182 18450 : IF (ASSOCIATED(recv_buff(i)%array)) DEALLOCATE (recv_buff(i)%array)
3183 : END DO
3184 7034 : DEALLOCATE (send_buff, recv_buff, send_req, recv_req)
3185 : END DO !b_img
3186 :
3187 248 : CALL timestop(handle)
3188 :
3189 248 : END SUBROUTINE gather_ks_matrix
3190 :
3191 : ! **************************************************************************************************
3192 : !> \brief copy all required 2c tensors from the main MPI group to the subgroups
3193 : !> \param mat_2c_pot ...
3194 : !> \param t_2c_work ...
3195 : !> \param t_2c_ao_tmp ...
3196 : !> \param ks_t_split ...
3197 : !> \param ks_t_sub ...
3198 : !> \param group_size ...
3199 : !> \param ngroups ...
3200 : !> \param para_env ...
3201 : !> \param para_env_sub ...
3202 : !> \param ri_data ...
3203 : ! **************************************************************************************************
3204 248 : SUBROUTINE get_subgroup_2c_tensors(mat_2c_pot, t_2c_work, t_2c_ao_tmp, ks_t_split, ks_t_sub, &
3205 : group_size, ngroups, para_env, para_env_sub, ri_data)
3206 : TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT) :: mat_2c_pot
3207 : TYPE(dbt_type), DIMENSION(:), INTENT(INOUT) :: t_2c_work, t_2c_ao_tmp, ks_t_split
3208 : TYPE(dbt_type), DIMENSION(:, :), INTENT(INOUT) :: ks_t_sub
3209 : INTEGER, INTENT(IN) :: group_size, ngroups
3210 : TYPE(mp_para_env_type), POINTER :: para_env, para_env_sub
3211 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
3212 :
3213 : CHARACTER(len=*), PARAMETER :: routineN = 'get_subgroup_2c_tensors'
3214 :
3215 : INTEGER :: handle, i, i_img, i_RI, i_spin, iproc, &
3216 : j, natom, nblks, nimg, nspins
3217 : INTEGER(int_8) :: nze
3218 : INTEGER, ALLOCATABLE, DIMENSION(:) :: bsizes_RI_ext, bsizes_RI_ext_split, &
3219 248 : dist1, dist2
3220 : INTEGER, DIMENSION(2) :: pdims_2d
3221 496 : INTEGER, DIMENSION(:), POINTER :: col_dist, RI_blk_size, row_dist
3222 248 : INTEGER, DIMENSION(:, :), POINTER :: dbcsr_pgrid
3223 : REAL(dp) :: occ
3224 : TYPE(dbcsr_distribution_type) :: dbcsr_dist_sub
3225 744 : TYPE(dbt_pgrid_type) :: pgrid_2d
3226 3224 : TYPE(dbt_type) :: work, work_sub
3227 :
3228 248 : CALL timeset(routineN, handle)
3229 :
3230 : !Create the 2d pgrid
3231 248 : pdims_2d = 0
3232 248 : CALL dbt_pgrid_create(para_env_sub, pdims_2d, pgrid_2d)
3233 :
3234 248 : natom = SIZE(ri_data%bsizes_RI)
3235 248 : nblks = SIZE(ri_data%bsizes_RI_split)
3236 744 : ALLOCATE (bsizes_RI_ext(ri_data%ncell_RI*natom))
3237 744 : ALLOCATE (bsizes_RI_ext_split(ri_data%ncell_RI*nblks))
3238 1606 : DO i_RI = 1, ri_data%ncell_RI
3239 4074 : bsizes_RI_ext((i_RI - 1)*natom + 1:i_RI*natom) = ri_data%bsizes_RI(:)
3240 7492 : bsizes_RI_ext_split((i_RI - 1)*nblks + 1:i_RI*nblks) = ri_data%bsizes_RI_split(:)
3241 : END DO
3242 :
3243 : !nRI x nRI 2c tensors
3244 : CALL create_2c_tensor(t_2c_work(1), dist1, dist2, pgrid_2d, &
3245 : bsizes_RI_ext, bsizes_RI_ext, &
3246 : name="(RI | RI)")
3247 248 : DEALLOCATE (dist1, dist2)
3248 :
3249 : CALL create_2c_tensor(t_2c_work(2), dist1, dist2, pgrid_2d, &
3250 : bsizes_RI_ext_split, bsizes_RI_ext_split, &
3251 248 : name="(RI | RI)")
3252 248 : DEALLOCATE (dist1, dist2)
3253 :
3254 : !the AO based tensors
3255 : CALL create_2c_tensor(ks_t_split(1), dist1, dist2, pgrid_2d, &
3256 : ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
3257 : name="(AO | AO)")
3258 248 : DEALLOCATE (dist1, dist2)
3259 248 : CALL dbt_create(ks_t_split(1), ks_t_split(2))
3260 :
3261 : CALL create_2c_tensor(t_2c_ao_tmp(1), dist1, dist2, pgrid_2d, &
3262 : ri_data%bsizes_AO, ri_data%bsizes_AO, &
3263 : name="(AO | AO)")
3264 248 : DEALLOCATE (dist1, dist2)
3265 :
3266 248 : nspins = SIZE(ks_t_sub, 1)
3267 248 : nimg = SIZE(ks_t_sub, 2)
3268 7034 : DO i_img = 1, nimg
3269 15386 : DO i_spin = 1, nspins
3270 15138 : CALL dbt_create(t_2c_ao_tmp(1), ks_t_sub(i_spin, i_img))
3271 : END DO
3272 : END DO
3273 :
3274 : !Finally the HFX potential matrices
3275 : !For now, we do a convoluted things where we go to tensors first, then back to matrices.
3276 : CALL create_2c_tensor(work_sub, dist1, dist2, pgrid_2d, &
3277 : ri_data%bsizes_RI, ri_data%bsizes_RI, &
3278 : name="(RI | RI)")
3279 248 : CALL dbt_create(ri_data%kp_mat_2c_pot(1, 1), work)
3280 :
3281 992 : ALLOCATE (dbcsr_pgrid(0:pdims_2d(1) - 1, 0:pdims_2d(2) - 1))
3282 248 : iproc = 0
3283 496 : DO i = 0, pdims_2d(1) - 1
3284 744 : DO j = 0, pdims_2d(2) - 1
3285 248 : dbcsr_pgrid(i, j) = iproc
3286 496 : iproc = iproc + 1
3287 : END DO
3288 : END DO
3289 :
3290 : !We need to have the same exact 2d block dist as the tensors
3291 992 : ALLOCATE (col_dist(natom), row_dist(natom))
3292 744 : row_dist(:) = dist1(:)
3293 744 : col_dist(:) = dist2(:)
3294 :
3295 496 : ALLOCATE (RI_blk_size(natom))
3296 744 : RI_blk_size(:) = ri_data%bsizes_RI(:)
3297 :
3298 : CALL dbcsr_distribution_new(dbcsr_dist_sub, group=para_env_sub%get_handle(), pgrid=dbcsr_pgrid, &
3299 248 : row_dist=row_dist, col_dist=col_dist)
3300 : CALL dbcsr_create(mat_2c_pot(1), dist=dbcsr_dist_sub, name="sub", matrix_type=dbcsr_type_no_symmetry, &
3301 248 : row_blk_size=RI_blk_size, col_blk_size=RI_blk_size)
3302 :
3303 7034 : DO i_img = 1, nimg
3304 6786 : IF (i_img > 1) CALL dbcsr_create(mat_2c_pot(i_img), template=mat_2c_pot(1))
3305 6786 : CALL dbt_copy_matrix_to_tensor(ri_data%kp_mat_2c_pot(1, i_img), work)
3306 6786 : CALL get_tensor_occupancy(work, nze, occ)
3307 6786 : IF (nze == 0) CYCLE
3308 :
3309 5114 : CALL copy_2c_to_subgroup(work_sub, work, group_size, ngroups, para_env)
3310 5114 : CALL dbt_copy_tensor_to_matrix(work_sub, mat_2c_pot(i_img))
3311 5114 : CALL dbcsr_filter(mat_2c_pot(i_img), ri_data%filter_eps)
3312 12148 : CALL dbt_clear(work_sub)
3313 : END DO
3314 :
3315 248 : CALL dbt_destroy(work)
3316 248 : CALL dbt_destroy(work_sub)
3317 248 : CALL dbt_pgrid_destroy(pgrid_2d)
3318 248 : CALL dbcsr_distribution_release(dbcsr_dist_sub)
3319 248 : DEALLOCATE (col_dist, row_dist, RI_blk_size, dbcsr_pgrid)
3320 248 : CALL timestop(handle)
3321 :
3322 2232 : END SUBROUTINE get_subgroup_2c_tensors
3323 :
3324 : ! **************************************************************************************************
3325 : !> \brief copy all required 3c tensors from the main MPI group to the subgroups
3326 : !> \param t_3c_int ...
3327 : !> \param t_3c_work_2 ...
3328 : !> \param t_3c_work_3 ...
3329 : !> \param t_3c_apc ...
3330 : !> \param t_3c_apc_sub ...
3331 : !> \param group_size ...
3332 : !> \param ngroups ...
3333 : !> \param para_env ...
3334 : !> \param para_env_sub ...
3335 : !> \param ri_data ...
3336 : ! **************************************************************************************************
3337 248 : SUBROUTINE get_subgroup_3c_tensors(t_3c_int, t_3c_work_2, t_3c_work_3, t_3c_apc, t_3c_apc_sub, &
3338 : group_size, ngroups, para_env, para_env_sub, ri_data)
3339 : TYPE(dbt_type), DIMENSION(:), INTENT(INOUT) :: t_3c_int, t_3c_work_2, t_3c_work_3
3340 : TYPE(dbt_type), DIMENSION(:, :), INTENT(INOUT) :: t_3c_apc, t_3c_apc_sub
3341 : INTEGER, INTENT(IN) :: group_size, ngroups
3342 : TYPE(mp_para_env_type), POINTER :: para_env, para_env_sub
3343 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
3344 :
3345 : CHARACTER(len=*), PARAMETER :: routineN = 'get_subgroup_3c_tensors'
3346 :
3347 : INTEGER :: batch_size, bo(2), handle, handle2, &
3348 : i_blk, i_img, i_RI, i_spin, ib, natom, &
3349 : nblks_AO, nblks_RI, nimg, nspins
3350 : INTEGER(int_8) :: nze
3351 248 : INTEGER, ALLOCATABLE, DIMENSION(:) :: bsizes_RI_ext, bsizes_RI_ext_split, &
3352 248 : bsizes_stack, bsizes_tmp, dist1, &
3353 248 : dist2, dist3, dist_stack, idx_to_at
3354 248 : INTEGER, ALLOCATABLE, DIMENSION(:, :, :, :) :: subgroup_dest
3355 : INTEGER, DIMENSION(3) :: pdims
3356 : REAL(dp) :: occ
3357 2232 : TYPE(dbt_distribution_type) :: t_dist
3358 744 : TYPE(dbt_pgrid_type) :: pgrid
3359 6200 : TYPE(dbt_type) :: tmp, work_atom_block, work_atom_block_sub
3360 :
3361 248 : CALL timeset(routineN, handle)
3362 :
3363 248 : nblks_RI = SIZE(ri_data%bsizes_RI_split)
3364 744 : ALLOCATE (bsizes_RI_ext_split(ri_data%ncell_RI*nblks_RI))
3365 1606 : DO i_RI = 1, ri_data%ncell_RI
3366 7492 : bsizes_RI_ext_split((i_RI - 1)*nblks_RI + 1:i_RI*nblks_RI) = ri_data%bsizes_RI_split(:)
3367 : END DO
3368 :
3369 : !Preparing larger block sizes for efficient communication (less, bigger messages)
3370 248 : natom = SIZE(ri_data%bsizes_RI)
3371 248 : nblks_RI = natom
3372 744 : ALLOCATE (bsizes_tmp(nblks_RI))
3373 744 : DO i_blk = 1, nblks_RI
3374 496 : bo = get_limit(natom, nblks_RI, i_blk - 1)
3375 1240 : bsizes_tmp(i_blk) = SUM(ri_data%bsizes_RI(bo(1):bo(2)))
3376 : END DO
3377 744 : ALLOCATE (bsizes_RI_ext(ri_data%ncell_RI*nblks_RI))
3378 1606 : DO i_RI = 1, ri_data%ncell_RI
3379 4322 : bsizes_RI_ext((i_RI - 1)*nblks_RI + 1:i_RI*nblks_RI) = bsizes_tmp(:)
3380 : END DO
3381 :
3382 248 : batch_size = ri_data%kp_stack_size
3383 248 : nblks_AO = SIZE(ri_data%bsizes_AO_split)
3384 744 : ALLOCATE (bsizes_stack(batch_size*nblks_AO))
3385 4984 : DO ib = 1, batch_size
3386 20760 : bsizes_stack((ib - 1)*nblks_AO + 1:ib*nblks_AO) = ri_data%bsizes_AO_split(:)
3387 : END DO
3388 :
3389 : !Create the pgrid for the configuration correspoinding to ri_data%t_3c_int_ctr_3
3390 248 : natom = SIZE(ri_data%bsizes_RI)
3391 248 : pdims = 0
3392 : CALL dbt_pgrid_create(para_env_sub, pdims, pgrid, &
3393 992 : tensor_dims=[SIZE(bsizes_RI_ext_split), 1, batch_size*SIZE(ri_data%bsizes_AO_split)])
3394 :
3395 : !Create all required 3c tensors in that configuration
3396 : CALL create_3c_tensor(t_3c_int(1), dist1, dist2, dist3, &
3397 : pgrid, bsizes_RI_ext_split, ri_data%bsizes_AO_split, &
3398 248 : ri_data%bsizes_AO_split, [1], [2, 3], name="(RI | AO AO)")
3399 248 : nimg = SIZE(t_3c_int)
3400 6786 : DO i_img = 2, nimg
3401 6786 : CALL dbt_create(t_3c_int(1), t_3c_int(i_img))
3402 : END DO
3403 :
3404 : !The stacked work tensors, in a distribution that matches that of t_3c_int
3405 496 : ALLOCATE (dist_stack(batch_size*nblks_AO))
3406 4984 : DO ib = 1, batch_size
3407 20760 : dist_stack((ib - 1)*nblks_AO + 1:ib*nblks_AO) = dist3(:)
3408 : END DO
3409 :
3410 248 : CALL dbt_distribution_new(t_dist, pgrid, dist1, dist2, dist_stack)
3411 : CALL dbt_create(t_3c_work_3(1), "work_3_stack", t_dist, [1], [2, 3], &
3412 248 : bsizes_RI_ext_split, ri_data%bsizes_AO_split, bsizes_stack)
3413 248 : CALL dbt_create(t_3c_work_3(1), t_3c_work_3(2))
3414 248 : CALL dbt_create(t_3c_work_3(1), t_3c_work_3(3))
3415 248 : CALL dbt_distribution_destroy(t_dist)
3416 248 : DEALLOCATE (dist1, dist2, dist3, dist_stack)
3417 :
3418 : !For more efficient communication, we use intermediate tensors with larger block size
3419 : CALL create_3c_tensor(work_atom_block_sub, dist1, dist2, dist3, &
3420 : pgrid, bsizes_RI_ext, ri_data%bsizes_AO, &
3421 248 : ri_data%bsizes_AO, [1], [2, 3], name="(RI | AO AO)")
3422 248 : DEALLOCATE (dist1, dist2, dist3)
3423 :
3424 : CALL create_3c_tensor(work_atom_block, dist1, dist2, dist3, &
3425 : ri_data%pgrid, bsizes_RI_ext, ri_data%bsizes_AO, &
3426 248 : ri_data%bsizes_AO, [1], [2, 3], name="(RI | AO AO)")
3427 248 : DEALLOCATE (dist1, dist2, dist3)
3428 :
3429 : CALL get_3c_subgroup_dest(subgroup_dest, work_atom_block_sub, work_atom_block, &
3430 248 : group_size, ngroups, para_env)
3431 :
3432 : !Finally copy the integrals into the subgroups (if not there already)
3433 248 : CALL timeset(routineN//"_ints", handle2)
3434 248 : IF (ALLOCATED(ri_data%kp_t_3c_int)) THEN
3435 4956 : DO i_img = 1, nimg
3436 4956 : CALL dbt_copy(ri_data%kp_t_3c_int(i_img), t_3c_int(i_img), move_data=.TRUE.)
3437 : END DO
3438 : ELSE
3439 2878 : ALLOCATE (ri_data%kp_t_3c_int(nimg))
3440 2078 : DO i_img = 1, nimg
3441 1998 : CALL dbt_create(t_3c_int(i_img), ri_data%kp_t_3c_int(i_img))
3442 1998 : CALL get_tensor_occupancy(ri_data%t_3c_int_ctr_1(1, i_img), nze, occ)
3443 1998 : IF (nze == 0) CYCLE
3444 1776 : CALL dbt_copy(ri_data%t_3c_int_ctr_1(1, i_img), work_atom_block, order=[2, 1, 3])
3445 : CALL copy_3c_to_subgroup(work_atom_block_sub, work_atom_block, &
3446 1776 : ngroups, para_env, subgroup_dest)
3447 1776 : CALL dbt_copy(work_atom_block_sub, t_3c_int(i_img), move_data=.TRUE.)
3448 3854 : CALL dbt_filter(t_3c_int(i_img), ri_data%filter_eps)
3449 : END DO
3450 : END IF
3451 248 : CALL timestop(handle2)
3452 248 : CALL dbt_pgrid_destroy(pgrid)
3453 248 : CALL dbt_destroy(work_atom_block)
3454 248 : CALL dbt_destroy(work_atom_block_sub)
3455 248 : DEALLOCATE (subgroup_dest)
3456 :
3457 : !Do the same for the t_3c_ctr_2 configuration
3458 248 : pdims = 0
3459 : CALL dbt_pgrid_create(para_env_sub, pdims, pgrid, &
3460 992 : tensor_dims=[1, SIZE(bsizes_RI_ext_split), batch_size*SIZE(ri_data%bsizes_AO_split)])
3461 :
3462 : !For more efficient communication, we use intermediate tensors with larger block size
3463 : CALL create_3c_tensor(work_atom_block_sub, dist1, dist2, dist3, &
3464 : pgrid, ri_data%bsizes_AO, bsizes_RI_ext, &
3465 248 : ri_data%bsizes_AO, [1], [2, 3], name="(AO RI | AO)")
3466 248 : DEALLOCATE (dist1, dist2, dist3)
3467 :
3468 : CALL create_3c_tensor(work_atom_block, dist1, dist2, dist3, &
3469 : ri_data%pgrid_1, ri_data%bsizes_AO, bsizes_RI_ext, &
3470 248 : ri_data%bsizes_AO, [1], [2, 3], name="(AO RI | AO)")
3471 248 : DEALLOCATE (dist1, dist2, dist3)
3472 :
3473 : CALL get_3c_subgroup_dest(subgroup_dest, work_atom_block_sub, work_atom_block, &
3474 248 : group_size, ngroups, para_env)
3475 :
3476 : !template for t_3c_apc_sub
3477 : CALL create_3c_tensor(tmp, dist1, dist2, dist3, &
3478 : pgrid, ri_data%bsizes_AO_split, bsizes_RI_ext_split, &
3479 248 : ri_data%bsizes_AO_split, [1], [2, 3], name="(AO RI | AO)")
3480 :
3481 : !create t_3c_work_2 tensors in a distribution that matches the above
3482 496 : ALLOCATE (dist_stack(batch_size*nblks_AO))
3483 4984 : DO ib = 1, batch_size
3484 20760 : dist_stack((ib - 1)*nblks_AO + 1:ib*nblks_AO) = dist3(:)
3485 : END DO
3486 :
3487 248 : CALL dbt_distribution_new(t_dist, pgrid, dist1, dist2, dist_stack)
3488 : CALL dbt_create(t_3c_work_2(1), "work_2_stack", t_dist, [1], [2, 3], &
3489 248 : ri_data%bsizes_AO_split, bsizes_RI_ext_split, bsizes_stack)
3490 248 : CALL dbt_create(t_3c_work_2(1), t_3c_work_2(2))
3491 248 : CALL dbt_create(t_3c_work_2(1), t_3c_work_2(3))
3492 248 : CALL dbt_distribution_destroy(t_dist)
3493 248 : DEALLOCATE (dist1, dist2, dist3, dist_stack)
3494 :
3495 : !Finally copy data from t_3c_apc to the subgroups
3496 744 : ALLOCATE (idx_to_at(SIZE(ri_data%bsizes_AO)))
3497 248 : CALL get_idx_to_atom(idx_to_at, ri_data%bsizes_AO, ri_data%bsizes_AO)
3498 248 : nspins = SIZE(t_3c_apc, 1)
3499 248 : CALL timeset(routineN//"_apc", handle2)
3500 7034 : DO i_img = 1, nimg
3501 15138 : DO i_spin = 1, nspins
3502 8352 : CALL dbt_create(tmp, t_3c_apc_sub(i_spin, i_img))
3503 8352 : CALL get_tensor_occupancy(t_3c_apc(i_spin, i_img), nze, occ)
3504 8352 : IF (nze == 0) CYCLE
3505 7362 : CALL dbt_copy(t_3c_apc(i_spin, i_img), work_atom_block, move_data=.TRUE.)
3506 : CALL copy_3c_to_subgroup(work_atom_block_sub, work_atom_block, ngroups, para_env, &
3507 7362 : subgroup_dest, ri_data%iatom_to_subgroup, 1, idx_to_at)
3508 7362 : CALL dbt_copy(work_atom_block_sub, t_3c_apc_sub(i_spin, i_img), move_data=.TRUE.)
3509 22500 : CALL dbt_filter(t_3c_apc_sub(i_spin, i_img), ri_data%filter_eps)
3510 : END DO
3511 15386 : DO i_spin = 1, nspins
3512 15138 : CALL dbt_destroy(t_3c_apc(i_spin, i_img))
3513 : END DO
3514 : END DO
3515 248 : CALL timestop(handle2)
3516 248 : CALL dbt_pgrid_destroy(pgrid)
3517 248 : CALL dbt_destroy(tmp)
3518 248 : CALL dbt_destroy(work_atom_block)
3519 248 : CALL dbt_destroy(work_atom_block_sub)
3520 :
3521 248 : CALL timestop(handle)
3522 :
3523 992 : END SUBROUTINE get_subgroup_3c_tensors
3524 :
3525 : ! **************************************************************************************************
3526 : !> \brief copy all required 2c force tensors from the main MPI group to the subgroups
3527 : !> \param t_2c_inv ...
3528 : !> \param t_2c_bint ...
3529 : !> \param t_2c_metric ...
3530 : !> \param mat_2c_pot ...
3531 : !> \param t_2c_work ...
3532 : !> \param rho_ao_t ...
3533 : !> \param rho_ao_t_sub ...
3534 : !> \param t_2c_der_metric ...
3535 : !> \param t_2c_der_metric_sub ...
3536 : !> \param mat_der_pot ...
3537 : !> \param mat_der_pot_sub ...
3538 : !> \param group_size ...
3539 : !> \param ngroups ...
3540 : !> \param para_env ...
3541 : !> \param para_env_sub ...
3542 : !> \param ri_data ...
3543 : !> \note Main MPI group tensors are deleted within this routine, for memory optimization
3544 : ! **************************************************************************************************
3545 92 : SUBROUTINE get_subgroup_2c_derivs(t_2c_inv, t_2c_bint, t_2c_metric, mat_2c_pot, t_2c_work, rho_ao_t, &
3546 46 : rho_ao_t_sub, t_2c_der_metric, t_2c_der_metric_sub, mat_der_pot, &
3547 46 : mat_der_pot_sub, group_size, ngroups, para_env, para_env_sub, ri_data)
3548 : TYPE(dbt_type), DIMENSION(:), INTENT(INOUT) :: t_2c_inv, t_2c_bint, t_2c_metric
3549 : TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT) :: mat_2c_pot
3550 : TYPE(dbt_type), DIMENSION(:), INTENT(INOUT) :: t_2c_work
3551 : TYPE(dbt_type), DIMENSION(:, :), INTENT(INOUT) :: rho_ao_t, rho_ao_t_sub, t_2c_der_metric, &
3552 : t_2c_der_metric_sub
3553 : TYPE(dbcsr_type), DIMENSION(:, :), INTENT(INOUT) :: mat_der_pot, mat_der_pot_sub
3554 : INTEGER, INTENT(IN) :: group_size, ngroups
3555 : TYPE(mp_para_env_type), POINTER :: para_env, para_env_sub
3556 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
3557 :
3558 : CHARACTER(len=*), PARAMETER :: routineN = 'get_subgroup_2c_derivs'
3559 :
3560 : INTEGER :: handle, i, i_img, i_RI, i_spin, i_xyz, &
3561 : iatom, iproc, j, natom, nblks, nimg, &
3562 : nspins
3563 : INTEGER(int_8) :: nze
3564 : INTEGER, ALLOCATABLE, DIMENSION(:) :: bsizes_RI_ext, bsizes_RI_ext_split, &
3565 46 : dist1, dist2
3566 : INTEGER, DIMENSION(2) :: pdims_2d
3567 92 : INTEGER, DIMENSION(:), POINTER :: col_dist, RI_blk_size, row_dist
3568 46 : INTEGER, DIMENSION(:, :), POINTER :: dbcsr_pgrid
3569 : REAL(dp) :: occ
3570 : TYPE(dbcsr_distribution_type) :: dbcsr_dist_sub
3571 138 : TYPE(dbt_pgrid_type) :: pgrid_2d
3572 598 : TYPE(dbt_type) :: work, work_sub
3573 :
3574 46 : CALL timeset(routineN, handle)
3575 :
3576 : !Note: a fair portion of this routine is copied from the energy version of it
3577 : !Create the 2d pgrid
3578 46 : pdims_2d = 0
3579 46 : CALL dbt_pgrid_create(para_env_sub, pdims_2d, pgrid_2d)
3580 :
3581 46 : natom = SIZE(ri_data%bsizes_RI)
3582 46 : nblks = SIZE(ri_data%bsizes_RI_split)
3583 138 : ALLOCATE (bsizes_RI_ext(ri_data%ncell_RI*natom))
3584 138 : ALLOCATE (bsizes_RI_ext_split(ri_data%ncell_RI*nblks))
3585 318 : DO i_RI = 1, ri_data%ncell_RI
3586 816 : bsizes_RI_ext((i_RI - 1)*natom + 1:i_RI*natom) = ri_data%bsizes_RI(:)
3587 1622 : bsizes_RI_ext_split((i_RI - 1)*nblks + 1:i_RI*nblks) = ri_data%bsizes_RI_split(:)
3588 : END DO
3589 :
3590 : !nRI x nRI 2c tensors
3591 : CALL create_2c_tensor(t_2c_inv(1), dist1, dist2, pgrid_2d, &
3592 : bsizes_RI_ext, bsizes_RI_ext, &
3593 : name="(RI | RI)")
3594 46 : DEALLOCATE (dist1, dist2)
3595 :
3596 46 : CALL dbt_create(t_2c_inv(1), t_2c_bint(1))
3597 46 : CALL dbt_create(t_2c_inv(1), t_2c_metric(1))
3598 92 : DO iatom = 2, natom
3599 46 : CALL dbt_create(t_2c_inv(1), t_2c_inv(iatom))
3600 46 : CALL dbt_create(t_2c_inv(1), t_2c_bint(iatom))
3601 92 : CALL dbt_create(t_2c_inv(1), t_2c_metric(iatom))
3602 : END DO
3603 46 : CALL dbt_create(t_2c_inv(1), t_2c_work(1))
3604 46 : CALL dbt_create(t_2c_inv(1), t_2c_work(2))
3605 46 : CALL dbt_create(t_2c_inv(1), t_2c_work(3))
3606 46 : CALL dbt_create(t_2c_inv(1), t_2c_work(4))
3607 :
3608 : CALL create_2c_tensor(t_2c_work(5), dist1, dist2, pgrid_2d, &
3609 : bsizes_RI_ext_split, bsizes_RI_ext_split, &
3610 46 : name="(RI | RI)")
3611 46 : DEALLOCATE (dist1, dist2)
3612 :
3613 : !copy the data from the main group.
3614 138 : DO iatom = 1, natom
3615 92 : CALL copy_2c_to_subgroup(t_2c_inv(iatom), ri_data%t_2c_inv(1, iatom), group_size, ngroups, para_env)
3616 92 : CALL copy_2c_to_subgroup(t_2c_bint(iatom), ri_data%t_2c_int(1, iatom), group_size, ngroups, para_env)
3617 138 : CALL copy_2c_to_subgroup(t_2c_metric(iatom), ri_data%t_2c_pot(1, iatom), group_size, ngroups, para_env)
3618 : END DO
3619 :
3620 : !This includes the derivatives of the RI metric, for which there is one per atom
3621 184 : DO i_xyz = 1, 3
3622 460 : DO iatom = 1, natom
3623 276 : CALL dbt_create(t_2c_inv(1), t_2c_der_metric_sub(iatom, i_xyz))
3624 : CALL copy_2c_to_subgroup(t_2c_der_metric_sub(iatom, i_xyz), t_2c_der_metric(iatom, i_xyz), &
3625 276 : group_size, ngroups, para_env)
3626 414 : CALL dbt_destroy(t_2c_der_metric(iatom, i_xyz))
3627 : END DO
3628 : END DO
3629 :
3630 : !AO x AO 2c tensors
3631 : CALL create_2c_tensor(rho_ao_t_sub(1, 1), dist1, dist2, pgrid_2d, &
3632 : ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
3633 : name="(AO | AO)")
3634 46 : DEALLOCATE (dist1, dist2)
3635 46 : nspins = SIZE(rho_ao_t, 1)
3636 46 : nimg = SIZE(rho_ao_t, 2)
3637 :
3638 1148 : DO i_img = 1, nimg
3639 2430 : DO i_spin = 1, nspins
3640 1282 : IF (.NOT. (i_img == 1 .AND. i_spin == 1)) &
3641 1236 : CALL dbt_create(rho_ao_t_sub(1, 1), rho_ao_t_sub(i_spin, i_img))
3642 : CALL copy_2c_to_subgroup(rho_ao_t_sub(i_spin, i_img), rho_ao_t(i_spin, i_img), &
3643 1282 : group_size, ngroups, para_env)
3644 2384 : CALL dbt_destroy(rho_ao_t(i_spin, i_img))
3645 : END DO
3646 : END DO
3647 :
3648 : !The RIxRI matrices, going through tensors
3649 : CALL create_2c_tensor(work_sub, dist1, dist2, pgrid_2d, &
3650 : ri_data%bsizes_RI, ri_data%bsizes_RI, &
3651 : name="(RI | RI)")
3652 46 : CALL dbt_create(ri_data%kp_mat_2c_pot(1, 1), work)
3653 :
3654 184 : ALLOCATE (dbcsr_pgrid(0:pdims_2d(1) - 1, 0:pdims_2d(2) - 1))
3655 46 : iproc = 0
3656 92 : DO i = 0, pdims_2d(1) - 1
3657 138 : DO j = 0, pdims_2d(2) - 1
3658 46 : dbcsr_pgrid(i, j) = iproc
3659 92 : iproc = iproc + 1
3660 : END DO
3661 : END DO
3662 :
3663 : !We need to have the same exact 2d block dist as the tensors
3664 184 : ALLOCATE (col_dist(natom), row_dist(natom))
3665 138 : row_dist(:) = dist1(:)
3666 138 : col_dist(:) = dist2(:)
3667 :
3668 92 : ALLOCATE (RI_blk_size(natom))
3669 138 : RI_blk_size(:) = ri_data%bsizes_RI(:)
3670 :
3671 : CALL dbcsr_distribution_new(dbcsr_dist_sub, group=para_env_sub%get_handle(), pgrid=dbcsr_pgrid, &
3672 46 : row_dist=row_dist, col_dist=col_dist)
3673 : CALL dbcsr_create(mat_2c_pot(1), dist=dbcsr_dist_sub, name="sub", matrix_type=dbcsr_type_no_symmetry, &
3674 46 : row_blk_size=RI_blk_size, col_blk_size=RI_blk_size)
3675 :
3676 : !The HFX potential
3677 1148 : DO i_img = 1, nimg
3678 1102 : IF (i_img > 1) CALL dbcsr_create(mat_2c_pot(i_img), template=mat_2c_pot(1))
3679 1102 : CALL dbt_copy_matrix_to_tensor(ri_data%kp_mat_2c_pot(1, i_img), work)
3680 1102 : CALL get_tensor_occupancy(work, nze, occ)
3681 1102 : IF (nze == 0) CYCLE
3682 :
3683 722 : CALL copy_2c_to_subgroup(work_sub, work, group_size, ngroups, para_env)
3684 722 : CALL dbt_copy_tensor_to_matrix(work_sub, mat_2c_pot(i_img))
3685 722 : CALL dbcsr_filter(mat_2c_pot(i_img), ri_data%filter_eps)
3686 1870 : CALL dbt_clear(work_sub)
3687 : END DO
3688 :
3689 : !The derivatives of the HFX potential
3690 184 : DO i_xyz = 1, 3
3691 3490 : DO i_img = 1, nimg
3692 3306 : CALL dbcsr_create(mat_der_pot_sub(i_img, i_xyz), template=mat_2c_pot(1))
3693 3306 : CALL dbt_copy_matrix_to_tensor(mat_der_pot(i_img, i_xyz), work)
3694 3306 : CALL dbcsr_release(mat_der_pot(i_img, i_xyz))
3695 3306 : CALL get_tensor_occupancy(work, nze, occ)
3696 3306 : IF (nze == 0) CYCLE
3697 :
3698 2162 : CALL copy_2c_to_subgroup(work_sub, work, group_size, ngroups, para_env)
3699 2162 : CALL dbt_copy_tensor_to_matrix(work_sub, mat_der_pot_sub(i_img, i_xyz))
3700 2162 : CALL dbcsr_filter(mat_der_pot_sub(i_img, i_xyz), ri_data%filter_eps)
3701 5606 : CALL dbt_clear(work_sub)
3702 : END DO
3703 : END DO
3704 :
3705 46 : CALL dbt_destroy(work)
3706 46 : CALL dbt_destroy(work_sub)
3707 46 : CALL dbt_pgrid_destroy(pgrid_2d)
3708 46 : CALL dbcsr_distribution_release(dbcsr_dist_sub)
3709 46 : DEALLOCATE (col_dist, row_dist, RI_blk_size, dbcsr_pgrid)
3710 :
3711 46 : CALL timestop(handle)
3712 :
3713 368 : END SUBROUTINE get_subgroup_2c_derivs
3714 :
3715 : ! **************************************************************************************************
3716 : !> \brief copy all required 3c derivative tensors from the main MPI group to the subgroups
3717 : !> \param t_3c_work_2 ...
3718 : !> \param t_3c_work_3 ...
3719 : !> \param t_3c_der_AO ...
3720 : !> \param t_3c_der_AO_sub ...
3721 : !> \param t_3c_der_RI ...
3722 : !> \param t_3c_der_RI_sub ...
3723 : !> \param t_3c_apc ...
3724 : !> \param t_3c_apc_sub ...
3725 : !> \param t_3c_der_stack ...
3726 : !> \param group_size ...
3727 : !> \param ngroups ...
3728 : !> \param para_env ...
3729 : !> \param para_env_sub ...
3730 : !> \param ri_data ...
3731 : !> \note the tensor containing the derivatives in the main MPI group are deleted for memory
3732 : ! **************************************************************************************************
3733 46 : SUBROUTINE get_subgroup_3c_derivs(t_3c_work_2, t_3c_work_3, t_3c_der_AO, t_3c_der_AO_sub, &
3734 46 : t_3c_der_RI, t_3c_der_RI_sub, t_3c_apc, t_3c_apc_sub, &
3735 46 : t_3c_der_stack, group_size, ngroups, para_env, para_env_sub, &
3736 : ri_data)
3737 : TYPE(dbt_type), DIMENSION(:), INTENT(INOUT) :: t_3c_work_2, t_3c_work_3
3738 : TYPE(dbt_type), DIMENSION(:, :), INTENT(INOUT) :: t_3c_der_AO, t_3c_der_AO_sub, &
3739 : t_3c_der_RI, t_3c_der_RI_sub, &
3740 : t_3c_apc, t_3c_apc_sub
3741 : TYPE(dbt_type), DIMENSION(:), INTENT(INOUT) :: t_3c_der_stack
3742 : INTEGER, INTENT(IN) :: group_size, ngroups
3743 : TYPE(mp_para_env_type), POINTER :: para_env, para_env_sub
3744 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
3745 :
3746 : CHARACTER(len=*), PARAMETER :: routineN = 'get_subgroup_3c_derivs'
3747 :
3748 : INTEGER :: batch_size, handle, i_img, i_RI, i_spin, &
3749 : i_xyz, ib, nblks_AO, nblks_RI, nimg, &
3750 : nspins, pdims(3)
3751 : INTEGER(int_8) :: nze
3752 46 : INTEGER, ALLOCATABLE, DIMENSION(:) :: bsizes_RI_ext, bsizes_RI_ext_split, &
3753 46 : bsizes_stack, dist1, dist2, dist3, &
3754 46 : dist_stack, idx_to_at
3755 46 : INTEGER, ALLOCATABLE, DIMENSION(:, :, :, :) :: subgroup_dest
3756 : REAL(dp) :: occ
3757 414 : TYPE(dbt_distribution_type) :: t_dist
3758 138 : TYPE(dbt_pgrid_type) :: pgrid
3759 1150 : TYPE(dbt_type) :: tmp, work_atom_block, work_atom_block_sub
3760 :
3761 46 : CALL timeset(routineN, handle)
3762 :
3763 : !We use intermediate tensors with larger block size for more optimized communication
3764 46 : nblks_RI = SIZE(ri_data%bsizes_RI)
3765 138 : ALLOCATE (bsizes_RI_ext(ri_data%ncell_RI*nblks_RI))
3766 318 : DO i_RI = 1, ri_data%ncell_RI
3767 862 : bsizes_RI_ext((i_RI - 1)*nblks_RI + 1:i_RI*nblks_RI) = ri_data%bsizes_RI(:)
3768 : END DO
3769 :
3770 46 : CALL dbt_get_info(ri_data%kp_t_3c_int(1), pdims=pdims)
3771 46 : CALL dbt_pgrid_create(para_env_sub, pdims, pgrid)
3772 :
3773 : CALL create_3c_tensor(work_atom_block_sub, dist1, dist2, dist3, &
3774 : pgrid, bsizes_RI_ext, ri_data%bsizes_AO, &
3775 46 : ri_data%bsizes_AO, [1], [2, 3], name="(RI | AO AO)")
3776 46 : DEALLOCATE (dist1, dist2, dist3)
3777 :
3778 : CALL create_3c_tensor(work_atom_block, dist1, dist2, dist3, &
3779 : ri_data%pgrid_2, bsizes_RI_ext, ri_data%bsizes_AO, &
3780 46 : ri_data%bsizes_AO, [1], [2, 3], name="(RI | AO AO)")
3781 46 : DEALLOCATE (dist1, dist2, dist3)
3782 46 : CALL dbt_pgrid_destroy(pgrid)
3783 :
3784 : CALL get_3c_subgroup_dest(subgroup_dest, work_atom_block_sub, work_atom_block, &
3785 46 : group_size, ngroups, para_env)
3786 :
3787 : !We use the 3c integrals on the subgroup as template for the derivatives
3788 46 : nimg = ri_data%nimg
3789 184 : DO i_xyz = 1, 3
3790 3444 : DO i_img = 1, nimg
3791 3306 : CALL dbt_create(ri_data%kp_t_3c_int(1), t_3c_der_AO_sub(i_img, i_xyz))
3792 3306 : CALL get_tensor_occupancy(t_3c_der_AO(i_img, i_xyz), nze, occ)
3793 3306 : IF (nze == 0) CYCLE
3794 :
3795 2138 : CALL dbt_copy(t_3c_der_AO(i_img, i_xyz), work_atom_block, move_data=.TRUE.)
3796 : CALL copy_3c_to_subgroup(work_atom_block_sub, work_atom_block, &
3797 2138 : ngroups, para_env, subgroup_dest)
3798 2138 : CALL dbt_copy(work_atom_block_sub, t_3c_der_AO_sub(i_img, i_xyz), move_data=.TRUE.)
3799 5582 : CALL dbt_filter(t_3c_der_AO_sub(i_img, i_xyz), ri_data%filter_eps)
3800 : END DO
3801 :
3802 3444 : DO i_img = 1, nimg
3803 3306 : CALL dbt_create(ri_data%kp_t_3c_int(1), t_3c_der_RI_sub(i_img, i_xyz))
3804 3306 : CALL get_tensor_occupancy(t_3c_der_RI(i_img, i_xyz), nze, occ)
3805 3306 : IF (nze == 0) CYCLE
3806 :
3807 2118 : CALL dbt_copy(t_3c_der_RI(i_img, i_xyz), work_atom_block, move_data=.TRUE.)
3808 : CALL copy_3c_to_subgroup(work_atom_block_sub, work_atom_block, &
3809 2118 : ngroups, para_env, subgroup_dest)
3810 2118 : CALL dbt_copy(work_atom_block_sub, t_3c_der_RI_sub(i_img, i_xyz), move_data=.TRUE.)
3811 5562 : CALL dbt_filter(t_3c_der_RI_sub(i_img, i_xyz), ri_data%filter_eps)
3812 : END DO
3813 :
3814 3490 : DO i_img = 1, nimg
3815 3306 : CALL dbt_destroy(t_3c_der_RI(i_img, i_xyz))
3816 3444 : CALL dbt_destroy(t_3c_der_AO(i_img, i_xyz))
3817 : END DO
3818 : END DO
3819 46 : CALL dbt_destroy(work_atom_block_sub)
3820 46 : CALL dbt_destroy(work_atom_block)
3821 46 : DEALLOCATE (subgroup_dest)
3822 :
3823 : !Deal with t_3c_apc
3824 46 : nblks_RI = SIZE(ri_data%bsizes_RI_split)
3825 138 : ALLOCATE (bsizes_RI_ext_split(ri_data%ncell_RI*nblks_RI))
3826 318 : DO i_RI = 1, ri_data%ncell_RI
3827 1622 : bsizes_RI_ext_split((i_RI - 1)*nblks_RI + 1:i_RI*nblks_RI) = ri_data%bsizes_RI_split(:)
3828 : END DO
3829 :
3830 46 : pdims = 0
3831 : CALL dbt_pgrid_create(para_env_sub, pdims, pgrid, &
3832 184 : tensor_dims=[1, SIZE(bsizes_RI_ext_split), batch_size*SIZE(ri_data%bsizes_AO_split)])
3833 :
3834 : CALL create_3c_tensor(work_atom_block_sub, dist1, dist2, dist3, &
3835 : pgrid, ri_data%bsizes_AO, bsizes_RI_ext, &
3836 46 : ri_data%bsizes_AO, [1], [2, 3], name="(AO RI | AO)")
3837 46 : DEALLOCATE (dist1, dist2, dist3)
3838 :
3839 : CALL create_3c_tensor(work_atom_block, dist1, dist2, dist3, &
3840 : ri_data%pgrid_1, ri_data%bsizes_AO, bsizes_RI_ext, &
3841 46 : ri_data%bsizes_AO, [1], [2, 3], name="(AO RI | AO)")
3842 46 : DEALLOCATE (dist1, dist2, dist3)
3843 :
3844 : CALL create_3c_tensor(tmp, dist1, dist2, dist3, &
3845 : pgrid, ri_data%bsizes_AO_split, bsizes_RI_ext_split, &
3846 46 : ri_data%bsizes_AO_split, [1], [2, 3], name="(AO RI | AO)")
3847 46 : DEALLOCATE (dist1, dist2, dist3)
3848 :
3849 : CALL get_3c_subgroup_dest(subgroup_dest, work_atom_block_sub, work_atom_block, &
3850 46 : group_size, ngroups, para_env)
3851 :
3852 138 : ALLOCATE (idx_to_at(SIZE(ri_data%bsizes_AO)))
3853 46 : CALL get_idx_to_atom(idx_to_at, ri_data%bsizes_AO, ri_data%bsizes_AO)
3854 46 : nspins = SIZE(t_3c_apc, 1)
3855 1148 : DO i_img = 1, nimg
3856 2384 : DO i_spin = 1, nspins
3857 1282 : CALL dbt_create(tmp, t_3c_apc_sub(i_spin, i_img))
3858 1282 : CALL get_tensor_occupancy(t_3c_apc(i_spin, i_img), nze, occ)
3859 1282 : IF (nze == 0) CYCLE
3860 1258 : CALL dbt_copy(t_3c_apc(i_spin, i_img), work_atom_block, move_data=.TRUE.)
3861 : CALL copy_3c_to_subgroup(work_atom_block_sub, work_atom_block, ngroups, para_env, &
3862 1258 : subgroup_dest, ri_data%iatom_to_subgroup, 1, idx_to_at)
3863 1258 : CALL dbt_copy(work_atom_block_sub, t_3c_apc_sub(i_spin, i_img), move_data=.TRUE.)
3864 3642 : CALL dbt_filter(t_3c_apc_sub(i_spin, i_img), ri_data%filter_eps)
3865 : END DO
3866 2430 : DO i_spin = 1, nspins
3867 2384 : CALL dbt_destroy(t_3c_apc(i_spin, i_img))
3868 : END DO
3869 : END DO
3870 46 : CALL dbt_destroy(tmp)
3871 46 : CALL dbt_destroy(work_atom_block)
3872 46 : CALL dbt_destroy(work_atom_block_sub)
3873 46 : CALL dbt_pgrid_destroy(pgrid)
3874 :
3875 : !t_3c_work_3 based on structure of 3c integrals/derivs
3876 46 : batch_size = ri_data%kp_stack_size
3877 46 : nblks_AO = SIZE(ri_data%bsizes_AO_split)
3878 138 : ALLOCATE (bsizes_stack(batch_size*nblks_AO))
3879 814 : DO ib = 1, batch_size
3880 3854 : bsizes_stack((ib - 1)*nblks_AO + 1:ib*nblks_AO) = ri_data%bsizes_AO_split(:)
3881 : END DO
3882 :
3883 322 : ALLOCATE (dist1(ri_data%ncell_RI*nblks_RI), dist2(nblks_AO), dist3(nblks_AO))
3884 : CALL dbt_get_info(ri_data%kp_t_3c_int(1), proc_dist_1=dist1, proc_dist_2=dist2, &
3885 46 : proc_dist_3=dist3, pdims=pdims)
3886 :
3887 138 : ALLOCATE (dist_stack(batch_size*nblks_AO))
3888 814 : DO ib = 1, batch_size
3889 3854 : dist_stack((ib - 1)*nblks_AO + 1:ib*nblks_AO) = dist3(:)
3890 : END DO
3891 :
3892 46 : CALL dbt_pgrid_create(para_env_sub, pdims, pgrid)
3893 46 : CALL dbt_distribution_new(t_dist, pgrid, dist1, dist2, dist_stack)
3894 : CALL dbt_create(t_3c_work_3(1), "work_3_stack", t_dist, [1], [2, 3], &
3895 46 : bsizes_RI_ext_split, ri_data%bsizes_AO_split, bsizes_stack)
3896 46 : CALL dbt_create(t_3c_work_3(1), t_3c_work_3(2))
3897 46 : CALL dbt_create(t_3c_work_3(1), t_3c_work_3(3))
3898 46 : CALL dbt_create(t_3c_work_3(1), t_3c_work_3(4))
3899 46 : CALL dbt_distribution_destroy(t_dist)
3900 46 : CALL dbt_pgrid_destroy(pgrid)
3901 46 : DEALLOCATE (dist1, dist2, dist3, dist_stack)
3902 :
3903 : !the derivatives are stacked in the same way
3904 46 : CALL dbt_create(t_3c_work_3(1), t_3c_der_stack(1))
3905 46 : CALL dbt_create(t_3c_work_3(1), t_3c_der_stack(2))
3906 46 : CALL dbt_create(t_3c_work_3(1), t_3c_der_stack(3))
3907 46 : CALL dbt_create(t_3c_work_3(1), t_3c_der_stack(4))
3908 46 : CALL dbt_create(t_3c_work_3(1), t_3c_der_stack(5))
3909 46 : CALL dbt_create(t_3c_work_3(1), t_3c_der_stack(6))
3910 :
3911 : !t_3c_work_2 based on structure of t_3c_apc
3912 322 : ALLOCATE (dist1(nblks_AO), dist2(ri_data%ncell_RI*nblks_RI), dist3(nblks_AO))
3913 : CALL dbt_get_info(t_3c_apc_sub(1, 1), proc_dist_1=dist1, proc_dist_2=dist2, &
3914 46 : proc_dist_3=dist3, pdims=pdims)
3915 :
3916 138 : ALLOCATE (dist_stack(batch_size*nblks_AO))
3917 814 : DO ib = 1, batch_size
3918 3854 : dist_stack((ib - 1)*nblks_AO + 1:ib*nblks_AO) = dist3(:)
3919 : END DO
3920 :
3921 46 : CALL dbt_pgrid_create(para_env_sub, pdims, pgrid)
3922 46 : CALL dbt_distribution_new(t_dist, pgrid, dist1, dist2, dist_stack)
3923 : CALL dbt_create(t_3c_work_2(1), "work_3_stack", t_dist, [1], [2, 3], &
3924 46 : ri_data%bsizes_AO_split, bsizes_RI_ext_split, bsizes_stack)
3925 46 : CALL dbt_create(t_3c_work_2(1), t_3c_work_2(2))
3926 46 : CALL dbt_create(t_3c_work_2(1), t_3c_work_2(3))
3927 46 : CALL dbt_distribution_destroy(t_dist)
3928 46 : CALL dbt_pgrid_destroy(pgrid)
3929 46 : DEALLOCATE (dist1, dist2, dist3, dist_stack)
3930 :
3931 46 : CALL timestop(handle)
3932 :
3933 92 : END SUBROUTINE get_subgroup_3c_derivs
3934 :
3935 : ! **************************************************************************************************
3936 : !> \brief A routine that reorders the t_3c_int tensors such that all items which are fully empty
3937 : !> are bunched together. This way, we can get much more efficient screening based on NZE
3938 : !> \param t_3c_ints ...
3939 : !> \param ri_data ...
3940 : ! **************************************************************************************************
3941 80 : SUBROUTINE reorder_3c_ints(t_3c_ints, ri_data)
3942 : TYPE(dbt_type), DIMENSION(:), INTENT(INOUT) :: t_3c_ints
3943 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
3944 :
3945 : CHARACTER(LEN=*), PARAMETER :: routineN = 'reorder_3c_ints'
3946 :
3947 : INTEGER :: handle, i_img, idx, idx_empty, idx_full, &
3948 : nimg
3949 : INTEGER(int_8) :: nze
3950 : REAL(dp) :: occ
3951 80 : TYPE(dbt_type), ALLOCATABLE, DIMENSION(:) :: t_3c_tmp
3952 :
3953 80 : CALL timeset(routineN, handle)
3954 :
3955 80 : nimg = ri_data%nimg
3956 2878 : ALLOCATE (t_3c_tmp(nimg))
3957 2078 : DO i_img = 1, nimg
3958 1998 : CALL dbt_create(t_3c_ints(i_img), t_3c_tmp(i_img))
3959 2078 : CALL dbt_copy(t_3c_ints(i_img), t_3c_tmp(i_img), move_data=.TRUE.)
3960 : END DO
3961 :
3962 : !Loop over the images, check if ints have NZE == 0, and put them at the start or end of the
3963 : !initial tensor array. Keep the mapping in an array
3964 240 : ALLOCATE (ri_data%idx_to_img(nimg))
3965 80 : idx_full = 0
3966 80 : idx_empty = nimg + 1
3967 :
3968 2078 : DO i_img = 1, nimg
3969 1998 : CALL get_tensor_occupancy(t_3c_tmp(i_img), nze, occ)
3970 1998 : IF (nze == 0) THEN
3971 542 : idx_empty = idx_empty - 1
3972 542 : CALL dbt_copy(t_3c_tmp(i_img), t_3c_ints(idx_empty), move_data=.TRUE.)
3973 542 : ri_data%idx_to_img(idx_empty) = i_img
3974 : ELSE
3975 1456 : idx_full = idx_full + 1
3976 1456 : CALL dbt_copy(t_3c_tmp(i_img), t_3c_ints(idx_full), move_data=.TRUE.)
3977 1456 : ri_data%idx_to_img(idx_full) = i_img
3978 : END IF
3979 4076 : CALL dbt_destroy(t_3c_tmp(i_img))
3980 : END DO
3981 :
3982 : !store the highest image index with non-zero integrals
3983 80 : ri_data%nimg_nze = idx_full
3984 :
3985 160 : ALLOCATE (ri_data%img_to_idx(nimg))
3986 2078 : DO idx = 1, nimg
3987 2078 : ri_data%img_to_idx(ri_data%idx_to_img(idx)) = idx
3988 : END DO
3989 :
3990 80 : CALL timestop(handle)
3991 :
3992 2158 : END SUBROUTINE reorder_3c_ints
3993 :
3994 : ! **************************************************************************************************
3995 : !> \brief A routine that reorders the 3c derivatives, the same way that the integrals are, also to
3996 : !> increase efficiency of screening
3997 : !> \param t_3c_derivs ...
3998 : !> \param ri_data ...
3999 : ! **************************************************************************************************
4000 92 : SUBROUTINE reorder_3c_derivs(t_3c_derivs, ri_data)
4001 : TYPE(dbt_type), DIMENSION(:, :), INTENT(INOUT) :: t_3c_derivs
4002 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
4003 :
4004 : CHARACTER(LEN=*), PARAMETER :: routineN = 'reorder_3c_derivs'
4005 :
4006 : INTEGER :: handle, i_img, i_xyz, idx, nimg
4007 : INTEGER(int_8) :: nze
4008 : REAL(dp) :: occ
4009 92 : TYPE(dbt_type), ALLOCATABLE, DIMENSION(:) :: t_3c_tmp
4010 :
4011 92 : CALL timeset(routineN, handle)
4012 :
4013 92 : nimg = ri_data%nimg
4014 3216 : ALLOCATE (t_3c_tmp(nimg))
4015 2296 : DO i_img = 1, nimg
4016 2296 : CALL dbt_create(t_3c_derivs(1, 1), t_3c_tmp(i_img))
4017 : END DO
4018 :
4019 368 : DO i_xyz = 1, 3
4020 6888 : DO i_img = 1, nimg
4021 6888 : CALL dbt_copy(t_3c_derivs(i_img, i_xyz), t_3c_tmp(i_img), move_data=.TRUE.)
4022 : END DO
4023 6980 : DO i_img = 1, nimg
4024 6612 : idx = ri_data%img_to_idx(i_img)
4025 6612 : CALL dbt_copy(t_3c_tmp(i_img), t_3c_derivs(idx, i_xyz), move_data=.TRUE.)
4026 6612 : CALL get_tensor_occupancy(t_3c_derivs(idx, i_xyz), nze, occ)
4027 6888 : IF (nze > 0) ri_data%nimg_nze = MAX(idx, ri_data%nimg_nze)
4028 : END DO
4029 : END DO
4030 :
4031 2296 : DO i_img = 1, nimg
4032 2296 : CALL dbt_destroy(t_3c_tmp(i_img))
4033 : END DO
4034 :
4035 92 : CALL timestop(handle)
4036 :
4037 2388 : END SUBROUTINE reorder_3c_derivs
4038 :
4039 : ! **************************************************************************************************
4040 : !> \brief Get the sparsity pattern related to the non-symmetric AO basis overlap neighbor list
4041 : !> \param pattern ...
4042 : !> \param ri_data ...
4043 : !> \param qs_env ...
4044 : ! **************************************************************************************************
4045 294 : SUBROUTINE get_sparsity_pattern(pattern, ri_data, qs_env)
4046 : INTEGER, DIMENSION(:, :, :), INTENT(INOUT) :: pattern
4047 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
4048 : TYPE(qs_environment_type), POINTER :: qs_env
4049 :
4050 : INTEGER :: iatom, j_img, jatom, mj_img, natom, nimg
4051 294 : INTEGER, ALLOCATABLE, DIMENSION(:) :: bins
4052 294 : INTEGER, ALLOCATABLE, DIMENSION(:, :, :) :: tmp_pattern
4053 : INTEGER, DIMENSION(3) :: cell_j
4054 294 : INTEGER, DIMENSION(:, :), POINTER :: index_to_cell
4055 294 : INTEGER, DIMENSION(:, :, :), POINTER :: cell_to_index
4056 : TYPE(dft_control_type), POINTER :: dft_control
4057 : TYPE(kpoint_type), POINTER :: kpoints
4058 : TYPE(mp_para_env_type), POINTER :: para_env
4059 : TYPE(neighbor_list_iterator_p_type), &
4060 294 : DIMENSION(:), POINTER :: nl_iterator
4061 : TYPE(neighbor_list_set_p_type), DIMENSION(:), &
4062 294 : POINTER :: nl_2c
4063 :
4064 294 : NULLIFY (nl_2c, nl_iterator, kpoints, cell_to_index, dft_control, index_to_cell, para_env)
4065 :
4066 294 : CALL get_qs_env(qs_env, kpoints=kpoints, dft_control=dft_control, para_env=para_env, natom=natom)
4067 294 : CALL get_kpoint_info(kpoints, cell_to_index=cell_to_index, index_to_cell=index_to_cell, sab_nl=nl_2c)
4068 :
4069 294 : nimg = ri_data%nimg
4070 55510 : pattern(:, :, :) = 0
4071 :
4072 : !We use the symmetric nl for all images that have an opposite cell
4073 294 : CALL neighbor_list_iterator_create(nl_iterator, nl_2c)
4074 12973 : DO WHILE (neighbor_list_iterate(nl_iterator) == 0)
4075 12679 : CALL get_iterator_info(nl_iterator, iatom=iatom, jatom=jatom, cell=cell_j)
4076 :
4077 12679 : j_img = cell_to_index(cell_j(1), cell_j(2), cell_j(3))
4078 12679 : IF (j_img > nimg .OR. j_img < 1) CYCLE
4079 :
4080 9696 : mj_img = get_opp_index(j_img, qs_env)
4081 9696 : IF (mj_img > nimg .OR. mj_img < 1) CYCLE
4082 :
4083 9249 : IF (ri_data%present_images(j_img) == 0) CYCLE
4084 :
4085 12679 : pattern(iatom, jatom, j_img) = 1
4086 : END DO
4087 294 : CALL neighbor_list_iterator_release(nl_iterator)
4088 :
4089 : !If there is no opposite cell present, then we take into account the non-symmetric nl
4090 294 : CALL get_kpoint_info(kpoints, sab_nl_nosym=nl_2c)
4091 :
4092 294 : CALL neighbor_list_iterator_create(nl_iterator, nl_2c)
4093 17146 : DO WHILE (neighbor_list_iterate(nl_iterator) == 0)
4094 16852 : CALL get_iterator_info(nl_iterator, iatom=iatom, jatom=jatom, cell=cell_j)
4095 :
4096 16852 : j_img = cell_to_index(cell_j(1), cell_j(2), cell_j(3))
4097 16852 : IF (j_img > nimg .OR. j_img < 1) CYCLE
4098 :
4099 12452 : mj_img = get_opp_index(j_img, qs_env)
4100 12452 : IF (mj_img .LE. nimg .AND. mj_img > 0) CYCLE
4101 :
4102 456 : IF (ri_data%present_images(j_img) == 0) CYCLE
4103 :
4104 16852 : pattern(iatom, jatom, j_img) = 1
4105 : END DO
4106 294 : CALL neighbor_list_iterator_release(nl_iterator)
4107 :
4108 110726 : CALL para_env%sum(pattern)
4109 :
4110 : !If the opposite image is considered, then there is no need to compute diagonal twice
4111 7888 : DO j_img = 2, nimg
4112 23076 : DO iatom = 1, natom
4113 22782 : IF (pattern(iatom, iatom, j_img) .NE. 0) THEN
4114 5184 : mj_img = get_opp_index(j_img, qs_env)
4115 5184 : IF (mj_img > nimg .OR. mj_img < 1) CYCLE
4116 5184 : pattern(iatom, iatom, mj_img) = 0
4117 : END IF
4118 : END DO
4119 : END DO
4120 :
4121 : ! We want to equilibrate the sparsity pattern such that there are same amount of blocks
4122 : ! for each atom i of i,j pairs
4123 882 : ALLOCATE (bins(natom))
4124 882 : bins(:) = 0
4125 :
4126 1470 : ALLOCATE (tmp_pattern(natom, natom, nimg))
4127 55510 : tmp_pattern(:, :, :) = 0
4128 8182 : DO j_img = 1, nimg
4129 23958 : DO jatom = 1, natom
4130 55216 : DO iatom = 1, natom
4131 31552 : IF (pattern(iatom, jatom, j_img) == 0) CYCLE
4132 10572 : mj_img = get_opp_index(j_img, qs_env)
4133 :
4134 : !Should we take the i,j,b or th j,i,-b atomic block?
4135 26348 : IF (mj_img > nimg .OR. mj_img < 1) THEN
4136 : !No opposite image, no choice
4137 214 : bins(iatom) = bins(iatom) + 1
4138 214 : tmp_pattern(iatom, jatom, j_img) = 1
4139 : ELSE
4140 :
4141 10358 : IF (bins(iatom) > bins(jatom)) THEN
4142 2184 : bins(jatom) = bins(jatom) + 1
4143 2184 : tmp_pattern(jatom, iatom, mj_img) = 1
4144 : ELSE
4145 8174 : bins(iatom) = bins(iatom) + 1
4146 8174 : tmp_pattern(iatom, jatom, j_img) = 1
4147 : END IF
4148 : END IF
4149 : END DO
4150 : END DO
4151 : END DO
4152 :
4153 : ! -1 => unoccupied, 0 => occupied
4154 55510 : pattern(:, :, :) = tmp_pattern(:, :, :) - 1
4155 :
4156 588 : END SUBROUTINE get_sparsity_pattern
4157 :
4158 : ! **************************************************************************************************
4159 : !> \brief Distribute the iatom, jatom, b_img triplet over the subgroupd to spread the load
4160 : !> the group id for each triplet is passed as the value of sparsity_pattern(i, j, b),
4161 : !> with -1 being an unoccupied block
4162 : !> \param sparsity_pattern ...
4163 : !> \param ngroups ...
4164 : !> \param ri_data ...
4165 : ! **************************************************************************************************
4166 294 : SUBROUTINE get_sub_dist(sparsity_pattern, ngroups, ri_data)
4167 : INTEGER, DIMENSION(:, :, :), INTENT(INOUT) :: sparsity_pattern
4168 : INTEGER, INTENT(IN) :: ngroups
4169 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
4170 :
4171 : INTEGER :: b_img, ctr, iat, iatom, igroup, jatom, &
4172 : natom, nimg, ub
4173 294 : INTEGER, ALLOCATABLE, DIMENSION(:) :: max_at_per_group
4174 : REAL(dp) :: cost
4175 294 : REAL(dp), ALLOCATABLE, DIMENSION(:) :: bins
4176 :
4177 294 : natom = SIZE(sparsity_pattern, 2)
4178 294 : nimg = SIZE(sparsity_pattern, 3)
4179 :
4180 : !To avoid unnecessary data replication accross the subgroups, we want to have a limited number
4181 : !of subgroup with the data of a given iatom. At the minimum, all groups have 1 atom
4182 : !We assume that the cost associated to each iatom is roughly the same
4183 294 : IF (.NOT. ALLOCATED(ri_data%iatom_to_subgroup)) THEN
4184 378 : ALLOCATE (ri_data%iatom_to_subgroup(natom), max_at_per_group(ngroups))
4185 162 : DO iatom = 1, natom
4186 108 : NULLIFY (ri_data%iatom_to_subgroup(iatom)%array)
4187 216 : ALLOCATE (ri_data%iatom_to_subgroup(iatom)%array(ngroups))
4188 378 : ri_data%iatom_to_subgroup(iatom)%array(:) = .FALSE.
4189 : END DO
4190 :
4191 54 : ub = natom/ngroups
4192 54 : IF (ub*ngroups < natom) ub = ub + 1
4193 162 : max_at_per_group(:) = MAX(1, ub)
4194 :
4195 : !We want each atom to be present the same amount of times. Some groups might have more atoms
4196 : !than other to achieve this.
4197 : ctr = 0
4198 162 : DO WHILE (MODULO(SUM(max_at_per_group), natom) .NE. 0)
4199 0 : igroup = MODULO(ctr, ngroups) + 1
4200 0 : max_at_per_group(igroup) = max_at_per_group(igroup) + 1
4201 54 : ctr = ctr + 1
4202 : END DO
4203 :
4204 : ctr = 0
4205 162 : DO igroup = 1, ngroups
4206 270 : DO iat = 1, max_at_per_group(igroup)
4207 108 : iatom = MODULO(ctr, natom) + 1
4208 108 : ri_data%iatom_to_subgroup(iatom)%array(igroup) = .TRUE.
4209 216 : ctr = ctr + 1
4210 : END DO
4211 : END DO
4212 : END IF
4213 :
4214 882 : ALLOCATE (bins(ngroups))
4215 882 : bins = 0.0_dp
4216 8182 : DO b_img = 1, nimg
4217 23958 : DO jatom = 1, natom
4218 55216 : DO iatom = 1, natom
4219 31552 : IF (sparsity_pattern(iatom, jatom, b_img) == -1) CYCLE
4220 52860 : igroup = MINLOC(bins, 1, MASK=ri_data%iatom_to_subgroup(iatom)%array) - 1
4221 :
4222 : !Use cost information from previous SCF if available
4223 654670 : IF (ANY(ri_data%kp_cost > EPSILON(0.0_dp))) THEN
4224 7948 : cost = ri_data%kp_cost(iatom, jatom, b_img)
4225 : ELSE
4226 2624 : cost = REAL(ri_data%bsizes_AO(iatom)*ri_data%bsizes_AO(jatom), dp)
4227 : END IF
4228 10572 : bins(igroup + 1) = bins(igroup + 1) + cost
4229 47328 : sparsity_pattern(iatom, jatom, b_img) = igroup
4230 : END DO
4231 : END DO
4232 : END DO
4233 :
4234 294 : END SUBROUTINE get_sub_dist
4235 :
4236 : ! **************************************************************************************************
4237 : !> \brief A rouine that updates the sparsity pattern for force calculation, where all i,j,b combinations
4238 : !> are visited.
4239 : !> \param force_pattern ...
4240 : !> \param scf_pattern ...
4241 : !> \param ngroups ...
4242 : !> \param ri_data ...
4243 : !> \param qs_env ...
4244 : ! **************************************************************************************************
4245 46 : SUBROUTINE update_pattern_to_forces(force_pattern, scf_pattern, ngroups, ri_data, qs_env)
4246 : INTEGER, DIMENSION(:, :, :), INTENT(INOUT) :: force_pattern, scf_pattern
4247 : INTEGER, INTENT(IN) :: ngroups
4248 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
4249 : TYPE(qs_environment_type), POINTER :: qs_env
4250 :
4251 : INTEGER :: b_img, iatom, igroup, jatom, mb_img, &
4252 : natom, nimg
4253 46 : REAL(dp), ALLOCATABLE, DIMENSION(:) :: bins
4254 :
4255 46 : natom = SIZE(scf_pattern, 2)
4256 46 : nimg = SIZE(scf_pattern, 3)
4257 :
4258 138 : ALLOCATE (bins(ngroups))
4259 138 : bins = 0.0_dp
4260 :
4261 1148 : DO b_img = 1, nimg
4262 1102 : mb_img = get_opp_index(b_img, qs_env)
4263 3352 : DO jatom = 1, natom
4264 7714 : DO iatom = 1, natom
4265 : !Important: same distribution as KS matrix, because reuse t_3c_apc
4266 22040 : igroup = MINLOC(bins, 1, MASK=ri_data%iatom_to_subgroup(iatom)%array) - 1
4267 :
4268 : !check that block not already treated
4269 4408 : IF (scf_pattern(iatom, jatom, b_img) > -1) CYCLE
4270 :
4271 : !If not, take the cost of block j, i, -b (same energy contribution)
4272 5320 : IF (mb_img > 0 .AND. mb_img .LE. nimg) THEN
4273 2672 : IF (scf_pattern(jatom, iatom, mb_img) == -1) CYCLE
4274 1164 : bins(igroup + 1) = bins(igroup + 1) + ri_data%kp_cost(jatom, iatom, mb_img)
4275 1164 : force_pattern(iatom, jatom, b_img) = igroup
4276 : END IF
4277 : END DO
4278 : END DO
4279 : END DO
4280 :
4281 46 : END SUBROUTINE update_pattern_to_forces
4282 :
4283 : ! **************************************************************************************************
4284 : !> \brief A routine that determines the extend of the KP RI-HFX periodic images, including for the
4285 : !> extension of the RI basis
4286 : !> \param ri_data ...
4287 : !> \param qs_env ...
4288 : ! **************************************************************************************************
4289 80 : SUBROUTINE get_kp_and_ri_images(ri_data, qs_env)
4290 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
4291 : TYPE(qs_environment_type), POINTER :: qs_env
4292 :
4293 : CHARACTER(LEN=*), PARAMETER :: routineN = 'get_kp_and_ri_images'
4294 :
4295 : CHARACTER(LEN=512) :: warning_msg
4296 : INTEGER :: cell_j(3), cell_k(3), handle, i_img, iatom, ikind, j_img, jatom, jcell, katom, &
4297 : kcell, kp_index_lbounds(3), kp_index_ubounds(3), natom, ngroups, nimg, nkind, pcoord(3), &
4298 : pdims(3)
4299 80 : INTEGER, ALLOCATABLE, DIMENSION(:) :: dist_AO_1, dist_AO_2, dist_RI, &
4300 80 : nRI_per_atom, present_img, RI_cells
4301 80 : INTEGER, DIMENSION(:, :, :), POINTER :: cell_to_index
4302 : REAL(dp) :: bump_fact, dij, dik, image_range, &
4303 : RI_range, rij(3), rik(3)
4304 560 : TYPE(dbt_type) :: t_dummy
4305 : TYPE(dft_control_type), POINTER :: dft_control
4306 : TYPE(distribution_2d_type), POINTER :: dist_2d
4307 : TYPE(distribution_3d_type) :: dist_3d
4308 : TYPE(gto_basis_set_p_type), ALLOCATABLE, &
4309 80 : DIMENSION(:), TARGET :: basis_set_AO, basis_set_RI
4310 : TYPE(kpoint_type), POINTER :: kpoints
4311 80 : TYPE(mp_cart_type) :: mp_comm_t3c
4312 : TYPE(mp_para_env_type), POINTER :: para_env
4313 : TYPE(neighbor_list_3c_iterator_type) :: nl_3c_iter
4314 : TYPE(neighbor_list_3c_type) :: nl_3c
4315 : TYPE(neighbor_list_iterator_p_type), &
4316 80 : DIMENSION(:), POINTER :: nl_iterator
4317 : TYPE(neighbor_list_set_p_type), DIMENSION(:), &
4318 80 : POINTER :: nl_2c
4319 80 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
4320 80 : TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
4321 : TYPE(section_vals_type), POINTER :: hfx_section
4322 :
4323 80 : NULLIFY (qs_kind_set, dist_2d, nl_2c, nl_iterator, dft_control, &
4324 80 : particle_set, kpoints, para_env, cell_to_index, hfx_section)
4325 :
4326 80 : CALL timeset(routineN, handle)
4327 :
4328 : CALL get_qs_env(qs_env, nkind=nkind, qs_kind_set=qs_kind_set, distribution_2d=dist_2d, &
4329 : dft_control=dft_control, particle_set=particle_set, kpoints=kpoints, &
4330 80 : para_env=para_env, natom=natom)
4331 80 : nimg = dft_control%nimages
4332 80 : CALL get_kpoint_info(kpoints, cell_to_index=cell_to_index)
4333 320 : kp_index_lbounds = LBOUND(cell_to_index)
4334 320 : kp_index_ubounds = UBOUND(cell_to_index)
4335 :
4336 80 : hfx_section => section_vals_get_subs_vals(qs_env%input, "DFT%XC%HF%RI")
4337 80 : CALL section_vals_val_get(hfx_section, "KP_NGROUPS", i_val=ngroups)
4338 :
4339 560 : ALLOCATE (basis_set_RI(nkind), basis_set_AO(nkind))
4340 80 : CALL basis_set_list_setup(basis_set_RI, ri_data%ri_basis_type, qs_kind_set)
4341 80 : CALL basis_set_list_setup(basis_set_AO, ri_data%orb_basis_type, qs_kind_set)
4342 :
4343 : !In case of shortrange HFX potential, it is imprtant to be consistent with the rest of the KP
4344 : !code, and use EPS_SCHWARZ to determine the range (rather than eps_filter_2c in normal RI-HFX)
4345 80 : IF (ri_data%hfx_pot%potential_type == do_potential_short) THEN
4346 0 : CALL erfc_cutoff(ri_data%eps_schwarz, ri_data%hfx_pot%omega, ri_data%hfx_pot%cutoff_radius)
4347 : WRITE (warning_msg, '(A)') &
4348 : "The SHORTANGE HFX potential typically extends over many periodic images, "// &
4349 : "possibly slowing down the calculation. Consider using the TRUNCATED "// &
4350 0 : "potential for better computational performance."
4351 0 : CPWARN(warning_msg)
4352 : END IF
4353 :
4354 : !Determine the range for contributing periodic images, and for the RI basis extension
4355 80 : ri_data%kp_RI_range = 0.0_dp
4356 80 : ri_data%kp_image_range = 0.0_dp
4357 200 : DO ikind = 1, nkind
4358 :
4359 120 : CALL init_interaction_radii_orb_basis(basis_set_AO(ikind)%gto_basis_set, ri_data%eps_pgf_orb)
4360 120 : CALL get_gto_basis_set(basis_set_AO(ikind)%gto_basis_set, kind_radius=RI_range)
4361 120 : ri_data%kp_RI_range = MAX(RI_range, ri_data%kp_RI_range)
4362 :
4363 120 : CALL init_interaction_radii_orb_basis(basis_set_AO(ikind)%gto_basis_set, ri_data%eps_pgf_orb)
4364 120 : CALL init_interaction_radii_orb_basis(basis_set_RI(ikind)%gto_basis_set, ri_data%eps_pgf_orb)
4365 120 : CALL get_gto_basis_set(basis_set_RI(ikind)%gto_basis_set, kind_radius=image_range)
4366 :
4367 120 : image_range = 2.0_dp*image_range + cutoff_screen_factor*ri_data%hfx_pot%cutoff_radius
4368 320 : ri_data%kp_image_range = MAX(image_range, ri_data%kp_image_range)
4369 : END DO
4370 :
4371 80 : CALL section_vals_val_get(hfx_section, "KP_RI_BUMP_FACTOR", r_val=bump_fact)
4372 80 : ri_data%kp_bump_rad = bump_fact*ri_data%kp_RI_range
4373 :
4374 : !For the extent of the KP RI-HFX images, we are limited by the RI-HFX potential in
4375 : !(mu^0 sigma^a|P^0) (P^0|Q^b) (Q^b|nu^b lambda^a+c), if there is no contact between
4376 : !any P^0 and Q^b, then image b does not contribute
4377 : CALL build_2c_neighbor_lists(nl_2c, basis_set_RI, basis_set_RI, ri_data%hfx_pot, &
4378 80 : "HFX_2c_nl_RI", qs_env, sym_ij=.FALSE., dist_2d=dist_2d)
4379 :
4380 240 : ALLOCATE (present_img(nimg))
4381 3448 : present_img = 0
4382 80 : ri_data%nimg = 0
4383 80 : CALL neighbor_list_iterator_create(nl_iterator, nl_2c)
4384 1926 : DO WHILE (neighbor_list_iterate(nl_iterator) == 0)
4385 1846 : CALL get_iterator_info(nl_iterator, r=rij, cell=cell_j)
4386 :
4387 7384 : dij = NORM2(rij)
4388 :
4389 1846 : j_img = cell_to_index(cell_j(1), cell_j(2), cell_j(3))
4390 1846 : IF (j_img > nimg .OR. j_img < 1) CYCLE
4391 :
4392 1810 : IF (dij > ri_data%kp_image_range) CYCLE
4393 :
4394 1810 : ri_data%nimg = MAX(j_img, ri_data%nimg)
4395 1846 : present_img(j_img) = 1
4396 :
4397 : END DO
4398 80 : CALL neighbor_list_iterator_release(nl_iterator)
4399 80 : CALL release_neighbor_list_sets(nl_2c)
4400 80 : CALL para_env%max(ri_data%nimg)
4401 80 : IF (ri_data%nimg > nimg) &
4402 0 : CPABORT("Make sure the smallest exponent of the RI-HFX basis is larger than that of the ORB basis.")
4403 :
4404 : !Keep track of which images will not contribute, so that can be ignored before calculation
4405 80 : CALL para_env%sum(present_img)
4406 240 : ALLOCATE (ri_data%present_images(ri_data%nimg))
4407 2078 : ri_data%present_images = 0
4408 2078 : DO i_img = 1, ri_data%nimg
4409 2078 : IF (present_img(i_img) > 0) ri_data%present_images(i_img) = 1
4410 : END DO
4411 :
4412 : CALL create_3c_tensor(t_dummy, dist_AO_1, dist_AO_2, dist_RI, &
4413 : ri_data%pgrid, ri_data%bsizes_AO, ri_data%bsizes_AO, ri_data%bsizes_RI, &
4414 80 : map1=[1, 2], map2=[3], name="(AO AO | RI)")
4415 :
4416 80 : CALL dbt_mp_environ_pgrid(ri_data%pgrid, pdims, pcoord)
4417 80 : CALL mp_comm_t3c%create(ri_data%pgrid%mp_comm_2d, 3, pdims)
4418 : CALL distribution_3d_create(dist_3d, dist_AO_1, dist_AO_2, dist_RI, &
4419 80 : nkind, particle_set, mp_comm_t3c, own_comm=.TRUE.)
4420 80 : DEALLOCATE (dist_RI, dist_AO_1, dist_AO_2)
4421 80 : CALL dbt_destroy(t_dummy)
4422 :
4423 : !For the extension of the RI basis P in (mu^0 sigma^a |P^i), we consider an atom if the distance,
4424 : !between mu^0 and P^i if smaller or equal to the kind radius of mu^0
4425 : CALL build_3c_neighbor_lists(nl_3c, basis_set_AO, basis_set_AO, basis_set_RI, dist_3d, &
4426 : ri_data%ri_metric, "HFX_3c_nl", qs_env, op_pos=2, sym_ij=.FALSE., &
4427 80 : own_dist=.TRUE.)
4428 :
4429 160 : ALLOCATE (RI_cells(nimg))
4430 3448 : RI_cells = 0
4431 :
4432 240 : ALLOCATE (nRI_per_atom(natom))
4433 240 : nRI_per_atom = 0
4434 :
4435 80 : CALL neighbor_list_3c_iterator_create(nl_3c_iter, nl_3c)
4436 76342 : DO WHILE (neighbor_list_3c_iterate(nl_3c_iter) == 0)
4437 : CALL get_3c_iterator_info(nl_3c_iter, cell_k=cell_k, rik=rik, cell_j=cell_j, &
4438 76262 : iatom=iatom, jatom=jatom, katom=katom)
4439 305048 : dik = NORM2(rik)
4440 :
4441 533834 : IF (ANY([cell_j(1), cell_j(2), cell_j(3)] < kp_index_lbounds) .OR. &
4442 : ANY([cell_j(1), cell_j(2), cell_j(3)] > kp_index_ubounds)) CYCLE
4443 :
4444 76262 : jcell = cell_to_index(cell_j(1), cell_j(2), cell_j(3))
4445 76262 : IF (jcell > nimg .OR. jcell < 1) CYCLE
4446 :
4447 508705 : IF (ANY([cell_k(1), cell_k(2), cell_k(3)] < kp_index_lbounds) .OR. &
4448 : ANY([cell_k(1), cell_k(2), cell_k(3)] > kp_index_ubounds)) CYCLE
4449 :
4450 68523 : kcell = cell_to_index(cell_k(1), cell_k(2), cell_k(3))
4451 68523 : IF (kcell > nimg .OR. kcell < 1) CYCLE
4452 :
4453 54552 : IF (dik > ri_data%kp_RI_range) CYCLE
4454 6827 : RI_cells(kcell) = 1
4455 :
4456 6907 : IF (jcell == 1 .AND. iatom == jatom) nRI_per_atom(iatom) = nRI_per_atom(iatom) + ri_data%bsizes_RI(katom)
4457 : END DO
4458 80 : CALL neighbor_list_3c_iterator_destroy(nl_3c_iter)
4459 80 : CALL neighbor_list_3c_destroy(nl_3c)
4460 80 : CALL para_env%sum(RI_cells)
4461 80 : CALL para_env%sum(nRI_per_atom)
4462 :
4463 160 : ALLOCATE (ri_data%img_to_RI_cell(nimg))
4464 80 : ri_data%ncell_RI = 0
4465 3448 : ri_data%img_to_RI_cell = 0
4466 3448 : DO i_img = 1, nimg
4467 3448 : IF (RI_cells(i_img) > 0) THEN
4468 482 : ri_data%ncell_RI = ri_data%ncell_RI + 1
4469 482 : ri_data%img_to_RI_cell(i_img) = ri_data%ncell_RI
4470 : END IF
4471 : END DO
4472 :
4473 240 : ALLOCATE (ri_data%RI_cell_to_img(ri_data%ncell_RI))
4474 3448 : DO i_img = 1, nimg
4475 3448 : IF (ri_data%img_to_RI_cell(i_img) > 0) ri_data%RI_cell_to_img(ri_data%img_to_RI_cell(i_img)) = i_img
4476 : END DO
4477 :
4478 : !Print some info
4479 80 : IF (ri_data%unit_nr > 0) THEN
4480 : WRITE (ri_data%unit_nr, FMT="(/T3,A,I29)") &
4481 40 : "KP-HFX_RI_INFO| Number of RI-KP parallel groups:", ngroups
4482 : WRITE (ri_data%unit_nr, FMT="(T3,A,I29)") &
4483 40 : "KP-HFX_RI_INFO| Tensor stack size: ", ri_data%kp_stack_size
4484 : WRITE (ri_data%unit_nr, FMT="(T3,A,F31.3,A)") &
4485 40 : "KP-HFX_RI_INFO| RI basis extension radius:", ri_data%kp_RI_range*angstrom, " Ang"
4486 : WRITE (ri_data%unit_nr, FMT="(T3,A,F12.3,A, F6.3, A)") &
4487 40 : "KP-HFX_RI_INFO| RI basis bump factor and bump radius:", bump_fact, " /", &
4488 80 : ri_data%kp_bump_rad*angstrom, " Ang"
4489 : WRITE (ri_data%unit_nr, FMT="(T3,A,I16,A)") &
4490 40 : "KP-HFX_RI_INFO| The extended RI bases cover up to ", ri_data%ncell_RI, " unit cells"
4491 : WRITE (ri_data%unit_nr, FMT="(T3,A,I18)") &
4492 120 : "KP-HFX_RI_INFO| Average number of sgf in extended RI bases:", SUM(nRI_per_atom)/natom
4493 : WRITE (ri_data%unit_nr, FMT="(T3,A,F13.3,A)") &
4494 40 : "KP-HFX_RI_INFO| Consider all image cells within a radius of ", ri_data%kp_image_range*angstrom, " Ang"
4495 : WRITE (ri_data%unit_nr, FMT="(T3,A,I27/)") &
4496 40 : "KP-HFX_RI_INFO| Number of image cells considered: ", ri_data%nimg
4497 40 : CALL m_flush(ri_data%unit_nr)
4498 : END IF
4499 :
4500 80 : CALL timestop(handle)
4501 :
4502 960 : END SUBROUTINE get_kp_and_ri_images
4503 :
4504 : ! **************************************************************************************************
4505 : !> \brief A routine that creates tensors structure for rho_ao and 3c_ints in a stacked format for
4506 : !> the efficient contractions of rho_sigma^0,lambda^c * (mu^0 sigam^a | P) => TAS tensors
4507 : !> \param res_stack ...
4508 : !> \param rho_stack ...
4509 : !> \param ints_stack ...
4510 : !> \param rho_template ...
4511 : !> \param ints_template ...
4512 : !> \param stack_size ...
4513 : !> \param ri_data ...
4514 : !> \param qs_env ...
4515 : !> \note The result tensor has the exact same shape and distribution as the integral tensor
4516 : ! **************************************************************************************************
4517 294 : SUBROUTINE get_stack_tensors(res_stack, rho_stack, ints_stack, rho_template, ints_template, &
4518 : stack_size, ri_data, qs_env)
4519 : TYPE(dbt_type), DIMENSION(:), INTENT(INOUT) :: res_stack, rho_stack, ints_stack
4520 : TYPE(dbt_type), INTENT(INOUT) :: rho_template, ints_template
4521 : INTEGER, INTENT(IN) :: stack_size
4522 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
4523 : TYPE(qs_environment_type), POINTER :: qs_env
4524 :
4525 : INTEGER :: is, nblks, nblks_3c(3), pdims_3d(3)
4526 294 : INTEGER, ALLOCATABLE, DIMENSION(:) :: bsizes_RI_ext, bsizes_stack, dist1, &
4527 294 : dist2, dist3, dist_stack1, &
4528 294 : dist_stack2, dist_stack3
4529 2646 : TYPE(dbt_distribution_type) :: t_dist
4530 882 : TYPE(dbt_pgrid_type) :: pgrid
4531 : TYPE(mp_para_env_type), POINTER :: para_env
4532 :
4533 294 : NULLIFY (para_env)
4534 :
4535 294 : CALL get_qs_env(qs_env, para_env=para_env)
4536 :
4537 294 : nblks = SIZE(ri_data%bsizes_AO_split)
4538 882 : ALLOCATE (bsizes_stack(stack_size*nblks))
4539 5798 : DO is = 1, stack_size
4540 24614 : bsizes_stack((is - 1)*nblks + 1:is*nblks) = ri_data%bsizes_AO_split(:)
4541 : END DO
4542 :
4543 2646 : ALLOCATE (dist1(nblks), dist2(nblks), dist_stack1(stack_size*nblks), dist_stack2(stack_size*nblks))
4544 294 : CALL dbt_get_info(rho_template, proc_dist_1=dist1, proc_dist_2=dist2)
4545 5798 : DO is = 1, stack_size
4546 24320 : dist_stack1((is - 1)*nblks + 1:is*nblks) = dist1(:)
4547 24614 : dist_stack2((is - 1)*nblks + 1:is*nblks) = dist2(:)
4548 : END DO
4549 :
4550 : !First 2c tensor matches the distribution of template
4551 : !It is stacked in both directions
4552 294 : CALL dbt_distribution_new(t_dist, ri_data%pgrid_2d, dist_stack1, dist_stack2)
4553 294 : CALL dbt_create(rho_stack(1), "RHO_stack", t_dist, [1], [2], bsizes_stack, bsizes_stack)
4554 294 : CALL dbt_distribution_destroy(t_dist)
4555 294 : DEALLOCATE (dist1, dist2, dist_stack1, dist_stack2)
4556 :
4557 : !Second 2c tensor has optimal distribution on the 2d pgrid
4558 294 : CALL create_2c_tensor(rho_stack(2), dist1, dist2, ri_data%pgrid_2d, bsizes_stack, bsizes_stack, name="RHO_stack")
4559 294 : DEALLOCATE (dist1, dist2)
4560 :
4561 294 : CALL dbt_get_info(ints_template, nblks_total=nblks_3c)
4562 2058 : ALLOCATE (dist1(nblks_3c(1)), dist2(nblks_3c(2)), dist3(nblks_3c(3)))
4563 1470 : ALLOCATE (dist_stack3(stack_size*nblks_3c(3)), bsizes_RI_ext(nblks_3c(2)))
4564 : CALL dbt_get_info(ints_template, proc_dist_1=dist1, proc_dist_2=dist2, &
4565 294 : proc_dist_3=dist3, blk_size_2=bsizes_RI_ext)
4566 5798 : DO is = 1, stack_size
4567 24614 : dist_stack3((is - 1)*nblks_3c(3) + 1:is*nblks_3c(3)) = dist3(:)
4568 : END DO
4569 :
4570 : !First 3c tensor matches the distribution of template
4571 294 : CALL dbt_distribution_new(t_dist, ri_data%pgrid_1, dist1, dist2, dist_stack3)
4572 : CALL dbt_create(ints_stack(1), "ints_stack", t_dist, [1, 2], [3], ri_data%bsizes_AO_split, &
4573 294 : bsizes_RI_ext, bsizes_stack)
4574 294 : CALL dbt_distribution_destroy(t_dist)
4575 294 : DEALLOCATE (dist1, dist2, dist3, dist_stack3)
4576 :
4577 : !Second 3c tensor has optimal pgrid
4578 294 : pdims_3d = 0
4579 1176 : CALL dbt_pgrid_create(para_env, pdims_3d, pgrid, tensor_dims=[nblks_3c(1), nblks_3c(2), stack_size*nblks_3c(3)])
4580 : CALL create_3c_tensor(ints_stack(2), dist1, dist2, dist3, pgrid, ri_data%bsizes_AO_split, &
4581 294 : bsizes_RI_ext, bsizes_stack, [1, 2], [3], name="ints_stack")
4582 294 : DEALLOCATE (dist1, dist2, dist3)
4583 294 : CALL dbt_pgrid_destroy(pgrid)
4584 :
4585 : !The result tensor has the same shape and dist as the integral tensor
4586 294 : CALL dbt_create(ints_stack(1), res_stack(1))
4587 294 : CALL dbt_create(ints_stack(2), res_stack(2))
4588 :
4589 588 : END SUBROUTINE get_stack_tensors
4590 :
4591 : ! **************************************************************************************************
4592 : !> \brief Fill the stack of 3c tensors accrding to the order in the images input
4593 : !> \param t_3c_stack ...
4594 : !> \param t_3c_in ...
4595 : !> \param images ...
4596 : !> \param stack_dim ...
4597 : !> \param ri_data ...
4598 : !> \param filter_at ...
4599 : !> \param filter_dim ...
4600 : !> \param idx_to_at ...
4601 : !> \param img_bounds ...
4602 : ! **************************************************************************************************
4603 35544 : SUBROUTINE fill_3c_stack(t_3c_stack, t_3c_in, images, stack_dim, ri_data, filter_at, filter_dim, &
4604 35544 : idx_to_at, img_bounds)
4605 : TYPE(dbt_type), INTENT(INOUT) :: t_3c_stack
4606 : TYPE(dbt_type), DIMENSION(:), INTENT(INOUT) :: t_3c_in
4607 : INTEGER, DIMENSION(:), INTENT(INOUT) :: images
4608 : INTEGER, INTENT(IN) :: stack_dim
4609 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
4610 : INTEGER, INTENT(IN), OPTIONAL :: filter_at, filter_dim
4611 : INTEGER, DIMENSION(:), INTENT(INOUT), OPTIONAL :: idx_to_at
4612 : INTEGER, INTENT(IN), OPTIONAL :: img_bounds(2)
4613 :
4614 : INTEGER :: dest(3), i_img, idx, ind(3), lb, nblks, &
4615 : nimg, offset, ub
4616 : LOGICAL :: do_filter, found
4617 35544 : REAL(dp), ALLOCATABLE, DIMENSION(:, :, :) :: blk
4618 : TYPE(dbt_iterator_type) :: iter
4619 :
4620 : !We loop over the a images from the ac_pairs, then copy the 3c ints to the correct spot in
4621 : !in the stack tensor (corresponding to pair index). Distributions match by construction
4622 35544 : nimg = ri_data%nimg
4623 35544 : nblks = SIZE(ri_data%bsizes_AO_split)
4624 :
4625 35544 : do_filter = .FALSE.
4626 35118 : IF (PRESENT(filter_at) .AND. PRESENT(filter_dim) .AND. PRESENT(idx_to_at)) do_filter = .TRUE.
4627 :
4628 35544 : lb = 1
4629 35544 : ub = nimg
4630 35544 : offset = 0
4631 35544 : IF (PRESENT(img_bounds)) THEN
4632 35544 : lb = img_bounds(1)
4633 35544 : ub = img_bounds(2) - 1
4634 35544 : offset = lb - 1
4635 : END IF
4636 :
4637 525497 : DO idx = lb, ub
4638 489953 : i_img = images(idx)
4639 489953 : IF (i_img == 0 .OR. i_img > nimg) CYCLE
4640 :
4641 : !$OMP PARALLEL DEFAULT(NONE) &
4642 : !$OMP SHARED(idx,i_img,t_3c_in,t_3c_stack,nblks,stack_dim,filter_at,filter_dim,idx_to_at,do_filter,offset) &
4643 525497 : !$OMP PRIVATE(iter,ind,blk,found,dest)
4644 : CALL dbt_iterator_start(iter, t_3c_in(i_img))
4645 : DO WHILE (dbt_iterator_blocks_left(iter))
4646 : CALL dbt_iterator_next_block(iter, ind)
4647 : CALL dbt_get_block(t_3c_in(i_img), ind, blk, found)
4648 : IF (.NOT. found) CYCLE
4649 :
4650 : IF (do_filter) THEN
4651 : IF (.NOT. idx_to_at(ind(filter_dim)) == filter_at) CYCLE
4652 : END IF
4653 :
4654 : IF (stack_dim == 1) THEN
4655 : dest = [(idx - offset - 1)*nblks + ind(1), ind(2), ind(3)]
4656 : ELSE IF (stack_dim == 2) THEN
4657 : dest = [ind(1), (idx - offset - 1)*nblks + ind(2), ind(3)]
4658 : ELSE
4659 : dest = [ind(1), ind(2), (idx - offset - 1)*nblks + ind(3)]
4660 : END IF
4661 :
4662 : CALL dbt_put_block(t_3c_stack, dest, SHAPE(blk), blk)
4663 : DEALLOCATE (blk)
4664 : END DO
4665 : CALL dbt_iterator_stop(iter)
4666 : !$OMP END PARALLEL
4667 : END DO !i_img
4668 35544 : CALL dbt_finalize(t_3c_stack)
4669 :
4670 71088 : END SUBROUTINE fill_3c_stack
4671 :
4672 : ! **************************************************************************************************
4673 : !> \brief Fill the stack of 2c tensors based on the content of images input
4674 : !> \param t_2c_stack ...
4675 : !> \param t_2c_in ...
4676 : !> \param images ...
4677 : !> \param stack_dim ...
4678 : !> \param ri_data ...
4679 : !> \param img_bounds ...
4680 : !> \param shift ...
4681 : ! **************************************************************************************************
4682 16994 : SUBROUTINE fill_2c_stack(t_2c_stack, t_2c_in, images, stack_dim, ri_data, img_bounds, shift)
4683 : TYPE(dbt_type), INTENT(INOUT) :: t_2c_stack
4684 : TYPE(dbt_type), DIMENSION(:), INTENT(INOUT) :: t_2c_in
4685 : INTEGER, DIMENSION(:), INTENT(INOUT) :: images
4686 : INTEGER, INTENT(IN) :: stack_dim
4687 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
4688 : INTEGER, INTENT(IN), OPTIONAL :: img_bounds(2), shift
4689 :
4690 : INTEGER :: dest(2), i_img, idx, ind(2), lb, &
4691 : my_shift, nblks, nimg, offset, ub
4692 : LOGICAL :: found
4693 16994 : REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: blk
4694 : TYPE(dbt_iterator_type) :: iter
4695 :
4696 : !We loop over the a images from the ac_pairs, then copy the 3c ints to the correct spot in
4697 : !in the stack tensor (corresponding to pair index). Distributions match by construction
4698 16994 : nimg = ri_data%nimg
4699 16994 : nblks = SIZE(ri_data%bsizes_AO_split)
4700 :
4701 16994 : lb = 1
4702 16994 : ub = nimg
4703 16994 : offset = 0
4704 16994 : IF (PRESENT(img_bounds)) THEN
4705 16994 : lb = img_bounds(1)
4706 16994 : ub = img_bounds(2) - 1
4707 16994 : offset = lb - 1
4708 : END IF
4709 :
4710 16994 : my_shift = 1
4711 16994 : IF (PRESENT(shift)) my_shift = shift
4712 :
4713 253764 : DO idx = lb, ub
4714 236770 : i_img = images(idx)
4715 236770 : IF (i_img == 0 .OR. i_img > nimg) CYCLE
4716 :
4717 : !$OMP PARALLEL DEFAULT(NONE) SHARED(idx,i_img,t_2c_in,t_2c_stack,nblks,stack_dim,offset,my_shift) &
4718 253764 : !$OMP PRIVATE(iter,ind,blk,found,dest)
4719 : CALL dbt_iterator_start(iter, t_2c_in(i_img))
4720 : DO WHILE (dbt_iterator_blocks_left(iter))
4721 : CALL dbt_iterator_next_block(iter, ind)
4722 : CALL dbt_get_block(t_2c_in(i_img), ind, blk, found)
4723 : IF (.NOT. found) CYCLE
4724 :
4725 : IF (stack_dim == 1) THEN
4726 : dest = [(idx - offset - 1)*nblks + ind(1), (my_shift - 1)*nblks + ind(2)]
4727 : ELSE
4728 : dest = [(my_shift - 1)*nblks + ind(1), (idx - offset - 1)*nblks + ind(2)]
4729 : END IF
4730 :
4731 : CALL dbt_put_block(t_2c_stack, dest, SHAPE(blk), blk)
4732 : DEALLOCATE (blk)
4733 : END DO
4734 : CALL dbt_iterator_stop(iter)
4735 : !$OMP END PARALLEL
4736 : END DO !idx
4737 16994 : CALL dbt_finalize(t_2c_stack)
4738 :
4739 33988 : END SUBROUTINE fill_2c_stack
4740 :
4741 : ! **************************************************************************************************
4742 : !> \brief Unstacks a stacked 3c tensor containing t_3c_apc
4743 : !> \param t_3c_apc ...
4744 : !> \param t_stacked ...
4745 : !> \param idx ...
4746 : ! **************************************************************************************************
4747 20832 : SUBROUTINE unstack_t_3c_apc(t_3c_apc, t_stacked, idx)
4748 : TYPE(dbt_type), INTENT(INOUT) :: t_3c_apc, t_stacked
4749 : INTEGER, INTENT(IN) :: idx
4750 :
4751 : INTEGER :: current_idx
4752 : INTEGER, DIMENSION(3) :: ind, nblks_3c
4753 : LOGICAL :: found
4754 20832 : REAL(dp), ALLOCATABLE, DIMENSION(:, :, :) :: blk
4755 : TYPE(dbt_iterator_type) :: iter
4756 :
4757 : !Note: t_3c_apc and t_stacked must have the same ditribution
4758 20832 : CALL dbt_get_info(t_3c_apc, nblks_total=nblks_3c)
4759 :
4760 20832 : !$OMP PARALLEL DEFAULT(NONE) SHARED(t_3c_apc,t_stacked,idx,nblks_3c) PRIVATE(iter,ind,blk,found,current_idx)
4761 : CALL dbt_iterator_start(iter, t_stacked)
4762 : DO WHILE (dbt_iterator_blocks_left(iter))
4763 : CALL dbt_iterator_next_block(iter, ind)
4764 :
4765 : !tensor is stacked along the 3rd dimension
4766 : current_idx = (ind(3) - 1)/nblks_3c(3) + 1
4767 : IF (.NOT. idx == current_idx) CYCLE
4768 :
4769 : CALL dbt_get_block(t_stacked, ind, blk, found)
4770 : IF (.NOT. found) CYCLE
4771 :
4772 : CALL dbt_put_block(t_3c_apc, [ind(1), ind(2), ind(3) - (idx - 1)*nblks_3c(3)], SHAPE(blk), blk)
4773 : DEALLOCATE (blk)
4774 : END DO
4775 : CALL dbt_iterator_stop(iter)
4776 : !$OMP END PARALLEL
4777 :
4778 20832 : END SUBROUTINE unstack_t_3c_apc
4779 :
4780 : ! **************************************************************************************************
4781 : !> \brief copies the 3c integrals correspoinding to a single atom mu from the general (P^0| mu^0 sigam^a)
4782 : !> \param t_3c_at ...
4783 : !> \param t_3c_ints ...
4784 : !> \param iatom ...
4785 : !> \param dim_at ...
4786 : !> \param idx_to_at ...
4787 : ! **************************************************************************************************
4788 0 : SUBROUTINE get_atom_3c_ints(t_3c_at, t_3c_ints, iatom, dim_at, idx_to_at)
4789 : TYPE(dbt_type), INTENT(INOUT) :: t_3c_at, t_3c_ints
4790 : INTEGER, INTENT(IN) :: iatom, dim_at
4791 : INTEGER, DIMENSION(:), INTENT(IN) :: idx_to_at
4792 :
4793 : INTEGER, DIMENSION(3) :: ind
4794 : LOGICAL :: found
4795 0 : REAL(dp), ALLOCATABLE, DIMENSION(:, :, :) :: blk
4796 : TYPE(dbt_iterator_type) :: iter
4797 :
4798 0 : !$OMP PARALLEL DEFAULT(NONE) SHARED(t_3c_ints,t_3c_at,iatom,idx_to_at,dim_at) PRIVATE(iter,ind,blk,found)
4799 : CALL dbt_iterator_start(iter, t_3c_ints)
4800 : DO WHILE (dbt_iterator_blocks_left(iter))
4801 : CALL dbt_iterator_next_block(iter, ind)
4802 : IF (.NOT. idx_to_at(ind(dim_at)) == iatom) CYCLE
4803 :
4804 : CALL dbt_get_block(t_3c_ints, ind, blk, found)
4805 : IF (.NOT. found) CYCLE
4806 :
4807 : CALL dbt_put_block(t_3c_at, ind, SHAPE(blk), blk)
4808 : DEALLOCATE (blk)
4809 : END DO
4810 : CALL dbt_iterator_stop(iter)
4811 : !$OMP END PARALLEL
4812 0 : CALL dbt_finalize(t_3c_at)
4813 :
4814 0 : END SUBROUTINE get_atom_3c_ints
4815 :
4816 : ! **************************************************************************************************
4817 : !> \brief Precalculate the 3c and 2c derivatives tensors
4818 : !> \param t_3c_der_RI ...
4819 : !> \param t_3c_der_AO ...
4820 : !> \param mat_der_pot ...
4821 : !> \param t_2c_der_metric ...
4822 : !> \param ri_data ...
4823 : !> \param qs_env ...
4824 : ! **************************************************************************************************
4825 46 : SUBROUTINE precalc_derivatives(t_3c_der_RI, t_3c_der_AO, mat_der_pot, t_2c_der_metric, ri_data, qs_env)
4826 : TYPE(dbt_type), DIMENSION(:, :), INTENT(INOUT) :: t_3c_der_RI, t_3c_der_AO
4827 : TYPE(dbcsr_type), DIMENSION(:, :), INTENT(INOUT) :: mat_der_pot
4828 : TYPE(dbt_type), DIMENSION(:, :), INTENT(INOUT) :: t_2c_der_metric
4829 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
4830 : TYPE(qs_environment_type), POINTER :: qs_env
4831 :
4832 : CHARACTER(LEN=*), PARAMETER :: routineN = 'precalc_derivatives'
4833 :
4834 : INTEGER :: handle, handle2, i_img, i_mem, i_RI, &
4835 : i_xyz, iatom, n_mem, natom, nblks_RI, &
4836 : ncell_RI, nimg, nkind, nthreads
4837 : INTEGER(int_8) :: nze
4838 46 : INTEGER, ALLOCATABLE, DIMENSION(:) :: bsizes_RI_ext, bsizes_RI_ext_split, dist_AO_1, &
4839 92 : dist_AO_2, dist_RI, dist_RI_ext, dummy_end, dummy_start, end_blocks, start_blocks
4840 : INTEGER, DIMENSION(3) :: pcoord, pdims
4841 92 : INTEGER, DIMENSION(:), POINTER :: col_bsize, row_bsize
4842 : REAL(dp) :: occ
4843 : TYPE(dbcsr_distribution_type) :: dbcsr_dist
4844 : TYPE(dbcsr_type) :: dbcsr_template
4845 46 : TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:, :) :: mat_der_metric
4846 414 : TYPE(dbt_distribution_type) :: t_dist
4847 138 : TYPE(dbt_pgrid_type) :: pgrid
4848 414 : TYPE(dbt_type) :: t_3c_template
4849 46 : TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :, :) :: t_3c_der_AO_prv, t_3c_der_RI_prv
4850 : TYPE(dft_control_type), POINTER :: dft_control
4851 : TYPE(distribution_2d_type), POINTER :: dist_2d
4852 : TYPE(distribution_3d_type) :: dist_3d
4853 : TYPE(gto_basis_set_p_type), ALLOCATABLE, &
4854 46 : DIMENSION(:), TARGET :: basis_set_AO, basis_set_RI
4855 46 : TYPE(mp_cart_type) :: mp_comm_t3c
4856 : TYPE(mp_para_env_type), POINTER :: para_env
4857 : TYPE(neighbor_list_3c_type) :: nl_3c
4858 : TYPE(neighbor_list_set_p_type), DIMENSION(:), &
4859 46 : POINTER :: nl_2c
4860 46 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
4861 46 : TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
4862 :
4863 46 : NULLIFY (qs_kind_set, dist_2d, nl_2c, particle_set, dft_control, para_env, row_bsize, col_bsize)
4864 :
4865 46 : CALL timeset(routineN, handle)
4866 :
4867 : CALL get_qs_env(qs_env, nkind=nkind, qs_kind_set=qs_kind_set, distribution_2d=dist_2d, natom=natom, &
4868 46 : particle_set=particle_set, dft_control=dft_control, para_env=para_env)
4869 :
4870 46 : nimg = ri_data%nimg
4871 46 : ncell_RI = ri_data%ncell_RI
4872 :
4873 324 : ALLOCATE (basis_set_RI(nkind), basis_set_AO(nkind))
4874 46 : CALL basis_set_list_setup(basis_set_RI, ri_data%ri_basis_type, qs_kind_set)
4875 46 : CALL get_particle_set(particle_set, qs_kind_set, basis=basis_set_RI)
4876 46 : CALL basis_set_list_setup(basis_set_AO, ri_data%orb_basis_type, qs_kind_set)
4877 46 : CALL get_particle_set(particle_set, qs_kind_set, basis=basis_set_AO)
4878 :
4879 : !Dealing with the 3c derivatives
4880 46 : nthreads = 1
4881 46 : !$ nthreads = omp_get_num_threads()
4882 46 : pdims = 0
4883 184 : CALL dbt_pgrid_create(para_env, pdims, pgrid, tensor_dims=[MAX(1, natom/(ri_data%n_mem*nthreads)), natom, natom])
4884 :
4885 : CALL create_3c_tensor(t_3c_template, dist_AO_1, dist_AO_2, dist_RI, pgrid, &
4886 : ri_data%bsizes_AO, ri_data%bsizes_AO, ri_data%bsizes_RI, &
4887 46 : map1=[1, 2], map2=[3], name="tmp")
4888 46 : CALL dbt_destroy(t_3c_template)
4889 :
4890 : !We stack the RI basis images. Keep consistent distribution
4891 46 : nblks_RI = SIZE(ri_data%bsizes_RI_split)
4892 138 : ALLOCATE (dist_RI_ext(natom*ncell_RI))
4893 92 : ALLOCATE (bsizes_RI_ext(natom*ncell_RI))
4894 138 : ALLOCATE (bsizes_RI_ext_split(nblks_RI*ncell_RI))
4895 318 : DO i_RI = 1, ncell_RI
4896 816 : bsizes_RI_ext((i_RI - 1)*natom + 1:i_RI*natom) = ri_data%bsizes_RI(:)
4897 816 : dist_RI_ext((i_RI - 1)*natom + 1:i_RI*natom) = dist_RI(:)
4898 1622 : bsizes_RI_ext_split((i_RI - 1)*nblks_RI + 1:i_RI*nblks_RI) = ri_data%bsizes_RI_split(:)
4899 : END DO
4900 :
4901 46 : CALL dbt_distribution_new(t_dist, pgrid, dist_AO_1, dist_AO_2, dist_RI_ext)
4902 : CALL dbt_create(t_3c_template, "KP_3c_der", t_dist, [1, 2], [3], &
4903 46 : ri_data%bsizes_AO, ri_data%bsizes_AO, bsizes_RI_ext)
4904 46 : CALL dbt_distribution_destroy(t_dist)
4905 :
4906 8084 : ALLOCATE (t_3c_der_RI_prv(nimg, 1, 3), t_3c_der_AO_prv(nimg, 1, 3))
4907 184 : DO i_xyz = 1, 3
4908 3490 : DO i_img = 1, nimg
4909 3306 : CALL dbt_create(t_3c_template, t_3c_der_RI_prv(i_img, 1, i_xyz))
4910 3444 : CALL dbt_create(t_3c_template, t_3c_der_AO_prv(i_img, 1, i_xyz))
4911 : END DO
4912 : END DO
4913 46 : CALL dbt_destroy(t_3c_template)
4914 :
4915 46 : CALL dbt_mp_environ_pgrid(pgrid, pdims, pcoord)
4916 46 : CALL mp_comm_t3c%create(pgrid%mp_comm_2d, 3, pdims)
4917 : CALL distribution_3d_create(dist_3d, dist_AO_1, dist_AO_2, dist_RI, &
4918 46 : nkind, particle_set, mp_comm_t3c, own_comm=.TRUE.)
4919 46 : DEALLOCATE (dist_RI, dist_AO_1, dist_AO_2)
4920 46 : CALL dbt_pgrid_destroy(pgrid)
4921 :
4922 : CALL build_3c_neighbor_lists(nl_3c, basis_set_AO, basis_set_AO, basis_set_RI, dist_3d, ri_data%ri_metric, &
4923 46 : "HFX_3c_nl", qs_env, op_pos=2, sym_jk=.FALSE., own_dist=.TRUE.)
4924 :
4925 46 : n_mem = ri_data%n_mem
4926 : CALL create_tensor_batches(ri_data%bsizes_RI, n_mem, dummy_start, dummy_end, &
4927 : start_blocks, end_blocks)
4928 46 : DEALLOCATE (dummy_start, dummy_end)
4929 :
4930 : CALL create_3c_tensor(t_3c_template, dist_RI, dist_AO_1, dist_AO_2, ri_data%pgrid_2, &
4931 : bsizes_RI_ext_split, ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
4932 46 : map1=[1], map2=[2, 3], name="der (RI | AO AO)")
4933 184 : DO i_xyz = 1, 3
4934 3490 : DO i_img = 1, nimg
4935 3306 : CALL dbt_create(t_3c_template, t_3c_der_RI(i_img, i_xyz))
4936 3444 : CALL dbt_create(t_3c_template, t_3c_der_AO(i_img, i_xyz))
4937 : END DO
4938 : END DO
4939 :
4940 128 : DO i_mem = 1, n_mem
4941 : CALL build_3c_derivatives(t_3c_der_AO_prv, t_3c_der_RI_prv, ri_data%filter_eps, qs_env, &
4942 : nl_3c, basis_set_AO, basis_set_AO, basis_set_RI, &
4943 : ri_data%ri_metric, der_eps=ri_data%eps_schwarz_forces, op_pos=2, &
4944 : do_kpoints=.TRUE., do_hfx_kpoints=.TRUE., &
4945 : bounds_k=[start_blocks(i_mem), end_blocks(i_mem)], &
4946 246 : RI_range=ri_data%kp_RI_range, img_to_RI_cell=ri_data%img_to_RI_cell)
4947 :
4948 82 : CALL timeset(routineN//"_cpy", handle2)
4949 : !We go from (mu^0 sigma^i | P^j) to (P^i| sigma^j mu^0) and finally to (P^i| mu^0 sigma^j)
4950 2186 : DO i_img = 1, nimg
4951 8498 : DO i_xyz = 1, 3
4952 : !derivative wrt to mu^0
4953 6312 : CALL get_tensor_occupancy(t_3c_der_AO_prv(i_img, 1, i_xyz), nze, occ)
4954 6312 : IF (nze > 0) THEN
4955 : CALL dbt_copy(t_3c_der_AO_prv(i_img, 1, i_xyz), t_3c_template, &
4956 3830 : order=[3, 2, 1], move_data=.TRUE.)
4957 3830 : CALL dbt_filter(t_3c_template, ri_data%filter_eps)
4958 : CALL dbt_copy(t_3c_template, t_3c_der_AO(i_img, i_xyz), &
4959 3830 : order=[1, 3, 2], move_data=.TRUE., summation=.TRUE.)
4960 : END IF
4961 :
4962 : !derivative wrt to P^i
4963 6312 : CALL get_tensor_occupancy(t_3c_der_RI_prv(i_img, 1, i_xyz), nze, occ)
4964 14728 : IF (nze > 0) THEN
4965 : CALL dbt_copy(t_3c_der_RI_prv(i_img, 1, i_xyz), t_3c_template, &
4966 3820 : order=[3, 2, 1], move_data=.TRUE.)
4967 3820 : CALL dbt_filter(t_3c_template, ri_data%filter_eps)
4968 : CALL dbt_copy(t_3c_template, t_3c_der_RI(i_img, i_xyz), &
4969 3820 : order=[1, 3, 2], move_data=.TRUE., summation=.TRUE.)
4970 : END IF
4971 : END DO
4972 : END DO
4973 210 : CALL timestop(handle2)
4974 : END DO
4975 46 : CALL dbt_destroy(t_3c_template)
4976 :
4977 46 : CALL neighbor_list_3c_destroy(nl_3c)
4978 184 : DO i_xyz = 1, 3
4979 3490 : DO i_img = 1, nimg
4980 3306 : CALL dbt_destroy(t_3c_der_RI_prv(i_img, 1, i_xyz))
4981 3444 : CALL dbt_destroy(t_3c_der_AO_prv(i_img, 1, i_xyz))
4982 : END DO
4983 : END DO
4984 6658 : DEALLOCATE (t_3c_der_RI_prv, t_3c_der_AO_prv)
4985 :
4986 : !Reorder 3c derivatives to be consistant with ints
4987 46 : CALL reorder_3c_derivs(t_3c_der_RI, ri_data)
4988 46 : CALL reorder_3c_derivs(t_3c_der_AO, ri_data)
4989 :
4990 46 : CALL timeset(routineN//"_2c", handle2)
4991 : !The 2-center derivatives
4992 46 : CALL cp_dbcsr_dist2d_to_dist(dist_2d, dbcsr_dist)
4993 138 : ALLOCATE (row_bsize(SIZE(ri_data%bsizes_RI)))
4994 92 : ALLOCATE (col_bsize(SIZE(ri_data%bsizes_RI)))
4995 138 : row_bsize(:) = ri_data%bsizes_RI
4996 138 : col_bsize(:) = ri_data%bsizes_RI
4997 :
4998 : CALL dbcsr_create(dbcsr_template, "2c_der", dbcsr_dist, dbcsr_type_no_symmetry, &
4999 46 : row_bsize, col_bsize)
5000 46 : CALL dbcsr_distribution_release(dbcsr_dist)
5001 46 : DEALLOCATE (col_bsize, row_bsize)
5002 :
5003 3582 : ALLOCATE (mat_der_metric(nimg, 3))
5004 184 : DO i_xyz = 1, 3
5005 3490 : DO i_img = 1, nimg
5006 3306 : CALL dbcsr_create(mat_der_pot(i_img, i_xyz), template=dbcsr_template)
5007 3444 : CALL dbcsr_create(mat_der_metric(i_img, i_xyz), template=dbcsr_template)
5008 : END DO
5009 : END DO
5010 46 : CALL dbcsr_release(dbcsr_template)
5011 :
5012 : !HFX potential derivatives
5013 : CALL build_2c_neighbor_lists(nl_2c, basis_set_RI, basis_set_RI, ri_data%hfx_pot, &
5014 46 : "HFX_2c_nl_pot", qs_env, sym_ij=.FALSE., dist_2d=dist_2d)
5015 : CALL build_2c_derivatives(mat_der_pot, ri_data%filter_eps_2c, qs_env, nl_2c, &
5016 46 : basis_set_RI, basis_set_RI, ri_data%hfx_pot, do_kpoints=.TRUE.)
5017 46 : CALL release_neighbor_list_sets(nl_2c)
5018 :
5019 : !RI metric derivatives
5020 : CALL build_2c_neighbor_lists(nl_2c, basis_set_RI, basis_set_RI, ri_data%ri_metric, &
5021 46 : "HFX_2c_nl_pot", qs_env, sym_ij=.FALSE., dist_2d=dist_2d)
5022 : CALL build_2c_derivatives(mat_der_metric, ri_data%filter_eps_2c, qs_env, nl_2c, &
5023 46 : basis_set_RI, basis_set_RI, ri_data%ri_metric, do_kpoints=.TRUE.)
5024 46 : CALL release_neighbor_list_sets(nl_2c)
5025 :
5026 : !Get into extended RI basis and tensor format
5027 184 : DO i_xyz = 1, 3
5028 414 : DO iatom = 1, natom
5029 276 : CALL dbt_create(ri_data%t_2c_inv(1, 1), t_2c_der_metric(iatom, i_xyz))
5030 : CALL get_ext_2c_int(t_2c_der_metric(iatom, i_xyz), mat_der_metric(:, i_xyz), &
5031 414 : iatom, iatom, 1, ri_data, qs_env)
5032 : END DO
5033 3490 : DO i_img = 1, nimg
5034 3444 : CALL dbcsr_release(mat_der_metric(i_img, i_xyz))
5035 : END DO
5036 : END DO
5037 46 : CALL timestop(handle2)
5038 :
5039 46 : CALL timestop(handle)
5040 :
5041 276 : END SUBROUTINE precalc_derivatives
5042 :
5043 : ! **************************************************************************************************
5044 : !> \brief Update the forces due to the derivative of the a 2-center product d/dR (Q|R)
5045 : !> \param force ...
5046 : !> \param t_2c_contr A precontracted tensor containing sum_abcdPS (ab|P)(P|Q)^-1 (R|S)^-1 (S|cd) P_ac P_bd
5047 : !> \param t_2c_der the d/dR (Q|R) tensor, in all 3 cartesian directions
5048 : !> \param atom_of_kind ...
5049 : !> \param kind_of ...
5050 : !> \param img in which periodic image the second center of the tensor is
5051 : !> \param pref ...
5052 : !> \param ri_data ...
5053 : !> \param qs_env ...
5054 : !> \param work_virial ...
5055 : !> \param cell ...
5056 : !> \param particle_set ...
5057 : !> \param diag ...
5058 : !> \param offdiag ...
5059 : !> \note IMPORTANT: t_tc_contr and t_2c_der need to have the same distribution. Atomic block sizes are
5060 : !> assumed
5061 : ! **************************************************************************************************
5062 3230 : SUBROUTINE get_2c_der_force(force, t_2c_contr, t_2c_der, atom_of_kind, kind_of, img, pref, &
5063 : ri_data, qs_env, work_virial, cell, particle_set, diag, offdiag)
5064 :
5065 : TYPE(qs_force_type), DIMENSION(:), POINTER :: force
5066 : TYPE(dbt_type), INTENT(INOUT) :: t_2c_contr
5067 : TYPE(dbt_type), DIMENSION(:), INTENT(INOUT) :: t_2c_der
5068 : INTEGER, DIMENSION(:), INTENT(IN) :: atom_of_kind, kind_of
5069 : INTEGER, INTENT(IN) :: img
5070 : REAL(dp), INTENT(IN) :: pref
5071 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
5072 : TYPE(qs_environment_type), POINTER :: qs_env
5073 : REAL(dp), DIMENSION(3, 3), INTENT(INOUT), OPTIONAL :: work_virial
5074 : TYPE(cell_type), OPTIONAL, POINTER :: cell
5075 : TYPE(particle_type), DIMENSION(:), OPTIONAL, &
5076 : POINTER :: particle_set
5077 : LOGICAL, INTENT(IN), OPTIONAL :: diag, offdiag
5078 :
5079 : CHARACTER(LEN=*), PARAMETER :: routineN = 'get_2c_der_force'
5080 :
5081 : INTEGER :: handle, i_img, i_RI, i_xyz, iat, &
5082 : iat_of_kind, ikind, j_img, j_RI, &
5083 : j_xyz, jat, jat_of_kind, jkind, natom
5084 : INTEGER, DIMENSION(2) :: ind
5085 3230 : INTEGER, DIMENSION(:, :), POINTER :: index_to_cell
5086 : LOGICAL :: found, my_diag, my_offdiag, use_virial
5087 : REAL(dp) :: new_force
5088 3230 : REAL(dp), ALLOCATABLE, DIMENSION(:, :), TARGET :: contr_blk, der_blk
5089 : REAL(dp), DIMENSION(3) :: scoord
5090 : TYPE(dbt_iterator_type) :: iter
5091 : TYPE(kpoint_type), POINTER :: kpoints
5092 :
5093 3230 : NULLIFY (kpoints, index_to_cell)
5094 :
5095 : !Loop over the blocks of d/dR (Q|R), contract with the corresponding block of t_2c_contr and
5096 : !update the relevant force
5097 :
5098 3230 : CALL timeset(routineN, handle)
5099 :
5100 3230 : use_virial = .FALSE.
5101 3230 : IF (PRESENT(work_virial) .AND. PRESENT(cell) .AND. PRESENT(particle_set)) use_virial = .TRUE.
5102 :
5103 3230 : my_diag = .FALSE.
5104 3230 : IF (PRESENT(diag)) my_diag = diag
5105 :
5106 2584 : my_offdiag = .FALSE.
5107 2584 : IF (PRESENT(diag)) my_offdiag = offdiag
5108 :
5109 3230 : CALL get_qs_env(qs_env, kpoints=kpoints, natom=natom)
5110 3230 : CALL get_kpoint_info(kpoints, index_to_cell=index_to_cell)
5111 :
5112 : !$OMP PARALLEL DEFAULT(NONE) &
5113 : !$OMP SHARED(t_2c_der,t_2c_contr,work_virial,force,use_virial,natom,index_to_cell,ri_data,img) &
5114 : !$OMP SHARED(pref,atom_of_kind,kind_of,particle_set,cell,my_diag,my_offdiag) &
5115 : !$OMP PRIVATE(i_xyz,j_xyz,iter,ind,der_blk,contr_blk,found,new_force,i_RI,i_img,j_RI,j_img) &
5116 3230 : !$OMP PRIVATE(iat,jat,iat_of_kind,jat_of_kind,ikind,jkind,scoord)
5117 : DO i_xyz = 1, 3
5118 : CALL dbt_iterator_start(iter, t_2c_der(i_xyz))
5119 : DO WHILE (dbt_iterator_blocks_left(iter))
5120 : CALL dbt_iterator_next_block(iter, ind)
5121 :
5122 : !Only take forecs due to block diagonal or block off-diagonal, depending on arguments
5123 : IF ((my_diag .AND. .NOT. my_offdiag) .OR. (.NOT. my_diag .AND. my_offdiag)) THEN
5124 : IF (my_diag .AND. (ind(1) .NE. ind(2))) CYCLE
5125 : IF (my_offdiag .AND. (ind(1) == ind(2))) CYCLE
5126 : END IF
5127 :
5128 : CALL dbt_get_block(t_2c_der(i_xyz), ind, der_blk, found)
5129 : CPASSERT(found)
5130 : CALL dbt_get_block(t_2c_contr, ind, contr_blk, found)
5131 :
5132 : IF (found) THEN
5133 :
5134 : !an element of d/dR (Q|R) corresponds to 2 things because of translational invariance
5135 : !(Q'| R) = - (Q| R'), once wrt the center on Q, and once on R
5136 : new_force = pref*SUM(der_blk(:, :)*contr_blk(:, :))
5137 :
5138 : i_RI = (ind(1) - 1)/natom + 1
5139 : i_img = ri_data%RI_cell_to_img(i_RI)
5140 : iat = ind(1) - (i_RI - 1)*natom
5141 : iat_of_kind = atom_of_kind(iat)
5142 : ikind = kind_of(iat)
5143 :
5144 : j_RI = (ind(2) - 1)/natom + 1
5145 : j_img = ri_data%RI_cell_to_img(j_RI)
5146 : jat = ind(2) - (j_RI - 1)*natom
5147 : jat_of_kind = atom_of_kind(jat)
5148 : jkind = kind_of(jat)
5149 :
5150 : !Force on iatom (first center)
5151 : !$OMP ATOMIC
5152 : force(ikind)%fock_4c(i_xyz, iat_of_kind) = force(ikind)%fock_4c(i_xyz, iat_of_kind) &
5153 : + new_force
5154 :
5155 : IF (use_virial) THEN
5156 :
5157 : CALL real_to_scaled(scoord, pbc(particle_set(iat)%r, cell), cell)
5158 : scoord(:) = scoord(:) + REAL(index_to_cell(:, i_img), dp)
5159 :
5160 : DO j_xyz = 1, 3
5161 : !$OMP ATOMIC
5162 : work_virial(i_xyz, j_xyz) = work_virial(i_xyz, j_xyz) + new_force*scoord(j_xyz)
5163 : END DO
5164 : END IF
5165 :
5166 : !Force on jatom (second center)
5167 : !$OMP ATOMIC
5168 : force(jkind)%fock_4c(i_xyz, jat_of_kind) = force(jkind)%fock_4c(i_xyz, jat_of_kind) &
5169 : - new_force
5170 :
5171 : IF (use_virial) THEN
5172 :
5173 : CALL real_to_scaled(scoord, pbc(particle_set(jat)%r, cell), cell)
5174 : scoord(:) = scoord(:) + REAL(index_to_cell(:, j_img) + index_to_cell(:, img), dp)
5175 :
5176 : DO j_xyz = 1, 3
5177 : !$OMP ATOMIC
5178 : work_virial(i_xyz, j_xyz) = work_virial(i_xyz, j_xyz) - new_force*scoord(j_xyz)
5179 : END DO
5180 : END IF
5181 :
5182 : DEALLOCATE (contr_blk)
5183 : END IF
5184 :
5185 : DEALLOCATE (der_blk)
5186 : END DO !iter
5187 : CALL dbt_iterator_stop(iter)
5188 :
5189 : END DO !i_xyz
5190 : !$OMP END PARALLEL
5191 3230 : CALL timestop(handle)
5192 :
5193 6460 : END SUBROUTINE get_2c_der_force
5194 :
5195 : ! **************************************************************************************************
5196 : !> \brief This routines calculates the force contribution from a trace over 3D tensors, i.e.
5197 : !> force = sum_ijk A_ijk B_ijk., the B tensor is (P^0| sigma^0 lambda^img), with P in the
5198 : !> extended RI basis. Note that all tensors are stacked along the 3rd dimension
5199 : !> \param force ...
5200 : !> \param t_3c_contr ...
5201 : !> \param t_3c_der_1 ...
5202 : !> \param t_3c_der_2 ...
5203 : !> \param atom_of_kind ...
5204 : !> \param kind_of ...
5205 : !> \param idx_to_at_RI ...
5206 : !> \param idx_to_at_AO ...
5207 : !> \param i_images ...
5208 : !> \param lb_img ...
5209 : !> \param pref ...
5210 : !> \param ri_data ...
5211 : !> \param qs_env ...
5212 : !> \param work_virial ...
5213 : !> \param cell ...
5214 : !> \param particle_set ...
5215 : ! **************************************************************************************************
5216 2228 : SUBROUTINE get_force_from_3c_trace(force, t_3c_contr, t_3c_der_1, t_3c_der_2, atom_of_kind, kind_of, &
5217 4456 : idx_to_at_RI, idx_to_at_AO, i_images, lb_img, pref, &
5218 : ri_data, qs_env, work_virial, cell, particle_set)
5219 :
5220 : TYPE(qs_force_type), DIMENSION(:), POINTER :: force
5221 : TYPE(dbt_type), INTENT(INOUT) :: t_3c_contr
5222 : TYPE(dbt_type), DIMENSION(3), INTENT(INOUT) :: t_3c_der_1, t_3c_der_2
5223 : INTEGER, DIMENSION(:), INTENT(IN) :: atom_of_kind, kind_of, idx_to_at_RI, &
5224 : idx_to_at_AO, i_images
5225 : INTEGER, INTENT(IN) :: lb_img
5226 : REAL(dp), INTENT(IN) :: pref
5227 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
5228 : TYPE(qs_environment_type), POINTER :: qs_env
5229 : REAL(dp), DIMENSION(3, 3), INTENT(INOUT), OPTIONAL :: work_virial
5230 : TYPE(cell_type), OPTIONAL, POINTER :: cell
5231 : TYPE(particle_type), DIMENSION(:), OPTIONAL, &
5232 : POINTER :: particle_set
5233 :
5234 : CHARACTER(LEN=*), PARAMETER :: routineN = 'get_force_from_3c_trace'
5235 :
5236 : INTEGER :: handle, i_RI, i_xyz, iat, iat_of_kind, idx, ikind, j_xyz, jat, jat_of_kind, &
5237 : jkind, kat, kat_of_kind, kkind, nblks_AO, nblks_RI, RI_img
5238 : INTEGER, DIMENSION(3) :: ind
5239 2228 : INTEGER, DIMENSION(:, :), POINTER :: index_to_cell
5240 : LOGICAL :: found, found_1, found_2, use_virial
5241 : REAL(dp) :: new_force
5242 2228 : REAL(dp), ALLOCATABLE, DIMENSION(:, :, :), TARGET :: contr_blk, der_blk_1, der_blk_2, &
5243 2228 : der_blk_3
5244 : REAL(dp), DIMENSION(3) :: scoord
5245 : TYPE(dbt_iterator_type) :: iter
5246 : TYPE(kpoint_type), POINTER :: kpoints
5247 :
5248 2228 : NULLIFY (kpoints, index_to_cell)
5249 :
5250 2228 : CALL timeset(routineN, handle)
5251 :
5252 2228 : CALL get_qs_env(qs_env, kpoints=kpoints)
5253 2228 : CALL get_kpoint_info(kpoints, index_to_cell=index_to_cell)
5254 :
5255 2228 : nblks_RI = SIZE(ri_data%bsizes_RI_split)
5256 2228 : nblks_AO = SIZE(ri_data%bsizes_AO_split)
5257 :
5258 2228 : use_virial = .FALSE.
5259 2228 : IF (PRESENT(work_virial) .AND. PRESENT(cell) .AND. PRESENT(particle_set)) use_virial = .TRUE.
5260 :
5261 : !$OMP PARALLEL DEFAULT(NONE) &
5262 : !$OMP SHARED(t_3c_der_1, t_3c_der_2,t_3c_contr,work_virial,force,use_virial,index_to_cell,i_images,lb_img) &
5263 : !$OMP SHARED(pref,idx_to_at_AO,atom_of_kind,kind_of,particle_set,cell,idx_to_at_RI,ri_data,nblks_RI,nblks_AO) &
5264 : !$OMP PRIVATE(i_xyz,j_xyz,iter,ind,der_blk_1,contr_blk,found,new_force,iat,iat_of_kind,ikind,scoord) &
5265 2228 : !$OMP PRIVATE(jat,kat,jat_of_kind,kat_of_kind,jkind,kkind,i_RI,RI_img,der_blk_2,der_blk_3,found_1,found_2,idx)
5266 : CALL dbt_iterator_start(iter, t_3c_contr)
5267 : DO WHILE (dbt_iterator_blocks_left(iter))
5268 : CALL dbt_iterator_next_block(iter, ind)
5269 :
5270 : CALL dbt_get_block(t_3c_contr, ind, contr_blk, found)
5271 : IF (found) THEN
5272 :
5273 : DO i_xyz = 1, 3
5274 : CALL dbt_get_block(t_3c_der_1(i_xyz), ind, der_blk_1, found_1)
5275 : IF (.NOT. found_1) THEN
5276 : DEALLOCATE (der_blk_1)
5277 : ALLOCATE (der_blk_1(SIZE(contr_blk, 1), SIZE(contr_blk, 2), SIZE(contr_blk, 3)))
5278 : der_blk_1(:, :, :) = 0.0_dp
5279 : END IF
5280 : CALL dbt_get_block(t_3c_der_2(i_xyz), ind, der_blk_2, found_2)
5281 : IF (.NOT. found_2) THEN
5282 : DEALLOCATE (der_blk_2)
5283 : ALLOCATE (der_blk_2(SIZE(contr_blk, 1), SIZE(contr_blk, 2), SIZE(contr_blk, 3)))
5284 : der_blk_2(:, :, :) = 0.0_dp
5285 : END IF
5286 :
5287 : ALLOCATE (der_blk_3(SIZE(contr_blk, 1), SIZE(contr_blk, 2), SIZE(contr_blk, 3)))
5288 : der_blk_3(:, :, :) = -(der_blk_1(:, :, :) + der_blk_2(:, :, :))
5289 :
5290 : !We assume the tensors are in the format (P^0| sigma^0 mu^a+c-b), with P a member of the
5291 : !extended RI basis set
5292 :
5293 : !Force for the first center (RI extended basis, zero cell)
5294 : new_force = pref*SUM(der_blk_1(:, :, :)*contr_blk(:, :, :))
5295 :
5296 : i_RI = (ind(1) - 1)/nblks_RI + 1
5297 : RI_img = ri_data%RI_cell_to_img(i_RI)
5298 : iat = idx_to_at_RI(ind(1) - (i_RI - 1)*nblks_RI)
5299 : iat_of_kind = atom_of_kind(iat)
5300 : ikind = kind_of(iat)
5301 :
5302 : !$OMP ATOMIC
5303 : force(ikind)%fock_4c(i_xyz, iat_of_kind) = force(ikind)%fock_4c(i_xyz, iat_of_kind) &
5304 : + new_force
5305 :
5306 : IF (use_virial) THEN
5307 :
5308 : CALL real_to_scaled(scoord, pbc(particle_set(iat)%r, cell), cell)
5309 : scoord(:) = scoord(:) + REAL(index_to_cell(:, RI_img), dp)
5310 :
5311 : DO j_xyz = 1, 3
5312 : !$OMP ATOMIC
5313 : work_virial(i_xyz, j_xyz) = work_virial(i_xyz, j_xyz) + new_force*scoord(j_xyz)
5314 : END DO
5315 : END IF
5316 :
5317 : !Force with respect to the second center (AO basis, zero cell)
5318 : new_force = pref*SUM(der_blk_2(:, :, :)*contr_blk(:, :, :))
5319 : jat = idx_to_at_AO(ind(2))
5320 : jat_of_kind = atom_of_kind(jat)
5321 : jkind = kind_of(jat)
5322 :
5323 : !$OMP ATOMIC
5324 : force(jkind)%fock_4c(i_xyz, jat_of_kind) = force(jkind)%fock_4c(i_xyz, jat_of_kind) &
5325 : + new_force
5326 :
5327 : IF (use_virial) THEN
5328 :
5329 : CALL real_to_scaled(scoord, pbc(particle_set(jat)%r, cell), cell)
5330 :
5331 : DO j_xyz = 1, 3
5332 : !$OMP ATOMIC
5333 : work_virial(i_xyz, j_xyz) = work_virial(i_xyz, j_xyz) + new_force*scoord(j_xyz)
5334 : END DO
5335 : END IF
5336 :
5337 : !Force with respect to the third center (AO basis, apc_img - b_img)
5338 : !Note: tensors are stacked along the 3rd direction
5339 : new_force = pref*SUM(der_blk_3(:, :, :)*contr_blk(:, :, :))
5340 : idx = (ind(3) - 1)/nblks_AO + 1
5341 : kat = idx_to_at_AO(ind(3) - (idx - 1)*nblks_AO)
5342 : kat_of_kind = atom_of_kind(kat)
5343 : kkind = kind_of(kat)
5344 :
5345 : !$OMP ATOMIC
5346 : force(kkind)%fock_4c(i_xyz, kat_of_kind) = force(kkind)%fock_4c(i_xyz, kat_of_kind) &
5347 : + new_force
5348 :
5349 : IF (use_virial) THEN
5350 : CALL real_to_scaled(scoord, pbc(particle_set(kat)%r, cell), cell)
5351 : scoord(:) = scoord(:) + REAL(index_to_cell(:, i_images(lb_img - 1 + idx)), dp)
5352 :
5353 : DO j_xyz = 1, 3
5354 : !$OMP ATOMIC
5355 : work_virial(i_xyz, j_xyz) = work_virial(i_xyz, j_xyz) + new_force*scoord(j_xyz)
5356 : END DO
5357 : END IF
5358 :
5359 : DEALLOCATE (der_blk_1, der_blk_2, der_blk_3)
5360 : END DO !i_xyz
5361 : DEALLOCATE (contr_blk)
5362 : END IF !found
5363 : END DO !iter
5364 : CALL dbt_iterator_stop(iter)
5365 : !$OMP END PARALLEL
5366 2228 : CALL timestop(handle)
5367 :
5368 4456 : END SUBROUTINE get_force_from_3c_trace
5369 :
5370 : END MODULE
|