Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2025 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief RI-methods for HFX
10 : ! **************************************************************************************************
11 :
12 : MODULE hfx_ri
13 :
14 : USE OMP_LIB, ONLY: omp_get_num_threads,&
15 : omp_get_thread_num
16 : USE arnoldi_api, ONLY: arnoldi_extremal
17 : USE atomic_kind_types, ONLY: atomic_kind_type,&
18 : get_atomic_kind,&
19 : get_atomic_kind_set
20 : USE basis_set_types, ONLY: get_gto_basis_set,&
21 : gto_basis_set_p_type,&
22 : gto_basis_set_type
23 : USE cell_types, ONLY: cell_type,&
24 : get_cell
25 : USE cp_blacs_env, ONLY: cp_blacs_env_type
26 : USE cp_control_types, ONLY: dft_control_type
27 : USE cp_dbcsr_api, ONLY: &
28 : dbcsr_add, dbcsr_complete_redistribute, dbcsr_copy, dbcsr_create, dbcsr_desymmetrize, &
29 : dbcsr_distribution_get, dbcsr_distribution_release, dbcsr_distribution_type, dbcsr_filter, &
30 : dbcsr_get_info, dbcsr_get_num_blocks, dbcsr_multiply, dbcsr_p_type, dbcsr_release, &
31 : dbcsr_scale, dbcsr_type, dbcsr_type_antisymmetric, dbcsr_type_no_symmetry, &
32 : dbcsr_type_symmetric
33 : USE cp_dbcsr_cholesky, ONLY: cp_dbcsr_cholesky_decompose,&
34 : cp_dbcsr_cholesky_invert
35 : USE cp_dbcsr_contrib, ONLY: dbcsr_add_on_diag,&
36 : dbcsr_dot,&
37 : dbcsr_frobenius_norm
38 : USE cp_dbcsr_diag, ONLY: cp_dbcsr_power
39 : USE cp_dbcsr_operations, ONLY: copy_dbcsr_to_fm,&
40 : copy_fm_to_dbcsr,&
41 : cp_dbcsr_dist2d_to_dist,&
42 : dbcsr_deallocate_matrix_set
43 : USE cp_fm_struct, ONLY: cp_fm_struct_create,&
44 : cp_fm_struct_release,&
45 : cp_fm_struct_type
46 : USE cp_fm_types, ONLY: cp_fm_create,&
47 : cp_fm_release,&
48 : cp_fm_type,&
49 : cp_fm_write_unformatted
50 : USE cp_log_handling, ONLY: cp_get_default_logger,&
51 : cp_logger_type
52 : USE cp_output_handling, ONLY: cp_p_file,&
53 : cp_print_key_finished_output,&
54 : cp_print_key_should_output,&
55 : cp_print_key_unit_nr
56 : USE dbt_api, ONLY: &
57 : dbt_batched_contract_finalize, dbt_batched_contract_init, dbt_clear, dbt_contract, &
58 : dbt_copy, dbt_copy_matrix_to_tensor, dbt_copy_tensor_to_matrix, dbt_create, &
59 : dbt_default_distvec, dbt_destroy, dbt_distribution_destroy, dbt_distribution_new, &
60 : dbt_distribution_type, dbt_filter, dbt_get_block, dbt_get_info, dbt_get_num_blocks_total, &
61 : dbt_iterator_blocks_left, dbt_iterator_next_block, dbt_iterator_start, dbt_iterator_stop, &
62 : dbt_iterator_type, dbt_mp_environ_pgrid, dbt_nd_mp_comm, dbt_pgrid_create, &
63 : dbt_pgrid_destroy, dbt_pgrid_type, dbt_put_block, dbt_reserve_blocks, dbt_scale, dbt_type
64 : USE distribution_2d_types, ONLY: distribution_2d_type
65 : USE hfx_types, ONLY: alloc_containers,&
66 : block_ind_type,&
67 : dealloc_containers,&
68 : hfx_compression_type,&
69 : hfx_ri_init,&
70 : hfx_ri_release,&
71 : hfx_ri_type
72 : USE input_constants, ONLY: hfx_ri_do_2c_cholesky,&
73 : hfx_ri_do_2c_diag,&
74 : hfx_ri_do_2c_iter
75 : USE input_cp2k_hfx, ONLY: ri_mo,&
76 : ri_pmat
77 : USE input_section_types, ONLY: section_vals_get_subs_vals,&
78 : section_vals_type,&
79 : section_vals_val_get
80 : USE iterate_matrix, ONLY: invert_hotelling,&
81 : matrix_sqrt_newton_schulz
82 : USE kinds, ONLY: default_string_length,&
83 : dp,&
84 : int_8
85 : USE machine, ONLY: m_walltime
86 : USE message_passing, ONLY: mp_cart_type,&
87 : mp_comm_type,&
88 : mp_para_env_type
89 : USE orbital_pointers, ONLY: nso
90 : USE particle_methods, ONLY: get_particle_set
91 : USE particle_types, ONLY: particle_type
92 : USE qs_environment_types, ONLY: get_qs_env,&
93 : qs_environment_type
94 : USE qs_force_types, ONLY: qs_force_type
95 : USE qs_integral_utils, ONLY: basis_set_list_setup
96 : USE qs_interactions, ONLY: init_interaction_radii_orb_basis
97 : USE qs_kind_types, ONLY: qs_kind_type
98 : USE qs_ks_types, ONLY: qs_ks_env_type
99 : USE qs_mo_types, ONLY: get_mo_set,&
100 : mo_set_type
101 : USE qs_neighbor_list_types, ONLY: neighbor_list_set_p_type,&
102 : release_neighbor_list_sets
103 : USE qs_rho_types, ONLY: qs_rho_get,&
104 : qs_rho_type
105 : USE qs_tensors, ONLY: &
106 : build_2c_derivatives, build_2c_integrals, build_2c_neighbor_lists, build_3c_derivatives, &
107 : build_3c_integrals, build_3c_neighbor_lists, calc_2c_virial, calc_3c_virial, &
108 : compress_tensor, decompress_tensor, get_tensor_occupancy, neighbor_list_3c_destroy
109 : USE qs_tensors_types, ONLY: create_2c_tensor,&
110 : create_3c_tensor,&
111 : create_tensor_batches,&
112 : distribution_3d_create,&
113 : distribution_3d_type,&
114 : neighbor_list_3c_type,&
115 : split_block_sizes
116 : USE string_utilities, ONLY: uppercase
117 : USE util, ONLY: sort
118 : USE virial_types, ONLY: virial_type
119 : #include "./base/base_uses.f90"
120 :
121 : !$ USE OMP_LIB, ONLY: omp_get_num_threads
122 :
123 : IMPLICIT NONE
124 : PRIVATE
125 :
126 : PUBLIC :: hfx_ri_update_ks, hfx_ri_update_forces, get_force_from_3c_trace, get_2c_der_force, &
127 : get_idx_to_atom, print_ri_hfx, hfx_ri_pre_scf_calc_tensors, check_inverse
128 :
129 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'hfx_ri'
130 : CONTAINS
131 :
132 : ! **************************************************************************************************
133 : !> \brief Switches the RI_FLAVOR from MO to RHO or vice-versa
134 : !> \param ri_data ...
135 : !> \param qs_env ...
136 : !> \note As a side product, the ri_data is mostly reinitialized and the integrals recomputed
137 : ! **************************************************************************************************
138 22 : SUBROUTINE switch_ri_flavor(ri_data, qs_env)
139 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
140 : TYPE(qs_environment_type), POINTER :: qs_env
141 :
142 : CHARACTER(LEN=*), PARAMETER :: routineN = 'switch_ri_flavor'
143 :
144 : INTEGER :: handle, n_mem, new_flavor
145 22 : TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
146 : TYPE(dft_control_type), POINTER :: dft_control
147 : TYPE(mp_para_env_type), POINTER :: para_env
148 22 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
149 22 : TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
150 :
151 22 : NULLIFY (qs_kind_set, particle_set, atomic_kind_set, para_env, dft_control)
152 :
153 22 : CALL timeset(routineN, handle)
154 :
155 : CALL get_qs_env(qs_env, para_env=para_env, dft_control=dft_control, atomic_kind_set=atomic_kind_set, &
156 22 : particle_set=particle_set, qs_kind_set=qs_kind_set)
157 :
158 22 : CALL hfx_ri_release(ri_data, write_stats=.FALSE.)
159 :
160 22 : IF (ri_data%flavor == ri_pmat) THEN
161 : new_flavor = ri_mo
162 : ELSE
163 12 : new_flavor = ri_pmat
164 : END IF
165 22 : ri_data%flavor = new_flavor
166 :
167 22 : n_mem = ri_data%n_mem_input
168 22 : ri_data%n_mem_input = ri_data%n_mem_flavor_switch
169 22 : ri_data%n_mem_flavor_switch = n_mem
170 :
171 22 : CALL hfx_ri_init(ri_data, qs_kind_set, particle_set, atomic_kind_set, para_env)
172 :
173 : !Need to recalculate the integrals in the new flavor
174 : !TODO: should we backup the integrals and symmetrize/desymmetrize them instead of recomputing ?!?
175 : ! that only makes sense if actual integral calculation is not negligible
176 22 : IF (ri_data%flavor == ri_pmat) THEN
177 12 : CALL hfx_ri_pre_scf_Pmat(qs_env, ri_data)
178 : ELSE
179 10 : CALL hfx_ri_pre_scf_mo(qs_env, ri_data, dft_control%nspins)
180 : END IF
181 :
182 22 : IF (ri_data%unit_nr > 0) THEN
183 0 : IF (ri_data%flavor == ri_pmat) THEN
184 0 : WRITE (ri_data%unit_nr, '(T2,A)') "HFX_RI_INFO| temporarily switched to RI_FLAVOR RHO"
185 : ELSE
186 0 : WRITE (ri_data%unit_nr, '(T2,A)') "HFX_RI_INFO| temporarily switched to RI_FLAVOR MO"
187 : END IF
188 : END IF
189 :
190 22 : CALL timestop(handle)
191 :
192 22 : END SUBROUTINE switch_ri_flavor
193 :
194 : ! **************************************************************************************************
195 : !> \brief Pre-SCF steps in MO flavor of RI HFX
196 : !>
197 : !> Calculate 2-center & 3-center integrals (see hfx_ri_pre_scf_calc_tensors) and contract
198 : !> K(P, S) = sum_R K_2(P, R)^{-1} K_1(R, S)^{1/2}
199 : !> B(mu, lambda, R) = sum_P int_3c(mu, lambda, P) K(P, R)
200 : !> \param qs_env ...
201 : !> \param ri_data ...
202 : !> \param nspins ...
203 : ! **************************************************************************************************
204 26 : SUBROUTINE hfx_ri_pre_scf_mo(qs_env, ri_data, nspins)
205 : TYPE(qs_environment_type), POINTER :: qs_env
206 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
207 : INTEGER, INTENT(IN) :: nspins
208 :
209 : CHARACTER(LEN=*), PARAMETER :: routineN = 'hfx_ri_pre_scf_mo'
210 :
211 : INTEGER :: handle, handle2, ispin, n_dependent, &
212 : unit_nr, unit_nr_dbcsr
213 : REAL(KIND=dp) :: threshold
214 : TYPE(cp_blacs_env_type), POINTER :: blacs_env
215 156 : TYPE(dbcsr_type), DIMENSION(1) :: dbcsr_work_1, dbcsr_work_2, t_2c_int_mat, t_2c_op_pot, &
216 130 : t_2c_op_pot_sqrt, t_2c_op_pot_sqrt_inv, t_2c_op_RI, t_2c_op_RI_inv
217 26 : TYPE(dbt_type), ALLOCATABLE, DIMENSION(:) :: t_2c_int, t_2c_work
218 26 : TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :) :: t_3c_int
219 : TYPE(mp_para_env_type), POINTER :: para_env
220 :
221 26 : CALL timeset(routineN, handle)
222 :
223 26 : unit_nr_dbcsr = ri_data%unit_nr_dbcsr
224 26 : unit_nr = ri_data%unit_nr
225 :
226 26 : CALL get_qs_env(qs_env, para_env=para_env, blacs_env=blacs_env)
227 :
228 26 : CALL timeset(routineN//"_int", handle2)
229 :
230 806 : ALLOCATE (t_2c_int(1), t_2c_work(1), t_3c_int(1, 1))
231 26 : CALL hfx_ri_pre_scf_calc_tensors(qs_env, ri_data, t_2c_op_RI, t_2c_op_pot, t_3c_int)
232 :
233 26 : CALL timestop(handle2)
234 :
235 26 : CALL timeset(routineN//"_2c", handle2)
236 26 : IF (.NOT. ri_data%same_op) THEN
237 4 : SELECT CASE (ri_data%t2c_method)
238 : CASE (hfx_ri_do_2c_iter)
239 0 : CALL dbcsr_create(t_2c_op_RI_inv(1), template=t_2c_op_RI(1), matrix_type=dbcsr_type_no_symmetry)
240 0 : threshold = MAX(ri_data%filter_eps, 1.0e-12_dp)
241 0 : CALL invert_hotelling(t_2c_op_RI_inv(1), t_2c_op_RI(1), threshold=threshold, silent=.FALSE.)
242 : CASE (hfx_ri_do_2c_cholesky)
243 4 : CALL dbcsr_copy(t_2c_op_RI_inv(1), t_2c_op_RI(1))
244 4 : CALL cp_dbcsr_cholesky_decompose(t_2c_op_RI_inv(1), para_env=para_env, blacs_env=blacs_env)
245 4 : CALL cp_dbcsr_cholesky_invert(t_2c_op_RI_inv(1), para_env=para_env, blacs_env=blacs_env, uplo_to_full=.TRUE.)
246 : CASE (hfx_ri_do_2c_diag)
247 0 : CALL dbcsr_copy(t_2c_op_RI_inv(1), t_2c_op_RI(1))
248 : CALL cp_dbcsr_power(t_2c_op_RI_inv(1), -1.0_dp, ri_data%eps_eigval, n_dependent, &
249 4 : para_env, blacs_env, verbose=ri_data%unit_nr_dbcsr > 0)
250 : END SELECT
251 :
252 4 : IF (ri_data%check_2c_inv) THEN
253 0 : CALL check_inverse(t_2c_op_RI_inv(1), t_2c_op_RI(1), unit_nr=unit_nr)
254 : END IF
255 :
256 4 : CALL dbcsr_release(t_2c_op_RI(1))
257 :
258 4 : SELECT CASE (ri_data%t2c_method)
259 : CASE (hfx_ri_do_2c_iter)
260 0 : CALL dbcsr_create(t_2c_op_pot_sqrt(1), template=t_2c_op_pot(1), matrix_type=dbcsr_type_symmetric)
261 0 : CALL dbcsr_create(t_2c_op_pot_sqrt_inv(1), template=t_2c_op_pot(1), matrix_type=dbcsr_type_symmetric)
262 : CALL matrix_sqrt_newton_schulz(t_2c_op_pot_sqrt(1), t_2c_op_pot_sqrt_inv(1), t_2c_op_pot(1), &
263 : ri_data%filter_eps, ri_data%t2c_sqrt_order, ri_data%eps_lanczos, &
264 0 : ri_data%max_iter_lanczos)
265 :
266 0 : CALL dbcsr_release(t_2c_op_pot_sqrt_inv(1))
267 : CASE (hfx_ri_do_2c_diag, hfx_ri_do_2c_cholesky)
268 4 : CALL dbcsr_copy(t_2c_op_pot_sqrt(1), t_2c_op_pot(1))
269 : CALL cp_dbcsr_power(t_2c_op_pot_sqrt(1), 0.5_dp, ri_data%eps_eigval, n_dependent, &
270 8 : para_env, blacs_env, verbose=ri_data%unit_nr_dbcsr > 0)
271 : END SELECT
272 :
273 : !We need S^-1 and (P|Q) for the forces.
274 4 : CALL dbt_create(t_2c_op_RI_inv(1), t_2c_work(1))
275 4 : CALL dbt_copy_matrix_to_tensor(t_2c_op_RI_inv(1), t_2c_work(1))
276 4 : CALL dbt_copy(t_2c_work(1), ri_data%t_2c_inv(1, 1), move_data=.TRUE.)
277 4 : CALL dbt_destroy(t_2c_work(1))
278 4 : CALL dbt_filter(ri_data%t_2c_inv(1, 1), ri_data%filter_eps)
279 :
280 4 : CALL dbt_create(t_2c_op_pot(1), t_2c_work(1))
281 4 : CALL dbt_copy_matrix_to_tensor(t_2c_op_pot(1), t_2c_work(1))
282 4 : CALL dbt_copy(t_2c_work(1), ri_data%t_2c_pot(1, 1), move_data=.TRUE.)
283 4 : CALL dbt_destroy(t_2c_work(1))
284 4 : CALL dbt_filter(ri_data%t_2c_pot(1, 1), ri_data%filter_eps)
285 :
286 4 : IF (ri_data%check_2c_inv) THEN
287 0 : CALL check_sqrt(t_2c_op_pot(1), matrix_sqrt=t_2c_op_pot_sqrt(1), unit_nr=unit_nr)
288 : END IF
289 4 : CALL dbcsr_create(t_2c_int_mat(1), template=t_2c_op_pot(1), matrix_type=dbcsr_type_no_symmetry)
290 : CALL dbcsr_multiply("N", "N", 1.0_dp, t_2c_op_RI_inv(1), t_2c_op_pot_sqrt(1), &
291 4 : 0.0_dp, t_2c_int_mat(1), filter_eps=ri_data%filter_eps)
292 4 : CALL dbcsr_release(t_2c_op_RI_inv(1))
293 4 : CALL dbcsr_release(t_2c_op_pot_sqrt(1))
294 : ELSE
295 22 : SELECT CASE (ri_data%t2c_method)
296 : CASE (hfx_ri_do_2c_iter)
297 0 : CALL dbcsr_create(t_2c_int_mat(1), template=t_2c_op_pot(1), matrix_type=dbcsr_type_symmetric)
298 0 : CALL dbcsr_create(t_2c_op_pot_sqrt(1), template=t_2c_op_pot(1), matrix_type=dbcsr_type_symmetric)
299 : CALL matrix_sqrt_newton_schulz(t_2c_op_pot_sqrt(1), t_2c_int_mat(1), t_2c_op_pot(1), &
300 : ri_data%filter_eps, ri_data%t2c_sqrt_order, ri_data%eps_lanczos, &
301 0 : ri_data%max_iter_lanczos)
302 0 : CALL dbcsr_release(t_2c_op_pot_sqrt(1))
303 : CASE (hfx_ri_do_2c_diag, hfx_ri_do_2c_cholesky)
304 22 : CALL dbcsr_copy(t_2c_int_mat(1), t_2c_op_pot(1))
305 : CALL cp_dbcsr_power(t_2c_int_mat(1), -0.5_dp, ri_data%eps_eigval, n_dependent, &
306 44 : para_env, blacs_env, verbose=ri_data%unit_nr_dbcsr > 0)
307 : END SELECT
308 22 : IF (ri_data%check_2c_inv) THEN
309 0 : CALL check_sqrt(t_2c_op_pot(1), matrix_sqrt_inv=t_2c_int_mat(1), unit_nr=unit_nr)
310 : END IF
311 :
312 : !We need (P|Q)^-1 for the forces
313 22 : CALL dbcsr_copy(dbcsr_work_1(1), t_2c_int_mat(1))
314 22 : CALL dbcsr_create(dbcsr_work_2(1), template=t_2c_int_mat(1))
315 22 : CALL dbcsr_multiply("N", "N", 1.0_dp, dbcsr_work_1(1), t_2c_int_mat(1), 0.0_dp, dbcsr_work_2(1))
316 22 : CALL dbcsr_release(dbcsr_work_1(1))
317 22 : CALL dbt_create(dbcsr_work_2(1), t_2c_work(1))
318 22 : CALL dbt_copy_matrix_to_tensor(dbcsr_work_2(1), t_2c_work(1))
319 22 : CALL dbcsr_release(dbcsr_work_2(1))
320 22 : CALL dbt_copy(t_2c_work(1), ri_data%t_2c_inv(1, 1), move_data=.TRUE.)
321 22 : CALL dbt_destroy(t_2c_work(1))
322 22 : CALL dbt_filter(ri_data%t_2c_inv(1, 1), ri_data%filter_eps)
323 : END IF
324 :
325 26 : CALL dbcsr_release(t_2c_op_pot(1))
326 :
327 26 : CALL dbt_create(t_2c_int_mat(1), t_2c_int(1), name="(RI|RI)")
328 26 : CALL dbt_copy_matrix_to_tensor(t_2c_int_mat(1), t_2c_int(1))
329 26 : CALL dbcsr_release(t_2c_int_mat(1))
330 58 : DO ispin = 1, nspins
331 58 : CALL dbt_copy(t_2c_int(1), ri_data%t_2c_int(ispin, 1))
332 : END DO
333 26 : CALL dbt_destroy(t_2c_int(1))
334 26 : CALL timestop(handle2)
335 :
336 26 : CALL timeset(routineN//"_3c", handle2)
337 26 : CALL dbt_copy(t_3c_int(1, 1), ri_data%t_3c_int_ctr_1(1, 1), order=[2, 1, 3], move_data=.TRUE.)
338 26 : CALL dbt_filter(ri_data%t_3c_int_ctr_1(1, 1), ri_data%filter_eps)
339 26 : CALL dbt_copy(ri_data%t_3c_int_ctr_1(1, 1), ri_data%t_3c_int_ctr_2(1, 1))
340 26 : CALL dbt_destroy(t_3c_int(1, 1))
341 26 : CALL timestop(handle2)
342 :
343 26 : CALL timestop(handle)
344 :
345 260 : END SUBROUTINE
346 :
347 : ! **************************************************************************************************
348 : !> \brief ...
349 : !> \param matrix_1 ...
350 : !> \param matrix_2 ...
351 : !> \param name ...
352 : !> \param unit_nr ...
353 : ! **************************************************************************************************
354 0 : SUBROUTINE check_inverse(matrix_1, matrix_2, name, unit_nr)
355 : TYPE(dbcsr_type), INTENT(INOUT) :: matrix_1, matrix_2
356 : CHARACTER(len=*), INTENT(IN), OPTIONAL :: name
357 : INTEGER, INTENT(IN) :: unit_nr
358 :
359 : CHARACTER(len=default_string_length) :: name_prv
360 : REAL(KIND=dp) :: error, frob_matrix, frob_matrix_base
361 : TYPE(dbcsr_type) :: matrix_tmp
362 :
363 0 : IF (PRESENT(name)) THEN
364 0 : name_prv = name
365 : ELSE
366 0 : CALL dbcsr_get_info(matrix_1, name=name_prv)
367 : END IF
368 :
369 0 : CALL dbcsr_create(matrix_tmp, template=matrix_1)
370 : CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_1, matrix_2, &
371 0 : 0.0_dp, matrix_tmp)
372 0 : frob_matrix_base = dbcsr_frobenius_norm(matrix_tmp)
373 0 : CALL dbcsr_add_on_diag(matrix_tmp, -1.0_dp)
374 0 : frob_matrix = dbcsr_frobenius_norm(matrix_tmp)
375 0 : error = frob_matrix/frob_matrix_base
376 0 : IF (unit_nr > 0) THEN
377 : WRITE (UNIT=unit_nr, FMT="(T3,A,A,A,T73,ES8.1)") &
378 0 : "HFX_RI_INFO| Error for INV(", TRIM(name_prv), "):", error
379 : END IF
380 :
381 0 : CALL dbcsr_release(matrix_tmp)
382 0 : END SUBROUTINE
383 :
384 : ! **************************************************************************************************
385 : !> \brief ...
386 : !> \param matrix ...
387 : !> \param matrix_sqrt ...
388 : !> \param matrix_sqrt_inv ...
389 : !> \param name ...
390 : !> \param unit_nr ...
391 : ! **************************************************************************************************
392 0 : SUBROUTINE check_sqrt(matrix, matrix_sqrt, matrix_sqrt_inv, name, unit_nr)
393 : TYPE(dbcsr_type), INTENT(INOUT) :: matrix
394 : TYPE(dbcsr_type), INTENT(IN), OPTIONAL :: matrix_sqrt, matrix_sqrt_inv
395 : CHARACTER(len=*), INTENT(IN), OPTIONAL :: name
396 : INTEGER, INTENT(IN) :: unit_nr
397 :
398 : CHARACTER(len=default_string_length) :: name_prv
399 : REAL(KIND=dp) :: frob_matrix
400 : TYPE(dbcsr_type) :: matrix_copy, matrix_tmp
401 :
402 0 : IF (PRESENT(name)) THEN
403 0 : name_prv = name
404 : ELSE
405 0 : CALL dbcsr_get_info(matrix, name=name_prv)
406 : END IF
407 0 : IF (PRESENT(matrix_sqrt)) THEN
408 0 : CALL dbcsr_create(matrix_tmp, template=matrix)
409 0 : CALL dbcsr_copy(matrix_copy, matrix_sqrt)
410 : CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_sqrt, matrix_copy, &
411 0 : 0.0_dp, matrix_tmp)
412 0 : CALL dbcsr_add(matrix_tmp, matrix, 1.0_dp, -1.0_dp)
413 0 : frob_matrix = dbcsr_frobenius_norm(matrix_tmp)
414 0 : IF (unit_nr > 0) THEN
415 : WRITE (UNIT=unit_nr, FMT="(T3,A,A,A,T73,ES8.1)") &
416 0 : "HFX_RI_INFO| Error for SQRT(", TRIM(name_prv), "):", frob_matrix
417 : END IF
418 0 : CALL dbcsr_release(matrix_tmp)
419 0 : CALL dbcsr_release(matrix_copy)
420 : END IF
421 :
422 0 : IF (PRESENT(matrix_sqrt_inv)) THEN
423 0 : CALL dbcsr_create(matrix_tmp, template=matrix)
424 0 : CALL dbcsr_copy(matrix_copy, matrix_sqrt_inv)
425 : CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_sqrt_inv, matrix_copy, &
426 0 : 0.0_dp, matrix_tmp)
427 0 : CALL check_inverse(matrix_tmp, matrix, name="SQRT("//TRIM(name_prv)//")", unit_nr=unit_nr)
428 0 : CALL dbcsr_release(matrix_tmp)
429 0 : CALL dbcsr_release(matrix_copy)
430 : END IF
431 :
432 0 : END SUBROUTINE
433 :
434 : ! **************************************************************************************************
435 : !> \brief Calculate 2-center and 3-center integrals
436 : !>
437 : !> 2c: K_1(P, R) = (P|v1|R) and K_2(P, R) = (P|v2|R)
438 : !> 3c: int_3c(mu, lambda, P) = (mu lambda |v2| P)
439 : !> v_1 is HF operator, v_2 is RI metric
440 : !> \param qs_env ...
441 : !> \param ri_data ...
442 : !> \param t_2c_int_RI K_2(P, R) note: even with k-point, only need on central cell
443 : !> \param t_2c_int_pot K_1(P, R)
444 : !> \param t_3c_int int_3c(mu, lambda, P)
445 : !> \param do_kpoints ...
446 : !> \notes the integral tensor arrays are already allocated on entry
447 : ! **************************************************************************************************
448 4004 : SUBROUTINE hfx_ri_pre_scf_calc_tensors(qs_env, ri_data, t_2c_int_RI, t_2c_int_pot, t_3c_int, do_kpoints)
449 : TYPE(qs_environment_type), POINTER :: qs_env
450 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
451 : TYPE(dbcsr_type), DIMENSION(:), INTENT(OUT) :: t_2c_int_RI, t_2c_int_pot
452 : TYPE(dbt_type), DIMENSION(:, :) :: t_3c_int
453 : LOGICAL, INTENT(IN), OPTIONAL :: do_kpoints
454 :
455 : CHARACTER(LEN=*), PARAMETER :: routineN = 'hfx_ri_pre_scf_calc_tensors'
456 :
457 : CHARACTER :: symm
458 : INTEGER :: handle, i_img, i_mem, ibasis, j_img, &
459 : n_mem, natom, nblks, nimg, nkind, &
460 : nthreads
461 : INTEGER(int_8) :: nze
462 178 : INTEGER, ALLOCATABLE, DIMENSION(:) :: dist_AO_1, dist_AO_2, dist_RI, dist_RI_ext, &
463 356 : ends_array_mc_block_int, ends_array_mc_int, sizes_AO, sizes_RI, sizes_RI_ext, &
464 178 : sizes_RI_ext_split, starts_array_mc_block_int, starts_array_mc_int
465 : INTEGER, DIMENSION(3) :: pcoord, pdims
466 356 : INTEGER, DIMENSION(:), POINTER :: col_bsize, row_bsize
467 : LOGICAL :: converged, do_kpoints_prv
468 : REAL(dp) :: max_ev, min_ev, occ, RI_range
469 178 : TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
470 : TYPE(dbcsr_distribution_type) :: dbcsr_dist
471 1246 : TYPE(dbt_distribution_type) :: t_dist
472 534 : TYPE(dbt_pgrid_type) :: pgrid
473 1246 : TYPE(dbt_type) :: t_3c_tmp
474 178 : TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :) :: t_3c_int_batched
475 : TYPE(dft_control_type), POINTER :: dft_control
476 : TYPE(distribution_2d_type), POINTER :: dist_2d
477 : TYPE(distribution_3d_type) :: dist_3d
478 : TYPE(gto_basis_set_p_type), ALLOCATABLE, &
479 178 : DIMENSION(:), TARGET :: basis_set_AO, basis_set_RI
480 : TYPE(gto_basis_set_type), POINTER :: orb_basis, ri_basis
481 178 : TYPE(mp_cart_type) :: mp_comm_t3c
482 : TYPE(mp_para_env_type), POINTER :: para_env
483 : TYPE(neighbor_list_3c_type) :: nl_3c
484 : TYPE(neighbor_list_set_p_type), DIMENSION(:), &
485 178 : POINTER :: nl_2c_pot, nl_2c_RI
486 178 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
487 178 : TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
488 : TYPE(qs_ks_env_type), POINTER :: ks_env
489 :
490 178 : CALL timeset(routineN, handle)
491 178 : NULLIFY (col_bsize, row_bsize, dist_2d, nl_2c_pot, nl_2c_RI, &
492 178 : particle_set, qs_kind_set, ks_env, para_env)
493 :
494 : CALL get_qs_env(qs_env, natom=natom, nkind=nkind, qs_kind_set=qs_kind_set, particle_set=particle_set, &
495 178 : distribution_2d=dist_2d, ks_env=ks_env, dft_control=dft_control, para_env=para_env)
496 :
497 178 : RI_range = 0.0_dp
498 178 : do_kpoints_prv = .FALSE.
499 178 : IF (PRESENT(do_kpoints)) do_kpoints_prv = do_kpoints
500 178 : nimg = 1
501 178 : IF (do_kpoints_prv) THEN
502 70 : nimg = ri_data%nimg
503 70 : RI_range = ri_data%kp_RI_range
504 : END IF
505 :
506 712 : ALLOCATE (sizes_RI(natom), sizes_AO(natom))
507 1332 : ALLOCATE (basis_set_RI(nkind), basis_set_AO(nkind))
508 178 : CALL basis_set_list_setup(basis_set_RI, ri_data%ri_basis_type, qs_kind_set)
509 178 : CALL get_particle_set(particle_set, qs_kind_set, nsgf=sizes_RI, basis=basis_set_RI)
510 178 : CALL basis_set_list_setup(basis_set_AO, ri_data%orb_basis_type, qs_kind_set)
511 178 : CALL get_particle_set(particle_set, qs_kind_set, nsgf=sizes_AO, basis=basis_set_AO)
512 :
513 488 : DO ibasis = 1, SIZE(basis_set_AO)
514 310 : orb_basis => basis_set_AO(ibasis)%gto_basis_set
515 310 : ri_basis => basis_set_RI(ibasis)%gto_basis_set
516 : ! interaction radii should be based on eps_pgf_orb controlled in RI section
517 : ! (since hartree-fock needs very tight eps_pgf_orb for Kohn-Sham/Fock matrix but eps_pgf_orb
518 : ! can be much looser in RI HFX since no systematic error is introduced with tensor sparsity)
519 310 : CALL init_interaction_radii_orb_basis(orb_basis, ri_data%eps_pgf_orb)
520 488 : CALL init_interaction_radii_orb_basis(ri_basis, ri_data%eps_pgf_orb)
521 : END DO
522 :
523 178 : n_mem = ri_data%n_mem
524 : CALL create_tensor_batches(sizes_RI, n_mem, starts_array_mc_int, ends_array_mc_int, &
525 : starts_array_mc_block_int, ends_array_mc_block_int)
526 :
527 178 : DEALLOCATE (starts_array_mc_int, ends_array_mc_int)
528 :
529 : !We separate the K-points and standard 3c integrals, because handle quite differently
530 178 : IF (.NOT. do_kpoints_prv) THEN
531 :
532 108 : nthreads = 1
533 108 : !$ nthreads = omp_get_num_threads()
534 108 : pdims = 0
535 432 : CALL dbt_pgrid_create(para_env, pdims, pgrid, tensor_dims=[MAX(1, natom/(n_mem*nthreads)), natom, natom])
536 :
537 972 : ALLOCATE (t_3c_int_batched(1, 1))
538 : CALL create_3c_tensor(t_3c_int_batched(1, 1), dist_RI, dist_AO_1, dist_AO_2, pgrid, &
539 : sizes_RI, sizes_AO, sizes_AO, map1=[1], map2=[2, 3], &
540 108 : name="(RI | AO AO)")
541 :
542 108 : CALL get_qs_env(qs_env, nkind=nkind, particle_set=particle_set, atomic_kind_set=atomic_kind_set)
543 108 : CALL dbt_mp_environ_pgrid(pgrid, pdims, pcoord)
544 108 : CALL mp_comm_t3c%create(pgrid%mp_comm_2d, 3, pdims)
545 : CALL distribution_3d_create(dist_3d, dist_RI, dist_AO_1, dist_AO_2, &
546 108 : nkind, particle_set, mp_comm_t3c, own_comm=.TRUE.)
547 108 : DEALLOCATE (dist_RI, dist_AO_1, dist_AO_2)
548 :
549 : CALL create_3c_tensor(t_3c_int(1, 1), dist_RI, dist_AO_1, dist_AO_2, ri_data%pgrid, &
550 : ri_data%bsizes_RI_split, ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
551 : map1=[1], map2=[2, 3], &
552 108 : name="O (RI AO | AO)")
553 :
554 : CALL build_3c_neighbor_lists(nl_3c, basis_set_RI, basis_set_AO, basis_set_AO, dist_3d, ri_data%ri_metric, &
555 108 : "HFX_3c_nl", qs_env, op_pos=1, sym_jk=.TRUE., own_dist=.TRUE.)
556 :
557 424 : DO i_mem = 1, n_mem
558 : CALL build_3c_integrals(t_3c_int_batched, ri_data%filter_eps/2, qs_env, nl_3c, &
559 : basis_set_RI, basis_set_AO, basis_set_AO, &
560 : ri_data%ri_metric, int_eps=ri_data%eps_schwarz, op_pos=1, &
561 : desymmetrize=.FALSE., &
562 948 : bounds_i=[starts_array_mc_block_int(i_mem), ends_array_mc_block_int(i_mem)])
563 316 : CALL dbt_copy(t_3c_int_batched(1, 1), t_3c_int(1, 1), summation=.TRUE., move_data=.TRUE.)
564 424 : CALL dbt_filter(t_3c_int(1, 1), ri_data%filter_eps/2)
565 : END DO
566 :
567 108 : CALL dbt_destroy(t_3c_int_batched(1, 1))
568 :
569 108 : CALL neighbor_list_3c_destroy(nl_3c)
570 :
571 108 : CALL dbt_create(t_3c_int(1, 1), t_3c_tmp)
572 :
573 108 : IF (ri_data%flavor == ri_pmat) THEN ! desymmetrize
574 : ! desymmetrize
575 82 : CALL dbt_copy(t_3c_int(1, 1), t_3c_tmp)
576 82 : CALL dbt_copy(t_3c_tmp, t_3c_int(1, 1), order=[1, 3, 2], summation=.TRUE., move_data=.TRUE.)
577 :
578 : ! For RI-RHO filter_eps_storage is reserved for screening tensor contracted with RI-metric
579 : ! with RI metric but not to bare integral tensor
580 82 : CALL dbt_filter(t_3c_int(1, 1), ri_data%filter_eps)
581 : ELSE
582 26 : CALL dbt_filter(t_3c_int(1, 1), ri_data%filter_eps_storage/2)
583 : END IF
584 :
585 108 : CALL dbt_destroy(t_3c_tmp)
586 :
587 : ELSE !do_kpoints
588 :
589 70 : nthreads = 1
590 70 : !$ nthreads = omp_get_num_threads()
591 70 : pdims = 0
592 280 : CALL dbt_pgrid_create(para_env, pdims, pgrid, tensor_dims=[natom, natom, MAX(1, natom/(n_mem*nthreads))])
593 :
594 : !In k-points HFX, we stack all images along the RI direction in the same tensors, in order
595 : !to avoid storing nimg x ncell_RI different tensors (very memory intensive)
596 70 : nblks = SIZE(ri_data%bsizes_RI_split)
597 350 : ALLOCATE (sizes_RI_ext(natom*ri_data%ncell_RI), sizes_RI_ext_split(nblks*ri_data%ncell_RI))
598 506 : DO i_img = 1, ri_data%ncell_RI
599 1308 : sizes_RI_ext((i_img - 1)*natom + 1:i_img*natom) = sizes_RI(:)
600 2344 : sizes_RI_ext_split((i_img - 1)*nblks + 1:i_img*nblks) = ri_data%bsizes_RI_split(:)
601 : END DO
602 :
603 : CALL create_3c_tensor(t_3c_tmp, dist_AO_1, dist_AO_2, dist_RI, &
604 : pgrid, sizes_AO, sizes_AO, sizes_RI, map1=[1, 2], map2=[3], &
605 70 : name="(AO AO | RI)")
606 70 : CALL dbt_destroy(t_3c_tmp)
607 :
608 : !For the integrals to work, the distribution along the RI direction must be repeated
609 210 : ALLOCATE (dist_RI_ext(natom*ri_data%ncell_RI))
610 506 : DO i_img = 1, ri_data%ncell_RI
611 1378 : dist_RI_ext((i_img - 1)*natom + 1:i_img*natom) = dist_RI(:)
612 : END DO
613 :
614 2416 : ALLOCATE (t_3c_int_batched(nimg, 1))
615 70 : CALL dbt_distribution_new(t_dist, pgrid, dist_AO_1, dist_AO_2, dist_RI_ext)
616 : CALL dbt_create(t_3c_int_batched(1, 1), "KP_3c_ints", t_dist, [1, 2], [3], &
617 70 : sizes_AO, sizes_AO, sizes_RI_ext)
618 1716 : DO i_img = 2, nimg
619 1716 : CALL dbt_create(t_3c_int_batched(1, 1), t_3c_int_batched(i_img, 1))
620 : END DO
621 70 : CALL dbt_distribution_destroy(t_dist)
622 :
623 70 : CALL get_qs_env(qs_env, nkind=nkind, particle_set=particle_set, atomic_kind_set=atomic_kind_set)
624 70 : CALL dbt_mp_environ_pgrid(pgrid, pdims, pcoord)
625 70 : CALL mp_comm_t3c%create(pgrid%mp_comm_2d, 3, pdims)
626 : CALL distribution_3d_create(dist_3d, dist_AO_1, dist_AO_2, dist_RI, &
627 70 : nkind, particle_set, mp_comm_t3c, own_comm=.TRUE.)
628 70 : DEALLOCATE (dist_RI, dist_AO_1, dist_AO_2)
629 :
630 : ! create 3c tensor for storage of ints
631 : CALL build_3c_neighbor_lists(nl_3c, basis_set_AO, basis_set_AO, basis_set_RI, dist_3d, ri_data%ri_metric, &
632 70 : "HFX_3c_nl", qs_env, op_pos=2, sym_ij=.FALSE., own_dist=.TRUE.)
633 :
634 : CALL create_3c_tensor(t_3c_int(1, 1), dist_RI, dist_AO_1, dist_AO_2, ri_data%pgrid, &
635 : sizes_RI_ext_split, ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
636 : map1=[1], map2=[2, 3], &
637 70 : name="O (RI AO | AO)")
638 1716 : DO j_img = 2, nimg
639 1716 : CALL dbt_create(t_3c_int(1, 1), t_3c_int(1, j_img))
640 : END DO
641 :
642 70 : CALL dbt_create(t_3c_int(1, 1), t_3c_tmp)
643 192 : DO i_mem = 1, n_mem
644 : CALL build_3c_integrals(t_3c_int_batched, ri_data%filter_eps, qs_env, nl_3c, &
645 : basis_set_AO, basis_set_AO, basis_set_RI, &
646 : ri_data%ri_metric, int_eps=ri_data%eps_schwarz, op_pos=2, &
647 : desymmetrize=.FALSE., do_kpoints=.TRUE., do_hfx_kpoints=.TRUE., &
648 : bounds_k=[starts_array_mc_block_int(i_mem), ends_array_mc_block_int(i_mem)], &
649 366 : RI_range=RI_range, img_to_RI_cell=ri_data%img_to_RI_cell)
650 :
651 3440 : DO i_img = 1, nimg
652 : !we start with (mu^0 sigma^i | P^j) and finish with (P^i | mu^0 sigma^j)
653 3248 : CALL get_tensor_occupancy(t_3c_int_batched(i_img, 1), nze, occ)
654 3370 : IF (nze > 0) THEN
655 2154 : CALL dbt_copy(t_3c_int_batched(i_img, 1), t_3c_tmp, order=[3, 2, 1], move_data=.TRUE.)
656 2154 : CALL dbt_filter(t_3c_tmp, ri_data%filter_eps)
657 : CALL dbt_copy(t_3c_tmp, t_3c_int(1, i_img), order=[1, 3, 2], &
658 2154 : summation=.TRUE., move_data=.TRUE.)
659 : END IF
660 : END DO
661 : END DO
662 :
663 1786 : DO i_img = 1, nimg
664 1786 : CALL dbt_destroy(t_3c_int_batched(i_img, 1))
665 : END DO
666 1786 : DEALLOCATE (t_3c_int_batched)
667 70 : CALL neighbor_list_3c_destroy(nl_3c)
668 70 : CALL dbt_destroy(t_3c_tmp)
669 : END IF
670 178 : CALL dbt_pgrid_destroy(pgrid)
671 :
672 : CALL build_2c_neighbor_lists(nl_2c_pot, basis_set_RI, basis_set_RI, ri_data%hfx_pot, &
673 : "HFX_2c_nl_pot", qs_env, sym_ij=.NOT. do_kpoints_prv, &
674 178 : dist_2d=dist_2d)
675 :
676 178 : CALL cp_dbcsr_dist2d_to_dist(dist_2d, dbcsr_dist)
677 534 : ALLOCATE (row_bsize(SIZE(sizes_RI)))
678 356 : ALLOCATE (col_bsize(SIZE(sizes_RI)))
679 676 : row_bsize(:) = sizes_RI
680 676 : col_bsize(:) = sizes_RI
681 :
682 : !Use non-symmetric nl and matrices for k-points
683 178 : symm = dbcsr_type_symmetric
684 178 : IF (do_kpoints_prv) symm = dbcsr_type_no_symmetry
685 :
686 178 : CALL dbcsr_create(t_2c_int_pot(1), "(R|P) HFX", dbcsr_dist, symm, row_bsize, col_bsize)
687 1824 : DO i_img = 2, nimg
688 1824 : CALL dbcsr_create(t_2c_int_pot(i_img), template=t_2c_int_pot(1))
689 : END DO
690 :
691 178 : IF (.NOT. ri_data%same_op) THEN
692 88 : CALL dbcsr_create(t_2c_int_RI(1), "(R|P) HFX", dbcsr_dist, symm, row_bsize, col_bsize)
693 1734 : DO i_img = 2, nimg
694 1734 : CALL dbcsr_create(t_2c_int_RI(i_img), template=t_2c_int_RI(1))
695 : END DO
696 : END IF
697 178 : DEALLOCATE (col_bsize, row_bsize)
698 :
699 178 : CALL dbcsr_distribution_release(dbcsr_dist)
700 :
701 : CALL build_2c_integrals(t_2c_int_pot, ri_data%filter_eps_2c, qs_env, nl_2c_pot, basis_set_RI, basis_set_RI, &
702 178 : ri_data%hfx_pot, do_kpoints=do_kpoints_prv, do_hfx_kpoints=do_kpoints_prv)
703 178 : CALL release_neighbor_list_sets(nl_2c_pot)
704 :
705 178 : IF (.NOT. ri_data%same_op) THEN
706 : CALL build_2c_neighbor_lists(nl_2c_RI, basis_set_RI, basis_set_RI, ri_data%ri_metric, &
707 : "HFX_2c_nl_RI", qs_env, sym_ij=.NOT. do_kpoints_prv, &
708 88 : dist_2d=dist_2d)
709 :
710 : CALL build_2c_integrals(t_2c_int_RI, ri_data%filter_eps_2c, qs_env, nl_2c_RI, basis_set_RI, basis_set_RI, &
711 88 : ri_data%ri_metric, do_kpoints=do_kpoints_prv, do_hfx_kpoints=do_kpoints_prv)
712 :
713 88 : CALL release_neighbor_list_sets(nl_2c_RI)
714 : END IF
715 :
716 488 : DO ibasis = 1, SIZE(basis_set_AO)
717 310 : orb_basis => basis_set_AO(ibasis)%gto_basis_set
718 310 : ri_basis => basis_set_RI(ibasis)%gto_basis_set
719 : ! reset interaction radii of orb basis
720 310 : CALL init_interaction_radii_orb_basis(orb_basis, dft_control%qs_control%eps_pgf_orb)
721 488 : CALL init_interaction_radii_orb_basis(ri_basis, dft_control%qs_control%eps_pgf_orb)
722 : END DO
723 :
724 178 : IF (ri_data%calc_condnum) THEN
725 : CALL arnoldi_extremal(t_2c_int_pot(1), max_ev, min_ev, threshold=ri_data%eps_lanczos, &
726 4 : max_iter=ri_data%max_iter_lanczos, converged=converged)
727 :
728 4 : IF (.NOT. converged) THEN
729 0 : CPWARN("Not converged: unreliable condition number estimate of (P|Q) matrix (HFX potential).")
730 : END IF
731 :
732 4 : IF (ri_data%unit_nr > 0) THEN
733 2 : WRITE (ri_data%unit_nr, '(T2,A)') "2-Norm Condition Number of (P|Q) integrals (HFX potential)"
734 2 : IF (min_ev > 0) THEN
735 : WRITE (ri_data%unit_nr, '(T4,A,ES11.3E3,T32,A,ES11.3E3,A4,ES11.3E3,T63,A,F8.4)') &
736 2 : "CN : max/min ev: ", max_ev, " / ", min_ev, "=", max_ev/min_ev, "Log(2-CN):", LOG10(max_ev/min_ev)
737 : ELSE
738 : WRITE (ri_data%unit_nr, '(T4,A,ES11.3E3,T32,A,ES11.3E3,T63,A)') &
739 0 : "CN : max/min ev: ", max_ev, " / ", min_ev, "Log(CN): infinity"
740 : END IF
741 : END IF
742 :
743 4 : IF (.NOT. ri_data%same_op) THEN
744 : CALL arnoldi_extremal(t_2c_int_RI(1), max_ev, min_ev, threshold=ri_data%eps_lanczos, &
745 2 : max_iter=ri_data%max_iter_lanczos, converged=converged)
746 :
747 2 : IF (.NOT. converged) THEN
748 0 : CPWARN("Not converged: unreliable condition number estimate of (P|Q) matrix (RI metric).")
749 : END IF
750 :
751 2 : IF (ri_data%unit_nr > 0) THEN
752 1 : WRITE (ri_data%unit_nr, '(T2,A)') "2-Norm Condition Number of (P|Q) integrals (RI metric)"
753 1 : IF (min_ev > 0) THEN
754 : WRITE (ri_data%unit_nr, '(T4,A,ES11.3E3,T32,A,ES11.3E3,A4,ES11.3E3,T63,A,F8.4)') &
755 1 : "CN : max/min ev: ", max_ev, " / ", min_ev, "=", max_ev/min_ev, "Log(2-CN):", LOG10(max_ev/min_ev)
756 : ELSE
757 : WRITE (ri_data%unit_nr, '(T4,A,ES11.3E3,T32,A,ES11.3E3,T63,A)') &
758 0 : "CN : max/min ev: ", max_ev, " / ", min_ev, "Log(CN): infinity"
759 : END IF
760 : END IF
761 : END IF
762 : END IF
763 :
764 178 : CALL timestop(handle)
765 1176 : END SUBROUTINE
766 :
767 : ! **************************************************************************************************
768 : !> \brief Pre-SCF steps in rho flavor of RI HFX
769 : !>
770 : !> K(P, S) = sum_{R,Q} K_2(P, R)^{-1} K_1(R, Q) K_2(Q, S)^{-1}
771 : !> Calculate B(mu, lambda, R) = sum_P int_3c(mu, lambda, P) K(P, R)
772 : !> \param qs_env ...
773 : !> \param ri_data ...
774 : ! **************************************************************************************************
775 82 : SUBROUTINE hfx_ri_pre_scf_Pmat(qs_env, ri_data)
776 : TYPE(qs_environment_type), POINTER :: qs_env
777 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
778 :
779 : CHARACTER(LEN=*), PARAMETER :: routineN = 'hfx_ri_pre_scf_Pmat'
780 :
781 : INTEGER :: handle, handle2, i_mem, j_mem, &
782 : n_dependent, unit_nr, unit_nr_dbcsr
783 : INTEGER(int_8) :: nflop, nze, nze_O
784 82 : INTEGER, ALLOCATABLE, DIMENSION(:) :: batch_ranges_AO, batch_ranges_RI
785 : INTEGER, DIMENSION(2, 1) :: bounds_i
786 : INTEGER, DIMENSION(2, 2) :: bounds_j
787 : INTEGER, DIMENSION(3) :: dims_3c
788 : REAL(KIND=dp) :: compression_factor, memory_3c, occ, &
789 : threshold
790 : TYPE(cp_blacs_env_type), POINTER :: blacs_env
791 328 : TYPE(dbcsr_type), DIMENSION(1) :: t_2c_int_mat, t_2c_op_pot, t_2c_op_RI, &
792 246 : t_2c_tmp, t_2c_tmp_2
793 738 : TYPE(dbt_type) :: t_3c_2
794 164 : TYPE(dbt_type), ALLOCATABLE, DIMENSION(:) :: t_2c_int, t_2c_work
795 82 : TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :) :: t_3c_int_1
796 : TYPE(mp_para_env_type), POINTER :: para_env
797 :
798 82 : CALL timeset(routineN, handle)
799 :
800 82 : unit_nr_dbcsr = ri_data%unit_nr_dbcsr
801 82 : unit_nr = ri_data%unit_nr
802 :
803 82 : CALL get_qs_env(qs_env, para_env=para_env, blacs_env=blacs_env)
804 :
805 82 : CALL timeset(routineN//"_int", handle2)
806 :
807 2542 : ALLOCATE (t_2c_int(1), t_2c_work(1), t_3c_int_1(1, 1))
808 82 : CALL hfx_ri_pre_scf_calc_tensors(qs_env, ri_data, t_2c_op_RI, t_2c_op_pot, t_3c_int_1)
809 :
810 82 : CALL dbt_copy(t_3c_int_1(1, 1), ri_data%t_3c_int_ctr_3(1, 1), order=[1, 2, 3], move_data=.TRUE.)
811 :
812 82 : CALL dbt_destroy(t_3c_int_1(1, 1))
813 :
814 82 : CALL timestop(handle2)
815 :
816 82 : CALL timeset(routineN//"_2c", handle2)
817 :
818 82 : IF (ri_data%same_op) t_2c_op_RI(1) = t_2c_op_pot(1)
819 82 : CALL dbcsr_create(t_2c_int_mat(1), template=t_2c_op_RI(1), matrix_type=dbcsr_type_no_symmetry)
820 82 : threshold = MAX(ri_data%filter_eps, 1.0e-12_dp)
821 :
822 82 : SELECT CASE (ri_data%t2c_method)
823 : CASE (hfx_ri_do_2c_iter)
824 : CALL invert_hotelling(t_2c_int_mat(1), t_2c_op_RI(1), &
825 0 : threshold=threshold, silent=.FALSE.)
826 : CASE (hfx_ri_do_2c_cholesky)
827 82 : CALL dbcsr_copy(t_2c_int_mat(1), t_2c_op_RI(1))
828 82 : CALL cp_dbcsr_cholesky_decompose(t_2c_int_mat(1), para_env=para_env, blacs_env=blacs_env)
829 82 : CALL cp_dbcsr_cholesky_invert(t_2c_int_mat(1), para_env=para_env, blacs_env=blacs_env, uplo_to_full=.TRUE.)
830 : CASE (hfx_ri_do_2c_diag)
831 0 : CALL dbcsr_copy(t_2c_int_mat(1), t_2c_op_RI(1))
832 : CALL cp_dbcsr_power(t_2c_int_mat(1), -1.0_dp, ri_data%eps_eigval, n_dependent, &
833 82 : para_env, blacs_env, verbose=ri_data%unit_nr_dbcsr > 0)
834 : END SELECT
835 :
836 82 : IF (ri_data%check_2c_inv) THEN
837 0 : CALL check_inverse(t_2c_int_mat(1), t_2c_op_RI(1), unit_nr=unit_nr)
838 : END IF
839 :
840 : !Need to save the (P|Q)^-1 tensor for forces (inverse metric if not same_op)
841 82 : CALL dbt_create(t_2c_int_mat(1), t_2c_work(1))
842 82 : CALL dbt_copy_matrix_to_tensor(t_2c_int_mat(1), t_2c_work(1))
843 82 : CALL dbt_copy(t_2c_work(1), ri_data%t_2c_inv(1, 1), move_data=.TRUE.)
844 82 : CALL dbt_destroy(t_2c_work(1))
845 82 : CALL dbt_filter(ri_data%t_2c_inv(1, 1), ri_data%filter_eps)
846 82 : IF (.NOT. ri_data%same_op) THEN
847 : !Also save the RI (P|Q) integral
848 14 : CALL dbt_create(t_2c_op_pot(1), t_2c_work(1))
849 14 : CALL dbt_copy_matrix_to_tensor(t_2c_op_pot(1), t_2c_work(1))
850 14 : CALL dbt_copy(t_2c_work(1), ri_data%t_2c_pot(1, 1), move_data=.TRUE.)
851 14 : CALL dbt_destroy(t_2c_work(1))
852 14 : CALL dbt_filter(ri_data%t_2c_pot(1, 1), ri_data%filter_eps)
853 : END IF
854 :
855 82 : IF (ri_data%same_op) THEN
856 68 : CALL dbcsr_release(t_2c_op_pot(1))
857 : ELSE
858 14 : CALL dbcsr_create(t_2c_tmp(1), template=t_2c_op_RI(1), matrix_type=dbcsr_type_no_symmetry)
859 14 : CALL dbcsr_create(t_2c_tmp_2(1), template=t_2c_op_RI(1), matrix_type=dbcsr_type_no_symmetry)
860 14 : CALL dbcsr_release(t_2c_op_RI(1))
861 : CALL dbcsr_multiply('N', 'N', 1.0_dp, t_2c_int_mat(1), t_2c_op_pot(1), 0.0_dp, t_2c_tmp(1), &
862 14 : filter_eps=ri_data%filter_eps)
863 :
864 14 : CALL dbcsr_release(t_2c_op_pot(1))
865 : CALL dbcsr_multiply('N', 'N', 1.0_dp, t_2c_tmp(1), t_2c_int_mat(1), 0.0_dp, t_2c_tmp_2(1), &
866 14 : filter_eps=ri_data%filter_eps)
867 14 : CALL dbcsr_release(t_2c_tmp(1))
868 14 : CALL dbcsr_release(t_2c_int_mat(1))
869 14 : t_2c_int_mat(1) = t_2c_tmp_2(1)
870 : END IF
871 :
872 82 : CALL dbt_create(t_2c_int_mat(1), t_2c_int(1), name="(RI|RI)")
873 82 : CALL dbt_copy_matrix_to_tensor(t_2c_int_mat(1), t_2c_int(1))
874 82 : CALL dbcsr_release(t_2c_int_mat(1))
875 82 : CALL dbt_copy(t_2c_int(1), ri_data%t_2c_int(1, 1), move_data=.TRUE.)
876 82 : CALL dbt_destroy(t_2c_int(1))
877 82 : CALL dbt_filter(ri_data%t_2c_int(1, 1), ri_data%filter_eps)
878 :
879 82 : CALL timestop(handle2)
880 :
881 82 : CALL dbt_create(ri_data%t_3c_int_ctr_3(1, 1), t_3c_2)
882 :
883 82 : CALL dbt_get_info(ri_data%t_3c_int_ctr_3(1, 1), nfull_total=dims_3c)
884 :
885 82 : memory_3c = 0.0_dp
886 82 : nze_O = 0
887 :
888 246 : ALLOCATE (batch_ranges_RI(ri_data%n_mem_RI + 1))
889 246 : ALLOCATE (batch_ranges_AO(ri_data%n_mem + 1))
890 328 : batch_ranges_RI(:ri_data%n_mem_RI) = ri_data%starts_array_RI_mem_block(:)
891 82 : batch_ranges_RI(ri_data%n_mem_RI + 1) = ri_data%ends_array_RI_mem_block(ri_data%n_mem_RI) + 1
892 328 : batch_ranges_AO(:ri_data%n_mem) = ri_data%starts_array_mem_block(:)
893 82 : batch_ranges_AO(ri_data%n_mem + 1) = ri_data%ends_array_mem_block(ri_data%n_mem) + 1
894 :
895 : CALL dbt_batched_contract_init(ri_data%t_3c_int_ctr_3(1, 1), batch_range_1=batch_ranges_RI, &
896 82 : batch_range_2=batch_ranges_AO)
897 : CALL dbt_batched_contract_init(t_3c_2, batch_range_1=batch_ranges_RI, &
898 82 : batch_range_2=batch_ranges_AO)
899 :
900 328 : DO i_mem = 1, ri_data%n_mem_RI
901 738 : bounds_i(:, 1) = [ri_data%starts_array_RI_mem(i_mem), ri_data%ends_array_RI_mem(i_mem)]
902 :
903 246 : CALL dbt_batched_contract_init(ri_data%t_2c_int(1, 1))
904 984 : DO j_mem = 1, ri_data%n_mem
905 2214 : bounds_j(:, 1) = [ri_data%starts_array_mem(j_mem), ri_data%ends_array_mem(j_mem)]
906 2214 : bounds_j(:, 2) = [1, dims_3c(3)]
907 738 : CALL timeset(routineN//"_RIx3C", handle2)
908 : CALL dbt_contract(1.0_dp, ri_data%t_2c_int(1, 1), ri_data%t_3c_int_ctr_3(1, 1), &
909 : 0.0_dp, t_3c_2, &
910 : contract_1=[2], notcontract_1=[1], &
911 : contract_2=[1], notcontract_2=[2, 3], &
912 : map_1=[1], map_2=[2, 3], filter_eps=ri_data%filter_eps_storage, &
913 : bounds_2=bounds_i, &
914 : bounds_3=bounds_j, &
915 : unit_nr=unit_nr_dbcsr, &
916 738 : flop=nflop)
917 :
918 738 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
919 738 : CALL timestop(handle2)
920 :
921 738 : CALL timeset(routineN//"_copy_2", handle2)
922 738 : CALL dbt_copy(t_3c_2, ri_data%t_3c_int_ctr_1(1, 1), order=[2, 1, 3], move_data=.TRUE.)
923 :
924 738 : CALL get_tensor_occupancy(ri_data%t_3c_int_ctr_1(1, 1), nze, occ)
925 738 : nze_O = nze_O + nze
926 :
927 : CALL compress_tensor(ri_data%t_3c_int_ctr_1(1, 1), ri_data%blk_indices(j_mem, i_mem)%ind, &
928 738 : ri_data%store_3c(j_mem, i_mem), ri_data%filter_eps_storage, memory_3c)
929 :
930 3198 : CALL timestop(handle2)
931 : END DO
932 328 : CALL dbt_batched_contract_finalize(ri_data%t_2c_int(1, 1))
933 : END DO
934 82 : CALL dbt_batched_contract_finalize(t_3c_2)
935 82 : CALL dbt_batched_contract_finalize(ri_data%t_3c_int_ctr_3(1, 1))
936 :
937 82 : CALL para_env%sum(memory_3c)
938 82 : compression_factor = REAL(nze_O, dp)*1.0E-06*8.0_dp/memory_3c
939 :
940 82 : IF (unit_nr > 0) THEN
941 : WRITE (UNIT=unit_nr, FMT="((T3,A,T66,F11.2,A4))") &
942 20 : "MEMORY_INFO| Memory for 3-center HFX integrals (compressed):", memory_3c, ' MiB'
943 :
944 : WRITE (UNIT=unit_nr, FMT="((T3,A,T60,F21.2))") &
945 20 : "MEMORY_INFO| Compression factor: ", compression_factor
946 : END IF
947 :
948 82 : CALL dbt_clear(ri_data%t_2c_int(1, 1))
949 82 : CALL dbt_destroy(t_3c_2)
950 :
951 82 : CALL dbt_copy(ri_data%t_3c_int_ctr_3(1, 1), ri_data%t_3c_int_ctr_2(1, 1), order=[2, 1, 3], move_data=.TRUE.)
952 :
953 82 : CALL timestop(handle)
954 738 : END SUBROUTINE
955 :
956 : ! **************************************************************************************************
957 : !> \brief Sorts 2d indices w.r.t. rows and columns
958 : !> \param blk_ind ...
959 : ! **************************************************************************************************
960 0 : SUBROUTINE sort_unique_blkind_2d(blk_ind)
961 : INTEGER, ALLOCATABLE, DIMENSION(:, :), &
962 : INTENT(INOUT) :: blk_ind
963 :
964 : INTEGER :: end_ind, iblk, iblk_all, irow, nblk, &
965 : ncols, start_ind
966 0 : INTEGER, ALLOCATABLE, DIMENSION(:) :: ind_1, ind_2, sort_1, sort_2
967 0 : INTEGER, ALLOCATABLE, DIMENSION(:, :) :: blk_ind_tmp
968 :
969 0 : nblk = SIZE(blk_ind, 1)
970 :
971 0 : ALLOCATE (sort_1(nblk))
972 0 : ALLOCATE (ind_1(nblk))
973 :
974 0 : sort_1(:) = blk_ind(:, 1)
975 0 : CALL sort(sort_1, nblk, ind_1)
976 :
977 0 : blk_ind(:, :) = blk_ind(ind_1, :)
978 :
979 0 : start_ind = 1
980 :
981 0 : DO WHILE (start_ind <= nblk)
982 0 : irow = blk_ind(start_ind, 1)
983 0 : end_ind = start_ind
984 :
985 0 : IF (end_ind + 1 <= nblk) THEN
986 0 : DO WHILE (blk_ind(end_ind + 1, 1) == irow)
987 0 : end_ind = end_ind + 1
988 0 : IF (end_ind + 1 > nblk) EXIT
989 : END DO
990 : END IF
991 :
992 0 : ncols = end_ind - start_ind + 1
993 0 : ALLOCATE (sort_2(ncols))
994 0 : ALLOCATE (ind_2(ncols))
995 0 : sort_2(:) = blk_ind(start_ind:end_ind, 2)
996 0 : CALL sort(sort_2, ncols, ind_2)
997 0 : ind_2 = ind_2 + start_ind - 1
998 :
999 0 : blk_ind(start_ind:end_ind, :) = blk_ind(ind_2, :)
1000 0 : start_ind = end_ind + 1
1001 :
1002 0 : DEALLOCATE (sort_2, ind_2)
1003 : END DO
1004 :
1005 0 : ALLOCATE (blk_ind_tmp(nblk, 2))
1006 0 : blk_ind_tmp = 0
1007 :
1008 : iblk = 0
1009 0 : DO iblk_all = 1, nblk
1010 0 : IF (iblk >= 1) THEN
1011 0 : IF (ALL(blk_ind_tmp(iblk, :) == blk_ind(iblk_all, :))) THEN
1012 : CYCLE
1013 : END IF
1014 : END IF
1015 0 : iblk = iblk + 1
1016 0 : blk_ind_tmp(iblk, :) = blk_ind(iblk_all, :)
1017 : END DO
1018 0 : nblk = iblk
1019 :
1020 0 : DEALLOCATE (blk_ind)
1021 0 : ALLOCATE (blk_ind(nblk, 2))
1022 :
1023 0 : blk_ind(:, :) = blk_ind_tmp(:nblk, :)
1024 :
1025 0 : END SUBROUTINE
1026 :
1027 : ! **************************************************************************************************
1028 : !> \brief ...
1029 : !> \param qs_env ...
1030 : !> \param ri_data ...
1031 : !> \param ks_matrix ...
1032 : !> \param ehfx ...
1033 : !> \param mos ...
1034 : !> \param rho_ao ...
1035 : !> \param geometry_did_change ...
1036 : !> \param nspins ...
1037 : !> \param hf_fraction ...
1038 : ! **************************************************************************************************
1039 1888 : SUBROUTINE hfx_ri_update_ks(qs_env, ri_data, ks_matrix, ehfx, mos, rho_ao, &
1040 : geometry_did_change, nspins, hf_fraction)
1041 : TYPE(qs_environment_type), POINTER :: qs_env
1042 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
1043 : TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: ks_matrix
1044 : REAL(KIND=dp), INTENT(OUT) :: ehfx
1045 : TYPE(mo_set_type), DIMENSION(:), INTENT(IN), &
1046 : OPTIONAL :: mos
1047 : TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: rho_ao
1048 : LOGICAL, INTENT(IN) :: geometry_did_change
1049 : INTEGER, INTENT(IN) :: nspins
1050 : REAL(KIND=dp), INTENT(IN) :: hf_fraction
1051 :
1052 : CHARACTER(LEN=*), PARAMETER :: routineN = 'hfx_ri_update_ks'
1053 :
1054 : CHARACTER(1) :: mtype
1055 : INTEGER :: handle, handle2, i, ispin, j
1056 : INTEGER(int_8) :: nblks
1057 : INTEGER, DIMENSION(2) :: homo
1058 : LOGICAL :: is_antisymmetric
1059 : REAL(dp) :: etmp, fac
1060 1888 : REAL(KIND=dp), DIMENSION(:), POINTER :: mo_eigenvalues
1061 : TYPE(cp_fm_type), POINTER :: mo_coeff
1062 1888 : TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: my_ks_matrix, my_rho_ao
1063 5664 : TYPE(dbcsr_type), DIMENSION(2) :: mo_coeff_b
1064 : TYPE(dbcsr_type), POINTER :: mo_coeff_b_tmp
1065 : TYPE(mp_para_env_type), POINTER :: para_env
1066 :
1067 1888 : CALL timeset(routineN, handle)
1068 :
1069 1888 : IF (nspins == 1) THEN
1070 1406 : fac = 0.5_dp*hf_fraction
1071 : ELSE
1072 482 : fac = 1.0_dp*hf_fraction
1073 : END IF
1074 :
1075 : !If incoming assymetric matrices, need to convert to normal
1076 1888 : NULLIFY (my_ks_matrix, my_rho_ao)
1077 1888 : CALL dbcsr_get_info(ks_matrix(1, 1)%matrix, matrix_type=mtype)
1078 1888 : is_antisymmetric = mtype == dbcsr_type_antisymmetric
1079 1888 : IF (is_antisymmetric) THEN
1080 960 : ALLOCATE (my_ks_matrix(SIZE(ks_matrix, 1), SIZE(ks_matrix, 2)))
1081 960 : ALLOCATE (my_rho_ao(SIZE(rho_ao, 1), SIZE(rho_ao, 2)))
1082 :
1083 320 : DO i = 1, SIZE(ks_matrix, 1)
1084 480 : DO j = 1, SIZE(ks_matrix, 2)
1085 160 : ALLOCATE (my_ks_matrix(i, j)%matrix, my_rho_ao(i, j)%matrix)
1086 : CALL dbcsr_create(my_ks_matrix(i, j)%matrix, template=ks_matrix(i, j)%matrix, &
1087 160 : matrix_type=dbcsr_type_no_symmetry)
1088 160 : CALL dbcsr_desymmetrize(ks_matrix(i, j)%matrix, my_ks_matrix(i, j)%matrix)
1089 : CALL dbcsr_create(my_rho_ao(i, j)%matrix, template=rho_ao(i, j)%matrix, &
1090 160 : matrix_type=dbcsr_type_no_symmetry)
1091 320 : CALL dbcsr_desymmetrize(rho_ao(i, j)%matrix, my_rho_ao(i, j)%matrix)
1092 : END DO
1093 : END DO
1094 : ELSE
1095 1728 : my_ks_matrix => ks_matrix
1096 1728 : my_rho_ao => rho_ao
1097 : END IF
1098 :
1099 : !Case analysis on RI_FLAVOR: we switch if the input flavor is MO, there is no provided MO, and
1100 : ! the current flavor is not yet RHO. We switch back to MO if there are
1101 : ! MOs available and the current flavor is actually RHO
1102 1888 : IF (ri_data%input_flavor == ri_mo .AND. (.NOT. PRESENT(mos)) .AND. ri_data%flavor == ri_mo) THEN
1103 12 : CALL switch_ri_flavor(ri_data, qs_env)
1104 1876 : ELSE IF (ri_data%input_flavor == ri_mo .AND. PRESENT(mos) .AND. ri_data%flavor == ri_pmat) THEN
1105 10 : CALL switch_ri_flavor(ri_data, qs_env)
1106 : END IF
1107 :
1108 2216 : SELECT CASE (ri_data%flavor)
1109 : CASE (ri_mo)
1110 328 : CPASSERT(PRESENT(mos))
1111 328 : CALL timeset(routineN//"_MO", handle2)
1112 :
1113 834 : DO ispin = 1, nspins
1114 506 : NULLIFY (mo_coeff_b_tmp)
1115 506 : CPASSERT(mos(ispin)%uniform_occupation)
1116 506 : CALL get_mo_set(mo_set=mos(ispin), mo_coeff=mo_coeff, eigenvalues=mo_eigenvalues, mo_coeff_b=mo_coeff_b_tmp)
1117 :
1118 506 : IF (.NOT. mos(ispin)%use_mo_coeff_b) CALL copy_fm_to_dbcsr(mo_coeff, mo_coeff_b_tmp)
1119 834 : CALL dbcsr_copy(mo_coeff_b(ispin), mo_coeff_b_tmp)
1120 : END DO
1121 :
1122 834 : DO ispin = 1, nspins
1123 506 : CALL dbcsr_scale(mo_coeff_b(ispin), SQRT(mos(ispin)%maxocc))
1124 834 : homo(ispin) = mos(ispin)%homo
1125 : END DO
1126 :
1127 328 : CALL timestop(handle2)
1128 :
1129 : CALL hfx_ri_update_ks_mo(qs_env, ri_data, my_ks_matrix, mo_coeff_b, homo, &
1130 328 : geometry_did_change, nspins, fac)
1131 : CASE (ri_pmat)
1132 :
1133 1560 : NULLIFY (para_env)
1134 1560 : CALL get_qs_env(qs_env, para_env=para_env)
1135 3424 : DO ispin = 1, SIZE(my_rho_ao, 1)
1136 1864 : nblks = dbcsr_get_num_blocks(my_rho_ao(ispin, 1)%matrix)
1137 1864 : CALL para_env%sum(nblks)
1138 3424 : IF (nblks == 0) THEN
1139 0 : CPABORT("received empty density matrix")
1140 : END IF
1141 : END DO
1142 :
1143 : CALL hfx_ri_update_ks_pmat(qs_env, ri_data, my_ks_matrix, my_rho_ao, &
1144 3776 : geometry_did_change, nspins, fac)
1145 :
1146 : END SELECT
1147 :
1148 4258 : DO ispin = 1, nspins
1149 4258 : CALL dbcsr_release(mo_coeff_b(ispin))
1150 : END DO
1151 :
1152 4258 : DO ispin = 1, nspins
1153 4258 : CALL dbcsr_filter(my_ks_matrix(ispin, 1)%matrix, ri_data%filter_eps)
1154 : END DO
1155 :
1156 1888 : CALL timeset(routineN//"_energy", handle2)
1157 : ! Calculate the exchange energy
1158 1888 : ehfx = 0.0_dp
1159 4258 : DO ispin = 1, nspins
1160 : CALL dbcsr_dot(my_ks_matrix(ispin, 1)%matrix, my_rho_ao(ispin, 1)%matrix, &
1161 2370 : etmp)
1162 4258 : ehfx = ehfx + 0.5_dp*etmp
1163 :
1164 : END DO
1165 1888 : CALL timestop(handle2)
1166 :
1167 : !Anti-symmetric case
1168 1888 : IF (is_antisymmetric) THEN
1169 320 : DO i = 1, SIZE(ks_matrix, 1)
1170 480 : DO j = 1, SIZE(ks_matrix, 2)
1171 160 : CALL dbcsr_complete_redistribute(my_ks_matrix(i, j)%matrix, ks_matrix(i, j)%matrix)
1172 320 : CALL dbcsr_complete_redistribute(my_rho_ao(i, j)%matrix, rho_ao(i, j)%matrix)
1173 : END DO
1174 : END DO
1175 160 : CALL dbcsr_deallocate_matrix_set(my_ks_matrix)
1176 160 : CALL dbcsr_deallocate_matrix_set(my_rho_ao)
1177 : END IF
1178 :
1179 1888 : CALL timestop(handle)
1180 1888 : END SUBROUTINE
1181 :
1182 : ! **************************************************************************************************
1183 : !> \brief Calculate Fock (AKA Kohn-Sham) matrix in MO flavor
1184 : !>
1185 : !> C(mu, i) (MO coefficients)
1186 : !> M(mu, i, R) = sum_nu B(mu, nu, R) C(nu, i)
1187 : !> KS(mu, lambda) = sum_{i,R} M(mu, i, R) M(lambda, i, R)
1188 : !> \param qs_env ...
1189 : !> \param ri_data ...
1190 : !> \param ks_matrix ...
1191 : !> \param mo_coeff C(mu, i)
1192 : !> \param homo ...
1193 : !> \param geometry_did_change ...
1194 : !> \param nspins ...
1195 : !> \param fac ...
1196 : ! **************************************************************************************************
1197 328 : SUBROUTINE hfx_ri_update_ks_mo(qs_env, ri_data, ks_matrix, mo_coeff, &
1198 328 : homo, geometry_did_change, nspins, fac)
1199 : TYPE(qs_environment_type), POINTER :: qs_env
1200 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
1201 : TYPE(dbcsr_p_type), DIMENSION(:, :) :: ks_matrix
1202 : TYPE(dbcsr_type), DIMENSION(:), INTENT(IN) :: mo_coeff
1203 : INTEGER, DIMENSION(:) :: homo
1204 : LOGICAL, INTENT(IN) :: geometry_did_change
1205 : INTEGER, INTENT(IN) :: nspins
1206 : REAL(dp), INTENT(IN) :: fac
1207 :
1208 : CHARACTER(LEN=*), PARAMETER :: routineN = 'hfx_ri_update_ks_mo'
1209 :
1210 : INTEGER :: bsize, bsum, comm_2d_handle, handle, &
1211 : handle2, i_mem, iblock, iproc, ispin, &
1212 : n_mem, n_mos, nblock, unit_nr_dbcsr
1213 : INTEGER(int_8) :: nblks, nflop
1214 328 : INTEGER, ALLOCATABLE, DIMENSION(:) :: batch_ranges_1, batch_ranges_2, dist1, dist2, dist3, &
1215 328 : mem_end, mem_end_block_1, mem_end_block_2, mem_size, mem_start, mem_start_block_1, &
1216 328 : mem_start_block_2, mo_bsizes_1, mo_bsizes_2
1217 328 : INTEGER, ALLOCATABLE, DIMENSION(:, :) :: bounds
1218 : INTEGER, DIMENSION(2) :: pdims_2d
1219 : INTEGER, DIMENSION(3) :: pdims
1220 : LOGICAL :: do_initialize
1221 : REAL(dp) :: t1, t2
1222 : TYPE(dbcsr_distribution_type) :: ks_dist
1223 1640 : TYPE(dbt_pgrid_type) :: pgrid, pgrid_2d
1224 8200 : TYPE(dbt_type) :: ks_t, ks_t_mat, mo_coeff_t, &
1225 2952 : mo_coeff_t_split
1226 328 : TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :) :: t_3c_int_mo_1, t_3c_int_mo_2
1227 : TYPE(mp_comm_type) :: comm_2d
1228 : TYPE(mp_para_env_type), POINTER :: para_env
1229 :
1230 328 : CALL timeset(routineN, handle)
1231 :
1232 328 : CPASSERT(SIZE(ks_matrix, 2) == 1)
1233 :
1234 328 : unit_nr_dbcsr = ri_data%unit_nr_dbcsr
1235 :
1236 328 : IF (geometry_did_change) THEN
1237 16 : CALL hfx_ri_pre_scf_mo(qs_env, ri_data, nspins)
1238 : END IF
1239 :
1240 328 : nblks = dbt_get_num_blocks_total(ri_data%t_3c_int_ctr_1(1, 1))
1241 328 : IF (nblks == 0) THEN
1242 0 : CPABORT("3-center integrals are not available (first call requires geometry_did_change=.TRUE.)")
1243 : END IF
1244 :
1245 834 : DO ispin = 1, nspins
1246 506 : nblks = dbt_get_num_blocks_total(ri_data%t_2c_int(ispin, 1))
1247 834 : IF (nblks == 0) THEN
1248 0 : CPABORT("2-center integrals are not available (first call requires geometry_did_change=.TRUE.)")
1249 : END IF
1250 : END DO
1251 :
1252 328 : IF (.NOT. ALLOCATED(ri_data%t_3c_int_mo)) THEN
1253 18 : do_initialize = .TRUE.
1254 18 : CPASSERT(.NOT. ALLOCATED(ri_data%t_3c_ctr_RI))
1255 18 : CPASSERT(.NOT. ALLOCATED(ri_data%t_3c_ctr_KS))
1256 18 : CPASSERT(.NOT. ALLOCATED(ri_data%t_3c_ctr_KS_copy))
1257 256 : ALLOCATE (ri_data%t_3c_int_mo(nspins, 1, 1))
1258 238 : ALLOCATE (ri_data%t_3c_ctr_RI(nspins, 1, 1))
1259 238 : ALLOCATE (ri_data%t_3c_ctr_KS(nspins, 1, 1))
1260 238 : ALLOCATE (ri_data%t_3c_ctr_KS_copy(nspins, 1, 1))
1261 : ELSE
1262 : do_initialize = .FALSE.
1263 : END IF
1264 :
1265 328 : CALL get_qs_env(qs_env, para_env=para_env)
1266 :
1267 328 : ALLOCATE (bounds(2, 1))
1268 :
1269 328 : CALL dbcsr_get_info(ks_matrix(1, 1)%matrix, distribution=ks_dist)
1270 328 : CALL dbcsr_distribution_get(ks_dist, group=comm_2d_handle, nprows=pdims_2d(1), npcols=pdims_2d(2))
1271 :
1272 328 : CALL comm_2d%set_handle(comm_2d_handle)
1273 328 : pgrid_2d = dbt_nd_mp_comm(comm_2d, [1], [2], pdims_2d=pdims_2d)
1274 :
1275 : CALL create_2c_tensor(ks_t, dist1, dist2, pgrid_2d, ri_data%bsizes_AO_fit, ri_data%bsizes_AO_fit, &
1276 328 : name="(AO | AO)")
1277 :
1278 328 : DEALLOCATE (dist1, dist2)
1279 :
1280 328 : CALL para_env%sync()
1281 328 : t1 = m_walltime()
1282 :
1283 7544 : ALLOCATE (t_3c_int_mo_1(1, 1), t_3c_int_mo_2(1, 1))
1284 834 : DO ispin = 1, nspins
1285 :
1286 506 : CALL dbcsr_get_info(mo_coeff(ispin), nfullcols_total=n_mos)
1287 1518 : ALLOCATE (mo_bsizes_2(n_mos))
1288 3192 : mo_bsizes_2 = 1
1289 :
1290 : CALL create_tensor_batches(mo_bsizes_2, ri_data%n_mem, mem_start, mem_end, &
1291 506 : mem_start_block_2, mem_end_block_2)
1292 506 : n_mem = ri_data%n_mem
1293 1518 : ALLOCATE (mem_size(n_mem))
1294 :
1295 2374 : DO i_mem = 1, n_mem
1296 4554 : bsize = SUM(mo_bsizes_2(mem_start_block_2(i_mem):mem_end_block_2(i_mem)))
1297 2374 : mem_size(i_mem) = bsize
1298 : END DO
1299 :
1300 506 : CALL split_block_sizes(mem_size, mo_bsizes_1, ri_data%max_bsize_MO)
1301 1012 : ALLOCATE (mem_start_block_1(n_mem))
1302 1012 : ALLOCATE (mem_end_block_1(n_mem))
1303 506 : nblock = SIZE(mo_bsizes_1)
1304 506 : iblock = 0
1305 2374 : DO i_mem = 1, n_mem
1306 : bsum = 0
1307 506 : DO
1308 1868 : iblock = iblock + 1
1309 1868 : CPASSERT(iblock <= nblock)
1310 1868 : bsum = bsum + mo_bsizes_1(iblock)
1311 1868 : IF (bsum == mem_size(i_mem)) THEN
1312 1868 : IF (i_mem == 1) THEN
1313 506 : mem_start_block_1(i_mem) = 1
1314 : ELSE
1315 1362 : mem_start_block_1(i_mem) = mem_end_block_1(i_mem - 1) + 1
1316 : END IF
1317 1868 : mem_end_block_1(i_mem) = iblock
1318 : EXIT
1319 : END IF
1320 : END DO
1321 : END DO
1322 :
1323 1518 : ALLOCATE (batch_ranges_1(ri_data%n_mem + 1))
1324 2374 : batch_ranges_1(:ri_data%n_mem) = mem_start_block_1(:)
1325 506 : batch_ranges_1(ri_data%n_mem + 1) = mem_end_block_1(ri_data%n_mem) + 1
1326 :
1327 1012 : ALLOCATE (batch_ranges_2(ri_data%n_mem + 1))
1328 2374 : batch_ranges_2(:ri_data%n_mem) = mem_start_block_2(:)
1329 506 : batch_ranges_2(ri_data%n_mem + 1) = mem_end_block_2(ri_data%n_mem) + 1
1330 :
1331 506 : iproc = para_env%mepos
1332 :
1333 : CALL create_3c_tensor(t_3c_int_mo_1(1, 1), dist1, dist2, dist3, ri_data%pgrid_1, &
1334 : ri_data%bsizes_AO_split, ri_data%bsizes_RI_split, mo_bsizes_1, &
1335 : [1, 2], [3], &
1336 506 : name="(AO RI | MO)")
1337 :
1338 506 : DEALLOCATE (dist1, dist2, dist3)
1339 :
1340 : CALL create_3c_tensor(t_3c_int_mo_2(1, 1), dist1, dist2, dist3, ri_data%pgrid_2, &
1341 : mo_bsizes_1, ri_data%bsizes_RI_split, ri_data%bsizes_AO_split, &
1342 : [1], [2, 3], &
1343 506 : name="(MO | RI AO)")
1344 :
1345 506 : DEALLOCATE (dist1, dist2, dist3)
1346 :
1347 : CALL create_2c_tensor(mo_coeff_t_split, dist1, dist2, pgrid_2d, ri_data%bsizes_AO_split, mo_bsizes_1, &
1348 : name="(AO | MO)")
1349 :
1350 506 : DEALLOCATE (dist1, dist2)
1351 :
1352 506 : CPASSERT(homo(ispin)/ri_data%n_mem > 0)
1353 :
1354 506 : IF (do_initialize) THEN
1355 22 : pdims(:) = 0
1356 :
1357 : CALL dbt_pgrid_create(para_env, pdims, pgrid, &
1358 : tensor_dims=[SIZE(ri_data%bsizes_RI_fit), &
1359 : (homo(ispin) - 1)/ri_data%n_mem + 1, &
1360 88 : SIZE(ri_data%bsizes_AO_fit)])
1361 : CALL create_3c_tensor(ri_data%t_3c_int_mo(ispin, 1, 1), dist1, dist2, dist3, pgrid, &
1362 : ri_data%bsizes_RI_fit, mo_bsizes_2, ri_data%bsizes_AO_fit, &
1363 : [1], [2, 3], &
1364 22 : name="(RI | MO AO)")
1365 :
1366 22 : DEALLOCATE (dist1, dist2, dist3)
1367 :
1368 : CALL create_3c_tensor(ri_data%t_3c_ctr_KS(ispin, 1, 1), dist1, dist2, dist3, pgrid, &
1369 : ri_data%bsizes_RI_fit, mo_bsizes_2, ri_data%bsizes_AO_fit, &
1370 : [1, 2], [3], &
1371 22 : name="(RI MO | AO)")
1372 22 : DEALLOCATE (dist1, dist2, dist3)
1373 22 : CALL dbt_pgrid_destroy(pgrid)
1374 :
1375 22 : CALL dbt_create(ri_data%t_3c_int_mo(ispin, 1, 1), ri_data%t_3c_ctr_RI(ispin, 1, 1), name="(RI | MO AO)")
1376 22 : CALL dbt_create(ri_data%t_3c_ctr_KS(ispin, 1, 1), ri_data%t_3c_ctr_KS_copy(ispin, 1, 1))
1377 : END IF
1378 :
1379 506 : CALL dbt_create(mo_coeff(ispin), mo_coeff_t, name="MO coeffs")
1380 506 : CALL dbt_copy_matrix_to_tensor(mo_coeff(ispin), mo_coeff_t)
1381 506 : CALL dbt_copy(mo_coeff_t, mo_coeff_t_split, move_data=.TRUE.)
1382 506 : CALL dbt_filter(mo_coeff_t_split, ri_data%filter_eps_mo)
1383 506 : CALL dbt_destroy(mo_coeff_t)
1384 :
1385 506 : CALL dbt_batched_contract_init(ks_t)
1386 506 : CALL dbt_batched_contract_init(ri_data%t_3c_ctr_KS(ispin, 1, 1), batch_range_2=batch_ranges_2)
1387 506 : CALL dbt_batched_contract_init(ri_data%t_3c_ctr_KS_copy(ispin, 1, 1), batch_range_2=batch_ranges_2)
1388 :
1389 506 : CALL dbt_batched_contract_init(ri_data%t_2c_int(ispin, 1))
1390 506 : CALL dbt_batched_contract_init(ri_data%t_3c_int_mo(ispin, 1, 1), batch_range_2=batch_ranges_2)
1391 506 : CALL dbt_batched_contract_init(ri_data%t_3c_ctr_RI(ispin, 1, 1), batch_range_2=batch_ranges_2)
1392 :
1393 2374 : DO i_mem = 1, n_mem
1394 :
1395 5604 : bounds(:, 1) = [mem_start(i_mem), mem_end(i_mem)]
1396 :
1397 1868 : CALL dbt_batched_contract_init(mo_coeff_t_split)
1398 1868 : CALL dbt_batched_contract_init(ri_data%t_3c_int_ctr_1(1, 1))
1399 : CALL dbt_batched_contract_init(t_3c_int_mo_1(1, 1), &
1400 1868 : batch_range_3=batch_ranges_1)
1401 1868 : CALL timeset(routineN//"_MOx3C_R", handle2)
1402 : CALL dbt_contract(1.0_dp, mo_coeff_t_split, ri_data%t_3c_int_ctr_1(1, 1), &
1403 : 0.0_dp, t_3c_int_mo_1(1, 1), &
1404 : contract_1=[1], notcontract_1=[2], &
1405 : contract_2=[3], notcontract_2=[1, 2], &
1406 : map_1=[3], map_2=[1, 2], &
1407 : bounds_2=bounds, &
1408 : filter_eps=ri_data%filter_eps_mo/2, &
1409 : unit_nr=unit_nr_dbcsr, &
1410 : move_data=.FALSE., &
1411 1868 : flop=nflop)
1412 :
1413 1868 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1414 :
1415 1868 : CALL timestop(handle2)
1416 1868 : CALL dbt_batched_contract_finalize(mo_coeff_t_split)
1417 1868 : CALL dbt_batched_contract_finalize(ri_data%t_3c_int_ctr_1(1, 1))
1418 1868 : CALL dbt_batched_contract_finalize(t_3c_int_mo_1(1, 1))
1419 :
1420 1868 : CALL timeset(routineN//"_copy_1", handle2)
1421 1868 : CALL dbt_copy(t_3c_int_mo_1(1, 1), ri_data%t_3c_int_mo(ispin, 1, 1), order=[3, 1, 2], move_data=.TRUE.)
1422 1868 : CALL timestop(handle2)
1423 :
1424 1868 : CALL dbt_batched_contract_init(mo_coeff_t_split)
1425 1868 : CALL dbt_batched_contract_init(ri_data%t_3c_int_ctr_2(1, 1))
1426 : CALL dbt_batched_contract_init(t_3c_int_mo_2(1, 1), &
1427 1868 : batch_range_1=batch_ranges_1)
1428 :
1429 1868 : CALL timeset(routineN//"_MOx3C_L", handle2)
1430 : CALL dbt_contract(1.0_dp, mo_coeff_t_split, ri_data%t_3c_int_ctr_2(1, 1), &
1431 : 0.0_dp, t_3c_int_mo_2(1, 1), &
1432 : contract_1=[1], notcontract_1=[2], &
1433 : contract_2=[1], notcontract_2=[2, 3], &
1434 : map_1=[1], map_2=[2, 3], &
1435 : bounds_2=bounds, &
1436 : filter_eps=ri_data%filter_eps_mo/2, &
1437 : unit_nr=unit_nr_dbcsr, &
1438 : move_data=.FALSE., &
1439 1868 : flop=nflop)
1440 :
1441 1868 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1442 :
1443 1868 : CALL timestop(handle2)
1444 :
1445 1868 : CALL dbt_batched_contract_finalize(mo_coeff_t_split)
1446 1868 : CALL dbt_batched_contract_finalize(ri_data%t_3c_int_ctr_2(1, 1))
1447 1868 : CALL dbt_batched_contract_finalize(t_3c_int_mo_2(1, 1))
1448 :
1449 1868 : CALL timeset(routineN//"_copy_1", handle2)
1450 : CALL dbt_copy(t_3c_int_mo_2(1, 1), ri_data%t_3c_int_mo(ispin, 1, 1), order=[2, 1, 3], &
1451 1868 : summation=.TRUE., move_data=.TRUE.)
1452 :
1453 1868 : CALL dbt_filter(ri_data%t_3c_int_mo(ispin, 1, 1), ri_data%filter_eps_mo)
1454 1868 : CALL timestop(handle2)
1455 :
1456 1868 : CALL timeset(routineN//"_RIx3C", handle2)
1457 :
1458 : CALL dbt_contract(1.0_dp, ri_data%t_2c_int(ispin, 1), ri_data%t_3c_int_mo(ispin, 1, 1), &
1459 : 0.0_dp, ri_data%t_3c_ctr_RI(ispin, 1, 1), &
1460 : contract_1=[1], notcontract_1=[2], &
1461 : contract_2=[1], notcontract_2=[2, 3], &
1462 : map_1=[1], map_2=[2, 3], filter_eps=ri_data%filter_eps, &
1463 : unit_nr=unit_nr_dbcsr, &
1464 1868 : flop=nflop)
1465 :
1466 1868 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1467 :
1468 1868 : CALL timestop(handle2)
1469 :
1470 1868 : CALL timeset(routineN//"_copy_2", handle2)
1471 :
1472 : ! note: this copy should not involve communication (same block sizes, same 3d distribution on same process grid)
1473 1868 : CALL dbt_copy(ri_data%t_3c_ctr_RI(ispin, 1, 1), ri_data%t_3c_ctr_KS(ispin, 1, 1), move_data=.TRUE.)
1474 1868 : CALL dbt_copy(ri_data%t_3c_ctr_KS(ispin, 1, 1), ri_data%t_3c_ctr_KS_copy(ispin, 1, 1))
1475 1868 : CALL timestop(handle2)
1476 :
1477 1868 : CALL timeset(routineN//"_3Cx3C", handle2)
1478 : CALL dbt_contract(-fac, ri_data%t_3c_ctr_KS(ispin, 1, 1), ri_data%t_3c_ctr_KS_copy(ispin, 1, 1), &
1479 : 1.0_dp, ks_t, &
1480 : contract_1=[1, 2], notcontract_1=[3], &
1481 : contract_2=[1, 2], notcontract_2=[3], &
1482 : map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps/n_mem, &
1483 : unit_nr=unit_nr_dbcsr, move_data=.TRUE., &
1484 1868 : flop=nflop)
1485 :
1486 1868 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1487 :
1488 15450 : CALL timestop(handle2)
1489 : END DO
1490 :
1491 506 : CALL dbt_batched_contract_finalize(ks_t)
1492 506 : CALL dbt_batched_contract_finalize(ri_data%t_3c_ctr_KS(ispin, 1, 1))
1493 506 : CALL dbt_batched_contract_finalize(ri_data%t_3c_ctr_KS_copy(ispin, 1, 1))
1494 :
1495 506 : CALL dbt_batched_contract_finalize(ri_data%t_2c_int(ispin, 1))
1496 506 : CALL dbt_batched_contract_finalize(ri_data%t_3c_int_mo(ispin, 1, 1))
1497 506 : CALL dbt_batched_contract_finalize(ri_data%t_3c_ctr_RI(ispin, 1, 1))
1498 :
1499 506 : CALL dbt_destroy(t_3c_int_mo_1(1, 1))
1500 506 : CALL dbt_destroy(t_3c_int_mo_2(1, 1))
1501 506 : CALL dbt_clear(ri_data%t_3c_int_mo(ispin, 1, 1))
1502 :
1503 506 : CALL dbt_destroy(mo_coeff_t_split)
1504 :
1505 506 : CALL dbt_filter(ks_t, ri_data%filter_eps)
1506 :
1507 506 : CALL dbt_create(ks_matrix(ispin, 1)%matrix, ks_t_mat)
1508 506 : CALL dbt_copy(ks_t, ks_t_mat, move_data=.TRUE.)
1509 506 : CALL dbt_copy_tensor_to_matrix(ks_t_mat, ks_matrix(ispin, 1)%matrix, summation=.TRUE.)
1510 506 : CALL dbt_destroy(ks_t_mat)
1511 :
1512 0 : DEALLOCATE (mem_end, mem_start, mo_bsizes_2, mem_size, mem_start_block_1, mem_end_block_1, &
1513 2858 : mem_start_block_2, mem_end_block_2, batch_ranges_1, batch_ranges_2)
1514 :
1515 : END DO
1516 :
1517 328 : CALL dbt_pgrid_destroy(pgrid_2d)
1518 328 : CALL dbt_destroy(ks_t)
1519 :
1520 328 : CALL para_env%sync()
1521 328 : t2 = m_walltime()
1522 :
1523 328 : ri_data%dbcsr_time = ri_data%dbcsr_time + t2 - t1
1524 :
1525 328 : CALL timestop(handle)
1526 :
1527 2952 : END SUBROUTINE
1528 :
1529 : ! **************************************************************************************************
1530 : !> \brief Calculate Fock (AKA Kohn-Sham) matrix in rho flavor
1531 : !>
1532 : !> M(mu, lambda, R) = sum_{nu} int_3c(mu, nu, R) P(nu, lambda)
1533 : !> KS(mu, lambda) = sum_{nu,R} B(mu, nu, R) M(lambda, nu, R)
1534 : !> \param qs_env ...
1535 : !> \param ri_data ...
1536 : !> \param ks_matrix ...
1537 : !> \param rho_ao ...
1538 : !> \param geometry_did_change ...
1539 : !> \param nspins ...
1540 : !> \param fac ...
1541 : ! **************************************************************************************************
1542 1560 : SUBROUTINE hfx_ri_update_ks_Pmat(qs_env, ri_data, ks_matrix, rho_ao, &
1543 : geometry_did_change, nspins, fac)
1544 : TYPE(qs_environment_type), POINTER :: qs_env
1545 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
1546 : TYPE(dbcsr_p_type), DIMENSION(:, :) :: ks_matrix, rho_ao
1547 : LOGICAL, INTENT(IN) :: geometry_did_change
1548 : INTEGER, INTENT(IN) :: nspins
1549 : REAL(dp), INTENT(IN) :: fac
1550 :
1551 : CHARACTER(LEN=*), PARAMETER :: routineN = 'hfx_ri_update_ks_Pmat'
1552 :
1553 : INTEGER :: handle, handle2, i_mem, ispin, j_mem, &
1554 : n_mem, n_mem_RI, unit_nr, unit_nr_dbcsr
1555 : INTEGER(int_8) :: flops_ks_max, flops_p_max, nblks, nflop, &
1556 : nze, nze_3c, nze_3c_1, nze_3c_2, &
1557 : nze_ks, nze_rho
1558 1560 : INTEGER, ALLOCATABLE, DIMENSION(:) :: batch_ranges_AO, batch_ranges_RI, dist1, &
1559 1560 : dist2
1560 : INTEGER, DIMENSION(2, 1) :: bounds_i
1561 : INTEGER, DIMENSION(2, 2) :: bounds_ij, bounds_j
1562 : INTEGER, DIMENSION(3) :: dims_3c
1563 : REAL(dp) :: memory_3c, occ, occ_3c, occ_3c_1, &
1564 : occ_3c_2, occ_ks, occ_rho, t1, t2, &
1565 : unused
1566 39000 : TYPE(dbt_type) :: ks_t, ks_tmp, rho_ao_tmp, t_3c_1, &
1567 20280 : t_3c_3, tensor_old
1568 : TYPE(mp_para_env_type), POINTER :: para_env
1569 :
1570 1560 : IF (.NOT. fac > EPSILON(0.0_dp)) RETURN
1571 :
1572 1560 : CALL timeset(routineN, handle)
1573 :
1574 1560 : NULLIFY (para_env)
1575 :
1576 : ! get a useful output_unit
1577 1560 : unit_nr_dbcsr = ri_data%unit_nr_dbcsr
1578 1560 : unit_nr = ri_data%unit_nr
1579 :
1580 1560 : CALL get_qs_env(qs_env, para_env=para_env)
1581 :
1582 1560 : CPASSERT(SIZE(ks_matrix, 2) == 1)
1583 :
1584 1560 : IF (geometry_did_change) THEN
1585 70 : CALL hfx_ri_pre_scf_Pmat(qs_env, ri_data)
1586 160 : DO ispin = 1, nspins
1587 90 : CALL dbt_scale(ri_data%rho_ao_t(ispin, 1), 0.0_dp)
1588 160 : CALL dbt_scale(ri_data%ks_t(ispin, 1), 0.0_dp)
1589 : END DO
1590 : END IF
1591 :
1592 1560 : nblks = dbt_get_num_blocks_total(ri_data%t_3c_int_ctr_2(1, 1))
1593 1560 : IF (nblks == 0) THEN
1594 0 : CPABORT("3-center integrals are not available (first call requires geometry_did_change=.TRUE.)")
1595 : END IF
1596 :
1597 1560 : n_mem = ri_data%n_mem
1598 1560 : n_mem_RI = ri_data%n_mem_RI
1599 :
1600 1560 : CALL dbt_create(ks_matrix(1, 1)%matrix, ks_tmp)
1601 1560 : CALL dbt_create(rho_ao(1, 1)%matrix, rho_ao_tmp)
1602 :
1603 : CALL create_2c_tensor(ks_t, dist1, dist2, ri_data%pgrid_2d, &
1604 : ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
1605 : name="(AO | AO)")
1606 1560 : DEALLOCATE (dist1, dist2)
1607 :
1608 1560 : CALL dbt_create(ri_data%t_3c_int_ctr_2(1, 1), t_3c_1)
1609 1560 : CALL dbt_create(ri_data%t_3c_int_ctr_1(1, 1), t_3c_3)
1610 :
1611 1560 : CALL para_env%sync()
1612 1560 : t1 = m_walltime()
1613 :
1614 1560 : flops_ks_max = 0; flops_p_max = 0
1615 :
1616 4680 : ALLOCATE (batch_ranges_RI(ri_data%n_mem_RI + 1))
1617 4680 : ALLOCATE (batch_ranges_AO(ri_data%n_mem + 1))
1618 6240 : batch_ranges_RI(:ri_data%n_mem_RI) = ri_data%starts_array_RI_mem_block(:)
1619 1560 : batch_ranges_RI(ri_data%n_mem_RI + 1) = ri_data%ends_array_RI_mem_block(ri_data%n_mem_RI) + 1
1620 6240 : batch_ranges_AO(:ri_data%n_mem) = ri_data%starts_array_mem_block(:)
1621 1560 : batch_ranges_AO(ri_data%n_mem + 1) = ri_data%ends_array_mem_block(ri_data%n_mem) + 1
1622 :
1623 1560 : memory_3c = 0.0_dp
1624 3424 : DO ispin = 1, nspins
1625 :
1626 1864 : CALL get_tensor_occupancy(ri_data%t_3c_int_ctr_2(1, 1), nze_3c, occ_3c)
1627 :
1628 : nze_rho = 0
1629 : occ_rho = 0.0_dp
1630 1864 : nze_3c_1 = 0
1631 1864 : occ_3c_1 = 0.0_dp
1632 1864 : nze_3c_2 = 0
1633 1864 : occ_3c_2 = 0.0_dp
1634 :
1635 1864 : CALL dbt_copy_matrix_to_tensor(rho_ao(ispin, 1)%matrix, rho_ao_tmp)
1636 :
1637 : !We work with Delta P: the diff between previous SCF step and this one, for increased sparsity
1638 1864 : CALL dbt_scale(ri_data%rho_ao_t(ispin, 1), -1.0_dp)
1639 1864 : CALL dbt_copy(rho_ao_tmp, ri_data%rho_ao_t(ispin, 1), summation=.TRUE., move_data=.TRUE.)
1640 :
1641 1864 : CALL get_tensor_occupancy(ri_data%rho_ao_t(ispin, 1), nze_rho, occ_rho)
1642 :
1643 : CALL dbt_batched_contract_init(ri_data%t_3c_int_ctr_1(1, 1), batch_range_1=batch_ranges_AO, &
1644 1864 : batch_range_2=batch_ranges_RI)
1645 1864 : CALL dbt_batched_contract_init(t_3c_3, batch_range_1=batch_ranges_AO, batch_range_2=batch_ranges_RI)
1646 :
1647 1864 : CALL dbt_create(ri_data%t_3c_int_ctr_1(1, 1), tensor_old)
1648 :
1649 7456 : DO i_mem = 1, n_mem
1650 :
1651 5592 : CALL dbt_batched_contract_init(ri_data%rho_ao_t(ispin, 1))
1652 : CALL dbt_batched_contract_init(ri_data%t_3c_int_ctr_2(1, 1), batch_range_2=batch_ranges_RI, &
1653 5592 : batch_range_3=batch_ranges_AO)
1654 5592 : CALL dbt_batched_contract_init(t_3c_1, batch_range_2=batch_ranges_RI, batch_range_3=batch_ranges_AO)
1655 22368 : DO j_mem = 1, n_mem_RI
1656 :
1657 16776 : CALL timeset(routineN//"_Px3C", handle2)
1658 :
1659 16776 : CALL dbt_get_info(t_3c_1, nfull_total=dims_3c)
1660 50328 : bounds_i(:, 1) = [ri_data%starts_array_mem(i_mem), ri_data%ends_array_mem(i_mem)]
1661 50328 : bounds_j(:, 1) = [1, dims_3c(1)]
1662 50328 : bounds_j(:, 2) = [ri_data%starts_array_RI_mem(j_mem), ri_data%ends_array_RI_mem(j_mem)]
1663 :
1664 : CALL dbt_contract(1.0_dp, ri_data%rho_ao_t(ispin, 1), ri_data%t_3c_int_ctr_2(1, 1), &
1665 : 0.0_dp, t_3c_1, &
1666 : contract_1=[2], notcontract_1=[1], &
1667 : contract_2=[3], notcontract_2=[1, 2], &
1668 : map_1=[3], map_2=[1, 2], filter_eps=ri_data%filter_eps, &
1669 : bounds_2=bounds_i, &
1670 : bounds_3=bounds_j, &
1671 : unit_nr=unit_nr_dbcsr, &
1672 16776 : flop=nflop)
1673 :
1674 16776 : CALL timestop(handle2)
1675 :
1676 16776 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1677 :
1678 16776 : CALL get_tensor_occupancy(t_3c_1, nze, occ)
1679 16776 : nze_3c_1 = nze_3c_1 + nze
1680 16776 : occ_3c_1 = occ_3c_1 + occ
1681 :
1682 16776 : CALL timeset(routineN//"_copy_2", handle2)
1683 16776 : CALL dbt_copy(t_3c_1, t_3c_3, order=[3, 2, 1], move_data=.TRUE.)
1684 16776 : CALL timestop(handle2)
1685 :
1686 50328 : bounds_ij(:, 1) = [ri_data%starts_array_mem(i_mem), ri_data%ends_array_mem(i_mem)]
1687 50328 : bounds_ij(:, 2) = [ri_data%starts_array_RI_mem(j_mem), ri_data%ends_array_RI_mem(j_mem)]
1688 :
1689 : CALL decompress_tensor(tensor_old, ri_data%blk_indices(i_mem, j_mem)%ind, &
1690 16776 : ri_data%store_3c(i_mem, j_mem), ri_data%filter_eps_storage)
1691 :
1692 16776 : CALL dbt_copy(tensor_old, ri_data%t_3c_int_ctr_1(1, 1), move_data=.TRUE.)
1693 :
1694 16776 : CALL get_tensor_occupancy(ri_data%t_3c_int_ctr_1(1, 1), nze, occ)
1695 16776 : nze_3c_2 = nze_3c_2 + nze
1696 16776 : occ_3c_2 = occ_3c_2 + occ
1697 16776 : CALL timeset(routineN//"_KS", handle2)
1698 16776 : CALL dbt_batched_contract_init(ks_t)
1699 : CALL dbt_contract(-fac, ri_data%t_3c_int_ctr_1(1, 1), t_3c_3, &
1700 : 1.0_dp, ks_t, &
1701 : contract_1=[1, 2], notcontract_1=[3], &
1702 : contract_2=[1, 2], notcontract_2=[3], &
1703 : map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps/n_mem, &
1704 : bounds_1=bounds_ij, &
1705 : unit_nr=unit_nr_dbcsr, &
1706 16776 : flop=nflop, move_data=.TRUE.)
1707 :
1708 16776 : CALL dbt_batched_contract_finalize(ks_t, unit_nr=unit_nr_dbcsr)
1709 16776 : CALL timestop(handle2)
1710 :
1711 72696 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1712 :
1713 : END DO
1714 5592 : CALL dbt_batched_contract_finalize(ri_data%rho_ao_t(ispin, 1), unit_nr=unit_nr_dbcsr)
1715 5592 : CALL dbt_batched_contract_finalize(ri_data%t_3c_int_ctr_2(1, 1))
1716 7456 : CALL dbt_batched_contract_finalize(t_3c_1)
1717 : END DO
1718 1864 : CALL dbt_batched_contract_finalize(ri_data%t_3c_int_ctr_1(1, 1))
1719 1864 : CALL dbt_batched_contract_finalize(t_3c_3)
1720 :
1721 7456 : DO i_mem = 1, n_mem
1722 24232 : DO j_mem = 1, n_mem_RI
1723 5592 : ASSOCIATE (blk_indices => ri_data%blk_indices(i_mem, j_mem), t_3c => ri_data%t_3c_int_ctr_1(1, 1))
1724 : CALL decompress_tensor(tensor_old, blk_indices%ind, &
1725 16776 : ri_data%store_3c(i_mem, j_mem), ri_data%filter_eps_storage)
1726 16776 : CALL dbt_copy(tensor_old, t_3c, move_data=.TRUE.)
1727 :
1728 16776 : unused = 0
1729 : CALL compress_tensor(t_3c, blk_indices%ind, ri_data%store_3c(i_mem, j_mem), &
1730 33552 : ri_data%filter_eps_storage, unused)
1731 : END ASSOCIATE
1732 : END DO
1733 : END DO
1734 :
1735 1864 : CALL dbt_destroy(tensor_old)
1736 :
1737 1864 : CALL get_tensor_occupancy(ks_t, nze_ks, occ_ks)
1738 :
1739 : !rho_ao_t holds the density difference, and ks_t is built upon it => need the full picture
1740 1864 : CALL dbt_copy_matrix_to_tensor(rho_ao(ispin, 1)%matrix, rho_ao_tmp)
1741 1864 : CALL dbt_copy(rho_ao_tmp, ri_data%rho_ao_t(ispin, 1), move_data=.TRUE.)
1742 1864 : CALL dbt_copy(ks_t, ri_data%ks_t(ispin, 1), summation=.TRUE., move_data=.TRUE.)
1743 :
1744 1864 : CALL dbt_copy(ri_data%ks_t(ispin, 1), ks_tmp)
1745 1864 : CALL dbt_copy_tensor_to_matrix(ks_tmp, ks_matrix(ispin, 1)%matrix, summation=.TRUE.)
1746 1864 : CALL dbt_clear(ks_tmp)
1747 :
1748 9016 : IF (unit_nr > 0 .AND. geometry_did_change) THEN
1749 : WRITE (unit_nr, '(T6,A,T63,ES7.1,1X,A1,1X,F7.3,A1)') &
1750 28 : 'Occupancy of density matrix P:', REAL(nze_rho, dp), '/', occ_rho*100, '%'
1751 : WRITE (unit_nr, '(T6,A,T63,ES7.1,1X,A1,1X,F7.3,A1)') &
1752 28 : 'Occupancy of 3c ints:', REAL(nze_3c, dp), '/', occ_3c*100, '%'
1753 : WRITE (unit_nr, '(T6,A,T63,ES7.1,1X,A1,1X,F7.3,A1)') &
1754 28 : 'Occupancy after contraction with K:', REAL(nze_3c_2, dp), '/', occ_3c_2*100, '%'
1755 : WRITE (unit_nr, '(T6,A,T63,ES7.1,1X,A1,1X,F7.3,A1)') &
1756 28 : 'Occupancy after contraction with P:', REAL(nze_3c_1, dp), '/', occ_3c_1*100, '%'
1757 : WRITE (unit_nr, '(T6,A,T63,ES7.1,1X,A1,1X,F7.3,A1)') &
1758 28 : 'Occupancy of Kohn-Sham matrix:', REAL(nze_ks, dp), '/', occ_ks*100, '%'
1759 : END IF
1760 :
1761 : END DO
1762 :
1763 1560 : CALL para_env%sync()
1764 1560 : t2 = m_walltime()
1765 :
1766 1560 : ri_data%dbcsr_time = ri_data%dbcsr_time + t2 - t1
1767 :
1768 1560 : CALL dbt_destroy(t_3c_1)
1769 1560 : CALL dbt_destroy(t_3c_3)
1770 :
1771 1560 : CALL dbt_destroy(rho_ao_tmp)
1772 1560 : CALL dbt_destroy(ks_t)
1773 1560 : CALL dbt_destroy(ks_tmp)
1774 :
1775 1560 : CALL timestop(handle)
1776 :
1777 4680 : END SUBROUTINE
1778 :
1779 : ! **************************************************************************************************
1780 : !> \brief Implementation based on the MO flavor
1781 : !> \param qs_env ...
1782 : !> \param ri_data ...
1783 : !> \param nspins ...
1784 : !> \param hf_fraction ...
1785 : !> \param mo_coeff ...
1786 : !> \param use_virial ...
1787 : !> \note There is no response code for forces with the MO flavor
1788 : ! **************************************************************************************************
1789 14 : SUBROUTINE hfx_ri_forces_mo(qs_env, ri_data, nspins, hf_fraction, mo_coeff, use_virial)
1790 :
1791 : TYPE(qs_environment_type), POINTER :: qs_env
1792 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
1793 : INTEGER, INTENT(IN) :: nspins
1794 : REAL(dp), INTENT(IN) :: hf_fraction
1795 : TYPE(dbcsr_type), DIMENSION(:), INTENT(IN) :: mo_coeff
1796 : LOGICAL, INTENT(IN), OPTIONAL :: use_virial
1797 :
1798 : CHARACTER(LEN=*), PARAMETER :: routineN = 'hfx_ri_forces_mo'
1799 :
1800 : INTEGER :: dummy_int, handle, i_mem, i_xyz, ibasis, ispin, j_mem, k_mem, n_mem, n_mem_input, &
1801 : n_mem_input_RI, n_mem_RI, n_mem_RI_fit, n_mos, natom, nkind, unit_nr_dbcsr
1802 : INTEGER(int_8) :: nflop
1803 14 : INTEGER, ALLOCATABLE, DIMENSION(:) :: atom_of_kind, batch_blk_end, batch_blk_start, &
1804 14 : batch_end, batch_end_RI, batch_end_RI_fit, batch_ranges, batch_ranges_RI, &
1805 14 : batch_ranges_RI_fit, batch_start, batch_start_RI, batch_start_RI_fit, bsizes_MO, dist1, &
1806 14 : dist2, dist3, idx_to_at_AO, idx_to_at_RI, kind_of
1807 : INTEGER, DIMENSION(2, 1) :: bounds_ctr_1d
1808 : INTEGER, DIMENSION(2, 2) :: bounds_ctr_2d
1809 : INTEGER, DIMENSION(3) :: pdims
1810 : LOGICAL :: use_virial_prv
1811 : REAL(dp) :: pref, spin_fac, t1, t2
1812 14 : TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
1813 14 : TYPE(block_ind_type), ALLOCATABLE, DIMENSION(:, :) :: t_3c_der_AO_ind, t_3c_der_RI_ind
1814 : TYPE(cell_type), POINTER :: cell
1815 14 : TYPE(dbcsr_p_type), DIMENSION(:), POINTER :: matrix_s
1816 70 : TYPE(dbt_pgrid_type) :: pgrid_1, pgrid_2
1817 602 : TYPE(dbt_type) :: t_2c_RI, t_2c_RI_inv, t_2c_RI_met, t_2c_RI_PQ, t_2c_tmp, t_3c_0, t_3c_1, &
1818 700 : t_3c_2, t_3c_3, t_3c_4, t_3c_5, t_3c_6, t_3c_ao_ri_ao, t_3c_ao_ri_mo, t_3c_desymm, &
1819 434 : t_3c_mo_ri_ao, t_3c_mo_ri_mo, t_3c_ri_ao_ao, t_3c_RI_ctr, t_3c_ri_mo_mo, &
1820 350 : t_3c_ri_mo_mo_fit, t_3c_work, t_mo_coeff, t_mo_cpy
1821 14 : TYPE(dbt_type), ALLOCATABLE, DIMENSION(:) :: t_2c_der_metric, t_2c_der_RI, t_2c_MO_AO, &
1822 28 : t_2c_MO_AO_ctr, t_3c_der_AO, t_3c_der_AO_ctr_1, t_3c_der_RI, t_3c_der_RI_ctr_1, &
1823 14 : t_3c_der_RI_ctr_2
1824 : TYPE(dft_control_type), POINTER :: dft_control
1825 : TYPE(gto_basis_set_p_type), ALLOCATABLE, &
1826 14 : DIMENSION(:), TARGET :: basis_set_AO, basis_set_RI
1827 : TYPE(gto_basis_set_type), POINTER :: orb_basis, ri_basis
1828 : TYPE(hfx_compression_type), ALLOCATABLE, &
1829 14 : DIMENSION(:, :) :: t_3c_der_AO_comp, t_3c_der_RI_comp
1830 : TYPE(mp_para_env_type), POINTER :: para_env
1831 14 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
1832 14 : TYPE(qs_force_type), DIMENSION(:), POINTER :: force
1833 14 : TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
1834 :
1835 : ! 1) Precompute the derivatives that are needed (3c, 3c RI and metric)
1836 : ! 2) Go over batched of occupied MOs so as to save memory and optimize contractions
1837 : ! 3) Contract all 3c integrals and derivatives with MO coeffs
1838 : ! 4) Contract relevant quantities with the inverse 2c RI (metric or pot)
1839 : ! 5) First force contribution with the 2c RI derivative d/dx (Q|R)
1840 : ! 6) If metric, do the additional contraction with S_pq^-1 (Q|R)
1841 : ! 7) Do the force contribution due to 3c integrals (a'b|P) and (ab|P')
1842 : ! 8) If metric, do the last force contribution due to d/dx S^-1 (First contract (ab|P), then S^-1)
1843 :
1844 14 : use_virial_prv = .FALSE.
1845 14 : IF (PRESENT(use_virial)) use_virial_prv = use_virial
1846 14 : IF (use_virial_prv) THEN
1847 0 : CPABORT("Stress tensor with RI-HFX MO flavor not implemented.")
1848 : END IF
1849 :
1850 14 : unit_nr_dbcsr = ri_data%unit_nr_dbcsr
1851 :
1852 : CALL get_qs_env(qs_env, natom=natom, particle_set=particle_set, nkind=nkind, &
1853 : atomic_kind_set=atomic_kind_set, cell=cell, force=force, &
1854 : matrix_s=matrix_s, para_env=para_env, dft_control=dft_control, &
1855 14 : qs_kind_set=qs_kind_set)
1856 :
1857 14 : pdims(:) = 0
1858 : CALL dbt_pgrid_create(para_env, pdims, pgrid_1, tensor_dims=[SIZE(ri_data%bsizes_AO_split), &
1859 : SIZE(ri_data%bsizes_RI_split), &
1860 56 : SIZE(ri_data%bsizes_AO_split)])
1861 14 : pdims(:) = 0
1862 : CALL dbt_pgrid_create(para_env, pdims, pgrid_2, tensor_dims=[SIZE(ri_data%bsizes_RI_split), &
1863 : SIZE(ri_data%bsizes_AO_split), &
1864 56 : SIZE(ri_data%bsizes_AO_split)])
1865 :
1866 : CALL create_3c_tensor(t_3c_ao_ri_ao, dist1, dist2, dist3, pgrid_1, &
1867 : ri_data%bsizes_AO_split, ri_data%bsizes_RI_split, ri_data%bsizes_AO_split, &
1868 14 : [1, 2], [3], name="(AO RI | AO)")
1869 14 : DEALLOCATE (dist1, dist2, dist3)
1870 : CALL create_3c_tensor(t_3c_ri_ao_ao, dist1, dist2, dist3, pgrid_2, &
1871 : ri_data%bsizes_RI_split, ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
1872 14 : [1], [2, 3], name="(RI | AO AO)")
1873 14 : DEALLOCATE (dist1, dist2, dist3)
1874 :
1875 104 : ALLOCATE (basis_set_RI(nkind), basis_set_AO(nkind))
1876 14 : CALL basis_set_list_setup(basis_set_RI, ri_data%ri_basis_type, qs_kind_set)
1877 14 : CALL get_particle_set(particle_set, qs_kind_set, basis=basis_set_RI)
1878 14 : CALL basis_set_list_setup(basis_set_AO, ri_data%orb_basis_type, qs_kind_set)
1879 14 : CALL get_particle_set(particle_set, qs_kind_set, basis=basis_set_AO)
1880 :
1881 38 : DO ibasis = 1, SIZE(basis_set_AO)
1882 24 : orb_basis => basis_set_AO(ibasis)%gto_basis_set
1883 24 : CALL init_interaction_radii_orb_basis(orb_basis, ri_data%eps_pgf_orb)
1884 24 : ri_basis => basis_set_RI(ibasis)%gto_basis_set
1885 38 : CALL init_interaction_radii_orb_basis(ri_basis, ri_data%eps_pgf_orb)
1886 : END DO
1887 :
1888 : ALLOCATE (t_2c_der_metric(3), t_2c_der_RI(3), t_2c_MO_AO(3), t_2c_MO_AO_ctr(3), t_3c_der_AO(3), &
1889 1148 : t_3c_der_AO_ctr_1(3), t_3c_der_RI(3), t_3c_der_RI_ctr_1(3), t_3c_der_RI_ctr_2(3))
1890 :
1891 : ! 1) Precompute the derivatives
1892 : CALL precalc_derivatives(t_3c_der_RI_comp, t_3c_der_AO_comp, t_3c_der_RI_ind, t_3c_der_AO_ind, &
1893 : t_2c_der_RI, t_2c_der_metric, t_3c_ri_ao_ao, &
1894 14 : basis_set_AO, basis_set_RI, ri_data, qs_env)
1895 :
1896 38 : DO ibasis = 1, SIZE(basis_set_AO)
1897 24 : orb_basis => basis_set_AO(ibasis)%gto_basis_set
1898 24 : ri_basis => basis_set_RI(ibasis)%gto_basis_set
1899 24 : CALL init_interaction_radii_orb_basis(orb_basis, dft_control%qs_control%eps_pgf_orb)
1900 38 : CALL init_interaction_radii_orb_basis(ri_basis, dft_control%qs_control%eps_pgf_orb)
1901 : END DO
1902 :
1903 14 : n_mem = SIZE(t_3c_der_RI_comp, 1)
1904 56 : DO i_xyz = 1, 3
1905 42 : CALL dbt_create(t_3c_ao_ri_ao, t_3c_der_RI(i_xyz))
1906 42 : CALL dbt_create(t_3c_ao_ri_ao, t_3c_der_AO(i_xyz))
1907 :
1908 194 : DO i_mem = 1, n_mem
1909 : CALL decompress_tensor(t_3c_ri_ao_ao, t_3c_der_RI_ind(i_mem, i_xyz)%ind, &
1910 138 : t_3c_der_RI_comp(i_mem, i_xyz), ri_data%filter_eps_storage)
1911 138 : CALL dbt_copy(t_3c_ri_ao_ao, t_3c_der_RI(i_xyz), order=[2, 1, 3], move_data=.TRUE., summation=.TRUE.)
1912 :
1913 : CALL decompress_tensor(t_3c_ri_ao_ao, t_3c_der_AO_ind(i_mem, i_xyz)%ind, &
1914 138 : t_3c_der_AO_comp(i_mem, i_xyz), ri_data%filter_eps_storage)
1915 180 : CALL dbt_copy(t_3c_ri_ao_ao, t_3c_der_AO(i_xyz), order=[2, 1, 3], move_data=.TRUE., summation=.TRUE.)
1916 : END DO
1917 : END DO
1918 :
1919 56 : DO i_xyz = 1, 3
1920 194 : DO i_mem = 1, n_mem
1921 138 : CALL dealloc_containers(t_3c_der_AO_comp(i_mem, i_xyz), dummy_int)
1922 180 : CALL dealloc_containers(t_3c_der_RI_comp(i_mem, i_xyz), dummy_int)
1923 : END DO
1924 : END DO
1925 290 : DEALLOCATE (t_3c_der_AO_ind, t_3c_der_RI_ind)
1926 :
1927 : ! Get the 3c integrals (desymmetrized)
1928 14 : CALL dbt_create(t_3c_ao_ri_ao, t_3c_desymm)
1929 14 : CALL dbt_copy(ri_data%t_3c_int_ctr_1(1, 1), t_3c_desymm)
1930 : CALL dbt_copy(ri_data%t_3c_int_ctr_1(1, 1), t_3c_desymm, order=[3, 2, 1], &
1931 14 : summation=.TRUE., move_data=.TRUE.)
1932 :
1933 14 : CALL dbt_destroy(t_3c_ao_ri_ao)
1934 14 : CALL dbt_destroy(t_3c_ri_ao_ao)
1935 :
1936 : ! Some utilities
1937 14 : spin_fac = 0.5_dp
1938 14 : IF (nspins == 2) spin_fac = 1.0_dp
1939 :
1940 42 : ALLOCATE (idx_to_at_RI(SIZE(ri_data%bsizes_RI_split)))
1941 14 : CALL get_idx_to_atom(idx_to_at_RI, ri_data%bsizes_RI_split, ri_data%bsizes_RI)
1942 :
1943 42 : ALLOCATE (idx_to_at_AO(SIZE(ri_data%bsizes_AO_split)))
1944 14 : CALL get_idx_to_atom(idx_to_at_AO, ri_data%bsizes_AO_split, ri_data%bsizes_AO)
1945 :
1946 14 : CALL get_atomic_kind_set(atomic_kind_set, kind_of=kind_of, atom_of_kind=atom_of_kind)
1947 :
1948 : ! 2-center RI tensors
1949 : CALL create_2c_tensor(t_2c_RI, dist1, dist2, ri_data%pgrid_2d, &
1950 14 : ri_data%bsizes_RI_split, ri_data%bsizes_RI_split, name="(RI | RI)")
1951 14 : DEALLOCATE (dist1, dist2)
1952 :
1953 : CALL create_2c_tensor(t_2c_RI_PQ, dist1, dist2, ri_data%pgrid_2d, &
1954 : ri_data%bsizes_RI_fit, ri_data%bsizes_RI_fit, name="(RI | RI)")
1955 14 : DEALLOCATE (dist1, dist2)
1956 :
1957 14 : IF (.NOT. ri_data%same_op) THEN
1958 : !precompute the (P|Q)*S^-1 product
1959 4 : CALL dbt_create(t_2c_RI_PQ, t_2c_RI_inv)
1960 4 : CALL dbt_create(t_2c_RI_PQ, t_2c_RI_met)
1961 4 : CALL dbt_create(ri_data%t_2c_inv(1, 1), t_2c_tmp)
1962 :
1963 : CALL dbt_contract(1.0_dp, ri_data%t_2c_inv(1, 1), ri_data%t_2c_pot(1, 1), &
1964 : 0.0_dp, t_2c_tmp, &
1965 : contract_1=[2], notcontract_1=[1], &
1966 : contract_2=[1], notcontract_2=[2], &
1967 : map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
1968 4 : unit_nr=unit_nr_dbcsr, flop=nflop)
1969 4 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
1970 :
1971 4 : CALL dbt_copy(t_2c_tmp, t_2c_RI_inv, move_data=.TRUE.)
1972 4 : CALL dbt_destroy(t_2c_tmp)
1973 : END IF
1974 :
1975 : !3 loops in MO force evaluations. To be consistent with input MEMORY_CUT, need to take the cubic root
1976 : !No need to cut memory further because SCF tensors alrady dense
1977 14 : n_mem_input = FLOOR((ri_data%n_mem_input - 0.1_dp)**(1._dp/3._dp)) + 1
1978 14 : n_mem_input_RI = FLOOR((ri_data%n_mem_input - 0.1_dp)/n_mem_input**2) + 1
1979 :
1980 : !batches on RI_split and RI_fit blocks
1981 14 : n_mem_RI = n_mem_input_RI
1982 : CALL create_tensor_batches(ri_data%bsizes_RI_split, n_mem_RI, batch_start_RI, batch_end_RI, &
1983 14 : batch_blk_start, batch_blk_end)
1984 42 : ALLOCATE (batch_ranges_RI(n_mem_RI + 1))
1985 28 : batch_ranges_RI(1:n_mem_RI) = batch_blk_start(1:n_mem_RI)
1986 14 : batch_ranges_RI(n_mem_RI + 1) = batch_blk_end(n_mem_RI) + 1
1987 14 : DEALLOCATE (batch_blk_start, batch_blk_end)
1988 :
1989 14 : n_mem_RI_fit = n_mem_input_RI
1990 : CALL create_tensor_batches(ri_data%bsizes_RI_fit, n_mem_RI_fit, batch_start_RI_fit, batch_end_RI_fit, &
1991 14 : batch_blk_start, batch_blk_end)
1992 42 : ALLOCATE (batch_ranges_RI_fit(n_mem_RI_fit + 1))
1993 28 : batch_ranges_RI_fit(1:n_mem_RI_fit) = batch_blk_start(1:n_mem_RI_fit)
1994 14 : batch_ranges_RI_fit(n_mem_RI_fit + 1) = batch_blk_end(n_mem_RI_fit) + 1
1995 14 : DEALLOCATE (batch_blk_start, batch_blk_end)
1996 :
1997 32 : DO ispin = 1, nspins
1998 :
1999 : ! 2 )Prepare the batches for this spin
2000 18 : CALL dbcsr_get_info(mo_coeff(ispin), nfullcols_total=n_mos)
2001 : !note: optimized GPU block size for SCF is 64x1x64. Here we do 8x8x64
2002 36 : CALL split_block_sizes([n_mos], bsizes_MO, max_size=FLOOR(SQRT(ri_data%max_bsize_MO - 0.1)) + 1)
2003 :
2004 : !batching on MO blocks
2005 18 : n_mem = n_mem_input
2006 : CALL create_tensor_batches(bsizes_MO, n_mem, batch_start, batch_end, &
2007 18 : batch_blk_start, batch_blk_end)
2008 54 : ALLOCATE (batch_ranges(n_mem + 1))
2009 40 : batch_ranges(1:n_mem) = batch_blk_start(1:n_mem)
2010 18 : batch_ranges(n_mem + 1) = batch_blk_end(n_mem) + 1
2011 18 : DEALLOCATE (batch_blk_start, batch_blk_end)
2012 :
2013 : ! Initialize the different tensors needed (Note: keep MO coeffs as (MO | AO) for less transpose)
2014 : CALL create_2c_tensor(t_mo_coeff, dist1, dist2, ri_data%pgrid_2d, bsizes_MO, &
2015 18 : ri_data%bsizes_AO_split, name="MO coeffs")
2016 18 : DEALLOCATE (dist1, dist2)
2017 18 : CALL dbt_create(mo_coeff(ispin), t_2c_tmp, name="MO coeffs")
2018 18 : CALL dbt_copy_matrix_to_tensor(mo_coeff(ispin), t_2c_tmp)
2019 18 : CALL dbt_copy(t_2c_tmp, t_mo_coeff, order=[2, 1], move_data=.TRUE.)
2020 18 : CALL dbt_destroy(t_2c_tmp)
2021 :
2022 18 : CALL dbt_create(t_mo_coeff, t_mo_cpy)
2023 18 : CALL dbt_copy(t_mo_coeff, t_mo_cpy)
2024 72 : DO i_xyz = 1, 3
2025 54 : CALL dbt_create(t_mo_coeff, t_2c_MO_AO_ctr(i_xyz))
2026 72 : CALL dbt_create(t_mo_coeff, t_2c_MO_AO(i_xyz))
2027 : END DO
2028 :
2029 : CALL create_3c_tensor(t_3c_ao_ri_mo, dist1, dist2, dist3, pgrid_1, ri_data%bsizes_AO_split, &
2030 18 : ri_data%bsizes_RI_split, bsizes_MO, [1, 2], [3], name="(AO RI| MO)")
2031 18 : DEALLOCATE (dist1, dist2, dist3)
2032 :
2033 18 : CALL dbt_create(t_3c_ao_ri_mo, t_3c_0)
2034 18 : CALL dbt_destroy(t_3c_ao_ri_mo)
2035 :
2036 : CALL create_3c_tensor(t_3c_mo_ri_ao, dist1, dist2, dist3, pgrid_1, bsizes_MO, ri_data%bsizes_RI_split, &
2037 18 : ri_data%bsizes_AO_split, [1, 2], [3], name="(MO RI | AO)")
2038 18 : DEALLOCATE (dist1, dist2, dist3)
2039 18 : CALL dbt_create(t_3c_mo_ri_ao, t_3c_1)
2040 :
2041 72 : DO i_xyz = 1, 3
2042 54 : CALL dbt_create(t_3c_mo_ri_ao, t_3c_der_RI_ctr_1(i_xyz))
2043 72 : CALL dbt_create(t_3c_mo_ri_ao, t_3c_der_AO_ctr_1(i_xyz))
2044 : END DO
2045 :
2046 : CALL create_3c_tensor(t_3c_mo_ri_mo, dist1, dist2, dist3, pgrid_1, bsizes_MO, &
2047 18 : ri_data%bsizes_RI_split, bsizes_MO, [1, 2], [3], name="(MO RI | MO)")
2048 18 : DEALLOCATE (dist1, dist2, dist3)
2049 18 : CALL dbt_create(t_3c_mo_ri_mo, t_3c_work)
2050 :
2051 : CALL create_3c_tensor(t_3c_ri_mo_mo, dist1, dist2, dist3, pgrid_2, ri_data%bsizes_RI_split, &
2052 18 : bsizes_MO, bsizes_MO, [1], [2, 3], name="(RI| MO MO)")
2053 18 : DEALLOCATE (dist1, dist2, dist3)
2054 :
2055 18 : CALL dbt_create(t_3c_ri_mo_mo, t_3c_2)
2056 18 : CALL dbt_create(t_3c_ri_mo_mo, t_3c_3)
2057 18 : CALL dbt_create(t_3c_ri_mo_mo, t_3c_RI_ctr)
2058 72 : DO i_xyz = 1, 3
2059 72 : CALL dbt_create(t_3c_ri_mo_mo, t_3c_der_RI_ctr_2(i_xyz))
2060 : END DO
2061 :
2062 : !Very large RI_fit blocks => new pgrid to make sure distribution is ideal
2063 18 : pdims(:) = 0
2064 : CALL create_3c_tensor(t_3c_ri_mo_mo_fit, dist1, dist2, dist3, pgrid_2, ri_data%bsizes_RI_fit, &
2065 18 : bsizes_MO, bsizes_MO, [1], [2, 3], name="(RI| MO MO)")
2066 18 : DEALLOCATE (dist1, dist2, dist3)
2067 :
2068 18 : CALL dbt_create(t_3c_ri_mo_mo_fit, t_3c_4)
2069 18 : CALL dbt_create(t_3c_ri_mo_mo_fit, t_3c_5)
2070 18 : CALL dbt_create(t_3c_ri_mo_mo_fit, t_3c_6)
2071 :
2072 18 : CALL dbt_batched_contract_init(t_3c_desymm, batch_range_2=batch_ranges_RI)
2073 18 : CALL dbt_batched_contract_init(t_3c_0, batch_range_2=batch_ranges_RI, batch_range_3=batch_ranges)
2074 :
2075 72 : DO i_xyz = 1, 3
2076 54 : CALL dbt_batched_contract_init(t_3c_der_AO(i_xyz), batch_range_2=batch_ranges_RI)
2077 72 : CALL dbt_batched_contract_init(t_3c_der_RI(i_xyz), batch_range_2=batch_ranges_RI)
2078 : END DO
2079 :
2080 18 : CALL para_env%sync()
2081 18 : t1 = m_walltime()
2082 :
2083 : ! 2) Loop over batches
2084 40 : DO i_mem = 1, n_mem
2085 :
2086 22 : bounds_ctr_1d(1, 1) = batch_start(i_mem)
2087 22 : bounds_ctr_1d(2, 1) = batch_end(i_mem)
2088 :
2089 22 : bounds_ctr_2d(1, 1) = 1
2090 96 : bounds_ctr_2d(2, 1) = SUM(ri_data%bsizes_AO)
2091 :
2092 : ! 3) Do the first AO to MO contraction here
2093 22 : CALL timeset(routineN//"_AO2MO_1", handle)
2094 22 : CALL dbt_batched_contract_init(t_mo_coeff)
2095 44 : DO k_mem = 1, n_mem_RI
2096 22 : bounds_ctr_2d(1, 2) = batch_start_RI(k_mem)
2097 22 : bounds_ctr_2d(2, 2) = batch_end_RI(k_mem)
2098 :
2099 : CALL dbt_contract(1.0_dp, t_mo_coeff, t_3c_desymm, &
2100 : 1.0_dp, t_3c_0, &
2101 : contract_1=[2], notcontract_1=[1], &
2102 : contract_2=[3], notcontract_2=[1, 2], &
2103 : map_1=[3], map_2=[1, 2], filter_eps=ri_data%filter_eps, &
2104 : bounds_2=bounds_ctr_1d, &
2105 : bounds_3=bounds_ctr_2d, &
2106 22 : unit_nr=unit_nr_dbcsr, flop=nflop)
2107 44 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2108 : END DO
2109 22 : CALL dbt_copy(t_3c_0, t_3c_1, order=[3, 2, 1], move_data=.TRUE.)
2110 :
2111 88 : DO i_xyz = 1, 3
2112 132 : DO k_mem = 1, n_mem_RI
2113 66 : bounds_ctr_2d(1, 2) = batch_start_RI(k_mem)
2114 66 : bounds_ctr_2d(2, 2) = batch_end_RI(k_mem)
2115 :
2116 : CALL dbt_contract(1.0_dp, t_mo_coeff, t_3c_der_AO(i_xyz), &
2117 : 1.0_dp, t_3c_0, &
2118 : contract_1=[2], notcontract_1=[1], &
2119 : contract_2=[3], notcontract_2=[1, 2], &
2120 : map_1=[3], map_2=[1, 2], filter_eps=ri_data%filter_eps, &
2121 : bounds_2=bounds_ctr_1d, &
2122 : bounds_3=bounds_ctr_2d, &
2123 66 : unit_nr=unit_nr_dbcsr, flop=nflop)
2124 132 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2125 : END DO
2126 66 : CALL dbt_copy(t_3c_0, t_3c_der_AO_ctr_1(i_xyz), order=[3, 2, 1], move_data=.TRUE.)
2127 :
2128 132 : DO k_mem = 1, n_mem_RI
2129 66 : bounds_ctr_2d(1, 2) = batch_start_RI(k_mem)
2130 66 : bounds_ctr_2d(2, 2) = batch_end_RI(k_mem)
2131 :
2132 : CALL dbt_contract(1.0_dp, t_mo_coeff, t_3c_der_RI(i_xyz), &
2133 : 1.0_dp, t_3c_0, &
2134 : contract_1=[2], notcontract_1=[1], &
2135 : contract_2=[3], notcontract_2=[1, 2], &
2136 : map_1=[3], map_2=[1, 2], filter_eps=ri_data%filter_eps, &
2137 : bounds_2=bounds_ctr_1d, &
2138 : bounds_3=bounds_ctr_2d, &
2139 66 : unit_nr=unit_nr_dbcsr, flop=nflop)
2140 132 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2141 : END DO
2142 88 : CALL dbt_copy(t_3c_0, t_3c_der_RI_ctr_1(i_xyz), order=[3, 2, 1], move_data=.TRUE.)
2143 : END DO
2144 22 : CALL dbt_batched_contract_finalize(t_mo_coeff)
2145 22 : CALL timestop(handle)
2146 :
2147 22 : CALL dbt_batched_contract_init(t_3c_1, batch_range_1=batch_ranges, batch_range_2=batch_ranges_RI)
2148 : CALL dbt_batched_contract_init(t_3c_work, batch_range_1=batch_ranges, batch_range_2=batch_ranges_RI, &
2149 22 : batch_range_3=batch_ranges)
2150 22 : CALL dbt_batched_contract_init(t_3c_2, batch_range_2=batch_ranges, batch_range_3=batch_ranges)
2151 : CALL dbt_batched_contract_init(t_3c_3, batch_range_1=batch_ranges_RI, &
2152 22 : batch_range_2=batch_ranges, batch_range_3=batch_ranges)
2153 :
2154 : CALL dbt_batched_contract_init(t_3c_4, batch_range_1=batch_ranges_RI_fit, &
2155 22 : batch_range_2=batch_ranges, batch_range_3=batch_ranges)
2156 22 : CALL dbt_batched_contract_init(t_3c_5, batch_range_2=batch_ranges, batch_range_3=batch_ranges)
2157 :
2158 88 : DO i_xyz = 1, 3
2159 : CALL dbt_batched_contract_init(t_3c_der_RI_ctr_1(i_xyz), batch_range_1=batch_ranges, &
2160 66 : batch_range_2=batch_ranges_RI)
2161 : CALL dbt_batched_contract_init(t_3c_der_AO_ctr_1(i_xyz), batch_range_1=batch_ranges, &
2162 88 : batch_range_2=batch_ranges_RI)
2163 :
2164 : END DO
2165 :
2166 22 : IF (.NOT. ri_data%same_op) THEN
2167 8 : CALL dbt_batched_contract_init(t_3c_6, batch_range_2=batch_ranges, batch_range_3=batch_ranges)
2168 : END IF
2169 :
2170 52 : DO j_mem = 1, n_mem
2171 :
2172 30 : bounds_ctr_1d(1, 1) = batch_start(j_mem)
2173 30 : bounds_ctr_1d(2, 1) = batch_end(j_mem)
2174 :
2175 30 : bounds_ctr_2d(1, 1) = batch_start(i_mem)
2176 30 : bounds_ctr_2d(2, 1) = batch_end(i_mem)
2177 :
2178 : ! 3) Do the second AO to MO contraction here, followed by the S^-1 contraction
2179 30 : CALL timeset(routineN//"_AO2MO_2", handle)
2180 30 : CALL dbt_batched_contract_init(t_mo_coeff)
2181 60 : DO k_mem = 1, n_mem_RI
2182 30 : bounds_ctr_2d(1, 2) = batch_start_RI(k_mem)
2183 30 : bounds_ctr_2d(2, 2) = batch_end_RI(k_mem)
2184 :
2185 : CALL dbt_contract(1.0_dp, t_mo_coeff, t_3c_1, &
2186 : 1.0_dp, t_3c_work, &
2187 : contract_1=[2], notcontract_1=[1], &
2188 : contract_2=[3], notcontract_2=[1, 2], &
2189 : map_1=[3], map_2=[1, 2], filter_eps=ri_data%filter_eps, &
2190 : bounds_2=bounds_ctr_1d, &
2191 : bounds_3=bounds_ctr_2d, &
2192 30 : unit_nr=unit_nr_dbcsr, flop=nflop)
2193 60 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2194 : END DO
2195 30 : CALL dbt_batched_contract_finalize(t_mo_coeff)
2196 30 : CALL timestop(handle)
2197 :
2198 30 : bounds_ctr_2d(1, 1) = batch_start(i_mem)
2199 30 : bounds_ctr_2d(2, 1) = batch_end(i_mem)
2200 30 : bounds_ctr_2d(1, 2) = batch_start(j_mem)
2201 30 : bounds_ctr_2d(2, 2) = batch_end(j_mem)
2202 :
2203 : ! 4) Contract 3c MO integrals with S^-1 as well
2204 30 : CALL timeset(routineN//"_2c_inv", handle)
2205 30 : CALL dbt_copy(t_3c_work, t_3c_3, order=[2, 1, 3], move_data=.TRUE.)
2206 60 : DO k_mem = 1, n_mem_RI
2207 30 : bounds_ctr_1d(1, 1) = batch_start_RI(k_mem)
2208 30 : bounds_ctr_1d(2, 1) = batch_end_RI(k_mem)
2209 :
2210 30 : CALL dbt_batched_contract_init(ri_data%t_2c_inv(1, 1))
2211 : CALL dbt_contract(1.0_dp, ri_data%t_2c_inv(1, 1), t_3c_3, &
2212 : 1.0_dp, t_3c_2, &
2213 : contract_1=[2], notcontract_1=[1], &
2214 : contract_2=[1], notcontract_2=[2, 3], &
2215 : map_1=[1], map_2=[2, 3], filter_eps=ri_data%filter_eps, &
2216 : bounds_1=bounds_ctr_1d, &
2217 : bounds_3=bounds_ctr_2d, &
2218 30 : unit_nr=unit_nr_dbcsr, flop=nflop)
2219 30 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2220 60 : CALL dbt_batched_contract_finalize(ri_data%t_2c_inv(1, 1))
2221 : END DO
2222 30 : CALL dbt_copy(t_3c_ri_mo_mo, t_3c_3)
2223 30 : CALL timestop(handle)
2224 :
2225 : !Only contract (ab|P') with MO coeffs since need AO rep for the force of (a'b|P)
2226 30 : bounds_ctr_1d(1, 1) = batch_start(j_mem)
2227 30 : bounds_ctr_1d(2, 1) = batch_end(j_mem)
2228 :
2229 30 : bounds_ctr_2d(1, 1) = batch_start(i_mem)
2230 30 : bounds_ctr_2d(2, 1) = batch_end(i_mem)
2231 :
2232 30 : CALL timeset(routineN//"_AO2MO_2", handle)
2233 30 : CALL dbt_batched_contract_init(t_mo_coeff)
2234 120 : DO i_xyz = 1, 3
2235 180 : DO k_mem = 1, n_mem_RI
2236 90 : bounds_ctr_2d(1, 2) = batch_start_RI(k_mem)
2237 90 : bounds_ctr_2d(2, 2) = batch_end_RI(k_mem)
2238 :
2239 : CALL dbt_contract(1.0_dp, t_mo_coeff, t_3c_der_RI_ctr_1(i_xyz), &
2240 : 1.0_dp, t_3c_work, &
2241 : contract_1=[2], notcontract_1=[1], &
2242 : contract_2=[3], notcontract_2=[1, 2], &
2243 : map_1=[3], map_2=[1, 2], filter_eps=ri_data%filter_eps, &
2244 : bounds_2=bounds_ctr_1d, &
2245 : bounds_3=bounds_ctr_2d, &
2246 90 : unit_nr=unit_nr_dbcsr, flop=nflop)
2247 180 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2248 : END DO
2249 120 : CALL dbt_copy(t_3c_work, t_3c_der_RI_ctr_2(i_xyz), order=[2, 1, 3], move_data=.TRUE.)
2250 : END DO
2251 30 : CALL dbt_batched_contract_finalize(t_mo_coeff)
2252 30 : CALL timestop(handle)
2253 :
2254 30 : bounds_ctr_2d(1, 1) = batch_start(i_mem)
2255 30 : bounds_ctr_2d(2, 1) = batch_end(i_mem)
2256 30 : bounds_ctr_2d(1, 2) = batch_start(j_mem)
2257 30 : bounds_ctr_2d(2, 2) = batch_end(j_mem)
2258 :
2259 : ! 5) Force due to d/dx (P|Q)
2260 30 : CALL timeset(routineN//"_PQ_der", handle)
2261 30 : CALL dbt_copy(t_3c_2, t_3c_4, move_data=.TRUE.)
2262 30 : CALL dbt_copy(t_3c_4, t_3c_5)
2263 60 : DO k_mem = 1, n_mem_RI_fit
2264 30 : bounds_ctr_1d(1, 1) = batch_start_RI_fit(k_mem)
2265 30 : bounds_ctr_1d(2, 1) = batch_end_RI_fit(k_mem)
2266 :
2267 30 : CALL dbt_batched_contract_init(t_2c_RI_PQ)
2268 : CALL dbt_contract(1.0_dp, t_3c_4, t_3c_5, &
2269 : 1.0_dp, t_2c_RI_PQ, &
2270 : contract_1=[2, 3], notcontract_1=[1], &
2271 : contract_2=[2, 3], notcontract_2=[1], &
2272 : bounds_1=bounds_ctr_2d, &
2273 : bounds_2=bounds_ctr_1d, &
2274 : map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
2275 30 : unit_nr=unit_nr_dbcsr, flop=nflop)
2276 30 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2277 60 : CALL dbt_batched_contract_finalize(t_2c_RI_PQ)
2278 : END DO
2279 30 : CALL timestop(handle)
2280 :
2281 : ! 6) If metric, do the additional contraction with S_pq^-1 (Q|R) (not on the derivatives)
2282 30 : IF (.NOT. ri_data%same_op) THEN
2283 16 : CALL timeset(routineN//"_metric", handle)
2284 32 : DO k_mem = 1, n_mem_RI_fit
2285 16 : bounds_ctr_1d(1, 1) = batch_start_RI_fit(k_mem)
2286 16 : bounds_ctr_1d(2, 1) = batch_end_RI_fit(k_mem)
2287 :
2288 16 : CALL dbt_batched_contract_init(t_2c_RI_inv)
2289 : CALL dbt_contract(1.0_dp, t_2c_RI_inv, t_3c_4, &
2290 : 1.0_dp, t_3c_6, &
2291 : contract_1=[2], notcontract_1=[1], &
2292 : contract_2=[1], notcontract_2=[2, 3], &
2293 : bounds_1=bounds_ctr_1d, &
2294 : bounds_3=bounds_ctr_2d, &
2295 : map_1=[1], map_2=[2, 3], filter_eps=ri_data%filter_eps, &
2296 16 : unit_nr=unit_nr_dbcsr, flop=nflop)
2297 16 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2298 32 : CALL dbt_batched_contract_finalize(t_2c_RI_inv)
2299 : END DO
2300 16 : CALL dbt_copy(t_3c_6, t_3c_4, move_data=.TRUE.)
2301 :
2302 : ! 8) and get the force due to d/dx S^-1
2303 32 : DO k_mem = 1, n_mem_RI_fit
2304 16 : bounds_ctr_1d(1, 1) = batch_start_RI_fit(k_mem)
2305 16 : bounds_ctr_1d(2, 1) = batch_end_RI_fit(k_mem)
2306 :
2307 16 : CALL dbt_batched_contract_init(t_2c_RI_met)
2308 : CALL dbt_contract(1.0_dp, t_3c_4, t_3c_5, &
2309 : 1.0_dp, t_2c_RI_met, &
2310 : contract_1=[2, 3], notcontract_1=[1], &
2311 : contract_2=[2, 3], notcontract_2=[1], &
2312 : bounds_1=bounds_ctr_2d, &
2313 : bounds_2=bounds_ctr_1d, &
2314 : map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
2315 16 : unit_nr=unit_nr_dbcsr, flop=nflop)
2316 16 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2317 32 : CALL dbt_batched_contract_finalize(t_2c_RI_met)
2318 : END DO
2319 16 : CALL timestop(handle)
2320 : END IF
2321 30 : CALL dbt_copy(t_3c_ri_mo_mo_fit, t_3c_5)
2322 :
2323 : ! 7) Do the force contribution due to 3c integrals (a'b|P) and (ab|P')
2324 :
2325 : ! (ab|P')
2326 30 : CALL timeset(routineN//"_3c_RI", handle)
2327 30 : pref = -0.5_dp*2.0_dp*hf_fraction*spin_fac
2328 30 : CALL dbt_copy(t_3c_4, t_3c_RI_ctr, move_data=.TRUE.)
2329 : CALL get_force_from_3c_trace(force, t_3c_RI_ctr, t_3c_der_RI_ctr_2, atom_of_kind, kind_of, &
2330 30 : idx_to_at_RI, pref)
2331 30 : CALL timestop(handle)
2332 :
2333 : ! (a'b|P) Note that derivative remains in AO rep until the actual force evaluation,
2334 : ! which also prevents doing a direct 3-center trace
2335 30 : bounds_ctr_2d(1, 1) = batch_start(i_mem)
2336 30 : bounds_ctr_2d(2, 1) = batch_end(i_mem)
2337 :
2338 30 : bounds_ctr_1d(1, 1) = batch_start(j_mem)
2339 30 : bounds_ctr_1d(2, 1) = batch_end(j_mem)
2340 :
2341 30 : CALL timeset(routineN//"_3c_AO", handle)
2342 30 : CALL dbt_copy(t_3c_RI_ctr, t_3c_work, order=[2, 1, 3], move_data=.TRUE.)
2343 120 : DO i_xyz = 1, 3
2344 :
2345 90 : CALL dbt_batched_contract_init(t_2c_MO_AO_ctr(i_xyz))
2346 180 : DO k_mem = 1, n_mem_RI
2347 90 : bounds_ctr_2d(1, 2) = batch_start_RI(k_mem)
2348 90 : bounds_ctr_2d(2, 2) = batch_end_RI(k_mem)
2349 :
2350 : CALL dbt_contract(1.0_dp, t_3c_work, t_3c_der_AO_ctr_1(i_xyz), &
2351 : 1.0_dp, t_2c_MO_AO_ctr(i_xyz), &
2352 : contract_1=[1, 2], notcontract_1=[3], &
2353 : contract_2=[1, 2], notcontract_2=[3], &
2354 : map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
2355 : bounds_1=bounds_ctr_2d, &
2356 : bounds_2=bounds_ctr_1d, &
2357 90 : unit_nr=unit_nr_dbcsr, flop=nflop)
2358 180 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2359 : END DO
2360 120 : CALL dbt_batched_contract_finalize(t_2c_MO_AO_ctr(i_xyz))
2361 : END DO
2362 232 : CALL timestop(handle)
2363 :
2364 : END DO !j_mem
2365 22 : CALL dbt_batched_contract_finalize(t_3c_1)
2366 22 : CALL dbt_batched_contract_finalize(t_3c_work)
2367 22 : CALL dbt_batched_contract_finalize(t_3c_2)
2368 22 : CALL dbt_batched_contract_finalize(t_3c_3)
2369 22 : CALL dbt_batched_contract_finalize(t_3c_4)
2370 22 : CALL dbt_batched_contract_finalize(t_3c_5)
2371 :
2372 88 : DO i_xyz = 1, 3
2373 66 : CALL dbt_batched_contract_finalize(t_3c_der_RI_ctr_1(i_xyz))
2374 88 : CALL dbt_batched_contract_finalize(t_3c_der_AO_ctr_1(i_xyz))
2375 : END DO
2376 :
2377 62 : IF (.NOT. ri_data%same_op) THEN
2378 8 : CALL dbt_batched_contract_finalize(t_3c_6)
2379 : END IF
2380 :
2381 : END DO !i_mem
2382 18 : CALL dbt_batched_contract_finalize(t_3c_desymm)
2383 18 : CALL dbt_batched_contract_finalize(t_3c_0)
2384 :
2385 72 : DO i_xyz = 1, 3
2386 54 : CALL dbt_batched_contract_finalize(t_3c_der_AO(i_xyz))
2387 72 : CALL dbt_batched_contract_finalize(t_3c_der_RI(i_xyz))
2388 : END DO
2389 :
2390 : !Force contribution due to 3-center AO derivatives (a'b|P)
2391 18 : pref = -0.5_dp*4.0_dp*hf_fraction*spin_fac
2392 72 : DO i_xyz = 1, 3
2393 54 : CALL dbt_copy(t_2c_MO_AO_ctr(i_xyz), t_2c_MO_AO(i_xyz), move_data=.TRUE.) !ensures matching distributions
2394 54 : CALL get_mo_ao_force(force, t_mo_cpy, t_2c_MO_AO(i_xyz), atom_of_kind, kind_of, idx_to_at_AO, pref, i_xyz)
2395 72 : CALL dbt_clear(t_2c_MO_AO(i_xyz))
2396 : END DO
2397 :
2398 : !Force contribution of d/dx (P|Q)
2399 18 : pref = 0.5_dp*hf_fraction*spin_fac
2400 18 : IF (.NOT. ri_data%same_op) pref = -pref
2401 :
2402 : !Making sure dists of the t_2c_RI tensors match
2403 18 : CALL dbt_copy(t_2c_RI_PQ, t_2c_RI, move_data=.TRUE.)
2404 : CALL get_2c_der_force(force, t_2c_RI, t_2c_der_RI, atom_of_kind, &
2405 18 : kind_of, idx_to_at_RI, pref)
2406 18 : CALL dbt_clear(t_2c_RI)
2407 :
2408 : !Force contribution due to the inverse metric
2409 18 : IF (.NOT. ri_data%same_op) THEN
2410 4 : pref = 0.5_dp*2.0_dp*hf_fraction*spin_fac
2411 :
2412 4 : CALL dbt_copy(t_2c_RI_met, t_2c_RI, move_data=.TRUE.)
2413 : CALL get_2c_der_force(force, t_2c_RI, t_2c_der_metric, atom_of_kind, &
2414 4 : kind_of, idx_to_at_RI, pref)
2415 4 : CALL dbt_clear(t_2c_RI)
2416 : END IF
2417 :
2418 18 : CALL dbt_destroy(t_3c_0)
2419 18 : CALL dbt_destroy(t_3c_1)
2420 18 : CALL dbt_destroy(t_3c_2)
2421 18 : CALL dbt_destroy(t_3c_3)
2422 18 : CALL dbt_destroy(t_3c_4)
2423 18 : CALL dbt_destroy(t_3c_5)
2424 18 : CALL dbt_destroy(t_3c_6)
2425 18 : CALL dbt_destroy(t_3c_work)
2426 18 : CALL dbt_destroy(t_3c_RI_ctr)
2427 18 : CALL dbt_destroy(t_3c_mo_ri_ao)
2428 18 : CALL dbt_destroy(t_3c_mo_ri_mo)
2429 18 : CALL dbt_destroy(t_3c_ri_mo_mo)
2430 18 : CALL dbt_destroy(t_3c_ri_mo_mo_fit)
2431 18 : CALL dbt_destroy(t_mo_coeff)
2432 18 : CALL dbt_destroy(t_mo_cpy)
2433 72 : DO i_xyz = 1, 3
2434 54 : CALL dbt_destroy(t_2c_MO_AO(i_xyz))
2435 54 : CALL dbt_destroy(t_2c_MO_AO_ctr(i_xyz))
2436 54 : CALL dbt_destroy(t_3c_der_RI_ctr_1(i_xyz))
2437 54 : CALL dbt_destroy(t_3c_der_AO_ctr_1(i_xyz))
2438 72 : CALL dbt_destroy(t_3c_der_RI_ctr_2(i_xyz))
2439 : END DO
2440 86 : DEALLOCATE (batch_ranges, batch_start, batch_end)
2441 : END DO !ispin
2442 :
2443 : ! Clean-up
2444 14 : CALL dbt_pgrid_destroy(pgrid_1)
2445 14 : CALL dbt_pgrid_destroy(pgrid_2)
2446 14 : CALL dbt_destroy(t_3c_desymm)
2447 14 : CALL dbt_destroy(t_2c_RI)
2448 14 : CALL dbt_destroy(t_2c_RI_PQ)
2449 14 : IF (.NOT. ri_data%same_op) THEN
2450 4 : CALL dbt_destroy(t_2c_RI_met)
2451 4 : CALL dbt_destroy(t_2c_RI_inv)
2452 : END IF
2453 56 : DO i_xyz = 1, 3
2454 42 : CALL dbt_destroy(t_3c_der_AO(i_xyz))
2455 42 : CALL dbt_destroy(t_3c_der_RI(i_xyz))
2456 42 : CALL dbt_destroy(t_2c_der_RI(i_xyz))
2457 56 : IF (.NOT. ri_data%same_op) CALL dbt_destroy(t_2c_der_metric(i_xyz))
2458 : END DO
2459 14 : CALL dbt_copy(ri_data%t_3c_int_ctr_2(1, 1), ri_data%t_3c_int_ctr_1(1, 1))
2460 :
2461 14 : CALL para_env%sync()
2462 14 : t2 = m_walltime()
2463 :
2464 14 : ri_data%dbcsr_time = ri_data%dbcsr_time + t2 - t1
2465 :
2466 476 : END SUBROUTINE hfx_ri_forces_mo
2467 :
2468 : ! **************************************************************************************************
2469 : !> \brief New sparser implementation
2470 : !> \param qs_env ...
2471 : !> \param ri_data ...
2472 : !> \param nspins ...
2473 : !> \param hf_fraction ...
2474 : !> \param rho_ao ...
2475 : !> \param rho_ao_resp ...
2476 : !> \param use_virial ...
2477 : !> \param resp_only ...
2478 : !> \param rescale_factor ...
2479 : ! **************************************************************************************************
2480 116 : SUBROUTINE hfx_ri_forces_Pmat(qs_env, ri_data, nspins, hf_fraction, rho_ao, rho_ao_resp, &
2481 : use_virial, resp_only, rescale_factor)
2482 :
2483 : TYPE(qs_environment_type), POINTER :: qs_env
2484 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
2485 : INTEGER, INTENT(IN) :: nspins
2486 : REAL(dp), INTENT(IN) :: hf_fraction
2487 : TYPE(dbcsr_p_type), DIMENSION(:, :) :: rho_ao
2488 : TYPE(dbcsr_p_type), DIMENSION(:), OPTIONAL :: rho_ao_resp
2489 : LOGICAL, INTENT(IN), OPTIONAL :: use_virial, resp_only
2490 : REAL(dp), INTENT(IN), OPTIONAL :: rescale_factor
2491 :
2492 : CHARACTER(LEN=*), PARAMETER :: routineN = 'hfx_ri_forces_Pmat'
2493 :
2494 : INTEGER :: dummy_int, handle, i_mem, i_spin, i_xyz, &
2495 : ibasis, j_mem, j_xyz, k_mem, k_xyz, &
2496 : n_mem, n_mem_RI, natom, nkind, &
2497 : unit_nr_dbcsr
2498 : INTEGER(int_8) :: nflop
2499 116 : INTEGER, ALLOCATABLE, DIMENSION(:) :: atom_of_kind, batch_end, batch_end_RI, batch_ranges, &
2500 116 : batch_ranges_RI, batch_start, batch_start_RI, dist1, dist2, dist3, idx_to_at_AO, &
2501 116 : idx_to_at_RI, kind_of
2502 : INTEGER, DIMENSION(2, 1) :: ibounds, jbounds, kbounds
2503 : INTEGER, DIMENSION(2, 2) :: ijbounds
2504 : INTEGER, DIMENSION(2, 3) :: bounds_cpy
2505 232 : INTEGER, DIMENSION(:), POINTER :: col_bsize, row_bsize
2506 : LOGICAL :: do_resp, resp_only_prv, use_virial_prv
2507 : REAL(dp) :: pref, spin_fac, t1, t2
2508 : REAL(dp), DIMENSION(3, 3) :: work_virial
2509 116 : TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
2510 116 : TYPE(block_ind_type), ALLOCATABLE, DIMENSION(:, :) :: t_3c_der_AO_ind, t_3c_der_RI_ind
2511 : TYPE(cell_type), POINTER :: cell
2512 : TYPE(dbcsr_distribution_type), POINTER :: dbcsr_dist
2513 : TYPE(dbcsr_type) :: dbcsr_tmp, virial_trace
2514 6728 : TYPE(dbt_type) :: rho_ao_1, rho_ao_2, t_2c_RI, t_2c_RI_tmp, t_2c_tmp, t_2c_virial, t_3c_1, &
2515 7656 : t_3c_2, t_3c_3, t_3c_4, t_3c_5, t_3c_ao_ri_ao, t_3c_help_1, t_3c_help_2, t_3c_int, &
2516 5684 : t_3c_int_2, t_3c_ri_ao_ao, t_3c_sparse, t_3c_virial, t_R, t_SVS
2517 116 : TYPE(dbt_type), ALLOCATABLE, DIMENSION(:) :: t_2c_der_metric, t_2c_der_RI, &
2518 116 : t_3c_der_AO, t_3c_der_RI
2519 : TYPE(dft_control_type), POINTER :: dft_control
2520 : TYPE(gto_basis_set_p_type), ALLOCATABLE, &
2521 116 : DIMENSION(:), TARGET :: basis_set_AO, basis_set_RI
2522 : TYPE(gto_basis_set_type), POINTER :: orb_basis, ri_basis
2523 : TYPE(hfx_compression_type), ALLOCATABLE, &
2524 116 : DIMENSION(:, :) :: t_3c_der_AO_comp, t_3c_der_RI_comp
2525 : TYPE(mp_para_env_type), POINTER :: para_env
2526 : TYPE(neighbor_list_3c_type) :: nl_3c
2527 : TYPE(neighbor_list_set_p_type), DIMENSION(:), &
2528 116 : POINTER :: nl_2c_met, nl_2c_pot
2529 116 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
2530 116 : TYPE(qs_force_type), DIMENSION(:), POINTER :: force
2531 116 : TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
2532 : TYPE(virial_type), POINTER :: virial
2533 :
2534 : !The idea is the following: we need to compute the gradients
2535 : ! d/dx [P_ab P_cd (acP) S^-1_PQ (Q|R) S^-1_RS (Sbd)]
2536 : ! Which we do in a few steps:
2537 : ! 1) Contract the density matrices with the 3c integrals: M_acS = P_ab P_cd (Sbd)
2538 : ! 2) Calculate the 3c contributions: d/dx (acP) [S^-1_PQ (Q|R) S^-1_RS M_acS]
2539 : ! For maximum perf, we first multiply all 2c matrices together, than contract with retain_sparsity
2540 : ! 3) Contract the 3c integrals and the M tensor together in order to only work with 2c quantities:
2541 : ! R_PS = (acP) M_acS
2542 : ! 4) From there, we can easily calculate the 2c contributions to the force:
2543 : ! Potential: [S^-1*R*S^-1]_QR d/dx (Q|R)
2544 : ! Metric: [S^-1*R*S^-1*(Q|R)*S^-1]_UV d/dx S_UV
2545 :
2546 116 : NULLIFY (particle_set, virial, cell, force, atomic_kind_set, nl_2c_pot, nl_2c_met)
2547 116 : NULLIFY (orb_basis, ri_basis, qs_kind_set, particle_set, dft_control, dbcsr_dist)
2548 :
2549 116 : use_virial_prv = .FALSE.
2550 116 : IF (PRESENT(use_virial)) use_virial_prv = use_virial
2551 :
2552 116 : do_resp = .FALSE.
2553 116 : IF (PRESENT(rho_ao_resp)) THEN
2554 30 : IF (ASSOCIATED(rho_ao_resp(1)%matrix)) do_resp = .TRUE.
2555 : END IF
2556 :
2557 116 : resp_only_prv = .FALSE.
2558 116 : IF (PRESENT(resp_only)) resp_only_prv = resp_only
2559 :
2560 116 : unit_nr_dbcsr = ri_data%unit_nr_dbcsr
2561 :
2562 : CALL get_qs_env(qs_env, natom=natom, particle_set=particle_set, nkind=nkind, &
2563 : atomic_kind_set=atomic_kind_set, virial=virial, &
2564 : cell=cell, force=force, para_env=para_env, dft_control=dft_control, &
2565 116 : qs_kind_set=qs_kind_set, dbcsr_dist=dbcsr_dist)
2566 :
2567 : CALL create_3c_tensor(t_3c_ao_ri_ao, dist1, dist2, dist3, ri_data%pgrid_1, &
2568 : ri_data%bsizes_AO_split, ri_data%bsizes_RI_split, ri_data%bsizes_AO_split, &
2569 116 : [1, 2], [3], name="(AO RI | AO)")
2570 116 : DEALLOCATE (dist1, dist2, dist3)
2571 :
2572 : CALL create_3c_tensor(t_3c_ri_ao_ao, dist1, dist2, dist3, ri_data%pgrid_2, &
2573 : ri_data%bsizes_RI_split, ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
2574 116 : [1], [2, 3], name="(RI | AO AO)")
2575 116 : DEALLOCATE (dist1, dist2, dist3)
2576 :
2577 916 : ALLOCATE (basis_set_RI(nkind), basis_set_AO(nkind))
2578 116 : CALL basis_set_list_setup(basis_set_RI, ri_data%ri_basis_type, qs_kind_set)
2579 116 : CALL get_particle_set(particle_set, qs_kind_set, basis=basis_set_RI)
2580 116 : CALL basis_set_list_setup(basis_set_AO, ri_data%orb_basis_type, qs_kind_set)
2581 116 : CALL get_particle_set(particle_set, qs_kind_set, basis=basis_set_AO)
2582 :
2583 342 : DO ibasis = 1, SIZE(basis_set_AO)
2584 226 : orb_basis => basis_set_AO(ibasis)%gto_basis_set
2585 226 : CALL init_interaction_radii_orb_basis(orb_basis, ri_data%eps_pgf_orb)
2586 226 : ri_basis => basis_set_RI(ibasis)%gto_basis_set
2587 342 : CALL init_interaction_radii_orb_basis(ri_basis, ri_data%eps_pgf_orb)
2588 : END DO
2589 :
2590 : ! Precompute the derivatives
2591 5684 : ALLOCATE (t_2c_der_metric(3), t_2c_der_RI(3), t_3c_der_AO(3), t_3c_der_RI(3))
2592 116 : IF (use_virial) THEN
2593 : CALL precalc_derivatives(t_3c_der_RI_comp, t_3c_der_AO_comp, t_3c_der_RI_ind, t_3c_der_AO_ind, &
2594 : t_2c_der_RI, t_2c_der_metric, t_3c_ri_ao_ao, &
2595 : basis_set_AO, basis_set_RI, ri_data, qs_env, &
2596 : nl_2c_pot=nl_2c_pot, nl_2c_met=nl_2c_met, &
2597 4 : nl_3c_out=nl_3c, t_3c_virial=t_3c_virial)
2598 :
2599 16 : ALLOCATE (col_bsize(natom), row_bsize(natom))
2600 16 : col_bsize(:) = ri_data%bsizes_RI
2601 16 : row_bsize(:) = ri_data%bsizes_RI
2602 4 : CALL dbcsr_create(virial_trace, "virial_trace", dbcsr_dist, dbcsr_type_no_symmetry, row_bsize, col_bsize)
2603 4 : CALL dbt_create(virial_trace, t_2c_virial)
2604 4 : DEALLOCATE (col_bsize, row_bsize)
2605 : ELSE
2606 : CALL precalc_derivatives(t_3c_der_RI_comp, t_3c_der_AO_comp, t_3c_der_RI_ind, t_3c_der_AO_ind, &
2607 : t_2c_der_RI, t_2c_der_metric, t_3c_ri_ao_ao, &
2608 112 : basis_set_AO, basis_set_RI, ri_data, qs_env)
2609 : END IF
2610 :
2611 : ! Keep track of derivative sparsity to be able to use retain_sparsity in contraction
2612 116 : CALL dbt_create(t_3c_ri_ao_ao, t_3c_sparse)
2613 464 : DO i_xyz = 1, 3
2614 1490 : DO i_mem = 1, SIZE(t_3c_der_RI_comp, 1)
2615 : CALL decompress_tensor(t_3c_ri_ao_ao, t_3c_der_RI_ind(i_mem, i_xyz)%ind, &
2616 1026 : t_3c_der_RI_comp(i_mem, i_xyz), ri_data%filter_eps_storage)
2617 1026 : CALL dbt_copy(t_3c_ri_ao_ao, t_3c_sparse, summation=.TRUE., move_data=.TRUE.)
2618 :
2619 : CALL decompress_tensor(t_3c_ri_ao_ao, t_3c_der_AO_ind(i_mem, i_xyz)%ind, &
2620 1026 : t_3c_der_AO_comp(i_mem, i_xyz), ri_data%filter_eps_storage)
2621 1026 : CALL dbt_copy(t_3c_ri_ao_ao, t_3c_sparse, summation=.TRUE.)
2622 1374 : CALL dbt_copy(t_3c_ri_ao_ao, t_3c_sparse, order=[1, 3, 2], summation=.TRUE., move_data=.TRUE.)
2623 : END DO
2624 : END DO
2625 :
2626 464 : DO i_xyz = 1, 3
2627 348 : CALL dbt_create(t_3c_ri_ao_ao, t_3c_der_RI(i_xyz))
2628 464 : CALL dbt_create(t_3c_ri_ao_ao, t_3c_der_AO(i_xyz))
2629 : END DO
2630 :
2631 : ! Some utilities
2632 116 : spin_fac = 0.5_dp
2633 116 : IF (nspins == 2) spin_fac = 1.0_dp
2634 116 : IF (PRESENT(rescale_factor)) spin_fac = spin_fac*rescale_factor
2635 :
2636 348 : ALLOCATE (idx_to_at_RI(SIZE(ri_data%bsizes_RI_split)))
2637 116 : CALL get_idx_to_atom(idx_to_at_RI, ri_data%bsizes_RI_split, ri_data%bsizes_RI)
2638 :
2639 348 : ALLOCATE (idx_to_at_AO(SIZE(ri_data%bsizes_AO_split)))
2640 116 : CALL get_idx_to_atom(idx_to_at_AO, ri_data%bsizes_AO_split, ri_data%bsizes_AO)
2641 :
2642 116 : CALL get_atomic_kind_set(atomic_kind_set, kind_of=kind_of, atom_of_kind=atom_of_kind)
2643 :
2644 : ! Go over batches of the 2 AO indices to save memory
2645 116 : n_mem = ri_data%n_mem
2646 464 : ALLOCATE (batch_start(n_mem), batch_end(n_mem))
2647 464 : batch_start(:) = ri_data%starts_array_mem(:)
2648 464 : batch_end(:) = ri_data%ends_array_mem(:)
2649 :
2650 348 : ALLOCATE (batch_ranges(n_mem + 1))
2651 464 : batch_ranges(:n_mem) = ri_data%starts_array_mem_block(:)
2652 116 : batch_ranges(n_mem + 1) = ri_data%ends_array_mem_block(n_mem) + 1
2653 :
2654 116 : n_mem_RI = ri_data%n_mem_RI
2655 464 : ALLOCATE (batch_start_RI(n_mem_RI), batch_end_RI(n_mem_RI))
2656 464 : batch_start_RI(:) = ri_data%starts_array_RI_mem(:)
2657 464 : batch_end_RI(:) = ri_data%ends_array_RI_mem(:)
2658 :
2659 348 : ALLOCATE (batch_ranges_RI(n_mem_RI + 1))
2660 464 : batch_ranges_RI(:n_mem_RI) = ri_data%starts_array_RI_mem_block(:)
2661 116 : batch_ranges_RI(n_mem_RI + 1) = ri_data%ends_array_RI_mem_block(n_mem_RI) + 1
2662 :
2663 : ! Pre-create all the needed tensors
2664 : CALL create_2c_tensor(rho_ao_1, dist1, dist2, ri_data%pgrid_2d, &
2665 : ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
2666 116 : name="(AO | AO)")
2667 116 : DEALLOCATE (dist1, dist2)
2668 116 : CALL dbt_create(rho_ao_1, rho_ao_2)
2669 :
2670 : CALL create_2c_tensor(t_2c_RI, dist1, dist2, ri_data%pgrid_2d, &
2671 : ri_data%bsizes_RI_split, ri_data%bsizes_RI_split, name="(RI | RI)")
2672 116 : DEALLOCATE (dist1, dist2)
2673 116 : CALL dbt_create(t_2c_RI, t_SVS)
2674 116 : CALL dbt_create(t_2c_RI, t_R)
2675 116 : CALL dbt_create(t_2c_RI, t_2c_RI_tmp)
2676 :
2677 116 : CALL dbt_create(t_3c_ao_ri_ao, t_3c_1)
2678 116 : CALL dbt_create(t_3c_ao_ri_ao, t_3c_2)
2679 116 : CALL dbt_create(t_3c_ri_ao_ao, t_3c_3)
2680 116 : CALL dbt_create(t_3c_ri_ao_ao, t_3c_4)
2681 116 : CALL dbt_create(t_3c_ri_ao_ao, t_3c_5)
2682 116 : CALL dbt_create(t_3c_ri_ao_ao, t_3c_help_1)
2683 116 : CALL dbt_create(t_3c_ri_ao_ao, t_3c_help_2)
2684 :
2685 116 : CALL dbt_create(t_3c_ao_ri_ao, t_3c_int)
2686 116 : CALL dbt_copy(ri_data%t_3c_int_ctr_2(1, 1), t_3c_int)
2687 :
2688 116 : CALL dbt_create(t_3c_ri_ao_ao, t_3c_int_2)
2689 :
2690 116 : CALL para_env%sync()
2691 116 : t1 = m_walltime()
2692 :
2693 : !Pre-calculate the necessary 2-center quantities
2694 116 : IF (.NOT. ri_data%same_op) THEN
2695 : !S^-1 * V * S^-1
2696 : CALL dbt_contract(1.0_dp, ri_data%t_2c_inv(1, 1), ri_data%t_2c_pot(1, 1), 0.0_dp, t_2c_RI, &
2697 : contract_1=[2], notcontract_1=[1], &
2698 : contract_2=[1], notcontract_2=[2], &
2699 : map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
2700 28 : unit_nr=unit_nr_dbcsr, flop=nflop)
2701 28 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2702 :
2703 : CALL dbt_contract(1.0_dp, t_2c_RI, ri_data%t_2c_inv(1, 1), 0.0_dp, t_SVS, &
2704 : contract_1=[2], notcontract_1=[1], &
2705 : contract_2=[1], notcontract_2=[2], &
2706 : map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
2707 28 : unit_nr=unit_nr_dbcsr, flop=nflop)
2708 28 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2709 : ELSE
2710 : ! Simply V^-1
2711 88 : CALL dbt_copy(ri_data%t_2c_inv(1, 1), t_SVS)
2712 : END IF
2713 :
2714 116 : CALL dbt_batched_contract_init(t_3c_int, batch_range_1=batch_ranges, batch_range_3=batch_ranges)
2715 : CALL dbt_batched_contract_init(t_3c_int_2, batch_range_1=batch_ranges_RI, &
2716 116 : batch_range_2=batch_ranges, batch_range_3=batch_ranges)
2717 116 : CALL dbt_batched_contract_init(t_3c_1, batch_range_1=batch_ranges, batch_range_3=batch_ranges)
2718 116 : CALL dbt_batched_contract_init(t_3c_2, batch_range_1=batch_ranges, batch_range_3=batch_ranges)
2719 : CALL dbt_batched_contract_init(t_3c_3, batch_range_1=batch_ranges_RI, &
2720 116 : batch_range_2=batch_ranges, batch_range_3=batch_ranges)
2721 : CALL dbt_batched_contract_init(t_3c_4, batch_range_1=batch_ranges_RI, &
2722 116 : batch_range_2=batch_ranges, batch_range_3=batch_ranges)
2723 : CALL dbt_batched_contract_init(t_3c_5, batch_range_1=batch_ranges_RI, &
2724 116 : batch_range_2=batch_ranges, batch_range_3=batch_ranges)
2725 : CALL dbt_batched_contract_init(t_3c_sparse, batch_range_1=batch_ranges_RI, &
2726 116 : batch_range_2=batch_ranges, batch_range_3=batch_ranges)
2727 :
2728 242 : DO i_spin = 1, nspins
2729 :
2730 : !Prepare Pmat in tensor format
2731 126 : CALL dbt_create(rho_ao(i_spin, 1)%matrix, t_2c_tmp)
2732 126 : CALL dbt_copy_matrix_to_tensor(rho_ao(i_spin, 1)%matrix, t_2c_tmp)
2733 126 : CALL dbt_copy(t_2c_tmp, rho_ao_1, move_data=.TRUE.)
2734 126 : CALL dbt_destroy(t_2c_tmp)
2735 :
2736 126 : IF (.NOT. do_resp) THEN
2737 94 : CALL dbt_copy(rho_ao_1, rho_ao_2)
2738 32 : ELSE IF (do_resp .AND. resp_only_prv) THEN
2739 :
2740 24 : CALL dbt_create(rho_ao_resp(i_spin)%matrix, t_2c_tmp)
2741 24 : CALL dbt_copy_matrix_to_tensor(rho_ao_resp(i_spin)%matrix, t_2c_tmp)
2742 24 : CALL dbt_copy(t_2c_tmp, rho_ao_2)
2743 : !symmetry allows to take 2*P_resp rasther than explicitely take all cross products
2744 24 : CALL dbt_copy(t_2c_tmp, rho_ao_2, summation=.TRUE., move_data=.TRUE.)
2745 24 : CALL dbt_destroy(t_2c_tmp)
2746 : ELSE
2747 :
2748 : !if not resp_only, need P-P_resp and P+P_resp
2749 8 : CALL dbt_copy(rho_ao_1, rho_ao_2)
2750 8 : CALL dbcsr_create(dbcsr_tmp, template=rho_ao_resp(i_spin)%matrix)
2751 8 : CALL dbcsr_add(dbcsr_tmp, rho_ao_resp(i_spin)%matrix, 0.0_dp, -1.0_dp)
2752 8 : CALL dbt_create(dbcsr_tmp, t_2c_tmp)
2753 8 : CALL dbt_copy_matrix_to_tensor(dbcsr_tmp, t_2c_tmp)
2754 8 : CALL dbt_copy(t_2c_tmp, rho_ao_1, summation=.TRUE., move_data=.TRUE.)
2755 8 : CALL dbcsr_release(dbcsr_tmp)
2756 :
2757 8 : CALL dbt_copy_matrix_to_tensor(rho_ao_resp(i_spin)%matrix, t_2c_tmp)
2758 8 : CALL dbt_copy(t_2c_tmp, rho_ao_2, summation=.TRUE., move_data=.TRUE.)
2759 8 : CALL dbt_destroy(t_2c_tmp)
2760 :
2761 : END IF
2762 126 : work_virial = 0.0_dp
2763 :
2764 126 : CALL timeset(routineN//"_3c", handle)
2765 : !Start looping of the batches
2766 504 : DO i_mem = 1, n_mem
2767 1134 : ibounds(:, 1) = [batch_start(i_mem), batch_end(i_mem)]
2768 :
2769 378 : CALL dbt_batched_contract_init(rho_ao_1)
2770 : CALL dbt_contract(1.0_dp, rho_ao_1, t_3c_int, 0.0_dp, t_3c_1, &
2771 : contract_1=[1], notcontract_1=[2], &
2772 : contract_2=[3], notcontract_2=[1, 2], &
2773 : map_1=[3], map_2=[1, 2], filter_eps=ri_data%filter_eps, &
2774 378 : bounds_2=ibounds, unit_nr=unit_nr_dbcsr, flop=nflop)
2775 378 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2776 378 : CALL dbt_batched_contract_finalize(rho_ao_1)
2777 :
2778 378 : CALL dbt_copy(t_3c_1, t_3c_2, order=[3, 2, 1], move_data=.TRUE.)
2779 :
2780 1512 : DO j_mem = 1, n_mem
2781 3402 : jbounds(:, 1) = [batch_start(j_mem), batch_end(j_mem)]
2782 :
2783 1134 : CALL dbt_batched_contract_init(rho_ao_2)
2784 : CALL dbt_contract(1.0_dp, rho_ao_2, t_3c_2, 0.0_dp, t_3c_1, &
2785 : contract_1=[1], notcontract_1=[2], &
2786 : contract_2=[3], notcontract_2=[1, 2], &
2787 : map_1=[3], map_2=[1, 2], filter_eps=ri_data%filter_eps, &
2788 1134 : bounds_2=jbounds, unit_nr=unit_nr_dbcsr, flop=nflop)
2789 1134 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2790 1134 : CALL dbt_batched_contract_finalize(rho_ao_2)
2791 :
2792 3402 : bounds_cpy(:, 1) = [batch_start(i_mem), batch_end(i_mem)]
2793 6840 : bounds_cpy(:, 2) = [1, SUM(ri_data%bsizes_RI)]
2794 3402 : bounds_cpy(:, 3) = [batch_start(j_mem), batch_end(j_mem)]
2795 1134 : CALL dbt_copy(t_3c_int, t_3c_int_2, order=[2, 1, 3], bounds=bounds_cpy)
2796 1134 : CALL dbt_copy(t_3c_1, t_3c_3, order=[2, 1, 3], move_data=.TRUE.)
2797 :
2798 4914 : DO k_mem = 1, n_mem_RI
2799 10206 : kbounds(:, 1) = [batch_start_RI(k_mem), batch_end_RI(k_mem)]
2800 :
2801 10206 : bounds_cpy(:, 1) = [batch_start_RI(k_mem), batch_end_RI(k_mem)]
2802 10206 : bounds_cpy(:, 2) = [batch_start(i_mem), batch_end(i_mem)]
2803 10206 : bounds_cpy(:, 3) = [batch_start(j_mem), batch_end(j_mem)]
2804 3402 : CALL dbt_copy(t_3c_sparse, t_3c_4, bounds=bounds_cpy)
2805 :
2806 : !Contract with the 2-center product S^-1 * V * S^-1 while keeping sparsity of derivatives
2807 3402 : CALL dbt_batched_contract_init(t_SVS)
2808 : CALL dbt_contract(1.0_dp, t_SVS, t_3c_3, 0.0_dp, t_3c_4, &
2809 : contract_1=[2], notcontract_1=[1], &
2810 : contract_2=[1], notcontract_2=[2, 3], &
2811 : map_1=[1], map_2=[2, 3], filter_eps=ri_data%filter_eps, &
2812 3402 : retain_sparsity=.TRUE., unit_nr=unit_nr_dbcsr, flop=nflop)
2813 3402 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2814 3402 : CALL dbt_batched_contract_finalize(t_SVS)
2815 :
2816 3402 : CALL dbt_copy(t_3c_4, t_3c_5, summation=.TRUE., move_data=.TRUE.)
2817 :
2818 10206 : ijbounds(:, 1) = ibounds(:, 1)
2819 10206 : ijbounds(:, 2) = jbounds(:, 1)
2820 :
2821 : !Contract R_PS = (acP) M_acS
2822 3402 : CALL dbt_batched_contract_init(t_R)
2823 : CALL dbt_contract(1.0_dp, t_3c_int_2, t_3c_3, 1.0_dp, t_R, &
2824 : contract_1=[2, 3], notcontract_1=[1], &
2825 : contract_2=[2, 3], notcontract_2=[1], &
2826 : map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
2827 : bounds_1=ijbounds, bounds_3=kbounds, &
2828 3402 : unit_nr=unit_nr_dbcsr, flop=nflop)
2829 3402 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2830 4536 : CALL dbt_batched_contract_finalize(t_R)
2831 :
2832 : END DO !k_mem
2833 : END DO !j_mem
2834 :
2835 378 : CALL dbt_copy(t_3c_5, t_3c_help_1, move_data=.TRUE.)
2836 :
2837 : !The force from the 3c derivatives
2838 378 : pref = -0.5_dp*2.0_dp*hf_fraction*spin_fac
2839 :
2840 1476 : DO k_mem = 1, SIZE(t_3c_der_RI_comp, 1)
2841 4392 : DO i_xyz = 1, 3
2842 3294 : CALL dbt_clear(t_3c_der_RI(i_xyz))
2843 : CALL decompress_tensor(t_3c_der_RI(i_xyz), t_3c_der_RI_ind(k_mem, i_xyz)%ind, &
2844 4392 : t_3c_der_RI_comp(k_mem, i_xyz), ri_data%filter_eps_storage)
2845 : END DO
2846 : CALL get_force_from_3c_trace(force, t_3c_help_1, t_3c_der_RI, atom_of_kind, kind_of, &
2847 1476 : idx_to_at_RI, pref)
2848 : END DO
2849 :
2850 378 : pref = -0.5_dp*4.0_dp*hf_fraction*spin_fac
2851 378 : IF (do_resp) THEN
2852 96 : pref = 0.5_dp*pref
2853 96 : CALL dbt_copy(t_3c_help_1, t_3c_help_2, order=[1, 3, 2])
2854 : END IF
2855 :
2856 1476 : DO k_mem = 1, SIZE(t_3c_der_AO_comp, 1)
2857 4392 : DO i_xyz = 1, 3
2858 3294 : CALL dbt_clear(t_3c_der_AO(i_xyz))
2859 : CALL decompress_tensor(t_3c_der_AO(i_xyz), t_3c_der_AO_ind(k_mem, i_xyz)%ind, &
2860 4392 : t_3c_der_AO_comp(k_mem, i_xyz), ri_data%filter_eps_storage)
2861 : END DO
2862 : CALL get_force_from_3c_trace(force, t_3c_help_1, t_3c_der_AO, atom_of_kind, kind_of, &
2863 1098 : idx_to_at_AO, pref, deriv_dim=2)
2864 :
2865 1476 : IF (do_resp) THEN
2866 : CALL get_force_from_3c_trace(force, t_3c_help_2, t_3c_der_AO, atom_of_kind, kind_of, &
2867 276 : idx_to_at_AO, pref, deriv_dim=2)
2868 : END IF
2869 : END DO
2870 :
2871 : !The 3c virial contribution. Note: only fraction of integrals correspondig to i_mem calculated
2872 378 : IF (use_virial) THEN
2873 12 : pref = -0.5_dp*2.0_dp*hf_fraction*spin_fac
2874 12 : CALL dbt_copy(t_3c_help_1, t_3c_virial, move_data=.TRUE.)
2875 : CALL calc_3c_virial(work_virial, t_3c_virial, pref, qs_env, nl_3c, basis_set_RI, &
2876 : basis_set_AO, basis_set_AO, ri_data%ri_metric, &
2877 12 : der_eps=ri_data%eps_schwarz_forces, op_pos=1)
2878 :
2879 12 : CALL dbt_clear(t_3c_virial)
2880 : END IF
2881 :
2882 378 : CALL dbt_clear(t_3c_help_1)
2883 504 : CALL dbt_clear(t_3c_help_2)
2884 : END DO !i_mem
2885 126 : CALL timestop(handle)
2886 :
2887 126 : CALL timeset(routineN//"_2c", handle)
2888 : !Now we deal with all the 2-center quantities
2889 : !Calculate S^-1 * R * S^-1
2890 : CALL dbt_contract(1.0_dp, ri_data%t_2c_inv(1, 1), t_R, 0.0_dp, t_2c_RI_tmp, &
2891 : contract_1=[2], notcontract_1=[1], &
2892 : contract_2=[1], notcontract_2=[2], &
2893 : map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
2894 126 : unit_nr=unit_nr_dbcsr, flop=nflop)
2895 126 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2896 :
2897 : CALL dbt_contract(1.0_dp, t_2c_RI_tmp, ri_data%t_2c_inv(1, 1), 0.0_dp, t_2c_RI, &
2898 : contract_1=[2], notcontract_1=[1], &
2899 : contract_2=[1], notcontract_2=[2], &
2900 : map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
2901 126 : unit_nr=unit_nr_dbcsr, flop=nflop)
2902 126 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2903 :
2904 : !Calculate the potential contribution to the force: [S^-1*R*S^-1]_QR d/dx (Q|R)
2905 126 : pref = 0.5_dp*hf_fraction*spin_fac
2906 126 : IF (.NOT. ri_data%same_op) pref = -pref
2907 126 : CALL get_2c_der_force(force, t_2c_RI, t_2c_der_RI, atom_of_kind, kind_of, idx_to_at_RI, pref)
2908 :
2909 : !Calculate the contribution to the virial on the fly
2910 126 : IF (use_virial_prv) THEN
2911 4 : CALL dbt_copy(t_2c_RI, t_2c_virial)
2912 4 : CALL dbt_copy_tensor_to_matrix(t_2c_virial, virial_trace)
2913 : CALL calc_2c_virial(work_virial, virial_trace, pref, qs_env, nl_2c_pot, &
2914 4 : basis_set_RI, basis_set_RI, ri_data%hfx_pot)
2915 : END IF
2916 :
2917 : !And that from the metric: [S^-1*R*S^-1*(Q|R)*S^-1]_UV d/dx S_UV
2918 126 : IF (.NOT. ri_data%same_op) THEN
2919 : CALL dbt_contract(1.0_dp, t_2c_RI, ri_data%t_2c_pot(1, 1), 0.0_dp, t_2c_RI_tmp, &
2920 : contract_1=[2], notcontract_1=[1], &
2921 : contract_2=[1], notcontract_2=[2], &
2922 : map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
2923 28 : unit_nr=unit_nr_dbcsr, flop=nflop)
2924 28 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2925 :
2926 : CALL dbt_contract(1.0_dp, t_2c_RI_tmp, ri_data%t_2c_inv(1, 1), 0.0_dp, t_2c_RI, &
2927 : contract_1=[2], notcontract_1=[1], &
2928 : contract_2=[1], notcontract_2=[2], &
2929 : map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
2930 28 : unit_nr=unit_nr_dbcsr, flop=nflop)
2931 28 : ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
2932 :
2933 28 : pref = 0.5_dp*2.0_dp*hf_fraction*spin_fac
2934 28 : CALL get_2c_der_force(force, t_2c_RI, t_2c_der_metric, atom_of_kind, kind_of, idx_to_at_RI, pref)
2935 :
2936 28 : IF (use_virial_prv) THEN
2937 0 : CALL dbt_copy(t_2c_RI, t_2c_virial)
2938 0 : CALL dbt_copy_tensor_to_matrix(t_2c_virial, virial_trace)
2939 : CALL calc_2c_virial(work_virial, virial_trace, pref, qs_env, nl_2c_met, &
2940 0 : basis_set_RI, basis_set_RI, ri_data%ri_metric)
2941 : END IF
2942 : END IF
2943 126 : CALL dbt_clear(t_2c_RI)
2944 126 : CALL dbt_clear(t_2c_RI_tmp)
2945 126 : CALL dbt_clear(t_R)
2946 126 : CALL dbt_clear(t_3c_help_1)
2947 126 : CALL dbt_clear(t_3c_help_2)
2948 126 : CALL timestop(handle)
2949 :
2950 494 : IF (use_virial_prv) THEN
2951 16 : DO k_xyz = 1, 3
2952 52 : DO j_xyz = 1, 3
2953 156 : DO i_xyz = 1, 3
2954 : virial%pv_fock_4c(i_xyz, j_xyz) = virial%pv_fock_4c(i_xyz, j_xyz) &
2955 144 : + work_virial(i_xyz, k_xyz)*cell%hmat(j_xyz, k_xyz)
2956 : END DO
2957 : END DO
2958 : END DO
2959 : END IF
2960 : END DO !i_spin
2961 :
2962 116 : CALL dbt_batched_contract_finalize(t_3c_int)
2963 116 : CALL dbt_batched_contract_finalize(t_3c_int_2)
2964 116 : CALL dbt_batched_contract_finalize(t_3c_1)
2965 116 : CALL dbt_batched_contract_finalize(t_3c_2)
2966 116 : CALL dbt_batched_contract_finalize(t_3c_3)
2967 116 : CALL dbt_batched_contract_finalize(t_3c_4)
2968 116 : CALL dbt_batched_contract_finalize(t_3c_5)
2969 116 : CALL dbt_batched_contract_finalize(t_3c_sparse)
2970 :
2971 116 : CALL para_env%sync()
2972 116 : t2 = m_walltime()
2973 :
2974 116 : CALL dbt_copy(t_3c_int, ri_data%t_3c_int_ctr_2(1, 1), move_data=.TRUE.)
2975 :
2976 : !clean-up
2977 116 : CALL dbt_destroy(rho_ao_1)
2978 116 : CALL dbt_destroy(rho_ao_2)
2979 116 : CALL dbt_destroy(t_3c_ao_ri_ao)
2980 116 : CALL dbt_destroy(t_3c_ri_ao_ao)
2981 116 : CALL dbt_destroy(t_3c_int)
2982 116 : CALL dbt_destroy(t_3c_int_2)
2983 116 : CALL dbt_destroy(t_3c_1)
2984 116 : CALL dbt_destroy(t_3c_2)
2985 116 : CALL dbt_destroy(t_3c_3)
2986 116 : CALL dbt_destroy(t_3c_4)
2987 116 : CALL dbt_destroy(t_3c_5)
2988 116 : CALL dbt_destroy(t_3c_help_1)
2989 116 : CALL dbt_destroy(t_3c_help_2)
2990 116 : CALL dbt_destroy(t_3c_sparse)
2991 116 : CALL dbt_destroy(t_SVS)
2992 116 : CALL dbt_destroy(t_R)
2993 116 : CALL dbt_destroy(t_2c_RI)
2994 116 : CALL dbt_destroy(t_2c_RI_tmp)
2995 :
2996 464 : DO i_xyz = 1, 3
2997 348 : CALL dbt_destroy(t_3c_der_RI(i_xyz))
2998 348 : CALL dbt_destroy(t_3c_der_AO(i_xyz))
2999 348 : CALL dbt_destroy(t_2c_der_RI(i_xyz))
3000 464 : IF (.NOT. ri_data%same_op) CALL dbt_destroy(t_2c_der_metric(i_xyz))
3001 : END DO
3002 :
3003 464 : DO i_xyz = 1, 3
3004 1490 : DO i_mem = 1, SIZE(t_3c_der_AO_comp, 1)
3005 1026 : CALL dealloc_containers(t_3c_der_AO_comp(i_mem, i_xyz), dummy_int)
3006 1374 : CALL dealloc_containers(t_3c_der_RI_comp(i_mem, i_xyz), dummy_int)
3007 : END DO
3008 : END DO
3009 2168 : DEALLOCATE (t_3c_der_AO_ind, t_3c_der_RI_ind)
3010 :
3011 342 : DO ibasis = 1, SIZE(basis_set_AO)
3012 226 : orb_basis => basis_set_AO(ibasis)%gto_basis_set
3013 226 : ri_basis => basis_set_RI(ibasis)%gto_basis_set
3014 226 : CALL init_interaction_radii_orb_basis(orb_basis, dft_control%qs_control%eps_pgf_orb)
3015 342 : CALL init_interaction_radii_orb_basis(ri_basis, dft_control%qs_control%eps_pgf_orb)
3016 : END DO
3017 :
3018 116 : IF (use_virial) THEN
3019 4 : CALL release_neighbor_list_sets(nl_2c_met)
3020 4 : CALL release_neighbor_list_sets(nl_2c_pot)
3021 4 : CALL neighbor_list_3c_destroy(nl_3c)
3022 4 : CALL dbcsr_release(virial_trace)
3023 4 : CALL dbt_destroy(t_2c_virial)
3024 4 : CALL dbt_destroy(t_3c_virial)
3025 : END IF
3026 :
3027 2552 : END SUBROUTINE hfx_ri_forces_Pmat
3028 :
3029 : ! **************************************************************************************************
3030 : !> \brief the general routine that calls the relevant force code
3031 : !> \param qs_env ...
3032 : !> \param ri_data ...
3033 : !> \param nspins ...
3034 : !> \param hf_fraction ...
3035 : !> \param rho_ao ...
3036 : !> \param rho_ao_resp ...
3037 : !> \param mos ...
3038 : !> \param use_virial ...
3039 : !> \param resp_only ...
3040 : !> \param rescale_factor ...
3041 : ! **************************************************************************************************
3042 130 : SUBROUTINE hfx_ri_update_forces(qs_env, ri_data, nspins, hf_fraction, rho_ao, rho_ao_resp, &
3043 130 : mos, use_virial, resp_only, rescale_factor)
3044 :
3045 : TYPE(qs_environment_type), POINTER :: qs_env
3046 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
3047 : INTEGER, INTENT(IN) :: nspins
3048 : REAL(KIND=dp), INTENT(IN) :: hf_fraction
3049 : TYPE(dbcsr_p_type), DIMENSION(:, :), OPTIONAL :: rho_ao
3050 : TYPE(dbcsr_p_type), DIMENSION(:), OPTIONAL :: rho_ao_resp
3051 : TYPE(mo_set_type), DIMENSION(:), INTENT(IN), &
3052 : OPTIONAL :: mos
3053 : LOGICAL, INTENT(IN), OPTIONAL :: use_virial, resp_only
3054 : REAL(dp), INTENT(IN), OPTIONAL :: rescale_factor
3055 :
3056 : CHARACTER(LEN=*), PARAMETER :: routineN = 'hfx_ri_update_forces'
3057 :
3058 : INTEGER :: handle, ispin
3059 : INTEGER, DIMENSION(2) :: homo
3060 130 : REAL(KIND=dp), DIMENSION(:), POINTER :: mo_eigenvalues
3061 : TYPE(cp_fm_type), POINTER :: mo_coeff
3062 390 : TYPE(dbcsr_type), DIMENSION(2) :: mo_coeff_b
3063 : TYPE(dbcsr_type), POINTER :: mo_coeff_b_tmp
3064 :
3065 130 : CALL timeset(routineN, handle)
3066 :
3067 144 : SELECT CASE (ri_data%flavor)
3068 : CASE (ri_mo)
3069 :
3070 32 : DO ispin = 1, nspins
3071 18 : NULLIFY (mo_coeff_b_tmp)
3072 18 : CPASSERT(mos(ispin)%uniform_occupation)
3073 18 : CALL get_mo_set(mo_set=mos(ispin), mo_coeff=mo_coeff, eigenvalues=mo_eigenvalues, mo_coeff_b=mo_coeff_b_tmp)
3074 :
3075 18 : IF (.NOT. mos(ispin)%use_mo_coeff_b) CALL copy_fm_to_dbcsr(mo_coeff, mo_coeff_b_tmp)
3076 32 : CALL dbcsr_copy(mo_coeff_b(ispin), mo_coeff_b_tmp)
3077 : END DO
3078 :
3079 32 : DO ispin = 1, nspins
3080 18 : CALL dbcsr_scale(mo_coeff_b(ispin), SQRT(mos(ispin)%maxocc))
3081 14 : homo(ispin) = mos(ispin)%homo
3082 : END DO
3083 :
3084 14 : CALL hfx_ri_forces_mo(qs_env, ri_data, nspins, hf_fraction, mo_coeff_b, use_virial)
3085 :
3086 : CASE (ri_pmat)
3087 :
3088 : CALL hfx_ri_forces_Pmat(qs_env, ri_data, nspins, hf_fraction, rho_ao, rho_ao_resp, use_virial, &
3089 216 : resp_only, rescale_factor)
3090 : END SELECT
3091 :
3092 274 : DO ispin = 1, nspins
3093 274 : CALL dbcsr_release(mo_coeff_b(ispin))
3094 : END DO
3095 :
3096 130 : CALL timestop(handle)
3097 :
3098 130 : END SUBROUTINE hfx_ri_update_forces
3099 :
3100 : ! **************************************************************************************************
3101 : !> \brief Calculate the derivatives tensors for the force, in a format fit for contractions
3102 : !> \param t_3c_der_RI_comp compressed RI derivatives
3103 : !> \param t_3c_der_AO_comp compressed AO derivatives
3104 : !> \param t_3c_der_RI_ind ...
3105 : !> \param t_3c_der_AO_ind ...
3106 : !> \param t_2c_der_RI format based on standard atomic block sizes
3107 : !> \param t_2c_der_metric format based on standard atomic block sizes
3108 : !> \param ri_ao_ao_template ...
3109 : !> \param basis_set_AO ...
3110 : !> \param basis_set_RI ...
3111 : !> \param ri_data ...
3112 : !> \param qs_env ...
3113 : !> \param nl_2c_pot ...
3114 : !> \param nl_2c_met ...
3115 : !> \param nl_3c_out ...
3116 : !> \param t_3c_virial ...
3117 : ! **************************************************************************************************
3118 4160 : SUBROUTINE precalc_derivatives(t_3c_der_RI_comp, t_3c_der_AO_comp, t_3c_der_RI_ind, t_3c_der_AO_ind, &
3119 : t_2c_der_RI, t_2c_der_metric, ri_ao_ao_template, &
3120 : basis_set_AO, basis_set_RI, ri_data, qs_env, &
3121 : nl_2c_pot, nl_2c_met, nl_3c_out, t_3c_virial)
3122 :
3123 : TYPE(hfx_compression_type), ALLOCATABLE, &
3124 : DIMENSION(:, :), INTENT(INOUT) :: t_3c_der_RI_comp, t_3c_der_AO_comp
3125 : TYPE(block_ind_type), ALLOCATABLE, DIMENSION(:, :) :: t_3c_der_RI_ind, t_3c_der_AO_ind
3126 : TYPE(dbt_type), DIMENSION(3), INTENT(OUT) :: t_2c_der_RI, t_2c_der_metric
3127 : TYPE(dbt_type), INTENT(INOUT) :: ri_ao_ao_template
3128 : TYPE(gto_basis_set_p_type), ALLOCATABLE, &
3129 : DIMENSION(:), TARGET :: basis_set_AO, basis_set_RI
3130 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
3131 : TYPE(qs_environment_type), POINTER :: qs_env
3132 : TYPE(neighbor_list_set_p_type), DIMENSION(:), &
3133 : OPTIONAL, POINTER :: nl_2c_pot, nl_2c_met
3134 : TYPE(neighbor_list_3c_type), OPTIONAL :: nl_3c_out
3135 : TYPE(dbt_type), INTENT(INOUT), OPTIONAL :: t_3c_virial
3136 :
3137 : CHARACTER(LEN=*), PARAMETER :: routineN = 'precalc_derivatives'
3138 :
3139 : INTEGER :: handle, i_mem, i_xyz, n_mem, natom, &
3140 : nkind, nthreads
3141 : INTEGER(int_8) :: nze, nze_tot
3142 130 : INTEGER, ALLOCATABLE, DIMENSION(:) :: dist1, dist2, dist_AO_1, dist_AO_2, &
3143 130 : dist_RI, dummy_end, dummy_start, &
3144 260 : end_blocks, start_blocks
3145 : INTEGER, DIMENSION(3) :: pcoord, pdims
3146 260 : INTEGER, DIMENSION(:), POINTER :: col_bsize, row_bsize
3147 : REAL(dp) :: compression_factor, memory, occ
3148 : TYPE(dbcsr_distribution_type) :: dbcsr_dist
3149 1690 : TYPE(dbcsr_type), DIMENSION(1, 3) :: t_2c_der_metric_prv, t_2c_der_RI_prv
3150 390 : TYPE(dbt_pgrid_type) :: pgrid
3151 3380 : TYPE(dbt_type) :: t_2c_template, t_2c_tmp, t_3c_template
3152 130 : TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :, :) :: t_3c_der_AO_prv, t_3c_der_RI_prv
3153 : TYPE(dft_control_type), POINTER :: dft_control
3154 : TYPE(distribution_2d_type), POINTER :: dist_2d
3155 : TYPE(distribution_3d_type) :: dist_3d, dist_3d_out
3156 130 : TYPE(mp_cart_type) :: mp_comm_t3c, mp_comm_t3c_out
3157 : TYPE(mp_para_env_type), POINTER :: para_env
3158 : TYPE(neighbor_list_3c_type) :: nl_3c
3159 : TYPE(neighbor_list_set_p_type), DIMENSION(:), &
3160 130 : POINTER :: nl_2c
3161 130 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
3162 130 : TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
3163 :
3164 130 : NULLIFY (qs_kind_set, dist_2d, nl_2c, particle_set, dft_control, para_env)
3165 :
3166 130 : CALL timeset(routineN, handle)
3167 :
3168 : CALL get_qs_env(qs_env, nkind=nkind, qs_kind_set=qs_kind_set, distribution_2d=dist_2d, natom=natom, &
3169 130 : particle_set=particle_set, dft_control=dft_control, para_env=para_env)
3170 :
3171 : !TODO: is such a pgrid correct?
3172 : !Dealing with the 3c derivatives
3173 130 : nthreads = 1
3174 130 : !$ nthreads = omp_get_num_threads()
3175 130 : pdims = 0
3176 520 : CALL dbt_pgrid_create(para_env, pdims, pgrid, tensor_dims=[MAX(1, natom/(ri_data%n_mem*nthreads)), natom, natom])
3177 :
3178 : CALL create_3c_tensor(t_3c_template, dist_RI, dist_AO_1, dist_AO_2, pgrid, &
3179 : ri_data%bsizes_RI, ri_data%bsizes_AO, ri_data%bsizes_AO, &
3180 : map1=[1], map2=[2, 3], &
3181 130 : name="der (RI AO | AO)")
3182 :
3183 4550 : ALLOCATE (t_3c_der_AO_prv(1, 1, 3), t_3c_der_RI_prv(1, 1, 3))
3184 520 : DO i_xyz = 1, 3
3185 390 : CALL dbt_create(t_3c_template, t_3c_der_RI_prv(1, 1, i_xyz))
3186 520 : CALL dbt_create(t_3c_template, t_3c_der_AO_prv(1, 1, i_xyz))
3187 : END DO
3188 130 : IF (PRESENT(t_3c_virial)) THEN
3189 4 : CALL dbt_create(t_3c_template, t_3c_virial)
3190 : END IF
3191 130 : CALL dbt_destroy(t_3c_template)
3192 :
3193 130 : CALL dbt_mp_environ_pgrid(pgrid, pdims, pcoord)
3194 130 : CALL mp_comm_t3c%create(pgrid%mp_comm_2d, 3, pdims)
3195 : CALL distribution_3d_create(dist_3d, dist_RI, dist_AO_1, dist_AO_2, &
3196 130 : nkind, particle_set, mp_comm_t3c, own_comm=.TRUE.)
3197 :
3198 : CALL build_3c_neighbor_lists(nl_3c, basis_set_RI, basis_set_AO, basis_set_AO, dist_3d, ri_data%ri_metric, &
3199 130 : "HFX_3c_nl", qs_env, op_pos=1, sym_jk=.TRUE., own_dist=.TRUE.)
3200 :
3201 130 : IF (PRESENT(nl_3c_out)) THEN
3202 4 : CALL dbt_mp_environ_pgrid(pgrid, pdims, pcoord)
3203 4 : CALL mp_comm_t3c_out%create(pgrid%mp_comm_2d, 3, pdims)
3204 : CALL distribution_3d_create(dist_3d_out, dist_RI, dist_AO_1, dist_AO_2, &
3205 4 : nkind, particle_set, mp_comm_t3c_out, own_comm=.TRUE.)
3206 : CALL build_3c_neighbor_lists(nl_3c_out, basis_set_RI, basis_set_AO, basis_set_AO, dist_3d_out, &
3207 : ri_data%ri_metric, "HFX_3c_nl", qs_env, op_pos=1, sym_jk=.FALSE., &
3208 4 : own_dist=.TRUE.)
3209 : END IF
3210 130 : DEALLOCATE (dist_RI, dist_AO_1, dist_AO_2)
3211 130 : CALL dbt_pgrid_destroy(pgrid)
3212 :
3213 130 : n_mem = ri_data%n_mem
3214 : CALL create_tensor_batches(ri_data%bsizes_RI, n_mem, dummy_start, dummy_end, &
3215 130 : start_blocks, end_blocks)
3216 130 : DEALLOCATE (dummy_start, dummy_end)
3217 :
3218 553008 : ALLOCATE (t_3c_der_AO_comp(n_mem, 3), t_3c_der_RI_comp(n_mem, 3))
3219 3628 : ALLOCATE (t_3c_der_AO_ind(n_mem, 3), t_3c_der_RI_ind(n_mem, 3))
3220 :
3221 130 : memory = 0.0_dp
3222 130 : nze_tot = 0
3223 518 : DO i_mem = 1, n_mem
3224 : CALL build_3c_derivatives(t_3c_der_RI_prv, t_3c_der_AO_prv, ri_data%filter_eps, qs_env, &
3225 : nl_3c, basis_set_RI, basis_set_AO, basis_set_AO, &
3226 : ri_data%ri_metric, der_eps=ri_data%eps_schwarz_forces, op_pos=1, &
3227 1164 : bounds_i=[start_blocks(i_mem), end_blocks(i_mem)])
3228 :
3229 1682 : DO i_xyz = 1, 3
3230 1164 : CALL dbt_copy(t_3c_der_RI_prv(1, 1, i_xyz), ri_ao_ao_template, move_data=.TRUE.)
3231 1164 : CALL dbt_filter(ri_ao_ao_template, ri_data%filter_eps)
3232 1164 : CALL get_tensor_occupancy(ri_ao_ao_template, nze, occ)
3233 1164 : nze_tot = nze_tot + nze
3234 :
3235 1164 : CALL alloc_containers(t_3c_der_RI_comp(i_mem, i_xyz), 1)
3236 : CALL compress_tensor(ri_ao_ao_template, t_3c_der_RI_ind(i_mem, i_xyz)%ind, &
3237 1164 : t_3c_der_RI_comp(i_mem, i_xyz), ri_data%filter_eps_storage, memory)
3238 1164 : CALL dbt_clear(ri_ao_ao_template)
3239 :
3240 : !put AO derivative as middle index
3241 1164 : CALL dbt_copy(t_3c_der_AO_prv(1, 1, i_xyz), ri_ao_ao_template, order=[1, 3, 2], move_data=.TRUE.)
3242 1164 : CALL dbt_filter(ri_ao_ao_template, ri_data%filter_eps)
3243 1164 : CALL get_tensor_occupancy(ri_ao_ao_template, nze, occ)
3244 1164 : nze_tot = nze_tot + nze
3245 :
3246 1164 : CALL alloc_containers(t_3c_der_AO_comp(i_mem, i_xyz), 1)
3247 : CALL compress_tensor(ri_ao_ao_template, t_3c_der_AO_ind(i_mem, i_xyz)%ind, &
3248 1164 : t_3c_der_AO_comp(i_mem, i_xyz), ri_data%filter_eps_storage, memory)
3249 3880 : CALL dbt_clear(ri_ao_ao_template)
3250 : END DO
3251 : END DO
3252 :
3253 130 : CALL neighbor_list_3c_destroy(nl_3c)
3254 520 : DO i_xyz = 1, 3
3255 390 : CALL dbt_destroy(t_3c_der_RI_prv(1, 1, i_xyz))
3256 520 : CALL dbt_destroy(t_3c_der_AO_prv(1, 1, i_xyz))
3257 : END DO
3258 :
3259 130 : CALL para_env%sum(memory)
3260 130 : compression_factor = REAL(nze_tot, dp)*1.0E-06*8.0_dp/memory
3261 130 : IF (ri_data%unit_nr > 0) THEN
3262 : WRITE (UNIT=ri_data%unit_nr, FMT="((T3,A,T66,F11.2,A4))") &
3263 14 : "MEMORY_INFO| Memory for 3-center HFX derivatives (compressed):", memory, ' MiB'
3264 :
3265 : WRITE (UNIT=ri_data%unit_nr, FMT="((T3,A,T60,F21.2))") &
3266 14 : "MEMORY_INFO| Compression factor: ", compression_factor
3267 : END IF
3268 :
3269 : !Deal with the 2-center derivatives
3270 130 : CALL cp_dbcsr_dist2d_to_dist(dist_2d, dbcsr_dist)
3271 390 : ALLOCATE (row_bsize(SIZE(ri_data%bsizes_RI)))
3272 260 : ALLOCATE (col_bsize(SIZE(ri_data%bsizes_RI)))
3273 530 : row_bsize(:) = ri_data%bsizes_RI
3274 530 : col_bsize(:) = ri_data%bsizes_RI
3275 :
3276 : CALL build_2c_neighbor_lists(nl_2c, basis_set_RI, basis_set_RI, ri_data%hfx_pot, &
3277 130 : "HFX_2c_nl_pot", qs_env, sym_ij=.TRUE., dist_2d=dist_2d)
3278 :
3279 520 : DO i_xyz = 1, 3
3280 : CALL dbcsr_create(t_2c_der_RI_prv(1, i_xyz), "(R|P) HFX der", dbcsr_dist, &
3281 520 : dbcsr_type_antisymmetric, row_bsize, col_bsize)
3282 : END DO
3283 :
3284 : CALL build_2c_derivatives(t_2c_der_RI_prv, ri_data%filter_eps_2c, qs_env, nl_2c, basis_set_RI, &
3285 130 : basis_set_RI, ri_data%hfx_pot)
3286 130 : CALL release_neighbor_list_sets(nl_2c)
3287 :
3288 130 : IF (PRESENT(nl_2c_pot)) THEN
3289 4 : NULLIFY (nl_2c_pot)
3290 : CALL build_2c_neighbor_lists(nl_2c_pot, basis_set_RI, basis_set_RI, ri_data%hfx_pot, &
3291 4 : "HFX_2c_nl_pot", qs_env, sym_ij=.FALSE., dist_2d=dist_2d)
3292 : END IF
3293 :
3294 : !copy 2c derivative tensor into the standard format
3295 : CALL create_2c_tensor(t_2c_template, dist1, dist2, ri_data%pgrid_2d, ri_data%bsizes_RI_split, &
3296 130 : ri_data%bsizes_RI_split, name='(RI| RI)')
3297 130 : DEALLOCATE (dist1, dist2)
3298 :
3299 520 : DO i_xyz = 1, 3
3300 390 : CALL dbt_create(t_2c_der_RI_prv(1, i_xyz), t_2c_tmp)
3301 390 : CALL dbt_copy_matrix_to_tensor(t_2c_der_RI_prv(1, i_xyz), t_2c_tmp)
3302 :
3303 390 : CALL dbt_create(t_2c_template, t_2c_der_RI(i_xyz))
3304 390 : CALL dbt_copy(t_2c_tmp, t_2c_der_RI(i_xyz), move_data=.TRUE.)
3305 :
3306 390 : CALL dbt_destroy(t_2c_tmp)
3307 520 : CALL dbcsr_release(t_2c_der_RI_prv(1, i_xyz))
3308 : END DO
3309 :
3310 : !Repeat with the metric, if required
3311 130 : IF (.NOT. ri_data%same_op) THEN
3312 :
3313 : CALL build_2c_neighbor_lists(nl_2c, basis_set_RI, basis_set_RI, ri_data%ri_metric, &
3314 32 : "HFX_2c_nl_RI", qs_env, sym_ij=.TRUE., dist_2d=dist_2d)
3315 :
3316 128 : DO i_xyz = 1, 3
3317 : CALL dbcsr_create(t_2c_der_metric_prv(1, i_xyz), "(R|P) HFX der", dbcsr_dist, &
3318 128 : dbcsr_type_antisymmetric, row_bsize, col_bsize)
3319 : END DO
3320 :
3321 : CALL build_2c_derivatives(t_2c_der_metric_prv, ri_data%filter_eps_2c, qs_env, nl_2c, &
3322 32 : basis_set_RI, basis_set_RI, ri_data%ri_metric)
3323 32 : CALL release_neighbor_list_sets(nl_2c)
3324 :
3325 32 : IF (PRESENT(nl_2c_met)) THEN
3326 0 : NULLIFY (nl_2c_met)
3327 : CALL build_2c_neighbor_lists(nl_2c_met, basis_set_RI, basis_set_RI, ri_data%ri_metric, &
3328 0 : "HFX_2c_nl_RI", qs_env, sym_ij=.FALSE., dist_2d=dist_2d)
3329 : END IF
3330 :
3331 128 : DO i_xyz = 1, 3
3332 96 : CALL dbt_create(t_2c_der_metric_prv(1, i_xyz), t_2c_tmp)
3333 96 : CALL dbt_copy_matrix_to_tensor(t_2c_der_metric_prv(1, i_xyz), t_2c_tmp)
3334 :
3335 96 : CALL dbt_create(t_2c_template, t_2c_der_metric(i_xyz))
3336 96 : CALL dbt_copy(t_2c_tmp, t_2c_der_metric(i_xyz), move_data=.TRUE.)
3337 :
3338 96 : CALL dbt_destroy(t_2c_tmp)
3339 128 : CALL dbcsr_release(t_2c_der_metric_prv(1, i_xyz))
3340 : END DO
3341 :
3342 : END IF
3343 :
3344 130 : CALL dbt_destroy(t_2c_template)
3345 130 : CALL dbcsr_distribution_release(dbcsr_dist)
3346 130 : DEALLOCATE (row_bsize, col_bsize)
3347 :
3348 130 : CALL timestop(handle)
3349 :
3350 1690 : END SUBROUTINE precalc_derivatives
3351 :
3352 : ! **************************************************************************************************
3353 : !> \brief This routines calculates the force contribution from a trace over 3D tensors, i.e.
3354 : !> force = sum_ijk A_ijk B_ijk. An iteration over the blocks is made, which index determin
3355 : !> the atom on which the force acts
3356 : !> \param force ...
3357 : !> \param t_3c_contr ...
3358 : !> \param t_3c_der ...
3359 : !> \param atom_of_kind ...
3360 : !> \param kind_of ...
3361 : !> \param idx_to_at ...
3362 : !> \param pref ...
3363 : !> \param do_mp2 ...
3364 : !> \param deriv_dim the dimension of the tensor corresponding to the derivative (by default 1)
3365 : ! **************************************************************************************************
3366 3862 : SUBROUTINE get_force_from_3c_trace(force, t_3c_contr, t_3c_der, atom_of_kind, kind_of, idx_to_at, &
3367 : pref, do_mp2, deriv_dim)
3368 :
3369 : TYPE(qs_force_type), DIMENSION(:), POINTER :: force
3370 : TYPE(dbt_type), INTENT(INOUT) :: t_3c_contr
3371 : TYPE(dbt_type), DIMENSION(3), INTENT(INOUT) :: t_3c_der
3372 : INTEGER, DIMENSION(:), INTENT(IN) :: atom_of_kind, kind_of, idx_to_at
3373 : REAL(dp), INTENT(IN) :: pref
3374 : LOGICAL, INTENT(IN), OPTIONAL :: do_mp2
3375 : INTEGER, INTENT(IN), OPTIONAL :: deriv_dim
3376 :
3377 : CHARACTER(LEN=*), PARAMETER :: routineN = 'get_force_from_3c_trace'
3378 :
3379 : INTEGER :: deriv_dim_prv, handle, i_xyz, iat, &
3380 : iat_of_kind, ikind, j_xyz
3381 : INTEGER, DIMENSION(3) :: ind
3382 : LOGICAL :: do_mp2_prv, found
3383 : REAL(dp) :: new_force
3384 3862 : REAL(dp), ALLOCATABLE, DIMENSION(:, :, :), TARGET :: contr_blk, der_blk
3385 : REAL(dp), DIMENSION(3) :: scoord
3386 : TYPE(dbt_iterator_type) :: iter
3387 :
3388 3862 : CALL timeset(routineN, handle)
3389 :
3390 3862 : do_mp2_prv = .FALSE.
3391 3862 : IF (PRESENT(do_mp2)) do_mp2_prv = do_mp2
3392 :
3393 3862 : deriv_dim_prv = 1
3394 3862 : IF (PRESENT(deriv_dim)) deriv_dim_prv = deriv_dim
3395 :
3396 : !$OMP PARALLEL DEFAULT(NONE) &
3397 : !$OMP SHARED(t_3c_der,t_3c_contr,force,do_mp2_prv,deriv_dim_prv,pref,idx_to_at,atom_of_kind,kind_of) &
3398 3862 : !$OMP PRIVATE(i_xyz,j_xyz,iter,ind,der_blk,contr_blk,found,new_force,iat,iat_of_kind,ikind,scoord)
3399 : DO i_xyz = 1, 3
3400 : CALL dbt_iterator_start(iter, t_3c_der(i_xyz))
3401 : DO WHILE (dbt_iterator_blocks_left(iter))
3402 : CALL dbt_iterator_next_block(iter, ind)
3403 :
3404 : CALL dbt_get_block(t_3c_der(i_xyz), ind, der_blk, found)
3405 : CPASSERT(found)
3406 : CALL dbt_get_block(t_3c_contr, ind, contr_blk, found)
3407 :
3408 : IF (found) THEN
3409 :
3410 : !take the trace of the blocks
3411 : new_force = pref*SUM(der_blk(:, :, :)*contr_blk(:, :, :))
3412 :
3413 : !the first index of the derivative tensor defines the atom
3414 : iat = idx_to_at(ind(deriv_dim_prv))
3415 : iat_of_kind = atom_of_kind(iat)
3416 : ikind = kind_of(iat)
3417 :
3418 : IF (.NOT. do_mp2_prv) THEN
3419 : !$OMP ATOMIC
3420 : force(ikind)%fock_4c(i_xyz, iat_of_kind) = force(ikind)%fock_4c(i_xyz, iat_of_kind) &
3421 : + new_force
3422 : ELSE
3423 : !$OMP ATOMIC
3424 : force(ikind)%mp2_non_sep(i_xyz, iat_of_kind) = force(ikind)%mp2_non_sep(i_xyz, iat_of_kind) &
3425 : + new_force
3426 : END IF
3427 :
3428 : DEALLOCATE (contr_blk)
3429 : END IF
3430 : DEALLOCATE (der_blk)
3431 : END DO !iter
3432 : CALL dbt_iterator_stop(iter)
3433 : END DO
3434 : !$OMP END PARALLEL
3435 3862 : CALL timestop(handle)
3436 :
3437 7724 : END SUBROUTINE get_force_from_3c_trace
3438 :
3439 : ! **************************************************************************************************
3440 : !> \brief Update the forces due to the derivative of the a 2-center product d/dR (Q|R)
3441 : !> \param force ...
3442 : !> \param t_2c_contr A precontracted tensor containing sum_abcdPS (ab|P)(P|Q)^-1 (R|S)^-1 (S|cd) P_ac P_bd
3443 : !> \param t_2c_der the d/dR (Q|R) tensor, in all 3 cartesian directions
3444 : !> \param atom_of_kind ...
3445 : !> \param kind_of ...
3446 : !> \param idx_to_at ...
3447 : !> \param pref ...
3448 : !> \param do_mp2 ...
3449 : !> \param do_ovlp ...
3450 : !> \note IMPORTANT: t_tc_contr and t_2c_der need to have the same distribution
3451 : ! **************************************************************************************************
3452 610 : SUBROUTINE get_2c_der_force(force, t_2c_contr, t_2c_der, atom_of_kind, kind_of, idx_to_at, &
3453 : pref, do_mp2, do_ovlp)
3454 :
3455 : TYPE(qs_force_type), DIMENSION(:), POINTER :: force
3456 : TYPE(dbt_type), INTENT(INOUT) :: t_2c_contr
3457 : TYPE(dbt_type), DIMENSION(3), INTENT(INOUT) :: t_2c_der
3458 : INTEGER, DIMENSION(:), INTENT(IN) :: atom_of_kind, kind_of, idx_to_at
3459 : REAL(dp), INTENT(IN) :: pref
3460 : LOGICAL, INTENT(IN), OPTIONAL :: do_mp2, do_ovlp
3461 :
3462 : CHARACTER(LEN=*), PARAMETER :: routineN = 'get_2c_der_force'
3463 :
3464 : INTEGER :: handle, i_xyz, iat, iat_of_kind, ikind, &
3465 : j_xyz, jat, jat_of_kind, jkind
3466 : INTEGER, DIMENSION(2) :: ind
3467 : LOGICAL :: do_mp2_prv, do_ovlp_prv, found
3468 : REAL(dp) :: new_force
3469 610 : REAL(dp), ALLOCATABLE, DIMENSION(:, :), TARGET :: contr_blk, der_blk
3470 : REAL(dp), DIMENSION(3) :: scoord
3471 : TYPE(dbt_iterator_type) :: iter
3472 :
3473 : !Loop over the blocks of d/dR (Q|R), contract with the corresponding block of t_2c_contr and
3474 : !update the relevant force
3475 :
3476 610 : CALL timeset(routineN, handle)
3477 :
3478 610 : do_mp2_prv = .FALSE.
3479 610 : IF (PRESENT(do_mp2)) do_mp2_prv = do_mp2
3480 :
3481 610 : do_ovlp_prv = .FALSE.
3482 610 : IF (PRESENT(do_ovlp)) do_ovlp_prv = do_ovlp
3483 :
3484 : !$OMP PARALLEL DEFAULT(NONE) &
3485 : !$OMP SHARED(t_2c_der,t_2c_contr,force,do_mp2_prv,do_ovlp_prv,pref,idx_to_at,atom_of_kind,kind_of) &
3486 : !$OMP PRIVATE(i_xyz,j_xyz,iter,ind,der_blk,contr_blk,found,new_force) &
3487 610 : !$OMP PRIVATE(iat,jat,iat_of_kind,jat_of_kind,ikind,jkind,scoord)
3488 : DO i_xyz = 1, 3
3489 : CALL dbt_iterator_start(iter, t_2c_der(i_xyz))
3490 : DO WHILE (dbt_iterator_blocks_left(iter))
3491 : CALL dbt_iterator_next_block(iter, ind)
3492 :
3493 : IF (ind(1) == ind(2)) CYCLE
3494 :
3495 : CALL dbt_get_block(t_2c_der(i_xyz), ind, der_blk, found)
3496 : CPASSERT(found)
3497 : CALL dbt_get_block(t_2c_contr, ind, contr_blk, found)
3498 :
3499 : IF (found) THEN
3500 :
3501 : !an element of d/dR (Q|R) corresponds to 2 things because of translational invariance
3502 : !(Q'| R) = - (Q| R'), once wrt the center on Q, and once on R
3503 : new_force = pref*SUM(der_blk(:, :)*contr_blk(:, :))
3504 :
3505 : iat = idx_to_at(ind(1))
3506 : iat_of_kind = atom_of_kind(iat)
3507 : ikind = kind_of(iat)
3508 :
3509 : IF (do_mp2_prv) THEN
3510 : !$OMP ATOMIC
3511 : force(ikind)%mp2_non_sep(i_xyz, iat_of_kind) = force(ikind)%mp2_non_sep(i_xyz, iat_of_kind) &
3512 : + new_force
3513 : ELSE IF (do_ovlp_prv) THEN
3514 : !$OMP ATOMIC
3515 : force(ikind)%overlap(i_xyz, iat_of_kind) = force(ikind)%overlap(i_xyz, iat_of_kind) &
3516 : + new_force
3517 : ELSE
3518 : !$OMP ATOMIC
3519 : force(ikind)%fock_4c(i_xyz, iat_of_kind) = force(ikind)%fock_4c(i_xyz, iat_of_kind) &
3520 : + new_force
3521 : END IF
3522 :
3523 : jat = idx_to_at(ind(2))
3524 : jat_of_kind = atom_of_kind(jat)
3525 : jkind = kind_of(jat)
3526 :
3527 : IF (do_mp2_prv) THEN
3528 : !$OMP ATOMIC
3529 : force(jkind)%mp2_non_sep(i_xyz, jat_of_kind) = force(jkind)%mp2_non_sep(i_xyz, jat_of_kind) &
3530 : - new_force
3531 : ELSE IF (do_ovlp_prv) THEN
3532 : !$OMP ATOMIC
3533 : force(jkind)%overlap(i_xyz, jat_of_kind) = force(jkind)%overlap(i_xyz, jat_of_kind) &
3534 : - new_force
3535 : ELSE
3536 : !$OMP ATOMIC
3537 : force(jkind)%fock_4c(i_xyz, jat_of_kind) = force(jkind)%fock_4c(i_xyz, jat_of_kind) &
3538 : - new_force
3539 : END IF
3540 :
3541 : DEALLOCATE (contr_blk)
3542 : END IF
3543 :
3544 : DEALLOCATE (der_blk)
3545 : END DO !iter
3546 : CALL dbt_iterator_stop(iter)
3547 :
3548 : END DO !i_xyz
3549 : !$OMP END PARALLEL
3550 610 : CALL timestop(handle)
3551 :
3552 1220 : END SUBROUTINE get_2c_der_force
3553 :
3554 : ! **************************************************************************************************
3555 : !> \brief Get the force from a contraction of type SUM_a,beta (a|beta') C_a,beta, where beta is an AO
3556 : !> and a is a MO
3557 : !> \param force ...
3558 : !> \param t_mo_coeff ...
3559 : !> \param t_2c_MO_AO ...
3560 : !> \param atom_of_kind ...
3561 : !> \param kind_of ...
3562 : !> \param idx_to_at ...
3563 : !> \param pref ...
3564 : !> \param i_xyz ...
3565 : ! **************************************************************************************************
3566 54 : SUBROUTINE get_MO_AO_force(force, t_mo_coeff, t_2c_MO_AO, atom_of_kind, kind_of, idx_to_at, pref, i_xyz)
3567 :
3568 : TYPE(qs_force_type), DIMENSION(:), POINTER :: force
3569 : TYPE(dbt_type), INTENT(INOUT) :: t_mo_coeff, t_2c_MO_AO
3570 : INTEGER, DIMENSION(:), INTENT(IN) :: atom_of_kind, kind_of, idx_to_at
3571 : REAL(dp), INTENT(IN) :: pref
3572 : INTEGER, INTENT(IN) :: i_xyz
3573 :
3574 : CHARACTER(LEN=*), PARAMETER :: routineN = 'get_MO_AO_force'
3575 :
3576 : INTEGER :: handle, iat, iat_of_kind, ikind, j_xyz
3577 : INTEGER, DIMENSION(2) :: ind
3578 : LOGICAL :: found
3579 : REAL(dp) :: new_force
3580 54 : REAL(dp), ALLOCATABLE, DIMENSION(:, :), TARGET :: mo_ao_blk, mo_coeff_blk
3581 : REAL(dp), DIMENSION(3) :: scoord
3582 : TYPE(dbt_iterator_type) :: iter
3583 :
3584 54 : CALL timeset(routineN, handle)
3585 :
3586 : !$OMP PARALLEL DEFAULT(NONE) &
3587 : !$OMP SHARED(t_2c_MO_AO,t_mo_coeff,pref,force,idx_to_at,atom_of_kind,kind_of,i_xyz) &
3588 54 : !$OMP PRIVATE(iter,ind,mo_ao_blk,mo_coeff_blk,found,new_force,iat,iat_of_kind,ikind,scoord,j_xyz)
3589 : CALL dbt_iterator_start(iter, t_2c_MO_AO)
3590 : DO WHILE (dbt_iterator_blocks_left(iter))
3591 : CALL dbt_iterator_next_block(iter, ind)
3592 :
3593 : CALL dbt_get_block(t_2c_MO_AO, ind, mo_ao_blk, found)
3594 : CPASSERT(found)
3595 : CALL dbt_get_block(t_mo_coeff, ind, mo_coeff_blk, found)
3596 :
3597 : IF (found) THEN
3598 :
3599 : new_force = pref*SUM(mo_ao_blk(:, :)*mo_coeff_blk(:, :))
3600 :
3601 : iat = idx_to_at(ind(2)) !AO index is column index
3602 : iat_of_kind = atom_of_kind(iat)
3603 : ikind = kind_of(iat)
3604 :
3605 : !$OMP ATOMIC
3606 : force(ikind)%fock_4c(i_xyz, iat_of_kind) = force(ikind)%fock_4c(i_xyz, iat_of_kind) &
3607 : + new_force
3608 :
3609 : DEALLOCATE (mo_coeff_blk)
3610 : END IF
3611 :
3612 : DEALLOCATE (mo_ao_blk)
3613 : END DO !iter
3614 : CALL dbt_iterator_stop(iter)
3615 : !$OMP END PARALLEL
3616 :
3617 54 : CALL timestop(handle)
3618 :
3619 108 : END SUBROUTINE get_MO_AO_force
3620 :
3621 : ! **************************************************************************************************
3622 : !> \brief Print RI-HFX quantities, as required by the PRINT subsection
3623 : !> \param ri_data ...
3624 : !> \param qs_env ...
3625 : ! **************************************************************************************************
3626 148 : SUBROUTINE print_ri_hfx(ri_data, qs_env)
3627 :
3628 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
3629 : TYPE(qs_environment_type), POINTER :: qs_env
3630 :
3631 : CHARACTER(Len=2) :: symbol
3632 : CHARACTER(Len=8) :: rifmt
3633 : INTEGER :: atype, i_RI, ia, ib, ibasis, ikind, &
3634 : iset, isgf, ishell, iso, l, m, natom, &
3635 : ncols, nkind, nrows, nset, nsgf, &
3636 : nspins, unit_nr
3637 : INTEGER, DIMENSION(3) :: periodic
3638 148 : INTEGER, DIMENSION(:), POINTER :: npgf, nshell
3639 148 : INTEGER, DIMENSION(:, :), POINTER :: lshell
3640 : LOGICAL :: mult_by_S, print_density, &
3641 : print_ri_metric, skip_ri_metric
3642 148 : REAL(dp), ALLOCATABLE, DIMENSION(:) :: density_coeffs, density_coeffs_2
3643 : REAL(dp), DIMENSION(3, 3) :: hmat
3644 148 : REAL(dp), DIMENSION(:, :), POINTER :: zet
3645 148 : REAL(dp), DIMENSION(:, :, :), POINTER :: gcc
3646 : TYPE(cell_type), POINTER :: cell
3647 : TYPE(cp_blacs_env_type), POINTER :: blacs_env
3648 : TYPE(cp_fm_struct_type), POINTER :: fm_struct
3649 : TYPE(cp_fm_type) :: matrix_s_fm
3650 : TYPE(cp_logger_type), POINTER :: logger
3651 148 : TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: rho_ao
3652 296 : TYPE(dbcsr_type), DIMENSION(1) :: matrix_s
3653 : TYPE(dft_control_type), POINTER :: dft_control
3654 : TYPE(gto_basis_set_p_type), ALLOCATABLE, &
3655 148 : DIMENSION(:), TARGET :: basis_set_AO, basis_set_RI
3656 : TYPE(gto_basis_set_type), POINTER :: orb_basis, ri_basis
3657 : TYPE(mp_para_env_type), POINTER :: para_env
3658 148 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
3659 148 : TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
3660 : TYPE(qs_rho_type), POINTER :: rho
3661 : TYPE(section_vals_type), POINTER :: input, print_section
3662 :
3663 148 : NULLIFY (rho_ao, input, print_section, logger, rho, particle_set, qs_kind_set, ri_basis, &
3664 148 : para_env, blacs_env, fm_struct, orb_basis, dft_control)
3665 :
3666 148 : CALL get_qs_env(qs_env, input=input, dft_control=dft_control)
3667 148 : logger => cp_get_default_logger()
3668 148 : print_density = .FALSE.
3669 148 : print_ri_metric = .FALSE.
3670 :
3671 : !Do we print the RI density coeffs and/or RI_metric 2c integrals?
3672 148 : print_section => section_vals_get_subs_vals(input, "DFT%XC%HF%RI%PRINT")
3673 148 : IF (BTEST(cp_print_key_should_output(logger%iter_info, print_section, "RI_DENSITY_COEFFS"), &
3674 0 : cp_p_file)) print_density = .TRUE.
3675 148 : IF (BTEST(cp_print_key_should_output(logger%iter_info, print_section, "RI_METRIC_2C_INTS"), &
3676 0 : cp_p_file)) print_ri_metric = .TRUE.
3677 :
3678 : !common stuff
3679 148 : IF (print_density .OR. print_ri_metric) THEN
3680 :
3681 : !Set up basis sets and interaction radii
3682 0 : CALL get_qs_env(qs_env, nkind=nkind, qs_kind_set=qs_kind_set, particle_set=particle_set)
3683 0 : ALLOCATE (basis_set_RI(nkind), basis_set_AO(nkind))
3684 0 : CALL basis_set_list_setup(basis_set_RI, ri_data%ri_basis_type, qs_kind_set)
3685 0 : CALL get_particle_set(particle_set, qs_kind_set, basis=basis_set_RI)
3686 0 : CALL basis_set_list_setup(basis_set_AO, ri_data%orb_basis_type, qs_kind_set)
3687 0 : CALL get_particle_set(particle_set, qs_kind_set, basis=basis_set_AO)
3688 :
3689 0 : DO ibasis = 1, nkind
3690 0 : ri_basis => basis_set_RI(ibasis)%gto_basis_set
3691 0 : CALL init_interaction_radii_orb_basis(ri_basis, ri_data%eps_pgf_orb)
3692 0 : orb_basis => basis_set_AO(ibasis)%gto_basis_set
3693 0 : CALL init_interaction_radii_orb_basis(orb_basis, ri_data%eps_pgf_orb)
3694 : END DO
3695 : END IF
3696 :
3697 0 : IF (print_density) THEN
3698 0 : CALL get_qs_env(qs_env, rho=rho)
3699 0 : CALL qs_rho_get(rho, rho_ao_kp=rho_ao)
3700 0 : nspins = SIZE(rho_ao, 1)
3701 :
3702 0 : CALL section_vals_val_get(print_section, "RI_DENSITY_COEFFS%MULTIPLY_BY_RI_2C_INTEGRALS", l_val=mult_by_s)
3703 0 : CALL section_vals_val_get(print_section, "RI_DENSITY_COEFFS%SKIP_RI_METRIC", l_val=skip_ri_metric)
3704 :
3705 0 : IF (mult_by_s .AND. skip_ri_metric) THEN
3706 0 : CPABORT("MULTIPLY_BY_RI_2C_INTEGRALS and SKIP_RI_METRIC are mutually exclusive.")
3707 : END IF
3708 :
3709 : CALL get_RI_density_coeffs(density_coeffs, rho_ao, 1, basis_set_AO, basis_set_RI, &
3710 0 : mult_by_s, skip_ri_metric, ri_data, qs_env)
3711 0 : IF (nspins == 2) &
3712 : CALL get_RI_density_coeffs(density_coeffs_2, rho_ao, 2, basis_set_AO, basis_set_RI, &
3713 0 : mult_by_s, skip_ri_metric, ri_data, qs_env)
3714 :
3715 : unit_nr = cp_print_key_unit_nr(logger, input, "DFT%XC%HF%RI%PRINT%RI_DENSITY_COEFFS", &
3716 : extension=".dat", file_status="REPLACE", &
3717 0 : file_action="WRITE", file_form="FORMATTED")
3718 :
3719 0 : CALL section_vals_val_get(print_section, "RI_DENSITY_COEFFS%FILE_FORMAT", c_val=rifmt)
3720 0 : CALL uppercase(rifmt)
3721 :
3722 0 : IF (unit_nr > 0) THEN
3723 0 : SELECT CASE (rifmt)
3724 : CASE DEFAULT
3725 0 : CPABORT("NA")
3726 : CASE ("BASIC")
3727 0 : IF (nspins == 1) THEN
3728 : WRITE (unit_nr, FMT="(A,A,A)") &
3729 0 : "# Coefficients of the electronic density projected on the RI_HFX basis for ", &
3730 0 : TRIM(logger%iter_info%project_name), " project"
3731 0 : DO i_RI = 1, SIZE(density_coeffs)
3732 0 : WRITE (unit_nr, FMT="(F20.12)") density_coeffs(i_RI)
3733 : END DO
3734 : ELSE
3735 : WRITE (unit_nr, FMT="(A,A,A)") &
3736 0 : "# Coefficients of the electronic density projected on the RI_HFX basis for ", &
3737 0 : TRIM(logger%iter_info%project_name), " project. Spin up, spin down"
3738 0 : DO i_RI = 1, SIZE(density_coeffs)
3739 0 : WRITE (unit_nr, FMT="(F20.12,F20.12)") density_coeffs(i_RI), density_coeffs_2(i_RI)
3740 : END DO
3741 : END IF
3742 : CASE ("EXTENDED")
3743 : WRITE (unit_nr, FMT="(A,A,A)") &
3744 0 : "# Coefficients of the electronic density projected on the RI_HFX basis for ", &
3745 0 : TRIM(logger%iter_info%project_name), " project"
3746 :
3747 0 : CALL get_qs_env(qs_env, cell=cell, particle_set=particle_set)
3748 0 : CALL get_cell(cell, periodic=periodic, h=hmat)
3749 0 : natom = SIZE(particle_set)
3750 0 : ib = 0
3751 0 : DO ia = 1, natom
3752 0 : CALL get_atomic_kind(particle_set(ia)%atomic_kind, kind_number=ikind)
3753 0 : ri_basis => basis_set_RI(ikind)%gto_basis_set
3754 0 : CALL get_gto_basis_set(gto_basis_set=ri_basis, nsgf=nsgf)
3755 0 : DO ibasis = 1, nsgf
3756 0 : ib = ib + 1
3757 0 : IF (nspins == 1) THEN
3758 0 : WRITE (unit_nr, FMT="(I10,3I7,F20.12)") ib, ia, ikind, ibasis, &
3759 0 : density_coeffs(ib)
3760 : ELSE
3761 0 : WRITE (unit_nr, FMT="(I10,3I7,F20.12,F20.12)") ib, ia, ikind, ibasis, &
3762 0 : density_coeffs(ib), density_coeffs_2(ib)
3763 : END IF
3764 : END DO
3765 : END DO
3766 0 : WRITE (unit_nr, FMT="(A)") "# Cell Periodicity "
3767 0 : WRITE (unit_nr, FMT="(3I5)") periodic
3768 0 : WRITE (unit_nr, FMT="(A)") "# Cell Matrix [Bohr]"
3769 0 : WRITE (unit_nr, FMT="(3F20.12)") hmat(1, 1:3)
3770 0 : WRITE (unit_nr, FMT="(3F20.12)") hmat(2, 1:3)
3771 0 : WRITE (unit_nr, FMT="(3F20.12)") hmat(3, 1:3)
3772 0 : WRITE (unit_nr, FMT="(A)") "# El Type Number Coordinates [Bohr]"
3773 0 : DO ia = 1, natom
3774 : CALL get_atomic_kind(atomic_kind=particle_set(ia)%atomic_kind, &
3775 0 : kind_number=atype, element_symbol=symbol)
3776 0 : WRITE (unit_nr, FMT="(2X,A2,I5,I10,3F20.12)") symbol, atype, ia, particle_set(ia)%r(1:3)
3777 : END DO
3778 0 : WRITE (unit_nr, FMT="(A)") "# Basis Set Information"
3779 0 : DO ibasis = 1, nkind
3780 0 : ri_basis => basis_set_RI(ibasis)%gto_basis_set
3781 : CALL get_gto_basis_set(gto_basis_set=ri_basis, nsgf=nsgf, npgf=npgf, &
3782 : zet=zet, gcc=gcc, &
3783 0 : nset=nset, nshell=nshell, l=lshell)
3784 0 : WRITE (unit_nr, FMT="(A)") "# Basis Functions"
3785 0 : WRITE (unit_nr, FMT="(I7,I15)") ibasis, nsgf
3786 0 : WRITE (unit_nr, FMT="(A)") "# Nr. l m set shell "
3787 0 : isgf = 0
3788 0 : DO iset = 1, nset
3789 0 : DO ishell = 1, nshell(iset)
3790 0 : l = lshell(ishell, iset)
3791 0 : DO iso = 1, nso(l)
3792 0 : isgf = isgf + 1
3793 0 : m = iso - 1 - l
3794 0 : WRITE (unit_nr, FMT="(I6,I7,I8,2I8)") isgf, l, m, iset, ishell
3795 : END DO
3796 : END DO
3797 : END DO
3798 0 : WRITE (unit_nr, FMT="(A)") "# Basis set exponents and contractions "
3799 0 : DO iset = 1, nset
3800 0 : WRITE (unit_nr, FMT="(I7)") iset
3801 0 : WRITE (unit_nr, FMT="(A)") "# Exponent Shells ... "
3802 0 : DO m = 1, npgf(iset)
3803 0 : WRITE (unit_nr, FMT="(E18.12,50F18.12)") zet(m, iset), gcc(m, 1:nshell(iset), iset)
3804 : END DO
3805 : END DO
3806 :
3807 : END DO
3808 : END SELECT
3809 : END IF
3810 :
3811 0 : CALL cp_print_key_finished_output(unit_nr, logger, input, "DFT%XC%HF%RI%PRINT%RI_DENSITY_COEFFS")
3812 : END IF
3813 :
3814 148 : IF (print_ri_metric) THEN
3815 : !Recalculated the RI_metric 2c-integrals, as it is cheap, and not stored
3816 0 : CALL calc_RI_2c_ints(matrix_s, basis_set_RI, ri_data, qs_env)
3817 :
3818 : !convert 2c integrals to fm for dumping
3819 0 : CALL get_qs_env(qs_env, para_env=para_env, blacs_env=blacs_env)
3820 0 : CALL dbcsr_get_info(matrix_s(1), nfullrows_total=nrows, nfullcols_total=ncols)
3821 : CALL cp_fm_struct_create(fm_struct, context=blacs_env, para_env=para_env, &
3822 0 : nrow_global=nrows, ncol_global=ncols)
3823 0 : CALL cp_fm_create(matrix_s_fm, fm_struct)
3824 :
3825 0 : CALL copy_dbcsr_to_fm(matrix_s(1), matrix_s_fm)
3826 0 : CALL dbcsr_release(matrix_s(1))
3827 :
3828 : unit_nr = cp_print_key_unit_nr(logger, input, "DFT%XC%HF%RI%PRINT%RI_METRIC_2C_INTS", &
3829 : extension=".fm", file_status="REPLACE", &
3830 0 : file_action="WRITE", file_form="UNFORMATTED")
3831 0 : CALL cp_fm_write_unformatted(matrix_s_fm, unit_nr)
3832 :
3833 0 : CALL cp_print_key_finished_output(unit_nr, logger, input, "DFT%XC%HF%RI%PRINT%RI_METRIC_2C_INTS")
3834 :
3835 0 : CALL cp_fm_struct_release(fm_struct)
3836 0 : CALL cp_fm_release(matrix_s_fm)
3837 : END IF
3838 :
3839 : !clean-up
3840 148 : IF (print_density .OR. print_ri_metric) THEN
3841 0 : DO ibasis = 1, nkind
3842 0 : ri_basis => basis_set_RI(ibasis)%gto_basis_set
3843 0 : CALL init_interaction_radii_orb_basis(ri_basis, dft_control%qs_control%eps_pgf_orb)
3844 0 : orb_basis => basis_set_AO(ibasis)%gto_basis_set
3845 0 : CALL init_interaction_radii_orb_basis(orb_basis, dft_control%qs_control%eps_pgf_orb)
3846 : END DO
3847 : END IF
3848 :
3849 444 : END SUBROUTINE print_ri_hfx
3850 :
3851 : ! **************************************************************************************************
3852 : !> \brief Calculate the RI metric 2-center integrals
3853 : !> \param matrix_s ...
3854 : !> \param basis_set_RI ...
3855 : !> \param ri_data ...
3856 : !> \param qs_env ...
3857 : ! **************************************************************************************************
3858 0 : SUBROUTINE calc_RI_2c_ints(matrix_s, basis_set_RI, ri_data, qs_env)
3859 :
3860 : TYPE(dbcsr_type), DIMENSION(1) :: matrix_s
3861 : TYPE(gto_basis_set_p_type), DIMENSION(:) :: basis_set_RI
3862 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
3863 : TYPE(qs_environment_type), POINTER :: qs_env
3864 :
3865 0 : INTEGER, DIMENSION(:), POINTER :: col_bsize, row_bsize
3866 : TYPE(dbcsr_distribution_type) :: dbcsr_dist
3867 : TYPE(distribution_2d_type), POINTER :: dist_2d
3868 : TYPE(neighbor_list_set_p_type), DIMENSION(:), &
3869 0 : POINTER :: nl_2c
3870 :
3871 0 : NULLIFY (nl_2c, dist_2d, row_bsize, col_bsize)
3872 :
3873 0 : CALL get_qs_env(qs_env, distribution_2d=dist_2d)
3874 0 : CALL cp_dbcsr_dist2d_to_dist(dist_2d, dbcsr_dist)
3875 0 : ALLOCATE (row_bsize(SIZE(ri_data%bsizes_RI)))
3876 0 : ALLOCATE (col_bsize(SIZE(ri_data%bsizes_RI)))
3877 0 : row_bsize(:) = ri_data%bsizes_RI
3878 0 : col_bsize(:) = ri_data%bsizes_RI
3879 :
3880 0 : CALL dbcsr_create(matrix_s(1), "RI metric", dbcsr_dist, dbcsr_type_symmetric, row_bsize, col_bsize)
3881 :
3882 : CALL build_2c_neighbor_lists(nl_2c, basis_set_RI, basis_set_RI, ri_data%ri_metric, &
3883 0 : "HFX_2c_nl_pot", qs_env, sym_ij=.TRUE., dist_2d=dist_2d)
3884 : CALL build_2c_integrals(matrix_s, ri_data%filter_eps_2c, qs_env, nl_2c, basis_set_RI, &
3885 0 : basis_set_RI, ri_data%ri_metric)
3886 :
3887 0 : CALL release_neighbor_list_sets(nl_2c)
3888 0 : CALL dbcsr_distribution_release(dbcsr_dist)
3889 0 : DEALLOCATE (row_bsize, col_bsize)
3890 :
3891 0 : END SUBROUTINE calc_RI_2c_ints
3892 :
3893 : ! **************************************************************************************************
3894 : !> \brief Projects the density on the RI basis and return the array of the RI coefficients
3895 : !> \param density_coeffs ...
3896 : !> \param rho_ao ...
3897 : !> \param ispin ...
3898 : !> \param basis_set_AO ...
3899 : !> \param basis_set_RI ...
3900 : !> \param multiply_by_S ...
3901 : !> \param skip_ri_metric ...
3902 : !> \param ri_data ...
3903 : !> \param qs_env ...
3904 : ! **************************************************************************************************
3905 0 : SUBROUTINE get_RI_density_coeffs(density_coeffs, rho_ao, ispin, basis_set_AO, basis_set_RI, &
3906 : multiply_by_S, skip_ri_metric, ri_data, qs_env)
3907 :
3908 : REAL(dp), ALLOCATABLE, DIMENSION(:) :: density_coeffs
3909 : TYPE(dbcsr_p_type), DIMENSION(:, :) :: rho_ao
3910 : INTEGER, INTENT(IN) :: ispin
3911 : TYPE(gto_basis_set_p_type), DIMENSION(:) :: basis_set_AO, basis_set_RI
3912 : LOGICAL, INTENT(IN) :: multiply_by_S, skip_ri_metric
3913 : TYPE(hfx_ri_type), INTENT(INOUT) :: ri_data
3914 : TYPE(qs_environment_type), POINTER :: qs_env
3915 :
3916 : CHARACTER(LEN=*), PARAMETER :: routineN = 'get_RI_density_coeffs'
3917 :
3918 : INTEGER :: a, b, handle, i_mem, idx, n_dependent, &
3919 : n_mem, n_mem_RI, natom, &
3920 : nblk_per_thread, nblks, nkind
3921 : INTEGER(int_8) :: nze
3922 0 : INTEGER, ALLOCATABLE, DIMENSION(:) :: batch_block_end, batch_block_start, &
3923 0 : dist1, dist2, dist3, dummy1, dummy2, &
3924 0 : idx1, idx2, idx3
3925 : INTEGER, DIMENSION(2) :: ind, pdims_2d
3926 : INTEGER, DIMENSION(2, 3) :: bounds_cpy
3927 : INTEGER, DIMENSION(3) :: dims_3c, pcoord_3d, pdims_3d
3928 : LOGICAL :: calc_ints, found
3929 : REAL(dp) :: occ, threshold
3930 0 : REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: blk
3931 0 : REAL(dp), ALLOCATABLE, DIMENSION(:, :, :) :: blk_3d
3932 : TYPE(cp_blacs_env_type), POINTER :: blacs_env
3933 : TYPE(dbcsr_type) :: ri_2c_inv
3934 0 : TYPE(dbcsr_type), DIMENSION(1) :: ri_2c_ints
3935 0 : TYPE(dbt_distribution_type) :: dist_2d, dist_3d
3936 : TYPE(dbt_iterator_type) :: iter
3937 0 : TYPE(dbt_pgrid_type) :: pgrid_2d, pgrid_3d
3938 0 : TYPE(dbt_type) :: density_coeffs_t, density_tmp, rho_ao_t, &
3939 0 : rho_ao_t_3d, rho_ao_tmp, t2c_ri_ints, &
3940 0 : t2c_ri_inv, t2c_ri_tmp
3941 0 : TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :) :: t_3c_int_batched
3942 : TYPE(dft_control_type), POINTER :: dft_control
3943 : TYPE(distribution_3d_type) :: dist_nl3c
3944 0 : TYPE(mp_cart_type) :: mp_comm_t3c
3945 : TYPE(mp_para_env_type), POINTER :: para_env
3946 : TYPE(neighbor_list_3c_type) :: nl_3c
3947 0 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
3948 0 : TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
3949 :
3950 0 : NULLIFY (dft_control, para_env, blacs_env, particle_set, qs_kind_set)
3951 :
3952 0 : CALL timeset(routineN, handle)
3953 :
3954 : ! Projection of the density on the RI basis: n(r) = sum_pq sum_munu P_pq (pq|mu) (mu|nu)^-1 nu(r)
3955 : ! = sum_nu d_nu nu(r)
3956 : ! the (pq|mu) (mu|nu)^-1 contraction is already stored in compressed format
3957 :
3958 0 : IF (.NOT. ri_data%flavor == ri_pmat) THEN
3959 0 : CPABORT("Can only calculate and print the RI density coefficients within the RHO flavor of RI-HFX")
3960 : END IF
3961 :
3962 : CALL get_qs_env(qs_env, dft_control=dft_control, para_env=para_env, blacs_env=blacs_env, nkind=nkind, &
3963 0 : particle_set=particle_set, qs_kind_set=qs_kind_set, natom=natom)
3964 0 : n_mem = ri_data%n_mem
3965 0 : n_mem_RI = ri_data%n_mem_RI
3966 :
3967 : ! Calculate RI 2c int tensor and its inverse. Skip this if requested
3968 0 : IF (.NOT. skip_ri_metric) THEN
3969 0 : CALL calc_RI_2c_ints(ri_2c_ints, basis_set_RI, ri_data, qs_env)
3970 0 : CALL dbcsr_create(ri_2c_inv, template=ri_2c_ints(1), matrix_type=dbcsr_type_no_symmetry)
3971 :
3972 0 : SELECT CASE (ri_data%t2c_method)
3973 : CASE (hfx_ri_do_2c_iter)
3974 0 : threshold = MAX(ri_data%filter_eps, 1.0e-12_dp)
3975 0 : CALL invert_hotelling(ri_2c_inv, ri_2c_ints(1), threshold=threshold, silent=.FALSE.)
3976 : CASE (hfx_ri_do_2c_cholesky)
3977 0 : CALL dbcsr_copy(ri_2c_inv, ri_2c_ints(1))
3978 0 : CALL cp_dbcsr_cholesky_decompose(ri_2c_inv, para_env=para_env, blacs_env=blacs_env)
3979 0 : CALL cp_dbcsr_cholesky_invert(ri_2c_inv, para_env=para_env, blacs_env=blacs_env, uplo_to_full=.TRUE.)
3980 : CASE (hfx_ri_do_2c_diag)
3981 0 : CALL dbcsr_copy(ri_2c_inv, ri_2c_ints(1))
3982 : CALL cp_dbcsr_power(ri_2c_inv, -1.0_dp, ri_data%eps_eigval, n_dependent, &
3983 0 : para_env, blacs_env, verbose=ri_data%unit_nr_dbcsr > 0)
3984 : END SELECT
3985 :
3986 0 : CALL dbt_create(ri_2c_ints(1), t2c_ri_tmp)
3987 : CALL create_2c_tensor(t2c_ri_ints, dist1, dist2, ri_data%pgrid_2d, &
3988 : ri_data%bsizes_RI_split, ri_data%bsizes_RI_split, &
3989 : name="(RI | RI)")
3990 0 : CALL dbt_create(t2c_ri_ints, t2c_ri_inv)
3991 :
3992 0 : CALL dbt_copy_matrix_to_tensor(ri_2c_ints(1), t2c_ri_tmp)
3993 0 : CALL dbt_copy(t2c_ri_tmp, t2c_ri_ints, move_data=.TRUE.)
3994 0 : CALL dbt_filter(t2c_ri_ints, ri_data%filter_eps)
3995 :
3996 0 : CALL dbt_copy_matrix_to_tensor(ri_2c_inv, t2c_ri_tmp)
3997 0 : CALL dbt_copy(t2c_ri_tmp, t2c_ri_inv, move_data=.TRUE.)
3998 0 : CALL dbt_filter(t2c_ri_inv, ri_data%filter_eps)
3999 :
4000 0 : CALL dbcsr_release(ri_2c_ints(1))
4001 0 : CALL dbcsr_release(ri_2c_inv)
4002 0 : CALL dbt_destroy(t2c_ri_tmp)
4003 0 : DEALLOCATE (dist1, dist2)
4004 : END IF
4005 :
4006 : ! The AO density tensor
4007 0 : CALL dbt_create(rho_ao(ispin, 1)%matrix, rho_ao_tmp)
4008 : CALL create_2c_tensor(rho_ao_t, dist1, dist2, ri_data%pgrid_2d, &
4009 : ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
4010 0 : name="(AO | AO)")
4011 0 : DEALLOCATE (dist1, dist2)
4012 :
4013 0 : CALL dbt_copy_matrix_to_tensor(rho_ao(ispin, 1)%matrix, rho_ao_tmp)
4014 0 : CALL dbt_copy(rho_ao_tmp, rho_ao_t, move_data=.TRUE.)
4015 0 : CALL dbt_filter(rho_ao_t, ri_data%filter_eps)
4016 0 : CALL dbt_destroy(rho_ao_tmp)
4017 :
4018 : ! Put in in 3D
4019 0 : ALLOCATE (dist1(SIZE(ri_data%bsizes_AO_split)), dist2(SIZE(ri_data%bsizes_AO_split)), dist3(1))
4020 0 : dist3(1) = 0
4021 0 : CALL dbt_get_info(rho_ao_t, pdims=pdims_2d, proc_dist_1=dist1, proc_dist_2=dist2)
4022 0 : CALL dbt_default_distvec(1, 1, [1], dist3)
4023 :
4024 0 : pdims_3d(1) = pdims_2d(1)
4025 0 : pdims_3d(2) = pdims_2d(2)
4026 0 : pdims_3d(3) = 1
4027 :
4028 0 : CALL dbt_pgrid_create(para_env, pdims_3d, pgrid_3d)
4029 0 : CALL dbt_distribution_new(dist_3d, pgrid_3d, dist1, dist2, dist3)
4030 : CALL dbt_create(rho_ao_t_3d, "rho_ao_3d", dist_3d, [1, 2], [3], ri_data%bsizes_AO_split, &
4031 0 : ri_data%bsizes_AO_split, [1])
4032 0 : DEALLOCATE (dist1, dist2, dist3)
4033 0 : CALL dbt_pgrid_destroy(pgrid_3d)
4034 0 : CALL dbt_distribution_destroy(dist_3d)
4035 :
4036 : ! copy density
4037 0 : nblks = 0
4038 : !$OMP PARALLEL DEFAULT(NONE) &
4039 : !$OMP SHARED(rho_ao_t,nblks) &
4040 0 : !$OMP PRIVATE(iter,ind,blk,found)
4041 : CALL dbt_iterator_start(iter, rho_ao_t)
4042 : DO WHILE (dbt_iterator_blocks_left(iter))
4043 : CALL dbt_iterator_next_block(iter, ind)
4044 : CALL dbt_get_block(rho_ao_t, ind, blk, found)
4045 : IF (found) THEN
4046 : !$OMP ATOMIC
4047 : nblks = nblks + 1
4048 : DEALLOCATE (blk)
4049 : END IF
4050 : END DO
4051 : CALL dbt_iterator_stop(iter)
4052 : !$OMP END PARALLEL
4053 :
4054 0 : ALLOCATE (idx1(nblks), idx2(nblks), idx3(nblks))
4055 0 : idx3 = 1
4056 0 : nblks = 0
4057 : !$OMP PARALLEL DEFAULT(NONE) &
4058 : !$OMP SHARED(rho_ao_t,nblks,idx1,idx2) &
4059 0 : !$OMP PRIVATE(iter,ind,blk,found)
4060 : CALL dbt_iterator_start(iter, rho_ao_t)
4061 : DO WHILE (dbt_iterator_blocks_left(iter))
4062 : CALL dbt_iterator_next_block(iter, ind)
4063 : CALL dbt_get_block(rho_ao_t, ind, blk, found)
4064 : IF (found) THEN
4065 : !$OMP CRITICAL
4066 : nblks = nblks + 1
4067 : idx1(nblks) = ind(1)
4068 : idx2(nblks) = ind(2)
4069 : !$OMP END CRITICAL
4070 : DEALLOCATE (blk)
4071 : END IF
4072 : END DO
4073 : CALL dbt_iterator_stop(iter)
4074 : !$OMP END PARALLEL
4075 :
4076 0 : !$OMP PARALLEL DEFAULT(NONE) SHARED(rho_ao_t_3d,nblks,idx1,idx2,idx3) PRIVATE(nblk_per_thread,A,b)
4077 : nblk_per_thread = nblks/omp_get_num_threads() + 1
4078 : a = omp_get_thread_num()*nblk_per_thread + 1
4079 : b = MIN(a + nblk_per_thread, nblks)
4080 : CALL dbt_reserve_blocks(rho_ao_t_3d, idx1(a:b), idx2(a:b), idx3(a:b))
4081 : !$OMP END PARALLEL
4082 :
4083 : !$OMP PARALLEL DEFAULT(NONE) &
4084 : !$OMP SHARED(rho_ao_t,rho_ao_t_3d) &
4085 0 : !$OMP PRIVATE(iter,ind,blk,found,blk_3d)
4086 : CALL dbt_iterator_start(iter, rho_ao_t)
4087 : DO WHILE (dbt_iterator_blocks_left(iter))
4088 : CALL dbt_iterator_next_block(iter, ind)
4089 : CALL dbt_get_block(rho_ao_t, ind, blk, found)
4090 : IF (found) THEN
4091 : ALLOCATE (blk_3d(SIZE(blk, 1), SIZE(blk, 2), 1))
4092 : blk_3d(:, :, 1) = blk(:, :)
4093 : !$OMP CRITICAL
4094 : CALL dbt_put_block(rho_ao_t_3d, [ind(1), ind(2), 1], [SIZE(blk, 1), SIZE(blk, 2), 1], blk_3d)
4095 : !$OMP END CRITICAL
4096 : DEALLOCATE (blk, blk_3d)
4097 : END IF
4098 : END DO
4099 : CALL dbt_iterator_stop(iter)
4100 : !$OMP END PARALLEL
4101 :
4102 : ! The 1D tensor with the density coeffs
4103 0 : pdims_2d(1) = para_env%num_pe
4104 0 : pdims_2d(2) = 1
4105 :
4106 0 : ALLOCATE (dist1(SIZE(ri_data%bsizes_RI_split)), dist2(1))
4107 0 : CALL dbt_default_distvec(SIZE(ri_data%bsizes_RI_split), pdims_2d(1), ri_data%bsizes_RI_split, dist1)
4108 0 : CALL dbt_default_distvec(1, pdims_2d(2), [1], dist2)
4109 :
4110 0 : CALL dbt_pgrid_create(para_env, pdims_2d, pgrid_2d)
4111 0 : CALL dbt_distribution_new(dist_2d, pgrid_2d, dist1, dist2)
4112 0 : CALL dbt_create(density_coeffs_t, "density_coeffs", dist_2d, [1], [2], ri_data%bsizes_RI_split, [1])
4113 0 : CALL dbt_create(density_coeffs_t, density_tmp)
4114 0 : DEALLOCATE (dist1, dist2)
4115 0 : CALL dbt_pgrid_destroy(pgrid_2d)
4116 0 : CALL dbt_distribution_destroy(dist_2d)
4117 :
4118 0 : CALL dbt_get_info(ri_data%t_3c_int_ctr_3(1, 1), nfull_total=dims_3c)
4119 :
4120 : ! The 3c integrals tensor, in case we compute them here
4121 0 : pdims_3d = 0
4122 0 : CALL dbt_pgrid_create(para_env, pdims_3d, pgrid_3d, tensor_dims=[MAX(1, natom/n_mem), natom, natom])
4123 0 : ALLOCATE (t_3c_int_batched(1, 1))
4124 : CALL create_3c_tensor(t_3c_int_batched(1, 1), dist1, dist2, dist3, pgrid_3d, &
4125 : ri_data%bsizes_RI, ri_data%bsizes_AO, ri_data%bsizes_AO, map1=[1], map2=[2, 3], &
4126 0 : name="(RI | AO AO)")
4127 :
4128 0 : CALL dbt_mp_environ_pgrid(pgrid_3d, pdims_3d, pcoord_3d)
4129 0 : CALL mp_comm_t3c%create(pgrid_3d%mp_comm_2d, 3, pdims_3d)
4130 : CALL distribution_3d_create(dist_nl3c, dist1, dist2, dist3, nkind, particle_set, &
4131 0 : mp_comm_t3c, own_comm=.TRUE.)
4132 0 : DEALLOCATE (dist1, dist2, dist3)
4133 0 : CALL dbt_pgrid_destroy(pgrid_3d)
4134 :
4135 : CALL build_3c_neighbor_lists(nl_3c, basis_set_RI, basis_set_AO, basis_set_AO, dist_nl3c, ri_data%ri_metric, &
4136 0 : "HFX_3c_nl", qs_env, op_pos=1, sym_jk=.TRUE., own_dist=.TRUE.)
4137 :
4138 0 : n_mem = ri_data%n_mem
4139 0 : CALL create_tensor_batches(ri_data%bsizes_RI, n_mem, dummy1, dummy2, batch_block_start, batch_block_end)
4140 :
4141 0 : calc_ints = .FALSE.
4142 0 : CALL get_tensor_occupancy(ri_data%t_3c_int_ctr_2(1, 1), nze, occ)
4143 0 : IF (nze == 0) calc_ints = .TRUE.
4144 :
4145 0 : DO i_mem = 1, n_mem
4146 0 : IF (calc_ints) THEN
4147 : CALL build_3c_integrals(t_3c_int_batched, ri_data%filter_eps, qs_env, nl_3c, &
4148 : basis_set_RI, basis_set_AO, basis_set_AO, &
4149 : ri_data%ri_metric, int_eps=ri_data%eps_schwarz, op_pos=1, &
4150 : desymmetrize=.FALSE., &
4151 0 : bounds_i=[batch_block_start(i_mem), batch_block_end(i_mem)])
4152 0 : CALL dbt_copy(t_3c_int_batched(1, 1), ri_data%t_3c_int_ctr_3(1, 1), order=[1, 3, 2])
4153 0 : CALL dbt_copy(t_3c_int_batched(1, 1), ri_data%t_3c_int_ctr_3(1, 1), move_data=.TRUE., summation=.TRUE.)
4154 0 : CALL dbt_filter(ri_data%t_3c_int_ctr_3(1, 1), ri_data%filter_eps)
4155 : ELSE
4156 : bounds_cpy(:, 2) = [SUM(ri_data%bsizes_RI(1:batch_block_start(i_mem) - 1)) + 1, &
4157 0 : SUM(ri_data%bsizes_RI(1:batch_block_end(i_mem)))]
4158 0 : bounds_cpy(:, 1) = [1, SUM(ri_data%bsizes_AO)]
4159 0 : bounds_cpy(:, 3) = [1, SUM(ri_data%bsizes_AO)]
4160 : CALL dbt_copy(ri_data%t_3c_int_ctr_2(1, 1), ri_data%t_3c_int_ctr_3(1, 1), &
4161 0 : order=[2, 1, 3], bounds=bounds_cpy)
4162 : END IF
4163 :
4164 : !contract the integrals with the density P_pq (pq|R)
4165 : CALL dbt_contract(1.0_dp, ri_data%t_3c_int_ctr_3(1, 1), rho_ao_t_3d, 0.0_dp, density_tmp, &
4166 : contract_1=[2, 3], notcontract_1=[1], &
4167 : contract_2=[1, 2], notcontract_2=[3], &
4168 0 : map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps)
4169 0 : CALL dbt_clear(ri_data%t_3c_int_ctr_3(1, 1))
4170 :
4171 0 : IF (skip_ri_metric) THEN
4172 0 : CALL dbt_copy(density_tmp, density_coeffs_t, move_data=.TRUE.)
4173 : ELSE
4174 : !contract the above vector with the inverse metric
4175 : CALL dbt_contract(1.0_dp, t2c_ri_inv, density_tmp, 1.0_dp, density_coeffs_t, &
4176 : contract_1=[2], notcontract_1=[1], &
4177 : contract_2=[1], notcontract_2=[2], &
4178 0 : map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps)
4179 : END IF
4180 :
4181 : END DO
4182 0 : CALL neighbor_list_3c_destroy(nl_3c)
4183 :
4184 0 : IF (multiply_by_s) THEN
4185 : CALL dbt_contract(1.0_dp, t2c_ri_ints, density_coeffs_t, 0.0_dp, density_tmp, &
4186 : contract_1=[2], notcontract_1=[1], &
4187 : contract_2=[1], notcontract_2=[2], &
4188 0 : map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps)
4189 0 : CALL dbt_copy(density_tmp, density_coeffs_t, move_data=.TRUE.)
4190 : END IF
4191 :
4192 0 : ALLOCATE (density_coeffs(SUM(ri_data%bsizes_RI)))
4193 0 : density_coeffs = 0.0
4194 :
4195 : !$OMP PARALLEL DEFAULT(NONE) &
4196 : !$OMP SHARED(density_coeffs_t,ri_data,density_coeffs) &
4197 0 : !$OMP PRIVATE(iter,ind,blk,found,idx)
4198 : CALL dbt_iterator_start(iter, density_coeffs_t)
4199 : DO WHILE (dbt_iterator_blocks_left(iter))
4200 : CALL dbt_iterator_next_block(iter, ind)
4201 : CALL dbt_get_block(density_coeffs_t, ind, blk, found)
4202 : IF (found) THEN
4203 :
4204 : idx = SUM(ri_data%bsizes_RI_split(1:ind(1) - 1))
4205 : !$OMP CRITICAL
4206 : density_coeffs(idx + 1:idx + ri_data%bsizes_RI_split(ind(1))) = blk(:, 1)
4207 : !$OMP END CRITICAL
4208 : DEALLOCATE (blk)
4209 : END IF
4210 : END DO
4211 : CALL dbt_iterator_stop(iter)
4212 : !$OMP END PARALLEL
4213 0 : CALL para_env%sum(density_coeffs)
4214 :
4215 0 : CALL dbt_destroy(density_tmp)
4216 0 : CALL dbt_destroy(rho_ao_t)
4217 0 : CALL dbt_destroy(rho_ao_t_3d)
4218 0 : CALL dbt_destroy(density_coeffs_t)
4219 0 : CALL dbt_destroy(t_3c_int_batched(1, 1))
4220 :
4221 0 : IF (.NOT. skip_ri_metric) THEN
4222 0 : CALL dbt_destroy(t2c_ri_ints)
4223 0 : CALL dbt_destroy(t2c_ri_inv)
4224 : END IF
4225 :
4226 0 : CALL timestop(handle)
4227 :
4228 0 : END SUBROUTINE get_RI_density_coeffs
4229 :
4230 : ! **************************************************************************************************
4231 : !> \brief a small utility function that returns the atom corresponding to a block of a split tensor
4232 : !> \param idx_to_at ...
4233 : !> \param bsizes_split ...
4234 : !> \param bsizes_orig ...
4235 : !> \return ...
4236 : ! **************************************************************************************************
4237 936 : SUBROUTINE get_idx_to_atom(idx_to_at, bsizes_split, bsizes_orig)
4238 : INTEGER, DIMENSION(:), INTENT(INOUT) :: idx_to_at
4239 : INTEGER, DIMENSION(:), INTENT(IN) :: bsizes_split, bsizes_orig
4240 :
4241 : INTEGER :: full_sum, iat, iblk, split_sum
4242 :
4243 936 : iat = 1
4244 936 : full_sum = bsizes_orig(iat)
4245 936 : split_sum = 0
4246 4662 : DO iblk = 1, SIZE(bsizes_split)
4247 3726 : split_sum = split_sum + bsizes_split(iblk)
4248 :
4249 3726 : IF (split_sum .GT. full_sum) THEN
4250 1340 : iat = iat + 1
4251 1340 : full_sum = full_sum + bsizes_orig(iat)
4252 : END IF
4253 :
4254 4662 : idx_to_at(iblk) = iat
4255 : END DO
4256 :
4257 936 : END SUBROUTINE get_idx_to_atom
4258 :
4259 : ! **************************************************************************************************
4260 : !> \brief Function for calculating sqrt of a matrix
4261 : !> \param values ...
4262 : !> \return ...
4263 : ! **************************************************************************************************
4264 0 : FUNCTION my_sqrt(values)
4265 : REAL(KIND=dp), DIMENSION(:), INTENT(IN) :: values
4266 : REAL(KIND=dp), DIMENSION(SIZE(values)) :: my_sqrt
4267 :
4268 0 : my_sqrt = SQRT(values)
4269 : END FUNCTION
4270 :
4271 : ! **************************************************************************************************
4272 : !> \brief Function for calculation inverse sqrt of a matrix
4273 : !> \param values ...
4274 : !> \return ...
4275 : ! **************************************************************************************************
4276 0 : FUNCTION my_invsqrt(values)
4277 : REAL(KIND=dp), DIMENSION(:), INTENT(IN) :: values
4278 : REAL(KIND=dp), DIMENSION(SIZE(values)) :: my_invsqrt
4279 :
4280 0 : my_invsqrt = SQRT(1.0_dp/values)
4281 : END FUNCTION
4282 328 : END MODULE
|