Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2026 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief Routines to calculate HFX energy and potential
10 : !> \par History
11 : !> 11.2006 created [Manuel Guidon]
12 : !> 10.2025 added libGint interface [M Puligheddu]
13 : !> \author Manuel Guidon
14 : ! **************************************************************************************************
15 : MODULE hfx_energy_potential
16 : USE admm_types, ONLY: get_admm_env
17 : USE atomic_kind_types, ONLY: atomic_kind_type, &
18 : get_atomic_kind_set
19 : USE bibliography, ONLY: cite_reference, &
20 : guidon2008, &
21 : guidon2009
22 : USE cell_types, ONLY: cell_type, &
23 : pbc
24 : USE cp_control_types, ONLY: dft_control_type
25 : USE cp_files, ONLY: close_file, &
26 : open_file
27 : USE cp_log_handling, ONLY: cp_get_default_logger, &
28 : cp_logger_type
29 : USE cp_output_handling, ONLY: cp_p_file, &
30 : cp_print_key_finished_output, &
31 : cp_print_key_should_output, &
32 : cp_print_key_unit_nr
33 : USE message_passing, ONLY: mp_para_env_type
34 : USE cp_dbcsr_api, ONLY: dbcsr_copy, &
35 : dbcsr_get_matrix_type, &
36 : dbcsr_p_type, &
37 : dbcsr_type_antisymmetric, &
38 : dbcsr_dot_threadsafe
39 : USE gamma, ONLY: init_md_ftable
40 : USE hfx_communication, ONLY: distribute_ks_matrix, &
41 : get_atomic_block_maps, &
42 : get_full_density
43 : USE hfx_compression_methods, ONLY: hfx_add_mult_cache_elements, &
44 : hfx_add_single_cache_element, &
45 : hfx_decompress_first_cache, &
46 : hfx_flush_last_cache, &
47 : hfx_get_mult_cache_elements, &
48 : hfx_get_single_cache_element, &
49 : hfx_reset_cache_and_container
50 : USE hfx_contract_block, ONLY: contract_block
51 : USE hfx_libint_interface, ONLY: evaluate_eri
52 : USE hfx_load_balance_methods, ONLY: collect_load_balance_info, &
53 : hfx_load_balance, &
54 : hfx_update_load_balance
55 : USE hfx_pair_list_methods, ONLY: build_atomic_pair_list, &
56 : build_pair_list, &
57 : build_pair_list_pgf, &
58 : build_pgf_product_list, &
59 : pgf_product_list_size
60 : USE hfx_screening_methods, ONLY: calc_pair_dist_radii, &
61 : calc_screening_functions, &
62 : update_pmax_mat
63 : USE hfx_types, ONLY: &
64 : alloc_containers, dealloc_containers, hfx_basis_info_type, hfx_basis_type, hfx_cache_type, &
65 : hfx_cell_type, hfx_container_type, hfx_create_neighbor_cells, hfx_distribution, &
66 : hfx_general_type, hfx_init_container, hfx_load_balance_type, hfx_memory_type, hfx_p_kind, &
67 : hfx_pgf_list, hfx_pgf_product_list, hfx_potential_type, hfx_reset_memory_usage_counter, &
68 : hfx_screen_coeff_type, hfx_screening_type, hfx_task_list_type, hfx_type, init_t_c_g0_lmax, &
69 : log_zero, pair_list_type, pair_set_list_type
70 :
71 : USE input_constants, ONLY: do_potential_mix_cl_trunc, &
72 : do_potential_truncated, &
73 : do_potential_TShPSC, &
74 : do_potential_short, &
75 : do_potential_coulomb, &
76 : hfx_do_eval_energy, &
77 : hfx_library_is_libint, hfx_library_is_libGint, hfx_library_is_both
78 :
79 : USE input_section_types, ONLY: section_vals_type
80 : USE kinds, ONLY: default_string_length, &
81 : dp, &
82 : int_8
83 : USE kpoint_types, ONLY: get_kpoint_info, &
84 : kpoint_type
85 : USE libint_wrapper, ONLY: cp_libint_t
86 : USE machine, ONLY: m_flush, &
87 : m_memory, &
88 : m_walltime
89 : USE mathconstants, ONLY: fac
90 : USE orbital_pointers, ONLY: nco, &
91 : ncoset, &
92 : nso
93 : USE particle_types, ONLY: particle_type
94 : USE qs_environment_types, ONLY: get_qs_env, &
95 : qs_environment_type
96 : USE qs_ks_types, ONLY: get_ks_env, &
97 : qs_ks_env_type
98 : USE t_c_g0, ONLY: init
99 : USE util, ONLY: sort
100 : use, intrinsic :: iso_c_binding, only: c_ptr
101 :
102 : use cp2k_info, only: cp2k_flags
103 : use offload_api, only: offload_get_chosen_device, offload_set_chosen_device, offload_activate_chosen_device
104 : use libGint_wrapper, only: cp_libGint_init, libGint_update_env, libGint_set_density, libGint_coulomb4, &
105 : libGint_update_fock_matrix, libGint_get_fock_matrix
106 :
107 : !$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num, omp_get_num_threads
108 :
109 : #include "./base/base_uses.f90"
110 :
111 : IMPLICIT NONE
112 : PRIVATE
113 :
114 : PUBLIC :: integrate_four_center, coulomb4
115 :
116 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'hfx_energy_potential'
117 :
118 : !***
119 :
120 : CONTAINS
121 :
122 : ! **************************************************************************************************
123 : !> \brief computes four center integrals for a full basis set and updates the
124 : !> Kohn-Sham-Matrix and energy. Uses all 8 eri symmetries
125 : !> \param qs_env ...
126 : !> \param x_data ...
127 : !> \param ks_matrix ...
128 : !> \param ehfx energy calculated with the updated HFX matrix
129 : !> \param rho_ao density matrix in ao basis
130 : !> \param hfx_section input_section HFX
131 : !> \param para_env ...
132 : !> \param geometry_did_change flag that indicates we have to recalc integrals
133 : !> \param irep Index for the HFX replica
134 : !> \param distribute_fock_matrix Flag that indicates whether to communicate the
135 : !> new fock matrix or not
136 : !> \param ispin ...
137 : !> \par History
138 : !> 06.2007 created [Manuel Guidon]
139 : !> 08.2007 optimized load balance [Manuel Guidon]
140 : !> 09.2007 new parallelization [Manuel Guidon]
141 : !> 02.2009 completely rewritten screening part [Manuel Guidon]
142 : !> 12.2017 major bug fix. removed wrong cycle that was caussing segfault.
143 : !> see https://groups.google.com/forum/#!topic/cp2k/pc6B14XOALY
144 : !> [Tobias Binninger + Valery Weber]
145 : !> \author Manuel Guidon
146 : ! **************************************************************************************************
147 40523 : SUBROUTINE integrate_four_center(qs_env, x_data, ks_matrix, ehfx, rho_ao, hfx_section, para_env, &
148 : geometry_did_change, irep, distribute_fock_matrix, &
149 : ispin, nspins)
150 :
151 : TYPE(qs_environment_type), POINTER :: qs_env
152 : TYPE(hfx_type), DIMENSION(:, :), POINTER :: x_data
153 : TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: ks_matrix
154 : REAL(KIND=dp), INTENT(OUT) :: ehfx
155 : TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: rho_ao
156 : TYPE(section_vals_type), POINTER :: hfx_section
157 : TYPE(mp_para_env_type), POINTER :: para_env
158 : LOGICAL :: geometry_did_change
159 : INTEGER :: irep
160 : LOGICAL, INTENT(IN) :: distribute_fock_matrix
161 : INTEGER, INTENT(IN) :: ispin
162 : INTEGER, INTENT(IN), OPTIONAL :: nspins
163 :
164 : CHARACTER(LEN=*), PARAMETER :: routineN = 'integrate_four_center'
165 :
166 : CHARACTER(len=default_string_length) :: eps_scaling_str, eps_schwarz_min_str
167 : INTEGER :: act_atomic_block_offset, act_set_offset, atomic_offset_ac, atomic_offset_ad, &
168 : atomic_offset_bc, atomic_offset_bd, bin, bits_max_val, buffer_left, buffer_size, &
169 : buffer_start, cache_size, current_counter, handle, handle_bin, handle_dist_ks, &
170 : handle_getP, handle_load, handle_main, i, inla, nla, i_list_ij, i_list_kl, i_set_list_ij, &
171 : i_set_list_ij_start, i_set_list_ij_stop, i_set_list_kl, i_set_list_kl_start, &
172 : i_set_list_kl_stop, i_thread, iatom, iatom_block, iatom_end, iatom_start, ikind, img, &
173 : iset, iw, j, jatom, jatom_block, jatom_end, jatom_start, jkind, jset, katom, katom_block, &
174 : katom_end
175 : INTEGER :: katom_start, kind_kind_idx, kkind, kset, l_max, latom, latom_block, latom_end, &
176 : latom_start, lkind, lset, ma, max_am, max_pgf, max_set, mb, my_bin_id, my_bin_size, &
177 : my_thread_id, n_threads, natom, nbits, ncob, ncos_max, nints, nkimages, nkind, &
178 : nneighbors, nseta, nsetb, nsgf_max, my_nspins, pa, sgfb, shm_task_counter, shm_total_bins, &
179 : sphi_a_u1, sphi_a_u2, sphi_a_u3, sphi_b_u1, sphi_b_u2, sphi_b_u3, sphi_c_u1, sphi_c_u2, &
180 : sphi_c_u3, sphi_d_u1, sphi_d_u2, sphi_d_u3, swap_id, tmp_i4, unit_id
181 : INTEGER(int_8) :: atom_block, counter, estimate_to_store_int, max_val_memory, &
182 : mb_size_buffers, mb_size_f, mb_size_p, mem_compression_counter, &
183 : mem_compression_counter_disk, mem_eris, mem_eris_disk, mem_max_val, memsize_after, &
184 : memsize_before, my_current_counter, my_istart, n_processes, nblocks, ncpu, neris_disk, &
185 : neris_incore, neris_onthefly, neris_tmp, neris_total, nprim_ints, &
186 : shm_mem_compression_counter, shm_neris_disk, shm_neris_incore, shm_neris_onthefly, &
187 : shm_neris_total, shm_nprim_ints, shm_stor_count_int_disk, shm_stor_count_max_val, &
188 : shm_storage_counter_integrals, stor_count_int_disk
189 : INTEGER(int_8) :: stor_count_max_val, storage_counter_integrals, subtr_size_mb, tmp_block, &
190 : tmp_i8(8)
191 40523 : INTEGER(int_8), ALLOCATABLE, DIMENSION(:) :: tmp_task_list_cost
192 81046 : INTEGER, ALLOCATABLE, DIMENSION(:) :: kind_of, last_sgf_global, nimages, &
193 81046 : tmp_index, first_set_of_atom
194 40523 : INTEGER, DIMENSION(:), POINTER :: la_max, la_min, lb_max, lb_min, lc_max, lc_min, ld_max, &
195 81046 : ld_min, npgfa, npgfb, npgfc, npgfd, nsgfa, nsgfb, nsgfc, nsgfd, shm_block_offset
196 81046 : INTEGER, DIMENSION(:, :), POINTER :: first_sgfb, nsgfl_a, nsgfl_b, nsgfl_c, nsgfl_d, &
197 40523 : offset_ac_set, offset_ad_set, offset_bc_set, offset_bd_set, shm_atomic_block_offset
198 : INTEGER, DIMENSION(:, :), POINTER, SAVE :: shm_is_assoc_atomic_block
199 40523 : INTEGER, DIMENSION(:, :, :), POINTER :: cell_to_index
200 40523 : INTEGER, DIMENSION(:, :, :, :), POINTER :: shm_set_offset
201 : INTEGER, SAVE :: shm_number_of_p_entries
202 : LOGICAL :: bins_left, buffer_overflow, do_disk_storage, do_dynamic_load_balancing, do_it, &
203 : do_kpoints, do_p_screening, do_periodic, do_print_load_balance_info, is_anti_symmetric, &
204 : ks_fully_occ, my_geo_change, treat_lsd_in_core, use_disk_storage
205 40523 : LOGICAL, DIMENSION(:, :), POINTER :: shm_atomic_pair_list
206 : REAL(dp) :: afac, bintime_start, bintime_stop, cartesian_estimate, compression_factor, &
207 : compression_factor_disk, ene_x_aa, ene_x_aa_diag, ene_x_bb, ene_x_bb_diag, eps_schwarz, &
208 : eps_storage, etmp, fac, hf_fraction, ln_10, log10_eps_schwarz, log10_pmax, &
209 : max_contraction_val, max_val1, max_val2, max_val2_set, pmax_atom, pmax_blocks, &
210 : pmax_entry, ra(3), rab2, rb(3), rc(3), rcd2, rd(3), screen_kind_ij, screen_kind_kl, &
211 : spherical_estimate, symm_fac
212 40523 : REAL(dp), ALLOCATABLE, DIMENSION(:) :: ee_buffer1, ee_buffer2, ee_primitives_tmp, ee_work, &
213 81046 : ee_work2, kac_buf, kad_buf, kbc_buf, kbd_buf, pac_buf, pad_buf, pbc_buf, pbd_buf, &
214 40523 : primitive_integrals
215 40523 : REAL(dp), DIMENSION(:), POINTER :: p_work
216 40523 : REAL(dp), DIMENSION(:, :), POINTER :: full_density_alpha, full_density_beta, full_ks_alpha, &
217 81046 : full_ks_beta, max_contraction, ptr_p_1, ptr_p_2, ptr_p_3, ptr_p_4, shm_pmax_atom, &
218 40523 : shm_pmax_block, sphi_b, zeta, zetb, zetc, zetd, &
219 40523 : full_ks_alpha_from_gpu, full_ks_beta_from_gpu
220 40523 : REAL(dp), DIMENSION(:, :, :), POINTER :: sphi_a_ext_set, sphi_b_ext_set, &
221 40523 : sphi_c_ext_set, sphi_d_ext_set
222 40523 : REAL(dp), DIMENSION(:, :, :, :), POINTER :: sphi_a_ext, sphi_b_ext, sphi_c_ext, &
223 40523 : sphi_d_ext
224 40523 : REAL(dp), DIMENSION(:, :, :), POINTER :: gcc
225 : REAL(KIND=dp) :: coeffs_kind_max0
226 40523 : TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
227 : TYPE(cell_type), POINTER :: cell
228 : TYPE(cp_libint_t) :: private_lib
229 : TYPE(cp_logger_type), POINTER :: logger
230 40523 : TYPE(dbcsr_p_type), DIMENSION(:), POINTER :: matrix_ks_aux_fit_hfx
231 : TYPE(dft_control_type), POINTER :: dft_control
232 : TYPE(hfx_basis_info_type), POINTER :: basis_info
233 40523 : TYPE(hfx_basis_type), DIMENSION(:), POINTER :: basis_parameter
234 40523 : TYPE(hfx_cache_type), DIMENSION(:), POINTER :: integral_caches, integral_caches_disk
235 : TYPE(hfx_cache_type), POINTER :: maxval_cache, maxval_cache_disk
236 40523 : TYPE(hfx_container_type), DIMENSION(:), POINTER :: integral_containers, &
237 40523 : integral_containers_disk
238 : TYPE(hfx_container_type), POINTER :: maxval_container, maxval_container_disk
239 : TYPE(hfx_distribution), POINTER :: distribution_energy
240 : TYPE(hfx_general_type) :: general_parameter
241 : TYPE(hfx_load_balance_type), POINTER :: load_balance_parameter
242 : TYPE(hfx_memory_type), POINTER :: memory_parameter
243 40523 : TYPE(hfx_p_kind), DIMENSION(:), POINTER :: shm_initial_p
244 40523 : TYPE(hfx_pgf_list), ALLOCATABLE, DIMENSION(:) :: pgf_list_ij, pgf_list_kl
245 : TYPE(hfx_pgf_product_list), ALLOCATABLE, &
246 40523 : DIMENSION(:) :: pgf_product_list
247 : TYPE(hfx_potential_type) :: potential_parameter
248 : TYPE(hfx_screen_coeff_type), DIMENSION(:, :), &
249 40523 : POINTER :: screen_coeffs_kind, tmp_R_1, tmp_R_2, &
250 40523 : tmp_screen_pgf1, tmp_screen_pgf2
251 : TYPE(hfx_screen_coeff_type), &
252 40523 : DIMENSION(:, :, :, :), POINTER :: screen_coeffs_set
253 : TYPE(hfx_screen_coeff_type), &
254 40523 : DIMENSION(:, :, :, :, :, :), POINTER :: radii_pgf, screen_coeffs_pgf
255 : TYPE(hfx_screening_type) :: screening_parameter
256 40523 : TYPE(hfx_task_list_type), DIMENSION(:), POINTER :: shm_task_list, tmp_task_list
257 : TYPE(hfx_type), POINTER :: actual_x_data, shm_master_x_data
258 : TYPE(kpoint_type), POINTER :: kpoints
259 : TYPE(pair_list_type) :: list_ij, list_kl
260 : TYPE(pair_set_list_type), ALLOCATABLE, &
261 40523 : DIMENSION(:) :: set_list_ij, set_list_kl
262 40523 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
263 : TYPE(qs_ks_env_type), POINTER :: ks_env
264 :
265 : logical :: screened
266 : real(dp) :: max_abs_delta_KS
267 : integer :: iatom_set, jatom_set, katom_set, latom_set, shared_dev
268 : logical :: use_libint, use_libgint, use_only_alpha_spin
269 : logical :: cp2k_was_compiled_with_cuda_offload, cp2k_was_compiled_with_libint
270 : logical :: cp2k_was_compiled_with_libGint
271 :
272 40523 : NULLIFY (dft_control, matrix_ks_aux_fit_hfx)
273 :
274 40523 : CALL timeset(routineN, handle)
275 :
276 40523 : CALL cite_reference(Guidon2008)
277 40523 : CALL cite_reference(Guidon2009)
278 :
279 40523 : ehfx = 0.0_dp
280 :
281 : ! ** This is not very clean, but effective. ispin can only be 2 if we do the beta spin part in core
282 40523 : my_geo_change = geometry_did_change
283 40523 : IF (ispin == 2) my_geo_change = .FALSE.
284 :
285 40523 : logger => cp_get_default_logger()
286 :
287 40523 : is_anti_symmetric = dbcsr_get_matrix_type(rho_ao(1, 1)%matrix) == dbcsr_type_antisymmetric
288 :
289 40523 : IF (my_geo_change) THEN
290 2448 : CALL m_memory(memsize_before)
291 2448 : CALL para_env%max(memsize_before)
292 : iw = cp_print_key_unit_nr(logger, hfx_section, "HF_INFO", &
293 2448 : extension=".scfLog")
294 2448 : IF (iw > 0) THEN
295 : WRITE (UNIT=iw, FMT="(/,(T3,A,T60,I21))") &
296 442 : "HFX_MEM_INFO| Est. max. program size before HFX [MiB]:", memsize_before/(1024*1024)
297 442 : CALL m_flush(iw)
298 : END IF
299 : CALL cp_print_key_finished_output(iw, logger, hfx_section, &
300 2448 : "HF_INFO")
301 : END IF
302 :
303 40523 : CALL get_qs_env(qs_env=qs_env, atomic_kind_set=atomic_kind_set, cell=cell)
304 :
305 40523 : NULLIFY (cell_to_index)
306 40523 : CALL get_qs_env(qs_env=qs_env, do_kpoints=do_kpoints)
307 40523 : IF (do_kpoints) THEN
308 0 : CALL get_qs_env(qs_env=qs_env, ks_env=ks_env)
309 0 : CALL get_ks_env(ks_env=ks_env, kpoints=kpoints)
310 0 : CALL get_kpoint_info(kpoint=kpoints, cell_to_index=cell_to_index)
311 : END IF
312 :
313 : !! Calculate l_max used in fgamma , because init_md_ftable is definitely not thread safe
314 40523 : nkind = SIZE(atomic_kind_set, 1)
315 40523 : l_max = 0
316 115369 : DO ikind = 1, nkind
317 337315 : l_max = MAX(l_max, MAXVAL(x_data(1, 1)%basis_parameter(ikind)%lmax))
318 : END DO
319 40523 : l_max = 4*l_max
320 40523 : CALL init_md_ftable(l_max)
321 :
322 40523 : IF (x_data(1, 1)%potential_parameter%potential_type == do_potential_truncated .OR. &
323 : x_data(1, 1)%potential_parameter%potential_type == do_potential_mix_cl_trunc) THEN
324 10094 : IF (l_max > init_t_c_g0_lmax) THEN
325 322 : IF (para_env%is_source()) THEN
326 161 : CALL open_file(unit_number=unit_id, file_name=x_data(1, 1)%potential_parameter%filename)
327 : END IF
328 322 : CALL init(l_max, unit_id, para_env%mepos, para_env)
329 322 : IF (para_env%is_source()) THEN
330 161 : CALL close_file(unit_id)
331 : END IF
332 322 : init_t_c_g0_lmax = l_max
333 : END IF
334 : END IF
335 :
336 40523 : n_threads = 1
337 40523 : !$ n_threads = omp_get_max_threads()
338 :
339 : ! This initialization is needed to prevent a segmentation fault. The correct assigment is done below
340 40523 : my_nspins = 0
341 40523 : IF (PRESENT(nspins)) my_nspins = nspins
342 :
343 40523 : shm_neris_total = 0
344 40523 : shm_nprim_ints = 0
345 40523 : shm_neris_onthefly = 0
346 40523 : shm_storage_counter_integrals = 0
347 40523 : shm_stor_count_int_disk = 0
348 40523 : shm_neris_incore = 0
349 40523 : shm_neris_disk = 0
350 40523 : shm_stor_count_max_val = 0
351 :
352 : !$OMP PARALLEL DEFAULT(OMP_DEFAULT_NONE_WITH_OOP) &
353 : !$OMP SHARED(qs_env,&
354 : !$OMP x_data,&
355 : !$OMP ks_matrix,&
356 : !$OMP ehfx,&
357 : !$OMP rho_ao,&
358 : !$OMP matrix_ks_aux_fit_hfx,&
359 : !$OMP hfx_section,&
360 : !$OMP para_env,&
361 : !$OMP my_geo_change,&
362 : !$OMP irep,&
363 : !$OMP distribute_fock_matrix,&
364 : !$OMP cell_to_index,&
365 : !$OMP ncoset,&
366 : !$OMP nso,&
367 : !$OMP nco,&
368 : !$OMP full_ks_alpha,&
369 : !$OMP full_ks_alpha_from_gpu,&
370 : !$OMP full_ks_beta_from_gpu,&
371 : !$OMP full_ks_beta,&
372 : !$OMP n_threads,&
373 : !$OMP full_density_alpha,&
374 : !$OMP full_density_beta,&
375 : !$OMP shm_initial_p,&
376 : !$OMP shm_is_assoc_atomic_block,&
377 : !$OMP shm_number_of_p_entries,&
378 : !$OMP shm_neris_total,&
379 : !$OMP shm_neris_onthefly,&
380 : !$OMP shm_storage_counter_integrals,&
381 : !$OMP shm_stor_count_int_disk,&
382 : !$OMP shm_neris_incore,&
383 : !$OMP shm_neris_disk,&
384 : !$OMP shm_nprim_ints,&
385 : !$OMP shm_stor_count_max_val,&
386 : !$OMP cell,&
387 : !$OMP screen_coeffs_set,&
388 : !$OMP screen_coeffs_kind,&
389 : !$OMP screen_coeffs_pgf,&
390 : !$OMP pgf_product_list_size,&
391 : !$OMP radii_pgf,&
392 : !$OMP nkind,&
393 : !$OMP ispin,&
394 : !$OMP is_anti_symmetric,&
395 : !$OMP shm_atomic_block_offset,&
396 : !$OMP shm_set_offset,&
397 : !$OMP shm_block_offset,&
398 : !$OMP shm_task_counter,&
399 : !$OMP shm_task_list,&
400 : !$OMP shm_total_bins,&
401 : !$OMP shm_master_x_data,&
402 : !$OMP shm_pmax_atom,&
403 : !$OMP shm_pmax_block,&
404 : !$OMP shm_atomic_pair_list,&
405 : !$OMP shm_mem_compression_counter,&
406 : !$OMP shared_dev, &
407 : !$OMP do_print_load_balance_info,use_libgint,use_libint, &
408 : !$OMP cp2k_was_compiled_with_cuda_offload, &
409 : !$OMP cp2k_was_compiled_with_libint, &
410 : !$OMP cp2k_was_compiled_with_libGint, &
411 : !$OMP my_nspins) &
412 : !$OMP PRIVATE(ln_10,i_thread,actual_x_data,do_periodic,screening_parameter,potential_parameter,&
413 : !$OMP general_parameter,load_balance_parameter,memory_parameter,cache_size,bits_max_val,&
414 : !$OMP basis_parameter,basis_info,treat_lsd_in_core,ncpu,n_processes,neris_total,neris_incore,&
415 : !$OMP neris_disk,neris_onthefly,mem_eris,mem_eris_disk,mem_max_val,compression_factor,&
416 : !$OMP compression_factor_disk,nprim_ints,neris_tmp,max_val_memory,max_am,do_p_screening,&
417 : !$OMP max_set,particle_set,atomic_kind_set,natom,kind_of,ncos_max,nsgf_max,ikind,&
418 : !$OMP nseta,npgfa,la_max,nsgfa,primitive_integrals,pbd_buf,pbc_buf,pad_buf,pac_buf,kbd_buf,kbc_buf,&
419 : !$OMP kad_buf,kac_buf,ee_work,ee_work2,ee_buffer1,ee_buffer2,ee_primitives_tmp,max_contraction,&
420 : !$OMP max_pgf,jkind,lb_max,nsetb,npgfb,first_sgfb,sphi_b,nsgfb,ncob,sgfb,nneighbors,pgf_list_ij,pgf_list_kl,&
421 : !$OMP pgf_product_list,nimages,ks_fully_occ,subtr_size_mb,use_disk_storage,counter,do_disk_storage,&
422 : !$OMP maxval_container_disk,maxval_cache_disk,integral_containers_disk,integral_caches_disk,eps_schwarz,&
423 : !$OMP log10_eps_schwarz,eps_storage,hf_fraction,buffer_overflow,logger,private_lib,last_sgf_global,handle_getp,&
424 : !$OMP p_work,fac,handle_load,do_dynamic_load_balancing,my_bin_size,maxval_container,integral_containers,maxval_cache,&
425 : !$OMP integral_caches,tmp_task_list,tmp_task_list_cost,tmp_index,handle_main,coeffs_kind_max0,set_list_ij,&
426 : !$OMP set_list_kl,iatom_start,iatom_end,jatom_start,jatom_end,nblocks,bins_left,do_it,distribution_energy,&
427 : !$OMP my_thread_id,my_bin_id,handle_bin,bintime_start,my_istart,my_current_counter,latom_block,tmp_block,&
428 : !$OMP katom_block,katom_start,katom_end,latom_start,latom_end,pmax_blocks,list_ij,list_kl,i_set_list_ij_start,&
429 : !$OMP i_set_list_ij_stop,ra,rb,rab2,la_min,zeta,sphi_a_ext,nsgfl_a,sphi_a_u1,sphi_a_u2,sphi_a_u3,&
430 : !$OMP lb_min,zetb,sphi_b_ext,nsgfl_b,sphi_b_u1,sphi_b_u2,sphi_b_u3,katom,latom,i_set_list_kl_start,i_set_list_kl_stop,&
431 : !$OMP kkind,lkind,rc,rd,rcd2,pmax_atom,screen_kind_ij,screen_kind_kl,symm_fac,lc_max,lc_min,npgfc,zetc,nsgfc,sphi_c_ext,&
432 : !$OMP nsgfl_c,sphi_c_u1,sphi_c_u2,sphi_c_u3,ld_max,ld_min,npgfd,zetd,nsgfd,sphi_d_ext,nsgfl_d,sphi_d_u1,sphi_d_u2,&
433 : !$OMP sphi_d_u3,atomic_offset_bd,atomic_offset_bc,atomic_offset_ad,atomic_offset_ac,offset_bd_set,offset_bc_set,&
434 : !$OMP offset_ad_set,offset_ac_set,swap_id,kind_kind_idx,ptr_p_1,ptr_p_2,ptr_p_3,ptr_p_4,mem_compression_counter,&
435 : !$OMP mem_compression_counter_disk,max_val1,sphi_a_ext_set,sphi_b_ext_set,kset,lset,max_val2_set,max_val2,&
436 : !$OMP sphi_c_ext_set,sphi_d_ext_set,pmax_entry,log10_pmax,current_counter,nints,estimate_to_store_int,&
437 : !$OMP spherical_estimate,nbits,buffer_left,buffer_start,buffer_size,max_contraction_val,tmp_r_1,tmp_r_2,&
438 : !$OMP tmp_screen_pgf1,tmp_screen_pgf2,cartesian_estimate,bintime_stop,iw,memsize_after,storage_counter_integrals,&
439 : !$OMP stor_count_int_disk,stor_count_max_val,ene_x_aa,ene_x_bb,mb_size_p,mb_size_f,mb_size_buffers,afac,ene_x_aa_diag,&
440 : !$OMP ene_x_bb_diag,act_atomic_block_offset,act_set_offset,j,handle_dist_ks,tmp_i8,tmp_i4,dft_control,&
441 : !$OMP etmp,nkimages,img,bin,eps_scaling_str,eps_schwarz_min_str, &
442 : !$OMP i,inla,nla,gcc,first_set_of_atom,max_abs_delta_KS, screened, &
443 40523 : !$OMP iatom_set,jatom_set,katom_set,latom_set,use_only_alpha_spin)
444 :
445 : ln_10 = LOG(10.0_dp)
446 : i_thread = 0
447 : !$ i_thread = omp_get_thread_num()
448 :
449 : actual_x_data => x_data(irep, i_thread + 1)
450 : !$OMP MASTER
451 : shm_master_x_data => x_data(irep, 1)
452 : !$OMP END MASTER
453 : !$OMP BARRIER
454 :
455 : do_periodic = actual_x_data%periodic_parameter%do_periodic
456 :
457 : IF (do_periodic) THEN
458 : ! ** Rebuild neighbor lists in case the cell has changed (i.e. NPT MD)
459 : actual_x_data%periodic_parameter%number_of_shells = actual_x_data%periodic_parameter%mode
460 : CALL hfx_create_neighbor_cells(actual_x_data, actual_x_data%periodic_parameter%number_of_shells_from_input, &
461 : cell, i_thread)
462 : END IF
463 :
464 : screening_parameter = actual_x_data%screening_parameter
465 : potential_parameter = actual_x_data%potential_parameter
466 :
467 : general_parameter = actual_x_data%general_parameter
468 : load_balance_parameter => actual_x_data%load_balance_parameter
469 : memory_parameter => actual_x_data%memory_parameter
470 :
471 : cache_size = memory_parameter%cache_size
472 : bits_max_val = memory_parameter%bits_max_val
473 :
474 : basis_parameter => actual_x_data%basis_parameter
475 : basis_info => actual_x_data%basis_info
476 :
477 : treat_lsd_in_core = general_parameter%treat_lsd_in_core
478 :
479 : ncpu = para_env%num_pe
480 : n_processes = ncpu*n_threads
481 :
482 : !! initialize some counters
483 : neris_total = 0_int_8
484 : neris_incore = 0_int_8
485 : neris_disk = 0_int_8
486 : neris_onthefly = 0_int_8
487 : mem_eris = 0_int_8
488 : mem_eris_disk = 0_int_8
489 : mem_max_val = 0_int_8
490 : compression_factor = 0.0_dp
491 : compression_factor_disk = 0.0_dp
492 : nprim_ints = 0_int_8
493 : neris_tmp = 0_int_8
494 : max_val_memory = 1_int_8
495 :
496 : ! Set the integral engine
497 : if (general_parameter%hfx_library == hfx_library_is_libint) then
498 : use_libint = .true.
499 : use_libGint = .false.
500 : end if
501 : if (general_parameter%hfx_library == hfx_library_is_libGint) then
502 : use_libint = .false.
503 : use_libGint = .true.
504 : end if
505 : if (general_parameter%hfx_library == hfx_library_is_both) then
506 : use_libint = .true.
507 : use_libGint = .true.
508 : end if
509 :
510 : max_am = basis_info%max_am
511 :
512 : CALL get_qs_env(qs_env=qs_env, &
513 : atomic_kind_set=atomic_kind_set, &
514 : particle_set=particle_set, &
515 : dft_control=dft_control)
516 : IF (dft_control%do_admm) CALL get_admm_env(qs_env%admm_env, matrix_ks_aux_fit_hfx=matrix_ks_aux_fit_hfx)
517 :
518 : do_p_screening = screening_parameter%do_initial_p_screening
519 : ! Special treatment for MP2 with initial density screening
520 : IF (do_p_screening) THEN
521 : IF (ASSOCIATED(qs_env%mp2_env)) THEN
522 : IF ((qs_env%mp2_env%ri_grad%free_hfx_buffer)) THEN
523 : do_p_screening = ((qs_env%mp2_env%p_screen) .AND. (qs_env%mp2_env%not_last_hfx))
524 : ELSE
525 : do_p_screening = .FALSE.
526 : END IF
527 : END IF
528 : END IF
529 : max_set = basis_info%max_set
530 : natom = SIZE(particle_set, 1)
531 :
532 : ! Number of image matrices in k-point calculations (nkimages==1 -> no kpoints)
533 : nkimages = dft_control%nimages
534 : CPASSERT(nkimages == 1)
535 :
536 : CALL get_atomic_kind_set(atomic_kind_set=atomic_kind_set, kind_of=kind_of)
537 :
538 : !! precompute maximum nco and allocate scratch
539 : ncos_max = 0
540 : nsgf_max = 0
541 : DO iatom = 1, natom
542 : ikind = kind_of(iatom)
543 : nseta = basis_parameter(ikind)%nset
544 : npgfa => basis_parameter(ikind)%npgf
545 : la_max => basis_parameter(ikind)%lmax
546 : nsgfa => basis_parameter(ikind)%nsgf
547 : DO iset = 1, nseta
548 : ncos_max = MAX(ncos_max, ncoset(la_max(iset)))
549 : nsgf_max = MAX(nsgf_max, nsgfa(iset))
550 : END DO
551 : END DO
552 : !! Allocate the arrays for the integrals.
553 : ALLOCATE (primitive_integrals(nsgf_max**4))
554 : primitive_integrals = 0.0_dp
555 :
556 : ALLOCATE (pbd_buf(nsgf_max**2))
557 : ALLOCATE (pbc_buf(nsgf_max**2))
558 : ALLOCATE (pad_buf(nsgf_max**2))
559 : ALLOCATE (pac_buf(nsgf_max**2))
560 : ALLOCATE (kbd_buf(nsgf_max**2))
561 : ALLOCATE (kbc_buf(nsgf_max**2))
562 : ALLOCATE (kad_buf(nsgf_max**2))
563 : ALLOCATE (kac_buf(nsgf_max**2))
564 : ALLOCATE (ee_work(ncos_max**4))
565 : ALLOCATE (ee_work2(ncos_max**4))
566 : ALLOCATE (ee_buffer1(ncos_max**4))
567 : ALLOCATE (ee_buffer2(ncos_max**4))
568 : ALLOCATE (ee_primitives_tmp(nsgf_max**4))
569 :
570 : IF (my_nspins == 0) my_nspins = dft_control%nspins
571 :
572 : ALLOCATE (max_contraction(max_set, natom))
573 :
574 : max_contraction = 0.0_dp
575 : max_pgf = 0
576 : DO jatom = 1, natom
577 : jkind = kind_of(jatom)
578 : lb_max => basis_parameter(jkind)%lmax
579 : nsetb = basis_parameter(jkind)%nset
580 : npgfb => basis_parameter(jkind)%npgf
581 : first_sgfb => basis_parameter(jkind)%first_sgf
582 : sphi_b => basis_parameter(jkind)%sphi
583 : nsgfb => basis_parameter(jkind)%nsgf
584 : DO jset = 1, nsetb
585 : ! takes the primitive to contracted transformation into account
586 : ncob = npgfb(jset)*ncoset(lb_max(jset))
587 : sgfb = first_sgfb(1, jset)
588 : ! if the primitives are assumed to be all of max_val2, max_val2*p2s_b becomes
589 : ! the maximum value after multiplication with sphi_b
590 : max_contraction(jset, jatom) = MAXVAL([(SUM(ABS(sphi_b(1:ncob, i))), i=sgfb, sgfb + nsgfb(jset) - 1)])
591 : max_pgf = MAX(max_pgf, npgfb(jset))
592 : END DO
593 : END DO
594 :
595 : ! ** Allocate buffers for pgf_lists
596 : nneighbors = SIZE(actual_x_data%neighbor_cells)
597 : ALLOCATE (pgf_list_ij(max_pgf**2))
598 : ALLOCATE (pgf_list_kl(max_pgf**2))
599 : ! the size of pgf_product_list is allocated and resized as needed. The initial guess grows as needed
600 : !$OMP ATOMIC READ
601 : tmp_i4 = pgf_product_list_size
602 : ALLOCATE (pgf_product_list(tmp_i4))
603 : ALLOCATE (nimages(max_pgf**2))
604 :
605 : DO i = 1, max_pgf**2
606 : ALLOCATE (pgf_list_ij(i)%image_list(nneighbors))
607 : ALLOCATE (pgf_list_kl(i)%image_list(nneighbors))
608 : END DO
609 : !$OMP BARRIER
610 : !$OMP MASTER
611 : !! Calculate helper array that stores if a certain atomic pair is associated in the KS matrix
612 : IF (my_geo_change) THEN
613 : CALL get_atomic_block_maps(ks_matrix(1, 1)%matrix, basis_parameter, kind_of, &
614 : shm_master_x_data%is_assoc_atomic_block, &
615 : shm_master_x_data%number_of_p_entries, &
616 : para_env, &
617 : shm_master_x_data%atomic_block_offset, &
618 : shm_master_x_data%set_offset, &
619 : shm_master_x_data%block_offset, &
620 : shm_master_x_data%map_atoms_to_cpus, &
621 : nkind)
622 :
623 : shm_is_assoc_atomic_block => shm_master_x_data%is_assoc_atomic_block
624 :
625 : !! Get occupation of KS-matrix
626 : ks_fully_occ = .TRUE.
627 : outer: DO iatom = 1, natom
628 : DO jatom = iatom, natom
629 : IF (shm_is_assoc_atomic_block(jatom, iatom) == 0) THEN
630 : ks_fully_occ = .FALSE.
631 : EXIT outer
632 : END IF
633 : END DO
634 : END DO outer
635 :
636 : IF (.NOT. ks_fully_occ) THEN
637 : CALL cp_warn(__LOCATION__, &
638 : "The Kohn Sham matrix is not 100% occupied. This "// &
639 : "may result in incorrect Hartree-Fock results. Setting "// &
640 : "MIN_PAIR_LIST_RADIUS to -1 in the QS section ensures a "// &
641 : "fully occupied KS matrix. For more information "// &
642 : "see FAQ: https://www.cp2k.org/faq:hfx_eps_warning")
643 : END IF
644 : END IF
645 :
646 : ! ** Set pointers
647 : shm_number_of_p_entries = shm_master_x_data%number_of_p_entries
648 : shm_is_assoc_atomic_block => shm_master_x_data%is_assoc_atomic_block
649 : shm_atomic_block_offset => shm_master_x_data%atomic_block_offset
650 : shm_set_offset => shm_master_x_data%set_offset
651 : shm_block_offset => shm_master_x_data%block_offset
652 : !$OMP END MASTER
653 : !$OMP BARRIER
654 :
655 : ! ** Reset storage counter given by MAX_MEMORY by subtracting all buffers
656 : ! ** Fock and density Matrices (shared)
657 : subtr_size_mb = 2_int_8*shm_block_offset(ncpu + 1)
658 : ! ** if non restricted ==> alpha/beta spin
659 : IF (.NOT. treat_lsd_in_core) THEN
660 : IF (my_nspins == 2) subtr_size_mb = subtr_size_mb*2_int_8
661 : END IF
662 : ! ** Initial P only MAX(alpha,beta) (shared)
663 : IF (do_p_screening .OR. screening_parameter%do_p_screening_forces) THEN
664 : subtr_size_mb = subtr_size_mb + memory_parameter%size_p_screen
665 : END IF
666 : ! ** In core forces require their own initial P
667 : IF (screening_parameter%do_p_screening_forces) THEN
668 : IF (memory_parameter%treat_forces_in_core) THEN
669 : subtr_size_mb = subtr_size_mb + memory_parameter%size_p_screen
670 : END IF
671 : END IF
672 : ! ** primitive buffer (not shared by the threads)
673 : subtr_size_mb = subtr_size_mb + nsgf_max**4*n_threads
674 : ! ** density + fock buffers
675 : subtr_size_mb = subtr_size_mb + 8_int_8*nsgf_max**2*n_threads
676 : ! ** screening functions (shared)
677 : ! ** coeffs_pgf
678 : subtr_size_mb = subtr_size_mb + max_pgf**2*max_set**2*nkind**2
679 : ! ** coeffs_set
680 : subtr_size_mb = subtr_size_mb + max_set**2*nkind**2
681 : ! ** coeffs_kind
682 : subtr_size_mb = subtr_size_mb + nkind**2
683 : ! ** radii_pgf
684 : subtr_size_mb = subtr_size_mb + max_pgf**2*max_set**2*nkind**2
685 :
686 : ! ** is_assoc (shared)
687 : subtr_size_mb = subtr_size_mb + natom**2
688 :
689 : ! ** pmax_atom (shared)
690 : IF (do_p_screening) THEN
691 : subtr_size_mb = subtr_size_mb + natom**2
692 : END IF
693 : IF (screening_parameter%do_p_screening_forces) THEN
694 : IF (memory_parameter%treat_forces_in_core) THEN
695 : subtr_size_mb = subtr_size_mb + natom**2
696 : END IF
697 : END IF
698 :
699 : ! ** Convert into MiB's
700 : subtr_size_mb = subtr_size_mb*8_int_8/1024_int_8/1024_int_8
701 :
702 : ! ** Subtracting all these buffers from MAX_MEMORY yields the amount
703 : ! ** of RAM that is left for the compressed integrals. When using threads
704 : ! ** all the available memory is shared among all n_threads. i.e. the faster
705 : ! ** ones can steal from the slower ones
706 :
707 : CALL hfx_reset_memory_usage_counter(memory_parameter, subtr_size_mb)
708 :
709 : use_disk_storage = .FALSE.
710 : counter = 0_int_8
711 : do_disk_storage = memory_parameter%do_disk_storage
712 : IF (do_disk_storage) THEN
713 : maxval_container_disk => actual_x_data%store_ints%maxval_container_disk
714 : maxval_cache_disk => actual_x_data%store_ints%maxval_cache_disk
715 :
716 : integral_containers_disk => actual_x_data%store_ints%integral_containers_disk
717 : integral_caches_disk => actual_x_data%store_ints%integral_caches_disk
718 : END IF
719 :
720 : IF (my_geo_change) THEN
721 : memory_parameter%ram_counter = HUGE(memory_parameter%ram_counter)
722 : END IF
723 :
724 : IF (my_geo_change) THEN
725 : memory_parameter%recalc_forces = .TRUE.
726 : ELSE
727 : IF (.NOT. memory_parameter%treat_forces_in_core) memory_parameter%recalc_forces = .TRUE.
728 : END IF
729 :
730 : !! Get screening parameter
731 : eps_schwarz = screening_parameter%eps_schwarz
732 : IF (eps_schwarz <= 0.0_dp) THEN
733 : log10_eps_schwarz = log_zero
734 : ELSE
735 : log10_eps_schwarz = LOG10(eps_schwarz)
736 : END IF
737 : !! get storage epsilon
738 : eps_storage = eps_schwarz*memory_parameter%eps_storage_scaling
739 :
740 : !! If we have a hybrid functional, we may need only a fraction of exact exchange
741 : hf_fraction = general_parameter%fraction
742 :
743 : !! The number of integrals that fit into the given MAX_MEMORY
744 :
745 : !! Parameters related to the potential 1/r, erf(wr)/r, erfc(wr/r)
746 : potential_parameter = actual_x_data%potential_parameter
747 :
748 : !! Variable to check if we calculate the integrals in-core or on the fly
749 : !! If TRUE -> on the fly
750 : IF (.NOT. memory_parameter%do_all_on_the_fly) THEN
751 : buffer_overflow = .FALSE.
752 : ELSE
753 : buffer_overflow = .TRUE.
754 : END IF
755 : logger => cp_get_default_logger()
756 :
757 : private_lib = actual_x_data%lib
758 :
759 : !! Helper array to map local basis function indices to global ones
760 : ALLOCATE (last_sgf_global(0:natom))
761 : last_sgf_global(0) = 0
762 : DO iatom = 1, natom
763 : ikind = kind_of(iatom)
764 : last_sgf_global(iatom) = last_sgf_global(iatom - 1) + basis_parameter(ikind)%nsgf_total
765 : END DO
766 : !$OMP BARRIER
767 : !$OMP MASTER
768 : !! Let master thread get the density (avoid problems with MPI)
769 : !! Get the full density from all the processors
770 : NULLIFY (full_density_alpha, full_density_beta)
771 : ALLOCATE (full_density_alpha(shm_block_offset(ncpu + 1), nkimages))
772 : IF (.NOT. treat_lsd_in_core .OR. my_nspins == 1) THEN
773 : CALL timeset(routineN//"_getP", handle_getP)
774 : DO img = 1, nkimages
775 : CALL get_full_density(para_env, full_density_alpha(:, img), rho_ao(ispin, img)%matrix, shm_number_of_p_entries, &
776 : shm_master_x_data%block_offset, &
777 : kind_of, basis_parameter, get_max_vals_spin=.FALSE., antisymmetric=is_anti_symmetric)
778 : END DO
779 :
780 : IF (my_nspins == 2) THEN
781 : ALLOCATE (full_density_beta(shm_block_offset(ncpu + 1), nkimages))
782 : DO img = 1, nkimages
783 : CALL get_full_density(para_env, full_density_beta(:, img), rho_ao(2, img)%matrix, shm_number_of_p_entries, &
784 : shm_master_x_data%block_offset, &
785 : kind_of, basis_parameter, get_max_vals_spin=.FALSE., antisymmetric=is_anti_symmetric)
786 : END DO
787 : END IF
788 : CALL timestop(handle_getP)
789 :
790 : !! Calculate the max values of the density matrix actual_pmax stores the data from the actual density matrix
791 : !! and x_data%initial_p stores the same for the initial guess. The initial guess is updated only in the case of
792 : !! a changed geometry
793 : NULLIFY (shm_initial_p)
794 : IF (do_p_screening) THEN
795 : shm_initial_p => shm_master_x_data%initial_p
796 : shm_pmax_atom => shm_master_x_data%pmax_atom
797 : IF (my_geo_change) THEN
798 : CALL update_pmax_mat(shm_master_x_data%initial_p, &
799 : shm_master_x_data%map_atom_to_kind_atom, &
800 : shm_master_x_data%set_offset, &
801 : shm_master_x_data%atomic_block_offset, &
802 : shm_pmax_atom, &
803 : full_density_alpha, full_density_beta, &
804 : natom, kind_of, basis_parameter, &
805 : nkind, shm_master_x_data%is_assoc_atomic_block)
806 : END IF
807 : END IF
808 : ELSE
809 : IF (do_p_screening) THEN
810 : CALL timeset(routineN//"_getP", handle_getP)
811 : DO img = 1, nkimages
812 : CALL get_full_density(para_env, full_density_alpha(:, img), rho_ao(1, img)%matrix, shm_number_of_p_entries, &
813 : shm_master_x_data%block_offset, &
814 : kind_of, basis_parameter, get_max_vals_spin=.TRUE., &
815 : rho_beta=rho_ao(2, img)%matrix, antisymmetric=is_anti_symmetric)
816 : END DO
817 : CALL timestop(handle_getP)
818 :
819 : !! Calculate the max values of the density matrix actual_pmax stores the data from the actual density matrix
820 : !! and x_data%initial_p stores the same for the initial guess. The initial guess is updated only in the case of
821 : !! a changed geometry
822 : NULLIFY (shm_initial_p)
823 : shm_initial_p => actual_x_data%initial_p
824 : shm_pmax_atom => shm_master_x_data%pmax_atom
825 : IF (my_geo_change) THEN
826 : CALL update_pmax_mat(shm_master_x_data%initial_p, &
827 : shm_master_x_data%map_atom_to_kind_atom, &
828 : shm_master_x_data%set_offset, &
829 : shm_master_x_data%atomic_block_offset, &
830 : shm_pmax_atom, &
831 : full_density_alpha, full_density_beta, &
832 : natom, kind_of, basis_parameter, &
833 : nkind, shm_master_x_data%is_assoc_atomic_block)
834 : END IF
835 : END IF
836 : ! ** Now get the density(ispin)
837 : DO img = 1, nkimages
838 : CALL get_full_density(para_env, full_density_alpha(:, img), rho_ao(ispin, img)%matrix, shm_number_of_p_entries, &
839 : shm_master_x_data%block_offset, &
840 : kind_of, basis_parameter, get_max_vals_spin=.FALSE., &
841 : antisymmetric=is_anti_symmetric)
842 : END DO
843 : END IF
844 :
845 : NULLIFY (full_ks_alpha, full_ks_beta)
846 : ALLOCATE (shm_master_x_data%full_ks_alpha(shm_block_offset(ncpu + 1), nkimages))
847 : full_ks_alpha => shm_master_x_data%full_ks_alpha
848 : full_ks_alpha = 0.0_dp
849 :
850 : IF (.NOT. treat_lsd_in_core) THEN
851 : IF (my_nspins == 2) THEN
852 : ALLOCATE (shm_master_x_data%full_ks_beta(shm_block_offset(ncpu + 1), nkimages))
853 : full_ks_beta => shm_master_x_data%full_ks_beta
854 : full_ks_beta = 0.0_dp
855 : END IF
856 : END IF
857 :
858 : !$OMP END MASTER
859 : !$OMP BARRIER
860 :
861 : !! Initialize schwarz screening matrices for near field estimates and boxing screening matrices
862 : !! for far field estimates. The update is only performed if the geomtry of the system changed.
863 : !! If the system is periodic, then the corresponding routines are called and some variables
864 : !! are initialized
865 :
866 : IF (.NOT. shm_master_x_data%screen_funct_is_initialized) THEN
867 : CALL calc_pair_dist_radii(qs_env, basis_parameter, &
868 : shm_master_x_data%pair_dist_radii_pgf, max_set, max_pgf, eps_schwarz, &
869 : n_threads, i_thread)
870 : !$OMP BARRIER
871 : CALL calc_screening_functions(qs_env, basis_parameter, private_lib, shm_master_x_data%potential_parameter, &
872 : shm_master_x_data%screen_funct_coeffs_set, &
873 : shm_master_x_data%screen_funct_coeffs_kind, &
874 : shm_master_x_data%screen_funct_coeffs_pgf, &
875 : shm_master_x_data%pair_dist_radii_pgf, &
876 : max_set, max_pgf, n_threads, i_thread, p_work)
877 :
878 : !$OMP MASTER
879 : shm_master_x_data%screen_funct_is_initialized = .TRUE.
880 : !$OMP END MASTER
881 : END IF
882 : !$OMP BARRIER
883 :
884 : !$OMP MASTER
885 : screen_coeffs_set => shm_master_x_data%screen_funct_coeffs_set
886 : screen_coeffs_kind => shm_master_x_data%screen_funct_coeffs_kind
887 : screen_coeffs_pgf => shm_master_x_data%screen_funct_coeffs_pgf
888 : radii_pgf => shm_master_x_data%pair_dist_radii_pgf
889 : !$OMP END MASTER
890 : !$OMP BARRIER
891 :
892 : !! Initialize a prefactor depending on the fraction and the number of spins
893 : IF (my_nspins == 1) THEN
894 : fac = 0.5_dp*hf_fraction
895 : ELSE
896 : fac = 1.0_dp*hf_fraction
897 : END IF
898 :
899 : !! Call routines that distribute the load on all processes. If we want to screen on a initial density matrix, there is
900 : !! an optional parameter. Of course, this is only done if the geometry did change
901 : !$OMP BARRIER
902 : !$OMP MASTER
903 : CALL timeset(routineN//"_load", handle_load)
904 : !$OMP END MASTER
905 : !$OMP BARRIER
906 : IF (my_geo_change) THEN
907 : IF (actual_x_data%b_first_load_balance_energy) THEN
908 : CALL hfx_load_balance(actual_x_data, eps_schwarz, particle_set, max_set, para_env, &
909 : screen_coeffs_set, screen_coeffs_kind, &
910 : shm_is_assoc_atomic_block, do_periodic, load_balance_parameter, &
911 : kind_of, basis_parameter, shm_initial_p, shm_pmax_atom, i_thread, n_threads, &
912 : cell, do_p_screening, actual_x_data%map_atom_to_kind_atom, &
913 : nkind, hfx_do_eval_energy, shm_pmax_block, use_virial=.FALSE.)
914 : actual_x_data%b_first_load_balance_energy = .FALSE.
915 : ELSE
916 : CALL hfx_update_load_balance(actual_x_data, para_env, &
917 : load_balance_parameter, &
918 : i_thread, n_threads, hfx_do_eval_energy)
919 : END IF
920 : END IF
921 : !$OMP BARRIER
922 : !$OMP MASTER
923 : CALL timestop(handle_load)
924 : !$OMP END MASTER
925 : !$OMP BARRIER
926 :
927 : use_only_alpha_spin = treat_lsd_in_core .or. .not. my_nspins == 2
928 :
929 : ! Set up the gpu enviroment for libGint
930 : if (use_libGint) then
931 :
932 : call offload_activate_chosen_device()
933 :
934 : ! At least once, and before any other call from all OMP threads
935 : call cp_libGint_init(actual_x_data)
936 :
937 : ! At least at geo change
938 : call libGint_update_env(fac, memory_parameter, do_periodic, cell, actual_x_data, &
939 : nneighbors, max_pgf, natom, kind_of, particle_set, basis_parameter)
940 :
941 : ! Every cycle, comunicate the updated density to libGint
942 : if (use_only_alpha_spin) then
943 : call libGint_set_density(full_density_alpha)
944 : else
945 : call libGint_set_density(full_density_alpha, full_density_beta)
946 : end if
947 : end if
948 : !!!
949 :
950 : !! Start calculating integrals of the form (ab|cd) or (ij|kl)
951 : !! In order to do so, there is a main four-loop structure that takes into account the two symmetries
952 : !!
953 : !! (ab|cd) = (ba|cd) = (ab|dc) = (ba|dc)
954 : !!
955 : !! by iterating in the following way
956 : !!
957 : !! DO iatom=1,natom and DO katom=1,natom
958 : !! DO jatom=iatom,natom DO latom=katom,natom
959 : !!
960 : !! The third symmetry
961 : !!
962 : !! (ab|cd) = (cd|ab)
963 : !!
964 : !! is taken into account by the following criterion:
965 : !!
966 : !! IF(katom+latom<=iatom+jatom) THEN
967 : !! IF( ((iatom+jatom)==(katom+latom) ) .AND.(katom<iatom)) CYCLE
968 : !!
969 : !! Depending on the degeneracy of an integral the exchange contribution is multiplied by a corresponding
970 : !! factor ( symm_fac ).
971 : !!
972 : !! If one quartet does not pass the screening we CYCLE on the outer most possible loop. Thats why we use
973 : !! different hierarchies of short range screening matrices.
974 : !!
975 : !! If we do a parallel run, each process owns a unique array of workloads. Here, a workload is
976 : !! defined as :
977 : !!
978 : !! istart, jstart, kstart, lstart, number_of_atom_quartets, initial_cpu_id
979 : !!
980 : !! This tells the process where to start the main loops and how many bunches of integrals it has to
981 : !! calculate. The original parallelization is a simple modulo distribution that is binned and
982 : !! optimized in the load_balance routines. Since the Monte Carlo routines can swap processors,
983 : !! we need to know which was the initial cpu_id.
984 : !! Furthermore, the indices iatom, jatom, katom, latom have to be set to istart, jstart, kstart and
985 : !! lstart only the first time the loop is executed. All subsequent loops have to start with one or
986 : !! iatom and katom respectively. Therefore, we use flags like first_j_loop etc.
987 :
988 : !! LibGint follows the same patter, distributing integrals over mpi and openmp with a shared-over-mpi
989 : !! density and fock matrix. The main differences are that the fock matrix is not in a valid state
990 : !! until after the libGint_get_fock_matrix has returned -in approximately a thousand lines-,
991 : !! and the integrals are not saved in memory, but recomputed each cycle
992 :
993 : do_dynamic_load_balancing = .TRUE.
994 :
995 : IF (n_threads == 1 .OR. do_disk_storage) do_dynamic_load_balancing = .FALSE.
996 :
997 : IF (do_dynamic_load_balancing) THEN
998 : my_bin_size = SIZE(actual_x_data%distribution_energy)
999 : ELSE
1000 : my_bin_size = 1
1001 : END IF
1002 : !! We do not need the containers if MAX_MEM = 0
1003 : IF (.NOT. memory_parameter%do_all_on_the_fly) THEN
1004 : !! IF new md step -> reinitialize containers
1005 : IF (my_geo_change) THEN
1006 : CALL dealloc_containers(actual_x_data%store_ints, memory_parameter%actual_memory_usage)
1007 : CALL alloc_containers(actual_x_data%store_ints, my_bin_size)
1008 :
1009 : DO bin = 1, my_bin_size
1010 : maxval_container => actual_x_data%store_ints%maxval_container(bin)
1011 : integral_containers => actual_x_data%store_ints%integral_containers(:, bin)
1012 : CALL hfx_init_container(maxval_container, memory_parameter%actual_memory_usage, .FALSE.)
1013 : DO i = 1, 64
1014 : CALL hfx_init_container(integral_containers(i), memory_parameter%actual_memory_usage, .FALSE.)
1015 : END DO
1016 : END DO
1017 : END IF
1018 :
1019 : !! Decompress the first cache for maxvals and integrals
1020 : IF (.NOT. my_geo_change) THEN
1021 : DO bin = 1, my_bin_size
1022 : maxval_cache => actual_x_data%store_ints%maxval_cache(bin)
1023 : maxval_container => actual_x_data%store_ints%maxval_container(bin)
1024 : integral_caches => actual_x_data%store_ints%integral_caches(:, bin)
1025 : integral_containers => actual_x_data%store_ints%integral_containers(:, bin)
1026 : CALL hfx_decompress_first_cache(bits_max_val, maxval_cache, maxval_container, &
1027 : memory_parameter%actual_memory_usage, .FALSE.)
1028 : DO i = 1, 64
1029 : CALL hfx_decompress_first_cache(i, integral_caches(i), integral_containers(i), &
1030 : memory_parameter%actual_memory_usage, .FALSE.)
1031 : END DO
1032 : END DO
1033 : END IF
1034 : END IF
1035 :
1036 : !! Since the I/O routines are no thread-safe, i.e. the procedure to get the unit number, put a lock here
1037 : !$OMP CRITICAL(hfxenergy_io_critical)
1038 : !! If we do disk storage, we have to initialize the containers/caches
1039 : IF (do_disk_storage) THEN
1040 : !! IF new md step -> reinitialize containers
1041 : IF (my_geo_change) THEN
1042 : CALL hfx_init_container(maxval_container_disk, memory_parameter%actual_memory_usage_disk, do_disk_storage)
1043 : DO i = 1, 64
1044 : CALL hfx_init_container(integral_containers_disk(i), memory_parameter%actual_memory_usage_disk, do_disk_storage)
1045 : END DO
1046 : END IF
1047 : !! Decompress the first cache for maxvals and integrals
1048 : IF (.NOT. my_geo_change) THEN
1049 : CALL hfx_decompress_first_cache(bits_max_val, maxval_cache_disk, maxval_container_disk, &
1050 : memory_parameter%actual_memory_usage_disk, .TRUE.)
1051 : DO i = 1, 64
1052 : CALL hfx_decompress_first_cache(i, integral_caches_disk(i), integral_containers_disk(i), &
1053 : memory_parameter%actual_memory_usage_disk, .TRUE.)
1054 : END DO
1055 : END IF
1056 : END IF
1057 : !$OMP END CRITICAL(hfxenergy_io_critical)
1058 :
1059 : !$OMP BARRIER
1060 : !$OMP MASTER
1061 :
1062 : IF (do_dynamic_load_balancing) THEN
1063 : ! ** Lets construct the task list
1064 : shm_total_bins = 0
1065 : DO i = 1, n_threads
1066 : shm_total_bins = shm_total_bins + SIZE(x_data(irep, i)%distribution_energy)
1067 : END DO
1068 : ALLOCATE (tmp_task_list(shm_total_bins))
1069 : shm_task_counter = 0
1070 : DO i = 1, n_threads
1071 : DO bin = 1, SIZE(x_data(irep, i)%distribution_energy)
1072 : shm_task_counter = shm_task_counter + 1
1073 : tmp_task_list(shm_task_counter)%thread_id = i
1074 : tmp_task_list(shm_task_counter)%bin_id = bin
1075 : tmp_task_list(shm_task_counter)%cost = x_data(irep, i)%distribution_energy(bin)%cost
1076 : END DO
1077 : END DO
1078 :
1079 : ! ** Now sort the task list
1080 : ALLOCATE (tmp_task_list_cost(shm_total_bins))
1081 : ALLOCATE (tmp_index(shm_total_bins))
1082 :
1083 : DO i = 1, shm_total_bins
1084 : tmp_task_list_cost(i) = tmp_task_list(i)%cost
1085 : END DO
1086 :
1087 : CALL sort(tmp_task_list_cost, shm_total_bins, tmp_index)
1088 :
1089 : ALLOCATE (shm_master_x_data%task_list(shm_total_bins))
1090 :
1091 : DO i = 1, shm_total_bins
1092 : shm_master_x_data%task_list(i) = tmp_task_list(tmp_index(shm_total_bins - i + 1))
1093 : END DO
1094 :
1095 : shm_task_list => shm_master_x_data%task_list
1096 : shm_task_counter = 0
1097 :
1098 : DEALLOCATE (tmp_task_list_cost, tmp_index, tmp_task_list)
1099 : END IF
1100 : !$OMP END MASTER
1101 : !$OMP BARRIER
1102 :
1103 : IF (my_bin_size > 0) THEN
1104 : maxval_container => actual_x_data%store_ints%maxval_container(1)
1105 : maxval_cache => actual_x_data%store_ints%maxval_cache(1)
1106 :
1107 : integral_containers => actual_x_data%store_ints%integral_containers(:, 1)
1108 : integral_caches => actual_x_data%store_ints%integral_caches(:, 1)
1109 : END IF
1110 :
1111 : !$OMP BARRIER
1112 : !$OMP MASTER
1113 : CALL timeset(routineN//"_main", handle_main)
1114 : !$OMP END MASTER
1115 : !$OMP BARRIER
1116 :
1117 : coeffs_kind_max0 = MAXVAL(screen_coeffs_kind(:, :)%x(2))
1118 : ALLOCATE (set_list_ij((max_set*load_balance_parameter%block_size)**2))
1119 : ALLOCATE (set_list_kl((max_set*load_balance_parameter%block_size)**2))
1120 :
1121 : !$OMP BARRIER
1122 : !$OMP MASTER
1123 :
1124 : !! precalculate maximum density matrix elements in blocks
1125 : actual_x_data%pmax_block = 0.0_dp
1126 : shm_pmax_block => actual_x_data%pmax_block
1127 : IF (do_p_screening) THEN
1128 : DO iatom_block = 1, SIZE(actual_x_data%blocks)
1129 : iatom_start = actual_x_data%blocks(iatom_block)%istart
1130 : iatom_end = actual_x_data%blocks(iatom_block)%iend
1131 : DO jatom_block = 1, SIZE(actual_x_data%blocks)
1132 : jatom_start = actual_x_data%blocks(jatom_block)%istart
1133 : jatom_end = actual_x_data%blocks(jatom_block)%iend
1134 : shm_pmax_block(iatom_block, jatom_block) = MAXVAL(shm_pmax_atom(iatom_start:iatom_end, jatom_start:jatom_end))
1135 : END DO
1136 : END DO
1137 : END IF
1138 : shm_atomic_pair_list => actual_x_data%atomic_pair_list
1139 : IF (my_geo_change) THEN
1140 : CALL build_atomic_pair_list(natom, shm_atomic_pair_list, kind_of, basis_parameter, particle_set, &
1141 : do_periodic, screen_coeffs_kind, coeffs_kind_max0, log10_eps_schwarz, cell, &
1142 : actual_x_data%blocks)
1143 : END IF
1144 :
1145 : my_bin_size = SIZE(actual_x_data%distribution_energy)
1146 : ! reset timings for the new SCF round
1147 : IF (my_geo_change) THEN
1148 : DO bin = 1, my_bin_size
1149 : actual_x_data%distribution_energy(bin)%time_first_scf = 0.0_dp
1150 : actual_x_data%distribution_energy(bin)%time_other_scf = 0.0_dp
1151 : actual_x_data%distribution_energy(bin)%time_forces = 0.0_dp
1152 : END DO
1153 : END IF
1154 : !$OMP END MASTER
1155 : !$OMP BARRIER
1156 :
1157 : my_bin_size = SIZE(actual_x_data%distribution_energy)
1158 : nblocks = load_balance_parameter%nblocks
1159 :
1160 : bins_left = .TRUE.
1161 : do_it = .TRUE.
1162 : bin = 0
1163 : ! Main cycle starts here
1164 : DO WHILE (bins_left)
1165 : ! Check if this thread can run this bin
1166 : IF (.NOT. do_dynamic_load_balancing) THEN
1167 : bin = bin + 1
1168 : IF (bin > my_bin_size) THEN
1169 : do_it = .FALSE.
1170 : bins_left = .FALSE.
1171 : ELSE
1172 : do_it = .TRUE.
1173 : bins_left = .TRUE.
1174 : distribution_energy => actual_x_data%distribution_energy(bin)
1175 : END IF
1176 : ELSE
1177 : !$OMP CRITICAL(hfxenergy_critical)
1178 : shm_task_counter = shm_task_counter + 1
1179 : IF (shm_task_counter <= shm_total_bins) THEN
1180 : my_thread_id = shm_task_list(shm_task_counter)%thread_id
1181 : my_bin_id = shm_task_list(shm_task_counter)%bin_id
1182 : IF (.NOT. memory_parameter%do_all_on_the_fly) THEN
1183 : maxval_cache => x_data(irep, my_thread_id)%store_ints%maxval_cache(my_bin_id)
1184 : maxval_container => x_data(irep, my_thread_id)%store_ints%maxval_container(my_bin_id)
1185 : integral_caches => x_data(irep, my_thread_id)%store_ints%integral_caches(:, my_bin_id)
1186 : integral_containers => x_data(irep, my_thread_id)%store_ints%integral_containers(:, my_bin_id)
1187 : END IF
1188 : distribution_energy => x_data(irep, my_thread_id)%distribution_energy(my_bin_id)
1189 : do_it = .TRUE.
1190 : bins_left = .TRUE.
1191 : IF (my_geo_change) THEN
1192 : distribution_energy%ram_counter = HUGE(distribution_energy%ram_counter)
1193 : END IF
1194 : counter = 0_Int_8
1195 : ELSE
1196 : do_it = .FALSE.
1197 : bins_left = .FALSE.
1198 : END IF
1199 : !$OMP END CRITICAL(hfxenergy_critical)
1200 : END IF
1201 : IF (.NOT. do_it) CYCLE
1202 :
1203 : !$OMP MASTER
1204 : CALL timeset(routineN//"_bin", handle_bin)
1205 : !$OMP END MASTER
1206 :
1207 : bintime_start = m_walltime()
1208 : my_istart = distribution_energy%istart
1209 : my_current_counter = 0
1210 : IF (distribution_energy%number_of_atom_quartets == 0 .OR. &
1211 : my_istart == -1_int_8) my_istart = nblocks**4
1212 : atomic_blocks: DO atom_block = my_istart, nblocks**4 - 1, n_processes
1213 : latom_block = INT(MODULO(atom_block, nblocks)) + 1
1214 : tmp_block = atom_block/nblocks
1215 : katom_block = INT(MODULO(tmp_block, nblocks)) + 1
1216 : IF (latom_block < katom_block) CYCLE atomic_blocks
1217 : tmp_block = tmp_block/nblocks
1218 : jatom_block = INT(MODULO(tmp_block, nblocks)) + 1
1219 : tmp_block = tmp_block/nblocks
1220 : iatom_block = INT(MODULO(tmp_block, nblocks)) + 1
1221 : IF (jatom_block < iatom_block) CYCLE atomic_blocks
1222 : my_current_counter = my_current_counter + 1
1223 : IF (my_current_counter > distribution_energy%number_of_atom_quartets) EXIT atomic_blocks
1224 :
1225 : iatom_start = actual_x_data%blocks(iatom_block)%istart
1226 : iatom_end = actual_x_data%blocks(iatom_block)%iend
1227 : jatom_start = actual_x_data%blocks(jatom_block)%istart
1228 : jatom_end = actual_x_data%blocks(jatom_block)%iend
1229 : katom_start = actual_x_data%blocks(katom_block)%istart
1230 : katom_end = actual_x_data%blocks(katom_block)%iend
1231 : latom_start = actual_x_data%blocks(latom_block)%istart
1232 : latom_end = actual_x_data%blocks(latom_block)%iend
1233 :
1234 : pmax_blocks = MAX(shm_pmax_block(katom_block, iatom_block), &
1235 : shm_pmax_block(latom_block, jatom_block), &
1236 : shm_pmax_block(latom_block, iatom_block), &
1237 : shm_pmax_block(katom_block, jatom_block))
1238 :
1239 : IF (2.0_dp*coeffs_kind_max0 + pmax_blocks < log10_eps_schwarz) CYCLE atomic_blocks
1240 :
1241 : CALL build_pair_list(natom, list_ij, set_list_ij, iatom_start, iatom_end, &
1242 : jatom_start, jatom_end, &
1243 : kind_of, basis_parameter, particle_set, &
1244 : do_periodic, screen_coeffs_set, screen_coeffs_kind, &
1245 : coeffs_kind_max0, log10_eps_schwarz, cell, pmax_blocks, &
1246 : shm_atomic_pair_list)
1247 :
1248 : CALL build_pair_list(natom, list_kl, set_list_kl, katom_start, katom_end, &
1249 : latom_start, latom_end, &
1250 : kind_of, basis_parameter, particle_set, &
1251 : do_periodic, screen_coeffs_set, screen_coeffs_kind, &
1252 : coeffs_kind_max0, log10_eps_schwarz, cell, pmax_blocks, &
1253 : shm_atomic_pair_list)
1254 :
1255 : DO i_list_ij = 1, list_ij%n_element
1256 :
1257 : iatom = list_ij%elements(i_list_ij)%pair(1)
1258 : jatom = list_ij%elements(i_list_ij)%pair(2)
1259 : i_set_list_ij_start = list_ij%elements(i_list_ij)%set_bounds(1)
1260 : i_set_list_ij_stop = list_ij%elements(i_list_ij)%set_bounds(2)
1261 : ikind = list_ij%elements(i_list_ij)%kind_pair(1)
1262 : jkind = list_ij%elements(i_list_ij)%kind_pair(2)
1263 : ra = list_ij%elements(i_list_ij)%r1
1264 : rb = list_ij%elements(i_list_ij)%r2
1265 : rab2 = list_ij%elements(i_list_ij)%dist2
1266 :
1267 : la_max => basis_parameter(ikind)%lmax
1268 : la_min => basis_parameter(ikind)%lmin
1269 : npgfa => basis_parameter(ikind)%npgf
1270 : nseta = basis_parameter(ikind)%nset
1271 : zeta => basis_parameter(ikind)%zet
1272 : nsgfa => basis_parameter(ikind)%nsgf
1273 : sphi_a_ext => basis_parameter(ikind)%sphi_ext(:, :, :, :)
1274 : nsgfl_a => basis_parameter(ikind)%nsgfl
1275 : sphi_a_u1 = UBOUND(sphi_a_ext, 1)
1276 : sphi_a_u2 = UBOUND(sphi_a_ext, 2)
1277 : sphi_a_u3 = UBOUND(sphi_a_ext, 3)
1278 :
1279 : lb_max => basis_parameter(jkind)%lmax
1280 : lb_min => basis_parameter(jkind)%lmin
1281 : npgfb => basis_parameter(jkind)%npgf
1282 : nsetb = basis_parameter(jkind)%nset
1283 : zetb => basis_parameter(jkind)%zet
1284 : nsgfb => basis_parameter(jkind)%nsgf
1285 : sphi_b_ext => basis_parameter(jkind)%sphi_ext(:, :, :, :)
1286 : nsgfl_b => basis_parameter(jkind)%nsgfl
1287 : sphi_b_u1 = UBOUND(sphi_b_ext, 1)
1288 : sphi_b_u2 = UBOUND(sphi_b_ext, 2)
1289 : sphi_b_u3 = UBOUND(sphi_b_ext, 3)
1290 :
1291 : DO i_list_kl = 1, list_kl%n_element
1292 : katom = list_kl%elements(i_list_kl)%pair(1)
1293 : latom = list_kl%elements(i_list_kl)%pair(2)
1294 :
1295 : IF (.NOT. (katom + latom <= iatom + jatom)) CYCLE
1296 : IF (((iatom + jatom) == (katom + latom)) .AND. (katom < iatom)) CYCLE
1297 : i_set_list_kl_start = list_kl%elements(i_list_kl)%set_bounds(1)
1298 : i_set_list_kl_stop = list_kl%elements(i_list_kl)%set_bounds(2)
1299 : kkind = list_kl%elements(i_list_kl)%kind_pair(1)
1300 : lkind = list_kl%elements(i_list_kl)%kind_pair(2)
1301 : rc = list_kl%elements(i_list_kl)%r1
1302 : rd = list_kl%elements(i_list_kl)%r2
1303 : rcd2 = list_kl%elements(i_list_kl)%dist2
1304 :
1305 : IF (do_p_screening) THEN
1306 : pmax_atom = MAX(shm_pmax_atom(katom, iatom), &
1307 : shm_pmax_atom(latom, jatom), &
1308 : shm_pmax_atom(latom, iatom), &
1309 : shm_pmax_atom(katom, jatom))
1310 : ELSE
1311 : pmax_atom = 0.0_dp
1312 : END IF
1313 :
1314 : screen_kind_ij = screen_coeffs_kind(jkind, ikind)%x(1)*rab2 + &
1315 : screen_coeffs_kind(jkind, ikind)%x(2)
1316 : screen_kind_kl = screen_coeffs_kind(lkind, kkind)%x(1)*rcd2 + &
1317 : screen_coeffs_kind(lkind, kkind)%x(2)
1318 :
1319 : IF (screen_kind_ij + screen_kind_kl + pmax_atom < log10_eps_schwarz) CYCLE
1320 :
1321 : !! we want to be consistent with the KS matrix. If none of the atomic indices
1322 : !! is associated cycle
1323 : IF (.NOT. (shm_is_assoc_atomic_block(latom, iatom) >= 1 .AND. &
1324 : shm_is_assoc_atomic_block(katom, iatom) >= 1 .AND. &
1325 : shm_is_assoc_atomic_block(katom, jatom) >= 1 .AND. &
1326 : shm_is_assoc_atomic_block(latom, jatom) >= 1)) CYCLE
1327 :
1328 : !! calculate symmetry_factor according to degeneracy of atomic quartet
1329 : symm_fac = 0.5_dp
1330 : IF (iatom == jatom) symm_fac = symm_fac*2.0_dp
1331 : IF (katom == latom) symm_fac = symm_fac*2.0_dp
1332 : IF (iatom == katom .AND. jatom == latom .AND. iatom /= jatom .AND. katom /= latom) symm_fac = symm_fac*2.0_dp
1333 : IF (iatom == katom .AND. iatom == jatom .AND. katom == latom) symm_fac = symm_fac*2.0_dp
1334 : symm_fac = 1.0_dp/symm_fac
1335 :
1336 : lc_max => basis_parameter(kkind)%lmax
1337 : lc_min => basis_parameter(kkind)%lmin
1338 : npgfc => basis_parameter(kkind)%npgf
1339 : zetc => basis_parameter(kkind)%zet
1340 : nsgfc => basis_parameter(kkind)%nsgf
1341 : sphi_c_ext => basis_parameter(kkind)%sphi_ext(:, :, :, :)
1342 : nsgfl_c => basis_parameter(kkind)%nsgfl
1343 : sphi_c_u1 = UBOUND(sphi_c_ext, 1)
1344 : sphi_c_u2 = UBOUND(sphi_c_ext, 2)
1345 : sphi_c_u3 = UBOUND(sphi_c_ext, 3)
1346 :
1347 : ld_max => basis_parameter(lkind)%lmax
1348 : ld_min => basis_parameter(lkind)%lmin
1349 : npgfd => basis_parameter(lkind)%npgf
1350 : zetd => basis_parameter(lkind)%zet
1351 : nsgfd => basis_parameter(lkind)%nsgf
1352 : sphi_d_ext => basis_parameter(lkind)%sphi_ext(:, :, :, :)
1353 : nsgfl_d => basis_parameter(lkind)%nsgfl
1354 : sphi_d_u1 = UBOUND(sphi_d_ext, 1)
1355 : sphi_d_u2 = UBOUND(sphi_d_ext, 2)
1356 : sphi_d_u3 = UBOUND(sphi_d_ext, 3)
1357 :
1358 : ! Note: shm_atomic_block_offset is already symmetric
1359 : ! see get_atomic_block_maps in hfx_communication.F
1360 : atomic_offset_bd = shm_atomic_block_offset(jatom, latom)
1361 : atomic_offset_bc = shm_atomic_block_offset(jatom, katom)
1362 : atomic_offset_ad = shm_atomic_block_offset(iatom, latom)
1363 : atomic_offset_ac = shm_atomic_block_offset(iatom, katom)
1364 :
1365 : IF (jatom < latom) THEN
1366 : offset_bd_set => shm_set_offset(:, :, lkind, jkind)
1367 : ELSE
1368 : offset_bd_set => shm_set_offset(:, :, jkind, lkind)
1369 : END IF
1370 : IF (jatom < katom) THEN
1371 : offset_bc_set => shm_set_offset(:, :, kkind, jkind)
1372 : ELSE
1373 : offset_bc_set => shm_set_offset(:, :, jkind, kkind)
1374 : END IF
1375 : IF (iatom < latom) THEN
1376 : offset_ad_set => shm_set_offset(:, :, lkind, ikind)
1377 : ELSE
1378 : offset_ad_set => shm_set_offset(:, :, ikind, lkind)
1379 : END IF
1380 : IF (iatom < katom) THEN
1381 : offset_ac_set => shm_set_offset(:, :, kkind, ikind)
1382 : ELSE
1383 : offset_ac_set => shm_set_offset(:, :, ikind, kkind)
1384 : END IF
1385 :
1386 : IF (do_p_screening) THEN
1387 : swap_id = 0
1388 : kind_kind_idx = INT(get_1D_idx(kkind, ikind, INT(nkind, int_8)))
1389 : IF (ikind >= kkind) THEN
1390 : ptr_p_1 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1391 : actual_x_data%map_atom_to_kind_atom(katom), &
1392 : actual_x_data%map_atom_to_kind_atom(iatom))
1393 : ELSE
1394 : ptr_p_1 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1395 : actual_x_data%map_atom_to_kind_atom(iatom), &
1396 : actual_x_data%map_atom_to_kind_atom(katom))
1397 : swap_id = swap_id + 1
1398 : END IF
1399 : kind_kind_idx = INT(get_1D_idx(lkind, jkind, INT(nkind, int_8)))
1400 : IF (jkind >= lkind) THEN
1401 : ptr_p_2 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1402 : actual_x_data%map_atom_to_kind_atom(latom), &
1403 : actual_x_data%map_atom_to_kind_atom(jatom))
1404 : ELSE
1405 : ptr_p_2 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1406 : actual_x_data%map_atom_to_kind_atom(jatom), &
1407 : actual_x_data%map_atom_to_kind_atom(latom))
1408 : swap_id = swap_id + 2
1409 : END IF
1410 : kind_kind_idx = INT(get_1D_idx(lkind, ikind, INT(nkind, int_8)))
1411 : IF (ikind >= lkind) THEN
1412 : ptr_p_3 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1413 : actual_x_data%map_atom_to_kind_atom(latom), &
1414 : actual_x_data%map_atom_to_kind_atom(iatom))
1415 : ELSE
1416 : ptr_p_3 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1417 : actual_x_data%map_atom_to_kind_atom(iatom), &
1418 : actual_x_data%map_atom_to_kind_atom(latom))
1419 : swap_id = swap_id + 4
1420 : END IF
1421 : kind_kind_idx = INT(get_1D_idx(kkind, jkind, INT(nkind, int_8)))
1422 : IF (jkind >= kkind) THEN
1423 : ptr_p_4 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1424 : actual_x_data%map_atom_to_kind_atom(katom), &
1425 : actual_x_data%map_atom_to_kind_atom(jatom))
1426 : ELSE
1427 : ptr_p_4 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1428 : actual_x_data%map_atom_to_kind_atom(jatom), &
1429 : actual_x_data%map_atom_to_kind_atom(katom))
1430 : swap_id = swap_id + 8
1431 : END IF
1432 : END IF
1433 :
1434 : !! At this stage, check for memory used in compression
1435 : IF (my_geo_change) THEN
1436 : IF (.NOT. memory_parameter%do_all_on_the_fly) THEN
1437 : ! ** We know the maximum amount of integrals that we can store per MPI-process
1438 : ! ** Now we need to sum the current memory usage among all openMP threads
1439 : ! ** We can just read what is currently stored on the corresponding x_data type
1440 : ! ** If this is thread i and it tries to read the data from thread j, that is
1441 : ! ** currently writing that data, we just dont care, because the possible error
1442 : ! ** is of the order of CACHE_SIZE
1443 : mem_compression_counter = 0
1444 : DO i = 1, n_threads
1445 : !$OMP ATOMIC READ
1446 : tmp_i4 = x_data(irep, i)%memory_parameter%actual_memory_usage
1447 : mem_compression_counter = mem_compression_counter + &
1448 : tmp_i4*memory_parameter%cache_size
1449 : END DO
1450 : IF (mem_compression_counter > memory_parameter%max_compression_counter) THEN
1451 : buffer_overflow = .TRUE.
1452 : IF (do_dynamic_load_balancing) THEN
1453 : distribution_energy%ram_counter = counter
1454 : ELSE
1455 : memory_parameter%ram_counter = counter
1456 : END IF
1457 : ELSE
1458 : counter = counter + 1
1459 : buffer_overflow = .FALSE.
1460 : END IF
1461 : END IF
1462 : ELSE
1463 : IF (.NOT. memory_parameter%do_all_on_the_fly) THEN
1464 : IF (do_dynamic_load_balancing) THEN
1465 : IF (distribution_energy%ram_counter == counter) THEN
1466 : buffer_overflow = .TRUE.
1467 : ELSE
1468 : counter = counter + 1
1469 : buffer_overflow = .FALSE.
1470 : END IF
1471 :
1472 : ELSE
1473 : IF (memory_parameter%ram_counter == counter) THEN
1474 : buffer_overflow = .TRUE.
1475 : ELSE
1476 : counter = counter + 1
1477 : buffer_overflow = .FALSE.
1478 : END IF
1479 : END IF
1480 : END IF
1481 : END IF
1482 :
1483 : IF (buffer_overflow .AND. do_disk_storage) THEN
1484 : use_disk_storage = .TRUE.
1485 : buffer_overflow = .FALSE.
1486 : END IF
1487 :
1488 : IF (use_disk_storage) THEN
1489 : !$OMP ATOMIC READ
1490 : tmp_i4 = memory_parameter%actual_memory_usage_disk
1491 : mem_compression_counter_disk = tmp_i4*memory_parameter%cache_size
1492 : IF (mem_compression_counter_disk > memory_parameter%max_compression_counter_disk) THEN
1493 : buffer_overflow = .TRUE.
1494 : use_disk_storage = .FALSE.
1495 : END IF
1496 : END IF
1497 :
1498 : DO i_set_list_ij = i_set_list_ij_start, i_set_list_ij_stop
1499 : iset = set_list_ij(i_set_list_ij)%pair(1)
1500 : jset = set_list_ij(i_set_list_ij)%pair(2)
1501 :
1502 : ncob = npgfb(jset)*ncoset(lb_max(jset))
1503 : max_val1 = screen_coeffs_set(jset, iset, jkind, ikind)%x(1)*rab2 + &
1504 : screen_coeffs_set(jset, iset, jkind, ikind)%x(2)
1505 :
1506 : IF (max_val1 + screen_kind_kl + pmax_atom < log10_eps_schwarz) CYCLE
1507 :
1508 : sphi_a_ext_set => sphi_a_ext(:, :, :, iset)
1509 : sphi_b_ext_set => sphi_b_ext(:, :, :, jset)
1510 : DO i_set_list_kl = i_set_list_kl_start, i_set_list_kl_stop
1511 : kset = set_list_kl(i_set_list_kl)%pair(1)
1512 : lset = set_list_kl(i_set_list_kl)%pair(2)
1513 :
1514 : max_val2_set = (screen_coeffs_set(lset, kset, lkind, kkind)%x(1)*rcd2 + &
1515 : screen_coeffs_set(lset, kset, lkind, kkind)%x(2))
1516 : max_val2 = max_val1 + max_val2_set
1517 :
1518 : !! Near field screening
1519 : IF (max_val2 + pmax_atom < log10_eps_schwarz) CYCLE
1520 : sphi_c_ext_set => sphi_c_ext(:, :, :, kset)
1521 : sphi_d_ext_set => sphi_d_ext(:, :, :, lset)
1522 : !! get max_vals if we screen on initial density
1523 : IF (do_p_screening) THEN
1524 : CALL get_pmax_val(ptr_p_1, ptr_p_2, ptr_p_3, ptr_p_4, &
1525 : iset, jset, kset, lset, &
1526 : pmax_entry, swap_id)
1527 : ELSE
1528 : pmax_entry = 0.0_dp
1529 : END IF
1530 : log10_pmax = pmax_entry
1531 : max_val2 = max_val2 + log10_pmax
1532 : IF (max_val2 < log10_eps_schwarz) CYCLE
1533 : pmax_entry = EXP(log10_pmax*ln_10)
1534 :
1535 : ! libGint can only run in direct mode at the moment
1536 : ! so there is no need to check the buffers
1537 : if (use_libGint) then
1538 : tmp_screen_pgf1 => screen_coeffs_pgf(:, :, jset, iset, jkind, ikind)
1539 : tmp_screen_pgf2 => screen_coeffs_pgf(:, :, lset, kset, lkind, kkind)
1540 : ! TODO a more modern approach in which the call is more like
1541 : ! call coulom_4_gpu( HFX_data, iset, jset, kset, lset )
1542 : call libGint_coulomb4( &
1543 : iatom, jatom, katom, latom, iset, jset, kset, lset, &
1544 : ra, rb, rc, rd, &
1545 : npgfa(iset), npgfb(jset), npgfc(kset), npgfd(lset), &
1546 : potential_parameter, &
1547 : screen_coeffs_set(jset, iset, jkind, ikind)%x, &
1548 : screen_coeffs_set(lset, kset, lkind, kkind)%x, &
1549 : log10_pmax, log10_eps_schwarz, &
1550 : tmp_screen_pgf1, tmp_screen_pgf2, &
1551 : actual_x_data%neighbor_cells, cell, do_periodic, screened)
1552 :
1553 : if (.not. screened) then
1554 :
1555 : ! TODO a more modern approach in which the call is more like
1556 : ! call update_fock_matrix_gpu( HFX_data, iset, jset, kset, lset )
1557 : call libGint_update_fock_matrix( &
1558 : symm_fac, &
1559 : iatom, jatom, katom, latom, &
1560 : iset, jset, kset, lset, &
1561 : atomic_offset_ac, atomic_offset_ad, atomic_offset_bc, atomic_offset_bd, &
1562 : offset_ac_set, offset_ad_set, offset_bc_set, offset_bd_set, &
1563 : nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1564 : la_min(iset), la_max(iset), lb_min(jset), lb_max(jset), &
1565 : lc_min(kset), lc_max(kset), ld_min(lset), ld_max(lset), &
1566 : nsgfl_a, nsgfl_b, nsgfl_c, nsgfl_d)
1567 :
1568 : end if
1569 : end if
1570 : ! Run using libint as integral engine
1571 : if_use_libint: if (use_libint) then
1572 : !! store current number of integrals, update total number and number of integrals in buffer
1573 : current_counter = nsgfa(iset)*nsgfb(jset)*nsgfc(kset)*nsgfd(lset)
1574 : IF (buffer_overflow) THEN
1575 : neris_onthefly = neris_onthefly + current_counter
1576 : END IF
1577 :
1578 : !! Get integrals from buffer and update Kohn-Sham matrix
1579 : IF (.NOT. buffer_overflow .AND. .NOT. my_geo_change) THEN
1580 : nints = current_counter
1581 : IF (.NOT. use_disk_storage) THEN
1582 : CALL hfx_get_single_cache_element( &
1583 : estimate_to_store_int, 6, &
1584 : maxval_cache, maxval_container, memory_parameter%actual_memory_usage, &
1585 : use_disk_storage)
1586 : ELSE
1587 : CALL hfx_get_single_cache_element( &
1588 : estimate_to_store_int, 6, &
1589 : maxval_cache_disk, maxval_container_disk, memory_parameter%actual_memory_usage_disk, &
1590 : use_disk_storage)
1591 : END IF
1592 : spherical_estimate = SET_EXPONENT(1.0_dp, estimate_to_store_int + 1)
1593 : IF (spherical_estimate*pmax_entry < eps_schwarz) CYCLE
1594 : nbits = EXPONENT(ANINT(spherical_estimate*pmax_entry/eps_storage)) + 1
1595 : buffer_left = nints
1596 : buffer_start = 1
1597 : IF (.NOT. use_disk_storage) THEN
1598 : neris_incore = neris_incore + INT(nints, int_8)
1599 : ELSE
1600 : neris_disk = neris_disk + INT(nints, int_8)
1601 : END IF
1602 : DO WHILE (buffer_left > 0)
1603 : buffer_size = MIN(buffer_left, cache_size)
1604 : IF (.NOT. use_disk_storage) THEN
1605 : CALL hfx_get_mult_cache_elements(primitive_integrals(buffer_start), &
1606 : buffer_size, nbits, &
1607 : integral_caches(nbits), &
1608 : integral_containers(nbits), &
1609 : eps_storage, pmax_entry, &
1610 : memory_parameter%actual_memory_usage, &
1611 : use_disk_storage)
1612 : ELSE
1613 : CALL hfx_get_mult_cache_elements(primitive_integrals(buffer_start), &
1614 : buffer_size, nbits, &
1615 : integral_caches_disk(nbits), &
1616 : integral_containers_disk(nbits), &
1617 : eps_storage, pmax_entry, &
1618 : memory_parameter%actual_memory_usage_disk, &
1619 : use_disk_storage)
1620 : END IF
1621 : buffer_left = buffer_left - buffer_size
1622 : buffer_start = buffer_start + buffer_size
1623 : END DO
1624 : END IF
1625 : !! Calculate integrals if we run out of buffer or the geometry did change
1626 : IF (my_geo_change .OR. buffer_overflow) THEN
1627 :
1628 : max_contraction_val = max_contraction(iset, iatom)* &
1629 : max_contraction(jset, jatom)* &
1630 : max_contraction(kset, katom)* &
1631 : max_contraction(lset, latom)*pmax_entry
1632 : tmp_R_1 => radii_pgf(:, :, jset, iset, jkind, ikind)
1633 : tmp_R_2 => radii_pgf(:, :, lset, kset, lkind, kkind)
1634 : tmp_screen_pgf1 => screen_coeffs_pgf(:, :, jset, iset, jkind, ikind)
1635 : tmp_screen_pgf2 => screen_coeffs_pgf(:, :, lset, kset, lkind, kkind)
1636 :
1637 : CALL coulomb4(private_lib, ra, rb, rc, rd, npgfa(iset), npgfb(jset), npgfc(kset), npgfd(lset), &
1638 : la_min(iset), la_max(iset), lb_min(jset), lb_max(jset), &
1639 : lc_min(kset), lc_max(kset), ld_min(lset), ld_max(lset), &
1640 : nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1641 : sphi_a_u1, sphi_a_u2, sphi_a_u3, &
1642 : sphi_b_u1, sphi_b_u2, sphi_b_u3, &
1643 : sphi_c_u1, sphi_c_u2, sphi_c_u3, &
1644 : sphi_d_u1, sphi_d_u2, sphi_d_u3, &
1645 : zeta(1:npgfa(iset), iset), zetb(1:npgfb(jset), jset), &
1646 : zetc(1:npgfc(kset), kset), zetd(1:npgfd(lset), lset), &
1647 : primitive_integrals, &
1648 : potential_parameter, &
1649 : actual_x_data%neighbor_cells, screen_coeffs_set(jset, iset, jkind, ikind)%x, &
1650 : screen_coeffs_set(lset, kset, lkind, kkind)%x, eps_schwarz, &
1651 : max_contraction_val, cartesian_estimate, cell, neris_tmp, &
1652 : log10_pmax, log10_eps_schwarz, &
1653 : tmp_R_1, tmp_R_2, tmp_screen_pgf1, tmp_screen_pgf2, &
1654 : pgf_list_ij, pgf_list_kl, pgf_product_list, &
1655 : nsgfl_a(:, iset), nsgfl_b(:, jset), &
1656 : nsgfl_c(:, kset), nsgfl_d(:, lset), &
1657 : sphi_a_ext_set, &
1658 : sphi_b_ext_set, &
1659 : sphi_c_ext_set, &
1660 : sphi_d_ext_set, &
1661 : ee_work, ee_work2, ee_buffer1, ee_buffer2, ee_primitives_tmp, &
1662 : nimages, do_periodic, p_work)
1663 :
1664 : nints = nsgfa(iset)*nsgfb(jset)*nsgfc(kset)*nsgfd(lset)
1665 : neris_total = neris_total + nints
1666 : nprim_ints = nprim_ints + neris_tmp
1667 :
1668 : !! Compress the array for storage
1669 : spherical_estimate = 0.0_dp
1670 : DO i = 1, nints
1671 : spherical_estimate = MAX(spherical_estimate, ABS(primitive_integrals(i)))
1672 : END DO
1673 :
1674 : IF (spherical_estimate == 0.0_dp) spherical_estimate = TINY(spherical_estimate)
1675 : estimate_to_store_int = EXPONENT(spherical_estimate)
1676 : estimate_to_store_int = MAX(estimate_to_store_int, -15_int_8)
1677 :
1678 : IF (.NOT. buffer_overflow .AND. my_geo_change) THEN
1679 : IF (.NOT. use_disk_storage) THEN
1680 : CALL hfx_add_single_cache_element( &
1681 : estimate_to_store_int, 6, &
1682 : maxval_cache, maxval_container, memory_parameter%actual_memory_usage, &
1683 : use_disk_storage, max_val_memory)
1684 : ELSE
1685 : CALL hfx_add_single_cache_element( &
1686 : estimate_to_store_int, 6, &
1687 : maxval_cache_disk, maxval_container_disk, memory_parameter%actual_memory_usage_disk, &
1688 : use_disk_storage)
1689 : END IF
1690 : END IF
1691 : spherical_estimate = SET_EXPONENT(1.0_dp, estimate_to_store_int + 1)
1692 : IF (spherical_estimate*pmax_entry < eps_schwarz) CYCLE
1693 : IF (.NOT. buffer_overflow) THEN
1694 : nbits = EXPONENT(ANINT(spherical_estimate*pmax_entry/eps_storage)) + 1
1695 :
1696 : ! In case of a tight eps_storage threshold the number of significant
1697 : ! bits in the integer number NINT(value*pmax_entry/eps_storage) may
1698 : ! exceed the width of the storage element. As the compression algorithm
1699 : ! is designed for IEEE 754 double precision numbers, a 64-bit signed
1700 : ! integer variable which is used to store the result of this float-to-
1701 : ! integer conversion (we have no wish to use more memory for storing
1702 : ! compressed ERIs than it is needed for uncompressed ERIs) may overflow.
1703 : ! Abort with a meaningful message when it happens.
1704 : !
1705 : ! The magic number 63 stands for the number of magnitude bits
1706 : ! (64 bits minus one sign bit).
1707 : IF (nbits > 63) THEN
1708 : WRITE (eps_schwarz_min_str, '(ES10.3E2)') &
1709 : spherical_estimate*pmax_entry/ &
1710 : (SET_EXPONENT(1.0_dp, 63)*memory_parameter%eps_storage_scaling)
1711 :
1712 : WRITE (eps_scaling_str, '(ES10.3E2)') &
1713 : spherical_estimate*pmax_entry/(SET_EXPONENT(1.0_dp, 63)*eps_schwarz)
1714 :
1715 : CALL cp_abort(__LOCATION__, &
1716 : "Overflow during ERI's compression. Please use a larger "// &
1717 : "EPS_SCHWARZ threshold (above "//TRIM(ADJUSTL(eps_schwarz_min_str))// &
1718 : ") or increase the EPS_STORAGE_SCALING factor above "// &
1719 : TRIM(ADJUSTL(eps_scaling_str))//".")
1720 : END IF
1721 :
1722 : buffer_left = nints
1723 : buffer_start = 1
1724 : IF (.NOT. use_disk_storage) THEN
1725 : neris_incore = neris_incore + INT(nints, int_8)
1726 : ELSE
1727 : neris_disk = neris_disk + INT(nints, int_8)
1728 : END IF
1729 : DO WHILE (buffer_left > 0)
1730 : buffer_size = MIN(buffer_left, CACHE_SIZE)
1731 : IF (.NOT. use_disk_storage) THEN
1732 : CALL hfx_add_mult_cache_elements(primitive_integrals(buffer_start), &
1733 : buffer_size, nbits, &
1734 : integral_caches(nbits), &
1735 : integral_containers(nbits), &
1736 : eps_storage, pmax_entry, &
1737 : memory_parameter%actual_memory_usage, &
1738 : use_disk_storage)
1739 : ELSE
1740 : CALL hfx_add_mult_cache_elements(primitive_integrals(buffer_start), &
1741 : buffer_size, nbits, &
1742 : integral_caches_disk(nbits), &
1743 : integral_containers_disk(nbits), &
1744 : eps_storage, pmax_entry, &
1745 : memory_parameter%actual_memory_usage_disk, &
1746 : use_disk_storage)
1747 : END IF
1748 : buffer_left = buffer_left - buffer_size
1749 : buffer_start = buffer_start + buffer_size
1750 : END DO
1751 : ELSE
1752 : !! In order to be consistent with in-core part, round all the eris wrt. eps_schwarz
1753 : DO i = 1, nints
1754 : primitive_integrals(i) = primitive_integrals(i)*pmax_entry
1755 : IF (ABS(primitive_integrals(i)) > eps_storage) THEN
1756 : primitive_integrals(i) = ANINT(primitive_integrals(i)/eps_storage, dp)*eps_storage/pmax_entry
1757 : ELSE
1758 : primitive_integrals(i) = 0.0_dp
1759 : END IF
1760 : END DO
1761 : END IF
1762 : END IF
1763 : !!! DEBUG, print out primitive integrals and indices. Only works serial no OMP !!!
1764 : IF (.FALSE.) THEN
1765 : CALL print_integrals( &
1766 : iatom, jatom, katom, latom, shm_set_offset, shm_atomic_block_offset, &
1767 : iset, jset, kset, lset, nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), primitive_integrals)
1768 : END IF
1769 : IF (.NOT. is_anti_symmetric) THEN
1770 : !! Update Kohn-Sham matrix
1771 : CALL update_fock_matrix( &
1772 : nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1773 : fac, symm_fac, full_density_alpha(:, 1), full_ks_alpha(:, 1), &
1774 : primitive_integrals, pbd_buf, pbc_buf, pad_buf, pac_buf, kbd_buf, &
1775 : kbc_buf, kad_buf, kac_buf, iatom, jatom, katom, latom, &
1776 : iset, jset, kset, lset, offset_bd_set, offset_bc_set, offset_ad_set, offset_ac_set, &
1777 : atomic_offset_bd, atomic_offset_bc, atomic_offset_ad, atomic_offset_ac)
1778 : IF (.NOT. treat_lsd_in_core) THEN
1779 : IF (my_nspins == 2) THEN
1780 : CALL update_fock_matrix( &
1781 : nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1782 : fac, symm_fac, full_density_beta(:, 1), full_ks_beta(:, 1), &
1783 : primitive_integrals, pbd_buf, pbc_buf, pad_buf, pac_buf, kbd_buf, &
1784 : kbc_buf, kad_buf, kac_buf, iatom, jatom, katom, latom, &
1785 : iset, jset, kset, lset, offset_bd_set, offset_bc_set, offset_ad_set, offset_ac_set, &
1786 : atomic_offset_bd, atomic_offset_bc, atomic_offset_ad, atomic_offset_ac)
1787 : END IF
1788 : END IF
1789 : ELSE
1790 : !! Update Kohn-Sham matrix
1791 : CALL update_fock_matrix_as( &
1792 : nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1793 : fac, symm_fac, full_density_alpha(:, 1), full_ks_alpha(:, 1), &
1794 : primitive_integrals, pbd_buf, pbc_buf, pad_buf, pac_buf, kbd_buf, &
1795 : kbc_buf, kad_buf, kac_buf, iatom, jatom, katom, latom, &
1796 : iset, jset, kset, lset, offset_bd_set, offset_bc_set, offset_ad_set, offset_ac_set, &
1797 : atomic_offset_bd, atomic_offset_bc, atomic_offset_ad, atomic_offset_ac)
1798 : IF (.NOT. treat_lsd_in_core) THEN
1799 : IF (my_nspins == 2) THEN
1800 : CALL update_fock_matrix_as( &
1801 : nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1802 : fac, symm_fac, full_density_beta(:, 1), full_ks_beta(:, 1), &
1803 : primitive_integrals, pbd_buf, pbc_buf, pad_buf, pac_buf, kbd_buf, &
1804 : kbc_buf, kad_buf, kac_buf, iatom, jatom, katom, latom, &
1805 : iset, jset, kset, lset, offset_bd_set, offset_bc_set, offset_ad_set, offset_ac_set, &
1806 : atomic_offset_bd, atomic_offset_bc, atomic_offset_ad, atomic_offset_ac)
1807 : END IF
1808 : END IF
1809 : END IF
1810 : END IF if_use_libint
1811 : END DO ! i_set_list_kl
1812 : END DO ! i_set_list_ij
1813 : IF (do_disk_storage) THEN
1814 : buffer_overflow = .TRUE.
1815 : END IF
1816 : END DO !i_list_ij
1817 : END DO !i_list_kl
1818 : END DO atomic_blocks
1819 : bintime_stop = m_walltime()
1820 : IF (my_geo_change) THEN
1821 : distribution_energy%time_first_scf = bintime_stop - bintime_start
1822 : ELSE
1823 : distribution_energy%time_other_scf = &
1824 : distribution_energy%time_other_scf + bintime_stop - bintime_start
1825 : END IF
1826 : !$OMP MASTER
1827 : CALL timestop(handle_bin)
1828 : !$OMP END MASTER
1829 : END DO !bin
1830 :
1831 : !!!
1832 : ! IF running both libint and libGint, uses the values from libint to check libGint values
1833 : if (use_libGint .and. use_libint) then
1834 : ! barrier needed after allocate full_ks_alpha(beta)_from_gpu before libGint get K
1835 : ! to feed libgint a valid position for full_ks_alpha(beta)_from_gpu
1836 : allocate (full_ks_alpha_from_gpu, Mold=full_ks_alpha)
1837 : !$OMP BARRIER
1838 : if (use_only_alpha_spin) then
1839 : call libGint_get_fock_matrix(full_ks_alpha_from_gpu)
1840 : else
1841 : allocate (full_ks_beta_from_gpu, Mold=full_ks_beta)
1842 : !$OMP BARRIER
1843 : call libGint_get_fock_matrix(full_ks_alpha_from_gpu, full_ks_beta_from_gpu)
1844 : end if
1845 :
1846 : !$OMP BARRIER
1847 : !$omp single
1848 : max_abs_delta_KS = 0.0_dp
1849 : do i = 1, size(full_ks_alpha)
1850 : max_abs_delta_KS = max(max_abs_delta_ks, abs(full_ks_alpha(i, 1) - full_ks_alpha_from_gpu(i, 1)))
1851 : end do
1852 : write (UNIT=iw, FMT=*) " ------------------------------------ "
1853 : write (UNIT=iw, FMT=*) " | rank ", para_env%mepos
1854 : write (UNIT=iw, FMT=*) " | max abs dif: ", max_abs_delta_KS
1855 : write (UNIT=iw, FMT=*) " ------------------------------------ "
1856 :
1857 : ! Print all F if delta is too large or (suspiciously) too small
1858 : if (max_abs_delta_KS > 100*eps_schwarz .or. max_abs_delta_KS < 1.E-20) then
1859 : write (UNIT=iw, FMT=*) " K from CPU | K from GPU | Delta | Ratio "
1860 : do i = 1, size(full_ks_alpha)
1861 : write (UNIT=iw, FMT=*) i, full_ks_alpha(i, 1), full_ks_alpha_from_gpu(i, 1), &
1862 : full_ks_alpha(i, 1) - full_ks_alpha_from_gpu(i, 1), &
1863 : full_ks_alpha(i, 1)/full_ks_alpha_from_gpu(i, 1)
1864 : end do
1865 : end if
1866 :
1867 : if (.not. use_only_alpha_spin) then
1868 : max_abs_delta_KS = 0.0_dp
1869 : do i = 1, size(full_ks_beta)
1870 : max_abs_delta_KS = max(max_abs_delta_ks, abs(full_ks_beta(i, 1) - full_ks_beta_from_gpu(i, 1)))
1871 : end do
1872 : write (UNIT=iw, FMT=*) " ------------------------------------ "
1873 : write (UNIT=iw, FMT=*) " | rank ", para_env%mepos
1874 : write (UNIT=iw, FMT=*) " | max abs dif B: ", max_abs_delta_KS
1875 : write (UNIT=iw, FMT=*) " ------------------------------------ "
1876 :
1877 : ! Print all F if delta is too large or (suspiciously) too small
1878 : if (max_abs_delta_KS > 100*eps_schwarz .or. max_abs_delta_KS < 1.E-18) then
1879 : write (UNIT=iw, FMT=*) " BETA K from CPU | K from GPU | Delta | Ratio "
1880 : do i = 1, size(full_ks_beta)
1881 : write (UNIT=iw, FMT=*) i, full_ks_beta(i, 1), full_ks_beta_from_gpu(i, 1), &
1882 : full_ks_beta(i, 1) - full_ks_beta_from_gpu(i, 1), &
1883 : full_ks_beta(i, 1)/full_ks_beta_from_gpu(i, 1)
1884 : end do
1885 : end if
1886 : end if
1887 : !$omp end single
1888 : end if
1889 :
1890 : ! Copy the KS matrix from device to host.
1891 : if (use_libGint .and. .not. use_libint) then
1892 : if (use_only_alpha_spin) then
1893 : call libGint_get_fock_matrix(full_ks_alpha)
1894 : else
1895 : call libGint_get_fock_matrix(full_ks_alpha, full_ks_beta)
1896 : end if
1897 : end if
1898 :
1899 : !$OMP MASTER
1900 : logger => cp_get_default_logger()
1901 : do_print_load_balance_info = .FALSE.
1902 : do_print_load_balance_info = BTEST(cp_print_key_should_output(logger%iter_info, hfx_section, &
1903 : "LOAD_BALANCE%PRINT/LOAD_BALANCE_INFO"), cp_p_file)
1904 : !$OMP END MASTER
1905 : !$OMP BARRIER
1906 : IF (do_print_load_balance_info) THEN
1907 : iw = -1
1908 : !$OMP MASTER
1909 : iw = cp_print_key_unit_nr(logger, hfx_section, "LOAD_BALANCE%PRINT/LOAD_BALANCE_INFO", &
1910 : extension=".scfLog")
1911 : !$OMP END MASTER
1912 :
1913 : CALL collect_load_balance_info(para_env, actual_x_data, iw, n_threads, i_thread, &
1914 : hfx_do_eval_energy)
1915 :
1916 : !$OMP MASTER
1917 : CALL cp_print_key_finished_output(iw, logger, hfx_section, &
1918 : "LOAD_BALANCE%PRINT/LOAD_BALANCE_INFO")
1919 : !$OMP END MASTER
1920 : END IF
1921 :
1922 : !$OMP BARRIER
1923 : !$OMP MASTER
1924 : CALL m_memory(memsize_after)
1925 : !$OMP END MASTER
1926 : !$OMP BARRIER
1927 :
1928 : DEALLOCATE (primitive_integrals)
1929 : !$OMP BARRIER
1930 : !! Get some number about ERIS
1931 : !$OMP ATOMIC
1932 : shm_neris_total = shm_neris_total + neris_total
1933 : !$OMP ATOMIC
1934 : shm_neris_onthefly = shm_neris_onthefly + neris_onthefly
1935 : !$OMP ATOMIC
1936 : shm_nprim_ints = shm_nprim_ints + nprim_ints
1937 :
1938 : storage_counter_integrals = memory_parameter%actual_memory_usage* &
1939 : memory_parameter%cache_size
1940 : stor_count_int_disk = memory_parameter%actual_memory_usage_disk* &
1941 : memory_parameter%cache_size
1942 : stor_count_max_val = max_val_memory*memory_parameter%cache_size
1943 : !$OMP ATOMIC
1944 : shm_storage_counter_integrals = shm_storage_counter_integrals + storage_counter_integrals
1945 : !$OMP ATOMIC
1946 : shm_stor_count_int_disk = shm_stor_count_int_disk + stor_count_int_disk
1947 : !$OMP ATOMIC
1948 : shm_neris_incore = shm_neris_incore + neris_incore
1949 : !$OMP ATOMIC
1950 : shm_neris_disk = shm_neris_disk + neris_disk
1951 : !$OMP ATOMIC
1952 : shm_stor_count_max_val = shm_stor_count_max_val + stor_count_max_val
1953 : !$OMP BARRIER
1954 :
1955 : ! ** Calculate how much memory has already been used (might be needed for in-core forces
1956 : !$OMP MASTER
1957 : shm_mem_compression_counter = 0
1958 : DO i = 1, n_threads
1959 : !$OMP ATOMIC READ
1960 : tmp_i4 = x_data(irep, i)%memory_parameter%actual_memory_usage
1961 : shm_mem_compression_counter = shm_mem_compression_counter + &
1962 : tmp_i4*memory_parameter%cache_size
1963 : END DO
1964 : !$OMP END MASTER
1965 : !$OMP BARRIER
1966 : actual_x_data%memory_parameter%final_comp_counter_energy = shm_mem_compression_counter
1967 :
1968 : !$OMP MASTER
1969 : !! Calculate the exchange energies from the Kohn-Sham matrix. Before we can go on, we have to symmetrize.
1970 : ene_x_aa = 0.0_dp
1971 : ene_x_bb = 0.0_dp
1972 :
1973 : mb_size_p = shm_block_offset(ncpu + 1)/1024/128
1974 : mb_size_f = shm_block_offset(ncpu + 1)/1024/128
1975 : IF (.NOT. treat_lsd_in_core) THEN
1976 : IF (my_nspins == 2) THEN
1977 : mb_size_f = mb_size_f*2
1978 : mb_size_p = mb_size_p*2
1979 : END IF
1980 : END IF
1981 : !! size of primitive_integrals(not shared)
1982 : mb_size_buffers = INT(nsgf_max, int_8)**4*n_threads
1983 : !! fock density buffers (not shared)
1984 : mb_size_buffers = mb_size_buffers + INT(nsgf_max, int_8)**2*n_threads
1985 : subtr_size_mb = subtr_size_mb + 8_int_8*nsgf_max**2*n_threads
1986 : !! size of screening functions (shared)
1987 : mb_size_buffers = mb_size_buffers + max_pgf**2*max_set**2*nkind**2 &
1988 : + max_set**2*nkind**2 &
1989 : + nkind**2 &
1990 : + max_pgf**2*max_set**2*nkind**2
1991 : !! is_assoc (shared)
1992 : mb_size_buffers = mb_size_buffers + natom**2
1993 : ! ** pmax_atom (shared)
1994 : IF (do_p_screening) THEN
1995 : mb_size_buffers = mb_size_buffers + natom**2
1996 : END IF
1997 : IF (screening_parameter%do_p_screening_forces) THEN
1998 : IF (memory_parameter%treat_forces_in_core) THEN
1999 : mb_size_buffers = mb_size_buffers + natom**2
2000 : END IF
2001 : END IF
2002 : ! ** Initial P only MAX(alpha,beta) (shared)
2003 : IF (do_p_screening .OR. screening_parameter%do_p_screening_forces) THEN
2004 : mb_size_buffers = mb_size_buffers + memory_parameter%size_p_screen
2005 : END IF
2006 : ! ** In core forces require their own initial P
2007 : IF (screening_parameter%do_p_screening_forces) THEN
2008 : IF (memory_parameter%treat_forces_in_core) THEN
2009 : mb_size_buffers = mb_size_buffers + memory_parameter%size_p_screen
2010 : END IF
2011 : END IF
2012 :
2013 : !! mb
2014 : mb_size_buffers = mb_size_buffers/1024/128
2015 :
2016 : afac = 1.0_dp
2017 : IF (is_anti_symmetric) afac = -1.0_dp
2018 : CALL timestop(handle_main)
2019 : ene_x_aa_diag = 0.0_dp
2020 : ene_x_bb_diag = 0.0_dp
2021 : DO iatom = 1, natom
2022 : ikind = kind_of(iatom)
2023 : nseta = basis_parameter(ikind)%nset
2024 : nsgfa => basis_parameter(ikind)%nsgf
2025 : jatom = iatom
2026 : jkind = kind_of(jatom)
2027 : nsetb = basis_parameter(jkind)%nset
2028 : nsgfb => basis_parameter(jkind)%nsgf
2029 : act_atomic_block_offset = shm_atomic_block_offset(jatom, iatom)
2030 : DO img = 1, nkimages
2031 : DO iset = 1, nseta
2032 : DO jset = 1, nsetb
2033 : act_set_offset = shm_set_offset(jset, iset, jkind, ikind)
2034 : i = act_set_offset + act_atomic_block_offset - 1
2035 : DO ma = 1, nsgfa(iset)
2036 : j = shm_set_offset(iset, jset, jkind, ikind) + act_atomic_block_offset - 1 + ma - 1
2037 : DO mb = 1, nsgfb(jset)
2038 : IF (i > j) THEN
2039 : full_ks_alpha(i, img) = (full_ks_alpha(i, img) + full_ks_alpha(j, img)*afac)
2040 : full_ks_alpha(j, img) = full_ks_alpha(i, img)*afac
2041 : IF (.NOT. treat_lsd_in_core .AND. my_nspins == 2) THEN
2042 : full_ks_beta(i, img) = (full_ks_beta(i, img) + full_ks_beta(j, img)*afac)
2043 : full_ks_beta(j, img) = full_ks_beta(i, img)*afac
2044 : END IF
2045 : END IF
2046 : ! ** For adiabatically rescaled functionals we need the energy coming from the diagonal elements
2047 : IF (i == j) THEN
2048 : ene_x_aa_diag = ene_x_aa_diag + full_ks_alpha(i, img)*full_density_alpha(i, img)
2049 : IF (.NOT. treat_lsd_in_core .AND. my_nspins == 2) THEN
2050 : ene_x_bb_diag = ene_x_bb_diag + full_ks_beta(i, img)*full_density_beta(i, img)
2051 : END IF
2052 : END IF
2053 : i = i + 1
2054 : j = j + nsgfa(iset)
2055 : END DO
2056 : END DO
2057 : END DO
2058 : END DO
2059 : END DO
2060 : END DO
2061 :
2062 : CALL para_env%sync()
2063 : afac = 1.0_dp
2064 : IF (is_anti_symmetric) afac = 0._dp
2065 : IF (distribute_fock_matrix) THEN
2066 : !! Distribute the current KS-matrix to all the processes
2067 : CALL timeset(routineN//"_dist_KS", handle_dist_ks)
2068 : DO img = 1, nkimages
2069 : CALL distribute_ks_matrix(para_env, full_ks_alpha(:, img), ks_matrix(ispin, img)%matrix, shm_number_of_p_entries, &
2070 : shm_block_offset, kind_of, basis_parameter, &
2071 : off_diag_fac=0.5_dp, diag_fac=afac)
2072 : END DO
2073 :
2074 : NULLIFY (full_ks_alpha)
2075 : DEALLOCATE (shm_master_x_data%full_ks_alpha)
2076 : IF (.NOT. treat_lsd_in_core) THEN
2077 : IF (my_nspins == 2) THEN
2078 : DO img = 1, nkimages
2079 : CALL distribute_ks_matrix(para_env, full_ks_beta(:, img), ks_matrix(2, img)%matrix, shm_number_of_p_entries, &
2080 : shm_block_offset, kind_of, basis_parameter, &
2081 : off_diag_fac=0.5_dp, diag_fac=afac)
2082 : END DO
2083 : NULLIFY (full_ks_beta)
2084 : DEALLOCATE (shm_master_x_data%full_ks_beta)
2085 : END IF
2086 : END IF
2087 : CALL timestop(handle_dist_ks)
2088 : END IF
2089 :
2090 : IF (distribute_fock_matrix) THEN
2091 : !! ** Calculate the exchange energy
2092 : ene_x_aa = 0.0_dp
2093 : DO img = 1, nkimages
2094 : CALL dbcsr_dot_threadsafe(ks_matrix(ispin, img)%matrix, rho_ao(ispin, img)%matrix, etmp)
2095 : ene_x_aa = ene_x_aa + etmp
2096 : END DO
2097 : !for ADMMS, we need the exchange matrix k(d) for both spins
2098 : IF (dft_control%do_admm) THEN
2099 : CPASSERT(nkimages == 1)
2100 : CALL dbcsr_copy(matrix_ks_aux_fit_hfx(ispin)%matrix, ks_matrix(ispin, 1)%matrix, &
2101 : name="HF exch. part of matrix_ks_aux_fit for ADMMS")
2102 : END IF
2103 :
2104 : ene_x_bb = 0.0_dp
2105 : IF (my_nspins == 2 .AND. .NOT. treat_lsd_in_core) THEN
2106 : DO img = 1, nkimages
2107 : CALL dbcsr_dot_threadsafe(ks_matrix(2, img)%matrix, rho_ao(2, img)%matrix, etmp)
2108 : ene_x_bb = ene_x_bb + etmp
2109 : END DO
2110 : !for ADMMS, we need the exchange matrix k(d) for both spins
2111 : IF (dft_control%do_admm) THEN
2112 : CPASSERT(nkimages == 1)
2113 : CALL dbcsr_copy(matrix_ks_aux_fit_hfx(2)%matrix, ks_matrix(2, 1)%matrix, &
2114 : name="HF exch. part of matrix_ks_aux_fit for ADMMS")
2115 : END IF
2116 : END IF
2117 :
2118 : !! Update energy type
2119 : ehfx = 0.5_dp*(ene_x_aa + ene_x_bb)
2120 : ELSE
2121 : ! ** It is easier to correct the following expression by the diagonal energy contribution,
2122 : ! ** than explicitly going throuhg the diagonal elements
2123 : DO img = 1, nkimages
2124 : DO pa = 1, SIZE(full_ks_alpha, 1)
2125 : ene_x_aa = ene_x_aa + full_ks_alpha(pa, img)*full_density_alpha(pa, img)
2126 : END DO
2127 : END DO
2128 : ! ** Now correct
2129 : ene_x_aa = (ene_x_aa + ene_x_aa_diag)*0.5_dp
2130 : IF (my_nspins == 2) THEN
2131 : DO img = 1, nkimages
2132 : DO pa = 1, SIZE(full_ks_beta, 1)
2133 : ene_x_bb = ene_x_bb + full_ks_beta(pa, img)*full_density_beta(pa, img)
2134 : END DO
2135 : END DO
2136 : ! ** Now correct
2137 : ene_x_bb = (ene_x_bb + ene_x_bb_diag)*0.5_dp
2138 : END IF
2139 : CALL para_env%sum(ene_x_aa)
2140 : IF (my_nspins == 2) CALL para_env%sum(ene_x_bb)
2141 : ehfx = 0.5_dp*(ene_x_aa + ene_x_bb)
2142 : END IF
2143 :
2144 : !! Print some memeory information if this is the first step
2145 : IF (my_geo_change) THEN
2146 : tmp_i8(1:8) = [shm_storage_counter_integrals, shm_neris_onthefly, shm_neris_incore, shm_neris_disk, &
2147 : shm_neris_total, shm_stor_count_int_disk, shm_nprim_ints, shm_stor_count_max_val]
2148 : CALL para_env%sum(tmp_i8)
2149 : shm_storage_counter_integrals = tmp_i8(1)
2150 : shm_neris_onthefly = tmp_i8(2)
2151 : shm_neris_incore = tmp_i8(3)
2152 : shm_neris_disk = tmp_i8(4)
2153 : shm_neris_total = tmp_i8(5)
2154 : shm_stor_count_int_disk = tmp_i8(6)
2155 : shm_nprim_ints = tmp_i8(7)
2156 : shm_stor_count_max_val = tmp_i8(8)
2157 : CALL para_env%max(memsize_after)
2158 : mem_eris = (shm_storage_counter_integrals + 128*1024 - 1)/1024/128
2159 : compression_factor = REAL(shm_neris_incore, dp)/REAL(shm_storage_counter_integrals, dp)
2160 : mem_eris_disk = (shm_stor_count_int_disk + 128*1024 - 1)/1024/128
2161 : compression_factor_disk = REAL(shm_neris_disk, dp)/REAL(shm_stor_count_int_disk, dp)
2162 : mem_max_val = (shm_stor_count_max_val + 128*1024 - 1)/1024/128
2163 :
2164 : IF (shm_neris_incore == 0) THEN
2165 : mem_eris = 0
2166 : compression_factor = 0.0_dp
2167 : END IF
2168 : IF (shm_neris_disk == 0) THEN
2169 : mem_eris_disk = 0
2170 : compression_factor_disk = 0.0_dp
2171 : END IF
2172 :
2173 : iw = cp_print_key_unit_nr(logger, hfx_section, "HF_INFO", &
2174 : extension=".scfLog")
2175 : IF (iw > 0) THEN
2176 : WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
2177 : "HFX_MEM_INFO| Number of cart. primitive ERI's calculated: ", shm_nprim_ints
2178 :
2179 : WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
2180 : "HFX_MEM_INFO| Number of sph. ERI's calculated: ", shm_neris_total
2181 :
2182 : WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
2183 : "HFX_MEM_INFO| Number of sph. ERI's stored in-core: ", shm_neris_incore
2184 :
2185 : WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
2186 : "HFX_MEM_INFO| Number of sph. ERI's stored on disk: ", shm_neris_disk
2187 :
2188 : WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
2189 : "HFX_MEM_INFO| Number of sph. ERI's calculated on the fly: ", shm_neris_onthefly
2190 :
2191 : WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
2192 : "HFX_MEM_INFO| Total memory consumption ERI's RAM [MiB]: ", mem_eris
2193 :
2194 : WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
2195 : "HFX_MEM_INFO| Whereof max-vals [MiB]: ", mem_max_val
2196 :
2197 : WRITE (UNIT=iw, FMT="((T3,A,T60,F21.2))") &
2198 : "HFX_MEM_INFO| Total compression factor ERI's RAM: ", compression_factor
2199 :
2200 : WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
2201 : "HFX_MEM_INFO| Total memory consumption ERI's disk [MiB]: ", mem_eris_disk
2202 :
2203 : WRITE (UNIT=iw, FMT="((T3,A,T60,F21.2))") &
2204 : "HFX_MEM_INFO| Total compression factor ERI's disk: ", compression_factor_disk
2205 :
2206 : WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
2207 : "HFX_MEM_INFO| Size of density/Fock matrix [MiB]: ", 2_int_8*mb_size_p
2208 :
2209 : IF (do_periodic) THEN
2210 : WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
2211 : "HFX_MEM_INFO| Size of buffers [MiB]: ", mb_size_buffers
2212 : WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
2213 : "HFX_MEM_INFO| Number of periodic image cells considered: ", SIZE(shm_master_x_data%neighbor_cells)
2214 : ELSE
2215 : WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
2216 : "HFX_MEM_INFO| Size of buffers [MiB]: ", mb_size_buffers
2217 : END IF
2218 : WRITE (UNIT=iw, FMT="((T3,A,T60,I21),/)") &
2219 : "HFX_MEM_INFO| Est. max. program size after HFX [MiB]:", memsize_after/(1024*1024)
2220 : CALL m_flush(iw)
2221 : END IF
2222 :
2223 : CALL cp_print_key_finished_output(iw, logger, hfx_section, &
2224 : "HF_INFO")
2225 : END IF
2226 : !$OMP END MASTER
2227 :
2228 : !! flush caches if the geometry changed
2229 : IF (do_dynamic_load_balancing) THEN
2230 : my_bin_size = SIZE(actual_x_data%distribution_energy)
2231 : ELSE
2232 : my_bin_size = 1
2233 : END IF
2234 :
2235 : IF (my_geo_change) THEN
2236 : IF (.NOT. memory_parameter%do_all_on_the_fly) THEN
2237 : DO bin = 1, my_bin_size
2238 : maxval_cache => actual_x_data%store_ints%maxval_cache(bin)
2239 : maxval_container => actual_x_data%store_ints%maxval_container(bin)
2240 : integral_caches => actual_x_data%store_ints%integral_caches(:, bin)
2241 : integral_containers => actual_x_data%store_ints%integral_containers(:, bin)
2242 : CALL hfx_flush_last_cache(bits_max_val, maxval_cache, maxval_container, memory_parameter%actual_memory_usage, &
2243 : .FALSE.)
2244 : DO i = 1, 64
2245 : CALL hfx_flush_last_cache(i, integral_caches(i), integral_containers(i), &
2246 : memory_parameter%actual_memory_usage, .FALSE.)
2247 : END DO
2248 : END DO
2249 : END IF
2250 : END IF
2251 : !! reset all caches except we calculate all on the fly
2252 : IF (.NOT. memory_parameter%do_all_on_the_fly) THEN
2253 : DO bin = 1, my_bin_size
2254 : maxval_cache => actual_x_data%store_ints%maxval_cache(bin)
2255 : maxval_container => actual_x_data%store_ints%maxval_container(bin)
2256 : integral_caches => actual_x_data%store_ints%integral_caches(:, bin)
2257 : integral_containers => actual_x_data%store_ints%integral_containers(:, bin)
2258 :
2259 : CALL hfx_reset_cache_and_container(maxval_cache, maxval_container, memory_parameter%actual_memory_usage, .FALSE.)
2260 : DO i = 1, 64
2261 : CALL hfx_reset_cache_and_container(integral_caches(i), integral_containers(i), &
2262 : memory_parameter%actual_memory_usage, &
2263 : .FALSE.)
2264 : END DO
2265 : END DO
2266 : END IF
2267 :
2268 : !! Since the I/O routines are no thread-safe, i.e. the procedure to get the unit number, put a lock here
2269 : !$OMP CRITICAL(hfxenergy_out_critical)
2270 : IF (do_disk_storage) THEN
2271 : !! flush caches if the geometry changed
2272 : IF (my_geo_change) THEN
2273 : CALL hfx_flush_last_cache(bits_max_val, maxval_cache_disk, maxval_container_disk, &
2274 : memory_parameter%actual_memory_usage_disk, .TRUE.)
2275 : DO i = 1, 64
2276 : CALL hfx_flush_last_cache(i, integral_caches_disk(i), integral_containers_disk(i), &
2277 : memory_parameter%actual_memory_usage_disk, .TRUE.)
2278 : END DO
2279 : END IF
2280 : !! reset all caches except we calculate all on the fly
2281 : CALL hfx_reset_cache_and_container(maxval_cache_disk, maxval_container_disk, memory_parameter%actual_memory_usage_disk, &
2282 : do_disk_storage)
2283 : DO i = 1, 64
2284 : CALL hfx_reset_cache_and_container(integral_caches_disk(i), integral_containers_disk(i), &
2285 : memory_parameter%actual_memory_usage_disk, do_disk_storage)
2286 : END DO
2287 : END IF
2288 : !$OMP END CRITICAL(hfxenergy_out_critical)
2289 : !$OMP BARRIER
2290 : !! Clean up
2291 : DEALLOCATE (last_sgf_global)
2292 : !$OMP MASTER
2293 : DEALLOCATE (full_density_alpha)
2294 : IF (.NOT. treat_lsd_in_core) THEN
2295 : IF (my_nspins == 2) THEN
2296 : DEALLOCATE (full_density_beta)
2297 : END IF
2298 : END IF
2299 : IF (do_dynamic_load_balancing) THEN
2300 : DEALLOCATE (shm_master_x_data%task_list)
2301 : END IF
2302 : !$OMP END MASTER
2303 : DEALLOCATE (pbd_buf, pbc_buf, pad_buf, pac_buf)
2304 : DEALLOCATE (kbd_buf, kbc_buf, kad_buf, kac_buf)
2305 : DEALLOCATE (set_list_ij, set_list_kl)
2306 :
2307 : DO i = 1, max_pgf**2
2308 : DEALLOCATE (pgf_list_ij(i)%image_list)
2309 : DEALLOCATE (pgf_list_kl(i)%image_list)
2310 : END DO
2311 :
2312 : DEALLOCATE (pgf_list_ij)
2313 : DEALLOCATE (pgf_list_kl)
2314 : DEALLOCATE (pgf_product_list)
2315 :
2316 : DEALLOCATE (max_contraction, kind_of)
2317 :
2318 : DEALLOCATE (ee_work, ee_work2, ee_buffer1, ee_buffer2, ee_primitives_tmp)
2319 :
2320 : DEALLOCATE (nimages)
2321 :
2322 : !$OMP BARRIER
2323 : !$OMP END PARALLEL
2324 :
2325 40523 : CALL timestop(handle)
2326 84044702 : END SUBROUTINE integrate_four_center
2327 :
2328 : ! **************************************************************************************************
2329 : !> \brief calculates two-electron integrals of a quartet/shell using the library
2330 : !> lib_int in the periodic case
2331 : !> \param lib ...
2332 : !> \param ra ...
2333 : !> \param rb ...
2334 : !> \param rc ...
2335 : !> \param rd ...
2336 : !> \param npgfa ...
2337 : !> \param npgfb ...
2338 : !> \param npgfc ...
2339 : !> \param npgfd ...
2340 : !> \param la_min ...
2341 : !> \param la_max ...
2342 : !> \param lb_min ...
2343 : !> \param lb_max ...
2344 : !> \param lc_min ...
2345 : !> \param lc_max ...
2346 : !> \param ld_min ...
2347 : !> \param ld_max ...
2348 : !> \param nsgfa ...
2349 : !> \param nsgfb ...
2350 : !> \param nsgfc ...
2351 : !> \param nsgfd ...
2352 : !> \param sphi_a_u1 ...
2353 : !> \param sphi_a_u2 ...
2354 : !> \param sphi_a_u3 ...
2355 : !> \param sphi_b_u1 ...
2356 : !> \param sphi_b_u2 ...
2357 : !> \param sphi_b_u3 ...
2358 : !> \param sphi_c_u1 ...
2359 : !> \param sphi_c_u2 ...
2360 : !> \param sphi_c_u3 ...
2361 : !> \param sphi_d_u1 ...
2362 : !> \param sphi_d_u2 ...
2363 : !> \param sphi_d_u3 ...
2364 : !> \param zeta ...
2365 : !> \param zetb ...
2366 : !> \param zetc ...
2367 : !> \param zetd ...
2368 : !> \param primitive_integrals array of primitive_integrals
2369 : !> \param potential_parameter contains info for libint
2370 : !> \param neighbor_cells Periodic images
2371 : !> \param screen1 set based coefficients for near field screening
2372 : !> \param screen2 set based coefficients for near field screening
2373 : !> \param eps_schwarz threshold
2374 : !> \param max_contraction_val maximum multiplication factor for cart -> sph
2375 : !> \param cart_estimate maximum calculated integral value
2376 : !> \param cell cell
2377 : !> \param neris_tmp counter for calculated cart integrals
2378 : !> \param log10_pmax logarithm of initial p matrix max element
2379 : !> \param log10_eps_schwarz log of threshold
2380 : !> \param R1_pgf coefficients for radii of product distribution function
2381 : !> \param R2_pgf coefficients for radii of product distribution function
2382 : !> \param pgf1 schwarz coefficients pgf basid
2383 : !> \param pgf2 schwarz coefficients pgf basid
2384 : !> \param pgf_list_ij ...
2385 : !> \param pgf_list_kl ...
2386 : !> \param pgf_product_list ...
2387 : !> \param nsgfl_a ...
2388 : !> \param nsgfl_b ...
2389 : !> \param nsgfl_c ...
2390 : !> \param nsgfl_d ...
2391 : !> \param sphi_a_ext ...
2392 : !> \param sphi_b_ext ...
2393 : !> \param sphi_c_ext ...
2394 : !> \param sphi_d_ext ...
2395 : !> \param ee_work ...
2396 : !> \param ee_work2 ...
2397 : !> \param ee_buffer1 ...
2398 : !> \param ee_buffer2 ...
2399 : !> \param ee_primitives_tmp ...
2400 : !> \param nimages ...
2401 : !> \param do_periodic ...
2402 : !> \param p_work ...
2403 : !> \par History
2404 : !> 11.2006 created [Manuel Guidon]
2405 : !> 02.2009 completely rewritten screening part [Manuel Guidon]
2406 : !> \author Manuel Guidon
2407 : ! **************************************************************************************************
2408 8519285 : SUBROUTINE coulomb4(lib, ra, rb, rc, rd, npgfa, npgfb, npgfc, npgfd, &
2409 : la_min, la_max, lb_min, lb_max, &
2410 : lc_min, lc_max, ld_min, ld_max, nsgfa, nsgfb, nsgfc, nsgfd, &
2411 : sphi_a_u1, sphi_a_u2, sphi_a_u3, &
2412 : sphi_b_u1, sphi_b_u2, sphi_b_u3, &
2413 : sphi_c_u1, sphi_c_u2, sphi_c_u3, &
2414 : sphi_d_u1, sphi_d_u2, sphi_d_u3, &
2415 8519285 : zeta, zetb, zetc, zetd, &
2416 8519285 : primitive_integrals, &
2417 : potential_parameter, neighbor_cells, &
2418 : screen1, screen2, eps_schwarz, max_contraction_val, &
2419 : cart_estimate, cell, neris_tmp, log10_pmax, &
2420 : log10_eps_schwarz, R1_pgf, R2_pgf, pgf1, pgf2, &
2421 : pgf_list_ij, pgf_list_kl, &
2422 : pgf_product_list, &
2423 8519285 : nsgfl_a, nsgfl_b, nsgfl_c, &
2424 8519285 : nsgfl_d, &
2425 8519285 : sphi_a_ext, sphi_b_ext, sphi_c_ext, sphi_d_ext, &
2426 : ee_work, ee_work2, ee_buffer1, ee_buffer2, ee_primitives_tmp, &
2427 : nimages, do_periodic, p_work)
2428 :
2429 : TYPE(cp_libint_t) :: lib
2430 : REAL(dp), INTENT(IN) :: ra(3), rb(3), rc(3), rd(3)
2431 : INTEGER, INTENT(IN) :: npgfa, npgfb, npgfc, npgfd, la_min, la_max, lb_min, lb_max, lc_min, &
2432 : lc_max, ld_min, ld_max, nsgfa, nsgfb, nsgfc, nsgfd, sphi_a_u1, sphi_a_u2, sphi_a_u3, &
2433 : sphi_b_u1, sphi_b_u2, sphi_b_u3, sphi_c_u1, sphi_c_u2, sphi_c_u3, sphi_d_u1, sphi_d_u2, &
2434 : sphi_d_u3
2435 : REAL(dp), DIMENSION(1:npgfa), INTENT(IN) :: zeta
2436 : REAL(dp), DIMENSION(1:npgfb), INTENT(IN) :: zetb
2437 : REAL(dp), DIMENSION(1:npgfc), INTENT(IN) :: zetc
2438 : REAL(dp), DIMENSION(1:npgfd), INTENT(IN) :: zetd
2439 : REAL(dp), DIMENSION(nsgfa, nsgfb, nsgfc, nsgfd) :: primitive_integrals
2440 : TYPE(hfx_potential_type) :: potential_parameter
2441 : TYPE(hfx_cell_type), DIMENSION(:), POINTER :: neighbor_cells
2442 : REAL(dp), INTENT(IN) :: screen1(2), screen2(2), eps_schwarz, &
2443 : max_contraction_val
2444 : REAL(dp) :: cart_estimate
2445 : TYPE(cell_type), POINTER :: cell
2446 : INTEGER(int_8) :: neris_tmp
2447 : REAL(dp), INTENT(IN) :: log10_pmax, log10_eps_schwarz
2448 : TYPE(hfx_screen_coeff_type), DIMENSION(:, :), &
2449 : POINTER :: R1_pgf, R2_pgf, pgf1, pgf2
2450 : TYPE(hfx_pgf_list), DIMENSION(*) :: pgf_list_ij, pgf_list_kl
2451 : TYPE(hfx_pgf_product_list), ALLOCATABLE, &
2452 : DIMENSION(:), INTENT(INOUT) :: pgf_product_list
2453 : INTEGER, DIMENSION(0:), INTENT(IN) :: nsgfl_a, nsgfl_b, nsgfl_c, nsgfl_d
2454 : REAL(dp), INTENT(IN) :: sphi_a_ext(sphi_a_u1, sphi_a_u2, sphi_a_u3), &
2455 : sphi_b_ext(sphi_b_u1, sphi_b_u2, sphi_b_u3), sphi_c_ext(sphi_c_u1, sphi_c_u2, sphi_c_u3), &
2456 : sphi_d_ext(sphi_d_u1, sphi_d_u2, sphi_d_u3)
2457 : REAL(dp), DIMENSION(*) :: ee_work, ee_work2, ee_buffer1, &
2458 : ee_buffer2, ee_primitives_tmp
2459 : INTEGER, DIMENSION(*) :: nimages
2460 : LOGICAL, INTENT(IN) :: do_periodic
2461 : REAL(dp), DIMENSION(:), POINTER :: p_work
2462 :
2463 : INTEGER :: ipgf, jpgf, kpgf, la, lb, lc, ld, list_ij, list_kl, lpgf, max_l, ncoa, ncob, &
2464 : ncoc, ncod, nelements_ij, nelements_kl, nproducts, nsgfla, nsgflb, nsgflc, nsgfld, nsoa, &
2465 : nsob, nsoc, nsod, s_offset_a, s_offset_b, s_offset_c, s_offset_d
2466 : REAL(dp) :: EtaInv, tmp_max, ZetaInv
2467 :
2468 : CALL build_pair_list_pgf(npgfa, npgfb, pgf_list_ij, zeta, zetb, screen1, screen2, &
2469 : pgf1, R1_pgf, log10_pmax, log10_eps_schwarz, ra, rb, &
2470 : nelements_ij, &
2471 8519285 : neighbor_cells, nimages, do_periodic)
2472 : CALL build_pair_list_pgf(npgfc, npgfd, pgf_list_kl, zetc, zetd, screen2, screen1, &
2473 : pgf2, R2_pgf, log10_pmax, log10_eps_schwarz, rc, rd, &
2474 : nelements_kl, &
2475 8519285 : neighbor_cells, nimages, do_periodic)
2476 :
2477 8519285 : cart_estimate = 0.0_dp
2478 8519285 : neris_tmp = 0
2479 567657313 : primitive_integrals = 0.0_dp
2480 8519285 : max_l = la_max + lb_max + lc_max + ld_max
2481 :
2482 29367805 : DO list_ij = 1, nelements_ij
2483 20848520 : ZetaInv = pgf_list_ij(list_ij)%ZetaInv
2484 20848520 : ipgf = pgf_list_ij(list_ij)%ipgf
2485 20848520 : jpgf = pgf_list_ij(list_ij)%jpgf
2486 :
2487 105687421 : DO list_kl = 1, nelements_kl
2488 76319616 : EtaInv = pgf_list_kl(list_kl)%ZetaInv
2489 76319616 : kpgf = pgf_list_kl(list_kl)%ipgf
2490 76319616 : lpgf = pgf_list_kl(list_kl)%jpgf
2491 :
2492 : CALL build_pgf_product_list(pgf_list_ij(list_ij), pgf_list_kl(list_kl), pgf_product_list, &
2493 : nproducts, log10_pmax, log10_eps_schwarz, neighbor_cells, cell, &
2494 76319616 : potential_parameter, max_l, do_periodic)
2495 :
2496 76319616 : s_offset_a = 0
2497 188156426 : DO la = la_min, la_max
2498 90988290 : s_offset_b = 0
2499 90988290 : ncoa = nco(la)
2500 90988290 : nsgfla = nsgfl_a(la)
2501 90988290 : nsoa = nso(la)
2502 :
2503 194398410 : DO lb = lb_min, lb_max
2504 103410120 : s_offset_c = 0
2505 103410120 : ncob = nco(lb)
2506 103410120 : nsgflb = nsgfl_b(lb)
2507 103410120 : nsob = nso(lb)
2508 :
2509 242972951 : DO lc = lc_min, lc_max
2510 139562831 : s_offset_d = 0
2511 139562831 : ncoc = nco(lc)
2512 139562831 : nsgflc = nsgfl_c(lc)
2513 139562831 : nsoc = nso(lc)
2514 :
2515 323919785 : DO ld = ld_min, ld_max
2516 184356954 : ncod = nco(ld)
2517 184356954 : nsgfld = nsgfl_d(ld)
2518 184356954 : nsod = nso(ld)
2519 :
2520 184356954 : tmp_max = 0.0_dp
2521 : CALL evaluate_eri(lib, nproducts, pgf_product_list, &
2522 : la, lb, lc, ld, &
2523 : ncoa, ncob, ncoc, ncod, &
2524 : nsgfa, nsgfb, nsgfc, nsgfd, &
2525 : primitive_integrals, &
2526 : max_contraction_val, tmp_max, eps_schwarz, &
2527 : neris_tmp, ZetaInv, EtaInv, &
2528 : s_offset_a, s_offset_b, s_offset_c, s_offset_d, &
2529 : nsgfla, nsgflb, nsgflc, nsgfld, nsoa, nsob, nsoc, nsod, &
2530 : sphi_a_ext(1, la + 1, ipgf), &
2531 : sphi_b_ext(1, lb + 1, jpgf), &
2532 : sphi_c_ext(1, lc + 1, kpgf), &
2533 : sphi_d_ext(1, ld + 1, lpgf), &
2534 : ee_work, ee_work2, ee_buffer1, ee_buffer2, ee_primitives_tmp, &
2535 184356954 : p_work)
2536 184356954 : cart_estimate = MAX(tmp_max, cart_estimate)
2537 323919785 : s_offset_d = s_offset_d + nsod*nsgfld
2538 : END DO !ld
2539 242972951 : s_offset_c = s_offset_c + nsoc*nsgflc
2540 : END DO !lc
2541 194398410 : s_offset_b = s_offset_b + nsob*nsgflb
2542 : END DO !lb
2543 167307906 : s_offset_a = s_offset_a + nsoa*nsgfla
2544 : END DO !la
2545 : END DO
2546 : END DO
2547 :
2548 8519285 : END SUBROUTINE coulomb4
2549 :
2550 : ! **************************************************************************************************
2551 : !> \brief Given a 2d index pair, this function returns a 1d index pair for
2552 : !> a symmetric upper triangle NxN matrix
2553 : !> The compiler should inline this function, therefore it appears in
2554 : !> several modules
2555 : !> \param i 2d index
2556 : !> \param j 2d index
2557 : !> \param N matrix size
2558 : !> \return ...
2559 : !> \par History
2560 : !> 03.2009 created [Manuel Guidon]
2561 : !> \author Manuel Guidon
2562 : ! **************************************************************************************************
2563 28264 : PURE FUNCTION get_1D_idx(i, j, N)
2564 : INTEGER, INTENT(IN) :: i, j
2565 : INTEGER(int_8), INTENT(IN) :: N
2566 : INTEGER(int_8) :: get_1D_idx
2567 :
2568 : INTEGER(int_8) :: min_ij
2569 :
2570 28264 : min_ij = MIN(i, j)
2571 28264 : get_1D_idx = min_ij*N + MAX(i, j) - (min_ij - 1)*min_ij/2 - N
2572 :
2573 28264 : END FUNCTION get_1D_idx
2574 :
2575 : ! **************************************************************************************************
2576 : !> \brief This routine prefetches density/fock matrix elements and stores them
2577 : !> in cache friendly arrays. These buffers are then used to update the
2578 : !> fock matrix
2579 : !> \param ma_max Size of matrix blocks
2580 : !> \param mb_max Size of matrix blocks
2581 : !> \param mc_max Size of matrix blocks
2582 : !> \param md_max Size of matrix blocks
2583 : !> \param fac multiplication factor (spin)
2584 : !> \param symm_fac multiplication factor (symmetry)
2585 : !> \param density upper triangular density matrix
2586 : !> \param ks upper triangular fock matrix
2587 : !> \param prim primitive integrals
2588 : !> \param pbd buffer that will contain P(b,d)
2589 : !> \param pbc buffer that will contain P(b,c)
2590 : !> \param pad buffer that will contain P(a,d)
2591 : !> \param pac buffer that will contain P(a,c)
2592 : !> \param kbd buffer for KS(b,d)
2593 : !> \param kbc buffer for KS(b,c)
2594 : !> \param kad buffer for KS(a,d)
2595 : !> \param kac buffer for KS(a,c)
2596 : !> \param iatom ...
2597 : !> \param jatom ...
2598 : !> \param katom ...
2599 : !> \param latom ...
2600 : !> \param iset ...
2601 : !> \param jset ...
2602 : !> \param kset ...
2603 : !> \param lset ...
2604 : !> \param offset_bd_set ...
2605 : !> \param offset_bc_set ...
2606 : !> \param offset_ad_set ...
2607 : !> \param offset_ac_set ...
2608 : !> \param atomic_offset_bd ...
2609 : !> \param atomic_offset_bc ...
2610 : !> \param atomic_offset_ad ...
2611 : !> \param atomic_offset_ac ...
2612 : !> \par History
2613 : !> 03.2009 created [Manuel Guidon]
2614 : !> \author Manuel Guidon
2615 : ! **************************************************************************************************
2616 :
2617 77149063 : SUBROUTINE update_fock_matrix(ma_max, mb_max, mc_max, md_max, &
2618 77149063 : fac, symm_fac, density, ks, prim, &
2619 : pbd, pbc, pad, pac, kbd, kbc, kad, kac, &
2620 : iatom, jatom, katom, latom, &
2621 : iset, jset, kset, lset, offset_bd_set, offset_bc_set, offset_ad_set, &
2622 : offset_ac_set, atomic_offset_bd, atomic_offset_bc, atomic_offset_ad, &
2623 : atomic_offset_ac)
2624 :
2625 : INTEGER, INTENT(IN) :: ma_max, mb_max, mc_max, md_max
2626 : REAL(dp), INTENT(IN) :: fac, symm_fac
2627 : REAL(dp), DIMENSION(:), INTENT(IN) :: density
2628 : REAL(dp), DIMENSION(:), INTENT(INOUT) :: ks
2629 : REAL(dp), DIMENSION(ma_max*mb_max*mc_max*md_max), &
2630 : INTENT(IN) :: prim
2631 : REAL(dp), DIMENSION(*), INTENT(INOUT) :: pbd, pbc, pad, pac, kbd, kbc, kad, kac
2632 : INTEGER, INTENT(IN) :: iatom, jatom, katom, latom, iset, jset, &
2633 : kset, lset
2634 : INTEGER, DIMENSION(:, :), POINTER, INTENT(IN) :: offset_bd_set, offset_bc_set, &
2635 : offset_ad_set, offset_ac_set
2636 : INTEGER, INTENT(IN) :: atomic_offset_bd, atomic_offset_bc, &
2637 : atomic_offset_ad, atomic_offset_ac
2638 :
2639 : INTEGER :: i, j, ma, mb, mc, md, offset_ac, &
2640 : offset_ad, offset_bc, offset_bd
2641 : REAL(dp) :: ki
2642 :
2643 77149063 : IF (jatom >= latom) THEN
2644 75952697 : i = 1
2645 75952697 : offset_bd = offset_bd_set(jset, lset) + atomic_offset_bd - 1
2646 75952697 : j = offset_bd
2647 228983323 : DO md = 1, md_max
2648 514429017 : DO mb = 1, mb_max
2649 285445694 : pbd(i) = density(j)
2650 285445694 : i = i + 1
2651 438476320 : j = j + 1
2652 : END DO
2653 : END DO
2654 : ELSE
2655 1196366 : i = 1
2656 1196366 : offset_bd = offset_bd_set(lset, jset) + atomic_offset_bd - 1
2657 3523312 : DO md = 1, md_max
2658 2326946 : j = offset_bd + md - 1
2659 8645991 : DO mb = 1, mb_max
2660 5122679 : pbd(i) = density(j)
2661 5122679 : i = i + 1
2662 7449625 : j = j + md_max
2663 : END DO
2664 : END DO
2665 : END IF
2666 77149063 : IF (jatom >= katom) THEN
2667 77149063 : i = 1
2668 77149063 : offset_bc = offset_bc_set(jset, kset) + atomic_offset_bc - 1
2669 77149063 : j = offset_bc
2670 256522867 : DO mc = 1, mc_max
2671 581257238 : DO mb = 1, mb_max
2672 324734371 : pbc(i) = density(j)
2673 324734371 : i = i + 1
2674 504108175 : j = j + 1
2675 : END DO
2676 : END DO
2677 : ELSE
2678 0 : i = 1
2679 0 : offset_bc = offset_bc_set(kset, jset) + atomic_offset_bc - 1
2680 0 : DO mc = 1, mc_max
2681 0 : j = offset_bc + mc - 1
2682 0 : DO mb = 1, mb_max
2683 0 : pbc(i) = density(j)
2684 0 : i = i + 1
2685 0 : j = j + mc_max
2686 : END DO
2687 : END DO
2688 : END IF
2689 77149063 : IF (iatom >= latom) THEN
2690 55438727 : i = 1
2691 55438727 : offset_ad = offset_ad_set(iset, lset) + atomic_offset_ad - 1
2692 55438727 : j = offset_ad
2693 174714273 : DO md = 1, md_max
2694 433908748 : DO ma = 1, ma_max
2695 259194475 : pad(i) = density(j)
2696 259194475 : i = i + 1
2697 378470021 : j = j + 1
2698 : END DO
2699 : END DO
2700 : ELSE
2701 21710336 : i = 1
2702 21710336 : offset_ad = offset_ad_set(lset, iset) + atomic_offset_ad - 1
2703 57792362 : DO md = 1, md_max
2704 36082026 : j = offset_ad + md - 1
2705 144513718 : DO ma = 1, ma_max
2706 86721356 : pad(i) = density(j)
2707 86721356 : i = i + 1
2708 122803382 : j = j + md_max
2709 : END DO
2710 : END DO
2711 : END IF
2712 77149063 : IF (iatom >= katom) THEN
2713 73568651 : i = 1
2714 73568651 : offset_ac = offset_ac_set(iset, kset) + atomic_offset_ac - 1
2715 73568651 : j = offset_ac
2716 246427416 : DO mc = 1, mc_max
2717 626798704 : DO ma = 1, ma_max
2718 380371288 : pac(i) = density(j)
2719 380371288 : i = i + 1
2720 553230053 : j = j + 1
2721 : END DO
2722 : END DO
2723 : ELSE
2724 3580412 : i = 1
2725 3580412 : offset_ac = offset_ac_set(kset, iset) + atomic_offset_ac - 1
2726 10095451 : DO mc = 1, mc_max
2727 6515039 : j = offset_ac + mc - 1
2728 27315653 : DO ma = 1, ma_max
2729 17220202 : pac(i) = density(j)
2730 17220202 : i = i + 1
2731 23735241 : j = j + mc_max
2732 : END DO
2733 : END DO
2734 : END IF
2735 :
2736 77149063 : CALL contract_block(ma_max, mb_max, mc_max, md_max, kbd, kbc, kad, kac, pbd, pbc, pad, pac, prim, fac*symm_fac)
2737 77149063 : IF (jatom >= latom) THEN
2738 : i = 1
2739 : j = offset_bd
2740 228983323 : DO md = 1, md_max
2741 514429017 : DO mb = 1, mb_max
2742 285445694 : ki = kbd(i)
2743 285445694 : !$OMP ATOMIC
2744 : ks(j) = ks(j) + ki
2745 285445694 : i = i + 1
2746 438476320 : j = j + 1
2747 : END DO
2748 : END DO
2749 : ELSE
2750 : i = 1
2751 3523312 : DO md = 1, md_max
2752 2326946 : j = offset_bd + md - 1
2753 8645991 : DO mb = 1, mb_max
2754 5122679 : ki = kbd(i)
2755 5122679 : !$OMP ATOMIC
2756 : ks(j) = ks(j) + ki
2757 5122679 : i = i + 1
2758 7449625 : j = j + md_max
2759 : END DO
2760 : END DO
2761 : END IF
2762 77149063 : IF (jatom >= katom) THEN
2763 : i = 1
2764 : j = offset_bc
2765 256522867 : DO mc = 1, mc_max
2766 581257238 : DO mb = 1, mb_max
2767 324734371 : ki = kbc(i)
2768 324734371 : !$OMP ATOMIC
2769 : ks(j) = ks(j) + ki
2770 324734371 : i = i + 1
2771 504108175 : j = j + 1
2772 : END DO
2773 : END DO
2774 : ELSE
2775 : i = 1
2776 0 : DO mc = 1, mc_max
2777 0 : j = offset_bc + mc - 1
2778 0 : DO mb = 1, mb_max
2779 0 : ki = kbc(i)
2780 0 : !$OMP ATOMIC
2781 : ks(j) = ks(j) + ki
2782 0 : i = i + 1
2783 0 : j = j + mc_max
2784 : END DO
2785 : END DO
2786 : END IF
2787 77149063 : IF (iatom >= latom) THEN
2788 : i = 1
2789 : j = offset_ad
2790 174714273 : DO md = 1, md_max
2791 433908748 : DO ma = 1, ma_max
2792 259194475 : ki = kad(i)
2793 259194475 : !$OMP ATOMIC
2794 : ks(j) = ks(j) + ki
2795 259194475 : i = i + 1
2796 378470021 : j = j + 1
2797 : END DO
2798 : END DO
2799 : ELSE
2800 : i = 1
2801 57792362 : DO md = 1, md_max
2802 36082026 : j = offset_ad + md - 1
2803 144513718 : DO ma = 1, ma_max
2804 86721356 : ki = kad(i)
2805 86721356 : !$OMP ATOMIC
2806 : ks(j) = ks(j) + ki
2807 86721356 : i = i + 1
2808 122803382 : j = j + md_max
2809 : END DO
2810 : END DO
2811 : END IF
2812 77149063 : IF (iatom >= katom) THEN
2813 : i = 1
2814 : j = offset_ac
2815 246427416 : DO mc = 1, mc_max
2816 626798704 : DO ma = 1, ma_max
2817 380371288 : ki = kac(i)
2818 380371288 : !$OMP ATOMIC
2819 : ks(j) = ks(j) + ki
2820 380371288 : i = i + 1
2821 553230053 : j = j + 1
2822 : END DO
2823 : END DO
2824 : ELSE
2825 : i = 1
2826 10095451 : DO mc = 1, mc_max
2827 6515039 : j = offset_ac + mc - 1
2828 27315653 : DO ma = 1, ma_max
2829 17220202 : ki = kac(i)
2830 17220202 : !$OMP ATOMIC
2831 : ks(j) = ks(j) + ki
2832 17220202 : i = i + 1
2833 23735241 : j = j + mc_max
2834 : END DO
2835 : END DO
2836 : END IF
2837 77149063 : END SUBROUTINE update_fock_matrix
2838 :
2839 : ! **************************************************************************************************
2840 : !> \brief ...
2841 : !> \param ma_max ...
2842 : !> \param mb_max ...
2843 : !> \param mc_max ...
2844 : !> \param md_max ...
2845 : !> \param fac ...
2846 : !> \param symm_fac ...
2847 : !> \param density ...
2848 : !> \param ks ...
2849 : !> \param prim ...
2850 : !> \param pbd ...
2851 : !> \param pbc ...
2852 : !> \param pad ...
2853 : !> \param pac ...
2854 : !> \param kbd ...
2855 : !> \param kbc ...
2856 : !> \param kad ...
2857 : !> \param kac ...
2858 : !> \param iatom ...
2859 : !> \param jatom ...
2860 : !> \param katom ...
2861 : !> \param latom ...
2862 : !> \param iset ...
2863 : !> \param jset ...
2864 : !> \param kset ...
2865 : !> \param lset ...
2866 : !> \param offset_bd_set ...
2867 : !> \param offset_bc_set ...
2868 : !> \param offset_ad_set ...
2869 : !> \param offset_ac_set ...
2870 : !> \param atomic_offset_bd ...
2871 : !> \param atomic_offset_bc ...
2872 : !> \param atomic_offset_ad ...
2873 : !> \param atomic_offset_ac ...
2874 : ! **************************************************************************************************
2875 5562963 : SUBROUTINE update_fock_matrix_as(ma_max, mb_max, mc_max, md_max, &
2876 5562963 : fac, symm_fac, density, ks, prim, &
2877 : pbd, pbc, pad, pac, kbd, kbc, kad, kac, &
2878 : iatom, jatom, katom, latom, &
2879 : iset, jset, kset, lset, offset_bd_set, offset_bc_set, offset_ad_set, &
2880 : offset_ac_set, atomic_offset_bd, atomic_offset_bc, atomic_offset_ad, &
2881 : atomic_offset_ac)
2882 :
2883 : INTEGER, INTENT(IN) :: ma_max, mb_max, mc_max, md_max
2884 : REAL(dp), INTENT(IN) :: fac, symm_fac
2885 : REAL(dp), DIMENSION(:), INTENT(IN) :: density
2886 : REAL(dp), DIMENSION(:), INTENT(INOUT) :: ks
2887 : REAL(dp), DIMENSION(ma_max*mb_max*mc_max*md_max), &
2888 : INTENT(IN) :: prim
2889 : REAL(dp), DIMENSION(*), INTENT(INOUT) :: pbd, pbc, pad, pac, kbd, kbc, kad, kac
2890 : INTEGER, INTENT(IN) :: iatom, jatom, katom, latom, iset, jset, &
2891 : kset, lset
2892 : INTEGER, DIMENSION(:, :), POINTER :: offset_bd_set, offset_bc_set, &
2893 : offset_ad_set, offset_ac_set
2894 : INTEGER, INTENT(IN) :: atomic_offset_bd, atomic_offset_bc, &
2895 : atomic_offset_ad, atomic_offset_ac
2896 :
2897 : INTEGER :: i, j, ma, mb, mc, md, offset_ac, &
2898 : offset_ad, offset_bc, offset_bd
2899 :
2900 5562963 : IF (jatom >= latom) THEN
2901 5549451 : i = 1
2902 5549451 : offset_bd = offset_bd_set(jset, lset) + atomic_offset_bd - 1
2903 5549451 : j = offset_bd
2904 15215976 : DO md = 1, md_max
2905 29405199 : DO mb = 1, mb_max
2906 14189223 : pbd(i) = +density(j)
2907 14189223 : i = i + 1
2908 23855748 : j = j + 1
2909 : END DO
2910 : END DO
2911 : ELSE
2912 13512 : i = 1
2913 13512 : offset_bd = offset_bd_set(lset, jset) + atomic_offset_bd - 1
2914 28504 : DO md = 1, md_max
2915 14992 : j = offset_bd + md - 1
2916 47164 : DO mb = 1, mb_max
2917 18660 : pbd(i) = -density(j)
2918 18660 : i = i + 1
2919 33652 : j = j + md_max
2920 : END DO
2921 : END DO
2922 : END IF
2923 5562963 : IF (jatom >= katom) THEN
2924 5562963 : i = 1
2925 5562963 : offset_bc = offset_bc_set(jset, kset) + atomic_offset_bc - 1
2926 5562963 : j = offset_bc
2927 17097841 : DO mc = 1, mc_max
2928 33527858 : DO mb = 1, mb_max
2929 16430017 : pbc(i) = -density(j)
2930 16430017 : i = i + 1
2931 27964895 : j = j + 1
2932 : END DO
2933 : END DO
2934 : ELSE
2935 0 : i = 1
2936 0 : offset_bc = offset_bc_set(kset, jset) + atomic_offset_bc - 1
2937 0 : DO mc = 1, mc_max
2938 0 : j = offset_bc + mc - 1
2939 0 : DO mb = 1, mb_max
2940 0 : pbc(i) = density(j)
2941 0 : i = i + 1
2942 0 : j = j + mc_max
2943 : END DO
2944 : END DO
2945 : END IF
2946 5562963 : IF (iatom >= latom) THEN
2947 4162454 : i = 1
2948 4162454 : offset_ad = offset_ad_set(iset, lset) + atomic_offset_ad - 1
2949 4162454 : j = offset_ad
2950 12159819 : DO md = 1, md_max
2951 27371932 : DO ma = 1, ma_max
2952 15212113 : pad(i) = -density(j)
2953 15212113 : i = i + 1
2954 23209478 : j = j + 1
2955 : END DO
2956 : END DO
2957 : ELSE
2958 1400509 : i = 1
2959 1400509 : offset_ad = offset_ad_set(lset, iset) + atomic_offset_ad - 1
2960 3084661 : DO md = 1, md_max
2961 1684152 : j = offset_ad + md - 1
2962 6852229 : DO ma = 1, ma_max
2963 3767568 : pad(i) = density(j)
2964 3767568 : i = i + 1
2965 5451720 : j = j + md_max
2966 : END DO
2967 : END DO
2968 : END IF
2969 5562963 : IF (iatom >= katom) THEN
2970 5358791 : i = 1
2971 5358791 : offset_ac = offset_ac_set(iset, kset) + atomic_offset_ac - 1
2972 5358791 : j = offset_ac
2973 16600747 : DO mc = 1, mc_max
2974 38437337 : DO ma = 1, ma_max
2975 21836590 : pac(i) = +density(j)
2976 21836590 : i = i + 1
2977 33078546 : j = j + 1
2978 : END DO
2979 : END DO
2980 : ELSE
2981 204172 : i = 1
2982 204172 : offset_ac = offset_ac_set(kset, iset) + atomic_offset_ac - 1
2983 497094 : DO mc = 1, mc_max
2984 292922 : j = offset_ac + mc - 1
2985 1248888 : DO ma = 1, ma_max
2986 751794 : pac(i) = -density(j)
2987 751794 : i = i + 1
2988 1044716 : j = j + mc_max
2989 : END DO
2990 : END DO
2991 : END IF
2992 :
2993 5562963 : CALL contract_block(ma_max, mb_max, mc_max, md_max, kbd, kbc, kad, kac, pbd, pbc, pad, pac, prim, fac*symm_fac)
2994 :
2995 5562963 : IF (jatom >= latom) THEN
2996 : i = 1
2997 : j = offset_bd
2998 15215976 : DO md = 1, md_max
2999 29405199 : DO mb = 1, mb_max
3000 14189223 : !$OMP ATOMIC
3001 : ks(j) = ks(j) + kbd(i)
3002 14189223 : i = i + 1
3003 23855748 : j = j + 1
3004 : END DO
3005 : END DO
3006 : ELSE
3007 : i = 1
3008 28504 : DO md = 1, md_max
3009 14992 : j = offset_bd + md - 1
3010 47164 : DO mb = 1, mb_max
3011 18660 : !$OMP ATOMIC
3012 : ks(j) = ks(j) - kbd(i)
3013 18660 : i = i + 1
3014 33652 : j = j + md_max
3015 : END DO
3016 : END DO
3017 : END IF
3018 5562963 : IF (jatom >= katom) THEN
3019 : i = 1
3020 : j = offset_bc
3021 17097841 : DO mc = 1, mc_max
3022 33527858 : DO mb = 1, mb_max
3023 16430017 : !$OMP ATOMIC
3024 : ks(j) = ks(j) - kbc(i)
3025 16430017 : i = i + 1
3026 27964895 : j = j + 1
3027 : END DO
3028 : END DO
3029 : ELSE
3030 : i = 1
3031 0 : DO mc = 1, mc_max
3032 0 : j = offset_bc + mc - 1
3033 0 : DO mb = 1, mb_max
3034 0 : !$OMP ATOMIC
3035 : ks(j) = ks(j) + kbc(i)
3036 0 : i = i + 1
3037 0 : j = j + mc_max
3038 : END DO
3039 : END DO
3040 : END IF
3041 5562963 : IF (iatom >= latom) THEN
3042 : i = 1
3043 : j = offset_ad
3044 12159819 : DO md = 1, md_max
3045 27371932 : DO ma = 1, ma_max
3046 15212113 : !$OMP ATOMIC
3047 : ks(j) = ks(j) - kad(i)
3048 15212113 : i = i + 1
3049 23209478 : j = j + 1
3050 : END DO
3051 : END DO
3052 : ELSE
3053 : i = 1
3054 3084661 : DO md = 1, md_max
3055 1684152 : j = offset_ad + md - 1
3056 6852229 : DO ma = 1, ma_max
3057 3767568 : !$OMP ATOMIC
3058 : ks(j) = ks(j) + kad(i)
3059 3767568 : i = i + 1
3060 5451720 : j = j + md_max
3061 : END DO
3062 : END DO
3063 : END IF
3064 : ! XXXXXXXXXXXXXXXXXXXXXXXX
3065 5562963 : IF (iatom >= katom) THEN
3066 : i = 1
3067 : j = offset_ac
3068 16600747 : DO mc = 1, mc_max
3069 38437337 : DO ma = 1, ma_max
3070 21836590 : !$OMP ATOMIC
3071 : ks(j) = ks(j) + kac(i)
3072 21836590 : i = i + 1
3073 33078546 : j = j + 1
3074 : END DO
3075 : END DO
3076 : ELSE
3077 : i = 1
3078 497094 : DO mc = 1, mc_max
3079 292922 : j = offset_ac + mc - 1
3080 1248888 : DO ma = 1, ma_max
3081 751794 : !$OMP ATOMIC
3082 : ks(j) = ks(j) - kac(i)
3083 751794 : i = i + 1
3084 1044716 : j = j + mc_max
3085 : END DO
3086 : END DO
3087 : END IF
3088 :
3089 5562963 : END SUBROUTINE update_fock_matrix_as
3090 :
3091 : ! **************************************************************************************************
3092 : !> \brief ...
3093 : !> \param i ...
3094 : !> \param j ...
3095 : !> \param k ...
3096 : !> \param l ...
3097 : !> \param set_offsets ...
3098 : !> \param atom_offsets ...
3099 : !> \param iset ...
3100 : !> \param jset ...
3101 : !> \param kset ...
3102 : !> \param lset ...
3103 : !> \param ma_max ...
3104 : !> \param mb_max ...
3105 : !> \param mc_max ...
3106 : !> \param md_max ...
3107 : !> \param prim ...
3108 : ! **************************************************************************************************
3109 0 : SUBROUTINE print_integrals(i, j, k, l, set_offsets, atom_offsets, iset, jset, kset, lset, ma_max, mb_max, mc_max, md_max, prim)
3110 : INTEGER :: i, j, k, l
3111 : INTEGER, DIMENSION(:, :, :, :), POINTER :: set_offsets
3112 : INTEGER, DIMENSION(:, :), POINTER :: atom_offsets
3113 : INTEGER :: iset, jset, kset, lset, ma_max, mb_max, &
3114 : mc_max, md_max
3115 : REAL(dp), DIMENSION(ma_max*mb_max*mc_max*md_max), &
3116 : INTENT(IN) :: prim
3117 :
3118 : INTEGER :: iint, ma, mb, mc, md
3119 :
3120 0 : iint = 0
3121 0 : DO md = 1, md_max
3122 0 : DO mc = 1, mc_max
3123 0 : DO mb = 1, mb_max
3124 0 : DO ma = 1, ma_max
3125 0 : iint = iint + 1
3126 0 : IF (ABS(prim(iint)) > 0.0000000000001) &
3127 0 : WRITE (99, *) atom_offsets(i, 1) + ma + set_offsets(iset, 1, i, 1) - 1, &
3128 0 : atom_offsets(j, 1) + ma + set_offsets(jset, 1, j, 1) - 1, &
3129 0 : atom_offsets(k, 1) + ma + set_offsets(kset, 1, k, 1) - 1, &
3130 0 : atom_offsets(l, 1) + ma + set_offsets(lset, 1, l, 1) - 1, &
3131 0 : prim(iint)
3132 : END DO
3133 : END DO
3134 : END DO
3135 : END DO
3136 :
3137 0 : END SUBROUTINE print_integrals
3138 :
3139 : #:include "hfx_get_pmax_val.fypp"
3140 :
3141 : END MODULE hfx_energy_potential
|