Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2025 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief Routines to calculate HFX energy and potential
10 : !> \par History
11 : !> 11.2006 created [Manuel Guidon]
12 : !> \author Manuel Guidon
13 : ! **************************************************************************************************
14 : MODULE hfx_energy_potential
15 : USE admm_types, ONLY: get_admm_env
16 : USE atomic_kind_types, ONLY: atomic_kind_type, &
17 : get_atomic_kind_set
18 : USE bibliography, ONLY: cite_reference, &
19 : guidon2008, &
20 : guidon2009
21 : USE cell_types, ONLY: cell_type, &
22 : pbc
23 : USE cp_control_types, ONLY: dft_control_type
24 : USE cp_files, ONLY: close_file, &
25 : open_file
26 : USE cp_log_handling, ONLY: cp_get_default_logger, &
27 : cp_logger_type
28 : USE cp_output_handling, ONLY: cp_p_file, &
29 : cp_print_key_finished_output, &
30 : cp_print_key_should_output, &
31 : cp_print_key_unit_nr
32 : USE message_passing, ONLY: mp_para_env_type
33 : USE cp_dbcsr_api, ONLY: dbcsr_copy, &
34 : dbcsr_get_matrix_type, &
35 : dbcsr_p_type, &
36 : dbcsr_type_antisymmetric, &
37 : dbcsr_dot_threadsafe
38 : USE gamma, ONLY: init_md_ftable
39 : USE hfx_communication, ONLY: distribute_ks_matrix, &
40 : get_atomic_block_maps, &
41 : get_full_density
42 : USE hfx_compression_methods, ONLY: hfx_add_mult_cache_elements, &
43 : hfx_add_single_cache_element, &
44 : hfx_decompress_first_cache, &
45 : hfx_flush_last_cache, &
46 : hfx_get_mult_cache_elements, &
47 : hfx_get_single_cache_element, &
48 : hfx_reset_cache_and_container
49 : USE hfx_contract_block, ONLY: contract_block
50 : USE hfx_libint_interface, ONLY: evaluate_eri
51 : USE hfx_load_balance_methods, ONLY: collect_load_balance_info, &
52 : hfx_load_balance, &
53 : hfx_update_load_balance
54 : USE hfx_pair_list_methods, ONLY: build_atomic_pair_list, &
55 : build_pair_list, &
56 : build_pair_list_pgf, &
57 : build_pgf_product_list, &
58 : pgf_product_list_size
59 : USE hfx_screening_methods, ONLY: calc_pair_dist_radii, &
60 : calc_screening_functions, &
61 : update_pmax_mat
62 : USE hfx_types, ONLY: &
63 : alloc_containers, dealloc_containers, hfx_basis_info_type, hfx_basis_type, hfx_cache_type, &
64 : hfx_cell_type, hfx_container_type, hfx_create_neighbor_cells, hfx_distribution, &
65 : hfx_general_type, hfx_init_container, hfx_load_balance_type, hfx_memory_type, hfx_p_kind, &
66 : hfx_pgf_list, hfx_pgf_product_list, hfx_potential_type, hfx_reset_memory_usage_counter, &
67 : hfx_screen_coeff_type, hfx_screening_type, hfx_task_list_type, hfx_type, init_t_c_g0_lmax, &
68 : log_zero, pair_list_type, pair_set_list_type
69 : USE input_constants, ONLY: do_potential_mix_cl_trunc, &
70 : do_potential_truncated, &
71 : hfx_do_eval_energy
72 : USE input_section_types, ONLY: section_vals_type
73 : USE kinds, ONLY: default_string_length, &
74 : dp, &
75 : int_8
76 : USE kpoint_types, ONLY: get_kpoint_info, &
77 : kpoint_type
78 : USE libint_wrapper, ONLY: cp_libint_t
79 : USE machine, ONLY: m_flush, &
80 : m_memory, &
81 : m_walltime
82 : USE mathconstants, ONLY: fac
83 : USE orbital_pointers, ONLY: nco, &
84 : ncoset, &
85 : nso
86 : USE particle_types, ONLY: particle_type
87 : USE qs_environment_types, ONLY: get_qs_env, &
88 : qs_environment_type
89 : USE qs_ks_types, ONLY: get_ks_env, &
90 : qs_ks_env_type
91 : USE t_c_g0, ONLY: init
92 : USE util, ONLY: sort
93 :
94 : !$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num, omp_get_num_threads
95 :
96 : #include "./base/base_uses.f90"
97 :
98 : IMPLICIT NONE
99 : PRIVATE
100 :
101 : PUBLIC :: integrate_four_center, coulomb4
102 :
103 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'hfx_energy_potential'
104 :
105 : !***
106 :
107 : CONTAINS
108 :
109 : ! **************************************************************************************************
110 : !> \brief computes four center integrals for a full basis set and updates the
111 : !> Kohn-Sham-Matrix and energy. Uses all 8 eri symmetries
112 : !> \param qs_env ...
113 : !> \param x_data ...
114 : !> \param ks_matrix ...
115 : !> \param ehfx energy calculated with the updated HFX matrix
116 : !> \param rho_ao density matrix in ao basis
117 : !> \param hfx_section input_section HFX
118 : !> \param para_env ...
119 : !> \param geometry_did_change flag that indicates we have to recalc integrals
120 : !> \param irep Index for the HFX replica
121 : !> \param distribute_fock_matrix Flag that indicates whether to communicate the
122 : !> new fock matrix or not
123 : !> \param ispin ...
124 : !> \par History
125 : !> 06.2007 created [Manuel Guidon]
126 : !> 08.2007 optimized load balance [Manuel Guidon]
127 : !> 09.2007 new parallelization [Manuel Guidon]
128 : !> 02.2009 completely rewritten screening part [Manuel Guidon]
129 : !> 12.2017 major bug fix. removed wrong cycle that was caussing segfault.
130 : !> see https://groups.google.com/forum/#!topic/cp2k/pc6B14XOALY
131 : !> [Tobias Binninger + Valery Weber]
132 : !> \author Manuel Guidon
133 : ! **************************************************************************************************
134 36269 : SUBROUTINE integrate_four_center(qs_env, x_data, ks_matrix, ehfx, rho_ao, hfx_section, para_env, &
135 : geometry_did_change, irep, distribute_fock_matrix, &
136 : ispin, nspins)
137 :
138 : TYPE(qs_environment_type), POINTER :: qs_env
139 : TYPE(hfx_type), DIMENSION(:, :), POINTER :: x_data
140 : TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: ks_matrix
141 : REAL(KIND=dp), INTENT(OUT) :: ehfx
142 : TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: rho_ao
143 : TYPE(section_vals_type), POINTER :: hfx_section
144 : TYPE(mp_para_env_type), POINTER :: para_env
145 : LOGICAL :: geometry_did_change
146 : INTEGER :: irep
147 : LOGICAL, INTENT(IN) :: distribute_fock_matrix
148 : INTEGER, INTENT(IN) :: ispin
149 : INTEGER, INTENT(IN), OPTIONAL :: nspins
150 :
151 : CHARACTER(LEN=*), PARAMETER :: routineN = 'integrate_four_center'
152 :
153 : CHARACTER(len=default_string_length) :: eps_scaling_str, eps_schwarz_min_str
154 : INTEGER :: act_atomic_block_offset, act_set_offset, atomic_offset_ac, atomic_offset_ad, &
155 : atomic_offset_bc, atomic_offset_bd, bin, bits_max_val, buffer_left, buffer_size, &
156 : buffer_start, cache_size, current_counter, handle, handle_bin, handle_dist_ks, &
157 : handle_getP, handle_load, handle_main, i, i_list_ij, i_list_kl, i_set_list_ij, &
158 : i_set_list_ij_start, i_set_list_ij_stop, i_set_list_kl, i_set_list_kl_start, &
159 : i_set_list_kl_stop, i_thread, iatom, iatom_block, iatom_end, iatom_start, ikind, img, &
160 : iset, iw, j, jatom, jatom_block, jatom_end, jatom_start, jkind, jset, katom, katom_block, &
161 : katom_end
162 : INTEGER :: katom_start, kind_kind_idx, kkind, kset, l_max, latom, latom_block, latom_end, &
163 : latom_start, lkind, lset, ma, max_am, max_pgf, max_set, mb, my_bin_id, my_bin_size, &
164 : my_thread_id, n_threads, natom, nbits, ncob, ncos_max, nints, nkimages, nkind, &
165 : nneighbors, nseta, nsetb, nsgf_max, my_nspins, pa, sgfb, shm_task_counter, shm_total_bins, &
166 : sphi_a_u1, sphi_a_u2, sphi_a_u3, sphi_b_u1, sphi_b_u2, sphi_b_u3, sphi_c_u1, sphi_c_u2, &
167 : sphi_c_u3, sphi_d_u1, sphi_d_u2, sphi_d_u3, swap_id, tmp_i4, unit_id
168 : INTEGER(int_8) :: atom_block, counter, estimate_to_store_int, max_val_memory, &
169 : mb_size_buffers, mb_size_f, mb_size_p, mem_compression_counter, &
170 : mem_compression_counter_disk, mem_eris, mem_eris_disk, mem_max_val, memsize_after, &
171 : memsize_before, my_current_counter, my_istart, n_processes, nblocks, ncpu, neris_disk, &
172 : neris_incore, neris_onthefly, neris_tmp, neris_total, nprim_ints, &
173 : shm_mem_compression_counter, shm_neris_disk, shm_neris_incore, shm_neris_onthefly, &
174 : shm_neris_total, shm_nprim_ints, shm_stor_count_int_disk, shm_stor_count_max_val, &
175 : shm_storage_counter_integrals, stor_count_int_disk
176 : INTEGER(int_8) :: stor_count_max_val, storage_counter_integrals, subtr_size_mb, tmp_block, &
177 : tmp_i8(8)
178 36269 : INTEGER(int_8), ALLOCATABLE, DIMENSION(:) :: tmp_task_list_cost
179 72538 : INTEGER, ALLOCATABLE, DIMENSION(:) :: kind_of, last_sgf_global, nimages, &
180 36269 : tmp_index
181 36269 : INTEGER, DIMENSION(:), POINTER :: la_max, la_min, lb_max, lb_min, lc_max, lc_min, ld_max, &
182 72538 : ld_min, npgfa, npgfb, npgfc, npgfd, nsgfa, nsgfb, nsgfc, nsgfd, shm_block_offset
183 72538 : INTEGER, DIMENSION(:, :), POINTER :: first_sgfb, nsgfl_a, nsgfl_b, nsgfl_c, nsgfl_d, &
184 36269 : offset_ac_set, offset_ad_set, offset_bc_set, offset_bd_set, shm_atomic_block_offset
185 : INTEGER, DIMENSION(:, :), POINTER, SAVE :: shm_is_assoc_atomic_block
186 36269 : INTEGER, DIMENSION(:, :, :), POINTER :: cell_to_index
187 36269 : INTEGER, DIMENSION(:, :, :, :), POINTER :: shm_set_offset
188 : INTEGER, SAVE :: shm_number_of_p_entries
189 : LOGICAL :: bins_left, buffer_overflow, do_disk_storage, do_dynamic_load_balancing, do_it, &
190 : do_kpoints, do_p_screening, do_periodic, do_print_load_balance_info, is_anti_symmetric, &
191 : ks_fully_occ, my_geo_change, treat_lsd_in_core, use_disk_storage
192 36269 : LOGICAL, DIMENSION(:, :), POINTER :: shm_atomic_pair_list
193 : REAL(dp) :: afac, bintime_start, bintime_stop, cartesian_estimate, compression_factor, &
194 : compression_factor_disk, ene_x_aa, ene_x_aa_diag, ene_x_bb, ene_x_bb_diag, eps_schwarz, &
195 : eps_storage, etmp, fac, hf_fraction, ln_10, log10_eps_schwarz, log10_pmax, &
196 : max_contraction_val, max_val1, max_val2, max_val2_set, pmax_atom, pmax_blocks, &
197 : pmax_entry, ra(3), rab2, rb(3), rc(3), rcd2, rd(3), screen_kind_ij, screen_kind_kl, &
198 : spherical_estimate, symm_fac
199 36269 : REAL(dp), ALLOCATABLE, DIMENSION(:) :: ee_buffer1, ee_buffer2, ee_primitives_tmp, ee_work, &
200 72538 : ee_work2, kac_buf, kad_buf, kbc_buf, kbd_buf, pac_buf, pad_buf, pbc_buf, pbd_buf, &
201 36269 : primitive_integrals
202 36269 : REAL(dp), DIMENSION(:), POINTER :: p_work
203 36269 : REAL(dp), DIMENSION(:, :), POINTER :: full_density_alpha, full_density_beta, full_ks_alpha, &
204 72538 : full_ks_beta, max_contraction, ptr_p_1, ptr_p_2, ptr_p_3, ptr_p_4, shm_pmax_atom, &
205 36269 : shm_pmax_block, sphi_b, zeta, zetb, zetc, zetd
206 36269 : REAL(dp), DIMENSION(:, :, :), POINTER :: sphi_a_ext_set, sphi_b_ext_set, &
207 36269 : sphi_c_ext_set, sphi_d_ext_set
208 36269 : REAL(dp), DIMENSION(:, :, :, :), POINTER :: sphi_a_ext, sphi_b_ext, sphi_c_ext, &
209 36269 : sphi_d_ext
210 : REAL(KIND=dp) :: coeffs_kind_max0
211 36269 : TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
212 : TYPE(cell_type), POINTER :: cell
213 : TYPE(cp_libint_t) :: private_lib
214 : TYPE(cp_logger_type), POINTER :: logger
215 36269 : TYPE(dbcsr_p_type), DIMENSION(:), POINTER :: matrix_ks_aux_fit_hfx
216 : TYPE(dft_control_type), POINTER :: dft_control
217 : TYPE(hfx_basis_info_type), POINTER :: basis_info
218 36269 : TYPE(hfx_basis_type), DIMENSION(:), POINTER :: basis_parameter
219 36269 : TYPE(hfx_cache_type), DIMENSION(:), POINTER :: integral_caches, integral_caches_disk
220 : TYPE(hfx_cache_type), POINTER :: maxval_cache, maxval_cache_disk
221 36269 : TYPE(hfx_container_type), DIMENSION(:), POINTER :: integral_containers, &
222 36269 : integral_containers_disk
223 : TYPE(hfx_container_type), POINTER :: maxval_container, maxval_container_disk
224 : TYPE(hfx_distribution), POINTER :: distribution_energy
225 : TYPE(hfx_general_type) :: general_parameter
226 : TYPE(hfx_load_balance_type), POINTER :: load_balance_parameter
227 : TYPE(hfx_memory_type), POINTER :: memory_parameter
228 36269 : TYPE(hfx_p_kind), DIMENSION(:), POINTER :: shm_initial_p
229 36269 : TYPE(hfx_pgf_list), ALLOCATABLE, DIMENSION(:) :: pgf_list_ij, pgf_list_kl
230 : TYPE(hfx_pgf_product_list), ALLOCATABLE, &
231 36269 : DIMENSION(:) :: pgf_product_list
232 : TYPE(hfx_potential_type) :: potential_parameter
233 : TYPE(hfx_screen_coeff_type), DIMENSION(:, :), &
234 36269 : POINTER :: screen_coeffs_kind, tmp_R_1, tmp_R_2, &
235 36269 : tmp_screen_pgf1, tmp_screen_pgf2
236 : TYPE(hfx_screen_coeff_type), &
237 36269 : DIMENSION(:, :, :, :), POINTER :: screen_coeffs_set
238 : TYPE(hfx_screen_coeff_type), &
239 36269 : DIMENSION(:, :, :, :, :, :), POINTER :: radii_pgf, screen_coeffs_pgf
240 : TYPE(hfx_screening_type) :: screening_parameter
241 36269 : TYPE(hfx_task_list_type), DIMENSION(:), POINTER :: shm_task_list, tmp_task_list
242 : TYPE(hfx_type), POINTER :: actual_x_data, shm_master_x_data
243 : TYPE(kpoint_type), POINTER :: kpoints
244 : TYPE(pair_list_type) :: list_ij, list_kl
245 : TYPE(pair_set_list_type), ALLOCATABLE, &
246 36269 : DIMENSION(:) :: set_list_ij, set_list_kl
247 36269 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
248 : TYPE(qs_ks_env_type), POINTER :: ks_env
249 :
250 36269 : NULLIFY (dft_control, matrix_ks_aux_fit_hfx)
251 :
252 36269 : CALL timeset(routineN, handle)
253 :
254 36269 : CALL cite_reference(Guidon2008)
255 36269 : CALL cite_reference(Guidon2009)
256 :
257 36269 : ehfx = 0.0_dp
258 :
259 : ! ** This is not very clean, but effective. ispin can only be 2 if we do the beta spin part in core
260 36269 : my_geo_change = geometry_did_change
261 36269 : IF (ispin == 2) my_geo_change = .FALSE.
262 :
263 36269 : logger => cp_get_default_logger()
264 :
265 36269 : is_anti_symmetric = dbcsr_get_matrix_type(rho_ao(1, 1)%matrix) == dbcsr_type_antisymmetric
266 :
267 36269 : IF (my_geo_change) THEN
268 2188 : CALL m_memory(memsize_before)
269 2188 : CALL para_env%max(memsize_before)
270 : iw = cp_print_key_unit_nr(logger, hfx_section, "HF_INFO", &
271 2188 : extension=".scfLog")
272 2188 : IF (iw > 0) THEN
273 : WRITE (UNIT=iw, FMT="(/,(T3,A,T60,I21))") &
274 395 : "HFX_MEM_INFO| Est. max. program size before HFX [MiB]:", memsize_before/(1024*1024)
275 395 : CALL m_flush(iw)
276 : END IF
277 : CALL cp_print_key_finished_output(iw, logger, hfx_section, &
278 2188 : "HF_INFO")
279 : END IF
280 :
281 36269 : CALL get_qs_env(qs_env=qs_env, atomic_kind_set=atomic_kind_set, cell=cell)
282 :
283 36269 : NULLIFY (cell_to_index)
284 36269 : CALL get_qs_env(qs_env=qs_env, do_kpoints=do_kpoints)
285 36269 : IF (do_kpoints) THEN
286 0 : CALL get_qs_env(qs_env=qs_env, ks_env=ks_env)
287 0 : CALL get_ks_env(ks_env=ks_env, kpoints=kpoints)
288 0 : CALL get_kpoint_info(kpoint=kpoints, cell_to_index=cell_to_index)
289 : END IF
290 :
291 : !! Calculate l_max used in fgamma , because init_md_ftable is definitely not thread safe
292 36269 : nkind = SIZE(atomic_kind_set, 1)
293 36269 : l_max = 0
294 102537 : DO ikind = 1, nkind
295 300483 : l_max = MAX(l_max, MAXVAL(x_data(1, 1)%basis_parameter(ikind)%lmax))
296 : END DO
297 36269 : l_max = 4*l_max
298 36269 : CALL init_md_ftable(l_max)
299 :
300 36269 : IF (x_data(1, 1)%potential_parameter%potential_type == do_potential_truncated .OR. &
301 : x_data(1, 1)%potential_parameter%potential_type == do_potential_mix_cl_trunc) THEN
302 9796 : IF (l_max > init_t_c_g0_lmax) THEN
303 314 : IF (para_env%is_source()) THEN
304 157 : CALL open_file(unit_number=unit_id, file_name=x_data(1, 1)%potential_parameter%filename)
305 : END IF
306 314 : CALL init(l_max, unit_id, para_env%mepos, para_env)
307 314 : IF (para_env%is_source()) THEN
308 157 : CALL close_file(unit_id)
309 : END IF
310 314 : init_t_c_g0_lmax = l_max
311 : END IF
312 : END IF
313 :
314 36269 : n_threads = 1
315 36269 : !$ n_threads = omp_get_max_threads()
316 :
317 : ! This initialization is needed to prevent a segmentation fault. The correct assigment is done below
318 36269 : my_nspins = 0
319 36269 : IF (PRESENT(nspins)) my_nspins = nspins
320 :
321 36269 : shm_neris_total = 0
322 36269 : shm_nprim_ints = 0
323 36269 : shm_neris_onthefly = 0
324 36269 : shm_storage_counter_integrals = 0
325 36269 : shm_stor_count_int_disk = 0
326 36269 : shm_neris_incore = 0
327 36269 : shm_neris_disk = 0
328 36269 : shm_stor_count_max_val = 0
329 :
330 : !$OMP PARALLEL DEFAULT(OMP_DEFAULT_NONE_WITH_OOP) &
331 : !$OMP SHARED(qs_env,&
332 : !$OMP x_data,&
333 : !$OMP ks_matrix,&
334 : !$OMP ehfx,&
335 : !$OMP rho_ao,&
336 : !$OMP matrix_ks_aux_fit_hfx,&
337 : !$OMP hfx_section,&
338 : !$OMP para_env,&
339 : !$OMP my_geo_change,&
340 : !$OMP irep,&
341 : !$OMP distribute_fock_matrix,&
342 : !$OMP cell_to_index,&
343 : !$OMP ncoset,&
344 : !$OMP nso,&
345 : !$OMP nco,&
346 : !$OMP full_ks_alpha,&
347 : !$OMP full_ks_beta,&
348 : !$OMP n_threads,&
349 : !$OMP full_density_alpha,&
350 : !$OMP full_density_beta,&
351 : !$OMP shm_initial_p,&
352 : !$OMP shm_is_assoc_atomic_block,&
353 : !$OMP shm_number_of_p_entries,&
354 : !$OMP shm_neris_total,&
355 : !$OMP shm_neris_onthefly,&
356 : !$OMP shm_storage_counter_integrals,&
357 : !$OMP shm_stor_count_int_disk,&
358 : !$OMP shm_neris_incore,&
359 : !$OMP shm_neris_disk,&
360 : !$OMP shm_nprim_ints,&
361 : !$OMP shm_stor_count_max_val,&
362 : !$OMP cell,&
363 : !$OMP screen_coeffs_set,&
364 : !$OMP screen_coeffs_kind,&
365 : !$OMP screen_coeffs_pgf,&
366 : !$OMP pgf_product_list_size,&
367 : !$OMP radii_pgf,&
368 : !$OMP nkind,&
369 : !$OMP ispin,&
370 : !$OMP is_anti_symmetric,&
371 : !$OMP shm_atomic_block_offset,&
372 : !$OMP shm_set_offset,&
373 : !$OMP shm_block_offset,&
374 : !$OMP shm_task_counter,&
375 : !$OMP shm_task_list,&
376 : !$OMP shm_total_bins,&
377 : !$OMP shm_master_x_data,&
378 : !$OMP shm_pmax_atom,&
379 : !$OMP shm_pmax_block,&
380 : !$OMP shm_atomic_pair_list,&
381 : !$OMP shm_mem_compression_counter,&
382 : !$OMP do_print_load_balance_info,&
383 : !$OMP my_nspins) &
384 : !$OMP PRIVATE(ln_10,i_thread,actual_x_data,do_periodic,screening_parameter,potential_parameter,&
385 : !$OMP general_parameter,load_balance_parameter,memory_parameter,cache_size,bits_max_val,&
386 : !$OMP basis_parameter,basis_info,treat_lsd_in_core,ncpu,n_processes,neris_total,neris_incore,&
387 : !$OMP neris_disk,neris_onthefly,mem_eris,mem_eris_disk,mem_max_val,compression_factor,&
388 : !$OMP compression_factor_disk,nprim_ints,neris_tmp,max_val_memory,max_am,do_p_screening,&
389 : !$OMP max_set,particle_set,atomic_kind_set,natom,kind_of,ncos_max,nsgf_max,ikind,&
390 : !$OMP nseta,npgfa,la_max,nsgfa,primitive_integrals,pbd_buf,pbc_buf,pad_buf,pac_buf,kbd_buf,kbc_buf,&
391 : !$OMP kad_buf,kac_buf,ee_work,ee_work2,ee_buffer1,ee_buffer2,ee_primitives_tmp,max_contraction,&
392 : !$OMP max_pgf,jkind,lb_max,nsetb,npgfb,first_sgfb,sphi_b,nsgfb,ncob,sgfb,nneighbors,pgf_list_ij,pgf_list_kl,&
393 : !$OMP pgf_product_list,nimages,ks_fully_occ,subtr_size_mb,use_disk_storage,counter,do_disk_storage,&
394 : !$OMP maxval_container_disk,maxval_cache_disk,integral_containers_disk,integral_caches_disk,eps_schwarz,&
395 : !$OMP log10_eps_schwarz,eps_storage,hf_fraction,buffer_overflow,logger,private_lib,last_sgf_global,handle_getp,&
396 : !$OMP p_work,fac,handle_load,do_dynamic_load_balancing,my_bin_size,maxval_container,integral_containers,maxval_cache,&
397 : !$OMP integral_caches,tmp_task_list,tmp_task_list_cost,tmp_index,handle_main,coeffs_kind_max0,set_list_ij,&
398 : !$OMP set_list_kl,iatom_start,iatom_end,jatom_start,jatom_end,nblocks,bins_left,do_it,distribution_energy,&
399 : !$OMP my_thread_id,my_bin_id,handle_bin,bintime_start,my_istart,my_current_counter,latom_block,tmp_block,&
400 : !$OMP katom_block,katom_start,katom_end,latom_start,latom_end,pmax_blocks,list_ij,list_kl,i_set_list_ij_start,&
401 : !$OMP i_set_list_ij_stop,ra,rb,rab2,la_min,zeta,sphi_a_ext,nsgfl_a,sphi_a_u1,sphi_a_u2,sphi_a_u3,&
402 : !$OMP lb_min,zetb,sphi_b_ext,nsgfl_b,sphi_b_u1,sphi_b_u2,sphi_b_u3,katom,latom,i_set_list_kl_start,i_set_list_kl_stop,&
403 : !$OMP kkind,lkind,rc,rd,rcd2,pmax_atom,screen_kind_ij,screen_kind_kl,symm_fac,lc_max,lc_min,npgfc,zetc,nsgfc,sphi_c_ext,&
404 : !$OMP nsgfl_c,sphi_c_u1,sphi_c_u2,sphi_c_u3,ld_max,ld_min,npgfd,zetd,nsgfd,sphi_d_ext,nsgfl_d,sphi_d_u1,sphi_d_u2,&
405 : !$OMP sphi_d_u3,atomic_offset_bd,atomic_offset_bc,atomic_offset_ad,atomic_offset_ac,offset_bd_set,offset_bc_set,&
406 : !$OMP offset_ad_set,offset_ac_set,swap_id,kind_kind_idx,ptr_p_1,ptr_p_2,ptr_p_3,ptr_p_4,mem_compression_counter,&
407 : !$OMP mem_compression_counter_disk,max_val1,sphi_a_ext_set,sphi_b_ext_set,kset,lset,max_val2_set,max_val2,&
408 : !$OMP sphi_c_ext_set,sphi_d_ext_set,pmax_entry,log10_pmax,current_counter,nints,estimate_to_store_int,&
409 : !$OMP spherical_estimate,nbits,buffer_left,buffer_start,buffer_size,max_contraction_val,tmp_r_1,tmp_r_2,&
410 : !$OMP tmp_screen_pgf1,tmp_screen_pgf2,cartesian_estimate,bintime_stop,iw,memsize_after,storage_counter_integrals,&
411 : !$OMP stor_count_int_disk,stor_count_max_val,ene_x_aa,ene_x_bb,mb_size_p,mb_size_f,mb_size_buffers,afac,ene_x_aa_diag,&
412 : !$OMP ene_x_bb_diag,act_atomic_block_offset,act_set_offset,j,handle_dist_ks,tmp_i8,tmp_i4,dft_control,&
413 36269 : !$OMP etmp,nkimages,img,bin,eps_scaling_str,eps_schwarz_min_str)
414 :
415 : ln_10 = LOG(10.0_dp)
416 : i_thread = 0
417 : !$ i_thread = omp_get_thread_num()
418 :
419 : actual_x_data => x_data(irep, i_thread + 1)
420 : !$OMP MASTER
421 : shm_master_x_data => x_data(irep, 1)
422 : !$OMP END MASTER
423 : !$OMP BARRIER
424 :
425 : do_periodic = actual_x_data%periodic_parameter%do_periodic
426 :
427 : IF (do_periodic) THEN
428 : ! ** Rebuild neighbor lists in case the cell has changed (i.e. NPT MD)
429 : actual_x_data%periodic_parameter%number_of_shells = actual_x_data%periodic_parameter%mode
430 : CALL hfx_create_neighbor_cells(actual_x_data, actual_x_data%periodic_parameter%number_of_shells_from_input, &
431 : cell, i_thread)
432 : END IF
433 :
434 : screening_parameter = actual_x_data%screening_parameter
435 : potential_parameter = actual_x_data%potential_parameter
436 :
437 : general_parameter = actual_x_data%general_parameter
438 : load_balance_parameter => actual_x_data%load_balance_parameter
439 : memory_parameter => actual_x_data%memory_parameter
440 :
441 : cache_size = memory_parameter%cache_size
442 : bits_max_val = memory_parameter%bits_max_val
443 :
444 : basis_parameter => actual_x_data%basis_parameter
445 : basis_info => actual_x_data%basis_info
446 :
447 : treat_lsd_in_core = general_parameter%treat_lsd_in_core
448 :
449 : ncpu = para_env%num_pe
450 : n_processes = ncpu*n_threads
451 :
452 : !! initialize some counters
453 : neris_total = 0_int_8
454 : neris_incore = 0_int_8
455 : neris_disk = 0_int_8
456 : neris_onthefly = 0_int_8
457 : mem_eris = 0_int_8
458 : mem_eris_disk = 0_int_8
459 : mem_max_val = 0_int_8
460 : compression_factor = 0.0_dp
461 : compression_factor_disk = 0.0_dp
462 : nprim_ints = 0_int_8
463 : neris_tmp = 0_int_8
464 : max_val_memory = 1_int_8
465 :
466 : max_am = basis_info%max_am
467 :
468 : CALL get_qs_env(qs_env=qs_env, &
469 : atomic_kind_set=atomic_kind_set, &
470 : particle_set=particle_set, &
471 : dft_control=dft_control)
472 : IF (dft_control%do_admm) CALL get_admm_env(qs_env%admm_env, matrix_ks_aux_fit_hfx=matrix_ks_aux_fit_hfx)
473 :
474 : do_p_screening = screening_parameter%do_initial_p_screening
475 : ! Special treatment for MP2 with initial density screening
476 : IF (do_p_screening) THEN
477 : IF (ASSOCIATED(qs_env%mp2_env)) THEN
478 : IF ((qs_env%mp2_env%ri_grad%free_hfx_buffer)) THEN
479 : do_p_screening = ((qs_env%mp2_env%p_screen) .AND. (qs_env%mp2_env%not_last_hfx))
480 : ELSE
481 : do_p_screening = .FALSE.
482 : END IF
483 : END IF
484 : END IF
485 : max_set = basis_info%max_set
486 : natom = SIZE(particle_set, 1)
487 :
488 : ! Number of image matrices in k-point calculations (nkimages==1 -> no kpoints)
489 : nkimages = dft_control%nimages
490 : CPASSERT(nkimages == 1)
491 :
492 : CALL get_atomic_kind_set(atomic_kind_set=atomic_kind_set, kind_of=kind_of)
493 :
494 : !! precompute maximum nco and allocate scratch
495 : ncos_max = 0
496 : nsgf_max = 0
497 : DO iatom = 1, natom
498 : ikind = kind_of(iatom)
499 : nseta = basis_parameter(ikind)%nset
500 : npgfa => basis_parameter(ikind)%npgf
501 : la_max => basis_parameter(ikind)%lmax
502 : nsgfa => basis_parameter(ikind)%nsgf
503 : DO iset = 1, nseta
504 : ncos_max = MAX(ncos_max, ncoset(la_max(iset)))
505 : nsgf_max = MAX(nsgf_max, nsgfa(iset))
506 : END DO
507 : END DO
508 : !! Allocate the arrays for the integrals.
509 : ALLOCATE (primitive_integrals(nsgf_max**4))
510 : primitive_integrals = 0.0_dp
511 :
512 : ALLOCATE (pbd_buf(nsgf_max**2))
513 : ALLOCATE (pbc_buf(nsgf_max**2))
514 : ALLOCATE (pad_buf(nsgf_max**2))
515 : ALLOCATE (pac_buf(nsgf_max**2))
516 : ALLOCATE (kbd_buf(nsgf_max**2))
517 : ALLOCATE (kbc_buf(nsgf_max**2))
518 : ALLOCATE (kad_buf(nsgf_max**2))
519 : ALLOCATE (kac_buf(nsgf_max**2))
520 : ALLOCATE (ee_work(ncos_max**4))
521 : ALLOCATE (ee_work2(ncos_max**4))
522 : ALLOCATE (ee_buffer1(ncos_max**4))
523 : ALLOCATE (ee_buffer2(ncos_max**4))
524 : ALLOCATE (ee_primitives_tmp(nsgf_max**4))
525 :
526 : IF (my_nspins == 0) my_nspins = dft_control%nspins
527 :
528 : ALLOCATE (max_contraction(max_set, natom))
529 :
530 : max_contraction = 0.0_dp
531 : max_pgf = 0
532 : DO jatom = 1, natom
533 : jkind = kind_of(jatom)
534 : lb_max => basis_parameter(jkind)%lmax
535 : nsetb = basis_parameter(jkind)%nset
536 : npgfb => basis_parameter(jkind)%npgf
537 : first_sgfb => basis_parameter(jkind)%first_sgf
538 : sphi_b => basis_parameter(jkind)%sphi
539 : nsgfb => basis_parameter(jkind)%nsgf
540 : DO jset = 1, nsetb
541 : ! takes the primitive to contracted transformation into account
542 : ncob = npgfb(jset)*ncoset(lb_max(jset))
543 : sgfb = first_sgfb(1, jset)
544 : ! if the primitives are assumed to be all of max_val2, max_val2*p2s_b becomes
545 : ! the maximum value after multiplication with sphi_b
546 : max_contraction(jset, jatom) = MAXVAL([(SUM(ABS(sphi_b(1:ncob, i))), i=sgfb, sgfb + nsgfb(jset) - 1)])
547 : max_pgf = MAX(max_pgf, npgfb(jset))
548 : END DO
549 : END DO
550 :
551 : ! ** Allocate buffers for pgf_lists
552 : nneighbors = SIZE(actual_x_data%neighbor_cells)
553 : ALLOCATE (pgf_list_ij(max_pgf**2))
554 : ALLOCATE (pgf_list_kl(max_pgf**2))
555 : ! the size of pgf_product_list is allocated and resized as needed. The initial guess grows as needed
556 : !$OMP ATOMIC READ
557 : tmp_i4 = pgf_product_list_size
558 : ALLOCATE (pgf_product_list(tmp_i4))
559 : ALLOCATE (nimages(max_pgf**2))
560 :
561 : DO i = 1, max_pgf**2
562 : ALLOCATE (pgf_list_ij(i)%image_list(nneighbors))
563 : ALLOCATE (pgf_list_kl(i)%image_list(nneighbors))
564 : END DO
565 : !$OMP BARRIER
566 : !$OMP MASTER
567 : !! Calculate helper array that stores if a certain atomic pair is associated in the KS matrix
568 : IF (my_geo_change) THEN
569 : CALL get_atomic_block_maps(ks_matrix(1, 1)%matrix, basis_parameter, kind_of, &
570 : shm_master_x_data%is_assoc_atomic_block, &
571 : shm_master_x_data%number_of_p_entries, &
572 : para_env, &
573 : shm_master_x_data%atomic_block_offset, &
574 : shm_master_x_data%set_offset, &
575 : shm_master_x_data%block_offset, &
576 : shm_master_x_data%map_atoms_to_cpus, &
577 : nkind)
578 :
579 : shm_is_assoc_atomic_block => shm_master_x_data%is_assoc_atomic_block
580 :
581 : !! Get occupation of KS-matrix
582 : ks_fully_occ = .TRUE.
583 : outer: DO iatom = 1, natom
584 : DO jatom = iatom, natom
585 : IF (shm_is_assoc_atomic_block(jatom, iatom) == 0) THEN
586 : ks_fully_occ = .FALSE.
587 : EXIT outer
588 : END IF
589 : END DO
590 : END DO outer
591 :
592 : IF (.NOT. ks_fully_occ) THEN
593 : CALL cp_warn(__LOCATION__, &
594 : "The Kohn Sham matrix is not 100% occupied. This "// &
595 : "may result in incorrect Hartree-Fock results. Setting "// &
596 : "MIN_PAIR_LIST_RADIUS to -1 in the QS section ensures a "// &
597 : "fully occupied KS matrix. For more information "// &
598 : "see FAQ: https://www.cp2k.org/faq:hfx_eps_warning")
599 : END IF
600 : END IF
601 :
602 : ! ** Set pointers
603 : shm_number_of_p_entries = shm_master_x_data%number_of_p_entries
604 : shm_is_assoc_atomic_block => shm_master_x_data%is_assoc_atomic_block
605 : shm_atomic_block_offset => shm_master_x_data%atomic_block_offset
606 : shm_set_offset => shm_master_x_data%set_offset
607 : shm_block_offset => shm_master_x_data%block_offset
608 : !$OMP END MASTER
609 : !$OMP BARRIER
610 :
611 : ! ** Reset storage counter given by MAX_MEMORY by subtracting all buffers
612 : ! ** Fock and density Matrices (shared)
613 : subtr_size_mb = 2_int_8*shm_block_offset(ncpu + 1)
614 : ! ** if non restricted ==> alpha/beta spin
615 : IF (.NOT. treat_lsd_in_core) THEN
616 : IF (my_nspins == 2) subtr_size_mb = subtr_size_mb*2_int_8
617 : END IF
618 : ! ** Initial P only MAX(alpha,beta) (shared)
619 : IF (do_p_screening .OR. screening_parameter%do_p_screening_forces) THEN
620 : subtr_size_mb = subtr_size_mb + memory_parameter%size_p_screen
621 : END IF
622 : ! ** In core forces require their own initial P
623 : IF (screening_parameter%do_p_screening_forces) THEN
624 : IF (memory_parameter%treat_forces_in_core) THEN
625 : subtr_size_mb = subtr_size_mb + memory_parameter%size_p_screen
626 : END IF
627 : END IF
628 : ! ** primitive buffer (not shared by the threads)
629 : subtr_size_mb = subtr_size_mb + nsgf_max**4*n_threads
630 : ! ** density + fock buffers
631 : subtr_size_mb = subtr_size_mb + 8_int_8*nsgf_max**2*n_threads
632 : ! ** screening functions (shared)
633 : ! ** coeffs_pgf
634 : subtr_size_mb = subtr_size_mb + max_pgf**2*max_set**2*nkind**2
635 : ! ** coeffs_set
636 : subtr_size_mb = subtr_size_mb + max_set**2*nkind**2
637 : ! ** coeffs_kind
638 : subtr_size_mb = subtr_size_mb + nkind**2
639 : ! ** radii_pgf
640 : subtr_size_mb = subtr_size_mb + max_pgf**2*max_set**2*nkind**2
641 :
642 : ! ** is_assoc (shared)
643 : subtr_size_mb = subtr_size_mb + natom**2
644 :
645 : ! ** pmax_atom (shared)
646 : IF (do_p_screening) THEN
647 : subtr_size_mb = subtr_size_mb + natom**2
648 : END IF
649 : IF (screening_parameter%do_p_screening_forces) THEN
650 : IF (memory_parameter%treat_forces_in_core) THEN
651 : subtr_size_mb = subtr_size_mb + natom**2
652 : END IF
653 : END IF
654 :
655 : ! ** Convert into MiB's
656 : subtr_size_mb = subtr_size_mb*8_int_8/1024_int_8/1024_int_8
657 :
658 : ! ** Subtracting all these buffers from MAX_MEMORY yields the amount
659 : ! ** of RAM that is left for the compressed integrals. When using threads
660 : ! ** all the available memory is shared among all n_threads. i.e. the faster
661 : ! ** ones can steal from the slower ones
662 :
663 : CALL hfx_reset_memory_usage_counter(memory_parameter, subtr_size_mb)
664 :
665 : use_disk_storage = .FALSE.
666 : counter = 0_int_8
667 : do_disk_storage = memory_parameter%do_disk_storage
668 : IF (do_disk_storage) THEN
669 : maxval_container_disk => actual_x_data%store_ints%maxval_container_disk
670 : maxval_cache_disk => actual_x_data%store_ints%maxval_cache_disk
671 :
672 : integral_containers_disk => actual_x_data%store_ints%integral_containers_disk
673 : integral_caches_disk => actual_x_data%store_ints%integral_caches_disk
674 : END IF
675 :
676 : IF (my_geo_change) THEN
677 : memory_parameter%ram_counter = HUGE(memory_parameter%ram_counter)
678 : END IF
679 :
680 : IF (my_geo_change) THEN
681 : memory_parameter%recalc_forces = .TRUE.
682 : ELSE
683 : IF (.NOT. memory_parameter%treat_forces_in_core) memory_parameter%recalc_forces = .TRUE.
684 : END IF
685 :
686 : !! Get screening parameter
687 : eps_schwarz = screening_parameter%eps_schwarz
688 : IF (eps_schwarz <= 0.0_dp) THEN
689 : log10_eps_schwarz = log_zero
690 : ELSE
691 : log10_eps_schwarz = LOG10(eps_schwarz)
692 : END IF
693 : !! get storage epsilon
694 : eps_storage = eps_schwarz*memory_parameter%eps_storage_scaling
695 :
696 : !! If we have a hybrid functional, we may need only a fraction of exact exchange
697 : hf_fraction = general_parameter%fraction
698 :
699 : !! The number of integrals that fit into the given MAX_MEMORY
700 :
701 : !! Parameters related to the potential 1/r, erf(wr)/r, erfc(wr/r)
702 : potential_parameter = actual_x_data%potential_parameter
703 :
704 : !! Variable to check if we calculate the integrals in-core or on the fly
705 : !! If TRUE -> on the fly
706 : IF (.NOT. memory_parameter%do_all_on_the_fly) THEN
707 : buffer_overflow = .FALSE.
708 : ELSE
709 : buffer_overflow = .TRUE.
710 : END IF
711 : logger => cp_get_default_logger()
712 :
713 : private_lib = actual_x_data%lib
714 :
715 : !! Helper array to map local basis function indices to global ones
716 : ALLOCATE (last_sgf_global(0:natom))
717 : last_sgf_global(0) = 0
718 : DO iatom = 1, natom
719 : ikind = kind_of(iatom)
720 : last_sgf_global(iatom) = last_sgf_global(iatom - 1) + basis_parameter(ikind)%nsgf_total
721 : END DO
722 : !$OMP BARRIER
723 : !$OMP MASTER
724 : !! Let master thread get the density (avoid problems with MPI)
725 : !! Get the full density from all the processors
726 : NULLIFY (full_density_alpha, full_density_beta)
727 : ALLOCATE (full_density_alpha(shm_block_offset(ncpu + 1), nkimages))
728 : IF (.NOT. treat_lsd_in_core .OR. my_nspins == 1) THEN
729 : CALL timeset(routineN//"_getP", handle_getP)
730 : DO img = 1, nkimages
731 : CALL get_full_density(para_env, full_density_alpha(:, img), rho_ao(ispin, img)%matrix, shm_number_of_p_entries, &
732 : shm_master_x_data%block_offset, &
733 : kind_of, basis_parameter, get_max_vals_spin=.FALSE., antisymmetric=is_anti_symmetric)
734 : END DO
735 :
736 : IF (my_nspins == 2) THEN
737 : ALLOCATE (full_density_beta(shm_block_offset(ncpu + 1), nkimages))
738 : DO img = 1, nkimages
739 : CALL get_full_density(para_env, full_density_beta(:, img), rho_ao(2, img)%matrix, shm_number_of_p_entries, &
740 : shm_master_x_data%block_offset, &
741 : kind_of, basis_parameter, get_max_vals_spin=.FALSE., antisymmetric=is_anti_symmetric)
742 : END DO
743 : END IF
744 : CALL timestop(handle_getP)
745 :
746 : !! Calculate the max values of the density matrix actual_pmax stores the data from the actual density matrix
747 : !! and x_data%initial_p stores the same for the initial guess. The initial guess is updated only in the case of
748 : !! a changed geometry
749 : NULLIFY (shm_initial_p)
750 : IF (do_p_screening) THEN
751 : shm_initial_p => shm_master_x_data%initial_p
752 : shm_pmax_atom => shm_master_x_data%pmax_atom
753 : IF (my_geo_change) THEN
754 : CALL update_pmax_mat(shm_master_x_data%initial_p, &
755 : shm_master_x_data%map_atom_to_kind_atom, &
756 : shm_master_x_data%set_offset, &
757 : shm_master_x_data%atomic_block_offset, &
758 : shm_pmax_atom, &
759 : full_density_alpha, full_density_beta, &
760 : natom, kind_of, basis_parameter, &
761 : nkind, shm_master_x_data%is_assoc_atomic_block)
762 : END IF
763 : END IF
764 : ELSE
765 : IF (do_p_screening) THEN
766 : CALL timeset(routineN//"_getP", handle_getP)
767 : DO img = 1, nkimages
768 : CALL get_full_density(para_env, full_density_alpha(:, img), rho_ao(1, img)%matrix, shm_number_of_p_entries, &
769 : shm_master_x_data%block_offset, &
770 : kind_of, basis_parameter, get_max_vals_spin=.TRUE., &
771 : rho_beta=rho_ao(2, img)%matrix, antisymmetric=is_anti_symmetric)
772 : END DO
773 : CALL timestop(handle_getP)
774 :
775 : !! Calculate the max values of the density matrix actual_pmax stores the data from the actual density matrix
776 : !! and x_data%initial_p stores the same for the initial guess. The initial guess is updated only in the case of
777 : !! a changed geometry
778 : NULLIFY (shm_initial_p)
779 : shm_initial_p => actual_x_data%initial_p
780 : shm_pmax_atom => shm_master_x_data%pmax_atom
781 : IF (my_geo_change) THEN
782 : CALL update_pmax_mat(shm_master_x_data%initial_p, &
783 : shm_master_x_data%map_atom_to_kind_atom, &
784 : shm_master_x_data%set_offset, &
785 : shm_master_x_data%atomic_block_offset, &
786 : shm_pmax_atom, &
787 : full_density_alpha, full_density_beta, &
788 : natom, kind_of, basis_parameter, &
789 : nkind, shm_master_x_data%is_assoc_atomic_block)
790 : END IF
791 : END IF
792 : ! ** Now get the density(ispin)
793 : DO img = 1, nkimages
794 : CALL get_full_density(para_env, full_density_alpha(:, img), rho_ao(ispin, img)%matrix, shm_number_of_p_entries, &
795 : shm_master_x_data%block_offset, &
796 : kind_of, basis_parameter, get_max_vals_spin=.FALSE., &
797 : antisymmetric=is_anti_symmetric)
798 : END DO
799 : END IF
800 :
801 : NULLIFY (full_ks_alpha, full_ks_beta)
802 : ALLOCATE (shm_master_x_data%full_ks_alpha(shm_block_offset(ncpu + 1), nkimages))
803 : full_ks_alpha => shm_master_x_data%full_ks_alpha
804 : full_ks_alpha = 0.0_dp
805 :
806 : IF (.NOT. treat_lsd_in_core) THEN
807 : IF (my_nspins == 2) THEN
808 : ALLOCATE (shm_master_x_data%full_ks_beta(shm_block_offset(ncpu + 1), nkimages))
809 : full_ks_beta => shm_master_x_data%full_ks_beta
810 : full_ks_beta = 0.0_dp
811 : END IF
812 : END IF
813 :
814 : !! Initialize schwarz screening matrices for near field estimates and boxing screening matrices
815 : !! for far field estimates. The update is only performed if the geomtry of the system changed.
816 : !! If the system is periodic, then the corresponding routines are called and some variables
817 : !! are initialized
818 :
819 : !$OMP END MASTER
820 : !$OMP BARRIER
821 :
822 : IF (.NOT. shm_master_x_data%screen_funct_is_initialized) THEN
823 : CALL calc_pair_dist_radii(qs_env, basis_parameter, &
824 : shm_master_x_data%pair_dist_radii_pgf, max_set, max_pgf, eps_schwarz, &
825 : n_threads, i_thread)
826 : !$OMP BARRIER
827 : CALL calc_screening_functions(qs_env, basis_parameter, private_lib, shm_master_x_data%potential_parameter, &
828 : shm_master_x_data%screen_funct_coeffs_set, &
829 : shm_master_x_data%screen_funct_coeffs_kind, &
830 : shm_master_x_data%screen_funct_coeffs_pgf, &
831 : shm_master_x_data%pair_dist_radii_pgf, &
832 : max_set, max_pgf, n_threads, i_thread, p_work)
833 :
834 : !$OMP MASTER
835 : shm_master_x_data%screen_funct_is_initialized = .TRUE.
836 : !$OMP END MASTER
837 : END IF
838 : !$OMP BARRIER
839 :
840 : !$OMP MASTER
841 : screen_coeffs_set => shm_master_x_data%screen_funct_coeffs_set
842 : screen_coeffs_kind => shm_master_x_data%screen_funct_coeffs_kind
843 : screen_coeffs_pgf => shm_master_x_data%screen_funct_coeffs_pgf
844 : radii_pgf => shm_master_x_data%pair_dist_radii_pgf
845 : !$OMP END MASTER
846 : !$OMP BARRIER
847 :
848 : !! Initialize a prefactor depending on the fraction and the number of spins
849 : IF (my_nspins == 1) THEN
850 : fac = 0.5_dp*hf_fraction
851 : ELSE
852 : fac = 1.0_dp*hf_fraction
853 : END IF
854 :
855 : !! Call routines that distribute the load on all processes. If we want to screen on a initial density matrix, there is
856 : !! an optional parameter. Of course, this is only done if the geometry did change
857 : !$OMP BARRIER
858 : !$OMP MASTER
859 : CALL timeset(routineN//"_load", handle_load)
860 : !$OMP END MASTER
861 : !$OMP BARRIER
862 : IF (my_geo_change) THEN
863 : IF (actual_x_data%b_first_load_balance_energy) THEN
864 : CALL hfx_load_balance(actual_x_data, eps_schwarz, particle_set, max_set, para_env, &
865 : screen_coeffs_set, screen_coeffs_kind, &
866 : shm_is_assoc_atomic_block, do_periodic, load_balance_parameter, &
867 : kind_of, basis_parameter, shm_initial_p, shm_pmax_atom, i_thread, n_threads, &
868 : cell, do_p_screening, actual_x_data%map_atom_to_kind_atom, &
869 : nkind, hfx_do_eval_energy, shm_pmax_block, use_virial=.FALSE.)
870 : actual_x_data%b_first_load_balance_energy = .FALSE.
871 : ELSE
872 : CALL hfx_update_load_balance(actual_x_data, para_env, &
873 : load_balance_parameter, &
874 : i_thread, n_threads, hfx_do_eval_energy)
875 : END IF
876 : END IF
877 : !$OMP BARRIER
878 : !$OMP MASTER
879 : CALL timestop(handle_load)
880 : !$OMP END MASTER
881 : !$OMP BARRIER
882 :
883 : !! Start caluclating integrals of the form (ab|cd) or (ij|kl)
884 : !! In order to do so, there is a main four-loop structure that takes into account the two symmetries
885 : !!
886 : !! (ab|cd) = (ba|cd) = (ab|dc) = (ba|dc)
887 : !!
888 : !! by iterating in the following way
889 : !!
890 : !! DO iatom=1,natom and DO katom=1,natom
891 : !! DO jatom=iatom,natom DO latom=katom,natom
892 : !!
893 : !! The third symmetry
894 : !!
895 : !! (ab|cd) = (cd|ab)
896 : !!
897 : !! is taken into account by the following criterion:
898 : !!
899 : !! IF(katom+latom<=iatom+jatom) THEN
900 : !! IF( ((iatom+jatom)==(katom+latom) ) .AND.(katom<iatom)) CYCLE
901 : !!
902 : !! Depending on the degeneracy of an integral the exchange contribution is multiplied by a corresponding
903 : !! factor ( symm_fac ).
904 : !!
905 : !! If one quartet does not pass the screening we CYCLE on the outer most possible loop. Thats why we use
906 : !! different hierarchies of short range screening matrices.
907 : !!
908 : !! If we do a parallel run, each process owns a unique array of workloads. Here, a workload is
909 : !! defined as :
910 : !!
911 : !! istart, jstart, kstart, lstart, number_of_atom_quartets, initial_cpu_id
912 : !!
913 : !! This tells the process where to start the main loops and how many bunches of integrals it has to
914 : !! calculate. The original parallelization is a simple modulo distribution that is binned and
915 : !! optimized in the load_balance routines. Since the Monte Carlo routines can swap processors,
916 : !! we need to know which was the initial cpu_id.
917 : !! Furthermore, the indices iatom, jatom, katom, latom have to be set to istart, jstart, kstart and
918 : !! lstart only the first time the loop is executed. All subsequent loops have to start with one or
919 : !! iatom and katom respectively. Therefore, we use flags like first_j_loop etc.
920 :
921 : do_dynamic_load_balancing = .TRUE.
922 :
923 : IF (n_threads == 1 .OR. do_disk_storage) do_dynamic_load_balancing = .FALSE.
924 :
925 : IF (do_dynamic_load_balancing) THEN
926 : my_bin_size = SIZE(actual_x_data%distribution_energy)
927 : ELSE
928 : my_bin_size = 1
929 : END IF
930 : !! We do not need the containers if MAX_MEM = 0
931 : IF (.NOT. memory_parameter%do_all_on_the_fly) THEN
932 : !! IF new md step -> reinitialize containers
933 : IF (my_geo_change) THEN
934 : CALL dealloc_containers(actual_x_data%store_ints, memory_parameter%actual_memory_usage)
935 : CALL alloc_containers(actual_x_data%store_ints, my_bin_size)
936 :
937 : DO bin = 1, my_bin_size
938 : maxval_container => actual_x_data%store_ints%maxval_container(bin)
939 : integral_containers => actual_x_data%store_ints%integral_containers(:, bin)
940 : CALL hfx_init_container(maxval_container, memory_parameter%actual_memory_usage, .FALSE.)
941 : DO i = 1, 64
942 : CALL hfx_init_container(integral_containers(i), memory_parameter%actual_memory_usage, .FALSE.)
943 : END DO
944 : END DO
945 : END IF
946 :
947 : !! Decompress the first cache for maxvals and integrals
948 : IF (.NOT. my_geo_change) THEN
949 : DO bin = 1, my_bin_size
950 : maxval_cache => actual_x_data%store_ints%maxval_cache(bin)
951 : maxval_container => actual_x_data%store_ints%maxval_container(bin)
952 : integral_caches => actual_x_data%store_ints%integral_caches(:, bin)
953 : integral_containers => actual_x_data%store_ints%integral_containers(:, bin)
954 : CALL hfx_decompress_first_cache(bits_max_val, maxval_cache, maxval_container, &
955 : memory_parameter%actual_memory_usage, .FALSE.)
956 : DO i = 1, 64
957 : CALL hfx_decompress_first_cache(i, integral_caches(i), integral_containers(i), &
958 : memory_parameter%actual_memory_usage, .FALSE.)
959 : END DO
960 : END DO
961 : END IF
962 : END IF
963 :
964 : !! Since the I/O routines are no thread-safe, i.e. the procedure to get the unit number, put a lock here
965 : !$OMP CRITICAL(hfxenergy_io_critical)
966 : !! If we do disk storage, we have to initialize the containers/caches
967 : IF (do_disk_storage) THEN
968 : !! IF new md step -> reinitialize containers
969 : IF (my_geo_change) THEN
970 : CALL hfx_init_container(maxval_container_disk, memory_parameter%actual_memory_usage_disk, do_disk_storage)
971 : DO i = 1, 64
972 : CALL hfx_init_container(integral_containers_disk(i), memory_parameter%actual_memory_usage_disk, do_disk_storage)
973 : END DO
974 : END IF
975 : !! Decompress the first cache for maxvals and integrals
976 : IF (.NOT. my_geo_change) THEN
977 : CALL hfx_decompress_first_cache(bits_max_val, maxval_cache_disk, maxval_container_disk, &
978 : memory_parameter%actual_memory_usage_disk, .TRUE.)
979 : DO i = 1, 64
980 : CALL hfx_decompress_first_cache(i, integral_caches_disk(i), integral_containers_disk(i), &
981 : memory_parameter%actual_memory_usage_disk, .TRUE.)
982 : END DO
983 : END IF
984 : END IF
985 : !$OMP END CRITICAL(hfxenergy_io_critical)
986 :
987 : !$OMP BARRIER
988 : !$OMP MASTER
989 :
990 : IF (do_dynamic_load_balancing) THEN
991 : ! ** Lets construct the task list
992 : shm_total_bins = 0
993 : DO i = 1, n_threads
994 : shm_total_bins = shm_total_bins + SIZE(x_data(irep, i)%distribution_energy)
995 : END DO
996 : ALLOCATE (tmp_task_list(shm_total_bins))
997 : shm_task_counter = 0
998 : DO i = 1, n_threads
999 : DO bin = 1, SIZE(x_data(irep, i)%distribution_energy)
1000 : shm_task_counter = shm_task_counter + 1
1001 : tmp_task_list(shm_task_counter)%thread_id = i
1002 : tmp_task_list(shm_task_counter)%bin_id = bin
1003 : tmp_task_list(shm_task_counter)%cost = x_data(irep, i)%distribution_energy(bin)%cost
1004 : END DO
1005 : END DO
1006 :
1007 : ! ** Now sort the task list
1008 : ALLOCATE (tmp_task_list_cost(shm_total_bins))
1009 : ALLOCATE (tmp_index(shm_total_bins))
1010 :
1011 : DO i = 1, shm_total_bins
1012 : tmp_task_list_cost(i) = tmp_task_list(i)%cost
1013 : END DO
1014 :
1015 : CALL sort(tmp_task_list_cost, shm_total_bins, tmp_index)
1016 :
1017 : ALLOCATE (shm_master_x_data%task_list(shm_total_bins))
1018 :
1019 : DO i = 1, shm_total_bins
1020 : shm_master_x_data%task_list(i) = tmp_task_list(tmp_index(shm_total_bins - i + 1))
1021 : END DO
1022 :
1023 : shm_task_list => shm_master_x_data%task_list
1024 : shm_task_counter = 0
1025 :
1026 : DEALLOCATE (tmp_task_list_cost, tmp_index, tmp_task_list)
1027 : END IF
1028 : !$OMP END MASTER
1029 : !$OMP BARRIER
1030 :
1031 : IF (my_bin_size > 0) THEN
1032 : maxval_container => actual_x_data%store_ints%maxval_container(1)
1033 : maxval_cache => actual_x_data%store_ints%maxval_cache(1)
1034 :
1035 : integral_containers => actual_x_data%store_ints%integral_containers(:, 1)
1036 : integral_caches => actual_x_data%store_ints%integral_caches(:, 1)
1037 : END IF
1038 :
1039 : !$OMP BARRIER
1040 : !$OMP MASTER
1041 : CALL timeset(routineN//"_main", handle_main)
1042 : !$OMP END MASTER
1043 : !$OMP BARRIER
1044 :
1045 : coeffs_kind_max0 = MAXVAL(screen_coeffs_kind(:, :)%x(2))
1046 : ALLOCATE (set_list_ij((max_set*load_balance_parameter%block_size)**2))
1047 : ALLOCATE (set_list_kl((max_set*load_balance_parameter%block_size)**2))
1048 :
1049 : !$OMP BARRIER
1050 : !$OMP MASTER
1051 :
1052 : !! precalculate maximum density matrix elements in blocks
1053 : actual_x_data%pmax_block = 0.0_dp
1054 : shm_pmax_block => actual_x_data%pmax_block
1055 : IF (do_p_screening) THEN
1056 : DO iatom_block = 1, SIZE(actual_x_data%blocks)
1057 : iatom_start = actual_x_data%blocks(iatom_block)%istart
1058 : iatom_end = actual_x_data%blocks(iatom_block)%iend
1059 : DO jatom_block = 1, SIZE(actual_x_data%blocks)
1060 : jatom_start = actual_x_data%blocks(jatom_block)%istart
1061 : jatom_end = actual_x_data%blocks(jatom_block)%iend
1062 : shm_pmax_block(iatom_block, jatom_block) = MAXVAL(shm_pmax_atom(iatom_start:iatom_end, jatom_start:jatom_end))
1063 : END DO
1064 : END DO
1065 : END IF
1066 : shm_atomic_pair_list => actual_x_data%atomic_pair_list
1067 : IF (my_geo_change) THEN
1068 : CALL build_atomic_pair_list(natom, shm_atomic_pair_list, kind_of, basis_parameter, particle_set, &
1069 : do_periodic, screen_coeffs_kind, coeffs_kind_max0, log10_eps_schwarz, cell, &
1070 : actual_x_data%blocks)
1071 : END IF
1072 :
1073 : my_bin_size = SIZE(actual_x_data%distribution_energy)
1074 : ! reset timings for the new SCF round
1075 : IF (my_geo_change) THEN
1076 : DO bin = 1, my_bin_size
1077 : actual_x_data%distribution_energy(bin)%time_first_scf = 0.0_dp
1078 : actual_x_data%distribution_energy(bin)%time_other_scf = 0.0_dp
1079 : actual_x_data%distribution_energy(bin)%time_forces = 0.0_dp
1080 : END DO
1081 : END IF
1082 : !$OMP END MASTER
1083 : !$OMP BARRIER
1084 :
1085 : my_bin_size = SIZE(actual_x_data%distribution_energy)
1086 : nblocks = load_balance_parameter%nblocks
1087 :
1088 : bins_left = .TRUE.
1089 : do_it = .TRUE.
1090 : bin = 0
1091 : DO WHILE (bins_left)
1092 : IF (.NOT. do_dynamic_load_balancing) THEN
1093 : bin = bin + 1
1094 : IF (bin > my_bin_size) THEN
1095 : do_it = .FALSE.
1096 : bins_left = .FALSE.
1097 : ELSE
1098 : do_it = .TRUE.
1099 : bins_left = .TRUE.
1100 : distribution_energy => actual_x_data%distribution_energy(bin)
1101 : END IF
1102 : ELSE
1103 : !$OMP CRITICAL(hfxenergy_critical)
1104 : shm_task_counter = shm_task_counter + 1
1105 : IF (shm_task_counter <= shm_total_bins) THEN
1106 : my_thread_id = shm_task_list(shm_task_counter)%thread_id
1107 : my_bin_id = shm_task_list(shm_task_counter)%bin_id
1108 : IF (.NOT. memory_parameter%do_all_on_the_fly) THEN
1109 : maxval_cache => x_data(irep, my_thread_id)%store_ints%maxval_cache(my_bin_id)
1110 : maxval_container => x_data(irep, my_thread_id)%store_ints%maxval_container(my_bin_id)
1111 : integral_caches => x_data(irep, my_thread_id)%store_ints%integral_caches(:, my_bin_id)
1112 : integral_containers => x_data(irep, my_thread_id)%store_ints%integral_containers(:, my_bin_id)
1113 : END IF
1114 : distribution_energy => x_data(irep, my_thread_id)%distribution_energy(my_bin_id)
1115 : do_it = .TRUE.
1116 : bins_left = .TRUE.
1117 : IF (my_geo_change) THEN
1118 : distribution_energy%ram_counter = HUGE(distribution_energy%ram_counter)
1119 : END IF
1120 : counter = 0_Int_8
1121 : ELSE
1122 : do_it = .FALSE.
1123 : bins_left = .FALSE.
1124 : END IF
1125 : !$OMP END CRITICAL(hfxenergy_critical)
1126 : END IF
1127 :
1128 : IF (.NOT. do_it) CYCLE
1129 : !$OMP MASTER
1130 : CALL timeset(routineN//"_bin", handle_bin)
1131 : !$OMP END MASTER
1132 : bintime_start = m_walltime()
1133 : my_istart = distribution_energy%istart
1134 : my_current_counter = 0
1135 : IF (distribution_energy%number_of_atom_quartets == 0 .OR. &
1136 : my_istart == -1_int_8) my_istart = nblocks**4
1137 : atomic_blocks: DO atom_block = my_istart, nblocks**4 - 1, n_processes
1138 : latom_block = INT(MODULO(atom_block, nblocks)) + 1
1139 : tmp_block = atom_block/nblocks
1140 : katom_block = INT(MODULO(tmp_block, nblocks)) + 1
1141 : IF (latom_block < katom_block) CYCLE atomic_blocks
1142 : tmp_block = tmp_block/nblocks
1143 : jatom_block = INT(MODULO(tmp_block, nblocks)) + 1
1144 : tmp_block = tmp_block/nblocks
1145 : iatom_block = INT(MODULO(tmp_block, nblocks)) + 1
1146 : IF (jatom_block < iatom_block) CYCLE atomic_blocks
1147 : my_current_counter = my_current_counter + 1
1148 : IF (my_current_counter > distribution_energy%number_of_atom_quartets) EXIT atomic_blocks
1149 :
1150 : iatom_start = actual_x_data%blocks(iatom_block)%istart
1151 : iatom_end = actual_x_data%blocks(iatom_block)%iend
1152 : jatom_start = actual_x_data%blocks(jatom_block)%istart
1153 : jatom_end = actual_x_data%blocks(jatom_block)%iend
1154 : katom_start = actual_x_data%blocks(katom_block)%istart
1155 : katom_end = actual_x_data%blocks(katom_block)%iend
1156 : latom_start = actual_x_data%blocks(latom_block)%istart
1157 : latom_end = actual_x_data%blocks(latom_block)%iend
1158 :
1159 : pmax_blocks = MAX(shm_pmax_block(katom_block, iatom_block), &
1160 : shm_pmax_block(latom_block, jatom_block), &
1161 : shm_pmax_block(latom_block, iatom_block), &
1162 : shm_pmax_block(katom_block, jatom_block))
1163 :
1164 : IF (2.0_dp*coeffs_kind_max0 + pmax_blocks < log10_eps_schwarz) CYCLE atomic_blocks
1165 :
1166 : CALL build_pair_list(natom, list_ij, set_list_ij, iatom_start, iatom_end, &
1167 : jatom_start, jatom_end, &
1168 : kind_of, basis_parameter, particle_set, &
1169 : do_periodic, screen_coeffs_set, screen_coeffs_kind, &
1170 : coeffs_kind_max0, log10_eps_schwarz, cell, pmax_blocks, &
1171 : shm_atomic_pair_list)
1172 :
1173 : CALL build_pair_list(natom, list_kl, set_list_kl, katom_start, katom_end, &
1174 : latom_start, latom_end, &
1175 : kind_of, basis_parameter, particle_set, &
1176 : do_periodic, screen_coeffs_set, screen_coeffs_kind, &
1177 : coeffs_kind_max0, log10_eps_schwarz, cell, pmax_blocks, &
1178 : shm_atomic_pair_list)
1179 :
1180 : DO i_list_ij = 1, list_ij%n_element
1181 :
1182 : iatom = list_ij%elements(i_list_ij)%pair(1)
1183 : jatom = list_ij%elements(i_list_ij)%pair(2)
1184 : i_set_list_ij_start = list_ij%elements(i_list_ij)%set_bounds(1)
1185 : i_set_list_ij_stop = list_ij%elements(i_list_ij)%set_bounds(2)
1186 : ikind = list_ij%elements(i_list_ij)%kind_pair(1)
1187 : jkind = list_ij%elements(i_list_ij)%kind_pair(2)
1188 : ra = list_ij%elements(i_list_ij)%r1
1189 : rb = list_ij%elements(i_list_ij)%r2
1190 : rab2 = list_ij%elements(i_list_ij)%dist2
1191 :
1192 : la_max => basis_parameter(ikind)%lmax
1193 : la_min => basis_parameter(ikind)%lmin
1194 : npgfa => basis_parameter(ikind)%npgf
1195 : nseta = basis_parameter(ikind)%nset
1196 : zeta => basis_parameter(ikind)%zet
1197 : nsgfa => basis_parameter(ikind)%nsgf
1198 : sphi_a_ext => basis_parameter(ikind)%sphi_ext(:, :, :, :)
1199 : nsgfl_a => basis_parameter(ikind)%nsgfl
1200 : sphi_a_u1 = UBOUND(sphi_a_ext, 1)
1201 : sphi_a_u2 = UBOUND(sphi_a_ext, 2)
1202 : sphi_a_u3 = UBOUND(sphi_a_ext, 3)
1203 :
1204 : lb_max => basis_parameter(jkind)%lmax
1205 : lb_min => basis_parameter(jkind)%lmin
1206 : npgfb => basis_parameter(jkind)%npgf
1207 : nsetb = basis_parameter(jkind)%nset
1208 : zetb => basis_parameter(jkind)%zet
1209 : nsgfb => basis_parameter(jkind)%nsgf
1210 : sphi_b_ext => basis_parameter(jkind)%sphi_ext(:, :, :, :)
1211 : nsgfl_b => basis_parameter(jkind)%nsgfl
1212 : sphi_b_u1 = UBOUND(sphi_b_ext, 1)
1213 : sphi_b_u2 = UBOUND(sphi_b_ext, 2)
1214 : sphi_b_u3 = UBOUND(sphi_b_ext, 3)
1215 :
1216 : DO i_list_kl = 1, list_kl%n_element
1217 : katom = list_kl%elements(i_list_kl)%pair(1)
1218 : latom = list_kl%elements(i_list_kl)%pair(2)
1219 :
1220 : IF (.NOT. (katom + latom <= iatom + jatom)) CYCLE
1221 : IF (((iatom + jatom) == (katom + latom)) .AND. (katom < iatom)) CYCLE
1222 : i_set_list_kl_start = list_kl%elements(i_list_kl)%set_bounds(1)
1223 : i_set_list_kl_stop = list_kl%elements(i_list_kl)%set_bounds(2)
1224 : kkind = list_kl%elements(i_list_kl)%kind_pair(1)
1225 : lkind = list_kl%elements(i_list_kl)%kind_pair(2)
1226 : rc = list_kl%elements(i_list_kl)%r1
1227 : rd = list_kl%elements(i_list_kl)%r2
1228 : rcd2 = list_kl%elements(i_list_kl)%dist2
1229 :
1230 : IF (do_p_screening) THEN
1231 : pmax_atom = MAX(shm_pmax_atom(katom, iatom), &
1232 : shm_pmax_atom(latom, jatom), &
1233 : shm_pmax_atom(latom, iatom), &
1234 : shm_pmax_atom(katom, jatom))
1235 : ELSE
1236 : pmax_atom = 0.0_dp
1237 : END IF
1238 :
1239 : screen_kind_ij = screen_coeffs_kind(jkind, ikind)%x(1)*rab2 + &
1240 : screen_coeffs_kind(jkind, ikind)%x(2)
1241 : screen_kind_kl = screen_coeffs_kind(lkind, kkind)%x(1)*rcd2 + &
1242 : screen_coeffs_kind(lkind, kkind)%x(2)
1243 :
1244 : IF (screen_kind_ij + screen_kind_kl + pmax_atom < log10_eps_schwarz) CYCLE
1245 :
1246 : !! we want to be consistent with the KS matrix. If none of the atomic indices
1247 : !! is associated cycle
1248 : IF (.NOT. (shm_is_assoc_atomic_block(latom, iatom) >= 1 .AND. &
1249 : shm_is_assoc_atomic_block(katom, iatom) >= 1 .AND. &
1250 : shm_is_assoc_atomic_block(katom, jatom) >= 1 .AND. &
1251 : shm_is_assoc_atomic_block(latom, jatom) >= 1)) CYCLE
1252 :
1253 : !! calculate symmetry_factor according to degeneracy of atomic quartet
1254 : symm_fac = 0.5_dp
1255 : IF (iatom == jatom) symm_fac = symm_fac*2.0_dp
1256 : IF (katom == latom) symm_fac = symm_fac*2.0_dp
1257 : IF (iatom == katom .AND. jatom == latom .AND. iatom /= jatom .AND. katom /= latom) symm_fac = symm_fac*2.0_dp
1258 : IF (iatom == katom .AND. iatom == jatom .AND. katom == latom) symm_fac = symm_fac*2.0_dp
1259 : symm_fac = 1.0_dp/symm_fac
1260 :
1261 : lc_max => basis_parameter(kkind)%lmax
1262 : lc_min => basis_parameter(kkind)%lmin
1263 : npgfc => basis_parameter(kkind)%npgf
1264 : zetc => basis_parameter(kkind)%zet
1265 : nsgfc => basis_parameter(kkind)%nsgf
1266 : sphi_c_ext => basis_parameter(kkind)%sphi_ext(:, :, :, :)
1267 : nsgfl_c => basis_parameter(kkind)%nsgfl
1268 : sphi_c_u1 = UBOUND(sphi_c_ext, 1)
1269 : sphi_c_u2 = UBOUND(sphi_c_ext, 2)
1270 : sphi_c_u3 = UBOUND(sphi_c_ext, 3)
1271 :
1272 : ld_max => basis_parameter(lkind)%lmax
1273 : ld_min => basis_parameter(lkind)%lmin
1274 : npgfd => basis_parameter(lkind)%npgf
1275 : zetd => basis_parameter(lkind)%zet
1276 : nsgfd => basis_parameter(lkind)%nsgf
1277 : sphi_d_ext => basis_parameter(lkind)%sphi_ext(:, :, :, :)
1278 : nsgfl_d => basis_parameter(lkind)%nsgfl
1279 : sphi_d_u1 = UBOUND(sphi_d_ext, 1)
1280 : sphi_d_u2 = UBOUND(sphi_d_ext, 2)
1281 : sphi_d_u3 = UBOUND(sphi_d_ext, 3)
1282 :
1283 : atomic_offset_bd = shm_atomic_block_offset(jatom, latom)
1284 : atomic_offset_bc = shm_atomic_block_offset(jatom, katom)
1285 : atomic_offset_ad = shm_atomic_block_offset(iatom, latom)
1286 : atomic_offset_ac = shm_atomic_block_offset(iatom, katom)
1287 :
1288 : IF (jatom < latom) THEN
1289 : offset_bd_set => shm_set_offset(:, :, lkind, jkind)
1290 : ELSE
1291 : offset_bd_set => shm_set_offset(:, :, jkind, lkind)
1292 : END IF
1293 : IF (jatom < katom) THEN
1294 : offset_bc_set => shm_set_offset(:, :, kkind, jkind)
1295 : ELSE
1296 : offset_bc_set => shm_set_offset(:, :, jkind, kkind)
1297 : END IF
1298 : IF (iatom < latom) THEN
1299 : offset_ad_set => shm_set_offset(:, :, lkind, ikind)
1300 : ELSE
1301 : offset_ad_set => shm_set_offset(:, :, ikind, lkind)
1302 : END IF
1303 : IF (iatom < katom) THEN
1304 : offset_ac_set => shm_set_offset(:, :, kkind, ikind)
1305 : ELSE
1306 : offset_ac_set => shm_set_offset(:, :, ikind, kkind)
1307 : END IF
1308 :
1309 : IF (do_p_screening) THEN
1310 : swap_id = 0
1311 : kind_kind_idx = INT(get_1D_idx(kkind, ikind, INT(nkind, int_8)))
1312 : IF (ikind >= kkind) THEN
1313 : ptr_p_1 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1314 : actual_x_data%map_atom_to_kind_atom(katom), &
1315 : actual_x_data%map_atom_to_kind_atom(iatom))
1316 : ELSE
1317 : ptr_p_1 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1318 : actual_x_data%map_atom_to_kind_atom(iatom), &
1319 : actual_x_data%map_atom_to_kind_atom(katom))
1320 : swap_id = swap_id + 1
1321 : END IF
1322 : kind_kind_idx = INT(get_1D_idx(lkind, jkind, INT(nkind, int_8)))
1323 : IF (jkind >= lkind) THEN
1324 : ptr_p_2 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1325 : actual_x_data%map_atom_to_kind_atom(latom), &
1326 : actual_x_data%map_atom_to_kind_atom(jatom))
1327 : ELSE
1328 : ptr_p_2 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1329 : actual_x_data%map_atom_to_kind_atom(jatom), &
1330 : actual_x_data%map_atom_to_kind_atom(latom))
1331 : swap_id = swap_id + 2
1332 : END IF
1333 : kind_kind_idx = INT(get_1D_idx(lkind, ikind, INT(nkind, int_8)))
1334 : IF (ikind >= lkind) THEN
1335 : ptr_p_3 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1336 : actual_x_data%map_atom_to_kind_atom(latom), &
1337 : actual_x_data%map_atom_to_kind_atom(iatom))
1338 : ELSE
1339 : ptr_p_3 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1340 : actual_x_data%map_atom_to_kind_atom(iatom), &
1341 : actual_x_data%map_atom_to_kind_atom(latom))
1342 : swap_id = swap_id + 4
1343 : END IF
1344 : kind_kind_idx = INT(get_1D_idx(kkind, jkind, INT(nkind, int_8)))
1345 : IF (jkind >= kkind) THEN
1346 : ptr_p_4 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1347 : actual_x_data%map_atom_to_kind_atom(katom), &
1348 : actual_x_data%map_atom_to_kind_atom(jatom))
1349 : ELSE
1350 : ptr_p_4 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1351 : actual_x_data%map_atom_to_kind_atom(jatom), &
1352 : actual_x_data%map_atom_to_kind_atom(katom))
1353 : swap_id = swap_id + 8
1354 : END IF
1355 : END IF
1356 :
1357 : !! At this stage, check for memory used in compression
1358 :
1359 : IF (my_geo_change) THEN
1360 : IF (.NOT. memory_parameter%do_all_on_the_fly) THEN
1361 : ! ** We know the maximum amount of integrals that we can store per MPI-process
1362 : ! ** Now we need to sum the current memory usage among all openMP threads
1363 : ! ** We can just read what is currently stored on the corresponding x_data type
1364 : ! ** If this is thread i and it tries to read the data from thread j, that is
1365 : ! ** currently writing that data, we just dont care, because the possible error
1366 : ! ** is of the order of CACHE_SIZE
1367 : mem_compression_counter = 0
1368 : DO i = 1, n_threads
1369 : !$OMP ATOMIC READ
1370 : tmp_i4 = x_data(irep, i)%memory_parameter%actual_memory_usage
1371 : mem_compression_counter = mem_compression_counter + &
1372 : tmp_i4*memory_parameter%cache_size
1373 : END DO
1374 : IF (mem_compression_counter > memory_parameter%max_compression_counter) THEN
1375 : buffer_overflow = .TRUE.
1376 : IF (do_dynamic_load_balancing) THEN
1377 : distribution_energy%ram_counter = counter
1378 : ELSE
1379 : memory_parameter%ram_counter = counter
1380 : END IF
1381 : ELSE
1382 : counter = counter + 1
1383 : buffer_overflow = .FALSE.
1384 : END IF
1385 : END IF
1386 : ELSE
1387 : IF (.NOT. memory_parameter%do_all_on_the_fly) THEN
1388 : IF (do_dynamic_load_balancing) THEN
1389 : IF (distribution_energy%ram_counter == counter) THEN
1390 : buffer_overflow = .TRUE.
1391 : ELSE
1392 : counter = counter + 1
1393 : buffer_overflow = .FALSE.
1394 : END IF
1395 :
1396 : ELSE
1397 : IF (memory_parameter%ram_counter == counter) THEN
1398 : buffer_overflow = .TRUE.
1399 : ELSE
1400 : counter = counter + 1
1401 : buffer_overflow = .FALSE.
1402 : END IF
1403 : END IF
1404 : END IF
1405 : END IF
1406 :
1407 : IF (buffer_overflow .AND. do_disk_storage) THEN
1408 : use_disk_storage = .TRUE.
1409 : buffer_overflow = .FALSE.
1410 : END IF
1411 :
1412 : IF (use_disk_storage) THEN
1413 : !$OMP ATOMIC READ
1414 : tmp_i4 = memory_parameter%actual_memory_usage_disk
1415 : mem_compression_counter_disk = tmp_i4*memory_parameter%cache_size
1416 : IF (mem_compression_counter_disk > memory_parameter%max_compression_counter_disk) THEN
1417 : buffer_overflow = .TRUE.
1418 : use_disk_storage = .FALSE.
1419 : END IF
1420 : END IF
1421 :
1422 : DO i_set_list_ij = i_set_list_ij_start, i_set_list_ij_stop
1423 : iset = set_list_ij(i_set_list_ij)%pair(1)
1424 : jset = set_list_ij(i_set_list_ij)%pair(2)
1425 :
1426 : ncob = npgfb(jset)*ncoset(lb_max(jset))
1427 : max_val1 = screen_coeffs_set(jset, iset, jkind, ikind)%x(1)*rab2 + &
1428 : screen_coeffs_set(jset, iset, jkind, ikind)%x(2)
1429 :
1430 : IF (max_val1 + screen_kind_kl + pmax_atom < log10_eps_schwarz) CYCLE
1431 :
1432 : sphi_a_ext_set => sphi_a_ext(:, :, :, iset)
1433 : sphi_b_ext_set => sphi_b_ext(:, :, :, jset)
1434 : DO i_set_list_kl = i_set_list_kl_start, i_set_list_kl_stop
1435 : kset = set_list_kl(i_set_list_kl)%pair(1)
1436 : lset = set_list_kl(i_set_list_kl)%pair(2)
1437 :
1438 : max_val2_set = (screen_coeffs_set(lset, kset, lkind, kkind)%x(1)*rcd2 + &
1439 : screen_coeffs_set(lset, kset, lkind, kkind)%x(2))
1440 : max_val2 = max_val1 + max_val2_set
1441 :
1442 : !! Near field screening
1443 : IF (max_val2 + pmax_atom < log10_eps_schwarz) CYCLE
1444 : sphi_c_ext_set => sphi_c_ext(:, :, :, kset)
1445 : sphi_d_ext_set => sphi_d_ext(:, :, :, lset)
1446 : !! get max_vals if we screen on initial density
1447 : IF (do_p_screening) THEN
1448 : CALL get_pmax_val(ptr_p_1, ptr_p_2, ptr_p_3, ptr_p_4, &
1449 : iset, jset, kset, lset, &
1450 : pmax_entry, swap_id)
1451 : ELSE
1452 : pmax_entry = 0.0_dp
1453 : END IF
1454 : log10_pmax = pmax_entry
1455 : max_val2 = max_val2 + log10_pmax
1456 : IF (max_val2 < log10_eps_schwarz) CYCLE
1457 : pmax_entry = EXP(log10_pmax*ln_10)
1458 :
1459 : !! store current number of integrals, update total number and number of integrals in buffer
1460 : current_counter = nsgfa(iset)*nsgfb(jset)*nsgfc(kset)*nsgfd(lset)
1461 : IF (buffer_overflow) THEN
1462 : neris_onthefly = neris_onthefly + current_counter
1463 : END IF
1464 :
1465 : !! Get integrals from buffer and update Kohn-Sham matrix
1466 : IF (.NOT. buffer_overflow .AND. .NOT. my_geo_change) THEN
1467 : nints = current_counter
1468 : IF (.NOT. use_disk_storage) THEN
1469 : CALL hfx_get_single_cache_element( &
1470 : estimate_to_store_int, 6, &
1471 : maxval_cache, maxval_container, memory_parameter%actual_memory_usage, &
1472 : use_disk_storage)
1473 : ELSE
1474 : CALL hfx_get_single_cache_element( &
1475 : estimate_to_store_int, 6, &
1476 : maxval_cache_disk, maxval_container_disk, memory_parameter%actual_memory_usage_disk, &
1477 : use_disk_storage)
1478 : END IF
1479 : spherical_estimate = SET_EXPONENT(1.0_dp, estimate_to_store_int + 1)
1480 : IF (spherical_estimate*pmax_entry < eps_schwarz) CYCLE
1481 : nbits = EXPONENT(ANINT(spherical_estimate*pmax_entry/eps_storage)) + 1
1482 : buffer_left = nints
1483 : buffer_start = 1
1484 : IF (.NOT. use_disk_storage) THEN
1485 : neris_incore = neris_incore + INT(nints, int_8)
1486 : ELSE
1487 : neris_disk = neris_disk + INT(nints, int_8)
1488 : END IF
1489 : DO WHILE (buffer_left > 0)
1490 : buffer_size = MIN(buffer_left, cache_size)
1491 : IF (.NOT. use_disk_storage) THEN
1492 : CALL hfx_get_mult_cache_elements(primitive_integrals(buffer_start), &
1493 : buffer_size, nbits, &
1494 : integral_caches(nbits), &
1495 : integral_containers(nbits), &
1496 : eps_storage, pmax_entry, &
1497 : memory_parameter%actual_memory_usage, &
1498 : use_disk_storage)
1499 : ELSE
1500 : CALL hfx_get_mult_cache_elements(primitive_integrals(buffer_start), &
1501 : buffer_size, nbits, &
1502 : integral_caches_disk(nbits), &
1503 : integral_containers_disk(nbits), &
1504 : eps_storage, pmax_entry, &
1505 : memory_parameter%actual_memory_usage_disk, &
1506 : use_disk_storage)
1507 : END IF
1508 : buffer_left = buffer_left - buffer_size
1509 : buffer_start = buffer_start + buffer_size
1510 : END DO
1511 : END IF
1512 : !! Calculate integrals if we run out of buffer or the geometry did change
1513 : IF (my_geo_change .OR. buffer_overflow) THEN
1514 : max_contraction_val = max_contraction(iset, iatom)* &
1515 : max_contraction(jset, jatom)* &
1516 : max_contraction(kset, katom)* &
1517 : max_contraction(lset, latom)*pmax_entry
1518 : tmp_R_1 => radii_pgf(:, :, jset, iset, jkind, ikind)
1519 : tmp_R_2 => radii_pgf(:, :, lset, kset, lkind, kkind)
1520 : tmp_screen_pgf1 => screen_coeffs_pgf(:, :, jset, iset, jkind, ikind)
1521 : tmp_screen_pgf2 => screen_coeffs_pgf(:, :, lset, kset, lkind, kkind)
1522 :
1523 : CALL coulomb4(private_lib, ra, rb, rc, rd, npgfa(iset), npgfb(jset), npgfc(kset), npgfd(lset), &
1524 : la_min(iset), la_max(iset), lb_min(jset), lb_max(jset), &
1525 : lc_min(kset), lc_max(kset), ld_min(lset), ld_max(lset), &
1526 : nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1527 : sphi_a_u1, sphi_a_u2, sphi_a_u3, &
1528 : sphi_b_u1, sphi_b_u2, sphi_b_u3, &
1529 : sphi_c_u1, sphi_c_u2, sphi_c_u3, &
1530 : sphi_d_u1, sphi_d_u2, sphi_d_u3, &
1531 : zeta(1:npgfa(iset), iset), zetb(1:npgfb(jset), jset), &
1532 : zetc(1:npgfc(kset), kset), zetd(1:npgfd(lset), lset), &
1533 : primitive_integrals, &
1534 : potential_parameter, &
1535 : actual_x_data%neighbor_cells, screen_coeffs_set(jset, iset, jkind, ikind)%x, &
1536 : screen_coeffs_set(lset, kset, lkind, kkind)%x, eps_schwarz, &
1537 : max_contraction_val, cartesian_estimate, cell, neris_tmp, &
1538 : log10_pmax, log10_eps_schwarz, &
1539 : tmp_R_1, tmp_R_2, tmp_screen_pgf1, tmp_screen_pgf2, &
1540 : pgf_list_ij, pgf_list_kl, pgf_product_list, &
1541 : nsgfl_a(:, iset), nsgfl_b(:, jset), &
1542 : nsgfl_c(:, kset), nsgfl_d(:, lset), &
1543 : sphi_a_ext_set, &
1544 : sphi_b_ext_set, &
1545 : sphi_c_ext_set, &
1546 : sphi_d_ext_set, &
1547 : ee_work, ee_work2, ee_buffer1, ee_buffer2, ee_primitives_tmp, &
1548 : nimages, do_periodic, p_work)
1549 :
1550 : nints = nsgfa(iset)*nsgfb(jset)*nsgfc(kset)*nsgfd(lset)
1551 : neris_total = neris_total + nints
1552 : nprim_ints = nprim_ints + neris_tmp
1553 : ! IF (cartesian_estimate == 0.0_dp) cartesian_estimate = TINY(cartesian_estimate)
1554 : ! estimate_to_store_int = EXPONENT(cartesian_estimate)
1555 : ! estimate_to_store_int = MAX(estimate_to_store_int, -15_int_8)
1556 : ! cartesian_estimate = SET_EXPONENT(1.0_dp, estimate_to_store_int+1)
1557 : ! IF (.NOT. buffer_overflow .AND. my_geo_change) THEN
1558 : ! IF (cartesian_estimate < eps_schwarz) THEN
1559 : ! IF (.NOT. use_disk_storage) THEN
1560 : ! CALL hfx_add_single_cache_element( &
1561 : ! estimate_to_store_int, 6, &
1562 : ! maxval_cache, maxval_container, memory_parameter%actual_memory_usage, &
1563 : ! use_disk_storage, max_val_memory)
1564 : ! ELSE
1565 : ! CALL hfx_add_single_cache_element( &
1566 : ! estimate_to_store_int, 6, &
1567 : ! maxval_cache_disk, maxval_container_disk, memory_parameter%actual_memory_usage_disk, &
1568 : ! use_disk_storage)
1569 : ! END IF
1570 : ! END IF
1571 : ! END IF
1572 : !
1573 : ! IF (cartesian_estimate < eps_schwarz) CYCLE
1574 :
1575 : !! Compress the array for storage
1576 : spherical_estimate = 0.0_dp
1577 : DO i = 1, nints
1578 : spherical_estimate = MAX(spherical_estimate, ABS(primitive_integrals(i)))
1579 : END DO
1580 :
1581 : IF (spherical_estimate == 0.0_dp) spherical_estimate = TINY(spherical_estimate)
1582 : estimate_to_store_int = EXPONENT(spherical_estimate)
1583 : estimate_to_store_int = MAX(estimate_to_store_int, -15_int_8)
1584 :
1585 : IF (.NOT. buffer_overflow .AND. my_geo_change) THEN
1586 : IF (.NOT. use_disk_storage) THEN
1587 : CALL hfx_add_single_cache_element( &
1588 : estimate_to_store_int, 6, &
1589 : maxval_cache, maxval_container, memory_parameter%actual_memory_usage, &
1590 : use_disk_storage, max_val_memory)
1591 : ELSE
1592 : CALL hfx_add_single_cache_element( &
1593 : estimate_to_store_int, 6, &
1594 : maxval_cache_disk, maxval_container_disk, memory_parameter%actual_memory_usage_disk, &
1595 : use_disk_storage)
1596 : END IF
1597 : END IF
1598 : spherical_estimate = SET_EXPONENT(1.0_dp, estimate_to_store_int + 1)
1599 : IF (spherical_estimate*pmax_entry < eps_schwarz) CYCLE
1600 : IF (.NOT. buffer_overflow) THEN
1601 : nbits = EXPONENT(ANINT(spherical_estimate*pmax_entry/eps_storage)) + 1
1602 :
1603 : ! In case of a tight eps_storage threshold the number of significant
1604 : ! bits in the integer number NINT(value*pmax_entry/eps_storage) may
1605 : ! exceed the width of the storage element. As the compression algorithm
1606 : ! is designed for IEEE 754 double precision numbers, a 64-bit signed
1607 : ! integer variable which is used to store the result of this float-to-
1608 : ! integer conversion (we have no wish to use more memory for storing
1609 : ! compressed ERIs than it is needed for uncompressed ERIs) may overflow.
1610 : ! Abort with a meaningful message when it happens.
1611 : !
1612 : ! The magic number 63 stands for the number of magnitude bits
1613 : ! (64 bits minus one sign bit).
1614 : IF (nbits > 63) THEN
1615 : WRITE (eps_schwarz_min_str, '(ES10.3E2)') &
1616 : spherical_estimate*pmax_entry/ &
1617 : (SET_EXPONENT(1.0_dp, 63)*memory_parameter%eps_storage_scaling)
1618 :
1619 : WRITE (eps_scaling_str, '(ES10.3E2)') &
1620 : spherical_estimate*pmax_entry/(SET_EXPONENT(1.0_dp, 63)*eps_schwarz)
1621 :
1622 : CALL cp_abort(__LOCATION__, &
1623 : "Overflow during ERI's compression. Please use a larger "// &
1624 : "EPS_SCHWARZ threshold (above "//TRIM(ADJUSTL(eps_schwarz_min_str))// &
1625 : ") or increase the EPS_STORAGE_SCALING factor above "// &
1626 : TRIM(ADJUSTL(eps_scaling_str))//".")
1627 : END IF
1628 :
1629 : buffer_left = nints
1630 : buffer_start = 1
1631 : IF (.NOT. use_disk_storage) THEN
1632 : neris_incore = neris_incore + INT(nints, int_8)
1633 : ! neris_incore = neris_incore+nints
1634 : ELSE
1635 : neris_disk = neris_disk + INT(nints, int_8)
1636 : ! neris_disk = neris_disk+nints
1637 : END IF
1638 : DO WHILE (buffer_left > 0)
1639 : buffer_size = MIN(buffer_left, CACHE_SIZE)
1640 : IF (.NOT. use_disk_storage) THEN
1641 : CALL hfx_add_mult_cache_elements(primitive_integrals(buffer_start), &
1642 : buffer_size, nbits, &
1643 : integral_caches(nbits), &
1644 : integral_containers(nbits), &
1645 : eps_storage, pmax_entry, &
1646 : memory_parameter%actual_memory_usage, &
1647 : use_disk_storage)
1648 : ELSE
1649 : CALL hfx_add_mult_cache_elements(primitive_integrals(buffer_start), &
1650 : buffer_size, nbits, &
1651 : integral_caches_disk(nbits), &
1652 : integral_containers_disk(nbits), &
1653 : eps_storage, pmax_entry, &
1654 : memory_parameter%actual_memory_usage_disk, &
1655 : use_disk_storage)
1656 : END IF
1657 : buffer_left = buffer_left - buffer_size
1658 : buffer_start = buffer_start + buffer_size
1659 : END DO
1660 : ELSE
1661 : !! In order to be consistent with in-core part, round all the eris wrt. eps_schwarz
1662 : DO i = 1, nints
1663 : primitive_integrals(i) = primitive_integrals(i)*pmax_entry
1664 : IF (ABS(primitive_integrals(i)) > eps_storage) THEN
1665 : primitive_integrals(i) = ANINT(primitive_integrals(i)/eps_storage, dp)*eps_storage/pmax_entry
1666 : ELSE
1667 : primitive_integrals(i) = 0.0_dp
1668 : END IF
1669 : END DO
1670 : END IF
1671 : END IF
1672 : !!! DEBUG, print out primitive integrals and indices. Only works serial no OMP !!!
1673 : IF (.FALSE.) THEN
1674 : CALL print_integrals( &
1675 : iatom, jatom, katom, latom, shm_set_offset, shm_atomic_block_offset, &
1676 : iset, jset, kset, lset, nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), primitive_integrals)
1677 : END IF
1678 : IF (.NOT. is_anti_symmetric) THEN
1679 : !! Update Kohn-Sham matrix
1680 : CALL update_fock_matrix( &
1681 : nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1682 : fac, symm_fac, full_density_alpha(:, 1), full_ks_alpha(:, 1), &
1683 : primitive_integrals, pbd_buf, pbc_buf, pad_buf, pac_buf, kbd_buf, &
1684 : kbc_buf, kad_buf, kac_buf, iatom, jatom, katom, latom, &
1685 : iset, jset, kset, lset, offset_bd_set, offset_bc_set, offset_ad_set, offset_ac_set, &
1686 : atomic_offset_bd, atomic_offset_bc, atomic_offset_ad, atomic_offset_ac)
1687 : IF (.NOT. treat_lsd_in_core) THEN
1688 : IF (my_nspins == 2) THEN
1689 : CALL update_fock_matrix( &
1690 : nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1691 : fac, symm_fac, full_density_beta(:, 1), full_ks_beta(:, 1), &
1692 : primitive_integrals, pbd_buf, pbc_buf, pad_buf, pac_buf, kbd_buf, &
1693 : kbc_buf, kad_buf, kac_buf, iatom, jatom, katom, latom, &
1694 : iset, jset, kset, lset, offset_bd_set, offset_bc_set, offset_ad_set, offset_ac_set, &
1695 : atomic_offset_bd, atomic_offset_bc, atomic_offset_ad, atomic_offset_ac)
1696 : END IF
1697 : END IF
1698 : ELSE
1699 : !! Update Kohn-Sham matrix
1700 : CALL update_fock_matrix_as( &
1701 : nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1702 : fac, symm_fac, full_density_alpha(:, 1), full_ks_alpha(:, 1), &
1703 : primitive_integrals, pbd_buf, pbc_buf, pad_buf, pac_buf, kbd_buf, &
1704 : kbc_buf, kad_buf, kac_buf, iatom, jatom, katom, latom, &
1705 : iset, jset, kset, lset, offset_bd_set, offset_bc_set, offset_ad_set, offset_ac_set, &
1706 : atomic_offset_bd, atomic_offset_bc, atomic_offset_ad, atomic_offset_ac)
1707 : IF (.NOT. treat_lsd_in_core) THEN
1708 : IF (my_nspins == 2) THEN
1709 : CALL update_fock_matrix_as( &
1710 : nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1711 : fac, symm_fac, full_density_beta(:, 1), full_ks_beta(:, 1), &
1712 : primitive_integrals, pbd_buf, pbc_buf, pad_buf, pac_buf, kbd_buf, &
1713 : kbc_buf, kad_buf, kac_buf, iatom, jatom, katom, latom, &
1714 : iset, jset, kset, lset, offset_bd_set, offset_bc_set, offset_ad_set, offset_ac_set, &
1715 : atomic_offset_bd, atomic_offset_bc, atomic_offset_ad, atomic_offset_ac)
1716 : END IF
1717 : END IF
1718 : END IF
1719 : END DO ! i_set_list_kl
1720 : END DO ! i_set_list_ij
1721 : IF (do_disk_storage) THEN
1722 : buffer_overflow = .TRUE.
1723 : END IF
1724 : END DO !i_list_ij
1725 : END DO !i_list_kl
1726 : END DO atomic_blocks
1727 : bintime_stop = m_walltime()
1728 : IF (my_geo_change) THEN
1729 : distribution_energy%time_first_scf = bintime_stop - bintime_start
1730 : ELSE
1731 : distribution_energy%time_other_scf = &
1732 : distribution_energy%time_other_scf + bintime_stop - bintime_start
1733 : END IF
1734 : !$OMP MASTER
1735 : CALL timestop(handle_bin)
1736 : !$OMP END MASTER
1737 : END DO !bin
1738 :
1739 : !$OMP MASTER
1740 : logger => cp_get_default_logger()
1741 : do_print_load_balance_info = .FALSE.
1742 : do_print_load_balance_info = BTEST(cp_print_key_should_output(logger%iter_info, hfx_section, &
1743 : "LOAD_BALANCE%PRINT/LOAD_BALANCE_INFO"), cp_p_file)
1744 : !$OMP END MASTER
1745 : !$OMP BARRIER
1746 : IF (do_print_load_balance_info) THEN
1747 : iw = -1
1748 : !$OMP MASTER
1749 : iw = cp_print_key_unit_nr(logger, hfx_section, "LOAD_BALANCE%PRINT/LOAD_BALANCE_INFO", &
1750 : extension=".scfLog")
1751 : !$OMP END MASTER
1752 :
1753 : CALL collect_load_balance_info(para_env, actual_x_data, iw, n_threads, i_thread, &
1754 : hfx_do_eval_energy)
1755 :
1756 : !$OMP MASTER
1757 : CALL cp_print_key_finished_output(iw, logger, hfx_section, &
1758 : "LOAD_BALANCE%PRINT/LOAD_BALANCE_INFO")
1759 : !$OMP END MASTER
1760 : END IF
1761 :
1762 : !$OMP BARRIER
1763 : !$OMP MASTER
1764 : CALL m_memory(memsize_after)
1765 : !$OMP END MASTER
1766 : !$OMP BARRIER
1767 :
1768 : DEALLOCATE (primitive_integrals)
1769 : !$OMP BARRIER
1770 : !! Get some number about ERIS
1771 : !$OMP ATOMIC
1772 : shm_neris_total = shm_neris_total + neris_total
1773 : !$OMP ATOMIC
1774 : shm_neris_onthefly = shm_neris_onthefly + neris_onthefly
1775 : !$OMP ATOMIC
1776 : shm_nprim_ints = shm_nprim_ints + nprim_ints
1777 :
1778 : storage_counter_integrals = memory_parameter%actual_memory_usage* &
1779 : memory_parameter%cache_size
1780 : stor_count_int_disk = memory_parameter%actual_memory_usage_disk* &
1781 : memory_parameter%cache_size
1782 : stor_count_max_val = max_val_memory*memory_parameter%cache_size
1783 : !$OMP ATOMIC
1784 : shm_storage_counter_integrals = shm_storage_counter_integrals + storage_counter_integrals
1785 : !$OMP ATOMIC
1786 : shm_stor_count_int_disk = shm_stor_count_int_disk + stor_count_int_disk
1787 : !$OMP ATOMIC
1788 : shm_neris_incore = shm_neris_incore + neris_incore
1789 : !$OMP ATOMIC
1790 : shm_neris_disk = shm_neris_disk + neris_disk
1791 : !$OMP ATOMIC
1792 : shm_stor_count_max_val = shm_stor_count_max_val + stor_count_max_val
1793 : !$OMP BARRIER
1794 :
1795 : ! ** Calculate how much memory has already been used (might be needed for in-core forces
1796 : !$OMP MASTER
1797 : shm_mem_compression_counter = 0
1798 : DO i = 1, n_threads
1799 : !$OMP ATOMIC READ
1800 : tmp_i4 = x_data(irep, i)%memory_parameter%actual_memory_usage
1801 : shm_mem_compression_counter = shm_mem_compression_counter + &
1802 : tmp_i4*memory_parameter%cache_size
1803 : END DO
1804 : !$OMP END MASTER
1805 : !$OMP BARRIER
1806 : actual_x_data%memory_parameter%final_comp_counter_energy = shm_mem_compression_counter
1807 :
1808 : !$OMP MASTER
1809 : !! Calculate the exchange energies from the Kohn-Sham matrix. Before we can go on, we have to symmetrize.
1810 : ene_x_aa = 0.0_dp
1811 : ene_x_bb = 0.0_dp
1812 :
1813 : mb_size_p = shm_block_offset(ncpu + 1)/1024/128
1814 : mb_size_f = shm_block_offset(ncpu + 1)/1024/128
1815 : IF (.NOT. treat_lsd_in_core) THEN
1816 : IF (my_nspins == 2) THEN
1817 : mb_size_f = mb_size_f*2
1818 : mb_size_p = mb_size_p*2
1819 : END IF
1820 : END IF
1821 : !! size of primitive_integrals(not shared)
1822 : mb_size_buffers = INT(nsgf_max, int_8)**4*n_threads
1823 : !! fock density buffers (not shared)
1824 : mb_size_buffers = mb_size_buffers + INT(nsgf_max, int_8)**2*n_threads
1825 : subtr_size_mb = subtr_size_mb + 8_int_8*nsgf_max**2*n_threads
1826 : !! size of screening functions (shared)
1827 : mb_size_buffers = mb_size_buffers + max_pgf**2*max_set**2*nkind**2 &
1828 : + max_set**2*nkind**2 &
1829 : + nkind**2 &
1830 : + max_pgf**2*max_set**2*nkind**2
1831 : !! is_assoc (shared)
1832 : mb_size_buffers = mb_size_buffers + natom**2
1833 : ! ** pmax_atom (shared)
1834 : IF (do_p_screening) THEN
1835 : mb_size_buffers = mb_size_buffers + natom**2
1836 : END IF
1837 : IF (screening_parameter%do_p_screening_forces) THEN
1838 : IF (memory_parameter%treat_forces_in_core) THEN
1839 : mb_size_buffers = mb_size_buffers + natom**2
1840 : END IF
1841 : END IF
1842 : ! ** Initial P only MAX(alpha,beta) (shared)
1843 : IF (do_p_screening .OR. screening_parameter%do_p_screening_forces) THEN
1844 : mb_size_buffers = mb_size_buffers + memory_parameter%size_p_screen
1845 : END IF
1846 : ! ** In core forces require their own initial P
1847 : IF (screening_parameter%do_p_screening_forces) THEN
1848 : IF (memory_parameter%treat_forces_in_core) THEN
1849 : mb_size_buffers = mb_size_buffers + memory_parameter%size_p_screen
1850 : END IF
1851 : END IF
1852 :
1853 : !! mb
1854 : mb_size_buffers = mb_size_buffers/1024/128
1855 :
1856 : afac = 1.0_dp
1857 : IF (is_anti_symmetric) afac = -1.0_dp
1858 : CALL timestop(handle_main)
1859 : ene_x_aa_diag = 0.0_dp
1860 : ene_x_bb_diag = 0.0_dp
1861 : DO iatom = 1, natom
1862 : ikind = kind_of(iatom)
1863 : nseta = basis_parameter(ikind)%nset
1864 : nsgfa => basis_parameter(ikind)%nsgf
1865 : jatom = iatom
1866 : jkind = kind_of(jatom)
1867 : nsetb = basis_parameter(jkind)%nset
1868 : nsgfb => basis_parameter(jkind)%nsgf
1869 : act_atomic_block_offset = shm_atomic_block_offset(jatom, iatom)
1870 : DO img = 1, nkimages
1871 : DO iset = 1, nseta
1872 : DO jset = 1, nsetb
1873 : act_set_offset = shm_set_offset(jset, iset, jkind, ikind)
1874 : i = act_set_offset + act_atomic_block_offset - 1
1875 : DO ma = 1, nsgfa(iset)
1876 : j = shm_set_offset(iset, jset, jkind, ikind) + act_atomic_block_offset - 1 + ma - 1
1877 : DO mb = 1, nsgfb(jset)
1878 : IF (i > j) THEN
1879 : full_ks_alpha(i, img) = (full_ks_alpha(i, img) + full_ks_alpha(j, img)*afac)
1880 : full_ks_alpha(j, img) = full_ks_alpha(i, img)*afac
1881 : IF (.NOT. treat_lsd_in_core .AND. my_nspins == 2) THEN
1882 : full_ks_beta(i, img) = (full_ks_beta(i, img) + full_ks_beta(j, img)*afac)
1883 : full_ks_beta(j, img) = full_ks_beta(i, img)*afac
1884 : END IF
1885 : END IF
1886 : ! ** For adiabatically rescaled functionals we need the energy coming from the diagonal elements
1887 : IF (i == j) THEN
1888 : ene_x_aa_diag = ene_x_aa_diag + full_ks_alpha(i, img)*full_density_alpha(i, img)
1889 : IF (.NOT. treat_lsd_in_core .AND. my_nspins == 2) THEN
1890 : ene_x_bb_diag = ene_x_bb_diag + full_ks_beta(i, img)*full_density_beta(i, img)
1891 : END IF
1892 : END IF
1893 : i = i + 1
1894 : j = j + nsgfa(iset)
1895 : END DO
1896 : END DO
1897 : END DO
1898 : END DO
1899 : END DO
1900 : END DO
1901 :
1902 : CALL para_env%sync()
1903 : afac = 1.0_dp
1904 : IF (is_anti_symmetric) afac = 0._dp
1905 : IF (distribute_fock_matrix) THEN
1906 : !! Distribute the current KS-matrix to all the processes
1907 : CALL timeset(routineN//"_dist_KS", handle_dist_ks)
1908 : DO img = 1, nkimages
1909 : CALL distribute_ks_matrix(para_env, full_ks_alpha(:, img), ks_matrix(ispin, img)%matrix, shm_number_of_p_entries, &
1910 : shm_block_offset, kind_of, basis_parameter, &
1911 : off_diag_fac=0.5_dp, diag_fac=afac)
1912 : END DO
1913 :
1914 : NULLIFY (full_ks_alpha)
1915 : DEALLOCATE (shm_master_x_data%full_ks_alpha)
1916 : IF (.NOT. treat_lsd_in_core) THEN
1917 : IF (my_nspins == 2) THEN
1918 : DO img = 1, nkimages
1919 : CALL distribute_ks_matrix(para_env, full_ks_beta(:, img), ks_matrix(2, img)%matrix, shm_number_of_p_entries, &
1920 : shm_block_offset, kind_of, basis_parameter, &
1921 : off_diag_fac=0.5_dp, diag_fac=afac)
1922 : END DO
1923 : NULLIFY (full_ks_beta)
1924 : DEALLOCATE (shm_master_x_data%full_ks_beta)
1925 : END IF
1926 : END IF
1927 : CALL timestop(handle_dist_ks)
1928 : END IF
1929 :
1930 : IF (distribute_fock_matrix) THEN
1931 : !! ** Calculate the exchange energy
1932 : ene_x_aa = 0.0_dp
1933 : DO img = 1, nkimages
1934 : CALL dbcsr_dot_threadsafe(ks_matrix(ispin, img)%matrix, rho_ao(ispin, img)%matrix, etmp)
1935 : ene_x_aa = ene_x_aa + etmp
1936 : END DO
1937 : !for ADMMS, we need the exchange matrix k(d) for both spins
1938 : IF (dft_control%do_admm) THEN
1939 : CPASSERT(nkimages == 1)
1940 : CALL dbcsr_copy(matrix_ks_aux_fit_hfx(ispin)%matrix, ks_matrix(ispin, 1)%matrix, &
1941 : name="HF exch. part of matrix_ks_aux_fit for ADMMS")
1942 : END IF
1943 :
1944 : ene_x_bb = 0.0_dp
1945 : IF (my_nspins == 2 .AND. .NOT. treat_lsd_in_core) THEN
1946 : DO img = 1, nkimages
1947 : CALL dbcsr_dot_threadsafe(ks_matrix(2, img)%matrix, rho_ao(2, img)%matrix, etmp)
1948 : ene_x_bb = ene_x_bb + etmp
1949 : END DO
1950 : !for ADMMS, we need the exchange matrix k(d) for both spins
1951 : IF (dft_control%do_admm) THEN
1952 : CPASSERT(nkimages == 1)
1953 : CALL dbcsr_copy(matrix_ks_aux_fit_hfx(2)%matrix, ks_matrix(2, 1)%matrix, &
1954 : name="HF exch. part of matrix_ks_aux_fit for ADMMS")
1955 : END IF
1956 : END IF
1957 :
1958 : !! Update energy type
1959 : ehfx = 0.5_dp*(ene_x_aa + ene_x_bb)
1960 : ELSE
1961 : ! ** It is easier to correct the following expression by the diagonal energy contribution,
1962 : ! ** than explicitly going throuhg the diagonal elements
1963 : DO img = 1, nkimages
1964 : DO pa = 1, SIZE(full_ks_alpha, 1)
1965 : ene_x_aa = ene_x_aa + full_ks_alpha(pa, img)*full_density_alpha(pa, img)
1966 : END DO
1967 : END DO
1968 : ! ** Now correct
1969 : ene_x_aa = (ene_x_aa + ene_x_aa_diag)*0.5_dp
1970 : IF (my_nspins == 2) THEN
1971 : DO img = 1, nkimages
1972 : DO pa = 1, SIZE(full_ks_beta, 1)
1973 : ene_x_bb = ene_x_bb + full_ks_beta(pa, img)*full_density_beta(pa, img)
1974 : END DO
1975 : END DO
1976 : ! ** Now correct
1977 : ene_x_bb = (ene_x_bb + ene_x_bb_diag)*0.5_dp
1978 : END IF
1979 : CALL para_env%sum(ene_x_aa)
1980 : IF (my_nspins == 2) CALL para_env%sum(ene_x_bb)
1981 : ehfx = 0.5_dp*(ene_x_aa + ene_x_bb)
1982 : END IF
1983 :
1984 : !! Print some memeory information if this is the first step
1985 : IF (my_geo_change) THEN
1986 : tmp_i8(1:8) = [shm_storage_counter_integrals, shm_neris_onthefly, shm_neris_incore, shm_neris_disk, &
1987 : shm_neris_total, shm_stor_count_int_disk, shm_nprim_ints, shm_stor_count_max_val]
1988 : CALL para_env%sum(tmp_i8)
1989 : shm_storage_counter_integrals = tmp_i8(1)
1990 : shm_neris_onthefly = tmp_i8(2)
1991 : shm_neris_incore = tmp_i8(3)
1992 : shm_neris_disk = tmp_i8(4)
1993 : shm_neris_total = tmp_i8(5)
1994 : shm_stor_count_int_disk = tmp_i8(6)
1995 : shm_nprim_ints = tmp_i8(7)
1996 : shm_stor_count_max_val = tmp_i8(8)
1997 : CALL para_env%max(memsize_after)
1998 : mem_eris = (shm_storage_counter_integrals + 128*1024 - 1)/1024/128
1999 : compression_factor = REAL(shm_neris_incore, dp)/REAL(shm_storage_counter_integrals, dp)
2000 : mem_eris_disk = (shm_stor_count_int_disk + 128*1024 - 1)/1024/128
2001 : compression_factor_disk = REAL(shm_neris_disk, dp)/REAL(shm_stor_count_int_disk, dp)
2002 : mem_max_val = (shm_stor_count_max_val + 128*1024 - 1)/1024/128
2003 :
2004 : IF (shm_neris_incore == 0) THEN
2005 : mem_eris = 0
2006 : compression_factor = 0.0_dp
2007 : END IF
2008 : IF (shm_neris_disk == 0) THEN
2009 : mem_eris_disk = 0
2010 : compression_factor_disk = 0.0_dp
2011 : END IF
2012 :
2013 : iw = cp_print_key_unit_nr(logger, hfx_section, "HF_INFO", &
2014 : extension=".scfLog")
2015 : IF (iw > 0) THEN
2016 : WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
2017 : "HFX_MEM_INFO| Number of cart. primitive ERI's calculated: ", shm_nprim_ints
2018 :
2019 : WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
2020 : "HFX_MEM_INFO| Number of sph. ERI's calculated: ", shm_neris_total
2021 :
2022 : WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
2023 : "HFX_MEM_INFO| Number of sph. ERI's stored in-core: ", shm_neris_incore
2024 :
2025 : WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
2026 : "HFX_MEM_INFO| Number of sph. ERI's stored on disk: ", shm_neris_disk
2027 :
2028 : WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
2029 : "HFX_MEM_INFO| Number of sph. ERI's calculated on the fly: ", shm_neris_onthefly
2030 :
2031 : WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
2032 : "HFX_MEM_INFO| Total memory consumption ERI's RAM [MiB]: ", mem_eris
2033 :
2034 : WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
2035 : "HFX_MEM_INFO| Whereof max-vals [MiB]: ", mem_max_val
2036 :
2037 : WRITE (UNIT=iw, FMT="((T3,A,T60,F21.2))") &
2038 : "HFX_MEM_INFO| Total compression factor ERI's RAM: ", compression_factor
2039 :
2040 : WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
2041 : "HFX_MEM_INFO| Total memory consumption ERI's disk [MiB]: ", mem_eris_disk
2042 :
2043 : WRITE (UNIT=iw, FMT="((T3,A,T60,F21.2))") &
2044 : "HFX_MEM_INFO| Total compression factor ERI's disk: ", compression_factor_disk
2045 :
2046 : WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
2047 : "HFX_MEM_INFO| Size of density/Fock matrix [MiB]: ", 2_int_8*mb_size_p
2048 :
2049 : IF (do_periodic) THEN
2050 : WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
2051 : "HFX_MEM_INFO| Size of buffers [MiB]: ", mb_size_buffers
2052 : WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
2053 : "HFX_MEM_INFO| Number of periodic image cells considered: ", SIZE(shm_master_x_data%neighbor_cells)
2054 : ELSE
2055 : WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
2056 : "HFX_MEM_INFO| Size of buffers [MiB]: ", mb_size_buffers
2057 : END IF
2058 : WRITE (UNIT=iw, FMT="((T3,A,T60,I21),/)") &
2059 : "HFX_MEM_INFO| Est. max. program size after HFX [MiB]:", memsize_after/(1024*1024)
2060 : CALL m_flush(iw)
2061 : END IF
2062 :
2063 : CALL cp_print_key_finished_output(iw, logger, hfx_section, &
2064 : "HF_INFO")
2065 : END IF
2066 : !$OMP END MASTER
2067 :
2068 : !! flush caches if the geometry changed
2069 : IF (do_dynamic_load_balancing) THEN
2070 : my_bin_size = SIZE(actual_x_data%distribution_energy)
2071 : ELSE
2072 : my_bin_size = 1
2073 : END IF
2074 :
2075 : IF (my_geo_change) THEN
2076 : IF (.NOT. memory_parameter%do_all_on_the_fly) THEN
2077 : DO bin = 1, my_bin_size
2078 : maxval_cache => actual_x_data%store_ints%maxval_cache(bin)
2079 : maxval_container => actual_x_data%store_ints%maxval_container(bin)
2080 : integral_caches => actual_x_data%store_ints%integral_caches(:, bin)
2081 : integral_containers => actual_x_data%store_ints%integral_containers(:, bin)
2082 : CALL hfx_flush_last_cache(bits_max_val, maxval_cache, maxval_container, memory_parameter%actual_memory_usage, &
2083 : .FALSE.)
2084 : DO i = 1, 64
2085 : CALL hfx_flush_last_cache(i, integral_caches(i), integral_containers(i), &
2086 : memory_parameter%actual_memory_usage, .FALSE.)
2087 : END DO
2088 : END DO
2089 : END IF
2090 : END IF
2091 : !! reset all caches except we calculate all on the fly
2092 : IF (.NOT. memory_parameter%do_all_on_the_fly) THEN
2093 : DO bin = 1, my_bin_size
2094 : maxval_cache => actual_x_data%store_ints%maxval_cache(bin)
2095 : maxval_container => actual_x_data%store_ints%maxval_container(bin)
2096 : integral_caches => actual_x_data%store_ints%integral_caches(:, bin)
2097 : integral_containers => actual_x_data%store_ints%integral_containers(:, bin)
2098 :
2099 : CALL hfx_reset_cache_and_container(maxval_cache, maxval_container, memory_parameter%actual_memory_usage, .FALSE.)
2100 : DO i = 1, 64
2101 : CALL hfx_reset_cache_and_container(integral_caches(i), integral_containers(i), &
2102 : memory_parameter%actual_memory_usage, &
2103 : .FALSE.)
2104 : END DO
2105 : END DO
2106 : END IF
2107 :
2108 : !! Since the I/O routines are no thread-safe, i.e. the procedure to get the unit number, put a lock here
2109 : !$OMP CRITICAL(hfxenergy_out_critical)
2110 : IF (do_disk_storage) THEN
2111 : !! flush caches if the geometry changed
2112 : IF (my_geo_change) THEN
2113 : CALL hfx_flush_last_cache(bits_max_val, maxval_cache_disk, maxval_container_disk, &
2114 : memory_parameter%actual_memory_usage_disk, .TRUE.)
2115 : DO i = 1, 64
2116 : CALL hfx_flush_last_cache(i, integral_caches_disk(i), integral_containers_disk(i), &
2117 : memory_parameter%actual_memory_usage_disk, .TRUE.)
2118 : END DO
2119 : END IF
2120 : !! reset all caches except we calculate all on the fly
2121 : CALL hfx_reset_cache_and_container(maxval_cache_disk, maxval_container_disk, memory_parameter%actual_memory_usage_disk, &
2122 : do_disk_storage)
2123 : DO i = 1, 64
2124 : CALL hfx_reset_cache_and_container(integral_caches_disk(i), integral_containers_disk(i), &
2125 : memory_parameter%actual_memory_usage_disk, do_disk_storage)
2126 : END DO
2127 : END IF
2128 : !$OMP END CRITICAL(hfxenergy_out_critical)
2129 : !$OMP BARRIER
2130 : !! Clean up
2131 : DEALLOCATE (last_sgf_global)
2132 : !$OMP MASTER
2133 : DEALLOCATE (full_density_alpha)
2134 : IF (.NOT. treat_lsd_in_core) THEN
2135 : IF (my_nspins == 2) THEN
2136 : DEALLOCATE (full_density_beta)
2137 : END IF
2138 : END IF
2139 : IF (do_dynamic_load_balancing) THEN
2140 : DEALLOCATE (shm_master_x_data%task_list)
2141 : END IF
2142 : !$OMP END MASTER
2143 : DEALLOCATE (pbd_buf, pbc_buf, pad_buf, pac_buf)
2144 : DEALLOCATE (kbd_buf, kbc_buf, kad_buf, kac_buf)
2145 : DEALLOCATE (set_list_ij, set_list_kl)
2146 :
2147 : DO i = 1, max_pgf**2
2148 : DEALLOCATE (pgf_list_ij(i)%image_list)
2149 : DEALLOCATE (pgf_list_kl(i)%image_list)
2150 : END DO
2151 :
2152 : DEALLOCATE (pgf_list_ij)
2153 : DEALLOCATE (pgf_list_kl)
2154 : DEALLOCATE (pgf_product_list)
2155 :
2156 : DEALLOCATE (max_contraction, kind_of)
2157 :
2158 : DEALLOCATE (ee_work, ee_work2, ee_buffer1, ee_buffer2, ee_primitives_tmp)
2159 :
2160 : DEALLOCATE (nimages)
2161 :
2162 : !$OMP BARRIER
2163 : !$OMP END PARALLEL
2164 :
2165 36269 : CALL timestop(handle)
2166 75221906 : END SUBROUTINE integrate_four_center
2167 :
2168 : ! **************************************************************************************************
2169 : !> \brief calculates two-electron integrals of a quartet/shell using the library
2170 : !> lib_int in the periodic case
2171 : !> \param lib ...
2172 : !> \param ra ...
2173 : !> \param rb ...
2174 : !> \param rc ...
2175 : !> \param rd ...
2176 : !> \param npgfa ...
2177 : !> \param npgfb ...
2178 : !> \param npgfc ...
2179 : !> \param npgfd ...
2180 : !> \param la_min ...
2181 : !> \param la_max ...
2182 : !> \param lb_min ...
2183 : !> \param lb_max ...
2184 : !> \param lc_min ...
2185 : !> \param lc_max ...
2186 : !> \param ld_min ...
2187 : !> \param ld_max ...
2188 : !> \param nsgfa ...
2189 : !> \param nsgfb ...
2190 : !> \param nsgfc ...
2191 : !> \param nsgfd ...
2192 : !> \param sphi_a_u1 ...
2193 : !> \param sphi_a_u2 ...
2194 : !> \param sphi_a_u3 ...
2195 : !> \param sphi_b_u1 ...
2196 : !> \param sphi_b_u2 ...
2197 : !> \param sphi_b_u3 ...
2198 : !> \param sphi_c_u1 ...
2199 : !> \param sphi_c_u2 ...
2200 : !> \param sphi_c_u3 ...
2201 : !> \param sphi_d_u1 ...
2202 : !> \param sphi_d_u2 ...
2203 : !> \param sphi_d_u3 ...
2204 : !> \param zeta ...
2205 : !> \param zetb ...
2206 : !> \param zetc ...
2207 : !> \param zetd ...
2208 : !> \param primitive_integrals array of primitive_integrals
2209 : !> \param potential_parameter contains info for libint
2210 : !> \param neighbor_cells Periodic images
2211 : !> \param screen1 set based coefficients for near field screening
2212 : !> \param screen2 set based coefficients for near field screening
2213 : !> \param eps_schwarz threshold
2214 : !> \param max_contraction_val maximum multiplication factor for cart -> sph
2215 : !> \param cart_estimate maximum calculated integral value
2216 : !> \param cell cell
2217 : !> \param neris_tmp counter for calculated cart integrals
2218 : !> \param log10_pmax logarithm of initial p matrix max element
2219 : !> \param log10_eps_schwarz log of threshold
2220 : !> \param R1_pgf coefficients for radii of product distribution function
2221 : !> \param R2_pgf coefficients for radii of product distribution function
2222 : !> \param pgf1 schwarz coefficients pgf basid
2223 : !> \param pgf2 schwarz coefficients pgf basid
2224 : !> \param pgf_list_ij ...
2225 : !> \param pgf_list_kl ...
2226 : !> \param pgf_product_list ...
2227 : !> \param nsgfl_a ...
2228 : !> \param nsgfl_b ...
2229 : !> \param nsgfl_c ...
2230 : !> \param nsgfl_d ...
2231 : !> \param sphi_a_ext ...
2232 : !> \param sphi_b_ext ...
2233 : !> \param sphi_c_ext ...
2234 : !> \param sphi_d_ext ...
2235 : !> \param ee_work ...
2236 : !> \param ee_work2 ...
2237 : !> \param ee_buffer1 ...
2238 : !> \param ee_buffer2 ...
2239 : !> \param ee_primitives_tmp ...
2240 : !> \param nimages ...
2241 : !> \param do_periodic ...
2242 : !> \param p_work ...
2243 : !> \par History
2244 : !> 11.2006 created [Manuel Guidon]
2245 : !> 02.2009 completely rewritten screening part [Manuel Guidon]
2246 : !> \author Manuel Guidon
2247 : ! **************************************************************************************************
2248 8232355 : SUBROUTINE coulomb4(lib, ra, rb, rc, rd, npgfa, npgfb, npgfc, npgfd, &
2249 : la_min, la_max, lb_min, lb_max, &
2250 : lc_min, lc_max, ld_min, ld_max, nsgfa, nsgfb, nsgfc, nsgfd, &
2251 : sphi_a_u1, sphi_a_u2, sphi_a_u3, &
2252 : sphi_b_u1, sphi_b_u2, sphi_b_u3, &
2253 : sphi_c_u1, sphi_c_u2, sphi_c_u3, &
2254 : sphi_d_u1, sphi_d_u2, sphi_d_u3, &
2255 8232355 : zeta, zetb, zetc, zetd, &
2256 8232355 : primitive_integrals, &
2257 : potential_parameter, neighbor_cells, &
2258 : screen1, screen2, eps_schwarz, max_contraction_val, &
2259 : cart_estimate, cell, neris_tmp, log10_pmax, &
2260 : log10_eps_schwarz, R1_pgf, R2_pgf, pgf1, pgf2, &
2261 : pgf_list_ij, pgf_list_kl, &
2262 : pgf_product_list, &
2263 8232355 : nsgfl_a, nsgfl_b, nsgfl_c, &
2264 8232355 : nsgfl_d, &
2265 8232355 : sphi_a_ext, sphi_b_ext, sphi_c_ext, sphi_d_ext, &
2266 : ee_work, ee_work2, ee_buffer1, ee_buffer2, ee_primitives_tmp, &
2267 : nimages, do_periodic, p_work)
2268 :
2269 : TYPE(cp_libint_t) :: lib
2270 : REAL(dp), INTENT(IN) :: ra(3), rb(3), rc(3), rd(3)
2271 : INTEGER, INTENT(IN) :: npgfa, npgfb, npgfc, npgfd, la_min, la_max, lb_min, lb_max, lc_min, &
2272 : lc_max, ld_min, ld_max, nsgfa, nsgfb, nsgfc, nsgfd, sphi_a_u1, sphi_a_u2, sphi_a_u3, &
2273 : sphi_b_u1, sphi_b_u2, sphi_b_u3, sphi_c_u1, sphi_c_u2, sphi_c_u3, sphi_d_u1, sphi_d_u2, &
2274 : sphi_d_u3
2275 : REAL(dp), DIMENSION(1:npgfa), INTENT(IN) :: zeta
2276 : REAL(dp), DIMENSION(1:npgfb), INTENT(IN) :: zetb
2277 : REAL(dp), DIMENSION(1:npgfc), INTENT(IN) :: zetc
2278 : REAL(dp), DIMENSION(1:npgfd), INTENT(IN) :: zetd
2279 : REAL(dp), DIMENSION(nsgfa, nsgfb, nsgfc, nsgfd) :: primitive_integrals
2280 : TYPE(hfx_potential_type) :: potential_parameter
2281 : TYPE(hfx_cell_type), DIMENSION(:), POINTER :: neighbor_cells
2282 : REAL(dp), INTENT(IN) :: screen1(2), screen2(2), eps_schwarz, &
2283 : max_contraction_val
2284 : REAL(dp) :: cart_estimate
2285 : TYPE(cell_type), POINTER :: cell
2286 : INTEGER(int_8) :: neris_tmp
2287 : REAL(dp), INTENT(IN) :: log10_pmax, log10_eps_schwarz
2288 : TYPE(hfx_screen_coeff_type), DIMENSION(:, :), &
2289 : POINTER :: R1_pgf, R2_pgf, pgf1, pgf2
2290 : TYPE(hfx_pgf_list), DIMENSION(*) :: pgf_list_ij, pgf_list_kl
2291 : TYPE(hfx_pgf_product_list), ALLOCATABLE, &
2292 : DIMENSION(:), INTENT(INOUT) :: pgf_product_list
2293 : INTEGER, DIMENSION(0:), INTENT(IN) :: nsgfl_a, nsgfl_b, nsgfl_c, nsgfl_d
2294 : REAL(dp), INTENT(IN) :: sphi_a_ext(sphi_a_u1, sphi_a_u2, sphi_a_u3), &
2295 : sphi_b_ext(sphi_b_u1, sphi_b_u2, sphi_b_u3), sphi_c_ext(sphi_c_u1, sphi_c_u2, sphi_c_u3), &
2296 : sphi_d_ext(sphi_d_u1, sphi_d_u2, sphi_d_u3)
2297 : REAL(dp), DIMENSION(*) :: ee_work, ee_work2, ee_buffer1, &
2298 : ee_buffer2, ee_primitives_tmp
2299 : INTEGER, DIMENSION(*) :: nimages
2300 : LOGICAL, INTENT(IN) :: do_periodic
2301 : REAL(dp), DIMENSION(:), POINTER :: p_work
2302 :
2303 : INTEGER :: ipgf, jpgf, kpgf, la, lb, lc, ld, list_ij, list_kl, lpgf, max_l, ncoa, ncob, &
2304 : ncoc, ncod, nelements_ij, nelements_kl, nproducts, nsgfla, nsgflb, nsgflc, nsgfld, nsoa, &
2305 : nsob, nsoc, nsod, s_offset_a, s_offset_b, s_offset_c, s_offset_d
2306 : REAL(dp) :: EtaInv, tmp_max, ZetaInv
2307 :
2308 : CALL build_pair_list_pgf(npgfa, npgfb, pgf_list_ij, zeta, zetb, screen1, screen2, &
2309 : pgf1, R1_pgf, log10_pmax, log10_eps_schwarz, ra, rb, &
2310 : nelements_ij, &
2311 8232355 : neighbor_cells, nimages, do_periodic)
2312 : CALL build_pair_list_pgf(npgfc, npgfd, pgf_list_kl, zetc, zetd, screen2, screen1, &
2313 : pgf2, R2_pgf, log10_pmax, log10_eps_schwarz, rc, rd, &
2314 : nelements_kl, &
2315 8232355 : neighbor_cells, nimages, do_periodic)
2316 :
2317 8232355 : cart_estimate = 0.0_dp
2318 8232355 : neris_tmp = 0
2319 552647897 : primitive_integrals = 0.0_dp
2320 8232355 : max_l = la_max + lb_max + lc_max + ld_max
2321 28382077 : DO list_ij = 1, nelements_ij
2322 20149722 : ZetaInv = pgf_list_ij(list_ij)%ZetaInv
2323 20149722 : ipgf = pgf_list_ij(list_ij)%ipgf
2324 20149722 : jpgf = pgf_list_ij(list_ij)%jpgf
2325 :
2326 100477610 : DO list_kl = 1, nelements_kl
2327 72095533 : EtaInv = pgf_list_kl(list_kl)%ZetaInv
2328 72095533 : kpgf = pgf_list_kl(list_kl)%ipgf
2329 72095533 : lpgf = pgf_list_kl(list_kl)%jpgf
2330 :
2331 : CALL build_pgf_product_list(pgf_list_ij(list_ij), pgf_list_kl(list_kl), pgf_product_list, &
2332 : nproducts, log10_pmax, log10_eps_schwarz, neighbor_cells, cell, &
2333 72095533 : potential_parameter, max_l, do_periodic)
2334 :
2335 72095533 : s_offset_a = 0
2336 178156373 : DO la = la_min, la_max
2337 85911118 : s_offset_b = 0
2338 85911118 : ncoa = nco(la)
2339 85911118 : nsgfla = nsgfl_a(la)
2340 85911118 : nsoa = nso(la)
2341 :
2342 184037947 : DO lb = lb_min, lb_max
2343 98126829 : s_offset_c = 0
2344 98126829 : ncob = nco(lb)
2345 98126829 : nsgflb = nsgfl_b(lb)
2346 98126829 : nsob = nso(lb)
2347 :
2348 229514839 : DO lc = lc_min, lc_max
2349 131388010 : s_offset_d = 0
2350 131388010 : ncoc = nco(lc)
2351 131388010 : nsgflc = nsgfl_c(lc)
2352 131388010 : nsoc = nso(lc)
2353 :
2354 305057491 : DO ld = ld_min, ld_max
2355 173669481 : ncod = nco(ld)
2356 173669481 : nsgfld = nsgfl_d(ld)
2357 173669481 : nsod = nso(ld)
2358 :
2359 173669481 : tmp_max = 0.0_dp
2360 : CALL evaluate_eri(lib, nproducts, pgf_product_list, &
2361 : la, lb, lc, ld, &
2362 : ncoa, ncob, ncoc, ncod, &
2363 : nsgfa, nsgfb, nsgfc, nsgfd, &
2364 : primitive_integrals, &
2365 : max_contraction_val, tmp_max, eps_schwarz, &
2366 : neris_tmp, ZetaInv, EtaInv, &
2367 : s_offset_a, s_offset_b, s_offset_c, s_offset_d, &
2368 : nsgfla, nsgflb, nsgflc, nsgfld, nsoa, nsob, nsoc, nsod, &
2369 : sphi_a_ext(1, la + 1, ipgf), &
2370 : sphi_b_ext(1, lb + 1, jpgf), &
2371 : sphi_c_ext(1, lc + 1, kpgf), &
2372 : sphi_d_ext(1, ld + 1, lpgf), &
2373 : ee_work, ee_work2, ee_buffer1, ee_buffer2, ee_primitives_tmp, &
2374 173669481 : p_work)
2375 173669481 : cart_estimate = MAX(tmp_max, cart_estimate)
2376 305057491 : s_offset_d = s_offset_d + nsod*nsgfld
2377 : END DO !ld
2378 229514839 : s_offset_c = s_offset_c + nsoc*nsgflc
2379 : END DO !lc
2380 184037947 : s_offset_b = s_offset_b + nsob*nsgflb
2381 : END DO !lb
2382 158006651 : s_offset_a = s_offset_a + nsoa*nsgfla
2383 : END DO !la
2384 : END DO
2385 : END DO
2386 :
2387 8232355 : END SUBROUTINE coulomb4
2388 :
2389 : ! **************************************************************************************************
2390 : !> \brief Given a 2d index pair, this function returns a 1d index pair for
2391 : !> a symmetric upper triangle NxN matrix
2392 : !> The compiler should inline this function, therefore it appears in
2393 : !> several modules
2394 : !> \param i 2d index
2395 : !> \param j 2d index
2396 : !> \param N matrix size
2397 : !> \return ...
2398 : !> \par History
2399 : !> 03.2009 created [Manuel Guidon]
2400 : !> \author Manuel Guidon
2401 : ! **************************************************************************************************
2402 28264 : PURE FUNCTION get_1D_idx(i, j, N)
2403 : INTEGER, INTENT(IN) :: i, j
2404 : INTEGER(int_8), INTENT(IN) :: N
2405 : INTEGER(int_8) :: get_1D_idx
2406 :
2407 : INTEGER(int_8) :: min_ij
2408 :
2409 28264 : min_ij = MIN(i, j)
2410 28264 : get_1D_idx = min_ij*N + MAX(i, j) - (min_ij - 1)*min_ij/2 - N
2411 :
2412 28264 : END FUNCTION get_1D_idx
2413 :
2414 : ! **************************************************************************************************
2415 : !> \brief This routine prefetches density/fock matrix elements and stores them
2416 : !> in cache friendly arrays. These buffers are then used to update the
2417 : !> fock matrix
2418 : !> \param ma_max Size of matrix blocks
2419 : !> \param mb_max Size of matrix blocks
2420 : !> \param mc_max Size of matrix blocks
2421 : !> \param md_max Size of matrix blocks
2422 : !> \param fac multiplication factor (spin)
2423 : !> \param symm_fac multiplication factor (symmetry)
2424 : !> \param density upper triangular density matrix
2425 : !> \param ks upper triangular fock matrix
2426 : !> \param prim primitive integrals
2427 : !> \param pbd buffer that will contain P(b,d)
2428 : !> \param pbc buffer that will contain P(b,c)
2429 : !> \param pad buffer that will contain P(a,d)
2430 : !> \param pac buffer that will contain P(a,c)
2431 : !> \param kbd buffer for KS(b,d)
2432 : !> \param kbc buffer for KS(b,c)
2433 : !> \param kad buffer for KS(a,d)
2434 : !> \param kac buffer for KS(a,c)
2435 : !> \param iatom ...
2436 : !> \param jatom ...
2437 : !> \param katom ...
2438 : !> \param latom ...
2439 : !> \param iset ...
2440 : !> \param jset ...
2441 : !> \param kset ...
2442 : !> \param lset ...
2443 : !> \param offset_bd_set ...
2444 : !> \param offset_bc_set ...
2445 : !> \param offset_ad_set ...
2446 : !> \param offset_ac_set ...
2447 : !> \param atomic_offset_bd ...
2448 : !> \param atomic_offset_bc ...
2449 : !> \param atomic_offset_ad ...
2450 : !> \param atomic_offset_ac ...
2451 : !> \par History
2452 : !> 03.2009 created [Manuel Guidon]
2453 : !> \author Manuel Guidon
2454 : ! **************************************************************************************************
2455 :
2456 73074964 : SUBROUTINE update_fock_matrix(ma_max, mb_max, mc_max, md_max, &
2457 73074964 : fac, symm_fac, density, ks, prim, &
2458 : pbd, pbc, pad, pac, kbd, kbc, kad, kac, &
2459 : iatom, jatom, katom, latom, &
2460 : iset, jset, kset, lset, offset_bd_set, offset_bc_set, offset_ad_set, &
2461 : offset_ac_set, atomic_offset_bd, atomic_offset_bc, atomic_offset_ad, &
2462 : atomic_offset_ac)
2463 :
2464 : INTEGER, INTENT(IN) :: ma_max, mb_max, mc_max, md_max
2465 : REAL(dp), INTENT(IN) :: fac, symm_fac
2466 : REAL(dp), DIMENSION(:), INTENT(IN) :: density
2467 : REAL(dp), DIMENSION(:), INTENT(INOUT) :: ks
2468 : REAL(dp), DIMENSION(ma_max*mb_max*mc_max*md_max), &
2469 : INTENT(IN) :: prim
2470 : REAL(dp), DIMENSION(*), INTENT(INOUT) :: pbd, pbc, pad, pac, kbd, kbc, kad, kac
2471 : INTEGER, INTENT(IN) :: iatom, jatom, katom, latom, iset, jset, &
2472 : kset, lset
2473 : INTEGER, DIMENSION(:, :), POINTER, INTENT(IN) :: offset_bd_set, offset_bc_set, &
2474 : offset_ad_set, offset_ac_set
2475 : INTEGER, INTENT(IN) :: atomic_offset_bd, atomic_offset_bc, &
2476 : atomic_offset_ad, atomic_offset_ac
2477 :
2478 : INTEGER :: i, j, ma, mb, mc, md, offset_ac, &
2479 : offset_ad, offset_bc, offset_bd
2480 : REAL(dp) :: ki
2481 :
2482 73074964 : IF (jatom >= latom) THEN
2483 71897558 : i = 1
2484 71897558 : offset_bd = offset_bd_set(jset, lset) + atomic_offset_bd - 1
2485 71897558 : j = offset_bd
2486 216852618 : DO md = 1, md_max
2487 486828908 : DO mb = 1, mb_max
2488 269976290 : pbd(i) = density(j)
2489 269976290 : i = i + 1
2490 414931350 : j = j + 1
2491 : END DO
2492 : END DO
2493 : ELSE
2494 1177406 : i = 1
2495 1177406 : offset_bd = offset_bd_set(lset, jset) + atomic_offset_bd - 1
2496 3483410 : DO md = 1, md_max
2497 2306004 : j = offset_bd + md - 1
2498 8579719 : DO mb = 1, mb_max
2499 5096309 : pbd(i) = density(j)
2500 5096309 : i = i + 1
2501 7402313 : j = j + md_max
2502 : END DO
2503 : END DO
2504 : END IF
2505 73074964 : IF (jatom >= katom) THEN
2506 73074964 : i = 1
2507 73074964 : offset_bc = offset_bc_set(jset, kset) + atomic_offset_bc - 1
2508 73074964 : j = offset_bc
2509 243265884 : DO mc = 1, mc_max
2510 551161344 : DO mb = 1, mb_max
2511 307895460 : pbc(i) = density(j)
2512 307895460 : i = i + 1
2513 478086380 : j = j + 1
2514 : END DO
2515 : END DO
2516 : ELSE
2517 0 : i = 1
2518 0 : offset_bc = offset_bc_set(kset, jset) + atomic_offset_bc - 1
2519 0 : DO mc = 1, mc_max
2520 0 : j = offset_bc + mc - 1
2521 0 : DO mb = 1, mb_max
2522 0 : pbc(i) = density(j)
2523 0 : i = i + 1
2524 0 : j = j + mc_max
2525 : END DO
2526 : END DO
2527 : END IF
2528 73074964 : IF (iatom >= latom) THEN
2529 52367469 : i = 1
2530 52367469 : offset_ad = offset_ad_set(iset, lset) + atomic_offset_ad - 1
2531 52367469 : j = offset_ad
2532 165093466 : DO md = 1, md_max
2533 409612096 : DO ma = 1, ma_max
2534 244518630 : pad(i) = density(j)
2535 244518630 : i = i + 1
2536 357244627 : j = j + 1
2537 : END DO
2538 : END DO
2539 : ELSE
2540 20707495 : i = 1
2541 20707495 : offset_ad = offset_ad_set(lset, iset) + atomic_offset_ad - 1
2542 55242562 : DO md = 1, md_max
2543 34535067 : j = offset_ad + md - 1
2544 138088256 : DO ma = 1, ma_max
2545 82845694 : pad(i) = density(j)
2546 82845694 : i = i + 1
2547 117380761 : j = j + md_max
2548 : END DO
2549 : END DO
2550 : END IF
2551 73074964 : IF (iatom >= katom) THEN
2552 69639489 : i = 1
2553 69639489 : offset_ac = offset_ac_set(iset, kset) + atomic_offset_ac - 1
2554 69639489 : j = offset_ac
2555 233568059 : DO mc = 1, mc_max
2556 593677772 : DO ma = 1, ma_max
2557 360109713 : pac(i) = density(j)
2558 360109713 : i = i + 1
2559 524038283 : j = j + 1
2560 : END DO
2561 : END DO
2562 : ELSE
2563 3435475 : i = 1
2564 3435475 : offset_ac = offset_ac_set(kset, iset) + atomic_offset_ac - 1
2565 9697825 : DO mc = 1, mc_max
2566 6262350 : j = offset_ac + mc - 1
2567 26152925 : DO ma = 1, ma_max
2568 16455100 : pac(i) = density(j)
2569 16455100 : i = i + 1
2570 22717450 : j = j + mc_max
2571 : END DO
2572 : END DO
2573 : END IF
2574 :
2575 73074964 : CALL contract_block(ma_max, mb_max, mc_max, md_max, kbd, kbc, kad, kac, pbd, pbc, pad, pac, prim, fac*symm_fac)
2576 73074964 : IF (jatom >= latom) THEN
2577 : i = 1
2578 : j = offset_bd
2579 216852618 : DO md = 1, md_max
2580 486828908 : DO mb = 1, mb_max
2581 269976290 : ki = kbd(i)
2582 269976290 : !$OMP ATOMIC
2583 : ks(j) = ks(j) + ki
2584 269976290 : i = i + 1
2585 414931350 : j = j + 1
2586 : END DO
2587 : END DO
2588 : ELSE
2589 : i = 1
2590 3483410 : DO md = 1, md_max
2591 2306004 : j = offset_bd + md - 1
2592 8579719 : DO mb = 1, mb_max
2593 5096309 : ki = kbd(i)
2594 5096309 : !$OMP ATOMIC
2595 : ks(j) = ks(j) + ki
2596 5096309 : i = i + 1
2597 7402313 : j = j + md_max
2598 : END DO
2599 : END DO
2600 : END IF
2601 73074964 : IF (jatom >= katom) THEN
2602 : i = 1
2603 : j = offset_bc
2604 243265884 : DO mc = 1, mc_max
2605 551161344 : DO mb = 1, mb_max
2606 307895460 : ki = kbc(i)
2607 307895460 : !$OMP ATOMIC
2608 : ks(j) = ks(j) + ki
2609 307895460 : i = i + 1
2610 478086380 : j = j + 1
2611 : END DO
2612 : END DO
2613 : ELSE
2614 : i = 1
2615 0 : DO mc = 1, mc_max
2616 0 : j = offset_bc + mc - 1
2617 0 : DO mb = 1, mb_max
2618 0 : ki = kbc(i)
2619 0 : !$OMP ATOMIC
2620 : ks(j) = ks(j) + ki
2621 0 : i = i + 1
2622 0 : j = j + mc_max
2623 : END DO
2624 : END DO
2625 : END IF
2626 73074964 : IF (iatom >= latom) THEN
2627 : i = 1
2628 : j = offset_ad
2629 165093466 : DO md = 1, md_max
2630 409612096 : DO ma = 1, ma_max
2631 244518630 : ki = kad(i)
2632 244518630 : !$OMP ATOMIC
2633 : ks(j) = ks(j) + ki
2634 244518630 : i = i + 1
2635 357244627 : j = j + 1
2636 : END DO
2637 : END DO
2638 : ELSE
2639 : i = 1
2640 55242562 : DO md = 1, md_max
2641 34535067 : j = offset_ad + md - 1
2642 138088256 : DO ma = 1, ma_max
2643 82845694 : ki = kad(i)
2644 82845694 : !$OMP ATOMIC
2645 : ks(j) = ks(j) + ki
2646 82845694 : i = i + 1
2647 117380761 : j = j + md_max
2648 : END DO
2649 : END DO
2650 : END IF
2651 73074964 : IF (iatom >= katom) THEN
2652 : i = 1
2653 : j = offset_ac
2654 233568059 : DO mc = 1, mc_max
2655 593677772 : DO ma = 1, ma_max
2656 360109713 : ki = kac(i)
2657 360109713 : !$OMP ATOMIC
2658 : ks(j) = ks(j) + ki
2659 360109713 : i = i + 1
2660 524038283 : j = j + 1
2661 : END DO
2662 : END DO
2663 : ELSE
2664 : i = 1
2665 9697825 : DO mc = 1, mc_max
2666 6262350 : j = offset_ac + mc - 1
2667 26152925 : DO ma = 1, ma_max
2668 16455100 : ki = kac(i)
2669 16455100 : !$OMP ATOMIC
2670 : ks(j) = ks(j) + ki
2671 16455100 : i = i + 1
2672 22717450 : j = j + mc_max
2673 : END DO
2674 : END DO
2675 : END IF
2676 73074964 : END SUBROUTINE update_fock_matrix
2677 :
2678 : ! **************************************************************************************************
2679 : !> \brief ...
2680 : !> \param ma_max ...
2681 : !> \param mb_max ...
2682 : !> \param mc_max ...
2683 : !> \param md_max ...
2684 : !> \param fac ...
2685 : !> \param symm_fac ...
2686 : !> \param density ...
2687 : !> \param ks ...
2688 : !> \param prim ...
2689 : !> \param pbd ...
2690 : !> \param pbc ...
2691 : !> \param pad ...
2692 : !> \param pac ...
2693 : !> \param kbd ...
2694 : !> \param kbc ...
2695 : !> \param kad ...
2696 : !> \param kac ...
2697 : !> \param iatom ...
2698 : !> \param jatom ...
2699 : !> \param katom ...
2700 : !> \param latom ...
2701 : !> \param iset ...
2702 : !> \param jset ...
2703 : !> \param kset ...
2704 : !> \param lset ...
2705 : !> \param offset_bd_set ...
2706 : !> \param offset_bc_set ...
2707 : !> \param offset_ad_set ...
2708 : !> \param offset_ac_set ...
2709 : !> \param atomic_offset_bd ...
2710 : !> \param atomic_offset_bc ...
2711 : !> \param atomic_offset_ad ...
2712 : !> \param atomic_offset_ac ...
2713 : ! **************************************************************************************************
2714 4833577 : SUBROUTINE update_fock_matrix_as(ma_max, mb_max, mc_max, md_max, &
2715 4833577 : fac, symm_fac, density, ks, prim, &
2716 : pbd, pbc, pad, pac, kbd, kbc, kad, kac, &
2717 : iatom, jatom, katom, latom, &
2718 : iset, jset, kset, lset, offset_bd_set, offset_bc_set, offset_ad_set, &
2719 : offset_ac_set, atomic_offset_bd, atomic_offset_bc, atomic_offset_ad, &
2720 : atomic_offset_ac)
2721 :
2722 : INTEGER, INTENT(IN) :: ma_max, mb_max, mc_max, md_max
2723 : REAL(dp), INTENT(IN) :: fac, symm_fac
2724 : REAL(dp), DIMENSION(:), INTENT(IN) :: density
2725 : REAL(dp), DIMENSION(:), INTENT(INOUT) :: ks
2726 : REAL(dp), DIMENSION(ma_max*mb_max*mc_max*md_max), &
2727 : INTENT(IN) :: prim
2728 : REAL(dp), DIMENSION(*), INTENT(INOUT) :: pbd, pbc, pad, pac, kbd, kbc, kad, kac
2729 : INTEGER, INTENT(IN) :: iatom, jatom, katom, latom, iset, jset, &
2730 : kset, lset
2731 : INTEGER, DIMENSION(:, :), POINTER :: offset_bd_set, offset_bc_set, &
2732 : offset_ad_set, offset_ac_set
2733 : INTEGER, INTENT(IN) :: atomic_offset_bd, atomic_offset_bc, &
2734 : atomic_offset_ad, atomic_offset_ac
2735 :
2736 : INTEGER :: i, j, ma, mb, mc, md, offset_ac, &
2737 : offset_ad, offset_bc, offset_bd
2738 :
2739 4833577 : IF (jatom >= latom) THEN
2740 4821889 : i = 1
2741 4821889 : offset_bd = offset_bd_set(jset, lset) + atomic_offset_bd - 1
2742 4821889 : j = offset_bd
2743 13141042 : DO md = 1, md_max
2744 25210237 : DO mb = 1, mb_max
2745 12069195 : pbd(i) = +density(j)
2746 12069195 : i = i + 1
2747 20388348 : j = j + 1
2748 : END DO
2749 : END DO
2750 : ELSE
2751 11688 : i = 1
2752 11688 : offset_bd = offset_bd_set(lset, jset) + atomic_offset_bd - 1
2753 24440 : DO md = 1, md_max
2754 12752 : j = offset_bd + md - 1
2755 39836 : DO mb = 1, mb_max
2756 15396 : pbd(i) = -density(j)
2757 15396 : i = i + 1
2758 28148 : j = j + md_max
2759 : END DO
2760 : END DO
2761 : END IF
2762 4833577 : IF (jatom >= katom) THEN
2763 4833577 : i = 1
2764 4833577 : offset_bc = offset_bc_set(jset, kset) + atomic_offset_bc - 1
2765 4833577 : j = offset_bc
2766 14749173 : DO mc = 1, mc_max
2767 28695532 : DO mb = 1, mb_max
2768 13946359 : pbc(i) = -density(j)
2769 13946359 : i = i + 1
2770 23861955 : j = j + 1
2771 : END DO
2772 : END DO
2773 : ELSE
2774 0 : i = 1
2775 0 : offset_bc = offset_bc_set(kset, jset) + atomic_offset_bc - 1
2776 0 : DO mc = 1, mc_max
2777 0 : j = offset_bc + mc - 1
2778 0 : DO mb = 1, mb_max
2779 0 : pbc(i) = density(j)
2780 0 : i = i + 1
2781 0 : j = j + mc_max
2782 : END DO
2783 : END DO
2784 : END IF
2785 4833577 : IF (iatom >= latom) THEN
2786 3629228 : i = 1
2787 3629228 : offset_ad = offset_ad_set(iset, lset) + atomic_offset_ad - 1
2788 3629228 : j = offset_ad
2789 10545331 : DO md = 1, md_max
2790 23549912 : DO ma = 1, ma_max
2791 13004581 : pad(i) = -density(j)
2792 13004581 : i = i + 1
2793 19920684 : j = j + 1
2794 : END DO
2795 : END DO
2796 : ELSE
2797 1204349 : i = 1
2798 1204349 : offset_ad = offset_ad_set(lset, iset) + atomic_offset_ad - 1
2799 2620151 : DO md = 1, md_max
2800 1415802 : j = offset_ad + md - 1
2801 5710689 : DO ma = 1, ma_max
2802 3090538 : pad(i) = density(j)
2803 3090538 : i = i + 1
2804 4506340 : j = j + md_max
2805 : END DO
2806 : END DO
2807 : END IF
2808 4833577 : IF (iatom >= katom) THEN
2809 4662031 : i = 1
2810 4662031 : offset_ac = offset_ac_set(iset, kset) + atomic_offset_ac - 1
2811 4662031 : j = offset_ac
2812 14341459 : DO mc = 1, mc_max
2813 32863793 : DO ma = 1, ma_max
2814 18522334 : pac(i) = +density(j)
2815 18522334 : i = i + 1
2816 28201762 : j = j + 1
2817 : END DO
2818 : END DO
2819 : ELSE
2820 171546 : i = 1
2821 171546 : offset_ac = offset_ac_set(kset, iset) + atomic_offset_ac - 1
2822 407714 : DO mc = 1, mc_max
2823 236168 : j = offset_ac + mc - 1
2824 997088 : DO ma = 1, ma_max
2825 589374 : pac(i) = -density(j)
2826 589374 : i = i + 1
2827 825542 : j = j + mc_max
2828 : END DO
2829 : END DO
2830 : END IF
2831 :
2832 4833577 : CALL contract_block(ma_max, mb_max, mc_max, md_max, kbd, kbc, kad, kac, pbd, pbc, pad, pac, prim, fac*symm_fac)
2833 :
2834 4833577 : IF (jatom >= latom) THEN
2835 : i = 1
2836 : j = offset_bd
2837 13141042 : DO md = 1, md_max
2838 25210237 : DO mb = 1, mb_max
2839 12069195 : !$OMP ATOMIC
2840 : ks(j) = ks(j) + kbd(i)
2841 12069195 : i = i + 1
2842 20388348 : j = j + 1
2843 : END DO
2844 : END DO
2845 : ELSE
2846 : i = 1
2847 24440 : DO md = 1, md_max
2848 12752 : j = offset_bd + md - 1
2849 39836 : DO mb = 1, mb_max
2850 15396 : !$OMP ATOMIC
2851 : ks(j) = ks(j) - kbd(i)
2852 15396 : i = i + 1
2853 28148 : j = j + md_max
2854 : END DO
2855 : END DO
2856 : END IF
2857 4833577 : IF (jatom >= katom) THEN
2858 : i = 1
2859 : j = offset_bc
2860 14749173 : DO mc = 1, mc_max
2861 28695532 : DO mb = 1, mb_max
2862 13946359 : !$OMP ATOMIC
2863 : ks(j) = ks(j) - kbc(i)
2864 13946359 : i = i + 1
2865 23861955 : j = j + 1
2866 : END DO
2867 : END DO
2868 : ELSE
2869 : i = 1
2870 0 : DO mc = 1, mc_max
2871 0 : j = offset_bc + mc - 1
2872 0 : DO mb = 1, mb_max
2873 0 : !$OMP ATOMIC
2874 : ks(j) = ks(j) + kbc(i)
2875 0 : i = i + 1
2876 0 : j = j + mc_max
2877 : END DO
2878 : END DO
2879 : END IF
2880 4833577 : IF (iatom >= latom) THEN
2881 : i = 1
2882 : j = offset_ad
2883 10545331 : DO md = 1, md_max
2884 23549912 : DO ma = 1, ma_max
2885 13004581 : !$OMP ATOMIC
2886 : ks(j) = ks(j) - kad(i)
2887 13004581 : i = i + 1
2888 19920684 : j = j + 1
2889 : END DO
2890 : END DO
2891 : ELSE
2892 : i = 1
2893 2620151 : DO md = 1, md_max
2894 1415802 : j = offset_ad + md - 1
2895 5710689 : DO ma = 1, ma_max
2896 3090538 : !$OMP ATOMIC
2897 : ks(j) = ks(j) + kad(i)
2898 3090538 : i = i + 1
2899 4506340 : j = j + md_max
2900 : END DO
2901 : END DO
2902 : END IF
2903 : ! XXXXXXXXXXXXXXXXXXXXXXXX
2904 4833577 : IF (iatom >= katom) THEN
2905 : i = 1
2906 : j = offset_ac
2907 14341459 : DO mc = 1, mc_max
2908 32863793 : DO ma = 1, ma_max
2909 18522334 : !$OMP ATOMIC
2910 : ks(j) = ks(j) + kac(i)
2911 18522334 : i = i + 1
2912 28201762 : j = j + 1
2913 : END DO
2914 : END DO
2915 : ELSE
2916 : i = 1
2917 407714 : DO mc = 1, mc_max
2918 236168 : j = offset_ac + mc - 1
2919 997088 : DO ma = 1, ma_max
2920 589374 : !$OMP ATOMIC
2921 : ks(j) = ks(j) - kac(i)
2922 589374 : i = i + 1
2923 825542 : j = j + mc_max
2924 : END DO
2925 : END DO
2926 : END IF
2927 :
2928 4833577 : END SUBROUTINE update_fock_matrix_as
2929 :
2930 : ! **************************************************************************************************
2931 : !> \brief ...
2932 : !> \param i ...
2933 : !> \param j ...
2934 : !> \param k ...
2935 : !> \param l ...
2936 : !> \param set_offsets ...
2937 : !> \param atom_offsets ...
2938 : !> \param iset ...
2939 : !> \param jset ...
2940 : !> \param kset ...
2941 : !> \param lset ...
2942 : !> \param ma_max ...
2943 : !> \param mb_max ...
2944 : !> \param mc_max ...
2945 : !> \param md_max ...
2946 : !> \param prim ...
2947 : ! **************************************************************************************************
2948 0 : SUBROUTINE print_integrals(i, j, k, l, set_offsets, atom_offsets, iset, jset, kset, lset, ma_max, mb_max, mc_max, md_max, prim)
2949 : INTEGER :: i, j, k, l
2950 : INTEGER, DIMENSION(:, :, :, :), POINTER :: set_offsets
2951 : INTEGER, DIMENSION(:, :), POINTER :: atom_offsets
2952 : INTEGER :: iset, jset, kset, lset, ma_max, mb_max, &
2953 : mc_max, md_max
2954 : REAL(dp), DIMENSION(ma_max*mb_max*mc_max*md_max), &
2955 : INTENT(IN) :: prim
2956 :
2957 : INTEGER :: iint, ma, mb, mc, md
2958 :
2959 0 : iint = 0
2960 0 : DO md = 1, md_max
2961 0 : DO mc = 1, mc_max
2962 0 : DO mb = 1, mb_max
2963 0 : DO ma = 1, ma_max
2964 0 : iint = iint + 1
2965 0 : IF (ABS(prim(iint)) > 0.0000000000001) &
2966 0 : WRITE (99, *) atom_offsets(i, 1) + ma + set_offsets(iset, 1, i, 1) - 1, &
2967 0 : atom_offsets(j, 1) + ma + set_offsets(jset, 1, j, 1) - 1, &
2968 0 : atom_offsets(k, 1) + ma + set_offsets(kset, 1, k, 1) - 1, &
2969 0 : atom_offsets(l, 1) + ma + set_offsets(lset, 1, l, 1) - 1, &
2970 0 : prim(iint)
2971 : END DO
2972 : END DO
2973 : END DO
2974 : END DO
2975 :
2976 0 : END SUBROUTINE print_integrals
2977 :
2978 : #:include "hfx_get_pmax_val.fypp"
2979 :
2980 : END MODULE hfx_energy_potential
|