Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2025 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief Routines to calculate derivatives with respect to basis function origin
10 : !> \par History
11 : !> 04.2008 created [Manuel Guidon]
12 : !> \author Manuel Guidon
13 : ! **************************************************************************************************
14 : MODULE hfx_derivatives
15 : USE admm_types, ONLY: admm_type
16 : USE atomic_kind_types, ONLY: atomic_kind_type, &
17 : get_atomic_kind_set
18 : USE cell_types, ONLY: cell_type, &
19 : pbc
20 : USE cp_control_types, ONLY: dft_control_type
21 : USE cp_files, ONLY: close_file, &
22 : open_file
23 : USE cp_log_handling, ONLY: cp_get_default_logger, &
24 : cp_logger_type
25 : USE cp_output_handling, ONLY: cp_p_file, &
26 : cp_print_key_finished_output, &
27 : cp_print_key_should_output, &
28 : cp_print_key_unit_nr
29 : USE message_passing, ONLY: mp_para_env_type
30 : USE cp_dbcsr_api, ONLY: dbcsr_get_matrix_type, &
31 : dbcsr_p_type, &
32 : dbcsr_type_antisymmetric
33 : USE gamma, ONLY: init_md_ftable
34 : USE hfx_communication, ONLY: get_full_density
35 : USE hfx_compression_methods, ONLY: hfx_add_mult_cache_elements, &
36 : hfx_add_single_cache_element, &
37 : hfx_decompress_first_cache, &
38 : hfx_flush_last_cache, &
39 : hfx_get_mult_cache_elements, &
40 : hfx_get_single_cache_element, &
41 : hfx_reset_cache_and_container
42 : USE hfx_libint_interface, ONLY: evaluate_deriv_eri
43 : USE hfx_load_balance_methods, ONLY: collect_load_balance_info, &
44 : hfx_load_balance, &
45 : hfx_update_load_balance
46 : USE hfx_pair_list_methods, ONLY: build_atomic_pair_list, &
47 : build_pair_list, &
48 : build_pair_list_pgf, &
49 : build_pgf_product_list, &
50 : pgf_product_list_size
51 : USE hfx_screening_methods, ONLY: update_pmax_mat
52 : USE hfx_types, ONLY: &
53 : alloc_containers, dealloc_containers, hfx_basis_info_type, hfx_basis_type, hfx_cache_type, &
54 : hfx_cell_type, hfx_container_type, hfx_distribution, hfx_general_type, hfx_init_container, &
55 : hfx_load_balance_type, hfx_memory_type, hfx_p_kind, hfx_pgf_list, hfx_pgf_product_list, &
56 : hfx_potential_type, hfx_screen_coeff_type, hfx_screening_type, hfx_task_list_type, &
57 : hfx_type, init_t_c_g0_lmax, log_zero, pair_list_type, pair_set_list_type
58 : USE input_constants, ONLY: do_potential_mix_cl_trunc, &
59 : do_potential_truncated, &
60 : hfx_do_eval_forces
61 : USE input_section_types, ONLY: section_vals_type
62 : USE kinds, ONLY: dp, &
63 : int_8
64 : USE libint_wrapper, ONLY: cp_libint_t
65 : USE machine, ONLY: m_walltime
66 : USE mathconstants, ONLY: fac
67 : USE orbital_pointers, ONLY: nco, &
68 : ncoset, &
69 : nso
70 : USE particle_types, ONLY: particle_type
71 : USE qs_environment_types, ONLY: get_qs_env, &
72 : qs_environment_type
73 : USE qs_force_types, ONLY: qs_force_type
74 : USE t_c_g0, ONLY: init
75 : USE util, ONLY: sort
76 : USE virial_types, ONLY: virial_type
77 :
78 : !$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num, omp_get_num_threads
79 :
80 : #include "./base/base_uses.f90"
81 :
82 : IMPLICIT NONE
83 : PRIVATE
84 :
85 : PUBLIC :: derivatives_four_center
86 :
87 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'hfx_derivatives'
88 :
89 : !***
90 :
91 : CONTAINS
92 :
93 : ! **************************************************************************************************
94 : !> \brief computes four center derivatives for a full basis set and updates the
95 : !> forces%fock_4c arrays. Uses all 8 eri symmetries
96 : !> \param qs_env ...
97 : !> \param rho_ao density matrix
98 : !> \param rho_ao_resp relaxed density matrix from response
99 : !> \param hfx_section HFX input section
100 : !> \param para_env para_env
101 : !> \param irep ID of HFX replica
102 : !> \param use_virial ...
103 : !> \param adiabatic_rescale_factor parameter used for MCY3 hybrid
104 : !> \param resp_only Calculate only forces from response density
105 : !> \par History
106 : !> 06.2007 created [Manuel Guidon]
107 : !> 08.2007 optimized load balance [Manuel Guidon]
108 : !> 02.2009 completely rewritten screening part [Manuel Guidon]
109 : !> 12.2017 major bug fix. removed wrong cycle that was causing segfault.
110 : !> see https://groups.google.com/forum/#!topic/cp2k/pc6B14XOALY
111 : !> [Tobias Binninger + Valery Weber]
112 : !> \author Manuel Guidon
113 : ! **************************************************************************************************
114 1838 : SUBROUTINE derivatives_four_center(qs_env, rho_ao, rho_ao_resp, hfx_section, para_env, &
115 : irep, use_virial, adiabatic_rescale_factor, resp_only, &
116 : external_x_data, nspins)
117 :
118 : TYPE(qs_environment_type), POINTER :: qs_env
119 : TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: rho_ao
120 : TYPE(dbcsr_p_type), DIMENSION(:), POINTER :: rho_ao_resp
121 : TYPE(section_vals_type), POINTER :: hfx_section
122 : TYPE(mp_para_env_type), POINTER :: para_env
123 : INTEGER, INTENT(IN) :: irep
124 : LOGICAL, INTENT(IN) :: use_virial
125 : REAL(dp), INTENT(IN), OPTIONAL :: adiabatic_rescale_factor
126 : LOGICAL, INTENT(IN), OPTIONAL :: resp_only
127 : TYPE(hfx_type), DIMENSION(:, :), POINTER, OPTIONAL :: external_x_data
128 : INTEGER, INTENT(IN), OPTIONAL :: nspins
129 :
130 : CHARACTER(LEN=*), PARAMETER :: routineN = 'derivatives_four_center'
131 :
132 : INTEGER :: atomic_offset_ac, atomic_offset_ad, atomic_offset_bc, atomic_offset_bd, bin, &
133 : bits_max_val, buffer_left, buffer_size, buffer_start, cache_size, coord, current_counter, &
134 : forces_map(4, 2), handle, handle_bin, handle_getP, handle_load, handle_main, i, i_atom, &
135 : i_list_ij, i_list_kl, i_set_list_ij, i_set_list_ij_start, i_set_list_ij_stop, &
136 : i_set_list_kl, i_set_list_kl_start, i_set_list_kl_stop, i_thread, iatom, iatom_block, &
137 : iatom_end, iatom_start, ikind, iset, iw, j, j_atom, jatom, jatom_block, jatom_end, &
138 : jatom_start, jkind, jset, k, k_atom, katom, katom_block, katom_end, katom_start
139 : INTEGER :: kind_kind_idx, kkind, kset, l_atom, l_max, latom, latom_block, latom_end, &
140 : latom_start, lkind, lset, max_am, max_pgf, max_set, my_bin_id, my_bin_size, my_thread_id, &
141 : n_threads, natom, nbits, ncob, ncos_max, nints, nkimages, nkind, nneighbors, nseta, &
142 : nsetb, nsgf_max, my_nspins, sgfb, shm_task_counter, shm_total_bins, sphi_a_u1, sphi_a_u2, &
143 : sphi_a_u3, sphi_b_u1, sphi_b_u2, sphi_b_u3, sphi_c_u1, sphi_c_u2, sphi_c_u3, sphi_d_u1, &
144 : sphi_d_u2, sphi_d_u3, swap_id, tmp_i4, unit_id
145 : INTEGER(int_8) :: atom_block, counter, estimate_to_store_int, max_val_memory, &
146 : mem_compression_counter, mem_eris, mem_max_val, my_current_counter, my_istart, &
147 : n_processes, nblocks, ncpu, neris_incore, neris_onthefly, neris_tmp, neris_total, &
148 : nprim_ints, shm_neris_incore, shm_neris_onthefly, shm_neris_total, shm_nprim_ints, &
149 : shm_stor_count_max_val, shm_storage_counter_integrals, stor_count_max_val, &
150 : storage_counter_integrals, tmp_block, tmp_i8(6)
151 1838 : INTEGER(int_8), ALLOCATABLE, DIMENSION(:) :: tmp_task_list_cost
152 1838 : INTEGER, ALLOCATABLE, DIMENSION(:) :: atom_of_kind, kind_of, last_sgf_global, &
153 1838 : nimages, tmp_index
154 1838 : INTEGER, DIMENSION(:), POINTER :: la_max, la_min, lb_max, lb_min, lc_max, lc_min, ld_max, &
155 3676 : ld_min, npgfa, npgfb, npgfc, npgfd, nsgfa, nsgfb, nsgfc, nsgfd, shm_block_offset
156 3676 : INTEGER, DIMENSION(:, :), POINTER :: first_sgfb, nsgfl_a, nsgfl_b, nsgfl_c, nsgfl_d, &
157 1838 : offset_ac_set, offset_ad_set, offset_bc_set, offset_bd_set, shm_atomic_block_offset
158 : INTEGER, DIMENSION(:, :), POINTER, SAVE :: shm_is_assoc_atomic_block
159 1838 : INTEGER, DIMENSION(:, :, :, :), POINTER :: shm_set_offset
160 : INTEGER, SAVE :: shm_number_of_p_entries
161 : LOGICAL :: bins_left, buffer_overflow, do_dynamic_load_balancing, do_it, do_periodic, &
162 : do_print_load_balance_info, is_anti_symmetric, my_resp_only, screen_pmat_forces, &
163 : treat_forces_in_core, use_disk_storage, with_resp_density
164 1838 : LOGICAL, DIMENSION(:, :), POINTER :: shm_atomic_pair_list
165 : REAL(dp) :: bintime_start, bintime_stop, cartesian_estimate, cartesian_estimate_virial, &
166 : compression_factor, eps_schwarz, eps_storage, fac, hf_fraction, ln_10, log10_eps_schwarz, &
167 : log10_pmax, log_2, max_contraction_val, max_val1, max_val2, max_val2_set, &
168 : my_adiabatic_rescale_factor, pmax_atom, pmax_blocks, pmax_entry, pmax_tmp, ra(3), rab2, &
169 : rb(3), rc(3), rcd2, rd(3), spherical_estimate, spherical_estimate_virial, symm_fac, &
170 : tmp_virial(3, 3)
171 1838 : REAL(dp), ALLOCATABLE, DIMENSION(:) :: ede_buffer1, ede_buffer2, ede_primitives_tmp, &
172 1838 : ede_primitives_tmp_virial, ede_work, ede_work2, ede_work2_virial, ede_work_forces, &
173 3676 : ede_work_virial, pac_buf, pac_buf_beta, pac_buf_resp, pac_buf_resp_beta, pad_buf, &
174 1838 : pad_buf_beta, pad_buf_resp, pad_buf_resp_beta, pbc_buf, pbc_buf_beta, pbc_buf_resp, &
175 1838 : pbc_buf_resp_beta, pbd_buf, pbd_buf_beta, pbd_buf_resp, pbd_buf_resp_beta
176 1838 : REAL(dp), ALLOCATABLE, DIMENSION(:), TARGET :: primitive_forces, primitive_forces_virial
177 1838 : REAL(dp), DIMENSION(:), POINTER :: full_density_resp, &
178 3676 : full_density_resp_beta, T2
179 1838 : REAL(dp), DIMENSION(:, :), POINTER :: full_density_alpha, full_density_beta, &
180 1838 : max_contraction, ptr_p_1, ptr_p_2, ptr_p_3, ptr_p_4, shm_pmax_atom, shm_pmax_block, &
181 1838 : sphi_b, zeta, zetb, zetc, zetd
182 1838 : REAL(dp), DIMENSION(:, :, :), POINTER :: sphi_a_ext_set, sphi_b_ext_set, &
183 1838 : sphi_c_ext_set, sphi_d_ext_set
184 1838 : REAL(dp), DIMENSION(:, :, :, :), POINTER :: sphi_a_ext, sphi_b_ext, sphi_c_ext, &
185 1838 : sphi_d_ext
186 : REAL(KIND=dp) :: coeffs_kind_max0
187 : TYPE(admm_type), POINTER :: admm_env
188 1838 : TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
189 : TYPE(cell_type), POINTER :: cell
190 : TYPE(cp_libint_t) :: private_deriv
191 : TYPE(cp_logger_type), POINTER :: logger
192 : TYPE(dft_control_type), POINTER :: dft_control
193 : TYPE(hfx_basis_info_type), POINTER :: basis_info
194 1838 : TYPE(hfx_basis_type), DIMENSION(:), POINTER :: basis_parameter
195 1838 : TYPE(hfx_cache_type), DIMENSION(:), POINTER :: integral_caches
196 : TYPE(hfx_cache_type), POINTER :: maxval_cache
197 1838 : TYPE(hfx_container_type), DIMENSION(:), POINTER :: integral_containers
198 : TYPE(hfx_container_type), POINTER :: maxval_container
199 : TYPE(hfx_distribution), POINTER :: distribution_forces
200 : TYPE(hfx_general_type) :: general_parameter
201 : TYPE(hfx_load_balance_type), POINTER :: load_balance_parameter
202 : TYPE(hfx_memory_type), POINTER :: memory_parameter
203 1838 : TYPE(hfx_p_kind), DIMENSION(:), POINTER :: shm_initial_p
204 1838 : TYPE(hfx_pgf_list), ALLOCATABLE, DIMENSION(:) :: pgf_list_ij, pgf_list_kl
205 : TYPE(hfx_pgf_product_list), ALLOCATABLE, &
206 1838 : DIMENSION(:) :: pgf_product_list
207 : TYPE(hfx_potential_type) :: potential_parameter
208 : TYPE(hfx_screen_coeff_type), DIMENSION(:, :), &
209 1838 : POINTER :: screen_coeffs_kind, tmp_R_1, tmp_R_2, &
210 1838 : tmp_screen_pgf1, tmp_screen_pgf2
211 : TYPE(hfx_screen_coeff_type), &
212 1838 : DIMENSION(:, :, :, :), POINTER :: screen_coeffs_set
213 : TYPE(hfx_screen_coeff_type), &
214 1838 : DIMENSION(:, :, :, :, :, :), POINTER :: radii_pgf, screen_coeffs_pgf
215 : TYPE(hfx_screening_type) :: screening_parameter
216 1838 : TYPE(hfx_task_list_type), DIMENSION(:), POINTER :: shm_task_list, tmp_task_list
217 : TYPE(hfx_type), POINTER :: actual_x_data
218 1838 : TYPE(hfx_type), DIMENSION(:, :), POINTER :: my_x_data
219 : TYPE(pair_list_type) :: list_ij, list_kl
220 : TYPE(pair_set_list_type), ALLOCATABLE, &
221 1838 : DIMENSION(:) :: set_list_ij, set_list_kl
222 1838 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
223 1838 : TYPE(qs_force_type), DIMENSION(:), POINTER :: force
224 : TYPE(virial_type), POINTER :: virial
225 :
226 1838 : NULLIFY (dft_control, admm_env)
227 :
228 1838 : with_resp_density = .FALSE.
229 650 : IF (ASSOCIATED(rho_ao_resp)) with_resp_density = .TRUE.
230 :
231 1838 : my_resp_only = .FALSE.
232 1838 : IF (PRESENT(resp_only)) my_resp_only = resp_only
233 :
234 1838 : is_anti_symmetric = dbcsr_get_matrix_type(rho_ao(1, 1)%matrix) == dbcsr_type_antisymmetric
235 :
236 : CALL get_qs_env(qs_env, &
237 : admm_env=admm_env, &
238 : particle_set=particle_set, &
239 : atomic_kind_set=atomic_kind_set, &
240 : cell=cell, &
241 1838 : dft_control=dft_control)
242 :
243 1838 : IF (PRESENT(nspins)) THEN
244 208 : my_nspins = nspins
245 : ELSE
246 1630 : my_nspins = dft_control%nspins
247 : END IF
248 1838 : nkimages = dft_control%nimages
249 1838 : CPASSERT(nkimages == 1)
250 :
251 1838 : my_x_data => qs_env%x_data
252 1838 : IF (PRESENT(external_x_data)) my_x_data => external_x_data
253 :
254 : !! One atom systems have no contribution to forces
255 1838 : IF (SIZE(particle_set, 1) == 1) THEN
256 4 : IF (.NOT. use_virial) RETURN
257 : END IF
258 :
259 1834 : CALL timeset(routineN, handle)
260 : !! Calculate l_max used in fgamma , because init_md_ftable is definitely not thread safe
261 1834 : nkind = SIZE(atomic_kind_set, 1)
262 1834 : l_max = 0
263 5236 : DO ikind = 1, nkind
264 14432 : l_max = MAX(l_max, MAXVAL(my_x_data(1, 1)%basis_parameter(ikind)%lmax))
265 : END DO
266 1834 : l_max = 4*l_max + 1
267 1834 : CALL init_md_ftable(l_max)
268 :
269 1834 : IF (my_x_data(1, 1)%potential_parameter%potential_type == do_potential_truncated .OR. &
270 : my_x_data(1, 1)%potential_parameter%potential_type == do_potential_mix_cl_trunc) THEN
271 744 : IF (l_max > init_t_c_g0_lmax) THEN
272 200 : CALL open_file(unit_number=unit_id, file_name=my_x_data(1, 1)%potential_parameter%filename)
273 200 : CALL init(l_max, unit_id, para_env%mepos, para_env)
274 200 : CALL close_file(unit_id)
275 200 : init_t_c_g0_lmax = l_max
276 : END IF
277 : END IF
278 :
279 1834 : n_threads = 1
280 1834 : !$ n_threads = omp_get_max_threads()
281 :
282 1834 : shm_neris_total = 0
283 1834 : shm_nprim_ints = 0
284 1834 : shm_neris_onthefly = 0
285 1834 : shm_storage_counter_integrals = 0
286 1834 : shm_neris_incore = 0
287 1834 : shm_stor_count_max_val = 0
288 :
289 : !! get force array
290 1834 : CALL get_qs_env(qs_env=qs_env, force=force, virial=virial)
291 :
292 1834 : my_adiabatic_rescale_factor = 1.0_dp
293 1834 : IF (PRESENT(adiabatic_rescale_factor)) THEN
294 216 : my_adiabatic_rescale_factor = adiabatic_rescale_factor
295 : END IF
296 :
297 : !$OMP PARALLEL DEFAULT(OMP_DEFAULT_NONE_WITH_OOP) &
298 : !$OMP SHARED(qs_env,&
299 : !$OMP rho_ao,&
300 : !$OMP rho_ao_resp,&
301 : !$OMP with_resp_density,&
302 : !$OMP hfx_section,&
303 : !$OMP para_env,&
304 : !$OMP irep,&
305 : !$OMP my_resp_only,&
306 : !$OMP ncoset,&
307 : !$OMP nco,&
308 : !$OMP nso,&
309 : !$OMP n_threads,&
310 : !$OMP full_density_alpha,&
311 : !$OMP full_density_resp,&
312 : !$OMP full_density_resp_beta,&
313 : !$OMP full_density_beta,&
314 : !$OMP shm_initial_p,&
315 : !$OMP shm_is_assoc_atomic_block,&
316 : !$OMP shm_number_of_p_entries,&
317 : !$OMP force,&
318 : !$OMP virial,&
319 : !$OMP my_adiabatic_rescale_factor,&
320 : !$OMP shm_neris_total,&
321 : !$OMP shm_neris_onthefly,&
322 : !$OMP shm_storage_counter_integrals,&
323 : !$OMP shm_neris_incore,&
324 : !$OMP shm_nprim_ints,&
325 : !$OMP shm_stor_count_max_val,&
326 : !$OMP cell,&
327 : !$OMP screen_coeffs_set,&
328 : !$OMP screen_coeffs_kind,&
329 : !$OMP screen_coeffs_pgf,&
330 : !$OMP pgf_product_list_size,&
331 : !$OMP nkind,&
332 : !$OMP is_anti_symmetric,&
333 : !$OMP radii_pgf,&
334 : !$OMP shm_atomic_block_offset,&
335 : !$OMP shm_set_offset,&
336 : !$OMP shm_block_offset,&
337 : !$OMP shm_task_counter,&
338 : !$OMP shm_task_list,&
339 : !$OMP shm_total_bins,&
340 : !$OMP shm_pmax_block,&
341 : !$OMP shm_atomic_pair_list,&
342 : !$OMP shm_pmax_atom,&
343 : !$OMP use_virial,&
344 : !$OMP do_print_load_balance_info,&
345 : !$OMP nkimages,my_nspins,my_x_data)&
346 : !$OMP PRIVATE(actual_x_data,atomic_kind_set,atomic_offset_ac,atomic_offset_ad,atomic_offset_bc,&
347 : !$OMP atomic_offset_bd,atom_of_kind,basis_info,&
348 : !$OMP basis_parameter,bin,bins_left,bintime_start,bintime_stop,bits_max_val,buffer_left,buffer_overflow,buffer_size,&
349 : !$OMP buffer_start,cache_size,cartesian_estimate,cartesian_estimate_virial,&
350 : !$OMP coeffs_kind_max0,compression_factor,counter,current_counter,&
351 : !$OMP distribution_forces,do_dynamic_load_balancing,do_it,do_periodic,ede_buffer1,ede_buffer2,&
352 : !$OMP ede_primitives_tmp,ede_primitives_tmp_virial,&
353 : !$OMP ede_work,ede_work2,ede_work2_virial,ede_work_forces,ede_work_virial,eps_schwarz,eps_storage,estimate_to_store_int,&
354 : !$OMP fac,first_sgfb,forces_map,general_parameter,handle_bin,handle_getp,handle_load,&
355 : !$OMP handle_main,hf_fraction,i_atom,iatom_end,iatom_start,ikind,integral_caches,integral_containers,&
356 : !$OMP i_set_list_ij_start,i_set_list_ij_stop,i_set_list_kl_start,i_set_list_kl_stop,i_thread,iw,j_atom,jatom_end,&
357 : !$OMP jatom_start,jkind,katom,k_atom,katom_block,katom_end,katom_start,kind_kind_idx,&
358 : !$OMP kind_of,kkind,kset,la_max,la_min,last_sgf_global,latom,l_atom,&
359 : !$OMP latom_block,latom_end,latom_start,lb_max,lb_min,lc_max,lc_min,ld_max,&
360 : !$OMP ld_min,list_ij,list_kl,lkind,ln_10,load_balance_parameter,log10_eps_schwarz,log10_pmax,&
361 : !$OMP log_2,logger,lset,max_am,max_contraction,max_contraction_val,max_pgf,max_set,&
362 : !$OMP max_val1,max_val2,max_val2_set,maxval_cache,maxval_container,max_val_memory,mem_compression_counter,mem_eris,&
363 : !$OMP mem_max_val,memory_parameter,my_bin_id,my_bin_size,my_current_counter,my_istart,my_thread_id,natom,&
364 : !$OMP nbits,nblocks,ncob,ncos_max,ncpu,neris_incore,neris_onthefly,neris_tmp,&
365 : !$OMP neris_total,nimages,nints,nneighbors,npgfa,npgfb,npgfc,npgfd,&
366 : !$OMP nprim_ints,n_processes,nseta,nsetb,nsgfa,nsgfb,nsgfc,nsgfd,&
367 : !$OMP nsgfl_a,nsgfl_b,nsgfl_c,nsgfl_d,nsgf_max,offset_ac_set,offset_ad_set,&
368 : !$OMP offset_bc_set,offset_bd_set,pac_buf,pac_buf_beta,pac_buf_resp,pac_buf_resp_beta,pad_buf,pad_buf_beta,pad_buf_resp,&
369 : !$OMP pad_buf_resp_beta,particle_set,pbc_buf,pbc_buf_beta,pbc_buf_resp,pbc_buf_resp_beta,pbd_buf,pbd_buf_beta, &
370 : !$OMP pbd_buf_resp, pbd_buf_resp_beta, pgf_list_ij,&
371 : !$OMP pgf_list_kl,pgf_product_list,pmax_atom,pmax_blocks,pmax_entry,pmax_tmp,potential_parameter,primitive_forces,&
372 : !$OMP primitive_forces_virial,private_deriv,ptr_p_1,ptr_p_2,ptr_p_3,ptr_p_4,ra,rab2,&
373 : !$OMP rb,rc,rcd2,rd,screening_parameter,screen_pmat_forces,set_list_ij,set_list_kl,&
374 : !$OMP sgfb,spherical_estimate,spherical_estimate_virial,sphi_a_ext,sphi_a_ext_set,sphi_a_u1,sphi_a_u2,sphi_a_u3,&
375 : !$OMP sphi_b,sphi_b_ext,sphi_b_ext_set,sphi_b_u1,sphi_b_u2,sphi_b_u3,sphi_c_ext,sphi_c_ext_set,&
376 : !$OMP sphi_c_u1,sphi_c_u2,sphi_c_u3,sphi_d_ext,sphi_d_ext_set,sphi_d_u1,sphi_d_u2,sphi_d_u3,&
377 : !$OMP storage_counter_integrals,stor_count_max_val,swap_id,symm_fac,t2,tmp_block,tmp_i8, tmp_i4,&
378 : !$OMP tmp_index,tmp_r_1,tmp_r_2,tmp_screen_pgf1,tmp_screen_pgf2,tmp_task_list,tmp_task_list_cost,tmp_virial,&
379 1834 : !$OMP treat_forces_in_core,use_disk_storage,zeta,zetb,zetc,zetd)
380 :
381 : i_thread = 0
382 : !$ i_thread = omp_get_thread_num()
383 :
384 : ln_10 = LOG(10.0_dp)
385 : log_2 = LOG10(2.0_dp)
386 : actual_x_data => my_x_data(irep, i_thread + 1)
387 : do_periodic = actual_x_data%periodic_parameter%do_periodic
388 :
389 : screening_parameter = actual_x_data%screening_parameter
390 : general_parameter = actual_x_data%general_parameter
391 : potential_parameter = actual_x_data%potential_parameter
392 : basis_info => actual_x_data%basis_info
393 :
394 : load_balance_parameter => actual_x_data%load_balance_parameter
395 : basis_parameter => actual_x_data%basis_parameter
396 :
397 : ! IF(with_resp_density) THEN
398 : ! ! here we also do a copy of the original load_balance_parameter
399 : ! ! since if MP2 forces are required after the calculation of the HFX
400 : ! ! derivatives we need to calculate again the SCF energy.
401 : ! ALLOCATE(load_balance_parameter_energy)
402 : ! load_balance_parameter_energy = actual_x_data%load_balance_parameter
403 : ! END IF
404 :
405 : memory_parameter => actual_x_data%memory_parameter
406 :
407 : cache_size = memory_parameter%cache_size
408 : bits_max_val = memory_parameter%bits_max_val
409 :
410 : use_disk_storage = .FALSE.
411 :
412 : CALL get_qs_env(qs_env=qs_env, &
413 : atomic_kind_set=atomic_kind_set, &
414 : particle_set=particle_set)
415 :
416 : max_set = basis_info%max_set
417 : max_am = basis_info%max_am
418 : natom = SIZE(particle_set, 1)
419 :
420 : treat_forces_in_core = memory_parameter%treat_forces_in_core
421 :
422 : hf_fraction = general_parameter%fraction
423 : hf_fraction = hf_fraction*my_adiabatic_rescale_factor
424 : eps_schwarz = screening_parameter%eps_schwarz_forces
425 : IF (eps_schwarz < 0.0_dp) THEN
426 : log10_eps_schwarz = log_zero
427 : ELSE
428 : !! ** make eps_schwarz tighter in case of a stress calculation
429 : IF (use_virial) eps_schwarz = eps_schwarz/actual_x_data%periodic_parameter%R_max_stress
430 : log10_eps_schwarz = LOG10(eps_schwarz)
431 : END IF
432 : screen_pmat_forces = screening_parameter%do_p_screening_forces
433 : ! don't do density screening in case of response forces
434 : IF (with_resp_density) screen_pmat_forces = .FALSE.
435 :
436 : eps_storage = eps_schwarz*memory_parameter%eps_storage_scaling
437 :
438 : counter = 0_int_8
439 :
440 : !! Copy the libint_guy
441 : private_deriv = actual_x_data%lib_deriv
442 :
443 : !! Get screening parameter
444 :
445 : CALL get_atomic_kind_set(atomic_kind_set=atomic_kind_set, &
446 : atom_of_kind=atom_of_kind, &
447 : kind_of=kind_of)
448 :
449 : !! Create helper arrray for mapping local basis functions to global ones
450 : ALLOCATE (last_sgf_global(0:natom))
451 : last_sgf_global(0) = 0
452 : DO iatom = 1, natom
453 : ikind = kind_of(iatom)
454 : last_sgf_global(iatom) = last_sgf_global(iatom - 1) + basis_parameter(ikind)%nsgf_total
455 : END DO
456 :
457 : ALLOCATE (max_contraction(max_set, natom))
458 :
459 : max_contraction = 0.0_dp
460 : max_pgf = 0
461 : DO jatom = 1, natom
462 : jkind = kind_of(jatom)
463 : lb_max => basis_parameter(jkind)%lmax
464 : npgfb => basis_parameter(jkind)%npgf
465 : nsetb = basis_parameter(jkind)%nset
466 : first_sgfb => basis_parameter(jkind)%first_sgf
467 : sphi_b => basis_parameter(jkind)%sphi
468 : nsgfb => basis_parameter(jkind)%nsgf
469 : DO jset = 1, nsetb
470 : ! takes the primitive to contracted transformation into account
471 : ncob = npgfb(jset)*ncoset(lb_max(jset))
472 : sgfb = first_sgfb(1, jset)
473 : ! if the primitives are assumed to be all of max_val2, max_val2*p2s_b becomes
474 : ! the maximum value after multiplication with sphi_b
475 : max_contraction(jset, jatom) = MAXVAL([(SUM(ABS(sphi_b(1:ncob, i))), i=sgfb, sgfb + nsgfb(jset) - 1)])
476 : max_pgf = MAX(max_pgf, npgfb(jset))
477 : END DO
478 : END DO
479 :
480 : ncpu = para_env%num_pe
481 : n_processes = ncpu*n_threads
482 :
483 : !! initialize some counters
484 : neris_total = 0_int_8
485 : neris_incore = 0_int_8
486 : neris_onthefly = 0_int_8
487 : mem_eris = 0_int_8
488 : mem_max_val = 0_int_8
489 : compression_factor = 0.0_dp
490 : nprim_ints = 0_int_8
491 : neris_tmp = 0_int_8
492 : max_val_memory = 1_int_8
493 :
494 : !$OMP MASTER
495 : !! Set pointer for is_assoc helper
496 : shm_is_assoc_atomic_block => actual_x_data%is_assoc_atomic_block
497 : shm_number_of_p_entries = actual_x_data%number_of_p_entries
498 : shm_atomic_block_offset => actual_x_data%atomic_block_offset
499 : shm_set_offset => actual_x_data%set_offset
500 : shm_block_offset => actual_x_data%block_offset
501 :
502 : NULLIFY (full_density_alpha)
503 : NULLIFY (full_density_beta)
504 : ALLOCATE (full_density_alpha(shm_block_offset(ncpu + 1), nkimages))
505 :
506 : !! Get the full density from all the processors
507 : CALL timeset(routineN//"_getP", handle_getP)
508 : CALL get_full_density(para_env, full_density_alpha(:, 1), rho_ao(1, 1)%matrix, shm_number_of_p_entries, &
509 : shm_block_offset, &
510 : kind_of, basis_parameter, get_max_vals_spin=.FALSE., &
511 : antisymmetric=is_anti_symmetric)
512 : ! for now only closed shell case
513 : IF (with_resp_density) THEN
514 : NULLIFY (full_density_resp)
515 : ALLOCATE (full_density_resp(shm_block_offset(ncpu + 1)))
516 : CALL get_full_density(para_env, full_density_resp, rho_ao_resp(1)%matrix, shm_number_of_p_entries, &
517 : shm_block_offset, &
518 : kind_of, basis_parameter, get_max_vals_spin=.FALSE., &
519 : antisymmetric=is_anti_symmetric)
520 : END IF
521 :
522 : IF (my_nspins == 2) THEN
523 : ALLOCATE (full_density_beta(shm_block_offset(ncpu + 1), 1))
524 : CALL get_full_density(para_env, full_density_beta(:, 1), rho_ao(2, 1)%matrix, shm_number_of_p_entries, &
525 : shm_block_offset, &
526 : kind_of, basis_parameter, get_max_vals_spin=.FALSE., &
527 : antisymmetric=is_anti_symmetric)
528 : ! With resp density
529 : IF (with_resp_density) THEN
530 : NULLIFY (full_density_resp_beta)
531 : ALLOCATE (full_density_resp_beta(shm_block_offset(ncpu + 1)))
532 : CALL get_full_density(para_env, full_density_resp_beta, rho_ao_resp(2)%matrix, shm_number_of_p_entries, &
533 : shm_block_offset, &
534 : kind_of, basis_parameter, get_max_vals_spin=.FALSE., &
535 : antisymmetric=is_anti_symmetric)
536 : END IF
537 :
538 : END IF
539 : CALL timestop(handle_getP)
540 :
541 : !! Calculate max entries for screening on actual density. If screen_p_mat_forces = FALSE, the
542 : !! matrix is initialized to 1.0
543 : IF (screen_pmat_forces) THEN
544 : NULLIFY (shm_initial_p)
545 : shm_initial_p => actual_x_data%initial_p_forces
546 : shm_pmax_atom => actual_x_data%pmax_atom_forces
547 : IF (memory_parameter%recalc_forces) THEN
548 : CALL update_pmax_mat(shm_initial_p, &
549 : actual_x_data%map_atom_to_kind_atom, &
550 : actual_x_data%set_offset, &
551 : actual_x_data%atomic_block_offset, &
552 : shm_pmax_atom, &
553 : full_density_alpha, full_density_beta, &
554 : natom, kind_of, basis_parameter, &
555 : nkind, actual_x_data%is_assoc_atomic_block)
556 : END IF
557 : END IF
558 :
559 : ! restore as full density the HF density
560 : ! maybe in the future
561 : IF (with_resp_density .AND. .NOT. my_resp_only) THEN
562 : full_density_alpha(:, 1) = full_density_alpha(:, 1) - full_density_resp
563 : IF (my_nspins == 2) THEN
564 : full_density_beta(:, 1) = &
565 : full_density_beta(:, 1) - full_density_resp_beta
566 : END IF
567 : ! full_density_resp=full_density+full_density_resp
568 : END IF
569 :
570 : screen_coeffs_set => actual_x_data%screen_funct_coeffs_set
571 : screen_coeffs_kind => actual_x_data%screen_funct_coeffs_kind
572 : screen_coeffs_pgf => actual_x_data%screen_funct_coeffs_pgf
573 : radii_pgf => actual_x_data%pair_dist_radii_pgf
574 : shm_pmax_block => actual_x_data%pmax_block
575 :
576 : !$OMP END MASTER
577 : !$OMP BARRIER
578 :
579 : !$OMP MASTER
580 : CALL timeset(routineN//"_load", handle_load)
581 : !$OMP END MASTER
582 : !! Load balance the work
583 : IF (actual_x_data%memory_parameter%recalc_forces) THEN
584 : IF (actual_x_data%b_first_load_balance_forces) THEN
585 : CALL hfx_load_balance(actual_x_data, eps_schwarz, particle_set, max_set, para_env, &
586 : screen_coeffs_set, screen_coeffs_kind, &
587 : shm_is_assoc_atomic_block, do_periodic, &
588 : load_balance_parameter, kind_of, basis_parameter, shm_initial_p, &
589 : shm_pmax_atom, i_thread, n_threads, &
590 : cell, screen_pmat_forces, actual_x_data%map_atom_to_kind_atom, &
591 : nkind, hfx_do_eval_forces, shm_pmax_block, use_virial)
592 : actual_x_data%b_first_load_balance_forces = .FALSE.
593 : ELSE
594 : CALL hfx_update_load_balance(actual_x_data, para_env, &
595 : load_balance_parameter, &
596 : i_thread, n_threads, hfx_do_eval_forces)
597 : END IF
598 : END IF
599 : !$OMP MASTER
600 : CALL timestop(handle_load)
601 : !$OMP END MASTER
602 :
603 : !! precompute maximum nco and allocate scratch
604 : ncos_max = 0
605 : nsgf_max = 0
606 : DO iatom = 1, natom
607 : ikind = kind_of(iatom)
608 : nseta = basis_parameter(ikind)%nset
609 : npgfa => basis_parameter(ikind)%npgf
610 : la_max => basis_parameter(ikind)%lmax
611 : nsgfa => basis_parameter(ikind)%nsgf
612 : DO iset = 1, nseta
613 : ncos_max = MAX(ncos_max, ncoset(la_max(iset)))
614 : nsgf_max = MAX(nsgf_max, nsgfa(iset))
615 : END DO
616 : END DO
617 :
618 : !! Allocate work arrays
619 : ALLOCATE (primitive_forces(12*nsgf_max**4))
620 : primitive_forces = 0.0_dp
621 :
622 : ! ** Allocate buffers for pgf_lists
623 : nneighbors = SIZE(actual_x_data%neighbor_cells)
624 : ALLOCATE (pgf_list_ij(max_pgf**2))
625 : ALLOCATE (pgf_list_kl(max_pgf**2))
626 : !$OMP ATOMIC READ
627 : tmp_i4 = pgf_product_list_size
628 : ALLOCATE (pgf_product_list(tmp_i4))
629 : ALLOCATE (nimages(max_pgf**2))
630 :
631 : DO i = 1, max_pgf**2
632 : ALLOCATE (pgf_list_ij(i)%image_list(nneighbors))
633 : ALLOCATE (pgf_list_kl(i)%image_list(nneighbors))
634 : END DO
635 :
636 : ALLOCATE (pbd_buf(nsgf_max**2))
637 : ALLOCATE (pbc_buf(nsgf_max**2))
638 : ALLOCATE (pad_buf(nsgf_max**2))
639 : ALLOCATE (pac_buf(nsgf_max**2))
640 : IF (with_resp_density) THEN
641 : ALLOCATE (pbd_buf_resp(nsgf_max**2))
642 : ALLOCATE (pbc_buf_resp(nsgf_max**2))
643 : ALLOCATE (pad_buf_resp(nsgf_max**2))
644 : ALLOCATE (pac_buf_resp(nsgf_max**2))
645 : END IF
646 : IF (my_nspins == 2) THEN
647 : ALLOCATE (pbd_buf_beta(nsgf_max**2))
648 : ALLOCATE (pbc_buf_beta(nsgf_max**2))
649 : ALLOCATE (pad_buf_beta(nsgf_max**2))
650 : ALLOCATE (pac_buf_beta(nsgf_max**2))
651 : IF (with_resp_density) THEN
652 : ALLOCATE (pbd_buf_resp_beta(nsgf_max**2))
653 : ALLOCATE (pbc_buf_resp_beta(nsgf_max**2))
654 : ALLOCATE (pad_buf_resp_beta(nsgf_max**2))
655 : ALLOCATE (pac_buf_resp_beta(nsgf_max**2))
656 : END IF
657 : END IF
658 :
659 : ALLOCATE (ede_work(ncos_max**4*12))
660 : ALLOCATE (ede_work2(ncos_max**4*12))
661 : ALLOCATE (ede_work_forces(ncos_max**4*12))
662 : ALLOCATE (ede_buffer1(ncos_max**4))
663 : ALLOCATE (ede_buffer2(ncos_max**4))
664 : ALLOCATE (ede_primitives_tmp(nsgf_max**4))
665 :
666 : IF (use_virial) THEN
667 : ALLOCATE (primitive_forces_virial(12*nsgf_max**4*3))
668 : primitive_forces_virial = 0.0_dp
669 : ALLOCATE (ede_work_virial(ncos_max**4*12*3))
670 : ALLOCATE (ede_work2_virial(ncos_max**4*12*3))
671 : ALLOCATE (ede_primitives_tmp_virial(nsgf_max**4))
672 : tmp_virial = 0.0_dp
673 : ELSE
674 : ! dummy allocation
675 : ALLOCATE (primitive_forces_virial(1))
676 : primitive_forces_virial = 0.0_dp
677 : ALLOCATE (ede_work_virial(1))
678 : ALLOCATE (ede_work2_virial(1))
679 : ALLOCATE (ede_primitives_tmp_virial(1))
680 : END IF
681 :
682 : !! Start caluclating integrals of the form (ab|cd) or (ij|kl)
683 : !! In order to do so, there is a main four-loop structure that takes into account the two symmetries
684 : !!
685 : !! (ab|cd) = (ba|cd) = (ab|dc) = (ba|dc)
686 : !!
687 : !! by iterating in the following way
688 : !!
689 : !! DO iatom=1,natom and DO katom=1,natom
690 : !! DO jatom=iatom,natom DO latom=katom,natom
691 : !!
692 : !! The third symmetry
693 : !!
694 : !! (ab|cd) = (cd|ab)
695 : !!
696 : !! is taken into account by the following criterion:
697 : !!
698 : !! IF(katom+latom<=iatom+jatom) THEN
699 : !! IF( ((iatom+jatom)==(katom+latom) ) .AND.(katom<iatom)) CYCLE
700 : !!
701 : !! Furthermore, if iatom==jatom==katom==latom we cycle, because the derivatives are zero anyway.
702 : !!
703 : !! Depending on the degeneracy of an integral the exchange contribution is multiplied by a corresponding
704 : !! factor ( symm_fac ).
705 : !!
706 : !! If one quartet does not pass the screening we CYCLE on the outer most possible loop. Thats why we use
707 : !! different hierarchies of short range screening matrices.
708 : !!
709 : !! If we do a parallel run, each process owns a unique array of workloads. Here, a workload is
710 : !! defined as :
711 : !!
712 : !! istart, jstart, kstart, lstart, number_of_atom_quartets, initial_cpu_id
713 : !!
714 : !! This tells the process where to start the main loops and how many bunches of integrals it has to
715 : !! calculate. The original parallelization is a simple modulo distribution that is binned and
716 : !! optimized in the load_balance routines. Since the Monte Carlo routines can swap processors,
717 : !! we need to know which was the initial cpu_id.
718 : !! Furthermore, the indices iatom, jatom, katom, latom have to be set to istart, jstart, kstart and
719 : !! lstart only the first time the loop is executed. All subsequent loops have to start with one or
720 : !! iatom and katom respectively. Therefore, we use flags like first_j_loop etc.
721 :
722 : !$OMP BARRIER
723 : !$OMP MASTER
724 : CALL timeset(routineN//"_main", handle_main)
725 : !$OMP END MASTER
726 : !$OMP BARRIER
727 :
728 : coeffs_kind_max0 = MAXVAL(screen_coeffs_kind(:, :)%x(2))
729 : ALLOCATE (set_list_ij((max_set*load_balance_parameter%block_size)**2))
730 : ALLOCATE (set_list_kl((max_set*load_balance_parameter%block_size)**2))
731 :
732 : !$OMP BARRIER
733 :
734 : IF (actual_x_data%memory_parameter%recalc_forces) THEN
735 : actual_x_data%distribution_forces%ram_counter = HUGE(distribution_forces%ram_counter)
736 : memory_parameter%ram_counter_forces = HUGE(memory_parameter%ram_counter_forces)
737 : END IF
738 :
739 : IF (treat_forces_in_core) THEN
740 : buffer_overflow = .FALSE.
741 : ELSE
742 : buffer_overflow = .TRUE.
743 : END IF
744 :
745 : do_dynamic_load_balancing = .TRUE.
746 : IF (n_threads == 1) do_dynamic_load_balancing = .FALSE.
747 :
748 : IF (do_dynamic_load_balancing) THEN
749 : my_bin_size = SIZE(actual_x_data%distribution_forces)
750 : ELSE
751 : my_bin_size = 1
752 : END IF
753 : !! We do not need the containers if MAX_MEM = 0
754 : IF (treat_forces_in_core) THEN
755 : !! IF new md step -> reinitialize containers
756 : IF (actual_x_data%memory_parameter%recalc_forces) THEN
757 : CALL dealloc_containers(actual_x_data%store_forces, memory_parameter%actual_memory_usage)
758 : CALL alloc_containers(actual_x_data%store_forces, my_bin_size)
759 :
760 : DO bin = 1, my_bin_size
761 : maxval_container => actual_x_data%store_forces%maxval_container(bin)
762 : integral_containers => actual_x_data%store_forces%integral_containers(:, bin)
763 : CALL hfx_init_container(maxval_container, memory_parameter%actual_memory_usage, .FALSE.)
764 : DO i = 1, 64
765 : CALL hfx_init_container(integral_containers(i), memory_parameter%actual_memory_usage, .FALSE.)
766 : END DO
767 : END DO
768 : END IF
769 :
770 : !! Decompress the first cache for maxvals and integrals
771 : IF (.NOT. actual_x_data%memory_parameter%recalc_forces) THEN
772 : DO bin = 1, my_bin_size
773 : maxval_cache => actual_x_data%store_forces%maxval_cache(bin)
774 : maxval_container => actual_x_data%store_forces%maxval_container(bin)
775 : integral_caches => actual_x_data%store_forces%integral_caches(:, bin)
776 : integral_containers => actual_x_data%store_forces%integral_containers(:, bin)
777 : CALL hfx_decompress_first_cache(bits_max_val, maxval_cache, maxval_container, &
778 : memory_parameter%actual_memory_usage, .FALSE.)
779 : DO i = 1, 64
780 : CALL hfx_decompress_first_cache(i, integral_caches(i), integral_containers(i), &
781 : memory_parameter%actual_memory_usage, .FALSE.)
782 : END DO
783 : END DO
784 : END IF
785 : END IF
786 :
787 : !$OMP BARRIER
788 : !$OMP MASTER
789 : IF (do_dynamic_load_balancing) THEN
790 : ! ** Lets construct the task list
791 : shm_total_bins = 0
792 : DO i = 1, n_threads
793 : shm_total_bins = shm_total_bins + SIZE(my_x_data(irep, i)%distribution_forces)
794 : END DO
795 : ALLOCATE (tmp_task_list(shm_total_bins))
796 : shm_task_counter = 0
797 : DO i = 1, n_threads
798 : DO bin = 1, SIZE(my_x_data(irep, i)%distribution_forces)
799 : shm_task_counter = shm_task_counter + 1
800 : tmp_task_list(shm_task_counter)%thread_id = i
801 : tmp_task_list(shm_task_counter)%bin_id = bin
802 : tmp_task_list(shm_task_counter)%cost = my_x_data(irep, i)%distribution_forces(bin)%cost
803 : END DO
804 : END DO
805 :
806 : ! ** Now sort the task list
807 : ALLOCATE (tmp_task_list_cost(shm_total_bins))
808 : ALLOCATE (tmp_index(shm_total_bins))
809 :
810 : DO i = 1, shm_total_bins
811 : tmp_task_list_cost(i) = tmp_task_list(i)%cost
812 : END DO
813 :
814 : CALL sort(tmp_task_list_cost, shm_total_bins, tmp_index)
815 :
816 : ALLOCATE (actual_x_data%task_list(shm_total_bins))
817 :
818 : DO i = 1, shm_total_bins
819 : actual_x_data%task_list(i) = tmp_task_list(tmp_index(shm_total_bins - i + 1))
820 : END DO
821 :
822 : shm_task_list => actual_x_data%task_list
823 : shm_task_counter = 0
824 :
825 : DEALLOCATE (tmp_task_list_cost, tmp_index, tmp_task_list)
826 : END IF
827 :
828 : !! precalculate maximum density matrix elements in blocks
829 : shm_pmax_block => actual_x_data%pmax_block
830 : actual_x_data%pmax_block = 0.0_dp
831 : IF (screen_pmat_forces) THEN
832 : DO iatom_block = 1, SIZE(actual_x_data%blocks)
833 : iatom_start = actual_x_data%blocks(iatom_block)%istart
834 : iatom_end = actual_x_data%blocks(iatom_block)%iend
835 : DO jatom_block = 1, SIZE(actual_x_data%blocks)
836 : jatom_start = actual_x_data%blocks(jatom_block)%istart
837 : jatom_end = actual_x_data%blocks(jatom_block)%iend
838 : shm_pmax_block(iatom_block, jatom_block) = MAXVAL(shm_pmax_atom(iatom_start:iatom_end, jatom_start:jatom_end))
839 : END DO
840 : END DO
841 : END IF
842 : shm_atomic_pair_list => actual_x_data%atomic_pair_list_forces
843 : IF (actual_x_data%memory_parameter%recalc_forces) THEN
844 : CALL build_atomic_pair_list(natom, shm_atomic_pair_list, kind_of, basis_parameter, particle_set, &
845 : do_periodic, screen_coeffs_kind, coeffs_kind_max0, log10_eps_schwarz, cell, &
846 : actual_x_data%blocks)
847 : END IF
848 :
849 : my_bin_size = SIZE(actual_x_data%distribution_forces)
850 : DO bin = 1, my_bin_size
851 : actual_x_data%distribution_forces(bin)%time_forces = 0.0_dp
852 : END DO
853 : !$OMP END MASTER
854 : !$OMP BARRIER
855 :
856 : IF (treat_forces_in_core) THEN
857 : IF (my_bin_size > 0) THEN
858 : maxval_container => actual_x_data%store_forces%maxval_container(1)
859 : maxval_cache => actual_x_data%store_forces%maxval_cache(1)
860 :
861 : integral_containers => actual_x_data%store_forces%integral_containers(:, 1)
862 : integral_caches => actual_x_data%store_forces%integral_caches(:, 1)
863 : END IF
864 : END IF
865 :
866 : nblocks = load_balance_parameter%nblocks
867 :
868 : bins_left = .TRUE.
869 : do_it = .TRUE.
870 : bin = 0
871 : DO WHILE (bins_left)
872 : IF (.NOT. do_dynamic_load_balancing) THEN
873 : bin = bin + 1
874 : IF (bin > my_bin_size) THEN
875 : do_it = .FALSE.
876 : bins_left = .FALSE.
877 : ELSE
878 : do_it = .TRUE.
879 : bins_left = .TRUE.
880 : distribution_forces => actual_x_data%distribution_forces(bin)
881 : END IF
882 : ELSE
883 : !$OMP CRITICAL(hfxderivatives_critical)
884 : shm_task_counter = shm_task_counter + 1
885 : IF (shm_task_counter <= shm_total_bins) THEN
886 : my_thread_id = shm_task_list(shm_task_counter)%thread_id
887 : my_bin_id = shm_task_list(shm_task_counter)%bin_id
888 : IF (treat_forces_in_core) THEN
889 : maxval_cache => my_x_data(irep, my_thread_id)%store_forces%maxval_cache(my_bin_id)
890 : maxval_container => my_x_data(irep, my_thread_id)%store_forces%maxval_container(my_bin_id)
891 : integral_caches => my_x_data(irep, my_thread_id)%store_forces%integral_caches(:, my_bin_id)
892 : integral_containers => my_x_data(irep, my_thread_id)%store_forces%integral_containers(:, my_bin_id)
893 : END IF
894 : distribution_forces => my_x_data(irep, my_thread_id)%distribution_forces(my_bin_id)
895 : do_it = .TRUE.
896 : bins_left = .TRUE.
897 : IF (actual_x_data%memory_parameter%recalc_forces) THEN
898 : distribution_forces%ram_counter = HUGE(distribution_forces%ram_counter)
899 : END IF
900 : counter = 0_Int_8
901 : ELSE
902 : do_it = .FALSE.
903 : bins_left = .FALSE.
904 : END IF
905 : !$OMP END CRITICAL(hfxderivatives_critical)
906 : END IF
907 :
908 : IF (.NOT. do_it) CYCLE
909 : !$OMP MASTER
910 : CALL timeset(routineN//"_bin", handle_bin)
911 : !$OMP END MASTER
912 : bintime_start = m_walltime()
913 : my_istart = distribution_forces%istart
914 : my_current_counter = 0
915 : IF (distribution_forces%number_of_atom_quartets == 0 .OR. &
916 : my_istart == -1_int_8) my_istart = nblocks**4
917 : atomic_blocks: DO atom_block = my_istart, nblocks**4 - 1, n_processes
918 : latom_block = INT(MODULO(atom_block, nblocks)) + 1
919 : tmp_block = atom_block/nblocks
920 : katom_block = INT(MODULO(tmp_block, nblocks)) + 1
921 : IF (latom_block < katom_block) CYCLE atomic_blocks
922 : tmp_block = tmp_block/nblocks
923 : jatom_block = INT(MODULO(tmp_block, nblocks)) + 1
924 : tmp_block = tmp_block/nblocks
925 : iatom_block = INT(MODULO(tmp_block, nblocks)) + 1
926 : IF (jatom_block < iatom_block) CYCLE atomic_blocks
927 : my_current_counter = my_current_counter + 1
928 : IF (my_current_counter > distribution_forces%number_of_atom_quartets) EXIT atomic_blocks
929 :
930 : iatom_start = actual_x_data%blocks(iatom_block)%istart
931 : iatom_end = actual_x_data%blocks(iatom_block)%iend
932 : jatom_start = actual_x_data%blocks(jatom_block)%istart
933 : jatom_end = actual_x_data%blocks(jatom_block)%iend
934 : katom_start = actual_x_data%blocks(katom_block)%istart
935 : katom_end = actual_x_data%blocks(katom_block)%iend
936 : latom_start = actual_x_data%blocks(latom_block)%istart
937 : latom_end = actual_x_data%blocks(latom_block)%iend
938 :
939 : pmax_blocks = MAX(shm_pmax_block(katom_block, iatom_block) + &
940 : shm_pmax_block(latom_block, jatom_block), &
941 : shm_pmax_block(latom_block, iatom_block) + &
942 : shm_pmax_block(katom_block, jatom_block))
943 :
944 : IF (2.0_dp*coeffs_kind_max0 + pmax_blocks < log10_eps_schwarz) CYCLE atomic_blocks
945 :
946 : CALL build_pair_list(natom, list_ij, set_list_ij, iatom_start, iatom_end, &
947 : jatom_start, jatom_end, &
948 : kind_of, basis_parameter, particle_set, &
949 : do_periodic, screen_coeffs_set, screen_coeffs_kind, coeffs_kind_max0, &
950 : log10_eps_schwarz, cell, pmax_blocks, shm_atomic_pair_list)
951 :
952 : CALL build_pair_list(natom, list_kl, set_list_kl, katom_start, katom_end, &
953 : latom_start, latom_end, &
954 : kind_of, basis_parameter, particle_set, &
955 : do_periodic, screen_coeffs_set, screen_coeffs_kind, coeffs_kind_max0, &
956 : log10_eps_schwarz, cell, pmax_blocks, shm_atomic_pair_list)
957 :
958 : DO i_list_ij = 1, list_ij%n_element
959 : iatom = list_ij%elements(i_list_ij)%pair(1)
960 : jatom = list_ij%elements(i_list_ij)%pair(2)
961 : i_set_list_ij_start = list_ij%elements(i_list_ij)%set_bounds(1)
962 : i_set_list_ij_stop = list_ij%elements(i_list_ij)%set_bounds(2)
963 : ikind = list_ij%elements(i_list_ij)%kind_pair(1)
964 : jkind = list_ij%elements(i_list_ij)%kind_pair(2)
965 : ra = list_ij%elements(i_list_ij)%r1
966 : rb = list_ij%elements(i_list_ij)%r2
967 : rab2 = list_ij%elements(i_list_ij)%dist2
968 :
969 : la_max => basis_parameter(ikind)%lmax
970 : la_min => basis_parameter(ikind)%lmin
971 : npgfa => basis_parameter(ikind)%npgf
972 : nseta = basis_parameter(ikind)%nset
973 : zeta => basis_parameter(ikind)%zet
974 : nsgfa => basis_parameter(ikind)%nsgf
975 : sphi_a_ext => basis_parameter(ikind)%sphi_ext(:, :, :, :)
976 : nsgfl_a => basis_parameter(ikind)%nsgfl
977 : sphi_a_u1 = UBOUND(sphi_a_ext, 1)
978 : sphi_a_u2 = UBOUND(sphi_a_ext, 2)
979 : sphi_a_u3 = UBOUND(sphi_a_ext, 3)
980 :
981 : lb_max => basis_parameter(jkind)%lmax
982 : lb_min => basis_parameter(jkind)%lmin
983 : npgfb => basis_parameter(jkind)%npgf
984 : nsetb = basis_parameter(jkind)%nset
985 : zetb => basis_parameter(jkind)%zet
986 : nsgfb => basis_parameter(jkind)%nsgf
987 : sphi_b_ext => basis_parameter(jkind)%sphi_ext(:, :, :, :)
988 : nsgfl_b => basis_parameter(jkind)%nsgfl
989 : sphi_b_u1 = UBOUND(sphi_b_ext, 1)
990 : sphi_b_u2 = UBOUND(sphi_b_ext, 2)
991 : sphi_b_u3 = UBOUND(sphi_b_ext, 3)
992 :
993 : i_atom = atom_of_kind(iatom)
994 : forces_map(1, 1) = ikind
995 : forces_map(1, 2) = i_atom
996 : j_atom = atom_of_kind(jatom)
997 : forces_map(2, 1) = jkind
998 : forces_map(2, 2) = j_atom
999 :
1000 : DO i_list_kl = 1, list_kl%n_element
1001 : katom = list_kl%elements(i_list_kl)%pair(1)
1002 : latom = list_kl%elements(i_list_kl)%pair(2)
1003 :
1004 : !All four centers equivalent => zero-contribution
1005 : !VIIRAL IF((iatom==jatom .AND. iatom==katom .AND. iatom==latom)) CYCLE
1006 : IF (.NOT. (katom + latom <= iatom + jatom)) CYCLE
1007 : IF (((iatom + jatom) == (katom + latom)) .AND. (katom < iatom)) CYCLE
1008 :
1009 : i_set_list_kl_start = list_kl%elements(i_list_kl)%set_bounds(1)
1010 : i_set_list_kl_stop = list_kl%elements(i_list_kl)%set_bounds(2)
1011 : kkind = list_kl%elements(i_list_kl)%kind_pair(1)
1012 : lkind = list_kl%elements(i_list_kl)%kind_pair(2)
1013 : rc = list_kl%elements(i_list_kl)%r1
1014 : rd = list_kl%elements(i_list_kl)%r2
1015 : rcd2 = list_kl%elements(i_list_kl)%dist2
1016 :
1017 : IF (screen_pmat_forces) THEN
1018 : pmax_atom = MAX(shm_pmax_atom(katom, iatom) + &
1019 : shm_pmax_atom(latom, jatom), &
1020 : shm_pmax_atom(latom, iatom) + &
1021 : shm_pmax_atom(katom, jatom))
1022 : ELSE
1023 : pmax_atom = 0.0_dp
1024 : END IF
1025 :
1026 : IF ((screen_coeffs_kind(jkind, ikind)%x(1)*rab2 + &
1027 : screen_coeffs_kind(jkind, ikind)%x(2)) + &
1028 : (screen_coeffs_kind(lkind, kkind)%x(1)*rcd2 + &
1029 : screen_coeffs_kind(lkind, kkind)%x(2)) + pmax_atom < log10_eps_schwarz) CYCLE
1030 :
1031 : IF (.NOT. (shm_is_assoc_atomic_block(latom, iatom) >= 1 .AND. &
1032 : shm_is_assoc_atomic_block(katom, iatom) >= 1 .AND. &
1033 : shm_is_assoc_atomic_block(katom, jatom) >= 1 .AND. &
1034 : shm_is_assoc_atomic_block(latom, jatom) >= 1)) CYCLE
1035 : k_atom = atom_of_kind(katom)
1036 : forces_map(3, 1) = kkind
1037 : forces_map(3, 2) = k_atom
1038 :
1039 : l_atom = atom_of_kind(latom)
1040 : forces_map(4, 1) = lkind
1041 : forces_map(4, 2) = l_atom
1042 :
1043 : IF (my_nspins == 1) THEN
1044 : fac = 0.25_dp*hf_fraction
1045 : ELSE
1046 : fac = 0.5_dp*hf_fraction
1047 : END IF
1048 : !calculate symmetry_factor
1049 : symm_fac = 0.25_dp
1050 : IF (iatom == jatom) symm_fac = symm_fac*2.0_dp
1051 : IF (katom == latom) symm_fac = symm_fac*2.0_dp
1052 : IF (iatom == katom .AND. jatom == latom .AND. &
1053 : iatom /= jatom .AND. katom /= latom) symm_fac = symm_fac*2.0_dp
1054 : IF (iatom == katom .AND. iatom == jatom .AND. &
1055 : katom == latom) symm_fac = symm_fac*2.0_dp
1056 :
1057 : symm_fac = 1.0_dp/symm_fac
1058 : fac = fac*symm_fac
1059 :
1060 : lc_max => basis_parameter(kkind)%lmax
1061 : lc_min => basis_parameter(kkind)%lmin
1062 : npgfc => basis_parameter(kkind)%npgf
1063 : zetc => basis_parameter(kkind)%zet
1064 : nsgfc => basis_parameter(kkind)%nsgf
1065 : sphi_c_ext => basis_parameter(kkind)%sphi_ext(:, :, :, :)
1066 : nsgfl_c => basis_parameter(kkind)%nsgfl
1067 : sphi_c_u1 = UBOUND(sphi_c_ext, 1)
1068 : sphi_c_u2 = UBOUND(sphi_c_ext, 2)
1069 : sphi_c_u3 = UBOUND(sphi_c_ext, 3)
1070 :
1071 : ld_max => basis_parameter(lkind)%lmax
1072 : ld_min => basis_parameter(lkind)%lmin
1073 : npgfd => basis_parameter(lkind)%npgf
1074 : zetd => basis_parameter(lkind)%zet
1075 : nsgfd => basis_parameter(lkind)%nsgf
1076 : sphi_d_ext => basis_parameter(lkind)%sphi_ext(:, :, :, :)
1077 : nsgfl_d => basis_parameter(lkind)%nsgfl
1078 : sphi_d_u1 = UBOUND(sphi_d_ext, 1)
1079 : sphi_d_u2 = UBOUND(sphi_d_ext, 2)
1080 : sphi_d_u3 = UBOUND(sphi_d_ext, 3)
1081 :
1082 : atomic_offset_bd = shm_atomic_block_offset(jatom, latom)
1083 : atomic_offset_bc = shm_atomic_block_offset(jatom, katom)
1084 : atomic_offset_ad = shm_atomic_block_offset(iatom, latom)
1085 : atomic_offset_ac = shm_atomic_block_offset(iatom, katom)
1086 :
1087 : IF (jatom < latom) THEN
1088 : offset_bd_set => shm_set_offset(:, :, lkind, jkind)
1089 : ELSE
1090 : offset_bd_set => shm_set_offset(:, :, jkind, lkind)
1091 : END IF
1092 : IF (jatom < katom) THEN
1093 : offset_bc_set => shm_set_offset(:, :, kkind, jkind)
1094 : ELSE
1095 : offset_bc_set => shm_set_offset(:, :, jkind, kkind)
1096 : END IF
1097 : IF (iatom < latom) THEN
1098 : offset_ad_set => shm_set_offset(:, :, lkind, ikind)
1099 : ELSE
1100 : offset_ad_set => shm_set_offset(:, :, ikind, lkind)
1101 : END IF
1102 : IF (iatom < katom) THEN
1103 : offset_ac_set => shm_set_offset(:, :, kkind, ikind)
1104 : ELSE
1105 : offset_ac_set => shm_set_offset(:, :, ikind, kkind)
1106 : END IF
1107 :
1108 : IF (screen_pmat_forces) THEN
1109 : swap_id = 16
1110 : kind_kind_idx = INT(get_1D_idx(kkind, ikind, INT(nkind, int_8)))
1111 : IF (ikind >= kkind) THEN
1112 : ptr_p_1 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1113 : actual_x_data%map_atom_to_kind_atom(katom), &
1114 : actual_x_data%map_atom_to_kind_atom(iatom))
1115 : ELSE
1116 : ptr_p_1 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1117 : actual_x_data%map_atom_to_kind_atom(iatom), &
1118 : actual_x_data%map_atom_to_kind_atom(katom))
1119 : swap_id = swap_id + 1
1120 : END IF
1121 : kind_kind_idx = INT(get_1D_idx(lkind, jkind, INT(nkind, int_8)))
1122 : IF (jkind >= lkind) THEN
1123 : ptr_p_2 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1124 : actual_x_data%map_atom_to_kind_atom(latom), &
1125 : actual_x_data%map_atom_to_kind_atom(jatom))
1126 : ELSE
1127 : ptr_p_2 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1128 : actual_x_data%map_atom_to_kind_atom(jatom), &
1129 : actual_x_data%map_atom_to_kind_atom(latom))
1130 : swap_id = swap_id + 2
1131 : END IF
1132 : kind_kind_idx = INT(get_1D_idx(lkind, ikind, INT(nkind, int_8)))
1133 : IF (ikind >= lkind) THEN
1134 : ptr_p_3 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1135 : actual_x_data%map_atom_to_kind_atom(latom), &
1136 : actual_x_data%map_atom_to_kind_atom(iatom))
1137 : ELSE
1138 : ptr_p_3 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1139 : actual_x_data%map_atom_to_kind_atom(iatom), &
1140 : actual_x_data%map_atom_to_kind_atom(latom))
1141 : swap_id = swap_id + 4
1142 : END IF
1143 : kind_kind_idx = INT(get_1D_idx(kkind, jkind, INT(nkind, int_8)))
1144 : IF (jkind >= kkind) THEN
1145 : ptr_p_4 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1146 : actual_x_data%map_atom_to_kind_atom(katom), &
1147 : actual_x_data%map_atom_to_kind_atom(jatom))
1148 : ELSE
1149 : ptr_p_4 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
1150 : actual_x_data%map_atom_to_kind_atom(jatom), &
1151 : actual_x_data%map_atom_to_kind_atom(katom))
1152 : swap_id = swap_id + 8
1153 : END IF
1154 : END IF
1155 :
1156 : !! At this stage, check for memory used in compression
1157 :
1158 : IF (actual_x_data%memory_parameter%recalc_forces) THEN
1159 : IF (treat_forces_in_core) THEN
1160 : ! ** We know the maximum amount of integrals that we can store per MPI-process
1161 : ! ** Now we need to sum the current memory usage among all openMP threads
1162 : ! ** We can just read what is currently stored on the corresponding x_data type
1163 : ! ** If this is thread i and it tries to read the data from thread j, that is
1164 : ! ** currently writing that data, we just dont care, because the possible error
1165 : ! ** is of the order of CACHE_SIZE
1166 : mem_compression_counter = 0
1167 : DO i = 1, n_threads
1168 : !$OMP ATOMIC READ
1169 : tmp_i4 = my_x_data(irep, i)%memory_parameter%actual_memory_usage
1170 : mem_compression_counter = mem_compression_counter + &
1171 : tmp_i4*memory_parameter%cache_size
1172 : END DO
1173 : IF (mem_compression_counter + memory_parameter%final_comp_counter_energy &
1174 : > memory_parameter%max_compression_counter) THEN
1175 : buffer_overflow = .TRUE.
1176 : IF (do_dynamic_load_balancing) THEN
1177 : distribution_forces%ram_counter = counter
1178 : ELSE
1179 : memory_parameter%ram_counter_forces = counter
1180 : END IF
1181 : ELSE
1182 : counter = counter + 1
1183 : buffer_overflow = .FALSE.
1184 : END IF
1185 : END IF
1186 : ELSE
1187 : IF (treat_forces_in_core) THEN
1188 : IF (do_dynamic_load_balancing) THEN
1189 : IF (distribution_forces%ram_counter == counter) THEN
1190 : buffer_overflow = .TRUE.
1191 : ELSE
1192 : counter = counter + 1
1193 : buffer_overflow = .FALSE.
1194 : END IF
1195 : ELSE
1196 : IF (memory_parameter%ram_counter_forces == counter) THEN
1197 : buffer_overflow = .TRUE.
1198 : ELSE
1199 : counter = counter + 1
1200 : buffer_overflow = .FALSE.
1201 : END IF
1202 : END IF
1203 : END IF
1204 : END IF
1205 :
1206 : DO i_set_list_ij = i_set_list_ij_start, i_set_list_ij_stop
1207 : iset = set_list_ij(i_set_list_ij)%pair(1)
1208 : jset = set_list_ij(i_set_list_ij)%pair(2)
1209 :
1210 : ncob = npgfb(jset)*ncoset(lb_max(jset))
1211 : max_val1 = screen_coeffs_set(jset, iset, jkind, ikind)%x(1)*rab2 + &
1212 : screen_coeffs_set(jset, iset, jkind, ikind)%x(2)
1213 : !! Near field screening
1214 : IF (max_val1 + (screen_coeffs_kind(lkind, kkind)%x(1)*rcd2 + &
1215 : screen_coeffs_kind(lkind, kkind)%x(2)) + pmax_atom < log10_eps_schwarz) CYCLE
1216 : sphi_a_ext_set => sphi_a_ext(:, :, :, iset)
1217 : sphi_b_ext_set => sphi_b_ext(:, :, :, jset)
1218 :
1219 : DO i_set_list_kl = i_set_list_kl_start, i_set_list_kl_stop
1220 : kset = set_list_kl(i_set_list_kl)%pair(1)
1221 : lset = set_list_kl(i_set_list_kl)%pair(2)
1222 :
1223 : max_val2_set = (screen_coeffs_set(lset, kset, lkind, kkind)%x(1)*rcd2 + &
1224 : screen_coeffs_set(lset, kset, lkind, kkind)%x(2))
1225 : max_val2 = max_val1 + max_val2_set
1226 :
1227 : !! Near field screening
1228 : IF (max_val2 + pmax_atom < log10_eps_schwarz) CYCLE
1229 : sphi_c_ext_set => sphi_c_ext(:, :, :, kset)
1230 : sphi_d_ext_set => sphi_d_ext(:, :, :, lset)
1231 :
1232 : IF (screen_pmat_forces) THEN
1233 : CALL get_pmax_val(ptr_p_1, ptr_p_2, ptr_p_3, ptr_p_4, &
1234 : iset, jset, kset, lset, &
1235 : pmax_tmp, swap_id)
1236 :
1237 : log10_pmax = log_2 + pmax_tmp
1238 : ELSE
1239 : log10_pmax = 0.0_dp
1240 : END IF
1241 :
1242 : max_val2 = max_val2 + log10_pmax
1243 : IF (max_val2 < log10_eps_schwarz) CYCLE
1244 :
1245 : pmax_entry = EXP(log10_pmax*ln_10)
1246 : !! store current number of integrals, update total number and number of integrals in buffer
1247 : current_counter = nsgfa(iset)*nsgfb(jset)*nsgfc(kset)*nsgfd(lset)*12
1248 : IF (buffer_overflow) THEN
1249 : neris_onthefly = neris_onthefly + current_counter
1250 : END IF
1251 :
1252 : !! Get integrals from buffer and update Kohn-Sham matrix
1253 : IF (.NOT. buffer_overflow .AND. .NOT. actual_x_data%memory_parameter%recalc_forces) THEN
1254 : nints = current_counter
1255 : CALL hfx_get_single_cache_element(estimate_to_store_int, 6, &
1256 : maxval_cache, maxval_container, memory_parameter%actual_memory_usage, &
1257 : use_disk_storage)
1258 : spherical_estimate = SET_EXPONENT(1.0_dp, estimate_to_store_int + 1)
1259 : ! IF (spherical_estimate*pmax_entry < eps_schwarz) CYCLE
1260 : IF (.NOT. use_virial) THEN
1261 : IF (spherical_estimate*pmax_entry < eps_schwarz) CYCLE
1262 : END IF
1263 : nbits = EXPONENT(ANINT(spherical_estimate*pmax_entry/eps_storage)) + 1
1264 : buffer_left = nints
1265 : buffer_start = 1
1266 : neris_incore = neris_incore + INT(nints, int_8)
1267 : DO WHILE (buffer_left > 0)
1268 : buffer_size = MIN(buffer_left, cache_size)
1269 : CALL hfx_get_mult_cache_elements(primitive_forces(buffer_start), &
1270 : buffer_size, nbits, &
1271 : integral_caches(nbits), &
1272 : integral_containers(nbits), &
1273 : eps_storage, pmax_entry, &
1274 : memory_parameter%actual_memory_usage, &
1275 : use_disk_storage)
1276 : buffer_left = buffer_left - buffer_size
1277 : buffer_start = buffer_start + buffer_size
1278 : END DO
1279 : END IF
1280 : !! Calculate integrals if we run out of buffer or the geometry did change
1281 : IF (actual_x_data%memory_parameter%recalc_forces .OR. buffer_overflow) THEN
1282 : max_contraction_val = max_contraction(iset, iatom)* &
1283 : max_contraction(jset, jatom)* &
1284 : max_contraction(kset, katom)* &
1285 : max_contraction(lset, latom)* &
1286 : pmax_entry
1287 : tmp_R_1 => radii_pgf(:, :, jset, iset, jkind, ikind)
1288 : tmp_R_2 => radii_pgf(:, :, lset, kset, lkind, kkind)
1289 : tmp_screen_pgf1 => screen_coeffs_pgf(:, :, jset, iset, jkind, ikind)
1290 : tmp_screen_pgf2 => screen_coeffs_pgf(:, :, lset, kset, lkind, kkind)
1291 :
1292 : CALL forces4(private_deriv, ra, rb, rc, rd, npgfa(iset), npgfb(jset), &
1293 : npgfc(kset), npgfd(lset), &
1294 : la_min(iset), la_max(iset), lb_min(jset), lb_max(jset), &
1295 : lc_min(kset), lc_max(kset), ld_min(lset), ld_max(lset), &
1296 : nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1297 : sphi_a_u1, sphi_a_u2, sphi_a_u3, &
1298 : sphi_b_u1, sphi_b_u2, sphi_b_u3, &
1299 : sphi_c_u1, sphi_c_u2, sphi_c_u3, &
1300 : sphi_d_u1, sphi_d_u2, sphi_d_u3, &
1301 : zeta(1:npgfa(iset), iset), zetb(1:npgfb(jset), jset), &
1302 : zetc(1:npgfc(kset), kset), zetd(1:npgfd(lset), lset), &
1303 : primitive_forces, &
1304 : potential_parameter, &
1305 : actual_x_data%neighbor_cells, &
1306 : screen_coeffs_set(jset, iset, jkind, ikind)%x, &
1307 : screen_coeffs_set(lset, kset, lkind, kkind)%x, eps_schwarz, &
1308 : max_contraction_val, cartesian_estimate, cell, neris_tmp, &
1309 : log10_pmax, log10_eps_schwarz, &
1310 : tmp_R_1, tmp_R_2, tmp_screen_pgf1, tmp_screen_pgf2, &
1311 : pgf_list_ij, pgf_list_kl, pgf_product_list, &
1312 : nsgfl_a(:, iset), nsgfl_b(:, jset), &
1313 : nsgfl_c(:, kset), nsgfl_d(:, lset), &
1314 : sphi_a_ext_set, sphi_b_ext_set, sphi_c_ext_set, sphi_d_ext_set, &
1315 : ede_work, ede_work2, ede_work_forces, &
1316 : ede_buffer1, ede_buffer2, ede_primitives_tmp, &
1317 : nimages, do_periodic, use_virial, ede_work_virial, ede_work2_virial, &
1318 : ede_primitives_tmp_virial, primitive_forces_virial, &
1319 : cartesian_estimate_virial)
1320 :
1321 : nints = nsgfa(iset)*nsgfb(jset)*nsgfc(kset)*nsgfd(lset)*12
1322 : neris_total = neris_total + nints
1323 : nprim_ints = nprim_ints + neris_tmp
1324 : ! IF (cartesian_estimate == 0.0_dp) cartesian_estimate = TINY(cartesian_estimate)
1325 : ! estimate_to_store_int = EXPONENT(cartesian_estimate)
1326 : ! estimate_to_store_int = MAX(estimate_to_store_int, -15_int_8)
1327 : ! cartesian_estimate = SET_EXPONENT(1.0_dp, estimate_to_store_int+1)
1328 : ! IF (.NOT. buffer_overflow .AND. actual_x_data%memory_parameter%recalc_forces) THEN
1329 : ! IF (cartesian_estimate < eps_schwarz) THEN
1330 : ! CALL hfx_add_single_cache_element( &
1331 : ! estimate_to_store_int, 6, &
1332 : ! maxval_cache, maxval_container, memory_parameter%actual_memory_usage, &
1333 : ! use_disk_storage, max_val_memory)
1334 : ! END IF
1335 : ! END IF
1336 : !
1337 : ! IF (.NOT. use_virial) THEN
1338 : ! IF (cartesian_estimate < eps_schwarz) CYCLE
1339 : ! END IF
1340 :
1341 : !! Compress the array for storage
1342 : spherical_estimate = 0.0_dp
1343 : DO i = 1, nints
1344 : spherical_estimate = MAX(spherical_estimate, ABS(primitive_forces(i)))
1345 : END DO
1346 :
1347 : spherical_estimate_virial = 0.0_dp
1348 : IF (use_virial) THEN
1349 : DO i = 1, 3*nints
1350 : spherical_estimate_virial = MAX(spherical_estimate_virial, ABS(primitive_forces_virial(i)))
1351 : END DO
1352 : END IF
1353 :
1354 : IF (spherical_estimate == 0.0_dp) spherical_estimate = TINY(spherical_estimate)
1355 : estimate_to_store_int = EXPONENT(spherical_estimate)
1356 : estimate_to_store_int = MAX(estimate_to_store_int, -15_int_8)
1357 : IF (.NOT. buffer_overflow .AND. actual_x_data%memory_parameter%recalc_forces) THEN
1358 : CALL hfx_add_single_cache_element(estimate_to_store_int, 6, &
1359 : maxval_cache, maxval_container, &
1360 : memory_parameter%actual_memory_usage, &
1361 : use_disk_storage, max_val_memory)
1362 : END IF
1363 : spherical_estimate = SET_EXPONENT(1.0_dp, estimate_to_store_int + 1)
1364 : IF (.NOT. use_virial) THEN
1365 : IF (spherical_estimate*pmax_entry < eps_schwarz) CYCLE
1366 : END IF
1367 : IF (.NOT. buffer_overflow) THEN
1368 : nbits = EXPONENT(ANINT(spherical_estimate*pmax_entry/eps_storage)) + 1
1369 : buffer_left = nints
1370 : buffer_start = 1
1371 : ! neris_incore = neris_incore+nints
1372 : neris_incore = neris_incore + INT(nints, int_8)
1373 :
1374 : DO WHILE (buffer_left > 0)
1375 : buffer_size = MIN(buffer_left, CACHE_SIZE)
1376 : CALL hfx_add_mult_cache_elements(primitive_forces(buffer_start), &
1377 : buffer_size, nbits, &
1378 : integral_caches(nbits), &
1379 : integral_containers(nbits), &
1380 : eps_storage, pmax_entry, &
1381 : memory_parameter%actual_memory_usage, &
1382 : use_disk_storage)
1383 : buffer_left = buffer_left - buffer_size
1384 : buffer_start = buffer_start + buffer_size
1385 : END DO
1386 : ELSE
1387 : !! In order to be consistent with in-core part, round all the eris wrt. eps_schwarz
1388 : !! but only if we treat forces in-core
1389 : IF (treat_forces_in_core) THEN
1390 : DO i = 1, nints
1391 : primitive_forces(i) = primitive_forces(i)*pmax_entry
1392 : IF (ABS(primitive_forces(i)) > eps_storage) THEN
1393 : primitive_forces(i) = ANINT(primitive_forces(i)/eps_storage, dp)*eps_storage/pmax_entry
1394 : ELSE
1395 : primitive_forces(i) = 0.0_dp
1396 : END IF
1397 : END DO
1398 : END IF
1399 : END IF
1400 : END IF
1401 : IF (with_resp_density) THEN
1402 : CALL prefetch_density_matrix(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1403 : full_density_alpha(:, 1), pbd_buf, pbc_buf, pad_buf, pac_buf, &
1404 : iatom, jatom, katom, latom, &
1405 : iset, jset, kset, lset, offset_bd_set, offset_bc_set, &
1406 : offset_ad_set, offset_ac_set, atomic_offset_bd, atomic_offset_bc, &
1407 : atomic_offset_ad, atomic_offset_ac, is_anti_symmetric)
1408 : CALL prefetch_density_matrix(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1409 : full_density_resp, pbd_buf_resp, pbc_buf_resp, pad_buf_resp, pac_buf_resp, &
1410 : iatom, jatom, katom, latom, &
1411 : iset, jset, kset, lset, offset_bd_set, offset_bc_set, &
1412 : offset_ad_set, offset_ac_set, atomic_offset_bd, atomic_offset_bc, &
1413 : atomic_offset_ad, atomic_offset_ac, is_anti_symmetric)
1414 : ELSE
1415 : CALL prefetch_density_matrix(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1416 : full_density_alpha(:, 1), pbd_buf, pbc_buf, pad_buf, pac_buf, &
1417 : iatom, jatom, katom, latom, &
1418 : iset, jset, kset, lset, offset_bd_set, offset_bc_set, &
1419 : offset_ad_set, offset_ac_set, atomic_offset_bd, atomic_offset_bc, &
1420 : atomic_offset_ad, atomic_offset_ac, is_anti_symmetric)
1421 : END IF
1422 : IF (my_nspins == 2) THEN
1423 : IF (with_resp_density) THEN
1424 : CALL prefetch_density_matrix(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1425 : full_density_beta(:, 1), pbd_buf_beta, pbc_buf_beta, &
1426 : pad_buf_beta, pac_buf_beta, iatom, jatom, katom, latom, &
1427 : iset, jset, kset, lset, offset_bd_set, offset_bc_set, &
1428 : offset_ad_set, offset_ac_set, atomic_offset_bd, atomic_offset_bc, &
1429 : atomic_offset_ad, atomic_offset_ac, is_anti_symmetric)
1430 : CALL prefetch_density_matrix(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1431 : full_density_resp_beta, pbd_buf_resp_beta, pbc_buf_resp_beta, &
1432 : pad_buf_resp_beta, pac_buf_resp_beta, iatom, jatom, katom, latom, &
1433 : iset, jset, kset, lset, offset_bd_set, offset_bc_set, &
1434 : offset_ad_set, offset_ac_set, atomic_offset_bd, atomic_offset_bc, &
1435 : atomic_offset_ad, atomic_offset_ac, is_anti_symmetric)
1436 : ELSE
1437 : CALL prefetch_density_matrix(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1438 : full_density_beta(:, 1), pbd_buf_beta, pbc_buf_beta, pad_buf_beta, &
1439 : pac_buf_beta, iatom, jatom, katom, latom, &
1440 : iset, jset, kset, lset, offset_bd_set, offset_bc_set, &
1441 : offset_ad_set, offset_ac_set, atomic_offset_bd, atomic_offset_bc, &
1442 : atomic_offset_ad, atomic_offset_ac, is_anti_symmetric)
1443 : END IF
1444 : END IF
1445 : IF (spherical_estimate*pmax_entry >= eps_schwarz) THEN
1446 : DO coord = 1, 12
1447 : T2 => primitive_forces((coord - 1)*nsgfa(iset)*nsgfb(jset)*nsgfc(kset)*nsgfd(lset) + 1: &
1448 : coord*nsgfa(iset)*nsgfb(jset)*nsgfc(kset)*nsgfd(lset))
1449 :
1450 : IF (with_resp_density) THEN
1451 : CALL update_forces(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1452 : pbd_buf, pbc_buf, pad_buf, pac_buf, fac, &
1453 : T2, force, forces_map, coord, &
1454 : pbd_buf_resp, pbc_buf_resp, pad_buf_resp, pac_buf_resp, &
1455 : my_resp_only)
1456 : ELSE
1457 : CALL update_forces(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1458 : pbd_buf, pbc_buf, pad_buf, pac_buf, fac, &
1459 : T2, force, forces_map, coord)
1460 : END IF
1461 : IF (my_nspins == 2) THEN
1462 : IF (with_resp_density) THEN
1463 : CALL update_forces(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1464 : pbd_buf_beta, pbc_buf_beta, pad_buf_beta, pac_buf_beta, &
1465 : fac, T2, force, forces_map, coord, &
1466 : pbd_buf_resp_beta, pbc_buf_resp_beta, &
1467 : pad_buf_resp_beta, pac_buf_resp_beta, &
1468 : my_resp_only)
1469 : ELSE
1470 : CALL update_forces(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1471 : pbd_buf_beta, pbc_buf_beta, pad_buf_beta, pac_buf_beta, fac, &
1472 : T2, force, forces_map, coord)
1473 : END IF
1474 : END IF
1475 : END DO !coord
1476 : END IF
1477 : IF (use_virial .AND. spherical_estimate_virial*pmax_entry >= eps_schwarz) THEN
1478 : DO coord = 1, 12
1479 : DO i = 1, 3
1480 : T2 => primitive_forces_virial( &
1481 : ((i - 1)*12 + coord - 1)*nsgfa(iset)*nsgfb(jset)*nsgfc(kset)*nsgfd(lset) + 1: &
1482 : ((i - 1)*12 + coord)*nsgfa(iset)*nsgfb(jset)*nsgfc(kset)*nsgfd(lset))
1483 : IF (with_resp_density) THEN
1484 : CALL update_virial(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1485 : pbd_buf, pbc_buf, pad_buf, pac_buf, fac, &
1486 : T2, tmp_virial, coord, i, &
1487 : pbd_buf_resp, pbc_buf_resp, pad_buf_resp, &
1488 : pac_buf_resp, my_resp_only)
1489 : ELSE
1490 : CALL update_virial(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1491 : pbd_buf, pbc_buf, pad_buf, pac_buf, fac, &
1492 : T2, tmp_virial, coord, i)
1493 : END IF
1494 : IF (my_nspins == 2) THEN
1495 : IF (with_resp_density) THEN
1496 : CALL update_virial(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1497 : pbd_buf_beta, pbc_buf_beta, pad_buf_beta, pac_buf_beta, &
1498 : fac, T2, tmp_virial, coord, i, &
1499 : pbd_buf_resp_beta, pbc_buf_resp_beta, &
1500 : pad_buf_resp_beta, pac_buf_resp_beta, &
1501 : my_resp_only)
1502 : ELSE
1503 : CALL update_virial(nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
1504 : pbd_buf_beta, pbc_buf_beta, pad_buf_beta, pac_buf_beta, &
1505 : fac, T2, tmp_virial, coord, i)
1506 : END IF
1507 : END IF
1508 : END DO
1509 : END DO !coord
1510 : END IF
1511 :
1512 : END DO !i_set_list_kl
1513 : END DO !i_set_list_ij
1514 : END DO !i_list_kl
1515 : END DO !i_list_ij
1516 : END DO atomic_blocks
1517 : bintime_stop = m_walltime()
1518 : distribution_forces%time_forces = bintime_stop - bintime_start
1519 : !$OMP MASTER
1520 : CALL timestop(handle_bin)
1521 : !$OMP END MASTER
1522 : END DO !bin
1523 :
1524 : !$OMP MASTER
1525 : logger => cp_get_default_logger()
1526 : do_print_load_balance_info = .FALSE.
1527 : do_print_load_balance_info = BTEST(cp_print_key_should_output(logger%iter_info, hfx_section, &
1528 : "LOAD_BALANCE%PRINT/LOAD_BALANCE_INFO"), cp_p_file)
1529 : !$OMP END MASTER
1530 : !$OMP BARRIER
1531 : IF (do_print_load_balance_info) THEN
1532 : iw = -1
1533 : !$OMP MASTER
1534 : iw = cp_print_key_unit_nr(logger, hfx_section, "LOAD_BALANCE%PRINT/LOAD_BALANCE_INFO", &
1535 : extension=".scfLog")
1536 : !$OMP END MASTER
1537 :
1538 : CALL collect_load_balance_info(para_env, actual_x_data, iw, n_threads, i_thread, &
1539 : hfx_do_eval_forces)
1540 :
1541 : !$OMP MASTER
1542 : CALL cp_print_key_finished_output(iw, logger, hfx_section, &
1543 : "LOAD_BALANCE%PRINT/LOAD_BALANCE_INFO")
1544 : !$OMP END MASTER
1545 : END IF
1546 :
1547 : IF (use_virial) THEN
1548 : DO i = 1, 3
1549 : DO j = 1, 3
1550 : DO k = 1, 3
1551 : !$OMP ATOMIC
1552 : virial%pv_fock_4c(i, j) = virial%pv_fock_4c(i, j) + tmp_virial(i, k)*cell%hmat(j, k)
1553 : END DO
1554 : END DO
1555 : END DO
1556 : END IF
1557 :
1558 : !$OMP BARRIER
1559 : !! Get some number about ERIS
1560 : !$OMP ATOMIC
1561 : shm_neris_total = shm_neris_total + neris_total
1562 : !$OMP ATOMIC
1563 : shm_neris_onthefly = shm_neris_onthefly + neris_onthefly
1564 : !$OMP ATOMIC
1565 : shm_nprim_ints = shm_nprim_ints + nprim_ints
1566 :
1567 : storage_counter_integrals = memory_parameter%actual_memory_usage* &
1568 : memory_parameter%cache_size
1569 : stor_count_max_val = max_val_memory*memory_parameter%cache_size
1570 : !$OMP ATOMIC
1571 : shm_storage_counter_integrals = shm_storage_counter_integrals + storage_counter_integrals
1572 : !$OMP ATOMIC
1573 : shm_neris_incore = shm_neris_incore + neris_incore
1574 : !$OMP ATOMIC
1575 : shm_stor_count_max_val = shm_stor_count_max_val + stor_count_max_val
1576 : !$OMP BARRIER
1577 :
1578 : ! IF(with_resp_density) THEN
1579 : ! ! restore original load_balance_parameter
1580 : ! actual_x_data%load_balance_parameter = load_balance_parameter_energy
1581 : ! DEALLOCATE(load_balance_parameter_energy)
1582 : ! END IF
1583 :
1584 : !$OMP MASTER
1585 : !! Print some memeory information if this is the first step
1586 : IF (actual_x_data%memory_parameter%recalc_forces) THEN
1587 : tmp_i8(1:6) = [shm_storage_counter_integrals, shm_neris_onthefly, shm_neris_incore, &
1588 : shm_neris_total, shm_nprim_ints, shm_stor_count_max_val]
1589 : CALL para_env%sum(tmp_i8)
1590 : shm_storage_counter_integrals = tmp_i8(1)
1591 : shm_neris_onthefly = tmp_i8(2)
1592 : shm_neris_incore = tmp_i8(3)
1593 : shm_neris_total = tmp_i8(4)
1594 : shm_nprim_ints = tmp_i8(5)
1595 : shm_stor_count_max_val = tmp_i8(6)
1596 : mem_eris = (shm_storage_counter_integrals + 128*1024 - 1)/1024/128
1597 : compression_factor = REAL(shm_neris_incore, dp)/REAL(shm_storage_counter_integrals, dp)
1598 : mem_max_val = (shm_stor_count_max_val + 128*1024 - 1)/1024/128
1599 :
1600 : IF (shm_neris_incore == 0) THEN
1601 : mem_eris = 0
1602 : compression_factor = 0.0_dp
1603 : END IF
1604 :
1605 : logger => cp_get_default_logger()
1606 :
1607 : iw = cp_print_key_unit_nr(logger, hfx_section, "HF_INFO", &
1608 : extension=".scfLog")
1609 : IF (iw > 0) THEN
1610 : WRITE (UNIT=iw, FMT="(/,(T3,A,T65,I16))") &
1611 : "HFX_MEM_INFO| Number of cart. primitive DERIV's calculated: ", shm_nprim_ints
1612 :
1613 : WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
1614 : "HFX_MEM_INFO| Number of sph. DERIV's calculated: ", shm_neris_total
1615 :
1616 : WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
1617 : "HFX_MEM_INFO| Number of sph. DERIV's stored in-core: ", shm_neris_incore
1618 :
1619 : WRITE (UNIT=iw, FMT="((T3,A,T62,I19))") &
1620 : "HFX_MEM_INFO| Number of sph. DERIV's calculated on the fly: ", shm_neris_onthefly
1621 :
1622 : WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
1623 : "HFX_MEM_INFO| Total memory consumption DERIV's RAM [MiB]: ", mem_eris
1624 :
1625 : WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
1626 : "HFX_MEM_INFO| Whereof max-vals [MiB]: ", mem_max_val
1627 :
1628 : WRITE (UNIT=iw, FMT="((T3,A,T60,F21.2),/)") &
1629 : "HFX_MEM_INFO| Total compression factor DERIV's RAM: ", compression_factor
1630 : END IF
1631 :
1632 : CALL cp_print_key_finished_output(iw, logger, hfx_section, &
1633 : "HF_INFO")
1634 : END IF
1635 : !$OMP END MASTER
1636 :
1637 : !! flush caches if the geometry changed
1638 : IF (do_dynamic_load_balancing) THEN
1639 : my_bin_size = SIZE(actual_x_data%distribution_forces)
1640 : ELSE
1641 : my_bin_size = 1
1642 : END IF
1643 :
1644 : IF (actual_x_data%memory_parameter%recalc_forces) THEN
1645 : IF (treat_forces_in_core) THEN
1646 : DO bin = 1, my_bin_size
1647 : maxval_cache => actual_x_data%store_forces%maxval_cache(bin)
1648 : maxval_container => actual_x_data%store_forces%maxval_container(bin)
1649 : integral_caches => actual_x_data%store_forces%integral_caches(:, bin)
1650 : integral_containers => actual_x_data%store_forces%integral_containers(:, bin)
1651 : CALL hfx_flush_last_cache(bits_max_val, maxval_cache, maxval_container, memory_parameter%actual_memory_usage, &
1652 : .FALSE.)
1653 : DO i = 1, 64
1654 : CALL hfx_flush_last_cache(i, integral_caches(i), integral_containers(i), &
1655 : memory_parameter%actual_memory_usage, .FALSE.)
1656 : END DO
1657 : END DO
1658 : END IF
1659 : END IF
1660 : !! reset all caches except we calculate all on the fly
1661 : IF (treat_forces_in_core) THEN
1662 : DO bin = 1, my_bin_size
1663 : maxval_cache => actual_x_data%store_forces%maxval_cache(bin)
1664 : maxval_container => actual_x_data%store_forces%maxval_container(bin)
1665 : integral_caches => actual_x_data%store_forces%integral_caches(:, bin)
1666 : integral_containers => actual_x_data%store_forces%integral_containers(:, bin)
1667 :
1668 : CALL hfx_reset_cache_and_container(maxval_cache, maxval_container, memory_parameter%actual_memory_usage, .FALSE.)
1669 : DO i = 1, 64
1670 : CALL hfx_reset_cache_and_container(integral_caches(i), integral_containers(i), &
1671 : memory_parameter%actual_memory_usage, &
1672 : .FALSE.)
1673 : END DO
1674 : END DO
1675 : END IF
1676 :
1677 : IF (actual_x_data%memory_parameter%recalc_forces) THEN
1678 : actual_x_data%memory_parameter%recalc_forces = .FALSE.
1679 : END IF
1680 :
1681 : !$OMP BARRIER
1682 : !$OMP MASTER
1683 : CALL timestop(handle_main)
1684 : !$OMP END MASTER
1685 : !$OMP BARRIER
1686 : DEALLOCATE (last_sgf_global)
1687 : !$OMP MASTER
1688 : DEALLOCATE (full_density_alpha)
1689 : IF (with_resp_density) THEN
1690 : DEALLOCATE (full_density_resp)
1691 : END IF
1692 : IF (my_nspins == 2) THEN
1693 : DEALLOCATE (full_density_beta)
1694 : IF (with_resp_density) THEN
1695 : DEALLOCATE (full_density_resp_beta)
1696 : END IF
1697 : END IF
1698 : IF (do_dynamic_load_balancing) THEN
1699 : DEALLOCATE (actual_x_data%task_list)
1700 : END IF
1701 : !$OMP END MASTER
1702 : DEALLOCATE (primitive_forces)
1703 : DEALLOCATE (atom_of_kind, kind_of)
1704 : DEALLOCATE (max_contraction)
1705 : DEALLOCATE (pbd_buf)
1706 : DEALLOCATE (pbc_buf)
1707 : DEALLOCATE (pad_buf)
1708 : DEALLOCATE (pac_buf)
1709 :
1710 : IF (with_resp_density) THEN
1711 : DEALLOCATE (pbd_buf_resp)
1712 : DEALLOCATE (pbc_buf_resp)
1713 : DEALLOCATE (pad_buf_resp)
1714 : DEALLOCATE (pac_buf_resp)
1715 : END IF
1716 :
1717 : DO i = 1, max_pgf**2
1718 : DEALLOCATE (pgf_list_ij(i)%image_list)
1719 : DEALLOCATE (pgf_list_kl(i)%image_list)
1720 : END DO
1721 :
1722 : DEALLOCATE (pgf_list_ij)
1723 : DEALLOCATE (pgf_list_kl)
1724 : DEALLOCATE (pgf_product_list)
1725 :
1726 : DEALLOCATE (set_list_ij, set_list_kl)
1727 :
1728 : DEALLOCATE (primitive_forces_virial)
1729 :
1730 : DEALLOCATE (ede_work, ede_work2, ede_work_forces, ede_buffer1, ede_buffer2, ede_primitives_tmp)
1731 :
1732 : DEALLOCATE (ede_work_virial, ede_work2_virial, ede_primitives_tmp_virial)
1733 :
1734 : DEALLOCATE (nimages)
1735 :
1736 : IF (my_nspins == 2) THEN
1737 : DEALLOCATE (pbd_buf_beta, pbc_buf_beta, pad_buf_beta, pac_buf_beta)
1738 : IF (with_resp_density) THEN
1739 : DEALLOCATE (pbd_buf_resp_beta)
1740 : DEALLOCATE (pbc_buf_resp_beta)
1741 : DEALLOCATE (pad_buf_resp_beta)
1742 : DEALLOCATE (pac_buf_resp_beta)
1743 : END IF
1744 : END IF
1745 :
1746 : !
1747 : ! this wraps to free_libint, but currently causes a segfault
1748 : ! as a result, we don't call it, but some memory remains allocated
1749 : !
1750 : ! CALL terminate_libderiv(private_deriv)
1751 :
1752 : !$OMP END PARALLEL
1753 :
1754 1834 : CALL timestop(handle)
1755 :
1756 3812012 : END SUBROUTINE derivatives_four_center
1757 :
1758 : ! **************************************************************************************************
1759 : !> \brief ...
1760 : !> \param deriv ...
1761 : !> \param ra ...
1762 : !> \param rb ...
1763 : !> \param rc ...
1764 : !> \param rd ...
1765 : !> \param npgfa ...
1766 : !> \param npgfb ...
1767 : !> \param npgfc ...
1768 : !> \param npgfd ...
1769 : !> \param la_min ...
1770 : !> \param la_max ...
1771 : !> \param lb_min ...
1772 : !> \param lb_max ...
1773 : !> \param lc_min ...
1774 : !> \param lc_max ...
1775 : !> \param ld_min ...
1776 : !> \param ld_max ...
1777 : !> \param nsgfa ...
1778 : !> \param nsgfb ...
1779 : !> \param nsgfc ...
1780 : !> \param nsgfd ...
1781 : !> \param sphi_a_u1 ...
1782 : !> \param sphi_a_u2 ...
1783 : !> \param sphi_a_u3 ...
1784 : !> \param sphi_b_u1 ...
1785 : !> \param sphi_b_u2 ...
1786 : !> \param sphi_b_u3 ...
1787 : !> \param sphi_c_u1 ...
1788 : !> \param sphi_c_u2 ...
1789 : !> \param sphi_c_u3 ...
1790 : !> \param sphi_d_u1 ...
1791 : !> \param sphi_d_u2 ...
1792 : !> \param sphi_d_u3 ...
1793 : !> \param zeta ...
1794 : !> \param zetb ...
1795 : !> \param zetc ...
1796 : !> \param zetd ...
1797 : !> \param primitive_integrals ...
1798 : !> \param potential_parameter ...
1799 : !> \param neighbor_cells ...
1800 : !> \param screen1 ...
1801 : !> \param screen2 ...
1802 : !> \param eps_schwarz ...
1803 : !> \param max_contraction_val ...
1804 : !> \param cart_estimate ...
1805 : !> \param cell ...
1806 : !> \param neris_tmp ...
1807 : !> \param log10_pmax ...
1808 : !> \param log10_eps_schwarz ...
1809 : !> \param R1_pgf ...
1810 : !> \param R2_pgf ...
1811 : !> \param pgf1 ...
1812 : !> \param pgf2 ...
1813 : !> \param pgf_list_ij ...
1814 : !> \param pgf_list_kl ...
1815 : !> \param pgf_product_list ...
1816 : !> \param nsgfl_a ...
1817 : !> \param nsgfl_b ...
1818 : !> \param nsgfl_c ...
1819 : !> \param nsgfl_d ...
1820 : !> \param sphi_a_ext ...
1821 : !> \param sphi_b_ext ...
1822 : !> \param sphi_c_ext ...
1823 : !> \param sphi_d_ext ...
1824 : !> \param ede_work ...
1825 : !> \param ede_work2 ...
1826 : !> \param ede_work_forces ...
1827 : !> \param ede_buffer1 ...
1828 : !> \param ede_buffer2 ...
1829 : !> \param ede_primitives_tmp ...
1830 : !> \param nimages ...
1831 : !> \param do_periodic ...
1832 : !> \param use_virial ...
1833 : !> \param ede_work_virial ...
1834 : !> \param ede_work2_virial ...
1835 : !> \param ede_primitives_tmp_virial ...
1836 : !> \param primitive_integrals_virial ...
1837 : !> \param cart_estimate_virial ...
1838 : ! **************************************************************************************************
1839 1656696 : SUBROUTINE forces4(deriv, ra, rb, rc, rd, npgfa, npgfb, npgfc, npgfd, &
1840 : la_min, la_max, lb_min, lb_max, &
1841 : lc_min, lc_max, ld_min, ld_max, nsgfa, nsgfb, nsgfc, nsgfd, &
1842 : sphi_a_u1, sphi_a_u2, sphi_a_u3, &
1843 : sphi_b_u1, sphi_b_u2, sphi_b_u3, &
1844 : sphi_c_u1, sphi_c_u2, sphi_c_u3, &
1845 : sphi_d_u1, sphi_d_u2, sphi_d_u3, &
1846 1656696 : zeta, zetb, zetc, zetd, &
1847 1656696 : primitive_integrals, &
1848 : potential_parameter, neighbor_cells, &
1849 : screen1, screen2, eps_schwarz, max_contraction_val, &
1850 : cart_estimate, cell, neris_tmp, log10_pmax, &
1851 : log10_eps_schwarz, R1_pgf, R2_pgf, pgf1, pgf2, &
1852 : pgf_list_ij, pgf_list_kl, &
1853 : pgf_product_list, &
1854 1656696 : nsgfl_a, nsgfl_b, nsgfl_c, &
1855 1656696 : nsgfl_d, &
1856 1656696 : sphi_a_ext, sphi_b_ext, sphi_c_ext, sphi_d_ext, &
1857 : ede_work, ede_work2, ede_work_forces, &
1858 : ede_buffer1, ede_buffer2, ede_primitives_tmp, &
1859 : nimages, do_periodic, use_virial, ede_work_virial, &
1860 1656696 : ede_work2_virial, ede_primitives_tmp_virial, primitive_integrals_virial, &
1861 : cart_estimate_virial)
1862 :
1863 : TYPE(cp_libint_t) :: deriv
1864 : REAL(dp), INTENT(IN) :: ra(3), rb(3), rc(3), rd(3)
1865 : INTEGER, INTENT(IN) :: npgfa, npgfb, npgfc, npgfd, la_min, la_max, lb_min, lb_max, lc_min, &
1866 : lc_max, ld_min, ld_max, nsgfa, nsgfb, nsgfc, nsgfd, sphi_a_u1, sphi_a_u2, sphi_a_u3, &
1867 : sphi_b_u1, sphi_b_u2, sphi_b_u3, sphi_c_u1, sphi_c_u2, sphi_c_u3, sphi_d_u1, sphi_d_u2, &
1868 : sphi_d_u3
1869 : REAL(dp), DIMENSION(1:npgfa), INTENT(IN) :: zeta
1870 : REAL(dp), DIMENSION(1:npgfb), INTENT(IN) :: zetb
1871 : REAL(dp), DIMENSION(1:npgfc), INTENT(IN) :: zetc
1872 : REAL(dp), DIMENSION(1:npgfd), INTENT(IN) :: zetd
1873 : REAL(dp), &
1874 : DIMENSION(nsgfa, nsgfb, nsgfc, nsgfd, 12) :: primitive_integrals
1875 : TYPE(hfx_potential_type) :: potential_parameter
1876 : TYPE(hfx_cell_type), DIMENSION(:), POINTER :: neighbor_cells
1877 : REAL(dp), INTENT(IN) :: screen1(2), screen2(2), eps_schwarz, &
1878 : max_contraction_val
1879 : REAL(dp) :: cart_estimate
1880 : TYPE(cell_type), POINTER :: cell
1881 : INTEGER(int_8) :: neris_tmp
1882 : REAL(dp), INTENT(IN) :: log10_pmax, log10_eps_schwarz
1883 : TYPE(hfx_screen_coeff_type), DIMENSION(:, :), &
1884 : POINTER :: R1_pgf, R2_pgf, pgf1, pgf2
1885 : TYPE(hfx_pgf_list), DIMENSION(*) :: pgf_list_ij, pgf_list_kl
1886 : TYPE(hfx_pgf_product_list), ALLOCATABLE, &
1887 : DIMENSION(:), INTENT(INOUT) :: pgf_product_list
1888 : INTEGER, DIMENSION(0:), INTENT(IN) :: nsgfl_a, nsgfl_b, nsgfl_c, nsgfl_d
1889 : REAL(dp), INTENT(IN) :: sphi_a_ext(sphi_a_u1, sphi_a_u2, sphi_a_u3), &
1890 : sphi_b_ext(sphi_b_u1, sphi_b_u2, sphi_b_u3), sphi_c_ext(sphi_c_u1, sphi_c_u2, sphi_c_u3), &
1891 : sphi_d_ext(sphi_d_u1, sphi_d_u2, sphi_d_u3)
1892 : REAL(dp), DIMENSION(*) :: ede_work, ede_work2, ede_work_forces, &
1893 : ede_buffer1, ede_buffer2, &
1894 : ede_primitives_tmp
1895 : INTEGER, DIMENSION(*) :: nimages
1896 : LOGICAL, INTENT(IN) :: do_periodic, use_virial
1897 : REAL(dp), DIMENSION(*) :: ede_work_virial, ede_work2_virial, &
1898 : ede_primitives_tmp_virial
1899 : REAL(dp), &
1900 : DIMENSION(nsgfa, nsgfb, nsgfc, nsgfd, 12, 3) :: primitive_integrals_virial
1901 : REAL(dp) :: cart_estimate_virial
1902 :
1903 : INTEGER :: ipgf, jpgf, kpgf, la, lb, lc, ld, list_ij, list_kl, lpgf, max_l, ncoa, ncob, &
1904 : ncoc, ncod, nelements_ij, nelements_kl, nproducts, nsgfla, nsgflb, nsgflc, nsgfld, nsoa, &
1905 : nsob, nsoc, nsod, s_offset_a, s_offset_b, s_offset_c, s_offset_d
1906 : REAL(dp) :: EtaInv, tmp_max, tmp_max_virial, Zeta_A, &
1907 : Zeta_B, Zeta_C, Zeta_D, ZetaInv
1908 :
1909 : CALL build_pair_list_pgf(npgfa, npgfb, pgf_list_ij, zeta, zetb, screen1, screen2, &
1910 : pgf1, R1_pgf, log10_pmax, log10_eps_schwarz, ra, rb, &
1911 : nelements_ij, &
1912 1656696 : neighbor_cells, nimages, do_periodic)
1913 : CALL build_pair_list_pgf(npgfc, npgfd, pgf_list_kl, zetc, zetd, screen2, screen1, &
1914 : pgf2, R2_pgf, log10_pmax, log10_eps_schwarz, rc, rd, &
1915 : nelements_kl, &
1916 1656696 : neighbor_cells, nimages, do_periodic)
1917 :
1918 1656696 : cart_estimate = 0.0_dp
1919 848144748 : primitive_integrals = 0.0_dp
1920 1656696 : IF (use_virial) THEN
1921 284878632 : primitive_integrals_virial = 0.0_dp
1922 198417 : cart_estimate_virial = 0.0_dp
1923 : END IF
1924 1656696 : neris_tmp = 0_int_8
1925 1656696 : max_l = la_max + lb_max + lc_max + ld_max + 1
1926 4465246 : DO list_ij = 1, nelements_ij
1927 2808550 : Zeta_A = pgf_list_ij(list_ij)%zeta
1928 2808550 : Zeta_B = pgf_list_ij(list_ij)%zetb
1929 2808550 : ZetaInv = pgf_list_ij(list_ij)%ZetaInv
1930 2808550 : ipgf = pgf_list_ij(list_ij)%ipgf
1931 2808550 : jpgf = pgf_list_ij(list_ij)%jpgf
1932 :
1933 17979414 : DO list_kl = 1, nelements_kl
1934 13514168 : Zeta_C = pgf_list_kl(list_kl)%zeta
1935 13514168 : Zeta_D = pgf_list_kl(list_kl)%zetb
1936 13514168 : EtaInv = pgf_list_kl(list_kl)%ZetaInv
1937 13514168 : kpgf = pgf_list_kl(list_kl)%ipgf
1938 13514168 : lpgf = pgf_list_kl(list_kl)%jpgf
1939 :
1940 : CALL build_pgf_product_list(pgf_list_ij(list_ij), pgf_list_kl(list_kl), pgf_product_list, &
1941 : nproducts, log10_pmax, log10_eps_schwarz, neighbor_cells, cell, &
1942 13514168 : potential_parameter, max_l, do_periodic)
1943 13514168 : s_offset_a = 0
1944 32600209 : DO la = la_min, la_max
1945 16277491 : s_offset_b = 0
1946 16277491 : ncoa = nco(la)
1947 16277491 : nsgfla = nsgfl_a(la)
1948 16277491 : nsoa = nso(la)
1949 :
1950 33549833 : DO lb = lb_min, lb_max
1951 17272342 : s_offset_c = 0
1952 17272342 : ncob = nco(lb)
1953 17272342 : nsgflb = nsgfl_b(lb)
1954 17272342 : nsob = nso(lb)
1955 :
1956 42648386 : DO lc = lc_min, lc_max
1957 25376044 : s_offset_d = 0
1958 25376044 : ncoc = nco(lc)
1959 25376044 : nsgflc = nsgfl_c(lc)
1960 25376044 : nsoc = nso(lc)
1961 :
1962 57749680 : DO ld = ld_min, ld_max
1963 32373636 : ncod = nco(ld)
1964 32373636 : nsgfld = nsgfl_d(ld)
1965 32373636 : nsod = nso(ld)
1966 32373636 : tmp_max = 0.0_dp
1967 32373636 : tmp_max_virial = 0.0_dp
1968 : CALL evaluate_deriv_eri(deriv, nproducts, pgf_product_list, &
1969 : la, lb, lc, ld, &
1970 : ncoa, ncob, ncoc, ncod, &
1971 : nsgfa, nsgfb, nsgfc, nsgfd, &
1972 : primitive_integrals, &
1973 : max_contraction_val, tmp_max, eps_schwarz, neris_tmp, &
1974 : Zeta_A, Zeta_B, Zeta_C, Zeta_D, ZetaInv, EtaInv, &
1975 : s_offset_a, s_offset_b, s_offset_c, s_offset_d, &
1976 : nsgfla, nsgflb, nsgflc, nsgfld, nsoa, nsob, nsoc, nsod, &
1977 : sphi_a_ext(1, la + 1, ipgf), &
1978 : sphi_b_ext(1, lb + 1, jpgf), &
1979 : sphi_c_ext(1, lc + 1, kpgf), &
1980 : sphi_d_ext(1, ld + 1, lpgf), &
1981 : ede_work, ede_work2, ede_work_forces, &
1982 : ede_buffer1, ede_buffer2, ede_primitives_tmp, use_virial, &
1983 : ede_work_virial, ede_work2_virial, ede_primitives_tmp_virial, &
1984 32373636 : primitive_integrals_virial, cell, tmp_max_virial)
1985 32373636 : cart_estimate = MAX(tmp_max, cart_estimate)
1986 32373636 : IF (use_virial) cart_estimate_virial = MAX(tmp_max_virial, cart_estimate_virial)
1987 57749680 : s_offset_d = s_offset_d + nsod*nsgfld
1988 : END DO !ld
1989 42648386 : s_offset_c = s_offset_c + nsoc*nsgflc
1990 : END DO !lc
1991 33549833 : s_offset_b = s_offset_b + nsob*nsgflb
1992 : END DO !lb
1993 29791659 : s_offset_a = s_offset_a + nsoa*nsgfla
1994 : END DO !la
1995 : END DO
1996 : END DO
1997 :
1998 1656696 : END SUBROUTINE forces4
1999 :
2000 : ! **************************************************************************************************
2001 : !> \brief Given a 2d index pair, this function returns a 1d index pair for
2002 : !> a symmetric upper triangle NxN matrix
2003 : !> The compiler should inline this function, therefore it appears in
2004 : !> several modules
2005 : !> \param i 2d index
2006 : !> \param j 2d index
2007 : !> \param N matrix size
2008 : !> \return ...
2009 : !> \par History
2010 : !> 03.2009 created [Manuel Guidon]
2011 : !> \author Manuel Guidon
2012 : ! **************************************************************************************************
2013 58052 : PURE FUNCTION get_1D_idx(i, j, N)
2014 : INTEGER, INTENT(IN) :: i, j
2015 : INTEGER(int_8), INTENT(IN) :: N
2016 : INTEGER(int_8) :: get_1D_idx
2017 :
2018 : INTEGER(int_8) :: min_ij
2019 :
2020 58052 : min_ij = MIN(i, j)
2021 58052 : get_1D_idx = min_ij*N + MAX(i, j) - (min_ij - 1)*min_ij/2 - N
2022 :
2023 58052 : END FUNCTION get_1D_idx
2024 :
2025 : ! **************************************************************************************************
2026 : !> \brief This routine prefetches density matrix elements, i.e. reshuffles the
2027 : !> data in a way that they can be accessed later on in a cache friendly
2028 : !> way
2029 : !> \param ma_max Size of matrix blocks
2030 : !> \param mb_max Size of matrix blocks
2031 : !> \param mc_max Size of matrix blocks
2032 : !> \param md_max Size of matrix blocks
2033 : !> \param density ...
2034 : !> \param pbd buffer that will contain P(b,d)
2035 : !> \param pbc buffer that will contain P(b,c)
2036 : !> \param pad buffer that will contain P(a,d)
2037 : !> \param pac buffer that will contain P(a,c)
2038 : !> \param iatom ...
2039 : !> \param jatom ...
2040 : !> \param katom ...
2041 : !> \param latom ...
2042 : !> \param iset ...
2043 : !> \param jset ...
2044 : !> \param kset ...
2045 : !> \param lset ...
2046 : !> \param offset_bd_set ...
2047 : !> \param offset_bc_set ...
2048 : !> \param offset_ad_set ...
2049 : !> \param offset_ac_set ...
2050 : !> \param atomic_offset_bd ...
2051 : !> \param atomic_offset_bc ...
2052 : !> \param atomic_offset_ad ...
2053 : !> \param atomic_offset_ac ...
2054 : !> \param anti_symmetric ...
2055 : !> \par History
2056 : !> 03.2009 created [Manuel Guidon]
2057 : !> \author Manuel Guidon
2058 : ! **************************************************************************************************
2059 2953427 : SUBROUTINE prefetch_density_matrix(ma_max, mb_max, mc_max, md_max, &
2060 2953427 : density, pbd, pbc, pad, pac, &
2061 : iatom, jatom, katom, latom, &
2062 : iset, jset, kset, lset, offset_bd_set, offset_bc_set, &
2063 : offset_ad_set, offset_ac_set, atomic_offset_bd, atomic_offset_bc, &
2064 : atomic_offset_ad, atomic_offset_ac, anti_symmetric)
2065 :
2066 : INTEGER, INTENT(IN) :: ma_max, mb_max, mc_max, md_max
2067 : REAL(dp), DIMENSION(:), INTENT(IN) :: density
2068 : REAL(dp), DIMENSION(*), INTENT(INOUT) :: pbd, pbc, pad, pac
2069 : INTEGER, INTENT(IN) :: iatom, jatom, katom, latom, iset, jset, &
2070 : kset, lset
2071 : INTEGER, DIMENSION(:, :), POINTER :: offset_bd_set, offset_bc_set, &
2072 : offset_ad_set, offset_ac_set
2073 : INTEGER, INTENT(IN) :: atomic_offset_bd, atomic_offset_bc, &
2074 : atomic_offset_ad, atomic_offset_ac
2075 : LOGICAL :: anti_symmetric
2076 :
2077 : INTEGER :: i, j, ma, mb, mc, md, offset_ac, &
2078 : offset_ad, offset_bc, offset_bd
2079 : REAL(dp) :: fac
2080 :
2081 2953427 : fac = 1.0_dp
2082 2953427 : IF (anti_symmetric) fac = -1.0_dp
2083 2953427 : IF (jatom >= latom) THEN
2084 2947869 : i = 1
2085 2947869 : offset_bd = offset_bd_set(jset, lset) + atomic_offset_bd - 1
2086 2947869 : j = offset_bd
2087 8666547 : DO md = 1, md_max
2088 18713602 : DO mb = 1, mb_max
2089 10047055 : pbd(i) = density(j)*fac
2090 10047055 : i = i + 1
2091 15765733 : j = j + 1
2092 : END DO
2093 : END DO
2094 : ELSE
2095 5558 : i = 1
2096 5558 : offset_bd = offset_bd_set(lset, jset) + atomic_offset_bd - 1
2097 13394 : DO md = 1, md_max
2098 7836 : j = offset_bd + md - 1
2099 25078 : DO mb = 1, mb_max
2100 11684 : pbd(i) = density(j)
2101 11684 : i = i + 1
2102 19520 : j = j + md_max
2103 : END DO
2104 : END DO
2105 : END IF
2106 2953427 : IF (jatom >= katom) THEN
2107 2953427 : i = 1
2108 2953427 : offset_bc = offset_bc_set(jset, kset) + atomic_offset_bc - 1
2109 2953427 : j = offset_bc
2110 9679610 : DO mc = 1, mc_max
2111 21143683 : DO mb = 1, mb_max
2112 11464073 : pbc(i) = density(j)*fac
2113 11464073 : i = i + 1
2114 18190256 : j = j + 1
2115 : END DO
2116 : END DO
2117 : ELSE
2118 0 : i = 1
2119 0 : offset_bc = offset_bc_set(kset, jset) + atomic_offset_bc - 1
2120 0 : DO mc = 1, mc_max
2121 0 : j = offset_bc + mc - 1
2122 0 : DO mb = 1, mb_max
2123 0 : pbc(i) = density(j)
2124 0 : i = i + 1
2125 0 : j = j + mc_max
2126 : END DO
2127 : END DO
2128 : END IF
2129 2953427 : IF (iatom >= latom) THEN
2130 2280106 : i = 1
2131 2280106 : offset_ad = offset_ad_set(iset, lset) + atomic_offset_ad - 1
2132 2280106 : j = offset_ad
2133 7039963 : DO md = 1, md_max
2134 17025747 : DO ma = 1, ma_max
2135 9985784 : pad(i) = density(j)*fac
2136 9985784 : i = i + 1
2137 14745641 : j = j + 1
2138 : END DO
2139 : END DO
2140 : ELSE
2141 673321 : i = 1
2142 673321 : offset_ad = offset_ad_set(lset, iset) + atomic_offset_ad - 1
2143 1639978 : DO md = 1, md_max
2144 966657 : j = offset_ad + md - 1
2145 3994159 : DO ma = 1, ma_max
2146 2354181 : pad(i) = density(j)
2147 2354181 : i = i + 1
2148 3320838 : j = j + md_max
2149 : END DO
2150 : END DO
2151 : END IF
2152 2953427 : IF (iatom >= katom) THEN
2153 2875430 : i = 1
2154 2875430 : offset_ac = offset_ac_set(iset, kset) + atomic_offset_ac - 1
2155 2875430 : j = offset_ac
2156 9492346 : DO mc = 1, mc_max
2157 23754881 : DO ma = 1, ma_max
2158 14262535 : pac(i) = density(j)*fac
2159 14262535 : i = i + 1
2160 20879451 : j = j + 1
2161 : END DO
2162 : END DO
2163 : ELSE
2164 77997 : i = 1
2165 77997 : offset_ac = offset_ac_set(kset, iset) + atomic_offset_ac - 1
2166 187264 : DO mc = 1, mc_max
2167 109267 : j = offset_ac + mc - 1
2168 518492 : DO ma = 1, ma_max
2169 331228 : pac(i) = density(j)
2170 331228 : i = i + 1
2171 440495 : j = j + mc_max
2172 : END DO
2173 : END DO
2174 : END IF
2175 2953427 : END SUBROUTINE prefetch_density_matrix
2176 :
2177 : ! **************************************************************************************************
2178 : !> \brief This routine updates the forces using buffered density matrices
2179 : !> \param ma_max Size of matrix blocks
2180 : !> \param mb_max Size of matrix blocks
2181 : !> \param mc_max Size of matrix blocks
2182 : !> \param md_max Size of matrix blocks
2183 : !> \param pbd buffer that will contain P(b,d)
2184 : !> \param pbc buffer that will contain P(b,c)
2185 : !> \param pad buffer that will contain P(a,d)
2186 : !> \param pac buffer that will contain P(a,c)
2187 : !> \param fac mulitplication factor (spin, symmetry)
2188 : !> \param prim primitive forces
2189 : !> \param force storage loacation for forces
2190 : !> \param forces_map index table
2191 : !> \param coord which of the 12 coords to be updated
2192 : !> \param pbd_resp ...
2193 : !> \param pbc_resp ...
2194 : !> \param pad_resp ...
2195 : !> \param pac_resp ...
2196 : !> \param resp_only ...
2197 : !> \par History
2198 : !> 03.2009 created [Manuel Guidon]
2199 : !> \author Manuel Guidon
2200 : ! **************************************************************************************************
2201 23372892 : SUBROUTINE update_forces(ma_max, mb_max, mc_max, md_max, &
2202 : pbd, pbc, pad, pac, fac, &
2203 23372892 : prim, force, forces_map, coord, &
2204 : pbd_resp, pbc_resp, pad_resp, pac_resp, resp_only)
2205 :
2206 : INTEGER, INTENT(IN) :: ma_max, mb_max, mc_max, md_max
2207 : REAL(dp), DIMENSION(*), INTENT(IN) :: pbd, pbc, pad, pac
2208 : REAL(dp), INTENT(IN) :: fac
2209 : REAL(dp), DIMENSION(ma_max*mb_max*mc_max*md_max), &
2210 : INTENT(IN) :: prim
2211 : TYPE(qs_force_type), DIMENSION(:), POINTER :: force
2212 : INTEGER, INTENT(IN) :: forces_map(4, 2), coord
2213 : REAL(dp), DIMENSION(*), INTENT(IN), OPTIONAL :: pbd_resp, pbc_resp, pad_resp, pac_resp
2214 : LOGICAL, INTENT(IN), OPTIONAL :: resp_only
2215 :
2216 : INTEGER :: ma, mb, mc, md, p_index
2217 : LOGICAL :: full_force, with_resp_density
2218 : REAL(dp) :: temp1, temp1_resp, temp3, temp3_resp, &
2219 : temp4, teresp
2220 :
2221 23372892 : with_resp_density = .FALSE.
2222 : IF (PRESENT(pbd_resp) .AND. &
2223 : PRESENT(pbc_resp) .AND. &
2224 23372892 : PRESENT(pad_resp) .AND. &
2225 : PRESENT(pac_resp)) with_resp_density = .TRUE.
2226 :
2227 : IF (with_resp_density) THEN
2228 12038916 : full_force = .TRUE.
2229 12038916 : IF (PRESENT(resp_only)) full_force = .NOT. resp_only
2230 12038916 : p_index = 0
2231 12038916 : temp4 = 0.0_dp
2232 35143416 : DO md = 1, md_max
2233 91310964 : DO mc = 1, mc_max
2234 186970452 : DO mb = 1, mb_max
2235 107698404 : temp1 = pbc((mc - 1)*mb_max + mb)*fac
2236 107698404 : temp3 = pbd((md - 1)*mb_max + mb)*fac
2237 107698404 : temp1_resp = pbc_resp((mc - 1)*mb_max + mb)*fac
2238 107698404 : temp3_resp = pbd_resp((md - 1)*mb_max + mb)*fac
2239 462275412 : DO ma = 1, ma_max
2240 298409460 : p_index = p_index + 1
2241 : ! HF-SCF
2242 298409460 : IF (full_force) THEN
2243 : teresp = temp1*pad((md - 1)*ma_max + ma) + &
2244 102372672 : temp3*pac((mc - 1)*ma_max + ma)
2245 : ELSE
2246 : teresp = 0.0_dp
2247 : END IF
2248 : ! RESP+HF
2249 : teresp = teresp + &
2250 : pac((mc - 1)*ma_max + ma)*temp3_resp + &
2251 : pac_resp((mc - 1)*ma_max + ma)*temp3 + &
2252 : pad((md - 1)*ma_max + ma)*temp1_resp + &
2253 298409460 : pad_resp((md - 1)*ma_max + ma)*temp1
2254 406107864 : temp4 = temp4 + teresp*prim(p_index)
2255 : END DO !ma
2256 : END DO !mb
2257 : END DO !mc
2258 : END DO !md
2259 : ELSE
2260 : p_index = 0
2261 : temp4 = 0.0_dp
2262 33809268 : DO md = 1, md_max
2263 94531200 : DO mc = 1, mc_max
2264 203911452 : DO mb = 1, mb_max
2265 120714228 : temp1 = pbc((mc - 1)*mb_max + mb)*fac
2266 120714228 : temp3 = pbd((md - 1)*mb_max + mb)*fac
2267 586306428 : DO ma = 1, ma_max
2268 404870268 : p_index = p_index + 1
2269 : teresp = temp1*pad((md - 1)*ma_max + ma) + &
2270 404870268 : temp3*pac((mc - 1)*ma_max + ma)
2271 525584496 : temp4 = temp4 + teresp*prim(p_index)
2272 : END DO !ma
2273 : END DO !mb
2274 : END DO !mc
2275 : END DO !md
2276 : END IF
2277 :
2278 23372892 : !$OMP ATOMIC
2279 : force(forces_map((coord - 1)/3 + 1, 1))%fock_4c(MOD(coord - 1, 3) + 1, &
2280 : forces_map((coord - 1)/3 + 1, 2)) = &
2281 : force(forces_map((coord - 1)/3 + 1, 1))%fock_4c(MOD(coord - 1, 3) + 1, &
2282 : forces_map((coord - 1)/3 + 1, 2)) - &
2283 : temp4
2284 :
2285 23372892 : END SUBROUTINE update_forces
2286 :
2287 : ! **************************************************************************************************
2288 : !> \brief This routine updates the virial using buffered density matrices
2289 : !> \param ma_max Size of matrix blocks
2290 : !> \param mb_max Size of matrix blocks
2291 : !> \param mc_max Size of matrix blocks
2292 : !> \param md_max Size of matrix blocks
2293 : !> \param pbd buffer that will contain P(b,d)
2294 : !> \param pbc buffer that will contain P(b,c)
2295 : !> \param pad buffer that will contain P(a,d)
2296 : !> \param pac buffer that will contain P(a,c)
2297 : !> \param fac mulitplication factor (spin, symmetry)
2298 : !> \param prim primitive forces
2299 : !> \param tmp_virial ...
2300 : !> \param coord which of the 12 coords to be updated
2301 : !> \param l ...
2302 : !> \param pbd_resp ...
2303 : !> \param pbc_resp ...
2304 : !> \param pad_resp ...
2305 : !> \param pac_resp ...
2306 : !> \par History
2307 : !> 03.2009 created [Manuel Guidon]
2308 : !> \author Manuel Guidon
2309 : ! **************************************************************************************************
2310 6323868 : SUBROUTINE update_virial(ma_max, mb_max, mc_max, md_max, &
2311 : pbd, pbc, pad, pac, fac, &
2312 6323868 : prim, tmp_virial, coord, l, &
2313 : pbd_resp, pbc_resp, pad_resp, pac_resp, resp_only)
2314 :
2315 : INTEGER, INTENT(IN) :: ma_max, mb_max, mc_max, md_max
2316 : REAL(dp), DIMENSION(*), INTENT(IN) :: pbd, pbc, pad, pac
2317 : REAL(dp), INTENT(IN) :: fac
2318 : REAL(dp), DIMENSION(ma_max*mb_max*mc_max*md_max), &
2319 : INTENT(IN) :: prim
2320 : REAL(dp) :: tmp_virial(3, 3)
2321 : INTEGER, INTENT(IN) :: coord, l
2322 : REAL(dp), DIMENSION(*), INTENT(IN), OPTIONAL :: pbd_resp, pbc_resp, pad_resp, pac_resp
2323 : LOGICAL, INTENT(IN), OPTIONAL :: resp_only
2324 :
2325 : INTEGER :: i, j, ma, mb, mc, md, p_index
2326 : LOGICAL :: with_resp_density, full_force
2327 : REAL(dp) :: temp1, temp1_resp, temp3, temp3_resp, &
2328 : temp4, teresp
2329 :
2330 6323868 : with_resp_density = .FALSE.
2331 : IF (PRESENT(pbd_resp) .AND. &
2332 : PRESENT(pbc_resp) .AND. &
2333 6323868 : PRESENT(pad_resp) .AND. &
2334 : PRESENT(pac_resp)) with_resp_density = .TRUE.
2335 :
2336 : IF (with_resp_density) THEN
2337 5587128 : full_force = .TRUE.
2338 5587128 : IF (PRESENT(resp_only)) full_force = .NOT. resp_only
2339 5587128 : p_index = 0
2340 5587128 : temp4 = 0.0_dp
2341 16806312 : DO md = 1, md_max
2342 41978952 : DO mc = 1, mc_max
2343 91230912 : DO mb = 1, mb_max
2344 54839088 : temp1 = pbc((mc - 1)*mb_max + mb)*fac
2345 54839088 : temp3 = pbd((md - 1)*mb_max + mb)*fac
2346 54839088 : temp1_resp = pbc_resp((mc - 1)*mb_max + mb)*fac
2347 54839088 : temp3_resp = pbd_resp((md - 1)*mb_max + mb)*fac
2348 236616804 : DO ma = 1, ma_max
2349 156605076 : p_index = p_index + 1
2350 : ! HF-SCF
2351 156605076 : IF (full_force) THEN
2352 : teresp = temp1*pad((md - 1)*ma_max + ma) + &
2353 25169472 : temp3*pac((mc - 1)*ma_max + ma)
2354 : ELSE
2355 : teresp = 0.0_dp
2356 : END IF
2357 : ! RESP+HF
2358 : teresp = teresp + &
2359 : pac((mc - 1)*ma_max + ma)*temp3_resp + &
2360 : pac_resp((mc - 1)*ma_max + ma)*temp3 + &
2361 : pad((md - 1)*ma_max + ma)*temp1_resp + &
2362 156605076 : pad_resp((md - 1)*ma_max + ma)*temp1
2363 211444164 : temp4 = temp4 + teresp*prim(p_index)
2364 : END DO !ma
2365 : END DO !mb
2366 : END DO !mc
2367 : END DO !md
2368 : ELSE
2369 : p_index = 0
2370 : temp4 = 0.0_dp
2371 1830528 : DO md = 1, md_max
2372 3917916 : DO mc = 1, mc_max
2373 5776452 : DO mb = 1, mb_max
2374 2595276 : temp1 = pbc((mc - 1)*mb_max + mb)*fac
2375 2595276 : temp3 = pbd((md - 1)*mb_max + mb)*fac
2376 9136800 : DO ma = 1, ma_max
2377 4454136 : p_index = p_index + 1
2378 : teresp = temp1*pad((md - 1)*ma_max + ma) + &
2379 4454136 : temp3*pac((mc - 1)*ma_max + ma)
2380 7049412 : temp4 = temp4 + teresp*prim(p_index)
2381 : END DO !ma
2382 : END DO !mb
2383 : END DO !mc
2384 : END DO !md
2385 : END IF
2386 :
2387 6323868 : j = l
2388 6323868 : i = MOD(coord - 1, 3) + 1
2389 6323868 : tmp_virial(i, j) = tmp_virial(i, j) - temp4
2390 6323868 : END SUBROUTINE update_virial
2391 :
2392 : #:include 'hfx_get_pmax_val.fypp'
2393 :
2394 : END MODULE hfx_derivatives
|