LCOV - code coverage report
Current view: top level - src - hfx_energy_potential.F (source / functions) Coverage Total Hit
Test: CP2K Regtests (git:c24029e) Lines: 88.7 % 408 362
Test Date: 2026-07-04 06:36:57 Functions: 83.3 % 6 5

            Line data    Source code
       1              : !--------------------------------------------------------------------------------------------------!
       2              : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3              : !   Copyright 2000-2026 CP2K developers group <https://cp2k.org>                                   !
       4              : !                                                                                                  !
       5              : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6              : !--------------------------------------------------------------------------------------------------!
       7              : 
       8              : ! **************************************************************************************************
       9              : !> \brief Routines to calculate HFX energy and potential
      10              : !> \par History
      11              : !>      11.2006 created [Manuel Guidon]
      12              : !>      10.2025 added libGint interface [M Puligheddu]
      13              : !> \author Manuel Guidon
      14              : ! **************************************************************************************************
      15              : MODULE hfx_energy_potential
      16              :    USE admm_types, ONLY: get_admm_env
      17              :    USE atomic_kind_types, ONLY: atomic_kind_type, &
      18              :                                 get_atomic_kind_set
      19              :    USE bibliography, ONLY: cite_reference, &
      20              :                            guidon2008, &
      21              :                            guidon2009
      22              :    USE cell_types, ONLY: cell_type, &
      23              :                          pbc
      24              :    USE cp_control_types, ONLY: dft_control_type
      25              :    USE cp_files, ONLY: close_file, &
      26              :                        open_file
      27              :    USE cp_log_handling, ONLY: cp_get_default_logger, &
      28              :                               cp_logger_type
      29              :    USE cp_output_handling, ONLY: cp_p_file, &
      30              :                                  cp_print_key_finished_output, &
      31              :                                  cp_print_key_should_output, &
      32              :                                  cp_print_key_unit_nr
      33              :    USE message_passing, ONLY: mp_para_env_type
      34              :    USE cp_dbcsr_api, ONLY: dbcsr_copy, &
      35              :                            dbcsr_get_matrix_type, &
      36              :                            dbcsr_p_type, &
      37              :                            dbcsr_type_antisymmetric, &
      38              :                            dbcsr_dot_threadsafe
      39              :    USE gamma, ONLY: init_md_ftable
      40              :    USE hfx_communication, ONLY: distribute_ks_matrix, &
      41              :                                 get_atomic_block_maps, &
      42              :                                 get_full_density
      43              :    USE hfx_compression_methods, ONLY: hfx_add_mult_cache_elements, &
      44              :                                       hfx_add_single_cache_element, &
      45              :                                       hfx_decompress_first_cache, &
      46              :                                       hfx_flush_last_cache, &
      47              :                                       hfx_get_mult_cache_elements, &
      48              :                                       hfx_get_single_cache_element, &
      49              :                                       hfx_reset_cache_and_container
      50              :    USE hfx_contract_block, ONLY: contract_block
      51              :    USE hfx_libint_interface, ONLY: evaluate_eri
      52              :    USE hfx_load_balance_methods, ONLY: collect_load_balance_info, &
      53              :                                        hfx_load_balance, &
      54              :                                        hfx_update_load_balance
      55              :    USE hfx_pair_list_methods, ONLY: build_atomic_pair_list, &
      56              :                                     build_pair_list, &
      57              :                                     build_pair_list_pgf, &
      58              :                                     build_pgf_product_list, &
      59              :                                     pgf_product_list_size
      60              :    USE hfx_screening_methods, ONLY: calc_pair_dist_radii, &
      61              :                                     calc_screening_functions, &
      62              :                                     update_pmax_mat
      63              :    USE hfx_types, ONLY: &
      64              :       alloc_containers, dealloc_containers, hfx_basis_info_type, hfx_basis_type, hfx_cache_type, &
      65              :       hfx_cell_type, hfx_container_type, hfx_create_neighbor_cells, hfx_distribution, &
      66              :       hfx_general_type, hfx_init_container, hfx_load_balance_type, hfx_memory_type, hfx_p_kind, &
      67              :       hfx_pgf_list, hfx_pgf_product_list, hfx_potential_type, hfx_reset_memory_usage_counter, &
      68              :       hfx_screen_coeff_type, hfx_screening_type, hfx_task_list_type, hfx_type, init_t_c_g0_lmax, &
      69              :       log_zero, pair_list_type, pair_set_list_type
      70              : 
      71              :    USE input_constants, ONLY: do_potential_mix_cl_trunc, &
      72              :                               do_potential_truncated, &
      73              :                               do_potential_TShPSC, &
      74              :                               do_potential_short, &
      75              :                               do_potential_coulomb, &
      76              :                               hfx_do_eval_energy, &
      77              :                               hfx_library_is_libint, hfx_library_is_libGint, hfx_library_is_both
      78              : 
      79              :    USE input_section_types, ONLY: section_vals_type
      80              :    USE kinds, ONLY: default_string_length, &
      81              :                     dp, &
      82              :                     int_8
      83              :    USE kpoint_types, ONLY: get_kpoint_info, &
      84              :                            kpoint_type
      85              :    USE libint_wrapper, ONLY: cp_libint_t
      86              :    USE machine, ONLY: m_flush, &
      87              :                       m_memory, &
      88              :                       m_walltime
      89              :    USE mathconstants, ONLY: fac
      90              :    USE orbital_pointers, ONLY: nco, &
      91              :                                ncoset, &
      92              :                                nso
      93              :    USE particle_types, ONLY: particle_type
      94              :    USE qs_environment_types, ONLY: get_qs_env, &
      95              :                                    qs_environment_type
      96              :    USE qs_ks_types, ONLY: get_ks_env, &
      97              :                           qs_ks_env_type
      98              :    USE t_c_g0, ONLY: init
      99              :    USE util, ONLY: sort
     100              :    use, intrinsic :: iso_c_binding, only: c_ptr
     101              : 
     102              :    use cp2k_info, only: cp2k_flags
     103              :    use offload_api, only: offload_get_chosen_device, offload_set_chosen_device, offload_activate_chosen_device
     104              :    use libGint_wrapper, only: cp_libGint_init, libGint_update_env, libGint_set_density, libGint_coulomb4, &
     105              :                               libGint_update_fock_matrix, libGint_get_fock_matrix
     106              : 
     107              : !$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num, omp_get_num_threads
     108              : 
     109              : #include "./base/base_uses.f90"
     110              : 
     111              :    IMPLICIT NONE
     112              :    PRIVATE
     113              : 
     114              :    PUBLIC ::  integrate_four_center, coulomb4
     115              : 
     116              :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'hfx_energy_potential'
     117              : 
     118              : !***
     119              : 
     120              : CONTAINS
     121              : 
     122              : ! **************************************************************************************************
     123              : !> \brief computes four center integrals for a full basis set and updates the
     124              : !>      Kohn-Sham-Matrix and energy. Uses all 8 eri symmetries
     125              : !> \param qs_env ...
     126              : !> \param x_data ...
     127              : !> \param ks_matrix ...
     128              : !> \param ehfx energy calculated with the updated HFX matrix
     129              : !> \param rho_ao density matrix in ao basis
     130              : !> \param hfx_section input_section HFX
     131              : !> \param para_env ...
     132              : !> \param geometry_did_change flag that indicates we have to recalc integrals
     133              : !> \param irep Index for the HFX replica
     134              : !> \param distribute_fock_matrix Flag that indicates whether to communicate the
     135              : !>        new fock matrix or not
     136              : !> \param ispin ...
     137              : !> \par History
     138              : !>      06.2007 created [Manuel Guidon]
     139              : !>      08.2007 optimized load balance [Manuel Guidon]
     140              : !>      09.2007 new parallelization [Manuel Guidon]
     141              : !>      02.2009 completely rewritten screening part [Manuel Guidon]
     142              : !>      12.2017 major bug fix. removed wrong cycle that was caussing segfault.
     143              : !>              see https://groups.google.com/forum/#!topic/cp2k/pc6B14XOALY
     144              : !>              [Tobias Binninger + Valery Weber]
     145              : !> \author Manuel Guidon
     146              : ! **************************************************************************************************
     147        40523 :    SUBROUTINE integrate_four_center(qs_env, x_data, ks_matrix, ehfx, rho_ao, hfx_section, para_env, &
     148              :                                     geometry_did_change, irep, distribute_fock_matrix, &
     149              :                                     ispin, nspins)
     150              : 
     151              :       TYPE(qs_environment_type), POINTER                 :: qs_env
     152              :       TYPE(hfx_type), DIMENSION(:, :), POINTER           :: x_data
     153              :       TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER       :: ks_matrix
     154              :       REAL(KIND=dp), INTENT(OUT)                         :: ehfx
     155              :       TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER       :: rho_ao
     156              :       TYPE(section_vals_type), POINTER                   :: hfx_section
     157              :       TYPE(mp_para_env_type), POINTER                    :: para_env
     158              :       LOGICAL                                            :: geometry_did_change
     159              :       INTEGER                                            :: irep
     160              :       LOGICAL, INTENT(IN)                                :: distribute_fock_matrix
     161              :       INTEGER, INTENT(IN)                                :: ispin
     162              :       INTEGER, INTENT(IN), OPTIONAL                      :: nspins
     163              : 
     164              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'integrate_four_center'
     165              : 
     166              :       CHARACTER(len=default_string_length)               :: eps_scaling_str, eps_schwarz_min_str
     167              :       INTEGER :: act_atomic_block_offset, act_set_offset, atomic_offset_ac, atomic_offset_ad, &
     168              :                  atomic_offset_bc, atomic_offset_bd, bin, bits_max_val, buffer_left, buffer_size, &
     169              :                  buffer_start, cache_size, current_counter, handle, handle_bin, handle_dist_ks, &
     170              :                  handle_getP, handle_load, handle_main, i, inla, nla, i_list_ij, i_list_kl, i_set_list_ij, &
     171              :                  i_set_list_ij_start, i_set_list_ij_stop, i_set_list_kl, i_set_list_kl_start, &
     172              :                  i_set_list_kl_stop, i_thread, iatom, iatom_block, iatom_end, iatom_start, ikind, img, &
     173              :                  iset, iw, j, jatom, jatom_block, jatom_end, jatom_start, jkind, jset, katom, katom_block, &
     174              :                  katom_end
     175              :       INTEGER :: katom_start, kind_kind_idx, kkind, kset, l_max, latom, latom_block, latom_end, &
     176              :                  latom_start, lkind, lset, ma, max_am, max_pgf, max_set, mb, my_bin_id, my_bin_size, &
     177              :                  my_thread_id, n_threads, natom, nbits, ncob, ncos_max, nints, nkimages, nkind, &
     178              :                  nneighbors, nseta, nsetb, nsgf_max, my_nspins, pa, sgfb, shm_task_counter, shm_total_bins, &
     179              :                  sphi_a_u1, sphi_a_u2, sphi_a_u3, sphi_b_u1, sphi_b_u2, sphi_b_u3, sphi_c_u1, sphi_c_u2, &
     180              :                  sphi_c_u3, sphi_d_u1, sphi_d_u2, sphi_d_u3, swap_id, tmp_i4, unit_id
     181              :       INTEGER(int_8) :: atom_block, counter, estimate_to_store_int, max_val_memory, &
     182              :                         mb_size_buffers, mb_size_f, mb_size_p, mem_compression_counter, &
     183              :                         mem_compression_counter_disk, mem_eris, mem_eris_disk, mem_max_val, memsize_after, &
     184              :                         memsize_before, my_current_counter, my_istart, n_processes, nblocks, ncpu, neris_disk, &
     185              :                         neris_incore, neris_onthefly, neris_tmp, neris_total, nprim_ints, &
     186              :                         shm_mem_compression_counter, shm_neris_disk, shm_neris_incore, shm_neris_onthefly, &
     187              :                         shm_neris_total, shm_nprim_ints, shm_stor_count_int_disk, shm_stor_count_max_val, &
     188              :                         shm_storage_counter_integrals, stor_count_int_disk
     189              :       INTEGER(int_8) :: stor_count_max_val, storage_counter_integrals, subtr_size_mb, tmp_block, &
     190              :                         tmp_i8(8)
     191        40523 :       INTEGER(int_8), ALLOCATABLE, DIMENSION(:)          :: tmp_task_list_cost
     192        81046 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: kind_of, last_sgf_global, nimages, &
     193        81046 :                                                             tmp_index, first_set_of_atom
     194        40523 :       INTEGER, DIMENSION(:), POINTER :: la_max, la_min, lb_max, lb_min, lc_max, lc_min, ld_max, &
     195        81046 :                                         ld_min, npgfa, npgfb, npgfc, npgfd, nsgfa, nsgfb, nsgfc, nsgfd, shm_block_offset
     196        81046 :       INTEGER, DIMENSION(:, :), POINTER :: first_sgfb, nsgfl_a, nsgfl_b, nsgfl_c, nsgfl_d, &
     197        40523 :                                            offset_ac_set, offset_ad_set, offset_bc_set, offset_bd_set, shm_atomic_block_offset
     198              :       INTEGER, DIMENSION(:, :), POINTER, SAVE            :: shm_is_assoc_atomic_block
     199        40523 :       INTEGER, DIMENSION(:, :, :), POINTER               :: cell_to_index
     200        40523 :       INTEGER, DIMENSION(:, :, :, :), POINTER            :: shm_set_offset
     201              :       INTEGER, SAVE                                      :: shm_number_of_p_entries
     202              :       LOGICAL :: bins_left, buffer_overflow, do_disk_storage, do_dynamic_load_balancing, do_it, &
     203              :                  do_kpoints, do_p_screening, do_periodic, do_print_load_balance_info, is_anti_symmetric, &
     204              :                  ks_fully_occ, my_geo_change, treat_lsd_in_core, use_disk_storage
     205        40523 :       LOGICAL, DIMENSION(:, :), POINTER                  :: shm_atomic_pair_list
     206              :       REAL(dp) :: afac, bintime_start, bintime_stop, cartesian_estimate, compression_factor, &
     207              :                   compression_factor_disk, ene_x_aa, ene_x_aa_diag, ene_x_bb, ene_x_bb_diag, eps_schwarz, &
     208              :                   eps_storage, etmp, fac, hf_fraction, ln_10, log10_eps_schwarz, log10_pmax, &
     209              :                   max_contraction_val, max_val1, max_val2, max_val2_set, pmax_atom, pmax_blocks, &
     210              :                   pmax_entry, ra(3), rab2, rb(3), rc(3), rcd2, rd(3), screen_kind_ij, screen_kind_kl, &
     211              :                   spherical_estimate, symm_fac
     212        40523 :       REAL(dp), ALLOCATABLE, DIMENSION(:) :: ee_buffer1, ee_buffer2, ee_primitives_tmp, ee_work, &
     213        81046 :                                              ee_work2, kac_buf, kad_buf, kbc_buf, kbd_buf, pac_buf, pad_buf, pbc_buf, pbd_buf, &
     214        40523 :                                              primitive_integrals
     215        40523 :       REAL(dp), DIMENSION(:), POINTER                    :: p_work
     216        40523 :       REAL(dp), DIMENSION(:, :), POINTER :: full_density_alpha, full_density_beta, full_ks_alpha, &
     217        81046 :                                             full_ks_beta, max_contraction, ptr_p_1, ptr_p_2, ptr_p_3, ptr_p_4, shm_pmax_atom, &
     218        40523 :                                             shm_pmax_block, sphi_b, zeta, zetb, zetc, zetd, &
     219        40523 :                                             full_ks_alpha_from_gpu, full_ks_beta_from_gpu
     220        40523 :       REAL(dp), DIMENSION(:, :, :), POINTER              :: sphi_a_ext_set, sphi_b_ext_set, &
     221        40523 :                                                             sphi_c_ext_set, sphi_d_ext_set
     222        40523 :       REAL(dp), DIMENSION(:, :, :, :), POINTER           :: sphi_a_ext, sphi_b_ext, sphi_c_ext, &
     223        40523 :                                                             sphi_d_ext
     224        40523 :       REAL(dp), DIMENSION(:, :, :), POINTER             :: gcc
     225              :       REAL(KIND=dp)                                      :: coeffs_kind_max0
     226        40523 :       TYPE(atomic_kind_type), DIMENSION(:), POINTER      :: atomic_kind_set
     227              :       TYPE(cell_type), POINTER                           :: cell
     228              :       TYPE(cp_libint_t)                                  :: private_lib
     229              :       TYPE(cp_logger_type), POINTER                      :: logger
     230        40523 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrix_ks_aux_fit_hfx
     231              :       TYPE(dft_control_type), POINTER                    :: dft_control
     232              :       TYPE(hfx_basis_info_type), POINTER                 :: basis_info
     233        40523 :       TYPE(hfx_basis_type), DIMENSION(:), POINTER        :: basis_parameter
     234        40523 :       TYPE(hfx_cache_type), DIMENSION(:), POINTER        :: integral_caches, integral_caches_disk
     235              :       TYPE(hfx_cache_type), POINTER                      :: maxval_cache, maxval_cache_disk
     236        40523 :       TYPE(hfx_container_type), DIMENSION(:), POINTER    :: integral_containers, &
     237        40523 :                                                             integral_containers_disk
     238              :       TYPE(hfx_container_type), POINTER                  :: maxval_container, maxval_container_disk
     239              :       TYPE(hfx_distribution), POINTER                    :: distribution_energy
     240              :       TYPE(hfx_general_type)                             :: general_parameter
     241              :       TYPE(hfx_load_balance_type), POINTER               :: load_balance_parameter
     242              :       TYPE(hfx_memory_type), POINTER                     :: memory_parameter
     243        40523 :       TYPE(hfx_p_kind), DIMENSION(:), POINTER            :: shm_initial_p
     244        40523 :       TYPE(hfx_pgf_list), ALLOCATABLE, DIMENSION(:)      :: pgf_list_ij, pgf_list_kl
     245              :       TYPE(hfx_pgf_product_list), ALLOCATABLE, &
     246        40523 :          DIMENSION(:)                                    :: pgf_product_list
     247              :       TYPE(hfx_potential_type)                           :: potential_parameter
     248              :       TYPE(hfx_screen_coeff_type), DIMENSION(:, :), &
     249        40523 :          POINTER                                         :: screen_coeffs_kind, tmp_R_1, tmp_R_2, &
     250        40523 :                                                             tmp_screen_pgf1, tmp_screen_pgf2
     251              :       TYPE(hfx_screen_coeff_type), &
     252        40523 :          DIMENSION(:, :, :, :), POINTER                  :: screen_coeffs_set
     253              :       TYPE(hfx_screen_coeff_type), &
     254        40523 :          DIMENSION(:, :, :, :, :, :), POINTER            :: radii_pgf, screen_coeffs_pgf
     255              :       TYPE(hfx_screening_type)                           :: screening_parameter
     256        40523 :       TYPE(hfx_task_list_type), DIMENSION(:), POINTER    :: shm_task_list, tmp_task_list
     257              :       TYPE(hfx_type), POINTER                            :: actual_x_data, shm_master_x_data
     258              :       TYPE(kpoint_type), POINTER                         :: kpoints
     259              :       TYPE(pair_list_type)                               :: list_ij, list_kl
     260              :       TYPE(pair_set_list_type), ALLOCATABLE, &
     261        40523 :          DIMENSION(:)                                    :: set_list_ij, set_list_kl
     262        40523 :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
     263              :       TYPE(qs_ks_env_type), POINTER                      :: ks_env
     264              : 
     265              :       logical :: screened
     266              :       real(dp) :: max_abs_delta_KS
     267              :       integer :: iatom_set, jatom_set, katom_set, latom_set, shared_dev
     268              :       logical :: use_libint, use_libgint, use_only_alpha_spin
     269              :       logical :: cp2k_was_compiled_with_cuda_offload, cp2k_was_compiled_with_libint
     270              :       logical :: cp2k_was_compiled_with_libGint
     271              : 
     272        40523 :       NULLIFY (dft_control, matrix_ks_aux_fit_hfx)
     273              : 
     274        40523 :       CALL timeset(routineN, handle)
     275              : 
     276        40523 :       CALL cite_reference(Guidon2008)
     277        40523 :       CALL cite_reference(Guidon2009)
     278              : 
     279        40523 :       ehfx = 0.0_dp
     280              : 
     281              :       ! ** This is not very clean, but effective. ispin can only be 2 if we do the beta spin part in core
     282        40523 :       my_geo_change = geometry_did_change
     283        40523 :       IF (ispin == 2) my_geo_change = .FALSE.
     284              : 
     285        40523 :       logger => cp_get_default_logger()
     286              : 
     287        40523 :       is_anti_symmetric = dbcsr_get_matrix_type(rho_ao(1, 1)%matrix) == dbcsr_type_antisymmetric
     288              : 
     289        40523 :       IF (my_geo_change) THEN
     290         2448 :          CALL m_memory(memsize_before)
     291         2448 :          CALL para_env%max(memsize_before)
     292              :          iw = cp_print_key_unit_nr(logger, hfx_section, "HF_INFO", &
     293         2448 :                                    extension=".scfLog")
     294         2448 :          IF (iw > 0) THEN
     295              :             WRITE (UNIT=iw, FMT="(/,(T3,A,T60,I21))") &
     296          442 :                "HFX_MEM_INFO| Est. max. program size before HFX [MiB]:", memsize_before/(1024*1024)
     297          442 :             CALL m_flush(iw)
     298              :          END IF
     299              :          CALL cp_print_key_finished_output(iw, logger, hfx_section, &
     300         2448 :                                            "HF_INFO")
     301              :       END IF
     302              : 
     303        40523 :       CALL get_qs_env(qs_env=qs_env, atomic_kind_set=atomic_kind_set, cell=cell)
     304              : 
     305        40523 :       NULLIFY (cell_to_index)
     306        40523 :       CALL get_qs_env(qs_env=qs_env, do_kpoints=do_kpoints)
     307        40523 :       IF (do_kpoints) THEN
     308            0 :          CALL get_qs_env(qs_env=qs_env, ks_env=ks_env)
     309            0 :          CALL get_ks_env(ks_env=ks_env, kpoints=kpoints)
     310            0 :          CALL get_kpoint_info(kpoint=kpoints, cell_to_index=cell_to_index)
     311              :       END IF
     312              : 
     313              :       !! Calculate l_max used in fgamma , because init_md_ftable is definitely not thread safe
     314        40523 :       nkind = SIZE(atomic_kind_set, 1)
     315        40523 :       l_max = 0
     316       115369 :       DO ikind = 1, nkind
     317       337315 :          l_max = MAX(l_max, MAXVAL(x_data(1, 1)%basis_parameter(ikind)%lmax))
     318              :       END DO
     319        40523 :       l_max = 4*l_max
     320        40523 :       CALL init_md_ftable(l_max)
     321              : 
     322        40523 :       IF (x_data(1, 1)%potential_parameter%potential_type == do_potential_truncated .OR. &
     323              :           x_data(1, 1)%potential_parameter%potential_type == do_potential_mix_cl_trunc) THEN
     324        10094 :          IF (l_max > init_t_c_g0_lmax) THEN
     325          322 :             IF (para_env%is_source()) THEN
     326          161 :                CALL open_file(unit_number=unit_id, file_name=x_data(1, 1)%potential_parameter%filename)
     327              :             END IF
     328          322 :             CALL init(l_max, unit_id, para_env%mepos, para_env)
     329          322 :             IF (para_env%is_source()) THEN
     330          161 :                CALL close_file(unit_id)
     331              :             END IF
     332          322 :             init_t_c_g0_lmax = l_max
     333              :          END IF
     334              :       END IF
     335              : 
     336        40523 :       n_threads = 1
     337        40523 : !$    n_threads = omp_get_max_threads()
     338              : 
     339              :       ! This initialization is needed to prevent a segmentation fault. The correct assigment is done below
     340        40523 :       my_nspins = 0
     341        40523 :       IF (PRESENT(nspins)) my_nspins = nspins
     342              : 
     343        40523 :       shm_neris_total = 0
     344        40523 :       shm_nprim_ints = 0
     345        40523 :       shm_neris_onthefly = 0
     346        40523 :       shm_storage_counter_integrals = 0
     347        40523 :       shm_stor_count_int_disk = 0
     348        40523 :       shm_neris_incore = 0
     349        40523 :       shm_neris_disk = 0
     350        40523 :       shm_stor_count_max_val = 0
     351              : 
     352              : !$OMP PARALLEL DEFAULT(OMP_DEFAULT_NONE_WITH_OOP) &
     353              : !$OMP SHARED(qs_env,&
     354              : !$OMP                                  x_data,&
     355              : !$OMP                                  ks_matrix,&
     356              : !$OMP                                  ehfx,&
     357              : !$OMP                                  rho_ao,&
     358              : !$OMP                                  matrix_ks_aux_fit_hfx,&
     359              : !$OMP                                  hfx_section,&
     360              : !$OMP                                  para_env,&
     361              : !$OMP                                  my_geo_change,&
     362              : !$OMP                                  irep,&
     363              : !$OMP                                  distribute_fock_matrix,&
     364              : !$OMP                                  cell_to_index,&
     365              : !$OMP                                  ncoset,&
     366              : !$OMP                                  nso,&
     367              : !$OMP                                  nco,&
     368              : !$OMP                                  full_ks_alpha,&
     369              : !$OMP                                  full_ks_alpha_from_gpu,&
     370              : !$OMP                                  full_ks_beta_from_gpu,&
     371              : !$OMP                                  full_ks_beta,&
     372              : !$OMP                                  n_threads,&
     373              : !$OMP                                  full_density_alpha,&
     374              : !$OMP                                  full_density_beta,&
     375              : !$OMP                                  shm_initial_p,&
     376              : !$OMP                                  shm_is_assoc_atomic_block,&
     377              : !$OMP                                  shm_number_of_p_entries,&
     378              : !$OMP                                  shm_neris_total,&
     379              : !$OMP                                  shm_neris_onthefly,&
     380              : !$OMP                                  shm_storage_counter_integrals,&
     381              : !$OMP                                  shm_stor_count_int_disk,&
     382              : !$OMP                                  shm_neris_incore,&
     383              : !$OMP                                  shm_neris_disk,&
     384              : !$OMP                                  shm_nprim_ints,&
     385              : !$OMP                                  shm_stor_count_max_val,&
     386              : !$OMP                                  cell,&
     387              : !$OMP                                  screen_coeffs_set,&
     388              : !$OMP                                  screen_coeffs_kind,&
     389              : !$OMP                                  screen_coeffs_pgf,&
     390              : !$OMP                                  pgf_product_list_size,&
     391              : !$OMP                                  radii_pgf,&
     392              : !$OMP                                  nkind,&
     393              : !$OMP                                  ispin,&
     394              : !$OMP                                  is_anti_symmetric,&
     395              : !$OMP                                  shm_atomic_block_offset,&
     396              : !$OMP                                  shm_set_offset,&
     397              : !$OMP                                  shm_block_offset,&
     398              : !$OMP                                  shm_task_counter,&
     399              : !$OMP                                  shm_task_list,&
     400              : !$OMP                                  shm_total_bins,&
     401              : !$OMP                                  shm_master_x_data,&
     402              : !$OMP                                  shm_pmax_atom,&
     403              : !$OMP                                  shm_pmax_block,&
     404              : !$OMP                                  shm_atomic_pair_list,&
     405              : !$OMP                                  shm_mem_compression_counter,&
     406              : !$OMP                                  shared_dev, &
     407              : !$OMP                                  do_print_load_balance_info,use_libgint,use_libint, &
     408              : !$OMP                                  cp2k_was_compiled_with_cuda_offload, &
     409              : !$OMP                                  cp2k_was_compiled_with_libint, &
     410              : !$OMP                                  cp2k_was_compiled_with_libGint, &
     411              : !$OMP                                  my_nspins) &
     412              : !$OMP PRIVATE(ln_10,i_thread,actual_x_data,do_periodic,screening_parameter,potential_parameter,&
     413              : !$OMP         general_parameter,load_balance_parameter,memory_parameter,cache_size,bits_max_val,&
     414              : !$OMP         basis_parameter,basis_info,treat_lsd_in_core,ncpu,n_processes,neris_total,neris_incore,&
     415              : !$OMP         neris_disk,neris_onthefly,mem_eris,mem_eris_disk,mem_max_val,compression_factor,&
     416              : !$OMP         compression_factor_disk,nprim_ints,neris_tmp,max_val_memory,max_am,do_p_screening,&
     417              : !$OMP         max_set,particle_set,atomic_kind_set,natom,kind_of,ncos_max,nsgf_max,ikind,&
     418              : !$OMP         nseta,npgfa,la_max,nsgfa,primitive_integrals,pbd_buf,pbc_buf,pad_buf,pac_buf,kbd_buf,kbc_buf,&
     419              : !$OMP         kad_buf,kac_buf,ee_work,ee_work2,ee_buffer1,ee_buffer2,ee_primitives_tmp,max_contraction,&
     420              : !$OMP         max_pgf,jkind,lb_max,nsetb,npgfb,first_sgfb,sphi_b,nsgfb,ncob,sgfb,nneighbors,pgf_list_ij,pgf_list_kl,&
     421              : !$OMP         pgf_product_list,nimages,ks_fully_occ,subtr_size_mb,use_disk_storage,counter,do_disk_storage,&
     422              : !$OMP         maxval_container_disk,maxval_cache_disk,integral_containers_disk,integral_caches_disk,eps_schwarz,&
     423              : !$OMP         log10_eps_schwarz,eps_storage,hf_fraction,buffer_overflow,logger,private_lib,last_sgf_global,handle_getp,&
     424              : !$OMP         p_work,fac,handle_load,do_dynamic_load_balancing,my_bin_size,maxval_container,integral_containers,maxval_cache,&
     425              : !$OMP         integral_caches,tmp_task_list,tmp_task_list_cost,tmp_index,handle_main,coeffs_kind_max0,set_list_ij,&
     426              : !$OMP         set_list_kl,iatom_start,iatom_end,jatom_start,jatom_end,nblocks,bins_left,do_it,distribution_energy,&
     427              : !$OMP         my_thread_id,my_bin_id,handle_bin,bintime_start,my_istart,my_current_counter,latom_block,tmp_block,&
     428              : !$OMP         katom_block,katom_start,katom_end,latom_start,latom_end,pmax_blocks,list_ij,list_kl,i_set_list_ij_start,&
     429              : !$OMP         i_set_list_ij_stop,ra,rb,rab2,la_min,zeta,sphi_a_ext,nsgfl_a,sphi_a_u1,sphi_a_u2,sphi_a_u3,&
     430              : !$OMP         lb_min,zetb,sphi_b_ext,nsgfl_b,sphi_b_u1,sphi_b_u2,sphi_b_u3,katom,latom,i_set_list_kl_start,i_set_list_kl_stop,&
     431              : !$OMP         kkind,lkind,rc,rd,rcd2,pmax_atom,screen_kind_ij,screen_kind_kl,symm_fac,lc_max,lc_min,npgfc,zetc,nsgfc,sphi_c_ext,&
     432              : !$OMP         nsgfl_c,sphi_c_u1,sphi_c_u2,sphi_c_u3,ld_max,ld_min,npgfd,zetd,nsgfd,sphi_d_ext,nsgfl_d,sphi_d_u1,sphi_d_u2,&
     433              : !$OMP         sphi_d_u3,atomic_offset_bd,atomic_offset_bc,atomic_offset_ad,atomic_offset_ac,offset_bd_set,offset_bc_set,&
     434              : !$OMP         offset_ad_set,offset_ac_set,swap_id,kind_kind_idx,ptr_p_1,ptr_p_2,ptr_p_3,ptr_p_4,mem_compression_counter,&
     435              : !$OMP         mem_compression_counter_disk,max_val1,sphi_a_ext_set,sphi_b_ext_set,kset,lset,max_val2_set,max_val2,&
     436              : !$OMP         sphi_c_ext_set,sphi_d_ext_set,pmax_entry,log10_pmax,current_counter,nints,estimate_to_store_int,&
     437              : !$OMP         spherical_estimate,nbits,buffer_left,buffer_start,buffer_size,max_contraction_val,tmp_r_1,tmp_r_2,&
     438              : !$OMP         tmp_screen_pgf1,tmp_screen_pgf2,cartesian_estimate,bintime_stop,iw,memsize_after,storage_counter_integrals,&
     439              : !$OMP         stor_count_int_disk,stor_count_max_val,ene_x_aa,ene_x_bb,mb_size_p,mb_size_f,mb_size_buffers,afac,ene_x_aa_diag,&
     440              : !$OMP         ene_x_bb_diag,act_atomic_block_offset,act_set_offset,j,handle_dist_ks,tmp_i8,tmp_i4,dft_control,&
     441              : !$OMP         etmp,nkimages,img,bin,eps_scaling_str,eps_schwarz_min_str, &
     442              : !$OMP         i,inla,nla,gcc,first_set_of_atom,max_abs_delta_KS, screened, &
     443        40523 : !$OMP         iatom_set,jatom_set,katom_set,latom_set,use_only_alpha_spin)
     444              : 
     445              :       ln_10 = LOG(10.0_dp)
     446              :       i_thread = 0
     447              : !$    i_thread = omp_get_thread_num()
     448              : 
     449              :       actual_x_data => x_data(irep, i_thread + 1)
     450              : !$OMP MASTER
     451              :       shm_master_x_data => x_data(irep, 1)
     452              : !$OMP END MASTER
     453              : !$OMP BARRIER
     454              : 
     455              :       do_periodic = actual_x_data%periodic_parameter%do_periodic
     456              : 
     457              :       IF (do_periodic) THEN
     458              :          ! ** Rebuild neighbor lists in case the cell has changed (i.e. NPT MD)
     459              :          actual_x_data%periodic_parameter%number_of_shells = actual_x_data%periodic_parameter%mode
     460              :          CALL hfx_create_neighbor_cells(actual_x_data, actual_x_data%periodic_parameter%number_of_shells_from_input, &
     461              :                                         cell, i_thread)
     462              :       END IF
     463              : 
     464              :       screening_parameter = actual_x_data%screening_parameter
     465              :       potential_parameter = actual_x_data%potential_parameter
     466              : 
     467              :       general_parameter = actual_x_data%general_parameter
     468              :       load_balance_parameter => actual_x_data%load_balance_parameter
     469              :       memory_parameter => actual_x_data%memory_parameter
     470              : 
     471              :       cache_size = memory_parameter%cache_size
     472              :       bits_max_val = memory_parameter%bits_max_val
     473              : 
     474              :       basis_parameter => actual_x_data%basis_parameter
     475              :       basis_info => actual_x_data%basis_info
     476              : 
     477              :       treat_lsd_in_core = general_parameter%treat_lsd_in_core
     478              : 
     479              :       ncpu = para_env%num_pe
     480              :       n_processes = ncpu*n_threads
     481              : 
     482              :       !! initialize some counters
     483              :       neris_total = 0_int_8
     484              :       neris_incore = 0_int_8
     485              :       neris_disk = 0_int_8
     486              :       neris_onthefly = 0_int_8
     487              :       mem_eris = 0_int_8
     488              :       mem_eris_disk = 0_int_8
     489              :       mem_max_val = 0_int_8
     490              :       compression_factor = 0.0_dp
     491              :       compression_factor_disk = 0.0_dp
     492              :       nprim_ints = 0_int_8
     493              :       neris_tmp = 0_int_8
     494              :       max_val_memory = 1_int_8
     495              : 
     496              :       ! Set the integral engine
     497              :       if (general_parameter%hfx_library == hfx_library_is_libint) then
     498              :          use_libint = .true.
     499              :          use_libGint = .false.
     500              :       end if
     501              :       if (general_parameter%hfx_library == hfx_library_is_libGint) then
     502              :          use_libint = .false.
     503              :          use_libGint = .true.
     504              :       end if
     505              :       if (general_parameter%hfx_library == hfx_library_is_both) then
     506              :          use_libint = .true.
     507              :          use_libGint = .true.
     508              :       end if
     509              : 
     510              :       max_am = basis_info%max_am
     511              : 
     512              :       CALL get_qs_env(qs_env=qs_env, &
     513              :                       atomic_kind_set=atomic_kind_set, &
     514              :                       particle_set=particle_set, &
     515              :                       dft_control=dft_control)
     516              :       IF (dft_control%do_admm) CALL get_admm_env(qs_env%admm_env, matrix_ks_aux_fit_hfx=matrix_ks_aux_fit_hfx)
     517              : 
     518              :       do_p_screening = screening_parameter%do_initial_p_screening
     519              :       ! Special treatment for MP2 with initial density screening
     520              :       IF (do_p_screening) THEN
     521              :          IF (ASSOCIATED(qs_env%mp2_env)) THEN
     522              :             IF ((qs_env%mp2_env%ri_grad%free_hfx_buffer)) THEN
     523              :                do_p_screening = ((qs_env%mp2_env%p_screen) .AND. (qs_env%mp2_env%not_last_hfx))
     524              :             ELSE
     525              :                do_p_screening = .FALSE.
     526              :             END IF
     527              :          END IF
     528              :       END IF
     529              :       max_set = basis_info%max_set
     530              :       natom = SIZE(particle_set, 1)
     531              : 
     532              :       ! Number of image matrices in k-point calculations (nkimages==1 -> no kpoints)
     533              :       nkimages = dft_control%nimages
     534              :       CPASSERT(nkimages == 1)
     535              : 
     536              :       CALL get_atomic_kind_set(atomic_kind_set=atomic_kind_set, kind_of=kind_of)
     537              : 
     538              :       !! precompute maximum nco and allocate scratch
     539              :       ncos_max = 0
     540              :       nsgf_max = 0
     541              :       DO iatom = 1, natom
     542              :          ikind = kind_of(iatom)
     543              :          nseta = basis_parameter(ikind)%nset
     544              :          npgfa => basis_parameter(ikind)%npgf
     545              :          la_max => basis_parameter(ikind)%lmax
     546              :          nsgfa => basis_parameter(ikind)%nsgf
     547              :          DO iset = 1, nseta
     548              :             ncos_max = MAX(ncos_max, ncoset(la_max(iset)))
     549              :             nsgf_max = MAX(nsgf_max, nsgfa(iset))
     550              :          END DO
     551              :       END DO
     552              :       !! Allocate the arrays for the integrals.
     553              :       ALLOCATE (primitive_integrals(nsgf_max**4))
     554              :       primitive_integrals = 0.0_dp
     555              : 
     556              :       ALLOCATE (pbd_buf(nsgf_max**2))
     557              :       ALLOCATE (pbc_buf(nsgf_max**2))
     558              :       ALLOCATE (pad_buf(nsgf_max**2))
     559              :       ALLOCATE (pac_buf(nsgf_max**2))
     560              :       ALLOCATE (kbd_buf(nsgf_max**2))
     561              :       ALLOCATE (kbc_buf(nsgf_max**2))
     562              :       ALLOCATE (kad_buf(nsgf_max**2))
     563              :       ALLOCATE (kac_buf(nsgf_max**2))
     564              :       ALLOCATE (ee_work(ncos_max**4))
     565              :       ALLOCATE (ee_work2(ncos_max**4))
     566              :       ALLOCATE (ee_buffer1(ncos_max**4))
     567              :       ALLOCATE (ee_buffer2(ncos_max**4))
     568              :       ALLOCATE (ee_primitives_tmp(nsgf_max**4))
     569              : 
     570              :       IF (my_nspins == 0) my_nspins = dft_control%nspins
     571              : 
     572              :       ALLOCATE (max_contraction(max_set, natom))
     573              : 
     574              :       max_contraction = 0.0_dp
     575              :       max_pgf = 0
     576              :       DO jatom = 1, natom
     577              :          jkind = kind_of(jatom)
     578              :          lb_max => basis_parameter(jkind)%lmax
     579              :          nsetb = basis_parameter(jkind)%nset
     580              :          npgfb => basis_parameter(jkind)%npgf
     581              :          first_sgfb => basis_parameter(jkind)%first_sgf
     582              :          sphi_b => basis_parameter(jkind)%sphi
     583              :          nsgfb => basis_parameter(jkind)%nsgf
     584              :          DO jset = 1, nsetb
     585              :             ! takes the primitive to contracted transformation into account
     586              :             ncob = npgfb(jset)*ncoset(lb_max(jset))
     587              :             sgfb = first_sgfb(1, jset)
     588              :             ! if the primitives are assumed to be all of max_val2, max_val2*p2s_b becomes
     589              :             ! the maximum value after multiplication with sphi_b
     590              :             max_contraction(jset, jatom) = MAXVAL([(SUM(ABS(sphi_b(1:ncob, i))), i=sgfb, sgfb + nsgfb(jset) - 1)])
     591              :             max_pgf = MAX(max_pgf, npgfb(jset))
     592              :          END DO
     593              :       END DO
     594              : 
     595              :       ! ** Allocate buffers for pgf_lists
     596              :       nneighbors = SIZE(actual_x_data%neighbor_cells)
     597              :       ALLOCATE (pgf_list_ij(max_pgf**2))
     598              :       ALLOCATE (pgf_list_kl(max_pgf**2))
     599              :       ! the size of pgf_product_list is allocated and resized as needed. The initial guess grows as needed
     600              : !$OMP     ATOMIC READ
     601              :       tmp_i4 = pgf_product_list_size
     602              :       ALLOCATE (pgf_product_list(tmp_i4))
     603              :       ALLOCATE (nimages(max_pgf**2))
     604              : 
     605              :       DO i = 1, max_pgf**2
     606              :          ALLOCATE (pgf_list_ij(i)%image_list(nneighbors))
     607              :          ALLOCATE (pgf_list_kl(i)%image_list(nneighbors))
     608              :       END DO
     609              : !$OMP BARRIER
     610              : !$OMP MASTER
     611              :       !! Calculate helper array that stores if a certain atomic pair is associated in the KS matrix
     612              :       IF (my_geo_change) THEN
     613              :          CALL get_atomic_block_maps(ks_matrix(1, 1)%matrix, basis_parameter, kind_of, &
     614              :                                     shm_master_x_data%is_assoc_atomic_block, &
     615              :                                     shm_master_x_data%number_of_p_entries, &
     616              :                                     para_env, &
     617              :                                     shm_master_x_data%atomic_block_offset, &
     618              :                                     shm_master_x_data%set_offset, &
     619              :                                     shm_master_x_data%block_offset, &
     620              :                                     shm_master_x_data%map_atoms_to_cpus, &
     621              :                                     nkind)
     622              : 
     623              :          shm_is_assoc_atomic_block => shm_master_x_data%is_assoc_atomic_block
     624              : 
     625              :          !! Get occupation of KS-matrix
     626              :          ks_fully_occ = .TRUE.
     627              :          outer: DO iatom = 1, natom
     628              :             DO jatom = iatom, natom
     629              :                IF (shm_is_assoc_atomic_block(jatom, iatom) == 0) THEN
     630              :                   ks_fully_occ = .FALSE.
     631              :                   EXIT outer
     632              :                END IF
     633              :             END DO
     634              :          END DO outer
     635              : 
     636              :          IF (.NOT. ks_fully_occ) THEN
     637              :             CALL cp_warn(__LOCATION__, &
     638              :                          "The Kohn Sham matrix is not 100% occupied. This "// &
     639              :                          "may result in incorrect Hartree-Fock results. Setting "// &
     640              :                          "MIN_PAIR_LIST_RADIUS to -1 in the QS section ensures a "// &
     641              :                          "fully occupied KS matrix. For more information "// &
     642              :                          "see FAQ: https://www.cp2k.org/faq:hfx_eps_warning")
     643              :          END IF
     644              :       END IF
     645              : 
     646              :       ! ** Set pointers
     647              :       shm_number_of_p_entries = shm_master_x_data%number_of_p_entries
     648              :       shm_is_assoc_atomic_block => shm_master_x_data%is_assoc_atomic_block
     649              :       shm_atomic_block_offset => shm_master_x_data%atomic_block_offset
     650              :       shm_set_offset => shm_master_x_data%set_offset
     651              :       shm_block_offset => shm_master_x_data%block_offset
     652              : !$OMP END MASTER
     653              : !$OMP BARRIER
     654              : 
     655              :       ! ** Reset storage counter given by MAX_MEMORY by subtracting all buffers
     656              :       ! ** Fock and density Matrices (shared)
     657              :       subtr_size_mb = 2_int_8*shm_block_offset(ncpu + 1)
     658              :       ! ** if non restricted ==> alpha/beta spin
     659              :       IF (.NOT. treat_lsd_in_core) THEN
     660              :          IF (my_nspins == 2) subtr_size_mb = subtr_size_mb*2_int_8
     661              :       END IF
     662              :       ! ** Initial P only MAX(alpha,beta) (shared)
     663              :       IF (do_p_screening .OR. screening_parameter%do_p_screening_forces) THEN
     664              :          subtr_size_mb = subtr_size_mb + memory_parameter%size_p_screen
     665              :       END IF
     666              :       ! ** In core forces require their own initial P
     667              :       IF (screening_parameter%do_p_screening_forces) THEN
     668              :          IF (memory_parameter%treat_forces_in_core) THEN
     669              :             subtr_size_mb = subtr_size_mb + memory_parameter%size_p_screen
     670              :          END IF
     671              :       END IF
     672              :       ! ** primitive buffer (not shared by the threads)
     673              :       subtr_size_mb = subtr_size_mb + nsgf_max**4*n_threads
     674              :       ! ** density + fock buffers
     675              :       subtr_size_mb = subtr_size_mb + 8_int_8*nsgf_max**2*n_threads
     676              :       ! ** screening functions (shared)
     677              :       ! ** coeffs_pgf
     678              :       subtr_size_mb = subtr_size_mb + max_pgf**2*max_set**2*nkind**2
     679              :       ! ** coeffs_set
     680              :       subtr_size_mb = subtr_size_mb + max_set**2*nkind**2
     681              :       ! ** coeffs_kind
     682              :       subtr_size_mb = subtr_size_mb + nkind**2
     683              :       ! ** radii_pgf
     684              :       subtr_size_mb = subtr_size_mb + max_pgf**2*max_set**2*nkind**2
     685              : 
     686              :       ! ** is_assoc (shared)
     687              :       subtr_size_mb = subtr_size_mb + natom**2
     688              : 
     689              :       ! ** pmax_atom (shared)
     690              :       IF (do_p_screening) THEN
     691              :          subtr_size_mb = subtr_size_mb + natom**2
     692              :       END IF
     693              :       IF (screening_parameter%do_p_screening_forces) THEN
     694              :          IF (memory_parameter%treat_forces_in_core) THEN
     695              :             subtr_size_mb = subtr_size_mb + natom**2
     696              :          END IF
     697              :       END IF
     698              : 
     699              :       ! ** Convert into MiB's
     700              :       subtr_size_mb = subtr_size_mb*8_int_8/1024_int_8/1024_int_8
     701              : 
     702              :       ! ** Subtracting all these buffers from MAX_MEMORY yields the amount
     703              :       ! ** of RAM that is left for the compressed integrals. When using threads
     704              :       ! ** all the available memory is shared among all n_threads. i.e. the faster
     705              :       ! ** ones can steal from the slower ones
     706              : 
     707              :       CALL hfx_reset_memory_usage_counter(memory_parameter, subtr_size_mb)
     708              : 
     709              :       use_disk_storage = .FALSE.
     710              :       counter = 0_int_8
     711              :       do_disk_storage = memory_parameter%do_disk_storage
     712              :       IF (do_disk_storage) THEN
     713              :          maxval_container_disk => actual_x_data%store_ints%maxval_container_disk
     714              :          maxval_cache_disk => actual_x_data%store_ints%maxval_cache_disk
     715              : 
     716              :          integral_containers_disk => actual_x_data%store_ints%integral_containers_disk
     717              :          integral_caches_disk => actual_x_data%store_ints%integral_caches_disk
     718              :       END IF
     719              : 
     720              :       IF (my_geo_change) THEN
     721              :          memory_parameter%ram_counter = HUGE(memory_parameter%ram_counter)
     722              :       END IF
     723              : 
     724              :       IF (my_geo_change) THEN
     725              :          memory_parameter%recalc_forces = .TRUE.
     726              :       ELSE
     727              :          IF (.NOT. memory_parameter%treat_forces_in_core) memory_parameter%recalc_forces = .TRUE.
     728              :       END IF
     729              : 
     730              :       !! Get screening parameter
     731              :       eps_schwarz = screening_parameter%eps_schwarz
     732              :       IF (eps_schwarz <= 0.0_dp) THEN
     733              :          log10_eps_schwarz = log_zero
     734              :       ELSE
     735              :          log10_eps_schwarz = LOG10(eps_schwarz)
     736              :       END IF
     737              :       !! get storage epsilon
     738              :       eps_storage = eps_schwarz*memory_parameter%eps_storage_scaling
     739              : 
     740              :       !! If we have a hybrid functional, we may need only a fraction of exact exchange
     741              :       hf_fraction = general_parameter%fraction
     742              : 
     743              :       !! The number of integrals that fit into the given MAX_MEMORY
     744              : 
     745              :       !! Parameters related to the potential 1/r, erf(wr)/r, erfc(wr/r)
     746              :       potential_parameter = actual_x_data%potential_parameter
     747              : 
     748              :       !! Variable to check if we calculate the integrals in-core or on the fly
     749              :       !! If TRUE -> on the fly
     750              :       IF (.NOT. memory_parameter%do_all_on_the_fly) THEN
     751              :          buffer_overflow = .FALSE.
     752              :       ELSE
     753              :          buffer_overflow = .TRUE.
     754              :       END IF
     755              :       logger => cp_get_default_logger()
     756              : 
     757              :       private_lib = actual_x_data%lib
     758              : 
     759              :       !! Helper array to map local basis function indices to global ones
     760              :       ALLOCATE (last_sgf_global(0:natom))
     761              :       last_sgf_global(0) = 0
     762              :       DO iatom = 1, natom
     763              :          ikind = kind_of(iatom)
     764              :          last_sgf_global(iatom) = last_sgf_global(iatom - 1) + basis_parameter(ikind)%nsgf_total
     765              :       END DO
     766              : !$OMP BARRIER
     767              : !$OMP MASTER
     768              :       !! Let master thread get the density (avoid problems with MPI)
     769              :       !! Get the full density from all the processors
     770              :       NULLIFY (full_density_alpha, full_density_beta)
     771              :       ALLOCATE (full_density_alpha(shm_block_offset(ncpu + 1), nkimages))
     772              :       IF (.NOT. treat_lsd_in_core .OR. my_nspins == 1) THEN
     773              :          CALL timeset(routineN//"_getP", handle_getP)
     774              :          DO img = 1, nkimages
     775              :             CALL get_full_density(para_env, full_density_alpha(:, img), rho_ao(ispin, img)%matrix, shm_number_of_p_entries, &
     776              :                                   shm_master_x_data%block_offset, &
     777              :                                   kind_of, basis_parameter, get_max_vals_spin=.FALSE., antisymmetric=is_anti_symmetric)
     778              :          END DO
     779              : 
     780              :          IF (my_nspins == 2) THEN
     781              :             ALLOCATE (full_density_beta(shm_block_offset(ncpu + 1), nkimages))
     782              :             DO img = 1, nkimages
     783              :                CALL get_full_density(para_env, full_density_beta(:, img), rho_ao(2, img)%matrix, shm_number_of_p_entries, &
     784              :                                      shm_master_x_data%block_offset, &
     785              :                                      kind_of, basis_parameter, get_max_vals_spin=.FALSE., antisymmetric=is_anti_symmetric)
     786              :             END DO
     787              :          END IF
     788              :          CALL timestop(handle_getP)
     789              : 
     790              :          !! Calculate the max values of the density matrix actual_pmax stores the data from the actual density matrix
     791              :          !! and x_data%initial_p stores the same for the initial guess. The initial guess is updated only in the case of
     792              :          !! a changed geometry
     793              :          NULLIFY (shm_initial_p)
     794              :          IF (do_p_screening) THEN
     795              :             shm_initial_p => shm_master_x_data%initial_p
     796              :             shm_pmax_atom => shm_master_x_data%pmax_atom
     797              :             IF (my_geo_change) THEN
     798              :                CALL update_pmax_mat(shm_master_x_data%initial_p, &
     799              :                                     shm_master_x_data%map_atom_to_kind_atom, &
     800              :                                     shm_master_x_data%set_offset, &
     801              :                                     shm_master_x_data%atomic_block_offset, &
     802              :                                     shm_pmax_atom, &
     803              :                                     full_density_alpha, full_density_beta, &
     804              :                                     natom, kind_of, basis_parameter, &
     805              :                                     nkind, shm_master_x_data%is_assoc_atomic_block)
     806              :             END IF
     807              :          END IF
     808              :       ELSE
     809              :          IF (do_p_screening) THEN
     810              :             CALL timeset(routineN//"_getP", handle_getP)
     811              :             DO img = 1, nkimages
     812              :                CALL get_full_density(para_env, full_density_alpha(:, img), rho_ao(1, img)%matrix, shm_number_of_p_entries, &
     813              :                                      shm_master_x_data%block_offset, &
     814              :                                      kind_of, basis_parameter, get_max_vals_spin=.TRUE., &
     815              :                                      rho_beta=rho_ao(2, img)%matrix, antisymmetric=is_anti_symmetric)
     816              :             END DO
     817              :             CALL timestop(handle_getP)
     818              : 
     819              :             !! Calculate the max values of the density matrix actual_pmax stores the data from the actual density matrix
     820              :             !! and x_data%initial_p stores the same for the initial guess. The initial guess is updated only in the case of
     821              :             !! a changed geometry
     822              :             NULLIFY (shm_initial_p)
     823              :             shm_initial_p => actual_x_data%initial_p
     824              :             shm_pmax_atom => shm_master_x_data%pmax_atom
     825              :             IF (my_geo_change) THEN
     826              :                CALL update_pmax_mat(shm_master_x_data%initial_p, &
     827              :                                     shm_master_x_data%map_atom_to_kind_atom, &
     828              :                                     shm_master_x_data%set_offset, &
     829              :                                     shm_master_x_data%atomic_block_offset, &
     830              :                                     shm_pmax_atom, &
     831              :                                     full_density_alpha, full_density_beta, &
     832              :                                     natom, kind_of, basis_parameter, &
     833              :                                     nkind, shm_master_x_data%is_assoc_atomic_block)
     834              :             END IF
     835              :          END IF
     836              :          ! ** Now get the density(ispin)
     837              :          DO img = 1, nkimages
     838              :             CALL get_full_density(para_env, full_density_alpha(:, img), rho_ao(ispin, img)%matrix, shm_number_of_p_entries, &
     839              :                                   shm_master_x_data%block_offset, &
     840              :                                   kind_of, basis_parameter, get_max_vals_spin=.FALSE., &
     841              :                                   antisymmetric=is_anti_symmetric)
     842              :          END DO
     843              :       END IF
     844              : 
     845              :       NULLIFY (full_ks_alpha, full_ks_beta)
     846              :       ALLOCATE (shm_master_x_data%full_ks_alpha(shm_block_offset(ncpu + 1), nkimages))
     847              :       full_ks_alpha => shm_master_x_data%full_ks_alpha
     848              :       full_ks_alpha = 0.0_dp
     849              : 
     850              :       IF (.NOT. treat_lsd_in_core) THEN
     851              :          IF (my_nspins == 2) THEN
     852              :             ALLOCATE (shm_master_x_data%full_ks_beta(shm_block_offset(ncpu + 1), nkimages))
     853              :             full_ks_beta => shm_master_x_data%full_ks_beta
     854              :             full_ks_beta = 0.0_dp
     855              :          END IF
     856              :       END IF
     857              : 
     858              : !$OMP END MASTER
     859              : !$OMP BARRIER
     860              : 
     861              :       !! Initialize schwarz screening matrices for near field estimates and boxing screening matrices
     862              :       !! for far field estimates. The update is only performed if the geomtry of the system changed.
     863              :       !! If the system is periodic, then the corresponding routines are called and some variables
     864              :       !! are initialized
     865              : 
     866              :       IF (.NOT. shm_master_x_data%screen_funct_is_initialized) THEN
     867              :          CALL calc_pair_dist_radii(qs_env, basis_parameter, &
     868              :                                    shm_master_x_data%pair_dist_radii_pgf, max_set, max_pgf, eps_schwarz, &
     869              :                                    n_threads, i_thread)
     870              : !$OMP BARRIER
     871              :          CALL calc_screening_functions(qs_env, basis_parameter, private_lib, shm_master_x_data%potential_parameter, &
     872              :                                        shm_master_x_data%screen_funct_coeffs_set, &
     873              :                                        shm_master_x_data%screen_funct_coeffs_kind, &
     874              :                                        shm_master_x_data%screen_funct_coeffs_pgf, &
     875              :                                        shm_master_x_data%pair_dist_radii_pgf, &
     876              :                                        max_set, max_pgf, n_threads, i_thread, p_work)
     877              : 
     878              : !$OMP MASTER
     879              :          shm_master_x_data%screen_funct_is_initialized = .TRUE.
     880              : !$OMP END MASTER
     881              :       END IF
     882              : !$OMP BARRIER
     883              : 
     884              : !$OMP MASTER
     885              :       screen_coeffs_set => shm_master_x_data%screen_funct_coeffs_set
     886              :       screen_coeffs_kind => shm_master_x_data%screen_funct_coeffs_kind
     887              :       screen_coeffs_pgf => shm_master_x_data%screen_funct_coeffs_pgf
     888              :       radii_pgf => shm_master_x_data%pair_dist_radii_pgf
     889              : !$OMP END MASTER
     890              : !$OMP BARRIER
     891              : 
     892              :       !! Initialize a prefactor depending on the fraction and the number of spins
     893              :       IF (my_nspins == 1) THEN
     894              :          fac = 0.5_dp*hf_fraction
     895              :       ELSE
     896              :          fac = 1.0_dp*hf_fraction
     897              :       END IF
     898              : 
     899              :       !! Call routines that distribute the load on all processes. If we want to screen on a initial density matrix, there is
     900              :       !! an optional parameter. Of course, this is only done if the geometry did change
     901              : !$OMP BARRIER
     902              : !$OMP MASTER
     903              :       CALL timeset(routineN//"_load", handle_load)
     904              : !$OMP END MASTER
     905              : !$OMP BARRIER
     906              :       IF (my_geo_change) THEN
     907              :          IF (actual_x_data%b_first_load_balance_energy) THEN
     908              :             CALL hfx_load_balance(actual_x_data, eps_schwarz, particle_set, max_set, para_env, &
     909              :                                   screen_coeffs_set, screen_coeffs_kind, &
     910              :                                   shm_is_assoc_atomic_block, do_periodic, load_balance_parameter, &
     911              :                                   kind_of, basis_parameter, shm_initial_p, shm_pmax_atom, i_thread, n_threads, &
     912              :                                   cell, do_p_screening, actual_x_data%map_atom_to_kind_atom, &
     913              :                                   nkind, hfx_do_eval_energy, shm_pmax_block, use_virial=.FALSE.)
     914              :             actual_x_data%b_first_load_balance_energy = .FALSE.
     915              :          ELSE
     916              :             CALL hfx_update_load_balance(actual_x_data, para_env, &
     917              :                                          load_balance_parameter, &
     918              :                                          i_thread, n_threads, hfx_do_eval_energy)
     919              :          END IF
     920              :       END IF
     921              : !$OMP BARRIER
     922              : !$OMP MASTER
     923              :       CALL timestop(handle_load)
     924              : !$OMP END MASTER
     925              : !$OMP BARRIER
     926              : 
     927              :       use_only_alpha_spin = treat_lsd_in_core .or. .not. my_nspins == 2
     928              : 
     929              :       ! Set up the gpu enviroment for libGint
     930              :       if (use_libGint) then
     931              : 
     932              :          call offload_activate_chosen_device()
     933              : 
     934              :          ! At least once, and before any other call from all OMP threads
     935              :          call cp_libGint_init(actual_x_data)
     936              : 
     937              :          ! At least at geo change
     938              :          call libGint_update_env(fac, memory_parameter, do_periodic, cell, actual_x_data, &
     939              :                                  nneighbors, max_pgf, natom, kind_of, particle_set, basis_parameter)
     940              : 
     941              :          ! Every cycle, comunicate the updated density to libGint
     942              :          if (use_only_alpha_spin) then
     943              :             call libGint_set_density(full_density_alpha)
     944              :          else
     945              :             call libGint_set_density(full_density_alpha, full_density_beta)
     946              :          end if
     947              :       end if
     948              :       !!!
     949              : 
     950              :       !! Start calculating integrals of the form (ab|cd) or (ij|kl)
     951              :       !! In order to do so, there is a main four-loop structure that takes into account the two symmetries
     952              :       !!
     953              :       !!   (ab|cd) = (ba|cd) = (ab|dc) = (ba|dc)
     954              :       !!
     955              :       !! by iterating in the following way
     956              :       !!
     957              :       !! DO iatom=1,natom               and       DO katom=1,natom
     958              :       !!   DO jatom=iatom,natom                     DO latom=katom,natom
     959              :       !!
     960              :       !! The third symmetry
     961              :       !!
     962              :       !!  (ab|cd) = (cd|ab)
     963              :       !!
     964              :       !! is taken into account by the following criterion:
     965              :       !!
     966              :       !! IF(katom+latom<=iatom+jatom)  THEN
     967              :       !!   IF( ((iatom+jatom)==(katom+latom) ) .AND.(katom<iatom)) CYCLE
     968              :       !!
     969              :       !! Depending on the degeneracy of an integral the exchange contribution is multiplied by a corresponding
     970              :       !! factor ( symm_fac ).
     971              :       !!
     972              :       !! If one quartet does not pass the screening we CYCLE on the outer most possible loop. Thats why we use
     973              :       !! different hierarchies of short range screening matrices.
     974              :       !!
     975              :       !! If we do a parallel run, each process owns a unique array of workloads. Here, a workload is
     976              :       !! defined as :
     977              :       !!
     978              :       !! istart, jstart, kstart, lstart, number_of_atom_quartets, initial_cpu_id
     979              :       !!
     980              :       !! This tells the process where to start the main loops and how many bunches of integrals it has to
     981              :       !! calculate. The original parallelization is a simple modulo distribution that is binned and
     982              :       !! optimized in the load_balance routines. Since the Monte Carlo routines can swap processors,
     983              :       !! we need to know which was the initial cpu_id.
     984              :       !! Furthermore, the indices iatom, jatom, katom, latom have to be set to istart, jstart, kstart and
     985              :       !! lstart only the first time the loop is executed. All subsequent loops have to start with one or
     986              :       !! iatom and katom respectively. Therefore, we use flags like first_j_loop etc.
     987              : 
     988              :       !! LibGint follows the same patter, distributing integrals over mpi and openmp with a shared-over-mpi
     989              :       !! density and fock matrix. The main differences are that the fock matrix is not in a valid state
     990              :       !! until after the libGint_get_fock_matrix has returned -in approximately a thousand lines-,
     991              :       !! and the integrals are not saved in memory, but recomputed each cycle
     992              : 
     993              :       do_dynamic_load_balancing = .TRUE.
     994              : 
     995              :       IF (n_threads == 1 .OR. do_disk_storage) do_dynamic_load_balancing = .FALSE.
     996              : 
     997              :       IF (do_dynamic_load_balancing) THEN
     998              :          my_bin_size = SIZE(actual_x_data%distribution_energy)
     999              :       ELSE
    1000              :          my_bin_size = 1
    1001              :       END IF
    1002              :       !! We do not need the containers if MAX_MEM = 0
    1003              :       IF (.NOT. memory_parameter%do_all_on_the_fly) THEN
    1004              :          !! IF new md step -> reinitialize containers
    1005              :          IF (my_geo_change) THEN
    1006              :             CALL dealloc_containers(actual_x_data%store_ints, memory_parameter%actual_memory_usage)
    1007              :             CALL alloc_containers(actual_x_data%store_ints, my_bin_size)
    1008              : 
    1009              :             DO bin = 1, my_bin_size
    1010              :                maxval_container => actual_x_data%store_ints%maxval_container(bin)
    1011              :                integral_containers => actual_x_data%store_ints%integral_containers(:, bin)
    1012              :                CALL hfx_init_container(maxval_container, memory_parameter%actual_memory_usage, .FALSE.)
    1013              :                DO i = 1, 64
    1014              :                   CALL hfx_init_container(integral_containers(i), memory_parameter%actual_memory_usage, .FALSE.)
    1015              :                END DO
    1016              :             END DO
    1017              :          END IF
    1018              : 
    1019              :          !! Decompress the first cache for maxvals and integrals
    1020              :          IF (.NOT. my_geo_change) THEN
    1021              :             DO bin = 1, my_bin_size
    1022              :                maxval_cache => actual_x_data%store_ints%maxval_cache(bin)
    1023              :                maxval_container => actual_x_data%store_ints%maxval_container(bin)
    1024              :                integral_caches => actual_x_data%store_ints%integral_caches(:, bin)
    1025              :                integral_containers => actual_x_data%store_ints%integral_containers(:, bin)
    1026              :                CALL hfx_decompress_first_cache(bits_max_val, maxval_cache, maxval_container, &
    1027              :                                                memory_parameter%actual_memory_usage, .FALSE.)
    1028              :                DO i = 1, 64
    1029              :                   CALL hfx_decompress_first_cache(i, integral_caches(i), integral_containers(i), &
    1030              :                                                   memory_parameter%actual_memory_usage, .FALSE.)
    1031              :                END DO
    1032              :             END DO
    1033              :          END IF
    1034              :       END IF
    1035              : 
    1036              :       !! Since the I/O routines are no thread-safe, i.e. the procedure to get the unit number, put a lock here
    1037              : !$OMP CRITICAL(hfxenergy_io_critical)
    1038              :       !! If we do disk storage, we have to initialize the containers/caches
    1039              :       IF (do_disk_storage) THEN
    1040              :          !! IF new md step -> reinitialize containers
    1041              :          IF (my_geo_change) THEN
    1042              :             CALL hfx_init_container(maxval_container_disk, memory_parameter%actual_memory_usage_disk, do_disk_storage)
    1043              :             DO i = 1, 64
    1044              :                CALL hfx_init_container(integral_containers_disk(i), memory_parameter%actual_memory_usage_disk, do_disk_storage)
    1045              :             END DO
    1046              :          END IF
    1047              :          !! Decompress the first cache for maxvals and integrals
    1048              :          IF (.NOT. my_geo_change) THEN
    1049              :             CALL hfx_decompress_first_cache(bits_max_val, maxval_cache_disk, maxval_container_disk, &
    1050              :                                             memory_parameter%actual_memory_usage_disk, .TRUE.)
    1051              :             DO i = 1, 64
    1052              :                CALL hfx_decompress_first_cache(i, integral_caches_disk(i), integral_containers_disk(i), &
    1053              :                                                memory_parameter%actual_memory_usage_disk, .TRUE.)
    1054              :             END DO
    1055              :          END IF
    1056              :       END IF
    1057              : !$OMP END CRITICAL(hfxenergy_io_critical)
    1058              : 
    1059              : !$OMP BARRIER
    1060              : !$OMP MASTER
    1061              : 
    1062              :       IF (do_dynamic_load_balancing) THEN
    1063              :          ! ** Lets construct the task list
    1064              :          shm_total_bins = 0
    1065              :          DO i = 1, n_threads
    1066              :             shm_total_bins = shm_total_bins + SIZE(x_data(irep, i)%distribution_energy)
    1067              :          END DO
    1068              :          ALLOCATE (tmp_task_list(shm_total_bins))
    1069              :          shm_task_counter = 0
    1070              :          DO i = 1, n_threads
    1071              :             DO bin = 1, SIZE(x_data(irep, i)%distribution_energy)
    1072              :                shm_task_counter = shm_task_counter + 1
    1073              :                tmp_task_list(shm_task_counter)%thread_id = i
    1074              :                tmp_task_list(shm_task_counter)%bin_id = bin
    1075              :                tmp_task_list(shm_task_counter)%cost = x_data(irep, i)%distribution_energy(bin)%cost
    1076              :             END DO
    1077              :          END DO
    1078              : 
    1079              :          ! ** Now sort the task list
    1080              :          ALLOCATE (tmp_task_list_cost(shm_total_bins))
    1081              :          ALLOCATE (tmp_index(shm_total_bins))
    1082              : 
    1083              :          DO i = 1, shm_total_bins
    1084              :             tmp_task_list_cost(i) = tmp_task_list(i)%cost
    1085              :          END DO
    1086              : 
    1087              :          CALL sort(tmp_task_list_cost, shm_total_bins, tmp_index)
    1088              : 
    1089              :          ALLOCATE (shm_master_x_data%task_list(shm_total_bins))
    1090              : 
    1091              :          DO i = 1, shm_total_bins
    1092              :             shm_master_x_data%task_list(i) = tmp_task_list(tmp_index(shm_total_bins - i + 1))
    1093              :          END DO
    1094              : 
    1095              :          shm_task_list => shm_master_x_data%task_list
    1096              :          shm_task_counter = 0
    1097              : 
    1098              :          DEALLOCATE (tmp_task_list_cost, tmp_index, tmp_task_list)
    1099              :       END IF
    1100              : !$OMP END MASTER
    1101              : !$OMP BARRIER
    1102              : 
    1103              :       IF (my_bin_size > 0) THEN
    1104              :          maxval_container => actual_x_data%store_ints%maxval_container(1)
    1105              :          maxval_cache => actual_x_data%store_ints%maxval_cache(1)
    1106              : 
    1107              :          integral_containers => actual_x_data%store_ints%integral_containers(:, 1)
    1108              :          integral_caches => actual_x_data%store_ints%integral_caches(:, 1)
    1109              :       END IF
    1110              : 
    1111              : !$OMP BARRIER
    1112              : !$OMP MASTER
    1113              :       CALL timeset(routineN//"_main", handle_main)
    1114              : !$OMP END MASTER
    1115              : !$OMP BARRIER
    1116              : 
    1117              :       coeffs_kind_max0 = MAXVAL(screen_coeffs_kind(:, :)%x(2))
    1118              :       ALLOCATE (set_list_ij((max_set*load_balance_parameter%block_size)**2))
    1119              :       ALLOCATE (set_list_kl((max_set*load_balance_parameter%block_size)**2))
    1120              : 
    1121              : !$OMP BARRIER
    1122              : !$OMP MASTER
    1123              : 
    1124              :       !! precalculate maximum density matrix elements in blocks
    1125              :       actual_x_data%pmax_block = 0.0_dp
    1126              :       shm_pmax_block => actual_x_data%pmax_block
    1127              :       IF (do_p_screening) THEN
    1128              :          DO iatom_block = 1, SIZE(actual_x_data%blocks)
    1129              :             iatom_start = actual_x_data%blocks(iatom_block)%istart
    1130              :             iatom_end = actual_x_data%blocks(iatom_block)%iend
    1131              :             DO jatom_block = 1, SIZE(actual_x_data%blocks)
    1132              :                jatom_start = actual_x_data%blocks(jatom_block)%istart
    1133              :                jatom_end = actual_x_data%blocks(jatom_block)%iend
    1134              :                shm_pmax_block(iatom_block, jatom_block) = MAXVAL(shm_pmax_atom(iatom_start:iatom_end, jatom_start:jatom_end))
    1135              :             END DO
    1136              :          END DO
    1137              :       END IF
    1138              :       shm_atomic_pair_list => actual_x_data%atomic_pair_list
    1139              :       IF (my_geo_change) THEN
    1140              :          CALL build_atomic_pair_list(natom, shm_atomic_pair_list, kind_of, basis_parameter, particle_set, &
    1141              :                                      do_periodic, screen_coeffs_kind, coeffs_kind_max0, log10_eps_schwarz, cell, &
    1142              :                                      actual_x_data%blocks)
    1143              :       END IF
    1144              : 
    1145              :       my_bin_size = SIZE(actual_x_data%distribution_energy)
    1146              :       ! reset timings for the new SCF round
    1147              :       IF (my_geo_change) THEN
    1148              :          DO bin = 1, my_bin_size
    1149              :             actual_x_data%distribution_energy(bin)%time_first_scf = 0.0_dp
    1150              :             actual_x_data%distribution_energy(bin)%time_other_scf = 0.0_dp
    1151              :             actual_x_data%distribution_energy(bin)%time_forces = 0.0_dp
    1152              :          END DO
    1153              :       END IF
    1154              : !$OMP END MASTER
    1155              : !$OMP BARRIER
    1156              : 
    1157              :       my_bin_size = SIZE(actual_x_data%distribution_energy)
    1158              :       nblocks = load_balance_parameter%nblocks
    1159              : 
    1160              :       bins_left = .TRUE.
    1161              :       do_it = .TRUE.
    1162              :       bin = 0
    1163              :       ! Main cycle starts here
    1164              :       DO WHILE (bins_left)
    1165              :          ! Check if this thread can run this bin
    1166              :          IF (.NOT. do_dynamic_load_balancing) THEN
    1167              :             bin = bin + 1
    1168              :             IF (bin > my_bin_size) THEN
    1169              :                do_it = .FALSE.
    1170              :                bins_left = .FALSE.
    1171              :             ELSE
    1172              :                do_it = .TRUE.
    1173              :                bins_left = .TRUE.
    1174              :                distribution_energy => actual_x_data%distribution_energy(bin)
    1175              :             END IF
    1176              :          ELSE
    1177              : !$OMP CRITICAL(hfxenergy_critical)
    1178              :             shm_task_counter = shm_task_counter + 1
    1179              :             IF (shm_task_counter <= shm_total_bins) THEN
    1180              :                my_thread_id = shm_task_list(shm_task_counter)%thread_id
    1181              :                my_bin_id = shm_task_list(shm_task_counter)%bin_id
    1182              :                IF (.NOT. memory_parameter%do_all_on_the_fly) THEN
    1183              :                   maxval_cache => x_data(irep, my_thread_id)%store_ints%maxval_cache(my_bin_id)
    1184              :                   maxval_container => x_data(irep, my_thread_id)%store_ints%maxval_container(my_bin_id)
    1185              :                   integral_caches => x_data(irep, my_thread_id)%store_ints%integral_caches(:, my_bin_id)
    1186              :                   integral_containers => x_data(irep, my_thread_id)%store_ints%integral_containers(:, my_bin_id)
    1187              :                END IF
    1188              :                distribution_energy => x_data(irep, my_thread_id)%distribution_energy(my_bin_id)
    1189              :                do_it = .TRUE.
    1190              :                bins_left = .TRUE.
    1191              :                IF (my_geo_change) THEN
    1192              :                   distribution_energy%ram_counter = HUGE(distribution_energy%ram_counter)
    1193              :                END IF
    1194              :                counter = 0_Int_8
    1195              :             ELSE
    1196              :                do_it = .FALSE.
    1197              :                bins_left = .FALSE.
    1198              :             END IF
    1199              : !$OMP END CRITICAL(hfxenergy_critical)
    1200              :          END IF
    1201              :          IF (.NOT. do_it) CYCLE
    1202              : 
    1203              : !$OMP MASTER
    1204              :          CALL timeset(routineN//"_bin", handle_bin)
    1205              : !$OMP END MASTER
    1206              : 
    1207              :          bintime_start = m_walltime()
    1208              :          my_istart = distribution_energy%istart
    1209              :          my_current_counter = 0
    1210              :          IF (distribution_energy%number_of_atom_quartets == 0 .OR. &
    1211              :              my_istart == -1_int_8) my_istart = nblocks**4
    1212              :          atomic_blocks: DO atom_block = my_istart, nblocks**4 - 1, n_processes
    1213              :             latom_block = INT(MODULO(atom_block, nblocks)) + 1
    1214              :             tmp_block = atom_block/nblocks
    1215              :             katom_block = INT(MODULO(tmp_block, nblocks)) + 1
    1216              :             IF (latom_block < katom_block) CYCLE atomic_blocks
    1217              :             tmp_block = tmp_block/nblocks
    1218              :             jatom_block = INT(MODULO(tmp_block, nblocks)) + 1
    1219              :             tmp_block = tmp_block/nblocks
    1220              :             iatom_block = INT(MODULO(tmp_block, nblocks)) + 1
    1221              :             IF (jatom_block < iatom_block) CYCLE atomic_blocks
    1222              :             my_current_counter = my_current_counter + 1
    1223              :             IF (my_current_counter > distribution_energy%number_of_atom_quartets) EXIT atomic_blocks
    1224              : 
    1225              :             iatom_start = actual_x_data%blocks(iatom_block)%istart
    1226              :             iatom_end = actual_x_data%blocks(iatom_block)%iend
    1227              :             jatom_start = actual_x_data%blocks(jatom_block)%istart
    1228              :             jatom_end = actual_x_data%blocks(jatom_block)%iend
    1229              :             katom_start = actual_x_data%blocks(katom_block)%istart
    1230              :             katom_end = actual_x_data%blocks(katom_block)%iend
    1231              :             latom_start = actual_x_data%blocks(latom_block)%istart
    1232              :             latom_end = actual_x_data%blocks(latom_block)%iend
    1233              : 
    1234              :             pmax_blocks = MAX(shm_pmax_block(katom_block, iatom_block), &
    1235              :                               shm_pmax_block(latom_block, jatom_block), &
    1236              :                               shm_pmax_block(latom_block, iatom_block), &
    1237              :                               shm_pmax_block(katom_block, jatom_block))
    1238              : 
    1239              :             IF (2.0_dp*coeffs_kind_max0 + pmax_blocks < log10_eps_schwarz) CYCLE atomic_blocks
    1240              : 
    1241              :             CALL build_pair_list(natom, list_ij, set_list_ij, iatom_start, iatom_end, &
    1242              :                                  jatom_start, jatom_end, &
    1243              :                                  kind_of, basis_parameter, particle_set, &
    1244              :                                  do_periodic, screen_coeffs_set, screen_coeffs_kind, &
    1245              :                                  coeffs_kind_max0, log10_eps_schwarz, cell, pmax_blocks, &
    1246              :                                  shm_atomic_pair_list)
    1247              : 
    1248              :             CALL build_pair_list(natom, list_kl, set_list_kl, katom_start, katom_end, &
    1249              :                                  latom_start, latom_end, &
    1250              :                                  kind_of, basis_parameter, particle_set, &
    1251              :                                  do_periodic, screen_coeffs_set, screen_coeffs_kind, &
    1252              :                                  coeffs_kind_max0, log10_eps_schwarz, cell, pmax_blocks, &
    1253              :                                  shm_atomic_pair_list)
    1254              : 
    1255              :             DO i_list_ij = 1, list_ij%n_element
    1256              : 
    1257              :                iatom = list_ij%elements(i_list_ij)%pair(1)
    1258              :                jatom = list_ij%elements(i_list_ij)%pair(2)
    1259              :                i_set_list_ij_start = list_ij%elements(i_list_ij)%set_bounds(1)
    1260              :                i_set_list_ij_stop = list_ij%elements(i_list_ij)%set_bounds(2)
    1261              :                ikind = list_ij%elements(i_list_ij)%kind_pair(1)
    1262              :                jkind = list_ij%elements(i_list_ij)%kind_pair(2)
    1263              :                ra = list_ij%elements(i_list_ij)%r1
    1264              :                rb = list_ij%elements(i_list_ij)%r2
    1265              :                rab2 = list_ij%elements(i_list_ij)%dist2
    1266              : 
    1267              :                la_max => basis_parameter(ikind)%lmax
    1268              :                la_min => basis_parameter(ikind)%lmin
    1269              :                npgfa => basis_parameter(ikind)%npgf
    1270              :                nseta = basis_parameter(ikind)%nset
    1271              :                zeta => basis_parameter(ikind)%zet
    1272              :                nsgfa => basis_parameter(ikind)%nsgf
    1273              :                sphi_a_ext => basis_parameter(ikind)%sphi_ext(:, :, :, :)
    1274              :                nsgfl_a => basis_parameter(ikind)%nsgfl
    1275              :                sphi_a_u1 = UBOUND(sphi_a_ext, 1)
    1276              :                sphi_a_u2 = UBOUND(sphi_a_ext, 2)
    1277              :                sphi_a_u3 = UBOUND(sphi_a_ext, 3)
    1278              : 
    1279              :                lb_max => basis_parameter(jkind)%lmax
    1280              :                lb_min => basis_parameter(jkind)%lmin
    1281              :                npgfb => basis_parameter(jkind)%npgf
    1282              :                nsetb = basis_parameter(jkind)%nset
    1283              :                zetb => basis_parameter(jkind)%zet
    1284              :                nsgfb => basis_parameter(jkind)%nsgf
    1285              :                sphi_b_ext => basis_parameter(jkind)%sphi_ext(:, :, :, :)
    1286              :                nsgfl_b => basis_parameter(jkind)%nsgfl
    1287              :                sphi_b_u1 = UBOUND(sphi_b_ext, 1)
    1288              :                sphi_b_u2 = UBOUND(sphi_b_ext, 2)
    1289              :                sphi_b_u3 = UBOUND(sphi_b_ext, 3)
    1290              : 
    1291              :                DO i_list_kl = 1, list_kl%n_element
    1292              :                   katom = list_kl%elements(i_list_kl)%pair(1)
    1293              :                   latom = list_kl%elements(i_list_kl)%pair(2)
    1294              : 
    1295              :                   IF (.NOT. (katom + latom <= iatom + jatom)) CYCLE
    1296              :                   IF (((iatom + jatom) == (katom + latom)) .AND. (katom < iatom)) CYCLE
    1297              :                   i_set_list_kl_start = list_kl%elements(i_list_kl)%set_bounds(1)
    1298              :                   i_set_list_kl_stop = list_kl%elements(i_list_kl)%set_bounds(2)
    1299              :                   kkind = list_kl%elements(i_list_kl)%kind_pair(1)
    1300              :                   lkind = list_kl%elements(i_list_kl)%kind_pair(2)
    1301              :                   rc = list_kl%elements(i_list_kl)%r1
    1302              :                   rd = list_kl%elements(i_list_kl)%r2
    1303              :                   rcd2 = list_kl%elements(i_list_kl)%dist2
    1304              : 
    1305              :                   IF (do_p_screening) THEN
    1306              :                      pmax_atom = MAX(shm_pmax_atom(katom, iatom), &
    1307              :                                      shm_pmax_atom(latom, jatom), &
    1308              :                                      shm_pmax_atom(latom, iatom), &
    1309              :                                      shm_pmax_atom(katom, jatom))
    1310              :                   ELSE
    1311              :                      pmax_atom = 0.0_dp
    1312              :                   END IF
    1313              : 
    1314              :                   screen_kind_ij = screen_coeffs_kind(jkind, ikind)%x(1)*rab2 + &
    1315              :                                    screen_coeffs_kind(jkind, ikind)%x(2)
    1316              :                   screen_kind_kl = screen_coeffs_kind(lkind, kkind)%x(1)*rcd2 + &
    1317              :                                    screen_coeffs_kind(lkind, kkind)%x(2)
    1318              : 
    1319              :                   IF (screen_kind_ij + screen_kind_kl + pmax_atom < log10_eps_schwarz) CYCLE
    1320              : 
    1321              :                   !! we want to be consistent with the KS matrix. If none of the atomic indices
    1322              :                   !! is associated cycle
    1323              :                   IF (.NOT. (shm_is_assoc_atomic_block(latom, iatom) >= 1 .AND. &
    1324              :                              shm_is_assoc_atomic_block(katom, iatom) >= 1 .AND. &
    1325              :                              shm_is_assoc_atomic_block(katom, jatom) >= 1 .AND. &
    1326              :                              shm_is_assoc_atomic_block(latom, jatom) >= 1)) CYCLE
    1327              : 
    1328              :                   !! calculate symmetry_factor according to degeneracy of atomic quartet
    1329              :                   symm_fac = 0.5_dp
    1330              :                   IF (iatom == jatom) symm_fac = symm_fac*2.0_dp
    1331              :                   IF (katom == latom) symm_fac = symm_fac*2.0_dp
    1332              :                   IF (iatom == katom .AND. jatom == latom .AND. iatom /= jatom .AND. katom /= latom) symm_fac = symm_fac*2.0_dp
    1333              :                   IF (iatom == katom .AND. iatom == jatom .AND. katom == latom) symm_fac = symm_fac*2.0_dp
    1334              :                   symm_fac = 1.0_dp/symm_fac
    1335              : 
    1336              :                   lc_max => basis_parameter(kkind)%lmax
    1337              :                   lc_min => basis_parameter(kkind)%lmin
    1338              :                   npgfc => basis_parameter(kkind)%npgf
    1339              :                   zetc => basis_parameter(kkind)%zet
    1340              :                   nsgfc => basis_parameter(kkind)%nsgf
    1341              :                   sphi_c_ext => basis_parameter(kkind)%sphi_ext(:, :, :, :)
    1342              :                   nsgfl_c => basis_parameter(kkind)%nsgfl
    1343              :                   sphi_c_u1 = UBOUND(sphi_c_ext, 1)
    1344              :                   sphi_c_u2 = UBOUND(sphi_c_ext, 2)
    1345              :                   sphi_c_u3 = UBOUND(sphi_c_ext, 3)
    1346              : 
    1347              :                   ld_max => basis_parameter(lkind)%lmax
    1348              :                   ld_min => basis_parameter(lkind)%lmin
    1349              :                   npgfd => basis_parameter(lkind)%npgf
    1350              :                   zetd => basis_parameter(lkind)%zet
    1351              :                   nsgfd => basis_parameter(lkind)%nsgf
    1352              :                   sphi_d_ext => basis_parameter(lkind)%sphi_ext(:, :, :, :)
    1353              :                   nsgfl_d => basis_parameter(lkind)%nsgfl
    1354              :                   sphi_d_u1 = UBOUND(sphi_d_ext, 1)
    1355              :                   sphi_d_u2 = UBOUND(sphi_d_ext, 2)
    1356              :                   sphi_d_u3 = UBOUND(sphi_d_ext, 3)
    1357              : 
    1358              :                   ! Note: shm_atomic_block_offset is already symmetric
    1359              :                   ! see get_atomic_block_maps in hfx_communication.F
    1360              :                   atomic_offset_bd = shm_atomic_block_offset(jatom, latom)
    1361              :                   atomic_offset_bc = shm_atomic_block_offset(jatom, katom)
    1362              :                   atomic_offset_ad = shm_atomic_block_offset(iatom, latom)
    1363              :                   atomic_offset_ac = shm_atomic_block_offset(iatom, katom)
    1364              : 
    1365              :                   IF (jatom < latom) THEN
    1366              :                      offset_bd_set => shm_set_offset(:, :, lkind, jkind)
    1367              :                   ELSE
    1368              :                      offset_bd_set => shm_set_offset(:, :, jkind, lkind)
    1369              :                   END IF
    1370              :                   IF (jatom < katom) THEN
    1371              :                      offset_bc_set => shm_set_offset(:, :, kkind, jkind)
    1372              :                   ELSE
    1373              :                      offset_bc_set => shm_set_offset(:, :, jkind, kkind)
    1374              :                   END IF
    1375              :                   IF (iatom < latom) THEN
    1376              :                      offset_ad_set => shm_set_offset(:, :, lkind, ikind)
    1377              :                   ELSE
    1378              :                      offset_ad_set => shm_set_offset(:, :, ikind, lkind)
    1379              :                   END IF
    1380              :                   IF (iatom < katom) THEN
    1381              :                      offset_ac_set => shm_set_offset(:, :, kkind, ikind)
    1382              :                   ELSE
    1383              :                      offset_ac_set => shm_set_offset(:, :, ikind, kkind)
    1384              :                   END IF
    1385              : 
    1386              :                   IF (do_p_screening) THEN
    1387              :                      swap_id = 0
    1388              :                      kind_kind_idx = INT(get_1D_idx(kkind, ikind, INT(nkind, int_8)))
    1389              :                      IF (ikind >= kkind) THEN
    1390              :                         ptr_p_1 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
    1391              :                                                                        actual_x_data%map_atom_to_kind_atom(katom), &
    1392              :                                                                        actual_x_data%map_atom_to_kind_atom(iatom))
    1393              :                      ELSE
    1394              :                         ptr_p_1 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
    1395              :                                                                        actual_x_data%map_atom_to_kind_atom(iatom), &
    1396              :                                                                        actual_x_data%map_atom_to_kind_atom(katom))
    1397              :                         swap_id = swap_id + 1
    1398              :                      END IF
    1399              :                      kind_kind_idx = INT(get_1D_idx(lkind, jkind, INT(nkind, int_8)))
    1400              :                      IF (jkind >= lkind) THEN
    1401              :                         ptr_p_2 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
    1402              :                                                                        actual_x_data%map_atom_to_kind_atom(latom), &
    1403              :                                                                        actual_x_data%map_atom_to_kind_atom(jatom))
    1404              :                      ELSE
    1405              :                         ptr_p_2 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
    1406              :                                                                        actual_x_data%map_atom_to_kind_atom(jatom), &
    1407              :                                                                        actual_x_data%map_atom_to_kind_atom(latom))
    1408              :                         swap_id = swap_id + 2
    1409              :                      END IF
    1410              :                      kind_kind_idx = INT(get_1D_idx(lkind, ikind, INT(nkind, int_8)))
    1411              :                      IF (ikind >= lkind) THEN
    1412              :                         ptr_p_3 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
    1413              :                                                                        actual_x_data%map_atom_to_kind_atom(latom), &
    1414              :                                                                        actual_x_data%map_atom_to_kind_atom(iatom))
    1415              :                      ELSE
    1416              :                         ptr_p_3 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
    1417              :                                                                        actual_x_data%map_atom_to_kind_atom(iatom), &
    1418              :                                                                        actual_x_data%map_atom_to_kind_atom(latom))
    1419              :                         swap_id = swap_id + 4
    1420              :                      END IF
    1421              :                      kind_kind_idx = INT(get_1D_idx(kkind, jkind, INT(nkind, int_8)))
    1422              :                      IF (jkind >= kkind) THEN
    1423              :                         ptr_p_4 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
    1424              :                                                                        actual_x_data%map_atom_to_kind_atom(katom), &
    1425              :                                                                        actual_x_data%map_atom_to_kind_atom(jatom))
    1426              :                      ELSE
    1427              :                         ptr_p_4 => shm_initial_p(kind_kind_idx)%p_kind(:, :, &
    1428              :                                                                        actual_x_data%map_atom_to_kind_atom(jatom), &
    1429              :                                                                        actual_x_data%map_atom_to_kind_atom(katom))
    1430              :                         swap_id = swap_id + 8
    1431              :                      END IF
    1432              :                   END IF
    1433              : 
    1434              :                   !! At this stage, check for memory used in compression
    1435              :                   IF (my_geo_change) THEN
    1436              :                      IF (.NOT. memory_parameter%do_all_on_the_fly) THEN
    1437              :                         ! ** We know the maximum amount of integrals that we can store per MPI-process
    1438              :                         ! ** Now we need to sum the current memory usage among all openMP threads
    1439              :                         ! ** We can just read what is currently stored on the corresponding x_data type
    1440              :                         ! ** If this is thread i and it tries to read the data from thread j, that is
    1441              :                         ! ** currently writing that data, we just dont care, because the possible error
    1442              :                         ! ** is of the order of CACHE_SIZE
    1443              :                         mem_compression_counter = 0
    1444              :                         DO i = 1, n_threads
    1445              : !$OMP                   ATOMIC READ
    1446              :                            tmp_i4 = x_data(irep, i)%memory_parameter%actual_memory_usage
    1447              :                            mem_compression_counter = mem_compression_counter + &
    1448              :                                                      tmp_i4*memory_parameter%cache_size
    1449              :                         END DO
    1450              :                         IF (mem_compression_counter > memory_parameter%max_compression_counter) THEN
    1451              :                            buffer_overflow = .TRUE.
    1452              :                            IF (do_dynamic_load_balancing) THEN
    1453              :                               distribution_energy%ram_counter = counter
    1454              :                            ELSE
    1455              :                               memory_parameter%ram_counter = counter
    1456              :                            END IF
    1457              :                         ELSE
    1458              :                            counter = counter + 1
    1459              :                            buffer_overflow = .FALSE.
    1460              :                         END IF
    1461              :                      END IF
    1462              :                   ELSE
    1463              :                      IF (.NOT. memory_parameter%do_all_on_the_fly) THEN
    1464              :                         IF (do_dynamic_load_balancing) THEN
    1465              :                            IF (distribution_energy%ram_counter == counter) THEN
    1466              :                               buffer_overflow = .TRUE.
    1467              :                            ELSE
    1468              :                               counter = counter + 1
    1469              :                               buffer_overflow = .FALSE.
    1470              :                            END IF
    1471              : 
    1472              :                         ELSE
    1473              :                            IF (memory_parameter%ram_counter == counter) THEN
    1474              :                               buffer_overflow = .TRUE.
    1475              :                            ELSE
    1476              :                               counter = counter + 1
    1477              :                               buffer_overflow = .FALSE.
    1478              :                            END IF
    1479              :                         END IF
    1480              :                      END IF
    1481              :                   END IF
    1482              : 
    1483              :                   IF (buffer_overflow .AND. do_disk_storage) THEN
    1484              :                      use_disk_storage = .TRUE.
    1485              :                      buffer_overflow = .FALSE.
    1486              :                   END IF
    1487              : 
    1488              :                   IF (use_disk_storage) THEN
    1489              : !$OMP               ATOMIC READ
    1490              :                      tmp_i4 = memory_parameter%actual_memory_usage_disk
    1491              :                      mem_compression_counter_disk = tmp_i4*memory_parameter%cache_size
    1492              :                      IF (mem_compression_counter_disk > memory_parameter%max_compression_counter_disk) THEN
    1493              :                         buffer_overflow = .TRUE.
    1494              :                         use_disk_storage = .FALSE.
    1495              :                      END IF
    1496              :                   END IF
    1497              : 
    1498              :                   DO i_set_list_ij = i_set_list_ij_start, i_set_list_ij_stop
    1499              :                      iset = set_list_ij(i_set_list_ij)%pair(1)
    1500              :                      jset = set_list_ij(i_set_list_ij)%pair(2)
    1501              : 
    1502              :                      ncob = npgfb(jset)*ncoset(lb_max(jset))
    1503              :                      max_val1 = screen_coeffs_set(jset, iset, jkind, ikind)%x(1)*rab2 + &
    1504              :                                 screen_coeffs_set(jset, iset, jkind, ikind)%x(2)
    1505              : 
    1506              :                      IF (max_val1 + screen_kind_kl + pmax_atom < log10_eps_schwarz) CYCLE
    1507              : 
    1508              :                      sphi_a_ext_set => sphi_a_ext(:, :, :, iset)
    1509              :                      sphi_b_ext_set => sphi_b_ext(:, :, :, jset)
    1510              :                      DO i_set_list_kl = i_set_list_kl_start, i_set_list_kl_stop
    1511              :                         kset = set_list_kl(i_set_list_kl)%pair(1)
    1512              :                         lset = set_list_kl(i_set_list_kl)%pair(2)
    1513              : 
    1514              :                         max_val2_set = (screen_coeffs_set(lset, kset, lkind, kkind)%x(1)*rcd2 + &
    1515              :                                         screen_coeffs_set(lset, kset, lkind, kkind)%x(2))
    1516              :                         max_val2 = max_val1 + max_val2_set
    1517              : 
    1518              :                         !! Near field screening
    1519              :                         IF (max_val2 + pmax_atom < log10_eps_schwarz) CYCLE
    1520              :                         sphi_c_ext_set => sphi_c_ext(:, :, :, kset)
    1521              :                         sphi_d_ext_set => sphi_d_ext(:, :, :, lset)
    1522              :                         !! get max_vals if we screen on initial density
    1523              :                         IF (do_p_screening) THEN
    1524              :                            CALL get_pmax_val(ptr_p_1, ptr_p_2, ptr_p_3, ptr_p_4, &
    1525              :                                              iset, jset, kset, lset, &
    1526              :                                              pmax_entry, swap_id)
    1527              :                         ELSE
    1528              :                            pmax_entry = 0.0_dp
    1529              :                         END IF
    1530              :                         log10_pmax = pmax_entry
    1531              :                         max_val2 = max_val2 + log10_pmax
    1532              :                         IF (max_val2 < log10_eps_schwarz) CYCLE
    1533              :                         pmax_entry = EXP(log10_pmax*ln_10)
    1534              : 
    1535              :                         ! libGint can only run in direct mode at the moment
    1536              :                         ! so there is no need to check the buffers
    1537              :                         if (use_libGint) then
    1538              :                            tmp_screen_pgf1 => screen_coeffs_pgf(:, :, jset, iset, jkind, ikind)
    1539              :                            tmp_screen_pgf2 => screen_coeffs_pgf(:, :, lset, kset, lkind, kkind)
    1540              :                            ! TODO a more modern approach in which the call is more like
    1541              :                            ! call coulom_4_gpu( HFX_data, iset, jset, kset, lset )
    1542              :                            call libGint_coulomb4( &
    1543              :                               iatom, jatom, katom, latom, iset, jset, kset, lset, &
    1544              :                               ra, rb, rc, rd, &
    1545              :                               npgfa(iset), npgfb(jset), npgfc(kset), npgfd(lset), &
    1546              :                               potential_parameter, &
    1547              :                               screen_coeffs_set(jset, iset, jkind, ikind)%x, &
    1548              :                               screen_coeffs_set(lset, kset, lkind, kkind)%x, &
    1549              :                               log10_pmax, log10_eps_schwarz, &
    1550              :                               tmp_screen_pgf1, tmp_screen_pgf2, &
    1551              :                               actual_x_data%neighbor_cells, cell, do_periodic, screened)
    1552              : 
    1553              :                            if (.not. screened) then
    1554              : 
    1555              :                               ! TODO a more modern approach in which the call is more like
    1556              :                               ! call update_fock_matrix_gpu( HFX_data, iset, jset, kset, lset )
    1557              :                               call libGint_update_fock_matrix( &
    1558              :                                  symm_fac, &
    1559              :                                  iatom, jatom, katom, latom, &
    1560              :                                  iset, jset, kset, lset, &
    1561              :                                  atomic_offset_ac, atomic_offset_ad, atomic_offset_bc, atomic_offset_bd, &
    1562              :                                  offset_ac_set, offset_ad_set, offset_bc_set, offset_bd_set, &
    1563              :                                  nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
    1564              :                                  la_min(iset), la_max(iset), lb_min(jset), lb_max(jset), &
    1565              :                                  lc_min(kset), lc_max(kset), ld_min(lset), ld_max(lset), &
    1566              :                                  nsgfl_a, nsgfl_b, nsgfl_c, nsgfl_d)
    1567              : 
    1568              :                            end if
    1569              :                         end if
    1570              :                         ! Run using libint as integral engine
    1571              :                         if_use_libint: if (use_libint) then
    1572              :                            !! store current number of integrals, update total number and number of integrals in buffer
    1573              :                            current_counter = nsgfa(iset)*nsgfb(jset)*nsgfc(kset)*nsgfd(lset)
    1574              :                            IF (buffer_overflow) THEN
    1575              :                               neris_onthefly = neris_onthefly + current_counter
    1576              :                            END IF
    1577              : 
    1578              :                            !! Get integrals from buffer and update Kohn-Sham matrix
    1579              :                            IF (.NOT. buffer_overflow .AND. .NOT. my_geo_change) THEN
    1580              :                               nints = current_counter
    1581              :                               IF (.NOT. use_disk_storage) THEN
    1582              :                                  CALL hfx_get_single_cache_element( &
    1583              :                                     estimate_to_store_int, 6, &
    1584              :                                     maxval_cache, maxval_container, memory_parameter%actual_memory_usage, &
    1585              :                                     use_disk_storage)
    1586              :                               ELSE
    1587              :                                  CALL hfx_get_single_cache_element( &
    1588              :                                     estimate_to_store_int, 6, &
    1589              :                                     maxval_cache_disk, maxval_container_disk, memory_parameter%actual_memory_usage_disk, &
    1590              :                                     use_disk_storage)
    1591              :                               END IF
    1592              :                               spherical_estimate = SET_EXPONENT(1.0_dp, estimate_to_store_int + 1)
    1593              :                               IF (spherical_estimate*pmax_entry < eps_schwarz) CYCLE
    1594              :                               nbits = EXPONENT(ANINT(spherical_estimate*pmax_entry/eps_storage)) + 1
    1595              :                               buffer_left = nints
    1596              :                               buffer_start = 1
    1597              :                               IF (.NOT. use_disk_storage) THEN
    1598              :                                  neris_incore = neris_incore + INT(nints, int_8)
    1599              :                               ELSE
    1600              :                                  neris_disk = neris_disk + INT(nints, int_8)
    1601              :                               END IF
    1602              :                               DO WHILE (buffer_left > 0)
    1603              :                                  buffer_size = MIN(buffer_left, cache_size)
    1604              :                                  IF (.NOT. use_disk_storage) THEN
    1605              :                                     CALL hfx_get_mult_cache_elements(primitive_integrals(buffer_start), &
    1606              :                                                                      buffer_size, nbits, &
    1607              :                                                                      integral_caches(nbits), &
    1608              :                                                                      integral_containers(nbits), &
    1609              :                                                                      eps_storage, pmax_entry, &
    1610              :                                                                      memory_parameter%actual_memory_usage, &
    1611              :                                                                      use_disk_storage)
    1612              :                                  ELSE
    1613              :                                     CALL hfx_get_mult_cache_elements(primitive_integrals(buffer_start), &
    1614              :                                                                      buffer_size, nbits, &
    1615              :                                                                      integral_caches_disk(nbits), &
    1616              :                                                                      integral_containers_disk(nbits), &
    1617              :                                                                      eps_storage, pmax_entry, &
    1618              :                                                                      memory_parameter%actual_memory_usage_disk, &
    1619              :                                                                      use_disk_storage)
    1620              :                                  END IF
    1621              :                                  buffer_left = buffer_left - buffer_size
    1622              :                                  buffer_start = buffer_start + buffer_size
    1623              :                               END DO
    1624              :                            END IF
    1625              :                            !! Calculate integrals if we run out of buffer or the geometry did change
    1626              :                            IF (my_geo_change .OR. buffer_overflow) THEN
    1627              : 
    1628              :                               max_contraction_val = max_contraction(iset, iatom)* &
    1629              :                                                     max_contraction(jset, jatom)* &
    1630              :                                                     max_contraction(kset, katom)* &
    1631              :                                                     max_contraction(lset, latom)*pmax_entry
    1632              :                               tmp_R_1 => radii_pgf(:, :, jset, iset, jkind, ikind)
    1633              :                               tmp_R_2 => radii_pgf(:, :, lset, kset, lkind, kkind)
    1634              :                               tmp_screen_pgf1 => screen_coeffs_pgf(:, :, jset, iset, jkind, ikind)
    1635              :                               tmp_screen_pgf2 => screen_coeffs_pgf(:, :, lset, kset, lkind, kkind)
    1636              : 
    1637              :                               CALL coulomb4(private_lib, ra, rb, rc, rd, npgfa(iset), npgfb(jset), npgfc(kset), npgfd(lset), &
    1638              :                                             la_min(iset), la_max(iset), lb_min(jset), lb_max(jset), &
    1639              :                                             lc_min(kset), lc_max(kset), ld_min(lset), ld_max(lset), &
    1640              :                                             nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
    1641              :                                             sphi_a_u1, sphi_a_u2, sphi_a_u3, &
    1642              :                                             sphi_b_u1, sphi_b_u2, sphi_b_u3, &
    1643              :                                             sphi_c_u1, sphi_c_u2, sphi_c_u3, &
    1644              :                                             sphi_d_u1, sphi_d_u2, sphi_d_u3, &
    1645              :                                             zeta(1:npgfa(iset), iset), zetb(1:npgfb(jset), jset), &
    1646              :                                             zetc(1:npgfc(kset), kset), zetd(1:npgfd(lset), lset), &
    1647              :                                             primitive_integrals, &
    1648              :                                             potential_parameter, &
    1649              :                                             actual_x_data%neighbor_cells, screen_coeffs_set(jset, iset, jkind, ikind)%x, &
    1650              :                                             screen_coeffs_set(lset, kset, lkind, kkind)%x, eps_schwarz, &
    1651              :                                             max_contraction_val, cartesian_estimate, cell, neris_tmp, &
    1652              :                                             log10_pmax, log10_eps_schwarz, &
    1653              :                                             tmp_R_1, tmp_R_2, tmp_screen_pgf1, tmp_screen_pgf2, &
    1654              :                                             pgf_list_ij, pgf_list_kl, pgf_product_list, &
    1655              :                                             nsgfl_a(:, iset), nsgfl_b(:, jset), &
    1656              :                                             nsgfl_c(:, kset), nsgfl_d(:, lset), &
    1657              :                                             sphi_a_ext_set, &
    1658              :                                             sphi_b_ext_set, &
    1659              :                                             sphi_c_ext_set, &
    1660              :                                             sphi_d_ext_set, &
    1661              :                                             ee_work, ee_work2, ee_buffer1, ee_buffer2, ee_primitives_tmp, &
    1662              :                                             nimages, do_periodic, p_work)
    1663              : 
    1664              :                               nints = nsgfa(iset)*nsgfb(jset)*nsgfc(kset)*nsgfd(lset)
    1665              :                               neris_total = neris_total + nints
    1666              :                               nprim_ints = nprim_ints + neris_tmp
    1667              : 
    1668              :                               !! Compress the array for storage
    1669              :                               spherical_estimate = 0.0_dp
    1670              :                               DO i = 1, nints
    1671              :                                  spherical_estimate = MAX(spherical_estimate, ABS(primitive_integrals(i)))
    1672              :                               END DO
    1673              : 
    1674              :                               IF (spherical_estimate == 0.0_dp) spherical_estimate = TINY(spherical_estimate)
    1675              :                               estimate_to_store_int = EXPONENT(spherical_estimate)
    1676              :                               estimate_to_store_int = MAX(estimate_to_store_int, -15_int_8)
    1677              : 
    1678              :                               IF (.NOT. buffer_overflow .AND. my_geo_change) THEN
    1679              :                                  IF (.NOT. use_disk_storage) THEN
    1680              :                                     CALL hfx_add_single_cache_element( &
    1681              :                                        estimate_to_store_int, 6, &
    1682              :                                        maxval_cache, maxval_container, memory_parameter%actual_memory_usage, &
    1683              :                                        use_disk_storage, max_val_memory)
    1684              :                                  ELSE
    1685              :                                     CALL hfx_add_single_cache_element( &
    1686              :                                        estimate_to_store_int, 6, &
    1687              :                                        maxval_cache_disk, maxval_container_disk, memory_parameter%actual_memory_usage_disk, &
    1688              :                                        use_disk_storage)
    1689              :                                  END IF
    1690              :                               END IF
    1691              :                               spherical_estimate = SET_EXPONENT(1.0_dp, estimate_to_store_int + 1)
    1692              :                               IF (spherical_estimate*pmax_entry < eps_schwarz) CYCLE
    1693              :                               IF (.NOT. buffer_overflow) THEN
    1694              :                                  nbits = EXPONENT(ANINT(spherical_estimate*pmax_entry/eps_storage)) + 1
    1695              : 
    1696              :                                  ! In case of a tight eps_storage threshold the number of significant
    1697              :                                  ! bits in the integer number NINT(value*pmax_entry/eps_storage) may
    1698              :                                  ! exceed the width of the storage element. As the compression algorithm
    1699              :                                  ! is designed for IEEE 754 double precision numbers, a 64-bit signed
    1700              :                                  ! integer variable which is used to store the result of this float-to-
    1701              :                                  ! integer conversion (we have no wish to use more memory for storing
    1702              :                                  ! compressed ERIs than it is needed for uncompressed ERIs) may overflow.
    1703              :                                  ! Abort with a meaningful message when it happens.
    1704              :                                  !
    1705              :                                  ! The magic number 63 stands for the number of magnitude bits
    1706              :                                  ! (64 bits minus one sign bit).
    1707              :                                  IF (nbits > 63) THEN
    1708              :                                     WRITE (eps_schwarz_min_str, '(ES10.3E2)') &
    1709              :                                        spherical_estimate*pmax_entry/ &
    1710              :                                        (SET_EXPONENT(1.0_dp, 63)*memory_parameter%eps_storage_scaling)
    1711              : 
    1712              :                                     WRITE (eps_scaling_str, '(ES10.3E2)') &
    1713              :                                        spherical_estimate*pmax_entry/(SET_EXPONENT(1.0_dp, 63)*eps_schwarz)
    1714              : 
    1715              :                                     CALL cp_abort(__LOCATION__, &
    1716              :                                                   "Overflow during ERI's compression. Please use a larger "// &
    1717              :                                                   "EPS_SCHWARZ threshold (above "//TRIM(ADJUSTL(eps_schwarz_min_str))// &
    1718              :                                                   ") or increase the EPS_STORAGE_SCALING factor above "// &
    1719              :                                                   TRIM(ADJUSTL(eps_scaling_str))//".")
    1720              :                                  END IF
    1721              : 
    1722              :                                  buffer_left = nints
    1723              :                                  buffer_start = 1
    1724              :                                  IF (.NOT. use_disk_storage) THEN
    1725              :                                     neris_incore = neris_incore + INT(nints, int_8)
    1726              :                                  ELSE
    1727              :                                     neris_disk = neris_disk + INT(nints, int_8)
    1728              :                                  END IF
    1729              :                                  DO WHILE (buffer_left > 0)
    1730              :                                     buffer_size = MIN(buffer_left, CACHE_SIZE)
    1731              :                                     IF (.NOT. use_disk_storage) THEN
    1732              :                                        CALL hfx_add_mult_cache_elements(primitive_integrals(buffer_start), &
    1733              :                                                                         buffer_size, nbits, &
    1734              :                                                                         integral_caches(nbits), &
    1735              :                                                                         integral_containers(nbits), &
    1736              :                                                                         eps_storage, pmax_entry, &
    1737              :                                                                         memory_parameter%actual_memory_usage, &
    1738              :                                                                         use_disk_storage)
    1739              :                                     ELSE
    1740              :                                        CALL hfx_add_mult_cache_elements(primitive_integrals(buffer_start), &
    1741              :                                                                         buffer_size, nbits, &
    1742              :                                                                         integral_caches_disk(nbits), &
    1743              :                                                                         integral_containers_disk(nbits), &
    1744              :                                                                         eps_storage, pmax_entry, &
    1745              :                                                                         memory_parameter%actual_memory_usage_disk, &
    1746              :                                                                         use_disk_storage)
    1747              :                                     END IF
    1748              :                                     buffer_left = buffer_left - buffer_size
    1749              :                                     buffer_start = buffer_start + buffer_size
    1750              :                                  END DO
    1751              :                               ELSE
    1752              :                                  !! In order to be consistent with in-core part, round all the eris wrt. eps_schwarz
    1753              :                                  DO i = 1, nints
    1754              :                                     primitive_integrals(i) = primitive_integrals(i)*pmax_entry
    1755              :                                     IF (ABS(primitive_integrals(i)) > eps_storage) THEN
    1756              :                                        primitive_integrals(i) = ANINT(primitive_integrals(i)/eps_storage, dp)*eps_storage/pmax_entry
    1757              :                                     ELSE
    1758              :                                        primitive_integrals(i) = 0.0_dp
    1759              :                                     END IF
    1760              :                                  END DO
    1761              :                               END IF
    1762              :                            END IF
    1763              :                            !!!  DEBUG, print out primitive integrals and indices. Only works serial no OMP  !!!
    1764              :                            IF (.FALSE.) THEN
    1765              :                               CALL print_integrals( &
    1766              :                                  iatom, jatom, katom, latom, shm_set_offset, shm_atomic_block_offset, &
    1767              :                                  iset, jset, kset, lset, nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), primitive_integrals)
    1768              :                            END IF
    1769              :                            IF (.NOT. is_anti_symmetric) THEN
    1770              :                               !! Update Kohn-Sham matrix
    1771              :                               CALL update_fock_matrix( &
    1772              :                                  nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
    1773              :                                  fac, symm_fac, full_density_alpha(:, 1), full_ks_alpha(:, 1), &
    1774              :                                  primitive_integrals, pbd_buf, pbc_buf, pad_buf, pac_buf, kbd_buf, &
    1775              :                                  kbc_buf, kad_buf, kac_buf, iatom, jatom, katom, latom, &
    1776              :                                  iset, jset, kset, lset, offset_bd_set, offset_bc_set, offset_ad_set, offset_ac_set, &
    1777              :                                  atomic_offset_bd, atomic_offset_bc, atomic_offset_ad, atomic_offset_ac)
    1778              :                               IF (.NOT. treat_lsd_in_core) THEN
    1779              :                                  IF (my_nspins == 2) THEN
    1780              :                                     CALL update_fock_matrix( &
    1781              :                                        nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
    1782              :                                        fac, symm_fac, full_density_beta(:, 1), full_ks_beta(:, 1), &
    1783              :                                        primitive_integrals, pbd_buf, pbc_buf, pad_buf, pac_buf, kbd_buf, &
    1784              :                                        kbc_buf, kad_buf, kac_buf, iatom, jatom, katom, latom, &
    1785              :                                        iset, jset, kset, lset, offset_bd_set, offset_bc_set, offset_ad_set, offset_ac_set, &
    1786              :                                        atomic_offset_bd, atomic_offset_bc, atomic_offset_ad, atomic_offset_ac)
    1787              :                                  END IF
    1788              :                               END IF
    1789              :                            ELSE
    1790              :                               !! Update Kohn-Sham matrix
    1791              :                               CALL update_fock_matrix_as( &
    1792              :                                  nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
    1793              :                                  fac, symm_fac, full_density_alpha(:, 1), full_ks_alpha(:, 1), &
    1794              :                                  primitive_integrals, pbd_buf, pbc_buf, pad_buf, pac_buf, kbd_buf, &
    1795              :                                  kbc_buf, kad_buf, kac_buf, iatom, jatom, katom, latom, &
    1796              :                                  iset, jset, kset, lset, offset_bd_set, offset_bc_set, offset_ad_set, offset_ac_set, &
    1797              :                                  atomic_offset_bd, atomic_offset_bc, atomic_offset_ad, atomic_offset_ac)
    1798              :                               IF (.NOT. treat_lsd_in_core) THEN
    1799              :                                  IF (my_nspins == 2) THEN
    1800              :                                     CALL update_fock_matrix_as( &
    1801              :                                        nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
    1802              :                                        fac, symm_fac, full_density_beta(:, 1), full_ks_beta(:, 1), &
    1803              :                                        primitive_integrals, pbd_buf, pbc_buf, pad_buf, pac_buf, kbd_buf, &
    1804              :                                        kbc_buf, kad_buf, kac_buf, iatom, jatom, katom, latom, &
    1805              :                                        iset, jset, kset, lset, offset_bd_set, offset_bc_set, offset_ad_set, offset_ac_set, &
    1806              :                                        atomic_offset_bd, atomic_offset_bc, atomic_offset_ad, atomic_offset_ac)
    1807              :                                  END IF
    1808              :                               END IF
    1809              :                            END IF
    1810              :                         END IF if_use_libint
    1811              :                      END DO ! i_set_list_kl
    1812              :                   END DO ! i_set_list_ij
    1813              :                   IF (do_disk_storage) THEN
    1814              :                      buffer_overflow = .TRUE.
    1815              :                   END IF
    1816              :                END DO !i_list_ij
    1817              :             END DO !i_list_kl
    1818              :          END DO atomic_blocks
    1819              :          bintime_stop = m_walltime()
    1820              :          IF (my_geo_change) THEN
    1821              :             distribution_energy%time_first_scf = bintime_stop - bintime_start
    1822              :          ELSE
    1823              :             distribution_energy%time_other_scf = &
    1824              :                distribution_energy%time_other_scf + bintime_stop - bintime_start
    1825              :          END IF
    1826              : !$OMP MASTER
    1827              :          CALL timestop(handle_bin)
    1828              : !$OMP END MASTER
    1829              :       END DO !bin
    1830              : 
    1831              :       !!!
    1832              :       ! IF running both libint and libGint, uses the values from libint to check libGint values
    1833              :       if (use_libGint .and. use_libint) then
    1834              :          ! barrier needed after allocate  full_ks_alpha(beta)_from_gpu before libGint get K
    1835              :          ! to feed libgint a valid position for full_ks_alpha(beta)_from_gpu
    1836              :          allocate (full_ks_alpha_from_gpu, Mold=full_ks_alpha)
    1837              :          !$OMP BARRIER
    1838              :          if (use_only_alpha_spin) then
    1839              :             call libGint_get_fock_matrix(full_ks_alpha_from_gpu)
    1840              :          else
    1841              :             allocate (full_ks_beta_from_gpu, Mold=full_ks_beta)
    1842              :             !$OMP BARRIER
    1843              :             call libGint_get_fock_matrix(full_ks_alpha_from_gpu, full_ks_beta_from_gpu)
    1844              :          end if
    1845              : 
    1846              :          !$OMP BARRIER
    1847              :          !$omp single
    1848              :          max_abs_delta_KS = 0.0_dp
    1849              :          do i = 1, size(full_ks_alpha)
    1850              :             max_abs_delta_KS = max(max_abs_delta_ks, abs(full_ks_alpha(i, 1) - full_ks_alpha_from_gpu(i, 1)))
    1851              :          end do
    1852              :          write (UNIT=iw, FMT=*) " ------------------------------------ "
    1853              :          write (UNIT=iw, FMT=*) " | rank ", para_env%mepos
    1854              :          write (UNIT=iw, FMT=*) " |  max abs dif: ", max_abs_delta_KS
    1855              :          write (UNIT=iw, FMT=*) " ------------------------------------ "
    1856              : 
    1857              :          ! Print all F if delta is too large or (suspiciously) too small
    1858              :          if (max_abs_delta_KS > 100*eps_schwarz .or. max_abs_delta_KS < 1.E-20) then
    1859              :             write (UNIT=iw, FMT=*) " K from CPU  |  K from GPU  |  Delta | Ratio "
    1860              :             do i = 1, size(full_ks_alpha)
    1861              :                write (UNIT=iw, FMT=*) i, full_ks_alpha(i, 1), full_ks_alpha_from_gpu(i, 1), &
    1862              :                   full_ks_alpha(i, 1) - full_ks_alpha_from_gpu(i, 1), &
    1863              :                   full_ks_alpha(i, 1)/full_ks_alpha_from_gpu(i, 1)
    1864              :             end do
    1865              :          end if
    1866              : 
    1867              :          if (.not. use_only_alpha_spin) then
    1868              :             max_abs_delta_KS = 0.0_dp
    1869              :             do i = 1, size(full_ks_beta)
    1870              :                max_abs_delta_KS = max(max_abs_delta_ks, abs(full_ks_beta(i, 1) - full_ks_beta_from_gpu(i, 1)))
    1871              :             end do
    1872              :             write (UNIT=iw, FMT=*) " ------------------------------------ "
    1873              :             write (UNIT=iw, FMT=*) " | rank ", para_env%mepos
    1874              :             write (UNIT=iw, FMT=*) " |  max abs dif B: ", max_abs_delta_KS
    1875              :             write (UNIT=iw, FMT=*) " ------------------------------------ "
    1876              : 
    1877              :             ! Print all F if delta is too large or (suspiciously) too small
    1878              :             if (max_abs_delta_KS > 100*eps_schwarz .or. max_abs_delta_KS < 1.E-18) then
    1879              :                write (UNIT=iw, FMT=*) " BETA K from CPU  |  K from GPU  |  Delta | Ratio "
    1880              :                do i = 1, size(full_ks_beta)
    1881              :                   write (UNIT=iw, FMT=*) i, full_ks_beta(i, 1), full_ks_beta_from_gpu(i, 1), &
    1882              :                      full_ks_beta(i, 1) - full_ks_beta_from_gpu(i, 1), &
    1883              :                      full_ks_beta(i, 1)/full_ks_beta_from_gpu(i, 1)
    1884              :                end do
    1885              :             end if
    1886              :          end if
    1887              :          !$omp end single
    1888              :       end if
    1889              : 
    1890              :       ! Copy the KS matrix from device to host.
    1891              :       if (use_libGint .and. .not. use_libint) then
    1892              :          if (use_only_alpha_spin) then
    1893              :             call libGint_get_fock_matrix(full_ks_alpha)
    1894              :          else
    1895              :             call libGint_get_fock_matrix(full_ks_alpha, full_ks_beta)
    1896              :          end if
    1897              :       end if
    1898              : 
    1899              : !$OMP MASTER
    1900              :       logger => cp_get_default_logger()
    1901              :       do_print_load_balance_info = .FALSE.
    1902              :       do_print_load_balance_info = BTEST(cp_print_key_should_output(logger%iter_info, hfx_section, &
    1903              :                                                                     "LOAD_BALANCE%PRINT/LOAD_BALANCE_INFO"), cp_p_file)
    1904              : !$OMP END MASTER
    1905              : !$OMP BARRIER
    1906              :       IF (do_print_load_balance_info) THEN
    1907              :          iw = -1
    1908              : !$OMP MASTER
    1909              :          iw = cp_print_key_unit_nr(logger, hfx_section, "LOAD_BALANCE%PRINT/LOAD_BALANCE_INFO", &
    1910              :                                    extension=".scfLog")
    1911              : !$OMP END MASTER
    1912              : 
    1913              :          CALL collect_load_balance_info(para_env, actual_x_data, iw, n_threads, i_thread, &
    1914              :                                         hfx_do_eval_energy)
    1915              : 
    1916              : !$OMP MASTER
    1917              :          CALL cp_print_key_finished_output(iw, logger, hfx_section, &
    1918              :                                            "LOAD_BALANCE%PRINT/LOAD_BALANCE_INFO")
    1919              : !$OMP END MASTER
    1920              :       END IF
    1921              : 
    1922              : !$OMP BARRIER
    1923              : !$OMP MASTER
    1924              :       CALL m_memory(memsize_after)
    1925              : !$OMP END MASTER
    1926              : !$OMP BARRIER
    1927              : 
    1928              :       DEALLOCATE (primitive_integrals)
    1929              : !$OMP BARRIER
    1930              :       !! Get some number about ERIS
    1931              : !$OMP ATOMIC
    1932              :       shm_neris_total = shm_neris_total + neris_total
    1933              : !$OMP ATOMIC
    1934              :       shm_neris_onthefly = shm_neris_onthefly + neris_onthefly
    1935              : !$OMP ATOMIC
    1936              :       shm_nprim_ints = shm_nprim_ints + nprim_ints
    1937              : 
    1938              :       storage_counter_integrals = memory_parameter%actual_memory_usage* &
    1939              :                                   memory_parameter%cache_size
    1940              :       stor_count_int_disk = memory_parameter%actual_memory_usage_disk* &
    1941              :                             memory_parameter%cache_size
    1942              :       stor_count_max_val = max_val_memory*memory_parameter%cache_size
    1943              : !$OMP ATOMIC
    1944              :       shm_storage_counter_integrals = shm_storage_counter_integrals + storage_counter_integrals
    1945              : !$OMP ATOMIC
    1946              :       shm_stor_count_int_disk = shm_stor_count_int_disk + stor_count_int_disk
    1947              : !$OMP ATOMIC
    1948              :       shm_neris_incore = shm_neris_incore + neris_incore
    1949              : !$OMP ATOMIC
    1950              :       shm_neris_disk = shm_neris_disk + neris_disk
    1951              : !$OMP ATOMIC
    1952              :       shm_stor_count_max_val = shm_stor_count_max_val + stor_count_max_val
    1953              : !$OMP BARRIER
    1954              : 
    1955              :       ! ** Calculate how much memory has already been used (might be needed for in-core forces
    1956              : !$OMP MASTER
    1957              :       shm_mem_compression_counter = 0
    1958              :       DO i = 1, n_threads
    1959              : !$OMP       ATOMIC READ
    1960              :          tmp_i4 = x_data(irep, i)%memory_parameter%actual_memory_usage
    1961              :          shm_mem_compression_counter = shm_mem_compression_counter + &
    1962              :                                        tmp_i4*memory_parameter%cache_size
    1963              :       END DO
    1964              : !$OMP END MASTER
    1965              : !$OMP BARRIER
    1966              :       actual_x_data%memory_parameter%final_comp_counter_energy = shm_mem_compression_counter
    1967              : 
    1968              : !$OMP MASTER
    1969              :       !! Calculate the exchange energies from the Kohn-Sham matrix. Before we can go on, we have to symmetrize.
    1970              :       ene_x_aa = 0.0_dp
    1971              :       ene_x_bb = 0.0_dp
    1972              : 
    1973              :       mb_size_p = shm_block_offset(ncpu + 1)/1024/128
    1974              :       mb_size_f = shm_block_offset(ncpu + 1)/1024/128
    1975              :       IF (.NOT. treat_lsd_in_core) THEN
    1976              :          IF (my_nspins == 2) THEN
    1977              :             mb_size_f = mb_size_f*2
    1978              :             mb_size_p = mb_size_p*2
    1979              :          END IF
    1980              :       END IF
    1981              :       !! size of primitive_integrals(not shared)
    1982              :       mb_size_buffers = INT(nsgf_max, int_8)**4*n_threads
    1983              :       !! fock density buffers (not shared)
    1984              :       mb_size_buffers = mb_size_buffers + INT(nsgf_max, int_8)**2*n_threads
    1985              :       subtr_size_mb = subtr_size_mb + 8_int_8*nsgf_max**2*n_threads
    1986              :       !! size of screening functions (shared)
    1987              :       mb_size_buffers = mb_size_buffers + max_pgf**2*max_set**2*nkind**2 &
    1988              :                         + max_set**2*nkind**2 &
    1989              :                         + nkind**2 &
    1990              :                         + max_pgf**2*max_set**2*nkind**2
    1991              :       !! is_assoc (shared)
    1992              :       mb_size_buffers = mb_size_buffers + natom**2
    1993              :       ! ** pmax_atom (shared)
    1994              :       IF (do_p_screening) THEN
    1995              :          mb_size_buffers = mb_size_buffers + natom**2
    1996              :       END IF
    1997              :       IF (screening_parameter%do_p_screening_forces) THEN
    1998              :          IF (memory_parameter%treat_forces_in_core) THEN
    1999              :             mb_size_buffers = mb_size_buffers + natom**2
    2000              :          END IF
    2001              :       END IF
    2002              :       ! ** Initial P only MAX(alpha,beta) (shared)
    2003              :       IF (do_p_screening .OR. screening_parameter%do_p_screening_forces) THEN
    2004              :          mb_size_buffers = mb_size_buffers + memory_parameter%size_p_screen
    2005              :       END IF
    2006              :       ! ** In core forces require their own initial P
    2007              :       IF (screening_parameter%do_p_screening_forces) THEN
    2008              :          IF (memory_parameter%treat_forces_in_core) THEN
    2009              :             mb_size_buffers = mb_size_buffers + memory_parameter%size_p_screen
    2010              :          END IF
    2011              :       END IF
    2012              : 
    2013              :       !! mb
    2014              :       mb_size_buffers = mb_size_buffers/1024/128
    2015              : 
    2016              :       afac = 1.0_dp
    2017              :       IF (is_anti_symmetric) afac = -1.0_dp
    2018              :       CALL timestop(handle_main)
    2019              :       ene_x_aa_diag = 0.0_dp
    2020              :       ene_x_bb_diag = 0.0_dp
    2021              :       DO iatom = 1, natom
    2022              :          ikind = kind_of(iatom)
    2023              :          nseta = basis_parameter(ikind)%nset
    2024              :          nsgfa => basis_parameter(ikind)%nsgf
    2025              :          jatom = iatom
    2026              :          jkind = kind_of(jatom)
    2027              :          nsetb = basis_parameter(jkind)%nset
    2028              :          nsgfb => basis_parameter(jkind)%nsgf
    2029              :          act_atomic_block_offset = shm_atomic_block_offset(jatom, iatom)
    2030              :          DO img = 1, nkimages
    2031              :             DO iset = 1, nseta
    2032              :                DO jset = 1, nsetb
    2033              :                   act_set_offset = shm_set_offset(jset, iset, jkind, ikind)
    2034              :                   i = act_set_offset + act_atomic_block_offset - 1
    2035              :                   DO ma = 1, nsgfa(iset)
    2036              :                      j = shm_set_offset(iset, jset, jkind, ikind) + act_atomic_block_offset - 1 + ma - 1
    2037              :                      DO mb = 1, nsgfb(jset)
    2038              :                         IF (i > j) THEN
    2039              :                            full_ks_alpha(i, img) = (full_ks_alpha(i, img) + full_ks_alpha(j, img)*afac)
    2040              :                            full_ks_alpha(j, img) = full_ks_alpha(i, img)*afac
    2041              :                            IF (.NOT. treat_lsd_in_core .AND. my_nspins == 2) THEN
    2042              :                               full_ks_beta(i, img) = (full_ks_beta(i, img) + full_ks_beta(j, img)*afac)
    2043              :                               full_ks_beta(j, img) = full_ks_beta(i, img)*afac
    2044              :                            END IF
    2045              :                         END IF
    2046              :                         ! ** For adiabatically rescaled functionals we need the energy coming from the diagonal elements
    2047              :                         IF (i == j) THEN
    2048              :                            ene_x_aa_diag = ene_x_aa_diag + full_ks_alpha(i, img)*full_density_alpha(i, img)
    2049              :                            IF (.NOT. treat_lsd_in_core .AND. my_nspins == 2) THEN
    2050              :                               ene_x_bb_diag = ene_x_bb_diag + full_ks_beta(i, img)*full_density_beta(i, img)
    2051              :                            END IF
    2052              :                         END IF
    2053              :                         i = i + 1
    2054              :                         j = j + nsgfa(iset)
    2055              :                      END DO
    2056              :                   END DO
    2057              :                END DO
    2058              :             END DO
    2059              :          END DO
    2060              :       END DO
    2061              : 
    2062              :       CALL para_env%sync()
    2063              :       afac = 1.0_dp
    2064              :       IF (is_anti_symmetric) afac = 0._dp
    2065              :       IF (distribute_fock_matrix) THEN
    2066              :          !! Distribute the current KS-matrix to all the processes
    2067              :          CALL timeset(routineN//"_dist_KS", handle_dist_ks)
    2068              :          DO img = 1, nkimages
    2069              :             CALL distribute_ks_matrix(para_env, full_ks_alpha(:, img), ks_matrix(ispin, img)%matrix, shm_number_of_p_entries, &
    2070              :                                       shm_block_offset, kind_of, basis_parameter, &
    2071              :                                       off_diag_fac=0.5_dp, diag_fac=afac)
    2072              :          END DO
    2073              : 
    2074              :          NULLIFY (full_ks_alpha)
    2075              :          DEALLOCATE (shm_master_x_data%full_ks_alpha)
    2076              :          IF (.NOT. treat_lsd_in_core) THEN
    2077              :             IF (my_nspins == 2) THEN
    2078              :                DO img = 1, nkimages
    2079              :                   CALL distribute_ks_matrix(para_env, full_ks_beta(:, img), ks_matrix(2, img)%matrix, shm_number_of_p_entries, &
    2080              :                                             shm_block_offset, kind_of, basis_parameter, &
    2081              :                                             off_diag_fac=0.5_dp, diag_fac=afac)
    2082              :                END DO
    2083              :                NULLIFY (full_ks_beta)
    2084              :                DEALLOCATE (shm_master_x_data%full_ks_beta)
    2085              :             END IF
    2086              :          END IF
    2087              :          CALL timestop(handle_dist_ks)
    2088              :       END IF
    2089              : 
    2090              :       IF (distribute_fock_matrix) THEN
    2091              :          !! ** Calculate the exchange energy
    2092              :          ene_x_aa = 0.0_dp
    2093              :          DO img = 1, nkimages
    2094              :             CALL dbcsr_dot_threadsafe(ks_matrix(ispin, img)%matrix, rho_ao(ispin, img)%matrix, etmp)
    2095              :             ene_x_aa = ene_x_aa + etmp
    2096              :          END DO
    2097              :          !for ADMMS, we need the exchange matrix k(d) for both spins
    2098              :          IF (dft_control%do_admm) THEN
    2099              :             CPASSERT(nkimages == 1)
    2100              :             CALL dbcsr_copy(matrix_ks_aux_fit_hfx(ispin)%matrix, ks_matrix(ispin, 1)%matrix, &
    2101              :                             name="HF exch. part of matrix_ks_aux_fit for ADMMS")
    2102              :          END IF
    2103              : 
    2104              :          ene_x_bb = 0.0_dp
    2105              :          IF (my_nspins == 2 .AND. .NOT. treat_lsd_in_core) THEN
    2106              :             DO img = 1, nkimages
    2107              :                CALL dbcsr_dot_threadsafe(ks_matrix(2, img)%matrix, rho_ao(2, img)%matrix, etmp)
    2108              :                ene_x_bb = ene_x_bb + etmp
    2109              :             END DO
    2110              :             !for ADMMS, we need the exchange matrix k(d) for both spins
    2111              :             IF (dft_control%do_admm) THEN
    2112              :                CPASSERT(nkimages == 1)
    2113              :                CALL dbcsr_copy(matrix_ks_aux_fit_hfx(2)%matrix, ks_matrix(2, 1)%matrix, &
    2114              :                                name="HF exch. part of matrix_ks_aux_fit for ADMMS")
    2115              :             END IF
    2116              :          END IF
    2117              : 
    2118              :          !! Update energy type
    2119              :          ehfx = 0.5_dp*(ene_x_aa + ene_x_bb)
    2120              :       ELSE
    2121              :          ! ** It is easier to correct the following expression by the diagonal energy contribution,
    2122              :          ! ** than explicitly going throuhg the diagonal elements
    2123              :          DO img = 1, nkimages
    2124              :             DO pa = 1, SIZE(full_ks_alpha, 1)
    2125              :                ene_x_aa = ene_x_aa + full_ks_alpha(pa, img)*full_density_alpha(pa, img)
    2126              :             END DO
    2127              :          END DO
    2128              :          ! ** Now correct
    2129              :          ene_x_aa = (ene_x_aa + ene_x_aa_diag)*0.5_dp
    2130              :          IF (my_nspins == 2) THEN
    2131              :             DO img = 1, nkimages
    2132              :                DO pa = 1, SIZE(full_ks_beta, 1)
    2133              :                   ene_x_bb = ene_x_bb + full_ks_beta(pa, img)*full_density_beta(pa, img)
    2134              :                END DO
    2135              :             END DO
    2136              :             ! ** Now correct
    2137              :             ene_x_bb = (ene_x_bb + ene_x_bb_diag)*0.5_dp
    2138              :          END IF
    2139              :          CALL para_env%sum(ene_x_aa)
    2140              :          IF (my_nspins == 2) CALL para_env%sum(ene_x_bb)
    2141              :          ehfx = 0.5_dp*(ene_x_aa + ene_x_bb)
    2142              :       END IF
    2143              : 
    2144              :       !! Print some memeory information if this is the first step
    2145              :       IF (my_geo_change) THEN
    2146              :          tmp_i8(1:8) = [shm_storage_counter_integrals, shm_neris_onthefly, shm_neris_incore, shm_neris_disk, &
    2147              :                         shm_neris_total, shm_stor_count_int_disk, shm_nprim_ints, shm_stor_count_max_val]
    2148              :          CALL para_env%sum(tmp_i8)
    2149              :          shm_storage_counter_integrals = tmp_i8(1)
    2150              :          shm_neris_onthefly = tmp_i8(2)
    2151              :          shm_neris_incore = tmp_i8(3)
    2152              :          shm_neris_disk = tmp_i8(4)
    2153              :          shm_neris_total = tmp_i8(5)
    2154              :          shm_stor_count_int_disk = tmp_i8(6)
    2155              :          shm_nprim_ints = tmp_i8(7)
    2156              :          shm_stor_count_max_val = tmp_i8(8)
    2157              :          CALL para_env%max(memsize_after)
    2158              :          mem_eris = (shm_storage_counter_integrals + 128*1024 - 1)/1024/128
    2159              :          compression_factor = REAL(shm_neris_incore, dp)/REAL(shm_storage_counter_integrals, dp)
    2160              :          mem_eris_disk = (shm_stor_count_int_disk + 128*1024 - 1)/1024/128
    2161              :          compression_factor_disk = REAL(shm_neris_disk, dp)/REAL(shm_stor_count_int_disk, dp)
    2162              :          mem_max_val = (shm_stor_count_max_val + 128*1024 - 1)/1024/128
    2163              : 
    2164              :          IF (shm_neris_incore == 0) THEN
    2165              :             mem_eris = 0
    2166              :             compression_factor = 0.0_dp
    2167              :          END IF
    2168              :          IF (shm_neris_disk == 0) THEN
    2169              :             mem_eris_disk = 0
    2170              :             compression_factor_disk = 0.0_dp
    2171              :          END IF
    2172              : 
    2173              :          iw = cp_print_key_unit_nr(logger, hfx_section, "HF_INFO", &
    2174              :                                    extension=".scfLog")
    2175              :          IF (iw > 0) THEN
    2176              :             WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
    2177              :                "HFX_MEM_INFO| Number of cart. primitive ERI's calculated:      ", shm_nprim_ints
    2178              : 
    2179              :             WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
    2180              :                "HFX_MEM_INFO| Number of sph. ERI's calculated:           ", shm_neris_total
    2181              : 
    2182              :             WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
    2183              :                "HFX_MEM_INFO| Number of sph. ERI's stored in-core:        ", shm_neris_incore
    2184              : 
    2185              :             WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
    2186              :                "HFX_MEM_INFO| Number of sph. ERI's stored on disk:        ", shm_neris_disk
    2187              : 
    2188              :             WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
    2189              :                "HFX_MEM_INFO| Number of sph. ERI's calculated on the fly: ", shm_neris_onthefly
    2190              : 
    2191              :             WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
    2192              :                "HFX_MEM_INFO| Total memory consumption ERI's RAM [MiB]:            ", mem_eris
    2193              : 
    2194              :             WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
    2195              :                "HFX_MEM_INFO| Whereof max-vals [MiB]:            ", mem_max_val
    2196              : 
    2197              :             WRITE (UNIT=iw, FMT="((T3,A,T60,F21.2))") &
    2198              :                "HFX_MEM_INFO| Total compression factor ERI's RAM:                  ", compression_factor
    2199              : 
    2200              :             WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
    2201              :                "HFX_MEM_INFO| Total memory consumption ERI's disk [MiB]:       ", mem_eris_disk
    2202              : 
    2203              :             WRITE (UNIT=iw, FMT="((T3,A,T60,F21.2))") &
    2204              :                "HFX_MEM_INFO| Total compression factor ERI's disk:             ", compression_factor_disk
    2205              : 
    2206              :             WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
    2207              :                "HFX_MEM_INFO| Size of density/Fock matrix [MiB]:             ", 2_int_8*mb_size_p
    2208              : 
    2209              :             IF (do_periodic) THEN
    2210              :                WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
    2211              :                   "HFX_MEM_INFO| Size of buffers [MiB]:             ", mb_size_buffers
    2212              :                WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
    2213              :                   "HFX_MEM_INFO| Number of periodic image cells considered: ", SIZE(shm_master_x_data%neighbor_cells)
    2214              :             ELSE
    2215              :                WRITE (UNIT=iw, FMT="((T3,A,T60,I21))") &
    2216              :                   "HFX_MEM_INFO| Size of buffers [MiB]:             ", mb_size_buffers
    2217              :             END IF
    2218              :             WRITE (UNIT=iw, FMT="((T3,A,T60,I21),/)") &
    2219              :                "HFX_MEM_INFO| Est. max. program size after HFX  [MiB]:", memsize_after/(1024*1024)
    2220              :             CALL m_flush(iw)
    2221              :          END IF
    2222              : 
    2223              :          CALL cp_print_key_finished_output(iw, logger, hfx_section, &
    2224              :                                            "HF_INFO")
    2225              :       END IF
    2226              : !$OMP END MASTER
    2227              : 
    2228              :       !! flush caches if the geometry changed
    2229              :       IF (do_dynamic_load_balancing) THEN
    2230              :          my_bin_size = SIZE(actual_x_data%distribution_energy)
    2231              :       ELSE
    2232              :          my_bin_size = 1
    2233              :       END IF
    2234              : 
    2235              :       IF (my_geo_change) THEN
    2236              :          IF (.NOT. memory_parameter%do_all_on_the_fly) THEN
    2237              :             DO bin = 1, my_bin_size
    2238              :                maxval_cache => actual_x_data%store_ints%maxval_cache(bin)
    2239              :                maxval_container => actual_x_data%store_ints%maxval_container(bin)
    2240              :                integral_caches => actual_x_data%store_ints%integral_caches(:, bin)
    2241              :                integral_containers => actual_x_data%store_ints%integral_containers(:, bin)
    2242              :                CALL hfx_flush_last_cache(bits_max_val, maxval_cache, maxval_container, memory_parameter%actual_memory_usage, &
    2243              :                                          .FALSE.)
    2244              :                DO i = 1, 64
    2245              :                   CALL hfx_flush_last_cache(i, integral_caches(i), integral_containers(i), &
    2246              :                                             memory_parameter%actual_memory_usage, .FALSE.)
    2247              :                END DO
    2248              :             END DO
    2249              :          END IF
    2250              :       END IF
    2251              :       !! reset all caches except we calculate all on the fly
    2252              :       IF (.NOT. memory_parameter%do_all_on_the_fly) THEN
    2253              :          DO bin = 1, my_bin_size
    2254              :             maxval_cache => actual_x_data%store_ints%maxval_cache(bin)
    2255              :             maxval_container => actual_x_data%store_ints%maxval_container(bin)
    2256              :             integral_caches => actual_x_data%store_ints%integral_caches(:, bin)
    2257              :             integral_containers => actual_x_data%store_ints%integral_containers(:, bin)
    2258              : 
    2259              :             CALL hfx_reset_cache_and_container(maxval_cache, maxval_container, memory_parameter%actual_memory_usage, .FALSE.)
    2260              :             DO i = 1, 64
    2261              :                CALL hfx_reset_cache_and_container(integral_caches(i), integral_containers(i), &
    2262              :                                                   memory_parameter%actual_memory_usage, &
    2263              :                                                   .FALSE.)
    2264              :             END DO
    2265              :          END DO
    2266              :       END IF
    2267              : 
    2268              :       !! Since the I/O routines are no thread-safe, i.e. the procedure to get the unit number, put a lock here
    2269              : !$OMP CRITICAL(hfxenergy_out_critical)
    2270              :       IF (do_disk_storage) THEN
    2271              :          !! flush caches if the geometry changed
    2272              :          IF (my_geo_change) THEN
    2273              :             CALL hfx_flush_last_cache(bits_max_val, maxval_cache_disk, maxval_container_disk, &
    2274              :                                       memory_parameter%actual_memory_usage_disk, .TRUE.)
    2275              :             DO i = 1, 64
    2276              :                CALL hfx_flush_last_cache(i, integral_caches_disk(i), integral_containers_disk(i), &
    2277              :                                          memory_parameter%actual_memory_usage_disk, .TRUE.)
    2278              :             END DO
    2279              :          END IF
    2280              :          !! reset all caches except we calculate all on the fly
    2281              :          CALL hfx_reset_cache_and_container(maxval_cache_disk, maxval_container_disk, memory_parameter%actual_memory_usage_disk, &
    2282              :                                             do_disk_storage)
    2283              :          DO i = 1, 64
    2284              :             CALL hfx_reset_cache_and_container(integral_caches_disk(i), integral_containers_disk(i), &
    2285              :                                                memory_parameter%actual_memory_usage_disk, do_disk_storage)
    2286              :          END DO
    2287              :       END IF
    2288              : !$OMP END CRITICAL(hfxenergy_out_critical)
    2289              : !$OMP BARRIER
    2290              :       !! Clean up
    2291              :       DEALLOCATE (last_sgf_global)
    2292              : !$OMP MASTER
    2293              :       DEALLOCATE (full_density_alpha)
    2294              :       IF (.NOT. treat_lsd_in_core) THEN
    2295              :          IF (my_nspins == 2) THEN
    2296              :             DEALLOCATE (full_density_beta)
    2297              :          END IF
    2298              :       END IF
    2299              :       IF (do_dynamic_load_balancing) THEN
    2300              :          DEALLOCATE (shm_master_x_data%task_list)
    2301              :       END IF
    2302              : !$OMP END MASTER
    2303              :       DEALLOCATE (pbd_buf, pbc_buf, pad_buf, pac_buf)
    2304              :       DEALLOCATE (kbd_buf, kbc_buf, kad_buf, kac_buf)
    2305              :       DEALLOCATE (set_list_ij, set_list_kl)
    2306              : 
    2307              :       DO i = 1, max_pgf**2
    2308              :          DEALLOCATE (pgf_list_ij(i)%image_list)
    2309              :          DEALLOCATE (pgf_list_kl(i)%image_list)
    2310              :       END DO
    2311              : 
    2312              :       DEALLOCATE (pgf_list_ij)
    2313              :       DEALLOCATE (pgf_list_kl)
    2314              :       DEALLOCATE (pgf_product_list)
    2315              : 
    2316              :       DEALLOCATE (max_contraction, kind_of)
    2317              : 
    2318              :       DEALLOCATE (ee_work, ee_work2, ee_buffer1, ee_buffer2, ee_primitives_tmp)
    2319              : 
    2320              :       DEALLOCATE (nimages)
    2321              : 
    2322              : !$OMP BARRIER
    2323              : !$OMP END PARALLEL
    2324              : 
    2325        40523 :       CALL timestop(handle)
    2326     84044702 :    END SUBROUTINE integrate_four_center
    2327              : 
    2328              : ! **************************************************************************************************
    2329              : !> \brief calculates two-electron integrals of a quartet/shell using the library
    2330              : !>      lib_int in the periodic case
    2331              : !> \param lib ...
    2332              : !> \param ra ...
    2333              : !> \param rb ...
    2334              : !> \param rc ...
    2335              : !> \param rd ...
    2336              : !> \param npgfa ...
    2337              : !> \param npgfb ...
    2338              : !> \param npgfc ...
    2339              : !> \param npgfd ...
    2340              : !> \param la_min ...
    2341              : !> \param la_max ...
    2342              : !> \param lb_min ...
    2343              : !> \param lb_max ...
    2344              : !> \param lc_min ...
    2345              : !> \param lc_max ...
    2346              : !> \param ld_min ...
    2347              : !> \param ld_max ...
    2348              : !> \param nsgfa ...
    2349              : !> \param nsgfb ...
    2350              : !> \param nsgfc ...
    2351              : !> \param nsgfd ...
    2352              : !> \param sphi_a_u1 ...
    2353              : !> \param sphi_a_u2 ...
    2354              : !> \param sphi_a_u3 ...
    2355              : !> \param sphi_b_u1 ...
    2356              : !> \param sphi_b_u2 ...
    2357              : !> \param sphi_b_u3 ...
    2358              : !> \param sphi_c_u1 ...
    2359              : !> \param sphi_c_u2 ...
    2360              : !> \param sphi_c_u3 ...
    2361              : !> \param sphi_d_u1 ...
    2362              : !> \param sphi_d_u2 ...
    2363              : !> \param sphi_d_u3 ...
    2364              : !> \param zeta ...
    2365              : !> \param zetb ...
    2366              : !> \param zetc ...
    2367              : !> \param zetd ...
    2368              : !> \param primitive_integrals array of primitive_integrals
    2369              : !> \param potential_parameter contains info for libint
    2370              : !> \param neighbor_cells Periodic images
    2371              : !> \param screen1 set based coefficients for near field screening
    2372              : !> \param screen2 set based coefficients for near field screening
    2373              : !> \param eps_schwarz threshold
    2374              : !> \param max_contraction_val maximum multiplication factor for cart -> sph
    2375              : !> \param cart_estimate maximum calculated integral value
    2376              : !> \param cell cell
    2377              : !> \param neris_tmp counter for calculated cart integrals
    2378              : !> \param log10_pmax logarithm of initial p matrix max element
    2379              : !> \param log10_eps_schwarz log of threshold
    2380              : !> \param R1_pgf coefficients for radii of product distribution function
    2381              : !> \param R2_pgf coefficients for radii of product distribution function
    2382              : !> \param pgf1 schwarz coefficients pgf basid
    2383              : !> \param pgf2 schwarz coefficients pgf basid
    2384              : !> \param pgf_list_ij ...
    2385              : !> \param pgf_list_kl ...
    2386              : !> \param pgf_product_list ...
    2387              : !> \param nsgfl_a ...
    2388              : !> \param nsgfl_b ...
    2389              : !> \param nsgfl_c ...
    2390              : !> \param nsgfl_d ...
    2391              : !> \param sphi_a_ext ...
    2392              : !> \param sphi_b_ext ...
    2393              : !> \param sphi_c_ext ...
    2394              : !> \param sphi_d_ext ...
    2395              : !> \param ee_work ...
    2396              : !> \param ee_work2 ...
    2397              : !> \param ee_buffer1 ...
    2398              : !> \param ee_buffer2 ...
    2399              : !> \param ee_primitives_tmp ...
    2400              : !> \param nimages ...
    2401              : !> \param do_periodic ...
    2402              : !> \param p_work ...
    2403              : !> \par History
    2404              : !>      11.2006 created [Manuel Guidon]
    2405              : !>      02.2009 completely rewritten screening part [Manuel Guidon]
    2406              : !> \author Manuel Guidon
    2407              : ! **************************************************************************************************
    2408      8519285 :    SUBROUTINE coulomb4(lib, ra, rb, rc, rd, npgfa, npgfb, npgfc, npgfd, &
    2409              :                        la_min, la_max, lb_min, lb_max, &
    2410              :                        lc_min, lc_max, ld_min, ld_max, nsgfa, nsgfb, nsgfc, nsgfd, &
    2411              :                        sphi_a_u1, sphi_a_u2, sphi_a_u3, &
    2412              :                        sphi_b_u1, sphi_b_u2, sphi_b_u3, &
    2413              :                        sphi_c_u1, sphi_c_u2, sphi_c_u3, &
    2414              :                        sphi_d_u1, sphi_d_u2, sphi_d_u3, &
    2415      8519285 :                        zeta, zetb, zetc, zetd, &
    2416      8519285 :                        primitive_integrals, &
    2417              :                        potential_parameter, neighbor_cells, &
    2418              :                        screen1, screen2, eps_schwarz, max_contraction_val, &
    2419              :                        cart_estimate, cell, neris_tmp, log10_pmax, &
    2420              :                        log10_eps_schwarz, R1_pgf, R2_pgf, pgf1, pgf2, &
    2421              :                        pgf_list_ij, pgf_list_kl, &
    2422              :                        pgf_product_list, &
    2423      8519285 :                        nsgfl_a, nsgfl_b, nsgfl_c, &
    2424      8519285 :                        nsgfl_d, &
    2425      8519285 :                        sphi_a_ext, sphi_b_ext, sphi_c_ext, sphi_d_ext, &
    2426              :                        ee_work, ee_work2, ee_buffer1, ee_buffer2, ee_primitives_tmp, &
    2427              :                        nimages, do_periodic, p_work)
    2428              : 
    2429              :       TYPE(cp_libint_t)                                  :: lib
    2430              :       REAL(dp), INTENT(IN)                               :: ra(3), rb(3), rc(3), rd(3)
    2431              :       INTEGER, INTENT(IN) :: npgfa, npgfb, npgfc, npgfd, la_min, la_max, lb_min, lb_max, lc_min, &
    2432              :                              lc_max, ld_min, ld_max, nsgfa, nsgfb, nsgfc, nsgfd, sphi_a_u1, sphi_a_u2, sphi_a_u3, &
    2433              :                              sphi_b_u1, sphi_b_u2, sphi_b_u3, sphi_c_u1, sphi_c_u2, sphi_c_u3, sphi_d_u1, sphi_d_u2, &
    2434              :                              sphi_d_u3
    2435              :       REAL(dp), DIMENSION(1:npgfa), INTENT(IN)           :: zeta
    2436              :       REAL(dp), DIMENSION(1:npgfb), INTENT(IN)           :: zetb
    2437              :       REAL(dp), DIMENSION(1:npgfc), INTENT(IN)           :: zetc
    2438              :       REAL(dp), DIMENSION(1:npgfd), INTENT(IN)           :: zetd
    2439              :       REAL(dp), DIMENSION(nsgfa, nsgfb, nsgfc, nsgfd)    :: primitive_integrals
    2440              :       TYPE(hfx_potential_type)                           :: potential_parameter
    2441              :       TYPE(hfx_cell_type), DIMENSION(:), POINTER         :: neighbor_cells
    2442              :       REAL(dp), INTENT(IN)                               :: screen1(2), screen2(2), eps_schwarz, &
    2443              :                                                             max_contraction_val
    2444              :       REAL(dp)                                           :: cart_estimate
    2445              :       TYPE(cell_type), POINTER                           :: cell
    2446              :       INTEGER(int_8)                                     :: neris_tmp
    2447              :       REAL(dp), INTENT(IN)                               :: log10_pmax, log10_eps_schwarz
    2448              :       TYPE(hfx_screen_coeff_type), DIMENSION(:, :), &
    2449              :          POINTER                                         :: R1_pgf, R2_pgf, pgf1, pgf2
    2450              :       TYPE(hfx_pgf_list), DIMENSION(*)                   :: pgf_list_ij, pgf_list_kl
    2451              :       TYPE(hfx_pgf_product_list), ALLOCATABLE, &
    2452              :          DIMENSION(:), INTENT(INOUT)                     :: pgf_product_list
    2453              :       INTEGER, DIMENSION(0:), INTENT(IN)                 :: nsgfl_a, nsgfl_b, nsgfl_c, nsgfl_d
    2454              :       REAL(dp), INTENT(IN) :: sphi_a_ext(sphi_a_u1, sphi_a_u2, sphi_a_u3), &
    2455              :                               sphi_b_ext(sphi_b_u1, sphi_b_u2, sphi_b_u3), sphi_c_ext(sphi_c_u1, sphi_c_u2, sphi_c_u3), &
    2456              :                               sphi_d_ext(sphi_d_u1, sphi_d_u2, sphi_d_u3)
    2457              :       REAL(dp), DIMENSION(*)                             :: ee_work, ee_work2, ee_buffer1, &
    2458              :                                                             ee_buffer2, ee_primitives_tmp
    2459              :       INTEGER, DIMENSION(*)                              :: nimages
    2460              :       LOGICAL, INTENT(IN)                                :: do_periodic
    2461              :       REAL(dp), DIMENSION(:), POINTER                    :: p_work
    2462              : 
    2463              :       INTEGER :: ipgf, jpgf, kpgf, la, lb, lc, ld, list_ij, list_kl, lpgf, max_l, ncoa, ncob, &
    2464              :                  ncoc, ncod, nelements_ij, nelements_kl, nproducts, nsgfla, nsgflb, nsgflc, nsgfld, nsoa, &
    2465              :                  nsob, nsoc, nsod, s_offset_a, s_offset_b, s_offset_c, s_offset_d
    2466              :       REAL(dp)                                           :: EtaInv, tmp_max, ZetaInv
    2467              : 
    2468              :       CALL build_pair_list_pgf(npgfa, npgfb, pgf_list_ij, zeta, zetb, screen1, screen2, &
    2469              :                                pgf1, R1_pgf, log10_pmax, log10_eps_schwarz, ra, rb, &
    2470              :                                nelements_ij, &
    2471      8519285 :                                neighbor_cells, nimages, do_periodic)
    2472              :       CALL build_pair_list_pgf(npgfc, npgfd, pgf_list_kl, zetc, zetd, screen2, screen1, &
    2473              :                                pgf2, R2_pgf, log10_pmax, log10_eps_schwarz, rc, rd, &
    2474              :                                nelements_kl, &
    2475      8519285 :                                neighbor_cells, nimages, do_periodic)
    2476              : 
    2477      8519285 :       cart_estimate = 0.0_dp
    2478      8519285 :       neris_tmp = 0
    2479    567657313 :       primitive_integrals = 0.0_dp
    2480      8519285 :       max_l = la_max + lb_max + lc_max + ld_max
    2481              : 
    2482     29367805 :       DO list_ij = 1, nelements_ij
    2483     20848520 :          ZetaInv = pgf_list_ij(list_ij)%ZetaInv
    2484     20848520 :          ipgf = pgf_list_ij(list_ij)%ipgf
    2485     20848520 :          jpgf = pgf_list_ij(list_ij)%jpgf
    2486              : 
    2487    105687421 :          DO list_kl = 1, nelements_kl
    2488     76319616 :             EtaInv = pgf_list_kl(list_kl)%ZetaInv
    2489     76319616 :             kpgf = pgf_list_kl(list_kl)%ipgf
    2490     76319616 :             lpgf = pgf_list_kl(list_kl)%jpgf
    2491              : 
    2492              :             CALL build_pgf_product_list(pgf_list_ij(list_ij), pgf_list_kl(list_kl), pgf_product_list, &
    2493              :                                         nproducts, log10_pmax, log10_eps_schwarz, neighbor_cells, cell, &
    2494     76319616 :                                         potential_parameter, max_l, do_periodic)
    2495              : 
    2496     76319616 :             s_offset_a = 0
    2497    188156426 :             DO la = la_min, la_max
    2498     90988290 :                s_offset_b = 0
    2499     90988290 :                ncoa = nco(la)
    2500     90988290 :                nsgfla = nsgfl_a(la)
    2501     90988290 :                nsoa = nso(la)
    2502              : 
    2503    194398410 :                DO lb = lb_min, lb_max
    2504    103410120 :                   s_offset_c = 0
    2505    103410120 :                   ncob = nco(lb)
    2506    103410120 :                   nsgflb = nsgfl_b(lb)
    2507    103410120 :                   nsob = nso(lb)
    2508              : 
    2509    242972951 :                   DO lc = lc_min, lc_max
    2510    139562831 :                      s_offset_d = 0
    2511    139562831 :                      ncoc = nco(lc)
    2512    139562831 :                      nsgflc = nsgfl_c(lc)
    2513    139562831 :                      nsoc = nso(lc)
    2514              : 
    2515    323919785 :                      DO ld = ld_min, ld_max
    2516    184356954 :                         ncod = nco(ld)
    2517    184356954 :                         nsgfld = nsgfl_d(ld)
    2518    184356954 :                         nsod = nso(ld)
    2519              : 
    2520    184356954 :                         tmp_max = 0.0_dp
    2521              :                         CALL evaluate_eri(lib, nproducts, pgf_product_list, &
    2522              :                                           la, lb, lc, ld, &
    2523              :                                           ncoa, ncob, ncoc, ncod, &
    2524              :                                           nsgfa, nsgfb, nsgfc, nsgfd, &
    2525              :                                           primitive_integrals, &
    2526              :                                           max_contraction_val, tmp_max, eps_schwarz, &
    2527              :                                           neris_tmp, ZetaInv, EtaInv, &
    2528              :                                           s_offset_a, s_offset_b, s_offset_c, s_offset_d, &
    2529              :                                           nsgfla, nsgflb, nsgflc, nsgfld, nsoa, nsob, nsoc, nsod, &
    2530              :                                           sphi_a_ext(1, la + 1, ipgf), &
    2531              :                                           sphi_b_ext(1, lb + 1, jpgf), &
    2532              :                                           sphi_c_ext(1, lc + 1, kpgf), &
    2533              :                                           sphi_d_ext(1, ld + 1, lpgf), &
    2534              :                                           ee_work, ee_work2, ee_buffer1, ee_buffer2, ee_primitives_tmp, &
    2535    184356954 :                                           p_work)
    2536    184356954 :                         cart_estimate = MAX(tmp_max, cart_estimate)
    2537    323919785 :                         s_offset_d = s_offset_d + nsod*nsgfld
    2538              :                      END DO !ld
    2539    242972951 :                      s_offset_c = s_offset_c + nsoc*nsgflc
    2540              :                   END DO !lc
    2541    194398410 :                   s_offset_b = s_offset_b + nsob*nsgflb
    2542              :                END DO !lb
    2543    167307906 :                s_offset_a = s_offset_a + nsoa*nsgfla
    2544              :             END DO !la
    2545              :          END DO
    2546              :       END DO
    2547              : 
    2548      8519285 :    END SUBROUTINE coulomb4
    2549              : 
    2550              : ! **************************************************************************************************
    2551              : !> \brief Given a 2d index pair, this function returns a 1d index pair for
    2552              : !>        a symmetric upper triangle NxN matrix
    2553              : !>        The compiler should inline this function, therefore it appears in
    2554              : !>        several modules
    2555              : !> \param i 2d index
    2556              : !> \param j 2d index
    2557              : !> \param N matrix size
    2558              : !> \return ...
    2559              : !> \par History
    2560              : !>      03.2009 created [Manuel Guidon]
    2561              : !> \author Manuel Guidon
    2562              : ! **************************************************************************************************
    2563        28264 :    PURE FUNCTION get_1D_idx(i, j, N)
    2564              :       INTEGER, INTENT(IN)                                :: i, j
    2565              :       INTEGER(int_8), INTENT(IN)                         :: N
    2566              :       INTEGER(int_8)                                     :: get_1D_idx
    2567              : 
    2568              :       INTEGER(int_8)                                     :: min_ij
    2569              : 
    2570        28264 :       min_ij = MIN(i, j)
    2571        28264 :       get_1D_idx = min_ij*N + MAX(i, j) - (min_ij - 1)*min_ij/2 - N
    2572              : 
    2573        28264 :    END FUNCTION get_1D_idx
    2574              : 
    2575              : ! **************************************************************************************************
    2576              : !> \brief This routine prefetches density/fock matrix elements and stores them
    2577              : !>        in cache friendly arrays. These buffers are then used to update the
    2578              : !>        fock matrix
    2579              : !> \param ma_max Size of matrix blocks
    2580              : !> \param mb_max Size of matrix blocks
    2581              : !> \param mc_max Size of matrix blocks
    2582              : !> \param md_max Size of matrix blocks
    2583              : !> \param fac multiplication factor (spin)
    2584              : !> \param symm_fac multiplication factor (symmetry)
    2585              : !> \param density upper triangular density matrix
    2586              : !> \param ks upper triangular fock matrix
    2587              : !> \param prim primitive integrals
    2588              : !> \param pbd buffer that will contain P(b,d)
    2589              : !> \param pbc buffer that will contain P(b,c)
    2590              : !> \param pad buffer that will contain P(a,d)
    2591              : !> \param pac buffer that will contain P(a,c)
    2592              : !> \param kbd buffer for KS(b,d)
    2593              : !> \param kbc buffer for KS(b,c)
    2594              : !> \param kad buffer for KS(a,d)
    2595              : !> \param kac buffer for KS(a,c)
    2596              : !> \param iatom ...
    2597              : !> \param jatom ...
    2598              : !> \param katom ...
    2599              : !> \param latom ...
    2600              : !> \param iset ...
    2601              : !> \param jset ...
    2602              : !> \param kset ...
    2603              : !> \param lset ...
    2604              : !> \param offset_bd_set ...
    2605              : !> \param offset_bc_set ...
    2606              : !> \param offset_ad_set ...
    2607              : !> \param offset_ac_set ...
    2608              : !> \param atomic_offset_bd ...
    2609              : !> \param atomic_offset_bc ...
    2610              : !> \param atomic_offset_ad ...
    2611              : !> \param atomic_offset_ac ...
    2612              : !> \par History
    2613              : !>      03.2009 created [Manuel Guidon]
    2614              : !> \author Manuel Guidon
    2615              : ! **************************************************************************************************
    2616              : 
    2617     77149063 :    SUBROUTINE update_fock_matrix(ma_max, mb_max, mc_max, md_max, &
    2618     77149063 :                                  fac, symm_fac, density, ks, prim, &
    2619              :                                  pbd, pbc, pad, pac, kbd, kbc, kad, kac, &
    2620              :                                  iatom, jatom, katom, latom, &
    2621              :                                  iset, jset, kset, lset, offset_bd_set, offset_bc_set, offset_ad_set, &
    2622              :                                  offset_ac_set, atomic_offset_bd, atomic_offset_bc, atomic_offset_ad, &
    2623              :                                  atomic_offset_ac)
    2624              : 
    2625              :       INTEGER, INTENT(IN)                                :: ma_max, mb_max, mc_max, md_max
    2626              :       REAL(dp), INTENT(IN)                               :: fac, symm_fac
    2627              :       REAL(dp), DIMENSION(:), INTENT(IN)                 :: density
    2628              :       REAL(dp), DIMENSION(:), INTENT(INOUT)              :: ks
    2629              :       REAL(dp), DIMENSION(ma_max*mb_max*mc_max*md_max), &
    2630              :          INTENT(IN)                                      :: prim
    2631              :       REAL(dp), DIMENSION(*), INTENT(INOUT)              :: pbd, pbc, pad, pac, kbd, kbc, kad, kac
    2632              :       INTEGER, INTENT(IN)                                :: iatom, jatom, katom, latom, iset, jset, &
    2633              :                                                             kset, lset
    2634              :       INTEGER, DIMENSION(:, :), POINTER, INTENT(IN)      :: offset_bd_set, offset_bc_set, &
    2635              :                                                             offset_ad_set, offset_ac_set
    2636              :       INTEGER, INTENT(IN)                                :: atomic_offset_bd, atomic_offset_bc, &
    2637              :                                                             atomic_offset_ad, atomic_offset_ac
    2638              : 
    2639              :       INTEGER                                            :: i, j, ma, mb, mc, md, offset_ac, &
    2640              :                                                             offset_ad, offset_bc, offset_bd
    2641              :       REAL(dp)                                           :: ki
    2642              : 
    2643     77149063 :       IF (jatom >= latom) THEN
    2644     75952697 :          i = 1
    2645     75952697 :          offset_bd = offset_bd_set(jset, lset) + atomic_offset_bd - 1
    2646     75952697 :          j = offset_bd
    2647    228983323 :          DO md = 1, md_max
    2648    514429017 :             DO mb = 1, mb_max
    2649    285445694 :                pbd(i) = density(j)
    2650    285445694 :                i = i + 1
    2651    438476320 :                j = j + 1
    2652              :             END DO
    2653              :          END DO
    2654              :       ELSE
    2655      1196366 :          i = 1
    2656      1196366 :          offset_bd = offset_bd_set(lset, jset) + atomic_offset_bd - 1
    2657      3523312 :          DO md = 1, md_max
    2658      2326946 :             j = offset_bd + md - 1
    2659      8645991 :             DO mb = 1, mb_max
    2660      5122679 :                pbd(i) = density(j)
    2661      5122679 :                i = i + 1
    2662      7449625 :                j = j + md_max
    2663              :             END DO
    2664              :          END DO
    2665              :       END IF
    2666     77149063 :       IF (jatom >= katom) THEN
    2667     77149063 :          i = 1
    2668     77149063 :          offset_bc = offset_bc_set(jset, kset) + atomic_offset_bc - 1
    2669     77149063 :          j = offset_bc
    2670    256522867 :          DO mc = 1, mc_max
    2671    581257238 :             DO mb = 1, mb_max
    2672    324734371 :                pbc(i) = density(j)
    2673    324734371 :                i = i + 1
    2674    504108175 :                j = j + 1
    2675              :             END DO
    2676              :          END DO
    2677              :       ELSE
    2678            0 :          i = 1
    2679            0 :          offset_bc = offset_bc_set(kset, jset) + atomic_offset_bc - 1
    2680            0 :          DO mc = 1, mc_max
    2681            0 :             j = offset_bc + mc - 1
    2682            0 :             DO mb = 1, mb_max
    2683            0 :                pbc(i) = density(j)
    2684            0 :                i = i + 1
    2685            0 :                j = j + mc_max
    2686              :             END DO
    2687              :          END DO
    2688              :       END IF
    2689     77149063 :       IF (iatom >= latom) THEN
    2690     55438727 :          i = 1
    2691     55438727 :          offset_ad = offset_ad_set(iset, lset) + atomic_offset_ad - 1
    2692     55438727 :          j = offset_ad
    2693    174714273 :          DO md = 1, md_max
    2694    433908748 :             DO ma = 1, ma_max
    2695    259194475 :                pad(i) = density(j)
    2696    259194475 :                i = i + 1
    2697    378470021 :                j = j + 1
    2698              :             END DO
    2699              :          END DO
    2700              :       ELSE
    2701     21710336 :          i = 1
    2702     21710336 :          offset_ad = offset_ad_set(lset, iset) + atomic_offset_ad - 1
    2703     57792362 :          DO md = 1, md_max
    2704     36082026 :             j = offset_ad + md - 1
    2705    144513718 :             DO ma = 1, ma_max
    2706     86721356 :                pad(i) = density(j)
    2707     86721356 :                i = i + 1
    2708    122803382 :                j = j + md_max
    2709              :             END DO
    2710              :          END DO
    2711              :       END IF
    2712     77149063 :       IF (iatom >= katom) THEN
    2713     73568651 :          i = 1
    2714     73568651 :          offset_ac = offset_ac_set(iset, kset) + atomic_offset_ac - 1
    2715     73568651 :          j = offset_ac
    2716    246427416 :          DO mc = 1, mc_max
    2717    626798704 :             DO ma = 1, ma_max
    2718    380371288 :                pac(i) = density(j)
    2719    380371288 :                i = i + 1
    2720    553230053 :                j = j + 1
    2721              :             END DO
    2722              :          END DO
    2723              :       ELSE
    2724      3580412 :          i = 1
    2725      3580412 :          offset_ac = offset_ac_set(kset, iset) + atomic_offset_ac - 1
    2726     10095451 :          DO mc = 1, mc_max
    2727      6515039 :             j = offset_ac + mc - 1
    2728     27315653 :             DO ma = 1, ma_max
    2729     17220202 :                pac(i) = density(j)
    2730     17220202 :                i = i + 1
    2731     23735241 :                j = j + mc_max
    2732              :             END DO
    2733              :          END DO
    2734              :       END IF
    2735              : 
    2736     77149063 :       CALL contract_block(ma_max, mb_max, mc_max, md_max, kbd, kbc, kad, kac, pbd, pbc, pad, pac, prim, fac*symm_fac)
    2737     77149063 :       IF (jatom >= latom) THEN
    2738              :          i = 1
    2739              :          j = offset_bd
    2740    228983323 :          DO md = 1, md_max
    2741    514429017 :             DO mb = 1, mb_max
    2742    285445694 :                ki = kbd(i)
    2743    285445694 : !$OMP           ATOMIC
    2744              :                ks(j) = ks(j) + ki
    2745    285445694 :                i = i + 1
    2746    438476320 :                j = j + 1
    2747              :             END DO
    2748              :          END DO
    2749              :       ELSE
    2750              :          i = 1
    2751      3523312 :          DO md = 1, md_max
    2752      2326946 :             j = offset_bd + md - 1
    2753      8645991 :             DO mb = 1, mb_max
    2754      5122679 :                ki = kbd(i)
    2755      5122679 : !$OMP           ATOMIC
    2756              :                ks(j) = ks(j) + ki
    2757      5122679 :                i = i + 1
    2758      7449625 :                j = j + md_max
    2759              :             END DO
    2760              :          END DO
    2761              :       END IF
    2762     77149063 :       IF (jatom >= katom) THEN
    2763              :          i = 1
    2764              :          j = offset_bc
    2765    256522867 :          DO mc = 1, mc_max
    2766    581257238 :             DO mb = 1, mb_max
    2767    324734371 :                ki = kbc(i)
    2768    324734371 : !$OMP           ATOMIC
    2769              :                ks(j) = ks(j) + ki
    2770    324734371 :                i = i + 1
    2771    504108175 :                j = j + 1
    2772              :             END DO
    2773              :          END DO
    2774              :       ELSE
    2775              :          i = 1
    2776            0 :          DO mc = 1, mc_max
    2777            0 :             j = offset_bc + mc - 1
    2778            0 :             DO mb = 1, mb_max
    2779            0 :                ki = kbc(i)
    2780            0 : !$OMP           ATOMIC
    2781              :                ks(j) = ks(j) + ki
    2782            0 :                i = i + 1
    2783            0 :                j = j + mc_max
    2784              :             END DO
    2785              :          END DO
    2786              :       END IF
    2787     77149063 :       IF (iatom >= latom) THEN
    2788              :          i = 1
    2789              :          j = offset_ad
    2790    174714273 :          DO md = 1, md_max
    2791    433908748 :             DO ma = 1, ma_max
    2792    259194475 :                ki = kad(i)
    2793    259194475 : !$OMP           ATOMIC
    2794              :                ks(j) = ks(j) + ki
    2795    259194475 :                i = i + 1
    2796    378470021 :                j = j + 1
    2797              :             END DO
    2798              :          END DO
    2799              :       ELSE
    2800              :          i = 1
    2801     57792362 :          DO md = 1, md_max
    2802     36082026 :             j = offset_ad + md - 1
    2803    144513718 :             DO ma = 1, ma_max
    2804     86721356 :                ki = kad(i)
    2805     86721356 : !$OMP           ATOMIC
    2806              :                ks(j) = ks(j) + ki
    2807     86721356 :                i = i + 1
    2808    122803382 :                j = j + md_max
    2809              :             END DO
    2810              :          END DO
    2811              :       END IF
    2812     77149063 :       IF (iatom >= katom) THEN
    2813              :          i = 1
    2814              :          j = offset_ac
    2815    246427416 :          DO mc = 1, mc_max
    2816    626798704 :             DO ma = 1, ma_max
    2817    380371288 :                ki = kac(i)
    2818    380371288 : !$OMP           ATOMIC
    2819              :                ks(j) = ks(j) + ki
    2820    380371288 :                i = i + 1
    2821    553230053 :                j = j + 1
    2822              :             END DO
    2823              :          END DO
    2824              :       ELSE
    2825              :          i = 1
    2826     10095451 :          DO mc = 1, mc_max
    2827      6515039 :             j = offset_ac + mc - 1
    2828     27315653 :             DO ma = 1, ma_max
    2829     17220202 :                ki = kac(i)
    2830     17220202 : !$OMP           ATOMIC
    2831              :                ks(j) = ks(j) + ki
    2832     17220202 :                i = i + 1
    2833     23735241 :                j = j + mc_max
    2834              :             END DO
    2835              :          END DO
    2836              :       END IF
    2837     77149063 :    END SUBROUTINE update_fock_matrix
    2838              : 
    2839              : ! **************************************************************************************************
    2840              : !> \brief ...
    2841              : !> \param ma_max ...
    2842              : !> \param mb_max ...
    2843              : !> \param mc_max ...
    2844              : !> \param md_max ...
    2845              : !> \param fac ...
    2846              : !> \param symm_fac ...
    2847              : !> \param density ...
    2848              : !> \param ks ...
    2849              : !> \param prim ...
    2850              : !> \param pbd ...
    2851              : !> \param pbc ...
    2852              : !> \param pad ...
    2853              : !> \param pac ...
    2854              : !> \param kbd ...
    2855              : !> \param kbc ...
    2856              : !> \param kad ...
    2857              : !> \param kac ...
    2858              : !> \param iatom ...
    2859              : !> \param jatom ...
    2860              : !> \param katom ...
    2861              : !> \param latom ...
    2862              : !> \param iset ...
    2863              : !> \param jset ...
    2864              : !> \param kset ...
    2865              : !> \param lset ...
    2866              : !> \param offset_bd_set ...
    2867              : !> \param offset_bc_set ...
    2868              : !> \param offset_ad_set ...
    2869              : !> \param offset_ac_set ...
    2870              : !> \param atomic_offset_bd ...
    2871              : !> \param atomic_offset_bc ...
    2872              : !> \param atomic_offset_ad ...
    2873              : !> \param atomic_offset_ac ...
    2874              : ! **************************************************************************************************
    2875      5562963 :    SUBROUTINE update_fock_matrix_as(ma_max, mb_max, mc_max, md_max, &
    2876      5562963 :                                     fac, symm_fac, density, ks, prim, &
    2877              :                                     pbd, pbc, pad, pac, kbd, kbc, kad, kac, &
    2878              :                                     iatom, jatom, katom, latom, &
    2879              :                                     iset, jset, kset, lset, offset_bd_set, offset_bc_set, offset_ad_set, &
    2880              :                                     offset_ac_set, atomic_offset_bd, atomic_offset_bc, atomic_offset_ad, &
    2881              :                                     atomic_offset_ac)
    2882              : 
    2883              :       INTEGER, INTENT(IN)                                :: ma_max, mb_max, mc_max, md_max
    2884              :       REAL(dp), INTENT(IN)                               :: fac, symm_fac
    2885              :       REAL(dp), DIMENSION(:), INTENT(IN)                 :: density
    2886              :       REAL(dp), DIMENSION(:), INTENT(INOUT)              :: ks
    2887              :       REAL(dp), DIMENSION(ma_max*mb_max*mc_max*md_max), &
    2888              :          INTENT(IN)                                      :: prim
    2889              :       REAL(dp), DIMENSION(*), INTENT(INOUT)              :: pbd, pbc, pad, pac, kbd, kbc, kad, kac
    2890              :       INTEGER, INTENT(IN)                                :: iatom, jatom, katom, latom, iset, jset, &
    2891              :                                                             kset, lset
    2892              :       INTEGER, DIMENSION(:, :), POINTER                  :: offset_bd_set, offset_bc_set, &
    2893              :                                                             offset_ad_set, offset_ac_set
    2894              :       INTEGER, INTENT(IN)                                :: atomic_offset_bd, atomic_offset_bc, &
    2895              :                                                             atomic_offset_ad, atomic_offset_ac
    2896              : 
    2897              :       INTEGER                                            :: i, j, ma, mb, mc, md, offset_ac, &
    2898              :                                                             offset_ad, offset_bc, offset_bd
    2899              : 
    2900      5562963 :       IF (jatom >= latom) THEN
    2901      5549451 :          i = 1
    2902      5549451 :          offset_bd = offset_bd_set(jset, lset) + atomic_offset_bd - 1
    2903      5549451 :          j = offset_bd
    2904     15215976 :          DO md = 1, md_max
    2905     29405199 :             DO mb = 1, mb_max
    2906     14189223 :                pbd(i) = +density(j)
    2907     14189223 :                i = i + 1
    2908     23855748 :                j = j + 1
    2909              :             END DO
    2910              :          END DO
    2911              :       ELSE
    2912        13512 :          i = 1
    2913        13512 :          offset_bd = offset_bd_set(lset, jset) + atomic_offset_bd - 1
    2914        28504 :          DO md = 1, md_max
    2915        14992 :             j = offset_bd + md - 1
    2916        47164 :             DO mb = 1, mb_max
    2917        18660 :                pbd(i) = -density(j)
    2918        18660 :                i = i + 1
    2919        33652 :                j = j + md_max
    2920              :             END DO
    2921              :          END DO
    2922              :       END IF
    2923      5562963 :       IF (jatom >= katom) THEN
    2924      5562963 :          i = 1
    2925      5562963 :          offset_bc = offset_bc_set(jset, kset) + atomic_offset_bc - 1
    2926      5562963 :          j = offset_bc
    2927     17097841 :          DO mc = 1, mc_max
    2928     33527858 :             DO mb = 1, mb_max
    2929     16430017 :                pbc(i) = -density(j)
    2930     16430017 :                i = i + 1
    2931     27964895 :                j = j + 1
    2932              :             END DO
    2933              :          END DO
    2934              :       ELSE
    2935            0 :          i = 1
    2936            0 :          offset_bc = offset_bc_set(kset, jset) + atomic_offset_bc - 1
    2937            0 :          DO mc = 1, mc_max
    2938            0 :             j = offset_bc + mc - 1
    2939            0 :             DO mb = 1, mb_max
    2940            0 :                pbc(i) = density(j)
    2941            0 :                i = i + 1
    2942            0 :                j = j + mc_max
    2943              :             END DO
    2944              :          END DO
    2945              :       END IF
    2946      5562963 :       IF (iatom >= latom) THEN
    2947      4162454 :          i = 1
    2948      4162454 :          offset_ad = offset_ad_set(iset, lset) + atomic_offset_ad - 1
    2949      4162454 :          j = offset_ad
    2950     12159819 :          DO md = 1, md_max
    2951     27371932 :             DO ma = 1, ma_max
    2952     15212113 :                pad(i) = -density(j)
    2953     15212113 :                i = i + 1
    2954     23209478 :                j = j + 1
    2955              :             END DO
    2956              :          END DO
    2957              :       ELSE
    2958      1400509 :          i = 1
    2959      1400509 :          offset_ad = offset_ad_set(lset, iset) + atomic_offset_ad - 1
    2960      3084661 :          DO md = 1, md_max
    2961      1684152 :             j = offset_ad + md - 1
    2962      6852229 :             DO ma = 1, ma_max
    2963      3767568 :                pad(i) = density(j)
    2964      3767568 :                i = i + 1
    2965      5451720 :                j = j + md_max
    2966              :             END DO
    2967              :          END DO
    2968              :       END IF
    2969      5562963 :       IF (iatom >= katom) THEN
    2970      5358791 :          i = 1
    2971      5358791 :          offset_ac = offset_ac_set(iset, kset) + atomic_offset_ac - 1
    2972      5358791 :          j = offset_ac
    2973     16600747 :          DO mc = 1, mc_max
    2974     38437337 :             DO ma = 1, ma_max
    2975     21836590 :                pac(i) = +density(j)
    2976     21836590 :                i = i + 1
    2977     33078546 :                j = j + 1
    2978              :             END DO
    2979              :          END DO
    2980              :       ELSE
    2981       204172 :          i = 1
    2982       204172 :          offset_ac = offset_ac_set(kset, iset) + atomic_offset_ac - 1
    2983       497094 :          DO mc = 1, mc_max
    2984       292922 :             j = offset_ac + mc - 1
    2985      1248888 :             DO ma = 1, ma_max
    2986       751794 :                pac(i) = -density(j)
    2987       751794 :                i = i + 1
    2988      1044716 :                j = j + mc_max
    2989              :             END DO
    2990              :          END DO
    2991              :       END IF
    2992              : 
    2993      5562963 :       CALL contract_block(ma_max, mb_max, mc_max, md_max, kbd, kbc, kad, kac, pbd, pbc, pad, pac, prim, fac*symm_fac)
    2994              : 
    2995      5562963 :       IF (jatom >= latom) THEN
    2996              :          i = 1
    2997              :          j = offset_bd
    2998     15215976 :          DO md = 1, md_max
    2999     29405199 :             DO mb = 1, mb_max
    3000     14189223 : !$OMP           ATOMIC
    3001              :                ks(j) = ks(j) + kbd(i)
    3002     14189223 :                i = i + 1
    3003     23855748 :                j = j + 1
    3004              :             END DO
    3005              :          END DO
    3006              :       ELSE
    3007              :          i = 1
    3008        28504 :          DO md = 1, md_max
    3009        14992 :             j = offset_bd + md - 1
    3010        47164 :             DO mb = 1, mb_max
    3011        18660 : !$OMP           ATOMIC
    3012              :                ks(j) = ks(j) - kbd(i)
    3013        18660 :                i = i + 1
    3014        33652 :                j = j + md_max
    3015              :             END DO
    3016              :          END DO
    3017              :       END IF
    3018      5562963 :       IF (jatom >= katom) THEN
    3019              :          i = 1
    3020              :          j = offset_bc
    3021     17097841 :          DO mc = 1, mc_max
    3022     33527858 :             DO mb = 1, mb_max
    3023     16430017 : !$OMP           ATOMIC
    3024              :                ks(j) = ks(j) - kbc(i)
    3025     16430017 :                i = i + 1
    3026     27964895 :                j = j + 1
    3027              :             END DO
    3028              :          END DO
    3029              :       ELSE
    3030              :          i = 1
    3031            0 :          DO mc = 1, mc_max
    3032            0 :             j = offset_bc + mc - 1
    3033            0 :             DO mb = 1, mb_max
    3034            0 : !$OMP           ATOMIC
    3035              :                ks(j) = ks(j) + kbc(i)
    3036            0 :                i = i + 1
    3037            0 :                j = j + mc_max
    3038              :             END DO
    3039              :          END DO
    3040              :       END IF
    3041      5562963 :       IF (iatom >= latom) THEN
    3042              :          i = 1
    3043              :          j = offset_ad
    3044     12159819 :          DO md = 1, md_max
    3045     27371932 :             DO ma = 1, ma_max
    3046     15212113 : !$OMP           ATOMIC
    3047              :                ks(j) = ks(j) - kad(i)
    3048     15212113 :                i = i + 1
    3049     23209478 :                j = j + 1
    3050              :             END DO
    3051              :          END DO
    3052              :       ELSE
    3053              :          i = 1
    3054      3084661 :          DO md = 1, md_max
    3055      1684152 :             j = offset_ad + md - 1
    3056      6852229 :             DO ma = 1, ma_max
    3057      3767568 : !$OMP           ATOMIC
    3058              :                ks(j) = ks(j) + kad(i)
    3059      3767568 :                i = i + 1
    3060      5451720 :                j = j + md_max
    3061              :             END DO
    3062              :          END DO
    3063              :       END IF
    3064              : ! XXXXXXXXXXXXXXXXXXXXXXXX
    3065      5562963 :       IF (iatom >= katom) THEN
    3066              :          i = 1
    3067              :          j = offset_ac
    3068     16600747 :          DO mc = 1, mc_max
    3069     38437337 :             DO ma = 1, ma_max
    3070     21836590 : !$OMP           ATOMIC
    3071              :                ks(j) = ks(j) + kac(i)
    3072     21836590 :                i = i + 1
    3073     33078546 :                j = j + 1
    3074              :             END DO
    3075              :          END DO
    3076              :       ELSE
    3077              :          i = 1
    3078       497094 :          DO mc = 1, mc_max
    3079       292922 :             j = offset_ac + mc - 1
    3080      1248888 :             DO ma = 1, ma_max
    3081       751794 : !$OMP           ATOMIC
    3082              :                ks(j) = ks(j) - kac(i)
    3083       751794 :                i = i + 1
    3084      1044716 :                j = j + mc_max
    3085              :             END DO
    3086              :          END DO
    3087              :       END IF
    3088              : 
    3089      5562963 :    END SUBROUTINE update_fock_matrix_as
    3090              : 
    3091              : ! **************************************************************************************************
    3092              : !> \brief ...
    3093              : !> \param i ...
    3094              : !> \param j ...
    3095              : !> \param k ...
    3096              : !> \param l ...
    3097              : !> \param set_offsets ...
    3098              : !> \param atom_offsets ...
    3099              : !> \param iset ...
    3100              : !> \param jset ...
    3101              : !> \param kset ...
    3102              : !> \param lset ...
    3103              : !> \param ma_max ...
    3104              : !> \param mb_max ...
    3105              : !> \param mc_max ...
    3106              : !> \param md_max ...
    3107              : !> \param prim ...
    3108              : ! **************************************************************************************************
    3109            0 :    SUBROUTINE print_integrals(i, j, k, l, set_offsets, atom_offsets, iset, jset, kset, lset, ma_max, mb_max, mc_max, md_max, prim)
    3110              :       INTEGER                                            :: i, j, k, l
    3111              :       INTEGER, DIMENSION(:, :, :, :), POINTER            :: set_offsets
    3112              :       INTEGER, DIMENSION(:, :), POINTER                  :: atom_offsets
    3113              :       INTEGER                                            :: iset, jset, kset, lset, ma_max, mb_max, &
    3114              :                                                             mc_max, md_max
    3115              :       REAL(dp), DIMENSION(ma_max*mb_max*mc_max*md_max), &
    3116              :          INTENT(IN)                                      :: prim
    3117              : 
    3118              :       INTEGER                                            :: iint, ma, mb, mc, md
    3119              : 
    3120            0 :       iint = 0
    3121            0 :       DO md = 1, md_max
    3122            0 :          DO mc = 1, mc_max
    3123            0 :             DO mb = 1, mb_max
    3124            0 :                DO ma = 1, ma_max
    3125            0 :                   iint = iint + 1
    3126            0 :                   IF (ABS(prim(iint)) > 0.0000000000001) &
    3127            0 :                      WRITE (99, *) atom_offsets(i, 1) + ma + set_offsets(iset, 1, i, 1) - 1, &
    3128            0 :                      atom_offsets(j, 1) + ma + set_offsets(jset, 1, j, 1) - 1, &
    3129            0 :                      atom_offsets(k, 1) + ma + set_offsets(kset, 1, k, 1) - 1, &
    3130            0 :                      atom_offsets(l, 1) + ma + set_offsets(lset, 1, l, 1) - 1, &
    3131            0 :                      prim(iint)
    3132              :                END DO
    3133              :             END DO
    3134              :          END DO
    3135              :       END DO
    3136              : 
    3137            0 :    END SUBROUTINE print_integrals
    3138              : 
    3139              :    #:include "hfx_get_pmax_val.fypp"
    3140              : 
    3141              : END MODULE hfx_energy_potential
        

Generated by: LCOV version 2.0-1