LCOV - code coverage report
Current view: top level - src - hfx_ri.F (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:9e7f125) Lines: 1536 1907 80.5 %
Date: 2025-05-16 07:28:05 Functions: 16 23 69.6 %

          Line data    Source code
       1             : !--------------------------------------------------------------------------------------------------!
       2             : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3             : !   Copyright 2000-2025 CP2K developers group <https://cp2k.org>                                   !
       4             : !                                                                                                  !
       5             : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6             : !--------------------------------------------------------------------------------------------------!
       7             : 
       8             : ! **************************************************************************************************
       9             : !> \brief RI-methods for HFX
      10             : ! **************************************************************************************************
      11             : 
      12             : MODULE hfx_ri
      13             : 
      14             :    USE OMP_LIB,                         ONLY: omp_get_num_threads,&
      15             :                                               omp_get_thread_num
      16             :    USE arnoldi_api,                     ONLY: arnoldi_extremal
      17             :    USE atomic_kind_types,               ONLY: atomic_kind_type,&
      18             :                                               get_atomic_kind,&
      19             :                                               get_atomic_kind_set
      20             :    USE basis_set_types,                 ONLY: get_gto_basis_set,&
      21             :                                               gto_basis_set_p_type,&
      22             :                                               gto_basis_set_type
      23             :    USE cell_types,                      ONLY: cell_type,&
      24             :                                               get_cell
      25             :    USE cp_blacs_env,                    ONLY: cp_blacs_env_type
      26             :    USE cp_control_types,                ONLY: dft_control_type
      27             :    USE cp_dbcsr_api,                    ONLY: &
      28             :         dbcsr_add, dbcsr_complete_redistribute, dbcsr_copy, dbcsr_create, dbcsr_desymmetrize, &
      29             :         dbcsr_distribution_get, dbcsr_distribution_release, dbcsr_distribution_type, dbcsr_filter, &
      30             :         dbcsr_get_info, dbcsr_get_num_blocks, dbcsr_multiply, dbcsr_p_type, dbcsr_release, &
      31             :         dbcsr_scale, dbcsr_type, dbcsr_type_antisymmetric, dbcsr_type_no_symmetry, &
      32             :         dbcsr_type_symmetric
      33             :    USE cp_dbcsr_cholesky,               ONLY: cp_dbcsr_cholesky_decompose,&
      34             :                                               cp_dbcsr_cholesky_invert
      35             :    USE cp_dbcsr_contrib,                ONLY: dbcsr_add_on_diag,&
      36             :                                               dbcsr_dot,&
      37             :                                               dbcsr_frobenius_norm
      38             :    USE cp_dbcsr_diag,                   ONLY: cp_dbcsr_power
      39             :    USE cp_dbcsr_operations,             ONLY: copy_dbcsr_to_fm,&
      40             :                                               copy_fm_to_dbcsr,&
      41             :                                               cp_dbcsr_dist2d_to_dist,&
      42             :                                               dbcsr_deallocate_matrix_set
      43             :    USE cp_fm_struct,                    ONLY: cp_fm_struct_create,&
      44             :                                               cp_fm_struct_release,&
      45             :                                               cp_fm_struct_type
      46             :    USE cp_fm_types,                     ONLY: cp_fm_create,&
      47             :                                               cp_fm_release,&
      48             :                                               cp_fm_type,&
      49             :                                               cp_fm_write_unformatted
      50             :    USE cp_log_handling,                 ONLY: cp_get_default_logger,&
      51             :                                               cp_logger_type
      52             :    USE cp_output_handling,              ONLY: cp_p_file,&
      53             :                                               cp_print_key_finished_output,&
      54             :                                               cp_print_key_should_output,&
      55             :                                               cp_print_key_unit_nr
      56             :    USE dbt_api,                         ONLY: &
      57             :         dbt_batched_contract_finalize, dbt_batched_contract_init, dbt_clear, dbt_contract, &
      58             :         dbt_copy, dbt_copy_matrix_to_tensor, dbt_copy_tensor_to_matrix, dbt_create, &
      59             :         dbt_default_distvec, dbt_destroy, dbt_distribution_destroy, dbt_distribution_new, &
      60             :         dbt_distribution_type, dbt_filter, dbt_get_block, dbt_get_info, dbt_get_num_blocks_total, &
      61             :         dbt_iterator_blocks_left, dbt_iterator_next_block, dbt_iterator_start, dbt_iterator_stop, &
      62             :         dbt_iterator_type, dbt_mp_environ_pgrid, dbt_nd_mp_comm, dbt_pgrid_create, &
      63             :         dbt_pgrid_destroy, dbt_pgrid_type, dbt_put_block, dbt_reserve_blocks, dbt_scale, dbt_type
      64             :    USE distribution_2d_types,           ONLY: distribution_2d_type
      65             :    USE hfx_types,                       ONLY: alloc_containers,&
      66             :                                               block_ind_type,&
      67             :                                               dealloc_containers,&
      68             :                                               hfx_compression_type,&
      69             :                                               hfx_ri_init,&
      70             :                                               hfx_ri_release,&
      71             :                                               hfx_ri_type
      72             :    USE input_constants,                 ONLY: hfx_ri_do_2c_cholesky,&
      73             :                                               hfx_ri_do_2c_diag,&
      74             :                                               hfx_ri_do_2c_iter
      75             :    USE input_cp2k_hfx,                  ONLY: ri_mo,&
      76             :                                               ri_pmat
      77             :    USE input_section_types,             ONLY: section_vals_get_subs_vals,&
      78             :                                               section_vals_type,&
      79             :                                               section_vals_val_get
      80             :    USE iterate_matrix,                  ONLY: invert_hotelling,&
      81             :                                               matrix_sqrt_newton_schulz
      82             :    USE kinds,                           ONLY: default_string_length,&
      83             :                                               dp,&
      84             :                                               int_8
      85             :    USE machine,                         ONLY: m_walltime
      86             :    USE message_passing,                 ONLY: mp_cart_type,&
      87             :                                               mp_comm_type,&
      88             :                                               mp_para_env_type
      89             :    USE orbital_pointers,                ONLY: nso
      90             :    USE particle_methods,                ONLY: get_particle_set
      91             :    USE particle_types,                  ONLY: particle_type
      92             :    USE qs_environment_types,            ONLY: get_qs_env,&
      93             :                                               qs_environment_type
      94             :    USE qs_force_types,                  ONLY: qs_force_type
      95             :    USE qs_integral_utils,               ONLY: basis_set_list_setup
      96             :    USE qs_interactions,                 ONLY: init_interaction_radii_orb_basis
      97             :    USE qs_kind_types,                   ONLY: qs_kind_type
      98             :    USE qs_ks_types,                     ONLY: qs_ks_env_type
      99             :    USE qs_mo_types,                     ONLY: get_mo_set,&
     100             :                                               mo_set_type
     101             :    USE qs_neighbor_list_types,          ONLY: neighbor_list_set_p_type,&
     102             :                                               release_neighbor_list_sets
     103             :    USE qs_rho_types,                    ONLY: qs_rho_get,&
     104             :                                               qs_rho_type
     105             :    USE qs_tensors,                      ONLY: &
     106             :         build_2c_derivatives, build_2c_integrals, build_2c_neighbor_lists, build_3c_derivatives, &
     107             :         build_3c_integrals, build_3c_neighbor_lists, calc_2c_virial, calc_3c_virial, &
     108             :         compress_tensor, decompress_tensor, get_tensor_occupancy, neighbor_list_3c_destroy
     109             :    USE qs_tensors_types,                ONLY: create_2c_tensor,&
     110             :                                               create_3c_tensor,&
     111             :                                               create_tensor_batches,&
     112             :                                               distribution_3d_create,&
     113             :                                               distribution_3d_type,&
     114             :                                               neighbor_list_3c_type,&
     115             :                                               split_block_sizes
     116             :    USE string_utilities,                ONLY: uppercase
     117             :    USE util,                            ONLY: sort
     118             :    USE virial_types,                    ONLY: virial_type
     119             : #include "./base/base_uses.f90"
     120             : 
     121             : !$ USE OMP_LIB, ONLY: omp_get_num_threads
     122             : 
     123             :    IMPLICIT NONE
     124             :    PRIVATE
     125             : 
     126             :    PUBLIC :: hfx_ri_update_ks, hfx_ri_update_forces, get_force_from_3c_trace, get_2c_der_force, &
     127             :              get_idx_to_atom, print_ri_hfx, hfx_ri_pre_scf_calc_tensors, check_inverse
     128             : 
     129             :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'hfx_ri'
     130             : CONTAINS
     131             : 
     132             : ! **************************************************************************************************
     133             : !> \brief Switches the RI_FLAVOR from MO to RHO or vice-versa
     134             : !> \param ri_data ...
     135             : !> \param qs_env ...
     136             : !> \note As a side product, the ri_data is mostly reinitialized and the integrals recomputed
     137             : ! **************************************************************************************************
     138          22 :    SUBROUTINE switch_ri_flavor(ri_data, qs_env)
     139             :       TYPE(hfx_ri_type), INTENT(INOUT)                   :: ri_data
     140             :       TYPE(qs_environment_type), POINTER                 :: qs_env
     141             : 
     142             :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'switch_ri_flavor'
     143             : 
     144             :       INTEGER                                            :: handle, n_mem, new_flavor
     145          22 :       TYPE(atomic_kind_type), DIMENSION(:), POINTER      :: atomic_kind_set
     146             :       TYPE(dft_control_type), POINTER                    :: dft_control
     147             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     148          22 :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
     149          22 :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
     150             : 
     151          22 :       NULLIFY (qs_kind_set, particle_set, atomic_kind_set, para_env, dft_control)
     152             : 
     153          22 :       CALL timeset(routineN, handle)
     154             : 
     155             :       CALL get_qs_env(qs_env, para_env=para_env, dft_control=dft_control, atomic_kind_set=atomic_kind_set, &
     156          22 :                       particle_set=particle_set, qs_kind_set=qs_kind_set)
     157             : 
     158          22 :       CALL hfx_ri_release(ri_data, write_stats=.FALSE.)
     159             : 
     160          22 :       IF (ri_data%flavor == ri_pmat) THEN
     161             :          new_flavor = ri_mo
     162             :       ELSE
     163          12 :          new_flavor = ri_pmat
     164             :       END IF
     165          22 :       ri_data%flavor = new_flavor
     166             : 
     167          22 :       n_mem = ri_data%n_mem_input
     168          22 :       ri_data%n_mem_input = ri_data%n_mem_flavor_switch
     169          22 :       ri_data%n_mem_flavor_switch = n_mem
     170             : 
     171          22 :       CALL hfx_ri_init(ri_data, qs_kind_set, particle_set, atomic_kind_set, para_env)
     172             : 
     173             :       !Need to recalculate the integrals in the new flavor
     174             :       !TODO: should we backup the integrals and symmetrize/desymmetrize them instead of recomputing ?!?
     175             :       !      that only makes sense if actual integral calculation is not negligible
     176          22 :       IF (ri_data%flavor == ri_pmat) THEN
     177          12 :          CALL hfx_ri_pre_scf_Pmat(qs_env, ri_data)
     178             :       ELSE
     179          10 :          CALL hfx_ri_pre_scf_mo(qs_env, ri_data, dft_control%nspins)
     180             :       END IF
     181             : 
     182          22 :       IF (ri_data%unit_nr > 0) THEN
     183           0 :          IF (ri_data%flavor == ri_pmat) THEN
     184           0 :             WRITE (ri_data%unit_nr, '(T2,A)') "HFX_RI_INFO| temporarily switched to RI_FLAVOR RHO"
     185             :          ELSE
     186           0 :             WRITE (ri_data%unit_nr, '(T2,A)') "HFX_RI_INFO| temporarily switched to RI_FLAVOR MO"
     187             :          END IF
     188             :       END IF
     189             : 
     190          22 :       CALL timestop(handle)
     191             : 
     192          22 :    END SUBROUTINE switch_ri_flavor
     193             : 
     194             : ! **************************************************************************************************
     195             : !> \brief Pre-SCF steps in MO flavor of RI HFX
     196             : !>
     197             : !> Calculate 2-center & 3-center integrals (see hfx_ri_pre_scf_calc_tensors) and contract
     198             : !> K(P, S) = sum_R K_2(P, R)^{-1} K_1(R, S)^{1/2}
     199             : !> B(mu, lambda, R) = sum_P int_3c(mu, lambda, P) K(P, R)
     200             : !> \param qs_env ...
     201             : !> \param ri_data ...
     202             : !> \param nspins ...
     203             : ! **************************************************************************************************
     204          26 :    SUBROUTINE hfx_ri_pre_scf_mo(qs_env, ri_data, nspins)
     205             :       TYPE(qs_environment_type), POINTER                 :: qs_env
     206             :       TYPE(hfx_ri_type), INTENT(INOUT)                   :: ri_data
     207             :       INTEGER, INTENT(IN)                                :: nspins
     208             : 
     209             :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'hfx_ri_pre_scf_mo'
     210             : 
     211             :       INTEGER                                            :: handle, handle2, ispin, n_dependent, &
     212             :                                                             unit_nr, unit_nr_dbcsr
     213             :       REAL(KIND=dp)                                      :: threshold
     214             :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
     215         156 :       TYPE(dbcsr_type), DIMENSION(1) :: dbcsr_work_1, dbcsr_work_2, t_2c_int_mat, t_2c_op_pot, &
     216         130 :          t_2c_op_pot_sqrt, t_2c_op_pot_sqrt_inv, t_2c_op_RI, t_2c_op_RI_inv
     217          26 :       TYPE(dbt_type), ALLOCATABLE, DIMENSION(:)          :: t_2c_int, t_2c_work
     218          26 :       TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :)       :: t_3c_int
     219             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     220             : 
     221          26 :       CALL timeset(routineN, handle)
     222             : 
     223          26 :       unit_nr_dbcsr = ri_data%unit_nr_dbcsr
     224          26 :       unit_nr = ri_data%unit_nr
     225             : 
     226          26 :       CALL get_qs_env(qs_env, para_env=para_env, blacs_env=blacs_env)
     227             : 
     228          26 :       CALL timeset(routineN//"_int", handle2)
     229             : 
     230         806 :       ALLOCATE (t_2c_int(1), t_2c_work(1), t_3c_int(1, 1))
     231          26 :       CALL hfx_ri_pre_scf_calc_tensors(qs_env, ri_data, t_2c_op_RI, t_2c_op_pot, t_3c_int)
     232             : 
     233          26 :       CALL timestop(handle2)
     234             : 
     235          26 :       CALL timeset(routineN//"_2c", handle2)
     236          26 :       IF (.NOT. ri_data%same_op) THEN
     237           4 :          SELECT CASE (ri_data%t2c_method)
     238             :          CASE (hfx_ri_do_2c_iter)
     239           0 :             CALL dbcsr_create(t_2c_op_RI_inv(1), template=t_2c_op_RI(1), matrix_type=dbcsr_type_no_symmetry)
     240           0 :             threshold = MAX(ri_data%filter_eps, 1.0e-12_dp)
     241           0 :             CALL invert_hotelling(t_2c_op_RI_inv(1), t_2c_op_RI(1), threshold=threshold, silent=.FALSE.)
     242             :          CASE (hfx_ri_do_2c_cholesky)
     243           4 :             CALL dbcsr_copy(t_2c_op_RI_inv(1), t_2c_op_RI(1))
     244           4 :             CALL cp_dbcsr_cholesky_decompose(t_2c_op_RI_inv(1), para_env=para_env, blacs_env=blacs_env)
     245           4 :             CALL cp_dbcsr_cholesky_invert(t_2c_op_RI_inv(1), para_env=para_env, blacs_env=blacs_env, uplo_to_full=.TRUE.)
     246             :          CASE (hfx_ri_do_2c_diag)
     247           0 :             CALL dbcsr_copy(t_2c_op_RI_inv(1), t_2c_op_RI(1))
     248             :             CALL cp_dbcsr_power(t_2c_op_RI_inv(1), -1.0_dp, ri_data%eps_eigval, n_dependent, &
     249           4 :                                 para_env, blacs_env, verbose=ri_data%unit_nr_dbcsr > 0)
     250             :          END SELECT
     251             : 
     252           4 :          IF (ri_data%check_2c_inv) THEN
     253           0 :             CALL check_inverse(t_2c_op_RI_inv(1), t_2c_op_RI(1), unit_nr=unit_nr)
     254             :          END IF
     255             : 
     256           4 :          CALL dbcsr_release(t_2c_op_RI(1))
     257             : 
     258           4 :          SELECT CASE (ri_data%t2c_method)
     259             :          CASE (hfx_ri_do_2c_iter)
     260           0 :             CALL dbcsr_create(t_2c_op_pot_sqrt(1), template=t_2c_op_pot(1), matrix_type=dbcsr_type_symmetric)
     261           0 :             CALL dbcsr_create(t_2c_op_pot_sqrt_inv(1), template=t_2c_op_pot(1), matrix_type=dbcsr_type_symmetric)
     262             :             CALL matrix_sqrt_newton_schulz(t_2c_op_pot_sqrt(1), t_2c_op_pot_sqrt_inv(1), t_2c_op_pot(1), &
     263             :                                            ri_data%filter_eps, ri_data%t2c_sqrt_order, ri_data%eps_lanczos, &
     264           0 :                                            ri_data%max_iter_lanczos)
     265             : 
     266           0 :             CALL dbcsr_release(t_2c_op_pot_sqrt_inv(1))
     267             :          CASE (hfx_ri_do_2c_diag, hfx_ri_do_2c_cholesky)
     268           4 :             CALL dbcsr_copy(t_2c_op_pot_sqrt(1), t_2c_op_pot(1))
     269             :             CALL cp_dbcsr_power(t_2c_op_pot_sqrt(1), 0.5_dp, ri_data%eps_eigval, n_dependent, &
     270           8 :                                 para_env, blacs_env, verbose=ri_data%unit_nr_dbcsr > 0)
     271             :          END SELECT
     272             : 
     273             :          !We need S^-1 and (P|Q) for the forces.
     274           4 :          CALL dbt_create(t_2c_op_RI_inv(1), t_2c_work(1))
     275           4 :          CALL dbt_copy_matrix_to_tensor(t_2c_op_RI_inv(1), t_2c_work(1))
     276           4 :          CALL dbt_copy(t_2c_work(1), ri_data%t_2c_inv(1, 1), move_data=.TRUE.)
     277           4 :          CALL dbt_destroy(t_2c_work(1))
     278           4 :          CALL dbt_filter(ri_data%t_2c_inv(1, 1), ri_data%filter_eps)
     279             : 
     280           4 :          CALL dbt_create(t_2c_op_pot(1), t_2c_work(1))
     281           4 :          CALL dbt_copy_matrix_to_tensor(t_2c_op_pot(1), t_2c_work(1))
     282           4 :          CALL dbt_copy(t_2c_work(1), ri_data%t_2c_pot(1, 1), move_data=.TRUE.)
     283           4 :          CALL dbt_destroy(t_2c_work(1))
     284           4 :          CALL dbt_filter(ri_data%t_2c_pot(1, 1), ri_data%filter_eps)
     285             : 
     286           4 :          IF (ri_data%check_2c_inv) THEN
     287           0 :             CALL check_sqrt(t_2c_op_pot(1), matrix_sqrt=t_2c_op_pot_sqrt(1), unit_nr=unit_nr)
     288             :          END IF
     289           4 :          CALL dbcsr_create(t_2c_int_mat(1), template=t_2c_op_pot(1), matrix_type=dbcsr_type_no_symmetry)
     290             :          CALL dbcsr_multiply("N", "N", 1.0_dp, t_2c_op_RI_inv(1), t_2c_op_pot_sqrt(1), &
     291           4 :                              0.0_dp, t_2c_int_mat(1), filter_eps=ri_data%filter_eps)
     292           4 :          CALL dbcsr_release(t_2c_op_RI_inv(1))
     293           4 :          CALL dbcsr_release(t_2c_op_pot_sqrt(1))
     294             :       ELSE
     295          22 :          SELECT CASE (ri_data%t2c_method)
     296             :          CASE (hfx_ri_do_2c_iter)
     297           0 :             CALL dbcsr_create(t_2c_int_mat(1), template=t_2c_op_pot(1), matrix_type=dbcsr_type_symmetric)
     298           0 :             CALL dbcsr_create(t_2c_op_pot_sqrt(1), template=t_2c_op_pot(1), matrix_type=dbcsr_type_symmetric)
     299             :             CALL matrix_sqrt_newton_schulz(t_2c_op_pot_sqrt(1), t_2c_int_mat(1), t_2c_op_pot(1), &
     300             :                                            ri_data%filter_eps, ri_data%t2c_sqrt_order, ri_data%eps_lanczos, &
     301           0 :                                            ri_data%max_iter_lanczos)
     302           0 :             CALL dbcsr_release(t_2c_op_pot_sqrt(1))
     303             :          CASE (hfx_ri_do_2c_diag, hfx_ri_do_2c_cholesky)
     304          22 :             CALL dbcsr_copy(t_2c_int_mat(1), t_2c_op_pot(1))
     305             :             CALL cp_dbcsr_power(t_2c_int_mat(1), -0.5_dp, ri_data%eps_eigval, n_dependent, &
     306          44 :                                 para_env, blacs_env, verbose=ri_data%unit_nr_dbcsr > 0)
     307             :          END SELECT
     308          22 :          IF (ri_data%check_2c_inv) THEN
     309           0 :             CALL check_sqrt(t_2c_op_pot(1), matrix_sqrt_inv=t_2c_int_mat(1), unit_nr=unit_nr)
     310             :          END IF
     311             : 
     312             :          !We need (P|Q)^-1 for the forces
     313          22 :          CALL dbcsr_copy(dbcsr_work_1(1), t_2c_int_mat(1))
     314          22 :          CALL dbcsr_create(dbcsr_work_2(1), template=t_2c_int_mat(1))
     315          22 :          CALL dbcsr_multiply("N", "N", 1.0_dp, dbcsr_work_1(1), t_2c_int_mat(1), 0.0_dp, dbcsr_work_2(1))
     316          22 :          CALL dbcsr_release(dbcsr_work_1(1))
     317          22 :          CALL dbt_create(dbcsr_work_2(1), t_2c_work(1))
     318          22 :          CALL dbt_copy_matrix_to_tensor(dbcsr_work_2(1), t_2c_work(1))
     319          22 :          CALL dbcsr_release(dbcsr_work_2(1))
     320          22 :          CALL dbt_copy(t_2c_work(1), ri_data%t_2c_inv(1, 1), move_data=.TRUE.)
     321          22 :          CALL dbt_destroy(t_2c_work(1))
     322          22 :          CALL dbt_filter(ri_data%t_2c_inv(1, 1), ri_data%filter_eps)
     323             :       END IF
     324             : 
     325          26 :       CALL dbcsr_release(t_2c_op_pot(1))
     326             : 
     327          26 :       CALL dbt_create(t_2c_int_mat(1), t_2c_int(1), name="(RI|RI)")
     328          26 :       CALL dbt_copy_matrix_to_tensor(t_2c_int_mat(1), t_2c_int(1))
     329          26 :       CALL dbcsr_release(t_2c_int_mat(1))
     330          58 :       DO ispin = 1, nspins
     331          58 :          CALL dbt_copy(t_2c_int(1), ri_data%t_2c_int(ispin, 1))
     332             :       END DO
     333          26 :       CALL dbt_destroy(t_2c_int(1))
     334          26 :       CALL timestop(handle2)
     335             : 
     336          26 :       CALL timeset(routineN//"_3c", handle2)
     337          26 :       CALL dbt_copy(t_3c_int(1, 1), ri_data%t_3c_int_ctr_1(1, 1), order=[2, 1, 3], move_data=.TRUE.)
     338          26 :       CALL dbt_filter(ri_data%t_3c_int_ctr_1(1, 1), ri_data%filter_eps)
     339          26 :       CALL dbt_copy(ri_data%t_3c_int_ctr_1(1, 1), ri_data%t_3c_int_ctr_2(1, 1))
     340          26 :       CALL dbt_destroy(t_3c_int(1, 1))
     341          26 :       CALL timestop(handle2)
     342             : 
     343          26 :       CALL timestop(handle)
     344             : 
     345         260 :    END SUBROUTINE
     346             : 
     347             : ! **************************************************************************************************
     348             : !> \brief ...
     349             : !> \param matrix_1 ...
     350             : !> \param matrix_2 ...
     351             : !> \param name ...
     352             : !> \param unit_nr ...
     353             : ! **************************************************************************************************
     354           0 :    SUBROUTINE check_inverse(matrix_1, matrix_2, name, unit_nr)
     355             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix_1, matrix_2
     356             :       CHARACTER(len=*), INTENT(IN), OPTIONAL             :: name
     357             :       INTEGER, INTENT(IN)                                :: unit_nr
     358             : 
     359             :       CHARACTER(len=default_string_length)               :: name_prv
     360             :       REAL(KIND=dp)                                      :: error, frob_matrix, frob_matrix_base
     361             :       TYPE(dbcsr_type)                                   :: matrix_tmp
     362             : 
     363           0 :       IF (PRESENT(name)) THEN
     364           0 :          name_prv = name
     365             :       ELSE
     366           0 :          CALL dbcsr_get_info(matrix_1, name=name_prv)
     367             :       END IF
     368             : 
     369           0 :       CALL dbcsr_create(matrix_tmp, template=matrix_1)
     370             :       CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_1, matrix_2, &
     371           0 :                           0.0_dp, matrix_tmp)
     372           0 :       frob_matrix_base = dbcsr_frobenius_norm(matrix_tmp)
     373           0 :       CALL dbcsr_add_on_diag(matrix_tmp, -1.0_dp)
     374           0 :       frob_matrix = dbcsr_frobenius_norm(matrix_tmp)
     375           0 :       error = frob_matrix/frob_matrix_base
     376           0 :       IF (unit_nr > 0) THEN
     377             :          WRITE (UNIT=unit_nr, FMT="(T3,A,A,A,T73,ES8.1)") &
     378           0 :             "HFX_RI_INFO| Error for INV(", TRIM(name_prv), "):", error
     379             :       END IF
     380             : 
     381           0 :       CALL dbcsr_release(matrix_tmp)
     382           0 :    END SUBROUTINE
     383             : 
     384             : ! **************************************************************************************************
     385             : !> \brief ...
     386             : !> \param matrix ...
     387             : !> \param matrix_sqrt ...
     388             : !> \param matrix_sqrt_inv ...
     389             : !> \param name ...
     390             : !> \param unit_nr ...
     391             : ! **************************************************************************************************
     392           0 :    SUBROUTINE check_sqrt(matrix, matrix_sqrt, matrix_sqrt_inv, name, unit_nr)
     393             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix
     394             :       TYPE(dbcsr_type), INTENT(IN), OPTIONAL             :: matrix_sqrt, matrix_sqrt_inv
     395             :       CHARACTER(len=*), INTENT(IN), OPTIONAL             :: name
     396             :       INTEGER, INTENT(IN)                                :: unit_nr
     397             : 
     398             :       CHARACTER(len=default_string_length)               :: name_prv
     399             :       REAL(KIND=dp)                                      :: frob_matrix
     400             :       TYPE(dbcsr_type)                                   :: matrix_copy, matrix_tmp
     401             : 
     402           0 :       IF (PRESENT(name)) THEN
     403           0 :          name_prv = name
     404             :       ELSE
     405           0 :          CALL dbcsr_get_info(matrix, name=name_prv)
     406             :       END IF
     407           0 :       IF (PRESENT(matrix_sqrt)) THEN
     408           0 :          CALL dbcsr_create(matrix_tmp, template=matrix)
     409           0 :          CALL dbcsr_copy(matrix_copy, matrix_sqrt)
     410             :          CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_sqrt, matrix_copy, &
     411           0 :                              0.0_dp, matrix_tmp)
     412           0 :          CALL dbcsr_add(matrix_tmp, matrix, 1.0_dp, -1.0_dp)
     413           0 :          frob_matrix = dbcsr_frobenius_norm(matrix_tmp)
     414           0 :          IF (unit_nr > 0) THEN
     415             :             WRITE (UNIT=unit_nr, FMT="(T3,A,A,A,T73,ES8.1)") &
     416           0 :                "HFX_RI_INFO| Error for SQRT(", TRIM(name_prv), "):", frob_matrix
     417             :          END IF
     418           0 :          CALL dbcsr_release(matrix_tmp)
     419           0 :          CALL dbcsr_release(matrix_copy)
     420             :       END IF
     421             : 
     422           0 :       IF (PRESENT(matrix_sqrt_inv)) THEN
     423           0 :          CALL dbcsr_create(matrix_tmp, template=matrix)
     424           0 :          CALL dbcsr_copy(matrix_copy, matrix_sqrt_inv)
     425             :          CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_sqrt_inv, matrix_copy, &
     426           0 :                              0.0_dp, matrix_tmp)
     427           0 :          CALL check_inverse(matrix_tmp, matrix, name="SQRT("//TRIM(name_prv)//")", unit_nr=unit_nr)
     428           0 :          CALL dbcsr_release(matrix_tmp)
     429           0 :          CALL dbcsr_release(matrix_copy)
     430             :       END IF
     431             : 
     432           0 :    END SUBROUTINE
     433             : 
     434             : ! **************************************************************************************************
     435             : !> \brief Calculate 2-center and 3-center integrals
     436             : !>
     437             : !> 2c: K_1(P, R) = (P|v1|R) and K_2(P, R) = (P|v2|R)
     438             : !> 3c: int_3c(mu, lambda, P) = (mu lambda |v2| P)
     439             : !> v_1 is HF operator, v_2 is RI metric
     440             : !> \param qs_env ...
     441             : !> \param ri_data ...
     442             : !> \param t_2c_int_RI K_2(P, R) note: even with k-point, only need on central cell
     443             : !> \param t_2c_int_pot K_1(P, R)
     444             : !> \param t_3c_int int_3c(mu, lambda, P)
     445             : !> \param do_kpoints ...
     446             : !> \notes the integral tensor arrays are already allocated on entry
     447             : ! **************************************************************************************************
     448        4004 :    SUBROUTINE hfx_ri_pre_scf_calc_tensors(qs_env, ri_data, t_2c_int_RI, t_2c_int_pot, t_3c_int, do_kpoints)
     449             :       TYPE(qs_environment_type), POINTER                 :: qs_env
     450             :       TYPE(hfx_ri_type), INTENT(INOUT)                   :: ri_data
     451             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(OUT)        :: t_2c_int_RI, t_2c_int_pot
     452             :       TYPE(dbt_type), DIMENSION(:, :)                    :: t_3c_int
     453             :       LOGICAL, INTENT(IN), OPTIONAL                      :: do_kpoints
     454             : 
     455             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'hfx_ri_pre_scf_calc_tensors'
     456             : 
     457             :       CHARACTER                                          :: symm
     458             :       INTEGER                                            :: handle, i_img, i_mem, ibasis, j_img, &
     459             :                                                             n_mem, natom, nblks, nimg, nkind, &
     460             :                                                             nthreads
     461             :       INTEGER(int_8)                                     :: nze
     462         178 :       INTEGER, ALLOCATABLE, DIMENSION(:) :: dist_AO_1, dist_AO_2, dist_RI, dist_RI_ext, &
     463         356 :          ends_array_mc_block_int, ends_array_mc_int, sizes_AO, sizes_RI, sizes_RI_ext, &
     464         178 :          sizes_RI_ext_split, starts_array_mc_block_int, starts_array_mc_int
     465             :       INTEGER, DIMENSION(3)                              :: pcoord, pdims
     466         356 :       INTEGER, DIMENSION(:), POINTER                     :: col_bsize, row_bsize
     467             :       LOGICAL                                            :: converged, do_kpoints_prv
     468             :       REAL(dp)                                           :: max_ev, min_ev, occ, RI_range
     469         178 :       TYPE(atomic_kind_type), DIMENSION(:), POINTER      :: atomic_kind_set
     470             :       TYPE(dbcsr_distribution_type)                      :: dbcsr_dist
     471        1246 :       TYPE(dbt_distribution_type)                        :: t_dist
     472         534 :       TYPE(dbt_pgrid_type)                               :: pgrid
     473        1246 :       TYPE(dbt_type)                                     :: t_3c_tmp
     474         178 :       TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :)       :: t_3c_int_batched
     475             :       TYPE(dft_control_type), POINTER                    :: dft_control
     476             :       TYPE(distribution_2d_type), POINTER                :: dist_2d
     477             :       TYPE(distribution_3d_type)                         :: dist_3d
     478             :       TYPE(gto_basis_set_p_type), ALLOCATABLE, &
     479         178 :          DIMENSION(:), TARGET                            :: basis_set_AO, basis_set_RI
     480             :       TYPE(gto_basis_set_type), POINTER                  :: orb_basis, ri_basis
     481         178 :       TYPE(mp_cart_type)                                 :: mp_comm_t3c
     482             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     483             :       TYPE(neighbor_list_3c_type)                        :: nl_3c
     484             :       TYPE(neighbor_list_set_p_type), DIMENSION(:), &
     485         178 :          POINTER                                         :: nl_2c_pot, nl_2c_RI
     486         178 :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
     487         178 :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
     488             :       TYPE(qs_ks_env_type), POINTER                      :: ks_env
     489             : 
     490         178 :       CALL timeset(routineN, handle)
     491         178 :       NULLIFY (col_bsize, row_bsize, dist_2d, nl_2c_pot, nl_2c_RI, &
     492         178 :                particle_set, qs_kind_set, ks_env, para_env)
     493             : 
     494             :       CALL get_qs_env(qs_env, natom=natom, nkind=nkind, qs_kind_set=qs_kind_set, particle_set=particle_set, &
     495         178 :                       distribution_2d=dist_2d, ks_env=ks_env, dft_control=dft_control, para_env=para_env)
     496             : 
     497         178 :       RI_range = 0.0_dp
     498         178 :       do_kpoints_prv = .FALSE.
     499         178 :       IF (PRESENT(do_kpoints)) do_kpoints_prv = do_kpoints
     500         178 :       nimg = 1
     501         178 :       IF (do_kpoints_prv) THEN
     502          70 :          nimg = ri_data%nimg
     503          70 :          RI_range = ri_data%kp_RI_range
     504             :       END IF
     505             : 
     506         712 :       ALLOCATE (sizes_RI(natom), sizes_AO(natom))
     507        1332 :       ALLOCATE (basis_set_RI(nkind), basis_set_AO(nkind))
     508         178 :       CALL basis_set_list_setup(basis_set_RI, ri_data%ri_basis_type, qs_kind_set)
     509         178 :       CALL get_particle_set(particle_set, qs_kind_set, nsgf=sizes_RI, basis=basis_set_RI)
     510         178 :       CALL basis_set_list_setup(basis_set_AO, ri_data%orb_basis_type, qs_kind_set)
     511         178 :       CALL get_particle_set(particle_set, qs_kind_set, nsgf=sizes_AO, basis=basis_set_AO)
     512             : 
     513         488 :       DO ibasis = 1, SIZE(basis_set_AO)
     514         310 :          orb_basis => basis_set_AO(ibasis)%gto_basis_set
     515         310 :          ri_basis => basis_set_RI(ibasis)%gto_basis_set
     516             :          ! interaction radii should be based on eps_pgf_orb controlled in RI section
     517             :          ! (since hartree-fock needs very tight eps_pgf_orb for Kohn-Sham/Fock matrix but eps_pgf_orb
     518             :          ! can be much looser in RI HFX since no systematic error is introduced with tensor sparsity)
     519         310 :          CALL init_interaction_radii_orb_basis(orb_basis, ri_data%eps_pgf_orb)
     520         488 :          CALL init_interaction_radii_orb_basis(ri_basis, ri_data%eps_pgf_orb)
     521             :       END DO
     522             : 
     523         178 :       n_mem = ri_data%n_mem
     524             :       CALL create_tensor_batches(sizes_RI, n_mem, starts_array_mc_int, ends_array_mc_int, &
     525             :                                  starts_array_mc_block_int, ends_array_mc_block_int)
     526             : 
     527         178 :       DEALLOCATE (starts_array_mc_int, ends_array_mc_int)
     528             : 
     529             :       !We separate the K-points and standard 3c integrals, because handle quite differently
     530         178 :       IF (.NOT. do_kpoints_prv) THEN
     531             : 
     532         108 :          nthreads = 1
     533         108 : !$       nthreads = omp_get_num_threads()
     534         108 :          pdims = 0
     535         432 :          CALL dbt_pgrid_create(para_env, pdims, pgrid, tensor_dims=[MAX(1, natom/(n_mem*nthreads)), natom, natom])
     536             : 
     537         972 :          ALLOCATE (t_3c_int_batched(1, 1))
     538             :          CALL create_3c_tensor(t_3c_int_batched(1, 1), dist_RI, dist_AO_1, dist_AO_2, pgrid, &
     539             :                                sizes_RI, sizes_AO, sizes_AO, map1=[1], map2=[2, 3], &
     540         108 :                                name="(RI | AO AO)")
     541             : 
     542         108 :          CALL get_qs_env(qs_env, nkind=nkind, particle_set=particle_set, atomic_kind_set=atomic_kind_set)
     543         108 :          CALL dbt_mp_environ_pgrid(pgrid, pdims, pcoord)
     544         108 :          CALL mp_comm_t3c%create(pgrid%mp_comm_2d, 3, pdims)
     545             :          CALL distribution_3d_create(dist_3d, dist_RI, dist_AO_1, dist_AO_2, &
     546         108 :                                      nkind, particle_set, mp_comm_t3c, own_comm=.TRUE.)
     547         108 :          DEALLOCATE (dist_RI, dist_AO_1, dist_AO_2)
     548             : 
     549             :          CALL create_3c_tensor(t_3c_int(1, 1), dist_RI, dist_AO_1, dist_AO_2, ri_data%pgrid, &
     550             :                                ri_data%bsizes_RI_split, ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
     551             :                                map1=[1], map2=[2, 3], &
     552         108 :                                name="O (RI AO | AO)")
     553             : 
     554             :          CALL build_3c_neighbor_lists(nl_3c, basis_set_RI, basis_set_AO, basis_set_AO, dist_3d, ri_data%ri_metric, &
     555         108 :                                       "HFX_3c_nl", qs_env, op_pos=1, sym_jk=.TRUE., own_dist=.TRUE.)
     556             : 
     557         424 :          DO i_mem = 1, n_mem
     558             :             CALL build_3c_integrals(t_3c_int_batched, ri_data%filter_eps/2, qs_env, nl_3c, &
     559             :                                     basis_set_RI, basis_set_AO, basis_set_AO, &
     560             :                                     ri_data%ri_metric, int_eps=ri_data%eps_schwarz, op_pos=1, &
     561             :                                     desymmetrize=.FALSE., &
     562         948 :                                     bounds_i=[starts_array_mc_block_int(i_mem), ends_array_mc_block_int(i_mem)])
     563         316 :             CALL dbt_copy(t_3c_int_batched(1, 1), t_3c_int(1, 1), summation=.TRUE., move_data=.TRUE.)
     564         424 :             CALL dbt_filter(t_3c_int(1, 1), ri_data%filter_eps/2)
     565             :          END DO
     566             : 
     567         108 :          CALL dbt_destroy(t_3c_int_batched(1, 1))
     568             : 
     569         108 :          CALL neighbor_list_3c_destroy(nl_3c)
     570             : 
     571         108 :          CALL dbt_create(t_3c_int(1, 1), t_3c_tmp)
     572             : 
     573         108 :          IF (ri_data%flavor == ri_pmat) THEN ! desymmetrize
     574             :             ! desymmetrize
     575          82 :             CALL dbt_copy(t_3c_int(1, 1), t_3c_tmp)
     576          82 :             CALL dbt_copy(t_3c_tmp, t_3c_int(1, 1), order=[1, 3, 2], summation=.TRUE., move_data=.TRUE.)
     577             : 
     578             :             ! For RI-RHO filter_eps_storage is reserved for screening tensor contracted with RI-metric
     579             :             ! with RI metric but not to bare integral tensor
     580          82 :             CALL dbt_filter(t_3c_int(1, 1), ri_data%filter_eps)
     581             :          ELSE
     582          26 :             CALL dbt_filter(t_3c_int(1, 1), ri_data%filter_eps_storage/2)
     583             :          END IF
     584             : 
     585         108 :          CALL dbt_destroy(t_3c_tmp)
     586             : 
     587             :       ELSE !do_kpoints
     588             : 
     589          70 :          nthreads = 1
     590          70 : !$       nthreads = omp_get_num_threads()
     591          70 :          pdims = 0
     592         280 :          CALL dbt_pgrid_create(para_env, pdims, pgrid, tensor_dims=[natom, natom, MAX(1, natom/(n_mem*nthreads))])
     593             : 
     594             :          !In k-points HFX, we stack all images along the RI direction in the same tensors, in order
     595             :          !to avoid storing nimg x ncell_RI different tensors (very memory intensive)
     596          70 :          nblks = SIZE(ri_data%bsizes_RI_split)
     597         350 :          ALLOCATE (sizes_RI_ext(natom*ri_data%ncell_RI), sizes_RI_ext_split(nblks*ri_data%ncell_RI))
     598         506 :          DO i_img = 1, ri_data%ncell_RI
     599        1308 :             sizes_RI_ext((i_img - 1)*natom + 1:i_img*natom) = sizes_RI(:)
     600        2344 :             sizes_RI_ext_split((i_img - 1)*nblks + 1:i_img*nblks) = ri_data%bsizes_RI_split(:)
     601             :          END DO
     602             : 
     603             :          CALL create_3c_tensor(t_3c_tmp, dist_AO_1, dist_AO_2, dist_RI, &
     604             :                                pgrid, sizes_AO, sizes_AO, sizes_RI, map1=[1, 2], map2=[3], &
     605          70 :                                name="(AO AO | RI)")
     606          70 :          CALL dbt_destroy(t_3c_tmp)
     607             : 
     608             :          !For the integrals to work, the distribution along the RI direction must be repeated
     609         210 :          ALLOCATE (dist_RI_ext(natom*ri_data%ncell_RI))
     610         506 :          DO i_img = 1, ri_data%ncell_RI
     611        1378 :             dist_RI_ext((i_img - 1)*natom + 1:i_img*natom) = dist_RI(:)
     612             :          END DO
     613             : 
     614        2416 :          ALLOCATE (t_3c_int_batched(nimg, 1))
     615          70 :          CALL dbt_distribution_new(t_dist, pgrid, dist_AO_1, dist_AO_2, dist_RI_ext)
     616             :          CALL dbt_create(t_3c_int_batched(1, 1), "KP_3c_ints", t_dist, [1, 2], [3], &
     617          70 :                          sizes_AO, sizes_AO, sizes_RI_ext)
     618        1716 :          DO i_img = 2, nimg
     619        1716 :             CALL dbt_create(t_3c_int_batched(1, 1), t_3c_int_batched(i_img, 1))
     620             :          END DO
     621          70 :          CALL dbt_distribution_destroy(t_dist)
     622             : 
     623          70 :          CALL get_qs_env(qs_env, nkind=nkind, particle_set=particle_set, atomic_kind_set=atomic_kind_set)
     624          70 :          CALL dbt_mp_environ_pgrid(pgrid, pdims, pcoord)
     625          70 :          CALL mp_comm_t3c%create(pgrid%mp_comm_2d, 3, pdims)
     626             :          CALL distribution_3d_create(dist_3d, dist_AO_1, dist_AO_2, dist_RI, &
     627          70 :                                      nkind, particle_set, mp_comm_t3c, own_comm=.TRUE.)
     628          70 :          DEALLOCATE (dist_RI, dist_AO_1, dist_AO_2)
     629             : 
     630             :          ! create 3c tensor for storage of ints
     631             :          CALL build_3c_neighbor_lists(nl_3c, basis_set_AO, basis_set_AO, basis_set_RI, dist_3d, ri_data%ri_metric, &
     632          70 :                                       "HFX_3c_nl", qs_env, op_pos=2, sym_ij=.FALSE., own_dist=.TRUE.)
     633             : 
     634             :          CALL create_3c_tensor(t_3c_int(1, 1), dist_RI, dist_AO_1, dist_AO_2, ri_data%pgrid, &
     635             :                                sizes_RI_ext_split, ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
     636             :                                map1=[1], map2=[2, 3], &
     637          70 :                                name="O (RI AO | AO)")
     638        1716 :          DO j_img = 2, nimg
     639        1716 :             CALL dbt_create(t_3c_int(1, 1), t_3c_int(1, j_img))
     640             :          END DO
     641             : 
     642          70 :          CALL dbt_create(t_3c_int(1, 1), t_3c_tmp)
     643         192 :          DO i_mem = 1, n_mem
     644             :             CALL build_3c_integrals(t_3c_int_batched, ri_data%filter_eps, qs_env, nl_3c, &
     645             :                                     basis_set_AO, basis_set_AO, basis_set_RI, &
     646             :                                     ri_data%ri_metric, int_eps=ri_data%eps_schwarz, op_pos=2, &
     647             :                                     desymmetrize=.FALSE., do_kpoints=.TRUE., do_hfx_kpoints=.TRUE., &
     648             :                                     bounds_k=[starts_array_mc_block_int(i_mem), ends_array_mc_block_int(i_mem)], &
     649         366 :                                     RI_range=RI_range, img_to_RI_cell=ri_data%img_to_RI_cell)
     650             : 
     651        3440 :             DO i_img = 1, nimg
     652             :                !we start with (mu^0 sigma^i | P^j) and finish with (P^i | mu^0 sigma^j)
     653        3248 :                CALL get_tensor_occupancy(t_3c_int_batched(i_img, 1), nze, occ)
     654        3370 :                IF (nze > 0) THEN
     655        2154 :                   CALL dbt_copy(t_3c_int_batched(i_img, 1), t_3c_tmp, order=[3, 2, 1], move_data=.TRUE.)
     656        2154 :                   CALL dbt_filter(t_3c_tmp, ri_data%filter_eps)
     657             :                   CALL dbt_copy(t_3c_tmp, t_3c_int(1, i_img), order=[1, 3, 2], &
     658        2154 :                                 summation=.TRUE., move_data=.TRUE.)
     659             :                END IF
     660             :             END DO
     661             :          END DO
     662             : 
     663        1786 :          DO i_img = 1, nimg
     664        1786 :             CALL dbt_destroy(t_3c_int_batched(i_img, 1))
     665             :          END DO
     666        1786 :          DEALLOCATE (t_3c_int_batched)
     667          70 :          CALL neighbor_list_3c_destroy(nl_3c)
     668          70 :          CALL dbt_destroy(t_3c_tmp)
     669             :       END IF
     670         178 :       CALL dbt_pgrid_destroy(pgrid)
     671             : 
     672             :       CALL build_2c_neighbor_lists(nl_2c_pot, basis_set_RI, basis_set_RI, ri_data%hfx_pot, &
     673             :                                    "HFX_2c_nl_pot", qs_env, sym_ij=.NOT. do_kpoints_prv, &
     674         178 :                                    dist_2d=dist_2d)
     675             : 
     676         178 :       CALL cp_dbcsr_dist2d_to_dist(dist_2d, dbcsr_dist)
     677         534 :       ALLOCATE (row_bsize(SIZE(sizes_RI)))
     678         356 :       ALLOCATE (col_bsize(SIZE(sizes_RI)))
     679         676 :       row_bsize(:) = sizes_RI
     680         676 :       col_bsize(:) = sizes_RI
     681             : 
     682             :       !Use non-symmetric nl and matrices for k-points
     683         178 :       symm = dbcsr_type_symmetric
     684         178 :       IF (do_kpoints_prv) symm = dbcsr_type_no_symmetry
     685             : 
     686         178 :       CALL dbcsr_create(t_2c_int_pot(1), "(R|P) HFX", dbcsr_dist, symm, row_bsize, col_bsize)
     687        1824 :       DO i_img = 2, nimg
     688        1824 :          CALL dbcsr_create(t_2c_int_pot(i_img), template=t_2c_int_pot(1))
     689             :       END DO
     690             : 
     691         178 :       IF (.NOT. ri_data%same_op) THEN
     692          88 :          CALL dbcsr_create(t_2c_int_RI(1), "(R|P) HFX", dbcsr_dist, symm, row_bsize, col_bsize)
     693        1734 :          DO i_img = 2, nimg
     694        1734 :             CALL dbcsr_create(t_2c_int_RI(i_img), template=t_2c_int_RI(1))
     695             :          END DO
     696             :       END IF
     697         178 :       DEALLOCATE (col_bsize, row_bsize)
     698             : 
     699         178 :       CALL dbcsr_distribution_release(dbcsr_dist)
     700             : 
     701             :       CALL build_2c_integrals(t_2c_int_pot, ri_data%filter_eps_2c, qs_env, nl_2c_pot, basis_set_RI, basis_set_RI, &
     702         178 :                               ri_data%hfx_pot, do_kpoints=do_kpoints_prv, do_hfx_kpoints=do_kpoints_prv)
     703         178 :       CALL release_neighbor_list_sets(nl_2c_pot)
     704             : 
     705         178 :       IF (.NOT. ri_data%same_op) THEN
     706             :          CALL build_2c_neighbor_lists(nl_2c_RI, basis_set_RI, basis_set_RI, ri_data%ri_metric, &
     707             :                                       "HFX_2c_nl_RI", qs_env, sym_ij=.NOT. do_kpoints_prv, &
     708          88 :                                       dist_2d=dist_2d)
     709             : 
     710             :          CALL build_2c_integrals(t_2c_int_RI, ri_data%filter_eps_2c, qs_env, nl_2c_RI, basis_set_RI, basis_set_RI, &
     711          88 :                                  ri_data%ri_metric, do_kpoints=do_kpoints_prv, do_hfx_kpoints=do_kpoints_prv)
     712             : 
     713          88 :          CALL release_neighbor_list_sets(nl_2c_RI)
     714             :       END IF
     715             : 
     716         488 :       DO ibasis = 1, SIZE(basis_set_AO)
     717         310 :          orb_basis => basis_set_AO(ibasis)%gto_basis_set
     718         310 :          ri_basis => basis_set_RI(ibasis)%gto_basis_set
     719             :          ! reset interaction radii of orb basis
     720         310 :          CALL init_interaction_radii_orb_basis(orb_basis, dft_control%qs_control%eps_pgf_orb)
     721         488 :          CALL init_interaction_radii_orb_basis(ri_basis, dft_control%qs_control%eps_pgf_orb)
     722             :       END DO
     723             : 
     724         178 :       IF (ri_data%calc_condnum) THEN
     725             :          CALL arnoldi_extremal(t_2c_int_pot(1), max_ev, min_ev, threshold=ri_data%eps_lanczos, &
     726           4 :                                max_iter=ri_data%max_iter_lanczos, converged=converged)
     727             : 
     728           4 :          IF (.NOT. converged) THEN
     729           0 :             CPWARN("Not converged: unreliable condition number estimate of (P|Q) matrix (HFX potential).")
     730             :          END IF
     731             : 
     732           4 :          IF (ri_data%unit_nr > 0) THEN
     733           2 :             WRITE (ri_data%unit_nr, '(T2,A)') "2-Norm Condition Number of (P|Q) integrals (HFX potential)"
     734           2 :             IF (min_ev > 0) THEN
     735             :                WRITE (ri_data%unit_nr, '(T4,A,ES11.3E3,T32,A,ES11.3E3,A4,ES11.3E3,T63,A,F8.4)') &
     736           2 :                   "CN : max/min ev: ", max_ev, " / ", min_ev, "=", max_ev/min_ev, "Log(2-CN):", LOG10(max_ev/min_ev)
     737             :             ELSE
     738             :                WRITE (ri_data%unit_nr, '(T4,A,ES11.3E3,T32,A,ES11.3E3,T63,A)') &
     739           0 :                   "CN : max/min ev: ", max_ev, " / ", min_ev, "Log(CN): infinity"
     740             :             END IF
     741             :          END IF
     742             : 
     743           4 :          IF (.NOT. ri_data%same_op) THEN
     744             :             CALL arnoldi_extremal(t_2c_int_RI(1), max_ev, min_ev, threshold=ri_data%eps_lanczos, &
     745           2 :                                   max_iter=ri_data%max_iter_lanczos, converged=converged)
     746             : 
     747           2 :             IF (.NOT. converged) THEN
     748           0 :                CPWARN("Not converged: unreliable condition number estimate of (P|Q) matrix (RI metric).")
     749             :             END IF
     750             : 
     751           2 :             IF (ri_data%unit_nr > 0) THEN
     752           1 :                WRITE (ri_data%unit_nr, '(T2,A)') "2-Norm Condition Number of (P|Q) integrals (RI metric)"
     753           1 :                IF (min_ev > 0) THEN
     754             :                   WRITE (ri_data%unit_nr, '(T4,A,ES11.3E3,T32,A,ES11.3E3,A4,ES11.3E3,T63,A,F8.4)') &
     755           1 :                      "CN : max/min ev: ", max_ev, " / ", min_ev, "=", max_ev/min_ev, "Log(2-CN):", LOG10(max_ev/min_ev)
     756             :                ELSE
     757             :                   WRITE (ri_data%unit_nr, '(T4,A,ES11.3E3,T32,A,ES11.3E3,T63,A)') &
     758           0 :                      "CN : max/min ev: ", max_ev, " / ", min_ev, "Log(CN): infinity"
     759             :                END IF
     760             :             END IF
     761             :          END IF
     762             :       END IF
     763             : 
     764         178 :       CALL timestop(handle)
     765        1176 :    END SUBROUTINE
     766             : 
     767             : ! **************************************************************************************************
     768             : !> \brief Pre-SCF steps in rho flavor of RI HFX
     769             : !>
     770             : !> K(P, S) = sum_{R,Q} K_2(P, R)^{-1} K_1(R, Q) K_2(Q, S)^{-1}
     771             : !> Calculate B(mu, lambda, R) = sum_P int_3c(mu, lambda, P) K(P, R)
     772             : !> \param qs_env ...
     773             : !> \param ri_data ...
     774             : ! **************************************************************************************************
     775          82 :    SUBROUTINE hfx_ri_pre_scf_Pmat(qs_env, ri_data)
     776             :       TYPE(qs_environment_type), POINTER                 :: qs_env
     777             :       TYPE(hfx_ri_type), INTENT(INOUT)                   :: ri_data
     778             : 
     779             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'hfx_ri_pre_scf_Pmat'
     780             : 
     781             :       INTEGER                                            :: handle, handle2, i_mem, j_mem, &
     782             :                                                             n_dependent, unit_nr, unit_nr_dbcsr
     783             :       INTEGER(int_8)                                     :: nflop, nze, nze_O
     784          82 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: batch_ranges_AO, batch_ranges_RI
     785             :       INTEGER, DIMENSION(2, 1)                           :: bounds_i
     786             :       INTEGER, DIMENSION(2, 2)                           :: bounds_j
     787             :       INTEGER, DIMENSION(3)                              :: dims_3c
     788             :       REAL(KIND=dp)                                      :: compression_factor, memory_3c, occ, &
     789             :                                                             threshold
     790             :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
     791         328 :       TYPE(dbcsr_type), DIMENSION(1)                     :: t_2c_int_mat, t_2c_op_pot, t_2c_op_RI, &
     792         246 :                                                             t_2c_tmp, t_2c_tmp_2
     793         738 :       TYPE(dbt_type)                                     :: t_3c_2
     794         164 :       TYPE(dbt_type), ALLOCATABLE, DIMENSION(:)          :: t_2c_int, t_2c_work
     795          82 :       TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :)       :: t_3c_int_1
     796             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     797             : 
     798          82 :       CALL timeset(routineN, handle)
     799             : 
     800          82 :       unit_nr_dbcsr = ri_data%unit_nr_dbcsr
     801          82 :       unit_nr = ri_data%unit_nr
     802             : 
     803          82 :       CALL get_qs_env(qs_env, para_env=para_env, blacs_env=blacs_env)
     804             : 
     805          82 :       CALL timeset(routineN//"_int", handle2)
     806             : 
     807        2542 :       ALLOCATE (t_2c_int(1), t_2c_work(1), t_3c_int_1(1, 1))
     808          82 :       CALL hfx_ri_pre_scf_calc_tensors(qs_env, ri_data, t_2c_op_RI, t_2c_op_pot, t_3c_int_1)
     809             : 
     810          82 :       CALL dbt_copy(t_3c_int_1(1, 1), ri_data%t_3c_int_ctr_3(1, 1), order=[1, 2, 3], move_data=.TRUE.)
     811             : 
     812          82 :       CALL dbt_destroy(t_3c_int_1(1, 1))
     813             : 
     814          82 :       CALL timestop(handle2)
     815             : 
     816          82 :       CALL timeset(routineN//"_2c", handle2)
     817             : 
     818          82 :       IF (ri_data%same_op) t_2c_op_RI(1) = t_2c_op_pot(1)
     819          82 :       CALL dbcsr_create(t_2c_int_mat(1), template=t_2c_op_RI(1), matrix_type=dbcsr_type_no_symmetry)
     820          82 :       threshold = MAX(ri_data%filter_eps, 1.0e-12_dp)
     821             : 
     822          82 :       SELECT CASE (ri_data%t2c_method)
     823             :       CASE (hfx_ri_do_2c_iter)
     824             :          CALL invert_hotelling(t_2c_int_mat(1), t_2c_op_RI(1), &
     825           0 :                                threshold=threshold, silent=.FALSE.)
     826             :       CASE (hfx_ri_do_2c_cholesky)
     827          82 :          CALL dbcsr_copy(t_2c_int_mat(1), t_2c_op_RI(1))
     828          82 :          CALL cp_dbcsr_cholesky_decompose(t_2c_int_mat(1), para_env=para_env, blacs_env=blacs_env)
     829          82 :          CALL cp_dbcsr_cholesky_invert(t_2c_int_mat(1), para_env=para_env, blacs_env=blacs_env, uplo_to_full=.TRUE.)
     830             :       CASE (hfx_ri_do_2c_diag)
     831           0 :          CALL dbcsr_copy(t_2c_int_mat(1), t_2c_op_RI(1))
     832             :          CALL cp_dbcsr_power(t_2c_int_mat(1), -1.0_dp, ri_data%eps_eigval, n_dependent, &
     833          82 :                              para_env, blacs_env, verbose=ri_data%unit_nr_dbcsr > 0)
     834             :       END SELECT
     835             : 
     836          82 :       IF (ri_data%check_2c_inv) THEN
     837           0 :          CALL check_inverse(t_2c_int_mat(1), t_2c_op_RI(1), unit_nr=unit_nr)
     838             :       END IF
     839             : 
     840             :       !Need to save the (P|Q)^-1 tensor for forces (inverse metric if not same_op)
     841          82 :       CALL dbt_create(t_2c_int_mat(1), t_2c_work(1))
     842          82 :       CALL dbt_copy_matrix_to_tensor(t_2c_int_mat(1), t_2c_work(1))
     843          82 :       CALL dbt_copy(t_2c_work(1), ri_data%t_2c_inv(1, 1), move_data=.TRUE.)
     844          82 :       CALL dbt_destroy(t_2c_work(1))
     845          82 :       CALL dbt_filter(ri_data%t_2c_inv(1, 1), ri_data%filter_eps)
     846          82 :       IF (.NOT. ri_data%same_op) THEN
     847             :          !Also save the RI (P|Q) integral
     848          14 :          CALL dbt_create(t_2c_op_pot(1), t_2c_work(1))
     849          14 :          CALL dbt_copy_matrix_to_tensor(t_2c_op_pot(1), t_2c_work(1))
     850          14 :          CALL dbt_copy(t_2c_work(1), ri_data%t_2c_pot(1, 1), move_data=.TRUE.)
     851          14 :          CALL dbt_destroy(t_2c_work(1))
     852          14 :          CALL dbt_filter(ri_data%t_2c_pot(1, 1), ri_data%filter_eps)
     853             :       END IF
     854             : 
     855          82 :       IF (ri_data%same_op) THEN
     856          68 :          CALL dbcsr_release(t_2c_op_pot(1))
     857             :       ELSE
     858          14 :          CALL dbcsr_create(t_2c_tmp(1), template=t_2c_op_RI(1), matrix_type=dbcsr_type_no_symmetry)
     859          14 :          CALL dbcsr_create(t_2c_tmp_2(1), template=t_2c_op_RI(1), matrix_type=dbcsr_type_no_symmetry)
     860          14 :          CALL dbcsr_release(t_2c_op_RI(1))
     861             :          CALL dbcsr_multiply('N', 'N', 1.0_dp, t_2c_int_mat(1), t_2c_op_pot(1), 0.0_dp, t_2c_tmp(1), &
     862          14 :                              filter_eps=ri_data%filter_eps)
     863             : 
     864          14 :          CALL dbcsr_release(t_2c_op_pot(1))
     865             :          CALL dbcsr_multiply('N', 'N', 1.0_dp, t_2c_tmp(1), t_2c_int_mat(1), 0.0_dp, t_2c_tmp_2(1), &
     866          14 :                              filter_eps=ri_data%filter_eps)
     867          14 :          CALL dbcsr_release(t_2c_tmp(1))
     868          14 :          CALL dbcsr_release(t_2c_int_mat(1))
     869          14 :          t_2c_int_mat(1) = t_2c_tmp_2(1)
     870             :       END IF
     871             : 
     872          82 :       CALL dbt_create(t_2c_int_mat(1), t_2c_int(1), name="(RI|RI)")
     873          82 :       CALL dbt_copy_matrix_to_tensor(t_2c_int_mat(1), t_2c_int(1))
     874          82 :       CALL dbcsr_release(t_2c_int_mat(1))
     875          82 :       CALL dbt_copy(t_2c_int(1), ri_data%t_2c_int(1, 1), move_data=.TRUE.)
     876          82 :       CALL dbt_destroy(t_2c_int(1))
     877          82 :       CALL dbt_filter(ri_data%t_2c_int(1, 1), ri_data%filter_eps)
     878             : 
     879          82 :       CALL timestop(handle2)
     880             : 
     881          82 :       CALL dbt_create(ri_data%t_3c_int_ctr_3(1, 1), t_3c_2)
     882             : 
     883          82 :       CALL dbt_get_info(ri_data%t_3c_int_ctr_3(1, 1), nfull_total=dims_3c)
     884             : 
     885          82 :       memory_3c = 0.0_dp
     886          82 :       nze_O = 0
     887             : 
     888         246 :       ALLOCATE (batch_ranges_RI(ri_data%n_mem_RI + 1))
     889         246 :       ALLOCATE (batch_ranges_AO(ri_data%n_mem + 1))
     890         328 :       batch_ranges_RI(:ri_data%n_mem_RI) = ri_data%starts_array_RI_mem_block(:)
     891          82 :       batch_ranges_RI(ri_data%n_mem_RI + 1) = ri_data%ends_array_RI_mem_block(ri_data%n_mem_RI) + 1
     892         328 :       batch_ranges_AO(:ri_data%n_mem) = ri_data%starts_array_mem_block(:)
     893          82 :       batch_ranges_AO(ri_data%n_mem + 1) = ri_data%ends_array_mem_block(ri_data%n_mem) + 1
     894             : 
     895             :       CALL dbt_batched_contract_init(ri_data%t_3c_int_ctr_3(1, 1), batch_range_1=batch_ranges_RI, &
     896          82 :                                      batch_range_2=batch_ranges_AO)
     897             :       CALL dbt_batched_contract_init(t_3c_2, batch_range_1=batch_ranges_RI, &
     898          82 :                                      batch_range_2=batch_ranges_AO)
     899             : 
     900         328 :       DO i_mem = 1, ri_data%n_mem_RI
     901         738 :          bounds_i(:, 1) = [ri_data%starts_array_RI_mem(i_mem), ri_data%ends_array_RI_mem(i_mem)]
     902             : 
     903         246 :          CALL dbt_batched_contract_init(ri_data%t_2c_int(1, 1))
     904         984 :          DO j_mem = 1, ri_data%n_mem
     905        2214 :             bounds_j(:, 1) = [ri_data%starts_array_mem(j_mem), ri_data%ends_array_mem(j_mem)]
     906        2214 :             bounds_j(:, 2) = [1, dims_3c(3)]
     907         738 :             CALL timeset(routineN//"_RIx3C", handle2)
     908             :             CALL dbt_contract(1.0_dp, ri_data%t_2c_int(1, 1), ri_data%t_3c_int_ctr_3(1, 1), &
     909             :                               0.0_dp, t_3c_2, &
     910             :                               contract_1=[2], notcontract_1=[1], &
     911             :                               contract_2=[1], notcontract_2=[2, 3], &
     912             :                               map_1=[1], map_2=[2, 3], filter_eps=ri_data%filter_eps_storage, &
     913             :                               bounds_2=bounds_i, &
     914             :                               bounds_3=bounds_j, &
     915             :                               unit_nr=unit_nr_dbcsr, &
     916         738 :                               flop=nflop)
     917             : 
     918         738 :             ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
     919         738 :             CALL timestop(handle2)
     920             : 
     921         738 :             CALL timeset(routineN//"_copy_2", handle2)
     922         738 :             CALL dbt_copy(t_3c_2, ri_data%t_3c_int_ctr_1(1, 1), order=[2, 1, 3], move_data=.TRUE.)
     923             : 
     924         738 :             CALL get_tensor_occupancy(ri_data%t_3c_int_ctr_1(1, 1), nze, occ)
     925         738 :             nze_O = nze_O + nze
     926             : 
     927             :             CALL compress_tensor(ri_data%t_3c_int_ctr_1(1, 1), ri_data%blk_indices(j_mem, i_mem)%ind, &
     928         738 :                                  ri_data%store_3c(j_mem, i_mem), ri_data%filter_eps_storage, memory_3c)
     929             : 
     930        3198 :             CALL timestop(handle2)
     931             :          END DO
     932         328 :          CALL dbt_batched_contract_finalize(ri_data%t_2c_int(1, 1))
     933             :       END DO
     934          82 :       CALL dbt_batched_contract_finalize(t_3c_2)
     935          82 :       CALL dbt_batched_contract_finalize(ri_data%t_3c_int_ctr_3(1, 1))
     936             : 
     937          82 :       CALL para_env%sum(memory_3c)
     938          82 :       compression_factor = REAL(nze_O, dp)*1.0E-06*8.0_dp/memory_3c
     939             : 
     940          82 :       IF (unit_nr > 0) THEN
     941             :          WRITE (UNIT=unit_nr, FMT="((T3,A,T66,F11.2,A4))") &
     942          20 :             "MEMORY_INFO| Memory for 3-center HFX integrals (compressed):", memory_3c, ' MiB'
     943             : 
     944             :          WRITE (UNIT=unit_nr, FMT="((T3,A,T60,F21.2))") &
     945          20 :             "MEMORY_INFO| Compression factor:                  ", compression_factor
     946             :       END IF
     947             : 
     948          82 :       CALL dbt_clear(ri_data%t_2c_int(1, 1))
     949          82 :       CALL dbt_destroy(t_3c_2)
     950             : 
     951          82 :       CALL dbt_copy(ri_data%t_3c_int_ctr_3(1, 1), ri_data%t_3c_int_ctr_2(1, 1), order=[2, 1, 3], move_data=.TRUE.)
     952             : 
     953          82 :       CALL timestop(handle)
     954         738 :    END SUBROUTINE
     955             : 
     956             : ! **************************************************************************************************
     957             : !> \brief Sorts 2d indices w.r.t. rows and columns
     958             : !> \param blk_ind ...
     959             : ! **************************************************************************************************
     960           0 :    SUBROUTINE sort_unique_blkind_2d(blk_ind)
     961             :       INTEGER, ALLOCATABLE, DIMENSION(:, :), &
     962             :          INTENT(INOUT)                                   :: blk_ind
     963             : 
     964             :       INTEGER                                            :: end_ind, iblk, iblk_all, irow, nblk, &
     965             :                                                             ncols, start_ind
     966           0 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: ind_1, ind_2, sort_1, sort_2
     967           0 :       INTEGER, ALLOCATABLE, DIMENSION(:, :)              :: blk_ind_tmp
     968             : 
     969           0 :       nblk = SIZE(blk_ind, 1)
     970             : 
     971           0 :       ALLOCATE (sort_1(nblk))
     972           0 :       ALLOCATE (ind_1(nblk))
     973             : 
     974           0 :       sort_1(:) = blk_ind(:, 1)
     975           0 :       CALL sort(sort_1, nblk, ind_1)
     976             : 
     977           0 :       blk_ind(:, :) = blk_ind(ind_1, :)
     978             : 
     979           0 :       start_ind = 1
     980             : 
     981           0 :       DO WHILE (start_ind <= nblk)
     982           0 :          irow = blk_ind(start_ind, 1)
     983           0 :          end_ind = start_ind
     984             : 
     985           0 :          IF (end_ind + 1 <= nblk) THEN
     986           0 :          DO WHILE (blk_ind(end_ind + 1, 1) == irow)
     987           0 :             end_ind = end_ind + 1
     988           0 :             IF (end_ind + 1 > nblk) EXIT
     989             :          END DO
     990             :          END IF
     991             : 
     992           0 :          ncols = end_ind - start_ind + 1
     993           0 :          ALLOCATE (sort_2(ncols))
     994           0 :          ALLOCATE (ind_2(ncols))
     995           0 :          sort_2(:) = blk_ind(start_ind:end_ind, 2)
     996           0 :          CALL sort(sort_2, ncols, ind_2)
     997           0 :          ind_2 = ind_2 + start_ind - 1
     998             : 
     999           0 :          blk_ind(start_ind:end_ind, :) = blk_ind(ind_2, :)
    1000           0 :          start_ind = end_ind + 1
    1001             : 
    1002           0 :          DEALLOCATE (sort_2, ind_2)
    1003             :       END DO
    1004             : 
    1005           0 :       ALLOCATE (blk_ind_tmp(nblk, 2))
    1006           0 :       blk_ind_tmp = 0
    1007             : 
    1008             :       iblk = 0
    1009           0 :       DO iblk_all = 1, nblk
    1010           0 :          IF (iblk >= 1) THEN
    1011           0 :             IF (ALL(blk_ind_tmp(iblk, :) == blk_ind(iblk_all, :))) THEN
    1012             :                CYCLE
    1013             :             END IF
    1014             :          END IF
    1015           0 :          iblk = iblk + 1
    1016           0 :          blk_ind_tmp(iblk, :) = blk_ind(iblk_all, :)
    1017             :       END DO
    1018           0 :       nblk = iblk
    1019             : 
    1020           0 :       DEALLOCATE (blk_ind)
    1021           0 :       ALLOCATE (blk_ind(nblk, 2))
    1022             : 
    1023           0 :       blk_ind(:, :) = blk_ind_tmp(:nblk, :)
    1024             : 
    1025           0 :    END SUBROUTINE
    1026             : 
    1027             : ! **************************************************************************************************
    1028             : !> \brief ...
    1029             : !> \param qs_env ...
    1030             : !> \param ri_data ...
    1031             : !> \param ks_matrix ...
    1032             : !> \param ehfx ...
    1033             : !> \param mos ...
    1034             : !> \param rho_ao ...
    1035             : !> \param geometry_did_change ...
    1036             : !> \param nspins ...
    1037             : !> \param hf_fraction ...
    1038             : ! **************************************************************************************************
    1039        1888 :    SUBROUTINE hfx_ri_update_ks(qs_env, ri_data, ks_matrix, ehfx, mos, rho_ao, &
    1040             :                                geometry_did_change, nspins, hf_fraction)
    1041             :       TYPE(qs_environment_type), POINTER                 :: qs_env
    1042             :       TYPE(hfx_ri_type), INTENT(INOUT)                   :: ri_data
    1043             :       TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER       :: ks_matrix
    1044             :       REAL(KIND=dp), INTENT(OUT)                         :: ehfx
    1045             :       TYPE(mo_set_type), DIMENSION(:), INTENT(IN), &
    1046             :          OPTIONAL                                        :: mos
    1047             :       TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER       :: rho_ao
    1048             :       LOGICAL, INTENT(IN)                                :: geometry_did_change
    1049             :       INTEGER, INTENT(IN)                                :: nspins
    1050             :       REAL(KIND=dp), INTENT(IN)                          :: hf_fraction
    1051             : 
    1052             :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'hfx_ri_update_ks'
    1053             : 
    1054             :       CHARACTER(1)                                       :: mtype
    1055             :       INTEGER                                            :: handle, handle2, i, ispin, j
    1056             :       INTEGER(int_8)                                     :: nblks
    1057             :       INTEGER, DIMENSION(2)                              :: homo
    1058             :       LOGICAL                                            :: is_antisymmetric
    1059             :       REAL(dp)                                           :: etmp, fac
    1060        1888 :       REAL(KIND=dp), DIMENSION(:), POINTER               :: mo_eigenvalues
    1061             :       TYPE(cp_fm_type), POINTER                          :: mo_coeff
    1062        1888 :       TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER       :: my_ks_matrix, my_rho_ao
    1063        5664 :       TYPE(dbcsr_type), DIMENSION(2)                     :: mo_coeff_b
    1064             :       TYPE(dbcsr_type), POINTER                          :: mo_coeff_b_tmp
    1065             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    1066             : 
    1067        1888 :       CALL timeset(routineN, handle)
    1068             : 
    1069        1888 :       IF (nspins == 1) THEN
    1070        1406 :          fac = 0.5_dp*hf_fraction
    1071             :       ELSE
    1072         482 :          fac = 1.0_dp*hf_fraction
    1073             :       END IF
    1074             : 
    1075             :       !If incoming assymetric matrices, need to convert to normal
    1076        1888 :       NULLIFY (my_ks_matrix, my_rho_ao)
    1077        1888 :       CALL dbcsr_get_info(ks_matrix(1, 1)%matrix, matrix_type=mtype)
    1078        1888 :       is_antisymmetric = mtype == dbcsr_type_antisymmetric
    1079        1888 :       IF (is_antisymmetric) THEN
    1080         960 :          ALLOCATE (my_ks_matrix(SIZE(ks_matrix, 1), SIZE(ks_matrix, 2)))
    1081         960 :          ALLOCATE (my_rho_ao(SIZE(rho_ao, 1), SIZE(rho_ao, 2)))
    1082             : 
    1083         320 :          DO i = 1, SIZE(ks_matrix, 1)
    1084         480 :             DO j = 1, SIZE(ks_matrix, 2)
    1085         160 :                ALLOCATE (my_ks_matrix(i, j)%matrix, my_rho_ao(i, j)%matrix)
    1086             :                CALL dbcsr_create(my_ks_matrix(i, j)%matrix, template=ks_matrix(i, j)%matrix, &
    1087         160 :                                  matrix_type=dbcsr_type_no_symmetry)
    1088         160 :                CALL dbcsr_desymmetrize(ks_matrix(i, j)%matrix, my_ks_matrix(i, j)%matrix)
    1089             :                CALL dbcsr_create(my_rho_ao(i, j)%matrix, template=rho_ao(i, j)%matrix, &
    1090         160 :                                  matrix_type=dbcsr_type_no_symmetry)
    1091         320 :                CALL dbcsr_desymmetrize(rho_ao(i, j)%matrix, my_rho_ao(i, j)%matrix)
    1092             :             END DO
    1093             :          END DO
    1094             :       ELSE
    1095        1728 :          my_ks_matrix => ks_matrix
    1096        1728 :          my_rho_ao => rho_ao
    1097             :       END IF
    1098             : 
    1099             :       !Case analysis on RI_FLAVOR: we switch if the input flavor is MO, there is no provided MO, and
    1100             :       !                            the current flavor is not yet RHO. We switch back to MO if there are
    1101             :       !                            MOs available and the current flavor is actually RHO
    1102        1888 :       IF (ri_data%input_flavor == ri_mo .AND. (.NOT. PRESENT(mos)) .AND. ri_data%flavor == ri_mo) THEN
    1103          12 :          CALL switch_ri_flavor(ri_data, qs_env)
    1104        1876 :       ELSE IF (ri_data%input_flavor == ri_mo .AND. PRESENT(mos) .AND. ri_data%flavor == ri_pmat) THEN
    1105          10 :          CALL switch_ri_flavor(ri_data, qs_env)
    1106             :       END IF
    1107             : 
    1108        2216 :       SELECT CASE (ri_data%flavor)
    1109             :       CASE (ri_mo)
    1110         328 :          CPASSERT(PRESENT(mos))
    1111         328 :          CALL timeset(routineN//"_MO", handle2)
    1112             : 
    1113         834 :          DO ispin = 1, nspins
    1114         506 :             NULLIFY (mo_coeff_b_tmp)
    1115         506 :             CPASSERT(mos(ispin)%uniform_occupation)
    1116         506 :             CALL get_mo_set(mo_set=mos(ispin), mo_coeff=mo_coeff, eigenvalues=mo_eigenvalues, mo_coeff_b=mo_coeff_b_tmp)
    1117             : 
    1118         506 :             IF (.NOT. mos(ispin)%use_mo_coeff_b) CALL copy_fm_to_dbcsr(mo_coeff, mo_coeff_b_tmp)
    1119         834 :             CALL dbcsr_copy(mo_coeff_b(ispin), mo_coeff_b_tmp)
    1120             :          END DO
    1121             : 
    1122         834 :          DO ispin = 1, nspins
    1123         506 :             CALL dbcsr_scale(mo_coeff_b(ispin), SQRT(mos(ispin)%maxocc))
    1124         834 :             homo(ispin) = mos(ispin)%homo
    1125             :          END DO
    1126             : 
    1127         328 :          CALL timestop(handle2)
    1128             : 
    1129             :          CALL hfx_ri_update_ks_mo(qs_env, ri_data, my_ks_matrix, mo_coeff_b, homo, &
    1130         328 :                                   geometry_did_change, nspins, fac)
    1131             :       CASE (ri_pmat)
    1132             : 
    1133        1560 :          NULLIFY (para_env)
    1134        1560 :          CALL get_qs_env(qs_env, para_env=para_env)
    1135        3424 :          DO ispin = 1, SIZE(my_rho_ao, 1)
    1136        1864 :             nblks = dbcsr_get_num_blocks(my_rho_ao(ispin, 1)%matrix)
    1137        1864 :             CALL para_env%sum(nblks)
    1138        3424 :             IF (nblks == 0) THEN
    1139           0 :                CPABORT("received empty density matrix")
    1140             :             END IF
    1141             :          END DO
    1142             : 
    1143             :          CALL hfx_ri_update_ks_pmat(qs_env, ri_data, my_ks_matrix, my_rho_ao, &
    1144        3776 :                                     geometry_did_change, nspins, fac)
    1145             : 
    1146             :       END SELECT
    1147             : 
    1148        4258 :       DO ispin = 1, nspins
    1149        4258 :          CALL dbcsr_release(mo_coeff_b(ispin))
    1150             :       END DO
    1151             : 
    1152        4258 :       DO ispin = 1, nspins
    1153        4258 :          CALL dbcsr_filter(my_ks_matrix(ispin, 1)%matrix, ri_data%filter_eps)
    1154             :       END DO
    1155             : 
    1156        1888 :       CALL timeset(routineN//"_energy", handle2)
    1157             :       ! Calculate the exchange energy
    1158        1888 :       ehfx = 0.0_dp
    1159        4258 :       DO ispin = 1, nspins
    1160             :          CALL dbcsr_dot(my_ks_matrix(ispin, 1)%matrix, my_rho_ao(ispin, 1)%matrix, &
    1161        2370 :                         etmp)
    1162        4258 :          ehfx = ehfx + 0.5_dp*etmp
    1163             : 
    1164             :       END DO
    1165        1888 :       CALL timestop(handle2)
    1166             : 
    1167             :       !Anti-symmetric case
    1168        1888 :       IF (is_antisymmetric) THEN
    1169         320 :          DO i = 1, SIZE(ks_matrix, 1)
    1170         480 :             DO j = 1, SIZE(ks_matrix, 2)
    1171         160 :                CALL dbcsr_complete_redistribute(my_ks_matrix(i, j)%matrix, ks_matrix(i, j)%matrix)
    1172         320 :                CALL dbcsr_complete_redistribute(my_rho_ao(i, j)%matrix, rho_ao(i, j)%matrix)
    1173             :             END DO
    1174             :          END DO
    1175         160 :          CALL dbcsr_deallocate_matrix_set(my_ks_matrix)
    1176         160 :          CALL dbcsr_deallocate_matrix_set(my_rho_ao)
    1177             :       END IF
    1178             : 
    1179        1888 :       CALL timestop(handle)
    1180        1888 :    END SUBROUTINE
    1181             : 
    1182             : ! **************************************************************************************************
    1183             : !> \brief Calculate Fock (AKA Kohn-Sham) matrix in MO flavor
    1184             : !>
    1185             : !> C(mu, i) (MO coefficients)
    1186             : !> M(mu, i, R) = sum_nu B(mu, nu, R) C(nu, i)
    1187             : !> KS(mu, lambda) = sum_{i,R} M(mu, i, R) M(lambda, i, R)
    1188             : !> \param qs_env ...
    1189             : !> \param ri_data ...
    1190             : !> \param ks_matrix ...
    1191             : !> \param mo_coeff C(mu, i)
    1192             : !> \param homo ...
    1193             : !> \param geometry_did_change ...
    1194             : !> \param nspins ...
    1195             : !> \param fac ...
    1196             : ! **************************************************************************************************
    1197         328 :    SUBROUTINE hfx_ri_update_ks_mo(qs_env, ri_data, ks_matrix, mo_coeff, &
    1198         328 :                                   homo, geometry_did_change, nspins, fac)
    1199             :       TYPE(qs_environment_type), POINTER                 :: qs_env
    1200             :       TYPE(hfx_ri_type), INTENT(INOUT)                   :: ri_data
    1201             :       TYPE(dbcsr_p_type), DIMENSION(:, :)                :: ks_matrix
    1202             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(IN)         :: mo_coeff
    1203             :       INTEGER, DIMENSION(:)                              :: homo
    1204             :       LOGICAL, INTENT(IN)                                :: geometry_did_change
    1205             :       INTEGER, INTENT(IN)                                :: nspins
    1206             :       REAL(dp), INTENT(IN)                               :: fac
    1207             : 
    1208             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'hfx_ri_update_ks_mo'
    1209             : 
    1210             :       INTEGER                                            :: bsize, bsum, comm_2d_handle, handle, &
    1211             :                                                             handle2, i_mem, iblock, iproc, ispin, &
    1212             :                                                             n_mem, n_mos, nblock, unit_nr_dbcsr
    1213             :       INTEGER(int_8)                                     :: nblks, nflop
    1214         328 :       INTEGER, ALLOCATABLE, DIMENSION(:) :: batch_ranges_1, batch_ranges_2, dist1, dist2, dist3, &
    1215         328 :          mem_end, mem_end_block_1, mem_end_block_2, mem_size, mem_start, mem_start_block_1, &
    1216         328 :          mem_start_block_2, mo_bsizes_1, mo_bsizes_2
    1217         328 :       INTEGER, ALLOCATABLE, DIMENSION(:, :)              :: bounds
    1218             :       INTEGER, DIMENSION(2)                              :: pdims_2d
    1219             :       INTEGER, DIMENSION(3)                              :: pdims
    1220             :       LOGICAL                                            :: do_initialize
    1221             :       REAL(dp)                                           :: t1, t2
    1222             :       TYPE(dbcsr_distribution_type)                      :: ks_dist
    1223        1640 :       TYPE(dbt_pgrid_type)                               :: pgrid, pgrid_2d
    1224        8200 :       TYPE(dbt_type)                                     :: ks_t, ks_t_mat, mo_coeff_t, &
    1225        2952 :                                                             mo_coeff_t_split
    1226         328 :       TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :)       :: t_3c_int_mo_1, t_3c_int_mo_2
    1227             :       TYPE(mp_comm_type)                                 :: comm_2d
    1228             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    1229             : 
    1230         328 :       CALL timeset(routineN, handle)
    1231             : 
    1232         328 :       CPASSERT(SIZE(ks_matrix, 2) == 1)
    1233             : 
    1234         328 :       unit_nr_dbcsr = ri_data%unit_nr_dbcsr
    1235             : 
    1236         328 :       IF (geometry_did_change) THEN
    1237          16 :          CALL hfx_ri_pre_scf_mo(qs_env, ri_data, nspins)
    1238             :       END IF
    1239             : 
    1240         328 :       nblks = dbt_get_num_blocks_total(ri_data%t_3c_int_ctr_1(1, 1))
    1241         328 :       IF (nblks == 0) THEN
    1242           0 :          CPABORT("3-center integrals are not available (first call requires geometry_did_change=.TRUE.)")
    1243             :       END IF
    1244             : 
    1245         834 :       DO ispin = 1, nspins
    1246         506 :          nblks = dbt_get_num_blocks_total(ri_data%t_2c_int(ispin, 1))
    1247         834 :          IF (nblks == 0) THEN
    1248           0 :             CPABORT("2-center integrals are not available (first call requires geometry_did_change=.TRUE.)")
    1249             :          END IF
    1250             :       END DO
    1251             : 
    1252         328 :       IF (.NOT. ALLOCATED(ri_data%t_3c_int_mo)) THEN
    1253          18 :          do_initialize = .TRUE.
    1254          18 :          CPASSERT(.NOT. ALLOCATED(ri_data%t_3c_ctr_RI))
    1255          18 :          CPASSERT(.NOT. ALLOCATED(ri_data%t_3c_ctr_KS))
    1256          18 :          CPASSERT(.NOT. ALLOCATED(ri_data%t_3c_ctr_KS_copy))
    1257         256 :          ALLOCATE (ri_data%t_3c_int_mo(nspins, 1, 1))
    1258         238 :          ALLOCATE (ri_data%t_3c_ctr_RI(nspins, 1, 1))
    1259         238 :          ALLOCATE (ri_data%t_3c_ctr_KS(nspins, 1, 1))
    1260         238 :          ALLOCATE (ri_data%t_3c_ctr_KS_copy(nspins, 1, 1))
    1261             :       ELSE
    1262             :          do_initialize = .FALSE.
    1263             :       END IF
    1264             : 
    1265         328 :       CALL get_qs_env(qs_env, para_env=para_env)
    1266             : 
    1267         328 :       ALLOCATE (bounds(2, 1))
    1268             : 
    1269         328 :       CALL dbcsr_get_info(ks_matrix(1, 1)%matrix, distribution=ks_dist)
    1270         328 :       CALL dbcsr_distribution_get(ks_dist, group=comm_2d_handle, nprows=pdims_2d(1), npcols=pdims_2d(2))
    1271             : 
    1272         328 :       CALL comm_2d%set_handle(comm_2d_handle)
    1273         328 :       pgrid_2d = dbt_nd_mp_comm(comm_2d, [1], [2], pdims_2d=pdims_2d)
    1274             : 
    1275             :       CALL create_2c_tensor(ks_t, dist1, dist2, pgrid_2d, ri_data%bsizes_AO_fit, ri_data%bsizes_AO_fit, &
    1276         328 :                             name="(AO | AO)")
    1277             : 
    1278         328 :       DEALLOCATE (dist1, dist2)
    1279             : 
    1280         328 :       CALL para_env%sync()
    1281         328 :       t1 = m_walltime()
    1282             : 
    1283        7544 :       ALLOCATE (t_3c_int_mo_1(1, 1), t_3c_int_mo_2(1, 1))
    1284         834 :       DO ispin = 1, nspins
    1285             : 
    1286         506 :          CALL dbcsr_get_info(mo_coeff(ispin), nfullcols_total=n_mos)
    1287        1518 :          ALLOCATE (mo_bsizes_2(n_mos))
    1288        3192 :          mo_bsizes_2 = 1
    1289             : 
    1290             :          CALL create_tensor_batches(mo_bsizes_2, ri_data%n_mem, mem_start, mem_end, &
    1291         506 :                                     mem_start_block_2, mem_end_block_2)
    1292         506 :          n_mem = ri_data%n_mem
    1293        1518 :          ALLOCATE (mem_size(n_mem))
    1294             : 
    1295        2374 :          DO i_mem = 1, n_mem
    1296        4554 :             bsize = SUM(mo_bsizes_2(mem_start_block_2(i_mem):mem_end_block_2(i_mem)))
    1297        2374 :             mem_size(i_mem) = bsize
    1298             :          END DO
    1299             : 
    1300         506 :          CALL split_block_sizes(mem_size, mo_bsizes_1, ri_data%max_bsize_MO)
    1301        1012 :          ALLOCATE (mem_start_block_1(n_mem))
    1302        1012 :          ALLOCATE (mem_end_block_1(n_mem))
    1303         506 :          nblock = SIZE(mo_bsizes_1)
    1304         506 :          iblock = 0
    1305        2374 :          DO i_mem = 1, n_mem
    1306             :             bsum = 0
    1307         506 :             DO
    1308        1868 :                iblock = iblock + 1
    1309        1868 :                CPASSERT(iblock <= nblock)
    1310        1868 :                bsum = bsum + mo_bsizes_1(iblock)
    1311        1868 :                IF (bsum == mem_size(i_mem)) THEN
    1312        1868 :                   IF (i_mem == 1) THEN
    1313         506 :                      mem_start_block_1(i_mem) = 1
    1314             :                   ELSE
    1315        1362 :                      mem_start_block_1(i_mem) = mem_end_block_1(i_mem - 1) + 1
    1316             :                   END IF
    1317        1868 :                   mem_end_block_1(i_mem) = iblock
    1318             :                   EXIT
    1319             :                END IF
    1320             :             END DO
    1321             :          END DO
    1322             : 
    1323        1518 :          ALLOCATE (batch_ranges_1(ri_data%n_mem + 1))
    1324        2374 :          batch_ranges_1(:ri_data%n_mem) = mem_start_block_1(:)
    1325         506 :          batch_ranges_1(ri_data%n_mem + 1) = mem_end_block_1(ri_data%n_mem) + 1
    1326             : 
    1327        1012 :          ALLOCATE (batch_ranges_2(ri_data%n_mem + 1))
    1328        2374 :          batch_ranges_2(:ri_data%n_mem) = mem_start_block_2(:)
    1329         506 :          batch_ranges_2(ri_data%n_mem + 1) = mem_end_block_2(ri_data%n_mem) + 1
    1330             : 
    1331         506 :          iproc = para_env%mepos
    1332             : 
    1333             :          CALL create_3c_tensor(t_3c_int_mo_1(1, 1), dist1, dist2, dist3, ri_data%pgrid_1, &
    1334             :                                ri_data%bsizes_AO_split, ri_data%bsizes_RI_split, mo_bsizes_1, &
    1335             :                                [1, 2], [3], &
    1336         506 :                                name="(AO RI | MO)")
    1337             : 
    1338         506 :          DEALLOCATE (dist1, dist2, dist3)
    1339             : 
    1340             :          CALL create_3c_tensor(t_3c_int_mo_2(1, 1), dist1, dist2, dist3, ri_data%pgrid_2, &
    1341             :                                mo_bsizes_1, ri_data%bsizes_RI_split, ri_data%bsizes_AO_split, &
    1342             :                                [1], [2, 3], &
    1343         506 :                                name="(MO | RI AO)")
    1344             : 
    1345         506 :          DEALLOCATE (dist1, dist2, dist3)
    1346             : 
    1347             :          CALL create_2c_tensor(mo_coeff_t_split, dist1, dist2, pgrid_2d, ri_data%bsizes_AO_split, mo_bsizes_1, &
    1348             :                                name="(AO | MO)")
    1349             : 
    1350         506 :          DEALLOCATE (dist1, dist2)
    1351             : 
    1352         506 :          CPASSERT(homo(ispin)/ri_data%n_mem > 0)
    1353             : 
    1354         506 :          IF (do_initialize) THEN
    1355          22 :             pdims(:) = 0
    1356             : 
    1357             :             CALL dbt_pgrid_create(para_env, pdims, pgrid, &
    1358             :                                   tensor_dims=[SIZE(ri_data%bsizes_RI_fit), &
    1359             :                                                (homo(ispin) - 1)/ri_data%n_mem + 1, &
    1360          88 :                                                SIZE(ri_data%bsizes_AO_fit)])
    1361             :             CALL create_3c_tensor(ri_data%t_3c_int_mo(ispin, 1, 1), dist1, dist2, dist3, pgrid, &
    1362             :                                   ri_data%bsizes_RI_fit, mo_bsizes_2, ri_data%bsizes_AO_fit, &
    1363             :                                   [1], [2, 3], &
    1364          22 :                                   name="(RI | MO AO)")
    1365             : 
    1366          22 :             DEALLOCATE (dist1, dist2, dist3)
    1367             : 
    1368             :             CALL create_3c_tensor(ri_data%t_3c_ctr_KS(ispin, 1, 1), dist1, dist2, dist3, pgrid, &
    1369             :                                   ri_data%bsizes_RI_fit, mo_bsizes_2, ri_data%bsizes_AO_fit, &
    1370             :                                   [1, 2], [3], &
    1371          22 :                                   name="(RI MO | AO)")
    1372          22 :             DEALLOCATE (dist1, dist2, dist3)
    1373          22 :             CALL dbt_pgrid_destroy(pgrid)
    1374             : 
    1375          22 :             CALL dbt_create(ri_data%t_3c_int_mo(ispin, 1, 1), ri_data%t_3c_ctr_RI(ispin, 1, 1), name="(RI | MO AO)")
    1376          22 :             CALL dbt_create(ri_data%t_3c_ctr_KS(ispin, 1, 1), ri_data%t_3c_ctr_KS_copy(ispin, 1, 1))
    1377             :          END IF
    1378             : 
    1379         506 :          CALL dbt_create(mo_coeff(ispin), mo_coeff_t, name="MO coeffs")
    1380         506 :          CALL dbt_copy_matrix_to_tensor(mo_coeff(ispin), mo_coeff_t)
    1381         506 :          CALL dbt_copy(mo_coeff_t, mo_coeff_t_split, move_data=.TRUE.)
    1382         506 :          CALL dbt_filter(mo_coeff_t_split, ri_data%filter_eps_mo)
    1383         506 :          CALL dbt_destroy(mo_coeff_t)
    1384             : 
    1385         506 :          CALL dbt_batched_contract_init(ks_t)
    1386         506 :          CALL dbt_batched_contract_init(ri_data%t_3c_ctr_KS(ispin, 1, 1), batch_range_2=batch_ranges_2)
    1387         506 :          CALL dbt_batched_contract_init(ri_data%t_3c_ctr_KS_copy(ispin, 1, 1), batch_range_2=batch_ranges_2)
    1388             : 
    1389         506 :          CALL dbt_batched_contract_init(ri_data%t_2c_int(ispin, 1))
    1390         506 :          CALL dbt_batched_contract_init(ri_data%t_3c_int_mo(ispin, 1, 1), batch_range_2=batch_ranges_2)
    1391         506 :          CALL dbt_batched_contract_init(ri_data%t_3c_ctr_RI(ispin, 1, 1), batch_range_2=batch_ranges_2)
    1392             : 
    1393        2374 :          DO i_mem = 1, n_mem
    1394             : 
    1395        5604 :             bounds(:, 1) = [mem_start(i_mem), mem_end(i_mem)]
    1396             : 
    1397        1868 :             CALL dbt_batched_contract_init(mo_coeff_t_split)
    1398        1868 :             CALL dbt_batched_contract_init(ri_data%t_3c_int_ctr_1(1, 1))
    1399             :             CALL dbt_batched_contract_init(t_3c_int_mo_1(1, 1), &
    1400        1868 :                                            batch_range_3=batch_ranges_1)
    1401        1868 :             CALL timeset(routineN//"_MOx3C_R", handle2)
    1402             :             CALL dbt_contract(1.0_dp, mo_coeff_t_split, ri_data%t_3c_int_ctr_1(1, 1), &
    1403             :                               0.0_dp, t_3c_int_mo_1(1, 1), &
    1404             :                               contract_1=[1], notcontract_1=[2], &
    1405             :                               contract_2=[3], notcontract_2=[1, 2], &
    1406             :                               map_1=[3], map_2=[1, 2], &
    1407             :                               bounds_2=bounds, &
    1408             :                               filter_eps=ri_data%filter_eps_mo/2, &
    1409             :                               unit_nr=unit_nr_dbcsr, &
    1410             :                               move_data=.FALSE., &
    1411        1868 :                               flop=nflop)
    1412             : 
    1413        1868 :             ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
    1414             : 
    1415        1868 :             CALL timestop(handle2)
    1416        1868 :             CALL dbt_batched_contract_finalize(mo_coeff_t_split)
    1417        1868 :             CALL dbt_batched_contract_finalize(ri_data%t_3c_int_ctr_1(1, 1))
    1418        1868 :             CALL dbt_batched_contract_finalize(t_3c_int_mo_1(1, 1))
    1419             : 
    1420        1868 :             CALL timeset(routineN//"_copy_1", handle2)
    1421        1868 :             CALL dbt_copy(t_3c_int_mo_1(1, 1), ri_data%t_3c_int_mo(ispin, 1, 1), order=[3, 1, 2], move_data=.TRUE.)
    1422        1868 :             CALL timestop(handle2)
    1423             : 
    1424        1868 :             CALL dbt_batched_contract_init(mo_coeff_t_split)
    1425        1868 :             CALL dbt_batched_contract_init(ri_data%t_3c_int_ctr_2(1, 1))
    1426             :             CALL dbt_batched_contract_init(t_3c_int_mo_2(1, 1), &
    1427        1868 :                                            batch_range_1=batch_ranges_1)
    1428             : 
    1429        1868 :             CALL timeset(routineN//"_MOx3C_L", handle2)
    1430             :             CALL dbt_contract(1.0_dp, mo_coeff_t_split, ri_data%t_3c_int_ctr_2(1, 1), &
    1431             :                               0.0_dp, t_3c_int_mo_2(1, 1), &
    1432             :                               contract_1=[1], notcontract_1=[2], &
    1433             :                               contract_2=[1], notcontract_2=[2, 3], &
    1434             :                               map_1=[1], map_2=[2, 3], &
    1435             :                               bounds_2=bounds, &
    1436             :                               filter_eps=ri_data%filter_eps_mo/2, &
    1437             :                               unit_nr=unit_nr_dbcsr, &
    1438             :                               move_data=.FALSE., &
    1439        1868 :                               flop=nflop)
    1440             : 
    1441        1868 :             ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
    1442             : 
    1443        1868 :             CALL timestop(handle2)
    1444             : 
    1445        1868 :             CALL dbt_batched_contract_finalize(mo_coeff_t_split)
    1446        1868 :             CALL dbt_batched_contract_finalize(ri_data%t_3c_int_ctr_2(1, 1))
    1447        1868 :             CALL dbt_batched_contract_finalize(t_3c_int_mo_2(1, 1))
    1448             : 
    1449        1868 :             CALL timeset(routineN//"_copy_1", handle2)
    1450             :             CALL dbt_copy(t_3c_int_mo_2(1, 1), ri_data%t_3c_int_mo(ispin, 1, 1), order=[2, 1, 3], &
    1451        1868 :                           summation=.TRUE., move_data=.TRUE.)
    1452             : 
    1453        1868 :             CALL dbt_filter(ri_data%t_3c_int_mo(ispin, 1, 1), ri_data%filter_eps_mo)
    1454        1868 :             CALL timestop(handle2)
    1455             : 
    1456        1868 :             CALL timeset(routineN//"_RIx3C", handle2)
    1457             : 
    1458             :             CALL dbt_contract(1.0_dp, ri_data%t_2c_int(ispin, 1), ri_data%t_3c_int_mo(ispin, 1, 1), &
    1459             :                               0.0_dp, ri_data%t_3c_ctr_RI(ispin, 1, 1), &
    1460             :                               contract_1=[1], notcontract_1=[2], &
    1461             :                               contract_2=[1], notcontract_2=[2, 3], &
    1462             :                               map_1=[1], map_2=[2, 3], filter_eps=ri_data%filter_eps, &
    1463             :                               unit_nr=unit_nr_dbcsr, &
    1464        1868 :                               flop=nflop)
    1465             : 
    1466        1868 :             ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
    1467             : 
    1468        1868 :             CALL timestop(handle2)
    1469             : 
    1470        1868 :             CALL timeset(routineN//"_copy_2", handle2)
    1471             : 
    1472             :             ! note: this copy should not involve communication (same block sizes, same 3d distribution on same process grid)
    1473        1868 :             CALL dbt_copy(ri_data%t_3c_ctr_RI(ispin, 1, 1), ri_data%t_3c_ctr_KS(ispin, 1, 1), move_data=.TRUE.)
    1474        1868 :             CALL dbt_copy(ri_data%t_3c_ctr_KS(ispin, 1, 1), ri_data%t_3c_ctr_KS_copy(ispin, 1, 1))
    1475        1868 :             CALL timestop(handle2)
    1476             : 
    1477        1868 :             CALL timeset(routineN//"_3Cx3C", handle2)
    1478             :             CALL dbt_contract(-fac, ri_data%t_3c_ctr_KS(ispin, 1, 1), ri_data%t_3c_ctr_KS_copy(ispin, 1, 1), &
    1479             :                               1.0_dp, ks_t, &
    1480             :                               contract_1=[1, 2], notcontract_1=[3], &
    1481             :                               contract_2=[1, 2], notcontract_2=[3], &
    1482             :                               map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps/n_mem, &
    1483             :                               unit_nr=unit_nr_dbcsr, move_data=.TRUE., &
    1484        1868 :                               flop=nflop)
    1485             : 
    1486        1868 :             ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
    1487             : 
    1488       15450 :             CALL timestop(handle2)
    1489             :          END DO
    1490             : 
    1491         506 :          CALL dbt_batched_contract_finalize(ks_t)
    1492         506 :          CALL dbt_batched_contract_finalize(ri_data%t_3c_ctr_KS(ispin, 1, 1))
    1493         506 :          CALL dbt_batched_contract_finalize(ri_data%t_3c_ctr_KS_copy(ispin, 1, 1))
    1494             : 
    1495         506 :          CALL dbt_batched_contract_finalize(ri_data%t_2c_int(ispin, 1))
    1496         506 :          CALL dbt_batched_contract_finalize(ri_data%t_3c_int_mo(ispin, 1, 1))
    1497         506 :          CALL dbt_batched_contract_finalize(ri_data%t_3c_ctr_RI(ispin, 1, 1))
    1498             : 
    1499         506 :          CALL dbt_destroy(t_3c_int_mo_1(1, 1))
    1500         506 :          CALL dbt_destroy(t_3c_int_mo_2(1, 1))
    1501         506 :          CALL dbt_clear(ri_data%t_3c_int_mo(ispin, 1, 1))
    1502             : 
    1503         506 :          CALL dbt_destroy(mo_coeff_t_split)
    1504             : 
    1505         506 :          CALL dbt_filter(ks_t, ri_data%filter_eps)
    1506             : 
    1507         506 :          CALL dbt_create(ks_matrix(ispin, 1)%matrix, ks_t_mat)
    1508         506 :          CALL dbt_copy(ks_t, ks_t_mat, move_data=.TRUE.)
    1509         506 :          CALL dbt_copy_tensor_to_matrix(ks_t_mat, ks_matrix(ispin, 1)%matrix, summation=.TRUE.)
    1510         506 :          CALL dbt_destroy(ks_t_mat)
    1511             : 
    1512           0 :          DEALLOCATE (mem_end, mem_start, mo_bsizes_2, mem_size, mem_start_block_1, mem_end_block_1, &
    1513        2858 :                      mem_start_block_2, mem_end_block_2, batch_ranges_1, batch_ranges_2)
    1514             : 
    1515             :       END DO
    1516             : 
    1517         328 :       CALL dbt_pgrid_destroy(pgrid_2d)
    1518         328 :       CALL dbt_destroy(ks_t)
    1519             : 
    1520         328 :       CALL para_env%sync()
    1521         328 :       t2 = m_walltime()
    1522             : 
    1523         328 :       ri_data%dbcsr_time = ri_data%dbcsr_time + t2 - t1
    1524             : 
    1525         328 :       CALL timestop(handle)
    1526             : 
    1527        2952 :    END SUBROUTINE
    1528             : 
    1529             : ! **************************************************************************************************
    1530             : !> \brief Calculate Fock (AKA Kohn-Sham) matrix in rho flavor
    1531             : !>
    1532             : !> M(mu, lambda, R) = sum_{nu} int_3c(mu, nu, R) P(nu, lambda)
    1533             : !> KS(mu, lambda) = sum_{nu,R} B(mu, nu, R) M(lambda, nu, R)
    1534             : !> \param qs_env ...
    1535             : !> \param ri_data ...
    1536             : !> \param ks_matrix ...
    1537             : !> \param rho_ao ...
    1538             : !> \param geometry_did_change ...
    1539             : !> \param nspins ...
    1540             : !> \param fac ...
    1541             : ! **************************************************************************************************
    1542        1560 :    SUBROUTINE hfx_ri_update_ks_Pmat(qs_env, ri_data, ks_matrix, rho_ao, &
    1543             :                                     geometry_did_change, nspins, fac)
    1544             :       TYPE(qs_environment_type), POINTER                 :: qs_env
    1545             :       TYPE(hfx_ri_type), INTENT(INOUT)                   :: ri_data
    1546             :       TYPE(dbcsr_p_type), DIMENSION(:, :)                :: ks_matrix, rho_ao
    1547             :       LOGICAL, INTENT(IN)                                :: geometry_did_change
    1548             :       INTEGER, INTENT(IN)                                :: nspins
    1549             :       REAL(dp), INTENT(IN)                               :: fac
    1550             : 
    1551             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'hfx_ri_update_ks_Pmat'
    1552             : 
    1553             :       INTEGER                                            :: handle, handle2, i_mem, ispin, j_mem, &
    1554             :                                                             n_mem, n_mem_RI, unit_nr, unit_nr_dbcsr
    1555             :       INTEGER(int_8)                                     :: flops_ks_max, flops_p_max, nblks, nflop, &
    1556             :                                                             nze, nze_3c, nze_3c_1, nze_3c_2, &
    1557             :                                                             nze_ks, nze_rho
    1558        1560 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: batch_ranges_AO, batch_ranges_RI, dist1, &
    1559        1560 :                                                             dist2
    1560             :       INTEGER, DIMENSION(2, 1)                           :: bounds_i
    1561             :       INTEGER, DIMENSION(2, 2)                           :: bounds_ij, bounds_j
    1562             :       INTEGER, DIMENSION(3)                              :: dims_3c
    1563             :       REAL(dp)                                           :: memory_3c, occ, occ_3c, occ_3c_1, &
    1564             :                                                             occ_3c_2, occ_ks, occ_rho, t1, t2, &
    1565             :                                                             unused
    1566       39000 :       TYPE(dbt_type)                                     :: ks_t, ks_tmp, rho_ao_tmp, t_3c_1, &
    1567       20280 :                                                             t_3c_3, tensor_old
    1568             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    1569             : 
    1570        1560 :       IF (.NOT. fac > EPSILON(0.0_dp)) RETURN
    1571             : 
    1572        1560 :       CALL timeset(routineN, handle)
    1573             : 
    1574        1560 :       NULLIFY (para_env)
    1575             : 
    1576             :       ! get a useful output_unit
    1577        1560 :       unit_nr_dbcsr = ri_data%unit_nr_dbcsr
    1578        1560 :       unit_nr = ri_data%unit_nr
    1579             : 
    1580        1560 :       CALL get_qs_env(qs_env, para_env=para_env)
    1581             : 
    1582        1560 :       CPASSERT(SIZE(ks_matrix, 2) == 1)
    1583             : 
    1584        1560 :       IF (geometry_did_change) THEN
    1585          70 :          CALL hfx_ri_pre_scf_Pmat(qs_env, ri_data)
    1586         160 :          DO ispin = 1, nspins
    1587          90 :             CALL dbt_scale(ri_data%rho_ao_t(ispin, 1), 0.0_dp)
    1588         160 :             CALL dbt_scale(ri_data%ks_t(ispin, 1), 0.0_dp)
    1589             :          END DO
    1590             :       END IF
    1591             : 
    1592        1560 :       nblks = dbt_get_num_blocks_total(ri_data%t_3c_int_ctr_2(1, 1))
    1593        1560 :       IF (nblks == 0) THEN
    1594           0 :          CPABORT("3-center integrals are not available (first call requires geometry_did_change=.TRUE.)")
    1595             :       END IF
    1596             : 
    1597        1560 :       n_mem = ri_data%n_mem
    1598        1560 :       n_mem_RI = ri_data%n_mem_RI
    1599             : 
    1600        1560 :       CALL dbt_create(ks_matrix(1, 1)%matrix, ks_tmp)
    1601        1560 :       CALL dbt_create(rho_ao(1, 1)%matrix, rho_ao_tmp)
    1602             : 
    1603             :       CALL create_2c_tensor(ks_t, dist1, dist2, ri_data%pgrid_2d, &
    1604             :                             ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
    1605             :                             name="(AO | AO)")
    1606        1560 :       DEALLOCATE (dist1, dist2)
    1607             : 
    1608        1560 :       CALL dbt_create(ri_data%t_3c_int_ctr_2(1, 1), t_3c_1)
    1609        1560 :       CALL dbt_create(ri_data%t_3c_int_ctr_1(1, 1), t_3c_3)
    1610             : 
    1611        1560 :       CALL para_env%sync()
    1612        1560 :       t1 = m_walltime()
    1613             : 
    1614        1560 :       flops_ks_max = 0; flops_p_max = 0
    1615             : 
    1616        4680 :       ALLOCATE (batch_ranges_RI(ri_data%n_mem_RI + 1))
    1617        4680 :       ALLOCATE (batch_ranges_AO(ri_data%n_mem + 1))
    1618        6240 :       batch_ranges_RI(:ri_data%n_mem_RI) = ri_data%starts_array_RI_mem_block(:)
    1619        1560 :       batch_ranges_RI(ri_data%n_mem_RI + 1) = ri_data%ends_array_RI_mem_block(ri_data%n_mem_RI) + 1
    1620        6240 :       batch_ranges_AO(:ri_data%n_mem) = ri_data%starts_array_mem_block(:)
    1621        1560 :       batch_ranges_AO(ri_data%n_mem + 1) = ri_data%ends_array_mem_block(ri_data%n_mem) + 1
    1622             : 
    1623        1560 :       memory_3c = 0.0_dp
    1624        3424 :       DO ispin = 1, nspins
    1625             : 
    1626        1864 :          CALL get_tensor_occupancy(ri_data%t_3c_int_ctr_2(1, 1), nze_3c, occ_3c)
    1627             : 
    1628             :          nze_rho = 0
    1629             :          occ_rho = 0.0_dp
    1630        1864 :          nze_3c_1 = 0
    1631        1864 :          occ_3c_1 = 0.0_dp
    1632        1864 :          nze_3c_2 = 0
    1633        1864 :          occ_3c_2 = 0.0_dp
    1634             : 
    1635        1864 :          CALL dbt_copy_matrix_to_tensor(rho_ao(ispin, 1)%matrix, rho_ao_tmp)
    1636             : 
    1637             :          !We work with Delta P: the diff between previous SCF step and this one, for increased sparsity
    1638        1864 :          CALL dbt_scale(ri_data%rho_ao_t(ispin, 1), -1.0_dp)
    1639        1864 :          CALL dbt_copy(rho_ao_tmp, ri_data%rho_ao_t(ispin, 1), summation=.TRUE., move_data=.TRUE.)
    1640             : 
    1641        1864 :          CALL get_tensor_occupancy(ri_data%rho_ao_t(ispin, 1), nze_rho, occ_rho)
    1642             : 
    1643             :          CALL dbt_batched_contract_init(ri_data%t_3c_int_ctr_1(1, 1), batch_range_1=batch_ranges_AO, &
    1644        1864 :                                         batch_range_2=batch_ranges_RI)
    1645        1864 :          CALL dbt_batched_contract_init(t_3c_3, batch_range_1=batch_ranges_AO, batch_range_2=batch_ranges_RI)
    1646             : 
    1647        1864 :          CALL dbt_create(ri_data%t_3c_int_ctr_1(1, 1), tensor_old)
    1648             : 
    1649        7456 :          DO i_mem = 1, n_mem
    1650             : 
    1651        5592 :             CALL dbt_batched_contract_init(ri_data%rho_ao_t(ispin, 1))
    1652             :             CALL dbt_batched_contract_init(ri_data%t_3c_int_ctr_2(1, 1), batch_range_2=batch_ranges_RI, &
    1653        5592 :                                            batch_range_3=batch_ranges_AO)
    1654        5592 :             CALL dbt_batched_contract_init(t_3c_1, batch_range_2=batch_ranges_RI, batch_range_3=batch_ranges_AO)
    1655       22368 :             DO j_mem = 1, n_mem_RI
    1656             : 
    1657       16776 :                CALL timeset(routineN//"_Px3C", handle2)
    1658             : 
    1659       16776 :                CALL dbt_get_info(t_3c_1, nfull_total=dims_3c)
    1660       50328 :                bounds_i(:, 1) = [ri_data%starts_array_mem(i_mem), ri_data%ends_array_mem(i_mem)]
    1661       50328 :                bounds_j(:, 1) = [1, dims_3c(1)]
    1662       50328 :                bounds_j(:, 2) = [ri_data%starts_array_RI_mem(j_mem), ri_data%ends_array_RI_mem(j_mem)]
    1663             : 
    1664             :                CALL dbt_contract(1.0_dp, ri_data%rho_ao_t(ispin, 1), ri_data%t_3c_int_ctr_2(1, 1), &
    1665             :                                  0.0_dp, t_3c_1, &
    1666             :                                  contract_1=[2], notcontract_1=[1], &
    1667             :                                  contract_2=[3], notcontract_2=[1, 2], &
    1668             :                                  map_1=[3], map_2=[1, 2], filter_eps=ri_data%filter_eps, &
    1669             :                                  bounds_2=bounds_i, &
    1670             :                                  bounds_3=bounds_j, &
    1671             :                                  unit_nr=unit_nr_dbcsr, &
    1672       16776 :                                  flop=nflop)
    1673             : 
    1674       16776 :                CALL timestop(handle2)
    1675             : 
    1676       16776 :                ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
    1677             : 
    1678       16776 :                CALL get_tensor_occupancy(t_3c_1, nze, occ)
    1679       16776 :                nze_3c_1 = nze_3c_1 + nze
    1680       16776 :                occ_3c_1 = occ_3c_1 + occ
    1681             : 
    1682       16776 :                CALL timeset(routineN//"_copy_2", handle2)
    1683       16776 :                CALL dbt_copy(t_3c_1, t_3c_3, order=[3, 2, 1], move_data=.TRUE.)
    1684       16776 :                CALL timestop(handle2)
    1685             : 
    1686       50328 :                bounds_ij(:, 1) = [ri_data%starts_array_mem(i_mem), ri_data%ends_array_mem(i_mem)]
    1687       50328 :                bounds_ij(:, 2) = [ri_data%starts_array_RI_mem(j_mem), ri_data%ends_array_RI_mem(j_mem)]
    1688             : 
    1689             :                CALL decompress_tensor(tensor_old, ri_data%blk_indices(i_mem, j_mem)%ind, &
    1690       16776 :                                       ri_data%store_3c(i_mem, j_mem), ri_data%filter_eps_storage)
    1691             : 
    1692       16776 :                CALL dbt_copy(tensor_old, ri_data%t_3c_int_ctr_1(1, 1), move_data=.TRUE.)
    1693             : 
    1694       16776 :                CALL get_tensor_occupancy(ri_data%t_3c_int_ctr_1(1, 1), nze, occ)
    1695       16776 :                nze_3c_2 = nze_3c_2 + nze
    1696       16776 :                occ_3c_2 = occ_3c_2 + occ
    1697       16776 :                CALL timeset(routineN//"_KS", handle2)
    1698       16776 :                CALL dbt_batched_contract_init(ks_t)
    1699             :                CALL dbt_contract(-fac, ri_data%t_3c_int_ctr_1(1, 1), t_3c_3, &
    1700             :                                  1.0_dp, ks_t, &
    1701             :                                  contract_1=[1, 2], notcontract_1=[3], &
    1702             :                                  contract_2=[1, 2], notcontract_2=[3], &
    1703             :                                  map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps/n_mem, &
    1704             :                                  bounds_1=bounds_ij, &
    1705             :                                  unit_nr=unit_nr_dbcsr, &
    1706       16776 :                                  flop=nflop, move_data=.TRUE.)
    1707             : 
    1708       16776 :                CALL dbt_batched_contract_finalize(ks_t, unit_nr=unit_nr_dbcsr)
    1709       16776 :                CALL timestop(handle2)
    1710             : 
    1711       72696 :                ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
    1712             : 
    1713             :             END DO
    1714        5592 :             CALL dbt_batched_contract_finalize(ri_data%rho_ao_t(ispin, 1), unit_nr=unit_nr_dbcsr)
    1715        5592 :             CALL dbt_batched_contract_finalize(ri_data%t_3c_int_ctr_2(1, 1))
    1716        7456 :             CALL dbt_batched_contract_finalize(t_3c_1)
    1717             :          END DO
    1718        1864 :          CALL dbt_batched_contract_finalize(ri_data%t_3c_int_ctr_1(1, 1))
    1719        1864 :          CALL dbt_batched_contract_finalize(t_3c_3)
    1720             : 
    1721        7456 :          DO i_mem = 1, n_mem
    1722       24232 :             DO j_mem = 1, n_mem_RI
    1723        5592 :                ASSOCIATE (blk_indices => ri_data%blk_indices(i_mem, j_mem), t_3c => ri_data%t_3c_int_ctr_1(1, 1))
    1724             :                   CALL decompress_tensor(tensor_old, blk_indices%ind, &
    1725       16776 :                                          ri_data%store_3c(i_mem, j_mem), ri_data%filter_eps_storage)
    1726       16776 :                   CALL dbt_copy(tensor_old, t_3c, move_data=.TRUE.)
    1727             : 
    1728       16776 :                   unused = 0
    1729             :                   CALL compress_tensor(t_3c, blk_indices%ind, ri_data%store_3c(i_mem, j_mem), &
    1730       33552 :                                        ri_data%filter_eps_storage, unused)
    1731             :                END ASSOCIATE
    1732             :             END DO
    1733             :          END DO
    1734             : 
    1735        1864 :          CALL dbt_destroy(tensor_old)
    1736             : 
    1737        1864 :          CALL get_tensor_occupancy(ks_t, nze_ks, occ_ks)
    1738             : 
    1739             :          !rho_ao_t holds the density difference, and ks_t is built upon it => need the full picture
    1740        1864 :          CALL dbt_copy_matrix_to_tensor(rho_ao(ispin, 1)%matrix, rho_ao_tmp)
    1741        1864 :          CALL dbt_copy(rho_ao_tmp, ri_data%rho_ao_t(ispin, 1), move_data=.TRUE.)
    1742        1864 :          CALL dbt_copy(ks_t, ri_data%ks_t(ispin, 1), summation=.TRUE., move_data=.TRUE.)
    1743             : 
    1744        1864 :          CALL dbt_copy(ri_data%ks_t(ispin, 1), ks_tmp)
    1745        1864 :          CALL dbt_copy_tensor_to_matrix(ks_tmp, ks_matrix(ispin, 1)%matrix, summation=.TRUE.)
    1746        1864 :          CALL dbt_clear(ks_tmp)
    1747             : 
    1748        9016 :          IF (unit_nr > 0 .AND. geometry_did_change) THEN
    1749             :             WRITE (unit_nr, '(T6,A,T63,ES7.1,1X,A1,1X,F7.3,A1)') &
    1750          28 :                'Occupancy of density matrix P:', REAL(nze_rho, dp), '/', occ_rho*100, '%'
    1751             :             WRITE (unit_nr, '(T6,A,T63,ES7.1,1X,A1,1X,F7.3,A1)') &
    1752          28 :                'Occupancy of 3c ints:', REAL(nze_3c, dp), '/', occ_3c*100, '%'
    1753             :             WRITE (unit_nr, '(T6,A,T63,ES7.1,1X,A1,1X,F7.3,A1)') &
    1754          28 :                'Occupancy after contraction with K:', REAL(nze_3c_2, dp), '/', occ_3c_2*100, '%'
    1755             :             WRITE (unit_nr, '(T6,A,T63,ES7.1,1X,A1,1X,F7.3,A1)') &
    1756          28 :                'Occupancy after contraction with P:', REAL(nze_3c_1, dp), '/', occ_3c_1*100, '%'
    1757             :             WRITE (unit_nr, '(T6,A,T63,ES7.1,1X,A1,1X,F7.3,A1)') &
    1758          28 :                'Occupancy of Kohn-Sham matrix:', REAL(nze_ks, dp), '/', occ_ks*100, '%'
    1759             :          END IF
    1760             : 
    1761             :       END DO
    1762             : 
    1763        1560 :       CALL para_env%sync()
    1764        1560 :       t2 = m_walltime()
    1765             : 
    1766        1560 :       ri_data%dbcsr_time = ri_data%dbcsr_time + t2 - t1
    1767             : 
    1768        1560 :       CALL dbt_destroy(t_3c_1)
    1769        1560 :       CALL dbt_destroy(t_3c_3)
    1770             : 
    1771        1560 :       CALL dbt_destroy(rho_ao_tmp)
    1772        1560 :       CALL dbt_destroy(ks_t)
    1773        1560 :       CALL dbt_destroy(ks_tmp)
    1774             : 
    1775        1560 :       CALL timestop(handle)
    1776             : 
    1777        4680 :    END SUBROUTINE
    1778             : 
    1779             : ! **************************************************************************************************
    1780             : !> \brief Implementation based on the MO flavor
    1781             : !> \param qs_env ...
    1782             : !> \param ri_data ...
    1783             : !> \param nspins ...
    1784             : !> \param hf_fraction ...
    1785             : !> \param mo_coeff ...
    1786             : !> \param use_virial ...
    1787             : !> \note There is no response code for forces with the MO flavor
    1788             : ! **************************************************************************************************
    1789          14 :    SUBROUTINE hfx_ri_forces_mo(qs_env, ri_data, nspins, hf_fraction, mo_coeff, use_virial)
    1790             : 
    1791             :       TYPE(qs_environment_type), POINTER                 :: qs_env
    1792             :       TYPE(hfx_ri_type), INTENT(INOUT)                   :: ri_data
    1793             :       INTEGER, INTENT(IN)                                :: nspins
    1794             :       REAL(dp), INTENT(IN)                               :: hf_fraction
    1795             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(IN)         :: mo_coeff
    1796             :       LOGICAL, INTENT(IN), OPTIONAL                      :: use_virial
    1797             : 
    1798             :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'hfx_ri_forces_mo'
    1799             : 
    1800             :       INTEGER :: dummy_int, handle, i_mem, i_xyz, ibasis, ispin, j_mem, k_mem, n_mem, n_mem_input, &
    1801             :          n_mem_input_RI, n_mem_RI, n_mem_RI_fit, n_mos, natom, nkind, unit_nr_dbcsr
    1802             :       INTEGER(int_8)                                     :: nflop
    1803          14 :       INTEGER, ALLOCATABLE, DIMENSION(:) :: atom_of_kind, batch_blk_end, batch_blk_start, &
    1804          14 :          batch_end, batch_end_RI, batch_end_RI_fit, batch_ranges, batch_ranges_RI, &
    1805          14 :          batch_ranges_RI_fit, batch_start, batch_start_RI, batch_start_RI_fit, bsizes_MO, dist1, &
    1806          14 :          dist2, dist3, idx_to_at_AO, idx_to_at_RI, kind_of
    1807             :       INTEGER, DIMENSION(2, 1)                           :: bounds_ctr_1d
    1808             :       INTEGER, DIMENSION(2, 2)                           :: bounds_ctr_2d
    1809             :       INTEGER, DIMENSION(3)                              :: pdims
    1810             :       LOGICAL                                            :: use_virial_prv
    1811             :       REAL(dp)                                           :: pref, spin_fac, t1, t2
    1812          14 :       TYPE(atomic_kind_type), DIMENSION(:), POINTER      :: atomic_kind_set
    1813          14 :       TYPE(block_ind_type), ALLOCATABLE, DIMENSION(:, :) :: t_3c_der_AO_ind, t_3c_der_RI_ind
    1814             :       TYPE(cell_type), POINTER                           :: cell
    1815          14 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrix_s
    1816          70 :       TYPE(dbt_pgrid_type)                               :: pgrid_1, pgrid_2
    1817         602 :       TYPE(dbt_type) :: t_2c_RI, t_2c_RI_inv, t_2c_RI_met, t_2c_RI_PQ, t_2c_tmp, t_3c_0, t_3c_1, &
    1818         700 :          t_3c_2, t_3c_3, t_3c_4, t_3c_5, t_3c_6, t_3c_ao_ri_ao, t_3c_ao_ri_mo, t_3c_desymm, &
    1819         434 :          t_3c_mo_ri_ao, t_3c_mo_ri_mo, t_3c_ri_ao_ao, t_3c_RI_ctr, t_3c_ri_mo_mo, &
    1820         350 :          t_3c_ri_mo_mo_fit, t_3c_work, t_mo_coeff, t_mo_cpy
    1821          14 :       TYPE(dbt_type), ALLOCATABLE, DIMENSION(:) :: t_2c_der_metric, t_2c_der_RI, t_2c_MO_AO, &
    1822          28 :          t_2c_MO_AO_ctr, t_3c_der_AO, t_3c_der_AO_ctr_1, t_3c_der_RI, t_3c_der_RI_ctr_1, &
    1823          14 :          t_3c_der_RI_ctr_2
    1824             :       TYPE(dft_control_type), POINTER                    :: dft_control
    1825             :       TYPE(gto_basis_set_p_type), ALLOCATABLE, &
    1826          14 :          DIMENSION(:), TARGET                            :: basis_set_AO, basis_set_RI
    1827             :       TYPE(gto_basis_set_type), POINTER                  :: orb_basis, ri_basis
    1828             :       TYPE(hfx_compression_type), ALLOCATABLE, &
    1829          14 :          DIMENSION(:, :)                                 :: t_3c_der_AO_comp, t_3c_der_RI_comp
    1830             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    1831          14 :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
    1832          14 :       TYPE(qs_force_type), DIMENSION(:), POINTER         :: force
    1833          14 :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
    1834             : 
    1835             :       ! 1) Precompute the derivatives that are needed (3c, 3c RI and metric)
    1836             :       ! 2) Go over batched of occupied MOs so as to save memory and optimize contractions
    1837             :       ! 3) Contract all 3c integrals and derivatives with MO coeffs
    1838             :       ! 4) Contract relevant quantities with the inverse 2c RI (metric or pot)
    1839             :       ! 5) First force contribution with the 2c RI derivative d/dx (Q|R)
    1840             :       ! 6) If metric, do the additional contraction  with S_pq^-1 (Q|R)
    1841             :       ! 7) Do the force contribution due to 3c integrals (a'b|P) and (ab|P')
    1842             :       ! 8) If metric, do the last force contribution due to d/dx S^-1 (First contract (ab|P), then S^-1)
    1843             : 
    1844          14 :       use_virial_prv = .FALSE.
    1845          14 :       IF (PRESENT(use_virial)) use_virial_prv = use_virial
    1846          14 :       IF (use_virial_prv) THEN
    1847           0 :          CPABORT("Stress tensor with RI-HFX MO flavor not implemented.")
    1848             :       END IF
    1849             : 
    1850          14 :       unit_nr_dbcsr = ri_data%unit_nr_dbcsr
    1851             : 
    1852             :       CALL get_qs_env(qs_env, natom=natom, particle_set=particle_set, nkind=nkind, &
    1853             :                       atomic_kind_set=atomic_kind_set, cell=cell, force=force, &
    1854             :                       matrix_s=matrix_s, para_env=para_env, dft_control=dft_control, &
    1855          14 :                       qs_kind_set=qs_kind_set)
    1856             : 
    1857          14 :       pdims(:) = 0
    1858             :       CALL dbt_pgrid_create(para_env, pdims, pgrid_1, tensor_dims=[SIZE(ri_data%bsizes_AO_split), &
    1859             :                                                                    SIZE(ri_data%bsizes_RI_split), &
    1860          56 :                                                                    SIZE(ri_data%bsizes_AO_split)])
    1861          14 :       pdims(:) = 0
    1862             :       CALL dbt_pgrid_create(para_env, pdims, pgrid_2, tensor_dims=[SIZE(ri_data%bsizes_RI_split), &
    1863             :                                                                    SIZE(ri_data%bsizes_AO_split), &
    1864          56 :                                                                    SIZE(ri_data%bsizes_AO_split)])
    1865             : 
    1866             :       CALL create_3c_tensor(t_3c_ao_ri_ao, dist1, dist2, dist3, pgrid_1, &
    1867             :                             ri_data%bsizes_AO_split, ri_data%bsizes_RI_split, ri_data%bsizes_AO_split, &
    1868          14 :                             [1, 2], [3], name="(AO RI | AO)")
    1869          14 :       DEALLOCATE (dist1, dist2, dist3)
    1870             :       CALL create_3c_tensor(t_3c_ri_ao_ao, dist1, dist2, dist3, pgrid_2, &
    1871             :                             ri_data%bsizes_RI_split, ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
    1872          14 :                             [1], [2, 3], name="(RI | AO AO)")
    1873          14 :       DEALLOCATE (dist1, dist2, dist3)
    1874             : 
    1875         104 :       ALLOCATE (basis_set_RI(nkind), basis_set_AO(nkind))
    1876          14 :       CALL basis_set_list_setup(basis_set_RI, ri_data%ri_basis_type, qs_kind_set)
    1877          14 :       CALL get_particle_set(particle_set, qs_kind_set, basis=basis_set_RI)
    1878          14 :       CALL basis_set_list_setup(basis_set_AO, ri_data%orb_basis_type, qs_kind_set)
    1879          14 :       CALL get_particle_set(particle_set, qs_kind_set, basis=basis_set_AO)
    1880             : 
    1881          38 :       DO ibasis = 1, SIZE(basis_set_AO)
    1882          24 :          orb_basis => basis_set_AO(ibasis)%gto_basis_set
    1883          24 :          CALL init_interaction_radii_orb_basis(orb_basis, ri_data%eps_pgf_orb)
    1884          24 :          ri_basis => basis_set_RI(ibasis)%gto_basis_set
    1885          38 :          CALL init_interaction_radii_orb_basis(ri_basis, ri_data%eps_pgf_orb)
    1886             :       END DO
    1887             : 
    1888             :       ALLOCATE (t_2c_der_metric(3), t_2c_der_RI(3), t_2c_MO_AO(3), t_2c_MO_AO_ctr(3), t_3c_der_AO(3), &
    1889        1148 :                 t_3c_der_AO_ctr_1(3), t_3c_der_RI(3), t_3c_der_RI_ctr_1(3), t_3c_der_RI_ctr_2(3))
    1890             : 
    1891             :       ! 1) Precompute the derivatives
    1892             :       CALL precalc_derivatives(t_3c_der_RI_comp, t_3c_der_AO_comp, t_3c_der_RI_ind, t_3c_der_AO_ind, &
    1893             :                                t_2c_der_RI, t_2c_der_metric, t_3c_ri_ao_ao, &
    1894          14 :                                basis_set_AO, basis_set_RI, ri_data, qs_env)
    1895             : 
    1896          38 :       DO ibasis = 1, SIZE(basis_set_AO)
    1897          24 :          orb_basis => basis_set_AO(ibasis)%gto_basis_set
    1898          24 :          ri_basis => basis_set_RI(ibasis)%gto_basis_set
    1899          24 :          CALL init_interaction_radii_orb_basis(orb_basis, dft_control%qs_control%eps_pgf_orb)
    1900          38 :          CALL init_interaction_radii_orb_basis(ri_basis, dft_control%qs_control%eps_pgf_orb)
    1901             :       END DO
    1902             : 
    1903          14 :       n_mem = SIZE(t_3c_der_RI_comp, 1)
    1904          56 :       DO i_xyz = 1, 3
    1905          42 :          CALL dbt_create(t_3c_ao_ri_ao, t_3c_der_RI(i_xyz))
    1906          42 :          CALL dbt_create(t_3c_ao_ri_ao, t_3c_der_AO(i_xyz))
    1907             : 
    1908         194 :          DO i_mem = 1, n_mem
    1909             :             CALL decompress_tensor(t_3c_ri_ao_ao, t_3c_der_RI_ind(i_mem, i_xyz)%ind, &
    1910         138 :                                    t_3c_der_RI_comp(i_mem, i_xyz), ri_data%filter_eps_storage)
    1911         138 :             CALL dbt_copy(t_3c_ri_ao_ao, t_3c_der_RI(i_xyz), order=[2, 1, 3], move_data=.TRUE., summation=.TRUE.)
    1912             : 
    1913             :             CALL decompress_tensor(t_3c_ri_ao_ao, t_3c_der_AO_ind(i_mem, i_xyz)%ind, &
    1914         138 :                                    t_3c_der_AO_comp(i_mem, i_xyz), ri_data%filter_eps_storage)
    1915         180 :             CALL dbt_copy(t_3c_ri_ao_ao, t_3c_der_AO(i_xyz), order=[2, 1, 3], move_data=.TRUE., summation=.TRUE.)
    1916             :          END DO
    1917             :       END DO
    1918             : 
    1919          56 :       DO i_xyz = 1, 3
    1920         194 :          DO i_mem = 1, n_mem
    1921         138 :             CALL dealloc_containers(t_3c_der_AO_comp(i_mem, i_xyz), dummy_int)
    1922         180 :             CALL dealloc_containers(t_3c_der_RI_comp(i_mem, i_xyz), dummy_int)
    1923             :          END DO
    1924             :       END DO
    1925         290 :       DEALLOCATE (t_3c_der_AO_ind, t_3c_der_RI_ind)
    1926             : 
    1927             :       ! Get the 3c integrals (desymmetrized)
    1928          14 :       CALL dbt_create(t_3c_ao_ri_ao, t_3c_desymm)
    1929          14 :       CALL dbt_copy(ri_data%t_3c_int_ctr_1(1, 1), t_3c_desymm)
    1930             :       CALL dbt_copy(ri_data%t_3c_int_ctr_1(1, 1), t_3c_desymm, order=[3, 2, 1], &
    1931          14 :                     summation=.TRUE., move_data=.TRUE.)
    1932             : 
    1933          14 :       CALL dbt_destroy(t_3c_ao_ri_ao)
    1934          14 :       CALL dbt_destroy(t_3c_ri_ao_ao)
    1935             : 
    1936             :       ! Some utilities
    1937          14 :       spin_fac = 0.5_dp
    1938          14 :       IF (nspins == 2) spin_fac = 1.0_dp
    1939             : 
    1940          42 :       ALLOCATE (idx_to_at_RI(SIZE(ri_data%bsizes_RI_split)))
    1941          14 :       CALL get_idx_to_atom(idx_to_at_RI, ri_data%bsizes_RI_split, ri_data%bsizes_RI)
    1942             : 
    1943          42 :       ALLOCATE (idx_to_at_AO(SIZE(ri_data%bsizes_AO_split)))
    1944          14 :       CALL get_idx_to_atom(idx_to_at_AO, ri_data%bsizes_AO_split, ri_data%bsizes_AO)
    1945             : 
    1946          14 :       CALL get_atomic_kind_set(atomic_kind_set, kind_of=kind_of, atom_of_kind=atom_of_kind)
    1947             : 
    1948             :       ! 2-center RI tensors
    1949             :       CALL create_2c_tensor(t_2c_RI, dist1, dist2, ri_data%pgrid_2d, &
    1950          14 :                             ri_data%bsizes_RI_split, ri_data%bsizes_RI_split, name="(RI | RI)")
    1951          14 :       DEALLOCATE (dist1, dist2)
    1952             : 
    1953             :       CALL create_2c_tensor(t_2c_RI_PQ, dist1, dist2, ri_data%pgrid_2d, &
    1954             :                             ri_data%bsizes_RI_fit, ri_data%bsizes_RI_fit, name="(RI | RI)")
    1955          14 :       DEALLOCATE (dist1, dist2)
    1956             : 
    1957          14 :       IF (.NOT. ri_data%same_op) THEN
    1958             :          !precompute the (P|Q)*S^-1 product
    1959           4 :          CALL dbt_create(t_2c_RI_PQ, t_2c_RI_inv)
    1960           4 :          CALL dbt_create(t_2c_RI_PQ, t_2c_RI_met)
    1961           4 :          CALL dbt_create(ri_data%t_2c_inv(1, 1), t_2c_tmp)
    1962             : 
    1963             :          CALL dbt_contract(1.0_dp, ri_data%t_2c_inv(1, 1), ri_data%t_2c_pot(1, 1), &
    1964             :                            0.0_dp, t_2c_tmp, &
    1965             :                            contract_1=[2], notcontract_1=[1], &
    1966             :                            contract_2=[1], notcontract_2=[2], &
    1967             :                            map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
    1968           4 :                            unit_nr=unit_nr_dbcsr, flop=nflop)
    1969           4 :          ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
    1970             : 
    1971           4 :          CALL dbt_copy(t_2c_tmp, t_2c_RI_inv, move_data=.TRUE.)
    1972           4 :          CALL dbt_destroy(t_2c_tmp)
    1973             :       END IF
    1974             : 
    1975             :       !3 loops in MO force evaluations. To be consistent with input MEMORY_CUT, need to take the cubic root
    1976             :       !No need to cut memory further because SCF tensors alrady dense
    1977          14 :       n_mem_input = FLOOR((ri_data%n_mem_input - 0.1_dp)**(1._dp/3._dp)) + 1
    1978          14 :       n_mem_input_RI = FLOOR((ri_data%n_mem_input - 0.1_dp)/n_mem_input**2) + 1
    1979             : 
    1980             :       !batches on RI_split and RI_fit blocks
    1981          14 :       n_mem_RI = n_mem_input_RI
    1982             :       CALL create_tensor_batches(ri_data%bsizes_RI_split, n_mem_RI, batch_start_RI, batch_end_RI, &
    1983          14 :                                  batch_blk_start, batch_blk_end)
    1984          42 :       ALLOCATE (batch_ranges_RI(n_mem_RI + 1))
    1985          28 :       batch_ranges_RI(1:n_mem_RI) = batch_blk_start(1:n_mem_RI)
    1986          14 :       batch_ranges_RI(n_mem_RI + 1) = batch_blk_end(n_mem_RI) + 1
    1987          14 :       DEALLOCATE (batch_blk_start, batch_blk_end)
    1988             : 
    1989          14 :       n_mem_RI_fit = n_mem_input_RI
    1990             :       CALL create_tensor_batches(ri_data%bsizes_RI_fit, n_mem_RI_fit, batch_start_RI_fit, batch_end_RI_fit, &
    1991          14 :                                  batch_blk_start, batch_blk_end)
    1992          42 :       ALLOCATE (batch_ranges_RI_fit(n_mem_RI_fit + 1))
    1993          28 :       batch_ranges_RI_fit(1:n_mem_RI_fit) = batch_blk_start(1:n_mem_RI_fit)
    1994          14 :       batch_ranges_RI_fit(n_mem_RI_fit + 1) = batch_blk_end(n_mem_RI_fit) + 1
    1995          14 :       DEALLOCATE (batch_blk_start, batch_blk_end)
    1996             : 
    1997          32 :       DO ispin = 1, nspins
    1998             : 
    1999             :          ! 2 )Prepare the batches for this spin
    2000          18 :          CALL dbcsr_get_info(mo_coeff(ispin), nfullcols_total=n_mos)
    2001             :          !note: optimized GPU block size for SCF is 64x1x64. Here we do 8x8x64
    2002          36 :          CALL split_block_sizes([n_mos], bsizes_MO, max_size=FLOOR(SQRT(ri_data%max_bsize_MO - 0.1)) + 1)
    2003             : 
    2004             :          !batching on MO blocks
    2005          18 :          n_mem = n_mem_input
    2006             :          CALL create_tensor_batches(bsizes_MO, n_mem, batch_start, batch_end, &
    2007          18 :                                     batch_blk_start, batch_blk_end)
    2008          54 :          ALLOCATE (batch_ranges(n_mem + 1))
    2009          40 :          batch_ranges(1:n_mem) = batch_blk_start(1:n_mem)
    2010          18 :          batch_ranges(n_mem + 1) = batch_blk_end(n_mem) + 1
    2011          18 :          DEALLOCATE (batch_blk_start, batch_blk_end)
    2012             : 
    2013             :          ! Initialize the different tensors needed (Note: keep MO coeffs as (MO | AO) for less transpose)
    2014             :          CALL create_2c_tensor(t_mo_coeff, dist1, dist2, ri_data%pgrid_2d, bsizes_MO, &
    2015          18 :                                ri_data%bsizes_AO_split, name="MO coeffs")
    2016          18 :          DEALLOCATE (dist1, dist2)
    2017          18 :          CALL dbt_create(mo_coeff(ispin), t_2c_tmp, name="MO coeffs")
    2018          18 :          CALL dbt_copy_matrix_to_tensor(mo_coeff(ispin), t_2c_tmp)
    2019          18 :          CALL dbt_copy(t_2c_tmp, t_mo_coeff, order=[2, 1], move_data=.TRUE.)
    2020          18 :          CALL dbt_destroy(t_2c_tmp)
    2021             : 
    2022          18 :          CALL dbt_create(t_mo_coeff, t_mo_cpy)
    2023          18 :          CALL dbt_copy(t_mo_coeff, t_mo_cpy)
    2024          72 :          DO i_xyz = 1, 3
    2025          54 :             CALL dbt_create(t_mo_coeff, t_2c_MO_AO_ctr(i_xyz))
    2026          72 :             CALL dbt_create(t_mo_coeff, t_2c_MO_AO(i_xyz))
    2027             :          END DO
    2028             : 
    2029             :          CALL create_3c_tensor(t_3c_ao_ri_mo, dist1, dist2, dist3, pgrid_1, ri_data%bsizes_AO_split, &
    2030          18 :                                ri_data%bsizes_RI_split, bsizes_MO, [1, 2], [3], name="(AO RI| MO)")
    2031          18 :          DEALLOCATE (dist1, dist2, dist3)
    2032             : 
    2033          18 :          CALL dbt_create(t_3c_ao_ri_mo, t_3c_0)
    2034          18 :          CALL dbt_destroy(t_3c_ao_ri_mo)
    2035             : 
    2036             :          CALL create_3c_tensor(t_3c_mo_ri_ao, dist1, dist2, dist3, pgrid_1, bsizes_MO, ri_data%bsizes_RI_split, &
    2037          18 :                                ri_data%bsizes_AO_split, [1, 2], [3], name="(MO RI | AO)")
    2038          18 :          DEALLOCATE (dist1, dist2, dist3)
    2039          18 :          CALL dbt_create(t_3c_mo_ri_ao, t_3c_1)
    2040             : 
    2041          72 :          DO i_xyz = 1, 3
    2042          54 :             CALL dbt_create(t_3c_mo_ri_ao, t_3c_der_RI_ctr_1(i_xyz))
    2043          72 :             CALL dbt_create(t_3c_mo_ri_ao, t_3c_der_AO_ctr_1(i_xyz))
    2044             :          END DO
    2045             : 
    2046             :          CALL create_3c_tensor(t_3c_mo_ri_mo, dist1, dist2, dist3, pgrid_1, bsizes_MO, &
    2047          18 :                                ri_data%bsizes_RI_split, bsizes_MO, [1, 2], [3], name="(MO RI | MO)")
    2048          18 :          DEALLOCATE (dist1, dist2, dist3)
    2049          18 :          CALL dbt_create(t_3c_mo_ri_mo, t_3c_work)
    2050             : 
    2051             :          CALL create_3c_tensor(t_3c_ri_mo_mo, dist1, dist2, dist3, pgrid_2, ri_data%bsizes_RI_split, &
    2052          18 :                                bsizes_MO, bsizes_MO, [1], [2, 3], name="(RI| MO MO)")
    2053          18 :          DEALLOCATE (dist1, dist2, dist3)
    2054             : 
    2055          18 :          CALL dbt_create(t_3c_ri_mo_mo, t_3c_2)
    2056          18 :          CALL dbt_create(t_3c_ri_mo_mo, t_3c_3)
    2057          18 :          CALL dbt_create(t_3c_ri_mo_mo, t_3c_RI_ctr)
    2058          72 :          DO i_xyz = 1, 3
    2059          72 :             CALL dbt_create(t_3c_ri_mo_mo, t_3c_der_RI_ctr_2(i_xyz))
    2060             :          END DO
    2061             : 
    2062             :          !Very large RI_fit blocks => new pgrid to make sure distribution is ideal
    2063          18 :          pdims(:) = 0
    2064             :          CALL create_3c_tensor(t_3c_ri_mo_mo_fit, dist1, dist2, dist3, pgrid_2, ri_data%bsizes_RI_fit, &
    2065          18 :                                bsizes_MO, bsizes_MO, [1], [2, 3], name="(RI| MO MO)")
    2066          18 :          DEALLOCATE (dist1, dist2, dist3)
    2067             : 
    2068          18 :          CALL dbt_create(t_3c_ri_mo_mo_fit, t_3c_4)
    2069          18 :          CALL dbt_create(t_3c_ri_mo_mo_fit, t_3c_5)
    2070          18 :          CALL dbt_create(t_3c_ri_mo_mo_fit, t_3c_6)
    2071             : 
    2072          18 :          CALL dbt_batched_contract_init(t_3c_desymm, batch_range_2=batch_ranges_RI)
    2073          18 :          CALL dbt_batched_contract_init(t_3c_0, batch_range_2=batch_ranges_RI, batch_range_3=batch_ranges)
    2074             : 
    2075          72 :          DO i_xyz = 1, 3
    2076          54 :             CALL dbt_batched_contract_init(t_3c_der_AO(i_xyz), batch_range_2=batch_ranges_RI)
    2077          72 :             CALL dbt_batched_contract_init(t_3c_der_RI(i_xyz), batch_range_2=batch_ranges_RI)
    2078             :          END DO
    2079             : 
    2080          18 :          CALL para_env%sync()
    2081          18 :          t1 = m_walltime()
    2082             : 
    2083             :          ! 2) Loop over batches
    2084          40 :          DO i_mem = 1, n_mem
    2085             : 
    2086          22 :             bounds_ctr_1d(1, 1) = batch_start(i_mem)
    2087          22 :             bounds_ctr_1d(2, 1) = batch_end(i_mem)
    2088             : 
    2089          22 :             bounds_ctr_2d(1, 1) = 1
    2090          96 :             bounds_ctr_2d(2, 1) = SUM(ri_data%bsizes_AO)
    2091             : 
    2092             :             ! 3) Do the first AO to MO contraction here
    2093          22 :             CALL timeset(routineN//"_AO2MO_1", handle)
    2094          22 :             CALL dbt_batched_contract_init(t_mo_coeff)
    2095          44 :             DO k_mem = 1, n_mem_RI
    2096          22 :                bounds_ctr_2d(1, 2) = batch_start_RI(k_mem)
    2097          22 :                bounds_ctr_2d(2, 2) = batch_end_RI(k_mem)
    2098             : 
    2099             :                CALL dbt_contract(1.0_dp, t_mo_coeff, t_3c_desymm, &
    2100             :                                  1.0_dp, t_3c_0, &
    2101             :                                  contract_1=[2], notcontract_1=[1], &
    2102             :                                  contract_2=[3], notcontract_2=[1, 2], &
    2103             :                                  map_1=[3], map_2=[1, 2], filter_eps=ri_data%filter_eps, &
    2104             :                                  bounds_2=bounds_ctr_1d, &
    2105             :                                  bounds_3=bounds_ctr_2d, &
    2106          22 :                                  unit_nr=unit_nr_dbcsr, flop=nflop)
    2107          44 :                ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
    2108             :             END DO
    2109          22 :             CALL dbt_copy(t_3c_0, t_3c_1, order=[3, 2, 1], move_data=.TRUE.)
    2110             : 
    2111          88 :             DO i_xyz = 1, 3
    2112         132 :                DO k_mem = 1, n_mem_RI
    2113          66 :                   bounds_ctr_2d(1, 2) = batch_start_RI(k_mem)
    2114          66 :                   bounds_ctr_2d(2, 2) = batch_end_RI(k_mem)
    2115             : 
    2116             :                   CALL dbt_contract(1.0_dp, t_mo_coeff, t_3c_der_AO(i_xyz), &
    2117             :                                     1.0_dp, t_3c_0, &
    2118             :                                     contract_1=[2], notcontract_1=[1], &
    2119             :                                     contract_2=[3], notcontract_2=[1, 2], &
    2120             :                                     map_1=[3], map_2=[1, 2], filter_eps=ri_data%filter_eps, &
    2121             :                                     bounds_2=bounds_ctr_1d, &
    2122             :                                     bounds_3=bounds_ctr_2d, &
    2123          66 :                                     unit_nr=unit_nr_dbcsr, flop=nflop)
    2124         132 :                   ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
    2125             :                END DO
    2126          66 :                CALL dbt_copy(t_3c_0, t_3c_der_AO_ctr_1(i_xyz), order=[3, 2, 1], move_data=.TRUE.)
    2127             : 
    2128         132 :                DO k_mem = 1, n_mem_RI
    2129          66 :                   bounds_ctr_2d(1, 2) = batch_start_RI(k_mem)
    2130          66 :                   bounds_ctr_2d(2, 2) = batch_end_RI(k_mem)
    2131             : 
    2132             :                   CALL dbt_contract(1.0_dp, t_mo_coeff, t_3c_der_RI(i_xyz), &
    2133             :                                     1.0_dp, t_3c_0, &
    2134             :                                     contract_1=[2], notcontract_1=[1], &
    2135             :                                     contract_2=[3], notcontract_2=[1, 2], &
    2136             :                                     map_1=[3], map_2=[1, 2], filter_eps=ri_data%filter_eps, &
    2137             :                                     bounds_2=bounds_ctr_1d, &
    2138             :                                     bounds_3=bounds_ctr_2d, &
    2139          66 :                                     unit_nr=unit_nr_dbcsr, flop=nflop)
    2140         132 :                   ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
    2141             :                END DO
    2142          88 :                CALL dbt_copy(t_3c_0, t_3c_der_RI_ctr_1(i_xyz), order=[3, 2, 1], move_data=.TRUE.)
    2143             :             END DO
    2144          22 :             CALL dbt_batched_contract_finalize(t_mo_coeff)
    2145          22 :             CALL timestop(handle)
    2146             : 
    2147          22 :             CALL dbt_batched_contract_init(t_3c_1, batch_range_1=batch_ranges, batch_range_2=batch_ranges_RI)
    2148             :             CALL dbt_batched_contract_init(t_3c_work, batch_range_1=batch_ranges, batch_range_2=batch_ranges_RI, &
    2149          22 :                                            batch_range_3=batch_ranges)
    2150          22 :             CALL dbt_batched_contract_init(t_3c_2, batch_range_2=batch_ranges, batch_range_3=batch_ranges)
    2151             :             CALL dbt_batched_contract_init(t_3c_3, batch_range_1=batch_ranges_RI, &
    2152          22 :                                            batch_range_2=batch_ranges, batch_range_3=batch_ranges)
    2153             : 
    2154             :             CALL dbt_batched_contract_init(t_3c_4, batch_range_1=batch_ranges_RI_fit, &
    2155          22 :                                            batch_range_2=batch_ranges, batch_range_3=batch_ranges)
    2156          22 :             CALL dbt_batched_contract_init(t_3c_5, batch_range_2=batch_ranges, batch_range_3=batch_ranges)
    2157             : 
    2158          88 :             DO i_xyz = 1, 3
    2159             :                CALL dbt_batched_contract_init(t_3c_der_RI_ctr_1(i_xyz), batch_range_1=batch_ranges, &
    2160          66 :                                               batch_range_2=batch_ranges_RI)
    2161             :                CALL dbt_batched_contract_init(t_3c_der_AO_ctr_1(i_xyz), batch_range_1=batch_ranges, &
    2162          88 :                                               batch_range_2=batch_ranges_RI)
    2163             : 
    2164             :             END DO
    2165             : 
    2166          22 :             IF (.NOT. ri_data%same_op) THEN
    2167           8 :                CALL dbt_batched_contract_init(t_3c_6, batch_range_2=batch_ranges, batch_range_3=batch_ranges)
    2168             :             END IF
    2169             : 
    2170          52 :             DO j_mem = 1, n_mem
    2171             : 
    2172          30 :                bounds_ctr_1d(1, 1) = batch_start(j_mem)
    2173          30 :                bounds_ctr_1d(2, 1) = batch_end(j_mem)
    2174             : 
    2175          30 :                bounds_ctr_2d(1, 1) = batch_start(i_mem)
    2176          30 :                bounds_ctr_2d(2, 1) = batch_end(i_mem)
    2177             : 
    2178             :                ! 3) Do the second AO to MO contraction here, followed by the S^-1 contraction
    2179          30 :                CALL timeset(routineN//"_AO2MO_2", handle)
    2180          30 :                CALL dbt_batched_contract_init(t_mo_coeff)
    2181          60 :                DO k_mem = 1, n_mem_RI
    2182          30 :                   bounds_ctr_2d(1, 2) = batch_start_RI(k_mem)
    2183          30 :                   bounds_ctr_2d(2, 2) = batch_end_RI(k_mem)
    2184             : 
    2185             :                   CALL dbt_contract(1.0_dp, t_mo_coeff, t_3c_1, &
    2186             :                                     1.0_dp, t_3c_work, &
    2187             :                                     contract_1=[2], notcontract_1=[1], &
    2188             :                                     contract_2=[3], notcontract_2=[1, 2], &
    2189             :                                     map_1=[3], map_2=[1, 2], filter_eps=ri_data%filter_eps, &
    2190             :                                     bounds_2=bounds_ctr_1d, &
    2191             :                                     bounds_3=bounds_ctr_2d, &
    2192          30 :                                     unit_nr=unit_nr_dbcsr, flop=nflop)
    2193          60 :                   ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
    2194             :                END DO
    2195          30 :                CALL dbt_batched_contract_finalize(t_mo_coeff)
    2196          30 :                CALL timestop(handle)
    2197             : 
    2198          30 :                bounds_ctr_2d(1, 1) = batch_start(i_mem)
    2199          30 :                bounds_ctr_2d(2, 1) = batch_end(i_mem)
    2200          30 :                bounds_ctr_2d(1, 2) = batch_start(j_mem)
    2201          30 :                bounds_ctr_2d(2, 2) = batch_end(j_mem)
    2202             : 
    2203             :                ! 4) Contract 3c MO integrals with S^-1 as well
    2204          30 :                CALL timeset(routineN//"_2c_inv", handle)
    2205          30 :                CALL dbt_copy(t_3c_work, t_3c_3, order=[2, 1, 3], move_data=.TRUE.)
    2206          60 :                DO k_mem = 1, n_mem_RI
    2207          30 :                   bounds_ctr_1d(1, 1) = batch_start_RI(k_mem)
    2208          30 :                   bounds_ctr_1d(2, 1) = batch_end_RI(k_mem)
    2209             : 
    2210          30 :                   CALL dbt_batched_contract_init(ri_data%t_2c_inv(1, 1))
    2211             :                   CALL dbt_contract(1.0_dp, ri_data%t_2c_inv(1, 1), t_3c_3, &
    2212             :                                     1.0_dp, t_3c_2, &
    2213             :                                     contract_1=[2], notcontract_1=[1], &
    2214             :                                     contract_2=[1], notcontract_2=[2, 3], &
    2215             :                                     map_1=[1], map_2=[2, 3], filter_eps=ri_data%filter_eps, &
    2216             :                                     bounds_1=bounds_ctr_1d, &
    2217             :                                     bounds_3=bounds_ctr_2d, &
    2218          30 :                                     unit_nr=unit_nr_dbcsr, flop=nflop)
    2219          30 :                   ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
    2220          60 :                   CALL dbt_batched_contract_finalize(ri_data%t_2c_inv(1, 1))
    2221             :                END DO
    2222          30 :                CALL dbt_copy(t_3c_ri_mo_mo, t_3c_3)
    2223          30 :                CALL timestop(handle)
    2224             : 
    2225             :                !Only contract (ab|P') with MO coeffs since need AO rep for the force of (a'b|P)
    2226          30 :                bounds_ctr_1d(1, 1) = batch_start(j_mem)
    2227          30 :                bounds_ctr_1d(2, 1) = batch_end(j_mem)
    2228             : 
    2229          30 :                bounds_ctr_2d(1, 1) = batch_start(i_mem)
    2230          30 :                bounds_ctr_2d(2, 1) = batch_end(i_mem)
    2231             : 
    2232          30 :                CALL timeset(routineN//"_AO2MO_2", handle)
    2233          30 :                CALL dbt_batched_contract_init(t_mo_coeff)
    2234         120 :                DO i_xyz = 1, 3
    2235         180 :                   DO k_mem = 1, n_mem_RI
    2236          90 :                      bounds_ctr_2d(1, 2) = batch_start_RI(k_mem)
    2237          90 :                      bounds_ctr_2d(2, 2) = batch_end_RI(k_mem)
    2238             : 
    2239             :                      CALL dbt_contract(1.0_dp, t_mo_coeff, t_3c_der_RI_ctr_1(i_xyz), &
    2240             :                                        1.0_dp, t_3c_work, &
    2241             :                                        contract_1=[2], notcontract_1=[1], &
    2242             :                                        contract_2=[3], notcontract_2=[1, 2], &
    2243             :                                        map_1=[3], map_2=[1, 2], filter_eps=ri_data%filter_eps, &
    2244             :                                        bounds_2=bounds_ctr_1d, &
    2245             :                                        bounds_3=bounds_ctr_2d, &
    2246          90 :                                        unit_nr=unit_nr_dbcsr, flop=nflop)
    2247         180 :                      ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
    2248             :                   END DO
    2249         120 :                   CALL dbt_copy(t_3c_work, t_3c_der_RI_ctr_2(i_xyz), order=[2, 1, 3], move_data=.TRUE.)
    2250             :                END DO
    2251          30 :                CALL dbt_batched_contract_finalize(t_mo_coeff)
    2252          30 :                CALL timestop(handle)
    2253             : 
    2254          30 :                bounds_ctr_2d(1, 1) = batch_start(i_mem)
    2255          30 :                bounds_ctr_2d(2, 1) = batch_end(i_mem)
    2256          30 :                bounds_ctr_2d(1, 2) = batch_start(j_mem)
    2257          30 :                bounds_ctr_2d(2, 2) = batch_end(j_mem)
    2258             : 
    2259             :                ! 5) Force due to d/dx (P|Q)
    2260          30 :                CALL timeset(routineN//"_PQ_der", handle)
    2261          30 :                CALL dbt_copy(t_3c_2, t_3c_4, move_data=.TRUE.)
    2262          30 :                CALL dbt_copy(t_3c_4, t_3c_5)
    2263          60 :                DO k_mem = 1, n_mem_RI_fit
    2264          30 :                   bounds_ctr_1d(1, 1) = batch_start_RI_fit(k_mem)
    2265          30 :                   bounds_ctr_1d(2, 1) = batch_end_RI_fit(k_mem)
    2266             : 
    2267          30 :                   CALL dbt_batched_contract_init(t_2c_RI_PQ)
    2268             :                   CALL dbt_contract(1.0_dp, t_3c_4, t_3c_5, &
    2269             :                                     1.0_dp, t_2c_RI_PQ, &
    2270             :                                     contract_1=[2, 3], notcontract_1=[1], &
    2271             :                                     contract_2=[2, 3], notcontract_2=[1], &
    2272             :                                     bounds_1=bounds_ctr_2d, &
    2273             :                                     bounds_2=bounds_ctr_1d, &
    2274             :                                     map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
    2275          30 :                                     unit_nr=unit_nr_dbcsr, flop=nflop)
    2276          30 :                   ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
    2277          60 :                   CALL dbt_batched_contract_finalize(t_2c_RI_PQ)
    2278             :                END DO
    2279          30 :                CALL timestop(handle)
    2280             : 
    2281             :                ! 6) If metric, do the additional contraction  with S_pq^-1 (Q|R) (not on the derivatives)
    2282          30 :                IF (.NOT. ri_data%same_op) THEN
    2283          16 :                   CALL timeset(routineN//"_metric", handle)
    2284          32 :                   DO k_mem = 1, n_mem_RI_fit
    2285          16 :                      bounds_ctr_1d(1, 1) = batch_start_RI_fit(k_mem)
    2286          16 :                      bounds_ctr_1d(2, 1) = batch_end_RI_fit(k_mem)
    2287             : 
    2288          16 :                      CALL dbt_batched_contract_init(t_2c_RI_inv)
    2289             :                      CALL dbt_contract(1.0_dp, t_2c_RI_inv, t_3c_4, &
    2290             :                                        1.0_dp, t_3c_6, &
    2291             :                                        contract_1=[2], notcontract_1=[1], &
    2292             :                                        contract_2=[1], notcontract_2=[2, 3], &
    2293             :                                        bounds_1=bounds_ctr_1d, &
    2294             :                                        bounds_3=bounds_ctr_2d, &
    2295             :                                        map_1=[1], map_2=[2, 3], filter_eps=ri_data%filter_eps, &
    2296          16 :                                        unit_nr=unit_nr_dbcsr, flop=nflop)
    2297          16 :                      ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
    2298          32 :                      CALL dbt_batched_contract_finalize(t_2c_RI_inv)
    2299             :                   END DO
    2300          16 :                   CALL dbt_copy(t_3c_6, t_3c_4, move_data=.TRUE.)
    2301             : 
    2302             :                   ! 8) and get the force due to d/dx S^-1
    2303          32 :                   DO k_mem = 1, n_mem_RI_fit
    2304          16 :                      bounds_ctr_1d(1, 1) = batch_start_RI_fit(k_mem)
    2305          16 :                      bounds_ctr_1d(2, 1) = batch_end_RI_fit(k_mem)
    2306             : 
    2307          16 :                      CALL dbt_batched_contract_init(t_2c_RI_met)
    2308             :                      CALL dbt_contract(1.0_dp, t_3c_4, t_3c_5, &
    2309             :                                        1.0_dp, t_2c_RI_met, &
    2310             :                                        contract_1=[2, 3], notcontract_1=[1], &
    2311             :                                        contract_2=[2, 3], notcontract_2=[1], &
    2312             :                                        bounds_1=bounds_ctr_2d, &
    2313             :                                        bounds_2=bounds_ctr_1d, &
    2314             :                                        map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
    2315          16 :                                        unit_nr=unit_nr_dbcsr, flop=nflop)
    2316          16 :                      ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
    2317          32 :                      CALL dbt_batched_contract_finalize(t_2c_RI_met)
    2318             :                   END DO
    2319          16 :                   CALL timestop(handle)
    2320             :                END IF
    2321          30 :                CALL dbt_copy(t_3c_ri_mo_mo_fit, t_3c_5)
    2322             : 
    2323             :                ! 7) Do the force contribution due to 3c integrals (a'b|P) and (ab|P')
    2324             : 
    2325             :                ! (ab|P')
    2326          30 :                CALL timeset(routineN//"_3c_RI", handle)
    2327          30 :                pref = -0.5_dp*2.0_dp*hf_fraction*spin_fac
    2328          30 :                CALL dbt_copy(t_3c_4, t_3c_RI_ctr, move_data=.TRUE.)
    2329             :                CALL get_force_from_3c_trace(force, t_3c_RI_ctr, t_3c_der_RI_ctr_2, atom_of_kind, kind_of, &
    2330          30 :                                             idx_to_at_RI, pref)
    2331          30 :                CALL timestop(handle)
    2332             : 
    2333             :                ! (a'b|P) Note that derivative remains in AO rep until the actual force evaluation,
    2334             :                ! which also prevents doing a direct 3-center trace
    2335          30 :                bounds_ctr_2d(1, 1) = batch_start(i_mem)
    2336          30 :                bounds_ctr_2d(2, 1) = batch_end(i_mem)
    2337             : 
    2338          30 :                bounds_ctr_1d(1, 1) = batch_start(j_mem)
    2339          30 :                bounds_ctr_1d(2, 1) = batch_end(j_mem)
    2340             : 
    2341          30 :                CALL timeset(routineN//"_3c_AO", handle)
    2342          30 :                CALL dbt_copy(t_3c_RI_ctr, t_3c_work, order=[2, 1, 3], move_data=.TRUE.)
    2343         120 :                DO i_xyz = 1, 3
    2344             : 
    2345          90 :                   CALL dbt_batched_contract_init(t_2c_MO_AO_ctr(i_xyz))
    2346         180 :                   DO k_mem = 1, n_mem_RI
    2347          90 :                      bounds_ctr_2d(1, 2) = batch_start_RI(k_mem)
    2348          90 :                      bounds_ctr_2d(2, 2) = batch_end_RI(k_mem)
    2349             : 
    2350             :                      CALL dbt_contract(1.0_dp, t_3c_work, t_3c_der_AO_ctr_1(i_xyz), &
    2351             :                                        1.0_dp, t_2c_MO_AO_ctr(i_xyz), &
    2352             :                                        contract_1=[1, 2], notcontract_1=[3], &
    2353             :                                        contract_2=[1, 2], notcontract_2=[3], &
    2354             :                                        map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
    2355             :                                        bounds_1=bounds_ctr_2d, &
    2356             :                                        bounds_2=bounds_ctr_1d, &
    2357          90 :                                        unit_nr=unit_nr_dbcsr, flop=nflop)
    2358         180 :                      ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
    2359             :                   END DO
    2360         120 :                   CALL dbt_batched_contract_finalize(t_2c_MO_AO_ctr(i_xyz))
    2361             :                END DO
    2362         232 :                CALL timestop(handle)
    2363             : 
    2364             :             END DO !j_mem
    2365          22 :             CALL dbt_batched_contract_finalize(t_3c_1)
    2366          22 :             CALL dbt_batched_contract_finalize(t_3c_work)
    2367          22 :             CALL dbt_batched_contract_finalize(t_3c_2)
    2368          22 :             CALL dbt_batched_contract_finalize(t_3c_3)
    2369          22 :             CALL dbt_batched_contract_finalize(t_3c_4)
    2370          22 :             CALL dbt_batched_contract_finalize(t_3c_5)
    2371             : 
    2372          88 :             DO i_xyz = 1, 3
    2373          66 :                CALL dbt_batched_contract_finalize(t_3c_der_RI_ctr_1(i_xyz))
    2374          88 :                CALL dbt_batched_contract_finalize(t_3c_der_AO_ctr_1(i_xyz))
    2375             :             END DO
    2376             : 
    2377          62 :             IF (.NOT. ri_data%same_op) THEN
    2378           8 :                CALL dbt_batched_contract_finalize(t_3c_6)
    2379             :             END IF
    2380             : 
    2381             :          END DO !i_mem
    2382          18 :          CALL dbt_batched_contract_finalize(t_3c_desymm)
    2383          18 :          CALL dbt_batched_contract_finalize(t_3c_0)
    2384             : 
    2385          72 :          DO i_xyz = 1, 3
    2386          54 :             CALL dbt_batched_contract_finalize(t_3c_der_AO(i_xyz))
    2387          72 :             CALL dbt_batched_contract_finalize(t_3c_der_RI(i_xyz))
    2388             :          END DO
    2389             : 
    2390             :          !Force contribution due to 3-center AO derivatives (a'b|P)
    2391          18 :          pref = -0.5_dp*4.0_dp*hf_fraction*spin_fac
    2392          72 :          DO i_xyz = 1, 3
    2393          54 :             CALL dbt_copy(t_2c_MO_AO_ctr(i_xyz), t_2c_MO_AO(i_xyz), move_data=.TRUE.) !ensures matching distributions
    2394          54 :             CALL get_mo_ao_force(force, t_mo_cpy, t_2c_MO_AO(i_xyz), atom_of_kind, kind_of, idx_to_at_AO, pref, i_xyz)
    2395          72 :             CALL dbt_clear(t_2c_MO_AO(i_xyz))
    2396             :          END DO
    2397             : 
    2398             :          !Force contribution of d/dx (P|Q)
    2399          18 :          pref = 0.5_dp*hf_fraction*spin_fac
    2400          18 :          IF (.NOT. ri_data%same_op) pref = -pref
    2401             : 
    2402             :          !Making sure dists of the t_2c_RI tensors match
    2403          18 :          CALL dbt_copy(t_2c_RI_PQ, t_2c_RI, move_data=.TRUE.)
    2404             :          CALL get_2c_der_force(force, t_2c_RI, t_2c_der_RI, atom_of_kind, &
    2405          18 :                                kind_of, idx_to_at_RI, pref)
    2406          18 :          CALL dbt_clear(t_2c_RI)
    2407             : 
    2408             :          !Force contribution due to the inverse metric
    2409          18 :          IF (.NOT. ri_data%same_op) THEN
    2410           4 :             pref = 0.5_dp*2.0_dp*hf_fraction*spin_fac
    2411             : 
    2412           4 :             CALL dbt_copy(t_2c_RI_met, t_2c_RI, move_data=.TRUE.)
    2413             :             CALL get_2c_der_force(force, t_2c_RI, t_2c_der_metric, atom_of_kind, &
    2414           4 :                                   kind_of, idx_to_at_RI, pref)
    2415           4 :             CALL dbt_clear(t_2c_RI)
    2416             :          END IF
    2417             : 
    2418          18 :          CALL dbt_destroy(t_3c_0)
    2419          18 :          CALL dbt_destroy(t_3c_1)
    2420          18 :          CALL dbt_destroy(t_3c_2)
    2421          18 :          CALL dbt_destroy(t_3c_3)
    2422          18 :          CALL dbt_destroy(t_3c_4)
    2423          18 :          CALL dbt_destroy(t_3c_5)
    2424          18 :          CALL dbt_destroy(t_3c_6)
    2425          18 :          CALL dbt_destroy(t_3c_work)
    2426          18 :          CALL dbt_destroy(t_3c_RI_ctr)
    2427          18 :          CALL dbt_destroy(t_3c_mo_ri_ao)
    2428          18 :          CALL dbt_destroy(t_3c_mo_ri_mo)
    2429          18 :          CALL dbt_destroy(t_3c_ri_mo_mo)
    2430          18 :          CALL dbt_destroy(t_3c_ri_mo_mo_fit)
    2431          18 :          CALL dbt_destroy(t_mo_coeff)
    2432          18 :          CALL dbt_destroy(t_mo_cpy)
    2433          72 :          DO i_xyz = 1, 3
    2434          54 :             CALL dbt_destroy(t_2c_MO_AO(i_xyz))
    2435          54 :             CALL dbt_destroy(t_2c_MO_AO_ctr(i_xyz))
    2436          54 :             CALL dbt_destroy(t_3c_der_RI_ctr_1(i_xyz))
    2437          54 :             CALL dbt_destroy(t_3c_der_AO_ctr_1(i_xyz))
    2438          72 :             CALL dbt_destroy(t_3c_der_RI_ctr_2(i_xyz))
    2439             :          END DO
    2440          86 :          DEALLOCATE (batch_ranges, batch_start, batch_end)
    2441             :       END DO !ispin
    2442             : 
    2443             :       ! Clean-up
    2444          14 :       CALL dbt_pgrid_destroy(pgrid_1)
    2445          14 :       CALL dbt_pgrid_destroy(pgrid_2)
    2446          14 :       CALL dbt_destroy(t_3c_desymm)
    2447          14 :       CALL dbt_destroy(t_2c_RI)
    2448          14 :       CALL dbt_destroy(t_2c_RI_PQ)
    2449          14 :       IF (.NOT. ri_data%same_op) THEN
    2450           4 :          CALL dbt_destroy(t_2c_RI_met)
    2451           4 :          CALL dbt_destroy(t_2c_RI_inv)
    2452             :       END IF
    2453          56 :       DO i_xyz = 1, 3
    2454          42 :          CALL dbt_destroy(t_3c_der_AO(i_xyz))
    2455          42 :          CALL dbt_destroy(t_3c_der_RI(i_xyz))
    2456          42 :          CALL dbt_destroy(t_2c_der_RI(i_xyz))
    2457          56 :          IF (.NOT. ri_data%same_op) CALL dbt_destroy(t_2c_der_metric(i_xyz))
    2458             :       END DO
    2459          14 :       CALL dbt_copy(ri_data%t_3c_int_ctr_2(1, 1), ri_data%t_3c_int_ctr_1(1, 1))
    2460             : 
    2461          14 :       CALL para_env%sync()
    2462          14 :       t2 = m_walltime()
    2463             : 
    2464          14 :       ri_data%dbcsr_time = ri_data%dbcsr_time + t2 - t1
    2465             : 
    2466         476 :    END SUBROUTINE hfx_ri_forces_mo
    2467             : 
    2468             : ! **************************************************************************************************
    2469             : !> \brief New sparser implementation
    2470             : !> \param qs_env ...
    2471             : !> \param ri_data ...
    2472             : !> \param nspins ...
    2473             : !> \param hf_fraction ...
    2474             : !> \param rho_ao ...
    2475             : !> \param rho_ao_resp ...
    2476             : !> \param use_virial ...
    2477             : !> \param resp_only ...
    2478             : !> \param rescale_factor ...
    2479             : ! **************************************************************************************************
    2480         116 :    SUBROUTINE hfx_ri_forces_Pmat(qs_env, ri_data, nspins, hf_fraction, rho_ao, rho_ao_resp, &
    2481             :                                  use_virial, resp_only, rescale_factor)
    2482             : 
    2483             :       TYPE(qs_environment_type), POINTER                 :: qs_env
    2484             :       TYPE(hfx_ri_type), INTENT(INOUT)                   :: ri_data
    2485             :       INTEGER, INTENT(IN)                                :: nspins
    2486             :       REAL(dp), INTENT(IN)                               :: hf_fraction
    2487             :       TYPE(dbcsr_p_type), DIMENSION(:, :)                :: rho_ao
    2488             :       TYPE(dbcsr_p_type), DIMENSION(:), OPTIONAL         :: rho_ao_resp
    2489             :       LOGICAL, INTENT(IN), OPTIONAL                      :: use_virial, resp_only
    2490             :       REAL(dp), INTENT(IN), OPTIONAL                     :: rescale_factor
    2491             : 
    2492             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'hfx_ri_forces_Pmat'
    2493             : 
    2494             :       INTEGER                                            :: dummy_int, handle, i_mem, i_spin, i_xyz, &
    2495             :                                                             ibasis, j_mem, j_xyz, k_mem, k_xyz, &
    2496             :                                                             n_mem, n_mem_RI, natom, nkind, &
    2497             :                                                             unit_nr_dbcsr
    2498             :       INTEGER(int_8)                                     :: nflop
    2499         116 :       INTEGER, ALLOCATABLE, DIMENSION(:) :: atom_of_kind, batch_end, batch_end_RI, batch_ranges, &
    2500         116 :          batch_ranges_RI, batch_start, batch_start_RI, dist1, dist2, dist3, idx_to_at_AO, &
    2501         116 :          idx_to_at_RI, kind_of
    2502             :       INTEGER, DIMENSION(2, 1)                           :: ibounds, jbounds, kbounds
    2503             :       INTEGER, DIMENSION(2, 2)                           :: ijbounds
    2504             :       INTEGER, DIMENSION(2, 3)                           :: bounds_cpy
    2505         232 :       INTEGER, DIMENSION(:), POINTER                     :: col_bsize, row_bsize
    2506             :       LOGICAL                                            :: do_resp, resp_only_prv, use_virial_prv
    2507             :       REAL(dp)                                           :: pref, spin_fac, t1, t2
    2508             :       REAL(dp), DIMENSION(3, 3)                          :: work_virial
    2509         116 :       TYPE(atomic_kind_type), DIMENSION(:), POINTER      :: atomic_kind_set
    2510         116 :       TYPE(block_ind_type), ALLOCATABLE, DIMENSION(:, :) :: t_3c_der_AO_ind, t_3c_der_RI_ind
    2511             :       TYPE(cell_type), POINTER                           :: cell
    2512             :       TYPE(dbcsr_distribution_type), POINTER             :: dbcsr_dist
    2513             :       TYPE(dbcsr_type)                                   :: dbcsr_tmp, virial_trace
    2514        6728 :       TYPE(dbt_type) :: rho_ao_1, rho_ao_2, t_2c_RI, t_2c_RI_tmp, t_2c_tmp, t_2c_virial, t_3c_1, &
    2515        7656 :          t_3c_2, t_3c_3, t_3c_4, t_3c_5, t_3c_ao_ri_ao, t_3c_help_1, t_3c_help_2, t_3c_int, &
    2516        5684 :          t_3c_int_2, t_3c_ri_ao_ao, t_3c_sparse, t_3c_virial, t_R, t_SVS
    2517         116 :       TYPE(dbt_type), ALLOCATABLE, DIMENSION(:)          :: t_2c_der_metric, t_2c_der_RI, &
    2518         116 :                                                             t_3c_der_AO, t_3c_der_RI
    2519             :       TYPE(dft_control_type), POINTER                    :: dft_control
    2520             :       TYPE(gto_basis_set_p_type), ALLOCATABLE, &
    2521         116 :          DIMENSION(:), TARGET                            :: basis_set_AO, basis_set_RI
    2522             :       TYPE(gto_basis_set_type), POINTER                  :: orb_basis, ri_basis
    2523             :       TYPE(hfx_compression_type), ALLOCATABLE, &
    2524         116 :          DIMENSION(:, :)                                 :: t_3c_der_AO_comp, t_3c_der_RI_comp
    2525             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    2526             :       TYPE(neighbor_list_3c_type)                        :: nl_3c
    2527             :       TYPE(neighbor_list_set_p_type), DIMENSION(:), &
    2528         116 :          POINTER                                         :: nl_2c_met, nl_2c_pot
    2529         116 :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
    2530         116 :       TYPE(qs_force_type), DIMENSION(:), POINTER         :: force
    2531         116 :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
    2532             :       TYPE(virial_type), POINTER                         :: virial
    2533             : 
    2534             :       !The idea is the following: we need to compute the gradients
    2535             :       ! d/dx [P_ab P_cd (acP) S^-1_PQ (Q|R) S^-1_RS (Sbd)]
    2536             :       ! Which we do in a few steps:
    2537             :       ! 1) Contract the density matrices with the 3c integrals: M_acS = P_ab P_cd (Sbd)
    2538             :       ! 2) Calculate the 3c contributions: d/dx (acP) [S^-1_PQ (Q|R) S^-1_RS M_acS]
    2539             :       !    For maximum perf, we first multiply all 2c matrices together, than contract with retain_sparsity
    2540             :       ! 3) Contract the 3c integrals and the M tensor together in order to only work with 2c quantities:
    2541             :       !    R_PS = (acP) M_acS
    2542             :       ! 4) From there, we can easily calculate the 2c contributions to the force:
    2543             :       !    Potential: [S^-1*R*S^-1]_QR d/dx (Q|R)
    2544             :       !    Metric:    [S^-1*R*S^-1*(Q|R)*S^-1]_UV d/dx S_UV
    2545             : 
    2546         116 :       NULLIFY (particle_set, virial, cell, force, atomic_kind_set, nl_2c_pot, nl_2c_met)
    2547         116 :       NULLIFY (orb_basis, ri_basis, qs_kind_set, particle_set, dft_control, dbcsr_dist)
    2548             : 
    2549         116 :       use_virial_prv = .FALSE.
    2550         116 :       IF (PRESENT(use_virial)) use_virial_prv = use_virial
    2551             : 
    2552         116 :       do_resp = .FALSE.
    2553         116 :       IF (PRESENT(rho_ao_resp)) THEN
    2554          30 :          IF (ASSOCIATED(rho_ao_resp(1)%matrix)) do_resp = .TRUE.
    2555             :       END IF
    2556             : 
    2557         116 :       resp_only_prv = .FALSE.
    2558         116 :       IF (PRESENT(resp_only)) resp_only_prv = resp_only
    2559             : 
    2560         116 :       unit_nr_dbcsr = ri_data%unit_nr_dbcsr
    2561             : 
    2562             :       CALL get_qs_env(qs_env, natom=natom, particle_set=particle_set, nkind=nkind, &
    2563             :                       atomic_kind_set=atomic_kind_set, virial=virial, &
    2564             :                       cell=cell, force=force, para_env=para_env, dft_control=dft_control, &
    2565         116 :                       qs_kind_set=qs_kind_set, dbcsr_dist=dbcsr_dist)
    2566             : 
    2567             :       CALL create_3c_tensor(t_3c_ao_ri_ao, dist1, dist2, dist3, ri_data%pgrid_1, &
    2568             :                             ri_data%bsizes_AO_split, ri_data%bsizes_RI_split, ri_data%bsizes_AO_split, &
    2569         116 :                             [1, 2], [3], name="(AO RI | AO)")
    2570         116 :       DEALLOCATE (dist1, dist2, dist3)
    2571             : 
    2572             :       CALL create_3c_tensor(t_3c_ri_ao_ao, dist1, dist2, dist3, ri_data%pgrid_2, &
    2573             :                             ri_data%bsizes_RI_split, ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
    2574         116 :                             [1], [2, 3], name="(RI | AO AO)")
    2575         116 :       DEALLOCATE (dist1, dist2, dist3)
    2576             : 
    2577         916 :       ALLOCATE (basis_set_RI(nkind), basis_set_AO(nkind))
    2578         116 :       CALL basis_set_list_setup(basis_set_RI, ri_data%ri_basis_type, qs_kind_set)
    2579         116 :       CALL get_particle_set(particle_set, qs_kind_set, basis=basis_set_RI)
    2580         116 :       CALL basis_set_list_setup(basis_set_AO, ri_data%orb_basis_type, qs_kind_set)
    2581         116 :       CALL get_particle_set(particle_set, qs_kind_set, basis=basis_set_AO)
    2582             : 
    2583         342 :       DO ibasis = 1, SIZE(basis_set_AO)
    2584         226 :          orb_basis => basis_set_AO(ibasis)%gto_basis_set
    2585         226 :          CALL init_interaction_radii_orb_basis(orb_basis, ri_data%eps_pgf_orb)
    2586         226 :          ri_basis => basis_set_RI(ibasis)%gto_basis_set
    2587         342 :          CALL init_interaction_radii_orb_basis(ri_basis, ri_data%eps_pgf_orb)
    2588             :       END DO
    2589             : 
    2590             :       ! Precompute the derivatives
    2591        5684 :       ALLOCATE (t_2c_der_metric(3), t_2c_der_RI(3), t_3c_der_AO(3), t_3c_der_RI(3))
    2592         116 :       IF (use_virial) THEN
    2593             :          CALL precalc_derivatives(t_3c_der_RI_comp, t_3c_der_AO_comp, t_3c_der_RI_ind, t_3c_der_AO_ind, &
    2594             :                                   t_2c_der_RI, t_2c_der_metric, t_3c_ri_ao_ao, &
    2595             :                                   basis_set_AO, basis_set_RI, ri_data, qs_env, &
    2596             :                                   nl_2c_pot=nl_2c_pot, nl_2c_met=nl_2c_met, &
    2597           4 :                                   nl_3c_out=nl_3c, t_3c_virial=t_3c_virial)
    2598             : 
    2599          16 :          ALLOCATE (col_bsize(natom), row_bsize(natom))
    2600          16 :          col_bsize(:) = ri_data%bsizes_RI
    2601          16 :          row_bsize(:) = ri_data%bsizes_RI
    2602           4 :          CALL dbcsr_create(virial_trace, "virial_trace", dbcsr_dist, dbcsr_type_no_symmetry, row_bsize, col_bsize)
    2603           4 :          CALL dbt_create(virial_trace, t_2c_virial)
    2604           4 :          DEALLOCATE (col_bsize, row_bsize)
    2605             :       ELSE
    2606             :          CALL precalc_derivatives(t_3c_der_RI_comp, t_3c_der_AO_comp, t_3c_der_RI_ind, t_3c_der_AO_ind, &
    2607             :                                   t_2c_der_RI, t_2c_der_metric, t_3c_ri_ao_ao, &
    2608         112 :                                   basis_set_AO, basis_set_RI, ri_data, qs_env)
    2609             :       END IF
    2610             : 
    2611             :       ! Keep track of derivative sparsity to be able to use retain_sparsity in contraction
    2612         116 :       CALL dbt_create(t_3c_ri_ao_ao, t_3c_sparse)
    2613         464 :       DO i_xyz = 1, 3
    2614        1490 :          DO i_mem = 1, SIZE(t_3c_der_RI_comp, 1)
    2615             :             CALL decompress_tensor(t_3c_ri_ao_ao, t_3c_der_RI_ind(i_mem, i_xyz)%ind, &
    2616        1026 :                                    t_3c_der_RI_comp(i_mem, i_xyz), ri_data%filter_eps_storage)
    2617        1026 :             CALL dbt_copy(t_3c_ri_ao_ao, t_3c_sparse, summation=.TRUE., move_data=.TRUE.)
    2618             : 
    2619             :             CALL decompress_tensor(t_3c_ri_ao_ao, t_3c_der_AO_ind(i_mem, i_xyz)%ind, &
    2620        1026 :                                    t_3c_der_AO_comp(i_mem, i_xyz), ri_data%filter_eps_storage)
    2621        1026 :             CALL dbt_copy(t_3c_ri_ao_ao, t_3c_sparse, summation=.TRUE.)
    2622        1374 :             CALL dbt_copy(t_3c_ri_ao_ao, t_3c_sparse, order=[1, 3, 2], summation=.TRUE., move_data=.TRUE.)
    2623             :          END DO
    2624             :       END DO
    2625             : 
    2626         464 :       DO i_xyz = 1, 3
    2627         348 :          CALL dbt_create(t_3c_ri_ao_ao, t_3c_der_RI(i_xyz))
    2628         464 :          CALL dbt_create(t_3c_ri_ao_ao, t_3c_der_AO(i_xyz))
    2629             :       END DO
    2630             : 
    2631             :       ! Some utilities
    2632         116 :       spin_fac = 0.5_dp
    2633         116 :       IF (nspins == 2) spin_fac = 1.0_dp
    2634         116 :       IF (PRESENT(rescale_factor)) spin_fac = spin_fac*rescale_factor
    2635             : 
    2636         348 :       ALLOCATE (idx_to_at_RI(SIZE(ri_data%bsizes_RI_split)))
    2637         116 :       CALL get_idx_to_atom(idx_to_at_RI, ri_data%bsizes_RI_split, ri_data%bsizes_RI)
    2638             : 
    2639         348 :       ALLOCATE (idx_to_at_AO(SIZE(ri_data%bsizes_AO_split)))
    2640         116 :       CALL get_idx_to_atom(idx_to_at_AO, ri_data%bsizes_AO_split, ri_data%bsizes_AO)
    2641             : 
    2642         116 :       CALL get_atomic_kind_set(atomic_kind_set, kind_of=kind_of, atom_of_kind=atom_of_kind)
    2643             : 
    2644             :       ! Go over batches of the 2 AO indices to save memory
    2645         116 :       n_mem = ri_data%n_mem
    2646         464 :       ALLOCATE (batch_start(n_mem), batch_end(n_mem))
    2647         464 :       batch_start(:) = ri_data%starts_array_mem(:)
    2648         464 :       batch_end(:) = ri_data%ends_array_mem(:)
    2649             : 
    2650         348 :       ALLOCATE (batch_ranges(n_mem + 1))
    2651         464 :       batch_ranges(:n_mem) = ri_data%starts_array_mem_block(:)
    2652         116 :       batch_ranges(n_mem + 1) = ri_data%ends_array_mem_block(n_mem) + 1
    2653             : 
    2654         116 :       n_mem_RI = ri_data%n_mem_RI
    2655         464 :       ALLOCATE (batch_start_RI(n_mem_RI), batch_end_RI(n_mem_RI))
    2656         464 :       batch_start_RI(:) = ri_data%starts_array_RI_mem(:)
    2657         464 :       batch_end_RI(:) = ri_data%ends_array_RI_mem(:)
    2658             : 
    2659         348 :       ALLOCATE (batch_ranges_RI(n_mem_RI + 1))
    2660         464 :       batch_ranges_RI(:n_mem_RI) = ri_data%starts_array_RI_mem_block(:)
    2661         116 :       batch_ranges_RI(n_mem_RI + 1) = ri_data%ends_array_RI_mem_block(n_mem_RI) + 1
    2662             : 
    2663             :       ! Pre-create all the needed tensors
    2664             :       CALL create_2c_tensor(rho_ao_1, dist1, dist2, ri_data%pgrid_2d, &
    2665             :                             ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
    2666         116 :                             name="(AO | AO)")
    2667         116 :       DEALLOCATE (dist1, dist2)
    2668         116 :       CALL dbt_create(rho_ao_1, rho_ao_2)
    2669             : 
    2670             :       CALL create_2c_tensor(t_2c_RI, dist1, dist2, ri_data%pgrid_2d, &
    2671             :                             ri_data%bsizes_RI_split, ri_data%bsizes_RI_split, name="(RI | RI)")
    2672         116 :       DEALLOCATE (dist1, dist2)
    2673         116 :       CALL dbt_create(t_2c_RI, t_SVS)
    2674         116 :       CALL dbt_create(t_2c_RI, t_R)
    2675         116 :       CALL dbt_create(t_2c_RI, t_2c_RI_tmp)
    2676             : 
    2677         116 :       CALL dbt_create(t_3c_ao_ri_ao, t_3c_1)
    2678         116 :       CALL dbt_create(t_3c_ao_ri_ao, t_3c_2)
    2679         116 :       CALL dbt_create(t_3c_ri_ao_ao, t_3c_3)
    2680         116 :       CALL dbt_create(t_3c_ri_ao_ao, t_3c_4)
    2681         116 :       CALL dbt_create(t_3c_ri_ao_ao, t_3c_5)
    2682         116 :       CALL dbt_create(t_3c_ri_ao_ao, t_3c_help_1)
    2683         116 :       CALL dbt_create(t_3c_ri_ao_ao, t_3c_help_2)
    2684             : 
    2685         116 :       CALL dbt_create(t_3c_ao_ri_ao, t_3c_int)
    2686         116 :       CALL dbt_copy(ri_data%t_3c_int_ctr_2(1, 1), t_3c_int)
    2687             : 
    2688         116 :       CALL dbt_create(t_3c_ri_ao_ao, t_3c_int_2)
    2689             : 
    2690         116 :       CALL para_env%sync()
    2691         116 :       t1 = m_walltime()
    2692             : 
    2693             :       !Pre-calculate the necessary 2-center quantities
    2694         116 :       IF (.NOT. ri_data%same_op) THEN
    2695             :          !S^-1 * V * S^-1
    2696             :          CALL dbt_contract(1.0_dp, ri_data%t_2c_inv(1, 1), ri_data%t_2c_pot(1, 1), 0.0_dp, t_2c_RI, &
    2697             :                            contract_1=[2], notcontract_1=[1], &
    2698             :                            contract_2=[1], notcontract_2=[2], &
    2699             :                            map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
    2700          28 :                            unit_nr=unit_nr_dbcsr, flop=nflop)
    2701          28 :          ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
    2702             : 
    2703             :          CALL dbt_contract(1.0_dp, t_2c_RI, ri_data%t_2c_inv(1, 1), 0.0_dp, t_SVS, &
    2704             :                            contract_1=[2], notcontract_1=[1], &
    2705             :                            contract_2=[1], notcontract_2=[2], &
    2706             :                            map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
    2707          28 :                            unit_nr=unit_nr_dbcsr, flop=nflop)
    2708          28 :          ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
    2709             :       ELSE
    2710             :          ! Simply V^-1
    2711          88 :          CALL dbt_copy(ri_data%t_2c_inv(1, 1), t_SVS)
    2712             :       END IF
    2713             : 
    2714         116 :       CALL dbt_batched_contract_init(t_3c_int, batch_range_1=batch_ranges, batch_range_3=batch_ranges)
    2715             :       CALL dbt_batched_contract_init(t_3c_int_2, batch_range_1=batch_ranges_RI, &
    2716         116 :                                      batch_range_2=batch_ranges, batch_range_3=batch_ranges)
    2717         116 :       CALL dbt_batched_contract_init(t_3c_1, batch_range_1=batch_ranges, batch_range_3=batch_ranges)
    2718         116 :       CALL dbt_batched_contract_init(t_3c_2, batch_range_1=batch_ranges, batch_range_3=batch_ranges)
    2719             :       CALL dbt_batched_contract_init(t_3c_3, batch_range_1=batch_ranges_RI, &
    2720         116 :                                      batch_range_2=batch_ranges, batch_range_3=batch_ranges)
    2721             :       CALL dbt_batched_contract_init(t_3c_4, batch_range_1=batch_ranges_RI, &
    2722         116 :                                      batch_range_2=batch_ranges, batch_range_3=batch_ranges)
    2723             :       CALL dbt_batched_contract_init(t_3c_5, batch_range_1=batch_ranges_RI, &
    2724         116 :                                      batch_range_2=batch_ranges, batch_range_3=batch_ranges)
    2725             :       CALL dbt_batched_contract_init(t_3c_sparse, batch_range_1=batch_ranges_RI, &
    2726         116 :                                      batch_range_2=batch_ranges, batch_range_3=batch_ranges)
    2727             : 
    2728         242 :       DO i_spin = 1, nspins
    2729             : 
    2730             :          !Prepare Pmat in tensor format
    2731         126 :          CALL dbt_create(rho_ao(i_spin, 1)%matrix, t_2c_tmp)
    2732         126 :          CALL dbt_copy_matrix_to_tensor(rho_ao(i_spin, 1)%matrix, t_2c_tmp)
    2733         126 :          CALL dbt_copy(t_2c_tmp, rho_ao_1, move_data=.TRUE.)
    2734         126 :          CALL dbt_destroy(t_2c_tmp)
    2735             : 
    2736         126 :          IF (.NOT. do_resp) THEN
    2737          94 :             CALL dbt_copy(rho_ao_1, rho_ao_2)
    2738          32 :          ELSE IF (do_resp .AND. resp_only_prv) THEN
    2739             : 
    2740          24 :             CALL dbt_create(rho_ao_resp(i_spin)%matrix, t_2c_tmp)
    2741          24 :             CALL dbt_copy_matrix_to_tensor(rho_ao_resp(i_spin)%matrix, t_2c_tmp)
    2742          24 :             CALL dbt_copy(t_2c_tmp, rho_ao_2)
    2743             :             !symmetry allows to take 2*P_resp rasther than explicitely take all cross products
    2744          24 :             CALL dbt_copy(t_2c_tmp, rho_ao_2, summation=.TRUE., move_data=.TRUE.)
    2745          24 :             CALL dbt_destroy(t_2c_tmp)
    2746             :          ELSE
    2747             : 
    2748             :             !if not resp_only, need P-P_resp and P+P_resp
    2749           8 :             CALL dbt_copy(rho_ao_1, rho_ao_2)
    2750           8 :             CALL dbcsr_create(dbcsr_tmp, template=rho_ao_resp(i_spin)%matrix)
    2751           8 :             CALL dbcsr_add(dbcsr_tmp, rho_ao_resp(i_spin)%matrix, 0.0_dp, -1.0_dp)
    2752           8 :             CALL dbt_create(dbcsr_tmp, t_2c_tmp)
    2753           8 :             CALL dbt_copy_matrix_to_tensor(dbcsr_tmp, t_2c_tmp)
    2754           8 :             CALL dbt_copy(t_2c_tmp, rho_ao_1, summation=.TRUE., move_data=.TRUE.)
    2755           8 :             CALL dbcsr_release(dbcsr_tmp)
    2756             : 
    2757           8 :             CALL dbt_copy_matrix_to_tensor(rho_ao_resp(i_spin)%matrix, t_2c_tmp)
    2758           8 :             CALL dbt_copy(t_2c_tmp, rho_ao_2, summation=.TRUE., move_data=.TRUE.)
    2759           8 :             CALL dbt_destroy(t_2c_tmp)
    2760             : 
    2761             :          END IF
    2762         126 :          work_virial = 0.0_dp
    2763             : 
    2764         126 :          CALL timeset(routineN//"_3c", handle)
    2765             :          !Start looping of the batches
    2766         504 :          DO i_mem = 1, n_mem
    2767        1134 :             ibounds(:, 1) = [batch_start(i_mem), batch_end(i_mem)]
    2768             : 
    2769         378 :             CALL dbt_batched_contract_init(rho_ao_1)
    2770             :             CALL dbt_contract(1.0_dp, rho_ao_1, t_3c_int, 0.0_dp, t_3c_1, &
    2771             :                               contract_1=[1], notcontract_1=[2], &
    2772             :                               contract_2=[3], notcontract_2=[1, 2], &
    2773             :                               map_1=[3], map_2=[1, 2], filter_eps=ri_data%filter_eps, &
    2774         378 :                               bounds_2=ibounds, unit_nr=unit_nr_dbcsr, flop=nflop)
    2775         378 :             ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
    2776         378 :             CALL dbt_batched_contract_finalize(rho_ao_1)
    2777             : 
    2778         378 :             CALL dbt_copy(t_3c_1, t_3c_2, order=[3, 2, 1], move_data=.TRUE.)
    2779             : 
    2780        1512 :             DO j_mem = 1, n_mem
    2781        3402 :                jbounds(:, 1) = [batch_start(j_mem), batch_end(j_mem)]
    2782             : 
    2783        1134 :                CALL dbt_batched_contract_init(rho_ao_2)
    2784             :                CALL dbt_contract(1.0_dp, rho_ao_2, t_3c_2, 0.0_dp, t_3c_1, &
    2785             :                                  contract_1=[1], notcontract_1=[2], &
    2786             :                                  contract_2=[3], notcontract_2=[1, 2], &
    2787             :                                  map_1=[3], map_2=[1, 2], filter_eps=ri_data%filter_eps, &
    2788        1134 :                                  bounds_2=jbounds, unit_nr=unit_nr_dbcsr, flop=nflop)
    2789        1134 :                ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
    2790        1134 :                CALL dbt_batched_contract_finalize(rho_ao_2)
    2791             : 
    2792        3402 :                bounds_cpy(:, 1) = [batch_start(i_mem), batch_end(i_mem)]
    2793        6840 :                bounds_cpy(:, 2) = [1, SUM(ri_data%bsizes_RI)]
    2794        3402 :                bounds_cpy(:, 3) = [batch_start(j_mem), batch_end(j_mem)]
    2795        1134 :                CALL dbt_copy(t_3c_int, t_3c_int_2, order=[2, 1, 3], bounds=bounds_cpy)
    2796        1134 :                CALL dbt_copy(t_3c_1, t_3c_3, order=[2, 1, 3], move_data=.TRUE.)
    2797             : 
    2798        4914 :                DO k_mem = 1, n_mem_RI
    2799       10206 :                   kbounds(:, 1) = [batch_start_RI(k_mem), batch_end_RI(k_mem)]
    2800             : 
    2801       10206 :                   bounds_cpy(:, 1) = [batch_start_RI(k_mem), batch_end_RI(k_mem)]
    2802       10206 :                   bounds_cpy(:, 2) = [batch_start(i_mem), batch_end(i_mem)]
    2803       10206 :                   bounds_cpy(:, 3) = [batch_start(j_mem), batch_end(j_mem)]
    2804        3402 :                   CALL dbt_copy(t_3c_sparse, t_3c_4, bounds=bounds_cpy)
    2805             : 
    2806             :                   !Contract with the 2-center product S^-1 * V * S^-1 while keeping sparsity of derivatives
    2807        3402 :                   CALL dbt_batched_contract_init(t_SVS)
    2808             :                   CALL dbt_contract(1.0_dp, t_SVS, t_3c_3, 0.0_dp, t_3c_4, &
    2809             :                                     contract_1=[2], notcontract_1=[1], &
    2810             :                                     contract_2=[1], notcontract_2=[2, 3], &
    2811             :                                     map_1=[1], map_2=[2, 3], filter_eps=ri_data%filter_eps, &
    2812        3402 :                                     retain_sparsity=.TRUE., unit_nr=unit_nr_dbcsr, flop=nflop)
    2813        3402 :                   ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
    2814        3402 :                   CALL dbt_batched_contract_finalize(t_SVS)
    2815             : 
    2816        3402 :                   CALL dbt_copy(t_3c_4, t_3c_5, summation=.TRUE., move_data=.TRUE.)
    2817             : 
    2818       10206 :                   ijbounds(:, 1) = ibounds(:, 1)
    2819       10206 :                   ijbounds(:, 2) = jbounds(:, 1)
    2820             : 
    2821             :                   !Contract R_PS = (acP) M_acS
    2822        3402 :                   CALL dbt_batched_contract_init(t_R)
    2823             :                   CALL dbt_contract(1.0_dp, t_3c_int_2, t_3c_3, 1.0_dp, t_R, &
    2824             :                                     contract_1=[2, 3], notcontract_1=[1], &
    2825             :                                     contract_2=[2, 3], notcontract_2=[1], &
    2826             :                                     map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
    2827             :                                     bounds_1=ijbounds, bounds_3=kbounds, &
    2828        3402 :                                     unit_nr=unit_nr_dbcsr, flop=nflop)
    2829        3402 :                   ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
    2830        4536 :                   CALL dbt_batched_contract_finalize(t_R)
    2831             : 
    2832             :                END DO !k_mem
    2833             :             END DO !j_mem
    2834             : 
    2835         378 :             CALL dbt_copy(t_3c_5, t_3c_help_1, move_data=.TRUE.)
    2836             : 
    2837             :             !The force from the 3c derivatives
    2838         378 :             pref = -0.5_dp*2.0_dp*hf_fraction*spin_fac
    2839             : 
    2840        1476 :             DO k_mem = 1, SIZE(t_3c_der_RI_comp, 1)
    2841        4392 :                DO i_xyz = 1, 3
    2842        3294 :                   CALL dbt_clear(t_3c_der_RI(i_xyz))
    2843             :                   CALL decompress_tensor(t_3c_der_RI(i_xyz), t_3c_der_RI_ind(k_mem, i_xyz)%ind, &
    2844        4392 :                                          t_3c_der_RI_comp(k_mem, i_xyz), ri_data%filter_eps_storage)
    2845             :                END DO
    2846             :                CALL get_force_from_3c_trace(force, t_3c_help_1, t_3c_der_RI, atom_of_kind, kind_of, &
    2847        1476 :                                             idx_to_at_RI, pref)
    2848             :             END DO
    2849             : 
    2850         378 :             pref = -0.5_dp*4.0_dp*hf_fraction*spin_fac
    2851         378 :             IF (do_resp) THEN
    2852          96 :                pref = 0.5_dp*pref
    2853          96 :                CALL dbt_copy(t_3c_help_1, t_3c_help_2, order=[1, 3, 2])
    2854             :             END IF
    2855             : 
    2856        1476 :             DO k_mem = 1, SIZE(t_3c_der_AO_comp, 1)
    2857        4392 :                DO i_xyz = 1, 3
    2858        3294 :                   CALL dbt_clear(t_3c_der_AO(i_xyz))
    2859             :                   CALL decompress_tensor(t_3c_der_AO(i_xyz), t_3c_der_AO_ind(k_mem, i_xyz)%ind, &
    2860        4392 :                                          t_3c_der_AO_comp(k_mem, i_xyz), ri_data%filter_eps_storage)
    2861             :                END DO
    2862             :                CALL get_force_from_3c_trace(force, t_3c_help_1, t_3c_der_AO, atom_of_kind, kind_of, &
    2863        1098 :                                             idx_to_at_AO, pref, deriv_dim=2)
    2864             : 
    2865        1476 :                IF (do_resp) THEN
    2866             :                   CALL get_force_from_3c_trace(force, t_3c_help_2, t_3c_der_AO, atom_of_kind, kind_of, &
    2867         276 :                                                idx_to_at_AO, pref, deriv_dim=2)
    2868             :                END IF
    2869             :             END DO
    2870             : 
    2871             :             !The 3c virial contribution. Note: only fraction of integrals correspondig to i_mem calculated
    2872         378 :             IF (use_virial) THEN
    2873          12 :                pref = -0.5_dp*2.0_dp*hf_fraction*spin_fac
    2874          12 :                CALL dbt_copy(t_3c_help_1, t_3c_virial, move_data=.TRUE.)
    2875             :                CALL calc_3c_virial(work_virial, t_3c_virial, pref, qs_env, nl_3c, basis_set_RI, &
    2876             :                                    basis_set_AO, basis_set_AO, ri_data%ri_metric, &
    2877          12 :                                    der_eps=ri_data%eps_schwarz_forces, op_pos=1)
    2878             : 
    2879          12 :                CALL dbt_clear(t_3c_virial)
    2880             :             END IF
    2881             : 
    2882         378 :             CALL dbt_clear(t_3c_help_1)
    2883         504 :             CALL dbt_clear(t_3c_help_2)
    2884             :          END DO !i_mem
    2885         126 :          CALL timestop(handle)
    2886             : 
    2887         126 :          CALL timeset(routineN//"_2c", handle)
    2888             :          !Now we deal with all the 2-center quantities
    2889             :          !Calculate S^-1 * R * S^-1
    2890             :          CALL dbt_contract(1.0_dp, ri_data%t_2c_inv(1, 1), t_R, 0.0_dp, t_2c_RI_tmp, &
    2891             :                            contract_1=[2], notcontract_1=[1], &
    2892             :                            contract_2=[1], notcontract_2=[2], &
    2893             :                            map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
    2894         126 :                            unit_nr=unit_nr_dbcsr, flop=nflop)
    2895         126 :          ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
    2896             : 
    2897             :          CALL dbt_contract(1.0_dp, t_2c_RI_tmp, ri_data%t_2c_inv(1, 1), 0.0_dp, t_2c_RI, &
    2898             :                            contract_1=[2], notcontract_1=[1], &
    2899             :                            contract_2=[1], notcontract_2=[2], &
    2900             :                            map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
    2901         126 :                            unit_nr=unit_nr_dbcsr, flop=nflop)
    2902         126 :          ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
    2903             : 
    2904             :          !Calculate the potential contribution to the force: [S^-1*R*S^-1]_QR d/dx (Q|R)
    2905         126 :          pref = 0.5_dp*hf_fraction*spin_fac
    2906         126 :          IF (.NOT. ri_data%same_op) pref = -pref
    2907         126 :          CALL get_2c_der_force(force, t_2c_RI, t_2c_der_RI, atom_of_kind, kind_of, idx_to_at_RI, pref)
    2908             : 
    2909             :          !Calculate the contribution to the virial on the fly
    2910         126 :          IF (use_virial_prv) THEN
    2911           4 :             CALL dbt_copy(t_2c_RI, t_2c_virial)
    2912           4 :             CALL dbt_copy_tensor_to_matrix(t_2c_virial, virial_trace)
    2913             :             CALL calc_2c_virial(work_virial, virial_trace, pref, qs_env, nl_2c_pot, &
    2914           4 :                                 basis_set_RI, basis_set_RI, ri_data%hfx_pot)
    2915             :          END IF
    2916             : 
    2917             :          !And that from the metric: [S^-1*R*S^-1*(Q|R)*S^-1]_UV d/dx S_UV
    2918         126 :          IF (.NOT. ri_data%same_op) THEN
    2919             :             CALL dbt_contract(1.0_dp, t_2c_RI, ri_data%t_2c_pot(1, 1), 0.0_dp, t_2c_RI_tmp, &
    2920             :                               contract_1=[2], notcontract_1=[1], &
    2921             :                               contract_2=[1], notcontract_2=[2], &
    2922             :                               map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
    2923          28 :                               unit_nr=unit_nr_dbcsr, flop=nflop)
    2924          28 :             ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
    2925             : 
    2926             :             CALL dbt_contract(1.0_dp, t_2c_RI_tmp, ri_data%t_2c_inv(1, 1), 0.0_dp, t_2c_RI, &
    2927             :                               contract_1=[2], notcontract_1=[1], &
    2928             :                               contract_2=[1], notcontract_2=[2], &
    2929             :                               map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
    2930          28 :                               unit_nr=unit_nr_dbcsr, flop=nflop)
    2931          28 :             ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
    2932             : 
    2933          28 :             pref = 0.5_dp*2.0_dp*hf_fraction*spin_fac
    2934          28 :             CALL get_2c_der_force(force, t_2c_RI, t_2c_der_metric, atom_of_kind, kind_of, idx_to_at_RI, pref)
    2935             : 
    2936          28 :             IF (use_virial_prv) THEN
    2937           0 :                CALL dbt_copy(t_2c_RI, t_2c_virial)
    2938           0 :                CALL dbt_copy_tensor_to_matrix(t_2c_virial, virial_trace)
    2939             :                CALL calc_2c_virial(work_virial, virial_trace, pref, qs_env, nl_2c_met, &
    2940           0 :                                    basis_set_RI, basis_set_RI, ri_data%ri_metric)
    2941             :             END IF
    2942             :          END IF
    2943         126 :          CALL dbt_clear(t_2c_RI)
    2944         126 :          CALL dbt_clear(t_2c_RI_tmp)
    2945         126 :          CALL dbt_clear(t_R)
    2946         126 :          CALL dbt_clear(t_3c_help_1)
    2947         126 :          CALL dbt_clear(t_3c_help_2)
    2948         126 :          CALL timestop(handle)
    2949             : 
    2950         494 :          IF (use_virial_prv) THEN
    2951          16 :             DO k_xyz = 1, 3
    2952          52 :                DO j_xyz = 1, 3
    2953         156 :                   DO i_xyz = 1, 3
    2954             :                      virial%pv_fock_4c(i_xyz, j_xyz) = virial%pv_fock_4c(i_xyz, j_xyz) &
    2955         144 :                                                        + work_virial(i_xyz, k_xyz)*cell%hmat(j_xyz, k_xyz)
    2956             :                   END DO
    2957             :                END DO
    2958             :             END DO
    2959             :          END IF
    2960             :       END DO !i_spin
    2961             : 
    2962         116 :       CALL dbt_batched_contract_finalize(t_3c_int)
    2963         116 :       CALL dbt_batched_contract_finalize(t_3c_int_2)
    2964         116 :       CALL dbt_batched_contract_finalize(t_3c_1)
    2965         116 :       CALL dbt_batched_contract_finalize(t_3c_2)
    2966         116 :       CALL dbt_batched_contract_finalize(t_3c_3)
    2967         116 :       CALL dbt_batched_contract_finalize(t_3c_4)
    2968         116 :       CALL dbt_batched_contract_finalize(t_3c_5)
    2969         116 :       CALL dbt_batched_contract_finalize(t_3c_sparse)
    2970             : 
    2971         116 :       CALL para_env%sync()
    2972         116 :       t2 = m_walltime()
    2973             : 
    2974         116 :       CALL dbt_copy(t_3c_int, ri_data%t_3c_int_ctr_2(1, 1), move_data=.TRUE.)
    2975             : 
    2976             :       !clean-up
    2977         116 :       CALL dbt_destroy(rho_ao_1)
    2978         116 :       CALL dbt_destroy(rho_ao_2)
    2979         116 :       CALL dbt_destroy(t_3c_ao_ri_ao)
    2980         116 :       CALL dbt_destroy(t_3c_ri_ao_ao)
    2981         116 :       CALL dbt_destroy(t_3c_int)
    2982         116 :       CALL dbt_destroy(t_3c_int_2)
    2983         116 :       CALL dbt_destroy(t_3c_1)
    2984         116 :       CALL dbt_destroy(t_3c_2)
    2985         116 :       CALL dbt_destroy(t_3c_3)
    2986         116 :       CALL dbt_destroy(t_3c_4)
    2987         116 :       CALL dbt_destroy(t_3c_5)
    2988         116 :       CALL dbt_destroy(t_3c_help_1)
    2989         116 :       CALL dbt_destroy(t_3c_help_2)
    2990         116 :       CALL dbt_destroy(t_3c_sparse)
    2991         116 :       CALL dbt_destroy(t_SVS)
    2992         116 :       CALL dbt_destroy(t_R)
    2993         116 :       CALL dbt_destroy(t_2c_RI)
    2994         116 :       CALL dbt_destroy(t_2c_RI_tmp)
    2995             : 
    2996         464 :       DO i_xyz = 1, 3
    2997         348 :          CALL dbt_destroy(t_3c_der_RI(i_xyz))
    2998         348 :          CALL dbt_destroy(t_3c_der_AO(i_xyz))
    2999         348 :          CALL dbt_destroy(t_2c_der_RI(i_xyz))
    3000         464 :          IF (.NOT. ri_data%same_op) CALL dbt_destroy(t_2c_der_metric(i_xyz))
    3001             :       END DO
    3002             : 
    3003         464 :       DO i_xyz = 1, 3
    3004        1490 :          DO i_mem = 1, SIZE(t_3c_der_AO_comp, 1)
    3005        1026 :             CALL dealloc_containers(t_3c_der_AO_comp(i_mem, i_xyz), dummy_int)
    3006        1374 :             CALL dealloc_containers(t_3c_der_RI_comp(i_mem, i_xyz), dummy_int)
    3007             :          END DO
    3008             :       END DO
    3009        2168 :       DEALLOCATE (t_3c_der_AO_ind, t_3c_der_RI_ind)
    3010             : 
    3011         342 :       DO ibasis = 1, SIZE(basis_set_AO)
    3012         226 :          orb_basis => basis_set_AO(ibasis)%gto_basis_set
    3013         226 :          ri_basis => basis_set_RI(ibasis)%gto_basis_set
    3014         226 :          CALL init_interaction_radii_orb_basis(orb_basis, dft_control%qs_control%eps_pgf_orb)
    3015         342 :          CALL init_interaction_radii_orb_basis(ri_basis, dft_control%qs_control%eps_pgf_orb)
    3016             :       END DO
    3017             : 
    3018         116 :       IF (use_virial) THEN
    3019           4 :          CALL release_neighbor_list_sets(nl_2c_met)
    3020           4 :          CALL release_neighbor_list_sets(nl_2c_pot)
    3021           4 :          CALL neighbor_list_3c_destroy(nl_3c)
    3022           4 :          CALL dbcsr_release(virial_trace)
    3023           4 :          CALL dbt_destroy(t_2c_virial)
    3024           4 :          CALL dbt_destroy(t_3c_virial)
    3025             :       END IF
    3026             : 
    3027        2552 :    END SUBROUTINE hfx_ri_forces_Pmat
    3028             : 
    3029             : ! **************************************************************************************************
    3030             : !> \brief the general routine that calls the relevant force code
    3031             : !> \param qs_env ...
    3032             : !> \param ri_data ...
    3033             : !> \param nspins ...
    3034             : !> \param hf_fraction ...
    3035             : !> \param rho_ao ...
    3036             : !> \param rho_ao_resp ...
    3037             : !> \param mos ...
    3038             : !> \param use_virial ...
    3039             : !> \param resp_only ...
    3040             : !> \param rescale_factor ...
    3041             : ! **************************************************************************************************
    3042         130 :    SUBROUTINE hfx_ri_update_forces(qs_env, ri_data, nspins, hf_fraction, rho_ao, rho_ao_resp, &
    3043         130 :                                    mos, use_virial, resp_only, rescale_factor)
    3044             : 
    3045             :       TYPE(qs_environment_type), POINTER                 :: qs_env
    3046             :       TYPE(hfx_ri_type), INTENT(INOUT)                   :: ri_data
    3047             :       INTEGER, INTENT(IN)                                :: nspins
    3048             :       REAL(KIND=dp), INTENT(IN)                          :: hf_fraction
    3049             :       TYPE(dbcsr_p_type), DIMENSION(:, :), OPTIONAL      :: rho_ao
    3050             :       TYPE(dbcsr_p_type), DIMENSION(:), OPTIONAL         :: rho_ao_resp
    3051             :       TYPE(mo_set_type), DIMENSION(:), INTENT(IN), &
    3052             :          OPTIONAL                                        :: mos
    3053             :       LOGICAL, INTENT(IN), OPTIONAL                      :: use_virial, resp_only
    3054             :       REAL(dp), INTENT(IN), OPTIONAL                     :: rescale_factor
    3055             : 
    3056             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'hfx_ri_update_forces'
    3057             : 
    3058             :       INTEGER                                            :: handle, ispin
    3059             :       INTEGER, DIMENSION(2)                              :: homo
    3060         130 :       REAL(KIND=dp), DIMENSION(:), POINTER               :: mo_eigenvalues
    3061             :       TYPE(cp_fm_type), POINTER                          :: mo_coeff
    3062         390 :       TYPE(dbcsr_type), DIMENSION(2)                     :: mo_coeff_b
    3063             :       TYPE(dbcsr_type), POINTER                          :: mo_coeff_b_tmp
    3064             : 
    3065         130 :       CALL timeset(routineN, handle)
    3066             : 
    3067         144 :       SELECT CASE (ri_data%flavor)
    3068             :       CASE (ri_mo)
    3069             : 
    3070          32 :          DO ispin = 1, nspins
    3071          18 :             NULLIFY (mo_coeff_b_tmp)
    3072          18 :             CPASSERT(mos(ispin)%uniform_occupation)
    3073          18 :             CALL get_mo_set(mo_set=mos(ispin), mo_coeff=mo_coeff, eigenvalues=mo_eigenvalues, mo_coeff_b=mo_coeff_b_tmp)
    3074             : 
    3075          18 :             IF (.NOT. mos(ispin)%use_mo_coeff_b) CALL copy_fm_to_dbcsr(mo_coeff, mo_coeff_b_tmp)
    3076          32 :             CALL dbcsr_copy(mo_coeff_b(ispin), mo_coeff_b_tmp)
    3077             :          END DO
    3078             : 
    3079          32 :          DO ispin = 1, nspins
    3080          18 :             CALL dbcsr_scale(mo_coeff_b(ispin), SQRT(mos(ispin)%maxocc))
    3081          14 :             homo(ispin) = mos(ispin)%homo
    3082             :          END DO
    3083             : 
    3084          14 :          CALL hfx_ri_forces_mo(qs_env, ri_data, nspins, hf_fraction, mo_coeff_b, use_virial)
    3085             : 
    3086             :       CASE (ri_pmat)
    3087             : 
    3088             :          CALL hfx_ri_forces_Pmat(qs_env, ri_data, nspins, hf_fraction, rho_ao, rho_ao_resp, use_virial, &
    3089         216 :                                  resp_only, rescale_factor)
    3090             :       END SELECT
    3091             : 
    3092         274 :       DO ispin = 1, nspins
    3093         274 :          CALL dbcsr_release(mo_coeff_b(ispin))
    3094             :       END DO
    3095             : 
    3096         130 :       CALL timestop(handle)
    3097             : 
    3098         130 :    END SUBROUTINE hfx_ri_update_forces
    3099             : 
    3100             : ! **************************************************************************************************
    3101             : !> \brief Calculate the derivatives tensors for the force, in a format fit for contractions
    3102             : !> \param t_3c_der_RI_comp compressed RI derivatives
    3103             : !> \param t_3c_der_AO_comp compressed AO derivatives
    3104             : !> \param t_3c_der_RI_ind ...
    3105             : !> \param t_3c_der_AO_ind ...
    3106             : !> \param t_2c_der_RI format based on standard atomic block sizes
    3107             : !> \param t_2c_der_metric format based on standard atomic block sizes
    3108             : !> \param ri_ao_ao_template ...
    3109             : !> \param basis_set_AO ...
    3110             : !> \param basis_set_RI ...
    3111             : !> \param ri_data ...
    3112             : !> \param qs_env ...
    3113             : !> \param nl_2c_pot ...
    3114             : !> \param nl_2c_met ...
    3115             : !> \param nl_3c_out ...
    3116             : !> \param t_3c_virial ...
    3117             : ! **************************************************************************************************
    3118        4160 :    SUBROUTINE precalc_derivatives(t_3c_der_RI_comp, t_3c_der_AO_comp, t_3c_der_RI_ind, t_3c_der_AO_ind, &
    3119             :                                   t_2c_der_RI, t_2c_der_metric, ri_ao_ao_template, &
    3120             :                                   basis_set_AO, basis_set_RI, ri_data, qs_env, &
    3121             :                                   nl_2c_pot, nl_2c_met, nl_3c_out, t_3c_virial)
    3122             : 
    3123             :       TYPE(hfx_compression_type), ALLOCATABLE, &
    3124             :          DIMENSION(:, :), INTENT(INOUT)                  :: t_3c_der_RI_comp, t_3c_der_AO_comp
    3125             :       TYPE(block_ind_type), ALLOCATABLE, DIMENSION(:, :) :: t_3c_der_RI_ind, t_3c_der_AO_ind
    3126             :       TYPE(dbt_type), DIMENSION(3), INTENT(OUT)          :: t_2c_der_RI, t_2c_der_metric
    3127             :       TYPE(dbt_type), INTENT(INOUT)                      :: ri_ao_ao_template
    3128             :       TYPE(gto_basis_set_p_type), ALLOCATABLE, &
    3129             :          DIMENSION(:), TARGET                            :: basis_set_AO, basis_set_RI
    3130             :       TYPE(hfx_ri_type), INTENT(INOUT)                   :: ri_data
    3131             :       TYPE(qs_environment_type), POINTER                 :: qs_env
    3132             :       TYPE(neighbor_list_set_p_type), DIMENSION(:), &
    3133             :          OPTIONAL, POINTER                               :: nl_2c_pot, nl_2c_met
    3134             :       TYPE(neighbor_list_3c_type), OPTIONAL              :: nl_3c_out
    3135             :       TYPE(dbt_type), INTENT(INOUT), OPTIONAL            :: t_3c_virial
    3136             : 
    3137             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'precalc_derivatives'
    3138             : 
    3139             :       INTEGER                                            :: handle, i_mem, i_xyz, n_mem, natom, &
    3140             :                                                             nkind, nthreads
    3141             :       INTEGER(int_8)                                     :: nze, nze_tot
    3142         130 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: dist1, dist2, dist_AO_1, dist_AO_2, &
    3143         130 :                                                             dist_RI, dummy_end, dummy_start, &
    3144         260 :                                                             end_blocks, start_blocks
    3145             :       INTEGER, DIMENSION(3)                              :: pcoord, pdims
    3146         260 :       INTEGER, DIMENSION(:), POINTER                     :: col_bsize, row_bsize
    3147             :       REAL(dp)                                           :: compression_factor, memory, occ
    3148             :       TYPE(dbcsr_distribution_type)                      :: dbcsr_dist
    3149        1690 :       TYPE(dbcsr_type), DIMENSION(1, 3)                  :: t_2c_der_metric_prv, t_2c_der_RI_prv
    3150         390 :       TYPE(dbt_pgrid_type)                               :: pgrid
    3151        3380 :       TYPE(dbt_type)                                     :: t_2c_template, t_2c_tmp, t_3c_template
    3152         130 :       TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :, :)    :: t_3c_der_AO_prv, t_3c_der_RI_prv
    3153             :       TYPE(dft_control_type), POINTER                    :: dft_control
    3154             :       TYPE(distribution_2d_type), POINTER                :: dist_2d
    3155             :       TYPE(distribution_3d_type)                         :: dist_3d, dist_3d_out
    3156         130 :       TYPE(mp_cart_type)                                 :: mp_comm_t3c, mp_comm_t3c_out
    3157             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    3158             :       TYPE(neighbor_list_3c_type)                        :: nl_3c
    3159             :       TYPE(neighbor_list_set_p_type), DIMENSION(:), &
    3160         130 :          POINTER                                         :: nl_2c
    3161         130 :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
    3162         130 :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
    3163             : 
    3164         130 :       NULLIFY (qs_kind_set, dist_2d, nl_2c, particle_set, dft_control, para_env)
    3165             : 
    3166         130 :       CALL timeset(routineN, handle)
    3167             : 
    3168             :       CALL get_qs_env(qs_env, nkind=nkind, qs_kind_set=qs_kind_set, distribution_2d=dist_2d, natom=natom, &
    3169         130 :                       particle_set=particle_set, dft_control=dft_control, para_env=para_env)
    3170             : 
    3171             :       !TODO: is such a pgrid correct?
    3172             :       !Dealing with the 3c derivatives
    3173         130 :       nthreads = 1
    3174         130 : !$    nthreads = omp_get_num_threads()
    3175         130 :       pdims = 0
    3176         520 :       CALL dbt_pgrid_create(para_env, pdims, pgrid, tensor_dims=[MAX(1, natom/(ri_data%n_mem*nthreads)), natom, natom])
    3177             : 
    3178             :       CALL create_3c_tensor(t_3c_template, dist_RI, dist_AO_1, dist_AO_2, pgrid, &
    3179             :                             ri_data%bsizes_RI, ri_data%bsizes_AO, ri_data%bsizes_AO, &
    3180             :                             map1=[1], map2=[2, 3], &
    3181         130 :                             name="der (RI AO | AO)")
    3182             : 
    3183        4550 :       ALLOCATE (t_3c_der_AO_prv(1, 1, 3), t_3c_der_RI_prv(1, 1, 3))
    3184         520 :       DO i_xyz = 1, 3
    3185         390 :          CALL dbt_create(t_3c_template, t_3c_der_RI_prv(1, 1, i_xyz))
    3186         520 :          CALL dbt_create(t_3c_template, t_3c_der_AO_prv(1, 1, i_xyz))
    3187             :       END DO
    3188         130 :       IF (PRESENT(t_3c_virial)) THEN
    3189           4 :          CALL dbt_create(t_3c_template, t_3c_virial)
    3190             :       END IF
    3191         130 :       CALL dbt_destroy(t_3c_template)
    3192             : 
    3193         130 :       CALL dbt_mp_environ_pgrid(pgrid, pdims, pcoord)
    3194         130 :       CALL mp_comm_t3c%create(pgrid%mp_comm_2d, 3, pdims)
    3195             :       CALL distribution_3d_create(dist_3d, dist_RI, dist_AO_1, dist_AO_2, &
    3196         130 :                                   nkind, particle_set, mp_comm_t3c, own_comm=.TRUE.)
    3197             : 
    3198             :       CALL build_3c_neighbor_lists(nl_3c, basis_set_RI, basis_set_AO, basis_set_AO, dist_3d, ri_data%ri_metric, &
    3199         130 :                                    "HFX_3c_nl", qs_env, op_pos=1, sym_jk=.TRUE., own_dist=.TRUE.)
    3200             : 
    3201         130 :       IF (PRESENT(nl_3c_out)) THEN
    3202           4 :          CALL dbt_mp_environ_pgrid(pgrid, pdims, pcoord)
    3203           4 :          CALL mp_comm_t3c_out%create(pgrid%mp_comm_2d, 3, pdims)
    3204             :          CALL distribution_3d_create(dist_3d_out, dist_RI, dist_AO_1, dist_AO_2, &
    3205           4 :                                      nkind, particle_set, mp_comm_t3c_out, own_comm=.TRUE.)
    3206             :          CALL build_3c_neighbor_lists(nl_3c_out, basis_set_RI, basis_set_AO, basis_set_AO, dist_3d_out, &
    3207             :                                       ri_data%ri_metric, "HFX_3c_nl", qs_env, op_pos=1, sym_jk=.FALSE., &
    3208           4 :                                       own_dist=.TRUE.)
    3209             :       END IF
    3210         130 :       DEALLOCATE (dist_RI, dist_AO_1, dist_AO_2)
    3211         130 :       CALL dbt_pgrid_destroy(pgrid)
    3212             : 
    3213         130 :       n_mem = ri_data%n_mem
    3214             :       CALL create_tensor_batches(ri_data%bsizes_RI, n_mem, dummy_start, dummy_end, &
    3215         130 :                                  start_blocks, end_blocks)
    3216         130 :       DEALLOCATE (dummy_start, dummy_end)
    3217             : 
    3218      553008 :       ALLOCATE (t_3c_der_AO_comp(n_mem, 3), t_3c_der_RI_comp(n_mem, 3))
    3219        3628 :       ALLOCATE (t_3c_der_AO_ind(n_mem, 3), t_3c_der_RI_ind(n_mem, 3))
    3220             : 
    3221         130 :       memory = 0.0_dp
    3222         130 :       nze_tot = 0
    3223         518 :       DO i_mem = 1, n_mem
    3224             :          CALL build_3c_derivatives(t_3c_der_RI_prv, t_3c_der_AO_prv, ri_data%filter_eps, qs_env, &
    3225             :                                    nl_3c, basis_set_RI, basis_set_AO, basis_set_AO, &
    3226             :                                    ri_data%ri_metric, der_eps=ri_data%eps_schwarz_forces, op_pos=1, &
    3227        1164 :                                    bounds_i=[start_blocks(i_mem), end_blocks(i_mem)])
    3228             : 
    3229        1682 :          DO i_xyz = 1, 3
    3230        1164 :             CALL dbt_copy(t_3c_der_RI_prv(1, 1, i_xyz), ri_ao_ao_template, move_data=.TRUE.)
    3231        1164 :             CALL dbt_filter(ri_ao_ao_template, ri_data%filter_eps)
    3232        1164 :             CALL get_tensor_occupancy(ri_ao_ao_template, nze, occ)
    3233        1164 :             nze_tot = nze_tot + nze
    3234             : 
    3235        1164 :             CALL alloc_containers(t_3c_der_RI_comp(i_mem, i_xyz), 1)
    3236             :             CALL compress_tensor(ri_ao_ao_template, t_3c_der_RI_ind(i_mem, i_xyz)%ind, &
    3237        1164 :                                  t_3c_der_RI_comp(i_mem, i_xyz), ri_data%filter_eps_storage, memory)
    3238        1164 :             CALL dbt_clear(ri_ao_ao_template)
    3239             : 
    3240             :             !put AO derivative as middle index
    3241        1164 :             CALL dbt_copy(t_3c_der_AO_prv(1, 1, i_xyz), ri_ao_ao_template, order=[1, 3, 2], move_data=.TRUE.)
    3242        1164 :             CALL dbt_filter(ri_ao_ao_template, ri_data%filter_eps)
    3243        1164 :             CALL get_tensor_occupancy(ri_ao_ao_template, nze, occ)
    3244        1164 :             nze_tot = nze_tot + nze
    3245             : 
    3246        1164 :             CALL alloc_containers(t_3c_der_AO_comp(i_mem, i_xyz), 1)
    3247             :             CALL compress_tensor(ri_ao_ao_template, t_3c_der_AO_ind(i_mem, i_xyz)%ind, &
    3248        1164 :                                  t_3c_der_AO_comp(i_mem, i_xyz), ri_data%filter_eps_storage, memory)
    3249        3880 :             CALL dbt_clear(ri_ao_ao_template)
    3250             :          END DO
    3251             :       END DO
    3252             : 
    3253         130 :       CALL neighbor_list_3c_destroy(nl_3c)
    3254         520 :       DO i_xyz = 1, 3
    3255         390 :          CALL dbt_destroy(t_3c_der_RI_prv(1, 1, i_xyz))
    3256         520 :          CALL dbt_destroy(t_3c_der_AO_prv(1, 1, i_xyz))
    3257             :       END DO
    3258             : 
    3259         130 :       CALL para_env%sum(memory)
    3260         130 :       compression_factor = REAL(nze_tot, dp)*1.0E-06*8.0_dp/memory
    3261         130 :       IF (ri_data%unit_nr > 0) THEN
    3262             :          WRITE (UNIT=ri_data%unit_nr, FMT="((T3,A,T66,F11.2,A4))") &
    3263          14 :             "MEMORY_INFO| Memory for 3-center HFX derivatives (compressed):", memory, ' MiB'
    3264             : 
    3265             :          WRITE (UNIT=ri_data%unit_nr, FMT="((T3,A,T60,F21.2))") &
    3266          14 :             "MEMORY_INFO| Compression factor:                  ", compression_factor
    3267             :       END IF
    3268             : 
    3269             :       !Deal with the 2-center derivatives
    3270         130 :       CALL cp_dbcsr_dist2d_to_dist(dist_2d, dbcsr_dist)
    3271         390 :       ALLOCATE (row_bsize(SIZE(ri_data%bsizes_RI)))
    3272         260 :       ALLOCATE (col_bsize(SIZE(ri_data%bsizes_RI)))
    3273         530 :       row_bsize(:) = ri_data%bsizes_RI
    3274         530 :       col_bsize(:) = ri_data%bsizes_RI
    3275             : 
    3276             :       CALL build_2c_neighbor_lists(nl_2c, basis_set_RI, basis_set_RI, ri_data%hfx_pot, &
    3277         130 :                                    "HFX_2c_nl_pot", qs_env, sym_ij=.TRUE., dist_2d=dist_2d)
    3278             : 
    3279         520 :       DO i_xyz = 1, 3
    3280             :          CALL dbcsr_create(t_2c_der_RI_prv(1, i_xyz), "(R|P) HFX der", dbcsr_dist, &
    3281         520 :                            dbcsr_type_antisymmetric, row_bsize, col_bsize)
    3282             :       END DO
    3283             : 
    3284             :       CALL build_2c_derivatives(t_2c_der_RI_prv, ri_data%filter_eps_2c, qs_env, nl_2c, basis_set_RI, &
    3285         130 :                                 basis_set_RI, ri_data%hfx_pot)
    3286         130 :       CALL release_neighbor_list_sets(nl_2c)
    3287             : 
    3288         130 :       IF (PRESENT(nl_2c_pot)) THEN
    3289           4 :          NULLIFY (nl_2c_pot)
    3290             :          CALL build_2c_neighbor_lists(nl_2c_pot, basis_set_RI, basis_set_RI, ri_data%hfx_pot, &
    3291           4 :                                       "HFX_2c_nl_pot", qs_env, sym_ij=.FALSE., dist_2d=dist_2d)
    3292             :       END IF
    3293             : 
    3294             :       !copy 2c derivative tensor into the standard format
    3295             :       CALL create_2c_tensor(t_2c_template, dist1, dist2, ri_data%pgrid_2d, ri_data%bsizes_RI_split, &
    3296         130 :                             ri_data%bsizes_RI_split, name='(RI| RI)')
    3297         130 :       DEALLOCATE (dist1, dist2)
    3298             : 
    3299         520 :       DO i_xyz = 1, 3
    3300         390 :          CALL dbt_create(t_2c_der_RI_prv(1, i_xyz), t_2c_tmp)
    3301         390 :          CALL dbt_copy_matrix_to_tensor(t_2c_der_RI_prv(1, i_xyz), t_2c_tmp)
    3302             : 
    3303         390 :          CALL dbt_create(t_2c_template, t_2c_der_RI(i_xyz))
    3304         390 :          CALL dbt_copy(t_2c_tmp, t_2c_der_RI(i_xyz), move_data=.TRUE.)
    3305             : 
    3306         390 :          CALL dbt_destroy(t_2c_tmp)
    3307         520 :          CALL dbcsr_release(t_2c_der_RI_prv(1, i_xyz))
    3308             :       END DO
    3309             : 
    3310             :       !Repeat with the metric, if required
    3311         130 :       IF (.NOT. ri_data%same_op) THEN
    3312             : 
    3313             :          CALL build_2c_neighbor_lists(nl_2c, basis_set_RI, basis_set_RI, ri_data%ri_metric, &
    3314          32 :                                       "HFX_2c_nl_RI", qs_env, sym_ij=.TRUE., dist_2d=dist_2d)
    3315             : 
    3316         128 :          DO i_xyz = 1, 3
    3317             :             CALL dbcsr_create(t_2c_der_metric_prv(1, i_xyz), "(R|P) HFX der", dbcsr_dist, &
    3318         128 :                               dbcsr_type_antisymmetric, row_bsize, col_bsize)
    3319             :          END DO
    3320             : 
    3321             :          CALL build_2c_derivatives(t_2c_der_metric_prv, ri_data%filter_eps_2c, qs_env, nl_2c, &
    3322          32 :                                    basis_set_RI, basis_set_RI, ri_data%ri_metric)
    3323          32 :          CALL release_neighbor_list_sets(nl_2c)
    3324             : 
    3325          32 :          IF (PRESENT(nl_2c_met)) THEN
    3326           0 :             NULLIFY (nl_2c_met)
    3327             :             CALL build_2c_neighbor_lists(nl_2c_met, basis_set_RI, basis_set_RI, ri_data%ri_metric, &
    3328           0 :                                          "HFX_2c_nl_RI", qs_env, sym_ij=.FALSE., dist_2d=dist_2d)
    3329             :          END IF
    3330             : 
    3331         128 :          DO i_xyz = 1, 3
    3332          96 :             CALL dbt_create(t_2c_der_metric_prv(1, i_xyz), t_2c_tmp)
    3333          96 :             CALL dbt_copy_matrix_to_tensor(t_2c_der_metric_prv(1, i_xyz), t_2c_tmp)
    3334             : 
    3335          96 :             CALL dbt_create(t_2c_template, t_2c_der_metric(i_xyz))
    3336          96 :             CALL dbt_copy(t_2c_tmp, t_2c_der_metric(i_xyz), move_data=.TRUE.)
    3337             : 
    3338          96 :             CALL dbt_destroy(t_2c_tmp)
    3339         128 :             CALL dbcsr_release(t_2c_der_metric_prv(1, i_xyz))
    3340             :          END DO
    3341             : 
    3342             :       END IF
    3343             : 
    3344         130 :       CALL dbt_destroy(t_2c_template)
    3345         130 :       CALL dbcsr_distribution_release(dbcsr_dist)
    3346         130 :       DEALLOCATE (row_bsize, col_bsize)
    3347             : 
    3348         130 :       CALL timestop(handle)
    3349             : 
    3350        1690 :    END SUBROUTINE precalc_derivatives
    3351             : 
    3352             : ! **************************************************************************************************
    3353             : !> \brief This routines calculates the force contribution from a trace over 3D tensors, i.e.
    3354             : !>        force = sum_ijk A_ijk B_ijk. An iteration over the blocks is made, which index determin
    3355             : !>        the atom on which the force acts
    3356             : !> \param force ...
    3357             : !> \param t_3c_contr ...
    3358             : !> \param t_3c_der ...
    3359             : !> \param atom_of_kind ...
    3360             : !> \param kind_of ...
    3361             : !> \param idx_to_at ...
    3362             : !> \param pref ...
    3363             : !> \param do_mp2 ...
    3364             : !> \param deriv_dim the dimension of the tensor corresponding to the derivative (by default 1)
    3365             : ! **************************************************************************************************
    3366        3862 :    SUBROUTINE get_force_from_3c_trace(force, t_3c_contr, t_3c_der, atom_of_kind, kind_of, idx_to_at, &
    3367             :                                       pref, do_mp2, deriv_dim)
    3368             : 
    3369             :       TYPE(qs_force_type), DIMENSION(:), POINTER         :: force
    3370             :       TYPE(dbt_type), INTENT(INOUT)                      :: t_3c_contr
    3371             :       TYPE(dbt_type), DIMENSION(3), INTENT(INOUT)        :: t_3c_der
    3372             :       INTEGER, DIMENSION(:), INTENT(IN)                  :: atom_of_kind, kind_of, idx_to_at
    3373             :       REAL(dp), INTENT(IN)                               :: pref
    3374             :       LOGICAL, INTENT(IN), OPTIONAL                      :: do_mp2
    3375             :       INTEGER, INTENT(IN), OPTIONAL                      :: deriv_dim
    3376             : 
    3377             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'get_force_from_3c_trace'
    3378             : 
    3379             :       INTEGER                                            :: deriv_dim_prv, handle, i_xyz, iat, &
    3380             :                                                             iat_of_kind, ikind, j_xyz
    3381             :       INTEGER, DIMENSION(3)                              :: ind
    3382             :       LOGICAL                                            :: do_mp2_prv, found
    3383             :       REAL(dp)                                           :: new_force
    3384        3862 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :, :), TARGET  :: contr_blk, der_blk
    3385             :       REAL(dp), DIMENSION(3)                             :: scoord
    3386             :       TYPE(dbt_iterator_type)                            :: iter
    3387             : 
    3388        3862 :       CALL timeset(routineN, handle)
    3389             : 
    3390        3862 :       do_mp2_prv = .FALSE.
    3391        3862 :       IF (PRESENT(do_mp2)) do_mp2_prv = do_mp2
    3392             : 
    3393        3862 :       deriv_dim_prv = 1
    3394        3862 :       IF (PRESENT(deriv_dim)) deriv_dim_prv = deriv_dim
    3395             : 
    3396             : !$OMP PARALLEL DEFAULT(NONE) &
    3397             : !$OMP SHARED(t_3c_der,t_3c_contr,force,do_mp2_prv,deriv_dim_prv,pref,idx_to_at,atom_of_kind,kind_of) &
    3398        3862 : !$OMP PRIVATE(i_xyz,j_xyz,iter,ind,der_blk,contr_blk,found,new_force,iat,iat_of_kind,ikind,scoord)
    3399             :       DO i_xyz = 1, 3
    3400             :          CALL dbt_iterator_start(iter, t_3c_der(i_xyz))
    3401             :          DO WHILE (dbt_iterator_blocks_left(iter))
    3402             :             CALL dbt_iterator_next_block(iter, ind)
    3403             : 
    3404             :             CALL dbt_get_block(t_3c_der(i_xyz), ind, der_blk, found)
    3405             :             CPASSERT(found)
    3406             :             CALL dbt_get_block(t_3c_contr, ind, contr_blk, found)
    3407             : 
    3408             :             IF (found) THEN
    3409             : 
    3410             :                !take the trace of the blocks
    3411             :                new_force = pref*SUM(der_blk(:, :, :)*contr_blk(:, :, :))
    3412             : 
    3413             :                !the first index of the derivative tensor defines the atom
    3414             :                iat = idx_to_at(ind(deriv_dim_prv))
    3415             :                iat_of_kind = atom_of_kind(iat)
    3416             :                ikind = kind_of(iat)
    3417             : 
    3418             :                IF (.NOT. do_mp2_prv) THEN
    3419             : !$OMP ATOMIC
    3420             :                   force(ikind)%fock_4c(i_xyz, iat_of_kind) = force(ikind)%fock_4c(i_xyz, iat_of_kind) &
    3421             :                                                              + new_force
    3422             :                ELSE
    3423             : !$OMP ATOMIC
    3424             :                   force(ikind)%mp2_non_sep(i_xyz, iat_of_kind) = force(ikind)%mp2_non_sep(i_xyz, iat_of_kind) &
    3425             :                                                                  + new_force
    3426             :                END IF
    3427             : 
    3428             :                DEALLOCATE (contr_blk)
    3429             :             END IF
    3430             :             DEALLOCATE (der_blk)
    3431             :          END DO !iter
    3432             :          CALL dbt_iterator_stop(iter)
    3433             :       END DO
    3434             : !$OMP END PARALLEL
    3435        3862 :       CALL timestop(handle)
    3436             : 
    3437        7724 :    END SUBROUTINE get_force_from_3c_trace
    3438             : 
    3439             : ! **************************************************************************************************
    3440             : !> \brief Update the forces due to the derivative of the a 2-center product d/dR (Q|R)
    3441             : !> \param force ...
    3442             : !> \param t_2c_contr A precontracted tensor containing sum_abcdPS (ab|P)(P|Q)^-1 (R|S)^-1 (S|cd) P_ac P_bd
    3443             : !> \param t_2c_der the d/dR (Q|R) tensor, in all 3 cartesian directions
    3444             : !> \param atom_of_kind ...
    3445             : !> \param kind_of ...
    3446             : !> \param idx_to_at ...
    3447             : !> \param pref ...
    3448             : !> \param do_mp2 ...
    3449             : !> \param do_ovlp ...
    3450             : !> \note IMPORTANT: t_tc_contr and t_2c_der need to have the same distribution
    3451             : ! **************************************************************************************************
    3452         610 :    SUBROUTINE get_2c_der_force(force, t_2c_contr, t_2c_der, atom_of_kind, kind_of, idx_to_at, &
    3453             :                                pref, do_mp2, do_ovlp)
    3454             : 
    3455             :       TYPE(qs_force_type), DIMENSION(:), POINTER         :: force
    3456             :       TYPE(dbt_type), INTENT(INOUT)                      :: t_2c_contr
    3457             :       TYPE(dbt_type), DIMENSION(3), INTENT(INOUT)        :: t_2c_der
    3458             :       INTEGER, DIMENSION(:), INTENT(IN)                  :: atom_of_kind, kind_of, idx_to_at
    3459             :       REAL(dp), INTENT(IN)                               :: pref
    3460             :       LOGICAL, INTENT(IN), OPTIONAL                      :: do_mp2, do_ovlp
    3461             : 
    3462             :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'get_2c_der_force'
    3463             : 
    3464             :       INTEGER                                            :: handle, i_xyz, iat, iat_of_kind, ikind, &
    3465             :                                                             j_xyz, jat, jat_of_kind, jkind
    3466             :       INTEGER, DIMENSION(2)                              :: ind
    3467             :       LOGICAL                                            :: do_mp2_prv, do_ovlp_prv, found
    3468             :       REAL(dp)                                           :: new_force
    3469         610 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :), TARGET     :: contr_blk, der_blk
    3470             :       REAL(dp), DIMENSION(3)                             :: scoord
    3471             :       TYPE(dbt_iterator_type)                            :: iter
    3472             : 
    3473             :       !Loop over the blocks of d/dR (Q|R), contract with the corresponding block of t_2c_contr and
    3474             :       !update the relevant force
    3475             : 
    3476         610 :       CALL timeset(routineN, handle)
    3477             : 
    3478         610 :       do_mp2_prv = .FALSE.
    3479         610 :       IF (PRESENT(do_mp2)) do_mp2_prv = do_mp2
    3480             : 
    3481         610 :       do_ovlp_prv = .FALSE.
    3482         610 :       IF (PRESENT(do_ovlp)) do_ovlp_prv = do_ovlp
    3483             : 
    3484             : !$OMP PARALLEL DEFAULT(NONE) &
    3485             : !$OMP SHARED(t_2c_der,t_2c_contr,force,do_mp2_prv,do_ovlp_prv,pref,idx_to_at,atom_of_kind,kind_of) &
    3486             : !$OMP PRIVATE(i_xyz,j_xyz,iter,ind,der_blk,contr_blk,found,new_force) &
    3487         610 : !$OMP PRIVATE(iat,jat,iat_of_kind,jat_of_kind,ikind,jkind,scoord)
    3488             :       DO i_xyz = 1, 3
    3489             :          CALL dbt_iterator_start(iter, t_2c_der(i_xyz))
    3490             :          DO WHILE (dbt_iterator_blocks_left(iter))
    3491             :             CALL dbt_iterator_next_block(iter, ind)
    3492             : 
    3493             :             IF (ind(1) == ind(2)) CYCLE
    3494             : 
    3495             :             CALL dbt_get_block(t_2c_der(i_xyz), ind, der_blk, found)
    3496             :             CPASSERT(found)
    3497             :             CALL dbt_get_block(t_2c_contr, ind, contr_blk, found)
    3498             : 
    3499             :             IF (found) THEN
    3500             : 
    3501             :                !an element of d/dR (Q|R) corresponds to 2 things because of translational invariance
    3502             :                !(Q'| R) = - (Q| R'), once wrt the center on Q, and once on R
    3503             :                new_force = pref*SUM(der_blk(:, :)*contr_blk(:, :))
    3504             : 
    3505             :                iat = idx_to_at(ind(1))
    3506             :                iat_of_kind = atom_of_kind(iat)
    3507             :                ikind = kind_of(iat)
    3508             : 
    3509             :                IF (do_mp2_prv) THEN
    3510             : !$OMP ATOMIC
    3511             :                   force(ikind)%mp2_non_sep(i_xyz, iat_of_kind) = force(ikind)%mp2_non_sep(i_xyz, iat_of_kind) &
    3512             :                                                                  + new_force
    3513             :                ELSE IF (do_ovlp_prv) THEN
    3514             : !$OMP ATOMIC
    3515             :                   force(ikind)%overlap(i_xyz, iat_of_kind) = force(ikind)%overlap(i_xyz, iat_of_kind) &
    3516             :                                                              + new_force
    3517             :                ELSE
    3518             : !$OMP ATOMIC
    3519             :                   force(ikind)%fock_4c(i_xyz, iat_of_kind) = force(ikind)%fock_4c(i_xyz, iat_of_kind) &
    3520             :                                                              + new_force
    3521             :                END IF
    3522             : 
    3523             :                jat = idx_to_at(ind(2))
    3524             :                jat_of_kind = atom_of_kind(jat)
    3525             :                jkind = kind_of(jat)
    3526             : 
    3527             :                IF (do_mp2_prv) THEN
    3528             : !$OMP ATOMIC
    3529             :                   force(jkind)%mp2_non_sep(i_xyz, jat_of_kind) = force(jkind)%mp2_non_sep(i_xyz, jat_of_kind) &
    3530             :                                                                  - new_force
    3531             :                ELSE IF (do_ovlp_prv) THEN
    3532             : !$OMP ATOMIC
    3533             :                   force(jkind)%overlap(i_xyz, jat_of_kind) = force(jkind)%overlap(i_xyz, jat_of_kind) &
    3534             :                                                              - new_force
    3535             :                ELSE
    3536             : !$OMP ATOMIC
    3537             :                   force(jkind)%fock_4c(i_xyz, jat_of_kind) = force(jkind)%fock_4c(i_xyz, jat_of_kind) &
    3538             :                                                              - new_force
    3539             :                END IF
    3540             : 
    3541             :                DEALLOCATE (contr_blk)
    3542             :             END IF
    3543             : 
    3544             :             DEALLOCATE (der_blk)
    3545             :          END DO !iter
    3546             :          CALL dbt_iterator_stop(iter)
    3547             : 
    3548             :       END DO !i_xyz
    3549             : !$OMP END PARALLEL
    3550         610 :       CALL timestop(handle)
    3551             : 
    3552        1220 :    END SUBROUTINE get_2c_der_force
    3553             : 
    3554             : ! **************************************************************************************************
    3555             : !> \brief Get the force from a contraction of type SUM_a,beta (a|beta') C_a,beta, where beta is an AO
    3556             : !>        and a is a MO
    3557             : !> \param force ...
    3558             : !> \param t_mo_coeff ...
    3559             : !> \param t_2c_MO_AO ...
    3560             : !> \param atom_of_kind ...
    3561             : !> \param kind_of ...
    3562             : !> \param idx_to_at ...
    3563             : !> \param pref ...
    3564             : !> \param i_xyz ...
    3565             : ! **************************************************************************************************
    3566          54 :    SUBROUTINE get_MO_AO_force(force, t_mo_coeff, t_2c_MO_AO, atom_of_kind, kind_of, idx_to_at, pref, i_xyz)
    3567             : 
    3568             :       TYPE(qs_force_type), DIMENSION(:), POINTER         :: force
    3569             :       TYPE(dbt_type), INTENT(INOUT)                      :: t_mo_coeff, t_2c_MO_AO
    3570             :       INTEGER, DIMENSION(:), INTENT(IN)                  :: atom_of_kind, kind_of, idx_to_at
    3571             :       REAL(dp), INTENT(IN)                               :: pref
    3572             :       INTEGER, INTENT(IN)                                :: i_xyz
    3573             : 
    3574             :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'get_MO_AO_force'
    3575             : 
    3576             :       INTEGER                                            :: handle, iat, iat_of_kind, ikind, j_xyz
    3577             :       INTEGER, DIMENSION(2)                              :: ind
    3578             :       LOGICAL                                            :: found
    3579             :       REAL(dp)                                           :: new_force
    3580          54 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :), TARGET     :: mo_ao_blk, mo_coeff_blk
    3581             :       REAL(dp), DIMENSION(3)                             :: scoord
    3582             :       TYPE(dbt_iterator_type)                            :: iter
    3583             : 
    3584          54 :       CALL timeset(routineN, handle)
    3585             : 
    3586             : !$OMP PARALLEL DEFAULT(NONE) &
    3587             : !$OMP SHARED(t_2c_MO_AO,t_mo_coeff,pref,force,idx_to_at,atom_of_kind,kind_of,i_xyz) &
    3588          54 : !$OMP PRIVATE(iter,ind,mo_ao_blk,mo_coeff_blk,found,new_force,iat,iat_of_kind,ikind,scoord,j_xyz)
    3589             :       CALL dbt_iterator_start(iter, t_2c_MO_AO)
    3590             :       DO WHILE (dbt_iterator_blocks_left(iter))
    3591             :          CALL dbt_iterator_next_block(iter, ind)
    3592             : 
    3593             :          CALL dbt_get_block(t_2c_MO_AO, ind, mo_ao_blk, found)
    3594             :          CPASSERT(found)
    3595             :          CALL dbt_get_block(t_mo_coeff, ind, mo_coeff_blk, found)
    3596             : 
    3597             :          IF (found) THEN
    3598             : 
    3599             :             new_force = pref*SUM(mo_ao_blk(:, :)*mo_coeff_blk(:, :))
    3600             : 
    3601             :             iat = idx_to_at(ind(2)) !AO index is column index
    3602             :             iat_of_kind = atom_of_kind(iat)
    3603             :             ikind = kind_of(iat)
    3604             : 
    3605             : !$OMP ATOMIC
    3606             :             force(ikind)%fock_4c(i_xyz, iat_of_kind) = force(ikind)%fock_4c(i_xyz, iat_of_kind) &
    3607             :                                                        + new_force
    3608             : 
    3609             :             DEALLOCATE (mo_coeff_blk)
    3610             :          END IF
    3611             : 
    3612             :          DEALLOCATE (mo_ao_blk)
    3613             :       END DO !iter
    3614             :       CALL dbt_iterator_stop(iter)
    3615             : !$OMP END PARALLEL
    3616             : 
    3617          54 :       CALL timestop(handle)
    3618             : 
    3619         108 :    END SUBROUTINE get_MO_AO_force
    3620             : 
    3621             : ! **************************************************************************************************
    3622             : !> \brief Print RI-HFX quantities, as required by the PRINT subsection
    3623             : !> \param ri_data ...
    3624             : !> \param qs_env ...
    3625             : ! **************************************************************************************************
    3626         148 :    SUBROUTINE print_ri_hfx(ri_data, qs_env)
    3627             : 
    3628             :       TYPE(hfx_ri_type), INTENT(INOUT)                   :: ri_data
    3629             :       TYPE(qs_environment_type), POINTER                 :: qs_env
    3630             : 
    3631             :       CHARACTER(Len=2)                                   :: symbol
    3632             :       CHARACTER(Len=8)                                   :: rifmt
    3633             :       INTEGER                                            :: atype, i_RI, ia, ib, ibasis, ikind, &
    3634             :                                                             iset, isgf, ishell, iso, l, m, natom, &
    3635             :                                                             ncols, nkind, nrows, nset, nsgf, &
    3636             :                                                             nspins, unit_nr
    3637             :       INTEGER, DIMENSION(3)                              :: periodic
    3638         148 :       INTEGER, DIMENSION(:), POINTER                     :: npgf, nshell
    3639         148 :       INTEGER, DIMENSION(:, :), POINTER                  :: lshell
    3640             :       LOGICAL                                            :: mult_by_S, print_density, &
    3641             :                                                             print_ri_metric, skip_ri_metric
    3642         148 :       REAL(dp), ALLOCATABLE, DIMENSION(:)                :: density_coeffs, density_coeffs_2
    3643             :       REAL(dp), DIMENSION(3, 3)                          :: hmat
    3644         148 :       REAL(dp), DIMENSION(:, :), POINTER                 :: zet
    3645         148 :       REAL(dp), DIMENSION(:, :, :), POINTER              :: gcc
    3646             :       TYPE(cell_type), POINTER                           :: cell
    3647             :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
    3648             :       TYPE(cp_fm_struct_type), POINTER                   :: fm_struct
    3649             :       TYPE(cp_fm_type)                                   :: matrix_s_fm
    3650             :       TYPE(cp_logger_type), POINTER                      :: logger
    3651         148 :       TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER       :: rho_ao
    3652         296 :       TYPE(dbcsr_type), DIMENSION(1)                     :: matrix_s
    3653             :       TYPE(dft_control_type), POINTER                    :: dft_control
    3654             :       TYPE(gto_basis_set_p_type), ALLOCATABLE, &
    3655         148 :          DIMENSION(:), TARGET                            :: basis_set_AO, basis_set_RI
    3656             :       TYPE(gto_basis_set_type), POINTER                  :: orb_basis, ri_basis
    3657             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    3658         148 :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
    3659         148 :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
    3660             :       TYPE(qs_rho_type), POINTER                         :: rho
    3661             :       TYPE(section_vals_type), POINTER                   :: input, print_section
    3662             : 
    3663         148 :       NULLIFY (rho_ao, input, print_section, logger, rho, particle_set, qs_kind_set, ri_basis, &
    3664         148 :                para_env, blacs_env, fm_struct, orb_basis, dft_control)
    3665             : 
    3666         148 :       CALL get_qs_env(qs_env, input=input, dft_control=dft_control)
    3667         148 :       logger => cp_get_default_logger()
    3668         148 :       print_density = .FALSE.
    3669         148 :       print_ri_metric = .FALSE.
    3670             : 
    3671             :       !Do we print the RI density coeffs  and/or RI_metric 2c integrals?
    3672         148 :       print_section => section_vals_get_subs_vals(input, "DFT%XC%HF%RI%PRINT")
    3673         148 :       IF (BTEST(cp_print_key_should_output(logger%iter_info, print_section, "RI_DENSITY_COEFFS"), &
    3674           0 :                 cp_p_file)) print_density = .TRUE.
    3675         148 :       IF (BTEST(cp_print_key_should_output(logger%iter_info, print_section, "RI_METRIC_2C_INTS"), &
    3676           0 :                 cp_p_file)) print_ri_metric = .TRUE.
    3677             : 
    3678             :       !common stuff
    3679         148 :       IF (print_density .OR. print_ri_metric) THEN
    3680             : 
    3681             :          !Set up basis sets and interaction radii
    3682           0 :          CALL get_qs_env(qs_env, nkind=nkind, qs_kind_set=qs_kind_set, particle_set=particle_set)
    3683           0 :          ALLOCATE (basis_set_RI(nkind), basis_set_AO(nkind))
    3684           0 :          CALL basis_set_list_setup(basis_set_RI, ri_data%ri_basis_type, qs_kind_set)
    3685           0 :          CALL get_particle_set(particle_set, qs_kind_set, basis=basis_set_RI)
    3686           0 :          CALL basis_set_list_setup(basis_set_AO, ri_data%orb_basis_type, qs_kind_set)
    3687           0 :          CALL get_particle_set(particle_set, qs_kind_set, basis=basis_set_AO)
    3688             : 
    3689           0 :          DO ibasis = 1, nkind
    3690           0 :             ri_basis => basis_set_RI(ibasis)%gto_basis_set
    3691           0 :             CALL init_interaction_radii_orb_basis(ri_basis, ri_data%eps_pgf_orb)
    3692           0 :             orb_basis => basis_set_AO(ibasis)%gto_basis_set
    3693           0 :             CALL init_interaction_radii_orb_basis(orb_basis, ri_data%eps_pgf_orb)
    3694             :          END DO
    3695             :       END IF
    3696             : 
    3697           0 :       IF (print_density) THEN
    3698           0 :          CALL get_qs_env(qs_env, rho=rho)
    3699           0 :          CALL qs_rho_get(rho, rho_ao_kp=rho_ao)
    3700           0 :          nspins = SIZE(rho_ao, 1)
    3701             : 
    3702           0 :          CALL section_vals_val_get(print_section, "RI_DENSITY_COEFFS%MULTIPLY_BY_RI_2C_INTEGRALS", l_val=mult_by_s)
    3703           0 :          CALL section_vals_val_get(print_section, "RI_DENSITY_COEFFS%SKIP_RI_METRIC", l_val=skip_ri_metric)
    3704             : 
    3705           0 :          IF (mult_by_s .AND. skip_ri_metric) THEN
    3706           0 :             CPABORT("MULTIPLY_BY_RI_2C_INTEGRALS and SKIP_RI_METRIC are mutually exclusive.")
    3707             :          END IF
    3708             : 
    3709             :          CALL get_RI_density_coeffs(density_coeffs, rho_ao, 1, basis_set_AO, basis_set_RI, &
    3710           0 :                                     mult_by_s, skip_ri_metric, ri_data, qs_env)
    3711           0 :          IF (nspins == 2) &
    3712             :             CALL get_RI_density_coeffs(density_coeffs_2, rho_ao, 2, basis_set_AO, basis_set_RI, &
    3713           0 :                                        mult_by_s, skip_ri_metric, ri_data, qs_env)
    3714             : 
    3715             :          unit_nr = cp_print_key_unit_nr(logger, input, "DFT%XC%HF%RI%PRINT%RI_DENSITY_COEFFS", &
    3716             :                                         extension=".dat", file_status="REPLACE", &
    3717           0 :                                         file_action="WRITE", file_form="FORMATTED")
    3718             : 
    3719           0 :          CALL section_vals_val_get(print_section, "RI_DENSITY_COEFFS%FILE_FORMAT", c_val=rifmt)
    3720           0 :          CALL uppercase(rifmt)
    3721             : 
    3722           0 :          IF (unit_nr > 0) THEN
    3723           0 :             SELECT CASE (rifmt)
    3724             :             CASE DEFAULT
    3725           0 :                CPABORT("NA")
    3726             :             CASE ("BASIC")
    3727           0 :                IF (nspins == 1) THEN
    3728             :                   WRITE (unit_nr, FMT="(A,A,A)") &
    3729           0 :                      "# Coefficients of the electronic density projected on the RI_HFX basis for ", &
    3730           0 :                      TRIM(logger%iter_info%project_name), " project"
    3731           0 :                   DO i_RI = 1, SIZE(density_coeffs)
    3732           0 :                      WRITE (unit_nr, FMT="(F20.12)") density_coeffs(i_RI)
    3733             :                   END DO
    3734             :                ELSE
    3735             :                   WRITE (unit_nr, FMT="(A,A,A)") &
    3736           0 :                      "# Coefficients of the electronic density projected on the RI_HFX basis for ", &
    3737           0 :                      TRIM(logger%iter_info%project_name), " project. Spin up, spin down"
    3738           0 :                   DO i_RI = 1, SIZE(density_coeffs)
    3739           0 :                      WRITE (unit_nr, FMT="(F20.12,F20.12)") density_coeffs(i_RI), density_coeffs_2(i_RI)
    3740             :                   END DO
    3741             :                END IF
    3742             :             CASE ("EXTENDED")
    3743             :                WRITE (unit_nr, FMT="(A,A,A)") &
    3744           0 :                   "# Coefficients of the electronic density projected on the RI_HFX basis for ", &
    3745           0 :                   TRIM(logger%iter_info%project_name), " project"
    3746             : 
    3747           0 :                CALL get_qs_env(qs_env, cell=cell, particle_set=particle_set)
    3748           0 :                CALL get_cell(cell, periodic=periodic, h=hmat)
    3749           0 :                natom = SIZE(particle_set)
    3750           0 :                ib = 0
    3751           0 :                DO ia = 1, natom
    3752           0 :                   CALL get_atomic_kind(particle_set(ia)%atomic_kind, kind_number=ikind)
    3753           0 :                   ri_basis => basis_set_RI(ikind)%gto_basis_set
    3754           0 :                   CALL get_gto_basis_set(gto_basis_set=ri_basis, nsgf=nsgf)
    3755           0 :                   DO ibasis = 1, nsgf
    3756           0 :                      ib = ib + 1
    3757           0 :                      IF (nspins == 1) THEN
    3758           0 :                         WRITE (unit_nr, FMT="(I10,3I7,F20.12)") ib, ia, ikind, ibasis, &
    3759           0 :                            density_coeffs(ib)
    3760             :                      ELSE
    3761           0 :                         WRITE (unit_nr, FMT="(I10,3I7,F20.12,F20.12)") ib, ia, ikind, ibasis, &
    3762           0 :                            density_coeffs(ib), density_coeffs_2(ib)
    3763             :                      END IF
    3764             :                   END DO
    3765             :                END DO
    3766           0 :                WRITE (unit_nr, FMT="(A)") "# Cell Periodicity "
    3767           0 :                WRITE (unit_nr, FMT="(3I5)") periodic
    3768           0 :                WRITE (unit_nr, FMT="(A)") "# Cell Matrix [Bohr]"
    3769           0 :                WRITE (unit_nr, FMT="(3F20.12)") hmat(1, 1:3)
    3770           0 :                WRITE (unit_nr, FMT="(3F20.12)") hmat(2, 1:3)
    3771           0 :                WRITE (unit_nr, FMT="(3F20.12)") hmat(3, 1:3)
    3772           0 :                WRITE (unit_nr, FMT="(A)") "# El  Type   Number                        Coordinates [Bohr]"
    3773           0 :                DO ia = 1, natom
    3774             :                   CALL get_atomic_kind(atomic_kind=particle_set(ia)%atomic_kind, &
    3775           0 :                                        kind_number=atype, element_symbol=symbol)
    3776           0 :                   WRITE (unit_nr, FMT="(2X,A2,I5,I10,3F20.12)") symbol, atype, ia, particle_set(ia)%r(1:3)
    3777             :                END DO
    3778           0 :                WRITE (unit_nr, FMT="(A)") "# Basis Set Information"
    3779           0 :                DO ibasis = 1, nkind
    3780           0 :                   ri_basis => basis_set_RI(ibasis)%gto_basis_set
    3781             :                   CALL get_gto_basis_set(gto_basis_set=ri_basis, nsgf=nsgf, npgf=npgf, &
    3782             :                                          zet=zet, gcc=gcc, &
    3783           0 :                                          nset=nset, nshell=nshell, l=lshell)
    3784           0 :                   WRITE (unit_nr, FMT="(A)") "# Basis      Functions"
    3785           0 :                   WRITE (unit_nr, FMT="(I7,I15)") ibasis, nsgf
    3786           0 :                   WRITE (unit_nr, FMT="(A)") "#  Nr.      l       m     set   shell "
    3787           0 :                   isgf = 0
    3788           0 :                   DO iset = 1, nset
    3789           0 :                      DO ishell = 1, nshell(iset)
    3790           0 :                         l = lshell(ishell, iset)
    3791           0 :                         DO iso = 1, nso(l)
    3792           0 :                            isgf = isgf + 1
    3793           0 :                            m = iso - 1 - l
    3794           0 :                            WRITE (unit_nr, FMT="(I6,I7,I8,2I8)") isgf, l, m, iset, ishell
    3795             :                         END DO
    3796             :                      END DO
    3797             :                   END DO
    3798           0 :                   WRITE (unit_nr, FMT="(A)") "#  Basis set exponents and contractions "
    3799           0 :                   DO iset = 1, nset
    3800           0 :                      WRITE (unit_nr, FMT="(I7)") iset
    3801           0 :                      WRITE (unit_nr, FMT="(A)") "#  Exponent    Shells ...  "
    3802           0 :                      DO m = 1, npgf(iset)
    3803           0 :                         WRITE (unit_nr, FMT="(E18.12,50F18.12)") zet(m, iset), gcc(m, 1:nshell(iset), iset)
    3804             :                      END DO
    3805             :                   END DO
    3806             : 
    3807             :                END DO
    3808             :             END SELECT
    3809             :          END IF
    3810             : 
    3811           0 :          CALL cp_print_key_finished_output(unit_nr, logger, input, "DFT%XC%HF%RI%PRINT%RI_DENSITY_COEFFS")
    3812             :       END IF
    3813             : 
    3814         148 :       IF (print_ri_metric) THEN
    3815             :          !Recalculated the RI_metric 2c-integrals, as it is cheap, and not stored
    3816           0 :          CALL calc_RI_2c_ints(matrix_s, basis_set_RI, ri_data, qs_env)
    3817             : 
    3818             :          !convert 2c integrals to fm for dumping
    3819           0 :          CALL get_qs_env(qs_env, para_env=para_env, blacs_env=blacs_env)
    3820           0 :          CALL dbcsr_get_info(matrix_s(1), nfullrows_total=nrows, nfullcols_total=ncols)
    3821             :          CALL cp_fm_struct_create(fm_struct, context=blacs_env, para_env=para_env, &
    3822           0 :                                   nrow_global=nrows, ncol_global=ncols)
    3823           0 :          CALL cp_fm_create(matrix_s_fm, fm_struct)
    3824             : 
    3825           0 :          CALL copy_dbcsr_to_fm(matrix_s(1), matrix_s_fm)
    3826           0 :          CALL dbcsr_release(matrix_s(1))
    3827             : 
    3828             :          unit_nr = cp_print_key_unit_nr(logger, input, "DFT%XC%HF%RI%PRINT%RI_METRIC_2C_INTS", &
    3829             :                                         extension=".fm", file_status="REPLACE", &
    3830           0 :                                         file_action="WRITE", file_form="UNFORMATTED")
    3831           0 :          CALL cp_fm_write_unformatted(matrix_s_fm, unit_nr)
    3832             : 
    3833           0 :          CALL cp_print_key_finished_output(unit_nr, logger, input, "DFT%XC%HF%RI%PRINT%RI_METRIC_2C_INTS")
    3834             : 
    3835           0 :          CALL cp_fm_struct_release(fm_struct)
    3836           0 :          CALL cp_fm_release(matrix_s_fm)
    3837             :       END IF
    3838             : 
    3839             :       !clean-up
    3840         148 :       IF (print_density .OR. print_ri_metric) THEN
    3841           0 :          DO ibasis = 1, nkind
    3842           0 :             ri_basis => basis_set_RI(ibasis)%gto_basis_set
    3843           0 :             CALL init_interaction_radii_orb_basis(ri_basis, dft_control%qs_control%eps_pgf_orb)
    3844           0 :             orb_basis => basis_set_AO(ibasis)%gto_basis_set
    3845           0 :             CALL init_interaction_radii_orb_basis(orb_basis, dft_control%qs_control%eps_pgf_orb)
    3846             :          END DO
    3847             :       END IF
    3848             : 
    3849         444 :    END SUBROUTINE print_ri_hfx
    3850             : 
    3851             : ! **************************************************************************************************
    3852             : !> \brief Calculate the RI metric 2-center integrals
    3853             : !> \param matrix_s ...
    3854             : !> \param basis_set_RI ...
    3855             : !> \param ri_data ...
    3856             : !> \param qs_env ...
    3857             : ! **************************************************************************************************
    3858           0 :    SUBROUTINE calc_RI_2c_ints(matrix_s, basis_set_RI, ri_data, qs_env)
    3859             : 
    3860             :       TYPE(dbcsr_type), DIMENSION(1)                     :: matrix_s
    3861             :       TYPE(gto_basis_set_p_type), DIMENSION(:)           :: basis_set_RI
    3862             :       TYPE(hfx_ri_type), INTENT(INOUT)                   :: ri_data
    3863             :       TYPE(qs_environment_type), POINTER                 :: qs_env
    3864             : 
    3865           0 :       INTEGER, DIMENSION(:), POINTER                     :: col_bsize, row_bsize
    3866             :       TYPE(dbcsr_distribution_type)                      :: dbcsr_dist
    3867             :       TYPE(distribution_2d_type), POINTER                :: dist_2d
    3868             :       TYPE(neighbor_list_set_p_type), DIMENSION(:), &
    3869           0 :          POINTER                                         :: nl_2c
    3870             : 
    3871           0 :       NULLIFY (nl_2c, dist_2d, row_bsize, col_bsize)
    3872             : 
    3873           0 :       CALL get_qs_env(qs_env, distribution_2d=dist_2d)
    3874           0 :       CALL cp_dbcsr_dist2d_to_dist(dist_2d, dbcsr_dist)
    3875           0 :       ALLOCATE (row_bsize(SIZE(ri_data%bsizes_RI)))
    3876           0 :       ALLOCATE (col_bsize(SIZE(ri_data%bsizes_RI)))
    3877           0 :       row_bsize(:) = ri_data%bsizes_RI
    3878           0 :       col_bsize(:) = ri_data%bsizes_RI
    3879             : 
    3880           0 :       CALL dbcsr_create(matrix_s(1), "RI metric", dbcsr_dist, dbcsr_type_symmetric, row_bsize, col_bsize)
    3881             : 
    3882             :       CALL build_2c_neighbor_lists(nl_2c, basis_set_RI, basis_set_RI, ri_data%ri_metric, &
    3883           0 :                                    "HFX_2c_nl_pot", qs_env, sym_ij=.TRUE., dist_2d=dist_2d)
    3884             :       CALL build_2c_integrals(matrix_s, ri_data%filter_eps_2c, qs_env, nl_2c, basis_set_RI, &
    3885           0 :                               basis_set_RI, ri_data%ri_metric)
    3886             : 
    3887           0 :       CALL release_neighbor_list_sets(nl_2c)
    3888           0 :       CALL dbcsr_distribution_release(dbcsr_dist)
    3889           0 :       DEALLOCATE (row_bsize, col_bsize)
    3890             : 
    3891           0 :    END SUBROUTINE calc_RI_2c_ints
    3892             : 
    3893             : ! **************************************************************************************************
    3894             : !> \brief Projects the density on the RI basis and return the array of the RI coefficients
    3895             : !> \param density_coeffs ...
    3896             : !> \param rho_ao ...
    3897             : !> \param ispin ...
    3898             : !> \param basis_set_AO ...
    3899             : !> \param basis_set_RI ...
    3900             : !> \param multiply_by_S ...
    3901             : !> \param skip_ri_metric ...
    3902             : !> \param ri_data ...
    3903             : !> \param qs_env ...
    3904             : ! **************************************************************************************************
    3905           0 :    SUBROUTINE get_RI_density_coeffs(density_coeffs, rho_ao, ispin, basis_set_AO, basis_set_RI, &
    3906             :                                     multiply_by_S, skip_ri_metric, ri_data, qs_env)
    3907             : 
    3908             :       REAL(dp), ALLOCATABLE, DIMENSION(:)                :: density_coeffs
    3909             :       TYPE(dbcsr_p_type), DIMENSION(:, :)                :: rho_ao
    3910             :       INTEGER, INTENT(IN)                                :: ispin
    3911             :       TYPE(gto_basis_set_p_type), DIMENSION(:)           :: basis_set_AO, basis_set_RI
    3912             :       LOGICAL, INTENT(IN)                                :: multiply_by_S, skip_ri_metric
    3913             :       TYPE(hfx_ri_type), INTENT(INOUT)                   :: ri_data
    3914             :       TYPE(qs_environment_type), POINTER                 :: qs_env
    3915             : 
    3916             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'get_RI_density_coeffs'
    3917             : 
    3918             :       INTEGER                                            :: a, b, handle, i_mem, idx, n_dependent, &
    3919             :                                                             n_mem, n_mem_RI, natom, &
    3920             :                                                             nblk_per_thread, nblks, nkind
    3921             :       INTEGER(int_8)                                     :: nze
    3922           0 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: batch_block_end, batch_block_start, &
    3923           0 :                                                             dist1, dist2, dist3, dummy1, dummy2, &
    3924           0 :                                                             idx1, idx2, idx3
    3925             :       INTEGER, DIMENSION(2)                              :: ind, pdims_2d
    3926             :       INTEGER, DIMENSION(2, 3)                           :: bounds_cpy
    3927             :       INTEGER, DIMENSION(3)                              :: dims_3c, pcoord_3d, pdims_3d
    3928             :       LOGICAL                                            :: calc_ints, found
    3929             :       REAL(dp)                                           :: occ, threshold
    3930           0 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: blk
    3931           0 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :, :)          :: blk_3d
    3932             :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
    3933             :       TYPE(dbcsr_type)                                   :: ri_2c_inv
    3934           0 :       TYPE(dbcsr_type), DIMENSION(1)                     :: ri_2c_ints
    3935           0 :       TYPE(dbt_distribution_type)                        :: dist_2d, dist_3d
    3936             :       TYPE(dbt_iterator_type)                            :: iter
    3937           0 :       TYPE(dbt_pgrid_type)                               :: pgrid_2d, pgrid_3d
    3938           0 :       TYPE(dbt_type)                                     :: density_coeffs_t, density_tmp, rho_ao_t, &
    3939           0 :                                                             rho_ao_t_3d, rho_ao_tmp, t2c_ri_ints, &
    3940           0 :                                                             t2c_ri_inv, t2c_ri_tmp
    3941           0 :       TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :)       :: t_3c_int_batched
    3942             :       TYPE(dft_control_type), POINTER                    :: dft_control
    3943             :       TYPE(distribution_3d_type)                         :: dist_nl3c
    3944           0 :       TYPE(mp_cart_type)                                 :: mp_comm_t3c
    3945             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    3946             :       TYPE(neighbor_list_3c_type)                        :: nl_3c
    3947           0 :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
    3948           0 :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
    3949             : 
    3950           0 :       NULLIFY (dft_control, para_env, blacs_env, particle_set, qs_kind_set)
    3951             : 
    3952           0 :       CALL timeset(routineN, handle)
    3953             : 
    3954             :       ! Projection of the density on the RI basis: n(r) = sum_pq sum_munu P_pq (pq|mu) (mu|nu)^-1 nu(r)
    3955             :       !                                                 = sum_nu d_nu nu(r)
    3956             :       ! the (pq|mu) (mu|nu)^-1 contraction is already stored in compressed format
    3957             : 
    3958           0 :       IF (.NOT. ri_data%flavor == ri_pmat) THEN
    3959           0 :          CPABORT("Can only calculate and print the RI density coefficients within the RHO flavor of RI-HFX")
    3960             :       END IF
    3961             : 
    3962             :       CALL get_qs_env(qs_env, dft_control=dft_control, para_env=para_env, blacs_env=blacs_env, nkind=nkind, &
    3963           0 :                       particle_set=particle_set, qs_kind_set=qs_kind_set, natom=natom)
    3964           0 :       n_mem = ri_data%n_mem
    3965           0 :       n_mem_RI = ri_data%n_mem_RI
    3966             : 
    3967             :       ! Calculate RI 2c int tensor and its inverse. Skip this if requested
    3968           0 :       IF (.NOT. skip_ri_metric) THEN
    3969           0 :          CALL calc_RI_2c_ints(ri_2c_ints, basis_set_RI, ri_data, qs_env)
    3970           0 :          CALL dbcsr_create(ri_2c_inv, template=ri_2c_ints(1), matrix_type=dbcsr_type_no_symmetry)
    3971             : 
    3972           0 :          SELECT CASE (ri_data%t2c_method)
    3973             :          CASE (hfx_ri_do_2c_iter)
    3974           0 :             threshold = MAX(ri_data%filter_eps, 1.0e-12_dp)
    3975           0 :             CALL invert_hotelling(ri_2c_inv, ri_2c_ints(1), threshold=threshold, silent=.FALSE.)
    3976             :          CASE (hfx_ri_do_2c_cholesky)
    3977           0 :             CALL dbcsr_copy(ri_2c_inv, ri_2c_ints(1))
    3978           0 :             CALL cp_dbcsr_cholesky_decompose(ri_2c_inv, para_env=para_env, blacs_env=blacs_env)
    3979           0 :             CALL cp_dbcsr_cholesky_invert(ri_2c_inv, para_env=para_env, blacs_env=blacs_env, uplo_to_full=.TRUE.)
    3980             :          CASE (hfx_ri_do_2c_diag)
    3981           0 :             CALL dbcsr_copy(ri_2c_inv, ri_2c_ints(1))
    3982             :             CALL cp_dbcsr_power(ri_2c_inv, -1.0_dp, ri_data%eps_eigval, n_dependent, &
    3983           0 :                                 para_env, blacs_env, verbose=ri_data%unit_nr_dbcsr > 0)
    3984             :          END SELECT
    3985             : 
    3986           0 :          CALL dbt_create(ri_2c_ints(1), t2c_ri_tmp)
    3987             :          CALL create_2c_tensor(t2c_ri_ints, dist1, dist2, ri_data%pgrid_2d, &
    3988             :                                ri_data%bsizes_RI_split, ri_data%bsizes_RI_split, &
    3989             :                                name="(RI | RI)")
    3990           0 :          CALL dbt_create(t2c_ri_ints, t2c_ri_inv)
    3991             : 
    3992           0 :          CALL dbt_copy_matrix_to_tensor(ri_2c_ints(1), t2c_ri_tmp)
    3993           0 :          CALL dbt_copy(t2c_ri_tmp, t2c_ri_ints, move_data=.TRUE.)
    3994           0 :          CALL dbt_filter(t2c_ri_ints, ri_data%filter_eps)
    3995             : 
    3996           0 :          CALL dbt_copy_matrix_to_tensor(ri_2c_inv, t2c_ri_tmp)
    3997           0 :          CALL dbt_copy(t2c_ri_tmp, t2c_ri_inv, move_data=.TRUE.)
    3998           0 :          CALL dbt_filter(t2c_ri_inv, ri_data%filter_eps)
    3999             : 
    4000           0 :          CALL dbcsr_release(ri_2c_ints(1))
    4001           0 :          CALL dbcsr_release(ri_2c_inv)
    4002           0 :          CALL dbt_destroy(t2c_ri_tmp)
    4003           0 :          DEALLOCATE (dist1, dist2)
    4004             :       END IF
    4005             : 
    4006             :       ! The AO density tensor
    4007           0 :       CALL dbt_create(rho_ao(ispin, 1)%matrix, rho_ao_tmp)
    4008             :       CALL create_2c_tensor(rho_ao_t, dist1, dist2, ri_data%pgrid_2d, &
    4009             :                             ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
    4010           0 :                             name="(AO | AO)")
    4011           0 :       DEALLOCATE (dist1, dist2)
    4012             : 
    4013           0 :       CALL dbt_copy_matrix_to_tensor(rho_ao(ispin, 1)%matrix, rho_ao_tmp)
    4014           0 :       CALL dbt_copy(rho_ao_tmp, rho_ao_t, move_data=.TRUE.)
    4015           0 :       CALL dbt_filter(rho_ao_t, ri_data%filter_eps)
    4016           0 :       CALL dbt_destroy(rho_ao_tmp)
    4017             : 
    4018             :       ! Put in in 3D
    4019           0 :       ALLOCATE (dist1(SIZE(ri_data%bsizes_AO_split)), dist2(SIZE(ri_data%bsizes_AO_split)), dist3(1))
    4020           0 :       dist3(1) = 0
    4021           0 :       CALL dbt_get_info(rho_ao_t, pdims=pdims_2d, proc_dist_1=dist1, proc_dist_2=dist2)
    4022           0 :       CALL dbt_default_distvec(1, 1, [1], dist3)
    4023             : 
    4024           0 :       pdims_3d(1) = pdims_2d(1)
    4025           0 :       pdims_3d(2) = pdims_2d(2)
    4026           0 :       pdims_3d(3) = 1
    4027             : 
    4028           0 :       CALL dbt_pgrid_create(para_env, pdims_3d, pgrid_3d)
    4029           0 :       CALL dbt_distribution_new(dist_3d, pgrid_3d, dist1, dist2, dist3)
    4030             :       CALL dbt_create(rho_ao_t_3d, "rho_ao_3d", dist_3d, [1, 2], [3], ri_data%bsizes_AO_split, &
    4031           0 :                       ri_data%bsizes_AO_split, [1])
    4032           0 :       DEALLOCATE (dist1, dist2, dist3)
    4033           0 :       CALL dbt_pgrid_destroy(pgrid_3d)
    4034           0 :       CALL dbt_distribution_destroy(dist_3d)
    4035             : 
    4036             :       ! copy density
    4037           0 :       nblks = 0
    4038             : !$OMP PARALLEL DEFAULT(NONE) &
    4039             : !$OMP SHARED(rho_ao_t,nblks) &
    4040           0 : !$OMP PRIVATE(iter,ind,blk,found)
    4041             :       CALL dbt_iterator_start(iter, rho_ao_t)
    4042             :       DO WHILE (dbt_iterator_blocks_left(iter))
    4043             :          CALL dbt_iterator_next_block(iter, ind)
    4044             :          CALL dbt_get_block(rho_ao_t, ind, blk, found)
    4045             :          IF (found) THEN
    4046             : !$OMP ATOMIC
    4047             :             nblks = nblks + 1
    4048             :             DEALLOCATE (blk)
    4049             :          END IF
    4050             :       END DO
    4051             :       CALL dbt_iterator_stop(iter)
    4052             : !$OMP END PARALLEL
    4053             : 
    4054           0 :       ALLOCATE (idx1(nblks), idx2(nblks), idx3(nblks))
    4055           0 :       idx3 = 1
    4056           0 :       nblks = 0
    4057             : !$OMP PARALLEL DEFAULT(NONE) &
    4058             : !$OMP SHARED(rho_ao_t,nblks,idx1,idx2) &
    4059           0 : !$OMP PRIVATE(iter,ind,blk,found)
    4060             :       CALL dbt_iterator_start(iter, rho_ao_t)
    4061             :       DO WHILE (dbt_iterator_blocks_left(iter))
    4062             :          CALL dbt_iterator_next_block(iter, ind)
    4063             :          CALL dbt_get_block(rho_ao_t, ind, blk, found)
    4064             :          IF (found) THEN
    4065             : !$OMP CRITICAL
    4066             :             nblks = nblks + 1
    4067             :             idx1(nblks) = ind(1)
    4068             :             idx2(nblks) = ind(2)
    4069             : !$OMP END CRITICAL
    4070             :             DEALLOCATE (blk)
    4071             :          END IF
    4072             :       END DO
    4073             :       CALL dbt_iterator_stop(iter)
    4074             : !$OMP END PARALLEL
    4075             : 
    4076           0 : !$OMP PARALLEL DEFAULT(NONE) SHARED(rho_ao_t_3d,nblks,idx1,idx2,idx3) PRIVATE(nblk_per_thread,A,b)
    4077             :       nblk_per_thread = nblks/omp_get_num_threads() + 1
    4078             :       a = omp_get_thread_num()*nblk_per_thread + 1
    4079             :       b = MIN(a + nblk_per_thread, nblks)
    4080             :       CALL dbt_reserve_blocks(rho_ao_t_3d, idx1(a:b), idx2(a:b), idx3(a:b))
    4081             : !$OMP END PARALLEL
    4082             : 
    4083             : !$OMP PARALLEL DEFAULT(NONE) &
    4084             : !$OMP SHARED(rho_ao_t,rho_ao_t_3d) &
    4085           0 : !$OMP PRIVATE(iter,ind,blk,found,blk_3d)
    4086             :       CALL dbt_iterator_start(iter, rho_ao_t)
    4087             :       DO WHILE (dbt_iterator_blocks_left(iter))
    4088             :          CALL dbt_iterator_next_block(iter, ind)
    4089             :          CALL dbt_get_block(rho_ao_t, ind, blk, found)
    4090             :          IF (found) THEN
    4091             :             ALLOCATE (blk_3d(SIZE(blk, 1), SIZE(blk, 2), 1))
    4092             :             blk_3d(:, :, 1) = blk(:, :)
    4093             : !$OMP CRITICAL
    4094             :             CALL dbt_put_block(rho_ao_t_3d, [ind(1), ind(2), 1], [SIZE(blk, 1), SIZE(blk, 2), 1], blk_3d)
    4095             : !$OMP END CRITICAL
    4096             :             DEALLOCATE (blk, blk_3d)
    4097             :          END IF
    4098             :       END DO
    4099             :       CALL dbt_iterator_stop(iter)
    4100             : !$OMP END PARALLEL
    4101             : 
    4102             :       ! The 1D tensor with the density coeffs
    4103           0 :       pdims_2d(1) = para_env%num_pe
    4104           0 :       pdims_2d(2) = 1
    4105             : 
    4106           0 :       ALLOCATE (dist1(SIZE(ri_data%bsizes_RI_split)), dist2(1))
    4107           0 :       CALL dbt_default_distvec(SIZE(ri_data%bsizes_RI_split), pdims_2d(1), ri_data%bsizes_RI_split, dist1)
    4108           0 :       CALL dbt_default_distvec(1, pdims_2d(2), [1], dist2)
    4109             : 
    4110           0 :       CALL dbt_pgrid_create(para_env, pdims_2d, pgrid_2d)
    4111           0 :       CALL dbt_distribution_new(dist_2d, pgrid_2d, dist1, dist2)
    4112           0 :       CALL dbt_create(density_coeffs_t, "density_coeffs", dist_2d, [1], [2], ri_data%bsizes_RI_split, [1])
    4113           0 :       CALL dbt_create(density_coeffs_t, density_tmp)
    4114           0 :       DEALLOCATE (dist1, dist2)
    4115           0 :       CALL dbt_pgrid_destroy(pgrid_2d)
    4116           0 :       CALL dbt_distribution_destroy(dist_2d)
    4117             : 
    4118           0 :       CALL dbt_get_info(ri_data%t_3c_int_ctr_3(1, 1), nfull_total=dims_3c)
    4119             : 
    4120             :       ! The 3c integrals tensor, in case we compute them here
    4121           0 :       pdims_3d = 0
    4122           0 :       CALL dbt_pgrid_create(para_env, pdims_3d, pgrid_3d, tensor_dims=[MAX(1, natom/n_mem), natom, natom])
    4123           0 :       ALLOCATE (t_3c_int_batched(1, 1))
    4124             :       CALL create_3c_tensor(t_3c_int_batched(1, 1), dist1, dist2, dist3, pgrid_3d, &
    4125             :                             ri_data%bsizes_RI, ri_data%bsizes_AO, ri_data%bsizes_AO, map1=[1], map2=[2, 3], &
    4126           0 :                             name="(RI | AO AO)")
    4127             : 
    4128           0 :       CALL dbt_mp_environ_pgrid(pgrid_3d, pdims_3d, pcoord_3d)
    4129           0 :       CALL mp_comm_t3c%create(pgrid_3d%mp_comm_2d, 3, pdims_3d)
    4130             :       CALL distribution_3d_create(dist_nl3c, dist1, dist2, dist3, nkind, particle_set, &
    4131           0 :                                   mp_comm_t3c, own_comm=.TRUE.)
    4132           0 :       DEALLOCATE (dist1, dist2, dist3)
    4133           0 :       CALL dbt_pgrid_destroy(pgrid_3d)
    4134             : 
    4135             :       CALL build_3c_neighbor_lists(nl_3c, basis_set_RI, basis_set_AO, basis_set_AO, dist_nl3c, ri_data%ri_metric, &
    4136           0 :                                    "HFX_3c_nl", qs_env, op_pos=1, sym_jk=.TRUE., own_dist=.TRUE.)
    4137             : 
    4138           0 :       n_mem = ri_data%n_mem
    4139           0 :       CALL create_tensor_batches(ri_data%bsizes_RI, n_mem, dummy1, dummy2, batch_block_start, batch_block_end)
    4140             : 
    4141           0 :       calc_ints = .FALSE.
    4142           0 :       CALL get_tensor_occupancy(ri_data%t_3c_int_ctr_2(1, 1), nze, occ)
    4143           0 :       IF (nze == 0) calc_ints = .TRUE.
    4144             : 
    4145           0 :       DO i_mem = 1, n_mem
    4146           0 :          IF (calc_ints) THEN
    4147             :             CALL build_3c_integrals(t_3c_int_batched, ri_data%filter_eps, qs_env, nl_3c, &
    4148             :                                     basis_set_RI, basis_set_AO, basis_set_AO, &
    4149             :                                     ri_data%ri_metric, int_eps=ri_data%eps_schwarz, op_pos=1, &
    4150             :                                     desymmetrize=.FALSE., &
    4151           0 :                                     bounds_i=[batch_block_start(i_mem), batch_block_end(i_mem)])
    4152           0 :             CALL dbt_copy(t_3c_int_batched(1, 1), ri_data%t_3c_int_ctr_3(1, 1), order=[1, 3, 2])
    4153           0 :             CALL dbt_copy(t_3c_int_batched(1, 1), ri_data%t_3c_int_ctr_3(1, 1), move_data=.TRUE., summation=.TRUE.)
    4154           0 :             CALL dbt_filter(ri_data%t_3c_int_ctr_3(1, 1), ri_data%filter_eps)
    4155             :          ELSE
    4156             :             bounds_cpy(:, 2) = [SUM(ri_data%bsizes_RI(1:batch_block_start(i_mem) - 1)) + 1, &
    4157           0 :                                 SUM(ri_data%bsizes_RI(1:batch_block_end(i_mem)))]
    4158           0 :             bounds_cpy(:, 1) = [1, SUM(ri_data%bsizes_AO)]
    4159           0 :             bounds_cpy(:, 3) = [1, SUM(ri_data%bsizes_AO)]
    4160             :             CALL dbt_copy(ri_data%t_3c_int_ctr_2(1, 1), ri_data%t_3c_int_ctr_3(1, 1), &
    4161           0 :                           order=[2, 1, 3], bounds=bounds_cpy)
    4162             :          END IF
    4163             : 
    4164             :          !contract the integrals with the density P_pq (pq|R)
    4165             :          CALL dbt_contract(1.0_dp, ri_data%t_3c_int_ctr_3(1, 1), rho_ao_t_3d, 0.0_dp, density_tmp, &
    4166             :                            contract_1=[2, 3], notcontract_1=[1], &
    4167             :                            contract_2=[1, 2], notcontract_2=[3], &
    4168           0 :                            map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps)
    4169           0 :          CALL dbt_clear(ri_data%t_3c_int_ctr_3(1, 1))
    4170             : 
    4171           0 :          IF (skip_ri_metric) THEN
    4172           0 :             CALL dbt_copy(density_tmp, density_coeffs_t, move_data=.TRUE.)
    4173             :          ELSE
    4174             :             !contract the above vector with the inverse metric
    4175             :             CALL dbt_contract(1.0_dp, t2c_ri_inv, density_tmp, 1.0_dp, density_coeffs_t, &
    4176             :                               contract_1=[2], notcontract_1=[1], &
    4177             :                               contract_2=[1], notcontract_2=[2], &
    4178           0 :                               map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps)
    4179             :          END IF
    4180             : 
    4181             :       END DO
    4182           0 :       CALL neighbor_list_3c_destroy(nl_3c)
    4183             : 
    4184           0 :       IF (multiply_by_s) THEN
    4185             :          CALL dbt_contract(1.0_dp, t2c_ri_ints, density_coeffs_t, 0.0_dp, density_tmp, &
    4186             :                            contract_1=[2], notcontract_1=[1], &
    4187             :                            contract_2=[1], notcontract_2=[2], &
    4188           0 :                            map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps)
    4189           0 :          CALL dbt_copy(density_tmp, density_coeffs_t, move_data=.TRUE.)
    4190             :       END IF
    4191             : 
    4192           0 :       ALLOCATE (density_coeffs(SUM(ri_data%bsizes_RI)))
    4193           0 :       density_coeffs = 0.0
    4194             : 
    4195             : !$OMP PARALLEL DEFAULT(NONE) &
    4196             : !$OMP SHARED(density_coeffs_t,ri_data,density_coeffs) &
    4197           0 : !$OMP PRIVATE(iter,ind,blk,found,idx)
    4198             :       CALL dbt_iterator_start(iter, density_coeffs_t)
    4199             :       DO WHILE (dbt_iterator_blocks_left(iter))
    4200             :          CALL dbt_iterator_next_block(iter, ind)
    4201             :          CALL dbt_get_block(density_coeffs_t, ind, blk, found)
    4202             :          IF (found) THEN
    4203             : 
    4204             :             idx = SUM(ri_data%bsizes_RI_split(1:ind(1) - 1))
    4205             : !$OMP CRITICAL
    4206             :             density_coeffs(idx + 1:idx + ri_data%bsizes_RI_split(ind(1))) = blk(:, 1)
    4207             : !$OMP END CRITICAL
    4208             :             DEALLOCATE (blk)
    4209             :          END IF
    4210             :       END DO
    4211             :       CALL dbt_iterator_stop(iter)
    4212             : !$OMP END PARALLEL
    4213           0 :       CALL para_env%sum(density_coeffs)
    4214             : 
    4215           0 :       CALL dbt_destroy(density_tmp)
    4216           0 :       CALL dbt_destroy(rho_ao_t)
    4217           0 :       CALL dbt_destroy(rho_ao_t_3d)
    4218           0 :       CALL dbt_destroy(density_coeffs_t)
    4219           0 :       CALL dbt_destroy(t_3c_int_batched(1, 1))
    4220             : 
    4221           0 :       IF (.NOT. skip_ri_metric) THEN
    4222           0 :          CALL dbt_destroy(t2c_ri_ints)
    4223           0 :          CALL dbt_destroy(t2c_ri_inv)
    4224             :       END IF
    4225             : 
    4226           0 :       CALL timestop(handle)
    4227             : 
    4228           0 :    END SUBROUTINE get_RI_density_coeffs
    4229             : 
    4230             : ! **************************************************************************************************
    4231             : !> \brief a small utility function that returns the atom corresponding to a block of a split tensor
    4232             : !> \param idx_to_at ...
    4233             : !> \param bsizes_split ...
    4234             : !> \param bsizes_orig ...
    4235             : !> \return ...
    4236             : ! **************************************************************************************************
    4237         936 :    SUBROUTINE get_idx_to_atom(idx_to_at, bsizes_split, bsizes_orig)
    4238             :       INTEGER, DIMENSION(:), INTENT(INOUT)               :: idx_to_at
    4239             :       INTEGER, DIMENSION(:), INTENT(IN)                  :: bsizes_split, bsizes_orig
    4240             : 
    4241             :       INTEGER                                            :: full_sum, iat, iblk, split_sum
    4242             : 
    4243         936 :       iat = 1
    4244         936 :       full_sum = bsizes_orig(iat)
    4245         936 :       split_sum = 0
    4246        4662 :       DO iblk = 1, SIZE(bsizes_split)
    4247        3726 :          split_sum = split_sum + bsizes_split(iblk)
    4248             : 
    4249        3726 :          IF (split_sum .GT. full_sum) THEN
    4250        1340 :             iat = iat + 1
    4251        1340 :             full_sum = full_sum + bsizes_orig(iat)
    4252             :          END IF
    4253             : 
    4254        4662 :          idx_to_at(iblk) = iat
    4255             :       END DO
    4256             : 
    4257         936 :    END SUBROUTINE get_idx_to_atom
    4258             : 
    4259             : ! **************************************************************************************************
    4260             : !> \brief Function for calculating sqrt of a matrix
    4261             : !> \param values ...
    4262             : !> \return ...
    4263             : ! **************************************************************************************************
    4264           0 :    FUNCTION my_sqrt(values)
    4265             :       REAL(KIND=dp), DIMENSION(:), INTENT(IN)            :: values
    4266             :       REAL(KIND=dp), DIMENSION(SIZE(values))             :: my_sqrt
    4267             : 
    4268           0 :       my_sqrt = SQRT(values)
    4269             :    END FUNCTION
    4270             : 
    4271             : ! **************************************************************************************************
    4272             : !> \brief Function for calculation inverse sqrt of a matrix
    4273             : !> \param values ...
    4274             : !> \return ...
    4275             : ! **************************************************************************************************
    4276           0 :    FUNCTION my_invsqrt(values)
    4277             :       REAL(KIND=dp), DIMENSION(:), INTENT(IN)            :: values
    4278             :       REAL(KIND=dp), DIMENSION(SIZE(values))             :: my_invsqrt
    4279             : 
    4280           0 :       my_invsqrt = SQRT(1.0_dp/values)
    4281             :    END FUNCTION
    4282         328 : END MODULE

Generated by: LCOV version 1.15