LCOV - code coverage report
Current view: top level - src - hfx_ri.F (source / functions) Coverage Total Hit
Test: CP2K Regtests (git:42dac4a) Lines: 80.6 % 1907 1537
Test Date: 2025-07-25 12:55:17 Functions: 69.6 % 23 16

            Line data    Source code
       1              : !--------------------------------------------------------------------------------------------------!
       2              : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3              : !   Copyright 2000-2025 CP2K developers group <https://cp2k.org>                                   !
       4              : !                                                                                                  !
       5              : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6              : !--------------------------------------------------------------------------------------------------!
       7              : 
       8              : ! **************************************************************************************************
       9              : !> \brief RI-methods for HFX
      10              : ! **************************************************************************************************
      11              : 
      12              : MODULE hfx_ri
      13              : 
      14              :    USE OMP_LIB,                         ONLY: omp_get_num_threads,&
      15              :                                               omp_get_thread_num
      16              :    USE arnoldi_api,                     ONLY: arnoldi_extremal
      17              :    USE atomic_kind_types,               ONLY: atomic_kind_type,&
      18              :                                               get_atomic_kind,&
      19              :                                               get_atomic_kind_set
      20              :    USE basis_set_types,                 ONLY: get_gto_basis_set,&
      21              :                                               gto_basis_set_p_type,&
      22              :                                               gto_basis_set_type
      23              :    USE cell_types,                      ONLY: cell_type,&
      24              :                                               get_cell
      25              :    USE cp_blacs_env,                    ONLY: cp_blacs_env_type
      26              :    USE cp_control_types,                ONLY: dft_control_type
      27              :    USE cp_dbcsr_api,                    ONLY: &
      28              :         dbcsr_add, dbcsr_complete_redistribute, dbcsr_copy, dbcsr_create, dbcsr_desymmetrize, &
      29              :         dbcsr_distribution_get, dbcsr_distribution_release, dbcsr_distribution_type, dbcsr_filter, &
      30              :         dbcsr_get_info, dbcsr_get_num_blocks, dbcsr_multiply, dbcsr_p_type, dbcsr_release, &
      31              :         dbcsr_scale, dbcsr_type, dbcsr_type_antisymmetric, dbcsr_type_no_symmetry, &
      32              :         dbcsr_type_symmetric
      33              :    USE cp_dbcsr_cholesky,               ONLY: cp_dbcsr_cholesky_decompose,&
      34              :                                               cp_dbcsr_cholesky_invert
      35              :    USE cp_dbcsr_contrib,                ONLY: dbcsr_add_on_diag,&
      36              :                                               dbcsr_dot,&
      37              :                                               dbcsr_frobenius_norm
      38              :    USE cp_dbcsr_diag,                   ONLY: cp_dbcsr_power
      39              :    USE cp_dbcsr_operations,             ONLY: copy_dbcsr_to_fm,&
      40              :                                               copy_fm_to_dbcsr,&
      41              :                                               cp_dbcsr_dist2d_to_dist,&
      42              :                                               dbcsr_deallocate_matrix_set
      43              :    USE cp_fm_struct,                    ONLY: cp_fm_struct_create,&
      44              :                                               cp_fm_struct_release,&
      45              :                                               cp_fm_struct_type
      46              :    USE cp_fm_types,                     ONLY: cp_fm_create,&
      47              :                                               cp_fm_release,&
      48              :                                               cp_fm_type,&
      49              :                                               cp_fm_write_unformatted
      50              :    USE cp_log_handling,                 ONLY: cp_get_default_logger,&
      51              :                                               cp_logger_type
      52              :    USE cp_output_handling,              ONLY: cp_p_file,&
      53              :                                               cp_print_key_finished_output,&
      54              :                                               cp_print_key_should_output,&
      55              :                                               cp_print_key_unit_nr
      56              :    USE dbt_api,                         ONLY: &
      57              :         dbt_batched_contract_finalize, dbt_batched_contract_init, dbt_clear, dbt_contract, &
      58              :         dbt_copy, dbt_copy_matrix_to_tensor, dbt_copy_tensor_to_matrix, dbt_create, &
      59              :         dbt_default_distvec, dbt_destroy, dbt_distribution_destroy, dbt_distribution_new, &
      60              :         dbt_distribution_type, dbt_filter, dbt_get_block, dbt_get_info, dbt_get_num_blocks_total, &
      61              :         dbt_iterator_blocks_left, dbt_iterator_next_block, dbt_iterator_start, dbt_iterator_stop, &
      62              :         dbt_iterator_type, dbt_mp_environ_pgrid, dbt_nd_mp_comm, dbt_pgrid_create, &
      63              :         dbt_pgrid_destroy, dbt_pgrid_type, dbt_put_block, dbt_reserve_blocks, dbt_scale, dbt_type
      64              :    USE distribution_2d_types,           ONLY: distribution_2d_type
      65              :    USE hfx_types,                       ONLY: alloc_containers,&
      66              :                                               block_ind_type,&
      67              :                                               dealloc_containers,&
      68              :                                               hfx_compression_type,&
      69              :                                               hfx_ri_init,&
      70              :                                               hfx_ri_release,&
      71              :                                               hfx_ri_type
      72              :    USE input_constants,                 ONLY: hfx_ri_do_2c_cholesky,&
      73              :                                               hfx_ri_do_2c_diag,&
      74              :                                               hfx_ri_do_2c_iter
      75              :    USE input_cp2k_hfx,                  ONLY: ri_mo,&
      76              :                                               ri_pmat
      77              :    USE input_section_types,             ONLY: section_vals_get_subs_vals,&
      78              :                                               section_vals_type,&
      79              :                                               section_vals_val_get
      80              :    USE iterate_matrix,                  ONLY: invert_hotelling,&
      81              :                                               matrix_sqrt_newton_schulz
      82              :    USE kinds,                           ONLY: default_string_length,&
      83              :                                               dp,&
      84              :                                               int_8
      85              :    USE machine,                         ONLY: m_walltime
      86              :    USE message_passing,                 ONLY: mp_cart_type,&
      87              :                                               mp_comm_type,&
      88              :                                               mp_para_env_type
      89              :    USE orbital_pointers,                ONLY: nso
      90              :    USE particle_methods,                ONLY: get_particle_set
      91              :    USE particle_types,                  ONLY: particle_type
      92              :    USE qs_environment_types,            ONLY: get_qs_env,&
      93              :                                               qs_environment_type
      94              :    USE qs_force_types,                  ONLY: qs_force_type
      95              :    USE qs_integral_utils,               ONLY: basis_set_list_setup
      96              :    USE qs_interactions,                 ONLY: init_interaction_radii_orb_basis
      97              :    USE qs_kind_types,                   ONLY: qs_kind_type
      98              :    USE qs_ks_types,                     ONLY: qs_ks_env_type
      99              :    USE qs_mo_types,                     ONLY: get_mo_set,&
     100              :                                               mo_set_type
     101              :    USE qs_neighbor_list_types,          ONLY: neighbor_list_set_p_type,&
     102              :                                               release_neighbor_list_sets
     103              :    USE qs_rho_types,                    ONLY: qs_rho_get,&
     104              :                                               qs_rho_type
     105              :    USE qs_tensors,                      ONLY: &
     106              :         build_2c_derivatives, build_2c_integrals, build_2c_neighbor_lists, build_3c_derivatives, &
     107              :         build_3c_integrals, build_3c_neighbor_lists, calc_2c_virial, calc_3c_virial, &
     108              :         compress_tensor, decompress_tensor, get_tensor_occupancy, neighbor_list_3c_destroy
     109              :    USE qs_tensors_types,                ONLY: create_2c_tensor,&
     110              :                                               create_3c_tensor,&
     111              :                                               create_tensor_batches,&
     112              :                                               distribution_3d_create,&
     113              :                                               distribution_3d_type,&
     114              :                                               neighbor_list_3c_type,&
     115              :                                               split_block_sizes
     116              :    USE string_utilities,                ONLY: uppercase
     117              :    USE util,                            ONLY: sort
     118              :    USE virial_types,                    ONLY: virial_type
     119              : #include "./base/base_uses.f90"
     120              : 
     121              : !$ USE OMP_LIB, ONLY: omp_get_num_threads
     122              : 
     123              :    IMPLICIT NONE
     124              :    PRIVATE
     125              : 
     126              :    PUBLIC :: hfx_ri_update_ks, hfx_ri_update_forces, get_force_from_3c_trace, get_2c_der_force, &
     127              :              get_idx_to_atom, print_ri_hfx, hfx_ri_pre_scf_calc_tensors, check_inverse
     128              : 
     129              :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'hfx_ri'
     130              : CONTAINS
     131              : 
     132              : ! **************************************************************************************************
     133              : !> \brief Switches the RI_FLAVOR from MO to RHO or vice-versa
     134              : !> \param ri_data ...
     135              : !> \param qs_env ...
     136              : !> \note As a side product, the ri_data is mostly reinitialized and the integrals recomputed
     137              : ! **************************************************************************************************
     138           22 :    SUBROUTINE switch_ri_flavor(ri_data, qs_env)
     139              :       TYPE(hfx_ri_type), INTENT(INOUT)                   :: ri_data
     140              :       TYPE(qs_environment_type), POINTER                 :: qs_env
     141              : 
     142              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'switch_ri_flavor'
     143              : 
     144              :       INTEGER                                            :: handle, n_mem, new_flavor
     145           22 :       TYPE(atomic_kind_type), DIMENSION(:), POINTER      :: atomic_kind_set
     146              :       TYPE(dft_control_type), POINTER                    :: dft_control
     147              :       TYPE(mp_para_env_type), POINTER                    :: para_env
     148           22 :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
     149           22 :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
     150              : 
     151           22 :       NULLIFY (qs_kind_set, particle_set, atomic_kind_set, para_env, dft_control)
     152              : 
     153           22 :       CALL timeset(routineN, handle)
     154              : 
     155              :       CALL get_qs_env(qs_env, para_env=para_env, dft_control=dft_control, atomic_kind_set=atomic_kind_set, &
     156           22 :                       particle_set=particle_set, qs_kind_set=qs_kind_set)
     157              : 
     158           22 :       CALL hfx_ri_release(ri_data, write_stats=.FALSE.)
     159              : 
     160           22 :       IF (ri_data%flavor == ri_pmat) THEN
     161              :          new_flavor = ri_mo
     162              :       ELSE
     163           12 :          new_flavor = ri_pmat
     164              :       END IF
     165           22 :       ri_data%flavor = new_flavor
     166              : 
     167           22 :       n_mem = ri_data%n_mem_input
     168           22 :       ri_data%n_mem_input = ri_data%n_mem_flavor_switch
     169           22 :       ri_data%n_mem_flavor_switch = n_mem
     170              : 
     171           22 :       CALL hfx_ri_init(ri_data, qs_kind_set, particle_set, atomic_kind_set, para_env)
     172              : 
     173              :       !Need to recalculate the integrals in the new flavor
     174              :       !TODO: should we backup the integrals and symmetrize/desymmetrize them instead of recomputing ?!?
     175              :       !      that only makes sense if actual integral calculation is not negligible
     176           22 :       IF (ri_data%flavor == ri_pmat) THEN
     177           12 :          CALL hfx_ri_pre_scf_Pmat(qs_env, ri_data)
     178              :       ELSE
     179           10 :          CALL hfx_ri_pre_scf_mo(qs_env, ri_data, dft_control%nspins)
     180              :       END IF
     181              : 
     182           22 :       IF (ri_data%unit_nr > 0) THEN
     183            0 :          IF (ri_data%flavor == ri_pmat) THEN
     184            0 :             WRITE (ri_data%unit_nr, '(T2,A)') "HFX_RI_INFO| temporarily switched to RI_FLAVOR RHO"
     185              :          ELSE
     186            0 :             WRITE (ri_data%unit_nr, '(T2,A)') "HFX_RI_INFO| temporarily switched to RI_FLAVOR MO"
     187              :          END IF
     188              :       END IF
     189              : 
     190           22 :       CALL timestop(handle)
     191              : 
     192           22 :    END SUBROUTINE switch_ri_flavor
     193              : 
     194              : ! **************************************************************************************************
     195              : !> \brief Pre-SCF steps in MO flavor of RI HFX
     196              : !>
     197              : !> Calculate 2-center & 3-center integrals (see hfx_ri_pre_scf_calc_tensors) and contract
     198              : !> K(P, S) = sum_R K_2(P, R)^{-1} K_1(R, S)^{1/2}
     199              : !> B(mu, lambda, R) = sum_P int_3c(mu, lambda, P) K(P, R)
     200              : !> \param qs_env ...
     201              : !> \param ri_data ...
     202              : !> \param nspins ...
     203              : ! **************************************************************************************************
     204           26 :    SUBROUTINE hfx_ri_pre_scf_mo(qs_env, ri_data, nspins)
     205              :       TYPE(qs_environment_type), POINTER                 :: qs_env
     206              :       TYPE(hfx_ri_type), INTENT(INOUT)                   :: ri_data
     207              :       INTEGER, INTENT(IN)                                :: nspins
     208              : 
     209              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'hfx_ri_pre_scf_mo'
     210              : 
     211              :       INTEGER                                            :: handle, handle2, ispin, n_dependent, &
     212              :                                                             unit_nr, unit_nr_dbcsr
     213              :       REAL(KIND=dp)                                      :: threshold
     214              :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
     215          156 :       TYPE(dbcsr_type), DIMENSION(1) :: dbcsr_work_1, dbcsr_work_2, t_2c_int_mat, t_2c_op_pot, &
     216          130 :          t_2c_op_pot_sqrt, t_2c_op_pot_sqrt_inv, t_2c_op_RI, t_2c_op_RI_inv
     217           26 :       TYPE(dbt_type), ALLOCATABLE, DIMENSION(:)          :: t_2c_int, t_2c_work
     218           26 :       TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :)       :: t_3c_int
     219              :       TYPE(mp_para_env_type), POINTER                    :: para_env
     220              : 
     221           26 :       CALL timeset(routineN, handle)
     222              : 
     223           26 :       unit_nr_dbcsr = ri_data%unit_nr_dbcsr
     224           26 :       unit_nr = ri_data%unit_nr
     225              : 
     226           26 :       CALL get_qs_env(qs_env, para_env=para_env, blacs_env=blacs_env)
     227              : 
     228           26 :       CALL timeset(routineN//"_int", handle2)
     229              : 
     230          806 :       ALLOCATE (t_2c_int(1), t_2c_work(1), t_3c_int(1, 1))
     231           26 :       CALL hfx_ri_pre_scf_calc_tensors(qs_env, ri_data, t_2c_op_RI, t_2c_op_pot, t_3c_int)
     232              : 
     233           26 :       CALL timestop(handle2)
     234              : 
     235           26 :       CALL timeset(routineN//"_2c", handle2)
     236           26 :       IF (.NOT. ri_data%same_op) THEN
     237            4 :          SELECT CASE (ri_data%t2c_method)
     238              :          CASE (hfx_ri_do_2c_iter)
     239            0 :             CALL dbcsr_create(t_2c_op_RI_inv(1), template=t_2c_op_RI(1), matrix_type=dbcsr_type_no_symmetry)
     240            0 :             threshold = MAX(ri_data%filter_eps, 1.0e-12_dp)
     241            0 :             CALL invert_hotelling(t_2c_op_RI_inv(1), t_2c_op_RI(1), threshold=threshold, silent=.FALSE.)
     242              :          CASE (hfx_ri_do_2c_cholesky)
     243            4 :             CALL dbcsr_copy(t_2c_op_RI_inv(1), t_2c_op_RI(1))
     244            4 :             CALL cp_dbcsr_cholesky_decompose(t_2c_op_RI_inv(1), para_env=para_env, blacs_env=blacs_env)
     245            4 :             CALL cp_dbcsr_cholesky_invert(t_2c_op_RI_inv(1), para_env=para_env, blacs_env=blacs_env, uplo_to_full=.TRUE.)
     246              :          CASE (hfx_ri_do_2c_diag)
     247            0 :             CALL dbcsr_copy(t_2c_op_RI_inv(1), t_2c_op_RI(1))
     248              :             CALL cp_dbcsr_power(t_2c_op_RI_inv(1), -1.0_dp, ri_data%eps_eigval, n_dependent, &
     249            4 :                                 para_env, blacs_env, verbose=ri_data%unit_nr_dbcsr > 0)
     250              :          END SELECT
     251              : 
     252            4 :          IF (ri_data%check_2c_inv) THEN
     253            0 :             CALL check_inverse(t_2c_op_RI_inv(1), t_2c_op_RI(1), unit_nr=unit_nr)
     254              :          END IF
     255              : 
     256            4 :          CALL dbcsr_release(t_2c_op_RI(1))
     257              : 
     258            4 :          SELECT CASE (ri_data%t2c_method)
     259              :          CASE (hfx_ri_do_2c_iter)
     260            0 :             CALL dbcsr_create(t_2c_op_pot_sqrt(1), template=t_2c_op_pot(1), matrix_type=dbcsr_type_symmetric)
     261            0 :             CALL dbcsr_create(t_2c_op_pot_sqrt_inv(1), template=t_2c_op_pot(1), matrix_type=dbcsr_type_symmetric)
     262              :             CALL matrix_sqrt_newton_schulz(t_2c_op_pot_sqrt(1), t_2c_op_pot_sqrt_inv(1), t_2c_op_pot(1), &
     263              :                                            ri_data%filter_eps, ri_data%t2c_sqrt_order, ri_data%eps_lanczos, &
     264            0 :                                            ri_data%max_iter_lanczos)
     265              : 
     266            0 :             CALL dbcsr_release(t_2c_op_pot_sqrt_inv(1))
     267              :          CASE (hfx_ri_do_2c_diag, hfx_ri_do_2c_cholesky)
     268            4 :             CALL dbcsr_copy(t_2c_op_pot_sqrt(1), t_2c_op_pot(1))
     269              :             CALL cp_dbcsr_power(t_2c_op_pot_sqrt(1), 0.5_dp, ri_data%eps_eigval, n_dependent, &
     270            8 :                                 para_env, blacs_env, verbose=ri_data%unit_nr_dbcsr > 0)
     271              :          END SELECT
     272              : 
     273              :          !We need S^-1 and (P|Q) for the forces.
     274            4 :          CALL dbt_create(t_2c_op_RI_inv(1), t_2c_work(1))
     275            4 :          CALL dbt_copy_matrix_to_tensor(t_2c_op_RI_inv(1), t_2c_work(1))
     276            4 :          CALL dbt_copy(t_2c_work(1), ri_data%t_2c_inv(1, 1), move_data=.TRUE.)
     277            4 :          CALL dbt_destroy(t_2c_work(1))
     278            4 :          CALL dbt_filter(ri_data%t_2c_inv(1, 1), ri_data%filter_eps)
     279              : 
     280            4 :          CALL dbt_create(t_2c_op_pot(1), t_2c_work(1))
     281            4 :          CALL dbt_copy_matrix_to_tensor(t_2c_op_pot(1), t_2c_work(1))
     282            4 :          CALL dbt_copy(t_2c_work(1), ri_data%t_2c_pot(1, 1), move_data=.TRUE.)
     283            4 :          CALL dbt_destroy(t_2c_work(1))
     284            4 :          CALL dbt_filter(ri_data%t_2c_pot(1, 1), ri_data%filter_eps)
     285              : 
     286            4 :          IF (ri_data%check_2c_inv) THEN
     287            0 :             CALL check_sqrt(t_2c_op_pot(1), matrix_sqrt=t_2c_op_pot_sqrt(1), unit_nr=unit_nr)
     288              :          END IF
     289            4 :          CALL dbcsr_create(t_2c_int_mat(1), template=t_2c_op_pot(1), matrix_type=dbcsr_type_no_symmetry)
     290              :          CALL dbcsr_multiply("N", "N", 1.0_dp, t_2c_op_RI_inv(1), t_2c_op_pot_sqrt(1), &
     291            4 :                              0.0_dp, t_2c_int_mat(1), filter_eps=ri_data%filter_eps)
     292            4 :          CALL dbcsr_release(t_2c_op_RI_inv(1))
     293            4 :          CALL dbcsr_release(t_2c_op_pot_sqrt(1))
     294              :       ELSE
     295           22 :          SELECT CASE (ri_data%t2c_method)
     296              :          CASE (hfx_ri_do_2c_iter)
     297            0 :             CALL dbcsr_create(t_2c_int_mat(1), template=t_2c_op_pot(1), matrix_type=dbcsr_type_symmetric)
     298            0 :             CALL dbcsr_create(t_2c_op_pot_sqrt(1), template=t_2c_op_pot(1), matrix_type=dbcsr_type_symmetric)
     299              :             CALL matrix_sqrt_newton_schulz(t_2c_op_pot_sqrt(1), t_2c_int_mat(1), t_2c_op_pot(1), &
     300              :                                            ri_data%filter_eps, ri_data%t2c_sqrt_order, ri_data%eps_lanczos, &
     301            0 :                                            ri_data%max_iter_lanczos)
     302            0 :             CALL dbcsr_release(t_2c_op_pot_sqrt(1))
     303              :          CASE (hfx_ri_do_2c_diag, hfx_ri_do_2c_cholesky)
     304           22 :             CALL dbcsr_copy(t_2c_int_mat(1), t_2c_op_pot(1))
     305              :             CALL cp_dbcsr_power(t_2c_int_mat(1), -0.5_dp, ri_data%eps_eigval, n_dependent, &
     306           44 :                                 para_env, blacs_env, verbose=ri_data%unit_nr_dbcsr > 0)
     307              :          END SELECT
     308           22 :          IF (ri_data%check_2c_inv) THEN
     309            0 :             CALL check_sqrt(t_2c_op_pot(1), matrix_sqrt_inv=t_2c_int_mat(1), unit_nr=unit_nr)
     310              :          END IF
     311              : 
     312              :          !We need (P|Q)^-1 for the forces
     313           22 :          CALL dbcsr_copy(dbcsr_work_1(1), t_2c_int_mat(1))
     314           22 :          CALL dbcsr_create(dbcsr_work_2(1), template=t_2c_int_mat(1))
     315           22 :          CALL dbcsr_multiply("N", "N", 1.0_dp, dbcsr_work_1(1), t_2c_int_mat(1), 0.0_dp, dbcsr_work_2(1))
     316           22 :          CALL dbcsr_release(dbcsr_work_1(1))
     317           22 :          CALL dbt_create(dbcsr_work_2(1), t_2c_work(1))
     318           22 :          CALL dbt_copy_matrix_to_tensor(dbcsr_work_2(1), t_2c_work(1))
     319           22 :          CALL dbcsr_release(dbcsr_work_2(1))
     320           22 :          CALL dbt_copy(t_2c_work(1), ri_data%t_2c_inv(1, 1), move_data=.TRUE.)
     321           22 :          CALL dbt_destroy(t_2c_work(1))
     322           22 :          CALL dbt_filter(ri_data%t_2c_inv(1, 1), ri_data%filter_eps)
     323              :       END IF
     324              : 
     325           26 :       CALL dbcsr_release(t_2c_op_pot(1))
     326              : 
     327           26 :       CALL dbt_create(t_2c_int_mat(1), t_2c_int(1), name="(RI|RI)")
     328           26 :       CALL dbt_copy_matrix_to_tensor(t_2c_int_mat(1), t_2c_int(1))
     329           26 :       CALL dbcsr_release(t_2c_int_mat(1))
     330           58 :       DO ispin = 1, nspins
     331           58 :          CALL dbt_copy(t_2c_int(1), ri_data%t_2c_int(ispin, 1))
     332              :       END DO
     333           26 :       CALL dbt_destroy(t_2c_int(1))
     334           26 :       CALL timestop(handle2)
     335              : 
     336           26 :       CALL timeset(routineN//"_3c", handle2)
     337           26 :       CALL dbt_copy(t_3c_int(1, 1), ri_data%t_3c_int_ctr_1(1, 1), order=[2, 1, 3], move_data=.TRUE.)
     338           26 :       CALL dbt_filter(ri_data%t_3c_int_ctr_1(1, 1), ri_data%filter_eps)
     339           26 :       CALL dbt_copy(ri_data%t_3c_int_ctr_1(1, 1), ri_data%t_3c_int_ctr_2(1, 1))
     340           26 :       CALL dbt_destroy(t_3c_int(1, 1))
     341           26 :       CALL timestop(handle2)
     342              : 
     343           26 :       CALL timestop(handle)
     344              : 
     345          260 :    END SUBROUTINE
     346              : 
     347              : ! **************************************************************************************************
     348              : !> \brief ...
     349              : !> \param matrix_1 ...
     350              : !> \param matrix_2 ...
     351              : !> \param name ...
     352              : !> \param unit_nr ...
     353              : ! **************************************************************************************************
     354            0 :    SUBROUTINE check_inverse(matrix_1, matrix_2, name, unit_nr)
     355              :       TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix_1, matrix_2
     356              :       CHARACTER(len=*), INTENT(IN), OPTIONAL             :: name
     357              :       INTEGER, INTENT(IN)                                :: unit_nr
     358              : 
     359              :       CHARACTER(len=default_string_length)               :: name_prv
     360              :       REAL(KIND=dp)                                      :: error, frob_matrix, frob_matrix_base
     361              :       TYPE(dbcsr_type)                                   :: matrix_tmp
     362              : 
     363            0 :       IF (PRESENT(name)) THEN
     364            0 :          name_prv = name
     365              :       ELSE
     366            0 :          CALL dbcsr_get_info(matrix_1, name=name_prv)
     367              :       END IF
     368              : 
     369            0 :       CALL dbcsr_create(matrix_tmp, template=matrix_1)
     370              :       CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_1, matrix_2, &
     371            0 :                           0.0_dp, matrix_tmp)
     372            0 :       frob_matrix_base = dbcsr_frobenius_norm(matrix_tmp)
     373            0 :       CALL dbcsr_add_on_diag(matrix_tmp, -1.0_dp)
     374            0 :       frob_matrix = dbcsr_frobenius_norm(matrix_tmp)
     375            0 :       error = frob_matrix/frob_matrix_base
     376            0 :       IF (unit_nr > 0) THEN
     377              :          WRITE (UNIT=unit_nr, FMT="(T3,A,A,A,T73,ES8.1)") &
     378            0 :             "HFX_RI_INFO| Error for INV(", TRIM(name_prv), "):", error
     379              :       END IF
     380              : 
     381            0 :       CALL dbcsr_release(matrix_tmp)
     382            0 :    END SUBROUTINE
     383              : 
     384              : ! **************************************************************************************************
     385              : !> \brief ...
     386              : !> \param matrix ...
     387              : !> \param matrix_sqrt ...
     388              : !> \param matrix_sqrt_inv ...
     389              : !> \param name ...
     390              : !> \param unit_nr ...
     391              : ! **************************************************************************************************
     392            0 :    SUBROUTINE check_sqrt(matrix, matrix_sqrt, matrix_sqrt_inv, name, unit_nr)
     393              :       TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix
     394              :       TYPE(dbcsr_type), INTENT(IN), OPTIONAL             :: matrix_sqrt, matrix_sqrt_inv
     395              :       CHARACTER(len=*), INTENT(IN), OPTIONAL             :: name
     396              :       INTEGER, INTENT(IN)                                :: unit_nr
     397              : 
     398              :       CHARACTER(len=default_string_length)               :: name_prv
     399              :       REAL(KIND=dp)                                      :: frob_matrix
     400              :       TYPE(dbcsr_type)                                   :: matrix_copy, matrix_tmp
     401              : 
     402            0 :       IF (PRESENT(name)) THEN
     403            0 :          name_prv = name
     404              :       ELSE
     405            0 :          CALL dbcsr_get_info(matrix, name=name_prv)
     406              :       END IF
     407            0 :       IF (PRESENT(matrix_sqrt)) THEN
     408            0 :          CALL dbcsr_create(matrix_tmp, template=matrix)
     409            0 :          CALL dbcsr_copy(matrix_copy, matrix_sqrt)
     410              :          CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_sqrt, matrix_copy, &
     411            0 :                              0.0_dp, matrix_tmp)
     412            0 :          CALL dbcsr_add(matrix_tmp, matrix, 1.0_dp, -1.0_dp)
     413            0 :          frob_matrix = dbcsr_frobenius_norm(matrix_tmp)
     414            0 :          IF (unit_nr > 0) THEN
     415              :             WRITE (UNIT=unit_nr, FMT="(T3,A,A,A,T73,ES8.1)") &
     416            0 :                "HFX_RI_INFO| Error for SQRT(", TRIM(name_prv), "):", frob_matrix
     417              :          END IF
     418            0 :          CALL dbcsr_release(matrix_tmp)
     419            0 :          CALL dbcsr_release(matrix_copy)
     420              :       END IF
     421              : 
     422            0 :       IF (PRESENT(matrix_sqrt_inv)) THEN
     423            0 :          CALL dbcsr_create(matrix_tmp, template=matrix)
     424            0 :          CALL dbcsr_copy(matrix_copy, matrix_sqrt_inv)
     425              :          CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_sqrt_inv, matrix_copy, &
     426            0 :                              0.0_dp, matrix_tmp)
     427            0 :          CALL check_inverse(matrix_tmp, matrix, name="SQRT("//TRIM(name_prv)//")", unit_nr=unit_nr)
     428            0 :          CALL dbcsr_release(matrix_tmp)
     429            0 :          CALL dbcsr_release(matrix_copy)
     430              :       END IF
     431              : 
     432            0 :    END SUBROUTINE
     433              : 
     434              : ! **************************************************************************************************
     435              : !> \brief Calculate 2-center and 3-center integrals
     436              : !>
     437              : !> 2c: K_1(P, R) = (P|v1|R) and K_2(P, R) = (P|v2|R)
     438              : !> 3c: int_3c(mu, lambda, P) = (mu lambda |v2| P)
     439              : !> v_1 is HF operator, v_2 is RI metric
     440              : !> \param qs_env ...
     441              : !> \param ri_data ...
     442              : !> \param t_2c_int_RI K_2(P, R) note: even with k-point, only need on central cell
     443              : !> \param t_2c_int_pot K_1(P, R)
     444              : !> \param t_3c_int int_3c(mu, lambda, P)
     445              : !> \param do_kpoints ...
     446              : !> \notes the integral tensor arrays are already allocated on entry
     447              : ! **************************************************************************************************
     448         4620 :    SUBROUTINE hfx_ri_pre_scf_calc_tensors(qs_env, ri_data, t_2c_int_RI, t_2c_int_pot, t_3c_int, do_kpoints)
     449              :       TYPE(qs_environment_type), POINTER                 :: qs_env
     450              :       TYPE(hfx_ri_type), INTENT(INOUT)                   :: ri_data
     451              :       TYPE(dbcsr_type), DIMENSION(:), INTENT(OUT)        :: t_2c_int_RI, t_2c_int_pot
     452              :       TYPE(dbt_type), DIMENSION(:, :)                    :: t_3c_int
     453              :       LOGICAL, INTENT(IN), OPTIONAL                      :: do_kpoints
     454              : 
     455              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'hfx_ri_pre_scf_calc_tensors'
     456              : 
     457              :       CHARACTER                                          :: symm
     458              :       INTEGER                                            :: handle, i_img, i_mem, ibasis, j_img, &
     459              :                                                             n_mem, natom, nblks, nimg, nkind, &
     460              :                                                             nthreads
     461              :       INTEGER(int_8)                                     :: nze
     462          196 :       INTEGER, ALLOCATABLE, DIMENSION(:) :: dist_AO_1, dist_AO_2, dist_RI, dist_RI_ext, &
     463          392 :          ends_array_mc_block_int, ends_array_mc_int, sizes_AO, sizes_RI, sizes_RI_ext, &
     464          196 :          sizes_RI_ext_split, starts_array_mc_block_int, starts_array_mc_int
     465              :       INTEGER, DIMENSION(3)                              :: pcoord, pdims
     466          392 :       INTEGER, DIMENSION(:), POINTER                     :: col_bsize, row_bsize
     467              :       LOGICAL                                            :: converged, do_kpoints_prv
     468              :       REAL(dp)                                           :: max_ev, min_ev, occ, RI_range
     469          196 :       TYPE(atomic_kind_type), DIMENSION(:), POINTER      :: atomic_kind_set
     470              :       TYPE(dbcsr_distribution_type)                      :: dbcsr_dist
     471         1372 :       TYPE(dbt_distribution_type)                        :: t_dist
     472          588 :       TYPE(dbt_pgrid_type)                               :: pgrid
     473         1372 :       TYPE(dbt_type)                                     :: t_3c_tmp
     474          196 :       TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :)       :: t_3c_int_batched
     475              :       TYPE(dft_control_type), POINTER                    :: dft_control
     476              :       TYPE(distribution_2d_type), POINTER                :: dist_2d
     477              :       TYPE(distribution_3d_type)                         :: dist_3d
     478              :       TYPE(gto_basis_set_p_type), ALLOCATABLE, &
     479          196 :          DIMENSION(:), TARGET                            :: basis_set_AO, basis_set_RI
     480              :       TYPE(gto_basis_set_type), POINTER                  :: orb_basis, ri_basis
     481          196 :       TYPE(mp_cart_type)                                 :: mp_comm_t3c
     482              :       TYPE(mp_para_env_type), POINTER                    :: para_env
     483              :       TYPE(neighbor_list_3c_type)                        :: nl_3c
     484              :       TYPE(neighbor_list_set_p_type), DIMENSION(:), &
     485          196 :          POINTER                                         :: nl_2c_pot, nl_2c_RI
     486          196 :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
     487          196 :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
     488              :       TYPE(qs_ks_env_type), POINTER                      :: ks_env
     489              : 
     490          196 :       CALL timeset(routineN, handle)
     491          196 :       NULLIFY (col_bsize, row_bsize, dist_2d, nl_2c_pot, nl_2c_RI, &
     492          196 :                particle_set, qs_kind_set, ks_env, para_env)
     493              : 
     494              :       CALL get_qs_env(qs_env, natom=natom, nkind=nkind, qs_kind_set=qs_kind_set, particle_set=particle_set, &
     495          196 :                       distribution_2d=dist_2d, ks_env=ks_env, dft_control=dft_control, para_env=para_env)
     496              : 
     497          196 :       RI_range = 0.0_dp
     498          196 :       do_kpoints_prv = .FALSE.
     499          196 :       IF (PRESENT(do_kpoints)) do_kpoints_prv = do_kpoints
     500          196 :       nimg = 1
     501          196 :       IF (do_kpoints_prv) THEN
     502           80 :          nimg = ri_data%nimg
     503           80 :          RI_range = ri_data%kp_RI_range
     504              :       END IF
     505              : 
     506          784 :       ALLOCATE (sizes_RI(natom), sizes_AO(natom))
     507         1448 :       ALLOCATE (basis_set_RI(nkind), basis_set_AO(nkind))
     508          196 :       CALL basis_set_list_setup(basis_set_RI, ri_data%ri_basis_type, qs_kind_set)
     509          196 :       CALL get_particle_set(particle_set, qs_kind_set, nsgf=sizes_RI, basis=basis_set_RI)
     510          196 :       CALL basis_set_list_setup(basis_set_AO, ri_data%orb_basis_type, qs_kind_set)
     511          196 :       CALL get_particle_set(particle_set, qs_kind_set, nsgf=sizes_AO, basis=basis_set_AO)
     512              : 
     513          528 :       DO ibasis = 1, SIZE(basis_set_AO)
     514          332 :          orb_basis => basis_set_AO(ibasis)%gto_basis_set
     515          332 :          ri_basis => basis_set_RI(ibasis)%gto_basis_set
     516              :          ! interaction radii should be based on eps_pgf_orb controlled in RI section
     517              :          ! (since hartree-fock needs very tight eps_pgf_orb for Kohn-Sham/Fock matrix but eps_pgf_orb
     518              :          ! can be much looser in RI HFX since no systematic error is introduced with tensor sparsity)
     519          332 :          CALL init_interaction_radii_orb_basis(orb_basis, ri_data%eps_pgf_orb)
     520          528 :          CALL init_interaction_radii_orb_basis(ri_basis, ri_data%eps_pgf_orb)
     521              :       END DO
     522              : 
     523          196 :       n_mem = ri_data%n_mem
     524              :       CALL create_tensor_batches(sizes_RI, n_mem, starts_array_mc_int, ends_array_mc_int, &
     525              :                                  starts_array_mc_block_int, ends_array_mc_block_int)
     526              : 
     527          196 :       DEALLOCATE (starts_array_mc_int, ends_array_mc_int)
     528              : 
     529              :       !We separate the K-points and standard 3c integrals, because handle quite differently
     530          196 :       IF (.NOT. do_kpoints_prv) THEN
     531              : 
     532          116 :          nthreads = 1
     533          116 : !$       nthreads = omp_get_num_threads()
     534          116 :          pdims = 0
     535          464 :          CALL dbt_pgrid_create(para_env, pdims, pgrid, tensor_dims=[MAX(1, natom/(n_mem*nthreads)), natom, natom])
     536              : 
     537         1044 :          ALLOCATE (t_3c_int_batched(1, 1))
     538              :          CALL create_3c_tensor(t_3c_int_batched(1, 1), dist_RI, dist_AO_1, dist_AO_2, pgrid, &
     539              :                                sizes_RI, sizes_AO, sizes_AO, map1=[1], map2=[2, 3], &
     540          116 :                                name="(RI | AO AO)")
     541              : 
     542          116 :          CALL get_qs_env(qs_env, nkind=nkind, particle_set=particle_set, atomic_kind_set=atomic_kind_set)
     543          116 :          CALL dbt_mp_environ_pgrid(pgrid, pdims, pcoord)
     544          116 :          CALL mp_comm_t3c%create(pgrid%mp_comm_2d, 3, pdims)
     545              :          CALL distribution_3d_create(dist_3d, dist_RI, dist_AO_1, dist_AO_2, &
     546          116 :                                      nkind, particle_set, mp_comm_t3c, own_comm=.TRUE.)
     547          116 :          DEALLOCATE (dist_RI, dist_AO_1, dist_AO_2)
     548              : 
     549              :          CALL create_3c_tensor(t_3c_int(1, 1), dist_RI, dist_AO_1, dist_AO_2, ri_data%pgrid, &
     550              :                                ri_data%bsizes_RI_split, ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
     551              :                                map1=[1], map2=[2, 3], &
     552          116 :                                name="O (RI AO | AO)")
     553              : 
     554              :          CALL build_3c_neighbor_lists(nl_3c, basis_set_RI, basis_set_AO, basis_set_AO, dist_3d, ri_data%ri_metric, &
     555          116 :                                       "HFX_3c_nl", qs_env, op_pos=1, sym_jk=.TRUE., own_dist=.TRUE.)
     556              : 
     557          450 :          DO i_mem = 1, n_mem
     558              :             CALL build_3c_integrals(t_3c_int_batched, ri_data%filter_eps/2, qs_env, nl_3c, &
     559              :                                     basis_set_RI, basis_set_AO, basis_set_AO, &
     560              :                                     ri_data%ri_metric, int_eps=ri_data%eps_schwarz, op_pos=1, &
     561              :                                     desymmetrize=.FALSE., &
     562         1002 :                                     bounds_i=[starts_array_mc_block_int(i_mem), ends_array_mc_block_int(i_mem)])
     563          334 :             CALL dbt_copy(t_3c_int_batched(1, 1), t_3c_int(1, 1), summation=.TRUE., move_data=.TRUE.)
     564          450 :             CALL dbt_filter(t_3c_int(1, 1), ri_data%filter_eps/2)
     565              :          END DO
     566              : 
     567          116 :          CALL dbt_destroy(t_3c_int_batched(1, 1))
     568              : 
     569          116 :          CALL neighbor_list_3c_destroy(nl_3c)
     570              : 
     571          116 :          CALL dbt_create(t_3c_int(1, 1), t_3c_tmp)
     572              : 
     573          116 :          IF (ri_data%flavor == ri_pmat) THEN ! desymmetrize
     574              :             ! desymmetrize
     575           90 :             CALL dbt_copy(t_3c_int(1, 1), t_3c_tmp)
     576           90 :             CALL dbt_copy(t_3c_tmp, t_3c_int(1, 1), order=[1, 3, 2], summation=.TRUE., move_data=.TRUE.)
     577              : 
     578              :             ! For RI-RHO filter_eps_storage is reserved for screening tensor contracted with RI-metric
     579              :             ! with RI metric but not to bare integral tensor
     580           90 :             CALL dbt_filter(t_3c_int(1, 1), ri_data%filter_eps)
     581              :          ELSE
     582           26 :             CALL dbt_filter(t_3c_int(1, 1), ri_data%filter_eps_storage/2)
     583              :          END IF
     584              : 
     585          116 :          CALL dbt_destroy(t_3c_tmp)
     586              : 
     587              :       ELSE !do_kpoints
     588              : 
     589           80 :          nthreads = 1
     590           80 : !$       nthreads = omp_get_num_threads()
     591           80 :          pdims = 0
     592          320 :          CALL dbt_pgrid_create(para_env, pdims, pgrid, tensor_dims=[natom, natom, MAX(1, natom/(n_mem*nthreads))])
     593              : 
     594              :          !In k-points HFX, we stack all images along the RI direction in the same tensors, in order
     595              :          !to avoid storing nimg x ncell_RI different tensors (very memory intensive)
     596           80 :          nblks = SIZE(ri_data%bsizes_RI_split)
     597          400 :          ALLOCATE (sizes_RI_ext(natom*ri_data%ncell_RI), sizes_RI_ext_split(nblks*ri_data%ncell_RI))
     598          562 :          DO i_img = 1, ri_data%ncell_RI
     599         1446 :             sizes_RI_ext((i_img - 1)*natom + 1:i_img*natom) = sizes_RI(:)
     600         2744 :             sizes_RI_ext_split((i_img - 1)*nblks + 1:i_img*nblks) = ri_data%bsizes_RI_split(:)
     601              :          END DO
     602              : 
     603              :          CALL create_3c_tensor(t_3c_tmp, dist_AO_1, dist_AO_2, dist_RI, &
     604              :                                pgrid, sizes_AO, sizes_AO, sizes_RI, map1=[1, 2], map2=[3], &
     605           80 :                                name="(AO AO | RI)")
     606           80 :          CALL dbt_destroy(t_3c_tmp)
     607              : 
     608              :          !For the integrals to work, the distribution along the RI direction must be repeated
     609          240 :          ALLOCATE (dist_RI_ext(natom*ri_data%ncell_RI))
     610          562 :          DO i_img = 1, ri_data%ncell_RI
     611         1526 :             dist_RI_ext((i_img - 1)*natom + 1:i_img*natom) = dist_RI(:)
     612              :          END DO
     613              : 
     614         2798 :          ALLOCATE (t_3c_int_batched(nimg, 1))
     615           80 :          CALL dbt_distribution_new(t_dist, pgrid, dist_AO_1, dist_AO_2, dist_RI_ext)
     616              :          CALL dbt_create(t_3c_int_batched(1, 1), "KP_3c_ints", t_dist, [1, 2], [3], &
     617           80 :                          sizes_AO, sizes_AO, sizes_RI_ext)
     618         1998 :          DO i_img = 2, nimg
     619         1998 :             CALL dbt_create(t_3c_int_batched(1, 1), t_3c_int_batched(i_img, 1))
     620              :          END DO
     621           80 :          CALL dbt_distribution_destroy(t_dist)
     622              : 
     623           80 :          CALL get_qs_env(qs_env, nkind=nkind, particle_set=particle_set, atomic_kind_set=atomic_kind_set)
     624           80 :          CALL dbt_mp_environ_pgrid(pgrid, pdims, pcoord)
     625           80 :          CALL mp_comm_t3c%create(pgrid%mp_comm_2d, 3, pdims)
     626              :          CALL distribution_3d_create(dist_3d, dist_AO_1, dist_AO_2, dist_RI, &
     627           80 :                                      nkind, particle_set, mp_comm_t3c, own_comm=.TRUE.)
     628           80 :          DEALLOCATE (dist_RI, dist_AO_1, dist_AO_2)
     629              : 
     630              :          ! create 3c tensor for storage of ints
     631              :          CALL build_3c_neighbor_lists(nl_3c, basis_set_AO, basis_set_AO, basis_set_RI, dist_3d, ri_data%ri_metric, &
     632           80 :                                       "HFX_3c_nl", qs_env, op_pos=2, sym_ij=.FALSE., own_dist=.TRUE.)
     633              : 
     634              :          CALL create_3c_tensor(t_3c_int(1, 1), dist_RI, dist_AO_1, dist_AO_2, ri_data%pgrid, &
     635              :                                sizes_RI_ext_split, ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
     636              :                                map1=[1], map2=[2, 3], &
     637           80 :                                name="O (RI AO | AO)")
     638         1998 :          DO j_img = 2, nimg
     639         1998 :             CALL dbt_create(t_3c_int(1, 1), t_3c_int(1, j_img))
     640              :          END DO
     641              : 
     642           80 :          CALL dbt_create(t_3c_int(1, 1), t_3c_tmp)
     643          220 :          DO i_mem = 1, n_mem
     644              :             CALL build_3c_integrals(t_3c_int_batched, ri_data%filter_eps, qs_env, nl_3c, &
     645              :                                     basis_set_AO, basis_set_AO, basis_set_RI, &
     646              :                                     ri_data%ri_metric, int_eps=ri_data%eps_schwarz, op_pos=2, &
     647              :                                     desymmetrize=.FALSE., do_kpoints=.TRUE., do_hfx_kpoints=.TRUE., &
     648              :                                     bounds_k=[starts_array_mc_block_int(i_mem), ends_array_mc_block_int(i_mem)], &
     649          420 :                                     RI_range=RI_range, img_to_RI_cell=ri_data%img_to_RI_cell)
     650              : 
     651         3998 :             DO i_img = 1, nimg
     652              :                !we start with (mu^0 sigma^i | P^j) and finish with (P^i | mu^0 sigma^j)
     653         3778 :                CALL get_tensor_occupancy(t_3c_int_batched(i_img, 1), nze, occ)
     654         3918 :                IF (nze > 0) THEN
     655         2540 :                   CALL dbt_copy(t_3c_int_batched(i_img, 1), t_3c_tmp, order=[3, 2, 1], move_data=.TRUE.)
     656         2540 :                   CALL dbt_filter(t_3c_tmp, ri_data%filter_eps)
     657              :                   CALL dbt_copy(t_3c_tmp, t_3c_int(1, i_img), order=[1, 3, 2], &
     658         2540 :                                 summation=.TRUE., move_data=.TRUE.)
     659              :                END IF
     660              :             END DO
     661              :          END DO
     662              : 
     663         2078 :          DO i_img = 1, nimg
     664         2078 :             CALL dbt_destroy(t_3c_int_batched(i_img, 1))
     665              :          END DO
     666         2078 :          DEALLOCATE (t_3c_int_batched)
     667           80 :          CALL neighbor_list_3c_destroy(nl_3c)
     668           80 :          CALL dbt_destroy(t_3c_tmp)
     669              :       END IF
     670          196 :       CALL dbt_pgrid_destroy(pgrid)
     671              : 
     672              :       CALL build_2c_neighbor_lists(nl_2c_pot, basis_set_RI, basis_set_RI, ri_data%hfx_pot, &
     673              :                                    "HFX_2c_nl_pot", qs_env, sym_ij=.NOT. do_kpoints_prv, &
     674          196 :                                    dist_2d=dist_2d)
     675              : 
     676          196 :       CALL cp_dbcsr_dist2d_to_dist(dist_2d, dbcsr_dist)
     677          588 :       ALLOCATE (row_bsize(SIZE(sizes_RI)))
     678          392 :       ALLOCATE (col_bsize(SIZE(sizes_RI)))
     679          750 :       row_bsize(:) = sizes_RI
     680          750 :       col_bsize(:) = sizes_RI
     681              : 
     682              :       !Use non-symmetric nl and matrices for k-points
     683          196 :       symm = dbcsr_type_symmetric
     684          196 :       IF (do_kpoints_prv) symm = dbcsr_type_no_symmetry
     685              : 
     686          196 :       CALL dbcsr_create(t_2c_int_pot(1), "(R|P) HFX", dbcsr_dist, symm, row_bsize, col_bsize)
     687         2114 :       DO i_img = 2, nimg
     688         2114 :          CALL dbcsr_create(t_2c_int_pot(i_img), template=t_2c_int_pot(1))
     689              :       END DO
     690              : 
     691          196 :       IF (.NOT. ri_data%same_op) THEN
     692          106 :          CALL dbcsr_create(t_2c_int_RI(1), "(R|P) HFX", dbcsr_dist, symm, row_bsize, col_bsize)
     693         2024 :          DO i_img = 2, nimg
     694         2024 :             CALL dbcsr_create(t_2c_int_RI(i_img), template=t_2c_int_RI(1))
     695              :          END DO
     696              :       END IF
     697          196 :       DEALLOCATE (col_bsize, row_bsize)
     698              : 
     699          196 :       CALL dbcsr_distribution_release(dbcsr_dist)
     700              : 
     701              :       CALL build_2c_integrals(t_2c_int_pot, ri_data%filter_eps_2c, qs_env, nl_2c_pot, basis_set_RI, basis_set_RI, &
     702          196 :                               ri_data%hfx_pot, do_kpoints=do_kpoints_prv, do_hfx_kpoints=do_kpoints_prv)
     703          196 :       CALL release_neighbor_list_sets(nl_2c_pot)
     704              : 
     705          196 :       IF (.NOT. ri_data%same_op) THEN
     706              :          CALL build_2c_neighbor_lists(nl_2c_RI, basis_set_RI, basis_set_RI, ri_data%ri_metric, &
     707              :                                       "HFX_2c_nl_RI", qs_env, sym_ij=.NOT. do_kpoints_prv, &
     708          106 :                                       dist_2d=dist_2d)
     709              : 
     710              :          CALL build_2c_integrals(t_2c_int_RI, ri_data%filter_eps_2c, qs_env, nl_2c_RI, basis_set_RI, basis_set_RI, &
     711          106 :                                  ri_data%ri_metric, do_kpoints=do_kpoints_prv, do_hfx_kpoints=do_kpoints_prv)
     712              : 
     713          106 :          CALL release_neighbor_list_sets(nl_2c_RI)
     714              :       END IF
     715              : 
     716          528 :       DO ibasis = 1, SIZE(basis_set_AO)
     717          332 :          orb_basis => basis_set_AO(ibasis)%gto_basis_set
     718          332 :          ri_basis => basis_set_RI(ibasis)%gto_basis_set
     719              :          ! reset interaction radii of orb basis
     720          332 :          CALL init_interaction_radii_orb_basis(orb_basis, dft_control%qs_control%eps_pgf_orb)
     721          528 :          CALL init_interaction_radii_orb_basis(ri_basis, dft_control%qs_control%eps_pgf_orb)
     722              :       END DO
     723              : 
     724          196 :       IF (ri_data%calc_condnum) THEN
     725              :          CALL arnoldi_extremal(t_2c_int_pot(1), max_ev, min_ev, threshold=ri_data%eps_lanczos, &
     726            6 :                                max_iter=ri_data%max_iter_lanczos, converged=converged)
     727              : 
     728            6 :          IF (.NOT. converged) THEN
     729            0 :             CPWARN("Not converged: unreliable condition number estimate of (P|Q) matrix (HFX potential).")
     730              :          END IF
     731              : 
     732            6 :          IF (ri_data%unit_nr > 0) THEN
     733            3 :             WRITE (ri_data%unit_nr, '(T2,A)') "2-Norm Condition Number of (P|Q) integrals (HFX potential)"
     734            3 :             IF (min_ev > 0) THEN
     735              :                WRITE (ri_data%unit_nr, '(T4,A,ES11.3E3,T32,A,ES11.3E3,A4,ES11.3E3,T63,A,F8.4)') &
     736            3 :                   "CN : max/min ev: ", max_ev, " / ", min_ev, "=", max_ev/min_ev, "Log(2-CN):", LOG10(max_ev/min_ev)
     737              :             ELSE
     738              :                WRITE (ri_data%unit_nr, '(T4,A,ES11.3E3,T32,A,ES11.3E3,T63,A)') &
     739            0 :                   "CN : max/min ev: ", max_ev, " / ", min_ev, "Log(CN): infinity"
     740              :             END IF
     741              :          END IF
     742              : 
     743            6 :          IF (.NOT. ri_data%same_op) THEN
     744              :             CALL arnoldi_extremal(t_2c_int_RI(1), max_ev, min_ev, threshold=ri_data%eps_lanczos, &
     745            4 :                                   max_iter=ri_data%max_iter_lanczos, converged=converged)
     746              : 
     747            4 :             IF (.NOT. converged) THEN
     748            0 :                CPWARN("Not converged: unreliable condition number estimate of (P|Q) matrix (RI metric).")
     749              :             END IF
     750              : 
     751            4 :             IF (ri_data%unit_nr > 0) THEN
     752            2 :                WRITE (ri_data%unit_nr, '(T2,A)') "2-Norm Condition Number of (P|Q) integrals (RI metric)"
     753            2 :                IF (min_ev > 0) THEN
     754              :                   WRITE (ri_data%unit_nr, '(T4,A,ES11.3E3,T32,A,ES11.3E3,A4,ES11.3E3,T63,A,F8.4)') &
     755            2 :                      "CN : max/min ev: ", max_ev, " / ", min_ev, "=", max_ev/min_ev, "Log(2-CN):", LOG10(max_ev/min_ev)
     756              :                ELSE
     757              :                   WRITE (ri_data%unit_nr, '(T4,A,ES11.3E3,T32,A,ES11.3E3,T63,A)') &
     758            0 :                      "CN : max/min ev: ", max_ev, " / ", min_ev, "Log(CN): infinity"
     759              :                END IF
     760              :             END IF
     761              :          END IF
     762              :       END IF
     763              : 
     764          196 :       CALL timestop(handle)
     765         1292 :    END SUBROUTINE
     766              : 
     767              : ! **************************************************************************************************
     768              : !> \brief Pre-SCF steps in rho flavor of RI HFX
     769              : !>
     770              : !> K(P, S) = sum_{R,Q} K_2(P, R)^{-1} K_1(R, Q) K_2(Q, S)^{-1}
     771              : !> Calculate B(mu, lambda, R) = sum_P int_3c(mu, lambda, P) K(P, R)
     772              : !> \param qs_env ...
     773              : !> \param ri_data ...
     774              : ! **************************************************************************************************
     775           90 :    SUBROUTINE hfx_ri_pre_scf_Pmat(qs_env, ri_data)
     776              :       TYPE(qs_environment_type), POINTER                 :: qs_env
     777              :       TYPE(hfx_ri_type), INTENT(INOUT)                   :: ri_data
     778              : 
     779              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'hfx_ri_pre_scf_Pmat'
     780              : 
     781              :       INTEGER                                            :: handle, handle2, i_mem, j_mem, &
     782              :                                                             n_dependent, unit_nr, unit_nr_dbcsr
     783              :       INTEGER(int_8)                                     :: nflop, nze, nze_O
     784           90 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: batch_ranges_AO, batch_ranges_RI
     785              :       INTEGER, DIMENSION(2, 1)                           :: bounds_i
     786              :       INTEGER, DIMENSION(2, 2)                           :: bounds_j
     787              :       INTEGER, DIMENSION(3)                              :: dims_3c
     788              :       REAL(KIND=dp)                                      :: compression_factor, memory_3c, occ, &
     789              :                                                             threshold
     790              :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
     791          360 :       TYPE(dbcsr_type), DIMENSION(1)                     :: t_2c_int_mat, t_2c_op_pot, t_2c_op_RI, &
     792          270 :                                                             t_2c_tmp, t_2c_tmp_2
     793          810 :       TYPE(dbt_type)                                     :: t_3c_2
     794          180 :       TYPE(dbt_type), ALLOCATABLE, DIMENSION(:)          :: t_2c_int, t_2c_work
     795           90 :       TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :)       :: t_3c_int_1
     796              :       TYPE(mp_para_env_type), POINTER                    :: para_env
     797              : 
     798           90 :       CALL timeset(routineN, handle)
     799              : 
     800           90 :       unit_nr_dbcsr = ri_data%unit_nr_dbcsr
     801           90 :       unit_nr = ri_data%unit_nr
     802              : 
     803           90 :       CALL get_qs_env(qs_env, para_env=para_env, blacs_env=blacs_env)
     804              : 
     805           90 :       CALL timeset(routineN//"_int", handle2)
     806              : 
     807         2790 :       ALLOCATE (t_2c_int(1), t_2c_work(1), t_3c_int_1(1, 1))
     808           90 :       CALL hfx_ri_pre_scf_calc_tensors(qs_env, ri_data, t_2c_op_RI, t_2c_op_pot, t_3c_int_1)
     809              : 
     810           90 :       CALL dbt_copy(t_3c_int_1(1, 1), ri_data%t_3c_int_ctr_3(1, 1), order=[1, 2, 3], move_data=.TRUE.)
     811              : 
     812           90 :       CALL dbt_destroy(t_3c_int_1(1, 1))
     813              : 
     814           90 :       CALL timestop(handle2)
     815              : 
     816           90 :       CALL timeset(routineN//"_2c", handle2)
     817              : 
     818           90 :       IF (ri_data%same_op) t_2c_op_RI(1) = t_2c_op_pot(1)
     819           90 :       CALL dbcsr_create(t_2c_int_mat(1), template=t_2c_op_RI(1), matrix_type=dbcsr_type_no_symmetry)
     820           90 :       threshold = MAX(ri_data%filter_eps, 1.0e-12_dp)
     821              : 
     822           90 :       SELECT CASE (ri_data%t2c_method)
     823              :       CASE (hfx_ri_do_2c_iter)
     824              :          CALL invert_hotelling(t_2c_int_mat(1), t_2c_op_RI(1), &
     825            0 :                                threshold=threshold, silent=.FALSE.)
     826              :       CASE (hfx_ri_do_2c_cholesky)
     827           88 :          CALL dbcsr_copy(t_2c_int_mat(1), t_2c_op_RI(1))
     828           88 :          CALL cp_dbcsr_cholesky_decompose(t_2c_int_mat(1), para_env=para_env, blacs_env=blacs_env)
     829           88 :          CALL cp_dbcsr_cholesky_invert(t_2c_int_mat(1), para_env=para_env, blacs_env=blacs_env, uplo_to_full=.TRUE.)
     830              :       CASE (hfx_ri_do_2c_diag)
     831            2 :          CALL dbcsr_copy(t_2c_int_mat(1), t_2c_op_RI(1))
     832              :          CALL cp_dbcsr_power(t_2c_int_mat(1), -1.0_dp, ri_data%eps_eigval, n_dependent, &
     833           92 :                              para_env, blacs_env, verbose=ri_data%unit_nr_dbcsr > 0)
     834              :       END SELECT
     835              : 
     836           90 :       IF (ri_data%check_2c_inv) THEN
     837            0 :          CALL check_inverse(t_2c_int_mat(1), t_2c_op_RI(1), unit_nr=unit_nr)
     838              :       END IF
     839              : 
     840              :       !Need to save the (P|Q)^-1 tensor for forces (inverse metric if not same_op)
     841           90 :       CALL dbt_create(t_2c_int_mat(1), t_2c_work(1))
     842           90 :       CALL dbt_copy_matrix_to_tensor(t_2c_int_mat(1), t_2c_work(1))
     843           90 :       CALL dbt_copy(t_2c_work(1), ri_data%t_2c_inv(1, 1), move_data=.TRUE.)
     844           90 :       CALL dbt_destroy(t_2c_work(1))
     845           90 :       CALL dbt_filter(ri_data%t_2c_inv(1, 1), ri_data%filter_eps)
     846           90 :       IF (.NOT. ri_data%same_op) THEN
     847              :          !Also save the RI (P|Q) integral
     848           22 :          CALL dbt_create(t_2c_op_pot(1), t_2c_work(1))
     849           22 :          CALL dbt_copy_matrix_to_tensor(t_2c_op_pot(1), t_2c_work(1))
     850           22 :          CALL dbt_copy(t_2c_work(1), ri_data%t_2c_pot(1, 1), move_data=.TRUE.)
     851           22 :          CALL dbt_destroy(t_2c_work(1))
     852           22 :          CALL dbt_filter(ri_data%t_2c_pot(1, 1), ri_data%filter_eps)
     853              :       END IF
     854              : 
     855           90 :       IF (ri_data%same_op) THEN
     856           68 :          CALL dbcsr_release(t_2c_op_pot(1))
     857              :       ELSE
     858           22 :          CALL dbcsr_create(t_2c_tmp(1), template=t_2c_op_RI(1), matrix_type=dbcsr_type_no_symmetry)
     859           22 :          CALL dbcsr_create(t_2c_tmp_2(1), template=t_2c_op_RI(1), matrix_type=dbcsr_type_no_symmetry)
     860           22 :          CALL dbcsr_release(t_2c_op_RI(1))
     861              :          CALL dbcsr_multiply('N', 'N', 1.0_dp, t_2c_int_mat(1), t_2c_op_pot(1), 0.0_dp, t_2c_tmp(1), &
     862           22 :                              filter_eps=ri_data%filter_eps)
     863              : 
     864           22 :          CALL dbcsr_release(t_2c_op_pot(1))
     865              :          CALL dbcsr_multiply('N', 'N', 1.0_dp, t_2c_tmp(1), t_2c_int_mat(1), 0.0_dp, t_2c_tmp_2(1), &
     866           22 :                              filter_eps=ri_data%filter_eps)
     867           22 :          CALL dbcsr_release(t_2c_tmp(1))
     868           22 :          CALL dbcsr_release(t_2c_int_mat(1))
     869           22 :          t_2c_int_mat(1) = t_2c_tmp_2(1)
     870              :       END IF
     871              : 
     872           90 :       CALL dbt_create(t_2c_int_mat(1), t_2c_int(1), name="(RI|RI)")
     873           90 :       CALL dbt_copy_matrix_to_tensor(t_2c_int_mat(1), t_2c_int(1))
     874           90 :       CALL dbcsr_release(t_2c_int_mat(1))
     875           90 :       CALL dbt_copy(t_2c_int(1), ri_data%t_2c_int(1, 1), move_data=.TRUE.)
     876           90 :       CALL dbt_destroy(t_2c_int(1))
     877           90 :       CALL dbt_filter(ri_data%t_2c_int(1, 1), ri_data%filter_eps)
     878              : 
     879           90 :       CALL timestop(handle2)
     880              : 
     881           90 :       CALL dbt_create(ri_data%t_3c_int_ctr_3(1, 1), t_3c_2)
     882              : 
     883           90 :       CALL dbt_get_info(ri_data%t_3c_int_ctr_3(1, 1), nfull_total=dims_3c)
     884              : 
     885           90 :       memory_3c = 0.0_dp
     886           90 :       nze_O = 0
     887              : 
     888          270 :       ALLOCATE (batch_ranges_RI(ri_data%n_mem_RI + 1))
     889          270 :       ALLOCATE (batch_ranges_AO(ri_data%n_mem + 1))
     890          354 :       batch_ranges_RI(:ri_data%n_mem_RI) = ri_data%starts_array_RI_mem_block(:)
     891           90 :       batch_ranges_RI(ri_data%n_mem_RI + 1) = ri_data%ends_array_RI_mem_block(ri_data%n_mem_RI) + 1
     892          354 :       batch_ranges_AO(:ri_data%n_mem) = ri_data%starts_array_mem_block(:)
     893           90 :       batch_ranges_AO(ri_data%n_mem + 1) = ri_data%ends_array_mem_block(ri_data%n_mem) + 1
     894              : 
     895              :       CALL dbt_batched_contract_init(ri_data%t_3c_int_ctr_3(1, 1), batch_range_1=batch_ranges_RI, &
     896           90 :                                      batch_range_2=batch_ranges_AO)
     897              :       CALL dbt_batched_contract_init(t_3c_2, batch_range_1=batch_ranges_RI, &
     898           90 :                                      batch_range_2=batch_ranges_AO)
     899              : 
     900          354 :       DO i_mem = 1, ri_data%n_mem_RI
     901          792 :          bounds_i(:, 1) = [ri_data%starts_array_RI_mem(i_mem), ri_data%ends_array_RI_mem(i_mem)]
     902              : 
     903          264 :          CALL dbt_batched_contract_init(ri_data%t_2c_int(1, 1))
     904         1044 :          DO j_mem = 1, ri_data%n_mem
     905         2340 :             bounds_j(:, 1) = [ri_data%starts_array_mem(j_mem), ri_data%ends_array_mem(j_mem)]
     906         2340 :             bounds_j(:, 2) = [1, dims_3c(3)]
     907          780 :             CALL timeset(routineN//"_RIx3C", handle2)
     908              :             CALL dbt_contract(1.0_dp, ri_data%t_2c_int(1, 1), ri_data%t_3c_int_ctr_3(1, 1), &
     909              :                               0.0_dp, t_3c_2, &
     910              :                               contract_1=[2], notcontract_1=[1], &
     911              :                               contract_2=[1], notcontract_2=[2, 3], &
     912              :                               map_1=[1], map_2=[2, 3], filter_eps=ri_data%filter_eps_storage, &
     913              :                               bounds_2=bounds_i, &
     914              :                               bounds_3=bounds_j, &
     915              :                               unit_nr=unit_nr_dbcsr, &
     916          780 :                               flop=nflop)
     917              : 
     918          780 :             ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
     919          780 :             CALL timestop(handle2)
     920              : 
     921          780 :             CALL timeset(routineN//"_copy_2", handle2)
     922          780 :             CALL dbt_copy(t_3c_2, ri_data%t_3c_int_ctr_1(1, 1), order=[2, 1, 3], move_data=.TRUE.)
     923              : 
     924          780 :             CALL get_tensor_occupancy(ri_data%t_3c_int_ctr_1(1, 1), nze, occ)
     925          780 :             nze_O = nze_O + nze
     926              : 
     927              :             CALL compress_tensor(ri_data%t_3c_int_ctr_1(1, 1), ri_data%blk_indices(j_mem, i_mem)%ind, &
     928          780 :                                  ri_data%store_3c(j_mem, i_mem), ri_data%filter_eps_storage, memory_3c)
     929              : 
     930         3384 :             CALL timestop(handle2)
     931              :          END DO
     932          354 :          CALL dbt_batched_contract_finalize(ri_data%t_2c_int(1, 1))
     933              :       END DO
     934           90 :       CALL dbt_batched_contract_finalize(t_3c_2)
     935           90 :       CALL dbt_batched_contract_finalize(ri_data%t_3c_int_ctr_3(1, 1))
     936              : 
     937           90 :       CALL para_env%sum(memory_3c)
     938           90 :       compression_factor = REAL(nze_O, dp)*1.0E-06*8.0_dp/memory_3c
     939              : 
     940           90 :       IF (unit_nr > 0) THEN
     941              :          WRITE (UNIT=unit_nr, FMT="((T3,A,T66,F11.2,A4))") &
     942           24 :             "MEMORY_INFO| Memory for 3-center HFX integrals (compressed):", memory_3c, ' MiB'
     943              : 
     944              :          WRITE (UNIT=unit_nr, FMT="((T3,A,T60,F21.2))") &
     945           24 :             "MEMORY_INFO| Compression factor:                  ", compression_factor
     946              :       END IF
     947              : 
     948           90 :       CALL dbt_clear(ri_data%t_2c_int(1, 1))
     949           90 :       CALL dbt_destroy(t_3c_2)
     950              : 
     951           90 :       CALL dbt_copy(ri_data%t_3c_int_ctr_3(1, 1), ri_data%t_3c_int_ctr_2(1, 1), order=[2, 1, 3], move_data=.TRUE.)
     952              : 
     953           90 :       CALL timestop(handle)
     954          810 :    END SUBROUTINE
     955              : 
     956              : ! **************************************************************************************************
     957              : !> \brief Sorts 2d indices w.r.t. rows and columns
     958              : !> \param blk_ind ...
     959              : ! **************************************************************************************************
     960            0 :    SUBROUTINE sort_unique_blkind_2d(blk_ind)
     961              :       INTEGER, ALLOCATABLE, DIMENSION(:, :), &
     962              :          INTENT(INOUT)                                   :: blk_ind
     963              : 
     964              :       INTEGER                                            :: end_ind, iblk, iblk_all, irow, nblk, &
     965              :                                                             ncols, start_ind
     966            0 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: ind_1, ind_2, sort_1, sort_2
     967            0 :       INTEGER, ALLOCATABLE, DIMENSION(:, :)              :: blk_ind_tmp
     968              : 
     969            0 :       nblk = SIZE(blk_ind, 1)
     970              : 
     971            0 :       ALLOCATE (sort_1(nblk))
     972            0 :       ALLOCATE (ind_1(nblk))
     973              : 
     974            0 :       sort_1(:) = blk_ind(:, 1)
     975            0 :       CALL sort(sort_1, nblk, ind_1)
     976              : 
     977            0 :       blk_ind(:, :) = blk_ind(ind_1, :)
     978              : 
     979            0 :       start_ind = 1
     980              : 
     981            0 :       DO WHILE (start_ind <= nblk)
     982            0 :          irow = blk_ind(start_ind, 1)
     983            0 :          end_ind = start_ind
     984              : 
     985            0 :          IF (end_ind + 1 <= nblk) THEN
     986            0 :          DO WHILE (blk_ind(end_ind + 1, 1) == irow)
     987            0 :             end_ind = end_ind + 1
     988            0 :             IF (end_ind + 1 > nblk) EXIT
     989              :          END DO
     990              :          END IF
     991              : 
     992            0 :          ncols = end_ind - start_ind + 1
     993            0 :          ALLOCATE (sort_2(ncols))
     994            0 :          ALLOCATE (ind_2(ncols))
     995            0 :          sort_2(:) = blk_ind(start_ind:end_ind, 2)
     996            0 :          CALL sort(sort_2, ncols, ind_2)
     997            0 :          ind_2 = ind_2 + start_ind - 1
     998              : 
     999            0 :          blk_ind(start_ind:end_ind, :) = blk_ind(ind_2, :)
    1000            0 :          start_ind = end_ind + 1
    1001              : 
    1002            0 :          DEALLOCATE (sort_2, ind_2)
    1003              :       END DO
    1004              : 
    1005            0 :       ALLOCATE (blk_ind_tmp(nblk, 2))
    1006            0 :       blk_ind_tmp = 0
    1007              : 
    1008              :       iblk = 0
    1009            0 :       DO iblk_all = 1, nblk
    1010            0 :          IF (iblk >= 1) THEN
    1011            0 :             IF (ALL(blk_ind_tmp(iblk, :) == blk_ind(iblk_all, :))) THEN
    1012              :                CYCLE
    1013              :             END IF
    1014              :          END IF
    1015            0 :          iblk = iblk + 1
    1016            0 :          blk_ind_tmp(iblk, :) = blk_ind(iblk_all, :)
    1017              :       END DO
    1018            0 :       nblk = iblk
    1019              : 
    1020            0 :       DEALLOCATE (blk_ind)
    1021            0 :       ALLOCATE (blk_ind(nblk, 2))
    1022              : 
    1023            0 :       blk_ind(:, :) = blk_ind_tmp(:nblk, :)
    1024              : 
    1025            0 :    END SUBROUTINE
    1026              : 
    1027              : ! **************************************************************************************************
    1028              : !> \brief ...
    1029              : !> \param qs_env ...
    1030              : !> \param ri_data ...
    1031              : !> \param ks_matrix ...
    1032              : !> \param ehfx ...
    1033              : !> \param mos ...
    1034              : !> \param rho_ao ...
    1035              : !> \param geometry_did_change ...
    1036              : !> \param nspins ...
    1037              : !> \param hf_fraction ...
    1038              : ! **************************************************************************************************
    1039         1956 :    SUBROUTINE hfx_ri_update_ks(qs_env, ri_data, ks_matrix, ehfx, mos, rho_ao, &
    1040              :                                geometry_did_change, nspins, hf_fraction)
    1041              :       TYPE(qs_environment_type), POINTER                 :: qs_env
    1042              :       TYPE(hfx_ri_type), INTENT(INOUT)                   :: ri_data
    1043              :       TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER       :: ks_matrix
    1044              :       REAL(KIND=dp), INTENT(OUT)                         :: ehfx
    1045              :       TYPE(mo_set_type), DIMENSION(:), INTENT(IN), &
    1046              :          OPTIONAL                                        :: mos
    1047              :       TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER       :: rho_ao
    1048              :       LOGICAL, INTENT(IN)                                :: geometry_did_change
    1049              :       INTEGER, INTENT(IN)                                :: nspins
    1050              :       REAL(KIND=dp), INTENT(IN)                          :: hf_fraction
    1051              : 
    1052              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'hfx_ri_update_ks'
    1053              : 
    1054              :       CHARACTER(1)                                       :: mtype
    1055              :       INTEGER                                            :: handle, handle2, i, ispin, j
    1056              :       INTEGER(int_8)                                     :: nblks
    1057              :       INTEGER, DIMENSION(2)                              :: homo
    1058              :       LOGICAL                                            :: is_antisymmetric
    1059              :       REAL(dp)                                           :: etmp, fac
    1060         1956 :       REAL(KIND=dp), DIMENSION(:), POINTER               :: mo_eigenvalues
    1061              :       TYPE(cp_fm_type), POINTER                          :: mo_coeff
    1062         1956 :       TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER       :: my_ks_matrix, my_rho_ao
    1063         5868 :       TYPE(dbcsr_type), DIMENSION(2)                     :: mo_coeff_b
    1064              :       TYPE(dbcsr_type), POINTER                          :: mo_coeff_b_tmp
    1065              :       TYPE(mp_para_env_type), POINTER                    :: para_env
    1066              : 
    1067         1956 :       CALL timeset(routineN, handle)
    1068              : 
    1069         1956 :       IF (nspins == 1) THEN
    1070         1428 :          fac = 0.5_dp*hf_fraction
    1071              :       ELSE
    1072          528 :          fac = 1.0_dp*hf_fraction
    1073              :       END IF
    1074              : 
    1075              :       !If incoming assymetric matrices, need to convert to normal
    1076         1956 :       NULLIFY (my_ks_matrix, my_rho_ao)
    1077         1956 :       CALL dbcsr_get_info(ks_matrix(1, 1)%matrix, matrix_type=mtype)
    1078         1956 :       is_antisymmetric = mtype == dbcsr_type_antisymmetric
    1079         1956 :       IF (is_antisymmetric) THEN
    1080          960 :          ALLOCATE (my_ks_matrix(SIZE(ks_matrix, 1), SIZE(ks_matrix, 2)))
    1081          960 :          ALLOCATE (my_rho_ao(SIZE(rho_ao, 1), SIZE(rho_ao, 2)))
    1082              : 
    1083          320 :          DO i = 1, SIZE(ks_matrix, 1)
    1084          480 :             DO j = 1, SIZE(ks_matrix, 2)
    1085          160 :                ALLOCATE (my_ks_matrix(i, j)%matrix, my_rho_ao(i, j)%matrix)
    1086              :                CALL dbcsr_create(my_ks_matrix(i, j)%matrix, template=ks_matrix(i, j)%matrix, &
    1087          160 :                                  matrix_type=dbcsr_type_no_symmetry)
    1088          160 :                CALL dbcsr_desymmetrize(ks_matrix(i, j)%matrix, my_ks_matrix(i, j)%matrix)
    1089              :                CALL dbcsr_create(my_rho_ao(i, j)%matrix, template=rho_ao(i, j)%matrix, &
    1090          160 :                                  matrix_type=dbcsr_type_no_symmetry)
    1091          320 :                CALL dbcsr_desymmetrize(rho_ao(i, j)%matrix, my_rho_ao(i, j)%matrix)
    1092              :             END DO
    1093              :          END DO
    1094              :       ELSE
    1095         1796 :          my_ks_matrix => ks_matrix
    1096         1796 :          my_rho_ao => rho_ao
    1097              :       END IF
    1098              : 
    1099              :       !Case analysis on RI_FLAVOR: we switch if the input flavor is MO, there is no provided MO, and
    1100              :       !                            the current flavor is not yet RHO. We switch back to MO if there are
    1101              :       !                            MOs available and the current flavor is actually RHO
    1102         1956 :       IF (ri_data%input_flavor == ri_mo .AND. (.NOT. PRESENT(mos)) .AND. ri_data%flavor == ri_mo) THEN
    1103           12 :          CALL switch_ri_flavor(ri_data, qs_env)
    1104         1944 :       ELSE IF (ri_data%input_flavor == ri_mo .AND. PRESENT(mos) .AND. ri_data%flavor == ri_pmat) THEN
    1105           10 :          CALL switch_ri_flavor(ri_data, qs_env)
    1106              :       END IF
    1107              : 
    1108         2284 :       SELECT CASE (ri_data%flavor)
    1109              :       CASE (ri_mo)
    1110          328 :          CPASSERT(PRESENT(mos))
    1111          328 :          CALL timeset(routineN//"_MO", handle2)
    1112              : 
    1113          834 :          DO ispin = 1, nspins
    1114          506 :             NULLIFY (mo_coeff_b_tmp)
    1115          506 :             CPASSERT(mos(ispin)%uniform_occupation)
    1116          506 :             CALL get_mo_set(mo_set=mos(ispin), mo_coeff=mo_coeff, eigenvalues=mo_eigenvalues, mo_coeff_b=mo_coeff_b_tmp)
    1117              : 
    1118          506 :             IF (.NOT. mos(ispin)%use_mo_coeff_b) CALL copy_fm_to_dbcsr(mo_coeff, mo_coeff_b_tmp)
    1119          834 :             CALL dbcsr_copy(mo_coeff_b(ispin), mo_coeff_b_tmp)
    1120              :          END DO
    1121              : 
    1122          834 :          DO ispin = 1, nspins
    1123          506 :             CALL dbcsr_scale(mo_coeff_b(ispin), SQRT(mos(ispin)%maxocc))
    1124          834 :             homo(ispin) = mos(ispin)%homo
    1125              :          END DO
    1126              : 
    1127          328 :          CALL timestop(handle2)
    1128              : 
    1129              :          CALL hfx_ri_update_ks_mo(qs_env, ri_data, my_ks_matrix, mo_coeff_b, homo, &
    1130          328 :                                   geometry_did_change, nspins, fac)
    1131              :       CASE (ri_pmat)
    1132              : 
    1133         1628 :          NULLIFY (para_env)
    1134         1628 :          CALL get_qs_env(qs_env, para_env=para_env)
    1135         3606 :          DO ispin = 1, SIZE(my_rho_ao, 1)
    1136         1978 :             nblks = dbcsr_get_num_blocks(my_rho_ao(ispin, 1)%matrix)
    1137         1978 :             CALL para_env%sum(nblks)
    1138         3606 :             IF (nblks == 0) THEN
    1139            0 :                CPABORT("received empty density matrix")
    1140              :             END IF
    1141              :          END DO
    1142              : 
    1143              :          CALL hfx_ri_update_ks_pmat(qs_env, ri_data, my_ks_matrix, my_rho_ao, &
    1144         3912 :                                     geometry_did_change, nspins, fac)
    1145              : 
    1146              :       END SELECT
    1147              : 
    1148         4440 :       DO ispin = 1, nspins
    1149         4440 :          CALL dbcsr_release(mo_coeff_b(ispin))
    1150              :       END DO
    1151              : 
    1152         4440 :       DO ispin = 1, nspins
    1153         4440 :          CALL dbcsr_filter(my_ks_matrix(ispin, 1)%matrix, ri_data%filter_eps)
    1154              :       END DO
    1155              : 
    1156         1956 :       CALL timeset(routineN//"_energy", handle2)
    1157              :       ! Calculate the exchange energy
    1158         1956 :       ehfx = 0.0_dp
    1159         4440 :       DO ispin = 1, nspins
    1160              :          CALL dbcsr_dot(my_ks_matrix(ispin, 1)%matrix, my_rho_ao(ispin, 1)%matrix, &
    1161         2484 :                         etmp)
    1162         4440 :          ehfx = ehfx + 0.5_dp*etmp
    1163              : 
    1164              :       END DO
    1165         1956 :       CALL timestop(handle2)
    1166              : 
    1167              :       !Anti-symmetric case
    1168         1956 :       IF (is_antisymmetric) THEN
    1169          320 :          DO i = 1, SIZE(ks_matrix, 1)
    1170          480 :             DO j = 1, SIZE(ks_matrix, 2)
    1171          160 :                CALL dbcsr_complete_redistribute(my_ks_matrix(i, j)%matrix, ks_matrix(i, j)%matrix)
    1172          320 :                CALL dbcsr_complete_redistribute(my_rho_ao(i, j)%matrix, rho_ao(i, j)%matrix)
    1173              :             END DO
    1174              :          END DO
    1175          160 :          CALL dbcsr_deallocate_matrix_set(my_ks_matrix)
    1176          160 :          CALL dbcsr_deallocate_matrix_set(my_rho_ao)
    1177              :       END IF
    1178              : 
    1179         1956 :       CALL timestop(handle)
    1180         1956 :    END SUBROUTINE
    1181              : 
    1182              : ! **************************************************************************************************
    1183              : !> \brief Calculate Fock (AKA Kohn-Sham) matrix in MO flavor
    1184              : !>
    1185              : !> C(mu, i) (MO coefficients)
    1186              : !> M(mu, i, R) = sum_nu B(mu, nu, R) C(nu, i)
    1187              : !> KS(mu, lambda) = sum_{i,R} M(mu, i, R) M(lambda, i, R)
    1188              : !> \param qs_env ...
    1189              : !> \param ri_data ...
    1190              : !> \param ks_matrix ...
    1191              : !> \param mo_coeff C(mu, i)
    1192              : !> \param homo ...
    1193              : !> \param geometry_did_change ...
    1194              : !> \param nspins ...
    1195              : !> \param fac ...
    1196              : ! **************************************************************************************************
    1197          328 :    SUBROUTINE hfx_ri_update_ks_mo(qs_env, ri_data, ks_matrix, mo_coeff, &
    1198          328 :                                   homo, geometry_did_change, nspins, fac)
    1199              :       TYPE(qs_environment_type), POINTER                 :: qs_env
    1200              :       TYPE(hfx_ri_type), INTENT(INOUT)                   :: ri_data
    1201              :       TYPE(dbcsr_p_type), DIMENSION(:, :)                :: ks_matrix
    1202              :       TYPE(dbcsr_type), DIMENSION(:), INTENT(IN)         :: mo_coeff
    1203              :       INTEGER, DIMENSION(:)                              :: homo
    1204              :       LOGICAL, INTENT(IN)                                :: geometry_did_change
    1205              :       INTEGER, INTENT(IN)                                :: nspins
    1206              :       REAL(dp), INTENT(IN)                               :: fac
    1207              : 
    1208              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'hfx_ri_update_ks_mo'
    1209              : 
    1210              :       INTEGER                                            :: bsize, bsum, comm_2d_handle, handle, &
    1211              :                                                             handle2, i_mem, iblock, iproc, ispin, &
    1212              :                                                             n_mem, n_mos, nblock, unit_nr_dbcsr
    1213              :       INTEGER(int_8)                                     :: nblks, nflop
    1214          328 :       INTEGER, ALLOCATABLE, DIMENSION(:) :: batch_ranges_1, batch_ranges_2, dist1, dist2, dist3, &
    1215          328 :          mem_end, mem_end_block_1, mem_end_block_2, mem_size, mem_start, mem_start_block_1, &
    1216          328 :          mem_start_block_2, mo_bsizes_1, mo_bsizes_2
    1217          328 :       INTEGER, ALLOCATABLE, DIMENSION(:, :)              :: bounds
    1218              :       INTEGER, DIMENSION(2)                              :: pdims_2d
    1219              :       INTEGER, DIMENSION(3)                              :: pdims
    1220              :       LOGICAL                                            :: do_initialize
    1221              :       REAL(dp)                                           :: t1, t2
    1222              :       TYPE(dbcsr_distribution_type)                      :: ks_dist
    1223         1640 :       TYPE(dbt_pgrid_type)                               :: pgrid, pgrid_2d
    1224         8200 :       TYPE(dbt_type)                                     :: ks_t, ks_t_mat, mo_coeff_t, &
    1225         2952 :                                                             mo_coeff_t_split
    1226          328 :       TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :)       :: t_3c_int_mo_1, t_3c_int_mo_2
    1227              :       TYPE(mp_comm_type)                                 :: comm_2d
    1228              :       TYPE(mp_para_env_type), POINTER                    :: para_env
    1229              : 
    1230          328 :       CALL timeset(routineN, handle)
    1231              : 
    1232          328 :       CPASSERT(SIZE(ks_matrix, 2) == 1)
    1233              : 
    1234          328 :       unit_nr_dbcsr = ri_data%unit_nr_dbcsr
    1235              : 
    1236          328 :       IF (geometry_did_change) THEN
    1237           16 :          CALL hfx_ri_pre_scf_mo(qs_env, ri_data, nspins)
    1238              :       END IF
    1239              : 
    1240          328 :       nblks = dbt_get_num_blocks_total(ri_data%t_3c_int_ctr_1(1, 1))
    1241          328 :       IF (nblks == 0) THEN
    1242            0 :          CPABORT("3-center integrals are not available (first call requires geometry_did_change=.TRUE.)")
    1243              :       END IF
    1244              : 
    1245          834 :       DO ispin = 1, nspins
    1246          506 :          nblks = dbt_get_num_blocks_total(ri_data%t_2c_int(ispin, 1))
    1247          834 :          IF (nblks == 0) THEN
    1248            0 :             CPABORT("2-center integrals are not available (first call requires geometry_did_change=.TRUE.)")
    1249              :          END IF
    1250              :       END DO
    1251              : 
    1252          328 :       IF (.NOT. ALLOCATED(ri_data%t_3c_int_mo)) THEN
    1253           18 :          do_initialize = .TRUE.
    1254           18 :          CPASSERT(.NOT. ALLOCATED(ri_data%t_3c_ctr_RI))
    1255           18 :          CPASSERT(.NOT. ALLOCATED(ri_data%t_3c_ctr_KS))
    1256           18 :          CPASSERT(.NOT. ALLOCATED(ri_data%t_3c_ctr_KS_copy))
    1257          256 :          ALLOCATE (ri_data%t_3c_int_mo(nspins, 1, 1))
    1258          238 :          ALLOCATE (ri_data%t_3c_ctr_RI(nspins, 1, 1))
    1259          238 :          ALLOCATE (ri_data%t_3c_ctr_KS(nspins, 1, 1))
    1260          238 :          ALLOCATE (ri_data%t_3c_ctr_KS_copy(nspins, 1, 1))
    1261              :       ELSE
    1262              :          do_initialize = .FALSE.
    1263              :       END IF
    1264              : 
    1265          328 :       CALL get_qs_env(qs_env, para_env=para_env)
    1266              : 
    1267          328 :       ALLOCATE (bounds(2, 1))
    1268              : 
    1269          328 :       CALL dbcsr_get_info(ks_matrix(1, 1)%matrix, distribution=ks_dist)
    1270          328 :       CALL dbcsr_distribution_get(ks_dist, group=comm_2d_handle, nprows=pdims_2d(1), npcols=pdims_2d(2))
    1271              : 
    1272          328 :       CALL comm_2d%set_handle(comm_2d_handle)
    1273          328 :       pgrid_2d = dbt_nd_mp_comm(comm_2d, [1], [2], pdims_2d=pdims_2d)
    1274              : 
    1275              :       CALL create_2c_tensor(ks_t, dist1, dist2, pgrid_2d, ri_data%bsizes_AO_fit, ri_data%bsizes_AO_fit, &
    1276          328 :                             name="(AO | AO)")
    1277              : 
    1278          328 :       DEALLOCATE (dist1, dist2)
    1279              : 
    1280          328 :       CALL para_env%sync()
    1281          328 :       t1 = m_walltime()
    1282              : 
    1283         7544 :       ALLOCATE (t_3c_int_mo_1(1, 1), t_3c_int_mo_2(1, 1))
    1284          834 :       DO ispin = 1, nspins
    1285              : 
    1286          506 :          CALL dbcsr_get_info(mo_coeff(ispin), nfullcols_total=n_mos)
    1287         1518 :          ALLOCATE (mo_bsizes_2(n_mos))
    1288         3192 :          mo_bsizes_2 = 1
    1289              : 
    1290              :          CALL create_tensor_batches(mo_bsizes_2, ri_data%n_mem, mem_start, mem_end, &
    1291          506 :                                     mem_start_block_2, mem_end_block_2)
    1292          506 :          n_mem = ri_data%n_mem
    1293         1518 :          ALLOCATE (mem_size(n_mem))
    1294              : 
    1295         2374 :          DO i_mem = 1, n_mem
    1296         4554 :             bsize = SUM(mo_bsizes_2(mem_start_block_2(i_mem):mem_end_block_2(i_mem)))
    1297         2374 :             mem_size(i_mem) = bsize
    1298              :          END DO
    1299              : 
    1300          506 :          CALL split_block_sizes(mem_size, mo_bsizes_1, ri_data%max_bsize_MO)
    1301         1012 :          ALLOCATE (mem_start_block_1(n_mem))
    1302         1012 :          ALLOCATE (mem_end_block_1(n_mem))
    1303          506 :          nblock = SIZE(mo_bsizes_1)
    1304          506 :          iblock = 0
    1305         2374 :          DO i_mem = 1, n_mem
    1306              :             bsum = 0
    1307          506 :             DO
    1308         1868 :                iblock = iblock + 1
    1309         1868 :                CPASSERT(iblock <= nblock)
    1310         1868 :                bsum = bsum + mo_bsizes_1(iblock)
    1311         1868 :                IF (bsum == mem_size(i_mem)) THEN
    1312         1868 :                   IF (i_mem == 1) THEN
    1313          506 :                      mem_start_block_1(i_mem) = 1
    1314              :                   ELSE
    1315         1362 :                      mem_start_block_1(i_mem) = mem_end_block_1(i_mem - 1) + 1
    1316              :                   END IF
    1317         1868 :                   mem_end_block_1(i_mem) = iblock
    1318              :                   EXIT
    1319              :                END IF
    1320              :             END DO
    1321              :          END DO
    1322              : 
    1323         1518 :          ALLOCATE (batch_ranges_1(ri_data%n_mem + 1))
    1324         2374 :          batch_ranges_1(:ri_data%n_mem) = mem_start_block_1(:)
    1325          506 :          batch_ranges_1(ri_data%n_mem + 1) = mem_end_block_1(ri_data%n_mem) + 1
    1326              : 
    1327         1012 :          ALLOCATE (batch_ranges_2(ri_data%n_mem + 1))
    1328         2374 :          batch_ranges_2(:ri_data%n_mem) = mem_start_block_2(:)
    1329          506 :          batch_ranges_2(ri_data%n_mem + 1) = mem_end_block_2(ri_data%n_mem) + 1
    1330              : 
    1331          506 :          iproc = para_env%mepos
    1332              : 
    1333              :          CALL create_3c_tensor(t_3c_int_mo_1(1, 1), dist1, dist2, dist3, ri_data%pgrid_1, &
    1334              :                                ri_data%bsizes_AO_split, ri_data%bsizes_RI_split, mo_bsizes_1, &
    1335              :                                [1, 2], [3], &
    1336          506 :                                name="(AO RI | MO)")
    1337              : 
    1338          506 :          DEALLOCATE (dist1, dist2, dist3)
    1339              : 
    1340              :          CALL create_3c_tensor(t_3c_int_mo_2(1, 1), dist1, dist2, dist3, ri_data%pgrid_2, &
    1341              :                                mo_bsizes_1, ri_data%bsizes_RI_split, ri_data%bsizes_AO_split, &
    1342              :                                [1], [2, 3], &
    1343          506 :                                name="(MO | RI AO)")
    1344              : 
    1345          506 :          DEALLOCATE (dist1, dist2, dist3)
    1346              : 
    1347              :          CALL create_2c_tensor(mo_coeff_t_split, dist1, dist2, pgrid_2d, ri_data%bsizes_AO_split, mo_bsizes_1, &
    1348              :                                name="(AO | MO)")
    1349              : 
    1350          506 :          DEALLOCATE (dist1, dist2)
    1351              : 
    1352          506 :          CPASSERT(homo(ispin)/ri_data%n_mem > 0)
    1353              : 
    1354          506 :          IF (do_initialize) THEN
    1355           22 :             pdims(:) = 0
    1356              : 
    1357              :             CALL dbt_pgrid_create(para_env, pdims, pgrid, &
    1358              :                                   tensor_dims=[SIZE(ri_data%bsizes_RI_fit), &
    1359              :                                                (homo(ispin) - 1)/ri_data%n_mem + 1, &
    1360           88 :                                                SIZE(ri_data%bsizes_AO_fit)])
    1361              :             CALL create_3c_tensor(ri_data%t_3c_int_mo(ispin, 1, 1), dist1, dist2, dist3, pgrid, &
    1362              :                                   ri_data%bsizes_RI_fit, mo_bsizes_2, ri_data%bsizes_AO_fit, &
    1363              :                                   [1], [2, 3], &
    1364           22 :                                   name="(RI | MO AO)")
    1365              : 
    1366           22 :             DEALLOCATE (dist1, dist2, dist3)
    1367              : 
    1368              :             CALL create_3c_tensor(ri_data%t_3c_ctr_KS(ispin, 1, 1), dist1, dist2, dist3, pgrid, &
    1369              :                                   ri_data%bsizes_RI_fit, mo_bsizes_2, ri_data%bsizes_AO_fit, &
    1370              :                                   [1, 2], [3], &
    1371           22 :                                   name="(RI MO | AO)")
    1372           22 :             DEALLOCATE (dist1, dist2, dist3)
    1373           22 :             CALL dbt_pgrid_destroy(pgrid)
    1374              : 
    1375           22 :             CALL dbt_create(ri_data%t_3c_int_mo(ispin, 1, 1), ri_data%t_3c_ctr_RI(ispin, 1, 1), name="(RI | MO AO)")
    1376           22 :             CALL dbt_create(ri_data%t_3c_ctr_KS(ispin, 1, 1), ri_data%t_3c_ctr_KS_copy(ispin, 1, 1))
    1377              :          END IF
    1378              : 
    1379          506 :          CALL dbt_create(mo_coeff(ispin), mo_coeff_t, name="MO coeffs")
    1380          506 :          CALL dbt_copy_matrix_to_tensor(mo_coeff(ispin), mo_coeff_t)
    1381          506 :          CALL dbt_copy(mo_coeff_t, mo_coeff_t_split, move_data=.TRUE.)
    1382          506 :          CALL dbt_filter(mo_coeff_t_split, ri_data%filter_eps_mo)
    1383          506 :          CALL dbt_destroy(mo_coeff_t)
    1384              : 
    1385          506 :          CALL dbt_batched_contract_init(ks_t)
    1386          506 :          CALL dbt_batched_contract_init(ri_data%t_3c_ctr_KS(ispin, 1, 1), batch_range_2=batch_ranges_2)
    1387          506 :          CALL dbt_batched_contract_init(ri_data%t_3c_ctr_KS_copy(ispin, 1, 1), batch_range_2=batch_ranges_2)
    1388              : 
    1389          506 :          CALL dbt_batched_contract_init(ri_data%t_2c_int(ispin, 1))
    1390          506 :          CALL dbt_batched_contract_init(ri_data%t_3c_int_mo(ispin, 1, 1), batch_range_2=batch_ranges_2)
    1391          506 :          CALL dbt_batched_contract_init(ri_data%t_3c_ctr_RI(ispin, 1, 1), batch_range_2=batch_ranges_2)
    1392              : 
    1393         2374 :          DO i_mem = 1, n_mem
    1394              : 
    1395         5604 :             bounds(:, 1) = [mem_start(i_mem), mem_end(i_mem)]
    1396              : 
    1397         1868 :             CALL dbt_batched_contract_init(mo_coeff_t_split)
    1398         1868 :             CALL dbt_batched_contract_init(ri_data%t_3c_int_ctr_1(1, 1))
    1399              :             CALL dbt_batched_contract_init(t_3c_int_mo_1(1, 1), &
    1400         1868 :                                            batch_range_3=batch_ranges_1)
    1401         1868 :             CALL timeset(routineN//"_MOx3C_R", handle2)
    1402              :             CALL dbt_contract(1.0_dp, mo_coeff_t_split, ri_data%t_3c_int_ctr_1(1, 1), &
    1403              :                               0.0_dp, t_3c_int_mo_1(1, 1), &
    1404              :                               contract_1=[1], notcontract_1=[2], &
    1405              :                               contract_2=[3], notcontract_2=[1, 2], &
    1406              :                               map_1=[3], map_2=[1, 2], &
    1407              :                               bounds_2=bounds, &
    1408              :                               filter_eps=ri_data%filter_eps_mo/2, &
    1409              :                               unit_nr=unit_nr_dbcsr, &
    1410              :                               move_data=.FALSE., &
    1411         1868 :                               flop=nflop)
    1412              : 
    1413         1868 :             ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
    1414              : 
    1415         1868 :             CALL timestop(handle2)
    1416         1868 :             CALL dbt_batched_contract_finalize(mo_coeff_t_split)
    1417         1868 :             CALL dbt_batched_contract_finalize(ri_data%t_3c_int_ctr_1(1, 1))
    1418         1868 :             CALL dbt_batched_contract_finalize(t_3c_int_mo_1(1, 1))
    1419              : 
    1420         1868 :             CALL timeset(routineN//"_copy_1", handle2)
    1421         1868 :             CALL dbt_copy(t_3c_int_mo_1(1, 1), ri_data%t_3c_int_mo(ispin, 1, 1), order=[3, 1, 2], move_data=.TRUE.)
    1422         1868 :             CALL timestop(handle2)
    1423              : 
    1424         1868 :             CALL dbt_batched_contract_init(mo_coeff_t_split)
    1425         1868 :             CALL dbt_batched_contract_init(ri_data%t_3c_int_ctr_2(1, 1))
    1426              :             CALL dbt_batched_contract_init(t_3c_int_mo_2(1, 1), &
    1427         1868 :                                            batch_range_1=batch_ranges_1)
    1428              : 
    1429         1868 :             CALL timeset(routineN//"_MOx3C_L", handle2)
    1430              :             CALL dbt_contract(1.0_dp, mo_coeff_t_split, ri_data%t_3c_int_ctr_2(1, 1), &
    1431              :                               0.0_dp, t_3c_int_mo_2(1, 1), &
    1432              :                               contract_1=[1], notcontract_1=[2], &
    1433              :                               contract_2=[1], notcontract_2=[2, 3], &
    1434              :                               map_1=[1], map_2=[2, 3], &
    1435              :                               bounds_2=bounds, &
    1436              :                               filter_eps=ri_data%filter_eps_mo/2, &
    1437              :                               unit_nr=unit_nr_dbcsr, &
    1438              :                               move_data=.FALSE., &
    1439         1868 :                               flop=nflop)
    1440              : 
    1441         1868 :             ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
    1442              : 
    1443         1868 :             CALL timestop(handle2)
    1444              : 
    1445         1868 :             CALL dbt_batched_contract_finalize(mo_coeff_t_split)
    1446         1868 :             CALL dbt_batched_contract_finalize(ri_data%t_3c_int_ctr_2(1, 1))
    1447         1868 :             CALL dbt_batched_contract_finalize(t_3c_int_mo_2(1, 1))
    1448              : 
    1449         1868 :             CALL timeset(routineN//"_copy_1", handle2)
    1450              :             CALL dbt_copy(t_3c_int_mo_2(1, 1), ri_data%t_3c_int_mo(ispin, 1, 1), order=[2, 1, 3], &
    1451         1868 :                           summation=.TRUE., move_data=.TRUE.)
    1452              : 
    1453         1868 :             CALL dbt_filter(ri_data%t_3c_int_mo(ispin, 1, 1), ri_data%filter_eps_mo)
    1454         1868 :             CALL timestop(handle2)
    1455              : 
    1456         1868 :             CALL timeset(routineN//"_RIx3C", handle2)
    1457              : 
    1458              :             CALL dbt_contract(1.0_dp, ri_data%t_2c_int(ispin, 1), ri_data%t_3c_int_mo(ispin, 1, 1), &
    1459              :                               0.0_dp, ri_data%t_3c_ctr_RI(ispin, 1, 1), &
    1460              :                               contract_1=[1], notcontract_1=[2], &
    1461              :                               contract_2=[1], notcontract_2=[2, 3], &
    1462              :                               map_1=[1], map_2=[2, 3], filter_eps=ri_data%filter_eps, &
    1463              :                               unit_nr=unit_nr_dbcsr, &
    1464         1868 :                               flop=nflop)
    1465              : 
    1466         1868 :             ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
    1467              : 
    1468         1868 :             CALL timestop(handle2)
    1469              : 
    1470         1868 :             CALL timeset(routineN//"_copy_2", handle2)
    1471              : 
    1472              :             ! note: this copy should not involve communication (same block sizes, same 3d distribution on same process grid)
    1473         1868 :             CALL dbt_copy(ri_data%t_3c_ctr_RI(ispin, 1, 1), ri_data%t_3c_ctr_KS(ispin, 1, 1), move_data=.TRUE.)
    1474         1868 :             CALL dbt_copy(ri_data%t_3c_ctr_KS(ispin, 1, 1), ri_data%t_3c_ctr_KS_copy(ispin, 1, 1))
    1475         1868 :             CALL timestop(handle2)
    1476              : 
    1477         1868 :             CALL timeset(routineN//"_3Cx3C", handle2)
    1478              :             CALL dbt_contract(-fac, ri_data%t_3c_ctr_KS(ispin, 1, 1), ri_data%t_3c_ctr_KS_copy(ispin, 1, 1), &
    1479              :                               1.0_dp, ks_t, &
    1480              :                               contract_1=[1, 2], notcontract_1=[3], &
    1481              :                               contract_2=[1, 2], notcontract_2=[3], &
    1482              :                               map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps/n_mem, &
    1483              :                               unit_nr=unit_nr_dbcsr, move_data=.TRUE., &
    1484         1868 :                               flop=nflop)
    1485              : 
    1486         1868 :             ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
    1487              : 
    1488        15450 :             CALL timestop(handle2)
    1489              :          END DO
    1490              : 
    1491          506 :          CALL dbt_batched_contract_finalize(ks_t)
    1492          506 :          CALL dbt_batched_contract_finalize(ri_data%t_3c_ctr_KS(ispin, 1, 1))
    1493          506 :          CALL dbt_batched_contract_finalize(ri_data%t_3c_ctr_KS_copy(ispin, 1, 1))
    1494              : 
    1495          506 :          CALL dbt_batched_contract_finalize(ri_data%t_2c_int(ispin, 1))
    1496          506 :          CALL dbt_batched_contract_finalize(ri_data%t_3c_int_mo(ispin, 1, 1))
    1497          506 :          CALL dbt_batched_contract_finalize(ri_data%t_3c_ctr_RI(ispin, 1, 1))
    1498              : 
    1499          506 :          CALL dbt_destroy(t_3c_int_mo_1(1, 1))
    1500          506 :          CALL dbt_destroy(t_3c_int_mo_2(1, 1))
    1501          506 :          CALL dbt_clear(ri_data%t_3c_int_mo(ispin, 1, 1))
    1502              : 
    1503          506 :          CALL dbt_destroy(mo_coeff_t_split)
    1504              : 
    1505          506 :          CALL dbt_filter(ks_t, ri_data%filter_eps)
    1506              : 
    1507          506 :          CALL dbt_create(ks_matrix(ispin, 1)%matrix, ks_t_mat)
    1508          506 :          CALL dbt_copy(ks_t, ks_t_mat, move_data=.TRUE.)
    1509          506 :          CALL dbt_copy_tensor_to_matrix(ks_t_mat, ks_matrix(ispin, 1)%matrix, summation=.TRUE.)
    1510          506 :          CALL dbt_destroy(ks_t_mat)
    1511              : 
    1512            0 :          DEALLOCATE (mem_end, mem_start, mo_bsizes_2, mem_size, mem_start_block_1, mem_end_block_1, &
    1513         2858 :                      mem_start_block_2, mem_end_block_2, batch_ranges_1, batch_ranges_2)
    1514              : 
    1515              :       END DO
    1516              : 
    1517          328 :       CALL dbt_pgrid_destroy(pgrid_2d)
    1518          328 :       CALL dbt_destroy(ks_t)
    1519              : 
    1520          328 :       CALL para_env%sync()
    1521          328 :       t2 = m_walltime()
    1522              : 
    1523          328 :       ri_data%dbcsr_time = ri_data%dbcsr_time + t2 - t1
    1524              : 
    1525          328 :       CALL timestop(handle)
    1526              : 
    1527         2952 :    END SUBROUTINE
    1528              : 
    1529              : ! **************************************************************************************************
    1530              : !> \brief Calculate Fock (AKA Kohn-Sham) matrix in rho flavor
    1531              : !>
    1532              : !> M(mu, lambda, R) = sum_{nu} int_3c(mu, nu, R) P(nu, lambda)
    1533              : !> KS(mu, lambda) = sum_{nu,R} B(mu, nu, R) M(lambda, nu, R)
    1534              : !> \param qs_env ...
    1535              : !> \param ri_data ...
    1536              : !> \param ks_matrix ...
    1537              : !> \param rho_ao ...
    1538              : !> \param geometry_did_change ...
    1539              : !> \param nspins ...
    1540              : !> \param fac ...
    1541              : ! **************************************************************************************************
    1542         1628 :    SUBROUTINE hfx_ri_update_ks_Pmat(qs_env, ri_data, ks_matrix, rho_ao, &
    1543              :                                     geometry_did_change, nspins, fac)
    1544              :       TYPE(qs_environment_type), POINTER                 :: qs_env
    1545              :       TYPE(hfx_ri_type), INTENT(INOUT)                   :: ri_data
    1546              :       TYPE(dbcsr_p_type), DIMENSION(:, :)                :: ks_matrix, rho_ao
    1547              :       LOGICAL, INTENT(IN)                                :: geometry_did_change
    1548              :       INTEGER, INTENT(IN)                                :: nspins
    1549              :       REAL(dp), INTENT(IN)                               :: fac
    1550              : 
    1551              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'hfx_ri_update_ks_Pmat'
    1552              : 
    1553              :       INTEGER                                            :: handle, handle2, i_mem, ispin, j_mem, &
    1554              :                                                             n_mem, n_mem_RI, unit_nr, unit_nr_dbcsr
    1555              :       INTEGER(int_8)                                     :: flops_ks_max, flops_p_max, nblks, nflop, &
    1556              :                                                             nze, nze_3c, nze_3c_1, nze_3c_2, &
    1557              :                                                             nze_ks, nze_rho
    1558         1628 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: batch_ranges_AO, batch_ranges_RI, dist1, &
    1559         1628 :                                                             dist2
    1560              :       INTEGER, DIMENSION(2, 1)                           :: bounds_i
    1561              :       INTEGER, DIMENSION(2, 2)                           :: bounds_ij, bounds_j
    1562              :       INTEGER, DIMENSION(3)                              :: dims_3c
    1563              :       REAL(dp)                                           :: memory_3c, occ, occ_3c, occ_3c_1, &
    1564              :                                                             occ_3c_2, occ_ks, occ_rho, t1, t2, &
    1565              :                                                             unused
    1566        40700 :       TYPE(dbt_type)                                     :: ks_t, ks_tmp, rho_ao_tmp, t_3c_1, &
    1567        21164 :                                                             t_3c_3, tensor_old
    1568              :       TYPE(mp_para_env_type), POINTER                    :: para_env
    1569              : 
    1570         1628 :       IF (.NOT. fac > EPSILON(0.0_dp)) RETURN
    1571              : 
    1572         1628 :       CALL timeset(routineN, handle)
    1573              : 
    1574         1628 :       NULLIFY (para_env)
    1575              : 
    1576              :       ! get a useful output_unit
    1577         1628 :       unit_nr_dbcsr = ri_data%unit_nr_dbcsr
    1578         1628 :       unit_nr = ri_data%unit_nr
    1579              : 
    1580         1628 :       CALL get_qs_env(qs_env, para_env=para_env)
    1581              : 
    1582         1628 :       CPASSERT(SIZE(ks_matrix, 2) == 1)
    1583              : 
    1584         1628 :       IF (geometry_did_change) THEN
    1585           78 :          CALL hfx_ri_pre_scf_Pmat(qs_env, ri_data)
    1586          182 :          DO ispin = 1, nspins
    1587          104 :             CALL dbt_scale(ri_data%rho_ao_t(ispin, 1), 0.0_dp)
    1588          182 :             CALL dbt_scale(ri_data%ks_t(ispin, 1), 0.0_dp)
    1589              :          END DO
    1590              :       END IF
    1591              : 
    1592         1628 :       nblks = dbt_get_num_blocks_total(ri_data%t_3c_int_ctr_2(1, 1))
    1593         1628 :       IF (nblks == 0) THEN
    1594            0 :          CPABORT("3-center integrals are not available (first call requires geometry_did_change=.TRUE.)")
    1595              :       END IF
    1596              : 
    1597         1628 :       n_mem = ri_data%n_mem
    1598         1628 :       n_mem_RI = ri_data%n_mem_RI
    1599              : 
    1600         1628 :       CALL dbt_create(ks_matrix(1, 1)%matrix, ks_tmp)
    1601         1628 :       CALL dbt_create(rho_ao(1, 1)%matrix, rho_ao_tmp)
    1602              : 
    1603              :       CALL create_2c_tensor(ks_t, dist1, dist2, ri_data%pgrid_2d, &
    1604              :                             ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
    1605              :                             name="(AO | AO)")
    1606         1628 :       DEALLOCATE (dist1, dist2)
    1607              : 
    1608         1628 :       CALL dbt_create(ri_data%t_3c_int_ctr_2(1, 1), t_3c_1)
    1609         1628 :       CALL dbt_create(ri_data%t_3c_int_ctr_1(1, 1), t_3c_3)
    1610              : 
    1611         1628 :       CALL para_env%sync()
    1612         1628 :       t1 = m_walltime()
    1613              : 
    1614         1628 :       flops_ks_max = 0; flops_p_max = 0
    1615              : 
    1616         4884 :       ALLOCATE (batch_ranges_RI(ri_data%n_mem_RI + 1))
    1617         4884 :       ALLOCATE (batch_ranges_AO(ri_data%n_mem + 1))
    1618         6466 :       batch_ranges_RI(:ri_data%n_mem_RI) = ri_data%starts_array_RI_mem_block(:)
    1619         1628 :       batch_ranges_RI(ri_data%n_mem_RI + 1) = ri_data%ends_array_RI_mem_block(ri_data%n_mem_RI) + 1
    1620         6466 :       batch_ranges_AO(:ri_data%n_mem) = ri_data%starts_array_mem_block(:)
    1621         1628 :       batch_ranges_AO(ri_data%n_mem + 1) = ri_data%ends_array_mem_block(ri_data%n_mem) + 1
    1622              : 
    1623         1628 :       memory_3c = 0.0_dp
    1624         3606 :       DO ispin = 1, nspins
    1625              : 
    1626         1978 :          CALL get_tensor_occupancy(ri_data%t_3c_int_ctr_2(1, 1), nze_3c, occ_3c)
    1627              : 
    1628              :          nze_rho = 0
    1629              :          occ_rho = 0.0_dp
    1630         1978 :          nze_3c_1 = 0
    1631         1978 :          occ_3c_1 = 0.0_dp
    1632         1978 :          nze_3c_2 = 0
    1633         1978 :          occ_3c_2 = 0.0_dp
    1634              : 
    1635         1978 :          CALL dbt_copy_matrix_to_tensor(rho_ao(ispin, 1)%matrix, rho_ao_tmp)
    1636              : 
    1637              :          !We work with Delta P: the diff between previous SCF step and this one, for increased sparsity
    1638         1978 :          CALL dbt_scale(ri_data%rho_ao_t(ispin, 1), -1.0_dp)
    1639         1978 :          CALL dbt_copy(rho_ao_tmp, ri_data%rho_ao_t(ispin, 1), summation=.TRUE., move_data=.TRUE.)
    1640              : 
    1641         1978 :          CALL get_tensor_occupancy(ri_data%rho_ao_t(ispin, 1), nze_rho, occ_rho)
    1642              : 
    1643              :          CALL dbt_batched_contract_init(ri_data%t_3c_int_ctr_1(1, 1), batch_range_1=batch_ranges_AO, &
    1644         1978 :                                         batch_range_2=batch_ranges_RI)
    1645         1978 :          CALL dbt_batched_contract_init(t_3c_3, batch_range_1=batch_ranges_AO, batch_range_2=batch_ranges_RI)
    1646              : 
    1647         1978 :          CALL dbt_create(ri_data%t_3c_int_ctr_1(1, 1), tensor_old)
    1648              : 
    1649         7820 :          DO i_mem = 1, n_mem
    1650              : 
    1651         5842 :             CALL dbt_batched_contract_init(ri_data%rho_ao_t(ispin, 1))
    1652              :             CALL dbt_batched_contract_init(ri_data%t_3c_int_ctr_2(1, 1), batch_range_2=batch_ranges_RI, &
    1653         5842 :                                            batch_range_3=batch_ranges_AO)
    1654         5842 :             CALL dbt_batched_contract_init(t_3c_1, batch_range_2=batch_ranges_RI, batch_range_3=batch_ranges_AO)
    1655        23184 :             DO j_mem = 1, n_mem_RI
    1656              : 
    1657        17342 :                CALL timeset(routineN//"_Px3C", handle2)
    1658              : 
    1659        17342 :                CALL dbt_get_info(t_3c_1, nfull_total=dims_3c)
    1660        52026 :                bounds_i(:, 1) = [ri_data%starts_array_mem(i_mem), ri_data%ends_array_mem(i_mem)]
    1661        52026 :                bounds_j(:, 1) = [1, dims_3c(1)]
    1662        52026 :                bounds_j(:, 2) = [ri_data%starts_array_RI_mem(j_mem), ri_data%ends_array_RI_mem(j_mem)]
    1663              : 
    1664              :                CALL dbt_contract(1.0_dp, ri_data%rho_ao_t(ispin, 1), ri_data%t_3c_int_ctr_2(1, 1), &
    1665              :                                  0.0_dp, t_3c_1, &
    1666              :                                  contract_1=[2], notcontract_1=[1], &
    1667              :                                  contract_2=[3], notcontract_2=[1, 2], &
    1668              :                                  map_1=[3], map_2=[1, 2], filter_eps=ri_data%filter_eps, &
    1669              :                                  bounds_2=bounds_i, &
    1670              :                                  bounds_3=bounds_j, &
    1671              :                                  unit_nr=unit_nr_dbcsr, &
    1672        17342 :                                  flop=nflop)
    1673              : 
    1674        17342 :                CALL timestop(handle2)
    1675              : 
    1676        17342 :                ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
    1677              : 
    1678        17342 :                CALL get_tensor_occupancy(t_3c_1, nze, occ)
    1679        17342 :                nze_3c_1 = nze_3c_1 + nze
    1680        17342 :                occ_3c_1 = occ_3c_1 + occ
    1681              : 
    1682        17342 :                CALL timeset(routineN//"_copy_2", handle2)
    1683        17342 :                CALL dbt_copy(t_3c_1, t_3c_3, order=[3, 2, 1], move_data=.TRUE.)
    1684        17342 :                CALL timestop(handle2)
    1685              : 
    1686        52026 :                bounds_ij(:, 1) = [ri_data%starts_array_mem(i_mem), ri_data%ends_array_mem(i_mem)]
    1687        52026 :                bounds_ij(:, 2) = [ri_data%starts_array_RI_mem(j_mem), ri_data%ends_array_RI_mem(j_mem)]
    1688              : 
    1689              :                CALL decompress_tensor(tensor_old, ri_data%blk_indices(i_mem, j_mem)%ind, &
    1690        17342 :                                       ri_data%store_3c(i_mem, j_mem), ri_data%filter_eps_storage)
    1691              : 
    1692        17342 :                CALL dbt_copy(tensor_old, ri_data%t_3c_int_ctr_1(1, 1), move_data=.TRUE.)
    1693              : 
    1694        17342 :                CALL get_tensor_occupancy(ri_data%t_3c_int_ctr_1(1, 1), nze, occ)
    1695        17342 :                nze_3c_2 = nze_3c_2 + nze
    1696        17342 :                occ_3c_2 = occ_3c_2 + occ
    1697        17342 :                CALL timeset(routineN//"_KS", handle2)
    1698        17342 :                CALL dbt_batched_contract_init(ks_t)
    1699              :                CALL dbt_contract(-fac, ri_data%t_3c_int_ctr_1(1, 1), t_3c_3, &
    1700              :                                  1.0_dp, ks_t, &
    1701              :                                  contract_1=[1, 2], notcontract_1=[3], &
    1702              :                                  contract_2=[1, 2], notcontract_2=[3], &
    1703              :                                  map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps/n_mem, &
    1704              :                                  bounds_1=bounds_ij, &
    1705              :                                  unit_nr=unit_nr_dbcsr, &
    1706        17342 :                                  flop=nflop, move_data=.TRUE.)
    1707              : 
    1708        17342 :                CALL dbt_batched_contract_finalize(ks_t, unit_nr=unit_nr_dbcsr)
    1709        17342 :                CALL timestop(handle2)
    1710              : 
    1711        75210 :                ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
    1712              : 
    1713              :             END DO
    1714         5842 :             CALL dbt_batched_contract_finalize(ri_data%rho_ao_t(ispin, 1), unit_nr=unit_nr_dbcsr)
    1715         5842 :             CALL dbt_batched_contract_finalize(ri_data%t_3c_int_ctr_2(1, 1))
    1716         7820 :             CALL dbt_batched_contract_finalize(t_3c_1)
    1717              :          END DO
    1718         1978 :          CALL dbt_batched_contract_finalize(ri_data%t_3c_int_ctr_1(1, 1))
    1719         1978 :          CALL dbt_batched_contract_finalize(t_3c_3)
    1720              : 
    1721         7820 :          DO i_mem = 1, n_mem
    1722        25162 :             DO j_mem = 1, n_mem_RI
    1723         5842 :                ASSOCIATE (blk_indices => ri_data%blk_indices(i_mem, j_mem), t_3c => ri_data%t_3c_int_ctr_1(1, 1))
    1724              :                   CALL decompress_tensor(tensor_old, blk_indices%ind, &
    1725        17342 :                                          ri_data%store_3c(i_mem, j_mem), ri_data%filter_eps_storage)
    1726        17342 :                   CALL dbt_copy(tensor_old, t_3c, move_data=.TRUE.)
    1727              : 
    1728        17342 :                   unused = 0
    1729              :                   CALL compress_tensor(t_3c, blk_indices%ind, ri_data%store_3c(i_mem, j_mem), &
    1730        34684 :                                        ri_data%filter_eps_storage, unused)
    1731              :                END ASSOCIATE
    1732              :             END DO
    1733              :          END DO
    1734              : 
    1735         1978 :          CALL dbt_destroy(tensor_old)
    1736              : 
    1737         1978 :          CALL get_tensor_occupancy(ks_t, nze_ks, occ_ks)
    1738              : 
    1739              :          !rho_ao_t holds the density difference, and ks_t is built upon it => need the full picture
    1740         1978 :          CALL dbt_copy_matrix_to_tensor(rho_ao(ispin, 1)%matrix, rho_ao_tmp)
    1741         1978 :          CALL dbt_copy(rho_ao_tmp, ri_data%rho_ao_t(ispin, 1), move_data=.TRUE.)
    1742         1978 :          CALL dbt_copy(ks_t, ri_data%ks_t(ispin, 1), summation=.TRUE., move_data=.TRUE.)
    1743              : 
    1744         1978 :          CALL dbt_copy(ri_data%ks_t(ispin, 1), ks_tmp)
    1745         1978 :          CALL dbt_copy_tensor_to_matrix(ks_tmp, ks_matrix(ispin, 1)%matrix, summation=.TRUE.)
    1746         1978 :          CALL dbt_clear(ks_tmp)
    1747              : 
    1748         9540 :          IF (unit_nr > 0 .AND. geometry_did_change) THEN
    1749              :             WRITE (unit_nr, '(T6,A,T63,ES7.1,1X,A1,1X,F7.3,A1)') &
    1750           35 :                'Occupancy of density matrix P:', REAL(nze_rho, dp), '/', occ_rho*100, '%'
    1751              :             WRITE (unit_nr, '(T6,A,T63,ES7.1,1X,A1,1X,F7.3,A1)') &
    1752           35 :                'Occupancy of 3c ints:', REAL(nze_3c, dp), '/', occ_3c*100, '%'
    1753              :             WRITE (unit_nr, '(T6,A,T63,ES7.1,1X,A1,1X,F7.3,A1)') &
    1754           35 :                'Occupancy after contraction with K:', REAL(nze_3c_2, dp), '/', occ_3c_2*100, '%'
    1755              :             WRITE (unit_nr, '(T6,A,T63,ES7.1,1X,A1,1X,F7.3,A1)') &
    1756           35 :                'Occupancy after contraction with P:', REAL(nze_3c_1, dp), '/', occ_3c_1*100, '%'
    1757              :             WRITE (unit_nr, '(T6,A,T63,ES7.1,1X,A1,1X,F7.3,A1)') &
    1758           35 :                'Occupancy of Kohn-Sham matrix:', REAL(nze_ks, dp), '/', occ_ks*100, '%'
    1759              :          END IF
    1760              : 
    1761              :       END DO
    1762              : 
    1763         1628 :       CALL para_env%sync()
    1764         1628 :       t2 = m_walltime()
    1765              : 
    1766         1628 :       ri_data%dbcsr_time = ri_data%dbcsr_time + t2 - t1
    1767              : 
    1768         1628 :       CALL dbt_destroy(t_3c_1)
    1769         1628 :       CALL dbt_destroy(t_3c_3)
    1770              : 
    1771         1628 :       CALL dbt_destroy(rho_ao_tmp)
    1772         1628 :       CALL dbt_destroy(ks_t)
    1773         1628 :       CALL dbt_destroy(ks_tmp)
    1774              : 
    1775         1628 :       CALL timestop(handle)
    1776              : 
    1777         4884 :    END SUBROUTINE
    1778              : 
    1779              : ! **************************************************************************************************
    1780              : !> \brief Implementation based on the MO flavor
    1781              : !> \param qs_env ...
    1782              : !> \param ri_data ...
    1783              : !> \param nspins ...
    1784              : !> \param hf_fraction ...
    1785              : !> \param mo_coeff ...
    1786              : !> \param use_virial ...
    1787              : !> \note There is no response code for forces with the MO flavor
    1788              : ! **************************************************************************************************
    1789           14 :    SUBROUTINE hfx_ri_forces_mo(qs_env, ri_data, nspins, hf_fraction, mo_coeff, use_virial)
    1790              : 
    1791              :       TYPE(qs_environment_type), POINTER                 :: qs_env
    1792              :       TYPE(hfx_ri_type), INTENT(INOUT)                   :: ri_data
    1793              :       INTEGER, INTENT(IN)                                :: nspins
    1794              :       REAL(dp), INTENT(IN)                               :: hf_fraction
    1795              :       TYPE(dbcsr_type), DIMENSION(:), INTENT(IN)         :: mo_coeff
    1796              :       LOGICAL, INTENT(IN), OPTIONAL                      :: use_virial
    1797              : 
    1798              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'hfx_ri_forces_mo'
    1799              : 
    1800              :       INTEGER :: dummy_int, handle, i_mem, i_xyz, ibasis, ispin, j_mem, k_mem, n_mem, n_mem_input, &
    1801              :          n_mem_input_RI, n_mem_RI, n_mem_RI_fit, n_mos, natom, nkind, unit_nr_dbcsr
    1802              :       INTEGER(int_8)                                     :: nflop
    1803           14 :       INTEGER, ALLOCATABLE, DIMENSION(:) :: atom_of_kind, batch_blk_end, batch_blk_start, &
    1804           14 :          batch_end, batch_end_RI, batch_end_RI_fit, batch_ranges, batch_ranges_RI, &
    1805           14 :          batch_ranges_RI_fit, batch_start, batch_start_RI, batch_start_RI_fit, bsizes_MO, dist1, &
    1806           14 :          dist2, dist3, idx_to_at_AO, idx_to_at_RI, kind_of
    1807              :       INTEGER, DIMENSION(2, 1)                           :: bounds_ctr_1d
    1808              :       INTEGER, DIMENSION(2, 2)                           :: bounds_ctr_2d
    1809              :       INTEGER, DIMENSION(3)                              :: pdims
    1810              :       LOGICAL                                            :: use_virial_prv
    1811              :       REAL(dp)                                           :: pref, spin_fac, t1, t2
    1812           14 :       TYPE(atomic_kind_type), DIMENSION(:), POINTER      :: atomic_kind_set
    1813           14 :       TYPE(block_ind_type), ALLOCATABLE, DIMENSION(:, :) :: t_3c_der_AO_ind, t_3c_der_RI_ind
    1814              :       TYPE(cell_type), POINTER                           :: cell
    1815           14 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrix_s
    1816           70 :       TYPE(dbt_pgrid_type)                               :: pgrid_1, pgrid_2
    1817          602 :       TYPE(dbt_type) :: t_2c_RI, t_2c_RI_inv, t_2c_RI_met, t_2c_RI_PQ, t_2c_tmp, t_3c_0, t_3c_1, &
    1818          700 :          t_3c_2, t_3c_3, t_3c_4, t_3c_5, t_3c_6, t_3c_ao_ri_ao, t_3c_ao_ri_mo, t_3c_desymm, &
    1819          434 :          t_3c_mo_ri_ao, t_3c_mo_ri_mo, t_3c_ri_ao_ao, t_3c_RI_ctr, t_3c_ri_mo_mo, &
    1820          350 :          t_3c_ri_mo_mo_fit, t_3c_work, t_mo_coeff, t_mo_cpy
    1821           14 :       TYPE(dbt_type), ALLOCATABLE, DIMENSION(:) :: t_2c_der_metric, t_2c_der_RI, t_2c_MO_AO, &
    1822           28 :          t_2c_MO_AO_ctr, t_3c_der_AO, t_3c_der_AO_ctr_1, t_3c_der_RI, t_3c_der_RI_ctr_1, &
    1823           14 :          t_3c_der_RI_ctr_2
    1824              :       TYPE(dft_control_type), POINTER                    :: dft_control
    1825              :       TYPE(gto_basis_set_p_type), ALLOCATABLE, &
    1826           14 :          DIMENSION(:), TARGET                            :: basis_set_AO, basis_set_RI
    1827              :       TYPE(gto_basis_set_type), POINTER                  :: orb_basis, ri_basis
    1828              :       TYPE(hfx_compression_type), ALLOCATABLE, &
    1829           14 :          DIMENSION(:, :)                                 :: t_3c_der_AO_comp, t_3c_der_RI_comp
    1830              :       TYPE(mp_para_env_type), POINTER                    :: para_env
    1831           14 :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
    1832           14 :       TYPE(qs_force_type), DIMENSION(:), POINTER         :: force
    1833           14 :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
    1834              : 
    1835              :       ! 1) Precompute the derivatives that are needed (3c, 3c RI and metric)
    1836              :       ! 2) Go over batched of occupied MOs so as to save memory and optimize contractions
    1837              :       ! 3) Contract all 3c integrals and derivatives with MO coeffs
    1838              :       ! 4) Contract relevant quantities with the inverse 2c RI (metric or pot)
    1839              :       ! 5) First force contribution with the 2c RI derivative d/dx (Q|R)
    1840              :       ! 6) If metric, do the additional contraction  with S_pq^-1 (Q|R)
    1841              :       ! 7) Do the force contribution due to 3c integrals (a'b|P) and (ab|P')
    1842              :       ! 8) If metric, do the last force contribution due to d/dx S^-1 (First contract (ab|P), then S^-1)
    1843              : 
    1844           14 :       use_virial_prv = .FALSE.
    1845           14 :       IF (PRESENT(use_virial)) use_virial_prv = use_virial
    1846           14 :       IF (use_virial_prv) THEN
    1847            0 :          CPABORT("Stress tensor with RI-HFX MO flavor not implemented.")
    1848              :       END IF
    1849              : 
    1850           14 :       unit_nr_dbcsr = ri_data%unit_nr_dbcsr
    1851              : 
    1852              :       CALL get_qs_env(qs_env, natom=natom, particle_set=particle_set, nkind=nkind, &
    1853              :                       atomic_kind_set=atomic_kind_set, cell=cell, force=force, &
    1854              :                       matrix_s=matrix_s, para_env=para_env, dft_control=dft_control, &
    1855           14 :                       qs_kind_set=qs_kind_set)
    1856              : 
    1857           14 :       pdims(:) = 0
    1858              :       CALL dbt_pgrid_create(para_env, pdims, pgrid_1, tensor_dims=[SIZE(ri_data%bsizes_AO_split), &
    1859              :                                                                    SIZE(ri_data%bsizes_RI_split), &
    1860           56 :                                                                    SIZE(ri_data%bsizes_AO_split)])
    1861           14 :       pdims(:) = 0
    1862              :       CALL dbt_pgrid_create(para_env, pdims, pgrid_2, tensor_dims=[SIZE(ri_data%bsizes_RI_split), &
    1863              :                                                                    SIZE(ri_data%bsizes_AO_split), &
    1864           56 :                                                                    SIZE(ri_data%bsizes_AO_split)])
    1865              : 
    1866              :       CALL create_3c_tensor(t_3c_ao_ri_ao, dist1, dist2, dist3, pgrid_1, &
    1867              :                             ri_data%bsizes_AO_split, ri_data%bsizes_RI_split, ri_data%bsizes_AO_split, &
    1868           14 :                             [1, 2], [3], name="(AO RI | AO)")
    1869           14 :       DEALLOCATE (dist1, dist2, dist3)
    1870              :       CALL create_3c_tensor(t_3c_ri_ao_ao, dist1, dist2, dist3, pgrid_2, &
    1871              :                             ri_data%bsizes_RI_split, ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
    1872           14 :                             [1], [2, 3], name="(RI | AO AO)")
    1873           14 :       DEALLOCATE (dist1, dist2, dist3)
    1874              : 
    1875          104 :       ALLOCATE (basis_set_RI(nkind), basis_set_AO(nkind))
    1876           14 :       CALL basis_set_list_setup(basis_set_RI, ri_data%ri_basis_type, qs_kind_set)
    1877           14 :       CALL get_particle_set(particle_set, qs_kind_set, basis=basis_set_RI)
    1878           14 :       CALL basis_set_list_setup(basis_set_AO, ri_data%orb_basis_type, qs_kind_set)
    1879           14 :       CALL get_particle_set(particle_set, qs_kind_set, basis=basis_set_AO)
    1880              : 
    1881           38 :       DO ibasis = 1, SIZE(basis_set_AO)
    1882           24 :          orb_basis => basis_set_AO(ibasis)%gto_basis_set
    1883           24 :          CALL init_interaction_radii_orb_basis(orb_basis, ri_data%eps_pgf_orb)
    1884           24 :          ri_basis => basis_set_RI(ibasis)%gto_basis_set
    1885           38 :          CALL init_interaction_radii_orb_basis(ri_basis, ri_data%eps_pgf_orb)
    1886              :       END DO
    1887              : 
    1888              :       ALLOCATE (t_2c_der_metric(3), t_2c_der_RI(3), t_2c_MO_AO(3), t_2c_MO_AO_ctr(3), t_3c_der_AO(3), &
    1889         1148 :                 t_3c_der_AO_ctr_1(3), t_3c_der_RI(3), t_3c_der_RI_ctr_1(3), t_3c_der_RI_ctr_2(3))
    1890              : 
    1891              :       ! 1) Precompute the derivatives
    1892              :       CALL precalc_derivatives(t_3c_der_RI_comp, t_3c_der_AO_comp, t_3c_der_RI_ind, t_3c_der_AO_ind, &
    1893              :                                t_2c_der_RI, t_2c_der_metric, t_3c_ri_ao_ao, &
    1894           14 :                                basis_set_AO, basis_set_RI, ri_data, qs_env)
    1895              : 
    1896           38 :       DO ibasis = 1, SIZE(basis_set_AO)
    1897           24 :          orb_basis => basis_set_AO(ibasis)%gto_basis_set
    1898           24 :          ri_basis => basis_set_RI(ibasis)%gto_basis_set
    1899           24 :          CALL init_interaction_radii_orb_basis(orb_basis, dft_control%qs_control%eps_pgf_orb)
    1900           38 :          CALL init_interaction_radii_orb_basis(ri_basis, dft_control%qs_control%eps_pgf_orb)
    1901              :       END DO
    1902              : 
    1903           14 :       n_mem = SIZE(t_3c_der_RI_comp, 1)
    1904           56 :       DO i_xyz = 1, 3
    1905           42 :          CALL dbt_create(t_3c_ao_ri_ao, t_3c_der_RI(i_xyz))
    1906           42 :          CALL dbt_create(t_3c_ao_ri_ao, t_3c_der_AO(i_xyz))
    1907              : 
    1908          194 :          DO i_mem = 1, n_mem
    1909              :             CALL decompress_tensor(t_3c_ri_ao_ao, t_3c_der_RI_ind(i_mem, i_xyz)%ind, &
    1910          138 :                                    t_3c_der_RI_comp(i_mem, i_xyz), ri_data%filter_eps_storage)
    1911          138 :             CALL dbt_copy(t_3c_ri_ao_ao, t_3c_der_RI(i_xyz), order=[2, 1, 3], move_data=.TRUE., summation=.TRUE.)
    1912              : 
    1913              :             CALL decompress_tensor(t_3c_ri_ao_ao, t_3c_der_AO_ind(i_mem, i_xyz)%ind, &
    1914          138 :                                    t_3c_der_AO_comp(i_mem, i_xyz), ri_data%filter_eps_storage)
    1915          180 :             CALL dbt_copy(t_3c_ri_ao_ao, t_3c_der_AO(i_xyz), order=[2, 1, 3], move_data=.TRUE., summation=.TRUE.)
    1916              :          END DO
    1917              :       END DO
    1918              : 
    1919           56 :       DO i_xyz = 1, 3
    1920          194 :          DO i_mem = 1, n_mem
    1921          138 :             CALL dealloc_containers(t_3c_der_AO_comp(i_mem, i_xyz), dummy_int)
    1922          180 :             CALL dealloc_containers(t_3c_der_RI_comp(i_mem, i_xyz), dummy_int)
    1923              :          END DO
    1924              :       END DO
    1925          290 :       DEALLOCATE (t_3c_der_AO_ind, t_3c_der_RI_ind)
    1926              : 
    1927              :       ! Get the 3c integrals (desymmetrized)
    1928           14 :       CALL dbt_create(t_3c_ao_ri_ao, t_3c_desymm)
    1929           14 :       CALL dbt_copy(ri_data%t_3c_int_ctr_1(1, 1), t_3c_desymm)
    1930              :       CALL dbt_copy(ri_data%t_3c_int_ctr_1(1, 1), t_3c_desymm, order=[3, 2, 1], &
    1931           14 :                     summation=.TRUE., move_data=.TRUE.)
    1932              : 
    1933           14 :       CALL dbt_destroy(t_3c_ao_ri_ao)
    1934           14 :       CALL dbt_destroy(t_3c_ri_ao_ao)
    1935              : 
    1936              :       ! Some utilities
    1937           14 :       spin_fac = 0.5_dp
    1938           14 :       IF (nspins == 2) spin_fac = 1.0_dp
    1939              : 
    1940           42 :       ALLOCATE (idx_to_at_RI(SIZE(ri_data%bsizes_RI_split)))
    1941           14 :       CALL get_idx_to_atom(idx_to_at_RI, ri_data%bsizes_RI_split, ri_data%bsizes_RI)
    1942              : 
    1943           42 :       ALLOCATE (idx_to_at_AO(SIZE(ri_data%bsizes_AO_split)))
    1944           14 :       CALL get_idx_to_atom(idx_to_at_AO, ri_data%bsizes_AO_split, ri_data%bsizes_AO)
    1945              : 
    1946           14 :       CALL get_atomic_kind_set(atomic_kind_set, kind_of=kind_of, atom_of_kind=atom_of_kind)
    1947              : 
    1948              :       ! 2-center RI tensors
    1949              :       CALL create_2c_tensor(t_2c_RI, dist1, dist2, ri_data%pgrid_2d, &
    1950           14 :                             ri_data%bsizes_RI_split, ri_data%bsizes_RI_split, name="(RI | RI)")
    1951           14 :       DEALLOCATE (dist1, dist2)
    1952              : 
    1953              :       CALL create_2c_tensor(t_2c_RI_PQ, dist1, dist2, ri_data%pgrid_2d, &
    1954              :                             ri_data%bsizes_RI_fit, ri_data%bsizes_RI_fit, name="(RI | RI)")
    1955           14 :       DEALLOCATE (dist1, dist2)
    1956              : 
    1957           14 :       IF (.NOT. ri_data%same_op) THEN
    1958              :          !precompute the (P|Q)*S^-1 product
    1959            4 :          CALL dbt_create(t_2c_RI_PQ, t_2c_RI_inv)
    1960            4 :          CALL dbt_create(t_2c_RI_PQ, t_2c_RI_met)
    1961            4 :          CALL dbt_create(ri_data%t_2c_inv(1, 1), t_2c_tmp)
    1962              : 
    1963              :          CALL dbt_contract(1.0_dp, ri_data%t_2c_inv(1, 1), ri_data%t_2c_pot(1, 1), &
    1964              :                            0.0_dp, t_2c_tmp, &
    1965              :                            contract_1=[2], notcontract_1=[1], &
    1966              :                            contract_2=[1], notcontract_2=[2], &
    1967              :                            map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
    1968            4 :                            unit_nr=unit_nr_dbcsr, flop=nflop)
    1969            4 :          ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
    1970              : 
    1971            4 :          CALL dbt_copy(t_2c_tmp, t_2c_RI_inv, move_data=.TRUE.)
    1972            4 :          CALL dbt_destroy(t_2c_tmp)
    1973              :       END IF
    1974              : 
    1975              :       !3 loops in MO force evaluations. To be consistent with input MEMORY_CUT, need to take the cubic root
    1976              :       !No need to cut memory further because SCF tensors alrady dense
    1977           14 :       n_mem_input = FLOOR((ri_data%n_mem_input - 0.1_dp)**(1._dp/3._dp)) + 1
    1978           14 :       n_mem_input_RI = FLOOR((ri_data%n_mem_input - 0.1_dp)/n_mem_input**2) + 1
    1979              : 
    1980              :       !batches on RI_split and RI_fit blocks
    1981           14 :       n_mem_RI = n_mem_input_RI
    1982              :       CALL create_tensor_batches(ri_data%bsizes_RI_split, n_mem_RI, batch_start_RI, batch_end_RI, &
    1983           14 :                                  batch_blk_start, batch_blk_end)
    1984           42 :       ALLOCATE (batch_ranges_RI(n_mem_RI + 1))
    1985           28 :       batch_ranges_RI(1:n_mem_RI) = batch_blk_start(1:n_mem_RI)
    1986           14 :       batch_ranges_RI(n_mem_RI + 1) = batch_blk_end(n_mem_RI) + 1
    1987           14 :       DEALLOCATE (batch_blk_start, batch_blk_end)
    1988              : 
    1989           14 :       n_mem_RI_fit = n_mem_input_RI
    1990              :       CALL create_tensor_batches(ri_data%bsizes_RI_fit, n_mem_RI_fit, batch_start_RI_fit, batch_end_RI_fit, &
    1991           14 :                                  batch_blk_start, batch_blk_end)
    1992           42 :       ALLOCATE (batch_ranges_RI_fit(n_mem_RI_fit + 1))
    1993           28 :       batch_ranges_RI_fit(1:n_mem_RI_fit) = batch_blk_start(1:n_mem_RI_fit)
    1994           14 :       batch_ranges_RI_fit(n_mem_RI_fit + 1) = batch_blk_end(n_mem_RI_fit) + 1
    1995           14 :       DEALLOCATE (batch_blk_start, batch_blk_end)
    1996              : 
    1997           32 :       DO ispin = 1, nspins
    1998              : 
    1999              :          ! 2 )Prepare the batches for this spin
    2000           18 :          CALL dbcsr_get_info(mo_coeff(ispin), nfullcols_total=n_mos)
    2001              :          !note: optimized GPU block size for SCF is 64x1x64. Here we do 8x8x64
    2002           36 :          CALL split_block_sizes([n_mos], bsizes_MO, max_size=FLOOR(SQRT(ri_data%max_bsize_MO - 0.1)) + 1)
    2003              : 
    2004              :          !batching on MO blocks
    2005           18 :          n_mem = n_mem_input
    2006              :          CALL create_tensor_batches(bsizes_MO, n_mem, batch_start, batch_end, &
    2007           18 :                                     batch_blk_start, batch_blk_end)
    2008           54 :          ALLOCATE (batch_ranges(n_mem + 1))
    2009           40 :          batch_ranges(1:n_mem) = batch_blk_start(1:n_mem)
    2010           18 :          batch_ranges(n_mem + 1) = batch_blk_end(n_mem) + 1
    2011           18 :          DEALLOCATE (batch_blk_start, batch_blk_end)
    2012              : 
    2013              :          ! Initialize the different tensors needed (Note: keep MO coeffs as (MO | AO) for less transpose)
    2014              :          CALL create_2c_tensor(t_mo_coeff, dist1, dist2, ri_data%pgrid_2d, bsizes_MO, &
    2015           18 :                                ri_data%bsizes_AO_split, name="MO coeffs")
    2016           18 :          DEALLOCATE (dist1, dist2)
    2017           18 :          CALL dbt_create(mo_coeff(ispin), t_2c_tmp, name="MO coeffs")
    2018           18 :          CALL dbt_copy_matrix_to_tensor(mo_coeff(ispin), t_2c_tmp)
    2019           18 :          CALL dbt_copy(t_2c_tmp, t_mo_coeff, order=[2, 1], move_data=.TRUE.)
    2020           18 :          CALL dbt_destroy(t_2c_tmp)
    2021              : 
    2022           18 :          CALL dbt_create(t_mo_coeff, t_mo_cpy)
    2023           18 :          CALL dbt_copy(t_mo_coeff, t_mo_cpy)
    2024           72 :          DO i_xyz = 1, 3
    2025           54 :             CALL dbt_create(t_mo_coeff, t_2c_MO_AO_ctr(i_xyz))
    2026           72 :             CALL dbt_create(t_mo_coeff, t_2c_MO_AO(i_xyz))
    2027              :          END DO
    2028              : 
    2029              :          CALL create_3c_tensor(t_3c_ao_ri_mo, dist1, dist2, dist3, pgrid_1, ri_data%bsizes_AO_split, &
    2030           18 :                                ri_data%bsizes_RI_split, bsizes_MO, [1, 2], [3], name="(AO RI| MO)")
    2031           18 :          DEALLOCATE (dist1, dist2, dist3)
    2032              : 
    2033           18 :          CALL dbt_create(t_3c_ao_ri_mo, t_3c_0)
    2034           18 :          CALL dbt_destroy(t_3c_ao_ri_mo)
    2035              : 
    2036              :          CALL create_3c_tensor(t_3c_mo_ri_ao, dist1, dist2, dist3, pgrid_1, bsizes_MO, ri_data%bsizes_RI_split, &
    2037           18 :                                ri_data%bsizes_AO_split, [1, 2], [3], name="(MO RI | AO)")
    2038           18 :          DEALLOCATE (dist1, dist2, dist3)
    2039           18 :          CALL dbt_create(t_3c_mo_ri_ao, t_3c_1)
    2040              : 
    2041           72 :          DO i_xyz = 1, 3
    2042           54 :             CALL dbt_create(t_3c_mo_ri_ao, t_3c_der_RI_ctr_1(i_xyz))
    2043           72 :             CALL dbt_create(t_3c_mo_ri_ao, t_3c_der_AO_ctr_1(i_xyz))
    2044              :          END DO
    2045              : 
    2046              :          CALL create_3c_tensor(t_3c_mo_ri_mo, dist1, dist2, dist3, pgrid_1, bsizes_MO, &
    2047           18 :                                ri_data%bsizes_RI_split, bsizes_MO, [1, 2], [3], name="(MO RI | MO)")
    2048           18 :          DEALLOCATE (dist1, dist2, dist3)
    2049           18 :          CALL dbt_create(t_3c_mo_ri_mo, t_3c_work)
    2050              : 
    2051              :          CALL create_3c_tensor(t_3c_ri_mo_mo, dist1, dist2, dist3, pgrid_2, ri_data%bsizes_RI_split, &
    2052           18 :                                bsizes_MO, bsizes_MO, [1], [2, 3], name="(RI| MO MO)")
    2053           18 :          DEALLOCATE (dist1, dist2, dist3)
    2054              : 
    2055           18 :          CALL dbt_create(t_3c_ri_mo_mo, t_3c_2)
    2056           18 :          CALL dbt_create(t_3c_ri_mo_mo, t_3c_3)
    2057           18 :          CALL dbt_create(t_3c_ri_mo_mo, t_3c_RI_ctr)
    2058           72 :          DO i_xyz = 1, 3
    2059           72 :             CALL dbt_create(t_3c_ri_mo_mo, t_3c_der_RI_ctr_2(i_xyz))
    2060              :          END DO
    2061              : 
    2062              :          !Very large RI_fit blocks => new pgrid to make sure distribution is ideal
    2063           18 :          pdims(:) = 0
    2064              :          CALL create_3c_tensor(t_3c_ri_mo_mo_fit, dist1, dist2, dist3, pgrid_2, ri_data%bsizes_RI_fit, &
    2065           18 :                                bsizes_MO, bsizes_MO, [1], [2, 3], name="(RI| MO MO)")
    2066           18 :          DEALLOCATE (dist1, dist2, dist3)
    2067              : 
    2068           18 :          CALL dbt_create(t_3c_ri_mo_mo_fit, t_3c_4)
    2069           18 :          CALL dbt_create(t_3c_ri_mo_mo_fit, t_3c_5)
    2070           18 :          CALL dbt_create(t_3c_ri_mo_mo_fit, t_3c_6)
    2071              : 
    2072           18 :          CALL dbt_batched_contract_init(t_3c_desymm, batch_range_2=batch_ranges_RI)
    2073           18 :          CALL dbt_batched_contract_init(t_3c_0, batch_range_2=batch_ranges_RI, batch_range_3=batch_ranges)
    2074              : 
    2075           72 :          DO i_xyz = 1, 3
    2076           54 :             CALL dbt_batched_contract_init(t_3c_der_AO(i_xyz), batch_range_2=batch_ranges_RI)
    2077           72 :             CALL dbt_batched_contract_init(t_3c_der_RI(i_xyz), batch_range_2=batch_ranges_RI)
    2078              :          END DO
    2079              : 
    2080           18 :          CALL para_env%sync()
    2081           18 :          t1 = m_walltime()
    2082              : 
    2083              :          ! 2) Loop over batches
    2084           40 :          DO i_mem = 1, n_mem
    2085              : 
    2086           22 :             bounds_ctr_1d(1, 1) = batch_start(i_mem)
    2087           22 :             bounds_ctr_1d(2, 1) = batch_end(i_mem)
    2088              : 
    2089           22 :             bounds_ctr_2d(1, 1) = 1
    2090           96 :             bounds_ctr_2d(2, 1) = SUM(ri_data%bsizes_AO)
    2091              : 
    2092              :             ! 3) Do the first AO to MO contraction here
    2093           22 :             CALL timeset(routineN//"_AO2MO_1", handle)
    2094           22 :             CALL dbt_batched_contract_init(t_mo_coeff)
    2095           44 :             DO k_mem = 1, n_mem_RI
    2096           22 :                bounds_ctr_2d(1, 2) = batch_start_RI(k_mem)
    2097           22 :                bounds_ctr_2d(2, 2) = batch_end_RI(k_mem)
    2098              : 
    2099              :                CALL dbt_contract(1.0_dp, t_mo_coeff, t_3c_desymm, &
    2100              :                                  1.0_dp, t_3c_0, &
    2101              :                                  contract_1=[2], notcontract_1=[1], &
    2102              :                                  contract_2=[3], notcontract_2=[1, 2], &
    2103              :                                  map_1=[3], map_2=[1, 2], filter_eps=ri_data%filter_eps, &
    2104              :                                  bounds_2=bounds_ctr_1d, &
    2105              :                                  bounds_3=bounds_ctr_2d, &
    2106           22 :                                  unit_nr=unit_nr_dbcsr, flop=nflop)
    2107           44 :                ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
    2108              :             END DO
    2109           22 :             CALL dbt_copy(t_3c_0, t_3c_1, order=[3, 2, 1], move_data=.TRUE.)
    2110              : 
    2111           88 :             DO i_xyz = 1, 3
    2112          132 :                DO k_mem = 1, n_mem_RI
    2113           66 :                   bounds_ctr_2d(1, 2) = batch_start_RI(k_mem)
    2114           66 :                   bounds_ctr_2d(2, 2) = batch_end_RI(k_mem)
    2115              : 
    2116              :                   CALL dbt_contract(1.0_dp, t_mo_coeff, t_3c_der_AO(i_xyz), &
    2117              :                                     1.0_dp, t_3c_0, &
    2118              :                                     contract_1=[2], notcontract_1=[1], &
    2119              :                                     contract_2=[3], notcontract_2=[1, 2], &
    2120              :                                     map_1=[3], map_2=[1, 2], filter_eps=ri_data%filter_eps, &
    2121              :                                     bounds_2=bounds_ctr_1d, &
    2122              :                                     bounds_3=bounds_ctr_2d, &
    2123           66 :                                     unit_nr=unit_nr_dbcsr, flop=nflop)
    2124          132 :                   ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
    2125              :                END DO
    2126           66 :                CALL dbt_copy(t_3c_0, t_3c_der_AO_ctr_1(i_xyz), order=[3, 2, 1], move_data=.TRUE.)
    2127              : 
    2128          132 :                DO k_mem = 1, n_mem_RI
    2129           66 :                   bounds_ctr_2d(1, 2) = batch_start_RI(k_mem)
    2130           66 :                   bounds_ctr_2d(2, 2) = batch_end_RI(k_mem)
    2131              : 
    2132              :                   CALL dbt_contract(1.0_dp, t_mo_coeff, t_3c_der_RI(i_xyz), &
    2133              :                                     1.0_dp, t_3c_0, &
    2134              :                                     contract_1=[2], notcontract_1=[1], &
    2135              :                                     contract_2=[3], notcontract_2=[1, 2], &
    2136              :                                     map_1=[3], map_2=[1, 2], filter_eps=ri_data%filter_eps, &
    2137              :                                     bounds_2=bounds_ctr_1d, &
    2138              :                                     bounds_3=bounds_ctr_2d, &
    2139           66 :                                     unit_nr=unit_nr_dbcsr, flop=nflop)
    2140          132 :                   ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
    2141              :                END DO
    2142           88 :                CALL dbt_copy(t_3c_0, t_3c_der_RI_ctr_1(i_xyz), order=[3, 2, 1], move_data=.TRUE.)
    2143              :             END DO
    2144           22 :             CALL dbt_batched_contract_finalize(t_mo_coeff)
    2145           22 :             CALL timestop(handle)
    2146              : 
    2147           22 :             CALL dbt_batched_contract_init(t_3c_1, batch_range_1=batch_ranges, batch_range_2=batch_ranges_RI)
    2148              :             CALL dbt_batched_contract_init(t_3c_work, batch_range_1=batch_ranges, batch_range_2=batch_ranges_RI, &
    2149           22 :                                            batch_range_3=batch_ranges)
    2150           22 :             CALL dbt_batched_contract_init(t_3c_2, batch_range_2=batch_ranges, batch_range_3=batch_ranges)
    2151              :             CALL dbt_batched_contract_init(t_3c_3, batch_range_1=batch_ranges_RI, &
    2152           22 :                                            batch_range_2=batch_ranges, batch_range_3=batch_ranges)
    2153              : 
    2154              :             CALL dbt_batched_contract_init(t_3c_4, batch_range_1=batch_ranges_RI_fit, &
    2155           22 :                                            batch_range_2=batch_ranges, batch_range_3=batch_ranges)
    2156           22 :             CALL dbt_batched_contract_init(t_3c_5, batch_range_2=batch_ranges, batch_range_3=batch_ranges)
    2157              : 
    2158           88 :             DO i_xyz = 1, 3
    2159              :                CALL dbt_batched_contract_init(t_3c_der_RI_ctr_1(i_xyz), batch_range_1=batch_ranges, &
    2160           66 :                                               batch_range_2=batch_ranges_RI)
    2161              :                CALL dbt_batched_contract_init(t_3c_der_AO_ctr_1(i_xyz), batch_range_1=batch_ranges, &
    2162           88 :                                               batch_range_2=batch_ranges_RI)
    2163              : 
    2164              :             END DO
    2165              : 
    2166           22 :             IF (.NOT. ri_data%same_op) THEN
    2167            8 :                CALL dbt_batched_contract_init(t_3c_6, batch_range_2=batch_ranges, batch_range_3=batch_ranges)
    2168              :             END IF
    2169              : 
    2170           52 :             DO j_mem = 1, n_mem
    2171              : 
    2172           30 :                bounds_ctr_1d(1, 1) = batch_start(j_mem)
    2173           30 :                bounds_ctr_1d(2, 1) = batch_end(j_mem)
    2174              : 
    2175           30 :                bounds_ctr_2d(1, 1) = batch_start(i_mem)
    2176           30 :                bounds_ctr_2d(2, 1) = batch_end(i_mem)
    2177              : 
    2178              :                ! 3) Do the second AO to MO contraction here, followed by the S^-1 contraction
    2179           30 :                CALL timeset(routineN//"_AO2MO_2", handle)
    2180           30 :                CALL dbt_batched_contract_init(t_mo_coeff)
    2181           60 :                DO k_mem = 1, n_mem_RI
    2182           30 :                   bounds_ctr_2d(1, 2) = batch_start_RI(k_mem)
    2183           30 :                   bounds_ctr_2d(2, 2) = batch_end_RI(k_mem)
    2184              : 
    2185              :                   CALL dbt_contract(1.0_dp, t_mo_coeff, t_3c_1, &
    2186              :                                     1.0_dp, t_3c_work, &
    2187              :                                     contract_1=[2], notcontract_1=[1], &
    2188              :                                     contract_2=[3], notcontract_2=[1, 2], &
    2189              :                                     map_1=[3], map_2=[1, 2], filter_eps=ri_data%filter_eps, &
    2190              :                                     bounds_2=bounds_ctr_1d, &
    2191              :                                     bounds_3=bounds_ctr_2d, &
    2192           30 :                                     unit_nr=unit_nr_dbcsr, flop=nflop)
    2193           60 :                   ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
    2194              :                END DO
    2195           30 :                CALL dbt_batched_contract_finalize(t_mo_coeff)
    2196           30 :                CALL timestop(handle)
    2197              : 
    2198           30 :                bounds_ctr_2d(1, 1) = batch_start(i_mem)
    2199           30 :                bounds_ctr_2d(2, 1) = batch_end(i_mem)
    2200           30 :                bounds_ctr_2d(1, 2) = batch_start(j_mem)
    2201           30 :                bounds_ctr_2d(2, 2) = batch_end(j_mem)
    2202              : 
    2203              :                ! 4) Contract 3c MO integrals with S^-1 as well
    2204           30 :                CALL timeset(routineN//"_2c_inv", handle)
    2205           30 :                CALL dbt_copy(t_3c_work, t_3c_3, order=[2, 1, 3], move_data=.TRUE.)
    2206           60 :                DO k_mem = 1, n_mem_RI
    2207           30 :                   bounds_ctr_1d(1, 1) = batch_start_RI(k_mem)
    2208           30 :                   bounds_ctr_1d(2, 1) = batch_end_RI(k_mem)
    2209              : 
    2210           30 :                   CALL dbt_batched_contract_init(ri_data%t_2c_inv(1, 1))
    2211              :                   CALL dbt_contract(1.0_dp, ri_data%t_2c_inv(1, 1), t_3c_3, &
    2212              :                                     1.0_dp, t_3c_2, &
    2213              :                                     contract_1=[2], notcontract_1=[1], &
    2214              :                                     contract_2=[1], notcontract_2=[2, 3], &
    2215              :                                     map_1=[1], map_2=[2, 3], filter_eps=ri_data%filter_eps, &
    2216              :                                     bounds_1=bounds_ctr_1d, &
    2217              :                                     bounds_3=bounds_ctr_2d, &
    2218           30 :                                     unit_nr=unit_nr_dbcsr, flop=nflop)
    2219           30 :                   ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
    2220           60 :                   CALL dbt_batched_contract_finalize(ri_data%t_2c_inv(1, 1))
    2221              :                END DO
    2222           30 :                CALL dbt_copy(t_3c_ri_mo_mo, t_3c_3)
    2223           30 :                CALL timestop(handle)
    2224              : 
    2225              :                !Only contract (ab|P') with MO coeffs since need AO rep for the force of (a'b|P)
    2226           30 :                bounds_ctr_1d(1, 1) = batch_start(j_mem)
    2227           30 :                bounds_ctr_1d(2, 1) = batch_end(j_mem)
    2228              : 
    2229           30 :                bounds_ctr_2d(1, 1) = batch_start(i_mem)
    2230           30 :                bounds_ctr_2d(2, 1) = batch_end(i_mem)
    2231              : 
    2232           30 :                CALL timeset(routineN//"_AO2MO_2", handle)
    2233           30 :                CALL dbt_batched_contract_init(t_mo_coeff)
    2234          120 :                DO i_xyz = 1, 3
    2235          180 :                   DO k_mem = 1, n_mem_RI
    2236           90 :                      bounds_ctr_2d(1, 2) = batch_start_RI(k_mem)
    2237           90 :                      bounds_ctr_2d(2, 2) = batch_end_RI(k_mem)
    2238              : 
    2239              :                      CALL dbt_contract(1.0_dp, t_mo_coeff, t_3c_der_RI_ctr_1(i_xyz), &
    2240              :                                        1.0_dp, t_3c_work, &
    2241              :                                        contract_1=[2], notcontract_1=[1], &
    2242              :                                        contract_2=[3], notcontract_2=[1, 2], &
    2243              :                                        map_1=[3], map_2=[1, 2], filter_eps=ri_data%filter_eps, &
    2244              :                                        bounds_2=bounds_ctr_1d, &
    2245              :                                        bounds_3=bounds_ctr_2d, &
    2246           90 :                                        unit_nr=unit_nr_dbcsr, flop=nflop)
    2247          180 :                      ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
    2248              :                   END DO
    2249          120 :                   CALL dbt_copy(t_3c_work, t_3c_der_RI_ctr_2(i_xyz), order=[2, 1, 3], move_data=.TRUE.)
    2250              :                END DO
    2251           30 :                CALL dbt_batched_contract_finalize(t_mo_coeff)
    2252           30 :                CALL timestop(handle)
    2253              : 
    2254           30 :                bounds_ctr_2d(1, 1) = batch_start(i_mem)
    2255           30 :                bounds_ctr_2d(2, 1) = batch_end(i_mem)
    2256           30 :                bounds_ctr_2d(1, 2) = batch_start(j_mem)
    2257           30 :                bounds_ctr_2d(2, 2) = batch_end(j_mem)
    2258              : 
    2259              :                ! 5) Force due to d/dx (P|Q)
    2260           30 :                CALL timeset(routineN//"_PQ_der", handle)
    2261           30 :                CALL dbt_copy(t_3c_2, t_3c_4, move_data=.TRUE.)
    2262           30 :                CALL dbt_copy(t_3c_4, t_3c_5)
    2263           60 :                DO k_mem = 1, n_mem_RI_fit
    2264           30 :                   bounds_ctr_1d(1, 1) = batch_start_RI_fit(k_mem)
    2265           30 :                   bounds_ctr_1d(2, 1) = batch_end_RI_fit(k_mem)
    2266              : 
    2267           30 :                   CALL dbt_batched_contract_init(t_2c_RI_PQ)
    2268              :                   CALL dbt_contract(1.0_dp, t_3c_4, t_3c_5, &
    2269              :                                     1.0_dp, t_2c_RI_PQ, &
    2270              :                                     contract_1=[2, 3], notcontract_1=[1], &
    2271              :                                     contract_2=[2, 3], notcontract_2=[1], &
    2272              :                                     bounds_1=bounds_ctr_2d, &
    2273              :                                     bounds_2=bounds_ctr_1d, &
    2274              :                                     map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
    2275           30 :                                     unit_nr=unit_nr_dbcsr, flop=nflop)
    2276           30 :                   ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
    2277           60 :                   CALL dbt_batched_contract_finalize(t_2c_RI_PQ)
    2278              :                END DO
    2279           30 :                CALL timestop(handle)
    2280              : 
    2281              :                ! 6) If metric, do the additional contraction  with S_pq^-1 (Q|R) (not on the derivatives)
    2282           30 :                IF (.NOT. ri_data%same_op) THEN
    2283           16 :                   CALL timeset(routineN//"_metric", handle)
    2284           32 :                   DO k_mem = 1, n_mem_RI_fit
    2285           16 :                      bounds_ctr_1d(1, 1) = batch_start_RI_fit(k_mem)
    2286           16 :                      bounds_ctr_1d(2, 1) = batch_end_RI_fit(k_mem)
    2287              : 
    2288           16 :                      CALL dbt_batched_contract_init(t_2c_RI_inv)
    2289              :                      CALL dbt_contract(1.0_dp, t_2c_RI_inv, t_3c_4, &
    2290              :                                        1.0_dp, t_3c_6, &
    2291              :                                        contract_1=[2], notcontract_1=[1], &
    2292              :                                        contract_2=[1], notcontract_2=[2, 3], &
    2293              :                                        bounds_1=bounds_ctr_1d, &
    2294              :                                        bounds_3=bounds_ctr_2d, &
    2295              :                                        map_1=[1], map_2=[2, 3], filter_eps=ri_data%filter_eps, &
    2296           16 :                                        unit_nr=unit_nr_dbcsr, flop=nflop)
    2297           16 :                      ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
    2298           32 :                      CALL dbt_batched_contract_finalize(t_2c_RI_inv)
    2299              :                   END DO
    2300           16 :                   CALL dbt_copy(t_3c_6, t_3c_4, move_data=.TRUE.)
    2301              : 
    2302              :                   ! 8) and get the force due to d/dx S^-1
    2303           32 :                   DO k_mem = 1, n_mem_RI_fit
    2304           16 :                      bounds_ctr_1d(1, 1) = batch_start_RI_fit(k_mem)
    2305           16 :                      bounds_ctr_1d(2, 1) = batch_end_RI_fit(k_mem)
    2306              : 
    2307           16 :                      CALL dbt_batched_contract_init(t_2c_RI_met)
    2308              :                      CALL dbt_contract(1.0_dp, t_3c_4, t_3c_5, &
    2309              :                                        1.0_dp, t_2c_RI_met, &
    2310              :                                        contract_1=[2, 3], notcontract_1=[1], &
    2311              :                                        contract_2=[2, 3], notcontract_2=[1], &
    2312              :                                        bounds_1=bounds_ctr_2d, &
    2313              :                                        bounds_2=bounds_ctr_1d, &
    2314              :                                        map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
    2315           16 :                                        unit_nr=unit_nr_dbcsr, flop=nflop)
    2316           16 :                      ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
    2317           32 :                      CALL dbt_batched_contract_finalize(t_2c_RI_met)
    2318              :                   END DO
    2319           16 :                   CALL timestop(handle)
    2320              :                END IF
    2321           30 :                CALL dbt_copy(t_3c_ri_mo_mo_fit, t_3c_5)
    2322              : 
    2323              :                ! 7) Do the force contribution due to 3c integrals (a'b|P) and (ab|P')
    2324              : 
    2325              :                ! (ab|P')
    2326           30 :                CALL timeset(routineN//"_3c_RI", handle)
    2327           30 :                pref = -0.5_dp*2.0_dp*hf_fraction*spin_fac
    2328           30 :                CALL dbt_copy(t_3c_4, t_3c_RI_ctr, move_data=.TRUE.)
    2329              :                CALL get_force_from_3c_trace(force, t_3c_RI_ctr, t_3c_der_RI_ctr_2, atom_of_kind, kind_of, &
    2330           30 :                                             idx_to_at_RI, pref)
    2331           30 :                CALL timestop(handle)
    2332              : 
    2333              :                ! (a'b|P) Note that derivative remains in AO rep until the actual force evaluation,
    2334              :                ! which also prevents doing a direct 3-center trace
    2335           30 :                bounds_ctr_2d(1, 1) = batch_start(i_mem)
    2336           30 :                bounds_ctr_2d(2, 1) = batch_end(i_mem)
    2337              : 
    2338           30 :                bounds_ctr_1d(1, 1) = batch_start(j_mem)
    2339           30 :                bounds_ctr_1d(2, 1) = batch_end(j_mem)
    2340              : 
    2341           30 :                CALL timeset(routineN//"_3c_AO", handle)
    2342           30 :                CALL dbt_copy(t_3c_RI_ctr, t_3c_work, order=[2, 1, 3], move_data=.TRUE.)
    2343          120 :                DO i_xyz = 1, 3
    2344              : 
    2345           90 :                   CALL dbt_batched_contract_init(t_2c_MO_AO_ctr(i_xyz))
    2346          180 :                   DO k_mem = 1, n_mem_RI
    2347           90 :                      bounds_ctr_2d(1, 2) = batch_start_RI(k_mem)
    2348           90 :                      bounds_ctr_2d(2, 2) = batch_end_RI(k_mem)
    2349              : 
    2350              :                      CALL dbt_contract(1.0_dp, t_3c_work, t_3c_der_AO_ctr_1(i_xyz), &
    2351              :                                        1.0_dp, t_2c_MO_AO_ctr(i_xyz), &
    2352              :                                        contract_1=[1, 2], notcontract_1=[3], &
    2353              :                                        contract_2=[1, 2], notcontract_2=[3], &
    2354              :                                        map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
    2355              :                                        bounds_1=bounds_ctr_2d, &
    2356              :                                        bounds_2=bounds_ctr_1d, &
    2357           90 :                                        unit_nr=unit_nr_dbcsr, flop=nflop)
    2358          180 :                      ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
    2359              :                   END DO
    2360          120 :                   CALL dbt_batched_contract_finalize(t_2c_MO_AO_ctr(i_xyz))
    2361              :                END DO
    2362          232 :                CALL timestop(handle)
    2363              : 
    2364              :             END DO !j_mem
    2365           22 :             CALL dbt_batched_contract_finalize(t_3c_1)
    2366           22 :             CALL dbt_batched_contract_finalize(t_3c_work)
    2367           22 :             CALL dbt_batched_contract_finalize(t_3c_2)
    2368           22 :             CALL dbt_batched_contract_finalize(t_3c_3)
    2369           22 :             CALL dbt_batched_contract_finalize(t_3c_4)
    2370           22 :             CALL dbt_batched_contract_finalize(t_3c_5)
    2371              : 
    2372           88 :             DO i_xyz = 1, 3
    2373           66 :                CALL dbt_batched_contract_finalize(t_3c_der_RI_ctr_1(i_xyz))
    2374           88 :                CALL dbt_batched_contract_finalize(t_3c_der_AO_ctr_1(i_xyz))
    2375              :             END DO
    2376              : 
    2377           62 :             IF (.NOT. ri_data%same_op) THEN
    2378            8 :                CALL dbt_batched_contract_finalize(t_3c_6)
    2379              :             END IF
    2380              : 
    2381              :          END DO !i_mem
    2382           18 :          CALL dbt_batched_contract_finalize(t_3c_desymm)
    2383           18 :          CALL dbt_batched_contract_finalize(t_3c_0)
    2384              : 
    2385           72 :          DO i_xyz = 1, 3
    2386           54 :             CALL dbt_batched_contract_finalize(t_3c_der_AO(i_xyz))
    2387           72 :             CALL dbt_batched_contract_finalize(t_3c_der_RI(i_xyz))
    2388              :          END DO
    2389              : 
    2390              :          !Force contribution due to 3-center AO derivatives (a'b|P)
    2391           18 :          pref = -0.5_dp*4.0_dp*hf_fraction*spin_fac
    2392           72 :          DO i_xyz = 1, 3
    2393           54 :             CALL dbt_copy(t_2c_MO_AO_ctr(i_xyz), t_2c_MO_AO(i_xyz), move_data=.TRUE.) !ensures matching distributions
    2394           54 :             CALL get_mo_ao_force(force, t_mo_cpy, t_2c_MO_AO(i_xyz), atom_of_kind, kind_of, idx_to_at_AO, pref, i_xyz)
    2395           72 :             CALL dbt_clear(t_2c_MO_AO(i_xyz))
    2396              :          END DO
    2397              : 
    2398              :          !Force contribution of d/dx (P|Q)
    2399           18 :          pref = 0.5_dp*hf_fraction*spin_fac
    2400           18 :          IF (.NOT. ri_data%same_op) pref = -pref
    2401              : 
    2402              :          !Making sure dists of the t_2c_RI tensors match
    2403           18 :          CALL dbt_copy(t_2c_RI_PQ, t_2c_RI, move_data=.TRUE.)
    2404              :          CALL get_2c_der_force(force, t_2c_RI, t_2c_der_RI, atom_of_kind, &
    2405           18 :                                kind_of, idx_to_at_RI, pref)
    2406           18 :          CALL dbt_clear(t_2c_RI)
    2407              : 
    2408              :          !Force contribution due to the inverse metric
    2409           18 :          IF (.NOT. ri_data%same_op) THEN
    2410            4 :             pref = 0.5_dp*2.0_dp*hf_fraction*spin_fac
    2411              : 
    2412            4 :             CALL dbt_copy(t_2c_RI_met, t_2c_RI, move_data=.TRUE.)
    2413              :             CALL get_2c_der_force(force, t_2c_RI, t_2c_der_metric, atom_of_kind, &
    2414            4 :                                   kind_of, idx_to_at_RI, pref)
    2415            4 :             CALL dbt_clear(t_2c_RI)
    2416              :          END IF
    2417              : 
    2418           18 :          CALL dbt_destroy(t_3c_0)
    2419           18 :          CALL dbt_destroy(t_3c_1)
    2420           18 :          CALL dbt_destroy(t_3c_2)
    2421           18 :          CALL dbt_destroy(t_3c_3)
    2422           18 :          CALL dbt_destroy(t_3c_4)
    2423           18 :          CALL dbt_destroy(t_3c_5)
    2424           18 :          CALL dbt_destroy(t_3c_6)
    2425           18 :          CALL dbt_destroy(t_3c_work)
    2426           18 :          CALL dbt_destroy(t_3c_RI_ctr)
    2427           18 :          CALL dbt_destroy(t_3c_mo_ri_ao)
    2428           18 :          CALL dbt_destroy(t_3c_mo_ri_mo)
    2429           18 :          CALL dbt_destroy(t_3c_ri_mo_mo)
    2430           18 :          CALL dbt_destroy(t_3c_ri_mo_mo_fit)
    2431           18 :          CALL dbt_destroy(t_mo_coeff)
    2432           18 :          CALL dbt_destroy(t_mo_cpy)
    2433           72 :          DO i_xyz = 1, 3
    2434           54 :             CALL dbt_destroy(t_2c_MO_AO(i_xyz))
    2435           54 :             CALL dbt_destroy(t_2c_MO_AO_ctr(i_xyz))
    2436           54 :             CALL dbt_destroy(t_3c_der_RI_ctr_1(i_xyz))
    2437           54 :             CALL dbt_destroy(t_3c_der_AO_ctr_1(i_xyz))
    2438           72 :             CALL dbt_destroy(t_3c_der_RI_ctr_2(i_xyz))
    2439              :          END DO
    2440           86 :          DEALLOCATE (batch_ranges, batch_start, batch_end)
    2441              :       END DO !ispin
    2442              : 
    2443              :       ! Clean-up
    2444           14 :       CALL dbt_pgrid_destroy(pgrid_1)
    2445           14 :       CALL dbt_pgrid_destroy(pgrid_2)
    2446           14 :       CALL dbt_destroy(t_3c_desymm)
    2447           14 :       CALL dbt_destroy(t_2c_RI)
    2448           14 :       CALL dbt_destroy(t_2c_RI_PQ)
    2449           14 :       IF (.NOT. ri_data%same_op) THEN
    2450            4 :          CALL dbt_destroy(t_2c_RI_met)
    2451            4 :          CALL dbt_destroy(t_2c_RI_inv)
    2452              :       END IF
    2453           56 :       DO i_xyz = 1, 3
    2454           42 :          CALL dbt_destroy(t_3c_der_AO(i_xyz))
    2455           42 :          CALL dbt_destroy(t_3c_der_RI(i_xyz))
    2456           42 :          CALL dbt_destroy(t_2c_der_RI(i_xyz))
    2457           56 :          IF (.NOT. ri_data%same_op) CALL dbt_destroy(t_2c_der_metric(i_xyz))
    2458              :       END DO
    2459           14 :       CALL dbt_copy(ri_data%t_3c_int_ctr_2(1, 1), ri_data%t_3c_int_ctr_1(1, 1))
    2460              : 
    2461           14 :       CALL para_env%sync()
    2462           14 :       t2 = m_walltime()
    2463              : 
    2464           14 :       ri_data%dbcsr_time = ri_data%dbcsr_time + t2 - t1
    2465              : 
    2466          476 :    END SUBROUTINE hfx_ri_forces_mo
    2467              : 
    2468              : ! **************************************************************************************************
    2469              : !> \brief New sparser implementation
    2470              : !> \param qs_env ...
    2471              : !> \param ri_data ...
    2472              : !> \param nspins ...
    2473              : !> \param hf_fraction ...
    2474              : !> \param rho_ao ...
    2475              : !> \param rho_ao_resp ...
    2476              : !> \param use_virial ...
    2477              : !> \param resp_only ...
    2478              : !> \param rescale_factor ...
    2479              : ! **************************************************************************************************
    2480          118 :    SUBROUTINE hfx_ri_forces_Pmat(qs_env, ri_data, nspins, hf_fraction, rho_ao, rho_ao_resp, &
    2481              :                                  use_virial, resp_only, rescale_factor)
    2482              : 
    2483              :       TYPE(qs_environment_type), POINTER                 :: qs_env
    2484              :       TYPE(hfx_ri_type), INTENT(INOUT)                   :: ri_data
    2485              :       INTEGER, INTENT(IN)                                :: nspins
    2486              :       REAL(dp), INTENT(IN)                               :: hf_fraction
    2487              :       TYPE(dbcsr_p_type), DIMENSION(:, :)                :: rho_ao
    2488              :       TYPE(dbcsr_p_type), DIMENSION(:), OPTIONAL         :: rho_ao_resp
    2489              :       LOGICAL, INTENT(IN), OPTIONAL                      :: use_virial, resp_only
    2490              :       REAL(dp), INTENT(IN), OPTIONAL                     :: rescale_factor
    2491              : 
    2492              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'hfx_ri_forces_Pmat'
    2493              : 
    2494              :       INTEGER                                            :: dummy_int, handle, i_mem, i_spin, i_xyz, &
    2495              :                                                             ibasis, j_mem, j_xyz, k_mem, k_xyz, &
    2496              :                                                             n_mem, n_mem_RI, natom, nkind, &
    2497              :                                                             unit_nr_dbcsr
    2498              :       INTEGER(int_8)                                     :: nflop
    2499          118 :       INTEGER, ALLOCATABLE, DIMENSION(:) :: atom_of_kind, batch_end, batch_end_RI, batch_ranges, &
    2500          118 :          batch_ranges_RI, batch_start, batch_start_RI, dist1, dist2, dist3, idx_to_at_AO, &
    2501          118 :          idx_to_at_RI, kind_of
    2502              :       INTEGER, DIMENSION(2, 1)                           :: ibounds, jbounds, kbounds
    2503              :       INTEGER, DIMENSION(2, 2)                           :: ijbounds
    2504              :       INTEGER, DIMENSION(2, 3)                           :: bounds_cpy
    2505          236 :       INTEGER, DIMENSION(:), POINTER                     :: col_bsize, row_bsize
    2506              :       LOGICAL                                            :: do_resp, resp_only_prv, use_virial_prv
    2507              :       REAL(dp)                                           :: pref, spin_fac, t1, t2
    2508              :       REAL(dp), DIMENSION(3, 3)                          :: work_virial
    2509          118 :       TYPE(atomic_kind_type), DIMENSION(:), POINTER      :: atomic_kind_set
    2510          118 :       TYPE(block_ind_type), ALLOCATABLE, DIMENSION(:, :) :: t_3c_der_AO_ind, t_3c_der_RI_ind
    2511              :       TYPE(cell_type), POINTER                           :: cell
    2512              :       TYPE(dbcsr_distribution_type), POINTER             :: dbcsr_dist
    2513              :       TYPE(dbcsr_type)                                   :: dbcsr_tmp, virial_trace
    2514         6844 :       TYPE(dbt_type) :: rho_ao_1, rho_ao_2, t_2c_RI, t_2c_RI_tmp, t_2c_tmp, t_2c_virial, t_3c_1, &
    2515         7788 :          t_3c_2, t_3c_3, t_3c_4, t_3c_5, t_3c_ao_ri_ao, t_3c_help_1, t_3c_help_2, t_3c_int, &
    2516         5782 :          t_3c_int_2, t_3c_ri_ao_ao, t_3c_sparse, t_3c_virial, t_R, t_SVS
    2517          118 :       TYPE(dbt_type), ALLOCATABLE, DIMENSION(:)          :: t_2c_der_metric, t_2c_der_RI, &
    2518          118 :                                                             t_3c_der_AO, t_3c_der_RI
    2519              :       TYPE(dft_control_type), POINTER                    :: dft_control
    2520              :       TYPE(gto_basis_set_p_type), ALLOCATABLE, &
    2521          118 :          DIMENSION(:), TARGET                            :: basis_set_AO, basis_set_RI
    2522              :       TYPE(gto_basis_set_type), POINTER                  :: orb_basis, ri_basis
    2523              :       TYPE(hfx_compression_type), ALLOCATABLE, &
    2524          118 :          DIMENSION(:, :)                                 :: t_3c_der_AO_comp, t_3c_der_RI_comp
    2525              :       TYPE(mp_para_env_type), POINTER                    :: para_env
    2526              :       TYPE(neighbor_list_3c_type)                        :: nl_3c
    2527              :       TYPE(neighbor_list_set_p_type), DIMENSION(:), &
    2528          118 :          POINTER                                         :: nl_2c_met, nl_2c_pot
    2529          118 :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
    2530          118 :       TYPE(qs_force_type), DIMENSION(:), POINTER         :: force
    2531          118 :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
    2532              :       TYPE(virial_type), POINTER                         :: virial
    2533              : 
    2534              :       !The idea is the following: we need to compute the gradients
    2535              :       ! d/dx [P_ab P_cd (acP) S^-1_PQ (Q|R) S^-1_RS (Sbd)]
    2536              :       ! Which we do in a few steps:
    2537              :       ! 1) Contract the density matrices with the 3c integrals: M_acS = P_ab P_cd (Sbd)
    2538              :       ! 2) Calculate the 3c contributions: d/dx (acP) [S^-1_PQ (Q|R) S^-1_RS M_acS]
    2539              :       !    For maximum perf, we first multiply all 2c matrices together, than contract with retain_sparsity
    2540              :       ! 3) Contract the 3c integrals and the M tensor together in order to only work with 2c quantities:
    2541              :       !    R_PS = (acP) M_acS
    2542              :       ! 4) From there, we can easily calculate the 2c contributions to the force:
    2543              :       !    Potential: [S^-1*R*S^-1]_QR d/dx (Q|R)
    2544              :       !    Metric:    [S^-1*R*S^-1*(Q|R)*S^-1]_UV d/dx S_UV
    2545              : 
    2546          118 :       NULLIFY (particle_set, virial, cell, force, atomic_kind_set, nl_2c_pot, nl_2c_met)
    2547          118 :       NULLIFY (orb_basis, ri_basis, qs_kind_set, particle_set, dft_control, dbcsr_dist)
    2548              : 
    2549          118 :       use_virial_prv = .FALSE.
    2550          118 :       IF (PRESENT(use_virial)) use_virial_prv = use_virial
    2551              : 
    2552          118 :       do_resp = .FALSE.
    2553          118 :       IF (PRESENT(rho_ao_resp)) THEN
    2554           30 :          IF (ASSOCIATED(rho_ao_resp(1)%matrix)) do_resp = .TRUE.
    2555              :       END IF
    2556              : 
    2557          118 :       resp_only_prv = .FALSE.
    2558          118 :       IF (PRESENT(resp_only)) resp_only_prv = resp_only
    2559              : 
    2560          118 :       unit_nr_dbcsr = ri_data%unit_nr_dbcsr
    2561              : 
    2562              :       CALL get_qs_env(qs_env, natom=natom, particle_set=particle_set, nkind=nkind, &
    2563              :                       atomic_kind_set=atomic_kind_set, virial=virial, &
    2564              :                       cell=cell, force=force, para_env=para_env, dft_control=dft_control, &
    2565          118 :                       qs_kind_set=qs_kind_set, dbcsr_dist=dbcsr_dist)
    2566              : 
    2567              :       CALL create_3c_tensor(t_3c_ao_ri_ao, dist1, dist2, dist3, ri_data%pgrid_1, &
    2568              :                             ri_data%bsizes_AO_split, ri_data%bsizes_RI_split, ri_data%bsizes_AO_split, &
    2569          118 :                             [1, 2], [3], name="(AO RI | AO)")
    2570          118 :       DEALLOCATE (dist1, dist2, dist3)
    2571              : 
    2572              :       CALL create_3c_tensor(t_3c_ri_ao_ao, dist1, dist2, dist3, ri_data%pgrid_2, &
    2573              :                             ri_data%bsizes_RI_split, ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
    2574          118 :                             [1], [2, 3], name="(RI | AO AO)")
    2575          118 :       DEALLOCATE (dist1, dist2, dist3)
    2576              : 
    2577          928 :       ALLOCATE (basis_set_RI(nkind), basis_set_AO(nkind))
    2578          118 :       CALL basis_set_list_setup(basis_set_RI, ri_data%ri_basis_type, qs_kind_set)
    2579          118 :       CALL get_particle_set(particle_set, qs_kind_set, basis=basis_set_RI)
    2580          118 :       CALL basis_set_list_setup(basis_set_AO, ri_data%orb_basis_type, qs_kind_set)
    2581          118 :       CALL get_particle_set(particle_set, qs_kind_set, basis=basis_set_AO)
    2582              : 
    2583          346 :       DO ibasis = 1, SIZE(basis_set_AO)
    2584          228 :          orb_basis => basis_set_AO(ibasis)%gto_basis_set
    2585          228 :          CALL init_interaction_radii_orb_basis(orb_basis, ri_data%eps_pgf_orb)
    2586          228 :          ri_basis => basis_set_RI(ibasis)%gto_basis_set
    2587          346 :          CALL init_interaction_radii_orb_basis(ri_basis, ri_data%eps_pgf_orb)
    2588              :       END DO
    2589              : 
    2590              :       ! Precompute the derivatives
    2591         5782 :       ALLOCATE (t_2c_der_metric(3), t_2c_der_RI(3), t_3c_der_AO(3), t_3c_der_RI(3))
    2592          118 :       IF (use_virial) THEN
    2593              :          CALL precalc_derivatives(t_3c_der_RI_comp, t_3c_der_AO_comp, t_3c_der_RI_ind, t_3c_der_AO_ind, &
    2594              :                                   t_2c_der_RI, t_2c_der_metric, t_3c_ri_ao_ao, &
    2595              :                                   basis_set_AO, basis_set_RI, ri_data, qs_env, &
    2596              :                                   nl_2c_pot=nl_2c_pot, nl_2c_met=nl_2c_met, &
    2597            4 :                                   nl_3c_out=nl_3c, t_3c_virial=t_3c_virial)
    2598              : 
    2599           16 :          ALLOCATE (col_bsize(natom), row_bsize(natom))
    2600           16 :          col_bsize(:) = ri_data%bsizes_RI
    2601           16 :          row_bsize(:) = ri_data%bsizes_RI
    2602            4 :          CALL dbcsr_create(virial_trace, "virial_trace", dbcsr_dist, dbcsr_type_no_symmetry, row_bsize, col_bsize)
    2603            4 :          CALL dbt_create(virial_trace, t_2c_virial)
    2604            4 :          DEALLOCATE (col_bsize, row_bsize)
    2605              :       ELSE
    2606              :          CALL precalc_derivatives(t_3c_der_RI_comp, t_3c_der_AO_comp, t_3c_der_RI_ind, t_3c_der_AO_ind, &
    2607              :                                   t_2c_der_RI, t_2c_der_metric, t_3c_ri_ao_ao, &
    2608          114 :                                   basis_set_AO, basis_set_RI, ri_data, qs_env)
    2609              :       END IF
    2610              : 
    2611              :       ! Keep track of derivative sparsity to be able to use retain_sparsity in contraction
    2612          118 :       CALL dbt_create(t_3c_ri_ao_ao, t_3c_sparse)
    2613          472 :       DO i_xyz = 1, 3
    2614         1510 :          DO i_mem = 1, SIZE(t_3c_der_RI_comp, 1)
    2615              :             CALL decompress_tensor(t_3c_ri_ao_ao, t_3c_der_RI_ind(i_mem, i_xyz)%ind, &
    2616         1038 :                                    t_3c_der_RI_comp(i_mem, i_xyz), ri_data%filter_eps_storage)
    2617         1038 :             CALL dbt_copy(t_3c_ri_ao_ao, t_3c_sparse, summation=.TRUE., move_data=.TRUE.)
    2618              : 
    2619              :             CALL decompress_tensor(t_3c_ri_ao_ao, t_3c_der_AO_ind(i_mem, i_xyz)%ind, &
    2620         1038 :                                    t_3c_der_AO_comp(i_mem, i_xyz), ri_data%filter_eps_storage)
    2621         1038 :             CALL dbt_copy(t_3c_ri_ao_ao, t_3c_sparse, summation=.TRUE.)
    2622         1392 :             CALL dbt_copy(t_3c_ri_ao_ao, t_3c_sparse, order=[1, 3, 2], summation=.TRUE., move_data=.TRUE.)
    2623              :          END DO
    2624              :       END DO
    2625              : 
    2626          472 :       DO i_xyz = 1, 3
    2627          354 :          CALL dbt_create(t_3c_ri_ao_ao, t_3c_der_RI(i_xyz))
    2628          472 :          CALL dbt_create(t_3c_ri_ao_ao, t_3c_der_AO(i_xyz))
    2629              :       END DO
    2630              : 
    2631              :       ! Some utilities
    2632          118 :       spin_fac = 0.5_dp
    2633          118 :       IF (nspins == 2) spin_fac = 1.0_dp
    2634          118 :       IF (PRESENT(rescale_factor)) spin_fac = spin_fac*rescale_factor
    2635              : 
    2636          354 :       ALLOCATE (idx_to_at_RI(SIZE(ri_data%bsizes_RI_split)))
    2637          118 :       CALL get_idx_to_atom(idx_to_at_RI, ri_data%bsizes_RI_split, ri_data%bsizes_RI)
    2638              : 
    2639          354 :       ALLOCATE (idx_to_at_AO(SIZE(ri_data%bsizes_AO_split)))
    2640          118 :       CALL get_idx_to_atom(idx_to_at_AO, ri_data%bsizes_AO_split, ri_data%bsizes_AO)
    2641              : 
    2642          118 :       CALL get_atomic_kind_set(atomic_kind_set, kind_of=kind_of, atom_of_kind=atom_of_kind)
    2643              : 
    2644              :       ! Go over batches of the 2 AO indices to save memory
    2645          118 :       n_mem = ri_data%n_mem
    2646          472 :       ALLOCATE (batch_start(n_mem), batch_end(n_mem))
    2647          470 :       batch_start(:) = ri_data%starts_array_mem(:)
    2648          470 :       batch_end(:) = ri_data%ends_array_mem(:)
    2649              : 
    2650          354 :       ALLOCATE (batch_ranges(n_mem + 1))
    2651          470 :       batch_ranges(:n_mem) = ri_data%starts_array_mem_block(:)
    2652          118 :       batch_ranges(n_mem + 1) = ri_data%ends_array_mem_block(n_mem) + 1
    2653              : 
    2654          118 :       n_mem_RI = ri_data%n_mem_RI
    2655          472 :       ALLOCATE (batch_start_RI(n_mem_RI), batch_end_RI(n_mem_RI))
    2656          470 :       batch_start_RI(:) = ri_data%starts_array_RI_mem(:)
    2657          470 :       batch_end_RI(:) = ri_data%ends_array_RI_mem(:)
    2658              : 
    2659          354 :       ALLOCATE (batch_ranges_RI(n_mem_RI + 1))
    2660          470 :       batch_ranges_RI(:n_mem_RI) = ri_data%starts_array_RI_mem_block(:)
    2661          118 :       batch_ranges_RI(n_mem_RI + 1) = ri_data%ends_array_RI_mem_block(n_mem_RI) + 1
    2662              : 
    2663              :       ! Pre-create all the needed tensors
    2664              :       CALL create_2c_tensor(rho_ao_1, dist1, dist2, ri_data%pgrid_2d, &
    2665              :                             ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
    2666          118 :                             name="(AO | AO)")
    2667          118 :       DEALLOCATE (dist1, dist2)
    2668          118 :       CALL dbt_create(rho_ao_1, rho_ao_2)
    2669              : 
    2670              :       CALL create_2c_tensor(t_2c_RI, dist1, dist2, ri_data%pgrid_2d, &
    2671              :                             ri_data%bsizes_RI_split, ri_data%bsizes_RI_split, name="(RI | RI)")
    2672          118 :       DEALLOCATE (dist1, dist2)
    2673          118 :       CALL dbt_create(t_2c_RI, t_SVS)
    2674          118 :       CALL dbt_create(t_2c_RI, t_R)
    2675          118 :       CALL dbt_create(t_2c_RI, t_2c_RI_tmp)
    2676              : 
    2677          118 :       CALL dbt_create(t_3c_ao_ri_ao, t_3c_1)
    2678          118 :       CALL dbt_create(t_3c_ao_ri_ao, t_3c_2)
    2679          118 :       CALL dbt_create(t_3c_ri_ao_ao, t_3c_3)
    2680          118 :       CALL dbt_create(t_3c_ri_ao_ao, t_3c_4)
    2681          118 :       CALL dbt_create(t_3c_ri_ao_ao, t_3c_5)
    2682          118 :       CALL dbt_create(t_3c_ri_ao_ao, t_3c_help_1)
    2683          118 :       CALL dbt_create(t_3c_ri_ao_ao, t_3c_help_2)
    2684              : 
    2685          118 :       CALL dbt_create(t_3c_ao_ri_ao, t_3c_int)
    2686          118 :       CALL dbt_copy(ri_data%t_3c_int_ctr_2(1, 1), t_3c_int)
    2687              : 
    2688          118 :       CALL dbt_create(t_3c_ri_ao_ao, t_3c_int_2)
    2689              : 
    2690          118 :       CALL para_env%sync()
    2691          118 :       t1 = m_walltime()
    2692              : 
    2693              :       !Pre-calculate the necessary 2-center quantities
    2694          118 :       IF (.NOT. ri_data%same_op) THEN
    2695              :          !S^-1 * V * S^-1
    2696              :          CALL dbt_contract(1.0_dp, ri_data%t_2c_inv(1, 1), ri_data%t_2c_pot(1, 1), 0.0_dp, t_2c_RI, &
    2697              :                            contract_1=[2], notcontract_1=[1], &
    2698              :                            contract_2=[1], notcontract_2=[2], &
    2699              :                            map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
    2700           30 :                            unit_nr=unit_nr_dbcsr, flop=nflop)
    2701           30 :          ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
    2702              : 
    2703              :          CALL dbt_contract(1.0_dp, t_2c_RI, ri_data%t_2c_inv(1, 1), 0.0_dp, t_SVS, &
    2704              :                            contract_1=[2], notcontract_1=[1], &
    2705              :                            contract_2=[1], notcontract_2=[2], &
    2706              :                            map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
    2707           30 :                            unit_nr=unit_nr_dbcsr, flop=nflop)
    2708           30 :          ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
    2709              :       ELSE
    2710              :          ! Simply V^-1
    2711           88 :          CALL dbt_copy(ri_data%t_2c_inv(1, 1), t_SVS)
    2712              :       END IF
    2713              : 
    2714          118 :       CALL dbt_batched_contract_init(t_3c_int, batch_range_1=batch_ranges, batch_range_3=batch_ranges)
    2715              :       CALL dbt_batched_contract_init(t_3c_int_2, batch_range_1=batch_ranges_RI, &
    2716          118 :                                      batch_range_2=batch_ranges, batch_range_3=batch_ranges)
    2717          118 :       CALL dbt_batched_contract_init(t_3c_1, batch_range_1=batch_ranges, batch_range_3=batch_ranges)
    2718          118 :       CALL dbt_batched_contract_init(t_3c_2, batch_range_1=batch_ranges, batch_range_3=batch_ranges)
    2719              :       CALL dbt_batched_contract_init(t_3c_3, batch_range_1=batch_ranges_RI, &
    2720          118 :                                      batch_range_2=batch_ranges, batch_range_3=batch_ranges)
    2721              :       CALL dbt_batched_contract_init(t_3c_4, batch_range_1=batch_ranges_RI, &
    2722          118 :                                      batch_range_2=batch_ranges, batch_range_3=batch_ranges)
    2723              :       CALL dbt_batched_contract_init(t_3c_5, batch_range_1=batch_ranges_RI, &
    2724          118 :                                      batch_range_2=batch_ranges, batch_range_3=batch_ranges)
    2725              :       CALL dbt_batched_contract_init(t_3c_sparse, batch_range_1=batch_ranges_RI, &
    2726          118 :                                      batch_range_2=batch_ranges, batch_range_3=batch_ranges)
    2727              : 
    2728          248 :       DO i_spin = 1, nspins
    2729              : 
    2730              :          !Prepare Pmat in tensor format
    2731          130 :          CALL dbt_create(rho_ao(i_spin, 1)%matrix, t_2c_tmp)
    2732          130 :          CALL dbt_copy_matrix_to_tensor(rho_ao(i_spin, 1)%matrix, t_2c_tmp)
    2733          130 :          CALL dbt_copy(t_2c_tmp, rho_ao_1, move_data=.TRUE.)
    2734          130 :          CALL dbt_destroy(t_2c_tmp)
    2735              : 
    2736          130 :          IF (.NOT. do_resp) THEN
    2737           98 :             CALL dbt_copy(rho_ao_1, rho_ao_2)
    2738           32 :          ELSE IF (do_resp .AND. resp_only_prv) THEN
    2739              : 
    2740           24 :             CALL dbt_create(rho_ao_resp(i_spin)%matrix, t_2c_tmp)
    2741           24 :             CALL dbt_copy_matrix_to_tensor(rho_ao_resp(i_spin)%matrix, t_2c_tmp)
    2742           24 :             CALL dbt_copy(t_2c_tmp, rho_ao_2)
    2743              :             !symmetry allows to take 2*P_resp rasther than explicitely take all cross products
    2744           24 :             CALL dbt_copy(t_2c_tmp, rho_ao_2, summation=.TRUE., move_data=.TRUE.)
    2745           24 :             CALL dbt_destroy(t_2c_tmp)
    2746              :          ELSE
    2747              : 
    2748              :             !if not resp_only, need P-P_resp and P+P_resp
    2749            8 :             CALL dbt_copy(rho_ao_1, rho_ao_2)
    2750            8 :             CALL dbcsr_create(dbcsr_tmp, template=rho_ao_resp(i_spin)%matrix)
    2751            8 :             CALL dbcsr_add(dbcsr_tmp, rho_ao_resp(i_spin)%matrix, 0.0_dp, -1.0_dp)
    2752            8 :             CALL dbt_create(dbcsr_tmp, t_2c_tmp)
    2753            8 :             CALL dbt_copy_matrix_to_tensor(dbcsr_tmp, t_2c_tmp)
    2754            8 :             CALL dbt_copy(t_2c_tmp, rho_ao_1, summation=.TRUE., move_data=.TRUE.)
    2755            8 :             CALL dbcsr_release(dbcsr_tmp)
    2756              : 
    2757            8 :             CALL dbt_copy_matrix_to_tensor(rho_ao_resp(i_spin)%matrix, t_2c_tmp)
    2758            8 :             CALL dbt_copy(t_2c_tmp, rho_ao_2, summation=.TRUE., move_data=.TRUE.)
    2759            8 :             CALL dbt_destroy(t_2c_tmp)
    2760              : 
    2761              :          END IF
    2762          130 :          work_virial = 0.0_dp
    2763              : 
    2764          130 :          CALL timeset(routineN//"_3c", handle)
    2765              :          !Start looping of the batches
    2766          516 :          DO i_mem = 1, n_mem
    2767         1158 :             ibounds(:, 1) = [batch_start(i_mem), batch_end(i_mem)]
    2768              : 
    2769          386 :             CALL dbt_batched_contract_init(rho_ao_1)
    2770              :             CALL dbt_contract(1.0_dp, rho_ao_1, t_3c_int, 0.0_dp, t_3c_1, &
    2771              :                               contract_1=[1], notcontract_1=[2], &
    2772              :                               contract_2=[3], notcontract_2=[1, 2], &
    2773              :                               map_1=[3], map_2=[1, 2], filter_eps=ri_data%filter_eps, &
    2774          386 :                               bounds_2=ibounds, unit_nr=unit_nr_dbcsr, flop=nflop)
    2775          386 :             ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
    2776          386 :             CALL dbt_batched_contract_finalize(rho_ao_1)
    2777              : 
    2778          386 :             CALL dbt_copy(t_3c_1, t_3c_2, order=[3, 2, 1], move_data=.TRUE.)
    2779              : 
    2780         1536 :             DO j_mem = 1, n_mem
    2781         3450 :                jbounds(:, 1) = [batch_start(j_mem), batch_end(j_mem)]
    2782              : 
    2783         1150 :                CALL dbt_batched_contract_init(rho_ao_2)
    2784              :                CALL dbt_contract(1.0_dp, rho_ao_2, t_3c_2, 0.0_dp, t_3c_1, &
    2785              :                                  contract_1=[1], notcontract_1=[2], &
    2786              :                                  contract_2=[3], notcontract_2=[1, 2], &
    2787              :                                  map_1=[3], map_2=[1, 2], filter_eps=ri_data%filter_eps, &
    2788         1150 :                                  bounds_2=jbounds, unit_nr=unit_nr_dbcsr, flop=nflop)
    2789         1150 :                ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
    2790         1150 :                CALL dbt_batched_contract_finalize(rho_ao_2)
    2791              : 
    2792         3450 :                bounds_cpy(:, 1) = [batch_start(i_mem), batch_end(i_mem)]
    2793         6920 :                bounds_cpy(:, 2) = [1, SUM(ri_data%bsizes_RI)]
    2794         3450 :                bounds_cpy(:, 3) = [batch_start(j_mem), batch_end(j_mem)]
    2795         1150 :                CALL dbt_copy(t_3c_int, t_3c_int_2, order=[2, 1, 3], bounds=bounds_cpy)
    2796         1150 :                CALL dbt_copy(t_3c_1, t_3c_3, order=[2, 1, 3], move_data=.TRUE.)
    2797              : 
    2798         4970 :                DO k_mem = 1, n_mem_RI
    2799        10302 :                   kbounds(:, 1) = [batch_start_RI(k_mem), batch_end_RI(k_mem)]
    2800              : 
    2801        10302 :                   bounds_cpy(:, 1) = [batch_start_RI(k_mem), batch_end_RI(k_mem)]
    2802        10302 :                   bounds_cpy(:, 2) = [batch_start(i_mem), batch_end(i_mem)]
    2803        10302 :                   bounds_cpy(:, 3) = [batch_start(j_mem), batch_end(j_mem)]
    2804         3434 :                   CALL dbt_copy(t_3c_sparse, t_3c_4, bounds=bounds_cpy)
    2805              : 
    2806              :                   !Contract with the 2-center product S^-1 * V * S^-1 while keeping sparsity of derivatives
    2807         3434 :                   CALL dbt_batched_contract_init(t_SVS)
    2808              :                   CALL dbt_contract(1.0_dp, t_SVS, t_3c_3, 0.0_dp, t_3c_4, &
    2809              :                                     contract_1=[2], notcontract_1=[1], &
    2810              :                                     contract_2=[1], notcontract_2=[2, 3], &
    2811              :                                     map_1=[1], map_2=[2, 3], filter_eps=ri_data%filter_eps, &
    2812         3434 :                                     retain_sparsity=.TRUE., unit_nr=unit_nr_dbcsr, flop=nflop)
    2813         3434 :                   ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
    2814         3434 :                   CALL dbt_batched_contract_finalize(t_SVS)
    2815              : 
    2816         3434 :                   CALL dbt_copy(t_3c_4, t_3c_5, summation=.TRUE., move_data=.TRUE.)
    2817              : 
    2818        10302 :                   ijbounds(:, 1) = ibounds(:, 1)
    2819        10302 :                   ijbounds(:, 2) = jbounds(:, 1)
    2820              : 
    2821              :                   !Contract R_PS = (acP) M_acS
    2822         3434 :                   CALL dbt_batched_contract_init(t_R)
    2823              :                   CALL dbt_contract(1.0_dp, t_3c_int_2, t_3c_3, 1.0_dp, t_R, &
    2824              :                                     contract_1=[2, 3], notcontract_1=[1], &
    2825              :                                     contract_2=[2, 3], notcontract_2=[1], &
    2826              :                                     map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
    2827              :                                     bounds_1=ijbounds, bounds_3=kbounds, &
    2828         3434 :                                     unit_nr=unit_nr_dbcsr, flop=nflop)
    2829         3434 :                   ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
    2830         4584 :                   CALL dbt_batched_contract_finalize(t_R)
    2831              : 
    2832              :                END DO !k_mem
    2833              :             END DO !j_mem
    2834              : 
    2835          386 :             CALL dbt_copy(t_3c_5, t_3c_help_1, move_data=.TRUE.)
    2836              : 
    2837              :             !The force from the 3c derivatives
    2838          386 :             pref = -0.5_dp*2.0_dp*hf_fraction*spin_fac
    2839              : 
    2840         1500 :             DO k_mem = 1, SIZE(t_3c_der_RI_comp, 1)
    2841         4456 :                DO i_xyz = 1, 3
    2842         3342 :                   CALL dbt_clear(t_3c_der_RI(i_xyz))
    2843              :                   CALL decompress_tensor(t_3c_der_RI(i_xyz), t_3c_der_RI_ind(k_mem, i_xyz)%ind, &
    2844         4456 :                                          t_3c_der_RI_comp(k_mem, i_xyz), ri_data%filter_eps_storage)
    2845              :                END DO
    2846              :                CALL get_force_from_3c_trace(force, t_3c_help_1, t_3c_der_RI, atom_of_kind, kind_of, &
    2847         1500 :                                             idx_to_at_RI, pref)
    2848              :             END DO
    2849              : 
    2850          386 :             pref = -0.5_dp*4.0_dp*hf_fraction*spin_fac
    2851          386 :             IF (do_resp) THEN
    2852           96 :                pref = 0.5_dp*pref
    2853           96 :                CALL dbt_copy(t_3c_help_1, t_3c_help_2, order=[1, 3, 2])
    2854              :             END IF
    2855              : 
    2856         1500 :             DO k_mem = 1, SIZE(t_3c_der_AO_comp, 1)
    2857         4456 :                DO i_xyz = 1, 3
    2858         3342 :                   CALL dbt_clear(t_3c_der_AO(i_xyz))
    2859              :                   CALL decompress_tensor(t_3c_der_AO(i_xyz), t_3c_der_AO_ind(k_mem, i_xyz)%ind, &
    2860         4456 :                                          t_3c_der_AO_comp(k_mem, i_xyz), ri_data%filter_eps_storage)
    2861              :                END DO
    2862              :                CALL get_force_from_3c_trace(force, t_3c_help_1, t_3c_der_AO, atom_of_kind, kind_of, &
    2863         1114 :                                             idx_to_at_AO, pref, deriv_dim=2)
    2864              : 
    2865         1500 :                IF (do_resp) THEN
    2866              :                   CALL get_force_from_3c_trace(force, t_3c_help_2, t_3c_der_AO, atom_of_kind, kind_of, &
    2867          276 :                                                idx_to_at_AO, pref, deriv_dim=2)
    2868              :                END IF
    2869              :             END DO
    2870              : 
    2871              :             !The 3c virial contribution. Note: only fraction of integrals correspondig to i_mem calculated
    2872          386 :             IF (use_virial) THEN
    2873           12 :                pref = -0.5_dp*2.0_dp*hf_fraction*spin_fac
    2874           12 :                CALL dbt_copy(t_3c_help_1, t_3c_virial, move_data=.TRUE.)
    2875              :                CALL calc_3c_virial(work_virial, t_3c_virial, pref, qs_env, nl_3c, basis_set_RI, &
    2876              :                                    basis_set_AO, basis_set_AO, ri_data%ri_metric, &
    2877           12 :                                    der_eps=ri_data%eps_schwarz_forces, op_pos=1)
    2878              : 
    2879           12 :                CALL dbt_clear(t_3c_virial)
    2880              :             END IF
    2881              : 
    2882          386 :             CALL dbt_clear(t_3c_help_1)
    2883          516 :             CALL dbt_clear(t_3c_help_2)
    2884              :          END DO !i_mem
    2885          130 :          CALL timestop(handle)
    2886              : 
    2887          130 :          CALL timeset(routineN//"_2c", handle)
    2888              :          !Now we deal with all the 2-center quantities
    2889              :          !Calculate S^-1 * R * S^-1
    2890              :          CALL dbt_contract(1.0_dp, ri_data%t_2c_inv(1, 1), t_R, 0.0_dp, t_2c_RI_tmp, &
    2891              :                            contract_1=[2], notcontract_1=[1], &
    2892              :                            contract_2=[1], notcontract_2=[2], &
    2893              :                            map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
    2894          130 :                            unit_nr=unit_nr_dbcsr, flop=nflop)
    2895          130 :          ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
    2896              : 
    2897              :          CALL dbt_contract(1.0_dp, t_2c_RI_tmp, ri_data%t_2c_inv(1, 1), 0.0_dp, t_2c_RI, &
    2898              :                            contract_1=[2], notcontract_1=[1], &
    2899              :                            contract_2=[1], notcontract_2=[2], &
    2900              :                            map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
    2901          130 :                            unit_nr=unit_nr_dbcsr, flop=nflop)
    2902          130 :          ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
    2903              : 
    2904              :          !Calculate the potential contribution to the force: [S^-1*R*S^-1]_QR d/dx (Q|R)
    2905          130 :          pref = 0.5_dp*hf_fraction*spin_fac
    2906          130 :          IF (.NOT. ri_data%same_op) pref = -pref
    2907          130 :          CALL get_2c_der_force(force, t_2c_RI, t_2c_der_RI, atom_of_kind, kind_of, idx_to_at_RI, pref)
    2908              : 
    2909              :          !Calculate the contribution to the virial on the fly
    2910          130 :          IF (use_virial_prv) THEN
    2911            4 :             CALL dbt_copy(t_2c_RI, t_2c_virial)
    2912            4 :             CALL dbt_copy_tensor_to_matrix(t_2c_virial, virial_trace)
    2913              :             CALL calc_2c_virial(work_virial, virial_trace, pref, qs_env, nl_2c_pot, &
    2914            4 :                                 basis_set_RI, basis_set_RI, ri_data%hfx_pot)
    2915              :          END IF
    2916              : 
    2917              :          !And that from the metric: [S^-1*R*S^-1*(Q|R)*S^-1]_UV d/dx S_UV
    2918          130 :          IF (.NOT. ri_data%same_op) THEN
    2919              :             CALL dbt_contract(1.0_dp, t_2c_RI, ri_data%t_2c_pot(1, 1), 0.0_dp, t_2c_RI_tmp, &
    2920              :                               contract_1=[2], notcontract_1=[1], &
    2921              :                               contract_2=[1], notcontract_2=[2], &
    2922              :                               map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
    2923           32 :                               unit_nr=unit_nr_dbcsr, flop=nflop)
    2924           32 :             ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
    2925              : 
    2926              :             CALL dbt_contract(1.0_dp, t_2c_RI_tmp, ri_data%t_2c_inv(1, 1), 0.0_dp, t_2c_RI, &
    2927              :                               contract_1=[2], notcontract_1=[1], &
    2928              :                               contract_2=[1], notcontract_2=[2], &
    2929              :                               map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps, &
    2930           32 :                               unit_nr=unit_nr_dbcsr, flop=nflop)
    2931           32 :             ri_data%dbcsr_nflop = ri_data%dbcsr_nflop + nflop
    2932              : 
    2933           32 :             pref = 0.5_dp*2.0_dp*hf_fraction*spin_fac
    2934           32 :             CALL get_2c_der_force(force, t_2c_RI, t_2c_der_metric, atom_of_kind, kind_of, idx_to_at_RI, pref)
    2935              : 
    2936           32 :             IF (use_virial_prv) THEN
    2937            0 :                CALL dbt_copy(t_2c_RI, t_2c_virial)
    2938            0 :                CALL dbt_copy_tensor_to_matrix(t_2c_virial, virial_trace)
    2939              :                CALL calc_2c_virial(work_virial, virial_trace, pref, qs_env, nl_2c_met, &
    2940            0 :                                    basis_set_RI, basis_set_RI, ri_data%ri_metric)
    2941              :             END IF
    2942              :          END IF
    2943          130 :          CALL dbt_clear(t_2c_RI)
    2944          130 :          CALL dbt_clear(t_2c_RI_tmp)
    2945          130 :          CALL dbt_clear(t_R)
    2946          130 :          CALL dbt_clear(t_3c_help_1)
    2947          130 :          CALL dbt_clear(t_3c_help_2)
    2948          130 :          CALL timestop(handle)
    2949              : 
    2950          508 :          IF (use_virial_prv) THEN
    2951           16 :             DO k_xyz = 1, 3
    2952           52 :                DO j_xyz = 1, 3
    2953          156 :                   DO i_xyz = 1, 3
    2954              :                      virial%pv_fock_4c(i_xyz, j_xyz) = virial%pv_fock_4c(i_xyz, j_xyz) &
    2955          144 :                                                        + work_virial(i_xyz, k_xyz)*cell%hmat(j_xyz, k_xyz)
    2956              :                   END DO
    2957              :                END DO
    2958              :             END DO
    2959              :          END IF
    2960              :       END DO !i_spin
    2961              : 
    2962          118 :       CALL dbt_batched_contract_finalize(t_3c_int)
    2963          118 :       CALL dbt_batched_contract_finalize(t_3c_int_2)
    2964          118 :       CALL dbt_batched_contract_finalize(t_3c_1)
    2965          118 :       CALL dbt_batched_contract_finalize(t_3c_2)
    2966          118 :       CALL dbt_batched_contract_finalize(t_3c_3)
    2967          118 :       CALL dbt_batched_contract_finalize(t_3c_4)
    2968          118 :       CALL dbt_batched_contract_finalize(t_3c_5)
    2969          118 :       CALL dbt_batched_contract_finalize(t_3c_sparse)
    2970              : 
    2971          118 :       CALL para_env%sync()
    2972          118 :       t2 = m_walltime()
    2973              : 
    2974          118 :       CALL dbt_copy(t_3c_int, ri_data%t_3c_int_ctr_2(1, 1), move_data=.TRUE.)
    2975              : 
    2976              :       !clean-up
    2977          118 :       CALL dbt_destroy(rho_ao_1)
    2978          118 :       CALL dbt_destroy(rho_ao_2)
    2979          118 :       CALL dbt_destroy(t_3c_ao_ri_ao)
    2980          118 :       CALL dbt_destroy(t_3c_ri_ao_ao)
    2981          118 :       CALL dbt_destroy(t_3c_int)
    2982          118 :       CALL dbt_destroy(t_3c_int_2)
    2983          118 :       CALL dbt_destroy(t_3c_1)
    2984          118 :       CALL dbt_destroy(t_3c_2)
    2985          118 :       CALL dbt_destroy(t_3c_3)
    2986          118 :       CALL dbt_destroy(t_3c_4)
    2987          118 :       CALL dbt_destroy(t_3c_5)
    2988          118 :       CALL dbt_destroy(t_3c_help_1)
    2989          118 :       CALL dbt_destroy(t_3c_help_2)
    2990          118 :       CALL dbt_destroy(t_3c_sparse)
    2991          118 :       CALL dbt_destroy(t_SVS)
    2992          118 :       CALL dbt_destroy(t_R)
    2993          118 :       CALL dbt_destroy(t_2c_RI)
    2994          118 :       CALL dbt_destroy(t_2c_RI_tmp)
    2995              : 
    2996          472 :       DO i_xyz = 1, 3
    2997          354 :          CALL dbt_destroy(t_3c_der_RI(i_xyz))
    2998          354 :          CALL dbt_destroy(t_3c_der_AO(i_xyz))
    2999          354 :          CALL dbt_destroy(t_2c_der_RI(i_xyz))
    3000          472 :          IF (.NOT. ri_data%same_op) CALL dbt_destroy(t_2c_der_metric(i_xyz))
    3001              :       END DO
    3002              : 
    3003          472 :       DO i_xyz = 1, 3
    3004         1510 :          DO i_mem = 1, SIZE(t_3c_der_AO_comp, 1)
    3005         1038 :             CALL dealloc_containers(t_3c_der_AO_comp(i_mem, i_xyz), dummy_int)
    3006         1392 :             CALL dealloc_containers(t_3c_der_RI_comp(i_mem, i_xyz), dummy_int)
    3007              :          END DO
    3008              :       END DO
    3009         2194 :       DEALLOCATE (t_3c_der_AO_ind, t_3c_der_RI_ind)
    3010              : 
    3011          346 :       DO ibasis = 1, SIZE(basis_set_AO)
    3012          228 :          orb_basis => basis_set_AO(ibasis)%gto_basis_set
    3013          228 :          ri_basis => basis_set_RI(ibasis)%gto_basis_set
    3014          228 :          CALL init_interaction_radii_orb_basis(orb_basis, dft_control%qs_control%eps_pgf_orb)
    3015          346 :          CALL init_interaction_radii_orb_basis(ri_basis, dft_control%qs_control%eps_pgf_orb)
    3016              :       END DO
    3017              : 
    3018          118 :       IF (use_virial) THEN
    3019            4 :          CALL release_neighbor_list_sets(nl_2c_met)
    3020            4 :          CALL release_neighbor_list_sets(nl_2c_pot)
    3021            4 :          CALL neighbor_list_3c_destroy(nl_3c)
    3022            4 :          CALL dbcsr_release(virial_trace)
    3023            4 :          CALL dbt_destroy(t_2c_virial)
    3024            4 :          CALL dbt_destroy(t_3c_virial)
    3025              :       END IF
    3026              : 
    3027         2596 :    END SUBROUTINE hfx_ri_forces_Pmat
    3028              : 
    3029              : ! **************************************************************************************************
    3030              : !> \brief the general routine that calls the relevant force code
    3031              : !> \param qs_env ...
    3032              : !> \param ri_data ...
    3033              : !> \param nspins ...
    3034              : !> \param hf_fraction ...
    3035              : !> \param rho_ao ...
    3036              : !> \param rho_ao_resp ...
    3037              : !> \param mos ...
    3038              : !> \param use_virial ...
    3039              : !> \param resp_only ...
    3040              : !> \param rescale_factor ...
    3041              : ! **************************************************************************************************
    3042          132 :    SUBROUTINE hfx_ri_update_forces(qs_env, ri_data, nspins, hf_fraction, rho_ao, rho_ao_resp, &
    3043          132 :                                    mos, use_virial, resp_only, rescale_factor)
    3044              : 
    3045              :       TYPE(qs_environment_type), POINTER                 :: qs_env
    3046              :       TYPE(hfx_ri_type), INTENT(INOUT)                   :: ri_data
    3047              :       INTEGER, INTENT(IN)                                :: nspins
    3048              :       REAL(KIND=dp), INTENT(IN)                          :: hf_fraction
    3049              :       TYPE(dbcsr_p_type), DIMENSION(:, :), OPTIONAL      :: rho_ao
    3050              :       TYPE(dbcsr_p_type), DIMENSION(:), OPTIONAL         :: rho_ao_resp
    3051              :       TYPE(mo_set_type), DIMENSION(:), INTENT(IN), &
    3052              :          OPTIONAL                                        :: mos
    3053              :       LOGICAL, INTENT(IN), OPTIONAL                      :: use_virial, resp_only
    3054              :       REAL(dp), INTENT(IN), OPTIONAL                     :: rescale_factor
    3055              : 
    3056              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'hfx_ri_update_forces'
    3057              : 
    3058              :       INTEGER                                            :: handle, ispin
    3059              :       INTEGER, DIMENSION(2)                              :: homo
    3060          132 :       REAL(KIND=dp), DIMENSION(:), POINTER               :: mo_eigenvalues
    3061              :       TYPE(cp_fm_type), POINTER                          :: mo_coeff
    3062          396 :       TYPE(dbcsr_type), DIMENSION(2)                     :: mo_coeff_b
    3063              :       TYPE(dbcsr_type), POINTER                          :: mo_coeff_b_tmp
    3064              : 
    3065          132 :       CALL timeset(routineN, handle)
    3066              : 
    3067          146 :       SELECT CASE (ri_data%flavor)
    3068              :       CASE (ri_mo)
    3069              : 
    3070           32 :          DO ispin = 1, nspins
    3071           18 :             NULLIFY (mo_coeff_b_tmp)
    3072           18 :             CPASSERT(mos(ispin)%uniform_occupation)
    3073           18 :             CALL get_mo_set(mo_set=mos(ispin), mo_coeff=mo_coeff, eigenvalues=mo_eigenvalues, mo_coeff_b=mo_coeff_b_tmp)
    3074              : 
    3075           18 :             IF (.NOT. mos(ispin)%use_mo_coeff_b) CALL copy_fm_to_dbcsr(mo_coeff, mo_coeff_b_tmp)
    3076           32 :             CALL dbcsr_copy(mo_coeff_b(ispin), mo_coeff_b_tmp)
    3077              :          END DO
    3078              : 
    3079           32 :          DO ispin = 1, nspins
    3080           18 :             CALL dbcsr_scale(mo_coeff_b(ispin), SQRT(mos(ispin)%maxocc))
    3081           14 :             homo(ispin) = mos(ispin)%homo
    3082              :          END DO
    3083              : 
    3084           14 :          CALL hfx_ri_forces_mo(qs_env, ri_data, nspins, hf_fraction, mo_coeff_b, use_virial)
    3085              : 
    3086              :       CASE (ri_pmat)
    3087              : 
    3088              :          CALL hfx_ri_forces_Pmat(qs_env, ri_data, nspins, hf_fraction, rho_ao, rho_ao_resp, use_virial, &
    3089          220 :                                  resp_only, rescale_factor)
    3090              :       END SELECT
    3091              : 
    3092          280 :       DO ispin = 1, nspins
    3093          280 :          CALL dbcsr_release(mo_coeff_b(ispin))
    3094              :       END DO
    3095              : 
    3096          132 :       CALL timestop(handle)
    3097              : 
    3098          132 :    END SUBROUTINE hfx_ri_update_forces
    3099              : 
    3100              : ! **************************************************************************************************
    3101              : !> \brief Calculate the derivatives tensors for the force, in a format fit for contractions
    3102              : !> \param t_3c_der_RI_comp compressed RI derivatives
    3103              : !> \param t_3c_der_AO_comp compressed AO derivatives
    3104              : !> \param t_3c_der_RI_ind ...
    3105              : !> \param t_3c_der_AO_ind ...
    3106              : !> \param t_2c_der_RI format based on standard atomic block sizes
    3107              : !> \param t_2c_der_metric format based on standard atomic block sizes
    3108              : !> \param ri_ao_ao_template ...
    3109              : !> \param basis_set_AO ...
    3110              : !> \param basis_set_RI ...
    3111              : !> \param ri_data ...
    3112              : !> \param qs_env ...
    3113              : !> \param nl_2c_pot ...
    3114              : !> \param nl_2c_met ...
    3115              : !> \param nl_3c_out ...
    3116              : !> \param t_3c_virial ...
    3117              : ! **************************************************************************************************
    3118         4224 :    SUBROUTINE precalc_derivatives(t_3c_der_RI_comp, t_3c_der_AO_comp, t_3c_der_RI_ind, t_3c_der_AO_ind, &
    3119              :                                   t_2c_der_RI, t_2c_der_metric, ri_ao_ao_template, &
    3120              :                                   basis_set_AO, basis_set_RI, ri_data, qs_env, &
    3121              :                                   nl_2c_pot, nl_2c_met, nl_3c_out, t_3c_virial)
    3122              : 
    3123              :       TYPE(hfx_compression_type), ALLOCATABLE, &
    3124              :          DIMENSION(:, :), INTENT(INOUT)                  :: t_3c_der_RI_comp, t_3c_der_AO_comp
    3125              :       TYPE(block_ind_type), ALLOCATABLE, DIMENSION(:, :) :: t_3c_der_RI_ind, t_3c_der_AO_ind
    3126              :       TYPE(dbt_type), DIMENSION(3), INTENT(OUT)          :: t_2c_der_RI, t_2c_der_metric
    3127              :       TYPE(dbt_type), INTENT(INOUT)                      :: ri_ao_ao_template
    3128              :       TYPE(gto_basis_set_p_type), ALLOCATABLE, &
    3129              :          DIMENSION(:), TARGET                            :: basis_set_AO, basis_set_RI
    3130              :       TYPE(hfx_ri_type), INTENT(INOUT)                   :: ri_data
    3131              :       TYPE(qs_environment_type), POINTER                 :: qs_env
    3132              :       TYPE(neighbor_list_set_p_type), DIMENSION(:), &
    3133              :          OPTIONAL, POINTER                               :: nl_2c_pot, nl_2c_met
    3134              :       TYPE(neighbor_list_3c_type), OPTIONAL              :: nl_3c_out
    3135              :       TYPE(dbt_type), INTENT(INOUT), OPTIONAL            :: t_3c_virial
    3136              : 
    3137              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'precalc_derivatives'
    3138              : 
    3139              :       INTEGER                                            :: handle, i_mem, i_xyz, n_mem, natom, &
    3140              :                                                             nkind, nthreads
    3141              :       INTEGER(int_8)                                     :: nze, nze_tot
    3142          132 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: dist1, dist2, dist_AO_1, dist_AO_2, &
    3143          132 :                                                             dist_RI, dummy_end, dummy_start, &
    3144          264 :                                                             end_blocks, start_blocks
    3145              :       INTEGER, DIMENSION(3)                              :: pcoord, pdims
    3146          264 :       INTEGER, DIMENSION(:), POINTER                     :: col_bsize, row_bsize
    3147              :       REAL(dp)                                           :: compression_factor, memory, occ
    3148              :       TYPE(dbcsr_distribution_type)                      :: dbcsr_dist
    3149         1716 :       TYPE(dbcsr_type), DIMENSION(1, 3)                  :: t_2c_der_metric_prv, t_2c_der_RI_prv
    3150          396 :       TYPE(dbt_pgrid_type)                               :: pgrid
    3151         3432 :       TYPE(dbt_type)                                     :: t_2c_template, t_2c_tmp, t_3c_template
    3152          132 :       TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :, :)    :: t_3c_der_AO_prv, t_3c_der_RI_prv
    3153              :       TYPE(dft_control_type), POINTER                    :: dft_control
    3154              :       TYPE(distribution_2d_type), POINTER                :: dist_2d
    3155              :       TYPE(distribution_3d_type)                         :: dist_3d, dist_3d_out
    3156          132 :       TYPE(mp_cart_type)                                 :: mp_comm_t3c, mp_comm_t3c_out
    3157              :       TYPE(mp_para_env_type), POINTER                    :: para_env
    3158              :       TYPE(neighbor_list_3c_type)                        :: nl_3c
    3159              :       TYPE(neighbor_list_set_p_type), DIMENSION(:), &
    3160          132 :          POINTER                                         :: nl_2c
    3161          132 :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
    3162          132 :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
    3163              : 
    3164          132 :       NULLIFY (qs_kind_set, dist_2d, nl_2c, particle_set, dft_control, para_env)
    3165              : 
    3166          132 :       CALL timeset(routineN, handle)
    3167              : 
    3168              :       CALL get_qs_env(qs_env, nkind=nkind, qs_kind_set=qs_kind_set, distribution_2d=dist_2d, natom=natom, &
    3169          132 :                       particle_set=particle_set, dft_control=dft_control, para_env=para_env)
    3170              : 
    3171              :       !TODO: is such a pgrid correct?
    3172              :       !Dealing with the 3c derivatives
    3173          132 :       nthreads = 1
    3174          132 : !$    nthreads = omp_get_num_threads()
    3175          132 :       pdims = 0
    3176          528 :       CALL dbt_pgrid_create(para_env, pdims, pgrid, tensor_dims=[MAX(1, natom/(ri_data%n_mem*nthreads)), natom, natom])
    3177              : 
    3178              :       CALL create_3c_tensor(t_3c_template, dist_RI, dist_AO_1, dist_AO_2, pgrid, &
    3179              :                             ri_data%bsizes_RI, ri_data%bsizes_AO, ri_data%bsizes_AO, &
    3180              :                             map1=[1], map2=[2, 3], &
    3181          132 :                             name="der (RI AO | AO)")
    3182              : 
    3183         4620 :       ALLOCATE (t_3c_der_AO_prv(1, 1, 3), t_3c_der_RI_prv(1, 1, 3))
    3184          528 :       DO i_xyz = 1, 3
    3185          396 :          CALL dbt_create(t_3c_template, t_3c_der_RI_prv(1, 1, i_xyz))
    3186          528 :          CALL dbt_create(t_3c_template, t_3c_der_AO_prv(1, 1, i_xyz))
    3187              :       END DO
    3188          132 :       IF (PRESENT(t_3c_virial)) THEN
    3189            4 :          CALL dbt_create(t_3c_template, t_3c_virial)
    3190              :       END IF
    3191          132 :       CALL dbt_destroy(t_3c_template)
    3192              : 
    3193          132 :       CALL dbt_mp_environ_pgrid(pgrid, pdims, pcoord)
    3194          132 :       CALL mp_comm_t3c%create(pgrid%mp_comm_2d, 3, pdims)
    3195              :       CALL distribution_3d_create(dist_3d, dist_RI, dist_AO_1, dist_AO_2, &
    3196          132 :                                   nkind, particle_set, mp_comm_t3c, own_comm=.TRUE.)
    3197              : 
    3198              :       CALL build_3c_neighbor_lists(nl_3c, basis_set_RI, basis_set_AO, basis_set_AO, dist_3d, ri_data%ri_metric, &
    3199          132 :                                    "HFX_3c_nl", qs_env, op_pos=1, sym_jk=.TRUE., own_dist=.TRUE.)
    3200              : 
    3201          132 :       IF (PRESENT(nl_3c_out)) THEN
    3202            4 :          CALL dbt_mp_environ_pgrid(pgrid, pdims, pcoord)
    3203            4 :          CALL mp_comm_t3c_out%create(pgrid%mp_comm_2d, 3, pdims)
    3204              :          CALL distribution_3d_create(dist_3d_out, dist_RI, dist_AO_1, dist_AO_2, &
    3205            4 :                                      nkind, particle_set, mp_comm_t3c_out, own_comm=.TRUE.)
    3206              :          CALL build_3c_neighbor_lists(nl_3c_out, basis_set_RI, basis_set_AO, basis_set_AO, dist_3d_out, &
    3207              :                                       ri_data%ri_metric, "HFX_3c_nl", qs_env, op_pos=1, sym_jk=.FALSE., &
    3208            4 :                                       own_dist=.TRUE.)
    3209              :       END IF
    3210          132 :       DEALLOCATE (dist_RI, dist_AO_1, dist_AO_2)
    3211          132 :       CALL dbt_pgrid_destroy(pgrid)
    3212              : 
    3213          132 :       n_mem = ri_data%n_mem
    3214              :       CALL create_tensor_batches(ri_data%bsizes_RI, n_mem, dummy_start, dummy_end, &
    3215          132 :                                  start_blocks, end_blocks)
    3216          132 :       DEALLOCATE (dummy_start, dummy_end)
    3217              : 
    3218       561504 :       ALLOCATE (t_3c_der_AO_comp(n_mem, 3), t_3c_der_RI_comp(n_mem, 3))
    3219         3672 :       ALLOCATE (t_3c_der_AO_ind(n_mem, 3), t_3c_der_RI_ind(n_mem, 3))
    3220              : 
    3221          132 :       memory = 0.0_dp
    3222          132 :       nze_tot = 0
    3223          524 :       DO i_mem = 1, n_mem
    3224              :          CALL build_3c_derivatives(t_3c_der_RI_prv, t_3c_der_AO_prv, ri_data%filter_eps, qs_env, &
    3225              :                                    nl_3c, basis_set_RI, basis_set_AO, basis_set_AO, &
    3226              :                                    ri_data%ri_metric, der_eps=ri_data%eps_schwarz_forces, op_pos=1, &
    3227         1176 :                                    bounds_i=[start_blocks(i_mem), end_blocks(i_mem)])
    3228              : 
    3229         1700 :          DO i_xyz = 1, 3
    3230         1176 :             CALL dbt_copy(t_3c_der_RI_prv(1, 1, i_xyz), ri_ao_ao_template, move_data=.TRUE.)
    3231         1176 :             CALL dbt_filter(ri_ao_ao_template, ri_data%filter_eps)
    3232         1176 :             CALL get_tensor_occupancy(ri_ao_ao_template, nze, occ)
    3233         1176 :             nze_tot = nze_tot + nze
    3234              : 
    3235         1176 :             CALL alloc_containers(t_3c_der_RI_comp(i_mem, i_xyz), 1)
    3236              :             CALL compress_tensor(ri_ao_ao_template, t_3c_der_RI_ind(i_mem, i_xyz)%ind, &
    3237         1176 :                                  t_3c_der_RI_comp(i_mem, i_xyz), ri_data%filter_eps_storage, memory)
    3238         1176 :             CALL dbt_clear(ri_ao_ao_template)
    3239              : 
    3240              :             !put AO derivative as middle index
    3241         1176 :             CALL dbt_copy(t_3c_der_AO_prv(1, 1, i_xyz), ri_ao_ao_template, order=[1, 3, 2], move_data=.TRUE.)
    3242         1176 :             CALL dbt_filter(ri_ao_ao_template, ri_data%filter_eps)
    3243         1176 :             CALL get_tensor_occupancy(ri_ao_ao_template, nze, occ)
    3244         1176 :             nze_tot = nze_tot + nze
    3245              : 
    3246         1176 :             CALL alloc_containers(t_3c_der_AO_comp(i_mem, i_xyz), 1)
    3247              :             CALL compress_tensor(ri_ao_ao_template, t_3c_der_AO_ind(i_mem, i_xyz)%ind, &
    3248         1176 :                                  t_3c_der_AO_comp(i_mem, i_xyz), ri_data%filter_eps_storage, memory)
    3249         3920 :             CALL dbt_clear(ri_ao_ao_template)
    3250              :          END DO
    3251              :       END DO
    3252              : 
    3253          132 :       CALL neighbor_list_3c_destroy(nl_3c)
    3254          528 :       DO i_xyz = 1, 3
    3255          396 :          CALL dbt_destroy(t_3c_der_RI_prv(1, 1, i_xyz))
    3256          528 :          CALL dbt_destroy(t_3c_der_AO_prv(1, 1, i_xyz))
    3257              :       END DO
    3258              : 
    3259          132 :       CALL para_env%sum(memory)
    3260          132 :       compression_factor = REAL(nze_tot, dp)*1.0E-06*8.0_dp/memory
    3261          132 :       IF (ri_data%unit_nr > 0) THEN
    3262              :          WRITE (UNIT=ri_data%unit_nr, FMT="((T3,A,T66,F11.2,A4))") &
    3263           15 :             "MEMORY_INFO| Memory for 3-center HFX derivatives (compressed):", memory, ' MiB'
    3264              : 
    3265              :          WRITE (UNIT=ri_data%unit_nr, FMT="((T3,A,T60,F21.2))") &
    3266           15 :             "MEMORY_INFO| Compression factor:                  ", compression_factor
    3267              :       END IF
    3268              : 
    3269              :       !Deal with the 2-center derivatives
    3270          132 :       CALL cp_dbcsr_dist2d_to_dist(dist_2d, dbcsr_dist)
    3271          396 :       ALLOCATE (row_bsize(SIZE(ri_data%bsizes_RI)))
    3272          264 :       ALLOCATE (col_bsize(SIZE(ri_data%bsizes_RI)))
    3273          536 :       row_bsize(:) = ri_data%bsizes_RI
    3274          536 :       col_bsize(:) = ri_data%bsizes_RI
    3275              : 
    3276              :       CALL build_2c_neighbor_lists(nl_2c, basis_set_RI, basis_set_RI, ri_data%hfx_pot, &
    3277          132 :                                    "HFX_2c_nl_pot", qs_env, sym_ij=.TRUE., dist_2d=dist_2d)
    3278              : 
    3279          528 :       DO i_xyz = 1, 3
    3280              :          CALL dbcsr_create(t_2c_der_RI_prv(1, i_xyz), "(R|P) HFX der", dbcsr_dist, &
    3281          528 :                            dbcsr_type_antisymmetric, row_bsize, col_bsize)
    3282              :       END DO
    3283              : 
    3284              :       CALL build_2c_derivatives(t_2c_der_RI_prv, ri_data%filter_eps_2c, qs_env, nl_2c, basis_set_RI, &
    3285          132 :                                 basis_set_RI, ri_data%hfx_pot)
    3286          132 :       CALL release_neighbor_list_sets(nl_2c)
    3287              : 
    3288          132 :       IF (PRESENT(nl_2c_pot)) THEN
    3289            4 :          NULLIFY (nl_2c_pot)
    3290              :          CALL build_2c_neighbor_lists(nl_2c_pot, basis_set_RI, basis_set_RI, ri_data%hfx_pot, &
    3291            4 :                                       "HFX_2c_nl_pot", qs_env, sym_ij=.FALSE., dist_2d=dist_2d)
    3292              :       END IF
    3293              : 
    3294              :       !copy 2c derivative tensor into the standard format
    3295              :       CALL create_2c_tensor(t_2c_template, dist1, dist2, ri_data%pgrid_2d, ri_data%bsizes_RI_split, &
    3296          132 :                             ri_data%bsizes_RI_split, name='(RI| RI)')
    3297          132 :       DEALLOCATE (dist1, dist2)
    3298              : 
    3299          528 :       DO i_xyz = 1, 3
    3300          396 :          CALL dbt_create(t_2c_der_RI_prv(1, i_xyz), t_2c_tmp)
    3301          396 :          CALL dbt_copy_matrix_to_tensor(t_2c_der_RI_prv(1, i_xyz), t_2c_tmp)
    3302              : 
    3303          396 :          CALL dbt_create(t_2c_template, t_2c_der_RI(i_xyz))
    3304          396 :          CALL dbt_copy(t_2c_tmp, t_2c_der_RI(i_xyz), move_data=.TRUE.)
    3305              : 
    3306          396 :          CALL dbt_destroy(t_2c_tmp)
    3307          528 :          CALL dbcsr_release(t_2c_der_RI_prv(1, i_xyz))
    3308              :       END DO
    3309              : 
    3310              :       !Repeat with the metric, if required
    3311          132 :       IF (.NOT. ri_data%same_op) THEN
    3312              : 
    3313              :          CALL build_2c_neighbor_lists(nl_2c, basis_set_RI, basis_set_RI, ri_data%ri_metric, &
    3314           34 :                                       "HFX_2c_nl_RI", qs_env, sym_ij=.TRUE., dist_2d=dist_2d)
    3315              : 
    3316          136 :          DO i_xyz = 1, 3
    3317              :             CALL dbcsr_create(t_2c_der_metric_prv(1, i_xyz), "(R|P) HFX der", dbcsr_dist, &
    3318          136 :                               dbcsr_type_antisymmetric, row_bsize, col_bsize)
    3319              :          END DO
    3320              : 
    3321              :          CALL build_2c_derivatives(t_2c_der_metric_prv, ri_data%filter_eps_2c, qs_env, nl_2c, &
    3322           34 :                                    basis_set_RI, basis_set_RI, ri_data%ri_metric)
    3323           34 :          CALL release_neighbor_list_sets(nl_2c)
    3324              : 
    3325           34 :          IF (PRESENT(nl_2c_met)) THEN
    3326            0 :             NULLIFY (nl_2c_met)
    3327              :             CALL build_2c_neighbor_lists(nl_2c_met, basis_set_RI, basis_set_RI, ri_data%ri_metric, &
    3328            0 :                                          "HFX_2c_nl_RI", qs_env, sym_ij=.FALSE., dist_2d=dist_2d)
    3329              :          END IF
    3330              : 
    3331          136 :          DO i_xyz = 1, 3
    3332          102 :             CALL dbt_create(t_2c_der_metric_prv(1, i_xyz), t_2c_tmp)
    3333          102 :             CALL dbt_copy_matrix_to_tensor(t_2c_der_metric_prv(1, i_xyz), t_2c_tmp)
    3334              : 
    3335          102 :             CALL dbt_create(t_2c_template, t_2c_der_metric(i_xyz))
    3336          102 :             CALL dbt_copy(t_2c_tmp, t_2c_der_metric(i_xyz), move_data=.TRUE.)
    3337              : 
    3338          102 :             CALL dbt_destroy(t_2c_tmp)
    3339          136 :             CALL dbcsr_release(t_2c_der_metric_prv(1, i_xyz))
    3340              :          END DO
    3341              : 
    3342              :       END IF
    3343              : 
    3344          132 :       CALL dbt_destroy(t_2c_template)
    3345          132 :       CALL dbcsr_distribution_release(dbcsr_dist)
    3346          132 :       DEALLOCATE (row_bsize, col_bsize)
    3347              : 
    3348          132 :       CALL timestop(handle)
    3349              : 
    3350         1716 :    END SUBROUTINE precalc_derivatives
    3351              : 
    3352              : ! **************************************************************************************************
    3353              : !> \brief This routines calculates the force contribution from a trace over 3D tensors, i.e.
    3354              : !>        force = sum_ijk A_ijk B_ijk. An iteration over the blocks is made, which index determin
    3355              : !>        the atom on which the force acts
    3356              : !> \param force ...
    3357              : !> \param t_3c_contr ...
    3358              : !> \param t_3c_der ...
    3359              : !> \param atom_of_kind ...
    3360              : !> \param kind_of ...
    3361              : !> \param idx_to_at ...
    3362              : !> \param pref ...
    3363              : !> \param do_mp2 ...
    3364              : !> \param deriv_dim the dimension of the tensor corresponding to the derivative (by default 1)
    3365              : ! **************************************************************************************************
    3366         3894 :    SUBROUTINE get_force_from_3c_trace(force, t_3c_contr, t_3c_der, atom_of_kind, kind_of, idx_to_at, &
    3367              :                                       pref, do_mp2, deriv_dim)
    3368              : 
    3369              :       TYPE(qs_force_type), DIMENSION(:), POINTER         :: force
    3370              :       TYPE(dbt_type), INTENT(INOUT)                      :: t_3c_contr
    3371              :       TYPE(dbt_type), DIMENSION(3), INTENT(INOUT)        :: t_3c_der
    3372              :       INTEGER, DIMENSION(:), INTENT(IN)                  :: atom_of_kind, kind_of, idx_to_at
    3373              :       REAL(dp), INTENT(IN)                               :: pref
    3374              :       LOGICAL, INTENT(IN), OPTIONAL                      :: do_mp2
    3375              :       INTEGER, INTENT(IN), OPTIONAL                      :: deriv_dim
    3376              : 
    3377              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'get_force_from_3c_trace'
    3378              : 
    3379              :       INTEGER                                            :: deriv_dim_prv, handle, i_xyz, iat, &
    3380              :                                                             iat_of_kind, ikind, j_xyz
    3381              :       INTEGER, DIMENSION(3)                              :: ind
    3382              :       LOGICAL                                            :: do_mp2_prv, found
    3383              :       REAL(dp)                                           :: new_force
    3384         3894 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :, :), TARGET  :: contr_blk, der_blk
    3385              :       REAL(dp), DIMENSION(3)                             :: scoord
    3386              :       TYPE(dbt_iterator_type)                            :: iter
    3387              : 
    3388         3894 :       CALL timeset(routineN, handle)
    3389              : 
    3390         3894 :       do_mp2_prv = .FALSE.
    3391         3894 :       IF (PRESENT(do_mp2)) do_mp2_prv = do_mp2
    3392              : 
    3393         3894 :       deriv_dim_prv = 1
    3394         3894 :       IF (PRESENT(deriv_dim)) deriv_dim_prv = deriv_dim
    3395              : 
    3396              : !$OMP PARALLEL DEFAULT(NONE) &
    3397              : !$OMP SHARED(t_3c_der,t_3c_contr,force,do_mp2_prv,deriv_dim_prv,pref,idx_to_at,atom_of_kind,kind_of) &
    3398         3894 : !$OMP PRIVATE(i_xyz,j_xyz,iter,ind,der_blk,contr_blk,found,new_force,iat,iat_of_kind,ikind,scoord)
    3399              :       DO i_xyz = 1, 3
    3400              :          CALL dbt_iterator_start(iter, t_3c_der(i_xyz))
    3401              :          DO WHILE (dbt_iterator_blocks_left(iter))
    3402              :             CALL dbt_iterator_next_block(iter, ind)
    3403              : 
    3404              :             CALL dbt_get_block(t_3c_der(i_xyz), ind, der_blk, found)
    3405              :             CPASSERT(found)
    3406              :             CALL dbt_get_block(t_3c_contr, ind, contr_blk, found)
    3407              : 
    3408              :             IF (found) THEN
    3409              : 
    3410              :                !take the trace of the blocks
    3411              :                new_force = pref*SUM(der_blk(:, :, :)*contr_blk(:, :, :))
    3412              : 
    3413              :                !the first index of the derivative tensor defines the atom
    3414              :                iat = idx_to_at(ind(deriv_dim_prv))
    3415              :                iat_of_kind = atom_of_kind(iat)
    3416              :                ikind = kind_of(iat)
    3417              : 
    3418              :                IF (.NOT. do_mp2_prv) THEN
    3419              : !$OMP ATOMIC
    3420              :                   force(ikind)%fock_4c(i_xyz, iat_of_kind) = force(ikind)%fock_4c(i_xyz, iat_of_kind) &
    3421              :                                                              + new_force
    3422              :                ELSE
    3423              : !$OMP ATOMIC
    3424              :                   force(ikind)%mp2_non_sep(i_xyz, iat_of_kind) = force(ikind)%mp2_non_sep(i_xyz, iat_of_kind) &
    3425              :                                                                  + new_force
    3426              :                END IF
    3427              : 
    3428              :                DEALLOCATE (contr_blk)
    3429              :             END IF
    3430              :             DEALLOCATE (der_blk)
    3431              :          END DO !iter
    3432              :          CALL dbt_iterator_stop(iter)
    3433              :       END DO
    3434              : !$OMP END PARALLEL
    3435         3894 :       CALL timestop(handle)
    3436              : 
    3437         7788 :    END SUBROUTINE get_force_from_3c_trace
    3438              : 
    3439              : ! **************************************************************************************************
    3440              : !> \brief Update the forces due to the derivative of the a 2-center product d/dR (Q|R)
    3441              : !> \param force ...
    3442              : !> \param t_2c_contr A precontracted tensor containing sum_abcdPS (ab|P)(P|Q)^-1 (R|S)^-1 (S|cd) P_ac P_bd
    3443              : !> \param t_2c_der the d/dR (Q|R) tensor, in all 3 cartesian directions
    3444              : !> \param atom_of_kind ...
    3445              : !> \param kind_of ...
    3446              : !> \param idx_to_at ...
    3447              : !> \param pref ...
    3448              : !> \param do_mp2 ...
    3449              : !> \param do_ovlp ...
    3450              : !> \note IMPORTANT: t_tc_contr and t_2c_der need to have the same distribution
    3451              : ! **************************************************************************************************
    3452          618 :    SUBROUTINE get_2c_der_force(force, t_2c_contr, t_2c_der, atom_of_kind, kind_of, idx_to_at, &
    3453              :                                pref, do_mp2, do_ovlp)
    3454              : 
    3455              :       TYPE(qs_force_type), DIMENSION(:), POINTER         :: force
    3456              :       TYPE(dbt_type), INTENT(INOUT)                      :: t_2c_contr
    3457              :       TYPE(dbt_type), DIMENSION(3), INTENT(INOUT)        :: t_2c_der
    3458              :       INTEGER, DIMENSION(:), INTENT(IN)                  :: atom_of_kind, kind_of, idx_to_at
    3459              :       REAL(dp), INTENT(IN)                               :: pref
    3460              :       LOGICAL, INTENT(IN), OPTIONAL                      :: do_mp2, do_ovlp
    3461              : 
    3462              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'get_2c_der_force'
    3463              : 
    3464              :       INTEGER                                            :: handle, i_xyz, iat, iat_of_kind, ikind, &
    3465              :                                                             j_xyz, jat, jat_of_kind, jkind
    3466              :       INTEGER, DIMENSION(2)                              :: ind
    3467              :       LOGICAL                                            :: do_mp2_prv, do_ovlp_prv, found
    3468              :       REAL(dp)                                           :: new_force
    3469          618 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :), TARGET     :: contr_blk, der_blk
    3470              :       REAL(dp), DIMENSION(3)                             :: scoord
    3471              :       TYPE(dbt_iterator_type)                            :: iter
    3472              : 
    3473              :       !Loop over the blocks of d/dR (Q|R), contract with the corresponding block of t_2c_contr and
    3474              :       !update the relevant force
    3475              : 
    3476          618 :       CALL timeset(routineN, handle)
    3477              : 
    3478          618 :       do_mp2_prv = .FALSE.
    3479          618 :       IF (PRESENT(do_mp2)) do_mp2_prv = do_mp2
    3480              : 
    3481          618 :       do_ovlp_prv = .FALSE.
    3482          618 :       IF (PRESENT(do_ovlp)) do_ovlp_prv = do_ovlp
    3483              : 
    3484              : !$OMP PARALLEL DEFAULT(NONE) &
    3485              : !$OMP SHARED(t_2c_der,t_2c_contr,force,do_mp2_prv,do_ovlp_prv,pref,idx_to_at,atom_of_kind,kind_of) &
    3486              : !$OMP PRIVATE(i_xyz,j_xyz,iter,ind,der_blk,contr_blk,found,new_force) &
    3487          618 : !$OMP PRIVATE(iat,jat,iat_of_kind,jat_of_kind,ikind,jkind,scoord)
    3488              :       DO i_xyz = 1, 3
    3489              :          CALL dbt_iterator_start(iter, t_2c_der(i_xyz))
    3490              :          DO WHILE (dbt_iterator_blocks_left(iter))
    3491              :             CALL dbt_iterator_next_block(iter, ind)
    3492              : 
    3493              :             IF (ind(1) == ind(2)) CYCLE
    3494              : 
    3495              :             CALL dbt_get_block(t_2c_der(i_xyz), ind, der_blk, found)
    3496              :             CPASSERT(found)
    3497              :             CALL dbt_get_block(t_2c_contr, ind, contr_blk, found)
    3498              : 
    3499              :             IF (found) THEN
    3500              : 
    3501              :                !an element of d/dR (Q|R) corresponds to 2 things because of translational invariance
    3502              :                !(Q'| R) = - (Q| R'), once wrt the center on Q, and once on R
    3503              :                new_force = pref*SUM(der_blk(:, :)*contr_blk(:, :))
    3504              : 
    3505              :                iat = idx_to_at(ind(1))
    3506              :                iat_of_kind = atom_of_kind(iat)
    3507              :                ikind = kind_of(iat)
    3508              : 
    3509              :                IF (do_mp2_prv) THEN
    3510              : !$OMP ATOMIC
    3511              :                   force(ikind)%mp2_non_sep(i_xyz, iat_of_kind) = force(ikind)%mp2_non_sep(i_xyz, iat_of_kind) &
    3512              :                                                                  + new_force
    3513              :                ELSE IF (do_ovlp_prv) THEN
    3514              : !$OMP ATOMIC
    3515              :                   force(ikind)%overlap(i_xyz, iat_of_kind) = force(ikind)%overlap(i_xyz, iat_of_kind) &
    3516              :                                                              + new_force
    3517              :                ELSE
    3518              : !$OMP ATOMIC
    3519              :                   force(ikind)%fock_4c(i_xyz, iat_of_kind) = force(ikind)%fock_4c(i_xyz, iat_of_kind) &
    3520              :                                                              + new_force
    3521              :                END IF
    3522              : 
    3523              :                jat = idx_to_at(ind(2))
    3524              :                jat_of_kind = atom_of_kind(jat)
    3525              :                jkind = kind_of(jat)
    3526              : 
    3527              :                IF (do_mp2_prv) THEN
    3528              : !$OMP ATOMIC
    3529              :                   force(jkind)%mp2_non_sep(i_xyz, jat_of_kind) = force(jkind)%mp2_non_sep(i_xyz, jat_of_kind) &
    3530              :                                                                  - new_force
    3531              :                ELSE IF (do_ovlp_prv) THEN
    3532              : !$OMP ATOMIC
    3533              :                   force(jkind)%overlap(i_xyz, jat_of_kind) = force(jkind)%overlap(i_xyz, jat_of_kind) &
    3534              :                                                              - new_force
    3535              :                ELSE
    3536              : !$OMP ATOMIC
    3537              :                   force(jkind)%fock_4c(i_xyz, jat_of_kind) = force(jkind)%fock_4c(i_xyz, jat_of_kind) &
    3538              :                                                              - new_force
    3539              :                END IF
    3540              : 
    3541              :                DEALLOCATE (contr_blk)
    3542              :             END IF
    3543              : 
    3544              :             DEALLOCATE (der_blk)
    3545              :          END DO !iter
    3546              :          CALL dbt_iterator_stop(iter)
    3547              : 
    3548              :       END DO !i_xyz
    3549              : !$OMP END PARALLEL
    3550          618 :       CALL timestop(handle)
    3551              : 
    3552         1236 :    END SUBROUTINE get_2c_der_force
    3553              : 
    3554              : ! **************************************************************************************************
    3555              : !> \brief Get the force from a contraction of type SUM_a,beta (a|beta') C_a,beta, where beta is an AO
    3556              : !>        and a is a MO
    3557              : !> \param force ...
    3558              : !> \param t_mo_coeff ...
    3559              : !> \param t_2c_MO_AO ...
    3560              : !> \param atom_of_kind ...
    3561              : !> \param kind_of ...
    3562              : !> \param idx_to_at ...
    3563              : !> \param pref ...
    3564              : !> \param i_xyz ...
    3565              : ! **************************************************************************************************
    3566           54 :    SUBROUTINE get_MO_AO_force(force, t_mo_coeff, t_2c_MO_AO, atom_of_kind, kind_of, idx_to_at, pref, i_xyz)
    3567              : 
    3568              :       TYPE(qs_force_type), DIMENSION(:), POINTER         :: force
    3569              :       TYPE(dbt_type), INTENT(INOUT)                      :: t_mo_coeff, t_2c_MO_AO
    3570              :       INTEGER, DIMENSION(:), INTENT(IN)                  :: atom_of_kind, kind_of, idx_to_at
    3571              :       REAL(dp), INTENT(IN)                               :: pref
    3572              :       INTEGER, INTENT(IN)                                :: i_xyz
    3573              : 
    3574              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'get_MO_AO_force'
    3575              : 
    3576              :       INTEGER                                            :: handle, iat, iat_of_kind, ikind, j_xyz
    3577              :       INTEGER, DIMENSION(2)                              :: ind
    3578              :       LOGICAL                                            :: found
    3579              :       REAL(dp)                                           :: new_force
    3580           54 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :), TARGET     :: mo_ao_blk, mo_coeff_blk
    3581              :       REAL(dp), DIMENSION(3)                             :: scoord
    3582              :       TYPE(dbt_iterator_type)                            :: iter
    3583              : 
    3584           54 :       CALL timeset(routineN, handle)
    3585              : 
    3586              : !$OMP PARALLEL DEFAULT(NONE) &
    3587              : !$OMP SHARED(t_2c_MO_AO,t_mo_coeff,pref,force,idx_to_at,atom_of_kind,kind_of,i_xyz) &
    3588           54 : !$OMP PRIVATE(iter,ind,mo_ao_blk,mo_coeff_blk,found,new_force,iat,iat_of_kind,ikind,scoord,j_xyz)
    3589              :       CALL dbt_iterator_start(iter, t_2c_MO_AO)
    3590              :       DO WHILE (dbt_iterator_blocks_left(iter))
    3591              :          CALL dbt_iterator_next_block(iter, ind)
    3592              : 
    3593              :          CALL dbt_get_block(t_2c_MO_AO, ind, mo_ao_blk, found)
    3594              :          CPASSERT(found)
    3595              :          CALL dbt_get_block(t_mo_coeff, ind, mo_coeff_blk, found)
    3596              : 
    3597              :          IF (found) THEN
    3598              : 
    3599              :             new_force = pref*SUM(mo_ao_blk(:, :)*mo_coeff_blk(:, :))
    3600              : 
    3601              :             iat = idx_to_at(ind(2)) !AO index is column index
    3602              :             iat_of_kind = atom_of_kind(iat)
    3603              :             ikind = kind_of(iat)
    3604              : 
    3605              : !$OMP ATOMIC
    3606              :             force(ikind)%fock_4c(i_xyz, iat_of_kind) = force(ikind)%fock_4c(i_xyz, iat_of_kind) &
    3607              :                                                        + new_force
    3608              : 
    3609              :             DEALLOCATE (mo_coeff_blk)
    3610              :          END IF
    3611              : 
    3612              :          DEALLOCATE (mo_ao_blk)
    3613              :       END DO !iter
    3614              :       CALL dbt_iterator_stop(iter)
    3615              : !$OMP END PARALLEL
    3616              : 
    3617           54 :       CALL timestop(handle)
    3618              : 
    3619          108 :    END SUBROUTINE get_MO_AO_force
    3620              : 
    3621              : ! **************************************************************************************************
    3622              : !> \brief Print RI-HFX quantities, as required by the PRINT subsection
    3623              : !> \param ri_data ...
    3624              : !> \param qs_env ...
    3625              : ! **************************************************************************************************
    3626          166 :    SUBROUTINE print_ri_hfx(ri_data, qs_env)
    3627              : 
    3628              :       TYPE(hfx_ri_type), INTENT(INOUT)                   :: ri_data
    3629              :       TYPE(qs_environment_type), POINTER                 :: qs_env
    3630              : 
    3631              :       CHARACTER(Len=2)                                   :: symbol
    3632              :       CHARACTER(Len=8)                                   :: rifmt
    3633              :       INTEGER                                            :: atype, i_RI, ia, ib, ibasis, ikind, &
    3634              :                                                             iset, isgf, ishell, iso, l, m, natom, &
    3635              :                                                             ncols, nkind, nrows, nset, nsgf, &
    3636              :                                                             nspins, unit_nr
    3637              :       INTEGER, DIMENSION(3)                              :: periodic
    3638          166 :       INTEGER, DIMENSION(:), POINTER                     :: npgf, nshell
    3639          166 :       INTEGER, DIMENSION(:, :), POINTER                  :: lshell
    3640              :       LOGICAL                                            :: mult_by_S, print_density, &
    3641              :                                                             print_ri_metric, skip_ri_metric
    3642          166 :       REAL(dp), ALLOCATABLE, DIMENSION(:)                :: density_coeffs, density_coeffs_2
    3643              :       REAL(dp), DIMENSION(3, 3)                          :: hmat
    3644          166 :       REAL(dp), DIMENSION(:, :), POINTER                 :: zet
    3645          166 :       REAL(dp), DIMENSION(:, :, :), POINTER              :: gcc
    3646              :       TYPE(cell_type), POINTER                           :: cell
    3647              :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
    3648              :       TYPE(cp_fm_struct_type), POINTER                   :: fm_struct
    3649              :       TYPE(cp_fm_type)                                   :: matrix_s_fm
    3650              :       TYPE(cp_logger_type), POINTER                      :: logger
    3651          166 :       TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER       :: rho_ao
    3652          332 :       TYPE(dbcsr_type), DIMENSION(1)                     :: matrix_s
    3653              :       TYPE(dft_control_type), POINTER                    :: dft_control
    3654              :       TYPE(gto_basis_set_p_type), ALLOCATABLE, &
    3655          166 :          DIMENSION(:), TARGET                            :: basis_set_AO, basis_set_RI
    3656              :       TYPE(gto_basis_set_type), POINTER                  :: orb_basis, ri_basis
    3657              :       TYPE(mp_para_env_type), POINTER                    :: para_env
    3658          166 :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
    3659          166 :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
    3660              :       TYPE(qs_rho_type), POINTER                         :: rho
    3661              :       TYPE(section_vals_type), POINTER                   :: input, print_section
    3662              : 
    3663          166 :       NULLIFY (rho_ao, input, print_section, logger, rho, particle_set, qs_kind_set, ri_basis, &
    3664          166 :                para_env, blacs_env, fm_struct, orb_basis, dft_control)
    3665              : 
    3666          166 :       CALL get_qs_env(qs_env, input=input, dft_control=dft_control)
    3667          166 :       logger => cp_get_default_logger()
    3668          166 :       print_density = .FALSE.
    3669          166 :       print_ri_metric = .FALSE.
    3670              : 
    3671              :       !Do we print the RI density coeffs  and/or RI_metric 2c integrals?
    3672          166 :       print_section => section_vals_get_subs_vals(input, "DFT%XC%HF%RI%PRINT")
    3673          166 :       IF (BTEST(cp_print_key_should_output(logger%iter_info, print_section, "RI_DENSITY_COEFFS"), &
    3674            0 :                 cp_p_file)) print_density = .TRUE.
    3675          166 :       IF (BTEST(cp_print_key_should_output(logger%iter_info, print_section, "RI_METRIC_2C_INTS"), &
    3676            0 :                 cp_p_file)) print_ri_metric = .TRUE.
    3677              : 
    3678              :       !common stuff
    3679          166 :       IF (print_density .OR. print_ri_metric) THEN
    3680              : 
    3681              :          !Set up basis sets and interaction radii
    3682            0 :          CALL get_qs_env(qs_env, nkind=nkind, qs_kind_set=qs_kind_set, particle_set=particle_set)
    3683            0 :          ALLOCATE (basis_set_RI(nkind), basis_set_AO(nkind))
    3684            0 :          CALL basis_set_list_setup(basis_set_RI, ri_data%ri_basis_type, qs_kind_set)
    3685            0 :          CALL get_particle_set(particle_set, qs_kind_set, basis=basis_set_RI)
    3686            0 :          CALL basis_set_list_setup(basis_set_AO, ri_data%orb_basis_type, qs_kind_set)
    3687            0 :          CALL get_particle_set(particle_set, qs_kind_set, basis=basis_set_AO)
    3688              : 
    3689            0 :          DO ibasis = 1, nkind
    3690            0 :             ri_basis => basis_set_RI(ibasis)%gto_basis_set
    3691            0 :             CALL init_interaction_radii_orb_basis(ri_basis, ri_data%eps_pgf_orb)
    3692            0 :             orb_basis => basis_set_AO(ibasis)%gto_basis_set
    3693            0 :             CALL init_interaction_radii_orb_basis(orb_basis, ri_data%eps_pgf_orb)
    3694              :          END DO
    3695              :       END IF
    3696              : 
    3697            0 :       IF (print_density) THEN
    3698            0 :          CALL get_qs_env(qs_env, rho=rho)
    3699            0 :          CALL qs_rho_get(rho, rho_ao_kp=rho_ao)
    3700            0 :          nspins = SIZE(rho_ao, 1)
    3701              : 
    3702            0 :          CALL section_vals_val_get(print_section, "RI_DENSITY_COEFFS%MULTIPLY_BY_RI_2C_INTEGRALS", l_val=mult_by_s)
    3703            0 :          CALL section_vals_val_get(print_section, "RI_DENSITY_COEFFS%SKIP_RI_METRIC", l_val=skip_ri_metric)
    3704              : 
    3705            0 :          IF (mult_by_s .AND. skip_ri_metric) THEN
    3706            0 :             CPABORT("MULTIPLY_BY_RI_2C_INTEGRALS and SKIP_RI_METRIC are mutually exclusive.")
    3707              :          END IF
    3708              : 
    3709              :          CALL get_RI_density_coeffs(density_coeffs, rho_ao, 1, basis_set_AO, basis_set_RI, &
    3710            0 :                                     mult_by_s, skip_ri_metric, ri_data, qs_env)
    3711            0 :          IF (nspins == 2) &
    3712              :             CALL get_RI_density_coeffs(density_coeffs_2, rho_ao, 2, basis_set_AO, basis_set_RI, &
    3713            0 :                                        mult_by_s, skip_ri_metric, ri_data, qs_env)
    3714              : 
    3715              :          unit_nr = cp_print_key_unit_nr(logger, input, "DFT%XC%HF%RI%PRINT%RI_DENSITY_COEFFS", &
    3716              :                                         extension=".dat", file_status="REPLACE", &
    3717            0 :                                         file_action="WRITE", file_form="FORMATTED")
    3718              : 
    3719            0 :          CALL section_vals_val_get(print_section, "RI_DENSITY_COEFFS%FILE_FORMAT", c_val=rifmt)
    3720            0 :          CALL uppercase(rifmt)
    3721              : 
    3722            0 :          IF (unit_nr > 0) THEN
    3723            0 :             SELECT CASE (rifmt)
    3724              :             CASE DEFAULT
    3725            0 :                CPABORT("NA")
    3726              :             CASE ("BASIC")
    3727            0 :                IF (nspins == 1) THEN
    3728              :                   WRITE (unit_nr, FMT="(A,A,A)") &
    3729            0 :                      "# Coefficients of the electronic density projected on the RI_HFX basis for ", &
    3730            0 :                      TRIM(logger%iter_info%project_name), " project"
    3731            0 :                   DO i_RI = 1, SIZE(density_coeffs)
    3732            0 :                      WRITE (unit_nr, FMT="(F20.12)") density_coeffs(i_RI)
    3733              :                   END DO
    3734              :                ELSE
    3735              :                   WRITE (unit_nr, FMT="(A,A,A)") &
    3736            0 :                      "# Coefficients of the electronic density projected on the RI_HFX basis for ", &
    3737            0 :                      TRIM(logger%iter_info%project_name), " project. Spin up, spin down"
    3738            0 :                   DO i_RI = 1, SIZE(density_coeffs)
    3739            0 :                      WRITE (unit_nr, FMT="(F20.12,F20.12)") density_coeffs(i_RI), density_coeffs_2(i_RI)
    3740              :                   END DO
    3741              :                END IF
    3742              :             CASE ("EXTENDED")
    3743              :                WRITE (unit_nr, FMT="(A,A,A)") &
    3744            0 :                   "# Coefficients of the electronic density projected on the RI_HFX basis for ", &
    3745            0 :                   TRIM(logger%iter_info%project_name), " project"
    3746              : 
    3747            0 :                CALL get_qs_env(qs_env, cell=cell, particle_set=particle_set)
    3748            0 :                CALL get_cell(cell, periodic=periodic, h=hmat)
    3749            0 :                natom = SIZE(particle_set)
    3750            0 :                ib = 0
    3751            0 :                DO ia = 1, natom
    3752            0 :                   CALL get_atomic_kind(particle_set(ia)%atomic_kind, kind_number=ikind)
    3753            0 :                   ri_basis => basis_set_RI(ikind)%gto_basis_set
    3754            0 :                   CALL get_gto_basis_set(gto_basis_set=ri_basis, nsgf=nsgf)
    3755            0 :                   DO ibasis = 1, nsgf
    3756            0 :                      ib = ib + 1
    3757            0 :                      IF (nspins == 1) THEN
    3758            0 :                         WRITE (unit_nr, FMT="(I10,3I7,F20.12)") ib, ia, ikind, ibasis, &
    3759            0 :                            density_coeffs(ib)
    3760              :                      ELSE
    3761            0 :                         WRITE (unit_nr, FMT="(I10,3I7,F20.12,F20.12)") ib, ia, ikind, ibasis, &
    3762            0 :                            density_coeffs(ib), density_coeffs_2(ib)
    3763              :                      END IF
    3764              :                   END DO
    3765              :                END DO
    3766            0 :                WRITE (unit_nr, FMT="(A)") "# Cell Periodicity "
    3767            0 :                WRITE (unit_nr, FMT="(3I5)") periodic
    3768            0 :                WRITE (unit_nr, FMT="(A)") "# Cell Matrix [Bohr]"
    3769            0 :                WRITE (unit_nr, FMT="(3F20.12)") hmat(1, 1:3)
    3770            0 :                WRITE (unit_nr, FMT="(3F20.12)") hmat(2, 1:3)
    3771            0 :                WRITE (unit_nr, FMT="(3F20.12)") hmat(3, 1:3)
    3772            0 :                WRITE (unit_nr, FMT="(A)") "# El  Type   Number                        Coordinates [Bohr]"
    3773            0 :                DO ia = 1, natom
    3774              :                   CALL get_atomic_kind(atomic_kind=particle_set(ia)%atomic_kind, &
    3775            0 :                                        kind_number=atype, element_symbol=symbol)
    3776            0 :                   WRITE (unit_nr, FMT="(2X,A2,I5,I10,3F20.12)") symbol, atype, ia, particle_set(ia)%r(1:3)
    3777              :                END DO
    3778            0 :                WRITE (unit_nr, FMT="(A)") "# Basis Set Information"
    3779            0 :                DO ibasis = 1, nkind
    3780            0 :                   ri_basis => basis_set_RI(ibasis)%gto_basis_set
    3781              :                   CALL get_gto_basis_set(gto_basis_set=ri_basis, nsgf=nsgf, npgf=npgf, &
    3782              :                                          zet=zet, gcc=gcc, &
    3783            0 :                                          nset=nset, nshell=nshell, l=lshell)
    3784            0 :                   WRITE (unit_nr, FMT="(A)") "# Basis      Functions"
    3785            0 :                   WRITE (unit_nr, FMT="(I7,I15)") ibasis, nsgf
    3786            0 :                   WRITE (unit_nr, FMT="(A)") "#  Nr.      l       m     set   shell "
    3787            0 :                   isgf = 0
    3788            0 :                   DO iset = 1, nset
    3789            0 :                      DO ishell = 1, nshell(iset)
    3790            0 :                         l = lshell(ishell, iset)
    3791            0 :                         DO iso = 1, nso(l)
    3792            0 :                            isgf = isgf + 1
    3793            0 :                            m = iso - 1 - l
    3794            0 :                            WRITE (unit_nr, FMT="(I6,I7,I8,2I8)") isgf, l, m, iset, ishell
    3795              :                         END DO
    3796              :                      END DO
    3797              :                   END DO
    3798            0 :                   WRITE (unit_nr, FMT="(A)") "#  Basis set exponents and contractions "
    3799            0 :                   DO iset = 1, nset
    3800            0 :                      WRITE (unit_nr, FMT="(I7)") iset
    3801            0 :                      WRITE (unit_nr, FMT="(A)") "#  Exponent    Shells ...  "
    3802            0 :                      DO m = 1, npgf(iset)
    3803            0 :                         WRITE (unit_nr, FMT="(E18.12,50F18.12)") zet(m, iset), gcc(m, 1:nshell(iset), iset)
    3804              :                      END DO
    3805              :                   END DO
    3806              : 
    3807              :                END DO
    3808              :             END SELECT
    3809              :          END IF
    3810              : 
    3811            0 :          CALL cp_print_key_finished_output(unit_nr, logger, input, "DFT%XC%HF%RI%PRINT%RI_DENSITY_COEFFS")
    3812              :       END IF
    3813              : 
    3814          166 :       IF (print_ri_metric) THEN
    3815              :          !Recalculated the RI_metric 2c-integrals, as it is cheap, and not stored
    3816            0 :          CALL calc_RI_2c_ints(matrix_s, basis_set_RI, ri_data, qs_env)
    3817              : 
    3818              :          !convert 2c integrals to fm for dumping
    3819            0 :          CALL get_qs_env(qs_env, para_env=para_env, blacs_env=blacs_env)
    3820            0 :          CALL dbcsr_get_info(matrix_s(1), nfullrows_total=nrows, nfullcols_total=ncols)
    3821              :          CALL cp_fm_struct_create(fm_struct, context=blacs_env, para_env=para_env, &
    3822            0 :                                   nrow_global=nrows, ncol_global=ncols)
    3823            0 :          CALL cp_fm_create(matrix_s_fm, fm_struct)
    3824              : 
    3825            0 :          CALL copy_dbcsr_to_fm(matrix_s(1), matrix_s_fm)
    3826            0 :          CALL dbcsr_release(matrix_s(1))
    3827              : 
    3828              :          unit_nr = cp_print_key_unit_nr(logger, input, "DFT%XC%HF%RI%PRINT%RI_METRIC_2C_INTS", &
    3829              :                                         extension=".fm", file_status="REPLACE", &
    3830            0 :                                         file_action="WRITE", file_form="UNFORMATTED")
    3831            0 :          CALL cp_fm_write_unformatted(matrix_s_fm, unit_nr)
    3832              : 
    3833            0 :          CALL cp_print_key_finished_output(unit_nr, logger, input, "DFT%XC%HF%RI%PRINT%RI_METRIC_2C_INTS")
    3834              : 
    3835            0 :          CALL cp_fm_struct_release(fm_struct)
    3836            0 :          CALL cp_fm_release(matrix_s_fm)
    3837              :       END IF
    3838              : 
    3839              :       !clean-up
    3840          166 :       IF (print_density .OR. print_ri_metric) THEN
    3841            0 :          DO ibasis = 1, nkind
    3842            0 :             ri_basis => basis_set_RI(ibasis)%gto_basis_set
    3843            0 :             CALL init_interaction_radii_orb_basis(ri_basis, dft_control%qs_control%eps_pgf_orb)
    3844            0 :             orb_basis => basis_set_AO(ibasis)%gto_basis_set
    3845            0 :             CALL init_interaction_radii_orb_basis(orb_basis, dft_control%qs_control%eps_pgf_orb)
    3846              :          END DO
    3847              :       END IF
    3848              : 
    3849          498 :    END SUBROUTINE print_ri_hfx
    3850              : 
    3851              : ! **************************************************************************************************
    3852              : !> \brief Calculate the RI metric 2-center integrals
    3853              : !> \param matrix_s ...
    3854              : !> \param basis_set_RI ...
    3855              : !> \param ri_data ...
    3856              : !> \param qs_env ...
    3857              : ! **************************************************************************************************
    3858            0 :    SUBROUTINE calc_RI_2c_ints(matrix_s, basis_set_RI, ri_data, qs_env)
    3859              : 
    3860              :       TYPE(dbcsr_type), DIMENSION(1)                     :: matrix_s
    3861              :       TYPE(gto_basis_set_p_type), DIMENSION(:)           :: basis_set_RI
    3862              :       TYPE(hfx_ri_type), INTENT(INOUT)                   :: ri_data
    3863              :       TYPE(qs_environment_type), POINTER                 :: qs_env
    3864              : 
    3865            0 :       INTEGER, DIMENSION(:), POINTER                     :: col_bsize, row_bsize
    3866              :       TYPE(dbcsr_distribution_type)                      :: dbcsr_dist
    3867              :       TYPE(distribution_2d_type), POINTER                :: dist_2d
    3868              :       TYPE(neighbor_list_set_p_type), DIMENSION(:), &
    3869            0 :          POINTER                                         :: nl_2c
    3870              : 
    3871            0 :       NULLIFY (nl_2c, dist_2d, row_bsize, col_bsize)
    3872              : 
    3873            0 :       CALL get_qs_env(qs_env, distribution_2d=dist_2d)
    3874            0 :       CALL cp_dbcsr_dist2d_to_dist(dist_2d, dbcsr_dist)
    3875            0 :       ALLOCATE (row_bsize(SIZE(ri_data%bsizes_RI)))
    3876            0 :       ALLOCATE (col_bsize(SIZE(ri_data%bsizes_RI)))
    3877            0 :       row_bsize(:) = ri_data%bsizes_RI
    3878            0 :       col_bsize(:) = ri_data%bsizes_RI
    3879              : 
    3880            0 :       CALL dbcsr_create(matrix_s(1), "RI metric", dbcsr_dist, dbcsr_type_symmetric, row_bsize, col_bsize)
    3881              : 
    3882              :       CALL build_2c_neighbor_lists(nl_2c, basis_set_RI, basis_set_RI, ri_data%ri_metric, &
    3883            0 :                                    "HFX_2c_nl_pot", qs_env, sym_ij=.TRUE., dist_2d=dist_2d)
    3884              :       CALL build_2c_integrals(matrix_s, ri_data%filter_eps_2c, qs_env, nl_2c, basis_set_RI, &
    3885            0 :                               basis_set_RI, ri_data%ri_metric)
    3886              : 
    3887            0 :       CALL release_neighbor_list_sets(nl_2c)
    3888            0 :       CALL dbcsr_distribution_release(dbcsr_dist)
    3889            0 :       DEALLOCATE (row_bsize, col_bsize)
    3890              : 
    3891            0 :    END SUBROUTINE calc_RI_2c_ints
    3892              : 
    3893              : ! **************************************************************************************************
    3894              : !> \brief Projects the density on the RI basis and return the array of the RI coefficients
    3895              : !> \param density_coeffs ...
    3896              : !> \param rho_ao ...
    3897              : !> \param ispin ...
    3898              : !> \param basis_set_AO ...
    3899              : !> \param basis_set_RI ...
    3900              : !> \param multiply_by_S ...
    3901              : !> \param skip_ri_metric ...
    3902              : !> \param ri_data ...
    3903              : !> \param qs_env ...
    3904              : ! **************************************************************************************************
    3905            0 :    SUBROUTINE get_RI_density_coeffs(density_coeffs, rho_ao, ispin, basis_set_AO, basis_set_RI, &
    3906              :                                     multiply_by_S, skip_ri_metric, ri_data, qs_env)
    3907              : 
    3908              :       REAL(dp), ALLOCATABLE, DIMENSION(:)                :: density_coeffs
    3909              :       TYPE(dbcsr_p_type), DIMENSION(:, :)                :: rho_ao
    3910              :       INTEGER, INTENT(IN)                                :: ispin
    3911              :       TYPE(gto_basis_set_p_type), DIMENSION(:)           :: basis_set_AO, basis_set_RI
    3912              :       LOGICAL, INTENT(IN)                                :: multiply_by_S, skip_ri_metric
    3913              :       TYPE(hfx_ri_type), INTENT(INOUT)                   :: ri_data
    3914              :       TYPE(qs_environment_type), POINTER                 :: qs_env
    3915              : 
    3916              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'get_RI_density_coeffs'
    3917              : 
    3918              :       INTEGER                                            :: a, b, handle, i_mem, idx, n_dependent, &
    3919              :                                                             n_mem, n_mem_RI, natom, &
    3920              :                                                             nblk_per_thread, nblks, nkind
    3921              :       INTEGER(int_8)                                     :: nze
    3922            0 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: batch_block_end, batch_block_start, &
    3923            0 :                                                             dist1, dist2, dist3, dummy1, dummy2, &
    3924            0 :                                                             idx1, idx2, idx3
    3925              :       INTEGER, DIMENSION(2)                              :: ind, pdims_2d
    3926              :       INTEGER, DIMENSION(2, 3)                           :: bounds_cpy
    3927              :       INTEGER, DIMENSION(3)                              :: dims_3c, pcoord_3d, pdims_3d
    3928              :       LOGICAL                                            :: calc_ints, found
    3929              :       REAL(dp)                                           :: occ, threshold
    3930            0 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: blk
    3931            0 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :, :)          :: blk_3d
    3932              :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
    3933              :       TYPE(dbcsr_type)                                   :: ri_2c_inv
    3934            0 :       TYPE(dbcsr_type), DIMENSION(1)                     :: ri_2c_ints
    3935            0 :       TYPE(dbt_distribution_type)                        :: dist_2d, dist_3d
    3936              :       TYPE(dbt_iterator_type)                            :: iter
    3937            0 :       TYPE(dbt_pgrid_type)                               :: pgrid_2d, pgrid_3d
    3938            0 :       TYPE(dbt_type)                                     :: density_coeffs_t, density_tmp, rho_ao_t, &
    3939            0 :                                                             rho_ao_t_3d, rho_ao_tmp, t2c_ri_ints, &
    3940            0 :                                                             t2c_ri_inv, t2c_ri_tmp
    3941            0 :       TYPE(dbt_type), ALLOCATABLE, DIMENSION(:, :)       :: t_3c_int_batched
    3942              :       TYPE(dft_control_type), POINTER                    :: dft_control
    3943              :       TYPE(distribution_3d_type)                         :: dist_nl3c
    3944            0 :       TYPE(mp_cart_type)                                 :: mp_comm_t3c
    3945              :       TYPE(mp_para_env_type), POINTER                    :: para_env
    3946              :       TYPE(neighbor_list_3c_type)                        :: nl_3c
    3947            0 :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
    3948            0 :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
    3949              : 
    3950            0 :       NULLIFY (dft_control, para_env, blacs_env, particle_set, qs_kind_set)
    3951              : 
    3952            0 :       CALL timeset(routineN, handle)
    3953              : 
    3954              :       ! Projection of the density on the RI basis: n(r) = sum_pq sum_munu P_pq (pq|mu) (mu|nu)^-1 nu(r)
    3955              :       !                                                 = sum_nu d_nu nu(r)
    3956              :       ! the (pq|mu) (mu|nu)^-1 contraction is already stored in compressed format
    3957              : 
    3958            0 :       IF (.NOT. ri_data%flavor == ri_pmat) THEN
    3959            0 :          CPABORT("Can only calculate and print the RI density coefficients within the RHO flavor of RI-HFX")
    3960              :       END IF
    3961              : 
    3962              :       CALL get_qs_env(qs_env, dft_control=dft_control, para_env=para_env, blacs_env=blacs_env, nkind=nkind, &
    3963            0 :                       particle_set=particle_set, qs_kind_set=qs_kind_set, natom=natom)
    3964            0 :       n_mem = ri_data%n_mem
    3965            0 :       n_mem_RI = ri_data%n_mem_RI
    3966              : 
    3967              :       ! Calculate RI 2c int tensor and its inverse. Skip this if requested
    3968            0 :       IF (.NOT. skip_ri_metric) THEN
    3969            0 :          CALL calc_RI_2c_ints(ri_2c_ints, basis_set_RI, ri_data, qs_env)
    3970            0 :          CALL dbcsr_create(ri_2c_inv, template=ri_2c_ints(1), matrix_type=dbcsr_type_no_symmetry)
    3971              : 
    3972            0 :          SELECT CASE (ri_data%t2c_method)
    3973              :          CASE (hfx_ri_do_2c_iter)
    3974            0 :             threshold = MAX(ri_data%filter_eps, 1.0e-12_dp)
    3975            0 :             CALL invert_hotelling(ri_2c_inv, ri_2c_ints(1), threshold=threshold, silent=.FALSE.)
    3976              :          CASE (hfx_ri_do_2c_cholesky)
    3977            0 :             CALL dbcsr_copy(ri_2c_inv, ri_2c_ints(1))
    3978            0 :             CALL cp_dbcsr_cholesky_decompose(ri_2c_inv, para_env=para_env, blacs_env=blacs_env)
    3979            0 :             CALL cp_dbcsr_cholesky_invert(ri_2c_inv, para_env=para_env, blacs_env=blacs_env, uplo_to_full=.TRUE.)
    3980              :          CASE (hfx_ri_do_2c_diag)
    3981            0 :             CALL dbcsr_copy(ri_2c_inv, ri_2c_ints(1))
    3982              :             CALL cp_dbcsr_power(ri_2c_inv, -1.0_dp, ri_data%eps_eigval, n_dependent, &
    3983            0 :                                 para_env, blacs_env, verbose=ri_data%unit_nr_dbcsr > 0)
    3984              :          END SELECT
    3985              : 
    3986            0 :          CALL dbt_create(ri_2c_ints(1), t2c_ri_tmp)
    3987              :          CALL create_2c_tensor(t2c_ri_ints, dist1, dist2, ri_data%pgrid_2d, &
    3988              :                                ri_data%bsizes_RI_split, ri_data%bsizes_RI_split, &
    3989              :                                name="(RI | RI)")
    3990            0 :          CALL dbt_create(t2c_ri_ints, t2c_ri_inv)
    3991              : 
    3992            0 :          CALL dbt_copy_matrix_to_tensor(ri_2c_ints(1), t2c_ri_tmp)
    3993            0 :          CALL dbt_copy(t2c_ri_tmp, t2c_ri_ints, move_data=.TRUE.)
    3994            0 :          CALL dbt_filter(t2c_ri_ints, ri_data%filter_eps)
    3995              : 
    3996            0 :          CALL dbt_copy_matrix_to_tensor(ri_2c_inv, t2c_ri_tmp)
    3997            0 :          CALL dbt_copy(t2c_ri_tmp, t2c_ri_inv, move_data=.TRUE.)
    3998            0 :          CALL dbt_filter(t2c_ri_inv, ri_data%filter_eps)
    3999              : 
    4000            0 :          CALL dbcsr_release(ri_2c_ints(1))
    4001            0 :          CALL dbcsr_release(ri_2c_inv)
    4002            0 :          CALL dbt_destroy(t2c_ri_tmp)
    4003            0 :          DEALLOCATE (dist1, dist2)
    4004              :       END IF
    4005              : 
    4006              :       ! The AO density tensor
    4007            0 :       CALL dbt_create(rho_ao(ispin, 1)%matrix, rho_ao_tmp)
    4008              :       CALL create_2c_tensor(rho_ao_t, dist1, dist2, ri_data%pgrid_2d, &
    4009              :                             ri_data%bsizes_AO_split, ri_data%bsizes_AO_split, &
    4010            0 :                             name="(AO | AO)")
    4011            0 :       DEALLOCATE (dist1, dist2)
    4012              : 
    4013            0 :       CALL dbt_copy_matrix_to_tensor(rho_ao(ispin, 1)%matrix, rho_ao_tmp)
    4014            0 :       CALL dbt_copy(rho_ao_tmp, rho_ao_t, move_data=.TRUE.)
    4015            0 :       CALL dbt_filter(rho_ao_t, ri_data%filter_eps)
    4016            0 :       CALL dbt_destroy(rho_ao_tmp)
    4017              : 
    4018              :       ! Put in in 3D
    4019            0 :       ALLOCATE (dist1(SIZE(ri_data%bsizes_AO_split)), dist2(SIZE(ri_data%bsizes_AO_split)), dist3(1))
    4020            0 :       dist3(1) = 0
    4021            0 :       CALL dbt_get_info(rho_ao_t, pdims=pdims_2d, proc_dist_1=dist1, proc_dist_2=dist2)
    4022            0 :       CALL dbt_default_distvec(1, 1, [1], dist3)
    4023              : 
    4024            0 :       pdims_3d(1) = pdims_2d(1)
    4025            0 :       pdims_3d(2) = pdims_2d(2)
    4026            0 :       pdims_3d(3) = 1
    4027              : 
    4028            0 :       CALL dbt_pgrid_create(para_env, pdims_3d, pgrid_3d)
    4029            0 :       CALL dbt_distribution_new(dist_3d, pgrid_3d, dist1, dist2, dist3)
    4030              :       CALL dbt_create(rho_ao_t_3d, "rho_ao_3d", dist_3d, [1, 2], [3], ri_data%bsizes_AO_split, &
    4031            0 :                       ri_data%bsizes_AO_split, [1])
    4032            0 :       DEALLOCATE (dist1, dist2, dist3)
    4033            0 :       CALL dbt_pgrid_destroy(pgrid_3d)
    4034            0 :       CALL dbt_distribution_destroy(dist_3d)
    4035              : 
    4036              :       ! copy density
    4037            0 :       nblks = 0
    4038              : !$OMP PARALLEL DEFAULT(NONE) &
    4039              : !$OMP SHARED(rho_ao_t,nblks) &
    4040            0 : !$OMP PRIVATE(iter,ind,blk,found)
    4041              :       CALL dbt_iterator_start(iter, rho_ao_t)
    4042              :       DO WHILE (dbt_iterator_blocks_left(iter))
    4043              :          CALL dbt_iterator_next_block(iter, ind)
    4044              :          CALL dbt_get_block(rho_ao_t, ind, blk, found)
    4045              :          IF (found) THEN
    4046              : !$OMP ATOMIC
    4047              :             nblks = nblks + 1
    4048              :             DEALLOCATE (blk)
    4049              :          END IF
    4050              :       END DO
    4051              :       CALL dbt_iterator_stop(iter)
    4052              : !$OMP END PARALLEL
    4053              : 
    4054            0 :       ALLOCATE (idx1(nblks), idx2(nblks), idx3(nblks))
    4055            0 :       idx3 = 1
    4056            0 :       nblks = 0
    4057              : !$OMP PARALLEL DEFAULT(NONE) &
    4058              : !$OMP SHARED(rho_ao_t,nblks,idx1,idx2) &
    4059            0 : !$OMP PRIVATE(iter,ind,blk,found)
    4060              :       CALL dbt_iterator_start(iter, rho_ao_t)
    4061              :       DO WHILE (dbt_iterator_blocks_left(iter))
    4062              :          CALL dbt_iterator_next_block(iter, ind)
    4063              :          CALL dbt_get_block(rho_ao_t, ind, blk, found)
    4064              :          IF (found) THEN
    4065              : !$OMP CRITICAL
    4066              :             nblks = nblks + 1
    4067              :             idx1(nblks) = ind(1)
    4068              :             idx2(nblks) = ind(2)
    4069              : !$OMP END CRITICAL
    4070              :             DEALLOCATE (blk)
    4071              :          END IF
    4072              :       END DO
    4073              :       CALL dbt_iterator_stop(iter)
    4074              : !$OMP END PARALLEL
    4075              : 
    4076            0 : !$OMP PARALLEL DEFAULT(NONE) SHARED(rho_ao_t_3d,nblks,idx1,idx2,idx3) PRIVATE(nblk_per_thread,A,b)
    4077              :       nblk_per_thread = nblks/omp_get_num_threads() + 1
    4078              :       a = omp_get_thread_num()*nblk_per_thread + 1
    4079              :       b = MIN(a + nblk_per_thread, nblks)
    4080              :       CALL dbt_reserve_blocks(rho_ao_t_3d, idx1(a:b), idx2(a:b), idx3(a:b))
    4081              : !$OMP END PARALLEL
    4082              : 
    4083              : !$OMP PARALLEL DEFAULT(NONE) &
    4084              : !$OMP SHARED(rho_ao_t,rho_ao_t_3d) &
    4085            0 : !$OMP PRIVATE(iter,ind,blk,found,blk_3d)
    4086              :       CALL dbt_iterator_start(iter, rho_ao_t)
    4087              :       DO WHILE (dbt_iterator_blocks_left(iter))
    4088              :          CALL dbt_iterator_next_block(iter, ind)
    4089              :          CALL dbt_get_block(rho_ao_t, ind, blk, found)
    4090              :          IF (found) THEN
    4091              :             ALLOCATE (blk_3d(SIZE(blk, 1), SIZE(blk, 2), 1))
    4092              :             blk_3d(:, :, 1) = blk(:, :)
    4093              : !$OMP CRITICAL
    4094              :             CALL dbt_put_block(rho_ao_t_3d, [ind(1), ind(2), 1], [SIZE(blk, 1), SIZE(blk, 2), 1], blk_3d)
    4095              : !$OMP END CRITICAL
    4096              :             DEALLOCATE (blk, blk_3d)
    4097              :          END IF
    4098              :       END DO
    4099              :       CALL dbt_iterator_stop(iter)
    4100              : !$OMP END PARALLEL
    4101              : 
    4102              :       ! The 1D tensor with the density coeffs
    4103            0 :       pdims_2d(1) = para_env%num_pe
    4104            0 :       pdims_2d(2) = 1
    4105              : 
    4106            0 :       ALLOCATE (dist1(SIZE(ri_data%bsizes_RI_split)), dist2(1))
    4107            0 :       CALL dbt_default_distvec(SIZE(ri_data%bsizes_RI_split), pdims_2d(1), ri_data%bsizes_RI_split, dist1)
    4108            0 :       CALL dbt_default_distvec(1, pdims_2d(2), [1], dist2)
    4109              : 
    4110            0 :       CALL dbt_pgrid_create(para_env, pdims_2d, pgrid_2d)
    4111            0 :       CALL dbt_distribution_new(dist_2d, pgrid_2d, dist1, dist2)
    4112            0 :       CALL dbt_create(density_coeffs_t, "density_coeffs", dist_2d, [1], [2], ri_data%bsizes_RI_split, [1])
    4113            0 :       CALL dbt_create(density_coeffs_t, density_tmp)
    4114            0 :       DEALLOCATE (dist1, dist2)
    4115            0 :       CALL dbt_pgrid_destroy(pgrid_2d)
    4116            0 :       CALL dbt_distribution_destroy(dist_2d)
    4117              : 
    4118            0 :       CALL dbt_get_info(ri_data%t_3c_int_ctr_3(1, 1), nfull_total=dims_3c)
    4119              : 
    4120              :       ! The 3c integrals tensor, in case we compute them here
    4121            0 :       pdims_3d = 0
    4122            0 :       CALL dbt_pgrid_create(para_env, pdims_3d, pgrid_3d, tensor_dims=[MAX(1, natom/n_mem), natom, natom])
    4123            0 :       ALLOCATE (t_3c_int_batched(1, 1))
    4124              :       CALL create_3c_tensor(t_3c_int_batched(1, 1), dist1, dist2, dist3, pgrid_3d, &
    4125              :                             ri_data%bsizes_RI, ri_data%bsizes_AO, ri_data%bsizes_AO, map1=[1], map2=[2, 3], &
    4126            0 :                             name="(RI | AO AO)")
    4127              : 
    4128            0 :       CALL dbt_mp_environ_pgrid(pgrid_3d, pdims_3d, pcoord_3d)
    4129            0 :       CALL mp_comm_t3c%create(pgrid_3d%mp_comm_2d, 3, pdims_3d)
    4130              :       CALL distribution_3d_create(dist_nl3c, dist1, dist2, dist3, nkind, particle_set, &
    4131            0 :                                   mp_comm_t3c, own_comm=.TRUE.)
    4132            0 :       DEALLOCATE (dist1, dist2, dist3)
    4133            0 :       CALL dbt_pgrid_destroy(pgrid_3d)
    4134              : 
    4135              :       CALL build_3c_neighbor_lists(nl_3c, basis_set_RI, basis_set_AO, basis_set_AO, dist_nl3c, ri_data%ri_metric, &
    4136            0 :                                    "HFX_3c_nl", qs_env, op_pos=1, sym_jk=.TRUE., own_dist=.TRUE.)
    4137              : 
    4138            0 :       n_mem = ri_data%n_mem
    4139            0 :       CALL create_tensor_batches(ri_data%bsizes_RI, n_mem, dummy1, dummy2, batch_block_start, batch_block_end)
    4140              : 
    4141            0 :       calc_ints = .FALSE.
    4142            0 :       CALL get_tensor_occupancy(ri_data%t_3c_int_ctr_2(1, 1), nze, occ)
    4143            0 :       IF (nze == 0) calc_ints = .TRUE.
    4144              : 
    4145            0 :       DO i_mem = 1, n_mem
    4146            0 :          IF (calc_ints) THEN
    4147              :             CALL build_3c_integrals(t_3c_int_batched, ri_data%filter_eps, qs_env, nl_3c, &
    4148              :                                     basis_set_RI, basis_set_AO, basis_set_AO, &
    4149              :                                     ri_data%ri_metric, int_eps=ri_data%eps_schwarz, op_pos=1, &
    4150              :                                     desymmetrize=.FALSE., &
    4151            0 :                                     bounds_i=[batch_block_start(i_mem), batch_block_end(i_mem)])
    4152            0 :             CALL dbt_copy(t_3c_int_batched(1, 1), ri_data%t_3c_int_ctr_3(1, 1), order=[1, 3, 2])
    4153            0 :             CALL dbt_copy(t_3c_int_batched(1, 1), ri_data%t_3c_int_ctr_3(1, 1), move_data=.TRUE., summation=.TRUE.)
    4154            0 :             CALL dbt_filter(ri_data%t_3c_int_ctr_3(1, 1), ri_data%filter_eps)
    4155              :          ELSE
    4156              :             bounds_cpy(:, 2) = [SUM(ri_data%bsizes_RI(1:batch_block_start(i_mem) - 1)) + 1, &
    4157            0 :                                 SUM(ri_data%bsizes_RI(1:batch_block_end(i_mem)))]
    4158            0 :             bounds_cpy(:, 1) = [1, SUM(ri_data%bsizes_AO)]
    4159            0 :             bounds_cpy(:, 3) = [1, SUM(ri_data%bsizes_AO)]
    4160              :             CALL dbt_copy(ri_data%t_3c_int_ctr_2(1, 1), ri_data%t_3c_int_ctr_3(1, 1), &
    4161            0 :                           order=[2, 1, 3], bounds=bounds_cpy)
    4162              :          END IF
    4163              : 
    4164              :          !contract the integrals with the density P_pq (pq|R)
    4165              :          CALL dbt_contract(1.0_dp, ri_data%t_3c_int_ctr_3(1, 1), rho_ao_t_3d, 0.0_dp, density_tmp, &
    4166              :                            contract_1=[2, 3], notcontract_1=[1], &
    4167              :                            contract_2=[1, 2], notcontract_2=[3], &
    4168            0 :                            map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps)
    4169            0 :          CALL dbt_clear(ri_data%t_3c_int_ctr_3(1, 1))
    4170              : 
    4171            0 :          IF (skip_ri_metric) THEN
    4172            0 :             CALL dbt_copy(density_tmp, density_coeffs_t, move_data=.TRUE.)
    4173              :          ELSE
    4174              :             !contract the above vector with the inverse metric
    4175              :             CALL dbt_contract(1.0_dp, t2c_ri_inv, density_tmp, 1.0_dp, density_coeffs_t, &
    4176              :                               contract_1=[2], notcontract_1=[1], &
    4177              :                               contract_2=[1], notcontract_2=[2], &
    4178            0 :                               map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps)
    4179              :          END IF
    4180              : 
    4181              :       END DO
    4182            0 :       CALL neighbor_list_3c_destroy(nl_3c)
    4183              : 
    4184            0 :       IF (multiply_by_s) THEN
    4185              :          CALL dbt_contract(1.0_dp, t2c_ri_ints, density_coeffs_t, 0.0_dp, density_tmp, &
    4186              :                            contract_1=[2], notcontract_1=[1], &
    4187              :                            contract_2=[1], notcontract_2=[2], &
    4188            0 :                            map_1=[1], map_2=[2], filter_eps=ri_data%filter_eps)
    4189            0 :          CALL dbt_copy(density_tmp, density_coeffs_t, move_data=.TRUE.)
    4190              :       END IF
    4191              : 
    4192            0 :       ALLOCATE (density_coeffs(SUM(ri_data%bsizes_RI)))
    4193            0 :       density_coeffs = 0.0
    4194              : 
    4195              : !$OMP PARALLEL DEFAULT(NONE) &
    4196              : !$OMP SHARED(density_coeffs_t,ri_data,density_coeffs) &
    4197            0 : !$OMP PRIVATE(iter,ind,blk,found,idx)
    4198              :       CALL dbt_iterator_start(iter, density_coeffs_t)
    4199              :       DO WHILE (dbt_iterator_blocks_left(iter))
    4200              :          CALL dbt_iterator_next_block(iter, ind)
    4201              :          CALL dbt_get_block(density_coeffs_t, ind, blk, found)
    4202              :          IF (found) THEN
    4203              : 
    4204              :             idx = SUM(ri_data%bsizes_RI_split(1:ind(1) - 1))
    4205              : !$OMP CRITICAL
    4206              :             density_coeffs(idx + 1:idx + ri_data%bsizes_RI_split(ind(1))) = blk(:, 1)
    4207              : !$OMP END CRITICAL
    4208              :             DEALLOCATE (blk)
    4209              :          END IF
    4210              :       END DO
    4211              :       CALL dbt_iterator_stop(iter)
    4212              : !$OMP END PARALLEL
    4213            0 :       CALL para_env%sum(density_coeffs)
    4214              : 
    4215            0 :       CALL dbt_destroy(density_tmp)
    4216            0 :       CALL dbt_destroy(rho_ao_t)
    4217            0 :       CALL dbt_destroy(rho_ao_t_3d)
    4218            0 :       CALL dbt_destroy(density_coeffs_t)
    4219            0 :       CALL dbt_destroy(t_3c_int_batched(1, 1))
    4220              : 
    4221            0 :       IF (.NOT. skip_ri_metric) THEN
    4222            0 :          CALL dbt_destroy(t2c_ri_ints)
    4223            0 :          CALL dbt_destroy(t2c_ri_inv)
    4224              :       END IF
    4225              : 
    4226            0 :       CALL timestop(handle)
    4227              : 
    4228            0 :    END SUBROUTINE get_RI_density_coeffs
    4229              : 
    4230              : ! **************************************************************************************************
    4231              : !> \brief a small utility function that returns the atom corresponding to a block of a split tensor
    4232              : !> \param idx_to_at ...
    4233              : !> \param bsizes_split ...
    4234              : !> \param bsizes_orig ...
    4235              : !> \return ...
    4236              : ! **************************************************************************************************
    4237         1078 :    SUBROUTINE get_idx_to_atom(idx_to_at, bsizes_split, bsizes_orig)
    4238              :       INTEGER, DIMENSION(:), INTENT(INOUT)               :: idx_to_at
    4239              :       INTEGER, DIMENSION(:), INTENT(IN)                  :: bsizes_split, bsizes_orig
    4240              : 
    4241              :       INTEGER                                            :: full_sum, iat, iblk, split_sum
    4242              : 
    4243         1078 :       iat = 1
    4244         1078 :       full_sum = bsizes_orig(iat)
    4245         1078 :       split_sum = 0
    4246         5178 :       DO iblk = 1, SIZE(bsizes_split)
    4247         4100 :          split_sum = split_sum + bsizes_split(iblk)
    4248              : 
    4249         4100 :          IF (split_sum .GT. full_sum) THEN
    4250         1482 :             iat = iat + 1
    4251         1482 :             full_sum = full_sum + bsizes_orig(iat)
    4252              :          END IF
    4253              : 
    4254         5178 :          idx_to_at(iblk) = iat
    4255              :       END DO
    4256              : 
    4257         1078 :    END SUBROUTINE get_idx_to_atom
    4258              : 
    4259              : ! **************************************************************************************************
    4260              : !> \brief Function for calculating sqrt of a matrix
    4261              : !> \param values ...
    4262              : !> \return ...
    4263              : ! **************************************************************************************************
    4264            0 :    FUNCTION my_sqrt(values)
    4265              :       REAL(KIND=dp), DIMENSION(:), INTENT(IN)            :: values
    4266              :       REAL(KIND=dp), DIMENSION(SIZE(values))             :: my_sqrt
    4267              : 
    4268            0 :       my_sqrt = SQRT(values)
    4269              :    END FUNCTION
    4270              : 
    4271              : ! **************************************************************************************************
    4272              : !> \brief Function for calculation inverse sqrt of a matrix
    4273              : !> \param values ...
    4274              : !> \return ...
    4275              : ! **************************************************************************************************
    4276            0 :    FUNCTION my_invsqrt(values)
    4277              :       REAL(KIND=dp), DIMENSION(:), INTENT(IN)            :: values
    4278              :       REAL(KIND=dp), DIMENSION(SIZE(values))             :: my_invsqrt
    4279              : 
    4280            0 :       my_invsqrt = SQRT(1.0_dp/values)
    4281              :    END FUNCTION
    4282          328 : END MODULE
        

Generated by: LCOV version 2.0-1