LCOV - code coverage report
Current view: top level - src - xas_tdp_integrals.F (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:e7e05ae) Lines: 527 540 97.6 %
Date: 2024-04-18 06:59:28 Functions: 10 10 100.0 %

          Line data    Source code
       1             : !--------------------------------------------------------------------------------------------------!
       2             : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3             : !   Copyright 2000-2024 CP2K developers group <https://cp2k.org>                                   !
       4             : !                                                                                                  !
       5             : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6             : !--------------------------------------------------------------------------------------------------!
       7             : 
       8             : ! **************************************************************************************************
       9             : !> \brief 3-center integrals machinery for the XAS_TDP method
      10             : !> \author A. Bussy (03.2020)
      11             : ! **************************************************************************************************
      12             : 
      13             : MODULE xas_tdp_integrals
      14             :    USE OMP_LIB,                         ONLY: omp_get_max_threads,&
      15             :                                               omp_get_num_threads,&
      16             :                                               omp_get_thread_num
      17             :    USE ai_contraction_sphi,             ONLY: ab_contract,&
      18             :                                               libxsmm_abc_contract
      19             :    USE atomic_kind_types,               ONLY: atomic_kind_type
      20             :    USE basis_set_types,                 ONLY: get_gto_basis_set,&
      21             :                                               gto_basis_set_p_type,&
      22             :                                               gto_basis_set_type
      23             :    USE cell_types,                      ONLY: cell_type
      24             :    USE constants_operator,              ONLY: operator_coulomb
      25             :    USE cp_array_utils,                  ONLY: cp_1d_i_p_type,&
      26             :                                               cp_2d_r_p_type
      27             :    USE cp_blacs_env,                    ONLY: cp_blacs_env_type
      28             :    USE cp_dbcsr_operations,             ONLY: cp_dbcsr_dist2d_to_dist
      29             :    USE cp_eri_mme_interface,            ONLY: cp_eri_mme_param,&
      30             :                                               cp_eri_mme_set_params
      31             :    USE cp_files,                        ONLY: close_file,&
      32             :                                               open_file
      33             :    USE dbcsr_api,                       ONLY: dbcsr_distribution_get,&
      34             :                                               dbcsr_distribution_release,&
      35             :                                               dbcsr_distribution_type
      36             :    USE dbt_api,                         ONLY: &
      37             :         dbt_create, dbt_distribution_destroy, dbt_distribution_new, dbt_distribution_type, &
      38             :         dbt_finalize, dbt_pgrid_create, dbt_pgrid_destroy, dbt_pgrid_type, dbt_put_block, &
      39             :         dbt_reserve_blocks, dbt_type
      40             :    USE distribution_1d_types,           ONLY: distribution_1d_type
      41             :    USE distribution_2d_types,           ONLY: distribution_2d_create,&
      42             :                                               distribution_2d_type
      43             :    USE eri_mme_integrate,               ONLY: eri_mme_2c_integrate
      44             :    USE eri_mme_types,                   ONLY: eri_mme_init,&
      45             :                                               eri_mme_release
      46             :    USE gamma,                           ONLY: init_md_ftable
      47             :    USE generic_os_integrals,            ONLY: int_operators_r12_ab_os
      48             :    USE input_constants,                 ONLY: do_potential_coulomb,&
      49             :                                               do_potential_id,&
      50             :                                               do_potential_short,&
      51             :                                               do_potential_truncated
      52             :    USE input_section_types,             ONLY: section_vals_val_get
      53             :    USE kinds,                           ONLY: dp
      54             :    USE libint_2c_3c,                    ONLY: cutoff_screen_factor,&
      55             :                                               eri_2center,&
      56             :                                               eri_3center,&
      57             :                                               libint_potential_type
      58             :    USE libint_wrapper,                  ONLY: cp_libint_cleanup_2eri,&
      59             :                                               cp_libint_cleanup_3eri,&
      60             :                                               cp_libint_init_2eri,&
      61             :                                               cp_libint_init_3eri,&
      62             :                                               cp_libint_set_contrdepth,&
      63             :                                               cp_libint_t
      64             :    USE mathlib,                         ONLY: invmat_symm
      65             :    USE message_passing,                 ONLY: mp_comm_type,&
      66             :                                               mp_para_env_type
      67             :    USE molecule_types,                  ONLY: molecule_type
      68             :    USE orbital_pointers,                ONLY: ncoset
      69             :    USE particle_methods,                ONLY: get_particle_set
      70             :    USE particle_types,                  ONLY: particle_type
      71             :    USE qs_environment_types,            ONLY: get_qs_env,&
      72             :                                               qs_environment_type
      73             :    USE qs_integral_utils,               ONLY: basis_set_list_setup
      74             :    USE qs_kind_types,                   ONLY: get_qs_kind,&
      75             :                                               qs_kind_type
      76             :    USE qs_neighbor_list_types,          ONLY: &
      77             :         get_iterator_info, get_neighbor_list_set_p, neighbor_list_iterate, &
      78             :         neighbor_list_iterator_create, neighbor_list_iterator_p_type, &
      79             :         neighbor_list_iterator_release, neighbor_list_set_p_type, nl_set_sub_iterator, &
      80             :         nl_sub_iterate, release_neighbor_list_sets
      81             :    USE qs_neighbor_lists,               ONLY: atom2d_build,&
      82             :                                               atom2d_cleanup,&
      83             :                                               build_neighbor_lists,&
      84             :                                               local_atoms_type,&
      85             :                                               pair_radius_setup
      86             :    USE qs_o3c_types,                    ONLY: get_o3c_iterator_info,&
      87             :                                               init_o3c_container,&
      88             :                                               o3c_container_type,&
      89             :                                               o3c_iterate,&
      90             :                                               o3c_iterator_create,&
      91             :                                               o3c_iterator_release,&
      92             :                                               o3c_iterator_type,&
      93             :                                               release_o3c_container
      94             :    USE t_c_g0,                          ONLY: get_lmax_init,&
      95             :                                               init
      96             :    USE xas_tdp_types,                   ONLY: xas_tdp_control_type,&
      97             :                                               xas_tdp_env_type
      98             : #include "./base/base_uses.f90"
      99             : 
     100             :    IMPLICIT NONE
     101             :    PRIVATE
     102             : 
     103             :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'xas_tdp_integrals'
     104             : 
     105             :    PUBLIC :: create_pqX_tensor, fill_pqX_tensor, compute_ri_3c_coulomb, compute_ri_3c_exchange, &
     106             :              build_xas_tdp_3c_nl, build_xas_tdp_ovlp_nl, get_opt_3c_dist2d, &
     107             :              compute_ri_coulomb2_int, compute_ri_exchange2_int
     108             : 
     109             : CONTAINS
     110             : 
     111             : ! **************************************************************************************************
     112             : !> \brief Prepares a tensor to hold 3-center integrals (pq|X), where p,q are distributed according
     113             : !>        to the given 2d dbcsr distribution of the given . The third dimension of the tensor is
     114             : !>        iteslf not distributed (i.e. the t_pgrid's third dimension has size 1). The blocks are
     115             : !>        reserved according to the neighbor lists
     116             : !> \param pq_X the tensor to store the integrals
     117             : !> \param ab_nl the 1st and 2nd center neighbor list
     118             : !> \param ac_nl the 1st and 3rd center neighbor list
     119             : !> \param matrix_dist ...
     120             : !> \param blk_size_1 the block size in the first dimension
     121             : !> \param blk_size_2 the block size in the second dimension
     122             : !> \param blk_size_3 the block size in the third dimension
     123             : !> \param only_bc_same_center only keep block if, for the corresponding integral (ab|c), b and c
     124             : !>        share the same center, i.e. r_bc = 0.0
     125             : ! **************************************************************************************************
     126        1320 :    SUBROUTINE create_pqX_tensor(pq_X, ab_nl, ac_nl, matrix_dist, blk_size_1, blk_size_2, &
     127         132 :                                 blk_size_3, only_bc_same_center)
     128             : 
     129             :       TYPE(dbt_type), INTENT(OUT)                        :: pq_X
     130             :       TYPE(neighbor_list_set_p_type), DIMENSION(:), &
     131             :          POINTER                                         :: ab_nl, ac_nl
     132             :       TYPE(dbcsr_distribution_type), INTENT(IN)          :: matrix_dist
     133             :       INTEGER, DIMENSION(:), INTENT(IN)                  :: blk_size_1, blk_size_2, blk_size_3
     134             :       LOGICAL, INTENT(IN), OPTIONAL                      :: only_bc_same_center
     135             : 
     136             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'create_pqX_tensor'
     137             : 
     138             :       INTEGER                                            :: A, b, group_handle, handle, i, iatom, &
     139             :                                                             ikind, jatom, katom, kkind, nblk, &
     140             :                                                             nblk_3, nblk_per_thread, nkind
     141         132 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: idx1, idx2, idx3
     142             :       INTEGER, DIMENSION(3)                              :: pdims
     143         132 :       INTEGER, DIMENSION(:), POINTER                     :: col_dist, row_dist
     144         132 :       INTEGER, DIMENSION(:, :), POINTER                  :: mat_pgrid
     145             :       LOGICAL                                            :: my_sort_bc, symmetric
     146             :       REAL(dp), DIMENSION(3)                             :: rab, rac, rbc
     147        1188 :       TYPE(dbt_distribution_type)                        :: t_dist
     148         396 :       TYPE(dbt_pgrid_type)                               :: t_pgrid
     149             :       TYPE(mp_comm_type)                                 :: group
     150             :       TYPE(neighbor_list_iterator_p_type), &
     151         132 :          DIMENSION(:), POINTER                           :: ab_iter, ac_iter
     152             : 
     153         132 :       NULLIFY (ab_iter, ac_iter, col_dist, row_dist, mat_pgrid)
     154             : 
     155         132 :       CALL timeset(routineN, handle)
     156             : 
     157         132 :       my_sort_bc = .FALSE.
     158         132 :       IF (PRESENT(only_bc_same_center)) my_sort_bc = only_bc_same_center
     159             : 
     160         132 :       CALL get_neighbor_list_set_p(ab_nl, symmetric=symmetric)
     161         132 :       CPASSERT(symmetric)
     162             : 
     163             :       !get 2D distribution info from matrix_dist
     164             :       CALL dbcsr_distribution_get(matrix_dist, pgrid=mat_pgrid, group=group_handle, &
     165         132 :                                   row_dist=row_dist, col_dist=col_dist)
     166         132 :       CALL group%set_handle(group_handle)
     167             : 
     168             :       !create the corresponding tensor proc grid and dist
     169         132 :       pdims(1) = SIZE(mat_pgrid, 1); pdims(2) = SIZE(mat_pgrid, 2); pdims(3) = 1
     170         132 :       CALL dbt_pgrid_create(group, pdims, t_pgrid)
     171             : 
     172         132 :       nblk_3 = SIZE(blk_size_3)
     173             :       CALL dbt_distribution_new(t_dist, t_pgrid, nd_dist_1=row_dist, nd_dist_2=col_dist, &
     174        2572 :                                 nd_dist_3=[(0, i=1, nblk_3)])
     175             : 
     176             :       !create the tensor itself.
     177             :       CALL dbt_create(pq_X, name="(pq|X)", dist=t_dist, map1_2d=[1, 2], map2_2d=[3], &
     178         132 :                       blk_size_1=blk_size_1, blk_size_2=blk_size_2, blk_size_3=blk_size_3)
     179             : 
     180             :       !count the blocks to reserve !note: dbcsr takes care of only keeping unique indices
     181         132 :       CALL neighbor_list_iterator_create(ab_iter, ab_nl)
     182         132 :       CALL neighbor_list_iterator_create(ac_iter, ac_nl, search=.TRUE.)
     183         132 :       nblk = 0
     184       17360 :       DO WHILE (neighbor_list_iterate(ab_iter) == 0)
     185       17228 :          CALL get_iterator_info(ab_iter, ikind=ikind, iatom=iatom, nkind=nkind, r=rab)
     186             : 
     187       50578 :          DO kkind = 1, nkind
     188       33218 :             CALL nl_set_sub_iterator(ac_iter, ikind, kkind, iatom)
     189             : 
     190       71754 :             DO WHILE (nl_sub_iterate(ac_iter) == 0)
     191             : 
     192       21308 :                IF (my_sort_bc) THEN
     193             :                   !we check for rbc or rac because of symmetry in ab_nl
     194         690 :                   CALL get_iterator_info(ac_iter, r=rac)
     195        2760 :                   rbc(:) = rac(:) - rab(:)
     196        2979 :                   IF (.NOT. (ALL(ABS(rbc) .LE. 1.0E-8_dp) .OR. ALL(ABS(rac) .LE. 1.0E-8_dp))) CYCLE
     197             : 
     198             :                END IF
     199             : 
     200       21308 :                nblk = nblk + 1
     201             :             END DO !ac_iter
     202             :          END DO !kkind
     203             :       END DO !ab_iter
     204         132 :       CALL neighbor_list_iterator_release(ab_iter)
     205         132 :       CALL neighbor_list_iterator_release(ac_iter)
     206             : 
     207         864 :       ALLOCATE (idx1(nblk), idx2(nblk), idx3(nblk))
     208             : 
     209             :       !actually reserve the blocks
     210         132 :       CALL neighbor_list_iterator_create(ab_iter, ab_nl)
     211         132 :       CALL neighbor_list_iterator_create(ac_iter, ac_nl, search=.TRUE.)
     212         132 :       nblk = 0
     213       17360 :       DO WHILE (neighbor_list_iterate(ab_iter) == 0)
     214       17228 :          CALL get_iterator_info(ab_iter, ikind=ikind, iatom=iatom, jatom=jatom, nkind=nkind, r=rab)
     215             : 
     216       50578 :          DO kkind = 1, nkind
     217       33218 :             CALL nl_set_sub_iterator(ac_iter, ikind, kkind, iatom)
     218             : 
     219       71754 :             DO WHILE (nl_sub_iterate(ac_iter) == 0)
     220       21308 :                CALL get_iterator_info(ac_iter, jatom=katom, r=rac)
     221             : 
     222       21308 :                IF (my_sort_bc) THEN
     223             :                   !we check for rbc or rac because of symmetry in ab_nl
     224         690 :                   CALL get_iterator_info(ac_iter, r=rac)
     225        2760 :                   rbc(:) = rac(:) - rab(:)
     226        2979 :                   IF (.NOT. (ALL(ABS(rbc) .LE. 1.0E-8_dp) .OR. ALL(ABS(rac) .LE. 1.0E-8_dp))) CYCLE
     227             : 
     228             :                END IF
     229             : 
     230       21018 :                nblk = nblk + 1
     231             : 
     232       21018 :                idx1(nblk) = iatom
     233       21018 :                idx2(nblk) = jatom
     234       21308 :                idx3(nblk) = katom
     235             : 
     236             :             END DO !ac_iter
     237             :          END DO !kkind
     238             :       END DO !ab_iter
     239         132 :       CALL neighbor_list_iterator_release(ab_iter)
     240         132 :       CALL neighbor_list_iterator_release(ac_iter)
     241             : 
     242             : !TODO: Parallelize creation of block list.
     243         132 : !$OMP PARALLEL DEFAULT(NONE) SHARED(pq_X, nblk, idx1, idx2, idx3) PRIVATE(nblk_per_thread,A,b)
     244             :       nblk_per_thread = nblk/omp_get_num_threads() + 1
     245             :       a = omp_get_thread_num()*nblk_per_thread + 1
     246             :       b = MIN(a + nblk_per_thread, nblk)
     247             :       CALL dbt_reserve_blocks(pq_X, idx1(a:b), idx2(a:b), idx3(a:b))
     248             : !$OMP END PARALLEL
     249         132 :       CALL dbt_finalize(pq_X)
     250             : 
     251             :       !clean-up
     252         132 :       CALL dbt_distribution_destroy(t_dist)
     253         132 :       CALL dbt_pgrid_destroy(t_pgrid)
     254             : 
     255         132 :       CALL timestop(handle)
     256             : 
     257         660 :    END SUBROUTINE create_pqX_tensor
     258             : 
     259             : ! **************************************************************************************************
     260             : !> \brief Fills the given 3 dimensional (pq|X) tensor with integrals
     261             : !> \param pq_X the tensor to fill
     262             : !> \param ab_nl the neighbor list for the first 2 centers
     263             : !> \param ac_nl the neighbor list for the first and third centers
     264             : !> \param basis_set_list_a basis sets for first center
     265             : !> \param basis_set_list_b basis sets for second center
     266             : !> \param basis_set_list_c basis sets for third center
     267             : !> \param potential_parameter the operator for the integrals
     268             : !> \param qs_env ...
     269             : !> \param only_bc_same_center same as in create_pqX_tensor
     270             : !> \param eps_screen threshold for possible screening
     271             : !> \note The following indices are happily mixed within this routine: First center i,a,p
     272             : !>       Second center: j,b,q       Third center: k,c,X
     273             : ! **************************************************************************************************
     274         132 :    SUBROUTINE fill_pqX_tensor(pq_X, ab_nl, ac_nl, basis_set_list_a, basis_set_list_b, &
     275             :                               basis_set_list_c, potential_parameter, qs_env, &
     276             :                               only_bc_same_center, eps_screen)
     277             : 
     278             :       TYPE(dbt_type)                                     :: pq_X
     279             :       TYPE(neighbor_list_set_p_type), DIMENSION(:), &
     280             :          POINTER                                         :: ab_nl, ac_nl
     281             :       TYPE(gto_basis_set_p_type), DIMENSION(:), POINTER  :: basis_set_list_a, basis_set_list_b, &
     282             :                                                             basis_set_list_c
     283             :       TYPE(libint_potential_type)                        :: potential_parameter
     284             :       TYPE(qs_environment_type), POINTER                 :: qs_env
     285             :       LOGICAL, INTENT(IN), OPTIONAL                      :: only_bc_same_center
     286             :       REAL(dp), INTENT(IN), OPTIONAL                     :: eps_screen
     287             : 
     288             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'fill_pqX_tensor'
     289             : 
     290             :       INTEGER :: egfa, egfb, egfc, handle, i, iatom, ibasis, ikind, ilist, imax, iset, jatom, &
     291             :          jkind, jset, katom, kkind, kset, m_max, max_ncob, max_ncoc, max_nset, max_nsgfa, &
     292             :          max_nsgfb, maxli, maxlj, maxlk, mepos, nbasis, ncoa, ncob, ncoc, ni, nj, nk, nseta, &
     293             :          nsetb, nsetc, nthread, sgfa, sgfb, sgfc, unit_id
     294         132 :       INTEGER, DIMENSION(:), POINTER                     :: la_max, la_min, lb_max, lb_min, lc_max, &
     295         132 :                                                             lc_min, npgfa, npgfb, npgfc, nsgfa, &
     296         132 :                                                             nsgfb, nsgfc
     297         132 :       INTEGER, DIMENSION(:, :), POINTER                  :: first_sgfa, first_sgfb, first_sgfc
     298             :       LOGICAL                                            :: do_screen, my_sort_bc
     299             :       REAL(dp)                                           :: dij, dik, djk, my_eps_screen, ri(3), &
     300             :                                                             rij(3), rik(3), rj(3), rjk(3), rk(3), &
     301             :                                                             sabc_ext, screen_radius
     302         132 :       REAL(dp), ALLOCATABLE, DIMENSION(:)                :: ccp_buffer, cpp_buffer
     303         132 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: max_contr, max_contra, max_contrb, &
     304         132 :                                                             max_contrc
     305         132 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :, :)          :: iabc, sabc, work
     306         132 :       REAL(dp), DIMENSION(:), POINTER                    :: set_radius_a, set_radius_b, set_radius_c
     307         132 :       REAL(dp), DIMENSION(:, :), POINTER                 :: rpgf_a, rpgf_b, rpgf_c, zeta, zetb, zetc
     308         132 :       TYPE(cp_2d_r_p_type), DIMENSION(:, :), POINTER     :: spb, spc, tspa
     309             :       TYPE(cp_libint_t)                                  :: lib
     310         132 :       TYPE(gto_basis_set_p_type), DIMENSION(:), POINTER  :: basis_set_list
     311             :       TYPE(gto_basis_set_type), POINTER                  :: basis_set, basis_set_a, basis_set_b, &
     312             :                                                             basis_set_c
     313             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     314             :       TYPE(o3c_container_type), POINTER                  :: o3c
     315             :       TYPE(o3c_iterator_type)                            :: o3c_iterator
     316             : 
     317         132 :       NULLIFY (basis_set, basis_set_list, para_env, la_max, la_min)
     318         132 :       NULLIFY (lb_max, lb_min, lc_max, lc_min, npgfa, npgfb, npgfc, nsgfa, nsgfb, nsgfc)
     319         132 :       NULLIFY (first_sgfa, first_sgfb, first_sgfc, set_radius_a, set_radius_b, set_radius_c)
     320         132 :       NULLIFY (rpgf_a, rpgf_b, rpgf_c, zeta, zetb, zetc)
     321         132 :       NULLIFY (basis_set_a, basis_set_b, basis_set_c, tspa, spb, spc)
     322             : 
     323         132 :       CALL timeset(routineN, handle)
     324             : 
     325             :       !Need the max l for each basis for libint (and overall max #of sets for screening)
     326         132 :       nbasis = SIZE(basis_set_list_a)
     327         132 :       max_nsgfa = 0
     328         132 :       max_nset = 0
     329         132 :       maxli = 0
     330         344 :       DO ibasis = 1, nbasis
     331             :          CALL get_gto_basis_set(gto_basis_set=basis_set_list_a(ibasis)%gto_basis_set, &
     332         212 :                                 maxl=imax, nset=iset, nsgf_set=nsgfa)
     333         212 :          maxli = MAX(maxli, imax)
     334         212 :          max_nset = MAX(max_nset, iset)
     335        1142 :          max_nsgfa = MAX(max_nsgfa, MAXVAL(nsgfa))
     336             :       END DO
     337             :       max_nsgfb = 0
     338             :       max_ncob = 0
     339             :       maxlj = 0
     340         344 :       DO ibasis = 1, nbasis
     341             :          CALL get_gto_basis_set(gto_basis_set=basis_set_list_b(ibasis)%gto_basis_set, &
     342         212 :                                 maxl=imax, nset=iset, nsgf_set=nsgfb, npgf=npgfb)
     343         212 :          maxlj = MAX(maxlj, imax)
     344         212 :          max_nset = MAX(max_nset, iset)
     345        1010 :          max_nsgfb = MAX(max_nsgfb, MAXVAL(nsgfb))
     346        1354 :          max_ncob = MAX(max_ncob, MAXVAL(npgfb)*ncoset(maxlj))
     347             :       END DO
     348             :       maxlk = 0
     349             :       max_ncoc = 0
     350         344 :       DO ibasis = 1, nbasis
     351             :          CALL get_gto_basis_set(gto_basis_set=basis_set_list_c(ibasis)%gto_basis_set, &
     352         212 :                                 maxl=imax, nset=iset, npgf=npgfc)
     353         212 :          maxlk = MAX(maxlk, imax)
     354         212 :          max_nset = MAX(max_nset, iset)
     355         844 :          max_ncoc = MAX(max_ncoc, MAXVAL(npgfc)*ncoset(maxlk))
     356             :       END DO
     357         132 :       m_max = maxli + maxlj + maxlk
     358             : 
     359             :       !Screening
     360         132 :       do_screen = .FALSE.
     361         132 :       IF (PRESENT(eps_screen)) THEN
     362          42 :          do_screen = .TRUE.
     363          42 :          my_eps_screen = eps_screen
     364             :       END IF
     365         132 :       screen_radius = 0.0_dp
     366         132 :       IF (potential_parameter%potential_type == do_potential_truncated .OR. &
     367             :           potential_parameter%potential_type == do_potential_short) THEN
     368             : 
     369          12 :          screen_radius = potential_parameter%cutoff_radius*cutoff_screen_factor
     370         120 :       ELSE IF (potential_parameter%potential_type == do_potential_coulomb) THEN
     371             : 
     372          72 :          screen_radius = 1000000.0_dp
     373             :       END IF
     374             : 
     375             :       !get maximum contraction values for abc_contract screening
     376         132 :       IF (do_screen) THEN
     377             : 
     378             :          !Allocate max_contraction arrays such that we have a specific value for each set/kind
     379           0 :          ALLOCATE (max_contr(max_nset, nbasis), max_contra(max_nset, nbasis), &
     380         546 :                    max_contrb(max_nset, nbasis), max_contrc(max_nset, nbasis))
     381             : 
     382             :          !Not the most elegent, but better than copying 3 times the same
     383         168 :          DO ilist = 1, 3
     384             : 
     385         126 :             IF (ilist == 1) basis_set_list => basis_set_list_a
     386         126 :             IF (ilist == 2) basis_set_list => basis_set_list_b
     387         126 :             IF (ilist == 3) basis_set_list => basis_set_list_c
     388             : 
     389        1392 :             max_contr = 0.0_dp
     390             : 
     391         330 :             DO ibasis = 1, nbasis
     392         204 :                basis_set => basis_set_list(ibasis)%gto_basis_set
     393             : 
     394        1018 :                DO iset = 1, basis_set%nset
     395             : 
     396         688 :                   ncoa = basis_set%npgf(iset)*ncoset(basis_set%lmax(iset))
     397         688 :                   sgfa = basis_set%first_sgf(1, iset)
     398         688 :                   egfa = sgfa + basis_set%nsgf_set(iset) - 1
     399             : 
     400             :                   max_contr(iset, ibasis) = &
     401      465504 :                      MAXVAL((/(SUM(ABS(basis_set%sphi(1:ncoa, i))), i=sgfa, egfa)/))
     402             : 
     403             :                END DO !iset
     404             :             END DO !ibasis
     405             : 
     406         548 :             IF (ilist == 1) max_contra(:, :) = max_contr(:, :)
     407         548 :             IF (ilist == 2) max_contrb(:, :) = max_contr(:, :)
     408         590 :             IF (ilist == 3) max_contrc(:, :) = max_contr(:, :)
     409             :          END DO !ilist
     410          42 :          DEALLOCATE (max_contr)
     411             :       END IF !do_screen
     412             : 
     413             :       !To minimize memory ops in contraction, we need to pre-allocate buffers, pre-tranpose sphi_a
     414             :       !and also trim sphi in general to have contiguous arrays
     415        5040 :       ALLOCATE (tspa(max_nset, nbasis), spb(max_nset, nbasis), spc(max_nset, nbasis))
     416         344 :       DO ibasis = 1, nbasis
     417        1372 :          DO iset = 1, max_nset
     418        1028 :             NULLIFY (tspa(iset, ibasis)%array)
     419        1028 :             NULLIFY (spb(iset, ibasis)%array)
     420        1240 :             NULLIFY (spc(iset, ibasis)%array)
     421             :          END DO
     422             :       END DO
     423             : 
     424         528 :       DO ilist = 1, 3
     425             : 
     426        1164 :          DO ibasis = 1, nbasis
     427         636 :             IF (ilist == 1) basis_set => basis_set_list_a(ibasis)%gto_basis_set
     428         636 :             IF (ilist == 2) basis_set => basis_set_list_b(ibasis)%gto_basis_set
     429         636 :             IF (ilist == 3) basis_set => basis_set_list_c(ibasis)%gto_basis_set
     430             : 
     431        3128 :             DO iset = 1, basis_set%nset
     432             : 
     433        2096 :                ncoa = basis_set%npgf(iset)*ncoset(basis_set%lmax(iset))
     434        2096 :                sgfa = basis_set%first_sgf(1, iset)
     435        2096 :                egfa = sgfa + basis_set%nsgf_set(iset) - 1
     436             : 
     437        2732 :                IF (ilist == 1) THEN
     438        3192 :                   ALLOCATE (tspa(iset, ibasis)%array(basis_set%nsgf_set(iset), ncoa))
     439       41750 :                   tspa(iset, ibasis)%array(:, :) = TRANSPOSE(basis_set%sphi(1:ncoa, sgfa:egfa))
     440        1298 :                ELSE IF (ilist == 2) THEN
     441        3192 :                   ALLOCATE (spb(iset, ibasis)%array(ncoa, basis_set%nsgf_set(iset)))
     442       36942 :                   spb(iset, ibasis)%array(:, :) = basis_set%sphi(1:ncoa, sgfa:egfa)
     443             :                ELSE
     444        2000 :                   ALLOCATE (spc(iset, ibasis)%array(ncoa, basis_set%nsgf_set(iset)))
     445     2179948 :                   spc(iset, ibasis)%array(:, :) = basis_set%sphi(1:ncoa, sgfa:egfa)
     446             :                END IF
     447             : 
     448             :             END DO !iset
     449             :          END DO !ibasis
     450             :       END DO !ilist
     451             : 
     452         132 :       my_sort_bc = .FALSE.
     453         132 :       IF (PRESENT(only_bc_same_center)) my_sort_bc = only_bc_same_center
     454             : 
     455             :       !Init the truncated Coulomb operator
     456         132 :       CALL get_qs_env(qs_env, para_env=para_env)
     457         132 :       IF (potential_parameter%potential_type == do_potential_truncated) THEN
     458             : 
     459             :          !open the file only if necessary
     460           6 :          IF (m_max > get_lmax_init()) THEN
     461           0 :             IF (para_env%mepos == 0) THEN
     462           0 :                CALL open_file(unit_number=unit_id, file_name=potential_parameter%filename)
     463             :             END IF
     464           0 :             CALL init(m_max, unit_id, para_env%mepos, para_env)
     465           0 :             IF (para_env%mepos == 0) THEN
     466           0 :                CALL close_file(unit_id)
     467             :             END IF
     468             :          END IF
     469             :       END IF
     470             : 
     471             :       !Inint the initial gamma function before the OMP region as it is not thread safe
     472         132 :       CALL init_md_ftable(nmax=m_max)
     473             : 
     474             :       !Strategy: we use the o3c iterator because it is OMP parallelized and also takes the
     475             :       !          only_bc_same_center argument. Only the dbcsr_put_block is critical
     476             : 
     477             :       nthread = 1
     478         132 : !$    nthread = omp_get_max_threads()
     479             : 
     480         132 :       ALLOCATE (o3c)
     481             :       CALL init_o3c_container(o3c, 1, basis_set_list_a, basis_set_list_b, basis_set_list_c, &
     482         132 :                               ab_nl, ac_nl, only_bc_same_center=my_sort_bc)
     483         132 :       CALL o3c_iterator_create(o3c, o3c_iterator, nthread=nthread)
     484             : 
     485             : !$OMP PARALLEL DEFAULT(NONE) &
     486             : !$OMP SHARED (pq_X,do_screen,max_nset,basis_set_list_a,max_contra,max_contrb,max_contrc,max_nsgfa,&
     487             : !$OMP         basis_set_list_b, basis_set_list_c,ncoset,screen_radius,potential_parameter,max_ncob,&
     488             : !$OMP         my_eps_screen,maxli,maxlj,maxlk,my_sort_bc,nthread,o3c,o3c_iterator,tspa,spb,spc,&
     489             : !$OMP         max_ncoc,max_nsgfb) &
     490             : !$OMP PRIVATE (lib,i,mepos,work,iset,ncoa,sgfa,egfa,nseta,&
     491             : !$OMP          iatom,ikind,jatom,jkind,katom,kkind,rij,rik,rjk,basis_set_a,nsetb,&
     492             : !$OMP          la_max,la_min,lb_max,lb_min,lc_max,lc_min,npgfa,npgfb,npgfc,nsgfa,nsgfb,nsgfc,ri,rk,&
     493             : !$OMP          first_sgfa,first_sgfb,first_sgfc,set_radius_a,set_radius_b,set_radius_c, nsetc,rj,&
     494             : !$OMP          rpgf_a,rpgf_b,rpgf_c,zeta,zetb,zetc,basis_set_b,basis_set_c,dij,dik,djk,ni,nj,nk,&
     495         132 : !$OMP          iabc,sabc,jset,kset,ncob,ncoc,sgfb,sgfc,egfb,egfc,sabc_ext,cpp_buffer,ccp_buffer)
     496             : 
     497             :       mepos = 0
     498             : !$    mepos = omp_get_thread_num()
     499             : 
     500             :       !pre-allocate work buffers for LIBXSMM contract in order to avoid memory ops
     501             :       ALLOCATE (cpp_buffer(max_nsgfa*max_ncob))
     502             :       ALLOCATE (ccp_buffer(max_nsgfa*max_nsgfb*max_ncoc))
     503             : 
     504             :       !note: we do not initalize libxsmm here, because we assume that if the flag is there, then it
     505             :       !      is done in dbcsr already
     506             : 
     507             :       !each thread need its own libint object (internals may change at different rates)
     508             :       CALL cp_libint_init_3eri(lib, MAX(maxli, maxlj, maxlk))
     509             :       CALL cp_libint_set_contrdepth(lib, 1)
     510             : 
     511             :       DO WHILE (o3c_iterate(o3c_iterator, mepos=mepos) == 0)
     512             :          CALL get_o3c_iterator_info(o3c_iterator, mepos=mepos, ikind=ikind, jkind=jkind, kkind=kkind, &
     513             :                                     iatom=iatom, jatom=jatom, katom=katom, rij=rij, rik=rik)
     514             : 
     515             :          !get first center basis info
     516             :          basis_set_a => basis_set_list_a(ikind)%gto_basis_set
     517             :          first_sgfa => basis_set_a%first_sgf
     518             :          la_max => basis_set_a%lmax
     519             :          la_min => basis_set_a%lmin
     520             :          npgfa => basis_set_a%npgf
     521             :          nseta = basis_set_a%nset
     522             :          nsgfa => basis_set_a%nsgf_set
     523             :          zeta => basis_set_a%zet
     524             :          rpgf_a => basis_set_a%pgf_radius
     525             :          set_radius_a => basis_set_a%set_radius
     526             :          ni = SUM(nsgfa)
     527             :          !second center basis info
     528             :          basis_set_b => basis_set_list_b(jkind)%gto_basis_set
     529             :          first_sgfb => basis_set_b%first_sgf
     530             :          lb_max => basis_set_b%lmax
     531             :          lb_min => basis_set_b%lmin
     532             :          npgfb => basis_set_b%npgf
     533             :          nsetb = basis_set_b%nset
     534             :          nsgfb => basis_set_b%nsgf_set
     535             :          zetb => basis_set_b%zet
     536             :          rpgf_b => basis_set_b%pgf_radius
     537             :          set_radius_b => basis_set_b%set_radius
     538             :          nj = SUM(nsgfb)
     539             :          !third center basis info
     540             :          basis_set_c => basis_set_list_c(kkind)%gto_basis_set
     541             :          first_sgfc => basis_set_c%first_sgf
     542             :          lc_max => basis_set_c%lmax
     543             :          lc_min => basis_set_c%lmin
     544             :          npgfc => basis_set_c%npgf
     545             :          nsetc = basis_set_c%nset
     546             :          nsgfc => basis_set_c%nsgf_set
     547             :          zetc => basis_set_c%zet
     548             :          rpgf_c => basis_set_c%pgf_radius
     549             :          set_radius_c => basis_set_c%set_radius
     550             :          nk = SUM(nsgfc)
     551             : 
     552             :          !position and distances, only relative pos matter for libint
     553             :          rjk = rik - rij
     554             :          ri = 0.0_dp
     555             :          rj = rij ! ri + rij
     556             :          rk = rik ! ri + rik
     557             : 
     558             :          djk = NORM2(rjk)
     559             :          dij = NORM2(rij)
     560             :          dik = NORM2(rik)
     561             : 
     562             :          !sgf integrals
     563             :          ALLOCATE (iabc(ni, nj, nk))
     564             :          iabc(:, :, :) = 0.0_dp
     565             : 
     566             :          DO iset = 1, nseta
     567             :             ncoa = npgfa(iset)*ncoset(la_max(iset))
     568             :             sgfa = first_sgfa(1, iset)
     569             :             egfa = sgfa + nsgfa(iset) - 1
     570             : 
     571             :             DO jset = 1, nsetb
     572             :                ncob = npgfb(jset)*ncoset(lb_max(jset))
     573             :                sgfb = first_sgfb(1, jset)
     574             :                egfb = sgfb + nsgfb(jset) - 1
     575             : 
     576             :                !screening (overlap)
     577             :                IF (set_radius_a(iset) + set_radius_b(jset) < dij) CYCLE
     578             : 
     579             :                DO kset = 1, nsetc
     580             :                   ncoc = npgfc(kset)*ncoset(lc_max(kset))
     581             :                   sgfc = first_sgfc(1, kset)
     582             :                   egfc = sgfc + nsgfc(kset) - 1
     583             : 
     584             :                   !screening (potential)
     585             :                   IF (set_radius_a(iset) + set_radius_c(kset) + screen_radius < dik) CYCLE
     586             :                   IF (set_radius_b(jset) + set_radius_c(kset) + screen_radius < djk) CYCLE
     587             : 
     588             :                   !pgf integrals
     589             :                   ALLOCATE (sabc(ncoa, ncob, ncoc))
     590             :                   sabc(:, :, :) = 0.0_dp
     591             : 
     592             :                   IF (do_screen) THEN
     593             :                      CALL eri_3center(sabc, la_min(iset), la_max(iset), npgfa(iset), zeta(:, iset), &
     594             :                                       rpgf_a(:, iset), ri, lb_min(jset), lb_max(jset), npgfb(jset), &
     595             :                                       zetb(:, jset), rpgf_b(:, jset), rj, lc_min(kset), lc_max(kset), &
     596             :                                       npgfc(kset), zetc(:, kset), rpgf_c(:, kset), rk, dij, dik, &
     597             :                                       djk, lib, potential_parameter, int_abc_ext=sabc_ext)
     598             :                      IF (my_eps_screen > sabc_ext*(max_contra(iset, ikind)* &
     599             :                                                    max_contrb(jset, jkind)* &
     600             :                                                    max_contrc(kset, kkind))) THEN
     601             :                         DEALLOCATE (sabc)
     602             :                         CYCLE
     603             :                      END IF
     604             :                   ELSE
     605             :                      CALL eri_3center(sabc, la_min(iset), la_max(iset), npgfa(iset), zeta(:, iset), &
     606             :                                       rpgf_a(:, iset), ri, lb_min(jset), lb_max(jset), npgfb(jset), &
     607             :                                       zetb(:, jset), rpgf_b(:, jset), rj, lc_min(kset), lc_max(kset), &
     608             :                                       npgfc(kset), zetc(:, kset), rpgf_c(:, kset), rk, dij, dik, &
     609             :                                       djk, lib, potential_parameter)
     610             :                   END IF
     611             : 
     612             :                   ALLOCATE (work(nsgfa(iset), nsgfb(jset), nsgfc(kset)))
     613             : 
     614             :                   CALL libxsmm_abc_contract(work, sabc, tspa(iset, ikind)%array, spb(jset, jkind)%array, &
     615             :                                             spc(kset, kkind)%array, ncoa, ncob, ncoc, nsgfa(iset), &
     616             :                                             nsgfb(jset), nsgfc(kset), cpp_buffer, ccp_buffer)
     617             : 
     618             :                   iabc(sgfa:egfa, sgfb:egfb, sgfc:egfc) = work(:, :, :)
     619             :                   DEALLOCATE (sabc, work)
     620             : 
     621             :                END DO !kset
     622             :             END DO !jset
     623             :          END DO !iset
     624             : 
     625             :          !Add the integral to the proper tensor block
     626             : !$OMP CRITICAL
     627             :          CALL dbt_put_block(pq_X, [iatom, jatom, katom], SHAPE(iabc), iabc, summation=.TRUE.)
     628             : !$OMP END CRITICAL
     629             : 
     630             :          DEALLOCATE (iabc)
     631             :       END DO !o3c_iterator
     632             : 
     633             :       CALL cp_libint_cleanup_3eri(lib)
     634             : 
     635             : !$OMP END PARALLEL
     636         132 :       CALL o3c_iterator_release(o3c_iterator)
     637         132 :       CALL release_o3c_container(o3c)
     638         132 :       DEALLOCATE (o3c)
     639             : 
     640         780 :       DO iset = 1, max_nset
     641        1808 :          DO ibasis = 1, nbasis
     642        1028 :             IF (ASSOCIATED(tspa(iset, ibasis)%array)) DEALLOCATE (tspa(iset, ibasis)%array)
     643        1028 :             IF (ASSOCIATED(spb(iset, ibasis)%array)) DEALLOCATE (spb(iset, ibasis)%array)
     644        1676 :             IF (ASSOCIATED(spc(iset, ibasis)%array)) DEALLOCATE (spc(iset, ibasis)%array)
     645             :          END DO
     646             :       END DO
     647         132 :       DEALLOCATE (tspa, spb, spc)
     648             : 
     649         132 :       CALL timestop(handle)
     650             : 
     651         264 :    END SUBROUTINE fill_pqX_tensor
     652             : 
     653             : ! **************************************************************************************************
     654             : !> \brief Builds a neighbor lists set for overlaping 2-center S_ab, where b is restricted on a
     655             : !>        a given list of atoms. Used for Coulomb RI where (aI|P) = sum_b C_bI (ab|P), where
     656             : !>        contraction coeff C_bI is assumed to be non-zero only on excited atoms
     657             : !> \param ab_list the neighbor list
     658             : !> \param basis_a basis set list for atom a
     659             : !> \param basis_b basis set list for atom b
     660             : !> \param qs_env ...
     661             : !> \param excited_atoms the indices of the excited atoms on which b is centered
     662             : !> \param ext_dist2d use an external distribution2d
     663             : ! **************************************************************************************************
     664         256 :    SUBROUTINE build_xas_tdp_ovlp_nl(ab_list, basis_a, basis_b, qs_env, excited_atoms, ext_dist2d)
     665             : 
     666             :       TYPE(neighbor_list_set_p_type), DIMENSION(:), &
     667             :          POINTER                                         :: ab_list
     668             :       TYPE(gto_basis_set_p_type), DIMENSION(:), POINTER  :: basis_a, basis_b
     669             :       TYPE(qs_environment_type), POINTER                 :: qs_env
     670             :       INTEGER, DIMENSION(:), INTENT(IN), OPTIONAL        :: excited_atoms
     671             :       TYPE(distribution_2d_type), OPTIONAL, POINTER      :: ext_dist2d
     672             : 
     673             :       INTEGER                                            :: ikind, nkind
     674             :       LOGICAL                                            :: my_restrictb
     675             :       LOGICAL, ALLOCATABLE, DIMENSION(:)                 :: a_present, b_present
     676             :       REAL(dp)                                           :: subcells
     677             :       REAL(dp), ALLOCATABLE, DIMENSION(:)                :: a_radius, b_radius
     678         256 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: pair_radius
     679         256 :       TYPE(atomic_kind_type), DIMENSION(:), POINTER      :: atomic_kind_set
     680             :       TYPE(cell_type), POINTER                           :: cell
     681             :       TYPE(distribution_1d_type), POINTER                :: distribution_1d
     682             :       TYPE(distribution_2d_type), POINTER                :: distribution_2d
     683         256 :       TYPE(local_atoms_type), ALLOCATABLE, DIMENSION(:)  :: atom2d
     684         256 :       TYPE(molecule_type), DIMENSION(:), POINTER         :: molecule_set
     685         256 :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
     686             : 
     687         256 :       NULLIFY (atomic_kind_set, distribution_1d, distribution_2d, molecule_set, particle_set, cell)
     688             : 
     689             : !  Initialization
     690         256 :       CALL get_qs_env(qs_env, nkind=nkind)
     691         256 :       CALL section_vals_val_get(qs_env%input, "DFT%SUBCELLS", r_val=subcells)
     692             : 
     693         256 :       my_restrictb = .FALSE.
     694         256 :       IF (PRESENT(excited_atoms)) THEN
     695          88 :          my_restrictb = .TRUE.
     696             :       END IF
     697             : 
     698        1280 :       ALLOCATE (a_present(nkind), b_present(nkind))
     699         662 :       a_present = .FALSE.
     700         662 :       b_present = .FALSE.
     701        1280 :       ALLOCATE (a_radius(nkind), b_radius(nkind))
     702         662 :       a_radius = 0.0_dp
     703         662 :       b_radius = 0.0_dp
     704             : 
     705             : !  Set up the radii
     706         662 :       DO ikind = 1, nkind
     707         406 :          IF (ASSOCIATED(basis_a(ikind)%gto_basis_set)) THEN
     708         406 :             a_present(ikind) = .TRUE.
     709         406 :             CALL get_gto_basis_set(basis_a(ikind)%gto_basis_set, kind_radius=a_radius(ikind))
     710             :          END IF
     711             : 
     712         662 :          IF (ASSOCIATED(basis_b(ikind)%gto_basis_set)) THEN
     713         406 :             b_present(ikind) = .TRUE.
     714         406 :             CALL get_gto_basis_set(basis_b(ikind)%gto_basis_set, kind_radius=b_radius(ikind))
     715             :          END IF
     716             :       END DO !ikind
     717             : 
     718        1024 :       ALLOCATE (pair_radius(nkind, nkind))
     719        1404 :       pair_radius = 0.0_dp
     720         256 :       CALL pair_radius_setup(a_present, b_present, a_radius, b_radius, pair_radius)
     721             : 
     722             : !  Set up the nl
     723             :       CALL get_qs_env(qs_env, atomic_kind_set=atomic_kind_set, cell=cell, &
     724             :                       distribution_2d=distribution_2d, local_particles=distribution_1d, &
     725         256 :                       particle_set=particle_set, molecule_set=molecule_set)
     726             : 
     727             :       !use an external distribution_2d if required
     728         256 :       IF (PRESENT(ext_dist2d)) distribution_2d => ext_dist2d
     729             : 
     730         768 :       ALLOCATE (atom2d(nkind))
     731             :       CALL atom2d_build(atom2d, distribution_1d, distribution_2d, atomic_kind_set, &
     732         256 :                         molecule_set, .FALSE., particle_set)
     733             : 
     734         256 :       IF (my_restrictb) THEN
     735             : 
     736             :          CALL build_neighbor_lists(ab_list, particle_set, atom2d, cell, pair_radius, subcells, &
     737          88 :                                    atomb_to_keep=excited_atoms, nlname="XAS_TDP_ovlp_nl")
     738             : 
     739             :       ELSE
     740             : 
     741             :          CALL build_neighbor_lists(ab_list, particle_set, atom2d, cell, pair_radius, subcells, &
     742         168 :                                    nlname="XAS_TDP_ovlp_nl")
     743             : 
     744             :       END IF
     745             : !  Clean-up
     746         256 :       CALL atom2d_cleanup(atom2d)
     747             : 
     748         512 :    END SUBROUTINE build_xas_tdp_ovlp_nl
     749             : 
     750             : ! **************************************************************************************************
     751             : !> \brief Builds a neighbor lists set taylored for 3-center integral within XAS TDP, such that only
     752             : !>        excited atoms are taken into account for the list_c
     753             : !> \param ac_list the neighbor list ready for 3-center integrals
     754             : !> \param basis_a basis set list for atom a
     755             : !> \param basis_c basis set list for atom c
     756             : !> \param op_type to indicate whther the list should be built with overlap, Coulomb or else in mind
     757             : !> \param qs_env ...
     758             : !> \param excited_atoms the indices of the excited atoms to consider (if not given, all atoms are taken)
     759             : !> \param x_range in case some truncated/screened operator is used, gives its range
     760             : !> \param ext_dist2d external distribution_2d to be used
     761             : !> \note Based on setup_neighbor_list with added features
     762             : ! **************************************************************************************************
     763         218 :    SUBROUTINE build_xas_tdp_3c_nl(ac_list, basis_a, basis_c, op_type, qs_env, excited_atoms, &
     764             :                                   x_range, ext_dist2d)
     765             : 
     766             :       TYPE(neighbor_list_set_p_type), DIMENSION(:), &
     767             :          POINTER                                         :: ac_list
     768             :       TYPE(gto_basis_set_p_type), DIMENSION(:), POINTER  :: basis_a, basis_c
     769             :       INTEGER, INTENT(IN)                                :: op_type
     770             :       TYPE(qs_environment_type), POINTER                 :: qs_env
     771             :       INTEGER, DIMENSION(:), INTENT(IN), OPTIONAL        :: excited_atoms
     772             :       REAL(dp), INTENT(IN), OPTIONAL                     :: x_range
     773             :       TYPE(distribution_2d_type), OPTIONAL, POINTER      :: ext_dist2d
     774             : 
     775             :       INTEGER                                            :: ikind, nkind
     776             :       LOGICAL                                            :: sort_atoms
     777             :       LOGICAL, ALLOCATABLE, DIMENSION(:)                 :: a_present, c_present
     778             :       REAL(dp)                                           :: subcells
     779             :       REAL(dp), ALLOCATABLE, DIMENSION(:)                :: a_radius, c_radius
     780         218 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: pair_radius
     781         218 :       TYPE(atomic_kind_type), DIMENSION(:), POINTER      :: atomic_kind_set
     782             :       TYPE(cell_type), POINTER                           :: cell
     783             :       TYPE(distribution_1d_type), POINTER                :: distribution_1d
     784             :       TYPE(distribution_2d_type), POINTER                :: distribution_2d
     785         218 :       TYPE(local_atoms_type), ALLOCATABLE, DIMENSION(:)  :: atom2d
     786         218 :       TYPE(molecule_type), DIMENSION(:), POINTER         :: molecule_set
     787         218 :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
     788             : 
     789         218 :       NULLIFY (atomic_kind_set, distribution_1d, distribution_2d, molecule_set, particle_set, cell)
     790             : 
     791             : !  Initialization
     792         218 :       CALL get_qs_env(qs_env, nkind=nkind)
     793         218 :       CALL section_vals_val_get(qs_env%input, "DFT%SUBCELLS", r_val=subcells)
     794         218 :       sort_atoms = .FALSE.
     795         218 :       IF ((PRESENT(excited_atoms))) sort_atoms = .TRUE.
     796             : 
     797        1090 :       ALLOCATE (a_present(nkind), c_present(nkind))
     798         564 :       a_present = .FALSE.
     799         564 :       c_present = .FALSE.
     800        1090 :       ALLOCATE (a_radius(nkind), c_radius(nkind))
     801         564 :       a_radius = 0.0_dp
     802         564 :       c_radius = 0.0_dp
     803             : 
     804             : !  Set up the radii, depending on the operator type
     805         218 :       IF (op_type == do_potential_id) THEN
     806             : 
     807             :          !overlap => use the kind radius for both a and c
     808         352 :          DO ikind = 1, nkind
     809             :             !orbital basis set
     810         214 :             IF (ASSOCIATED(basis_a(ikind)%gto_basis_set)) THEN
     811         214 :                a_present(ikind) = .TRUE.
     812         214 :                CALL get_gto_basis_set(basis_a(ikind)%gto_basis_set, kind_radius=a_radius(ikind))
     813             :             END IF
     814             :             !RI_XAS basis set
     815         352 :             IF (ASSOCIATED(basis_c(ikind)%gto_basis_set)) THEN
     816         214 :                c_present(ikind) = .TRUE.
     817         214 :                CALL get_gto_basis_set(basis_c(ikind)%gto_basis_set, kind_radius=c_radius(ikind))
     818             :             END IF
     819             :          END DO !ikind
     820             : 
     821          80 :       ELSE IF (op_type == do_potential_coulomb) THEN
     822             : 
     823             :          !Coulomb operator, virtually infinite range => set c_radius to arbitrarily large number
     824         164 :          DO ikind = 1, nkind
     825         108 :             IF (ASSOCIATED(basis_c(ikind)%gto_basis_set)) THEN
     826         108 :                c_present(ikind) = .TRUE.
     827         108 :                c_radius(ikind) = 1000000.0_dp
     828             :             END IF
     829         164 :             IF (ASSOCIATED(basis_a(ikind)%gto_basis_set)) a_present(ikind) = .TRUE.
     830             :          END DO !ikind
     831             : 
     832          24 :       ELSE IF (op_type == do_potential_truncated .OR. op_type == do_potential_short) THEN
     833             : 
     834             :          !Truncated coulomb/short range: set c_radius to x_range + the kind_radii
     835          48 :          DO ikind = 1, nkind
     836          24 :             IF (ASSOCIATED(basis_a(ikind)%gto_basis_set)) THEN
     837          24 :                a_present(ikind) = .TRUE.
     838          24 :                CALL get_gto_basis_set(basis_a(ikind)%gto_basis_set, kind_radius=a_radius(ikind))
     839             :             END IF
     840          48 :             IF (ASSOCIATED(basis_c(ikind)%gto_basis_set)) THEN
     841          24 :                c_present(ikind) = .TRUE.
     842          24 :                CALL get_gto_basis_set(basis_c(ikind)%gto_basis_set, kind_radius=c_radius(ikind))
     843          24 :                c_radius(ikind) = c_radius(ikind) + x_range
     844             :             END IF
     845             :          END DO !ikind
     846             : 
     847             :       ELSE
     848           0 :          CPABORT("Operator not known")
     849             :       END IF
     850             : 
     851         872 :       ALLOCATE (pair_radius(nkind, nkind))
     852        1198 :       pair_radius = 0.0_dp
     853         218 :       CALL pair_radius_setup(a_present, c_present, a_radius, c_radius, pair_radius)
     854             : 
     855             : !  Actually setup the list
     856             :       CALL get_qs_env(qs_env, atomic_kind_set=atomic_kind_set, cell=cell, &
     857             :                       distribution_2d=distribution_2d, local_particles=distribution_1d, &
     858         218 :                       particle_set=particle_set, molecule_set=molecule_set)
     859             : 
     860             :       !use an external distribution_2d if required
     861         218 :       IF (PRESENT(ext_dist2d)) distribution_2d => ext_dist2d
     862             : 
     863         654 :       ALLOCATE (atom2d(nkind))
     864             :       CALL atom2d_build(atom2d, distribution_1d, distribution_2d, atomic_kind_set, &
     865         218 :                         molecule_set, .FALSE., particle_set)
     866             : 
     867         218 :       IF (sort_atoms) THEN
     868             :          CALL build_neighbor_lists(ac_list, particle_set, atom2d, cell, pair_radius, subcells, &
     869             :                                    operator_type="ABC", atomb_to_keep=excited_atoms, &
     870         218 :                                    nlname="XAS_TDP_3c_nl")
     871             :       ELSE
     872             :          CALL build_neighbor_lists(ac_list, particle_set, atom2d, cell, pair_radius, subcells, &
     873           0 :                                    operator_type="ABC", nlname="XAS_TDP_3c_nl")
     874             :       END IF
     875             : 
     876             : !  Clean-up
     877         218 :       CALL atom2d_cleanup(atom2d)
     878             : 
     879         436 :    END SUBROUTINE build_xas_tdp_3c_nl
     880             : 
     881             : ! **************************************************************************************************
     882             : !> \brief Returns an optimized distribution_2d for the given neighbor lists based on an evaluation
     883             : !>        of the cost of the corresponding 3-center integrals
     884             : !> \param opt_3c_dist2d the optimized distribution_2d
     885             : !> \param ab_list ...
     886             : !> \param ac_list ...
     887             : !> \param basis_set_a ...
     888             : !> \param basis_set_b ...
     889             : !> \param basis_set_c ...
     890             : !> \param qs_env ...
     891             : !> \param only_bc_same_center ...
     892             : ! **************************************************************************************************
     893          86 :    SUBROUTINE get_opt_3c_dist2d(opt_3c_dist2d, ab_list, ac_list, basis_set_a, basis_set_b, &
     894             :                                 basis_set_c, qs_env, only_bc_same_center)
     895             : 
     896             :       TYPE(distribution_2d_type), POINTER                :: opt_3c_dist2d
     897             :       TYPE(neighbor_list_set_p_type), DIMENSION(:), &
     898             :          POINTER                                         :: ab_list, ac_list
     899             :       TYPE(gto_basis_set_p_type), DIMENSION(:), POINTER  :: basis_set_a, basis_set_b, basis_set_c
     900             :       TYPE(qs_environment_type), POINTER                 :: qs_env
     901             :       LOGICAL, INTENT(IN), OPTIONAL                      :: only_bc_same_center
     902             : 
     903             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'get_opt_3c_dist2d'
     904             : 
     905             :       INTEGER                                            :: handle, i, iatom, ikind, ip, jatom, &
     906             :                                                             jkind, kkind, mypcol, myprow, n, &
     907             :                                                             natom, nkind, npcol, nprow, nsgfa, &
     908             :                                                             nsgfb, nsgfc
     909          86 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: nparticle_local_col, nparticle_local_row
     910          86 :       INTEGER, DIMENSION(:, :), POINTER                  :: col_dist, row_dist
     911             :       LOGICAL                                            :: my_sort_bc
     912             :       REAL(dp)                                           :: cost, rab(3), rac(3), rbc(3)
     913          86 :       REAL(dp), ALLOCATABLE, DIMENSION(:)                :: col_cost, col_proc_cost, row_cost, &
     914          86 :                                                             row_proc_cost
     915          86 :       TYPE(cp_1d_i_p_type), DIMENSION(:), POINTER        :: local_particle_col, local_particle_row
     916             :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
     917             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     918             :       TYPE(neighbor_list_iterator_p_type), &
     919          86 :          DIMENSION(:), POINTER                           :: ab_iter, ac_iter
     920          86 :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
     921          86 :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
     922             : 
     923          86 :       NULLIFY (para_env, col_dist, row_dist, blacs_env, qs_kind_set, particle_set)
     924          86 :       NULLIFY (local_particle_col, local_particle_row, ab_iter, ac_iter)
     925             : 
     926          86 :       CALL timeset(routineN, handle)
     927             : 
     928             :       !Idea: create a,b and a,c nl_iterators in the original dist, then loop over them and compute the
     929             :       !      cost of each ab pairs and project on the col/row. Then distribute the atom col/row to
     930             :       !      the proc col/row in order to spread out the cost as much as possible
     931             : 
     932          86 :       my_sort_bc = .FALSE.
     933          86 :       IF (PRESENT(only_bc_same_center)) my_sort_bc = only_bc_same_center
     934             : 
     935             :       CALL get_qs_env(qs_env, natom=natom, para_env=para_env, blacs_env=blacs_env, &
     936          86 :                       qs_kind_set=qs_kind_set, particle_set=particle_set, nkind=nkind)
     937             : 
     938          86 :       myprow = blacs_env%mepos(1) + 1
     939          86 :       mypcol = blacs_env%mepos(2) + 1
     940          86 :       nprow = blacs_env%num_pe(1)
     941          86 :       npcol = blacs_env%num_pe(2)
     942             : 
     943         430 :       ALLOCATE (col_cost(natom), row_cost(natom))
     944        1320 :       col_cost = 0.0_dp; row_cost = 0.0_dp
     945             : 
     946         430 :       ALLOCATE (row_dist(natom, 2), col_dist(natom, 2))
     947        2726 :       row_dist = 1; col_dist = 1
     948             : 
     949          86 :       CALL neighbor_list_iterator_create(ab_iter, ab_list)
     950          86 :       CALL neighbor_list_iterator_create(ac_iter, ac_list, search=.TRUE.)
     951        1165 :       DO WHILE (neighbor_list_iterate(ab_iter) == 0)
     952        1079 :          CALL get_iterator_info(ab_iter, ikind=ikind, jkind=jkind, iatom=iatom, jatom=jatom, r=rab)
     953             : 
     954        2635 :          DO kkind = 1, nkind
     955        1470 :             CALL nl_set_sub_iterator(ac_iter, ikind, kkind, iatom)
     956             : 
     957        6887 :             DO WHILE (nl_sub_iterate(ac_iter) == 0)
     958             : 
     959        4338 :                IF (my_sort_bc) THEN
     960             :                   !only take a,b,c if b,c (or a,c because of symmetry) share the same center
     961         690 :                   CALL get_iterator_info(ac_iter, r=rac)
     962        2760 :                   rbc(:) = rac(:) - rab(:)
     963        2979 :                   IF (.NOT. (ALL(ABS(rbc) .LE. 1.0E-8_dp) .OR. ALL(ABS(rac) .LE. 1.0E-8_dp))) CYCLE
     964             : 
     965             :                END IF
     966             : 
     967             :                !Use the size of integral as measure as contraciton cost seems to dominate
     968        4048 :                nsgfa = basis_set_a(ikind)%gto_basis_set%nsgf
     969        4048 :                nsgfb = basis_set_b(jkind)%gto_basis_set%nsgf
     970        4048 :                nsgfc = basis_set_c(kkind)%gto_basis_set%nsgf
     971             : 
     972        4048 :                cost = REAL(nsgfa*nsgfb*nsgfc, dp)
     973             : 
     974        4048 :                row_cost(iatom) = row_cost(iatom) + cost
     975        4338 :                col_cost(jatom) = col_cost(jatom) + cost
     976             : 
     977             :             END DO !ac_iter
     978             :          END DO !kkind
     979             :       END DO !ab_iter
     980          86 :       CALL neighbor_list_iterator_release(ab_iter)
     981          86 :       CALL neighbor_list_iterator_release(ac_iter)
     982             : 
     983          86 :       CALL para_env%sum(row_cost)
     984          86 :       CALL para_env%sum(col_cost)
     985             : 
     986             :       !Distribute the cost as evenly as possible
     987         430 :       ALLOCATE (col_proc_cost(npcol), row_proc_cost(nprow))
     988         430 :       col_proc_cost = 0.0_dp; row_proc_cost = 0.0_dp
     989         660 :       DO i = 1, natom
     990       21340 :          iatom = MAXLOC(row_cost, 1)
     991        2296 :          ip = MINLOC(row_proc_cost, 1)
     992         574 :          row_proc_cost(ip) = row_proc_cost(ip) + row_cost(iatom)
     993         574 :          row_dist(iatom, 1) = ip
     994         574 :          row_cost(iatom) = 0.0_dp
     995             : 
     996       21340 :          iatom = MAXLOC(col_cost, 1)
     997        1722 :          ip = MINLOC(col_proc_cost, 1)
     998         574 :          col_proc_cost(ip) = col_proc_cost(ip) + col_cost(iatom)
     999         574 :          col_dist(iatom, 1) = ip
    1000         660 :          col_cost(iatom) = 0.0_dp
    1001             :       END DO
    1002             : 
    1003             :       !the usual stuff
    1004         698 :       ALLOCATE (local_particle_col(nkind), local_particle_row(nkind))
    1005         430 :       ALLOCATE (nparticle_local_row(nkind), nparticle_local_col(nkind))
    1006         440 :       nparticle_local_row = 0; nparticle_local_col = 0
    1007             : 
    1008         660 :       DO iatom = 1, natom
    1009         574 :          ikind = particle_set(iatom)%atomic_kind%kind_number
    1010             : 
    1011         574 :          IF (row_dist(iatom, 1) == myprow) nparticle_local_row(ikind) = nparticle_local_row(ikind) + 1
    1012         660 :          IF (col_dist(iatom, 1) == mypcol) nparticle_local_col(ikind) = nparticle_local_col(ikind) + 1
    1013             :       END DO
    1014             : 
    1015         220 :       DO ikind = 1, nkind
    1016         134 :          n = nparticle_local_row(ikind)
    1017         360 :          ALLOCATE (local_particle_row(ikind)%array(n))
    1018             : 
    1019         134 :          n = nparticle_local_col(ikind)
    1020         488 :          ALLOCATE (local_particle_col(ikind)%array(n))
    1021             :       END DO
    1022             : 
    1023         440 :       nparticle_local_row = 0; nparticle_local_col = 0
    1024         660 :       DO iatom = 1, natom
    1025         574 :          ikind = particle_set(iatom)%atomic_kind%kind_number
    1026             : 
    1027         574 :          IF (row_dist(iatom, 1) == myprow) THEN
    1028         287 :             nparticle_local_row(ikind) = nparticle_local_row(ikind) + 1
    1029         287 :             local_particle_row(ikind)%array(nparticle_local_row(ikind)) = iatom
    1030             :          END IF
    1031         660 :          IF (col_dist(iatom, 1) == mypcol) THEN
    1032         574 :             nparticle_local_col(ikind) = nparticle_local_col(ikind) + 1
    1033         574 :             local_particle_col(ikind)%array(nparticle_local_col(ikind)) = iatom
    1034             :          END IF
    1035             :       END DO
    1036             : 
    1037             :       !Finally create the dist_2d
    1038         660 :       row_dist(:, 1) = row_dist(:, 1) - 1
    1039         660 :       col_dist(:, 1) = col_dist(:, 1) - 1
    1040             :       CALL distribution_2d_create(opt_3c_dist2d, row_distribution_ptr=row_dist, &
    1041             :                                   col_distribution_ptr=col_dist, local_rows_ptr=local_particle_row, &
    1042          86 :                                   local_cols_ptr=local_particle_col, blacs_env=blacs_env)
    1043             : 
    1044          86 :       CALL timestop(handle)
    1045             : 
    1046         172 :    END SUBROUTINE get_opt_3c_dist2d
    1047             : 
    1048             : ! **************************************************************************************************
    1049             : !> \brief Computes the RI exchange 3-center integrals (ab|c), where c is from the RI_XAS basis and
    1050             : !>        centered on excited atoms and kind. The operator used is that of the RI metric
    1051             : !> \param ex_atoms excited atoms on which the third center is located
    1052             : !> \param xas_tdp_env ...
    1053             : !> \param xas_tdp_control ...
    1054             : !> \param qs_env ...
    1055             : !> \note  This routine is called once for each excited atom. Because there are many different a,b
    1056             : !>        pairs involved, load balance is ok. This allows memory saving
    1057             : ! **************************************************************************************************
    1058          42 :    SUBROUTINE compute_ri_3c_exchange(ex_atoms, xas_tdp_env, xas_tdp_control, qs_env)
    1059             : 
    1060             :       INTEGER, DIMENSION(:), INTENT(IN)                  :: ex_atoms
    1061             :       TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
    1062             :       TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
    1063             :       TYPE(qs_environment_type), POINTER                 :: qs_env
    1064             : 
    1065             :       CHARACTER(len=*), PARAMETER :: routineN = 'compute_ri_3c_exchange'
    1066             : 
    1067             :       INTEGER                                            :: handle, natom, nkind
    1068             :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: blk_size_orb, blk_size_ri
    1069             :       TYPE(dbcsr_distribution_type)                      :: opt_dbcsr_dist
    1070          42 :       TYPE(gto_basis_set_p_type), DIMENSION(:), POINTER  :: basis_set_orb, basis_set_ri
    1071             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    1072             :       TYPE(neighbor_list_set_p_type), DIMENSION(:), &
    1073          42 :          POINTER                                         :: ab_list, ac_list
    1074          42 :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
    1075          42 :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
    1076             : 
    1077          42 :       NULLIFY (basis_set_ri, basis_set_orb, ac_list, ab_list, qs_kind_set, para_env, particle_set)
    1078             : 
    1079          42 :       CALL timeset(routineN, handle)
    1080             : 
    1081             : !  Take what we need from the qs_env
    1082             :       CALL get_qs_env(qs_env, nkind=nkind, qs_kind_set=qs_kind_set, para_env=para_env, &
    1083          42 :                       natom=natom, particle_set=particle_set)
    1084             : 
    1085             : !  Build the basis set lists
    1086         194 :       ALLOCATE (basis_set_ri(nkind))
    1087         194 :       ALLOCATE (basis_set_orb(nkind))
    1088          42 :       CALL basis_set_list_setup(basis_set_ri, "RI_XAS", qs_kind_set)
    1089          42 :       CALL basis_set_list_setup(basis_set_orb, "ORB", qs_kind_set)
    1090             : 
    1091             : !  Get the optimized distribution 2d for theses integrals (and store it in xas_tdp_env)
    1092          42 :       CALL build_xas_tdp_ovlp_nl(ab_list, basis_set_orb, basis_set_orb, qs_env)
    1093             :       CALL build_xas_tdp_3c_nl(ac_list, basis_set_orb, basis_set_ri, &
    1094             :                                xas_tdp_control%ri_m_potential%potential_type, qs_env, &
    1095          42 :                                excited_atoms=ex_atoms, x_range=xas_tdp_control%ri_m_potential%cutoff_radius)
    1096             : 
    1097             :       CALL get_opt_3c_dist2d(xas_tdp_env%opt_dist2d_ex, ab_list, ac_list, basis_set_orb, &
    1098          42 :                              basis_set_orb, basis_set_ri, qs_env)
    1099          42 :       CALL release_neighbor_list_sets(ab_list)
    1100          42 :       CALL release_neighbor_list_sets(ac_list)
    1101             : 
    1102             : !  Build the ab and ac centers neighbor lists based on the optimized distribution
    1103             :       CALL build_xas_tdp_ovlp_nl(ab_list, basis_set_orb, basis_set_orb, qs_env, &
    1104          42 :                                  ext_dist2d=xas_tdp_env%opt_dist2d_ex)
    1105             :       CALL build_xas_tdp_3c_nl(ac_list, basis_set_orb, basis_set_ri, &
    1106             :                                xas_tdp_control%ri_m_potential%potential_type, qs_env, &
    1107             :                                excited_atoms=ex_atoms, x_range=xas_tdp_control%ri_m_potential%cutoff_radius, &
    1108          42 :                                ext_dist2d=xas_tdp_env%opt_dist2d_ex)
    1109             : 
    1110             : !  Allocate, init and compute the integrals.
    1111         210 :       ALLOCATE (blk_size_orb(natom), blk_size_ri(natom))
    1112          42 :       CALL cp_dbcsr_dist2d_to_dist(xas_tdp_env%opt_dist2d_ex, opt_dbcsr_dist)
    1113          42 :       CALL get_particle_set(particle_set, qs_kind_set, nsgf=blk_size_orb, basis=basis_set_orb)
    1114          42 :       CALL get_particle_set(particle_set, qs_kind_set, nsgf=blk_size_ri, basis=basis_set_ri)
    1115             : 
    1116         420 :       ALLOCATE (xas_tdp_env%ri_3c_ex)
    1117             :       CALL create_pqX_tensor(xas_tdp_env%ri_3c_ex, ab_list, ac_list, opt_dbcsr_dist, blk_size_orb, &
    1118          42 :                              blk_size_orb, blk_size_ri)
    1119             :       CALL fill_pqX_tensor(xas_tdp_env%ri_3c_ex, ab_list, ac_list, basis_set_orb, basis_set_orb, &
    1120             :                            basis_set_ri, xas_tdp_control%ri_m_potential, qs_env, &
    1121          42 :                            eps_screen=xas_tdp_control%eps_screen)
    1122             : 
    1123             : ! Clean-up
    1124          42 :       CALL release_neighbor_list_sets(ab_list)
    1125          42 :       CALL release_neighbor_list_sets(ac_list)
    1126          42 :       CALL dbcsr_distribution_release(opt_dbcsr_dist)
    1127          42 :       DEALLOCATE (basis_set_ri, basis_set_orb)
    1128             : 
    1129             :       !not strictly necessary but avoid having any load unbalance here being reported in the
    1130             :       !timings for other routines
    1131          42 :       CALL para_env%sync()
    1132             : 
    1133          42 :       CALL timestop(handle)
    1134             : 
    1135          84 :    END SUBROUTINE compute_ri_3c_exchange
    1136             : 
    1137             : ! **************************************************************************************************
    1138             : !> \brief Computes the RI Coulomb 3-center integrals (ab|c), where c is from the RI_XAS basis and
    1139             : !>        centered on the excited atoms of xas_tdp_env
    1140             : !> \param xas_tdp_env ...
    1141             : !> \param qs_env ...
    1142             : !> \note  The ri_3c_coul tensor of xas_tdp_env is defined and allocated here. Only computed once
    1143             : !>        for the whole system (for optimized load balance). Ok because not too much memory needed
    1144             : ! **************************************************************************************************
    1145          44 :    SUBROUTINE compute_ri_3c_coulomb(xas_tdp_env, qs_env)
    1146             : 
    1147             :       TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
    1148             :       TYPE(qs_environment_type), POINTER                 :: qs_env
    1149             : 
    1150             :       CHARACTER(len=*), PARAMETER :: routineN = 'compute_ri_3c_coulomb'
    1151             : 
    1152             :       INTEGER                                            :: handle, natom, nkind
    1153             :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: blk_size_orb, blk_size_ri
    1154             :       TYPE(dbcsr_distribution_type)                      :: opt_dbcsr_dist
    1155          44 :       TYPE(gto_basis_set_p_type), DIMENSION(:), POINTER  :: basis_set_orb, basis_set_ri
    1156             :       TYPE(libint_potential_type)                        :: pot
    1157             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    1158             :       TYPE(neighbor_list_set_p_type), DIMENSION(:), &
    1159          44 :          POINTER                                         :: ab_list, ac_list
    1160          44 :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
    1161          44 :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
    1162             : 
    1163          44 :       NULLIFY (basis_set_ri, basis_set_orb, ac_list, ab_list, qs_kind_set, para_env, particle_set)
    1164             : 
    1165          44 :       CALL timeset(routineN, handle)
    1166             : 
    1167             : !  Take what we need from the qs_env
    1168             :       CALL get_qs_env(qs_env, nkind=nkind, qs_kind_set=qs_kind_set, para_env=para_env, &
    1169          44 :                       natom=natom, particle_set=particle_set)
    1170             : 
    1171             : !  Build the basis set lists
    1172         198 :       ALLOCATE (basis_set_ri(nkind))
    1173         198 :       ALLOCATE (basis_set_orb(nkind))
    1174          44 :       CALL basis_set_list_setup(basis_set_ri, "RI_XAS", qs_kind_set)
    1175          44 :       CALL basis_set_list_setup(basis_set_orb, "ORB", qs_kind_set)
    1176             : 
    1177             : !  Get the optimized distribution 2d for these integrals (and store it in xas_tdp_env)
    1178             :       CALL build_xas_tdp_ovlp_nl(ab_list, basis_set_orb, basis_set_orb, qs_env, &
    1179          44 :                                  excited_atoms=xas_tdp_env%ex_atom_indices)
    1180             :       CALL build_xas_tdp_3c_nl(ac_list, basis_set_orb, basis_set_ri, do_potential_id, &
    1181          44 :                                qs_env, excited_atoms=xas_tdp_env%ex_atom_indices)
    1182             :       CALL get_opt_3c_dist2d(xas_tdp_env%opt_dist2d_coul, ab_list, ac_list, basis_set_orb, &
    1183          44 :                              basis_set_orb, basis_set_ri, qs_env, only_bc_same_center=.TRUE.)
    1184          44 :       CALL release_neighbor_list_sets(ab_list)
    1185          44 :       CALL release_neighbor_list_sets(ac_list)
    1186             : 
    1187             : !  Build a neighbor list for the ab centers. Assume (aI|c) = sum_b c_bI (ab|c), with c_bI only
    1188             : !  non-zero for b centered on the same atom as c => build overlap nl, but only keeping b if centered
    1189             : !  on an excited atom
    1190             :       CALL build_xas_tdp_ovlp_nl(ab_list, basis_set_orb, basis_set_orb, qs_env, &
    1191             :                                  excited_atoms=xas_tdp_env%ex_atom_indices, &
    1192          44 :                                  ext_dist2d=xas_tdp_env%opt_dist2d_coul)
    1193             : 
    1194             : !  Build a neighbor list for the ac centers. Since we later contract as (aI|c) and we assume I is
    1195             : !  very localized on the same atom as c, we take a,c as neighbors if they overlap
    1196             :       CALL build_xas_tdp_3c_nl(ac_list, basis_set_orb, basis_set_ri, do_potential_id, &
    1197             :                                qs_env, excited_atoms=xas_tdp_env%ex_atom_indices, &
    1198          44 :                                ext_dist2d=xas_tdp_env%opt_dist2d_coul)
    1199             : 
    1200             : !  Allocate, init and compute the integrals
    1201         220 :       ALLOCATE (blk_size_orb(natom), blk_size_ri(natom))
    1202          44 :       CALL cp_dbcsr_dist2d_to_dist(xas_tdp_env%opt_dist2d_coul, opt_dbcsr_dist)
    1203          44 :       CALL get_particle_set(particle_set, qs_kind_set, nsgf=blk_size_orb, basis=basis_set_orb)
    1204          44 :       CALL get_particle_set(particle_set, qs_kind_set, nsgf=blk_size_ri, basis=basis_set_ri)
    1205             :       pot%potential_type = do_potential_coulomb
    1206             : 
    1207         440 :       ALLOCATE (xas_tdp_env%ri_3c_coul)
    1208             :       CALL create_pqX_tensor(xas_tdp_env%ri_3c_coul, ab_list, ac_list, opt_dbcsr_dist, blk_size_orb, &
    1209          44 :                              blk_size_orb, blk_size_ri, only_bc_same_center=.TRUE.)
    1210             :       CALL fill_pqX_tensor(xas_tdp_env%ri_3c_coul, ab_list, ac_list, basis_set_orb, basis_set_orb, &
    1211          44 :                            basis_set_ri, pot, qs_env, only_bc_same_center=.TRUE.)
    1212             : 
    1213             : ! Clean-up
    1214          44 :       CALL release_neighbor_list_sets(ab_list)
    1215          44 :       CALL release_neighbor_list_sets(ac_list)
    1216          44 :       CALL dbcsr_distribution_release(opt_dbcsr_dist)
    1217          44 :       DEALLOCATE (basis_set_ri, basis_set_orb)
    1218             : 
    1219             :       !not strictly necessary but avoid having any load unbalance here being reported in the
    1220             :       !timings for other routines
    1221          44 :       CALL para_env%sync()
    1222             : 
    1223          44 :       CALL timestop(handle)
    1224             : 
    1225          88 :    END SUBROUTINE compute_ri_3c_coulomb
    1226             : 
    1227             : ! **************************************************************************************************
    1228             : !> \brief Computes the two-center Exchange integral needed for the RI in kernel calculation. Stores
    1229             : !>        the integrals in the xas_tdp_env as global (small) arrays. Does that for a given excited
    1230             : !>        kind. The quantity stored is M^-1 (P|Q) M^-1, where M is the RI metric. If the metric is
    1231             : !>        the same as the exchange potential, then we end up with the V-approximation (P|Q)^-1
    1232             : !>        By default (if no metric), the ri_m_potential is a copy of the x_potential
    1233             : !> \param ex_kind ...
    1234             : !> \param xas_tdp_env ...
    1235             : !> \param xas_tdp_control ...
    1236             : !> \param qs_env ...
    1237             : !> \note Computes all these integrals in non-PBCs as we assume that the range is short enough that
    1238             : !>       atoms do not exchange with their periodic images
    1239             : ! **************************************************************************************************
    1240          42 :    SUBROUTINE compute_ri_exchange2_int(ex_kind, xas_tdp_env, xas_tdp_control, qs_env)
    1241             : 
    1242             :       INTEGER, INTENT(IN)                                :: ex_kind
    1243             :       TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
    1244             :       TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
    1245             :       TYPE(qs_environment_type), POINTER                 :: qs_env
    1246             : 
    1247             :       INTEGER                                            :: egfp, egfq, maxl, ncop, ncoq, nset, &
    1248             :                                                             nsgf, pset, qset, sgfp, sgfq, unit_id
    1249          42 :       INTEGER, DIMENSION(:), POINTER                     :: lmax, lmin, npgf_set, nsgf_set
    1250          42 :       INTEGER, DIMENSION(:, :), POINTER                  :: first_sgf
    1251             :       REAL(dp)                                           :: r(3)
    1252          42 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: metric, pq, work
    1253          42 :       REAL(dp), DIMENSION(:, :), POINTER                 :: rpgf, sphi, zet
    1254             :       TYPE(cp_libint_t)                                  :: lib
    1255             :       TYPE(gto_basis_set_type), POINTER                  :: ri_basis
    1256             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    1257          42 :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
    1258             : 
    1259          42 :       NULLIFY (ri_basis, qs_kind_set, para_env, lmin, lmax, npgf_set, zet, rpgf, first_sgf)
    1260          42 :       NULLIFY (sphi, nsgf_set)
    1261             : 
    1262             : !  Initialization
    1263          42 :       CALL get_qs_env(qs_env, qs_kind_set=qs_kind_set, para_env=para_env)
    1264          42 :       IF (ASSOCIATED(xas_tdp_env%ri_inv_ex)) THEN
    1265           4 :          DEALLOCATE (xas_tdp_env%ri_inv_ex)
    1266             :       END IF
    1267             : 
    1268             : !  Get the RI basis of interest and its quantum numbers
    1269          42 :       CALL get_qs_kind(qs_kind_set(ex_kind), basis_set=ri_basis, basis_type="RI_XAS")
    1270             :       CALL get_gto_basis_set(ri_basis, nsgf=nsgf, maxl=maxl, npgf=npgf_set, lmin=lmin, &
    1271             :                              lmax=lmax, zet=zet, pgf_radius=rpgf, first_sgf=first_sgf, &
    1272          42 :                              nsgf_set=nsgf_set, sphi=sphi, nset=nset)
    1273         168 :       ALLOCATE (metric(nsgf, nsgf))
    1274      397238 :       metric = 0.0_dp
    1275             : 
    1276          42 :       r = 0.0_dp
    1277             : 
    1278             :       !init the libint 2-center object
    1279          42 :       CALL cp_libint_init_2eri(lib, maxl)
    1280          42 :       CALL cp_libint_set_contrdepth(lib, 1)
    1281             : 
    1282             :       !make sure the truncted coulomb is initialized
    1283          42 :       IF (xas_tdp_control%ri_m_potential%potential_type == do_potential_truncated) THEN
    1284             : 
    1285           6 :          IF (2*maxl + 1 > get_lmax_init()) THEN
    1286           6 :             IF (para_env%mepos == 0) THEN
    1287           3 :                CALL open_file(unit_number=unit_id, file_name=xas_tdp_control%ri_m_potential%filename)
    1288             :             END IF
    1289           6 :             CALL init(2*maxl + 1, unit_id, para_env%mepos, para_env)
    1290           6 :             IF (para_env%mepos == 0) THEN
    1291           3 :                CALL close_file(unit_id)
    1292             :             END IF
    1293             :          END IF
    1294             :       END IF
    1295             : 
    1296             : !  Compute the RI metric
    1297         126 :       DO pset = 1, nset
    1298          84 :          ncop = npgf_set(pset)*ncoset(lmax(pset))
    1299          84 :          sgfp = first_sgf(1, pset)
    1300          84 :          egfp = sgfp + nsgf_set(pset) - 1
    1301             : 
    1302         294 :          DO qset = 1, nset
    1303         168 :             ncoq = npgf_set(qset)*ncoset(lmax(qset))
    1304         168 :             sgfq = first_sgf(1, qset)
    1305         168 :             egfq = sgfq + nsgf_set(qset) - 1
    1306             : 
    1307         672 :             ALLOCATE (work(ncop, ncoq))
    1308     1317508 :             work = 0.0_dp
    1309             : 
    1310             :             CALL eri_2center(work, lmin(pset), lmax(pset), npgf_set(pset), zet(:, pset), rpgf(:, pset), &
    1311             :                              r, lmin(qset), lmax(qset), npgf_set(qset), zet(:, qset), rpgf(:, qset), &
    1312         168 :                              r, 0.0_dp, lib, xas_tdp_control%ri_m_potential)
    1313             : 
    1314             :             CALL ab_contract(metric(sgfp:egfp, sgfq:egfq), work, sphi(:, sgfp:), sphi(:, sgfq:), &
    1315         168 :                              ncop, ncoq, nsgf_set(pset), nsgf_set(qset))
    1316             : 
    1317         252 :             DEALLOCATE (work)
    1318             :          END DO !qset
    1319             :       END DO !pset
    1320             : 
    1321             : !  Inverting (to M^-1)
    1322          42 :       CALL invmat_symm(metric)
    1323             : 
    1324          42 :       IF (.NOT. xas_tdp_control%do_ri_metric) THEN
    1325             : 
    1326             :          !If no metric, then x_pot = ri_m_pot and (P|Q)^-1 = M^-1 (V-approximation)
    1327         144 :          ALLOCATE (xas_tdp_env%ri_inv_ex(nsgf, nsgf))
    1328      374540 :          xas_tdp_env%ri_inv_ex(:, :) = metric(:, :)
    1329          36 :          CALL cp_libint_cleanup_2eri(lib)
    1330          36 :          RETURN
    1331             : 
    1332             :       END IF
    1333             : 
    1334             :       !make sure the truncted coulomb is initialized
    1335           6 :       IF (xas_tdp_control%x_potential%potential_type == do_potential_truncated) THEN
    1336             : 
    1337           2 :          IF (2*maxl + 1 > get_lmax_init()) THEN
    1338           0 :             IF (para_env%mepos == 0) THEN
    1339           0 :                CALL open_file(unit_number=unit_id, file_name=xas_tdp_control%x_potential%filename)
    1340             :             END IF
    1341           0 :             CALL init(2*maxl + 1, unit_id, para_env%mepos, para_env)
    1342           0 :             IF (para_env%mepos == 0) THEN
    1343           0 :                CALL close_file(unit_id)
    1344             :             END IF
    1345             :          END IF
    1346             :       END IF
    1347             : 
    1348             : !  Compute the proper exchange 2-center
    1349          24 :       ALLOCATE (pq(nsgf, nsgf))
    1350       22698 :       pq = 0.0_dp
    1351             : 
    1352          18 :       DO pset = 1, nset
    1353          12 :          ncop = npgf_set(pset)*ncoset(lmax(pset))
    1354          12 :          sgfp = first_sgf(1, pset)
    1355          12 :          egfp = sgfp + nsgf_set(pset) - 1
    1356             : 
    1357          42 :          DO qset = 1, nset
    1358          24 :             ncoq = npgf_set(qset)*ncoset(lmax(qset))
    1359          24 :             sgfq = first_sgf(1, qset)
    1360          24 :             egfq = sgfq + nsgf_set(qset) - 1
    1361             : 
    1362          96 :             ALLOCATE (work(ncop, ncoq))
    1363       58824 :             work = 0.0_dp
    1364             : 
    1365             :             CALL eri_2center(work, lmin(pset), lmax(pset), npgf_set(pset), zet(:, pset), rpgf(:, pset), &
    1366             :                              r, lmin(qset), lmax(qset), npgf_set(qset), zet(:, qset), rpgf(:, qset), &
    1367          24 :                              r, 0.0_dp, lib, xas_tdp_control%x_potential)
    1368             : 
    1369             :             CALL ab_contract(pq(sgfp:egfp, sgfq:egfq), work, sphi(:, sgfp:), sphi(:, sgfq:), &
    1370          24 :                              ncop, ncoq, nsgf_set(pset), nsgf_set(qset))
    1371             : 
    1372          36 :             DEALLOCATE (work)
    1373             :          END DO !qset
    1374             :       END DO !pset
    1375             : 
    1376             : !  Compute and store M^-1 (P|Q) M^-1
    1377          24 :       ALLOCATE (xas_tdp_env%ri_inv_ex(nsgf, nsgf))
    1378       22698 :       xas_tdp_env%ri_inv_ex = 0.0_dp
    1379             : 
    1380             :       CALL dgemm('N', 'N', nsgf, nsgf, nsgf, 1.0_dp, metric, nsgf, pq, nsgf, &
    1381           6 :                  0.0_dp, xas_tdp_env%ri_inv_ex, nsgf)
    1382             :       CALL dgemm('N', 'N', nsgf, nsgf, nsgf, 1.0_dp, xas_tdp_env%ri_inv_ex, nsgf, metric, nsgf, &
    1383           6 :                  0.0_dp, pq, nsgf)
    1384       22698 :       xas_tdp_env%ri_inv_ex(:, :) = pq(:, :)
    1385             : 
    1386           6 :       CALL cp_libint_cleanup_2eri(lib)
    1387             : 
    1388         126 :    END SUBROUTINE compute_ri_exchange2_int
    1389             : 
    1390             : ! **************************************************************************************************
    1391             : !> \brief Computes the two-center Coulomb integral needed for the RI in kernel calculation. Stores
    1392             : !>        the integrals (P|Q)^-1 in the xas_tdp_env as global (small) arrays. Does that for a given
    1393             : !>        excited kind
    1394             : !> \param ex_kind ...
    1395             : !> \param xas_tdp_env ...
    1396             : !> \param xas_tdp_control ...
    1397             : !> \param qs_env ...
    1398             : ! **************************************************************************************************
    1399          52 :    SUBROUTINE compute_ri_coulomb2_int(ex_kind, xas_tdp_env, xas_tdp_control, qs_env)
    1400             : 
    1401             :       INTEGER, INTENT(IN)                                :: ex_kind
    1402             :       TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
    1403             :       TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
    1404             :       TYPE(qs_environment_type), POINTER                 :: qs_env
    1405             : 
    1406             :       INTEGER                                            :: nsgf
    1407             :       TYPE(gto_basis_set_type), POINTER                  :: ri_basis
    1408          52 :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
    1409             : 
    1410          52 :       NULLIFY (ri_basis, qs_kind_set)
    1411             : 
    1412             : !  Initialization
    1413          52 :       CALL get_qs_env(qs_env, qs_kind_set=qs_kind_set)
    1414          52 :       IF (ASSOCIATED(xas_tdp_env%ri_inv_coul)) THEN
    1415           4 :          DEALLOCATE (xas_tdp_env%ri_inv_coul)
    1416             :       END IF
    1417             : 
    1418             : !  Get the RI basis of interest and its quantum numbers
    1419          52 :       CALL get_qs_kind(qs_kind_set(ex_kind), basis_set=ri_basis, basis_type="RI_XAS")
    1420          52 :       CALL get_gto_basis_set(ri_basis, nsgf=nsgf)
    1421         208 :       ALLOCATE (xas_tdp_env%ri_inv_coul(nsgf, nsgf))
    1422      482860 :       xas_tdp_env%ri_inv_coul = 0.0_dp
    1423             : 
    1424          52 :       IF (.NOT. xas_tdp_control%is_periodic) THEN
    1425             :          CALL int_operators_r12_ab_os(r12_operator=operator_coulomb, vab=xas_tdp_env%ri_inv_coul, &
    1426             :                                       rab=(/0.0_dp, 0.0_dp, 0.0_dp/), fba=ri_basis, fbb=ri_basis, &
    1427          36 :                                       calculate_forces=.FALSE.)
    1428          36 :          CPASSERT(ASSOCIATED(xas_tdp_control))
    1429             :       ELSE
    1430          16 :          CALL periodic_ri_coulomb2(xas_tdp_env%ri_inv_coul, ri_basis, qs_env)
    1431             :       END IF
    1432             : 
    1433             : !  Inverting
    1434          52 :       CALL invmat_symm(xas_tdp_env%ri_inv_coul)
    1435             : 
    1436         104 :    END SUBROUTINE compute_ri_coulomb2_int
    1437             : 
    1438             : ! **************************************************************************************************
    1439             : !> \brief Computes the two-center inverse coulomb integral in the case of PBCs
    1440             : !> \param ri_coul2 ...
    1441             : !> \param ri_basis ...
    1442             : !> \param qs_env ...
    1443             : ! **************************************************************************************************
    1444          16 :    SUBROUTINE periodic_ri_coulomb2(ri_coul2, ri_basis, qs_env)
    1445             : 
    1446             :       REAL(dp), DIMENSION(:, :), INTENT(INOUT)           :: ri_coul2
    1447             :       TYPE(gto_basis_set_type), POINTER                  :: ri_basis
    1448             :       TYPE(qs_environment_type), POINTER                 :: qs_env
    1449             : 
    1450             :       INTEGER                                            :: maxco, ncop, ncoq, nset, op, oq, ppgf, &
    1451             :                                                             pset, qpgf, qset, sgfp, sgfq
    1452          32 :       INTEGER, DIMENSION(:), POINTER                     :: lmax, lmin, npgf, nsgf
    1453          16 :       INTEGER, DIMENSION(:, :), POINTER                  :: first_sgf
    1454             :       REAL(dp)                                           :: r(3)
    1455          16 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: hpq
    1456          16 :       REAL(dp), DIMENSION(:, :), POINTER                 :: sphi, zet
    1457             :       TYPE(cell_type), POINTER                           :: cell
    1458         416 :       TYPE(cp_eri_mme_param)                             :: mme_param
    1459             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    1460          16 :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
    1461             : 
    1462          16 :       NULLIFY (cell, qs_kind_set, lmin, lmax, nsgf, npgf, zet, sphi, first_sgf)
    1463             : 
    1464             :       ! Use eri_mme for this. Don't want to annoy the user with a full input section just for this
    1465             :       ! tiny bit => initialize our own eri_mme section with the defaults
    1466             : 
    1467          16 :       CALL get_qs_env(qs_env, cell=cell, qs_kind_set=qs_kind_set, para_env=para_env)
    1468             : 
    1469             :       CALL eri_mme_init(mme_param%par, n_minimax=20, cutoff=300.0_dp, do_calib_cutoff=.TRUE., &
    1470             :                         cutoff_min=10.0_dp, cutoff_max=10000.0_dp, cutoff_eps=0.01_dp, &
    1471             :                         cutoff_delta=0.9_dp, sum_precision=1.0E-12_dp, debug=.FALSE., &
    1472             :                         debug_delta=1.0E-6_dp, debug_nsum=1000000, unit_nr=0, print_calib=.FALSE., &
    1473          16 :                         do_error_est=.FALSE.)
    1474          16 :       mme_param%do_calib = .TRUE.
    1475             : 
    1476          16 :       CALL cp_eri_mme_set_params(mme_param, cell, qs_kind_set, basis_type_1="RI_XAS", para_env=para_env)
    1477             : 
    1478             :       CALL get_gto_basis_set(ri_basis, lmax=lmax, npgf=npgf, zet=zet, lmin=lmin, nset=nset, &
    1479          16 :                              nsgf_set=nsgf, sphi=sphi, first_sgf=first_sgf, maxco=maxco)
    1480             : 
    1481          16 :       r = 0.0_dp
    1482          64 :       ALLOCATE (hpq(nset*maxco, nset*maxco))
    1483      161616 :       hpq = 0.0_dp
    1484             : 
    1485          48 :       DO pset = 1, nset
    1486          32 :          ncop = npgf(pset)*ncoset(lmax(pset))
    1487          32 :          sgfp = first_sgf(1, pset)
    1488             : 
    1489         112 :          DO qset = 1, nset
    1490          64 :             ncoq = npgf(qset)*ncoset(lmax(qset))
    1491          64 :             sgfq = first_sgf(1, qset)
    1492             : 
    1493         608 :             DO ppgf = 1, npgf(pset)
    1494         544 :                op = (pset - 1)*maxco + (ppgf - 1)*ncoset(lmax(pset))
    1495        5232 :                DO qpgf = 1, npgf(qset)
    1496        4624 :                   oq = (qset - 1)*maxco + (qpgf - 1)*ncoset(lmax(qset))
    1497             : 
    1498             :                   CALL eri_mme_2c_integrate(mme_param%par, lmin(pset), lmax(pset), lmin(qset), &
    1499             :                                             lmax(qset), zet(ppgf, pset), zet(qpgf, qset), r, hpq, &
    1500        5168 :                                             op, oq)
    1501             : 
    1502             :                END DO !qpgf
    1503             :             END DO ! ppgf
    1504             : 
    1505             :             !contraction into sgfs
    1506          64 :             op = (pset - 1)*maxco + 1
    1507          64 :             oq = (qset - 1)*maxco + 1
    1508             : 
    1509             :             CALL ab_contract(ri_coul2(sgfp:sgfp + nsgf(pset) - 1, sgfq:sgfq + nsgf(qset) - 1), &
    1510             :                              hpq(op:op + ncop - 1, oq:oq + ncoq - 1), sphi(:, sgfp:), sphi(:, sgfq:), &
    1511          96 :                              ncop, ncoq, nsgf(pset), nsgf(qset))
    1512             : 
    1513             :          END DO !qset
    1514             :       END DO !pset
    1515             : 
    1516             :       !celan-up
    1517          16 :       CALL eri_mme_release(mme_param%par)
    1518             : 
    1519          64 :    END SUBROUTINE periodic_ri_coulomb2
    1520             : 
    1521             : END MODULE xas_tdp_integrals

Generated by: LCOV version 1.15