LCOV - code coverage report
Current view: top level - src - xas_tdp_integrals.F (source / functions) Coverage Total Hit
Test: CP2K Regtests (git:936074a) Lines: 97.6 % 535 522
Test Date: 2025-12-04 06:27:48 Functions: 100.0 % 10 10

            Line data    Source code
       1              : !--------------------------------------------------------------------------------------------------!
       2              : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3              : !   Copyright 2000-2025 CP2K developers group <https://cp2k.org>                                   !
       4              : !                                                                                                  !
       5              : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6              : !--------------------------------------------------------------------------------------------------!
       7              : 
       8              : ! **************************************************************************************************
       9              : !> \brief 3-center integrals machinery for the XAS_TDP method
      10              : !> \author A. Bussy (03.2020)
      11              : ! **************************************************************************************************
      12              : 
      13              : MODULE xas_tdp_integrals
      14              :    USE OMP_LIB,                         ONLY: omp_get_max_threads,&
      15              :                                               omp_get_num_threads,&
      16              :                                               omp_get_thread_num
      17              :    USE ai_contraction_sphi,             ONLY: ab_contract,&
      18              :                                               abc_contract_xsmm
      19              :    USE atomic_kind_types,               ONLY: atomic_kind_type
      20              :    USE basis_set_types,                 ONLY: get_gto_basis_set,&
      21              :                                               gto_basis_set_p_type,&
      22              :                                               gto_basis_set_type
      23              :    USE cell_types,                      ONLY: cell_type
      24              :    USE constants_operator,              ONLY: operator_coulomb
      25              :    USE cp_array_utils,                  ONLY: cp_1d_i_p_type,&
      26              :                                               cp_2d_r_p_type
      27              :    USE cp_blacs_env,                    ONLY: cp_blacs_env_type
      28              :    USE cp_dbcsr_api,                    ONLY: dbcsr_distribution_get,&
      29              :                                               dbcsr_distribution_release,&
      30              :                                               dbcsr_distribution_type
      31              :    USE cp_dbcsr_operations,             ONLY: cp_dbcsr_dist2d_to_dist
      32              :    USE cp_eri_mme_interface,            ONLY: cp_eri_mme_param,&
      33              :                                               cp_eri_mme_set_params
      34              :    USE cp_files,                        ONLY: close_file,&
      35              :                                               open_file
      36              :    USE dbt_api,                         ONLY: &
      37              :         dbt_create, dbt_distribution_destroy, dbt_distribution_new, dbt_distribution_type, &
      38              :         dbt_finalize, dbt_pgrid_create, dbt_pgrid_destroy, dbt_pgrid_type, dbt_put_block, &
      39              :         dbt_reserve_blocks, dbt_type
      40              :    USE distribution_1d_types,           ONLY: distribution_1d_type
      41              :    USE distribution_2d_types,           ONLY: distribution_2d_create,&
      42              :                                               distribution_2d_type
      43              :    USE eri_mme_integrate,               ONLY: eri_mme_2c_integrate
      44              :    USE eri_mme_types,                   ONLY: eri_mme_init,&
      45              :                                               eri_mme_release
      46              :    USE gamma,                           ONLY: init_md_ftable
      47              :    USE generic_os_integrals,            ONLY: int_operators_r12_ab_os
      48              :    USE input_constants,                 ONLY: do_potential_coulomb,&
      49              :                                               do_potential_id,&
      50              :                                               do_potential_short,&
      51              :                                               do_potential_truncated
      52              :    USE input_section_types,             ONLY: section_vals_val_get
      53              :    USE kinds,                           ONLY: dp
      54              :    USE libint_2c_3c,                    ONLY: cutoff_screen_factor,&
      55              :                                               eri_2center,&
      56              :                                               eri_3center,&
      57              :                                               libint_potential_type
      58              :    USE libint_wrapper,                  ONLY: cp_libint_cleanup_2eri,&
      59              :                                               cp_libint_cleanup_3eri,&
      60              :                                               cp_libint_init_2eri,&
      61              :                                               cp_libint_init_3eri,&
      62              :                                               cp_libint_set_contrdepth,&
      63              :                                               cp_libint_t
      64              :    USE mathlib,                         ONLY: invmat_symm
      65              :    USE message_passing,                 ONLY: mp_comm_type,&
      66              :                                               mp_para_env_type
      67              :    USE molecule_types,                  ONLY: molecule_type
      68              :    USE orbital_pointers,                ONLY: ncoset
      69              :    USE particle_methods,                ONLY: get_particle_set
      70              :    USE particle_types,                  ONLY: particle_type
      71              :    USE qs_environment_types,            ONLY: get_qs_env,&
      72              :                                               qs_environment_type
      73              :    USE qs_integral_utils,               ONLY: basis_set_list_setup
      74              :    USE qs_kind_types,                   ONLY: get_qs_kind,&
      75              :                                               qs_kind_type
      76              :    USE qs_neighbor_list_types,          ONLY: &
      77              :         get_iterator_info, get_neighbor_list_set_p, neighbor_list_iterate, &
      78              :         neighbor_list_iterator_create, neighbor_list_iterator_p_type, &
      79              :         neighbor_list_iterator_release, neighbor_list_set_p_type, nl_set_sub_iterator, &
      80              :         nl_sub_iterate, release_neighbor_list_sets
      81              :    USE qs_neighbor_lists,               ONLY: atom2d_build,&
      82              :                                               atom2d_cleanup,&
      83              :                                               build_neighbor_lists,&
      84              :                                               local_atoms_type,&
      85              :                                               pair_radius_setup
      86              :    USE qs_o3c_types,                    ONLY: get_o3c_iterator_info,&
      87              :                                               init_o3c_container,&
      88              :                                               o3c_container_type,&
      89              :                                               o3c_iterate,&
      90              :                                               o3c_iterator_create,&
      91              :                                               o3c_iterator_release,&
      92              :                                               o3c_iterator_type,&
      93              :                                               release_o3c_container
      94              :    USE t_c_g0,                          ONLY: get_lmax_init,&
      95              :                                               init
      96              :    USE xas_tdp_types,                   ONLY: xas_tdp_control_type,&
      97              :                                               xas_tdp_env_type
      98              : #include "./base/base_uses.f90"
      99              : 
     100              :    IMPLICIT NONE
     101              :    PRIVATE
     102              : 
     103              :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'xas_tdp_integrals'
     104              : 
     105              :    PUBLIC :: create_pqX_tensor, fill_pqX_tensor, compute_ri_3c_coulomb, compute_ri_3c_exchange, &
     106              :              build_xas_tdp_3c_nl, build_xas_tdp_ovlp_nl, get_opt_3c_dist2d, &
     107              :              compute_ri_coulomb2_int, compute_ri_exchange2_int
     108              : 
     109              : CONTAINS
     110              : 
     111              : ! **************************************************************************************************
     112              : !> \brief Prepares a tensor to hold 3-center integrals (pq|X), where p,q are distributed according
     113              : !>        to the given 2d dbcsr distribution of the given . The third dimension of the tensor is
     114              : !>        iteslf not distributed (i.e. the t_pgrid's third dimension has size 1). The blocks are
     115              : !>        reserved according to the neighbor lists
     116              : !> \param pq_X the tensor to store the integrals
     117              : !> \param ab_nl the 1st and 2nd center neighbor list
     118              : !> \param ac_nl the 1st and 3rd center neighbor list
     119              : !> \param matrix_dist ...
     120              : !> \param blk_size_1 the block size in the first dimension
     121              : !> \param blk_size_2 the block size in the second dimension
     122              : !> \param blk_size_3 the block size in the third dimension
     123              : !> \param only_bc_same_center only keep block if, for the corresponding integral (ab|c), b and c
     124              : !>        share the same center, i.e. r_bc = 0.0
     125              : ! **************************************************************************************************
     126         1870 :    SUBROUTINE create_pqX_tensor(pq_X, ab_nl, ac_nl, matrix_dist, blk_size_1, blk_size_2, &
     127          170 :                                 blk_size_3, only_bc_same_center)
     128              : 
     129              :       TYPE(dbt_type), INTENT(OUT)                        :: pq_X
     130              :       TYPE(neighbor_list_set_p_type), DIMENSION(:), &
     131              :          POINTER                                         :: ab_nl, ac_nl
     132              :       TYPE(dbcsr_distribution_type), INTENT(IN)          :: matrix_dist
     133              :       INTEGER, DIMENSION(:), INTENT(IN)                  :: blk_size_1, blk_size_2, blk_size_3
     134              :       LOGICAL, INTENT(IN), OPTIONAL                      :: only_bc_same_center
     135              : 
     136              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'create_pqX_tensor'
     137              : 
     138              :       INTEGER                                            :: A, b, group_handle, handle, i, iatom, &
     139              :                                                             ikind, jatom, katom, kkind, nblk, &
     140              :                                                             nblk_3, nblk_per_thread, nkind
     141          170 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: idx1, idx2, idx3
     142              :       INTEGER, DIMENSION(3)                              :: pdims
     143          170 :       INTEGER, DIMENSION(:), POINTER                     :: col_dist, row_dist
     144          170 :       INTEGER, DIMENSION(:, :), POINTER                  :: mat_pgrid
     145              :       LOGICAL                                            :: my_sort_bc, symmetric
     146              :       REAL(dp), DIMENSION(3)                             :: rab, rac, rbc
     147         1530 :       TYPE(dbt_distribution_type)                        :: t_dist
     148          510 :       TYPE(dbt_pgrid_type)                               :: t_pgrid
     149              :       TYPE(mp_comm_type)                                 :: group
     150              :       TYPE(neighbor_list_iterator_p_type), &
     151          170 :          DIMENSION(:), POINTER                           :: ab_iter, ac_iter
     152              : 
     153          170 :       NULLIFY (ab_iter, ac_iter, col_dist, row_dist, mat_pgrid)
     154              : 
     155          170 :       CALL timeset(routineN, handle)
     156              : 
     157          170 :       my_sort_bc = .FALSE.
     158          170 :       IF (PRESENT(only_bc_same_center)) my_sort_bc = only_bc_same_center
     159              : 
     160          170 :       CALL get_neighbor_list_set_p(ab_nl, symmetric=symmetric)
     161          170 :       CPASSERT(symmetric)
     162              : 
     163              :       !get 2D distribution info from matrix_dist
     164              :       CALL dbcsr_distribution_get(matrix_dist, pgrid=mat_pgrid, group=group_handle, &
     165          170 :                                   row_dist=row_dist, col_dist=col_dist)
     166          170 :       CALL group%set_handle(group_handle)
     167              : 
     168              :       !create the corresponding tensor proc grid and dist
     169          170 :       pdims(1) = SIZE(mat_pgrid, 1); pdims(2) = SIZE(mat_pgrid, 2); pdims(3) = 1
     170          170 :       CALL dbt_pgrid_create(group, pdims, t_pgrid)
     171              : 
     172          170 :       nblk_3 = SIZE(blk_size_3)
     173              :       CALL dbt_distribution_new(t_dist, t_pgrid, nd_dist_1=row_dist, nd_dist_2=col_dist, &
     174         2864 :                                 nd_dist_3=[(0, i=1, nblk_3)])
     175              : 
     176              :       !create the tensor itself.
     177              :       CALL dbt_create(pq_X, name="(pq|X)", dist=t_dist, map1_2d=[1, 2], map2_2d=[3], &
     178          170 :                       blk_size_1=blk_size_1, blk_size_2=blk_size_2, blk_size_3=blk_size_3)
     179              : 
     180              :       !count the blocks to reserve !note: dbcsr takes care of only keeping unique indices
     181          170 :       CALL neighbor_list_iterator_create(ab_iter, ab_nl)
     182          170 :       CALL neighbor_list_iterator_create(ac_iter, ac_nl, search=.TRUE.)
     183          170 :       nblk = 0
     184        17495 :       DO WHILE (neighbor_list_iterate(ab_iter) == 0)
     185        17325 :          CALL get_iterator_info(ab_iter, ikind=ikind, iatom=iatom, nkind=nkind, r=rab)
     186              : 
     187        50903 :          DO kkind = 1, nkind
     188        33408 :             CALL nl_set_sub_iterator(ac_iter, ikind, kkind, iatom)
     189              : 
     190        72213 :             DO WHILE (nl_sub_iterate(ac_iter) == 0)
     191              : 
     192        21480 :                IF (my_sort_bc) THEN
     193              :                   !we check for rbc or rac because of symmetry in ab_nl
     194          714 :                   CALL get_iterator_info(ac_iter, r=rac)
     195         2856 :                   rbc(:) = rac(:) - rab(:)
     196         3131 :                   IF (.NOT. (ALL(ABS(rbc) <= 1.0E-8_dp) .OR. ALL(ABS(rac) <= 1.0E-8_dp))) CYCLE
     197              : 
     198              :                END IF
     199              : 
     200        21480 :                nblk = nblk + 1
     201              :             END DO !ac_iter
     202              :          END DO !kkind
     203              :       END DO !ab_iter
     204          170 :       CALL neighbor_list_iterator_release(ab_iter)
     205          170 :       CALL neighbor_list_iterator_release(ac_iter)
     206              : 
     207         1050 :       ALLOCATE (idx1(nblk), idx2(nblk), idx3(nblk))
     208              : 
     209              :       !actually reserve the blocks
     210          170 :       CALL neighbor_list_iterator_create(ab_iter, ab_nl)
     211          170 :       CALL neighbor_list_iterator_create(ac_iter, ac_nl, search=.TRUE.)
     212          170 :       nblk = 0
     213        17495 :       DO WHILE (neighbor_list_iterate(ab_iter) == 0)
     214        17325 :          CALL get_iterator_info(ab_iter, ikind=ikind, iatom=iatom, jatom=jatom, nkind=nkind, r=rab)
     215              : 
     216        50903 :          DO kkind = 1, nkind
     217        33408 :             CALL nl_set_sub_iterator(ac_iter, ikind, kkind, iatom)
     218              : 
     219        72213 :             DO WHILE (nl_sub_iterate(ac_iter) == 0)
     220        21480 :                CALL get_iterator_info(ac_iter, jatom=katom, r=rac)
     221              : 
     222        21480 :                IF (my_sort_bc) THEN
     223              :                   !we check for rbc or rac because of symmetry in ab_nl
     224          714 :                   CALL get_iterator_info(ac_iter, r=rac)
     225         2856 :                   rbc(:) = rac(:) - rab(:)
     226         3131 :                   IF (.NOT. (ALL(ABS(rbc) <= 1.0E-8_dp) .OR. ALL(ABS(rac) <= 1.0E-8_dp))) CYCLE
     227              : 
     228              :                END IF
     229              : 
     230        21188 :                nblk = nblk + 1
     231              : 
     232        21188 :                idx1(nblk) = iatom
     233        21188 :                idx2(nblk) = jatom
     234        21480 :                idx3(nblk) = katom
     235              : 
     236              :             END DO !ac_iter
     237              :          END DO !kkind
     238              :       END DO !ab_iter
     239          170 :       CALL neighbor_list_iterator_release(ab_iter)
     240          170 :       CALL neighbor_list_iterator_release(ac_iter)
     241              : 
     242              : !TODO: Parallelize creation of block list.
     243          170 : !$OMP PARALLEL DEFAULT(NONE) SHARED(pq_X, nblk, idx1, idx2, idx3) PRIVATE(nblk_per_thread,A,b)
     244              :       nblk_per_thread = nblk/omp_get_num_threads() + 1
     245              :       a = omp_get_thread_num()*nblk_per_thread + 1
     246              :       b = MIN(a + nblk_per_thread, nblk)
     247              :       CALL dbt_reserve_blocks(pq_X, idx1(a:b), idx2(a:b), idx3(a:b))
     248              : !$OMP END PARALLEL
     249          170 :       CALL dbt_finalize(pq_X)
     250              : 
     251              :       !clean-up
     252          170 :       CALL dbt_distribution_destroy(t_dist)
     253          170 :       CALL dbt_pgrid_destroy(t_pgrid)
     254              : 
     255          170 :       CALL timestop(handle)
     256              : 
     257          850 :    END SUBROUTINE create_pqX_tensor
     258              : 
     259              : ! **************************************************************************************************
     260              : !> \brief Fills the given 3 dimensional (pq|X) tensor with integrals
     261              : !> \param pq_X the tensor to fill
     262              : !> \param ab_nl the neighbor list for the first 2 centers
     263              : !> \param ac_nl the neighbor list for the first and third centers
     264              : !> \param basis_set_list_a basis sets for first center
     265              : !> \param basis_set_list_b basis sets for second center
     266              : !> \param basis_set_list_c basis sets for third center
     267              : !> \param potential_parameter the operator for the integrals
     268              : !> \param qs_env ...
     269              : !> \param only_bc_same_center same as in create_pqX_tensor
     270              : !> \param eps_screen threshold for possible screening
     271              : !> \note The following indices are happily mixed within this routine: First center i,a,p
     272              : !>       Second center: j,b,q       Third center: k,c,X
     273              : ! **************************************************************************************************
     274          170 :    SUBROUTINE fill_pqX_tensor(pq_X, ab_nl, ac_nl, basis_set_list_a, basis_set_list_b, &
     275              :                               basis_set_list_c, potential_parameter, qs_env, &
     276              :                               only_bc_same_center, eps_screen)
     277              : 
     278              :       TYPE(dbt_type)                                     :: pq_X
     279              :       TYPE(neighbor_list_set_p_type), DIMENSION(:), &
     280              :          POINTER                                         :: ab_nl, ac_nl
     281              :       TYPE(gto_basis_set_p_type), DIMENSION(:), POINTER  :: basis_set_list_a, basis_set_list_b, &
     282              :                                                             basis_set_list_c
     283              :       TYPE(libint_potential_type)                        :: potential_parameter
     284              :       TYPE(qs_environment_type), POINTER                 :: qs_env
     285              :       LOGICAL, INTENT(IN), OPTIONAL                      :: only_bc_same_center
     286              :       REAL(dp), INTENT(IN), OPTIONAL                     :: eps_screen
     287              : 
     288              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'fill_pqX_tensor'
     289              : 
     290              :       INTEGER :: egfa, egfb, egfc, handle, i, iatom, ibasis, ikind, ilist, imax, iset, jatom, &
     291              :          jkind, jset, katom, kkind, kset, m_max, max_nset, maxli, maxlj, maxlk, mepos, nbasis, &
     292              :          ncoa, ncob, ncoc, ni, nj, nk, nseta, nsetb, nsetc, nthread, sgfa, sgfb, sgfc, unit_id
     293          170 :       INTEGER, DIMENSION(:), POINTER                     :: la_max, la_min, lb_max, lb_min, lc_max, &
     294          170 :                                                             lc_min, npgfa, npgfb, npgfc, nsgfa, &
     295          170 :                                                             nsgfb, nsgfc
     296          170 :       INTEGER, DIMENSION(:, :), POINTER                  :: first_sgfa, first_sgfb, first_sgfc
     297              :       LOGICAL                                            :: do_screen, my_sort_bc
     298              :       REAL(dp)                                           :: dij, dik, djk, my_eps_screen, ri(3), &
     299              :                                                             rij(3), rik(3), rj(3), rjk(3), rk(3), &
     300              :                                                             sabc_ext, screen_radius
     301          170 :       REAL(dp), ALLOCATABLE, DIMENSION(:)                :: ccp_buffer, cpp_buffer
     302          170 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: max_contr, max_contra, max_contrb, &
     303          170 :                                                             max_contrc
     304          170 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :, :)          :: iabc, sabc, work
     305          170 :       REAL(dp), DIMENSION(:), POINTER                    :: set_radius_a, set_radius_b, set_radius_c
     306          170 :       REAL(dp), DIMENSION(:, :), POINTER                 :: rpgf_a, rpgf_b, rpgf_c, zeta, zetb, zetc
     307          170 :       TYPE(cp_2d_r_p_type), DIMENSION(:, :), POINTER     :: spb, spc, tspa
     308              :       TYPE(cp_libint_t)                                  :: lib
     309          170 :       TYPE(gto_basis_set_p_type), DIMENSION(:), POINTER  :: basis_set_list
     310              :       TYPE(gto_basis_set_type), POINTER                  :: basis_set, basis_set_a, basis_set_b, &
     311              :                                                             basis_set_c
     312              :       TYPE(mp_para_env_type), POINTER                    :: para_env
     313              :       TYPE(o3c_container_type), POINTER                  :: o3c
     314              :       TYPE(o3c_iterator_type)                            :: o3c_iterator
     315              : 
     316          170 :       NULLIFY (basis_set, basis_set_list, para_env, la_max, la_min)
     317          170 :       NULLIFY (lb_max, lb_min, lc_max, lc_min, npgfa, npgfb, npgfc, nsgfa, nsgfb, nsgfc)
     318          170 :       NULLIFY (first_sgfa, first_sgfb, first_sgfc, set_radius_a, set_radius_b, set_radius_c)
     319          170 :       NULLIFY (rpgf_a, rpgf_b, rpgf_c, zeta, zetb, zetc)
     320          170 :       NULLIFY (basis_set_a, basis_set_b, basis_set_c, tspa, spb, spc)
     321              : 
     322          170 :       CALL timeset(routineN, handle)
     323              : 
     324              :       !Need the max l for each basis for libint (and overall max #of sets for screening)
     325          170 :       nbasis = SIZE(basis_set_list_a)
     326          170 :       max_nset = 0
     327          170 :       maxli = 0
     328          450 :       DO ibasis = 1, nbasis
     329              :          CALL get_gto_basis_set(gto_basis_set=basis_set_list_a(ibasis)%gto_basis_set, &
     330          280 :                                 maxl=imax, nset=iset, nsgf_set=nsgfa)
     331          280 :          maxli = MAX(maxli, imax)
     332          450 :          max_nset = MAX(max_nset, iset)
     333              :       END DO
     334              :       maxlj = 0
     335          450 :       DO ibasis = 1, nbasis
     336              :          CALL get_gto_basis_set(gto_basis_set=basis_set_list_b(ibasis)%gto_basis_set, &
     337          280 :                                 maxl=imax, nset=iset, nsgf_set=nsgfb, npgf=npgfb)
     338          280 :          maxlj = MAX(maxlj, imax)
     339          450 :          max_nset = MAX(max_nset, iset)
     340              :       END DO
     341              :       maxlk = 0
     342          450 :       DO ibasis = 1, nbasis
     343              :          CALL get_gto_basis_set(gto_basis_set=basis_set_list_c(ibasis)%gto_basis_set, &
     344          280 :                                 maxl=imax, nset=iset, npgf=npgfc)
     345          280 :          maxlk = MAX(maxlk, imax)
     346          450 :          max_nset = MAX(max_nset, iset)
     347              :       END DO
     348          170 :       m_max = maxli + maxlj + maxlk
     349              : 
     350              :       !Screening
     351          170 :       do_screen = .FALSE.
     352          170 :       IF (PRESENT(eps_screen)) THEN
     353           50 :          do_screen = .TRUE.
     354           50 :          my_eps_screen = eps_screen
     355              :       END IF
     356          170 :       screen_radius = 0.0_dp
     357          170 :       IF (potential_parameter%potential_type == do_potential_truncated .OR. &
     358              :           potential_parameter%potential_type == do_potential_short) THEN
     359              : 
     360           12 :          screen_radius = potential_parameter%cutoff_radius*cutoff_screen_factor
     361          158 :       ELSE IF (potential_parameter%potential_type == do_potential_coulomb) THEN
     362              : 
     363           94 :          screen_radius = 1000000.0_dp
     364              :       END IF
     365              : 
     366              :       !get maximum contraction values for abc_contract screening
     367          170 :       IF (do_screen) THEN
     368              : 
     369              :          !Allocate max_contraction arrays such that we have a specific value for each set/kind
     370            0 :          ALLOCATE (max_contr(max_nset, nbasis), max_contra(max_nset, nbasis), &
     371          500 :                    max_contrb(max_nset, nbasis), max_contrc(max_nset, nbasis))
     372              : 
     373              :          !Not the most elegent, but better than copying 3 times the same
     374          200 :          DO ilist = 1, 3
     375              : 
     376          150 :             IF (ilist == 1) basis_set_list => basis_set_list_a
     377          150 :             IF (ilist == 2) basis_set_list => basis_set_list_b
     378          150 :             IF (ilist == 3) basis_set_list => basis_set_list_c
     379              : 
     380         1668 :             max_contr = 0.0_dp
     381              : 
     382          402 :             DO ibasis = 1, nbasis
     383          252 :                basis_set => basis_set_list(ibasis)%gto_basis_set
     384              : 
     385         1226 :                DO iset = 1, basis_set%nset
     386              : 
     387          824 :                   ncoa = basis_set%npgf(iset)*ncoset(basis_set%lmax(iset))
     388          824 :                   sgfa = basis_set%first_sgf(1, iset)
     389          824 :                   egfa = sgfa + basis_set%nsgf_set(iset) - 1
     390              : 
     391              :                   max_contr(iset, ibasis) = &
     392       536546 :                      MAXVAL([(SUM(ABS(basis_set%sphi(1:ncoa, i))), i=sgfa, egfa)])
     393              : 
     394              :                END DO !iset
     395              :             END DO !ibasis
     396              : 
     397          656 :             IF (ilist == 1) max_contra(:, :) = max_contr(:, :)
     398          656 :             IF (ilist == 2) max_contrb(:, :) = max_contr(:, :)
     399          706 :             IF (ilist == 3) max_contrc(:, :) = max_contr(:, :)
     400              :          END DO !ilist
     401           50 :          DEALLOCATE (max_contr)
     402              :       END IF !do_screen
     403              : 
     404              :       !To minimize memory ops in contraction, we need to pre-allocate buffers, pre-tranpose sphi_a
     405              :       !and also trim sphi in general to have contiguous arrays
     406         6400 :       ALLOCATE (tspa(max_nset, nbasis), spb(max_nset, nbasis), spc(max_nset, nbasis))
     407          450 :       DO ibasis = 1, nbasis
     408         1850 :          DO iset = 1, max_nset
     409         1400 :             NULLIFY (tspa(iset, ibasis)%array)
     410         1400 :             NULLIFY (spb(iset, ibasis)%array)
     411         1680 :             NULLIFY (spc(iset, ibasis)%array)
     412              :          END DO
     413              :       END DO
     414              : 
     415          680 :       DO ilist = 1, 3
     416              : 
     417         1520 :          DO ibasis = 1, nbasis
     418          840 :             IF (ilist == 1) basis_set => basis_set_list_a(ibasis)%gto_basis_set
     419          840 :             IF (ilist == 2) basis_set => basis_set_list_b(ibasis)%gto_basis_set
     420          840 :             IF (ilist == 3) basis_set => basis_set_list_c(ibasis)%gto_basis_set
     421              : 
     422         4186 :             DO iset = 1, basis_set%nset
     423              : 
     424         2836 :                ncoa = basis_set%npgf(iset)*ncoset(basis_set%lmax(iset))
     425         2836 :                sgfa = basis_set%first_sgf(1, iset)
     426         2836 :                egfa = sgfa + basis_set%nsgf_set(iset) - 1
     427              : 
     428         3676 :                IF (ilist == 1) THEN
     429         4248 :                   ALLOCATE (tspa(iset, ibasis)%array(basis_set%nsgf_set(iset), ncoa))
     430        55078 :                   tspa(iset, ibasis)%array(:, :) = TRANSPOSE(basis_set%sphi(1:ncoa, sgfa:egfa))
     431         1774 :                ELSE IF (ilist == 2) THEN
     432         4248 :                   ALLOCATE (spb(iset, ibasis)%array(ncoa, basis_set%nsgf_set(iset)))
     433        48762 :                   spb(iset, ibasis)%array(:, :) = basis_set%sphi(1:ncoa, sgfa:egfa)
     434              :                ELSE
     435         2848 :                   ALLOCATE (spc(iset, ibasis)%array(ncoa, basis_set%nsgf_set(iset)))
     436      2858300 :                   spc(iset, ibasis)%array(:, :) = basis_set%sphi(1:ncoa, sgfa:egfa)
     437              :                END IF
     438              : 
     439              :             END DO !iset
     440              :          END DO !ibasis
     441              :       END DO !ilist
     442              : 
     443          170 :       my_sort_bc = .FALSE.
     444          170 :       IF (PRESENT(only_bc_same_center)) my_sort_bc = only_bc_same_center
     445              : 
     446              :       !Init the truncated Coulomb operator
     447          170 :       CALL get_qs_env(qs_env, para_env=para_env)
     448          170 :       IF (potential_parameter%potential_type == do_potential_truncated) THEN
     449              : 
     450              :          !open the file only if necessary
     451            6 :          IF (m_max > get_lmax_init()) THEN
     452            0 :             IF (para_env%mepos == 0) THEN
     453            0 :                CALL open_file(unit_number=unit_id, file_name=potential_parameter%filename)
     454              :             END IF
     455            0 :             CALL init(m_max, unit_id, para_env%mepos, para_env)
     456            0 :             IF (para_env%mepos == 0) THEN
     457            0 :                CALL close_file(unit_id)
     458              :             END IF
     459              :          END IF
     460              :       END IF
     461              : 
     462              :       !Inint the initial gamma function before the OMP region as it is not thread safe
     463          170 :       CALL init_md_ftable(nmax=m_max)
     464              : 
     465              :       !Strategy: we use the o3c iterator because it is OMP parallelized and also takes the
     466              :       !          only_bc_same_center argument. Only the dbcsr_put_block is critical
     467              : 
     468              :       nthread = 1
     469          170 : !$    nthread = omp_get_max_threads()
     470              : 
     471          170 :       ALLOCATE (o3c)
     472              :       CALL init_o3c_container(o3c, 1, basis_set_list_a, basis_set_list_b, basis_set_list_c, &
     473          170 :                               ab_nl, ac_nl, only_bc_same_center=my_sort_bc)
     474          170 :       CALL o3c_iterator_create(o3c, o3c_iterator, nthread=nthread)
     475              : 
     476              : !$OMP PARALLEL DEFAULT(NONE) &
     477              : !$OMP SHARED (pq_X,do_screen,max_nset,basis_set_list_a,max_contra,max_contrb,max_contrc,&
     478              : !$OMP         basis_set_list_b, basis_set_list_c,ncoset,screen_radius,potential_parameter,&
     479              : !$OMP         my_eps_screen,maxli,maxlj,maxlk,my_sort_bc,nthread,o3c,o3c_iterator,tspa,spb,spc) &
     480              : !$OMP PRIVATE (lib,i,mepos,work,iset,ncoa,sgfa,egfa,nseta,iatom,ikind,jatom,jkind,katom,kkind,&
     481              : !$OMP          rij,rik,rjk,basis_set_a,nsetb,la_max,la_min,lb_max,lb_min,lc_max,lc_min,npgfa,&
     482              : !$OMP          npgfb,npgfc,nsgfa,nsgfb,nsgfc,ri,rk,first_sgfa,first_sgfb,first_sgfc,nsetc,&
     483              : !$OMP          set_radius_a,set_radius_b,set_radius_c,rj,rpgf_a,rpgf_b,rpgf_c,zeta,zetb,&
     484              : !$OMP          zetc,basis_set_b,basis_set_c,dij,dik,djk,ni,nj,nk,iabc,sabc,jset,kset,&
     485          170 : !$OMP          ncob,ncoc,sgfb,sgfc,egfb,egfc,sabc_ext,cpp_buffer,ccp_buffer)
     486              : 
     487              :       mepos = 0
     488              : !$    mepos = omp_get_thread_num()
     489              : 
     490              :       !each thread need its own libint object (internals may change at different rates)
     491              :       CALL cp_libint_init_3eri(lib, MAX(maxli, maxlj, maxlk))
     492              :       CALL cp_libint_set_contrdepth(lib, 1)
     493              : 
     494              :       DO WHILE (o3c_iterate(o3c_iterator, mepos=mepos) == 0)
     495              :          CALL get_o3c_iterator_info(o3c_iterator, mepos=mepos, ikind=ikind, jkind=jkind, kkind=kkind, &
     496              :                                     iatom=iatom, jatom=jatom, katom=katom, rij=rij, rik=rik)
     497              : 
     498              :          !get first center basis info
     499              :          basis_set_a => basis_set_list_a(ikind)%gto_basis_set
     500              :          first_sgfa => basis_set_a%first_sgf
     501              :          la_max => basis_set_a%lmax
     502              :          la_min => basis_set_a%lmin
     503              :          npgfa => basis_set_a%npgf
     504              :          nseta = basis_set_a%nset
     505              :          nsgfa => basis_set_a%nsgf_set
     506              :          zeta => basis_set_a%zet
     507              :          rpgf_a => basis_set_a%pgf_radius
     508              :          set_radius_a => basis_set_a%set_radius
     509              :          ni = SUM(nsgfa)
     510              :          !second center basis info
     511              :          basis_set_b => basis_set_list_b(jkind)%gto_basis_set
     512              :          first_sgfb => basis_set_b%first_sgf
     513              :          lb_max => basis_set_b%lmax
     514              :          lb_min => basis_set_b%lmin
     515              :          npgfb => basis_set_b%npgf
     516              :          nsetb = basis_set_b%nset
     517              :          nsgfb => basis_set_b%nsgf_set
     518              :          zetb => basis_set_b%zet
     519              :          rpgf_b => basis_set_b%pgf_radius
     520              :          set_radius_b => basis_set_b%set_radius
     521              :          nj = SUM(nsgfb)
     522              :          !third center basis info
     523              :          basis_set_c => basis_set_list_c(kkind)%gto_basis_set
     524              :          first_sgfc => basis_set_c%first_sgf
     525              :          lc_max => basis_set_c%lmax
     526              :          lc_min => basis_set_c%lmin
     527              :          npgfc => basis_set_c%npgf
     528              :          nsetc = basis_set_c%nset
     529              :          nsgfc => basis_set_c%nsgf_set
     530              :          zetc => basis_set_c%zet
     531              :          rpgf_c => basis_set_c%pgf_radius
     532              :          set_radius_c => basis_set_c%set_radius
     533              :          nk = SUM(nsgfc)
     534              : 
     535              :          !position and distances, only relative pos matter for libint
     536              :          rjk = rik - rij
     537              :          ri = 0.0_dp
     538              :          rj = rij ! ri + rij
     539              :          rk = rik ! ri + rik
     540              : 
     541              :          djk = NORM2(rjk)
     542              :          dij = NORM2(rij)
     543              :          dik = NORM2(rik)
     544              : 
     545              :          !sgf integrals
     546              :          ALLOCATE (iabc(ni, nj, nk))
     547              :          iabc(:, :, :) = 0.0_dp
     548              : 
     549              :          DO iset = 1, nseta
     550              :             ncoa = npgfa(iset)*ncoset(la_max(iset))
     551              :             sgfa = first_sgfa(1, iset)
     552              :             egfa = sgfa + nsgfa(iset) - 1
     553              : 
     554              :             DO jset = 1, nsetb
     555              :                ncob = npgfb(jset)*ncoset(lb_max(jset))
     556              :                sgfb = first_sgfb(1, jset)
     557              :                egfb = sgfb + nsgfb(jset) - 1
     558              : 
     559              :                !screening (overlap)
     560              :                IF (set_radius_a(iset) + set_radius_b(jset) < dij) CYCLE
     561              : 
     562              :                DO kset = 1, nsetc
     563              :                   ncoc = npgfc(kset)*ncoset(lc_max(kset))
     564              :                   sgfc = first_sgfc(1, kset)
     565              :                   egfc = sgfc + nsgfc(kset) - 1
     566              : 
     567              :                   !screening (potential)
     568              :                   IF (set_radius_a(iset) + set_radius_c(kset) + screen_radius < dik) CYCLE
     569              :                   IF (set_radius_b(jset) + set_radius_c(kset) + screen_radius < djk) CYCLE
     570              : 
     571              :                   !pgf integrals
     572              :                   ALLOCATE (sabc(ncoa, ncob, ncoc))
     573              :                   sabc(:, :, :) = 0.0_dp
     574              : 
     575              :                   IF (do_screen) THEN
     576              :                      CALL eri_3center(sabc, la_min(iset), la_max(iset), npgfa(iset), zeta(:, iset), &
     577              :                                       rpgf_a(:, iset), ri, lb_min(jset), lb_max(jset), npgfb(jset), &
     578              :                                       zetb(:, jset), rpgf_b(:, jset), rj, lc_min(kset), lc_max(kset), &
     579              :                                       npgfc(kset), zetc(:, kset), rpgf_c(:, kset), rk, dij, dik, &
     580              :                                       djk, lib, potential_parameter, int_abc_ext=sabc_ext)
     581              :                      IF (my_eps_screen > sabc_ext*(max_contra(iset, ikind)* &
     582              :                                                    max_contrb(jset, jkind)* &
     583              :                                                    max_contrc(kset, kkind))) THEN
     584              :                         DEALLOCATE (sabc)
     585              :                         CYCLE
     586              :                      END IF
     587              :                   ELSE
     588              :                      CALL eri_3center(sabc, la_min(iset), la_max(iset), npgfa(iset), zeta(:, iset), &
     589              :                                       rpgf_a(:, iset), ri, lb_min(jset), lb_max(jset), npgfb(jset), &
     590              :                                       zetb(:, jset), rpgf_b(:, jset), rj, lc_min(kset), lc_max(kset), &
     591              :                                       npgfc(kset), zetc(:, kset), rpgf_c(:, kset), rk, dij, dik, &
     592              :                                       djk, lib, potential_parameter)
     593              :                   END IF
     594              : 
     595              :                   ALLOCATE (work(nsgfa(iset), nsgfb(jset), nsgfc(kset)))
     596              : 
     597              :                   CALL abc_contract_xsmm(work, sabc, tspa(iset, ikind)%array, spb(jset, jkind)%array, &
     598              :                                          spc(kset, kkind)%array, ncoa, ncob, ncoc, nsgfa(iset), &
     599              :                                          nsgfb(jset), nsgfc(kset), cpp_buffer, ccp_buffer)
     600              : 
     601              :                   iabc(sgfa:egfa, sgfb:egfb, sgfc:egfc) = work(:, :, :)
     602              :                   DEALLOCATE (sabc, work)
     603              : 
     604              :                END DO !kset
     605              :             END DO !jset
     606              :          END DO !iset
     607              : 
     608              :          !Add the integral to the proper tensor block
     609              : !$OMP CRITICAL(omp_fill_pqX_tensor)
     610              :          CALL dbt_put_block(pq_X, [iatom, jatom, katom], SHAPE(iabc), iabc, summation=.TRUE.)
     611              : !$OMP END CRITICAL(omp_fill_pqX_tensor)
     612              : 
     613              :          DEALLOCATE (iabc)
     614              :       END DO !o3c_iterator
     615              : 
     616              :       IF (ALLOCATED(ccp_buffer)) DEALLOCATE (ccp_buffer)
     617              :       IF (ALLOCATED(cpp_buffer)) DEALLOCATE (cpp_buffer)
     618              : 
     619              :       CALL cp_libint_cleanup_3eri(lib)
     620              : 
     621              : !$OMP END PARALLEL
     622          170 :       CALL o3c_iterator_release(o3c_iterator)
     623          170 :       CALL release_o3c_container(o3c)
     624          170 :       DEALLOCATE (o3c)
     625              : 
     626         1062 :       DO iset = 1, max_nset
     627         2462 :          DO ibasis = 1, nbasis
     628         1400 :             IF (ASSOCIATED(tspa(iset, ibasis)%array)) DEALLOCATE (tspa(iset, ibasis)%array)
     629         1400 :             IF (ASSOCIATED(spb(iset, ibasis)%array)) DEALLOCATE (spb(iset, ibasis)%array)
     630         2292 :             IF (ASSOCIATED(spc(iset, ibasis)%array)) DEALLOCATE (spc(iset, ibasis)%array)
     631              :          END DO
     632              :       END DO
     633          170 :       DEALLOCATE (tspa, spb, spc)
     634              : 
     635          170 :       CALL timestop(handle)
     636              : 
     637          340 :    END SUBROUTINE fill_pqX_tensor
     638              : 
     639              : ! **************************************************************************************************
     640              : !> \brief Builds a neighbor lists set for overlaping 2-center S_ab, where b is restricted on a
     641              : !>        a given list of atoms. Used for Coulomb RI where (aI|P) = sum_b C_bI (ab|P), where
     642              : !>        contraction coeff C_bI is assumed to be non-zero only on excited atoms
     643              : !> \param ab_list the neighbor list
     644              : !> \param basis_a basis set list for atom a
     645              : !> \param basis_b basis set list for atom b
     646              : !> \param qs_env ...
     647              : !> \param excited_atoms the indices of the excited atoms on which b is centered
     648              : !> \param ext_dist2d use an external distribution2d
     649              : ! **************************************************************************************************
     650          330 :    SUBROUTINE build_xas_tdp_ovlp_nl(ab_list, basis_a, basis_b, qs_env, excited_atoms, ext_dist2d)
     651              : 
     652              :       TYPE(neighbor_list_set_p_type), DIMENSION(:), &
     653              :          POINTER                                         :: ab_list
     654              :       TYPE(gto_basis_set_p_type), DIMENSION(:), POINTER  :: basis_a, basis_b
     655              :       TYPE(qs_environment_type), POINTER                 :: qs_env
     656              :       INTEGER, DIMENSION(:), INTENT(IN), OPTIONAL        :: excited_atoms
     657              :       TYPE(distribution_2d_type), OPTIONAL, POINTER      :: ext_dist2d
     658              : 
     659              :       INTEGER                                            :: ikind, nkind
     660              :       LOGICAL                                            :: my_restrictb
     661              :       LOGICAL, ALLOCATABLE, DIMENSION(:)                 :: a_present, b_present
     662              :       REAL(dp)                                           :: subcells
     663              :       REAL(dp), ALLOCATABLE, DIMENSION(:)                :: a_radius, b_radius
     664          330 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: pair_radius
     665          330 :       TYPE(atomic_kind_type), DIMENSION(:), POINTER      :: atomic_kind_set
     666              :       TYPE(cell_type), POINTER                           :: cell
     667              :       TYPE(distribution_1d_type), POINTER                :: distribution_1d
     668              :       TYPE(distribution_2d_type), POINTER                :: distribution_2d
     669          330 :       TYPE(local_atoms_type), ALLOCATABLE, DIMENSION(:)  :: atom2d
     670          330 :       TYPE(molecule_type), DIMENSION(:), POINTER         :: molecule_set
     671          330 :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
     672              : 
     673          330 :       NULLIFY (atomic_kind_set, distribution_1d, distribution_2d, molecule_set, particle_set, cell)
     674              : 
     675              : !  Initialization
     676          330 :       CALL get_qs_env(qs_env, nkind=nkind)
     677          330 :       CALL section_vals_val_get(qs_env%input, "DFT%SUBCELLS", r_val=subcells)
     678              : 
     679          330 :       my_restrictb = .FALSE.
     680          330 :       IF (PRESENT(excited_atoms)) THEN
     681          116 :          my_restrictb = .TRUE.
     682              :       END IF
     683              : 
     684         1320 :       ALLOCATE (a_present(nkind), b_present(nkind))
     685          868 :       a_present = .FALSE.
     686          868 :       b_present = .FALSE.
     687         1320 :       ALLOCATE (a_radius(nkind), b_radius(nkind))
     688          868 :       a_radius = 0.0_dp
     689          868 :       b_radius = 0.0_dp
     690              : 
     691              : !  Set up the radii
     692          868 :       DO ikind = 1, nkind
     693          538 :          IF (ASSOCIATED(basis_a(ikind)%gto_basis_set)) THEN
     694          538 :             a_present(ikind) = .TRUE.
     695          538 :             CALL get_gto_basis_set(basis_a(ikind)%gto_basis_set, kind_radius=a_radius(ikind))
     696              :          END IF
     697              : 
     698          868 :          IF (ASSOCIATED(basis_b(ikind)%gto_basis_set)) THEN
     699          538 :             b_present(ikind) = .TRUE.
     700          538 :             CALL get_gto_basis_set(basis_b(ikind)%gto_basis_set, kind_radius=b_radius(ikind))
     701              :          END IF
     702              :       END DO !ikind
     703              : 
     704         1320 :       ALLOCATE (pair_radius(nkind, nkind))
     705         1858 :       pair_radius = 0.0_dp
     706          330 :       CALL pair_radius_setup(a_present, b_present, a_radius, b_radius, pair_radius)
     707              : 
     708              : !  Set up the nl
     709              :       CALL get_qs_env(qs_env, atomic_kind_set=atomic_kind_set, cell=cell, &
     710              :                       distribution_2d=distribution_2d, local_particles=distribution_1d, &
     711          330 :                       particle_set=particle_set, molecule_set=molecule_set)
     712              : 
     713              :       !use an external distribution_2d if required
     714          330 :       IF (PRESENT(ext_dist2d)) distribution_2d => ext_dist2d
     715              : 
     716         1528 :       ALLOCATE (atom2d(nkind))
     717              :       CALL atom2d_build(atom2d, distribution_1d, distribution_2d, atomic_kind_set, &
     718          330 :                         molecule_set, .FALSE., particle_set)
     719              : 
     720          330 :       IF (my_restrictb) THEN
     721              : 
     722              :          CALL build_neighbor_lists(ab_list, particle_set, atom2d, cell, pair_radius, subcells, &
     723          116 :                                    atomb_to_keep=excited_atoms, nlname="XAS_TDP_ovlp_nl")
     724              : 
     725              :       ELSE
     726              : 
     727              :          CALL build_neighbor_lists(ab_list, particle_set, atom2d, cell, pair_radius, subcells, &
     728          214 :                                    nlname="XAS_TDP_ovlp_nl")
     729              : 
     730              :       END IF
     731              : !  Clean-up
     732          330 :       CALL atom2d_cleanup(atom2d)
     733              : 
     734          660 :    END SUBROUTINE build_xas_tdp_ovlp_nl
     735              : 
     736              : ! **************************************************************************************************
     737              : !> \brief Builds a neighbor lists set taylored for 3-center integral within XAS TDP, such that only
     738              : !>        excited atoms are taken into account for the list_c
     739              : !> \param ac_list the neighbor list ready for 3-center integrals
     740              : !> \param basis_a basis set list for atom a
     741              : !> \param basis_c basis set list for atom c
     742              : !> \param op_type to indicate whther the list should be built with overlap, Coulomb or else in mind
     743              : !> \param qs_env ...
     744              : !> \param excited_atoms the indices of the excited atoms to consider (if not given, all atoms are taken)
     745              : !> \param x_range in case some truncated/screened operator is used, gives its range
     746              : !> \param ext_dist2d external distribution_2d to be used
     747              : !> \note Based on setup_neighbor_list with added features
     748              : ! **************************************************************************************************
     749          278 :    SUBROUTINE build_xas_tdp_3c_nl(ac_list, basis_a, basis_c, op_type, qs_env, excited_atoms, &
     750              :                                   x_range, ext_dist2d)
     751              : 
     752              :       TYPE(neighbor_list_set_p_type), DIMENSION(:), &
     753              :          POINTER                                         :: ac_list
     754              :       TYPE(gto_basis_set_p_type), DIMENSION(:), POINTER  :: basis_a, basis_c
     755              :       INTEGER, INTENT(IN)                                :: op_type
     756              :       TYPE(qs_environment_type), POINTER                 :: qs_env
     757              :       INTEGER, DIMENSION(:), INTENT(IN), OPTIONAL        :: excited_atoms
     758              :       REAL(dp), INTENT(IN), OPTIONAL                     :: x_range
     759              :       TYPE(distribution_2d_type), OPTIONAL, POINTER      :: ext_dist2d
     760              : 
     761              :       INTEGER                                            :: ikind, nkind
     762              :       LOGICAL                                            :: sort_atoms
     763              :       LOGICAL, ALLOCATABLE, DIMENSION(:)                 :: a_present, c_present
     764              :       REAL(dp)                                           :: subcells
     765              :       REAL(dp), ALLOCATABLE, DIMENSION(:)                :: a_radius, c_radius
     766          278 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: pair_radius
     767          278 :       TYPE(atomic_kind_type), DIMENSION(:), POINTER      :: atomic_kind_set
     768              :       TYPE(cell_type), POINTER                           :: cell
     769              :       TYPE(distribution_1d_type), POINTER                :: distribution_1d
     770              :       TYPE(distribution_2d_type), POINTER                :: distribution_2d
     771          278 :       TYPE(local_atoms_type), ALLOCATABLE, DIMENSION(:)  :: atom2d
     772          278 :       TYPE(molecule_type), DIMENSION(:), POINTER         :: molecule_set
     773          278 :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
     774              : 
     775          278 :       NULLIFY (atomic_kind_set, distribution_1d, distribution_2d, molecule_set, particle_set, cell)
     776              : 
     777              : !  Initialization
     778          278 :       CALL get_qs_env(qs_env, nkind=nkind)
     779          278 :       CALL section_vals_val_get(qs_env%input, "DFT%SUBCELLS", r_val=subcells)
     780          278 :       sort_atoms = .FALSE.
     781          278 :       IF ((PRESENT(excited_atoms))) sort_atoms = .TRUE.
     782              : 
     783         1112 :       ALLOCATE (a_present(nkind), c_present(nkind))
     784          732 :       a_present = .FALSE.
     785          732 :       c_present = .FALSE.
     786         1112 :       ALLOCATE (a_radius(nkind), c_radius(nkind))
     787          732 :       a_radius = 0.0_dp
     788          732 :       c_radius = 0.0_dp
     789              : 
     790              : !  Set up the radii, depending on the operator type
     791          278 :       IF (op_type == do_potential_id) THEN
     792              : 
     793              :          !overlap => use the kind radius for both a and c
     794          472 :          DO ikind = 1, nkind
     795              :             !orbital basis set
     796          290 :             IF (ASSOCIATED(basis_a(ikind)%gto_basis_set)) THEN
     797          290 :                a_present(ikind) = .TRUE.
     798          290 :                CALL get_gto_basis_set(basis_a(ikind)%gto_basis_set, kind_radius=a_radius(ikind))
     799              :             END IF
     800              :             !RI_XAS basis set
     801          472 :             IF (ASSOCIATED(basis_c(ikind)%gto_basis_set)) THEN
     802          290 :                c_present(ikind) = .TRUE.
     803          290 :                CALL get_gto_basis_set(basis_c(ikind)%gto_basis_set, kind_radius=c_radius(ikind))
     804              :             END IF
     805              :          END DO !ikind
     806              : 
     807           96 :       ELSE IF (op_type == do_potential_coulomb) THEN
     808              : 
     809              :          !Coulomb operator, virtually infinite range => set c_radius to arbitrarily large number
     810          212 :          DO ikind = 1, nkind
     811          140 :             IF (ASSOCIATED(basis_c(ikind)%gto_basis_set)) THEN
     812          140 :                c_present(ikind) = .TRUE.
     813          140 :                c_radius(ikind) = 1000000.0_dp
     814              :             END IF
     815          212 :             IF (ASSOCIATED(basis_a(ikind)%gto_basis_set)) a_present(ikind) = .TRUE.
     816              :          END DO !ikind
     817              : 
     818           24 :       ELSE IF (op_type == do_potential_truncated .OR. op_type == do_potential_short) THEN
     819              : 
     820              :          !Truncated coulomb/short range: set c_radius to x_range + the kind_radii
     821           48 :          DO ikind = 1, nkind
     822           24 :             IF (ASSOCIATED(basis_a(ikind)%gto_basis_set)) THEN
     823           24 :                a_present(ikind) = .TRUE.
     824           24 :                CALL get_gto_basis_set(basis_a(ikind)%gto_basis_set, kind_radius=a_radius(ikind))
     825              :             END IF
     826           48 :             IF (ASSOCIATED(basis_c(ikind)%gto_basis_set)) THEN
     827           24 :                c_present(ikind) = .TRUE.
     828           24 :                CALL get_gto_basis_set(basis_c(ikind)%gto_basis_set, kind_radius=c_radius(ikind))
     829           24 :                c_radius(ikind) = c_radius(ikind) + x_range
     830              :             END IF
     831              :          END DO !ikind
     832              : 
     833              :       ELSE
     834            0 :          CPABORT("Operator not known")
     835              :       END IF
     836              : 
     837         1112 :       ALLOCATE (pair_radius(nkind, nkind))
     838         1570 :       pair_radius = 0.0_dp
     839          278 :       CALL pair_radius_setup(a_present, c_present, a_radius, c_radius, pair_radius)
     840              : 
     841              : !  Actually setup the list
     842              :       CALL get_qs_env(qs_env, atomic_kind_set=atomic_kind_set, cell=cell, &
     843              :                       distribution_2d=distribution_2d, local_particles=distribution_1d, &
     844          278 :                       particle_set=particle_set, molecule_set=molecule_set)
     845              : 
     846              :       !use an external distribution_2d if required
     847          278 :       IF (PRESENT(ext_dist2d)) distribution_2d => ext_dist2d
     848              : 
     849         1288 :       ALLOCATE (atom2d(nkind))
     850              :       CALL atom2d_build(atom2d, distribution_1d, distribution_2d, atomic_kind_set, &
     851          278 :                         molecule_set, .FALSE., particle_set)
     852              : 
     853          278 :       IF (sort_atoms) THEN
     854              :          CALL build_neighbor_lists(ac_list, particle_set, atom2d, cell, pair_radius, subcells, &
     855              :                                    operator_type="ABC", atomb_to_keep=excited_atoms, &
     856          278 :                                    nlname="XAS_TDP_3c_nl")
     857              :       ELSE
     858              :          CALL build_neighbor_lists(ac_list, particle_set, atom2d, cell, pair_radius, subcells, &
     859            0 :                                    operator_type="ABC", nlname="XAS_TDP_3c_nl")
     860              :       END IF
     861              : 
     862              : !  Clean-up
     863          278 :       CALL atom2d_cleanup(atom2d)
     864              : 
     865          556 :    END SUBROUTINE build_xas_tdp_3c_nl
     866              : 
     867              : ! **************************************************************************************************
     868              : !> \brief Returns an optimized distribution_2d for the given neighbor lists based on an evaluation
     869              : !>        of the cost of the corresponding 3-center integrals
     870              : !> \param opt_3c_dist2d the optimized distribution_2d
     871              : !> \param ab_list ...
     872              : !> \param ac_list ...
     873              : !> \param basis_set_a ...
     874              : !> \param basis_set_b ...
     875              : !> \param basis_set_c ...
     876              : !> \param qs_env ...
     877              : !> \param only_bc_same_center ...
     878              : ! **************************************************************************************************
     879          108 :    SUBROUTINE get_opt_3c_dist2d(opt_3c_dist2d, ab_list, ac_list, basis_set_a, basis_set_b, &
     880              :                                 basis_set_c, qs_env, only_bc_same_center)
     881              : 
     882              :       TYPE(distribution_2d_type), POINTER                :: opt_3c_dist2d
     883              :       TYPE(neighbor_list_set_p_type), DIMENSION(:), &
     884              :          POINTER                                         :: ab_list, ac_list
     885              :       TYPE(gto_basis_set_p_type), DIMENSION(:), POINTER  :: basis_set_a, basis_set_b, basis_set_c
     886              :       TYPE(qs_environment_type), POINTER                 :: qs_env
     887              :       LOGICAL, INTENT(IN), OPTIONAL                      :: only_bc_same_center
     888              : 
     889              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'get_opt_3c_dist2d'
     890              : 
     891              :       INTEGER                                            :: handle, i, iatom, ikind, ip, jatom, &
     892              :                                                             jkind, kkind, mypcol, myprow, n, &
     893              :                                                             natom, nkind, npcol, nprow, nsgfa, &
     894              :                                                             nsgfb, nsgfc
     895          108 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: nparticle_local_col, nparticle_local_row
     896          108 :       INTEGER, DIMENSION(:, :), POINTER                  :: col_dist, row_dist
     897              :       LOGICAL                                            :: my_sort_bc
     898              :       REAL(dp)                                           :: cost, rab(3), rac(3), rbc(3)
     899          108 :       REAL(dp), ALLOCATABLE, DIMENSION(:)                :: col_cost, col_proc_cost, row_cost, &
     900          108 :                                                             row_proc_cost
     901          108 :       TYPE(cp_1d_i_p_type), DIMENSION(:), POINTER        :: local_particle_col, local_particle_row
     902              :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
     903              :       TYPE(mp_para_env_type), POINTER                    :: para_env
     904              :       TYPE(neighbor_list_iterator_p_type), &
     905          108 :          DIMENSION(:), POINTER                           :: ab_iter, ac_iter
     906          108 :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
     907          108 :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
     908              : 
     909          108 :       NULLIFY (para_env, col_dist, row_dist, blacs_env, qs_kind_set, particle_set)
     910          108 :       NULLIFY (local_particle_col, local_particle_row, ab_iter, ac_iter)
     911              : 
     912          108 :       CALL timeset(routineN, handle)
     913              : 
     914              :       !Idea: create a,b and a,c nl_iterators in the original dist, then loop over them and compute the
     915              :       !      cost of each ab pairs and project on the col/row. Then distribute the atom col/row to
     916              :       !      the proc col/row in order to spread out the cost as much as possible
     917              : 
     918          108 :       my_sort_bc = .FALSE.
     919          108 :       IF (PRESENT(only_bc_same_center)) my_sort_bc = only_bc_same_center
     920              : 
     921              :       CALL get_qs_env(qs_env, natom=natom, para_env=para_env, blacs_env=blacs_env, &
     922          108 :                       qs_kind_set=qs_kind_set, particle_set=particle_set, nkind=nkind)
     923              : 
     924          108 :       myprow = blacs_env%mepos(1) + 1
     925          108 :       mypcol = blacs_env%mepos(2) + 1
     926          108 :       nprow = blacs_env%num_pe(1)
     927          108 :       npcol = blacs_env%num_pe(2)
     928              : 
     929          432 :       ALLOCATE (col_cost(natom), row_cost(natom))
     930         1492 :       col_cost = 0.0_dp; row_cost = 0.0_dp
     931              : 
     932          432 :       ALLOCATE (row_dist(natom, 2), col_dist(natom, 2))
     933         3092 :       row_dist = 1; col_dist = 1
     934              : 
     935          108 :       CALL neighbor_list_iterator_create(ab_iter, ab_list)
     936          108 :       CALL neighbor_list_iterator_create(ac_iter, ac_list, search=.TRUE.)
     937         1235 :       DO WHILE (neighbor_list_iterate(ab_iter) == 0)
     938         1127 :          CALL get_iterator_info(ab_iter, ikind=ikind, jkind=jkind, iatom=iatom, jatom=jatom, r=rab)
     939              : 
     940         2799 :          DO kkind = 1, nkind
     941         1564 :             CALL nl_set_sub_iterator(ac_iter, ikind, kkind, iatom)
     942              : 
     943         7080 :             DO WHILE (nl_sub_iterate(ac_iter) == 0)
     944              : 
     945         4389 :                IF (my_sort_bc) THEN
     946              :                   !only take a,b,c if b,c (or a,c because of symmetry) share the same center
     947          714 :                   CALL get_iterator_info(ac_iter, r=rac)
     948         2856 :                   rbc(:) = rac(:) - rab(:)
     949         3131 :                   IF (.NOT. (ALL(ABS(rbc) <= 1.0E-8_dp) .OR. ALL(ABS(rac) <= 1.0E-8_dp))) CYCLE
     950              : 
     951              :                END IF
     952              : 
     953              :                !Use the size of integral as measure as contraciton cost seems to dominate
     954         4097 :                nsgfa = basis_set_a(ikind)%gto_basis_set%nsgf
     955         4097 :                nsgfb = basis_set_b(jkind)%gto_basis_set%nsgf
     956         4097 :                nsgfc = basis_set_c(kkind)%gto_basis_set%nsgf
     957              : 
     958         4097 :                cost = REAL(nsgfa*nsgfb*nsgfc, dp)
     959              : 
     960         4097 :                row_cost(iatom) = row_cost(iatom) + cost
     961         4389 :                col_cost(jatom) = col_cost(jatom) + cost
     962              : 
     963              :             END DO !ac_iter
     964              :          END DO !kkind
     965              :       END DO !ab_iter
     966          108 :       CALL neighbor_list_iterator_release(ab_iter)
     967          108 :       CALL neighbor_list_iterator_release(ac_iter)
     968              : 
     969          108 :       CALL para_env%sum(row_cost)
     970          108 :       CALL para_env%sum(col_cost)
     971              : 
     972              :       !Distribute the cost as evenly as possible
     973          540 :       ALLOCATE (col_proc_cost(npcol), row_proc_cost(nprow))
     974          540 :       col_proc_cost = 0.0_dp; row_proc_cost = 0.0_dp
     975          746 :       DO i = 1, natom
     976        21632 :          iatom = MAXLOC(row_cost, 1)
     977         2552 :          ip = MINLOC(row_proc_cost, 1)
     978          638 :          row_proc_cost(ip) = row_proc_cost(ip) + row_cost(iatom)
     979          638 :          row_dist(iatom, 1) = ip
     980          638 :          row_cost(iatom) = 0.0_dp
     981              : 
     982        21632 :          iatom = MAXLOC(col_cost, 1)
     983         1914 :          ip = MINLOC(col_proc_cost, 1)
     984          638 :          col_proc_cost(ip) = col_proc_cost(ip) + col_cost(iatom)
     985          638 :          col_dist(iatom, 1) = ip
     986          746 :          col_cost(iatom) = 0.0_dp
     987              :       END DO
     988              : 
     989              :       !the usual stuff
     990          780 :       ALLOCATE (local_particle_col(nkind), local_particle_row(nkind))
     991          432 :       ALLOCATE (nparticle_local_row(nkind), nparticle_local_col(nkind))
     992          564 :       nparticle_local_row = 0; nparticle_local_col = 0
     993              : 
     994          746 :       DO iatom = 1, natom
     995          638 :          ikind = particle_set(iatom)%atomic_kind%kind_number
     996              : 
     997          638 :          IF (row_dist(iatom, 1) == myprow) nparticle_local_row(ikind) = nparticle_local_row(ikind) + 1
     998          746 :          IF (col_dist(iatom, 1) == mypcol) nparticle_local_col(ikind) = nparticle_local_col(ikind) + 1
     999              :       END DO
    1000              : 
    1001          282 :       DO ikind = 1, nkind
    1002          174 :          n = nparticle_local_row(ikind)
    1003          464 :          ALLOCATE (local_particle_row(ikind)%array(n))
    1004              : 
    1005          174 :          n = nparticle_local_col(ikind)
    1006          630 :          ALLOCATE (local_particle_col(ikind)%array(n))
    1007              :       END DO
    1008              : 
    1009          564 :       nparticle_local_row = 0; nparticle_local_col = 0
    1010          746 :       DO iatom = 1, natom
    1011          638 :          ikind = particle_set(iatom)%atomic_kind%kind_number
    1012              : 
    1013          638 :          IF (row_dist(iatom, 1) == myprow) THEN
    1014          319 :             nparticle_local_row(ikind) = nparticle_local_row(ikind) + 1
    1015          319 :             local_particle_row(ikind)%array(nparticle_local_row(ikind)) = iatom
    1016              :          END IF
    1017          746 :          IF (col_dist(iatom, 1) == mypcol) THEN
    1018          638 :             nparticle_local_col(ikind) = nparticle_local_col(ikind) + 1
    1019          638 :             local_particle_col(ikind)%array(nparticle_local_col(ikind)) = iatom
    1020              :          END IF
    1021              :       END DO
    1022              : 
    1023              :       !Finally create the dist_2d
    1024          746 :       row_dist(:, 1) = row_dist(:, 1) - 1
    1025          746 :       col_dist(:, 1) = col_dist(:, 1) - 1
    1026              :       CALL distribution_2d_create(opt_3c_dist2d, row_distribution_ptr=row_dist, &
    1027              :                                   col_distribution_ptr=col_dist, local_rows_ptr=local_particle_row, &
    1028          108 :                                   local_cols_ptr=local_particle_col, blacs_env=blacs_env)
    1029              : 
    1030          108 :       CALL timestop(handle)
    1031              : 
    1032          216 :    END SUBROUTINE get_opt_3c_dist2d
    1033              : 
    1034              : ! **************************************************************************************************
    1035              : !> \brief Computes the RI exchange 3-center integrals (ab|c), where c is from the RI_XAS basis and
    1036              : !>        centered on excited atoms and kind. The operator used is that of the RI metric
    1037              : !> \param ex_atoms excited atoms on which the third center is located
    1038              : !> \param xas_tdp_env ...
    1039              : !> \param xas_tdp_control ...
    1040              : !> \param qs_env ...
    1041              : !> \note  This routine is called once for each excited atom. Because there are many different a,b
    1042              : !>        pairs involved, load balance is ok. This allows memory saving
    1043              : ! **************************************************************************************************
    1044           50 :    SUBROUTINE compute_ri_3c_exchange(ex_atoms, xas_tdp_env, xas_tdp_control, qs_env)
    1045              : 
    1046              :       INTEGER, DIMENSION(:), INTENT(IN)                  :: ex_atoms
    1047              :       TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
    1048              :       TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
    1049              :       TYPE(qs_environment_type), POINTER                 :: qs_env
    1050              : 
    1051              :       CHARACTER(len=*), PARAMETER :: routineN = 'compute_ri_3c_exchange'
    1052              : 
    1053              :       INTEGER                                            :: handle, natom, nkind
    1054              :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: blk_size_orb, blk_size_ri
    1055              :       TYPE(dbcsr_distribution_type)                      :: opt_dbcsr_dist
    1056           50 :       TYPE(gto_basis_set_p_type), DIMENSION(:), POINTER  :: basis_set_orb, basis_set_ri
    1057              :       TYPE(mp_para_env_type), POINTER                    :: para_env
    1058              :       TYPE(neighbor_list_set_p_type), DIMENSION(:), &
    1059           50 :          POINTER                                         :: ab_list, ac_list
    1060           50 :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
    1061           50 :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
    1062              : 
    1063           50 :       NULLIFY (basis_set_ri, basis_set_orb, ac_list, ab_list, qs_kind_set, para_env, particle_set)
    1064              : 
    1065           50 :       CALL timeset(routineN, handle)
    1066              : 
    1067              : !  Take what we need from the qs_env
    1068              :       CALL get_qs_env(qs_env, nkind=nkind, qs_kind_set=qs_kind_set, para_env=para_env, &
    1069           50 :                       natom=natom, particle_set=particle_set)
    1070              : 
    1071              : !  Build the basis set lists
    1072          234 :       ALLOCATE (basis_set_ri(nkind))
    1073          234 :       ALLOCATE (basis_set_orb(nkind))
    1074           50 :       CALL basis_set_list_setup(basis_set_ri, "RI_XAS", qs_kind_set)
    1075           50 :       CALL basis_set_list_setup(basis_set_orb, "ORB", qs_kind_set)
    1076              : 
    1077              : !  Get the optimized distribution 2d for theses integrals (and store it in xas_tdp_env)
    1078           50 :       CALL build_xas_tdp_ovlp_nl(ab_list, basis_set_orb, basis_set_orb, qs_env)
    1079              :       CALL build_xas_tdp_3c_nl(ac_list, basis_set_orb, basis_set_ri, &
    1080              :                                xas_tdp_control%ri_m_potential%potential_type, qs_env, &
    1081           50 :                                excited_atoms=ex_atoms, x_range=xas_tdp_control%ri_m_potential%cutoff_radius)
    1082              : 
    1083              :       CALL get_opt_3c_dist2d(xas_tdp_env%opt_dist2d_ex, ab_list, ac_list, basis_set_orb, &
    1084           50 :                              basis_set_orb, basis_set_ri, qs_env)
    1085           50 :       CALL release_neighbor_list_sets(ab_list)
    1086           50 :       CALL release_neighbor_list_sets(ac_list)
    1087              : 
    1088              : !  Build the ab and ac centers neighbor lists based on the optimized distribution
    1089              :       CALL build_xas_tdp_ovlp_nl(ab_list, basis_set_orb, basis_set_orb, qs_env, &
    1090           50 :                                  ext_dist2d=xas_tdp_env%opt_dist2d_ex)
    1091              :       CALL build_xas_tdp_3c_nl(ac_list, basis_set_orb, basis_set_ri, &
    1092              :                                xas_tdp_control%ri_m_potential%potential_type, qs_env, &
    1093              :                                excited_atoms=ex_atoms, x_range=xas_tdp_control%ri_m_potential%cutoff_radius, &
    1094           50 :                                ext_dist2d=xas_tdp_env%opt_dist2d_ex)
    1095              : 
    1096              : !  Allocate, init and compute the integrals.
    1097          250 :       ALLOCATE (blk_size_orb(natom), blk_size_ri(natom))
    1098           50 :       CALL cp_dbcsr_dist2d_to_dist(xas_tdp_env%opt_dist2d_ex, opt_dbcsr_dist)
    1099           50 :       CALL get_particle_set(particle_set, qs_kind_set, nsgf=blk_size_orb, basis=basis_set_orb)
    1100           50 :       CALL get_particle_set(particle_set, qs_kind_set, nsgf=blk_size_ri, basis=basis_set_ri)
    1101              : 
    1102          500 :       ALLOCATE (xas_tdp_env%ri_3c_ex)
    1103              :       CALL create_pqX_tensor(xas_tdp_env%ri_3c_ex, ab_list, ac_list, opt_dbcsr_dist, blk_size_orb, &
    1104           50 :                              blk_size_orb, blk_size_ri)
    1105              :       CALL fill_pqX_tensor(xas_tdp_env%ri_3c_ex, ab_list, ac_list, basis_set_orb, basis_set_orb, &
    1106              :                            basis_set_ri, xas_tdp_control%ri_m_potential, qs_env, &
    1107           50 :                            eps_screen=xas_tdp_control%eps_screen)
    1108              : 
    1109              : ! Clean-up
    1110           50 :       CALL release_neighbor_list_sets(ab_list)
    1111           50 :       CALL release_neighbor_list_sets(ac_list)
    1112           50 :       CALL dbcsr_distribution_release(opt_dbcsr_dist)
    1113           50 :       DEALLOCATE (basis_set_ri, basis_set_orb)
    1114              : 
    1115              :       !not strictly necessary but avoid having any load unbalance here being reported in the
    1116              :       !timings for other routines
    1117           50 :       CALL para_env%sync()
    1118              : 
    1119           50 :       CALL timestop(handle)
    1120              : 
    1121          150 :    END SUBROUTINE compute_ri_3c_exchange
    1122              : 
    1123              : ! **************************************************************************************************
    1124              : !> \brief Computes the RI Coulomb 3-center integrals (ab|c), where c is from the RI_XAS basis and
    1125              : !>        centered on the excited atoms of xas_tdp_env
    1126              : !> \param xas_tdp_env ...
    1127              : !> \param qs_env ...
    1128              : !> \note  The ri_3c_coul tensor of xas_tdp_env is defined and allocated here. Only computed once
    1129              : !>        for the whole system (for optimized load balance). Ok because not too much memory needed
    1130              : ! **************************************************************************************************
    1131           58 :    SUBROUTINE compute_ri_3c_coulomb(xas_tdp_env, qs_env)
    1132              : 
    1133              :       TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
    1134              :       TYPE(qs_environment_type), POINTER                 :: qs_env
    1135              : 
    1136              :       CHARACTER(len=*), PARAMETER :: routineN = 'compute_ri_3c_coulomb'
    1137              : 
    1138              :       INTEGER                                            :: handle, natom, nkind
    1139              :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: blk_size_orb, blk_size_ri
    1140              :       TYPE(dbcsr_distribution_type)                      :: opt_dbcsr_dist
    1141           58 :       TYPE(gto_basis_set_p_type), DIMENSION(:), POINTER  :: basis_set_orb, basis_set_ri
    1142              :       TYPE(libint_potential_type)                        :: pot
    1143              :       TYPE(mp_para_env_type), POINTER                    :: para_env
    1144              :       TYPE(neighbor_list_set_p_type), DIMENSION(:), &
    1145           58 :          POINTER                                         :: ab_list, ac_list
    1146           58 :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
    1147           58 :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
    1148              : 
    1149           58 :       NULLIFY (basis_set_ri, basis_set_orb, ac_list, ab_list, qs_kind_set, para_env, particle_set)
    1150              : 
    1151           58 :       CALL timeset(routineN, handle)
    1152              : 
    1153              : !  Take what we need from the qs_env
    1154              :       CALL get_qs_env(qs_env, nkind=nkind, qs_kind_set=qs_kind_set, para_env=para_env, &
    1155           58 :                       natom=natom, particle_set=particle_set)
    1156              : 
    1157              : !  Build the basis set lists
    1158          264 :       ALLOCATE (basis_set_ri(nkind))
    1159          264 :       ALLOCATE (basis_set_orb(nkind))
    1160           58 :       CALL basis_set_list_setup(basis_set_ri, "RI_XAS", qs_kind_set)
    1161           58 :       CALL basis_set_list_setup(basis_set_orb, "ORB", qs_kind_set)
    1162              : 
    1163              : !  Get the optimized distribution 2d for these integrals (and store it in xas_tdp_env)
    1164              :       CALL build_xas_tdp_ovlp_nl(ab_list, basis_set_orb, basis_set_orb, qs_env, &
    1165           58 :                                  excited_atoms=xas_tdp_env%ex_atom_indices)
    1166              :       CALL build_xas_tdp_3c_nl(ac_list, basis_set_orb, basis_set_ri, do_potential_id, &
    1167           58 :                                qs_env, excited_atoms=xas_tdp_env%ex_atom_indices)
    1168              :       CALL get_opt_3c_dist2d(xas_tdp_env%opt_dist2d_coul, ab_list, ac_list, basis_set_orb, &
    1169           58 :                              basis_set_orb, basis_set_ri, qs_env, only_bc_same_center=.TRUE.)
    1170           58 :       CALL release_neighbor_list_sets(ab_list)
    1171           58 :       CALL release_neighbor_list_sets(ac_list)
    1172              : 
    1173              : !  Build a neighbor list for the ab centers. Assume (aI|c) = sum_b c_bI (ab|c), with c_bI only
    1174              : !  non-zero for b centered on the same atom as c => build overlap nl, but only keeping b if centered
    1175              : !  on an excited atom
    1176              :       CALL build_xas_tdp_ovlp_nl(ab_list, basis_set_orb, basis_set_orb, qs_env, &
    1177              :                                  excited_atoms=xas_tdp_env%ex_atom_indices, &
    1178           58 :                                  ext_dist2d=xas_tdp_env%opt_dist2d_coul)
    1179              : 
    1180              : !  Build a neighbor list for the ac centers. Since we later contract as (aI|c) and we assume I is
    1181              : !  very localized on the same atom as c, we take a,c as neighbors if they overlap
    1182              :       CALL build_xas_tdp_3c_nl(ac_list, basis_set_orb, basis_set_ri, do_potential_id, &
    1183              :                                qs_env, excited_atoms=xas_tdp_env%ex_atom_indices, &
    1184           58 :                                ext_dist2d=xas_tdp_env%opt_dist2d_coul)
    1185              : 
    1186              : !  Allocate, init and compute the integrals
    1187          290 :       ALLOCATE (blk_size_orb(natom), blk_size_ri(natom))
    1188           58 :       CALL cp_dbcsr_dist2d_to_dist(xas_tdp_env%opt_dist2d_coul, opt_dbcsr_dist)
    1189           58 :       CALL get_particle_set(particle_set, qs_kind_set, nsgf=blk_size_orb, basis=basis_set_orb)
    1190           58 :       CALL get_particle_set(particle_set, qs_kind_set, nsgf=blk_size_ri, basis=basis_set_ri)
    1191              :       pot%potential_type = do_potential_coulomb
    1192              : 
    1193          580 :       ALLOCATE (xas_tdp_env%ri_3c_coul)
    1194              :       CALL create_pqX_tensor(xas_tdp_env%ri_3c_coul, ab_list, ac_list, opt_dbcsr_dist, blk_size_orb, &
    1195           58 :                              blk_size_orb, blk_size_ri, only_bc_same_center=.TRUE.)
    1196              :       CALL fill_pqX_tensor(xas_tdp_env%ri_3c_coul, ab_list, ac_list, basis_set_orb, basis_set_orb, &
    1197           58 :                            basis_set_ri, pot, qs_env, only_bc_same_center=.TRUE.)
    1198              : 
    1199              : ! Clean-up
    1200           58 :       CALL release_neighbor_list_sets(ab_list)
    1201           58 :       CALL release_neighbor_list_sets(ac_list)
    1202           58 :       CALL dbcsr_distribution_release(opt_dbcsr_dist)
    1203           58 :       DEALLOCATE (basis_set_ri, basis_set_orb)
    1204              : 
    1205              :       !not strictly necessary but avoid having any load unbalance here being reported in the
    1206              :       !timings for other routines
    1207           58 :       CALL para_env%sync()
    1208              : 
    1209           58 :       CALL timestop(handle)
    1210              : 
    1211          174 :    END SUBROUTINE compute_ri_3c_coulomb
    1212              : 
    1213              : ! **************************************************************************************************
    1214              : !> \brief Computes the two-center Exchange integral needed for the RI in kernel calculation. Stores
    1215              : !>        the integrals in the xas_tdp_env as global (small) arrays. Does that for a given excited
    1216              : !>        kind. The quantity stored is M^-1 (P|Q) M^-1, where M is the RI metric. If the metric is
    1217              : !>        the same as the exchange potential, then we end up with the V-approximation (P|Q)^-1
    1218              : !>        By default (if no metric), the ri_m_potential is a copy of the x_potential
    1219              : !> \param ex_kind ...
    1220              : !> \param xas_tdp_env ...
    1221              : !> \param xas_tdp_control ...
    1222              : !> \param qs_env ...
    1223              : !> \note Computes all these integrals in non-PBCs as we assume that the range is short enough that
    1224              : !>       atoms do not exchange with their periodic images
    1225              : ! **************************************************************************************************
    1226           50 :    SUBROUTINE compute_ri_exchange2_int(ex_kind, xas_tdp_env, xas_tdp_control, qs_env)
    1227              : 
    1228              :       INTEGER, INTENT(IN)                                :: ex_kind
    1229              :       TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
    1230              :       TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
    1231              :       TYPE(qs_environment_type), POINTER                 :: qs_env
    1232              : 
    1233              :       INTEGER                                            :: egfp, egfq, maxl, ncop, ncoq, nset, &
    1234              :                                                             nsgf, pset, qset, sgfp, sgfq, unit_id
    1235           50 :       INTEGER, DIMENSION(:), POINTER                     :: lmax, lmin, npgf_set, nsgf_set
    1236           50 :       INTEGER, DIMENSION(:, :), POINTER                  :: first_sgf
    1237              :       REAL(dp)                                           :: r(3)
    1238           50 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: metric, pq, work
    1239           50 :       REAL(dp), DIMENSION(:, :), POINTER                 :: rpgf, sphi, zet
    1240              :       TYPE(cp_libint_t)                                  :: lib
    1241              :       TYPE(gto_basis_set_type), POINTER                  :: ri_basis
    1242              :       TYPE(mp_para_env_type), POINTER                    :: para_env
    1243           50 :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
    1244              : 
    1245           50 :       NULLIFY (ri_basis, qs_kind_set, para_env, lmin, lmax, npgf_set, zet, rpgf, first_sgf)
    1246           50 :       NULLIFY (sphi, nsgf_set)
    1247              : 
    1248              : !  Initialization
    1249           50 :       CALL get_qs_env(qs_env, qs_kind_set=qs_kind_set, para_env=para_env)
    1250           50 :       IF (ASSOCIATED(xas_tdp_env%ri_inv_ex)) THEN
    1251            6 :          DEALLOCATE (xas_tdp_env%ri_inv_ex)
    1252              :       END IF
    1253              : 
    1254              : !  Get the RI basis of interest and its quantum numbers
    1255           50 :       CALL get_qs_kind(qs_kind_set(ex_kind), basis_set=ri_basis, basis_type="RI_XAS")
    1256              :       CALL get_gto_basis_set(ri_basis, nsgf=nsgf, maxl=maxl, npgf=npgf_set, lmin=lmin, &
    1257              :                              lmax=lmax, zet=zet, pgf_radius=rpgf, first_sgf=first_sgf, &
    1258           50 :                              nsgf_set=nsgf_set, sphi=sphi, nset=nset)
    1259          200 :       ALLOCATE (metric(nsgf, nsgf))
    1260       452962 :       metric = 0.0_dp
    1261              : 
    1262           50 :       r = 0.0_dp
    1263              : 
    1264              :       !init the libint 2-center object
    1265           50 :       CALL cp_libint_init_2eri(lib, maxl)
    1266           50 :       CALL cp_libint_set_contrdepth(lib, 1)
    1267              : 
    1268              :       !make sure the truncted coulomb is initialized
    1269           50 :       IF (xas_tdp_control%ri_m_potential%potential_type == do_potential_truncated) THEN
    1270              : 
    1271            6 :          IF (2*maxl + 1 > get_lmax_init()) THEN
    1272            6 :             IF (para_env%mepos == 0) THEN
    1273            3 :                CALL open_file(unit_number=unit_id, file_name=xas_tdp_control%ri_m_potential%filename)
    1274              :             END IF
    1275            6 :             CALL init(2*maxl + 1, unit_id, para_env%mepos, para_env)
    1276            6 :             IF (para_env%mepos == 0) THEN
    1277            3 :                CALL close_file(unit_id)
    1278              :             END IF
    1279              :          END IF
    1280              :       END IF
    1281              : 
    1282              : !  Compute the RI metric
    1283          150 :       DO pset = 1, nset
    1284          100 :          ncop = npgf_set(pset)*ncoset(lmax(pset))
    1285          100 :          sgfp = first_sgf(1, pset)
    1286          100 :          egfp = sgfp + nsgf_set(pset) - 1
    1287              : 
    1288          350 :          DO qset = 1, nset
    1289          200 :             ncoq = npgf_set(qset)*ncoset(lmax(qset))
    1290          200 :             sgfq = first_sgf(1, qset)
    1291          200 :             egfq = sgfq + nsgf_set(qset) - 1
    1292              : 
    1293          800 :             ALLOCATE (work(ncop, ncoq))
    1294      1474020 :             work = 0.0_dp
    1295              : 
    1296              :             CALL eri_2center(work, lmin(pset), lmax(pset), npgf_set(pset), zet(:, pset), rpgf(:, pset), &
    1297              :                              r, lmin(qset), lmax(qset), npgf_set(qset), zet(:, qset), rpgf(:, qset), &
    1298          200 :                              r, 0.0_dp, lib, xas_tdp_control%ri_m_potential)
    1299              : 
    1300              :             CALL ab_contract(metric(sgfp:egfp, sgfq:egfq), work, sphi(:, sgfp:), sphi(:, sgfq:), &
    1301          200 :                              ncop, ncoq, nsgf_set(pset), nsgf_set(qset))
    1302              : 
    1303          300 :             DEALLOCATE (work)
    1304              :          END DO !qset
    1305              :       END DO !pset
    1306              : 
    1307              : !  Inverting (to M^-1)
    1308           50 :       CALL invmat_symm(metric)
    1309              : 
    1310           50 :       IF (.NOT. xas_tdp_control%do_ri_metric) THEN
    1311              : 
    1312              :          !If no metric, then x_pot = ri_m_pot and (P|Q)^-1 = M^-1 (V-approximation)
    1313          132 :          ALLOCATE (xas_tdp_env%ri_inv_ex(nsgf, nsgf))
    1314       430264 :          xas_tdp_env%ri_inv_ex(:, :) = metric(:, :)
    1315           44 :          CALL cp_libint_cleanup_2eri(lib)
    1316           44 :          RETURN
    1317              : 
    1318              :       END IF
    1319              : 
    1320              :       !make sure the truncted coulomb is initialized
    1321            6 :       IF (xas_tdp_control%x_potential%potential_type == do_potential_truncated) THEN
    1322              : 
    1323            2 :          IF (2*maxl + 1 > get_lmax_init()) THEN
    1324            0 :             IF (para_env%mepos == 0) THEN
    1325            0 :                CALL open_file(unit_number=unit_id, file_name=xas_tdp_control%x_potential%filename)
    1326              :             END IF
    1327            0 :             CALL init(2*maxl + 1, unit_id, para_env%mepos, para_env)
    1328            0 :             IF (para_env%mepos == 0) THEN
    1329            0 :                CALL close_file(unit_id)
    1330              :             END IF
    1331              :          END IF
    1332              :       END IF
    1333              : 
    1334              : !  Compute the proper exchange 2-center
    1335           18 :       ALLOCATE (pq(nsgf, nsgf))
    1336        22698 :       pq = 0.0_dp
    1337              : 
    1338           18 :       DO pset = 1, nset
    1339           12 :          ncop = npgf_set(pset)*ncoset(lmax(pset))
    1340           12 :          sgfp = first_sgf(1, pset)
    1341           12 :          egfp = sgfp + nsgf_set(pset) - 1
    1342              : 
    1343           42 :          DO qset = 1, nset
    1344           24 :             ncoq = npgf_set(qset)*ncoset(lmax(qset))
    1345           24 :             sgfq = first_sgf(1, qset)
    1346           24 :             egfq = sgfq + nsgf_set(qset) - 1
    1347              : 
    1348           96 :             ALLOCATE (work(ncop, ncoq))
    1349        58824 :             work = 0.0_dp
    1350              : 
    1351              :             CALL eri_2center(work, lmin(pset), lmax(pset), npgf_set(pset), zet(:, pset), rpgf(:, pset), &
    1352              :                              r, lmin(qset), lmax(qset), npgf_set(qset), zet(:, qset), rpgf(:, qset), &
    1353           24 :                              r, 0.0_dp, lib, xas_tdp_control%x_potential)
    1354              : 
    1355              :             CALL ab_contract(pq(sgfp:egfp, sgfq:egfq), work, sphi(:, sgfp:), sphi(:, sgfq:), &
    1356           24 :                              ncop, ncoq, nsgf_set(pset), nsgf_set(qset))
    1357              : 
    1358           36 :             DEALLOCATE (work)
    1359              :          END DO !qset
    1360              :       END DO !pset
    1361              : 
    1362              : !  Compute and store M^-1 (P|Q) M^-1
    1363           18 :       ALLOCATE (xas_tdp_env%ri_inv_ex(nsgf, nsgf))
    1364        22698 :       xas_tdp_env%ri_inv_ex = 0.0_dp
    1365              : 
    1366              :       CALL dgemm('N', 'N', nsgf, nsgf, nsgf, 1.0_dp, metric, nsgf, pq, nsgf, &
    1367            6 :                  0.0_dp, xas_tdp_env%ri_inv_ex, nsgf)
    1368              :       CALL dgemm('N', 'N', nsgf, nsgf, nsgf, 1.0_dp, xas_tdp_env%ri_inv_ex, nsgf, metric, nsgf, &
    1369            6 :                  0.0_dp, pq, nsgf)
    1370        22698 :       xas_tdp_env%ri_inv_ex(:, :) = pq(:, :)
    1371              : 
    1372            6 :       CALL cp_libint_cleanup_2eri(lib)
    1373              : 
    1374          150 :    END SUBROUTINE compute_ri_exchange2_int
    1375              : 
    1376              : ! **************************************************************************************************
    1377              : !> \brief Computes the two-center Coulomb integral needed for the RI in kernel calculation. Stores
    1378              : !>        the integrals (P|Q)^-1 in the xas_tdp_env as global (small) arrays. Does that for a given
    1379              : !>        excited kind
    1380              : !> \param ex_kind ...
    1381              : !> \param xas_tdp_env ...
    1382              : !> \param xas_tdp_control ...
    1383              : !> \param qs_env ...
    1384              : ! **************************************************************************************************
    1385           68 :    SUBROUTINE compute_ri_coulomb2_int(ex_kind, xas_tdp_env, xas_tdp_control, qs_env)
    1386              : 
    1387              :       INTEGER, INTENT(IN)                                :: ex_kind
    1388              :       TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
    1389              :       TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
    1390              :       TYPE(qs_environment_type), POINTER                 :: qs_env
    1391              : 
    1392              :       INTEGER                                            :: nsgf
    1393              :       TYPE(gto_basis_set_type), POINTER                  :: ri_basis
    1394           68 :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
    1395              : 
    1396           68 :       NULLIFY (ri_basis, qs_kind_set)
    1397              : 
    1398              : !  Initialization
    1399           68 :       CALL get_qs_env(qs_env, qs_kind_set=qs_kind_set)
    1400           68 :       IF (ASSOCIATED(xas_tdp_env%ri_inv_coul)) THEN
    1401            6 :          DEALLOCATE (xas_tdp_env%ri_inv_coul)
    1402              :       END IF
    1403              : 
    1404              : !  Get the RI basis of interest and its quantum numbers
    1405           68 :       CALL get_qs_kind(qs_kind_set(ex_kind), basis_set=ri_basis, basis_type="RI_XAS")
    1406           68 :       CALL get_gto_basis_set(ri_basis, nsgf=nsgf)
    1407          272 :       ALLOCATE (xas_tdp_env%ri_inv_coul(nsgf, nsgf))
    1408       624040 :       xas_tdp_env%ri_inv_coul = 0.0_dp
    1409              : 
    1410           68 :       IF (.NOT. xas_tdp_control%is_periodic) THEN
    1411              :          CALL int_operators_r12_ab_os(r12_operator=operator_coulomb, vab=xas_tdp_env%ri_inv_coul, &
    1412              :                                       rab=[0.0_dp, 0.0_dp, 0.0_dp], fba=ri_basis, fbb=ri_basis, &
    1413           52 :                                       calculate_forces=.FALSE.)
    1414           52 :          CPASSERT(ASSOCIATED(xas_tdp_control))
    1415              :       ELSE
    1416           16 :          CALL periodic_ri_coulomb2(xas_tdp_env%ri_inv_coul, ri_basis, qs_env)
    1417              :       END IF
    1418              : 
    1419              : !  Inverting
    1420           68 :       CALL invmat_symm(xas_tdp_env%ri_inv_coul)
    1421              : 
    1422          136 :    END SUBROUTINE compute_ri_coulomb2_int
    1423              : 
    1424              : ! **************************************************************************************************
    1425              : !> \brief Computes the two-center inverse coulomb integral in the case of PBCs
    1426              : !> \param ri_coul2 ...
    1427              : !> \param ri_basis ...
    1428              : !> \param qs_env ...
    1429              : ! **************************************************************************************************
    1430           16 :    SUBROUTINE periodic_ri_coulomb2(ri_coul2, ri_basis, qs_env)
    1431              : 
    1432              :       REAL(dp), DIMENSION(:, :), INTENT(INOUT)           :: ri_coul2
    1433              :       TYPE(gto_basis_set_type), POINTER                  :: ri_basis
    1434              :       TYPE(qs_environment_type), POINTER                 :: qs_env
    1435              : 
    1436              :       INTEGER                                            :: maxco, ncop, ncoq, nset, op, oq, ppgf, &
    1437              :                                                             pset, qpgf, qset, sgfp, sgfq
    1438           32 :       INTEGER, DIMENSION(:), POINTER                     :: lmax, lmin, npgf, nsgf
    1439           16 :       INTEGER, DIMENSION(:, :), POINTER                  :: first_sgf
    1440              :       REAL(dp)                                           :: r(3)
    1441           16 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: hpq
    1442           16 :       REAL(dp), DIMENSION(:, :), POINTER                 :: sphi, zet
    1443              :       TYPE(cell_type), POINTER                           :: cell
    1444          416 :       TYPE(cp_eri_mme_param)                             :: mme_param
    1445              :       TYPE(mp_para_env_type), POINTER                    :: para_env
    1446           16 :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
    1447              : 
    1448           16 :       NULLIFY (cell, qs_kind_set, lmin, lmax, nsgf, npgf, zet, sphi, first_sgf)
    1449              : 
    1450              :       ! Use eri_mme for this. Don't want to annoy the user with a full input section just for this
    1451              :       ! tiny bit => initialize our own eri_mme section with the defaults
    1452              : 
    1453           16 :       CALL get_qs_env(qs_env, cell=cell, qs_kind_set=qs_kind_set, para_env=para_env)
    1454              : 
    1455              :       CALL eri_mme_init(mme_param%par, n_minimax=20, cutoff=300.0_dp, do_calib_cutoff=.TRUE., &
    1456              :                         cutoff_min=10.0_dp, cutoff_max=10000.0_dp, cutoff_eps=0.01_dp, &
    1457              :                         cutoff_delta=0.9_dp, sum_precision=1.0E-12_dp, debug=.FALSE., &
    1458              :                         debug_delta=1.0E-6_dp, debug_nsum=1000000, unit_nr=0, print_calib=.FALSE., &
    1459           16 :                         do_error_est=.FALSE.)
    1460           16 :       mme_param%do_calib = .TRUE.
    1461              : 
    1462           16 :       CALL cp_eri_mme_set_params(mme_param, cell, qs_kind_set, basis_type_1="RI_XAS", para_env=para_env)
    1463              : 
    1464              :       CALL get_gto_basis_set(ri_basis, lmax=lmax, npgf=npgf, zet=zet, lmin=lmin, nset=nset, &
    1465           16 :                              nsgf_set=nsgf, sphi=sphi, first_sgf=first_sgf, maxco=maxco)
    1466              : 
    1467           16 :       r = 0.0_dp
    1468           64 :       ALLOCATE (hpq(nset*maxco, nset*maxco))
    1469       161616 :       hpq = 0.0_dp
    1470              : 
    1471           48 :       DO pset = 1, nset
    1472           32 :          ncop = npgf(pset)*ncoset(lmax(pset))
    1473           32 :          sgfp = first_sgf(1, pset)
    1474              : 
    1475          112 :          DO qset = 1, nset
    1476           64 :             ncoq = npgf(qset)*ncoset(lmax(qset))
    1477           64 :             sgfq = first_sgf(1, qset)
    1478              : 
    1479          608 :             DO ppgf = 1, npgf(pset)
    1480          544 :                op = (pset - 1)*maxco + (ppgf - 1)*ncoset(lmax(pset))
    1481         5232 :                DO qpgf = 1, npgf(qset)
    1482         4624 :                   oq = (qset - 1)*maxco + (qpgf - 1)*ncoset(lmax(qset))
    1483              : 
    1484              :                   CALL eri_mme_2c_integrate(mme_param%par, lmin(pset), lmax(pset), lmin(qset), &
    1485              :                                             lmax(qset), zet(ppgf, pset), zet(qpgf, qset), r, hpq, &
    1486         5168 :                                             op, oq)
    1487              : 
    1488              :                END DO !qpgf
    1489              :             END DO ! ppgf
    1490              : 
    1491              :             !contraction into sgfs
    1492           64 :             op = (pset - 1)*maxco + 1
    1493           64 :             oq = (qset - 1)*maxco + 1
    1494              : 
    1495              :             CALL ab_contract(ri_coul2(sgfp:sgfp + nsgf(pset) - 1, sgfq:sgfq + nsgf(qset) - 1), &
    1496              :                              hpq(op:op + ncop - 1, oq:oq + ncoq - 1), sphi(:, sgfp:), sphi(:, sgfq:), &
    1497           96 :                              ncop, ncoq, nsgf(pset), nsgf(qset))
    1498              : 
    1499              :          END DO !qset
    1500              :       END DO !pset
    1501              : 
    1502              :       !celan-up
    1503           16 :       CALL eri_mme_release(mme_param%par)
    1504              : 
    1505           64 :    END SUBROUTINE periodic_ri_coulomb2
    1506              : 
    1507              : END MODULE xas_tdp_integrals
        

Generated by: LCOV version 2.0-1