LCOV - code coverage report
Current view: top level - src - xas_tdp_kernel.F (source / functions) Coverage Total Hit
Test: CP2K Regtests (git:42dac4a) Lines: 87.2 % 437 381
Test Date: 2025-07-25 12:55:17 Functions: 93.3 % 15 14

            Line data    Source code
       1              : !--------------------------------------------------------------------------------------------------!
       2              : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3              : !   Copyright 2000-2025 CP2K developers group <https://cp2k.org>                                   !
       4              : !                                                                                                  !
       5              : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6              : !--------------------------------------------------------------------------------------------------!
       7              : 
       8              : ! **************************************************************************************************
       9              : !> \brief All the kernel specific subroutines for XAS TDP calculations
      10              : !> \author A. Bussy (03.2019)
      11              : ! **************************************************************************************************
      12              : 
      13              : MODULE xas_tdp_kernel
      14              :    USE basis_set_types,                 ONLY: gto_basis_set_p_type
      15              :    USE cp_dbcsr_api,                    ONLY: &
      16              :         dbcsr_add, dbcsr_complete_redistribute, dbcsr_copy, dbcsr_create, dbcsr_desymmetrize, &
      17              :         dbcsr_distribution_get, dbcsr_distribution_new, dbcsr_distribution_release, &
      18              :         dbcsr_distribution_type, dbcsr_filter, dbcsr_finalize, dbcsr_get_block_p, dbcsr_get_info, &
      19              :         dbcsr_get_num_blocks, dbcsr_get_stored_coordinates, dbcsr_iterator_blocks_left, &
      20              :         dbcsr_iterator_next_block, dbcsr_iterator_start, dbcsr_iterator_stop, dbcsr_iterator_type, &
      21              :         dbcsr_multiply, dbcsr_p_type, dbcsr_put_block, dbcsr_release, dbcsr_reserve_blocks, &
      22              :         dbcsr_set, dbcsr_transposed, dbcsr_type, dbcsr_type_no_symmetry, dbcsr_type_symmetric
      23              :    USE cp_dbcsr_operations,             ONLY: cp_dbcsr_dist2d_to_dist,&
      24              :                                               dbcsr_deallocate_matrix_set
      25              :    USE dbt_api,                         ONLY: dbt_get_block,&
      26              :                                               dbt_iterator_blocks_left,&
      27              :                                               dbt_iterator_next_block,&
      28              :                                               dbt_iterator_start,&
      29              :                                               dbt_iterator_stop,&
      30              :                                               dbt_iterator_type,&
      31              :                                               dbt_type
      32              :    USE distribution_2d_types,           ONLY: distribution_2d_type
      33              :    USE kinds,                           ONLY: dp
      34              :    USE message_passing,                 ONLY: mp_para_env_type
      35              :    USE particle_methods,                ONLY: get_particle_set
      36              :    USE particle_types,                  ONLY: particle_type
      37              :    USE qs_environment_types,            ONLY: get_qs_env,&
      38              :                                               qs_environment_type
      39              :    USE qs_integral_utils,               ONLY: basis_set_list_setup
      40              :    USE qs_kind_types,                   ONLY: qs_kind_type
      41              :    USE util,                            ONLY: get_limit
      42              :    USE xas_tdp_types,                   ONLY: donor_state_type,&
      43              :                                               get_proc_batch_sizes,&
      44              :                                               xas_tdp_control_type,&
      45              :                                               xas_tdp_env_type
      46              : 
      47              : !$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num
      48              : #include "./base/base_uses.f90"
      49              : 
      50              :    IMPLICIT NONE
      51              :    PRIVATE
      52              : 
      53              :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'xas_tdp_kernel'
      54              : 
      55              :    PUBLIC :: kernel_coulomb_xc, kernel_exchange, contract2_AO_to_doMO, &
      56              :              reserve_contraction_blocks, ri_all_blocks_mm
      57              : 
      58              : CONTAINS
      59              : 
      60              : ! **************************************************************************************************
      61              : !> \brief Computes, if asked for it, the Coulomb and XC kernel matrices, in the usuall matrix format
      62              : !> \param coul_ker pointer the the Coulomb kernel matrix (can be void pointer)
      63              : !> \param xc_ker array of pointer to the different xc kernels (5 of them):
      64              : !>               1) the restricted closed-shell singlet kernel
      65              : !>               2) the restricted closed-shell triplet kernel
      66              : !>               3) the spin-conserving open-shell xc kernel
      67              : !>               4) the on-diagonal spin-flip open-shell xc kernel
      68              : !> \param donor_state ...
      69              : !> \param xas_tdp_env ...
      70              : !> \param xas_tdp_control ...
      71              : !> \param qs_env ...
      72              : !> \note Coulomb and xc kernel are put together in the same routine because they use the same RI
      73              : !>       Coulomb: (aI|Jb) = (aI|P) (P|Q)^-1 (Q|Jb)
      74              : !>       XC : (aI|fxc|Jb) = (aI|P) (P|Q)^-1 (Q|fxc|R) (R|S)^-1 (S|Jb)
      75              : !>       In the above formula, a,b label the sgfs
      76              : !>       The routine analyses the xas_tdp_control to know which kernel must be computed and how
      77              : !>       (open-shell, singlet, triplet, ROKS, LSD, etc...)
      78              : !>       On entry, the pointers should be allocated
      79              : ! **************************************************************************************************
      80          136 :    SUBROUTINE kernel_coulomb_xc(coul_ker, xc_ker, donor_state, xas_tdp_env, xas_tdp_control, qs_env)
      81              : 
      82              :       TYPE(dbcsr_type), INTENT(INOUT)                    :: coul_ker
      83              :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: xc_ker
      84              :       TYPE(donor_state_type), POINTER                    :: donor_state
      85              :       TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
      86              :       TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
      87              :       TYPE(qs_environment_type), POINTER                 :: qs_env
      88              : 
      89              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'kernel_coulomb_xc'
      90              : 
      91              :       INTEGER                                            :: batch_size, bo(2), handle, i, ibatch, &
      92              :                                                             iex, lb, natom, nbatch, ndo_mo, &
      93              :                                                             ndo_so, nex_atom, nsgfp, ri_atom, &
      94              :                                                             source, ub
      95           68 :       INTEGER, DIMENSION(:), POINTER                     :: blk_size
      96              :       LOGICAL                                            :: do_coulomb, do_sc, do_sf, do_sg, do_tp, &
      97              :                                                             do_xc, found
      98           68 :       REAL(dp), DIMENSION(:, :), POINTER                 :: PQ
      99              :       TYPE(dbcsr_distribution_type), POINTER             :: dist
     100           68 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: contr1_int
     101              :       TYPE(mp_para_env_type), POINTER                    :: para_env
     102              : 
     103           68 :       NULLIFY (contr1_int, PQ, para_env, dist, blk_size)
     104              : 
     105              : !  Initialization
     106           68 :       ndo_mo = donor_state%ndo_mo
     107           68 :       do_xc = xas_tdp_control%do_xc
     108           68 :       do_sg = xas_tdp_control%do_singlet
     109           68 :       do_tp = xas_tdp_control%do_triplet
     110           68 :       do_sc = xas_tdp_control%do_spin_cons
     111           68 :       do_sf = xas_tdp_control%do_spin_flip
     112           68 :       ndo_so = ndo_mo; IF (xas_tdp_control%do_uks) ndo_so = 2*ndo_mo
     113           68 :       ri_atom = donor_state%at_index
     114           68 :       CALL get_qs_env(qs_env, natom=natom, para_env=para_env)
     115           68 :       do_coulomb = xas_tdp_control%do_coulomb
     116           68 :       dist => donor_state%dbcsr_dist
     117           68 :       blk_size => donor_state%blk_size
     118              : 
     119              : !  If no Coulomb nor xc, simply exit
     120           68 :       IF ((.NOT. do_coulomb) .AND. (.NOT. do_xc)) RETURN
     121              : 
     122           68 :       CALL timeset(routineN, handle)
     123              : 
     124              : !  Contract the RI 3-center integrals once to get (aI|P)
     125           68 :       CALL contract2_AO_to_doMO(contr1_int, "COULOMB", donor_state, xas_tdp_env, xas_tdp_control, qs_env)
     126              : 
     127              : !  Deal with the Coulomb case
     128           68 :       IF (do_coulomb) CALL coulomb(coul_ker, contr1_int, dist, blk_size, xas_tdp_env, &
     129           68 :                                    xas_tdp_control, qs_env)
     130              : 
     131              : !  Deal with the XC case
     132           68 :       IF (do_xc) THEN
     133              : 
     134              :          ! In the end, we compute: (aI|fxc|Jb) = (aI|P) (P|Q)^-1 (Q|fxc|R) (R|S)^-1 (S|Jb)
     135              :          ! where fxc can take different spin contributions.
     136              : 
     137              :          ! Precompute the product (aI|P) * (P|Q)^-1 and store it in contr1_int
     138           60 :          PQ => xas_tdp_env%ri_inv_coul
     139           60 :          CALL ri_all_blocks_mm(contr1_int, PQ)
     140              : 
     141              :          ! If not already done (e.g. when multpile donor states for a given excited atom), broadcast
     142              :          ! the RI matrix (Q|fxc|R) on all procs
     143           60 :          IF (.NOT. xas_tdp_env%fxc_avail) THEN
     144              :             ! Find on which processor the integrals (Q|fxc|R) for this atom are stored
     145           58 :             nsgfp = SIZE(PQ, 1)
     146           58 :             CALL get_qs_env(qs_env, para_env=para_env)
     147           58 :             found = .FALSE.
     148           58 :             nex_atom = SIZE(xas_tdp_env%ex_atom_indices)
     149           58 :             CALL get_proc_batch_sizes(batch_size, nbatch, nex_atom, para_env%num_pe)
     150              : 
     151           68 :             DO ibatch = 0, nbatch - 1
     152              : 
     153           68 :                bo = get_limit(nex_atom, nbatch, ibatch)
     154           78 :                DO iex = bo(1), bo(2)
     155              : 
     156           78 :                   IF (xas_tdp_env%ex_atom_indices(iex) == ri_atom) THEN
     157           58 :                      source = ibatch*batch_size
     158              :                      found = .TRUE. !but simply take the first
     159              :                      EXIT
     160              :                   END IF
     161              :                END DO !iex
     162           10 :                IF (found) EXIT
     163              :             END DO !ip
     164              : 
     165              :             ! Broadcast the integrals to all procs (deleted after all donor states for this atoms are treated)
     166           58 :             lb = 1; IF (do_sf .AND. .NOT. do_sc) lb = 4
     167           58 :             ub = 2; IF (do_sc) ub = 3; IF (do_sf) ub = 4
     168          176 :             DO i = lb, ub
     169          118 :                IF (.NOT. ASSOCIATED(xas_tdp_env%ri_fxc(ri_atom, i)%array)) THEN
     170           80 :                   ALLOCATE (xas_tdp_env%ri_fxc(ri_atom, i)%array(nsgfp, nsgfp))
     171              :                END IF
     172      1553928 :                CALL para_env%bcast(xas_tdp_env%ri_fxc(ri_atom, i)%array, source)
     173              :             END DO
     174              : 
     175           58 :             xas_tdp_env%fxc_avail = .TRUE.
     176              :          END IF
     177              : 
     178              :          ! Case study on the calculation type
     179           60 :          IF (do_sg .OR. do_tp) THEN
     180              :             CALL rcs_xc(xc_ker(1)%matrix, xc_ker(2)%matrix, contr1_int, dist, blk_size, &
     181           58 :                         donor_state, xas_tdp_env, xas_tdp_control, qs_env)
     182              :          END IF
     183              : 
     184           60 :          IF (do_sc) THEN
     185              :             CALL sc_os_xc(xc_ker(3)%matrix, contr1_int, dist, blk_size, donor_state, &
     186            2 :                           xas_tdp_env, xas_tdp_control, qs_env)
     187              :          END IF
     188              : 
     189           60 :          IF (do_sf) THEN
     190              :             CALL ondiag_sf_os_xc(xc_ker(4)%matrix, contr1_int, dist, blk_size, donor_state, &
     191            0 :                                  xas_tdp_env, xas_tdp_control, qs_env)
     192              :          END IF
     193              : 
     194              :       END IF ! do_xc
     195              : 
     196              : !  Clean-up
     197           68 :       CALL dbcsr_deallocate_matrix_set(contr1_int)
     198              : 
     199           68 :       CALL timestop(handle)
     200              : 
     201           68 :    END SUBROUTINE kernel_coulomb_xc
     202              : 
     203              : ! **************************************************************************************************
     204              : !> \brief Create the matrix containing the Coulomb kernel, which is:
     205              : !>        (aI_sigma|J_tau b) ~= (aI_sigma|P) * (P|Q) * (Q|J_tau b)
     206              : !> \param coul_ker the Coulomb kernel
     207              : !> \param contr1_int the once contracted RI integrals (aI|P)
     208              : !> \param dist the inherited dbcsr ditribution
     209              : !> \param blk_size the inherited block sizes
     210              : !> \param xas_tdp_env ...
     211              : !> \param xas_tdp_control ...
     212              : !> \param qs_env ...
     213              : ! **************************************************************************************************
     214           68 :    SUBROUTINE coulomb(coul_ker, contr1_int, dist, blk_size, xas_tdp_env, xas_tdp_control, qs_env)
     215              : 
     216              :       TYPE(dbcsr_type), INTENT(INOUT)                    :: coul_ker
     217              :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: contr1_int
     218              :       TYPE(dbcsr_distribution_type), POINTER             :: dist
     219              :       INTEGER, DIMENSION(:), POINTER                     :: blk_size
     220              :       TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
     221              :       TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
     222              :       TYPE(qs_environment_type), POINTER                 :: qs_env
     223              : 
     224              :       LOGICAL                                            :: quadrants(3)
     225              :       REAL(dp), DIMENSION(:, :), POINTER                 :: PQ
     226           68 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: lhs_int, rhs_int
     227              :       TYPE(dbcsr_type)                                   :: work_mat
     228              : 
     229           68 :       NULLIFY (PQ, rhs_int, lhs_int)
     230              : 
     231              :       ! Get the inver RI coulomb
     232           68 :       PQ => xas_tdp_env%ri_inv_coul
     233              : 
     234              :       ! Create a normal type work matrix
     235              :       CALL dbcsr_create(work_mat, name="WORK", matrix_type=dbcsr_type_no_symmetry, dist=dist, &
     236           68 :                         row_blk_size=blk_size, col_blk_size=blk_size)
     237              : 
     238              :       ! Compute the product (aI|P) * (P|Q)^-1 * (Q|Jb) = (aI|Jb)
     239           68 :       rhs_int => contr1_int ! the incoming contr1_int is not modified
     240          292 :       ALLOCATE (lhs_int(SIZE(contr1_int)))
     241           68 :       CALL copy_ri_contr_int(lhs_int, rhs_int) ! RHS containts (Q|JB)^T
     242           68 :       CALL ri_all_blocks_mm(lhs_int, PQ) ! LHS contatins (aI|P)*(P|Q)^-1
     243              : 
     244              :       !In the special case of ROKS, same MOs for each spin => put same (aI|Jb) product on the
     245              :       !alpha-alpha, alpha-beta and beta-beta quadrants of the kernel matrix.
     246           68 :       IF (xas_tdp_control%do_roks) THEN
     247            0 :          quadrants = [.TRUE., .TRUE., .TRUE.]
     248              :       ELSE
     249           68 :          quadrants = [.TRUE., .FALSE., .FALSE.]
     250              :       END IF
     251              :       CALL ri_int_product(work_mat, lhs_int, rhs_int, quadrants, qs_env, &
     252           68 :                           eps_filter=xas_tdp_control%eps_filter)
     253           68 :       CALL dbcsr_finalize(work_mat)
     254              : 
     255              :       !Create the symmetric kernel matrix and redistribute work_mat into it
     256              :       CALL dbcsr_create(coul_ker, name="COULOMB KERNEL", matrix_type=dbcsr_type_symmetric, dist=dist, &
     257           68 :                         row_blk_size=blk_size, col_blk_size=blk_size)
     258           68 :       CALL dbcsr_complete_redistribute(work_mat, coul_ker)
     259              : 
     260              :       !clean-up
     261           68 :       CALL dbcsr_release(work_mat)
     262           68 :       CALL dbcsr_deallocate_matrix_set(lhs_int)
     263              : 
     264           68 :    END SUBROUTINE coulomb
     265              : 
     266              : ! **************************************************************************************************
     267              : !> \brief Create the matrix containing the XC kenrel in the spin-conserving open-shell case:
     268              : !>        (aI_sigma|fxc|J_tau b) ~= (aI_sigma|P) (P|Q)^-1 (Q|fxc|R) (R|S)^-1 (S|J_tau b)
     269              : !> \param xc_ker the kernel matrix
     270              : !> \param contr1_int_PQ the once contracted RI integrals, with inverse coulomb: (aI_sigma|P) (P|Q)^-1
     271              : !> \param dist inherited dbcsr dist
     272              : !> \param blk_size inherited block sizes
     273              : !> \param donor_state ...
     274              : !> \param xas_tdp_env ...
     275              : !> \param xas_tdp_control ...
     276              : !> \param qs_env ...
     277              : !> note Prior to calling this function, the (Q|fxc|R) integral must be brodcasted to all procs
     278              : ! **************************************************************************************************
     279            2 :    SUBROUTINE sc_os_xc(xc_ker, contr1_int_PQ, dist, blk_size, donor_state, xas_tdp_env, &
     280              :                        xas_tdp_control, qs_env)
     281              : 
     282              :       TYPE(dbcsr_type), INTENT(INOUT)                    :: xc_ker
     283              :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: contr1_int_PQ
     284              :       TYPE(dbcsr_distribution_type), POINTER             :: dist
     285              :       INTEGER, DIMENSION(:), POINTER                     :: blk_size
     286              :       TYPE(donor_state_type), POINTER                    :: donor_state
     287              :       TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
     288              :       TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
     289              :       TYPE(qs_environment_type), POINTER                 :: qs_env
     290              : 
     291              :       INTEGER                                            :: ndo_mo, ri_atom
     292              :       LOGICAL                                            :: quadrants(3)
     293            2 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: lhs_int, rhs_int
     294              :       TYPE(dbcsr_type)                                   :: work_mat
     295              : 
     296            2 :       NULLIFY (lhs_int, rhs_int)
     297              : 
     298              :       ! Initialization
     299            2 :       ndo_mo = donor_state%ndo_mo
     300            2 :       ri_atom = donor_state%at_index
     301              :       !normal type work matrix such that distribution of all spin quadrants match
     302              :       CALL dbcsr_create(work_mat, name="WORK", matrix_type=dbcsr_type_no_symmetry, dist=dist, &
     303            2 :                         row_blk_size=blk_size, col_blk_size=blk_size)
     304              : 
     305            2 :       rhs_int => contr1_int_PQ ! contains [ (aI|P)*(P|Q)^-1 ]^T
     306           10 :       ALLOCATE (lhs_int(SIZE(contr1_int_PQ))) ! will contain (aI|P)*(P|Q)^-1 * (Q|fxc|R)
     307              : 
     308              :       ! Case study: UKS or ROKS ?
     309            2 :       IF (xas_tdp_control%do_uks) THEN
     310              : 
     311              :          ! In the case of UKS, donor MOs might be different for different spins. Moreover, the
     312              :          ! fxc itself might change since fxc = fxc_sigma,tau
     313              :          ! => Carfully treat each spin-quadrant separately
     314              : 
     315              :          ! alpha-alpha spin quadrant (upper-lefet)
     316            2 :          quadrants = [.TRUE., .FALSE., .FALSE.]
     317              : 
     318              :          ! Copy the alpha part into lhs_int, multiply by the alpha-alpha (Q|fxc|R) and then
     319              :          ! by the alpha part of rhs_int
     320            2 :          CALL copy_ri_contr_int(lhs_int(1:ndo_mo), rhs_int(1:ndo_mo))
     321            2 :          CALL ri_all_blocks_mm(lhs_int(1:ndo_mo), xas_tdp_env%ri_fxc(ri_atom, 1)%array)
     322              :          CALL ri_int_product(work_mat, lhs_int(1:ndo_mo), rhs_int(1:ndo_mo), quadrants, qs_env, &
     323            2 :                              eps_filter=xas_tdp_control%eps_filter)
     324              : 
     325              :          ! alpha-beta spin quadrant (upper-right)
     326            2 :          quadrants = [.FALSE., .TRUE., .FALSE.]
     327              : 
     328              :          !Copy the alpha part into LHS, multiply by the alpha-beta kernel and the beta part of RHS
     329            2 :          CALL copy_ri_contr_int(lhs_int(1:ndo_mo), rhs_int(1:ndo_mo))
     330            2 :          CALL ri_all_blocks_mm(lhs_int(1:ndo_mo), xas_tdp_env%ri_fxc(ri_atom, 2)%array)
     331              :          CALL ri_int_product(work_mat, lhs_int(1:ndo_mo), rhs_int(ndo_mo + 1:2*ndo_mo), &
     332            2 :                              quadrants, qs_env, eps_filter=xas_tdp_control%eps_filter)
     333              : 
     334              :          ! beta-beta spin quadrant (lower-right)
     335            2 :          quadrants = [.FALSE., .FALSE., .TRUE.]
     336              : 
     337              :          !Copy the beta part into LHS, multiply by the beta-beta kernel and the beta part of RHS
     338            2 :          CALL copy_ri_contr_int(lhs_int(ndo_mo + 1:2*ndo_mo), rhs_int(ndo_mo + 1:2*ndo_mo))
     339            2 :          CALL ri_all_blocks_mm(lhs_int(ndo_mo + 1:2*ndo_mo), xas_tdp_env%ri_fxc(ri_atom, 3)%array)
     340              :          CALL ri_int_product(work_mat, lhs_int(ndo_mo + 1:2*ndo_mo), rhs_int(ndo_mo + 1:2*ndo_mo), &
     341            2 :                              quadrants, qs_env, eps_filter=xas_tdp_control%eps_filter)
     342              : 
     343            0 :       ELSE IF (xas_tdp_control%do_roks) THEN
     344              : 
     345              :          ! In the case of ROKS, fxc = fxc_sigma,tau is different for each spin quadrant, but the
     346              :          ! donor MOs remain the same
     347              : 
     348              :          ! alpha-alpha kernel in the upper left quadrant
     349            0 :          quadrants = [.TRUE., .FALSE., .FALSE.]
     350              : 
     351              :          !Copy the LHS and multiply by alpha-alpha kernel
     352            0 :          CALL copy_ri_contr_int(lhs_int, rhs_int)
     353            0 :          CALL ri_all_blocks_mm(lhs_int, xas_tdp_env%ri_fxc(ri_atom, 1)%array)
     354              :          CALL ri_int_product(work_mat, lhs_int, rhs_int, quadrants, qs_env, &
     355            0 :                              eps_filter=xas_tdp_control%eps_filter)
     356              : 
     357              :          ! alpha-beta kernel in the upper-right quadrant
     358            0 :          quadrants = [.FALSE., .TRUE., .FALSE.]
     359              : 
     360              :          !Copy LHS and multiply by the alpha-beta kernel
     361            0 :          CALL copy_ri_contr_int(lhs_int, rhs_int)
     362            0 :          CALL ri_all_blocks_mm(lhs_int, xas_tdp_env%ri_fxc(ri_atom, 2)%array)
     363              :          CALL ri_int_product(work_mat, lhs_int, rhs_int, quadrants, qs_env, &
     364            0 :                              eps_filter=xas_tdp_control%eps_filter)
     365              : 
     366              :          ! beta-beta kernel in the lower-right quadrant
     367            0 :          quadrants = [.FALSE., .FALSE., .TRUE.]
     368              : 
     369              :          !Copy the LHS and multiply by the beta-beta kernel
     370            0 :          CALL copy_ri_contr_int(lhs_int, rhs_int)
     371            0 :          CALL ri_all_blocks_mm(lhs_int, xas_tdp_env%ri_fxc(ri_atom, 3)%array)
     372              :          CALL ri_int_product(work_mat, lhs_int, rhs_int, quadrants, qs_env, &
     373            0 :                              eps_filter=xas_tdp_control%eps_filter)
     374              : 
     375              :       END IF
     376            2 :       CALL dbcsr_finalize(work_mat)
     377              : 
     378              :       ! Create a symmetric kernel matrix and redistribute the normal work matrix into it
     379              :       CALL dbcsr_create(xc_ker, name="SC OS XC KERNEL", matrix_type=dbcsr_type_symmetric, dist=dist, &
     380            2 :                         row_blk_size=blk_size, col_blk_size=blk_size)
     381            2 :       CALL dbcsr_complete_redistribute(work_mat, xc_ker)
     382              : 
     383              :       !clean-up
     384            2 :       CALL dbcsr_deallocate_matrix_set(lhs_int)
     385            2 :       CALL dbcsr_release(work_mat)
     386              : 
     387            2 :    END SUBROUTINE sc_os_xc
     388              : 
     389              : ! **************************************************************************************************
     390              : !> \brief Create the matrix containing the on-diagonal spin-flip XC kernel (open-shell), which is:
     391              : !>        (a I_sigma|fxc|J_tau b) * delta_sigma,tau, fxc = 1/(rhoa-rhob) * (dE/drhoa - dE/drhob)
     392              : !>        with RI: (a I_sigma|fxc|J_tau b) ~= (aI_sigma|P) (P|Q)^-1 (Q|fxc|R) (R|S)^-1 (S|J_tau b)
     393              : !> \param xc_ker the kernel matrix
     394              : !> \param contr1_int_PQ the once contracted RI integrals with coulomb product: (aI_sigma|P) (P|Q)^-1
     395              : !> \param dist inherited dbcsr dist
     396              : !> \param blk_size inherited block sizes
     397              : !> \param donor_state ...
     398              : !> \param xas_tdp_env ...
     399              : !> \param xas_tdp_control ...
     400              : !> \param qs_env ...
     401              : !> \note  It must be later on multiplied by the spin-swapped Q projector
     402              : !>        Prior to calling this function, the (Q|fxc|R) integral must be brodcasted to all procs
     403              : ! **************************************************************************************************
     404            0 :    SUBROUTINE ondiag_sf_os_xc(xc_ker, contr1_int_PQ, dist, blk_size, donor_state, xas_tdp_env, &
     405              :                               xas_tdp_control, qs_env)
     406              : 
     407              :       TYPE(dbcsr_type), INTENT(INOUT)                    :: xc_ker
     408              :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: contr1_int_PQ
     409              :       TYPE(dbcsr_distribution_type), POINTER             :: dist
     410              :       INTEGER, DIMENSION(:), POINTER                     :: blk_size
     411              :       TYPE(donor_state_type), POINTER                    :: donor_state
     412              :       TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
     413              :       TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
     414              :       TYPE(qs_environment_type), POINTER                 :: qs_env
     415              : 
     416              :       INTEGER                                            :: ndo_mo, ri_atom
     417              :       LOGICAL                                            :: quadrants(3)
     418            0 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: lhs_int, rhs_int
     419              :       TYPE(dbcsr_type)                                   :: work_mat
     420              : 
     421            0 :       NULLIFY (lhs_int, rhs_int)
     422              : 
     423              :       ! Initialization
     424            0 :       ndo_mo = donor_state%ndo_mo
     425            0 :       ri_atom = donor_state%at_index
     426              :       !normal type work matrix such that distribution of all spin quadrants match
     427              :       CALL dbcsr_create(work_mat, name="WORK", matrix_type=dbcsr_type_no_symmetry, dist=dist, &
     428            0 :                         row_blk_size=blk_size, col_blk_size=blk_size)
     429              : 
     430              :       !Create a lhs_int, in which the whole (aI_sigma|P) (P|Q)^-1 (Q|fxc|R) will be put
     431              :       !because in spin-flip fxc is spin-independent, can take the product once and for all
     432            0 :       rhs_int => contr1_int_PQ
     433            0 :       ALLOCATE (lhs_int(SIZE(contr1_int_PQ)))
     434            0 :       CALL copy_ri_contr_int(lhs_int, rhs_int)
     435            0 :       CALL ri_all_blocks_mm(lhs_int, xas_tdp_env%ri_fxc(ri_atom, 4)%array)
     436              : 
     437              :       ! Case study: UKS or ROKS ?
     438            0 :       IF (xas_tdp_control%do_uks) THEN
     439              : 
     440              :          ! In the case of UKS, donor MOs might be different for different spins
     441              :          ! => Carfully treat each spin-quadrant separately
     442              :          ! NO alpha-beta because of the delta_sigma,tau
     443              : 
     444              :          ! alpha-alpha spin quadrant (upper-lefet)
     445            0 :          quadrants = [.TRUE., .FALSE., .FALSE.]
     446              :          CALL ri_int_product(work_mat, lhs_int(1:ndo_mo), rhs_int(1:ndo_mo), quadrants, qs_env, &
     447            0 :                              eps_filter=xas_tdp_control%eps_filter)
     448              : 
     449              :          ! beta-beta spin quadrant (lower-right)
     450            0 :          quadrants = [.FALSE., .FALSE., .TRUE.]
     451              :          CALL ri_int_product(work_mat, lhs_int(ndo_mo + 1:2*ndo_mo), rhs_int(ndo_mo + 1:2*ndo_mo), &
     452            0 :                              quadrants, qs_env, eps_filter=xas_tdp_control%eps_filter)
     453              : 
     454            0 :       ELSE IF (xas_tdp_control%do_roks) THEN
     455              : 
     456              :          ! In the case of ROKS, same donor MOs for both spins => can do it all at once
     457              :          ! But NOT the alpha-beta quadrant because of delta_sigma,tau
     458              : 
     459            0 :          quadrants = [.TRUE., .FALSE., .TRUE.]
     460              :          CALL ri_int_product(work_mat, lhs_int, rhs_int, quadrants, qs_env, &
     461            0 :                              eps_filter=xas_tdp_control%eps_filter)
     462              : 
     463              :       END IF
     464            0 :       CALL dbcsr_finalize(work_mat)
     465              : 
     466              :       ! Create a symmetric kernel matrix and redistribute the normal work matrix into it
     467              :       CALL dbcsr_create(xc_ker, name="ON-DIAG SF OS XC KERNEL", matrix_type=dbcsr_type_symmetric, &
     468            0 :                         dist=dist, row_blk_size=blk_size, col_blk_size=blk_size)
     469            0 :       CALL dbcsr_complete_redistribute(work_mat, xc_ker)
     470              : 
     471              :       !clean-up
     472            0 :       CALL dbcsr_deallocate_matrix_set(lhs_int)
     473            0 :       CALL dbcsr_release(work_mat)
     474              : 
     475            0 :    END SUBROUTINE ondiag_sf_os_xc
     476              : 
     477              : ! **************************************************************************************************
     478              : !> \brief Create the matrix containing the XC kernel in the restricted closed-shell case, for
     479              : !>        singlets: (aI|fxc|Jb) = (aI|P) (P|Q)^-1 (Q|fxc_alp,alp+fxc_alp,bet|R) (R|S)^-1 (S|Jb)
     480              : !>        triplets: (aI|fxc|Jb) = (aI|P) (P|Q)^-1 (Q|fxc_alp,alp-fxc_alp,bet|R) (R|S)^-1 (S|Jb)
     481              : !> \param sg_xc_ker the singlet kernel matrix
     482              : !> \param tp_xc_ker the triplet kernel matrix
     483              : !> \param contr1_int_PQ the once contracted RI integrals including inverse 2-center Coulomb prodcut:
     484              : !>                      (aI|P)*(P|Q)^-1
     485              : !> \param dist inherited dbcsr dist
     486              : !> \param blk_size inherited block sizes
     487              : !> \param donor_state ...
     488              : !> \param xas_tdp_env ...
     489              : !> \param xas_tdp_control ...
     490              : !> \param qs_env ...
     491              : ! **************************************************************************************************
     492           58 :    SUBROUTINE rcs_xc(sg_xc_ker, tp_xc_ker, contr1_int_PQ, dist, blk_size, donor_state, &
     493              :                      xas_tdp_env, xas_tdp_control, qs_env)
     494              : 
     495              :       TYPE(dbcsr_type), INTENT(INOUT)                    :: sg_xc_ker, tp_xc_ker
     496              :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: contr1_int_PQ
     497              :       TYPE(dbcsr_distribution_type), POINTER             :: dist
     498              :       INTEGER, DIMENSION(:), POINTER                     :: blk_size
     499              :       TYPE(donor_state_type), POINTER                    :: donor_state
     500              :       TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
     501              :       TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
     502              :       TYPE(qs_environment_type), POINTER                 :: qs_env
     503              : 
     504              :       INTEGER                                            :: nsgfp, ri_atom
     505              :       LOGICAL                                            :: quadrants(3)
     506              :       REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: fxc
     507           58 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: lhs_int, rhs_int
     508              :       TYPE(dbcsr_type)                                   :: work_mat
     509              : 
     510           58 :       NULLIFY (lhs_int, rhs_int)
     511              : 
     512              :       ! Initialization
     513           58 :       ri_atom = donor_state%at_index
     514           58 :       nsgfp = SIZE(xas_tdp_env%ri_fxc(ri_atom, 1)%array, 1)
     515           58 :       rhs_int => contr1_int_PQ ! RHS contains [ (aI|P)*(P|Q)^-1 ]^T
     516          232 :       ALLOCATE (lhs_int(SIZE(contr1_int_PQ))) ! LHS will contatin (aI|P)*(P|Q)^-1 * (Q|fxc|R)
     517              : 
     518              :       ! Work structures
     519          232 :       ALLOCATE (fxc(nsgfp, nsgfp))
     520              :       CALL dbcsr_create(work_mat, name="WORK", matrix_type=dbcsr_type_no_symmetry, dist=dist, &
     521           58 :                         row_blk_size=blk_size, col_blk_size=blk_size)
     522              : 
     523              :       ! Case study: singlet and/or triplet ?
     524           58 :       IF (xas_tdp_control%do_singlet) THEN
     525              : 
     526              :          ! Take the sum of fxc for alpha-alpha and alpha-beta
     527           58 :          CALL dcopy(nsgfp*nsgfp, xas_tdp_env%ri_fxc(ri_atom, 1)%array, 1, fxc, 1)
     528           58 :          CALL daxpy(nsgfp*nsgfp, 1.0_dp, xas_tdp_env%ri_fxc(ri_atom, 2)%array, 1, fxc, 1)
     529              : 
     530              :          ! Copy the fresh lhs_int = (aI|P) (P|Q)^-1 and multiply by (Q|fxc|R)
     531           58 :          CALL copy_ri_contr_int(lhs_int, rhs_int)
     532           58 :          CALL ri_all_blocks_mm(lhs_int, fxc)
     533              : 
     534              :          ! Compute the final LHS RHS product => spin-restricted, only upper-left quadrant
     535           58 :          quadrants = [.TRUE., .FALSE., .FALSE.]
     536              :          CALL ri_int_product(work_mat, lhs_int, rhs_int, quadrants, qs_env, &
     537           58 :                              eps_filter=xas_tdp_control%eps_filter)
     538           58 :          CALL dbcsr_finalize(work_mat)
     539              : 
     540              :          !Create the symmetric kernel matrix and redistribute work_mat into it
     541              :          CALL dbcsr_create(sg_xc_ker, name="XC SINGLET KERNEL", matrix_type=dbcsr_type_symmetric, &
     542           58 :                            dist=dist, row_blk_size=blk_size, col_blk_size=blk_size)
     543           58 :          CALL dbcsr_complete_redistribute(work_mat, sg_xc_ker)
     544              : 
     545              :       END IF
     546              : 
     547           58 :       IF (xas_tdp_control%do_triplet) THEN
     548              : 
     549              :          ! Take the difference of fxc for alpha-alpha and alpha-beta
     550            0 :          CALL dcopy(nsgfp*nsgfp, xas_tdp_env%ri_fxc(ri_atom, 1)%array, 1, fxc, 1)
     551            0 :          CALL daxpy(nsgfp*nsgfp, -1.0_dp, xas_tdp_env%ri_fxc(ri_atom, 2)%array, 1, fxc, 1)
     552              : 
     553              :          ! Copy the fresh lhs_int = (aI|P) (P|Q)^-1 and multiply by (Q|fxc|R)
     554            0 :          CALL copy_ri_contr_int(lhs_int, rhs_int)
     555            0 :          CALL ri_all_blocks_mm(lhs_int, fxc)
     556              : 
     557              :          ! Compute the final LHS RHS product => spin-restricted, only upper-left quadrant
     558            0 :          quadrants = [.TRUE., .FALSE., .FALSE.]
     559              :          CALL ri_int_product(work_mat, lhs_int, rhs_int, quadrants, qs_env, &
     560            0 :                              eps_filter=xas_tdp_control%eps_filter)
     561            0 :          CALL dbcsr_finalize(work_mat)
     562              : 
     563              :          !Create the symmetric kernel matrix and redistribute work_mat into it
     564              :          CALL dbcsr_create(tp_xc_ker, name="XC TRIPLET KERNEL", matrix_type=dbcsr_type_symmetric, &
     565            0 :                            dist=dist, row_blk_size=blk_size, col_blk_size=blk_size)
     566            0 :          CALL dbcsr_complete_redistribute(work_mat, tp_xc_ker)
     567              : 
     568              :       END IF
     569              : 
     570              :       ! clean-up
     571           58 :       CALL dbcsr_deallocate_matrix_set(lhs_int)
     572           58 :       CALL dbcsr_release(work_mat)
     573           58 :       DEALLOCATE (fxc)
     574              : 
     575           58 :    END SUBROUTINE rcs_xc
     576              : 
     577              : ! **************************************************************************************************
     578              : !> \brief Computes the exact exchange kernel matrix using RI. Returns an array of 2 matrices,
     579              : !>        which are:
     580              : !>        1) the on-diagonal kernel: (ab|I_sigma J_tau) * delta_sigma,tau
     581              : !>        2) the off-diagonal spin-conserving kernel: (aJ_sigma|I_tau b) * delta_sigma,tau
     582              : !>        An internal analysis determines which of the above are computed (can range from 0 to 2),
     583              : !> \param ex_ker ...
     584              : !> \param donor_state ...
     585              : !> \param xas_tdp_env ...
     586              : !> \param xas_tdp_control ...
     587              : !> \param qs_env ...
     588              : !> \note In the case of spin-conserving excitation, the kernel must later be multiplied by the
     589              : !>       usual Q projector. In the case of spin-flip, one needs to project the excitations coming
     590              : !>       from alpha donor MOs on the unoccupied beta MOs. This is done by multiplying by a Q
     591              : !>       projector where the alpha-alpha and beta-beta quadrants are swapped
     592              : !>       The ex_ker array should be allocated on entry (not the internals)
     593              : ! **************************************************************************************************
     594          118 :    SUBROUTINE kernel_exchange(ex_ker, donor_state, xas_tdp_env, xas_tdp_control, qs_env)
     595              : 
     596              :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: ex_ker
     597              :       TYPE(donor_state_type), POINTER                    :: donor_state
     598              :       TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
     599              :       TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
     600              :       TYPE(qs_environment_type), POINTER                 :: qs_env
     601              : 
     602              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'kernel_exchange'
     603              : 
     604              :       INTEGER                                            :: handle
     605           68 :       INTEGER, DIMENSION(:), POINTER                     :: blk_size
     606              :       LOGICAL                                            :: do_off_sc
     607              :       TYPE(dbcsr_distribution_type), POINTER             :: dist
     608           68 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: contr1_int
     609              : 
     610           68 :       NULLIFY (contr1_int, dist, blk_size)
     611              : 
     612              :       !Don't do anything if no hfx
     613           18 :       IF (.NOT. xas_tdp_control%do_hfx) RETURN
     614              : 
     615           50 :       CALL timeset(routineN, handle)
     616              : 
     617           50 :       dist => donor_state%dbcsr_dist
     618           50 :       blk_size => donor_state%blk_size
     619              : 
     620              :       !compute the off-diag spin-conserving only if not TDA and anything that is spin-conserving
     621              :       do_off_sc = (.NOT. xas_tdp_control%tamm_dancoff) .AND. &
     622           50 :                   (xas_tdp_control%do_spin_cons .OR. xas_tdp_control%do_singlet .OR. xas_tdp_control%do_triplet)
     623              : 
     624              :       ! Need the once contracted integrals (aI|P)
     625           50 :       CALL contract2_AO_to_doMO(contr1_int, "EXCHANGE", donor_state, xas_tdp_env, xas_tdp_control, qs_env)
     626              : 
     627              : !  The on-diagonal exchange : (ab|P) * (P|Q)^-1 * (Q|I_sigma J_tau) * delta_sigma,tau
     628              :       CALL ondiag_ex(ex_ker(1)%matrix, contr1_int, dist, blk_size, donor_state, xas_tdp_env, &
     629           50 :                      xas_tdp_control, qs_env)
     630              : 
     631              : !  The off-diag spin-conserving case: (aJ_sigma|P) * (P|Q)^-1 * (Q|I_tau b) * delta_sigma,tau
     632           50 :       IF (do_off_sc) THEN
     633              :          CALL offdiag_ex_sc(ex_ker(2)%matrix, contr1_int, dist, blk_size, donor_state, &
     634            6 :                             xas_tdp_env, xas_tdp_control, qs_env)
     635              :       END IF
     636              : 
     637              :       !clean-up
     638           50 :       CALL dbcsr_deallocate_matrix_set(contr1_int)
     639              : 
     640           50 :       CALL timestop(handle)
     641              : 
     642           68 :    END SUBROUTINE kernel_exchange
     643              : 
     644              : ! **************************************************************************************************
     645              : !> \brief Create the matrix containing the on-diagonal exact exchange kernel, which is:
     646              : !>        (ab|I_sigma J_tau) * delta_sigma,tau, where a,b are AOs, I_sigma and J_tau are the donor
     647              : !>        spin-orbitals. A RI is done: (ab|I_sigma J_tau) = (ab|P) * (P|Q)^-1 * (Q|I_sigma J_tau)
     648              : !> \param ondiag_ex_ker the on-diagonal exchange kernel in dbcsr format
     649              : !> \param contr1_int the already once-contracted RI 3-center integrals (aI_sigma|P)
     650              : !>        where each matrix of the array contains the contraction for the donor spin-orbital I_sigma
     651              : !> \param dist the inherited dbcsr distribution
     652              : !> \param blk_size the inherited dbcsr block sizes
     653              : !> \param donor_state ...
     654              : !> \param xas_tdp_env ...
     655              : !> \param xas_tdp_control ...
     656              : !> \param qs_env ...
     657              : !> \note In the presence of a RI metric, we have instead M^-1 * (P|Q) * M^-1
     658              : ! **************************************************************************************************
     659           50 :    SUBROUTINE ondiag_ex(ondiag_ex_ker, contr1_int, dist, blk_size, donor_state, xas_tdp_env, &
     660              :                         xas_tdp_control, qs_env)
     661              : 
     662              :       TYPE(dbcsr_type), INTENT(INOUT)                    :: ondiag_ex_ker
     663              :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: contr1_int
     664              :       TYPE(dbcsr_distribution_type), POINTER             :: dist
     665              :       INTEGER, DIMENSION(:), POINTER                     :: blk_size
     666              :       TYPE(donor_state_type), POINTER                    :: donor_state
     667              :       TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
     668              :       TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
     669              :       TYPE(qs_environment_type), POINTER                 :: qs_env
     670              : 
     671              :       INTEGER                                            :: group, iblk, iso, jblk, jso, nblk, &
     672              :                                                             ndo_mo, ndo_so, nsgfa, nsgfp, ri_atom, &
     673              :                                                             source
     674           50 :       INTEGER, DIMENSION(:), POINTER                     :: col_dist, col_dist_work, row_dist, &
     675           50 :                                                             row_dist_work
     676           50 :       INTEGER, DIMENSION(:, :), POINTER                  :: pgrid
     677              :       LOGICAL                                            :: do_roks, do_uks, found
     678           50 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: coeffs, ri_coeffs
     679           50 :       REAL(dp), DIMENSION(:, :), POINTER                 :: aIQ, pblock, PQ
     680              :       TYPE(dbcsr_distribution_type)                      :: opt_dbcsr_dist, work_dbcsr_dist
     681              :       TYPE(dbcsr_iterator_type)                          :: iter
     682           50 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrix_s
     683              :       TYPE(dbcsr_type)                                   :: abIJ, mats_desymm, work_mat
     684              :       TYPE(mp_para_env_type), POINTER                    :: para_env
     685              : 
     686           50 :       NULLIFY (para_env, matrix_s, pblock, aIQ, row_dist, col_dist, row_dist_work, col_dist_work, pgrid)
     687              : 
     688              :       ! We want to compute (ab|I_sigma J_tau) = (ab|P) * (P|Q)^-1 * (Q|I_sigma J_tau)
     689              :       ! Already have (cJ_tau|P)  stored in contr1_int. Need to further contract the
     690              :       ! AOs with the coeff of the I_alpha spin-orbital.
     691              : 
     692              :       ! Initialization
     693           50 :       ndo_mo = donor_state%ndo_mo
     694           50 :       ri_atom = donor_state%at_index
     695           50 :       do_roks = xas_tdp_control%do_roks
     696           50 :       do_uks = xas_tdp_control%do_uks
     697            6 :       ndo_so = ndo_mo; IF (do_uks) ndo_so = 2*ndo_mo !if not UKS, same donor MOs for both spins
     698           50 :       PQ => xas_tdp_env%ri_inv_ex
     699              : 
     700           50 :       CALL get_qs_env(qs_env, para_env=para_env, matrix_s=matrix_s, natom=nblk)
     701           50 :       nsgfp = SIZE(PQ, 1)
     702           50 :       nsgfa = SIZE(donor_state%contract_coeffs, 1)
     703          300 :       ALLOCATE (coeffs(nsgfp, ndo_so), ri_coeffs(nsgfp, ndo_so))
     704              : 
     705              :       ! a and b need to overlap for non-zero (ab|IJ) => same block structure as overlap S
     706              :       ! need compatible distribution_2d with 3c tensor + normal type
     707           50 :       CALL cp_dbcsr_dist2d_to_dist(xas_tdp_env%opt_dist2d_ex, opt_dbcsr_dist)
     708              : 
     709           50 :       CALL dbcsr_desymmetrize(matrix_s(1)%matrix, mats_desymm)
     710              : 
     711           50 :       CALL dbcsr_create(abIJ, template=mats_desymm, name="(ab|IJ)", dist=opt_dbcsr_dist)
     712           50 :       CALL dbcsr_complete_redistribute(mats_desymm, abIJ)
     713              : 
     714           50 :       CALL dbcsr_release(mats_desymm)
     715              : 
     716              :       ! Create a work distribution based on opt_dbcsr_dist, but for full size matrices
     717              :       CALL dbcsr_distribution_get(opt_dbcsr_dist, row_dist=row_dist, col_dist=col_dist, group=group, &
     718           50 :                                   pgrid=pgrid)
     719              : 
     720          150 :       ALLOCATE (row_dist_work(ndo_so*nblk))
     721          100 :       ALLOCATE (col_dist_work(ndo_so*nblk))
     722          118 :       DO iso = 1, ndo_so
     723          608 :          row_dist_work((iso - 1)*nblk + 1:iso*nblk) = row_dist(:)
     724          658 :          col_dist_work((iso - 1)*nblk + 1:iso*nblk) = col_dist(:)
     725              :       END DO
     726              : 
     727              :       CALL dbcsr_distribution_new(work_dbcsr_dist, group=group, pgrid=pgrid, row_dist=row_dist_work, &
     728           50 :                                   col_dist=col_dist_work)
     729              : 
     730              :       CALL dbcsr_create(work_mat, name="WORK", matrix_type=dbcsr_type_no_symmetry, dist=work_dbcsr_dist, &
     731           50 :                         row_blk_size=blk_size, col_blk_size=blk_size)
     732              : 
     733              :       ! Loop over donor spin-orbitals. End matrix is symmetric => span only upper half
     734          118 :       DO iso = 1, ndo_so
     735              : 
     736              :          ! take the (aI|Q) block in contr1_int that has a centered on the excited atom
     737           68 :          CALL dbcsr_get_stored_coordinates(contr1_int(iso)%matrix, ri_atom, ri_atom, source)
     738           68 :          IF (para_env%mepos == source) THEN
     739           34 :             CALL dbcsr_get_block_p(contr1_int(iso)%matrix, ri_atom, ri_atom, aIQ, found)
     740              :          ELSE
     741          136 :             ALLOCATE (aIQ(nsgfa, nsgfp))
     742              :          END IF
     743       183280 :          CALL para_env%bcast(aIQ, source)
     744              : 
     745              :          ! get the contraction (Q|IJ) by taking (Q|Ia)*contract_coeffs and put it in coeffs
     746              :          CALL dgemm('T', 'N', nsgfp, ndo_so, nsgfa, 1.0_dp, aIQ, nsgfa, donor_state%contract_coeffs, &
     747           68 :                     nsgfa, 0.0_dp, coeffs, nsgfp)
     748              : 
     749              :          ! take (P|Q)^-1 * (Q|IJ) and put that in ri_coeffs
     750              :          CALL dgemm('N', 'N', nsgfp, ndo_so, nsgfp, 1.0_dp, PQ, nsgfp, coeffs, nsgfp, 0.0_dp, &
     751           68 :                     ri_coeffs, nsgfp)
     752              : 
     753           68 :          IF (.NOT. para_env%mepos == source) DEALLOCATE (aIQ)
     754              : 
     755          294 :          DO jso = iso, ndo_so
     756              : 
     757              :             ! There is no alpha-beta exchange. In case of UKS, iso,jso span all spin-orbitals
     758              :             ! => CYCLE if iso and jso are indexing MOs with different spin (and we have UKS)
     759          108 :             IF (do_uks .AND. (iso <= ndo_mo .AND. jso > ndo_mo)) CYCLE
     760              : 
     761              :             ! compute (ab|IJ) = sum_P (ab|P) * (P|Q)^-1 * (Q|IJ)
     762           86 :             CALL dbcsr_set(abIJ, 0.0_dp)
     763           86 :             CALL contract3_RI_to_doMOs(xas_tdp_env%ri_3c_ex, ri_coeffs(:, jso), abIJ, ri_atom)
     764              : 
     765              :             ! Loop over (ab|IJ) and copy into work. OK because dist are made to match
     766           86 :             CALL dbcsr_iterator_start(iter, abIJ)
     767          673 :             DO WHILE (dbcsr_iterator_blocks_left(iter))
     768              : 
     769          587 :                CALL dbcsr_iterator_next_block(iter, row=iblk, column=jblk)
     770          587 :                IF (iso == jso .AND. jblk < iblk) CYCLE
     771              : 
     772          357 :                CALL dbcsr_get_block_p(abIJ, iblk, jblk, pblock, found)
     773              : 
     774          443 :                IF (found) THEN
     775          357 :                   CALL dbcsr_put_block(work_mat, (iso - 1)*nblk + iblk, (jso - 1)*nblk + jblk, pblock)
     776              : 
     777              :                   !In case of ROKS, we have (ab|IJ) for alpha-alpha spin, but it is the same for
     778              :                   !beta-beta => replicate the blocks (alpha-beta is zero)
     779          357 :                   IF (do_roks) THEN
     780              :                      !the beta-beta block
     781              :                      CALL dbcsr_put_block(work_mat, (ndo_so + iso - 1)*nblk + iblk, &
     782            0 :                                           (ndo_so + jso - 1)*nblk + jblk, pblock)
     783              :                   END IF
     784              :                END IF
     785              : 
     786              :             END DO !iterator
     787          262 :             CALL dbcsr_iterator_stop(iter)
     788              : 
     789              :          END DO !jso
     790              :       END DO !iso
     791              : 
     792           50 :       CALL dbcsr_finalize(work_mat)
     793              :       CALL dbcsr_create(ondiag_ex_ker, name="ONDIAG EX KERNEL", matrix_type=dbcsr_type_symmetric, &
     794           50 :                         dist=dist, row_blk_size=blk_size, col_blk_size=blk_size)
     795           50 :       CALL dbcsr_complete_redistribute(work_mat, ondiag_ex_ker)
     796              : 
     797              :       !Clean-up
     798           50 :       CALL dbcsr_release(work_mat)
     799           50 :       CALL dbcsr_release(abIJ)
     800           50 :       CALL dbcsr_distribution_release(opt_dbcsr_dist)
     801           50 :       CALL dbcsr_distribution_release(work_dbcsr_dist)
     802           50 :       DEALLOCATE (col_dist_work, row_dist_work)
     803              : 
     804          250 :    END SUBROUTINE ondiag_ex
     805              : 
     806              : ! **************************************************************************************************
     807              : !> \brief Create the matrix containing the off-diagonal exact exchange kernel in the spin-conserving
     808              : !>        case (which also includes excitations from the closed=shell ref state ) This matrix reads:
     809              : !>        (aJ_sigma|I_tau b) * delta_sigma,tau , where a, b are AOs and J_sigma, I_tau are the donor
     810              : !>        spin-orbital. A RI is done: (aJ_sigma|I_tau b) = (aJ_sigma|P) * (P|Q)^-1 * (Q|I_tau b)
     811              : !> \param offdiag_ex_ker the off-diagonal, spin-conserving exchange kernel in dbcsr format
     812              : !> \param contr1_int the once-contracted RI integrals: (aJ_sigma|P)
     813              : !> \param dist the inherited dbcsr ditribution
     814              : !> \param blk_size the inherited block sizes
     815              : !> \param donor_state ...
     816              : !> \param xas_tdp_env ...
     817              : !> \param xas_tdp_control ...
     818              : !> \param qs_env ...
     819              : ! **************************************************************************************************
     820            6 :    SUBROUTINE offdiag_ex_sc(offdiag_ex_ker, contr1_int, dist, blk_size, donor_state, xas_tdp_env, &
     821              :                             xas_tdp_control, qs_env)
     822              : 
     823              :       TYPE(dbcsr_type), INTENT(INOUT)                    :: offdiag_ex_ker
     824              :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: contr1_int
     825              :       TYPE(dbcsr_distribution_type), POINTER             :: dist
     826              :       INTEGER, DIMENSION(:), POINTER                     :: blk_size
     827              :       TYPE(donor_state_type), POINTER                    :: donor_state
     828              :       TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
     829              :       TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
     830              :       TYPE(qs_environment_type), POINTER                 :: qs_env
     831              : 
     832              :       INTEGER                                            :: ndo_mo
     833              :       LOGICAL                                            :: do_roks, do_uks, quadrants(3)
     834              :       REAL(dp), DIMENSION(:, :), POINTER                 :: PQ
     835            6 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: lhs_int, rhs_int
     836              :       TYPE(dbcsr_type)                                   :: work_mat
     837              : 
     838            6 :       NULLIFY (PQ, lhs_int, rhs_int)
     839              : 
     840              :       !Initialization
     841            6 :       ndo_mo = donor_state%ndo_mo
     842            6 :       do_roks = xas_tdp_control%do_roks
     843            6 :       do_uks = xas_tdp_control%do_uks
     844            6 :       PQ => xas_tdp_env%ri_inv_ex
     845              : 
     846            6 :       rhs_int => contr1_int
     847           24 :       ALLOCATE (lhs_int(SIZE(contr1_int)))
     848            6 :       CALL copy_ri_contr_int(lhs_int, rhs_int)
     849            6 :       CALL ri_all_blocks_mm(lhs_int, PQ)
     850              : 
     851              :       !Given the lhs_int and rhs_int, all we need to do is multiply elements from the former by
     852              :       !the transpose of the later, and put the result in the correct spin quadrants
     853              : 
     854              :       !Create a normal type work matrix
     855              :       CALL dbcsr_create(work_mat, name="WORK", matrix_type=dbcsr_type_no_symmetry, dist=dist, &
     856            6 :                         row_blk_size=blk_size, col_blk_size=blk_size)
     857              : 
     858              :       !Case study on closed-shell, ROKS or UKS
     859            6 :       IF (do_roks) THEN
     860              :          !In ROKS, the donor MOs for each spin are the same => copy the product in both the
     861              :          !alpha-alpha and the beta-beta quadrants
     862            0 :          quadrants = [.TRUE., .FALSE., .TRUE.]
     863              :          CALL ri_int_product(work_mat, lhs_int, rhs_int, quadrants, qs_env, &
     864            0 :                              eps_filter=xas_tdp_control%eps_filter, mo_transpose=.TRUE.)
     865              : 
     866            6 :       ELSE IF (do_uks) THEN
     867              :          !In UKS, the donor MOs are possibly different for each spin => start with the
     868              :          !alpha-alpha product and the perform the beta-beta product separately
     869            0 :          quadrants = [.TRUE., .FALSE., .FALSE.]
     870              :          CALL ri_int_product(work_mat, lhs_int(1:ndo_mo), rhs_int(1:ndo_mo), quadrants, &
     871            0 :                              qs_env, eps_filter=xas_tdp_control%eps_filter, mo_transpose=.TRUE.)
     872              : 
     873            0 :          quadrants = [.FALSE., .FALSE., .TRUE.]
     874              :          CALL ri_int_product(work_mat, lhs_int(ndo_mo + 1:2*ndo_mo), rhs_int(ndo_mo + 1:2*ndo_mo), &
     875            0 :                              quadrants, qs_env, eps_filter=xas_tdp_control%eps_filter, mo_transpose=.TRUE.)
     876              :       ELSE
     877              :          !In the restricted closed-shell case, only have one spin and a single qudarant
     878            6 :          quadrants = [.TRUE., .FALSE., .FALSE.]
     879              :          CALL ri_int_product(work_mat, lhs_int, rhs_int, quadrants, qs_env, &
     880            6 :                              eps_filter=xas_tdp_control%eps_filter, mo_transpose=.TRUE.)
     881              :       END IF
     882            6 :       CALL dbcsr_finalize(work_mat)
     883              : 
     884              :       !Create the symmetric kernel matrix and redistribute work_mat into it
     885              :       CALL dbcsr_create(offdiag_ex_ker, name="OFFDIAG EX KERNEL", matrix_type=dbcsr_type_symmetric, &
     886            6 :                         dist=dist, row_blk_size=blk_size, col_blk_size=blk_size)
     887            6 :       CALL dbcsr_complete_redistribute(work_mat, offdiag_ex_ker)
     888              : 
     889              :       !clean-up
     890            6 :       CALL dbcsr_release(work_mat)
     891            6 :       CALL dbcsr_deallocate_matrix_set(lhs_int)
     892              : 
     893            6 :    END SUBROUTINE offdiag_ex_sc
     894              : 
     895              : ! **************************************************************************************************
     896              : !> \brief Reserves the blocks in of a dbcsr matrix as needed for RI 3-center contraction (aI|P)
     897              : !> \param matrices the matrices for which blocks are reserved
     898              : !> \param ri_atom the index of the atom on which RI is done (= all coeffs of I are there, and P too)
     899              : !> \param qs_env ...
     900              : !> \note the end product are normal type matrices that are possibly slightly spraser as matrix_s
     901              : ! **************************************************************************************************
     902          152 :    SUBROUTINE reserve_contraction_blocks(matrices, ri_atom, qs_env)
     903              : 
     904              :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrices
     905              :       INTEGER, INTENT(IN)                                :: ri_atom
     906              :       TYPE(qs_environment_type), POINTER                 :: qs_env
     907              : 
     908              :       INTEGER                                            :: i, iblk, jblk, max_nblks, nblks
     909          152 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: reserve_cols, reserve_rows
     910              :       TYPE(dbcsr_distribution_type)                      :: dist
     911              :       TYPE(dbcsr_iterator_type)                          :: iter
     912          152 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrix_s
     913              :       TYPE(dbcsr_type)                                   :: template, work
     914              : 
     915          152 :       NULLIFY (matrix_s)
     916              : 
     917              :       ! Initialization
     918          152 :       CALL get_qs_env(qs_env, matrix_s=matrix_s)
     919          152 :       CALL dbcsr_get_info(matrices(1)%matrix, distribution=dist)
     920              : 
     921              :       ! Need to redistribute matrix_s in the distribution of matrices
     922          152 :       CALL dbcsr_create(work, template=matrix_s(1)%matrix, dist=dist)
     923          152 :       CALL dbcsr_complete_redistribute(matrix_s(1)%matrix, work)
     924              : 
     925              :       ! Need to desymmetrize matrix as as a template
     926          152 :       CALL dbcsr_desymmetrize(work, template)
     927              : 
     928              :       ! Allocate space for block indicies to reserve.
     929          152 :       max_nblks = dbcsr_get_num_blocks(template)
     930          592 :       ALLOCATE (reserve_rows(max_nblks), reserve_cols(max_nblks))
     931              : 
     932              :       ! Loop over matrix_s as need a,b to overlap
     933          152 :       nblks = 0
     934          152 :       CALL dbcsr_iterator_start(iter, template)
     935        19444 :       DO WHILE (dbcsr_iterator_blocks_left(iter))
     936        19292 :          CALL dbcsr_iterator_next_block(iter, row=iblk, column=jblk)
     937              :          !only have a,b pair if one of them is the ri_atom
     938        19444 :          IF (iblk == ri_atom .OR. jblk == ri_atom) THEN
     939          842 :             nblks = nblks + 1
     940          842 :             reserve_rows(nblks) = iblk
     941          842 :             reserve_cols(nblks) = jblk
     942              :          END IF
     943              :       END DO
     944          152 :       CALL dbcsr_iterator_stop(iter)
     945              : 
     946          456 :       DO i = 1, SIZE(matrices)
     947          456 :          CALL dbcsr_reserve_blocks(matrices(i)%matrix, rows=reserve_rows(1:nblks), cols=reserve_cols(1:nblks))
     948              :       END DO
     949              : 
     950              :       ! Clean-up
     951          152 :       CALL dbcsr_release(template)
     952          152 :       CALL dbcsr_release(work)
     953              : 
     954          456 :    END SUBROUTINE reserve_contraction_blocks
     955              : 
     956              : ! **************************************************************************************************
     957              : !> \brief Contract the ri 3-center integrals stored in a tensor with repect to the donor MOs coeffs,
     958              : !>        for a given excited atom k => (aI|k) = sum_b c_Ib (ab|k)
     959              : !> \param contr_int the contracted integrals as array of dbcsr matrices
     960              : !> \param op_type for which operator type we contract (COULOMB or EXCHANGE)
     961              : !> \param donor_state ...
     962              : !> \param xas_tdp_env ...
     963              : !> \param xas_tdp_control ...
     964              : !> \param qs_env ...
     965              : !> \note  In the output matrices, (aI_b|k) is stored at block a,b where I_b is the partial
     966              : !>        contraction that only includes coeffs from atom b. Note that the contracted matrix is
     967              : !>        not symmetric. To get the fully contracted matrix over b, one need to add the block
     968              : !>        columns of (aI_b|k) (get an array of size nao*nsgfp). This step is unnessary in our case
     969              : !>        because we assume locality of donor state, and only one column od (aI_b|k) is pouplated
     970              : ! **************************************************************************************************
     971          152 :    SUBROUTINE contract2_AO_to_doMO(contr_int, op_type, donor_state, xas_tdp_env, xas_tdp_control, qs_env)
     972              : 
     973              :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: contr_int
     974              :       CHARACTER(len=*), INTENT(IN)                       :: op_type
     975              :       TYPE(donor_state_type), POINTER                    :: donor_state
     976              :       TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
     977              :       TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
     978              :       TYPE(qs_environment_type), POINTER                 :: qs_env
     979              : 
     980              :       CHARACTER(len=*), PARAMETER :: routineN = 'contract2_AO_to_doMO'
     981              : 
     982              :       INTEGER                                            :: handle, i, imo, ispin, katom, kkind, &
     983              :                                                             natom, ndo_mo, ndo_so, nkind, nspins
     984          152 :       INTEGER, DIMENSION(:), POINTER                     :: ri_blk_size, std_blk_size
     985              :       LOGICAL                                            :: do_uks
     986          152 :       REAL(dp), DIMENSION(:, :), POINTER                 :: coeffs
     987              :       TYPE(dbcsr_distribution_type)                      :: opt_dbcsr_dist
     988          152 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrices, matrix_s
     989              :       TYPE(dbcsr_type), POINTER                          :: aI_P, P_Ib, work
     990              :       TYPE(dbt_type), POINTER                            :: pq_X
     991              :       TYPE(distribution_2d_type), POINTER                :: opt_dist2d
     992          152 :       TYPE(gto_basis_set_p_type), DIMENSION(:), POINTER  :: ri_basis
     993              :       TYPE(mp_para_env_type), POINTER                    :: para_env
     994          152 :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
     995          152 :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
     996              : 
     997          152 :       NULLIFY (matrix_s, std_blk_size, ri_blk_size, qs_kind_set, ri_basis, pq_X)
     998          152 :       NULLIFY (aI_P, P_Ib, work, matrices, coeffs, opt_dist2d, particle_set)
     999              : 
    1000          152 :       CALL timeset(routineN, handle)
    1001              : 
    1002              : !  Initialization
    1003          152 :       CALL get_qs_env(qs_env, natom=natom, matrix_s=matrix_s, qs_kind_set=qs_kind_set, para_env=para_env)
    1004          152 :       ndo_mo = donor_state%ndo_mo
    1005          152 :       kkind = donor_state%kind_index
    1006          152 :       katom = donor_state%at_index
    1007              :       !by default contract for Coulomb
    1008          152 :       pq_X => xas_tdp_env%ri_3c_coul
    1009          152 :       opt_dist2d => xas_tdp_env%opt_dist2d_coul
    1010          152 :       IF (op_type == "EXCHANGE") THEN
    1011           84 :          CPASSERT(ASSOCIATED(xas_tdp_env%ri_3c_ex))
    1012           84 :          pq_X => xas_tdp_env%ri_3c_ex
    1013           84 :          opt_dist2d => xas_tdp_env%opt_dist2d_ex
    1014              :       END IF
    1015          152 :       do_uks = xas_tdp_control%do_uks
    1016          152 :       nspins = 1; IF (do_uks) nspins = 2
    1017          152 :       ndo_so = nspins*ndo_mo
    1018              : 
    1019              : !  contracted integrals block sizes
    1020          152 :       CALL dbcsr_get_info(matrix_s(1)%matrix, col_blk_size=std_blk_size)
    1021              :       ! getting the block dimensions for the RI basis
    1022          152 :       CALL get_qs_env(qs_env, particle_set=particle_set, nkind=nkind)
    1023         1012 :       ALLOCATE (ri_basis(nkind), ri_blk_size(natom))
    1024          152 :       CALL basis_set_list_setup(ri_basis, "RI_XAS", qs_kind_set)
    1025          152 :       CALL get_particle_set(particle_set, qs_kind_set, nsgf=ri_blk_size, basis=ri_basis)
    1026              : 
    1027              : !  Create  work matrices. Everything that goes into a 3c routine must be compatible with the optimal dist_2d
    1028          152 :       CALL cp_dbcsr_dist2d_to_dist(opt_dist2d, opt_dbcsr_dist)
    1029              : 
    1030          456 :       ALLOCATE (aI_P, P_Ib, work, matrices(2))
    1031              :       CALL dbcsr_create(aI_P, dist=opt_dbcsr_dist, matrix_type=dbcsr_type_no_symmetry, name="(aI|P)", &
    1032          152 :                         row_blk_size=std_blk_size, col_blk_size=ri_blk_size)
    1033              : 
    1034              :       CALL dbcsr_create(P_Ib, dist=opt_dbcsr_dist, matrix_type=dbcsr_type_no_symmetry, name="(P|Ib)", &
    1035          152 :                         row_blk_size=ri_blk_size, col_blk_size=std_blk_size)
    1036              : 
    1037              :       !reserve the blocks (needed for 3c contraction routines)
    1038          152 :       matrices(1)%matrix => aI_P; matrices(2)%matrix => P_Ib
    1039          152 :       CALL reserve_contraction_blocks(matrices, katom, qs_env)
    1040          152 :       DEALLOCATE (matrices)
    1041              : 
    1042              :       ! Create the contracted integral matrices
    1043          662 :       ALLOCATE (contr_int(ndo_so))
    1044          358 :       DO i = 1, ndo_so
    1045          206 :          ALLOCATE (contr_int(i)%matrix)
    1046              :          CALL dbcsr_create(matrix=contr_int(i)%matrix, template=matrix_s(1)%matrix, &
    1047              :                            matrix_type=dbcsr_type_no_symmetry, row_blk_size=std_blk_size, &
    1048          358 :                            col_blk_size=ri_blk_size)
    1049              :       END DO
    1050              : 
    1051              :       ! Only take the coeffs for atom on which MOs I,J are localized
    1052          152 :       coeffs => donor_state%contract_coeffs
    1053              : 
    1054          326 :       DO ispin = 1, nspins
    1055              : 
    1056              : !     Loop over the donor MOs and contract
    1057          532 :          DO imo = 1, ndo_mo
    1058              : 
    1059              :             ! do the contraction
    1060          206 :             CALL dbcsr_set(aI_P, 0.0_dp); CALL dbcsr_set(P_Ib, 0.0_dp)
    1061          206 :             CALL contract2_AO_to_doMO_low(pq_X, coeffs(:, (ispin - 1)*ndo_mo + imo), aI_P, P_Ib, katom)
    1062              : 
    1063              :             ! Get the full (aI|P) contracted integrals
    1064          206 :             CALL dbcsr_transposed(work, P_Ib)
    1065          206 :             CALL dbcsr_add(work, aI_P, 1.0_dp, 1.0_dp)
    1066          206 :             CALL dbcsr_complete_redistribute(work, contr_int((ispin - 1)*ndo_mo + imo)%matrix)
    1067          206 :             CALL dbcsr_filter(contr_int((ispin - 1)*ndo_mo + imo)%matrix, 1.0E-16_dp)
    1068              : 
    1069          380 :             CALL dbcsr_release(work)
    1070              :          END DO !imo
    1071              :       END DO !ispin
    1072              : 
    1073              : !  Clean-up
    1074          152 :       CALL dbcsr_release(aI_P)
    1075          152 :       CALL dbcsr_release(P_Ib)
    1076          152 :       CALL dbcsr_distribution_release(opt_dbcsr_dist)
    1077          152 :       DEALLOCATE (ri_blk_size, aI_P, P_Ib, work, ri_basis)
    1078              : 
    1079          152 :       CALL timestop(handle)
    1080              : 
    1081          304 :    END SUBROUTINE contract2_AO_to_doMO
    1082              : 
    1083              : ! **************************************************************************************************
    1084              : !> \brief Contraction of the 3-center integrals (ab|Q) over the RI basis elements Q to get donor MOS
    1085              : !>        => (ab|IJ) = sum_X (ab|Q) coeffs_Q
    1086              : !> \param ab_Q the tensor holding the integrals
    1087              : !> \param vec the contraction coefficients
    1088              : !> \param mat_abIJ the matrix holding the (ab|IJ) integrals (blocks must be reserved)
    1089              : !> \param atom_k the atom for which we contract, i.e. we only take RI basis Q centered on atom_k
    1090              : !> \note By construction, distribution of tensor and matrix match, also for OMP threads
    1091              : ! **************************************************************************************************
    1092           86 :    SUBROUTINE contract3_RI_to_doMOs(ab_Q, vec, mat_abIJ, atom_k)
    1093              : 
    1094              :       TYPE(dbt_type)                                     :: ab_Q
    1095              :       REAL(dp), DIMENSION(:), INTENT(IN)                 :: vec
    1096              :       TYPE(dbcsr_type)                                   :: mat_abIJ
    1097              :       INTEGER, INTENT(IN)                                :: atom_k
    1098              : 
    1099              :       CHARACTER(len=*), PARAMETER :: routineN = 'contract3_RI_to_doMOs'
    1100              : 
    1101              :       INTEGER                                            :: handle, i, iatom, ind(3), j, jatom, katom
    1102              :       LOGICAL                                            :: found, t_found
    1103              :       REAL(dp)                                           :: prefac
    1104           86 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :, :)          :: iabc
    1105           86 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: pblock
    1106              :       TYPE(dbcsr_type)                                   :: work
    1107              :       TYPE(dbt_iterator_type)                            :: iter
    1108              : 
    1109           86 :       NULLIFY (pblock)
    1110              : 
    1111           86 :       CALL timeset(routineN, handle)
    1112              : 
    1113              : !$OMP PARALLEL DEFAULT(NONE) &
    1114              : !$OMP SHARED(ab_Q,vec,mat_abIJ,atom_k) &
    1115           86 : !$OMP PRIVATE(iter,ind,iatom,jatom,katom,prefac,iabc,t_found,found,pblock,i,j)
    1116              :       CALL dbt_iterator_start(iter, ab_Q)
    1117              :       DO WHILE (dbt_iterator_blocks_left(iter))
    1118              :          CALL dbt_iterator_next_block(iter, ind)
    1119              : 
    1120              :          iatom = ind(1)
    1121              :          jatom = ind(2)
    1122              :          katom = ind(3)
    1123              : 
    1124              :          IF (.NOT. atom_k == katom) CYCLE
    1125              : 
    1126              :          prefac = 1.0_dp
    1127              :          IF (iatom == jatom) prefac = 0.5_dp
    1128              : 
    1129              :          CALL dbt_get_block(ab_Q, ind, iabc, t_found)
    1130              : 
    1131              :          CALL dbcsr_get_block_p(mat_abIJ, iatom, jatom, pblock, found)
    1132              :          IF ((.NOT. found) .OR. (.NOT. t_found)) CYCLE
    1133              : 
    1134              :          DO i = 1, SIZE(pblock, 1)
    1135              :             DO j = 1, SIZE(pblock, 2)
    1136              : !$OMP ATOMIC
    1137              :                pblock(i, j) = pblock(i, j) + prefac*DOT_PRODUCT(vec(:), iabc(i, j, :))
    1138              :             END DO
    1139              :          END DO
    1140              : 
    1141              :          DEALLOCATE (iabc)
    1142              :       END DO !iter
    1143              :       CALL dbt_iterator_stop(iter)
    1144              : !$OMP END PARALLEL
    1145              : 
    1146              :       !matrix only half filled => need to add its transpose
    1147           86 :       CALL dbcsr_create(work, template=mat_abIJ)
    1148           86 :       CALL dbcsr_transposed(work, mat_abIJ)
    1149           86 :       CALL dbcsr_add(mat_abIJ, work, 1.0_dp, 1.0_dp)
    1150           86 :       CALL dbcsr_release(work)
    1151              : 
    1152           86 :       CALL timestop(handle)
    1153              : 
    1154          172 :    END SUBROUTINE contract3_RI_to_doMOs
    1155              : 
    1156              : ! **************************************************************************************************
    1157              : !> \brief Contraction of the 3-center integrals over index 1 and 2, for a given atom_k. The results
    1158              : !>        are stored in two matrices, such that (a,b are block indices):
    1159              : !>        mat_aIb(ab) = mat_aIb(ab) + sum j_b (i_aj_b|k)*v(j_b) and
    1160              : !>        mat_bIa(ba) = mat_bIa(ba) + sum i_a (i_aj_b|k)*v(i_a)
    1161              : !>        The block size of the columns of mat_aIb and the rows of mat_bIa are the size of k (RI)
    1162              : !> \param ab_Q the tensor containing the 3-center integrals
    1163              : !> \param vec the contraction coefficients
    1164              : !> \param mat_aIb normal type dbcsr matrix
    1165              : !> \param mat_bIa normal type dbcsr matrix
    1166              : !> \param atom_k the atom for which we contract
    1167              : !>       It is assumed that the contraction coefficients for MO I are all on atom_k
    1168              : !>       We do the classic thing when we fill half the matrix and add its transposed to get the full
    1169              : !>       one, but here, the matrix is not symmetric, hence we explicitely have 2 input matrices
    1170              : !>       The distribution of the integrals and the normal dbcsr matrix are compatible out of the box
    1171              : ! **************************************************************************************************
    1172          206 :    SUBROUTINE contract2_AO_to_doMO_low(ab_Q, vec, mat_aIb, mat_bIa, atom_k)
    1173              : 
    1174              :       TYPE(dbt_type)                                     :: ab_Q
    1175              :       REAL(dp), DIMENSION(:), INTENT(IN)                 :: vec
    1176              :       TYPE(dbcsr_type), INTENT(INOUT)                    :: mat_aIb, mat_bIa
    1177              :       INTEGER, INTENT(IN)                                :: atom_k
    1178              : 
    1179              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'contract2_AO_to_doMO_low'
    1180              : 
    1181              :       INTEGER                                            :: handle, i, iatom, ind(3), j, jatom, &
    1182              :                                                             katom, s1, s2
    1183          206 :       INTEGER, DIMENSION(:), POINTER                     :: atom_blk_size
    1184              :       LOGICAL                                            :: found, t_found
    1185          206 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :, :)          :: iabc
    1186          206 :       REAL(dp), DIMENSION(:, :), POINTER                 :: pblock
    1187              :       TYPE(dbt_iterator_type)                            :: iter
    1188              : 
    1189          206 :       NULLIFY (atom_blk_size, pblock)
    1190              : 
    1191          206 :       CALL timeset(routineN, handle)
    1192              : 
    1193          206 :       CALL dbcsr_get_info(mat_aIb, row_blk_size=atom_blk_size)
    1194              : 
    1195              : !$OMP PARALLEL DEFAULT(NONE) &
    1196              : !$OMP SHARED(ab_Q,vec,mat_aIb,mat_bIa,atom_k,atom_blk_size) &
    1197          206 : !$OMP PRIVATE(iter,ind,iatom,jatom,katom,iabc,t_found,found,s1,s2,j,i,pblock)
    1198              :       CALL dbt_iterator_start(iter, ab_Q)
    1199              :       DO WHILE (dbt_iterator_blocks_left(iter))
    1200              :          CALL dbt_iterator_next_block(iter, ind)
    1201              : 
    1202              :          iatom = ind(1)
    1203              :          jatom = ind(2)
    1204              :          katom = ind(3)
    1205              : 
    1206              :          IF (atom_k .NE. katom) CYCLE
    1207              : 
    1208              :          CALL dbt_get_block(ab_Q, ind, iabc, t_found)
    1209              :          IF (.NOT. t_found) CYCLE
    1210              : 
    1211              :          ! Deal with mat_aIb
    1212              :          IF (jatom == atom_k) THEN
    1213              :             s1 = atom_blk_size(iatom)
    1214              :             s2 = SIZE(iabc, 3)
    1215              : 
    1216              :             CALL dbcsr_get_block_p(matrix=mat_aIb, row=iatom, col=jatom, BLOCK=pblock, found=found)
    1217              : 
    1218              :             IF (found) THEN
    1219              :                DO i = 1, s1
    1220              :                   DO j = 1, s2
    1221              : !$OMP ATOMIC
    1222              :                      pblock(i, j) = pblock(i, j) + DOT_PRODUCT(vec, iabc(i, :, j))
    1223              :                   END DO
    1224              :                END DO
    1225              :             END IF
    1226              :          END IF ! jatom == atom_k
    1227              : 
    1228              :          ! Deal with mat_bIa, keep block diagonal empty
    1229              :          IF (iatom == jatom) CYCLE
    1230              :          IF (iatom == atom_k) THEN
    1231              :             s1 = SIZE(iabc, 3)
    1232              :             s2 = atom_blk_size(jatom)
    1233              : 
    1234              :             CALL dbcsr_get_block_p(matrix=mat_bIa, row=iatom, col=jatom, BLOCK=pblock, found=found)
    1235              : 
    1236              :             IF (found) THEN
    1237              :                DO i = 1, s1
    1238              :                   DO j = 1, s2
    1239              : !$OMP ATOMIC
    1240              :                      pblock(i, j) = pblock(i, j) + DOT_PRODUCT(vec, iabc(:, j, i))
    1241              :                   END DO
    1242              :                END DO
    1243              :             END IF
    1244              :          END IF !iatom== atom_k
    1245              : 
    1246              :          DEALLOCATE (iabc)
    1247              :       END DO !iter
    1248              :       CALL dbt_iterator_stop(iter)
    1249              : !$OMP END PARALLEL
    1250              : 
    1251          206 :       CALL timestop(handle)
    1252              : 
    1253          412 :    END SUBROUTINE contract2_AO_to_doMO_low
    1254              : 
    1255              : ! **************************************************************************************************
    1256              : !> \brief Multiply all the blocks of a contracted RI integral (aI|P) by a matrix of type (P|...|Q)
    1257              : !> \param contr_int the integral array
    1258              : !> \param PQ the smaller matrix to multiply all blocks
    1259              : !> \note  It is assumed that all non-zero blocks have the same number of columns. Can pass partial
    1260              : !>        arrays, e.g. contr_int(1:3)
    1261              : ! **************************************************************************************************
    1262          232 :    SUBROUTINE ri_all_blocks_mm(contr_int, PQ)
    1263              : 
    1264              :       TYPE(dbcsr_p_type), DIMENSION(:)                   :: contr_int
    1265              :       REAL(dp), DIMENSION(:, :), INTENT(IN)              :: PQ
    1266              : 
    1267              :       INTEGER                                            :: iblk, imo, jblk, ndo_mo, s1, s2
    1268              :       LOGICAL                                            :: found
    1269          232 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: work
    1270          232 :       REAL(dp), DIMENSION(:, :), POINTER                 :: pblock
    1271              :       TYPE(dbcsr_iterator_type)                          :: iter
    1272              : 
    1273          232 :       NULLIFY (pblock)
    1274              : 
    1275          232 :       ndo_mo = SIZE(contr_int)
    1276              : 
    1277          502 :       DO imo = 1, ndo_mo
    1278          270 :          CALL dbcsr_iterator_start(iter, contr_int(imo)%matrix)
    1279         1230 :          DO WHILE (dbcsr_iterator_blocks_left(iter))
    1280              : 
    1281          960 :             CALL dbcsr_iterator_next_block(iter, row=iblk, column=jblk)
    1282          960 :             CALL dbcsr_get_block_p(contr_int(imo)%matrix, iblk, jblk, pblock, found)
    1283              : 
    1284         1230 :             IF (found) THEN
    1285          960 :                s1 = SIZE(pblock, 1)
    1286          960 :                s2 = SIZE(pblock, 2)
    1287         3840 :                ALLOCATE (work(s1, s2))
    1288          960 :                CALL dgemm('N', 'N', s1, s2, s2, 1.0_dp, pblock, s1, PQ, s2, 0.0_dp, work, s1)
    1289          960 :                CALL dcopy(s1*s2, work, 1, pblock, 1)
    1290          960 :                DEALLOCATE (work)
    1291              :             END IF
    1292              : 
    1293              :          END DO ! dbcsr iterator
    1294          772 :          CALL dbcsr_iterator_stop(iter)
    1295              :       END DO !imo
    1296              : 
    1297          464 :    END SUBROUTINE ri_all_blocks_mm
    1298              : 
    1299              : ! **************************************************************************************************
    1300              : !> \brief Copies an (partial) array of contracted RI integrals into anoter one
    1301              : !> \param new_int where the copy is stored
    1302              : !> \param ref_int what is copied
    1303              : !> \note Allocate the matrices of new_int if not done already
    1304              : ! **************************************************************************************************
    1305          138 :    SUBROUTINE copy_ri_contr_int(new_int, ref_int)
    1306              : 
    1307              :       TYPE(dbcsr_p_type), DIMENSION(:), INTENT(INOUT)    :: new_int
    1308              :       TYPE(dbcsr_p_type), DIMENSION(:), INTENT(IN)       :: ref_int
    1309              : 
    1310              :       INTEGER                                            :: iso, ndo_so
    1311              : 
    1312          138 :       CPASSERT(SIZE(new_int) == SIZE(ref_int))
    1313          138 :       ndo_so = SIZE(ref_int)
    1314              : 
    1315          296 :       DO iso = 1, ndo_so
    1316          158 :          IF (.NOT. ASSOCIATED(new_int(iso)%matrix)) ALLOCATE (new_int(iso)%matrix)
    1317          296 :          CALL dbcsr_copy(new_int(iso)%matrix, ref_int(iso)%matrix)
    1318              :       END DO
    1319              : 
    1320          138 :    END SUBROUTINE copy_ri_contr_int
    1321              : 
    1322              : ! **************************************************************************************************
    1323              : !> \brief Takes the product of contracted integrals and put them in a kernel matrix
    1324              : !> \param kernel the matrix where the products are stored
    1325              : !> \param lhs_int the left-hand side contracted integrals
    1326              : !> \param rhs_int the right-hand side contracted integrals
    1327              : !> \param quadrants on which quadrant(s) on the kernel matrix the product is stored
    1328              : !> \param qs_env ...
    1329              : !> \param eps_filter filter for dbcsr matrix multiplication
    1330              : !> \param mo_transpose whether the MO blocks should be transpose, i.e. (aI|Jb) => (aJ|Ib)
    1331              : !> \note It is assumed that the kerenl matrix is NOT symmetric
    1332              : !>       There are three quadrants, corresponding to 1: the upper-left (diagonal), 2: the
    1333              : !>       upper-right (off-diagonal) and 3: the lower-right (diagonal).
    1334              : !>       Need to finalize the kernel matrix after calling this routine (possibly multiple times)
    1335              : ! **************************************************************************************************
    1336          138 :    SUBROUTINE ri_int_product(kernel, lhs_int, rhs_int, quadrants, qs_env, eps_filter, mo_transpose)
    1337              : 
    1338              :       TYPE(dbcsr_type), INTENT(INOUT)                    :: kernel
    1339              :       TYPE(dbcsr_p_type), DIMENSION(:), INTENT(IN)       :: lhs_int, rhs_int
    1340              :       LOGICAL, DIMENSION(3), INTENT(IN)                  :: quadrants
    1341              :       TYPE(qs_environment_type), POINTER                 :: qs_env
    1342              :       REAL(dp), INTENT(IN), OPTIONAL                     :: eps_filter
    1343              :       LOGICAL, INTENT(IN), OPTIONAL                      :: mo_transpose
    1344              : 
    1345              :       INTEGER                                            :: i, iblk, iso, j, jblk, jso, nblk, ndo_so
    1346              :       LOGICAL                                            :: found, my_mt
    1347          138 :       REAL(dp), DIMENSION(:, :), POINTER                 :: pblock
    1348              :       TYPE(dbcsr_iterator_type)                          :: iter
    1349          138 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrix_s
    1350              :       TYPE(dbcsr_type)                                   :: prod
    1351              : 
    1352          138 :       NULLIFY (matrix_s, pblock)
    1353              : 
    1354              : !  Initialization
    1355            0 :       CPASSERT(SIZE(lhs_int) == SIZE(rhs_int))
    1356          144 :       CPASSERT(ANY(quadrants))
    1357          138 :       ndo_so = SIZE(lhs_int)
    1358          138 :       CALL get_qs_env(qs_env, matrix_s=matrix_s, natom=nblk)
    1359          138 :       CALL dbcsr_create(prod, template=matrix_s(1)%matrix, matrix_type=dbcsr_type_no_symmetry)
    1360          138 :       my_mt = .FALSE.
    1361          138 :       IF (PRESENT(mo_transpose)) my_mt = mo_transpose
    1362              : 
    1363              :       ! The kernel matrix is symmetric (even if normal type) => only fill upper half on diagonal
    1364              :       ! quadrants, but the whole thing on upper-right quadrant
    1365          296 :       DO iso = 1, ndo_so
    1366          538 :          DO jso = 1, ndo_so
    1367              : 
    1368              :             ! If on-diagonal quadrants only, can skip jso < iso
    1369          242 :             IF (.NOT. quadrants(2) .AND. jso < iso) CYCLE
    1370              : 
    1371          200 :             i = iso; j = jso; 
    1372          200 :             IF (my_mt) THEN
    1373            6 :                i = jso; j = iso; 
    1374              :             END IF
    1375              : 
    1376              :             ! Take the product lhs*rhs^T
    1377              :             CALL dbcsr_multiply('N', 'T', 1.0_dp, lhs_int(i)%matrix, rhs_int(j)%matrix, &
    1378          200 :                                 0.0_dp, prod, filter_eps=eps_filter)
    1379              : 
    1380              :             ! Loop over blocks of prod and fill kernel matrix => ok cuz same (but replicated) dist
    1381          200 :             CALL dbcsr_iterator_start(iter, prod)
    1382         7889 :             DO WHILE (dbcsr_iterator_blocks_left(iter))
    1383              : 
    1384         7689 :                CALL dbcsr_iterator_next_block(iter, row=iblk, column=jblk)
    1385         7689 :                IF ((iso == jso .AND. jblk < iblk) .AND. .NOT. quadrants(2)) CYCLE
    1386              : 
    1387         4019 :                CALL dbcsr_get_block_p(prod, iblk, jblk, pblock, found)
    1388              : 
    1389         4219 :                IF (found) THEN
    1390              : 
    1391              :                   ! Case study on quadrant
    1392              :                   !upper-left
    1393         4019 :                   IF (quadrants(1)) THEN
    1394         3993 :                      CALL dbcsr_put_block(kernel, (iso - 1)*nblk + iblk, (jso - 1)*nblk + jblk, pblock)
    1395              :                   END IF
    1396              : 
    1397              :                   !upper-right
    1398         4019 :                   IF (quadrants(2)) THEN
    1399           16 :                      CALL dbcsr_put_block(kernel, (iso - 1)*nblk + iblk, (ndo_so + jso - 1)*nblk + jblk, pblock)
    1400              :                   END IF
    1401              : 
    1402              :                   !lower-right
    1403         4019 :                   IF (quadrants(3)) THEN
    1404           10 :                      CALL dbcsr_put_block(kernel, (ndo_so + iso - 1)*nblk + iblk, (ndo_so + jso - 1)*nblk + jblk, pblock)
    1405              :                   END IF
    1406              : 
    1407              :                END IF
    1408              : 
    1409              :             END DO ! dbcsr iterator
    1410          600 :             CALL dbcsr_iterator_stop(iter)
    1411              : 
    1412              :          END DO !jso
    1413              :       END DO !iso
    1414              : 
    1415              : !  Clean-up
    1416          138 :       CALL dbcsr_release(prod)
    1417              : 
    1418          138 :    END SUBROUTINE ri_int_product
    1419              : 
    1420              : END MODULE xas_tdp_kernel
        

Generated by: LCOV version 2.0-1