LCOV - CP2K Regtests (git:c740a4b)

LCOV - code coverage report

Current view:	top level - src - xas_tdp_utils.F (source / functions)		Hit	Total	Coverage
Test:	CP2K Regtests (git:c740a4b)	Lines:	1226	1316	93.2 %
Date:	2025-05-29 08:02:39	Functions:	17	19	89.5 %

          Line data    Source code

       1             : !--------------------------------------------------------------------------------------------------!
       2             : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3             : !   Copyright 2000-2025 CP2K developers group <https://cp2k.org>                                   !
       4             : !                                                                                                  !
       5             : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6             : !--------------------------------------------------------------------------------------------------!
       7             : 
       8             : ! **************************************************************************************************
       9             : !> \brief Utilities for X-ray absorption spectroscopy using TDDFPT
      10             : !> \author AB (01.2018)
      11             : ! **************************************************************************************************
      12             : 
      13             : MODULE xas_tdp_utils
      14             :    USE cp_blacs_env,                    ONLY: cp_blacs_env_type
      15             :    USE cp_cfm_diag,                     ONLY: cp_cfm_heevd
      16             :    USE cp_cfm_types,                    ONLY: cp_cfm_create,&
      17             :                                               cp_cfm_get_info,&
      18             :                                               cp_cfm_get_submatrix,&
      19             :                                               cp_cfm_release,&
      20             :                                               cp_cfm_type,&
      21             :                                               cp_fm_to_cfm
      22             :    USE cp_dbcsr_api,                    ONLY: &
      23             :         dbcsr_add, dbcsr_copy, dbcsr_create, dbcsr_distribution_get, dbcsr_distribution_new, &
      24             :         dbcsr_distribution_release, dbcsr_distribution_type, dbcsr_finalize, dbcsr_get_block_p, &
      25             :         dbcsr_get_info, dbcsr_iterator_blocks_left, dbcsr_iterator_next_block, &
      26             :         dbcsr_iterator_start, dbcsr_iterator_stop, dbcsr_iterator_type, dbcsr_multiply, &
      27             :         dbcsr_p_type, dbcsr_put_block, dbcsr_release, dbcsr_set, dbcsr_type, &
      28             :         dbcsr_type_no_symmetry, dbcsr_type_symmetric
      29             :    USE cp_dbcsr_cholesky,               ONLY: cp_dbcsr_cholesky_decompose,&
      30             :                                               cp_dbcsr_cholesky_invert
      31             :    USE cp_dbcsr_contrib,                ONLY: dbcsr_reserve_all_blocks
      32             :    USE cp_dbcsr_diag,                   ONLY: cp_dbcsr_power
      33             :    USE cp_dbcsr_operations,             ONLY: copy_dbcsr_to_fm,&
      34             :                                               copy_fm_to_dbcsr,&
      35             :                                               cp_dbcsr_sm_fm_multiply,&
      36             :                                               dbcsr_allocate_matrix_set,&
      37             :                                               dbcsr_deallocate_matrix_set
      38             :    USE cp_fm_basic_linalg,              ONLY: cp_fm_column_scale,&
      39             :                                               cp_fm_scale,&
      40             :                                               cp_fm_transpose,&
      41             :                                               cp_fm_uplo_to_full
      42             :    USE cp_fm_diag,                      ONLY: choose_eigv_solver,&
      43             :                                               cp_fm_geeig
      44             :    USE cp_fm_struct,                    ONLY: cp_fm_struct_create,&
      45             :                                               cp_fm_struct_release,&
      46             :                                               cp_fm_struct_type
      47             :    USE cp_fm_types,                     ONLY: cp_fm_create,&
      48             :                                               cp_fm_get_diag,&
      49             :                                               cp_fm_get_info,&
      50             :                                               cp_fm_get_submatrix,&
      51             :                                               cp_fm_release,&
      52             :                                               cp_fm_set_element,&
      53             :                                               cp_fm_to_fm_submat,&
      54             :                                               cp_fm_type
      55             :    USE cp_log_handling,                 ONLY: cp_logger_get_default_io_unit
      56             :    USE input_constants,                 ONLY: ot_precond_full_single,&
      57             :                                               tddfpt_singlet,&
      58             :                                               tddfpt_spin_cons,&
      59             :                                               tddfpt_spin_flip,&
      60             :                                               tddfpt_triplet,&
      61             :                                               xas_dip_len
      62             :    USE kinds,                           ONLY: dp
      63             :    USE mathlib,                         ONLY: get_diag
      64             :    USE message_passing,                 ONLY: mp_para_env_type
      65             :    USE parallel_gemm_api,               ONLY: parallel_gemm
      66             :    USE physcon,                         ONLY: a_fine
      67             :    USE preconditioner_types,            ONLY: destroy_preconditioner,&
      68             :                                               init_preconditioner,&
      69             :                                               preconditioner_type
      70             :    USE qs_environment_types,            ONLY: get_qs_env,&
      71             :                                               qs_environment_type
      72             :    USE qs_mo_methods,                   ONLY: calculate_subspace_eigenvalues
      73             :    USE qs_mo_types,                     ONLY: get_mo_set,&
      74             :                                               mo_set_type
      75             :    USE qs_ot_eigensolver,               ONLY: ot_eigensolver
      76             :    USE xas_tdp_kernel,                  ONLY: kernel_coulomb_xc,&
      77             :                                               kernel_exchange
      78             :    USE xas_tdp_types,                   ONLY: donor_state_type,&
      79             :                                               xas_tdp_control_type,&
      80             :                                               xas_tdp_env_type
      81             : 
      82             : !$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num
      83             : #include "./base/base_uses.f90"
      84             : 
      85             :    IMPLICIT NONE
      86             :    PRIVATE
      87             : 
      88             :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'xas_tdp_utils'
      89             : 
      90             :    PUBLIC :: setup_xas_tdp_prob, solve_xas_tdp_prob, include_rcs_soc, &
      91             :              include_os_soc, rcs_amew_soc_elements
      92             : 
      93             :    !A helper type for SOC
      94             :    TYPE dbcsr_soc_package_type
      95             :       TYPE(dbcsr_type), POINTER     :: dbcsr_sg => NULL()
      96             :       TYPE(dbcsr_type), POINTER     :: dbcsr_tp => NULL()
      97             :       TYPE(dbcsr_type), POINTER     :: dbcsr_sc => NULL()
      98             :       TYPE(dbcsr_type), POINTER     :: dbcsr_sf => NULL()
      99             :       TYPE(dbcsr_type), POINTER     :: dbcsr_prod => NULL()
     100             :       TYPE(dbcsr_type), POINTER     :: dbcsr_ovlp => NULL()
     101             :       TYPE(dbcsr_type), POINTER     :: dbcsr_tmp => NULL()
     102             :       TYPE(dbcsr_type), POINTER     :: dbcsr_work => NULL()
     103             :    END TYPE dbcsr_soc_package_type
     104             : 
     105             : CONTAINS
     106             : 
     107             : ! **************************************************************************************************
     108             : !> \brief Builds the matrix that defines the XAS TDDFPT generalized eigenvalue problem to be solved
     109             : !>        for excitation energies omega. The problem has the form omega*G*C = M*C, where C contains
     110             : !>        the response orbitals coefficients. The matrix M and the metric G are stored in the given
     111             : !>        donor_state
     112             : !> \param donor_state the donor_state for which the problem is restricted
     113             : !> \param qs_env ...
     114             : !> \param xas_tdp_env ...
     115             : !> \param xas_tdp_control ...
     116             : !> \note  the matrix M is symmetric and has the form | M_d   M_o |
     117             : !>                                                   | M_o   M_d |,
     118             : !>       -In the SPIN-RESTRICTED case:
     119             : !>        depending on whether we consider singlet or triplet excitation, the diagonal (M_d) and
     120             : !>        off-diagonal (M_o) parts of M differ:
     121             : !>        - For singlet: M_d = A + 2B + C_aa + C_ab - D
     122             : !>                       M_o = 2B + C_aa + C_ab - E
     123             : !>        - For triplet: M_d = A + C_aa - C_ab - D
     124             : !>                       M_o = C_aa - C_ab - E
     125             : !>        where other subroutines computes the matrices A, B, E, D and G, which are:
     126             : !>        - A: the ground-state contribution: F_pq*delta_IJ - epsilon_IJ*S_pq
     127             : !>        - B: the Coulomb kernel ~(pI|Jq)
     128             : !>        - C: the xc kernel c_aa (double derivatibe wrt to n_alpha) and C_ab (wrt n_alpha and n_beta)
     129             : !>        - D: the on-digonal exact exchange kernel ~(pq|IJ)
     130             : !>        - E: the off-diagonal exact exchange kernel ~(pJ|Iq)
     131             : !>        - G: the metric  S_pq*delta_IJ
     132             : !>        For the xc functionals, C_aa + C_ab or C_aa - C_ab are stored in the same matrix
     133             : !>        In the above definitions, I,J label the donnor MOs and p,q the sgfs of the basis
     134             : !>
     135             : !>       -In the SPIN-UNRESTRICTED, spin-conserving case:
     136             : !>        the on- and off-diagonal elements of M are:
     137             : !>                     M_d = A + B + C -D
     138             : !>                     M_o = B + C - E
     139             : !>        where the submatrices A, B, C, D and E are:
     140             : !>        - A: the groun-state contribution: (F_pq*delta_IJ - epsilon_IJ*S_pq) * delta_ab
     141             : !>        - B: the Coulomb kernel: (pI_a|J_b q)
     142             : !>        - C: the xc kernel: (pI_a|fxc_ab|J_b q)
     143             : !>        - D: the on-diagonal exact-exchange kernel: (pq|I_a J_b) delta_ab
     144             : !>        - E: the off-diagonal exact-exchange kernel: (pJ_b|I_a q) delta_ab
     145             : !>        - G: the metric S_pq*delta_IJ*delta_ab
     146             : !>        p,q label the sgfs, I,J the donro MOs and a,b the spins
     147             : !>
     148             : !>       -In both above cases, the matrix M is always  projected onto the unperturbed unoccupied
     149             : !>        ground state: M <= Q * M * Q^T = (1 - SP) * M * (1 - PS)
     150             : !>
     151             : !>       -In the SPIN-FLIP case:
     152             : !>        Only the TDA is implemented, that is, there are only on-diagonal elements:
     153             : !>                    M_d = A + C - D
     154             : !>        where the submatrices A, C and D are:
     155             : !>        - A: the ground state-contribution: (F_pq*delta_IJ - epsilon_IJ*S_pq) * delta_ab, but here,
     156             : !>                                            the alph-alpha quadrant has the beta Fock matrix and
     157             : !>                                            the beta-beta quadrant has the alpha Fock matrix
     158             : !>        - C: the SF xc kernel: (pI_a|fxc|J_bq), fxc = 1/m * (vxc_a -vxc_b)
     159             : !>        - D: the on-diagonal exact-exchange kernel: (pq|I_a J_b) delta_ab
     160             : !>        To ensure that all excitation start from a given spin to the opposite, we then multiply
     161             : !>        by a Q projector where we swap the alpha-alpha and beta-beta spin-quadrants
     162             : !>
     163             : !>        All possibilities: TDA or full-TDDFT, singlet or triplet, xc or hybrid, etc are treated
     164             : !>        in the same routine to avoid recomputing stuff
     165             : !>        Under TDA, only the on-diagonal elements of M are computed
     166             : !>        In the case of non-TDA, one turns the problem Hermitian
     167             : ! **************************************************************************************************
     168          56 :    SUBROUTINE setup_xas_tdp_prob(donor_state, qs_env, xas_tdp_env, xas_tdp_control)
     169             : 
     170             :       TYPE(donor_state_type), POINTER                    :: donor_state
     171             :       TYPE(qs_environment_type), POINTER                 :: qs_env
     172             :       TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
     173             :       TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
     174             : 
     175             :       CHARACTER(len=*), PARAMETER :: routineN = 'setup_xas_tdp_prob'
     176             : 
     177             :       INTEGER                                            :: handle
     178          56 :       INTEGER, DIMENSION(:), POINTER                     :: submat_blk_size
     179             :       LOGICAL                                            :: do_coul, do_hfx, do_os, do_sc, do_sf, &
     180             :                                                             do_sg, do_tda, do_tp, do_xc
     181             :       REAL(dp)                                           :: eps_filter, sx
     182             :       TYPE(dbcsr_distribution_type), POINTER             :: submat_dist
     183          56 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: ex_ker, xc_ker
     184             :       TYPE(dbcsr_type)                                   :: matrix_a, matrix_a_sf, matrix_b, proj_Q, &
     185             :                                                             proj_Q_sf, work
     186             :       TYPE(dbcsr_type), POINTER :: matrix_c_sc, matrix_c_sf, matrix_c_sg, matrix_c_tp, matrix_d, &
     187             :          matrix_e_sc, sc_matrix_tdp, sf_matrix_tdp, sg_matrix_tdp, tp_matrix_tdp
     188             : 
     189          56 :       NULLIFY (sg_matrix_tdp, tp_matrix_tdp, submat_dist, submat_blk_size, matrix_c_sf)
     190          56 :       NULLIFY (matrix_c_sg, matrix_c_tp, matrix_c_sc, matrix_d, matrix_e_sc)
     191          56 :       NULLIFY (sc_matrix_tdp, sf_matrix_tdp, ex_ker, xc_ker)
     192             : 
     193          56 :       CALL timeset(routineN, handle)
     194             : 
     195             : !  Initialization
     196          56 :       do_os = xas_tdp_control%do_uks .OR. xas_tdp_control%do_roks
     197          56 :       do_sc = xas_tdp_control%do_spin_cons
     198          56 :       do_sf = xas_tdp_control%do_spin_flip
     199          56 :       do_sg = xas_tdp_control%do_singlet
     200          56 :       do_tp = xas_tdp_control%do_triplet
     201          56 :       do_xc = xas_tdp_control%do_xc
     202          56 :       do_hfx = xas_tdp_control%do_hfx
     203          56 :       do_coul = xas_tdp_control%do_coulomb
     204          56 :       do_tda = xas_tdp_control%tamm_dancoff
     205          56 :       sx = xas_tdp_control%sx
     206          56 :       eps_filter = xas_tdp_control%eps_filter
     207          56 :       IF (do_sc) THEN
     208           8 :          ALLOCATE (donor_state%sc_matrix_tdp)
     209           8 :          sc_matrix_tdp => donor_state%sc_matrix_tdp
     210             :       END IF
     211          56 :       IF (do_sf) THEN
     212           2 :          ALLOCATE (donor_state%sf_matrix_tdp)
     213           2 :          sf_matrix_tdp => donor_state%sf_matrix_tdp
     214             :       END IF
     215          56 :       IF (do_sg) THEN
     216          48 :          ALLOCATE (donor_state%sg_matrix_tdp)
     217          48 :          sg_matrix_tdp => donor_state%sg_matrix_tdp
     218             :       END IF
     219          56 :       IF (do_tp) THEN
     220           2 :          ALLOCATE (donor_state%tp_matrix_tdp)
     221           2 :          tp_matrix_tdp => donor_state%tp_matrix_tdp
     222             :       END IF
     223             : 
     224             : !  Get the dist and block size of all matrices A, B, C, etc
     225          56 :       CALL compute_submat_dist_and_blk_size(donor_state, do_os, qs_env)
     226          56 :       submat_dist => donor_state%dbcsr_dist
     227          56 :       submat_blk_size => donor_state%blk_size
     228             : 
     229             : !  Allocate and compute all the matrices A, B, C, etc we will need
     230             : 
     231             :       ! The projector(s) on the unoccupied unperturbed ground state 1-SP and associated work matrix
     232          56 :       IF (do_sg .OR. do_tp .OR. do_sc) THEN !spin-conserving
     233          56 :          CALL get_q_projector(proj_Q, donor_state, do_os, xas_tdp_env)
     234             :       END IF
     235          56 :       IF (do_sf) THEN !spin-flip
     236           2 :          CALL get_q_projector(proj_Q_sf, donor_state, do_os, xas_tdp_env, do_sf=.TRUE.)
     237             :       END IF
     238             :       CALL dbcsr_create(matrix=work, matrix_type=dbcsr_type_no_symmetry, dist=submat_dist, &
     239          56 :                         name="WORK", row_blk_size=submat_blk_size, col_blk_size=submat_blk_size)
     240             : 
     241             :       ! The ground state contribution(s)
     242          56 :       IF (do_sg .OR. do_tp .OR. do_sc) THEN !spin-conserving
     243          56 :          CALL build_gs_contribution(matrix_a, donor_state, do_os, qs_env)
     244             :       END IF
     245          56 :       IF (do_sf) THEN !spin-flip
     246           2 :          CALL build_gs_contribution(matrix_a_sf, donor_state, do_os, qs_env, do_sf=.TRUE.)
     247             :       END IF
     248             : 
     249             :       ! The Coulomb and XC kernels. Internal analysis to know which matrix to compute
     250          56 :       CALL dbcsr_allocate_matrix_set(xc_ker, 4)
     251          56 :       ALLOCATE (xc_ker(1)%matrix, xc_ker(2)%matrix, xc_ker(3)%matrix, xc_ker(4)%matrix)
     252          56 :       CALL kernel_coulomb_xc(matrix_b, xc_ker, donor_state, xas_tdp_env, xas_tdp_control, qs_env)
     253          56 :       matrix_c_sg => xc_ker(1)%matrix; matrix_c_tp => xc_ker(2)%matrix
     254          56 :       matrix_c_sc => xc_ker(3)%matrix; matrix_c_sf => xc_ker(4)%matrix
     255             : 
     256             :       ! The exact exchange. Internal analysis to know which matrices to compute
     257          56 :       CALL dbcsr_allocate_matrix_set(ex_ker, 2)
     258          56 :       ALLOCATE (ex_ker(1)%matrix, ex_ker(2)%matrix)
     259          56 :       CALL kernel_exchange(ex_ker, donor_state, xas_tdp_env, xas_tdp_control, qs_env)
     260          56 :       matrix_d => ex_ker(1)%matrix; matrix_e_sc => ex_ker(2)%matrix
     261             : 
     262             :       ! Build the metric G, also need its inverse in case of full-TDDFT
     263          56 :       IF (do_tda) THEN
     264         100 :          ALLOCATE (donor_state%metric(1))
     265          50 :          CALL build_metric(donor_state%metric, donor_state, qs_env, do_os)
     266             :       ELSE
     267          18 :          ALLOCATE (donor_state%metric(2))
     268           6 :          CALL build_metric(donor_state%metric, donor_state, qs_env, do_os, do_inv=.TRUE.)
     269             :       END IF
     270             : 
     271             : !  Build the eigenvalue problem, depending on the case (TDA, singlet, triplet, hfx, etc ...)
     272          56 :       IF (do_tda) THEN
     273             : 
     274          50 :          IF (do_sc) THEN ! open-shell spin-conserving under TDA
     275             : 
     276             :             ! The final matrix is M = A + B + C - D
     277           8 :             CALL dbcsr_copy(sc_matrix_tdp, matrix_a, name="OS MATRIX TDP")
     278           8 :             IF (do_coul) CALL dbcsr_add(sc_matrix_tdp, matrix_b, 1.0_dp, 1.0_dp)
     279             : 
     280           8 :             IF (do_xc) CALL dbcsr_add(sc_matrix_tdp, matrix_c_sc, 1.0_dp, 1.0_dp) !xc kernel
     281           8 :             IF (do_hfx) CALL dbcsr_add(sc_matrix_tdp, matrix_d, 1.0_dp, -1.0_dp*sx) !scaled hfx
     282             : 
     283             :             ! The product with the Q projector
     284           8 :             CALL dbcsr_multiply('N', 'N', 1.0_dp, proj_Q, sc_matrix_tdp, 0.0_dp, work, filter_eps=eps_filter)
     285           8 :             CALL dbcsr_multiply('N', 'T', 1.0_dp, work, proj_Q, 0.0_dp, sc_matrix_tdp, filter_eps=eps_filter)
     286             : 
     287             :          END IF !do_sc
     288             : 
     289          50 :          IF (do_sf) THEN ! open-shell spin-flip under TDA
     290             : 
     291             :             ! The final matrix is M = A + C - D
     292           2 :             CALL dbcsr_copy(sf_matrix_tdp, matrix_a_sf, name="OS MATRIX TDP")
     293             : 
     294           2 :             IF (do_xc) CALL dbcsr_add(sf_matrix_tdp, matrix_c_sf, 1.0_dp, 1.0_dp) !xc kernel
     295           2 :             IF (do_hfx) CALL dbcsr_add(sf_matrix_tdp, matrix_d, 1.0_dp, -1.0_dp*sx) !scaled hfx
     296             : 
     297             :             ! Take the product with the (spin-flip) Q projector
     298           2 :             CALL dbcsr_multiply('N', 'N', 1.0_dp, proj_Q_sf, sf_matrix_tdp, 0.0_dp, work, filter_eps=eps_filter)
     299           2 :             CALL dbcsr_multiply('N', 'T', 1.0_dp, work, proj_Q_sf, 0.0_dp, sf_matrix_tdp, filter_eps=eps_filter)
     300             : 
     301             :          END IF !do_sf
     302             : 
     303          50 :          IF (do_sg) THEN ! singlets under TDA
     304             : 
     305             :             ! The final matrix is M = A + 2B + (C_aa + C_ab) - D
     306          42 :             CALL dbcsr_copy(sg_matrix_tdp, matrix_a, name="SINGLET MATRIX TDP")
     307          42 :             IF (do_coul) CALL dbcsr_add(sg_matrix_tdp, matrix_b, 1.0_dp, 2.0_dp)
     308             : 
     309          42 :             IF (do_xc) CALL dbcsr_add(sg_matrix_tdp, matrix_c_sg, 1.0_dp, 1.0_dp) ! xc kernel
     310          42 :             IF (do_hfx) CALL dbcsr_add(sg_matrix_tdp, matrix_d, 1.0_dp, -1.0_dp*sx) ! scaled hfx
     311             : 
     312             :             ! Take the product with the Q projector:
     313          42 :             CALL dbcsr_multiply('N', 'N', 1.0_dp, proj_Q, sg_matrix_tdp, 0.0_dp, work, filter_eps=eps_filter)
     314          42 :             CALL dbcsr_multiply('N', 'T', 1.0_dp, work, proj_Q, 0.0_dp, sg_matrix_tdp, filter_eps=eps_filter)
     315             : 
     316             :          END IF !do_sg (TDA)
     317             : 
     318          50 :          IF (do_tp) THEN ! triplets under TDA
     319             : 
     320             :             ! The final matrix is M =  A + (C_aa - C_ab) - D
     321           2 :             CALL dbcsr_copy(tp_matrix_tdp, matrix_a, name="TRIPLET MATRIX TDP")
     322             : 
     323           2 :             IF (do_xc) CALL dbcsr_add(tp_matrix_tdp, matrix_c_tp, 1.0_dp, 1.0_dp) ! xc_kernel
     324           2 :             IF (do_hfx) CALL dbcsr_add(tp_matrix_tdp, matrix_d, 1.0_dp, -1.0_dp*sx) ! scaled hfx
     325             : 
     326             :             ! Take the product with the Q projector:
     327           2 :             CALL dbcsr_multiply('N', 'N', 1.0_dp, proj_Q, tp_matrix_tdp, 0.0_dp, work, filter_eps=eps_filter)
     328           2 :             CALL dbcsr_multiply('N', 'T', 1.0_dp, work, proj_Q, 0.0_dp, tp_matrix_tdp, filter_eps=eps_filter)
     329             : 
     330             :          END IF !do_tp (TDA)
     331             : 
     332             :       ELSE ! not TDA
     333             : 
     334             :          ! In the case of full-TDDFT, the problem is turned Hermitian with the help of auxiliary
     335             :          ! matrices AUX = (A-D+E)^(+-0.5) that are stored in donor_state
     336             :          CALL build_aux_matrix(1.0E-8_dp, sx, matrix_a, matrix_d, matrix_e_sc, do_hfx, proj_Q, &
     337           6 :                                work, donor_state, eps_filter, qs_env)
     338             : 
     339           6 :          IF (do_sc) THEN !full-TDDFT open-shell spin-conserving
     340             : 
     341             :             ! The final matrix is the sum of the on- and off-diagonal elements as in the description
     342             :             ! M = A + 2B + 2C - D - E
     343           0 :             CALL dbcsr_copy(sc_matrix_tdp, matrix_a, name="OS MATRIX TDP")
     344           0 :             IF (do_coul) CALL dbcsr_add(sc_matrix_tdp, matrix_b, 1.0_dp, 2.0_dp)
     345             : 
     346           0 :             IF (do_hfx) THEN !scaled hfx
     347           0 :                CALL dbcsr_add(sc_matrix_tdp, matrix_d, 1.0_dp, -1.0_dp*sx)
     348           0 :                CALL dbcsr_add(sc_matrix_tdp, matrix_e_sc, 1.0_dp, -1.0_dp*sx)
     349             :             END IF
     350           0 :             IF (do_xc) THEN
     351           0 :                CALL dbcsr_add(sc_matrix_tdp, matrix_c_sc, 1.0_dp, 2.0_dp)
     352             :             END IF
     353             : 
     354             :             ! Take the product with the Q projector
     355           0 :             CALL dbcsr_multiply('N', 'N', 1.0_dp, proj_Q, sc_matrix_tdp, 0.0_dp, work, filter_eps=eps_filter)
     356           0 :             CALL dbcsr_multiply('N', 'T', 1.0_dp, work, proj_Q, 0.0_dp, sc_matrix_tdp, filter_eps=eps_filter)
     357             : 
     358             :             ! Take the product with the inverse metric
     359             :             ! M <= G^-1 * M * G^-1
     360             :             CALL dbcsr_multiply('N', 'N', 1.0_dp, donor_state%metric(2)%matrix, sc_matrix_tdp, &
     361           0 :                                 0.0_dp, work, filter_eps=eps_filter)
     362             :             CALL dbcsr_multiply('N', 'N', 1.0_dp, work, donor_state%metric(2)%matrix, 0.0_dp, &
     363           0 :                                 sc_matrix_tdp, filter_eps=eps_filter)
     364             : 
     365             :          END IF
     366             : 
     367           6 :          IF (do_sg) THEN ! full-TDDFT singlets
     368             : 
     369             :             ! The final matrix is the sum of the on- and off-diagonal elements as in the description
     370             :             ! M = A + 4B + 2(C_aa + C_ab) - D - E
     371           6 :             CALL dbcsr_copy(sg_matrix_tdp, matrix_a, name="SINGLET MATRIX TDP")
     372           6 :             IF (do_coul) CALL dbcsr_add(sg_matrix_tdp, matrix_b, 1.0_dp, 4.0_dp)
     373             : 
     374           6 :             IF (do_hfx) THEN !scaled hfx
     375           6 :                CALL dbcsr_add(sg_matrix_tdp, matrix_d, 1.0_dp, -1.0_dp*sx)
     376           6 :                CALL dbcsr_add(sg_matrix_tdp, matrix_e_sc, 1.0_dp, -1.0_dp*sx)
     377             :             END IF
     378           6 :             IF (do_xc) THEN !xc kernel
     379           6 :                CALL dbcsr_add(sg_matrix_tdp, matrix_c_sg, 1.0_dp, 2.0_dp)
     380             :             END IF
     381             : 
     382             :             ! Take the product with the Q projector
     383           6 :             CALL dbcsr_multiply('N', 'N', 1.0_dp, proj_Q, sg_matrix_tdp, 0.0_dp, work, filter_eps=eps_filter)
     384           6 :             CALL dbcsr_multiply('N', 'T', 1.0_dp, work, proj_Q, 0.0_dp, sg_matrix_tdp, filter_eps=eps_filter)
     385             : 
     386             :             ! Take the product with the inverse metric
     387             :             ! M <= G^-1 * M * G^-1
     388             :             CALL dbcsr_multiply('N', 'N', 1.0_dp, donor_state%metric(2)%matrix, sg_matrix_tdp, &
     389           6 :                                 0.0_dp, work, filter_eps=eps_filter)
     390             :             CALL dbcsr_multiply('N', 'N', 1.0_dp, work, donor_state%metric(2)%matrix, 0.0_dp, &
     391           6 :                                 sg_matrix_tdp, filter_eps=eps_filter)
     392             : 
     393             :          END IF ! singlets
     394             : 
     395           6 :          IF (do_tp) THEN ! full-TDDFT triplets
     396             : 
     397             :             ! The final matrix is the sum of the on- and off-diagonal elements as in the description
     398             :             ! M = A + 2(C_aa - C_ab) - D - E
     399           0 :             CALL dbcsr_copy(tp_matrix_tdp, matrix_a, name="TRIPLET MATRIX TDP")
     400             : 
     401           0 :             IF (do_hfx) THEN !scaled hfx
     402           0 :                CALL dbcsr_add(tp_matrix_tdp, matrix_d, 1.0_dp, -1.0_dp*sx)
     403           0 :                CALL dbcsr_add(tp_matrix_tdp, matrix_e_sc, 1.0_dp, -1.0_dp*sx)
     404             :             END IF
     405           0 :             IF (do_xc) THEN
     406           0 :                CALL dbcsr_add(tp_matrix_tdp, matrix_c_tp, 1.0_dp, 2.0_dp)
     407             :             END IF
     408             : 
     409             :             ! Take the product with the Q projector
     410           0 :             CALL dbcsr_multiply('N', 'N', 1.0_dp, proj_Q, tp_matrix_tdp, 0.0_dp, work, filter_eps=eps_filter)
     411           0 :             CALL dbcsr_multiply('N', 'T', 1.0_dp, work, proj_Q, 0.0_dp, tp_matrix_tdp, filter_eps=eps_filter)
     412             : 
     413             :             ! Take the product with the inverse metric
     414             :             ! M <= G^-1 * M * G^-1
     415             :             CALL dbcsr_multiply('N', 'N', 1.0_dp, donor_state%metric(2)%matrix, tp_matrix_tdp, &
     416           0 :                                 0.0_dp, work, filter_eps=eps_filter)
     417             :             CALL dbcsr_multiply('N', 'N', 1.0_dp, work, donor_state%metric(2)%matrix, 0.0_dp, &
     418           0 :                                 tp_matrix_tdp, filter_eps=eps_filter)
     419             : 
     420             :          END IF ! triplets
     421             : 
     422             :       END IF ! test on TDA
     423             : 
     424             : !  Clean-up
     425          56 :       CALL dbcsr_release(matrix_a)
     426          56 :       CALL dbcsr_release(matrix_a_sf)
     427          56 :       CALL dbcsr_release(matrix_b)
     428          56 :       CALL dbcsr_release(proj_Q)
     429          56 :       CALL dbcsr_release(proj_Q_sf)
     430          56 :       CALL dbcsr_release(work)
     431          56 :       CALL dbcsr_deallocate_matrix_set(ex_ker)
     432          56 :       CALL dbcsr_deallocate_matrix_set(xc_ker)
     433             : 
     434          56 :       CALL timestop(handle)
     435             : 
     436          56 :    END SUBROUTINE setup_xas_tdp_prob
     437             : 
     438             : ! **************************************************************************************************
     439             : !> \brief Solves the XAS TDP generalized eigenvalue problem omega*C = matrix_tdp*C using standard
     440             : !>        full diagonalization methods. The problem is Hermitian (made that way even if not TDA)
     441             : !> \param donor_state ...
     442             : !> \param xas_tdp_control ...
     443             : !> \param xas_tdp_env ...
     444             : !> \param qs_env ...
     445             : !> \param ex_type whether we deal with singlets, triplets, spin-conserving open-shell or spin-flip
     446             : !> \note The computed eigenvalues and eigenvectors are stored in the donor_state
     447             : !>       The eigenvectors are the LR-coefficients. In case of TDA, c^- is stored. In the general
     448             : !>       case, the sum c^+ + c^- is stored.
     449             : !>      - Spin-restricted:
     450             : !>       In case both singlets and triplets are considered, this routine must be called twice. This
     451             : !>       is the choice that was made because the body of the routine is exactly the same in both cases
     452             : !>       Note that for singlet we solve for u = 1/sqrt(2)*(c_alpha + c_beta) = sqrt(2)*c
     453             : !>       and that for triplets we solve for v = 1/sqrt(2)*(c_alpha - c_beta) = sqrt(2)*c
     454             : !>      - Spin-unrestricted:
     455             : !>       The problem is solved for the LR coefficients c_pIa as they are (not linear combination)
     456             : !>       The routine might be called twice (once for spin-conservign, one for spin-flip)
     457             : ! **************************************************************************************************
     458          60 :    SUBROUTINE solve_xas_tdp_prob(donor_state, xas_tdp_control, xas_tdp_env, qs_env, ex_type)
     459             : 
     460             :       TYPE(donor_state_type), POINTER                    :: donor_state
     461             :       TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
     462             :       TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
     463             :       TYPE(qs_environment_type), POINTER                 :: qs_env
     464             :       INTEGER, INTENT(IN)                                :: ex_type
     465             : 
     466             :       CHARACTER(len=*), PARAMETER :: routineN = 'solve_xas_tdp_prob'
     467             : 
     468             :       INTEGER                                            :: first_ex, handle, i, imo, ispin, nao, &
     469             :                                                             ndo_mo, nelectron, nevals, nocc, nrow, &
     470             :                                                             nspins, ot_nevals
     471             :       LOGICAL                                            :: do_os, do_range, do_sf
     472             :       REAL(dp)                                           :: ot_elb
     473          60 :       REAL(dp), ALLOCATABLE, DIMENSION(:)                :: scaling, tmp_evals
     474          60 :       REAL(dp), DIMENSION(:), POINTER                    :: lr_evals
     475             :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
     476             :       TYPE(cp_fm_struct_type), POINTER                   :: ex_struct, fm_struct, ot_fm_struct
     477             :       TYPE(cp_fm_type)                                   :: c_diff, c_sum, lhs_matrix, rhs_matrix, &
     478             :                                                             work
     479             :       TYPE(cp_fm_type), POINTER                          :: lr_coeffs
     480             :       TYPE(dbcsr_type)                                   :: tmp_mat, tmp_mat2
     481             :       TYPE(dbcsr_type), POINTER                          :: matrix_tdp
     482             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     483             : 
     484          60 :       CALL timeset(routineN, handle)
     485             : 
     486          60 :       NULLIFY (para_env, blacs_env, fm_struct, matrix_tdp)
     487          60 :       NULLIFY (ex_struct, lr_evals, lr_coeffs)
     488          60 :       CPASSERT(ASSOCIATED(xas_tdp_env))
     489             : 
     490          60 :       do_os = .FALSE.
     491          60 :       do_sf = .FALSE.
     492          60 :       IF (ex_type == tddfpt_spin_cons) THEN
     493           8 :          matrix_tdp => donor_state%sc_matrix_tdp
     494           8 :          do_os = .TRUE.
     495          52 :       ELSE IF (ex_type == tddfpt_spin_flip) THEN
     496           2 :          matrix_tdp => donor_state%sf_matrix_tdp
     497           2 :          do_os = .TRUE.
     498           2 :          do_sf = .TRUE.
     499          50 :       ELSE IF (ex_type == tddfpt_singlet) THEN
     500          48 :          matrix_tdp => donor_state%sg_matrix_tdp
     501           2 :       ELSE IF (ex_type == tddfpt_triplet) THEN
     502           2 :          matrix_tdp => donor_state%tp_matrix_tdp
     503             :       END IF
     504          60 :       CALL get_qs_env(qs_env=qs_env, para_env=para_env, blacs_env=blacs_env, nelectron_total=nelectron)
     505             : 
     506             : !     Initialization
     507          60 :       nspins = 1; IF (do_os) nspins = 2
     508          60 :       CALL cp_fm_get_info(donor_state%gs_coeffs, nrow_global=nao)
     509          60 :       CALL dbcsr_get_info(matrix_tdp, nfullrows_total=nrow)
     510          60 :       ndo_mo = donor_state%ndo_mo
     511          60 :       nocc = nelectron/2; IF (do_os) nocc = nelectron
     512          60 :       nocc = ndo_mo*nocc
     513             : 
     514             :       !solve by energy_range or number of states ?
     515          60 :       do_range = .FALSE.
     516          60 :       IF (xas_tdp_control%e_range > 0.0_dp) do_range = .TRUE.
     517             : 
     518             :       ! create the fm infrastructure
     519             :       CALL cp_fm_struct_create(fm_struct, context=blacs_env, nrow_global=nrow, &
     520          60 :                                para_env=para_env, ncol_global=nrow)
     521          60 :       CALL cp_fm_create(rhs_matrix, fm_struct)
     522          60 :       CALL cp_fm_create(work, fm_struct)
     523             : 
     524             : !     Test on TDA
     525          60 :       IF (xas_tdp_control%tamm_dancoff) THEN
     526             : 
     527          54 :          IF (xas_tdp_control%do_ot) THEN
     528             : 
     529             :             !need to precompute the number of evals for OT
     530           4 :             IF (do_range) THEN
     531             : 
     532             :                !in case of energy range criterion, use LUMO eigenvalues as estimate
     533           4 :                ot_elb = xas_tdp_env%lumo_evals(1)%array(1)
     534           4 :                IF (do_os) ot_elb = MIN(ot_elb, xas_tdp_env%lumo_evals(2)%array(1))
     535             : 
     536        1028 :                ot_nevals = COUNT(xas_tdp_env%lumo_evals(1)%array - ot_elb .LE. xas_tdp_control%e_range)
     537           4 :                IF (do_os) ot_nevals = ot_nevals + &
     538           0 :                                       COUNT(xas_tdp_env%lumo_evals(2)%array - ot_elb .LE. xas_tdp_control%e_range)
     539             : 
     540             :             ELSE
     541             : 
     542           0 :                ot_nevals = nspins*nao - nocc/ndo_mo
     543           0 :                IF (xas_tdp_control%n_excited > 0 .AND. xas_tdp_control%n_excited < ot_nevals) THEN
     544           0 :                   ot_nevals = xas_tdp_control%n_excited
     545             :                END IF
     546             :             END IF
     547           4 :             ot_nevals = ndo_mo*ot_nevals !as in input description, multiply by multiplicity of donor state
     548             : 
     549             : !           Organize results data
     550           4 :             first_ex = 1
     551          12 :             ALLOCATE (tmp_evals(ot_nevals))
     552             :             CALL cp_fm_struct_create(ot_fm_struct, context=blacs_env, para_env=para_env, &
     553           4 :                                      nrow_global=nrow, ncol_global=ot_nevals)
     554           4 :             CALL cp_fm_create(c_sum, ot_fm_struct)
     555             : 
     556             :             CALL xas_ot_solver(matrix_tdp, donor_state%metric(1)%matrix, c_sum, tmp_evals, ot_nevals, &
     557           4 :                                do_sf, donor_state, xas_tdp_env, xas_tdp_control, qs_env)
     558             : 
     559           8 :             CALL cp_fm_struct_release(ot_fm_struct)
     560             : 
     561             :          ELSE
     562             : 
     563             : !           Organize results data
     564          50 :             first_ex = nocc + 1 !where to find the first proper eigenvalue
     565         150 :             ALLOCATE (tmp_evals(nrow))
     566          50 :             CALL cp_fm_create(c_sum, fm_struct)
     567             : 
     568             : !           Get the main matrix_tdp as an fm
     569          50 :             CALL copy_dbcsr_to_fm(matrix_tdp, rhs_matrix)
     570             : 
     571             : !           Get the metric as a fm
     572          50 :             CALL cp_fm_create(lhs_matrix, fm_struct)
     573          50 :             CALL copy_dbcsr_to_fm(donor_state%metric(1)%matrix, lhs_matrix)
     574             : 
     575             :             !Diagonalisation (Cholesky decomposition). In TDA, c_sum = c^-
     576          50 :             CALL cp_fm_geeig(rhs_matrix, lhs_matrix, c_sum, tmp_evals, work)
     577             : 
     578             : !           TDA specific clean-up
     579         150 :             CALL cp_fm_release(lhs_matrix)
     580             : 
     581             :          END IF
     582             : 
     583             :       ELSE ! not TDA
     584             : 
     585             : !        Organize results data
     586           6 :          first_ex = nocc + 1
     587          18 :          ALLOCATE (tmp_evals(nrow))
     588           6 :          CALL cp_fm_create(c_sum, fm_struct)
     589             : 
     590             : !        Need to multiply the current matrix_tdp with the auxiliary matrix
     591             : !        tmp_mat =  (A-D+E)^0.5 * M * (A-D+E)^0.5
     592           6 :          CALL dbcsr_create(matrix=tmp_mat, template=matrix_tdp, matrix_type=dbcsr_type_no_symmetry)
     593           6 :          CALL dbcsr_create(matrix=tmp_mat2, template=matrix_tdp, matrix_type=dbcsr_type_no_symmetry)
     594             :          CALL dbcsr_multiply('N', 'N', 1.0_dp, donor_state%matrix_aux, matrix_tdp, &
     595           6 :                              0.0_dp, tmp_mat2, filter_eps=xas_tdp_control%eps_filter)
     596             :          CALL dbcsr_multiply('N', 'N', 1.0_dp, tmp_mat2, donor_state%matrix_aux, &
     597           6 :                              0.0_dp, tmp_mat, filter_eps=xas_tdp_control%eps_filter)
     598             : 
     599             : !        Get the matrix as a fm
     600           6 :          CALL copy_dbcsr_to_fm(tmp_mat, rhs_matrix)
     601             : 
     602             : !        Solve the "turned-Hermitian" eigenvalue problem
     603           6 :          CALL choose_eigv_solver(rhs_matrix, work, tmp_evals)
     604             : 
     605             : !        Currently, work = (A-D+E)^0.5 (c^+ - c^-) and tmp_evals = omega^2
     606             : !        Put tiny almost zero eigenvalues to zero (corresponding to occupied MOs)
     607         150 :          WHERE (tmp_evals < 1.0E-4_dp) tmp_evals = 0.0_dp
     608             : 
     609             : !        Retrieve c_diff = (c^+ - c^-) for normalization
     610             : !        (c^+ - c^-) = 1/omega^2 * M * (A-D+E)^0.5 * work
     611           6 :          CALL cp_fm_create(c_diff, fm_struct)
     612             :          CALL dbcsr_multiply('N', 'N', 1.0_dp, matrix_tdp, donor_state%matrix_aux, &
     613           6 :                              0.0_dp, tmp_mat, filter_eps=xas_tdp_control%eps_filter)
     614           6 :          CALL cp_dbcsr_sm_fm_multiply(tmp_mat, work, c_diff, ncol=nrow)
     615             : 
     616          12 :          ALLOCATE (scaling(nrow))
     617         150 :          scaling = 0.0_dp
     618         150 :          WHERE (ABS(tmp_evals) > 1.0E-8_dp) scaling = 1.0_dp/tmp_evals
     619           6 :          CALL cp_fm_column_scale(c_diff, scaling)
     620             : 
     621             : !        Normalize with the metric: c_diff * G * c_diff = +- 1
     622         150 :          scaling = 0.0_dp
     623           6 :          CALL get_normal_scaling(scaling, c_diff, donor_state)
     624           6 :          CALL cp_fm_column_scale(c_diff, scaling)
     625             : 
     626             : !        Get the actual eigenvalues
     627         150 :          tmp_evals = SQRT(tmp_evals)
     628             : 
     629             : !        Get c_sum = (c^+ + c^-), which appears in all transition density related expressions
     630             : !        c_sum = -1/omega G^-1 * (A-D+E) * (c^+ - c^-)
     631             :          CALL dbcsr_multiply('N', 'N', 1.0_dp, donor_state%matrix_aux, donor_state%matrix_aux, &
     632           6 :                              0.0_dp, tmp_mat2, filter_eps=xas_tdp_control%eps_filter)
     633             :          CALL dbcsr_multiply('N', 'N', 1.0_dp, donor_state%metric(2)%matrix, tmp_mat2, &
     634           6 :                              0.0_dp, tmp_mat, filter_eps=xas_tdp_control%eps_filter)
     635           6 :          CALL cp_dbcsr_sm_fm_multiply(tmp_mat, c_diff, c_sum, ncol=nrow)
     636         150 :          WHERE (tmp_evals .NE. 0) scaling = -1.0_dp/tmp_evals
     637           6 :          CALL cp_fm_column_scale(c_sum, scaling)
     638             : 
     639             : !        Full TDDFT specific clean-up
     640           6 :          CALL cp_fm_release(c_diff)
     641           6 :          CALL dbcsr_release(tmp_mat)
     642           6 :          CALL dbcsr_release(tmp_mat2)
     643          18 :          DEALLOCATE (scaling)
     644             : 
     645             :       END IF ! TDA
     646             : 
     647             : !     Full matrix clean-up
     648          60 :       CALL cp_fm_release(rhs_matrix)
     649          60 :       CALL cp_fm_release(work)
     650             : 
     651             : !  Reorganize the eigenvalues, we want a lr_evals array with the proper dimension and where the
     652             : !  first element is the first eval. Need a case study on do_range/ot
     653          60 :       IF (xas_tdp_control%do_ot) THEN
     654             : 
     655           4 :          nevals = ot_nevals
     656             : 
     657          56 :       ELSE IF (do_range) THEN
     658             : 
     659          94 :          WHERE (tmp_evals > tmp_evals(first_ex) + xas_tdp_control%e_range) tmp_evals = 0.0_dp
     660          48 :          nevals = MAXLOC(tmp_evals, 1) - nocc
     661             : 
     662             :       ELSE
     663             : 
     664             :          !Determine the number of evals to keep base on N_EXCITED
     665          54 :          nevals = nspins*nao - nocc/ndo_mo
     666          54 :          IF (xas_tdp_control%n_excited > 0 .AND. xas_tdp_control%n_excited < nevals) THEN
     667             :             nevals = xas_tdp_control%n_excited
     668             :          END IF
     669          54 :          nevals = ndo_mo*nevals !as in input description, multiply by # of donor MOs
     670             : 
     671             :       END IF
     672             : 
     673         180 :       ALLOCATE (lr_evals(nevals))
     674         964 :       lr_evals(:) = tmp_evals(first_ex:first_ex + nevals - 1)
     675             : 
     676             : !  Reorganize the eigenvectors in array of cp_fm so that each ndo_mo columns corresponds to an
     677             : !  excited state. Makes later calls to those easier and more efficient
     678             : !  In case of open-shell, we store the coeffs in the same logic as the matrix => first block where
     679             : !  the columns are the c_Ialpha and second block with columns as c_Ibeta
     680             :       CALL cp_fm_struct_create(ex_struct, nrow_global=nao, ncol_global=ndo_mo*nspins*nevals, &
     681          60 :                                para_env=para_env, context=blacs_env)
     682          60 :       ALLOCATE (lr_coeffs)
     683          60 :       CALL cp_fm_create(lr_coeffs, ex_struct)
     684             : 
     685         964 :       DO i = 1, nevals
     686        2100 :          DO ispin = 1, nspins
     687        3464 :             DO imo = 1, ndo_mo
     688             : 
     689             :                CALL cp_fm_to_fm_submat(msource=c_sum, mtarget=lr_coeffs, &
     690             :                                        nrow=nao, ncol=1, s_firstrow=((ispin - 1)*ndo_mo + imo - 1)*nao + 1, &
     691             :                                        s_firstcol=first_ex + i - 1, t_firstrow=1, &
     692        2560 :                                        t_firstcol=(i - 1)*ndo_mo*nspins + (ispin - 1)*ndo_mo + imo)
     693             :             END DO !imo
     694             :          END DO !ispin
     695             :       END DO !istate
     696             : 
     697          60 :       IF (ex_type == tddfpt_spin_cons) THEN
     698           8 :          donor_state%sc_coeffs => lr_coeffs
     699           8 :          donor_state%sc_evals => lr_evals
     700          52 :       ELSE IF (ex_type == tddfpt_spin_flip) THEN
     701           2 :          donor_state%sf_coeffs => lr_coeffs
     702           2 :          donor_state%sf_evals => lr_evals
     703          50 :       ELSE IF (ex_type == tddfpt_singlet) THEN
     704          48 :          donor_state%sg_coeffs => lr_coeffs
     705          48 :          donor_State%sg_evals => lr_evals
     706           2 :       ELSE IF (ex_type == tddfpt_triplet) THEN
     707           2 :          donor_state%tp_coeffs => lr_coeffs
     708           2 :          donor_state%tp_evals => lr_evals
     709             :       END IF
     710             : 
     711             : !  Clean-up
     712          60 :       CALL cp_fm_release(c_sum)
     713          60 :       CALL cp_fm_struct_release(fm_struct)
     714          60 :       CALL cp_fm_struct_release(ex_struct)
     715             : 
     716             : !  Perform a partial clean-up of the donor_state
     717          60 :       CALL dbcsr_release(matrix_tdp)
     718             : 
     719          60 :       CALL timestop(handle)
     720             : 
     721         300 :    END SUBROUTINE solve_xas_tdp_prob
     722             : 
     723             : ! **************************************************************************************************
     724             : !> \brief An iterative solver based on OT for the TDA generalized eigV problem lambda Sx = Hx
     725             : !> \param matrix_tdp the RHS matrix (dbcsr)
     726             : !> \param metric the LHS matrix (dbcsr)
     727             : !> \param evecs the corresponding eigenvectors (fm)
     728             : !> \param evals the corresponding eigenvalues
     729             : !> \param neig the number of wanted eigenvalues
     730             : !> \param do_sf whther spin-flip TDDFT is on
     731             : !> \param donor_state ...
     732             : !> \param xas_tdp_env ...
     733             : !> \param xas_tdp_control ...
     734             : !> \param qs_env ...
     735             : ! **************************************************************************************************
     736           4 :    SUBROUTINE xas_ot_solver(matrix_tdp, metric, evecs, evals, neig, do_sf, donor_state, xas_tdp_env, &
     737             :                             xas_tdp_control, qs_env)
     738             : 
     739             :       TYPE(dbcsr_type), POINTER                          :: matrix_tdp, metric
     740             :       TYPE(cp_fm_type), INTENT(INOUT)                    :: evecs
     741             :       REAL(dp), DIMENSION(:)                             :: evals
     742             :       INTEGER, INTENT(IN)                                :: neig
     743             :       LOGICAL                                            :: do_sf
     744             :       TYPE(donor_state_type), POINTER                    :: donor_state
     745             :       TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
     746             :       TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
     747             :       TYPE(qs_environment_type), POINTER                 :: qs_env
     748             : 
     749             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'xas_ot_solver'
     750             : 
     751             :       INTEGER                                            :: handle, max_iter, ndo_mo, nelec_spin(2), &
     752             :                                                             nocc, nrow, output_unit
     753             :       LOGICAL                                            :: do_os
     754             :       REAL(dp)                                           :: eps_iter
     755             :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
     756             :       TYPE(cp_fm_struct_type), POINTER                   :: ortho_struct
     757             :       TYPE(cp_fm_type)                                   :: ortho_space
     758             :       TYPE(dbcsr_type), POINTER                          :: ot_prec
     759             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     760             :       TYPE(preconditioner_type), POINTER                 :: precond
     761             : 
     762           4 :       NULLIFY (para_env, blacs_env, ortho_struct, ot_prec)
     763             : 
     764           4 :       CALL timeset(routineN, handle)
     765             : 
     766           4 :       output_unit = cp_logger_get_default_io_unit()
     767           4 :       IF (output_unit > 0) THEN
     768             :          WRITE (output_unit, "(/,T5,A)") &
     769           2 :             "Using OT eigensolver for diagonalization: "
     770             :       END IF
     771             : 
     772           4 :       do_os = xas_tdp_control%do_uks .OR. xas_tdp_control%do_roks
     773           4 :       ndo_mo = donor_state%ndo_mo
     774           4 :       CALL get_qs_env(qs_env, para_env=para_env, blacs_env=blacs_env, nelectron_spin=nelec_spin)
     775           4 :       CALL cp_fm_get_info(evecs, nrow_global=nrow)
     776           4 :       max_iter = xas_tdp_control%ot_max_iter
     777           4 :       eps_iter = xas_tdp_control%ot_eps_iter
     778           4 :       nocc = nelec_spin(1)/2*ndo_mo
     779           4 :       IF (do_os) nocc = SUM(nelec_spin)*ndo_mo
     780             : 
     781             : !  Initialize relevent matrices
     782           4 :       ALLOCATE (ot_prec)
     783           4 :       CALL dbcsr_create(ot_prec, template=matrix_tdp)
     784             :       CALL cp_fm_struct_create(ortho_struct, context=blacs_env, para_env=para_env, &
     785           4 :                                nrow_global=nrow, ncol_global=nocc)
     786           4 :       CALL cp_fm_create(ortho_space, ortho_struct)
     787             : 
     788             :       CALL prep_for_ot(evecs, ortho_space, ot_prec, neig, do_sf, donor_state, xas_tdp_env, &
     789           4 :                        xas_tdp_control, qs_env)
     790             : 
     791             : !  Prepare the preconditioner
     792           4 :       ALLOCATE (precond)
     793           4 :       CALL init_preconditioner(precond, para_env, blacs_env)
     794           4 :       precond%in_use = ot_precond_full_single ! because applying this conditioner is only a mm
     795           4 :       precond%dbcsr_matrix => ot_prec
     796             : 
     797             : !  Actually solving the eigenvalue problem
     798             :       CALL ot_eigensolver(matrix_h=matrix_tdp, matrix_s=metric, matrix_c_fm=evecs, &
     799             :                           eps_gradient=eps_iter, iter_max=max_iter, silent=.FALSE., &
     800             :                           ot_settings=xas_tdp_control%ot_settings, &
     801             :                           matrix_orthogonal_space_fm=ortho_space, &
     802           4 :                           preconditioner=precond)
     803           4 :       CALL calculate_subspace_eigenvalues(evecs, matrix_tdp, evals_arg=evals)
     804             : 
     805             : !  Clean-up
     806           4 :       CALL cp_fm_struct_release(ortho_struct)
     807           4 :       CALL cp_fm_release(ortho_space)
     808           4 :       CALL dbcsr_release(ot_prec)
     809           4 :       CALL destroy_preconditioner(precond)
     810           4 :       DEALLOCATE (precond)
     811             : 
     812           4 :       CALL timestop(handle)
     813             : 
     814           4 :    END SUBROUTINE xas_ot_solver
     815             : 
     816             : ! **************************************************************************************************
     817             : !> \brief Prepares all required matrices for the OT eigensolver (precond, ortho space and guesses)
     818             : !> \param guess the guess eigenvectors absed on LUMOs, in fm format
     819             : !> \param ortho the orthogonal space in fm format (occupied MOs)
     820             : !> \param precond the OT preconditioner in DBCSR format
     821             : !> \param neig ...
     822             : !> \param do_sf ...
     823             : !> \param donor_state ...
     824             : !> \param xas_tdp_env ...
     825             : !> \param xas_tdp_control ...
     826             : !> \param qs_env ...
     827             : !> \note Matrices are allocate before entry
     828             : ! **************************************************************************************************
     829           8 :    SUBROUTINE prep_for_ot(guess, ortho, precond, neig, do_sf, donor_state, xas_tdp_env, &
     830             :                           xas_tdp_control, qs_env)
     831             : 
     832             :       TYPE(cp_fm_type), INTENT(IN)                       :: guess, ortho
     833             :       TYPE(dbcsr_type)                                   :: precond
     834             :       INTEGER                                            :: neig
     835             :       LOGICAL                                            :: do_sf
     836             :       TYPE(donor_state_type), POINTER                    :: donor_state
     837             :       TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
     838             :       TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
     839             :       TYPE(qs_environment_type), POINTER                 :: qs_env
     840             : 
     841             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'prep_for_ot'
     842             : 
     843             :       INTEGER :: handle, i, iblk, ido_mo, ispin, jblk, maxel, minel, nao, natom, ndo_mo, &
     844             :          nelec_spin(2), nhomo(2), nlumo(2), nspins, start_block, start_col, start_row
     845             :       LOGICAL                                            :: do_os, found
     846           4 :       REAL(dp), DIMENSION(:, :), POINTER                 :: pblock
     847             :       TYPE(cp_fm_type), POINTER                          :: mo_coeff
     848             :       TYPE(dbcsr_iterator_type)                          :: iter
     849           4 :       TYPE(mo_set_type), DIMENSION(:), POINTER           :: mos
     850             : 
     851           4 :       NULLIFY (mos, mo_coeff, pblock)
     852             : 
     853             :       !REMINDER on the organization of the xas_tdp matrix. It is DBCSR format, with a super bock
     854             :       !structure. First block structure is spin quadrants: upper left is alpha-alpha spin and lower
     855             :       !right is beta-beta spin. Then each quadrants is divided in a ndo_mo x ndo_mo grid (1x1 for 1s,
     856             :       !2s, 3x3 for 2p). Each block in this grid has the normal DBCSR structure and dist, simply
     857             :       !replicated. The resulting eigenvectors follow the same logic.
     858             : 
     859           4 :       CALL timeset(routineN, handle)
     860             : 
     861           4 :       do_os = xas_tdp_control%do_uks .OR. xas_tdp_control%do_roks
     862           0 :       nspins = 1; IF (do_os) nspins = 2
     863           4 :       ndo_mo = donor_state%ndo_mo
     864           4 :       CALL cp_fm_get_info(xas_tdp_env%lumo_evecs(1), nrow_global=nao)
     865           4 :       CALL get_qs_env(qs_env, natom=natom, nelectron_spin=nelec_spin)
     866             : 
     867             :       !Compute the number of guesses for each spins
     868           4 :       IF (do_os) THEN
     869           0 :          minel = MINLOC(nelec_spin, 1)
     870           0 :          maxel = 3 - minel
     871           0 :          nlumo(minel) = (neig/ndo_mo + nelec_spin(maxel) - nelec_spin(minel))/2
     872           0 :          nlumo(maxel) = neig/ndo_mo - nlumo(minel)
     873             :       ELSE
     874           4 :          nlumo(1) = neig/ndo_mo
     875             :       END IF
     876             : 
     877             :       !Building the guess vectors based on the LUMOs. Copy LUMOs into approriate spin/do_mo
     878             :       !quadrant/block. Order within a block does not matter
     879             :       !Note: in spin-flip, the upper left quadrant is for beta-alpha transition, so guess are alpha LUMOs
     880             :       start_row = 0
     881             :       start_col = 0
     882           8 :       DO ispin = 1, nspins
     883          12 :          DO ido_mo = 1, ndo_mo
     884             : 
     885             :             CALL cp_fm_to_fm_submat(msource=xas_tdp_env%lumo_evecs(ispin), mtarget=guess, &
     886             :                                     nrow=nao, ncol=nlumo(ispin), s_firstrow=1, s_firstcol=1, &
     887           4 :                                     t_firstrow=start_row + 1, t_firstcol=start_col + 1)
     888             : 
     889           4 :             start_row = start_row + nao
     890           8 :             start_col = start_col + nlumo(ispin)
     891             : 
     892             :          END DO
     893             :       END DO
     894             : 
     895             :       !Build the orthogonal space according to the same principles, but based on occupied MOs
     896             :       !Note: in spin-flip, the upper left quadrant is for beta-alpha transition, so ortho space is beta HOMOs
     897           4 :       CALL get_qs_env(qs_env, mos=mos)
     898           4 :       nhomo = 0
     899           8 :       DO ispin = 1, nspins
     900           8 :          CALL get_mo_set(mos(ispin), homo=nhomo(ispin))
     901             :       END DO
     902             : 
     903             :       start_row = 0
     904             :       start_col = 0
     905           8 :       DO i = 1, nspins
     906           4 :          ispin = i; IF (do_sf) ispin = 3 - i
     907           4 :          CALL get_mo_set(mos(ispin), mo_coeff=mo_coeff)
     908             : 
     909          12 :          DO ido_mo = 1, ndo_mo
     910             : 
     911             :             CALL cp_fm_to_fm_submat(msource=mo_coeff, mtarget=ortho, nrow=nao, ncol=nhomo(ispin), &
     912             :                                     s_firstrow=1, s_firstcol=1, &
     913           4 :                                     t_firstrow=start_row + 1, t_firstcol=start_col + 1)
     914             : 
     915           4 :             start_row = start_row + nao
     916           8 :             start_col = start_col + nhomo(ispin)
     917             : 
     918             :          END DO
     919             :       END DO
     920             : 
     921             :       !Build the preconditioner. Copy the "canonical" pre-computed matrix into the proper spin/do_mo
     922             :       !quadrants/blocks. The end matrix is purely block diagonal
     923           8 :       DO ispin = 1, nspins
     924             : 
     925           4 :          CALL dbcsr_iterator_start(iter, xas_tdp_env%ot_prec(ispin)%matrix)
     926        9316 :          DO WHILE (dbcsr_iterator_blocks_left(iter))
     927             : 
     928        9312 :             CALL dbcsr_iterator_next_block(iter, row=iblk, column=jblk)
     929             : 
     930        9312 :             CALL dbcsr_get_block_p(xas_tdp_env%ot_prec(ispin)%matrix, iblk, jblk, pblock, found)
     931             : 
     932        9316 :             IF (found) THEN
     933             : 
     934        9312 :                start_block = (ispin - 1)*ndo_mo*natom
     935       18624 :                DO ido_mo = 1, ndo_mo
     936        9312 :                   CALL dbcsr_put_block(precond, start_block + iblk, start_block + jblk, pblock)
     937             : 
     938       18624 :                   start_block = start_block + natom
     939             : 
     940             :                END DO
     941             :             END IF
     942             : 
     943             :          END DO !dbcsr iter
     944          12 :          CALL dbcsr_iterator_stop(iter)
     945             :       END DO
     946             : 
     947           4 :       CALL dbcsr_finalize(precond)
     948             : 
     949           4 :       CALL timestop(handle)
     950             : 
     951           4 :    END SUBROUTINE prep_for_ot
     952             : 
     953             : ! **************************************************************************************************
     954             : !> \brief Returns the scaling to apply to normalize the LR eigenvectors.
     955             : !> \param scaling the scaling array to apply
     956             : !> \param lr_coeffs the linear response coefficients as a fm
     957             : !> \param donor_state ...
     958             : !> \note The LR coeffs are normalized when c^T G c = +- 1, G is the metric, c = c^- for TDA and
     959             : !>       c = c^+ - c^- for the full problem
     960             : ! **************************************************************************************************
     961           6 :    SUBROUTINE get_normal_scaling(scaling, lr_coeffs, donor_state)
     962             : 
     963             :       REAL(dp), ALLOCATABLE, DIMENSION(:)                :: scaling
     964             :       TYPE(cp_fm_type), INTENT(IN)                       :: lr_coeffs
     965             :       TYPE(donor_state_type), POINTER                    :: donor_state
     966             : 
     967             :       INTEGER                                            :: nrow, nscal, nvals
     968             :       REAL(dp), ALLOCATABLE, DIMENSION(:)                :: diag
     969             :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
     970             :       TYPE(cp_fm_struct_type), POINTER                   :: norm_struct, work_struct
     971             :       TYPE(cp_fm_type)                                   :: fm_norm, work
     972             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     973             : 
     974           6 :       NULLIFY (para_env, blacs_env, norm_struct, work_struct)
     975             : 
     976             : !  Creating the matrix structures and initializing the work matrices
     977             :       CALL cp_fm_get_info(lr_coeffs, context=blacs_env, para_env=para_env, &
     978           6 :                           matrix_struct=work_struct, ncol_global=nvals, nrow_global=nrow)
     979             :       CALL cp_fm_struct_create(norm_struct, para_env=para_env, context=blacs_env, &
     980           6 :                                nrow_global=nvals, ncol_global=nvals)
     981             : 
     982           6 :       CALL cp_fm_create(work, work_struct)
     983           6 :       CALL cp_fm_create(fm_norm, norm_struct)
     984             : 
     985             : !  Taking c^T * G * C
     986           6 :       CALL cp_dbcsr_sm_fm_multiply(donor_state%metric(1)%matrix, lr_coeffs, work, ncol=nvals)
     987           6 :       CALL parallel_gemm('T', 'N', nvals, nvals, nrow, 1.0_dp, lr_coeffs, work, 0.0_dp, fm_norm)
     988             : 
     989             : !  Computing the needed scaling
     990          18 :       ALLOCATE (diag(nvals))
     991           6 :       CALL cp_fm_get_diag(fm_norm, diag)
     992         150 :       WHERE (ABS(diag) > 1.0E-8_dp) diag = 1.0_dp/SQRT(ABS(diag))
     993             : 
     994           6 :       nscal = SIZE(scaling)
     995         150 :       scaling(1:nscal) = diag(1:nscal)
     996             : 
     997             : !  Clean-up
     998           6 :       CALL cp_fm_release(work)
     999           6 :       CALL cp_fm_release(fm_norm)
    1000           6 :       CALL cp_fm_struct_release(norm_struct)
    1001             : 
    1002          18 :    END SUBROUTINE get_normal_scaling
    1003             : 
    1004             : ! **************************************************************************************************
    1005             : !> \brief This subroutine computes the row/column block structure as well as the dbcsr ditrinution
    1006             : !>        for the submatrices making up the generalized XAS TDP eigenvalue problem. They all share
    1007             : !>        the same properties, which are based on the replication of the KS matrix. Stored in the
    1008             : !>        donor_state_type
    1009             : !> \param donor_state ...
    1010             : !> \param do_os whether this is a open-shell calculation
    1011             : !> \param qs_env ...
    1012             : ! **************************************************************************************************
    1013          56 :    SUBROUTINE compute_submat_dist_and_blk_size(donor_state, do_os, qs_env)
    1014             : 
    1015             :       TYPE(donor_state_type), POINTER                    :: donor_state
    1016             :       LOGICAL, INTENT(IN)                                :: do_os
    1017             :       TYPE(qs_environment_type), POINTER                 :: qs_env
    1018             : 
    1019             :       INTEGER                                            :: group, i, nao, nblk_row, ndo_mo, nspins, &
    1020             :                                                             scol_dist, srow_dist
    1021          56 :       INTEGER, DIMENSION(:), POINTER                     :: col_dist, col_dist_sub, row_blk_size, &
    1022          56 :                                                             row_dist, row_dist_sub, submat_blk_size
    1023          56 :       INTEGER, DIMENSION(:, :), POINTER                  :: pgrid
    1024             :       TYPE(dbcsr_distribution_type), POINTER             :: dbcsr_dist, submat_dist
    1025          56 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrix_ks
    1026             : 
    1027          56 :       NULLIFY (matrix_ks, dbcsr_dist, row_blk_size, row_dist, col_dist, pgrid, col_dist_sub)
    1028          56 :       NULLIFY (row_dist_sub, submat_dist, submat_blk_size)
    1029             : 
    1030             : !  The submatrices are indexed by M_{pi sig,qj tau}, where p,q label basis functions and i,j donor
    1031             : !  MOs and sig,tau the spins. For each spin and donor MOs combination, one has a submatrix of the
    1032             : !  size of the KS matrix (nao x nao) with the same dbcsr block structure
    1033             : 
    1034             : !  Initialization
    1035          56 :       ndo_mo = donor_state%ndo_mo
    1036          56 :       CALL get_qs_env(qs_env=qs_env, matrix_ks=matrix_ks, dbcsr_dist=dbcsr_dist)
    1037          56 :       CALL dbcsr_get_info(matrix_ks(1)%matrix, row_blk_size=row_blk_size)
    1038             :       CALL dbcsr_distribution_get(dbcsr_dist, row_dist=row_dist, col_dist=col_dist, group=group, &
    1039          56 :                                   pgrid=pgrid)
    1040         658 :       nao = SUM(row_blk_size)
    1041          56 :       nblk_row = SIZE(row_blk_size)
    1042          56 :       srow_dist = SIZE(row_dist)
    1043          56 :       scol_dist = SIZE(col_dist)
    1044          56 :       nspins = 1; IF (do_os) nspins = 2
    1045             : 
    1046             : !  Creation if submatrix block size and col/row distribution
    1047         168 :       ALLOCATE (submat_blk_size(ndo_mo*nspins*nblk_row))
    1048         168 :       ALLOCATE (row_dist_sub(ndo_mo*nspins*srow_dist))
    1049         168 :       ALLOCATE (col_dist_sub(ndo_mo*nspins*scol_dist))
    1050             : 
    1051         132 :       DO i = 1, ndo_mo*nspins
    1052        1416 :          submat_blk_size((i - 1)*nblk_row + 1:i*nblk_row) = row_blk_size
    1053        1416 :          row_dist_sub((i - 1)*srow_dist + 1:i*srow_dist) = row_dist
    1054        1472 :          col_dist_sub((i - 1)*scol_dist + 1:i*scol_dist) = col_dist
    1055             :       END DO
    1056             : 
    1057             : !  Create the submatrix dbcsr distribution
    1058          56 :       ALLOCATE (submat_dist)
    1059             :       CALL dbcsr_distribution_new(submat_dist, group=group, pgrid=pgrid, row_dist=row_dist_sub, &
    1060          56 :                                   col_dist=col_dist_sub)
    1061             : 
    1062          56 :       donor_state%dbcsr_dist => submat_dist
    1063          56 :       donor_state%blk_size => submat_blk_size
    1064             : 
    1065             : !  Clean-up
    1066          56 :       DEALLOCATE (col_dist_sub, row_dist_sub)
    1067             : 
    1068         168 :    END SUBROUTINE compute_submat_dist_and_blk_size
    1069             : 
    1070             : ! **************************************************************************************************
    1071             : !> \brief Returns the projector on the unperturbed unoccupied ground state Q = 1 - SP on the block
    1072             : !>        diagonal of a matrix with the standard size and distribution.
    1073             : !> \param proj_Q the matrix with the projector
    1074             : !> \param donor_state ...
    1075             : !> \param do_os whether it is open-shell calculation
    1076             : !> \param xas_tdp_env ...
    1077             : !> \param do_sf whether the projector should be prepared for spin-flip excitations
    1078             : !> \note In the spin-flip case, the alpha spins are sent to beta and vice-versa. The structure of
    1079             : !>       the projector is changed by swapping the alpha-alpha with the beta-beta block, which
    1080             : !>       naturally take the spin change into account. Only for open-shell.
    1081             : ! **************************************************************************************************
    1082          58 :    SUBROUTINE get_q_projector(proj_Q, donor_state, do_os, xas_tdp_env, do_sf)
    1083             : 
    1084             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: proj_Q
    1085             :       TYPE(donor_state_type), POINTER                    :: donor_state
    1086             :       LOGICAL, INTENT(IN)                                :: do_os
    1087             :       TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
    1088             :       LOGICAL, INTENT(IN), OPTIONAL                      :: do_sf
    1089             : 
    1090             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'get_q_projector'
    1091             : 
    1092             :       INTEGER                                            :: handle, iblk, imo, ispin, jblk, &
    1093             :                                                             nblk_row, ndo_mo, nspins
    1094          58 :       INTEGER, DIMENSION(:), POINTER                     :: blk_size_q, row_blk_size
    1095             :       LOGICAL                                            :: found_block, my_dosf
    1096          58 :       REAL(dp), DIMENSION(:, :), POINTER                 :: work_block
    1097             :       TYPE(dbcsr_distribution_type), POINTER             :: dist_q
    1098             :       TYPE(dbcsr_iterator_type)                          :: iter
    1099             :       TYPE(dbcsr_type), POINTER                          :: one_sp
    1100             : 
    1101          58 :       NULLIFY (work_block, one_sp, row_blk_size, dist_q, blk_size_q)
    1102             : 
    1103          58 :       CALL timeset(routineN, handle)
    1104             : 
    1105             : !  Initialization
    1106          58 :       nspins = 1; IF (do_os) nspins = 2
    1107          58 :       ndo_mo = donor_state%ndo_mo
    1108          58 :       one_sp => xas_tdp_env%q_projector(1)%matrix
    1109          58 :       CALL dbcsr_get_info(one_sp, row_blk_size=row_blk_size)
    1110          58 :       nblk_row = SIZE(row_blk_size)
    1111          58 :       my_dosf = .FALSE.
    1112          58 :       IF (PRESENT(do_sf)) my_dosf = do_sf
    1113          58 :       dist_q => donor_state%dbcsr_dist
    1114          58 :       blk_size_q => donor_state%blk_size
    1115             : 
    1116             :       ! the projector is not symmetric
    1117             :       CALL dbcsr_create(matrix=proj_Q, name="PROJ Q", matrix_type=dbcsr_type_no_symmetry, dist=dist_q, &
    1118          58 :                         row_blk_size=blk_size_q, col_blk_size=blk_size_q)
    1119             : 
    1120             : !  Fill the projector by looping over 1-SP and duplicating blocks. (all on the spin-MO block diagonal)
    1121         126 :       DO ispin = 1, nspins
    1122          68 :          one_sp => xas_tdp_env%q_projector(ispin)%matrix
    1123             : 
    1124             :          !if spin-flip, swap the alpha-alpha and beta-beta blocks
    1125          68 :          IF (my_dosf) one_sp => xas_tdp_env%q_projector(3 - ispin)%matrix
    1126             : 
    1127          68 :          CALL dbcsr_iterator_start(iter, one_sp)
    1128       19154 :          DO WHILE (dbcsr_iterator_blocks_left(iter))
    1129             : 
    1130       19086 :             CALL dbcsr_iterator_next_block(iter, row=iblk, column=jblk)
    1131             : 
    1132             :             ! get the block
    1133       19086 :             CALL dbcsr_get_block_p(one_sp, iblk, jblk, work_block, found_block)
    1134             : 
    1135       19086 :             IF (found_block) THEN
    1136             : 
    1137       38182 :                DO imo = 1, ndo_mo
    1138             :                   CALL dbcsr_put_block(proj_Q, ((ispin - 1)*ndo_mo + imo - 1)*nblk_row + iblk, &
    1139       38182 :                                        ((ispin - 1)*ndo_mo + imo - 1)*nblk_row + jblk, work_block)
    1140             :                END DO
    1141             : 
    1142             :             END IF
    1143       19086 :             NULLIFY (work_block)
    1144             : 
    1145             :          END DO !iterator
    1146         194 :          CALL dbcsr_iterator_stop(iter)
    1147             :       END DO !ispin
    1148             : 
    1149          58 :       CALL dbcsr_finalize(proj_Q)
    1150             : 
    1151          58 :       CALL timestop(handle)
    1152             : 
    1153          58 :    END SUBROUTINE get_q_projector
    1154             : 
    1155             : ! **************************************************************************************************
    1156             : !> \brief Builds the matrix containing the ground state contribution to the matrix_tdp (aka matrix A)
    1157             : !>         => A_{pis,qjt} = (F_pq*delta_ij - epsilon_ij*S_pq) delta_st, where:
    1158             : !>         F is the KS matrix
    1159             : !>         S is the overlap matrix
    1160             : !>         epsilon_ij is the donor MO eigenvalue
    1161             : !>         i,j labels the MOs, p,q the AOs and s,t the spins
    1162             : !> \param matrix_a  pointer to a DBCSR matrix containing A
    1163             : !> \param donor_state ...
    1164             : !> \param do_os ...
    1165             : !> \param qs_env ...
    1166             : !> \param do_sf whether the ground state contribution should accommodate spin-flip
    1167             : !> \note Even localized non-canonical MOs are diagonalized in their subsapce => eps_ij = eps_ii*delta_ij
    1168             : !>       Use GW2X corrected evals as eps_ii. If not GW2X correction, these are the default KS energies
    1169             : ! **************************************************************************************************
    1170          58 :    SUBROUTINE build_gs_contribution(matrix_a, donor_state, do_os, qs_env, do_sf)
    1171             : 
    1172             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix_a
    1173             :       TYPE(donor_state_type), POINTER                    :: donor_state
    1174             :       LOGICAL, INTENT(IN)                                :: do_os
    1175             :       TYPE(qs_environment_type), POINTER                 :: qs_env
    1176             :       LOGICAL, INTENT(IN), OPTIONAL                      :: do_sf
    1177             : 
    1178             :       CHARACTER(len=*), PARAMETER :: routineN = 'build_gs_contribution'
    1179             : 
    1180             :       INTEGER                                            :: handle, iblk, imo, ispin, jblk, &
    1181             :                                                             nblk_row, ndo_mo, nspins
    1182          58 :       INTEGER, DIMENSION(:), POINTER                     :: blk_size_a, row_blk_size
    1183             :       LOGICAL                                            :: found_block, my_dosf
    1184          58 :       REAL(dp), DIMENSION(:, :), POINTER                 :: work_block
    1185             :       TYPE(dbcsr_distribution_type), POINTER             :: dbcsr_dist, dist_a
    1186             :       TYPE(dbcsr_iterator_type)                          :: iter
    1187          58 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: m_ks, matrix_ks, matrix_s
    1188             :       TYPE(dbcsr_type)                                   :: work_matrix
    1189             : 
    1190          58 :       NULLIFY (matrix_ks, dbcsr_dist, row_blk_size, work_block, matrix_s, m_ks)
    1191          58 :       NULLIFY (dist_a, blk_size_a)
    1192             : 
    1193             :       !  Note: matrix A is symmetric and block diagonal. If ADMM, the ks matrix is the total one,
    1194             :       !        and it is corrected for eigenvalues (done at xas_tdp_init)
    1195             : 
    1196          58 :       CALL timeset(routineN, handle)
    1197             : 
    1198             : !  Initialization
    1199          58 :       nspins = 1; IF (do_os) nspins = 2
    1200          58 :       ndo_mo = donor_state%ndo_mo
    1201          58 :       CALL get_qs_env(qs_env=qs_env, matrix_ks=matrix_ks, matrix_s=matrix_s, dbcsr_dist=dbcsr_dist)
    1202          58 :       CALL dbcsr_get_info(matrix_s(1)%matrix, row_blk_size=row_blk_size)
    1203          58 :       nblk_row = SIZE(row_blk_size)
    1204          58 :       dist_a => donor_state%dbcsr_dist
    1205          58 :       blk_size_a => donor_state%blk_size
    1206             : 
    1207             : !  Prepare the KS matrix pointer
    1208         184 :       ALLOCATE (m_ks(nspins))
    1209          58 :       m_ks(1)%matrix => matrix_ks(1)%matrix
    1210          58 :       IF (do_os) m_ks(2)%matrix => matrix_ks(2)%matrix
    1211             : 
    1212             : ! If spin-flip, swap the KS alpha-alpha and beta-beta quadrants.
    1213          58 :       my_dosf = .FALSE.
    1214          58 :       IF (PRESENT(do_sf)) my_dosf = do_sf
    1215           2 :       IF (my_dosf .AND. do_os) THEN
    1216           2 :          m_ks(1)%matrix => matrix_ks(2)%matrix
    1217           2 :          m_ks(2)%matrix => matrix_ks(1)%matrix
    1218             :       END IF
    1219             : 
    1220             : !  Creating the symmetric matrix A (and work)
    1221             :       CALL dbcsr_create(matrix=matrix_a, name="MATRIX A", matrix_type=dbcsr_type_symmetric, &
    1222          58 :                         dist=dist_a, row_blk_size=blk_size_a, col_blk_size=blk_size_a)
    1223             :       CALL dbcsr_create(matrix=work_matrix, name="WORK MAT", matrix_type=dbcsr_type_symmetric, &
    1224          58 :                         dist=dist_a, row_blk_size=blk_size_a, col_blk_size=blk_size_a)
    1225             : 
    1226         126 :       DO ispin = 1, nspins
    1227             : 
    1228             : !     Loop over the blocks of KS and put them on the spin-MO block diagonal of matrix A
    1229          68 :          CALL dbcsr_iterator_start(iter, m_ks(ispin)%matrix)
    1230        9415 :          DO WHILE (dbcsr_iterator_blocks_left(iter))
    1231             : 
    1232        9347 :             CALL dbcsr_iterator_next_block(iter, row=iblk, column=jblk)
    1233             : 
    1234             : !           Get the block
    1235        9347 :             CALL dbcsr_get_block_p(m_ks(ispin)%matrix, iblk, jblk, work_block, found_block)
    1236             : 
    1237        9347 :             IF (found_block) THEN
    1238             : 
    1239             : !              The KS matrix only appears on diagonal of matrix A => loop over II donor MOs
    1240       18704 :                DO imo = 1, ndo_mo
    1241             : 
    1242             : !                 Put the block as it is
    1243             :                   CALL dbcsr_put_block(matrix_a, ((ispin - 1)*ndo_mo + imo - 1)*nblk_row + iblk, &
    1244       18704 :                                        ((ispin - 1)*ndo_mo + imo - 1)*nblk_row + jblk, work_block)
    1245             : 
    1246             :                END DO !imo
    1247             :             END IF !found_block
    1248        9347 :             NULLIFY (work_block)
    1249             :          END DO ! iteration on KS blocks
    1250          68 :          CALL dbcsr_iterator_stop(iter)
    1251             : 
    1252             : !     Loop over the blocks of S and put them on the block diagonal of work
    1253             : 
    1254          68 :          CALL dbcsr_iterator_start(iter, matrix_s(1)%matrix)
    1255        9415 :          DO WHILE (dbcsr_iterator_blocks_left(iter))
    1256             : 
    1257        9347 :             CALL dbcsr_iterator_next_block(iter, row=iblk, column=jblk)
    1258             : 
    1259             : !           Get the block
    1260        9347 :             CALL dbcsr_get_block_p(matrix_s(1)%matrix, iblk, jblk, work_block, found_block)
    1261             : 
    1262        9347 :             IF (found_block) THEN
    1263             : 
    1264             : !              Add S matrix on block diagonal as epsilon_ii*S_pq
    1265       18704 :                DO imo = 1, ndo_mo
    1266             : 
    1267             :                   CALL dbcsr_put_block(work_matrix, ((ispin - 1)*ndo_mo + imo - 1)*nblk_row + iblk, &
    1268             :                                        ((ispin - 1)*ndo_mo + imo - 1)*nblk_row + jblk, &
    1269      238070 :                                        donor_state%gw2x_evals(imo, ispin)*work_block)
    1270             :                END DO !imo
    1271             :             END IF !found block
    1272        9347 :             NULLIFY (work_block)
    1273             :          END DO ! iteration on S blocks
    1274         262 :          CALL dbcsr_iterator_stop(iter)
    1275             : 
    1276             :       END DO !ispin
    1277          58 :       CALL dbcsr_finalize(matrix_a)
    1278          58 :       CALL dbcsr_finalize(work_matrix)
    1279             : 
    1280             : !  Take matrix_a = matrix_a - work
    1281          58 :       CALL dbcsr_add(matrix_a, work_matrix, 1.0_dp, -1.0_dp)
    1282          58 :       CALL dbcsr_finalize(matrix_a)
    1283             : 
    1284             : !  Clean-up
    1285          58 :       CALL dbcsr_release(work_matrix)
    1286          58 :       DEALLOCATE (m_ks)
    1287             : 
    1288          58 :       CALL timestop(handle)
    1289             : 
    1290          58 :    END SUBROUTINE build_gs_contribution
    1291             : 
    1292             : ! **************************************************************************************************
    1293             : !> \brief Creates the metric (aka  matrix G) needed for the generalized eigenvalue problem and inverse
    1294             : !>         => G_{pis,qjt} = S_pq*delta_ij*delta_st
    1295             : !> \param matrix_g dbcsr matrix containing G
    1296             : !> \param donor_state ...
    1297             : !> \param qs_env ...
    1298             : !> \param do_os if open-shell calculation
    1299             : !> \param do_inv if the inverse of G should be computed
    1300             : ! **************************************************************************************************
    1301         168 :    SUBROUTINE build_metric(matrix_g, donor_state, qs_env, do_os, do_inv)
    1302             : 
    1303             :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrix_g
    1304             :       TYPE(donor_state_type), POINTER                    :: donor_state
    1305             :       TYPE(qs_environment_type), POINTER                 :: qs_env
    1306             :       LOGICAL, INTENT(IN)                                :: do_os
    1307             :       LOGICAL, INTENT(IN), OPTIONAL                      :: do_inv
    1308             : 
    1309             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'build_metric'
    1310             : 
    1311             :       INTEGER                                            :: handle, i, iblk, jblk, nao, nblk_row, &
    1312             :                                                             ndo_mo, nspins
    1313          56 :       INTEGER, DIMENSION(:), POINTER                     :: blk_size_g, row_blk_size
    1314             :       LOGICAL                                            :: found_block, my_do_inv
    1315          56 :       REAL(dp), DIMENSION(:, :), POINTER                 :: work_block
    1316             :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
    1317             :       TYPE(dbcsr_distribution_type), POINTER             :: dist_g
    1318             :       TYPE(dbcsr_iterator_type)                          :: iter
    1319          56 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrix_s
    1320             :       TYPE(dbcsr_type)                                   :: matrix_sinv
    1321             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    1322             : 
    1323          56 :       NULLIFY (matrix_s, row_blk_size, work_block, para_env, blacs_env, dist_g, blk_size_g)
    1324             : 
    1325          56 :       CALL timeset(routineN, handle)
    1326             : 
    1327             : !  Initialization
    1328          56 :       nspins = 1; IF (do_os) nspins = 2
    1329          56 :       ndo_mo = donor_state%ndo_mo
    1330          56 :       CALL get_qs_env(qs_env=qs_env, matrix_s=matrix_s)
    1331          56 :       CALL dbcsr_get_info(matrix_s(1)%matrix, row_blk_size=row_blk_size, nfullrows_total=nao)
    1332          56 :       nblk_row = SIZE(row_blk_size)
    1333          56 :       my_do_inv = .FALSE.
    1334          56 :       IF (PRESENT(do_inv)) my_do_inv = do_inv
    1335          56 :       dist_g => donor_state%dbcsr_dist
    1336          56 :       blk_size_g => donor_state%blk_size
    1337             : 
    1338             : !  Creating the symmetric  matrices G and G^-1 with the right size and distribution
    1339          56 :       ALLOCATE (matrix_g(1)%matrix)
    1340             :       CALL dbcsr_create(matrix=matrix_g(1)%matrix, name="MATRIX G", matrix_type=dbcsr_type_symmetric, &
    1341          56 :                         dist=dist_g, row_blk_size=blk_size_g, col_blk_size=blk_size_g)
    1342             : 
    1343             : !  Fill the matrices G by looping over the block of S and putting them on the diagonal
    1344          56 :       CALL dbcsr_iterator_start(iter, matrix_s(1)%matrix)
    1345        9384 :       DO WHILE (dbcsr_iterator_blocks_left(iter))
    1346             : 
    1347        9328 :          CALL dbcsr_iterator_next_block(iter, row=iblk, column=jblk)
    1348             : 
    1349             : !        Get the block
    1350        9328 :          CALL dbcsr_get_block_p(matrix_s(1)%matrix, iblk, jblk, work_block, found_block)
    1351             : 
    1352        9328 :          IF (found_block) THEN
    1353             : 
    1354             : !           Go over the diagonal of G => donor MOs ii, spin ss
    1355       18679 :             DO i = 1, ndo_mo*nspins
    1356       18679 :                CALL dbcsr_put_block(matrix_g(1)%matrix, (i - 1)*nblk_row + iblk, (i - 1)*nblk_row + jblk, work_block)
    1357             :             END DO
    1358             : 
    1359             :          END IF
    1360        9328 :          NULLIFY (work_block)
    1361             : 
    1362             :       END DO ! dbcsr_iterator
    1363          56 :       CALL dbcsr_iterator_stop(iter)
    1364             : 
    1365             : !  Finalize
    1366          56 :       CALL dbcsr_finalize(matrix_g(1)%matrix)
    1367             : 
    1368             : !  If the inverse of G is required, do the same as above with the inverse
    1369          56 :       IF (my_do_inv) THEN
    1370             : 
    1371           6 :          CPASSERT(SIZE(matrix_g) == 2)
    1372             : 
    1373             :          ! Create the matrix
    1374           6 :          ALLOCATE (matrix_g(2)%matrix)
    1375             :          CALL dbcsr_create(matrix=matrix_g(2)%matrix, name="MATRIX GINV", &
    1376             :                            matrix_type=dbcsr_type_symmetric, dist=dist_g, &
    1377           6 :                            row_blk_size=blk_size_g, col_blk_size=blk_size_g)
    1378             : 
    1379             :          ! Invert the overlap matrix
    1380           6 :          CALL get_qs_env(qs_env, para_env=para_env, blacs_env=blacs_env)
    1381           6 :          CALL dbcsr_copy(matrix_sinv, matrix_s(1)%matrix)
    1382           6 :          CALL cp_dbcsr_cholesky_decompose(matrix_sinv, para_env=para_env, blacs_env=blacs_env)
    1383           6 :          CALL cp_dbcsr_cholesky_invert(matrix_sinv, para_env=para_env, blacs_env=blacs_env, uplo_to_full=.TRUE.)
    1384             : 
    1385             : !     Fill the matrices G^-1 by looping over the block of S^-1 and putting them on the diagonal
    1386           6 :          CALL dbcsr_iterator_start(iter, matrix_sinv)
    1387          24 :          DO WHILE (dbcsr_iterator_blocks_left(iter))
    1388             : 
    1389          18 :             CALL dbcsr_iterator_next_block(iter, row=iblk, column=jblk)
    1390             : 
    1391             : !           Get the block
    1392          18 :             CALL dbcsr_get_block_p(matrix_sinv, iblk, jblk, work_block, found_block)
    1393             : 
    1394          18 :             IF (found_block) THEN
    1395             : 
    1396             : !              Go over the diagonal of G => donor MOs ii spin ss
    1397          36 :                DO i = 1, ndo_mo*nspins
    1398          36 :                   CALL dbcsr_put_block(matrix_g(2)%matrix, (i - 1)*nblk_row + iblk, (i - 1)*nblk_row + jblk, work_block)
    1399             :                END DO
    1400             : 
    1401             :             END IF
    1402          18 :             NULLIFY (work_block)
    1403             : 
    1404             :          END DO ! dbcsr_iterator
    1405           6 :          CALL dbcsr_iterator_stop(iter)
    1406             : 
    1407             :          !  Finalize
    1408           6 :          CALL dbcsr_finalize(matrix_g(2)%matrix)
    1409             : 
    1410             :          !  Clean-up
    1411           6 :          CALL dbcsr_release(matrix_sinv)
    1412             :       END IF !do_inv
    1413             : 
    1414          56 :       CALL timestop(handle)
    1415             : 
    1416          56 :    END SUBROUTINE build_metric
    1417             : 
    1418             : ! **************************************************************************************************
    1419             : !> \brief Builds the auxiliary matrix (A-D+E)^+0.5 needed for the transofrmation of the
    1420             : !>        full-TDDFT problem into an Hermitian one
    1421             : !> \param threshold a threshold for allowed negative eigenvalues
    1422             : !> \param sx the amount of exact exchange
    1423             : !> \param matrix_a the ground state contribution matrix A
    1424             : !> \param matrix_d the on-diagonal exchange kernel matrix (ab|IJ)
    1425             : !> \param matrix_e the off-diagonal exchange kernel matrix (aJ|Ib)
    1426             : !> \param do_hfx if exact exchange is included
    1427             : !> \param proj_Q ...
    1428             : !> \param work ...
    1429             : !> \param donor_state ...
    1430             : !> \param eps_filter for the dbcsr multiplication
    1431             : !> \param qs_env ...
    1432             : ! **************************************************************************************************
    1433           6 :    SUBROUTINE build_aux_matrix(threshold, sx, matrix_a, matrix_d, matrix_e, do_hfx, proj_Q, &
    1434             :                                work, donor_state, eps_filter, qs_env)
    1435             : 
    1436             :       REAL(dp), INTENT(IN)                               :: threshold, sx
    1437             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix_a, matrix_d, matrix_e
    1438             :       LOGICAL, INTENT(IN)                                :: do_hfx
    1439             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: proj_Q, work
    1440             :       TYPE(donor_state_type), POINTER                    :: donor_state
    1441             :       REAL(dp), INTENT(IN)                               :: eps_filter
    1442             :       TYPE(qs_environment_type), POINTER                 :: qs_env
    1443             : 
    1444             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'build_aux_matrix'
    1445             : 
    1446             :       INTEGER                                            :: handle, ndep
    1447             :       REAL(dp)                                           :: evals(2)
    1448             :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
    1449             :       TYPE(dbcsr_type)                                   :: tmp_mat
    1450             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    1451             : 
    1452           6 :       NULLIFY (blacs_env, para_env)
    1453             : 
    1454           6 :       CALL timeset(routineN, handle)
    1455             : 
    1456           6 :       CALL dbcsr_copy(tmp_mat, matrix_a)
    1457           6 :       IF (do_hfx) THEN
    1458           6 :          CALL dbcsr_add(tmp_mat, matrix_d, 1.0_dp, -1.0_dp*sx) !scaled hfx
    1459           6 :          CALL dbcsr_add(tmp_mat, matrix_e, 1.0_dp, 1.0_dp*sx)
    1460             :       END IF
    1461             : 
    1462             :       ! Take the product with the Q projector:
    1463           6 :       CALL dbcsr_multiply('N', 'N', 1.0_dp, proj_Q, tmp_mat, 0.0_dp, work, filter_eps=eps_filter)
    1464           6 :       CALL dbcsr_multiply('N', 'T', 1.0_dp, work, proj_Q, 0.0_dp, tmp_mat, filter_eps=eps_filter)
    1465             : 
    1466             :       ! Actually computing and storing the auxiliary matrix
    1467           6 :       ALLOCATE (donor_state%matrix_aux)
    1468           6 :       CALL dbcsr_create(matrix=donor_state%matrix_aux, template=matrix_a, name="MAT AUX")
    1469             : 
    1470           6 :       CALL get_qs_env(qs_env, para_env=para_env, blacs_env=blacs_env)
    1471             : 
    1472             :       ! good quality sqrt
    1473           6 :       CALL cp_dbcsr_power(tmp_mat, 0.5_dp, threshold, ndep, para_env, blacs_env, eigenvalues=evals)
    1474             : 
    1475           6 :       CALL dbcsr_copy(donor_state%matrix_aux, tmp_mat)
    1476             : 
    1477             :       ! Warn the user if matrix not positive semi-definite
    1478           6 :       IF (evals(1) < 0.0_dp .AND. ABS(evals(1)) > threshold) THEN
    1479           0 :          CPWARN("The full TDDFT problem might not have been soundly turned Hermitian. Try TDA.")
    1480             :       END IF
    1481             : 
    1482             :       ! clean-up
    1483           6 :       CALL dbcsr_release(tmp_mat)
    1484             : 
    1485           6 :       CALL timestop(handle)
    1486             : 
    1487           6 :    END SUBROUTINE build_aux_matrix
    1488             : 
    1489             : ! **************************************************************************************************
    1490             : !> \brief Includes the SOC effects on the precomputed spin-conserving and spin-flip excitations
    1491             : !>        from an open-shell calculation (UKS or ROKS). This is a perturbative treatment
    1492             : !> \param donor_state ...
    1493             : !> \param xas_tdp_env ...
    1494             : !> \param xas_tdp_control ...
    1495             : !> \param qs_env ...
    1496             : !> \note Using AMEWs, build an hermitian matrix with all excited states SOC coupling + the
    1497             : !>       excitation energies on the diagonal. Then diagonalize it to get the new excitation
    1498             : !>       energies and corresponding linear combinations of lr_coeffs.
    1499             : !>       The AMEWs are normalized
    1500             : !>       Only for open-shell calculations
    1501             : ! **************************************************************************************************
    1502           2 :    SUBROUTINE include_os_soc(donor_state, xas_tdp_env, xas_tdp_control, qs_env)
    1503             : 
    1504             :       TYPE(donor_state_type), POINTER                    :: donor_state
    1505             :       TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
    1506             :       TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
    1507             :       TYPE(qs_environment_type), POINTER                 :: qs_env
    1508             : 
    1509             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'include_os_soc'
    1510             : 
    1511             :       INTEGER                                            :: group, handle, homo, iex, isc, isf, nao, &
    1512             :                                                             ndo_mo, ndo_so, nex, npcols, nprows, &
    1513             :                                                             nsc, nsf, ntot, tas(2), tbs(2)
    1514           2 :       INTEGER, DIMENSION(:), POINTER                     :: col_blk_size, col_dist, row_blk_size, &
    1515           2 :                                                             row_dist, row_dist_new
    1516           2 :       INTEGER, DIMENSION(:, :), POINTER                  :: pgrid
    1517             :       LOGICAL                                            :: do_roks, do_uks
    1518             :       REAL(dp)                                           :: eps_filter, gs_sum, soc
    1519           2 :       REAL(dp), ALLOCATABLE, DIMENSION(:)                :: diag, tmp_evals
    1520           2 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: domo_soc_x, domo_soc_y, domo_soc_z, &
    1521           2 :                                                             gsex_block
    1522           2 :       REAL(dp), DIMENSION(:), POINTER                    :: sc_evals, sf_evals
    1523             :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
    1524             :       TYPE(cp_cfm_type)                                  :: evecs_cfm, pert_cfm
    1525             :       TYPE(cp_fm_struct_type), POINTER                   :: full_struct, gsex_struct, prod_struct, &
    1526             :                                                             vec_struct, work_struct
    1527             :       TYPE(cp_fm_type)                                   :: gsex_fm, img_fm, prod_work, real_fm, &
    1528             :                                                             vec_soc_x, vec_soc_y, vec_soc_z, &
    1529             :                                                             vec_work, work_fm
    1530             :       TYPE(cp_fm_type), POINTER                          :: gs_coeffs, mo_coeff, sc_coeffs, sf_coeffs
    1531             :       TYPE(dbcsr_distribution_type), POINTER             :: coeffs_dist, dbcsr_dist, prod_dist
    1532           2 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrix_s
    1533             :       TYPE(dbcsr_soc_package_type)                       :: dbcsr_soc_package
    1534             :       TYPE(dbcsr_type), POINTER                          :: dbcsr_ovlp, dbcsr_prod, dbcsr_sc, &
    1535             :                                                             dbcsr_sf, dbcsr_tmp, dbcsr_work, &
    1536             :                                                             orb_soc_x, orb_soc_y, orb_soc_z
    1537           2 :       TYPE(mo_set_type), DIMENSION(:), POINTER           :: mos
    1538             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    1539             : 
    1540           2 :       NULLIFY (gs_coeffs, sc_coeffs, sf_coeffs, matrix_s, orb_soc_x, orb_soc_y, orb_soc_z, mos)
    1541           2 :       NULLIFY (full_struct, para_env, blacs_env, mo_coeff, sc_evals, sf_evals, vec_struct, prod_struct)
    1542           2 :       NULLIFY (work_struct, gsex_struct, col_dist, row_dist)
    1543           2 :       NULLIFY (col_blk_size, row_blk_size, row_dist_new, pgrid, dbcsr_sc, dbcsr_sf, dbcsr_work)
    1544           2 :       NULLIFY (dbcsr_tmp, dbcsr_ovlp, dbcsr_prod)
    1545             : 
    1546           2 :       CALL timeset(routineN, handle)
    1547             : 
    1548             : ! Initialization
    1549           2 :       sc_coeffs => donor_state%sc_coeffs
    1550           2 :       sf_coeffs => donor_state%sf_coeffs
    1551           2 :       sc_evals => donor_state%sc_evals
    1552           2 :       sf_evals => donor_state%sf_evals
    1553           2 :       nsc = SIZE(sc_evals)
    1554           2 :       nsf = SIZE(sf_evals)
    1555           2 :       ntot = 1 + nsc + nsf
    1556           2 :       nex = nsc !by contrutciotn nsc == nsf, but keep 2 counts for clarity
    1557           2 :       ndo_mo = donor_state%ndo_mo
    1558           2 :       ndo_so = 2*ndo_mo
    1559           2 :       CALL get_qs_env(qs_env, para_env=para_env, blacs_env=blacs_env, mos=mos, matrix_s=matrix_s)
    1560           2 :       CALL dbcsr_get_info(matrix_s(1)%matrix, nfullrows_total=nao)
    1561           2 :       orb_soc_x => xas_tdp_env%orb_soc(1)%matrix
    1562           2 :       orb_soc_y => xas_tdp_env%orb_soc(2)%matrix
    1563           2 :       orb_soc_z => xas_tdp_env%orb_soc(3)%matrix
    1564           2 :       do_roks = xas_tdp_control%do_roks
    1565           2 :       do_uks = xas_tdp_control%do_uks
    1566           2 :       eps_filter = xas_tdp_control%eps_filter
    1567             : 
    1568             :       ! For the GS coeffs, we use the same structure both for ROKS and UKS here => allows us to write
    1569             :       ! general code later on, and not use IF (do_roks) statements every second line
    1570           2 :       IF (do_uks) gs_coeffs => donor_state%gs_coeffs
    1571           2 :       IF (do_roks) THEN
    1572             :          CALL cp_fm_struct_create(vec_struct, context=blacs_env, para_env=para_env, &
    1573           0 :                                   nrow_global=nao, ncol_global=ndo_so)
    1574           0 :          ALLOCATE (gs_coeffs)
    1575           0 :          CALL cp_fm_create(gs_coeffs, vec_struct)
    1576             : 
    1577             :          ! only alpha donor MOs are stored, need to copy them intoboth the alpha and the beta slot
    1578             :          CALL cp_fm_to_fm_submat(msource=donor_state%gs_coeffs, mtarget=gs_coeffs, nrow=nao, &
    1579             :                                  ncol=ndo_mo, s_firstrow=1, s_firstcol=1, t_firstrow=1, &
    1580           0 :                                  t_firstcol=1)
    1581             :          CALL cp_fm_to_fm_submat(msource=donor_state%gs_coeffs, mtarget=gs_coeffs, nrow=nao, &
    1582             :                                  ncol=ndo_mo, s_firstrow=1, s_firstcol=1, t_firstrow=1, &
    1583           0 :                                  t_firstcol=ndo_mo + 1)
    1584             : 
    1585           0 :          CALL cp_fm_struct_release(vec_struct)
    1586             :       END IF
    1587             : 
    1588             : ! Creating the real and the imaginary part of the SOC perturbation matrix
    1589             :       CALL cp_fm_struct_create(full_struct, context=blacs_env, para_env=para_env, &
    1590           2 :                                nrow_global=ntot, ncol_global=ntot)
    1591           2 :       CALL cp_fm_create(real_fm, full_struct)
    1592           2 :       CALL cp_fm_create(img_fm, full_struct)
    1593             : 
    1594             : ! Put the excitation energies on the diagonal of the real  matrix. Element 1,1 is the ground state
    1595          26 :       DO isc = 1, nsc
    1596          26 :          CALL cp_fm_set_element(real_fm, 1 + isc, 1 + isc, sc_evals(isc))
    1597             :       END DO
    1598          26 :       DO isf = 1, nsf
    1599          26 :          CALL cp_fm_set_element(real_fm, 1 + nsc + isf, 1 + nsc + isf, sf_evals(isf))
    1600             :       END DO
    1601             : 
    1602             : ! Create the bdcsr machinery
    1603           2 :       CALL get_qs_env(qs_env, dbcsr_dist=dbcsr_dist)
    1604             :       CALL dbcsr_distribution_get(dbcsr_dist, group=group, row_dist=row_dist, pgrid=pgrid, &
    1605           2 :                                   npcols=npcols, nprows=nprows)
    1606           8 :       ALLOCATE (col_dist(nex), row_dist_new(nex))
    1607          26 :       DO iex = 1, nex
    1608          24 :          col_dist(iex) = MODULO(npcols - iex, npcols)
    1609          26 :          row_dist_new(iex) = MODULO(nprows - iex, nprows)
    1610             :       END DO
    1611           2 :       ALLOCATE (coeffs_dist, prod_dist)
    1612             :       CALL dbcsr_distribution_new(coeffs_dist, group=group, pgrid=pgrid, row_dist=row_dist, &
    1613           2 :                                   col_dist=col_dist)
    1614             :       CALL dbcsr_distribution_new(prod_dist, group=group, pgrid=pgrid, row_dist=row_dist_new, &
    1615           2 :                                   col_dist=col_dist)
    1616             : 
    1617             :       !Create the matrices
    1618           4 :       ALLOCATE (col_blk_size(nex))
    1619          26 :       col_blk_size = ndo_so
    1620           2 :       CALL dbcsr_get_info(matrix_s(1)%matrix, row_blk_size=row_blk_size)
    1621             : 
    1622           2 :       ALLOCATE (dbcsr_sc, dbcsr_sf, dbcsr_work, dbcsr_ovlp, dbcsr_tmp, dbcsr_prod)
    1623             :       CALL dbcsr_create(matrix=dbcsr_sc, name="SPIN CONS", matrix_type=dbcsr_type_no_symmetry, &
    1624           2 :                         dist=coeffs_dist, row_blk_size=row_blk_size, col_blk_size=col_blk_size)
    1625             :       CALL dbcsr_create(matrix=dbcsr_sf, name="SPIN FLIP", matrix_type=dbcsr_type_no_symmetry, &
    1626           2 :                         dist=coeffs_dist, row_blk_size=row_blk_size, col_blk_size=col_blk_size)
    1627             :       CALL dbcsr_create(matrix=dbcsr_work, name="WORK", matrix_type=dbcsr_type_no_symmetry, &
    1628           2 :                         dist=coeffs_dist, row_blk_size=row_blk_size, col_blk_size=col_blk_size)
    1629             :       CALL dbcsr_create(matrix=dbcsr_prod, name="PROD", matrix_type=dbcsr_type_no_symmetry, &
    1630           2 :                         dist=prod_dist, row_blk_size=col_blk_size, col_blk_size=col_blk_size)
    1631             :       CALL dbcsr_create(matrix=dbcsr_ovlp, name="OVLP", matrix_type=dbcsr_type_no_symmetry, &
    1632           2 :                         dist=prod_dist, row_blk_size=col_blk_size, col_blk_size=col_blk_size)
    1633             : 
    1634          26 :       col_blk_size = 1
    1635             :       CALL dbcsr_create(matrix=dbcsr_tmp, name="TMP", matrix_type=dbcsr_type_no_symmetry, &
    1636           2 :                         dist=prod_dist, row_blk_size=col_blk_size, col_blk_size=col_blk_size)
    1637           2 :       CALL dbcsr_reserve_all_blocks(dbcsr_tmp)
    1638             : 
    1639           2 :       dbcsr_soc_package%dbcsr_sc => dbcsr_sc
    1640           2 :       dbcsr_soc_package%dbcsr_sf => dbcsr_sf
    1641           2 :       dbcsr_soc_package%dbcsr_work => dbcsr_work
    1642           2 :       dbcsr_soc_package%dbcsr_ovlp => dbcsr_ovlp
    1643           2 :       dbcsr_soc_package%dbcsr_prod => dbcsr_prod
    1644           2 :       dbcsr_soc_package%dbcsr_tmp => dbcsr_tmp
    1645             : 
    1646             :       !Filling the coeffs matrices by copying from the stored fms
    1647           2 :       CALL copy_fm_to_dbcsr(sc_coeffs, dbcsr_sc)
    1648           2 :       CALL copy_fm_to_dbcsr(sf_coeffs, dbcsr_sf)
    1649             : 
    1650             : ! Precompute what we can before looping over excited states.
    1651             :       ! Need to compute the scalar: sum_i sum_sigma <phi^0_i,sigma|SOC|phi^0_i,sigma>, where all
    1652             :       ! occupied MOs are taken into account
    1653             : 
    1654             :       !start with the alpha MOs
    1655           2 :       CALL get_mo_set(mos(1), mo_coeff=mo_coeff, homo=homo)
    1656           6 :       ALLOCATE (diag(homo))
    1657           2 :       CALL cp_fm_get_info(mo_coeff, matrix_struct=vec_struct)
    1658             :       CALL cp_fm_struct_create(prod_struct, context=blacs_env, para_env=para_env, &
    1659           2 :                                nrow_global=homo, ncol_global=homo)
    1660           2 :       CALL cp_fm_create(vec_work, vec_struct)
    1661           2 :       CALL cp_fm_create(prod_work, prod_struct)
    1662             : 
    1663             :       ! <alpha|SOC_z|alpha> => spin integration yields +1
    1664           2 :       CALL cp_dbcsr_sm_fm_multiply(orb_soc_z, mo_coeff, vec_work, ncol=homo)
    1665           2 :       CALL parallel_gemm('T', 'N', homo, homo, nao, 1.0_dp, mo_coeff, vec_work, 0.0_dp, prod_work)
    1666           2 :       CALL cp_fm_get_diag(prod_work, diag)
    1667          20 :       gs_sum = SUM(diag)
    1668             : 
    1669           2 :       CALL cp_fm_release(vec_work)
    1670           2 :       CALL cp_fm_release(prod_work)
    1671           2 :       CALL cp_fm_struct_release(prod_struct)
    1672           2 :       DEALLOCATE (diag)
    1673           2 :       NULLIFY (vec_struct)
    1674             : 
    1675             :       ! Now do the same with the beta gs coeffs
    1676           2 :       CALL get_mo_set(mos(2), mo_coeff=mo_coeff, homo=homo)
    1677           6 :       ALLOCATE (diag(homo))
    1678           2 :       CALL cp_fm_get_info(mo_coeff, matrix_struct=vec_struct)
    1679             :       CALL cp_fm_struct_create(prod_struct, context=blacs_env, para_env=para_env, &
    1680           2 :                                nrow_global=homo, ncol_global=homo)
    1681           2 :       CALL cp_fm_create(vec_work, vec_struct)
    1682           2 :       CALL cp_fm_create(prod_work, prod_struct)
    1683             : 
    1684             :       ! <beta|SOC_z|beta> => spin integration yields -1
    1685           2 :       CALL cp_dbcsr_sm_fm_multiply(orb_soc_z, mo_coeff, vec_work, ncol=homo)
    1686           2 :       CALL parallel_gemm('T', 'N', homo, homo, nao, 1.0_dp, mo_coeff, vec_work, 0.0_dp, prod_work)
    1687           2 :       CALL cp_fm_get_diag(prod_work, diag)
    1688          20 :       gs_sum = gs_sum - SUM(diag) ! -1 because of spin integration
    1689             : 
    1690           2 :       CALL cp_fm_release(vec_work)
    1691           2 :       CALL cp_fm_release(prod_work)
    1692           2 :       CALL cp_fm_struct_release(prod_struct)
    1693           2 :       DEALLOCATE (diag)
    1694             : 
    1695             :       ! Need to compute: <phi^0_Isigma|SOC|phi^0_Jtau> for the donor MOs
    1696             : 
    1697             :       CALL cp_fm_struct_create(vec_struct, context=blacs_env, para_env=para_env, &
    1698           2 :                                nrow_global=nao, ncol_global=ndo_so)
    1699             :       CALL cp_fm_struct_create(prod_struct, context=blacs_env, para_env=para_env, &
    1700           2 :                                nrow_global=ndo_so, ncol_global=ndo_so)
    1701           2 :       CALL cp_fm_create(vec_soc_x, vec_struct) ! for SOC_x*gs_coeffs
    1702           2 :       CALL cp_fm_create(vec_soc_y, vec_struct) ! for SOC_y*gs_coeffs
    1703           2 :       CALL cp_fm_create(vec_soc_z, vec_struct) ! for SOC_z*gs_coeffs
    1704           2 :       CALL cp_fm_create(prod_work, prod_struct)
    1705           6 :       ALLOCATE (diag(ndo_so))
    1706             : 
    1707          16 :       ALLOCATE (domo_soc_x(ndo_so, ndo_so), domo_soc_y(ndo_so, ndo_so), domo_soc_z(ndo_so, ndo_so))
    1708             : 
    1709           2 :       CALL cp_dbcsr_sm_fm_multiply(orb_soc_x, gs_coeffs, vec_soc_x, ncol=ndo_so)
    1710           2 :       CALL parallel_gemm('T', 'N', ndo_so, ndo_so, nao, 1.0_dp, gs_coeffs, vec_soc_x, 0.0_dp, prod_work)
    1711           2 :       CALL cp_fm_get_submatrix(prod_work, domo_soc_x)
    1712             : 
    1713           2 :       CALL cp_dbcsr_sm_fm_multiply(orb_soc_y, gs_coeffs, vec_soc_y, ncol=ndo_so)
    1714           2 :       CALL parallel_gemm('T', 'N', ndo_so, ndo_so, nao, 1.0_dp, gs_coeffs, vec_soc_y, 0.0_dp, prod_work)
    1715           2 :       CALL cp_fm_get_submatrix(prod_work, domo_soc_y)
    1716             : 
    1717           2 :       CALL cp_dbcsr_sm_fm_multiply(orb_soc_z, gs_coeffs, vec_soc_z, ncol=ndo_so)
    1718           2 :       CALL parallel_gemm('T', 'N', ndo_so, ndo_so, nao, 1.0_dp, gs_coeffs, vec_soc_z, 0.0_dp, prod_work)
    1719           2 :       CALL cp_fm_get_submatrix(prod_work, domo_soc_z)
    1720             : 
    1721             :       ! some more useful work matrices
    1722             :       CALL cp_fm_struct_create(work_struct, context=blacs_env, para_env=para_env, &
    1723           2 :                                nrow_global=nex, ncol_global=nex)
    1724           2 :       CALL cp_fm_create(work_fm, work_struct)
    1725             : 
    1726             : !  Looping over the excited states, computing the SOC and filling the perturbation matrix
    1727             : !  There are 3 loops to do: sc-sc, sc-sf and sf-sf
    1728             : !  The final perturbation matrix is Hermitian, only the upper diagonal is needed
    1729             : 
    1730             :       !need some work matrices for the GS stuff
    1731             :       CALL cp_fm_struct_create(gsex_struct, context=blacs_env, para_env=para_env, &
    1732           2 :                                nrow_global=nex*ndo_so, ncol_global=ndo_so)
    1733           2 :       CALL cp_fm_create(gsex_fm, gsex_struct)
    1734           6 :       ALLOCATE (gsex_block(ndo_so, ndo_so))
    1735             : 
    1736             : !  Start with ground-state/spin-conserving SOC:
    1737             :       !  <Psi_0|SOC|Psi_Jsc> = sum_k,sigma <phi^0_k,sigma|SOC|phi^Jsc_k,sigma>
    1738             : 
    1739             :       !compute -sc_coeffs*SOC_Z*gs_coeffs, minus sign because SOC_z antisymmetric
    1740           2 :       CALL parallel_gemm('T', 'N', nex*ndo_so, ndo_so, nao, -1.0_dp, sc_coeffs, vec_soc_z, 0.0_dp, gsex_fm)
    1741             : 
    1742          26 :       DO isc = 1, nsc
    1743             :          CALL cp_fm_get_submatrix(fm=gsex_fm, target_m=gsex_block, start_row=(isc - 1)*ndo_so + 1, &
    1744          24 :                                   start_col=1, n_rows=ndo_so, n_cols=ndo_so)
    1745          24 :          diag(:) = get_diag(gsex_block)
    1746         168 :          soc = SUM(diag(1:ndo_mo)) - SUM(diag(ndo_mo + 1:ndo_so)) !minus sign because of spin integration
    1747             : 
    1748             :          !purely imaginary contribution
    1749          26 :          CALL cp_fm_set_element(img_fm, 1, 1 + isc, soc)
    1750             :       END DO !isc
    1751             : 
    1752             : !  Then ground-state/spin-flip SOC:
    1753             :       !<Psi_0|SOC|Psi_Jsf> = sum_k,sigma <phi^0_k,sigma|SOC|phi^Jsc_k,tau>   sigma != tau
    1754             : 
    1755             :       !compute  -sc_coeffs*SOC_x*gs_coeffs, imaginary contribution
    1756           2 :       CALL parallel_gemm('T', 'N', nex*ndo_so, ndo_so, nao, -1.0_dp, sc_coeffs, vec_soc_x, 0.0_dp, gsex_fm)
    1757             : 
    1758          26 :       DO isf = 1, nsf
    1759             :          CALL cp_fm_get_submatrix(fm=gsex_fm, target_m=gsex_block, start_row=(isf - 1)*ndo_so + 1, &
    1760          24 :                                   start_col=1, n_rows=ndo_so, n_cols=ndo_so)
    1761          24 :          diag(:) = get_diag(gsex_block)
    1762         168 :          soc = SUM(diag) !alpha and beta parts are simply added due to spin integration
    1763          26 :          CALL cp_fm_set_element(img_fm, 1, 1 + nsc + isf, soc)
    1764             :       END DO !isf
    1765             : 
    1766             :       !compute -sc_coeffs*SOC_y*gs_coeffs, real contribution
    1767           2 :       CALL parallel_gemm('T', 'N', nex*ndo_so, ndo_so, nao, -1.0_dp, sc_coeffs, vec_soc_y, 0.0_dp, gsex_fm)
    1768             : 
    1769          26 :       DO isf = 1, nsf
    1770             :          CALL cp_fm_get_submatrix(fm=gsex_fm, target_m=gsex_block, start_row=(isf - 1)*ndo_so + 1, &
    1771          24 :                                   start_col=1, n_rows=ndo_so, n_cols=ndo_so)
    1772          24 :          diag(:) = get_diag(gsex_block)
    1773          96 :          soc = SUM(diag(1:ndo_mo)) ! alpha-beta
    1774          96 :          soc = soc - SUM(diag(ndo_mo + 1:ndo_so)) !beta-alpha
    1775          26 :          CALL cp_fm_set_element(real_fm, 1, 1 + nsc + isf, soc)
    1776             :       END DO !isf
    1777             : 
    1778             :       !ground-state cleanup
    1779           2 :       CALL cp_fm_release(gsex_fm)
    1780           2 :       CALL cp_fm_release(vec_soc_x)
    1781           2 :       CALL cp_fm_release(vec_soc_y)
    1782           2 :       CALL cp_fm_release(vec_soc_z)
    1783           2 :       CALL cp_fm_release(prod_work)
    1784           2 :       CALL cp_fm_struct_release(gsex_struct)
    1785           2 :       CALL cp_fm_struct_release(prod_struct)
    1786           2 :       CALL cp_fm_struct_release(vec_struct)
    1787           2 :       DEALLOCATE (gsex_block)
    1788             : 
    1789             : !  Then spin-conserving/spin-conserving SOC
    1790             : !  <Psi_Isc|SOC|Psi_Jsc> =
    1791             : !  sum_k,sigma [<psi^Isc_k,sigma|SOC|psi^Jsc_k,sigma> + <psi^Isc_k,sigma|psi^Jsc_k,sigma> * gs_sum]
    1792             : !  - sum_k,l,sigma <psi^0_k,sigma|SOC|psi^0_l,sigma> * <psi^Isc_l,sigma|psi^Jsc_k,sigma>
    1793             : 
    1794             :       !Same spin integration => only SOC_z matters, and the contribution is purely imaginary
    1795           2 :       CALL dbcsr_multiply('N', 'N', 1.0_dp, orb_soc_z, dbcsr_sc, 0.0_dp, dbcsr_work, filter_eps=eps_filter)
    1796           2 :       CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sc, dbcsr_work, 0.0_dp, dbcsr_prod, filter_eps=eps_filter)
    1797             : 
    1798             :       !the overlap as well
    1799             :       CALL dbcsr_multiply('N', 'N', 1.0_dp, matrix_s(1)%matrix, dbcsr_sc, 0.0_dp, dbcsr_work, &
    1800           2 :                           filter_eps=eps_filter)
    1801           2 :       CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sc, dbcsr_work, 0.0_dp, dbcsr_ovlp, filter_eps=eps_filter)
    1802             : 
    1803             :       CALL os_amew_soc_elements(dbcsr_tmp, dbcsr_prod, dbcsr_ovlp, domo_soc_z, pref_diaga=1.0_dp, &
    1804             :                                 pref_diagb=-1.0_dp, pref_tracea=-1.0_dp, pref_traceb=1.0_dp, &
    1805           2 :                                 pref_diags=gs_sum, symmetric=.TRUE.)
    1806             : 
    1807           2 :       CALL copy_dbcsr_to_fm(dbcsr_tmp, work_fm)
    1808             :       CALL cp_fm_to_fm_submat(msource=work_fm, mtarget=img_fm, nrow=nex, ncol=nex, s_firstrow=1, &
    1809           2 :                               s_firstcol=1, t_firstrow=2, t_firstcol=2)
    1810             : 
    1811             : !  Then spin-flip/spin-flip SOC
    1812             : !  <Psi_Isf|SOC|Psi_Jsf> =
    1813             : !  sum_k,sigma [<psi^Isf_k,tau|SOC|psi^Jsf_k,tau> + <psi^Isf_k,tau|psi^Jsf_k,tau> * gs_sum]
    1814             : !  - sum_k,l,sigma <psi^0_k,sigma|SOC|psi^0_l,sigma> * <psi^Isf_l,tau|psi^Jsf_k,tau> , tau != sigma
    1815             : 
    1816             :       !Same spin integration => only SOC_z matters, and the contribution is purely imaginary
    1817           2 :       CALL dbcsr_multiply('N', 'N', 1.0_dp, orb_soc_z, dbcsr_sf, 0.0_dp, dbcsr_work, filter_eps=eps_filter)
    1818           2 :       CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sf, dbcsr_work, 0.0_dp, dbcsr_prod, filter_eps=eps_filter)
    1819             : 
    1820             :       !the overlap as well
    1821             :       CALL dbcsr_multiply('N', 'N', 1.0_dp, matrix_s(1)%matrix, dbcsr_sf, 0.0_dp, &
    1822           2 :                           dbcsr_work, filter_eps=eps_filter)
    1823           2 :       CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sf, dbcsr_work, 0.0_dp, dbcsr_ovlp, filter_eps=eps_filter)
    1824             : 
    1825             :       !note: the different prefactors are derived from the fact that because of spin-flip, we have
    1826             :       !alpha-gs and beta-lr interaction
    1827             :       CALL os_amew_soc_elements(dbcsr_tmp, dbcsr_prod, dbcsr_ovlp, domo_soc_z, pref_diaga=-1.0_dp, &
    1828             :                                 pref_diagb=1.0_dp, pref_tracea=-1.0_dp, pref_traceb=1.0_dp, &
    1829           2 :                                 pref_diags=gs_sum, symmetric=.TRUE.)
    1830             : 
    1831           2 :       CALL copy_dbcsr_to_fm(dbcsr_tmp, work_fm)
    1832             :       CALL cp_fm_to_fm_submat(msource=work_fm, mtarget=img_fm, nrow=nex, ncol=nex, s_firstrow=1, &
    1833           2 :                               s_firstcol=1, t_firstrow=1 + nsc + 1, t_firstcol=1 + nsc + 1)
    1834             : 
    1835             : !  Finally the spin-conserving/spin-flip interaction
    1836             : ! <Psi_Isc|SOC|Psi_Jsf> =   sum_k,sigma <psi^Isc_k,sigma|SOC|psi^Isf_k,tau>
    1837             : !                           - sum_k,l,sigma <psi^0_k,tau|SOC|psi^0_l,sigma
    1838             : 
    1839           2 :       tas(1) = ndo_mo + 1; tbs(1) = 1
    1840           2 :       tas(2) = 1; tbs(2) = ndo_mo + 1
    1841             : 
    1842             :       !the overlap
    1843             :       CALL dbcsr_multiply('N', 'N', 1.0_dp, matrix_s(1)%matrix, dbcsr_sf, 0.0_dp, &
    1844           2 :                           dbcsr_work, filter_eps=eps_filter)
    1845           2 :       CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sc, dbcsr_work, 0.0_dp, dbcsr_ovlp, filter_eps=eps_filter)
    1846             : 
    1847             :       !start with the imaginary contribution
    1848           2 :       CALL dbcsr_multiply('N', 'N', 1.0_dp, orb_soc_x, dbcsr_sc, 0.0_dp, dbcsr_work, filter_eps=eps_filter)
    1849           2 :       CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sf, dbcsr_work, 0.0_dp, dbcsr_prod, filter_eps=eps_filter)
    1850             : 
    1851             :       CALL os_amew_soc_elements(dbcsr_tmp, dbcsr_prod, dbcsr_ovlp, domo_soc_x, pref_diaga=1.0_dp, &
    1852             :                                 pref_diagb=1.0_dp, pref_tracea=-1.0_dp, pref_traceb=-1.0_dp, &
    1853           2 :                                 tracea_start=tas, traceb_start=tbs)
    1854             : 
    1855           2 :       CALL copy_dbcsr_to_fm(dbcsr_tmp, work_fm)
    1856             :       CALL cp_fm_to_fm_submat(msource=work_fm, mtarget=img_fm, nrow=nex, ncol=nex, s_firstrow=1, &
    1857           2 :                               s_firstcol=1, t_firstrow=2, t_firstcol=1 + nsc + 1)
    1858             : 
    1859             :       !follow up with the real contribution
    1860           2 :       CALL dbcsr_multiply('N', 'N', 1.0_dp, orb_soc_y, dbcsr_sf, 0.0_dp, dbcsr_work, filter_eps=eps_filter)
    1861           2 :       CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sc, dbcsr_work, 0.0_dp, dbcsr_prod, filter_eps=eps_filter)
    1862             : 
    1863             :       CALL os_amew_soc_elements(dbcsr_tmp, dbcsr_prod, dbcsr_ovlp, domo_soc_y, pref_diaga=1.0_dp, &
    1864             :                                 pref_diagb=-1.0_dp, pref_tracea=1.0_dp, pref_traceb=-1.0_dp, &
    1865           2 :                                 tracea_start=tas, traceb_start=tbs)
    1866             : 
    1867           2 :       CALL copy_dbcsr_to_fm(dbcsr_tmp, work_fm)
    1868             :       CALL cp_fm_to_fm_submat(msource=work_fm, mtarget=real_fm, nrow=nex, ncol=nex, s_firstrow=1, &
    1869           2 :                               s_firstcol=1, t_firstrow=2, t_firstcol=1 + nsc + 1)
    1870             : 
    1871             : !  Setting up the complex Hermitian perturbed matrix
    1872           2 :       CALL cp_cfm_create(pert_cfm, full_struct)
    1873           2 :       CALL cp_fm_to_cfm(real_fm, img_fm, pert_cfm)
    1874             : 
    1875           2 :       CALL cp_fm_release(real_fm)
    1876           2 :       CALL cp_fm_release(img_fm)
    1877             : 
    1878             : !  Diagonalize the perturbed matrix
    1879           6 :       ALLOCATE (tmp_evals(ntot))
    1880           2 :       CALL cp_cfm_create(evecs_cfm, full_struct)
    1881           2 :       CALL cp_cfm_heevd(pert_cfm, evecs_cfm, tmp_evals)
    1882             : 
    1883             :       !shift the energies such that the GS has zero and store all that in soc_evals (\wo the GS)
    1884           6 :       ALLOCATE (donor_state%soc_evals(ntot - 1))
    1885          50 :       donor_state%soc_evals(:) = tmp_evals(2:ntot) - tmp_evals(1)
    1886             : 
    1887             : !  The SOC dipole oscillator strengths
    1888             :       CALL compute_soc_dipole_fosc(evecs_cfm, dbcsr_soc_package, donor_state, xas_tdp_env, &
    1889           2 :                                    xas_tdp_control, qs_env, gs_coeffs=gs_coeffs)
    1890             : 
    1891             : !  And quadrupole
    1892           2 :       IF (xas_tdp_control%do_quad) THEN
    1893             :          CALL compute_soc_quadrupole_fosc(evecs_cfm, dbcsr_soc_package, donor_state, xas_tdp_env, &
    1894           0 :                                           xas_tdp_control, qs_env, gs_coeffs=gs_coeffs)
    1895             :       END IF
    1896             : 
    1897             : ! Clean-up
    1898           2 :       CALL cp_cfm_release(pert_cfm)
    1899           2 :       CALL cp_cfm_release(evecs_cfm)
    1900           2 :       CALL cp_fm_struct_release(full_struct)
    1901           2 :       IF (do_roks) THEN
    1902           0 :          CALL cp_fm_release(gs_coeffs)
    1903           0 :          DEALLOCATE (gs_coeffs)
    1904             :       END IF
    1905           2 :       CALL dbcsr_distribution_release(coeffs_dist)
    1906           2 :       CALL dbcsr_distribution_release(prod_dist)
    1907           2 :       CALL dbcsr_release(dbcsr_sc)
    1908           2 :       CALL dbcsr_release(dbcsr_sf)
    1909           2 :       CALL dbcsr_release(dbcsr_prod)
    1910           2 :       CALL dbcsr_release(dbcsr_ovlp)
    1911           2 :       CALL dbcsr_release(dbcsr_tmp)
    1912           2 :       CALL dbcsr_release(dbcsr_work)
    1913           2 :       CALL cp_fm_release(work_fm)
    1914           2 :       CALL cp_fm_struct_release(work_struct)
    1915           2 :       DEALLOCATE (coeffs_dist, prod_dist, col_dist, col_blk_size, row_dist_new)
    1916           2 :       DEALLOCATE (dbcsr_sc, dbcsr_sf, dbcsr_work, dbcsr_prod, dbcsr_ovlp, dbcsr_tmp)
    1917             : 
    1918           2 :       CALL timestop(handle)
    1919             : 
    1920          30 :    END SUBROUTINE include_os_soc
    1921             : 
    1922             : ! **************************************************************************************************
    1923             : !> \brief Includes the SOC effects on the precomputed restricted closed-shell singlet and triplet
    1924             : !>        excitations. This is a perturbative treatmnent
    1925             : !> \param donor_state ...
    1926             : !> \param xas_tdp_env ...
    1927             : !> \param xas_tdp_control ...
    1928             : !> \param qs_env ...
    1929             : !> \note Using AMEWs, build an hermitian matrix with all excited states SOC coupling + the
    1930             : !>       excitation energies on the diagonal. Then diagonalize it to get the new excitation
    1931             : !>       energies and corresponding linear combinations of lr_coeffs.
    1932             : !>       The AMEWs are normalized
    1933             : !>       Only for spin-restricted calculations
    1934             : !>       The ms=-1,+1 triplets are not explicitely computed in the first place. Assume they have
    1935             : !>       the same energy as the ms=0 triplets and apply the spin raising and lowering operators
    1936             : !>       on the latter to get their AMEWs => this is the qusi-degenerate perturbation theory
    1937             : !>       approach by Neese (QDPT)
    1938             : ! **************************************************************************************************
    1939           2 :    SUBROUTINE include_rcs_soc(donor_state, xas_tdp_env, xas_tdp_control, qs_env)
    1940             : 
    1941             :       TYPE(donor_state_type), POINTER                    :: donor_state
    1942             :       TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
    1943             :       TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
    1944             :       TYPE(qs_environment_type), POINTER                 :: qs_env
    1945             : 
    1946             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'include_rcs_soc'
    1947             : 
    1948             :       INTEGER                                            :: group, handle, iex, isg, itp, nao, &
    1949             :                                                             ndo_mo, nex, npcols, nprows, nsg, &
    1950             :                                                             ntot, ntp
    1951           2 :       INTEGER, DIMENSION(:), POINTER                     :: col_blk_size, col_dist, row_blk_size, &
    1952           2 :                                                             row_dist, row_dist_new
    1953           2 :       INTEGER, DIMENSION(:, :), POINTER                  :: pgrid
    1954             :       REAL(dp)                                           :: eps_filter, soc_gst, sqrt2
    1955           2 :       REAL(dp), ALLOCATABLE, DIMENSION(:)                :: diag, tmp_evals
    1956           2 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: domo_soc_x, domo_soc_y, domo_soc_z, &
    1957           2 :                                                             gstp_block
    1958           2 :       REAL(dp), DIMENSION(:), POINTER                    :: sg_evals, tp_evals
    1959             :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
    1960             :       TYPE(cp_cfm_type)                                  :: evecs_cfm, hami_cfm
    1961             :       TYPE(cp_fm_struct_type), POINTER                   :: full_struct, gstp_struct, prod_struct, &
    1962             :                                                             vec_struct, work_struct
    1963             :       TYPE(cp_fm_type)                                   :: gstp_fm, img_fm, prod_fm, real_fm, &
    1964             :                                                             tmp_fm, vec_soc_x, vec_soc_y, &
    1965             :                                                             vec_soc_z, work_fm
    1966             :       TYPE(cp_fm_type), POINTER                          :: gs_coeffs, sg_coeffs, tp_coeffs
    1967             :       TYPE(dbcsr_distribution_type), POINTER             :: coeffs_dist, dbcsr_dist, prod_dist
    1968           2 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrix_s
    1969             :       TYPE(dbcsr_soc_package_type)                       :: dbcsr_soc_package
    1970             :       TYPE(dbcsr_type), POINTER                          :: dbcsr_ovlp, dbcsr_prod, dbcsr_sg, &
    1971             :                                                             dbcsr_tmp, dbcsr_tp, dbcsr_work, &
    1972             :                                                             orb_soc_x, orb_soc_y, orb_soc_z
    1973             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    1974             : 
    1975           2 :       NULLIFY (sg_coeffs, tp_coeffs, gs_coeffs, sg_evals, tp_evals, full_struct)
    1976           2 :       NULLIFY (para_env, blacs_env, prod_struct, vec_struct, orb_soc_y, orb_soc_z)
    1977           2 :       NULLIFY (matrix_s, orb_soc_x)
    1978           2 :       NULLIFY (work_struct, dbcsr_dist, coeffs_dist, prod_dist, pgrid)
    1979           2 :       NULLIFY (col_dist, row_dist, col_blk_size, row_blk_size, row_dist_new, gstp_struct)
    1980           2 :       NULLIFY (dbcsr_tp, dbcsr_sg, dbcsr_prod, dbcsr_work, dbcsr_ovlp, dbcsr_tmp)
    1981             : 
    1982           2 :       CALL timeset(routineN, handle)
    1983             : 
    1984             : !  Initialization
    1985           2 :       CPASSERT(ASSOCIATED(xas_tdp_control))
    1986           2 :       gs_coeffs => donor_state%gs_coeffs
    1987           2 :       sg_coeffs => donor_state%sg_coeffs
    1988           2 :       tp_coeffs => donor_state%tp_coeffs
    1989           2 :       sg_evals => donor_state%sg_evals
    1990           2 :       tp_evals => donor_state%tp_evals
    1991           2 :       nsg = SIZE(sg_evals)
    1992           2 :       ntp = SIZE(tp_evals)
    1993           2 :       ntot = 1 + nsg + 3*ntp
    1994           2 :       ndo_mo = donor_state%ndo_mo
    1995           2 :       CALL get_qs_env(qs_env, matrix_s=matrix_s)
    1996           2 :       CALL dbcsr_get_info(matrix_s(1)%matrix, nfullrows_total=nao)
    1997           2 :       orb_soc_x => xas_tdp_env%orb_soc(1)%matrix
    1998           2 :       orb_soc_y => xas_tdp_env%orb_soc(2)%matrix
    1999           2 :       orb_soc_z => xas_tdp_env%orb_soc(3)%matrix
    2000             :       !by construction nsg == ntp, keep those separate for more code clarity though
    2001           2 :       CPASSERT(nsg == ntp)
    2002           2 :       nex = nsg
    2003           2 :       eps_filter = xas_tdp_control%eps_filter
    2004             : 
    2005             : !  Creating the real part and imaginary part of the final SOC fm
    2006           2 :       CALL get_qs_env(qs_env, para_env=para_env, blacs_env=blacs_env)
    2007             :       CALL cp_fm_struct_create(full_struct, context=blacs_env, para_env=para_env, &
    2008           2 :                                nrow_global=ntot, ncol_global=ntot)
    2009           2 :       CALL cp_fm_create(real_fm, full_struct)
    2010           2 :       CALL cp_fm_create(img_fm, full_struct)
    2011             : 
    2012             : !  Put the excitation energies on the diagonal of the real matrix
    2013          26 :       DO isg = 1, nsg
    2014          26 :          CALL cp_fm_set_element(real_fm, 1 + isg, 1 + isg, sg_evals(isg))
    2015             :       END DO
    2016          26 :       DO itp = 1, ntp
    2017             :          ! first T^-1, then T^0, then T^+1
    2018          24 :          CALL cp_fm_set_element(real_fm, 1 + itp + nsg, 1 + itp + nsg, tp_evals(itp))
    2019          24 :          CALL cp_fm_set_element(real_fm, 1 + itp + ntp + nsg, 1 + itp + ntp + nsg, tp_evals(itp))
    2020          26 :          CALL cp_fm_set_element(real_fm, 1 + itp + 2*ntp + nsg, 1 + itp + 2*ntp + nsg, tp_evals(itp))
    2021             :       END DO
    2022             : 
    2023             : !  Create the dbcsr machinery (for fast MM, the core of this routine)
    2024           2 :       CALL get_qs_env(qs_env, dbcsr_dist=dbcsr_dist)
    2025             :       CALL dbcsr_distribution_get(dbcsr_dist, group=group, row_dist=row_dist, pgrid=pgrid, &
    2026           2 :                                   npcols=npcols, nprows=nprows)
    2027           8 :       ALLOCATE (col_dist(nex), row_dist_new(nex))
    2028          26 :       DO iex = 1, nex
    2029          24 :          col_dist(iex) = MODULO(npcols - iex, npcols)
    2030          26 :          row_dist_new(iex) = MODULO(nprows - iex, nprows)
    2031             :       END DO
    2032           2 :       ALLOCATE (coeffs_dist, prod_dist)
    2033             :       CALL dbcsr_distribution_new(coeffs_dist, group=group, pgrid=pgrid, row_dist=row_dist, &
    2034           2 :                                   col_dist=col_dist)
    2035             :       CALL dbcsr_distribution_new(prod_dist, group=group, pgrid=pgrid, row_dist=row_dist_new, &
    2036           2 :                                   col_dist=col_dist)
    2037             : 
    2038             :       !Create the matrices
    2039           4 :       ALLOCATE (col_blk_size(nex))
    2040          26 :       col_blk_size = ndo_mo
    2041           2 :       CALL dbcsr_get_info(matrix_s(1)%matrix, row_blk_size=row_blk_size)
    2042             : 
    2043           2 :       ALLOCATE (dbcsr_sg, dbcsr_tp, dbcsr_work, dbcsr_ovlp, dbcsr_tmp, dbcsr_prod)
    2044             :       CALL dbcsr_create(matrix=dbcsr_sg, name="SINGLETS", matrix_type=dbcsr_type_no_symmetry, &
    2045           2 :                         dist=coeffs_dist, row_blk_size=row_blk_size, col_blk_size=col_blk_size)
    2046             :       CALL dbcsr_create(matrix=dbcsr_tp, name="TRIPLETS", matrix_type=dbcsr_type_no_symmetry, &
    2047           2 :                         dist=coeffs_dist, row_blk_size=row_blk_size, col_blk_size=col_blk_size)
    2048             :       CALL dbcsr_create(matrix=dbcsr_work, name="WORK", matrix_type=dbcsr_type_no_symmetry, &
    2049           2 :                         dist=coeffs_dist, row_blk_size=row_blk_size, col_blk_size=col_blk_size)
    2050             :       CALL dbcsr_create(matrix=dbcsr_prod, name="PROD", matrix_type=dbcsr_type_no_symmetry, &
    2051           2 :                         dist=prod_dist, row_blk_size=col_blk_size, col_blk_size=col_blk_size)
    2052             :       CALL dbcsr_create(matrix=dbcsr_ovlp, name="OVLP", matrix_type=dbcsr_type_no_symmetry, &
    2053           2 :                         dist=prod_dist, row_blk_size=col_blk_size, col_blk_size=col_blk_size)
    2054             : 
    2055          26 :       col_blk_size = 1
    2056             :       CALL dbcsr_create(matrix=dbcsr_tmp, name="TMP", matrix_type=dbcsr_type_no_symmetry, &
    2057           2 :                         dist=prod_dist, row_blk_size=col_blk_size, col_blk_size=col_blk_size)
    2058           2 :       CALL dbcsr_reserve_all_blocks(dbcsr_tmp)
    2059             : 
    2060           2 :       dbcsr_soc_package%dbcsr_sg => dbcsr_sg
    2061           2 :       dbcsr_soc_package%dbcsr_tp => dbcsr_tp
    2062           2 :       dbcsr_soc_package%dbcsr_work => dbcsr_work
    2063           2 :       dbcsr_soc_package%dbcsr_ovlp => dbcsr_ovlp
    2064           2 :       dbcsr_soc_package%dbcsr_prod => dbcsr_prod
    2065           2 :       dbcsr_soc_package%dbcsr_tmp => dbcsr_tmp
    2066             : 
    2067             :       !Filling the coeffs matrices by copying from the stored fms
    2068           2 :       CALL copy_fm_to_dbcsr(sg_coeffs, dbcsr_sg)
    2069           2 :       CALL copy_fm_to_dbcsr(tp_coeffs, dbcsr_tp)
    2070             : 
    2071             : !  Create the work and helper fms
    2072           2 :       CALL cp_fm_get_info(gs_coeffs, matrix_struct=vec_struct)
    2073             :       CALL cp_fm_struct_create(prod_struct, context=blacs_env, para_env=para_env, &
    2074           2 :                                nrow_global=ndo_mo, ncol_global=ndo_mo)
    2075           2 :       CALL cp_fm_create(prod_fm, prod_struct)
    2076           2 :       CALL cp_fm_create(vec_soc_x, vec_struct)
    2077           2 :       CALL cp_fm_create(vec_soc_y, vec_struct)
    2078           2 :       CALL cp_fm_create(vec_soc_z, vec_struct)
    2079             :       CALL cp_fm_struct_create(work_struct, context=blacs_env, para_env=para_env, &
    2080           2 :                                nrow_global=nex, ncol_global=nex)
    2081           2 :       CALL cp_fm_create(work_fm, work_struct)
    2082           2 :       CALL cp_fm_create(tmp_fm, work_struct)
    2083           6 :       ALLOCATE (diag(ndo_mo))
    2084             : 
    2085             : !  Precompute everything we can before looping over excited states
    2086           2 :       sqrt2 = SQRT(2.0_dp)
    2087             : 
    2088             :       ! The subset of the donor MOs matrix elements: <phi_I^0|Hsoc|phi_J^0> (kept as global array, small)
    2089          16 :       ALLOCATE (domo_soc_x(ndo_mo, ndo_mo), domo_soc_y(ndo_mo, ndo_mo), domo_soc_z(ndo_mo, ndo_mo))
    2090             : 
    2091           2 :       CALL cp_dbcsr_sm_fm_multiply(orb_soc_x, gs_coeffs, vec_soc_x, ncol=ndo_mo)
    2092           2 :       CALL parallel_gemm('T', 'N', ndo_mo, ndo_mo, nao, 1.0_dp, gs_coeffs, vec_soc_x, 0.0_dp, prod_fm)
    2093           2 :       CALL cp_fm_get_submatrix(prod_fm, domo_soc_x)
    2094             : 
    2095           2 :       CALL cp_dbcsr_sm_fm_multiply(orb_soc_y, gs_coeffs, vec_soc_y, ncol=ndo_mo)
    2096           2 :       CALL parallel_gemm('T', 'N', ndo_mo, ndo_mo, nao, 1.0_dp, gs_coeffs, vec_soc_y, 0.0_dp, prod_fm)
    2097           2 :       CALL cp_fm_get_submatrix(prod_fm, domo_soc_y)
    2098             : 
    2099           2 :       CALL cp_dbcsr_sm_fm_multiply(orb_soc_z, gs_coeffs, vec_soc_z, ncol=ndo_mo)
    2100           2 :       CALL parallel_gemm('T', 'N', ndo_mo, ndo_mo, nao, 1.0_dp, gs_coeffs, vec_soc_z, 0.0_dp, prod_fm)
    2101           2 :       CALL cp_fm_get_submatrix(prod_fm, domo_soc_z)
    2102             : 
    2103             : !  Only have SOC between singlet-triplet triplet-triplet and ground_state-triplet, the resulting
    2104             : !  matrix is Hermitian i.e. the real part is symmetric and the imaginary part is anti-symmetric.
    2105             : !  Can only fill upper half
    2106             : 
    2107             :       !Start with the ground state/triplet SOC, SOC*gs_coeffs already computed above
    2108             :       !note: we are computing <0|H|T>, but have SOC*gs_coeffs instead of gs_coeffs*SOC in store. Since
    2109             :       !      the SOC Hamiltonian is anti-symmetric, a - signs pops up in the gemms below
    2110             : 
    2111             :       CALL cp_fm_struct_create(gstp_struct, context=blacs_env, para_env=para_env, &
    2112           2 :                                nrow_global=ntp*ndo_mo, ncol_global=ndo_mo)
    2113           2 :       CALL cp_fm_create(gstp_fm, gstp_struct)
    2114           6 :       ALLOCATE (gstp_block(ndo_mo, ndo_mo))
    2115             : 
    2116             :       !gs-triplet with Ms=+-1, imaginary part
    2117           2 :       CALL parallel_gemm('T', 'N', ndo_mo*ntp, ndo_mo, nao, -1.0_dp, tp_coeffs, vec_soc_x, 0.0_dp, gstp_fm)
    2118             : 
    2119          26 :       DO itp = 1, ntp
    2120             :          CALL cp_fm_get_submatrix(fm=gstp_fm, target_m=gstp_block, start_row=(itp - 1)*ndo_mo + 1, &
    2121          24 :                                   start_col=1, n_rows=ndo_mo, n_cols=ndo_mo)
    2122          24 :          diag(:) = get_diag(gstp_block)
    2123          96 :          soc_gst = SUM(diag)
    2124          24 :          CALL cp_fm_set_element(img_fm, 1, 1 + nsg + itp, -1.0_dp*soc_gst) ! <0|H_x|T^-1>
    2125          26 :          CALL cp_fm_set_element(img_fm, 1, 1 + nsg + 2*ntp + itp, soc_gst) ! <0|H_x|T^+1>
    2126             :       END DO
    2127             : 
    2128             :       !gs-triplet with Ms=+-1, real part
    2129           2 :       CALL parallel_gemm('T', 'N', ndo_mo*ntp, ndo_mo, nao, -1.0_dp, tp_coeffs, vec_soc_y, 0.0_dp, gstp_fm)
    2130             : 
    2131          26 :       DO itp = 1, ntp
    2132             :          CALL cp_fm_get_submatrix(fm=gstp_fm, target_m=gstp_block, start_row=(itp - 1)*ndo_mo + 1, &
    2133          24 :                                   start_col=1, n_rows=ndo_mo, n_cols=ndo_mo)
    2134          24 :          diag(:) = get_diag(gstp_block)
    2135          96 :          soc_gst = SUM(diag)
    2136          24 :          CALL cp_fm_set_element(real_fm, 1, 1 + nsg + itp, -1.0_dp*soc_gst) ! <0|H_y|T^-1>
    2137          26 :          CALL cp_fm_set_element(real_fm, 1, 1 + nsg + 2*ntp + itp, -1.0_dp*soc_gst) ! <0|H_y|T^+1>
    2138             :       END DO
    2139             : 
    2140             :       !gs-triplet with Ms=0, purely imaginary
    2141           2 :       CALL parallel_gemm('T', 'N', ndo_mo*ntp, ndo_mo, nao, -1.0_dp, tp_coeffs, vec_soc_z, 0.0_dp, gstp_fm)
    2142             : 
    2143          26 :       DO itp = 1, ntp
    2144             :          CALL cp_fm_get_submatrix(fm=gstp_fm, target_m=gstp_block, start_row=(itp - 1)*ndo_mo + 1, &
    2145          24 :                                   start_col=1, n_rows=ndo_mo, n_cols=ndo_mo)
    2146          24 :          diag(:) = get_diag(gstp_block)
    2147          96 :          soc_gst = sqrt2*SUM(diag)
    2148          26 :          CALL cp_fm_set_element(img_fm, 1, 1 + nsg + ntp + itp, soc_gst)
    2149             :       END DO
    2150             : 
    2151             :       !gs clean-up
    2152           2 :       CALL cp_fm_release(prod_fm)
    2153           2 :       CALL cp_fm_release(vec_soc_x)
    2154           2 :       CALL cp_fm_release(vec_soc_y)
    2155           2 :       CALL cp_fm_release(vec_soc_z)
    2156           2 :       CALL cp_fm_release(gstp_fm)
    2157           2 :       CALL cp_fm_struct_release(gstp_struct)
    2158           2 :       CALL cp_fm_struct_release(prod_struct)
    2159           2 :       DEALLOCATE (gstp_block)
    2160             : 
    2161             :       !Now do the singlet-triplet SOC
    2162             :       !start by computing the singlet-triplet overlap
    2163             :       CALL dbcsr_multiply('N', 'N', 1.0_dp, matrix_s(1)%matrix, dbcsr_tp, 0.0_dp, &
    2164           2 :                           dbcsr_work, filter_eps=eps_filter)
    2165           2 :       CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sg, dbcsr_work, 0.0_dp, dbcsr_ovlp, filter_eps=eps_filter)
    2166             : 
    2167             :       !singlet-triplet with Ms=+-1, imaginary part
    2168           2 :       CALL dbcsr_multiply('N', 'N', 1.0_dp, orb_soc_x, dbcsr_tp, 0.0_dp, dbcsr_work, filter_eps=eps_filter)
    2169           2 :       CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sg, dbcsr_work, 0.0_dp, dbcsr_prod, filter_eps=eps_filter)
    2170             : 
    2171             :       CALL rcs_amew_soc_elements(dbcsr_tmp, dbcsr_prod, dbcsr_ovlp, domo_soc_x, &
    2172           2 :                                  pref_trace=-1.0_dp, pref_overall=-0.5_dp*sqrt2)
    2173             : 
    2174             :       !<S|H_x|T^-1>
    2175           2 :       CALL copy_dbcsr_to_fm(dbcsr_tmp, tmp_fm)
    2176             :       CALL cp_fm_to_fm_submat(msource=tmp_fm, mtarget=img_fm, nrow=nex, ncol=nex, &
    2177             :                               s_firstrow=1, s_firstcol=1, t_firstrow=2, &
    2178           2 :                               t_firstcol=1 + nsg + 1)
    2179             : 
    2180             :       !<S|H_x|T^+1> takes a minus sign
    2181           2 :       CALL cp_fm_scale(-1.0_dp, tmp_fm)
    2182             :       CALL cp_fm_to_fm_submat(msource=tmp_fm, mtarget=img_fm, nrow=nex, ncol=nex, &
    2183             :                               s_firstrow=1, s_firstcol=1, t_firstrow=2, &
    2184           2 :                               t_firstcol=1 + nsg + 2*ntp + 1)
    2185             : 
    2186             :       !singlet-triplet with Ms=+-1, real part
    2187           2 :       CALL dbcsr_multiply('N', 'N', 1.0_dp, orb_soc_y, dbcsr_tp, 0.0_dp, dbcsr_work, filter_eps=eps_filter)
    2188           2 :       CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sg, dbcsr_work, 0.0_dp, dbcsr_prod, filter_eps=eps_filter)
    2189             : 
    2190             :       CALL rcs_amew_soc_elements(dbcsr_tmp, dbcsr_prod, dbcsr_ovlp, domo_soc_y, &
    2191           2 :                                  pref_trace=-1.0_dp, pref_overall=-0.5_dp*sqrt2)
    2192             : 
    2193             :       !<S|H_y|T^-1>
    2194           2 :       CALL copy_dbcsr_to_fm(dbcsr_tmp, tmp_fm)
    2195             :       CALL cp_fm_to_fm_submat(msource=tmp_fm, mtarget=real_fm, nrow=nex, ncol=nex, &
    2196             :                               s_firstrow=1, s_firstcol=1, t_firstrow=2, &
    2197           2 :                               t_firstcol=1 + nsg + 1)
    2198             : 
    2199             :       !<S|H_y|T^+1>
    2200             :       CALL cp_fm_to_fm_submat(msource=tmp_fm, mtarget=real_fm, nrow=nex, ncol=nex, &
    2201             :                               s_firstrow=1, s_firstcol=1, t_firstrow=2, &
    2202           2 :                               t_firstcol=1 + nsg + 2*ntp + 1)
    2203             : 
    2204             :       !singlet-triplet with Ms=0, purely imaginary
    2205           2 :       CALL dbcsr_multiply('N', 'N', 1.0_dp, orb_soc_z, dbcsr_tp, 0.0_dp, dbcsr_work, filter_eps=eps_filter)
    2206           2 :       CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sg, dbcsr_work, 0.0_dp, dbcsr_prod, filter_eps=eps_filter)
    2207             : 
    2208             :       CALL rcs_amew_soc_elements(dbcsr_tmp, dbcsr_prod, dbcsr_ovlp, domo_soc_z, &
    2209           2 :                                  pref_trace=-1.0_dp, pref_overall=1.0_dp)
    2210             : 
    2211             :       !<S|H_z|T^0>
    2212           2 :       CALL copy_dbcsr_to_fm(dbcsr_tmp, tmp_fm)
    2213             :       CALL cp_fm_to_fm_submat(msource=tmp_fm, mtarget=img_fm, nrow=nex, ncol=nex, &
    2214             :                               s_firstrow=1, s_firstcol=1, t_firstrow=2, &
    2215           2 :                               t_firstcol=1 + nsg + ntp + 1)
    2216             : 
    2217             :       !Now the triplet-triplet SOC
    2218             :       !start by computing the overlap
    2219             :       CALL dbcsr_multiply('N', 'N', 1.0_dp, matrix_s(1)%matrix, dbcsr_tp, 0.0_dp, &
    2220           2 :                           dbcsr_work, filter_eps=eps_filter)
    2221           2 :       CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_tp, dbcsr_work, 0.0_dp, dbcsr_ovlp, filter_eps=eps_filter)
    2222             : 
    2223             :       !Ms=0 to Ms=+-1 SOC, imaginary part
    2224           2 :       CALL dbcsr_multiply('N', 'N', 1.0_dp, orb_soc_x, dbcsr_tp, 0.0_dp, dbcsr_work, filter_eps=eps_filter)
    2225           2 :       CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_tp, dbcsr_work, 0.0_dp, dbcsr_prod, filter_eps=eps_filter)
    2226             : 
    2227             :       CALL rcs_amew_soc_elements(dbcsr_tmp, dbcsr_prod, dbcsr_ovlp, domo_soc_x, &
    2228           2 :                                  pref_trace=1.0_dp, pref_overall=-0.5_dp*sqrt2)
    2229             : 
    2230             :       !<T^0|H_x|T^+1>
    2231           2 :       CALL copy_dbcsr_to_fm(dbcsr_tmp, tmp_fm)
    2232             :       CALL cp_fm_to_fm_submat(msource=tmp_fm, mtarget=img_fm, nrow=nex, ncol=nex, &
    2233             :                               s_firstrow=1, s_firstcol=1, t_firstrow=1 + nsg + ntp + 1, &
    2234           2 :                               t_firstcol=1 + nsg + 2*ntp + 1)
    2235             : 
    2236             :       !<T^-1|H_x|T^0>, takes a minus sign and a transpose (because computed <T^0|H_x|T^-1>)
    2237           2 :       CALL cp_fm_transpose(tmp_fm, work_fm)
    2238           2 :       CALL cp_fm_scale(-1.0_dp, work_fm)
    2239             :       CALL cp_fm_to_fm_submat(msource=work_fm, mtarget=img_fm, nrow=nex, ncol=nex, &
    2240             :                               s_firstrow=1, s_firstcol=1, t_firstrow=1 + nsg + 1, &
    2241           2 :                               t_firstcol=1 + nsg + ntp + 1)
    2242             : 
    2243             :       !Ms=0 to Ms=+-1 SOC, real part
    2244           2 :       CALL dbcsr_multiply('N', 'N', 1.0_dp, orb_soc_y, dbcsr_tp, 0.0_dp, dbcsr_work, filter_eps=eps_filter)
    2245           2 :       CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_tp, dbcsr_work, 0.0_dp, dbcsr_prod, filter_eps=eps_filter)
    2246             : 
    2247             :       CALL rcs_amew_soc_elements(dbcsr_tmp, dbcsr_prod, dbcsr_ovlp, domo_soc_y, &
    2248           2 :                                  pref_trace=1.0_dp, pref_overall=0.5_dp*sqrt2)
    2249             : 
    2250             :       !<T^0|H_y|T^+1>
    2251           2 :       CALL copy_dbcsr_to_fm(dbcsr_tmp, tmp_fm)
    2252             :       CALL cp_fm_to_fm_submat(msource=tmp_fm, mtarget=real_fm, nrow=nex, ncol=nex, &
    2253             :                               s_firstrow=1, s_firstcol=1, t_firstrow=1 + nsg + ntp + 1, &
    2254           2 :                               t_firstcol=1 + nsg + 2*ntp + 1)
    2255             : 
    2256             :       !<T^-1|H_y|T^0>, takes a minus sign and a transpose
    2257           2 :       CALL cp_fm_transpose(tmp_fm, work_fm)
    2258           2 :       CALL cp_fm_scale(-1.0_dp, work_fm)
    2259             :       CALL cp_fm_to_fm_submat(msource=work_fm, mtarget=real_fm, nrow=nex, ncol=nex, &
    2260             :                               s_firstrow=1, s_firstcol=1, t_firstrow=1 + nsg + 1, &
    2261           2 :                               t_firstcol=1 + nsg + ntp + 1)
    2262             : 
    2263             :       !Ms=1 to Ms=1 and Ms=-1 to Ms=-1 SOC, purely imaginary
    2264           2 :       CALL dbcsr_multiply('N', 'N', 1.0_dp, orb_soc_z, dbcsr_tp, 0.0_dp, dbcsr_work, filter_eps=eps_filter)
    2265           2 :       CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_tp, dbcsr_work, 0.0_dp, dbcsr_prod, filter_eps=eps_filter)
    2266             : 
    2267             :       CALL rcs_amew_soc_elements(dbcsr_tmp, dbcsr_prod, dbcsr_ovlp, domo_soc_z, &
    2268           2 :                                  pref_trace=1.0_dp, pref_overall=1.0_dp)
    2269             : 
    2270             :       !<T^+1|H_z|T^+1>
    2271           2 :       CALL copy_dbcsr_to_fm(dbcsr_tmp, tmp_fm)
    2272             :       CALL cp_fm_to_fm_submat(msource=tmp_fm, mtarget=img_fm, nrow=nex, ncol=nex, &
    2273             :                               s_firstrow=1, s_firstcol=1, t_firstrow=1 + nsg + 2*ntp + 1, &
    2274           2 :                               t_firstcol=1 + nsg + 2*ntp + 1)
    2275             : 
    2276             :       !<T^-1|H_z|T^-1>, takes a minus sign
    2277           2 :       CALL cp_fm_scale(-1.0_dp, tmp_fm)
    2278             :       CALL cp_fm_to_fm_submat(msource=tmp_fm, mtarget=img_fm, nrow=nex, ncol=nex, &
    2279             :                               s_firstrow=1, s_firstcol=1, t_firstrow=1 + nsg + 1, &
    2280           2 :                               t_firstcol=1 + nsg + 1)
    2281             : 
    2282             : !  Intermediate clean-up
    2283           2 :       CALL cp_fm_struct_release(work_struct)
    2284           2 :       CALL cp_fm_release(work_fm)
    2285           2 :       CALL cp_fm_release(tmp_fm)
    2286           2 :       DEALLOCATE (diag, domo_soc_x, domo_soc_y, domo_soc_z)
    2287             : 
    2288             : !  Set-up the complex hermitian perturbation matrix
    2289           2 :       CALL cp_cfm_create(hami_cfm, full_struct)
    2290           2 :       CALL cp_fm_to_cfm(real_fm, img_fm, hami_cfm)
    2291             : 
    2292           2 :       CALL cp_fm_release(real_fm)
    2293           2 :       CALL cp_fm_release(img_fm)
    2294             : 
    2295             : !  Diagonalize the Hamiltonian
    2296           6 :       ALLOCATE (tmp_evals(ntot))
    2297           2 :       CALL cp_cfm_create(evecs_cfm, full_struct)
    2298           2 :       CALL cp_cfm_heevd(hami_cfm, evecs_cfm, tmp_evals)
    2299             : 
    2300             :       !  Adjust the energies so the GS has zero, and store in the donor_state (without the GS)
    2301           6 :       ALLOCATE (donor_state%soc_evals(ntot - 1))
    2302          98 :       donor_state%soc_evals(:) = tmp_evals(2:ntot) - tmp_evals(1)
    2303             : 
    2304             : !  Compute the dipole oscillator strengths
    2305             :       CALL compute_soc_dipole_fosc(evecs_cfm, dbcsr_soc_package, donor_state, xas_tdp_env, &
    2306           2 :                                    xas_tdp_control, qs_env)
    2307             : 
    2308             : !  And the quadrupole (if needed)
    2309           2 :       IF (xas_tdp_control%do_quad) THEN
    2310             :          CALL compute_soc_quadrupole_fosc(evecs_cfm, dbcsr_soc_package, donor_state, xas_tdp_env, &
    2311           0 :                                           xas_tdp_control, qs_env)
    2312             :       END IF
    2313             : 
    2314             : !  Clean-up
    2315           2 :       CALL cp_fm_struct_release(full_struct)
    2316           2 :       CALL cp_cfm_release(hami_cfm)
    2317           2 :       CALL cp_cfm_release(evecs_cfm)
    2318           2 :       CALL dbcsr_distribution_release(coeffs_dist)
    2319           2 :       CALL dbcsr_distribution_release(prod_dist)
    2320           2 :       CALL dbcsr_release(dbcsr_sg)
    2321           2 :       CALL dbcsr_release(dbcsr_tp)
    2322           2 :       CALL dbcsr_release(dbcsr_prod)
    2323           2 :       CALL dbcsr_release(dbcsr_ovlp)
    2324           2 :       CALL dbcsr_release(dbcsr_tmp)
    2325           2 :       CALL dbcsr_release(dbcsr_work)
    2326           2 :       DEALLOCATE (coeffs_dist, prod_dist, col_dist, col_blk_size, row_dist_new)
    2327           2 :       DEALLOCATE (dbcsr_sg, dbcsr_tp, dbcsr_work, dbcsr_prod, dbcsr_ovlp, dbcsr_tmp)
    2328             : 
    2329           2 :       CALL timestop(handle)
    2330             : 
    2331          22 :    END SUBROUTINE include_rcs_soc
    2332             : 
    2333             : ! **************************************************************************************************
    2334             : !> \brief Computes the matrix elements of a one-body operator (given wrt AOs) in the basis of the
    2335             : !>        excited state AMEWs with ground state, for the open-shell case
    2336             : !> \param amew_op the operator in the basis of the AMEWs (array because could have x,y,z components)
    2337             : !> \param ao_op the operator in the basis of the atomic orbitals
    2338             : !> \param gs_coeffs the coefficient of the GS donor MOs. Ecplicitely passed because of special
    2339             : !>                  format in the ROKS case (see include_os_soc routine)
    2340             : !> \param dbcsr_soc_package inhertited from the main SOC routine
    2341             : !> \param donor_state ...
    2342             : !> \param eps_filter ...
    2343             : !> \param qs_env ...
    2344             : !> \note The ordering of the AMEWs is consistent with SOC and is gs, sc, sf
    2345             : !>       We assume that the operator is spin-independent => only <0|0>, <0|sc>, <sc|sc> and <sf|sf>
    2346             : !>       yield non-zero matrix elements
    2347             : !>       Only for open-shell calculations
    2348             : ! **************************************************************************************************
    2349           2 :    SUBROUTINE get_os_amew_op(amew_op, ao_op, gs_coeffs, dbcsr_soc_package, donor_state, &
    2350             :                              eps_filter, qs_env)
    2351             : 
    2352             :       TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:), &
    2353             :          INTENT(OUT)                                     :: amew_op
    2354             :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: ao_op
    2355             :       TYPE(cp_fm_type), INTENT(IN)                       :: gs_coeffs
    2356             :       TYPE(dbcsr_soc_package_type)                       :: dbcsr_soc_package
    2357             :       TYPE(donor_state_type), POINTER                    :: donor_state
    2358             :       REAL(dp), INTENT(IN)                               :: eps_filter
    2359             :       TYPE(qs_environment_type), POINTER                 :: qs_env
    2360             : 
    2361             :       INTEGER                                            :: dim_op, homo, i, isc, nao, ndo_mo, &
    2362             :                                                             ndo_so, nex, nsc, nsf, ntot
    2363             :       REAL(dp)                                           :: op
    2364           2 :       REAL(dp), ALLOCATABLE, DIMENSION(:)                :: diag, gsgs_op
    2365           2 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: domo_op, gsex_block, tmp
    2366             :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
    2367             :       TYPE(cp_fm_struct_type), POINTER                   :: full_struct, gsex_struct, prod_struct, &
    2368             :                                                             tmp_struct, vec_struct
    2369             :       TYPE(cp_fm_type)                                   :: gsex_fm, prod_work, tmp_fm, vec_work, &
    2370             :                                                             work_fm
    2371             :       TYPE(cp_fm_type), POINTER                          :: mo_coeff, sc_coeffs, sf_coeffs
    2372           2 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrix_s
    2373             :       TYPE(dbcsr_type), POINTER                          :: ao_op_i, dbcsr_ovlp, dbcsr_prod, &
    2374             :                                                             dbcsr_sc, dbcsr_sf, dbcsr_tmp, &
    2375             :                                                             dbcsr_work
    2376           2 :       TYPE(mo_set_type), DIMENSION(:), POINTER           :: mos
    2377             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    2378             : 
    2379           2 :       NULLIFY (matrix_s, para_env, blacs_env, full_struct, vec_struct, prod_struct, mos)
    2380           2 :       NULLIFY (mo_coeff, ao_op_i, tmp_struct)
    2381           2 :       NULLIFY (dbcsr_sc, dbcsr_sf, dbcsr_ovlp, dbcsr_work, dbcsr_tmp, dbcsr_prod)
    2382             : 
    2383             : !  Iinitialization
    2384           2 :       dim_op = SIZE(ao_op)
    2385           2 :       sc_coeffs => donor_state%sc_coeffs
    2386           2 :       sf_coeffs => donor_state%sf_coeffs
    2387           2 :       nsc = SIZE(donor_state%sc_evals)
    2388           2 :       nsf = SIZE(donor_state%sf_evals)
    2389           2 :       nex = nsc
    2390           2 :       ntot = 1 + nsc + nsf
    2391           2 :       ndo_mo = donor_state%ndo_mo
    2392           2 :       ndo_so = 2*donor_state%ndo_mo !open-shell => nspins = 2
    2393           2 :       CALL get_qs_env(qs_env, matrix_s=matrix_s, para_env=para_env, blacs_env=blacs_env, mos=mos)
    2394           2 :       CALL dbcsr_get_info(matrix_s(1)%matrix, nfullrows_total=nao)
    2395             : 
    2396           2 :       dbcsr_sc => dbcsr_soc_package%dbcsr_sc
    2397           2 :       dbcsr_sf => dbcsr_soc_package%dbcsr_sf
    2398           2 :       dbcsr_work => dbcsr_soc_package%dbcsr_work
    2399           2 :       dbcsr_tmp => dbcsr_soc_package%dbcsr_tmp
    2400           2 :       dbcsr_prod => dbcsr_soc_package%dbcsr_prod
    2401           2 :       dbcsr_ovlp => dbcsr_soc_package%dbcsr_ovlp
    2402             : 
    2403             : !  Create the amew_op matrix set
    2404             :       CALL cp_fm_struct_create(full_struct, context=blacs_env, para_env=para_env, &
    2405           2 :                                nrow_global=ntot, ncol_global=ntot)
    2406          12 :       ALLOCATE (amew_op(dim_op))
    2407           8 :       DO i = 1, dim_op
    2408           8 :          CALL cp_fm_create(amew_op(i), full_struct)
    2409             :       END DO
    2410             : 
    2411             : !  Before looping, need to evaluate sum_j,sigma <phi^0_j,sgima|op|phi^0_j,sigma>, for each dimension
    2412             : !  of the operator
    2413           6 :       ALLOCATE (gsgs_op(dim_op))
    2414             : 
    2415             :       !start with the alpha MOs
    2416           2 :       CALL get_mo_set(mos(1), mo_coeff=mo_coeff, homo=homo)
    2417           6 :       ALLOCATE (diag(homo))
    2418           2 :       CALL cp_fm_get_info(mo_coeff, matrix_struct=vec_struct)
    2419             :       CALL cp_fm_struct_create(prod_struct, context=blacs_env, para_env=para_env, &
    2420           2 :                                nrow_global=homo, ncol_global=homo)
    2421           2 :       CALL cp_fm_create(vec_work, vec_struct)
    2422           2 :       CALL cp_fm_create(prod_work, prod_struct)
    2423             : 
    2424           8 :       DO i = 1, dim_op
    2425             : 
    2426           6 :          ao_op_i => ao_op(i)%matrix
    2427             : 
    2428           6 :          CALL cp_dbcsr_sm_fm_multiply(ao_op_i, mo_coeff, vec_work, ncol=homo)
    2429           6 :          CALL parallel_gemm('T', 'N', homo, homo, nao, 1.0_dp, mo_coeff, vec_work, 0.0_dp, prod_work)
    2430           6 :          CALL cp_fm_get_diag(prod_work, diag)
    2431          62 :          gsgs_op(i) = SUM(diag)
    2432             : 
    2433             :       END DO !i
    2434             : 
    2435           2 :       CALL cp_fm_release(vec_work)
    2436           2 :       CALL cp_fm_release(prod_work)
    2437           2 :       CALL cp_fm_struct_release(prod_struct)
    2438           2 :       DEALLOCATE (diag)
    2439           2 :       NULLIFY (vec_struct)
    2440             : 
    2441             :       !then beta orbitals
    2442           2 :       CALL get_mo_set(mos(2), mo_coeff=mo_coeff, homo=homo)
    2443           6 :       ALLOCATE (diag(homo))
    2444           2 :       CALL cp_fm_get_info(mo_coeff, matrix_struct=vec_struct)
    2445             :       CALL cp_fm_struct_create(prod_struct, context=blacs_env, para_env=para_env, &
    2446           2 :                                nrow_global=homo, ncol_global=homo)
    2447           2 :       CALL cp_fm_create(vec_work, vec_struct)
    2448           2 :       CALL cp_fm_create(prod_work, prod_struct)
    2449             : 
    2450           8 :       DO i = 1, dim_op
    2451             : 
    2452           6 :          ao_op_i => ao_op(i)%matrix
    2453             : 
    2454           6 :          CALL cp_dbcsr_sm_fm_multiply(ao_op_i, mo_coeff, vec_work, ncol=homo)
    2455           6 :          CALL parallel_gemm('T', 'N', homo, homo, nao, 1.0_dp, mo_coeff, vec_work, 0.0_dp, prod_work)
    2456           6 :          CALL cp_fm_get_diag(prod_work, diag)
    2457          62 :          gsgs_op(i) = gsgs_op(i) + SUM(diag)
    2458             : 
    2459             :       END DO !i
    2460             : 
    2461           2 :       CALL cp_fm_release(vec_work)
    2462           2 :       CALL cp_fm_release(prod_work)
    2463           2 :       CALL cp_fm_struct_release(prod_struct)
    2464           2 :       DEALLOCATE (diag)
    2465           2 :       NULLIFY (vec_struct)
    2466             : 
    2467             : !  Before looping over excited AMEWs, define some work matrices and structures
    2468             :       CALL cp_fm_struct_create(vec_struct, context=blacs_env, para_env=para_env, &
    2469           2 :                                nrow_global=nao, ncol_global=ndo_so)
    2470             :       CALL cp_fm_struct_create(prod_struct, context=blacs_env, para_env=para_env, &
    2471           2 :                                nrow_global=ndo_so, ncol_global=ndo_so)
    2472             :       CALL cp_fm_struct_create(gsex_struct, context=blacs_env, para_env=para_env, &
    2473           2 :                                nrow_global=ndo_so*nex, ncol_global=ndo_so)
    2474             :       CALL cp_fm_struct_create(tmp_struct, context=blacs_env, para_env=para_env, &
    2475           2 :                                nrow_global=nex, ncol_global=nex)
    2476           2 :       CALL cp_fm_create(vec_work, vec_struct) !for op*|phi>
    2477           2 :       CALL cp_fm_create(prod_work, prod_struct) !for any <phi|op|phi>
    2478           2 :       CALL cp_fm_create(work_fm, full_struct)
    2479           2 :       CALL cp_fm_create(gsex_fm, gsex_struct)
    2480           2 :       CALL cp_fm_create(tmp_fm, tmp_struct)
    2481           6 :       ALLOCATE (diag(ndo_so))
    2482           8 :       ALLOCATE (domo_op(ndo_so, ndo_so))
    2483           6 :       ALLOCATE (tmp(ndo_so, ndo_so))
    2484           6 :       ALLOCATE (gsex_block(ndo_so, ndo_so))
    2485             : 
    2486             : !  Loop over the dimensions of the operator
    2487           8 :       DO i = 1, dim_op
    2488             : 
    2489           6 :          ao_op_i => ao_op(i)%matrix
    2490             : 
    2491             :          !put the gs-gs contribution
    2492           6 :          CALL cp_fm_set_element(amew_op(i), 1, 1, gsgs_op(i))
    2493             : 
    2494             :          !  Precompute what we can before looping over excited states
    2495             :          ! Need the operator in the donor MOs basis <phi^0_I,sigma|op_i|phi^0_J,tau>
    2496           6 :          CALL cp_dbcsr_sm_fm_multiply(ao_op_i, gs_coeffs, vec_work, ncol=ndo_so)
    2497           6 :          CALL parallel_gemm('T', 'N', ndo_so, ndo_so, nao, 1.0_dp, gs_coeffs, vec_work, 0.0_dp, prod_work)
    2498           6 :          CALL cp_fm_get_submatrix(prod_work, domo_op)
    2499             : 
    2500             :          !  Do the ground-state/spin-conserving operator
    2501           6 :          CALL parallel_gemm('T', 'N', ndo_so*nsc, ndo_so, nao, 1.0_dp, sc_coeffs, vec_work, 0.0_dp, gsex_fm)
    2502          78 :          DO isc = 1, nsc
    2503             :             CALL cp_fm_get_submatrix(fm=gsex_fm, target_m=gsex_block, start_row=(isc - 1)*ndo_so + 1, &
    2504          72 :                                      start_col=1, n_rows=ndo_so, n_cols=ndo_so)
    2505          72 :             diag(:) = get_diag(gsex_block)
    2506         504 :             op = SUM(diag)
    2507          78 :             CALL cp_fm_set_element(amew_op(i), 1, 1 + isc, op)
    2508             :          END DO !isc
    2509             : 
    2510             :          !  The spin-conserving/spin-conserving operator
    2511             :          !overlap
    2512             :          CALL dbcsr_multiply('N', 'N', 1.0_dp, matrix_s(1)%matrix, dbcsr_sc, 0.0_dp, &
    2513           6 :                              dbcsr_work, filter_eps=eps_filter)
    2514           6 :          CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sc, dbcsr_work, 0.0_dp, dbcsr_ovlp, filter_eps=eps_filter)
    2515             : 
    2516             :          !operator in SC LR-orbital basis
    2517           6 :          CALL dbcsr_multiply('N', 'N', 1.0_dp, ao_op_i, dbcsr_sc, 0.0_dp, dbcsr_work, filter_eps=eps_filter)
    2518           6 :          CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sc, dbcsr_work, 0.0_dp, dbcsr_prod, filter_eps=eps_filter)
    2519             : 
    2520             :          CALL os_amew_soc_elements(dbcsr_tmp, dbcsr_prod, dbcsr_ovlp, domo_op, pref_diaga=1.0_dp, &
    2521             :                                    pref_diagb=1.0_dp, pref_tracea=-1.0_dp, pref_traceb=-1.0_dp, &
    2522           6 :                                    pref_diags=gsgs_op(i), symmetric=.TRUE.)
    2523             : 
    2524           6 :          CALL copy_dbcsr_to_fm(dbcsr_tmp, tmp_fm)
    2525             :          CALL cp_fm_to_fm_submat(msource=tmp_fm, mtarget=amew_op(i), nrow=nex, ncol=nex, &
    2526           6 :                                  s_firstrow=1, s_firstcol=1, t_firstrow=2, t_firstcol=2)
    2527             : 
    2528             :          !  The spin-flip/spin-flip operator
    2529             :          !overlap
    2530             :          CALL dbcsr_multiply('N', 'N', 1.0_dp, matrix_s(1)%matrix, dbcsr_sf, 0.0_dp, &
    2531           6 :                              dbcsr_work, filter_eps=eps_filter)
    2532           6 :          CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sf, dbcsr_work, 0.0_dp, dbcsr_ovlp, filter_eps=eps_filter)
    2533             : 
    2534             :          !operator in SF LR-orbital basis
    2535           6 :          CALL dbcsr_multiply('N', 'N', 1.0_dp, ao_op_i, dbcsr_sf, 0.0_dp, dbcsr_work, filter_eps=eps_filter)
    2536           6 :          CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sf, dbcsr_work, 0.0_dp, dbcsr_prod, filter_eps=eps_filter)
    2537             : 
    2538             :          !need to reorganize the domo_op array by swapping the alpha-alpha and the beta-beta quarter
    2539          78 :          tmp(1:ndo_mo, 1:ndo_mo) = domo_op(ndo_mo + 1:ndo_so, ndo_mo + 1:ndo_so)
    2540          78 :          tmp(ndo_mo + 1:ndo_so, ndo_mo + 1:ndo_so) = domo_op(1:ndo_mo, 1:ndo_mo)
    2541             : 
    2542             :          CALL os_amew_soc_elements(dbcsr_tmp, dbcsr_prod, dbcsr_ovlp, tmp, pref_diaga=1.0_dp, &
    2543             :                                    pref_diagb=1.0_dp, pref_tracea=-1.0_dp, pref_traceb=-1.0_dp, &
    2544           6 :                                    pref_diags=gsgs_op(i), symmetric=.TRUE.)
    2545             : 
    2546           6 :          CALL copy_dbcsr_to_fm(dbcsr_tmp, tmp_fm)
    2547             :          CALL cp_fm_to_fm_submat(msource=tmp_fm, mtarget=amew_op(i), nrow=nex, ncol=nex, &
    2548           6 :                                  s_firstrow=1, s_firstcol=1, t_firstrow=1 + nsc + 1, t_firstcol=1 + nsc + 1)
    2549             : 
    2550             :          !Symmetry => only upper diag explicitly built
    2551           8 :          CALL cp_fm_uplo_to_full(amew_op(i), work_fm)
    2552             : 
    2553             :       END DO !i
    2554             : 
    2555             : !  Clean-up
    2556           2 :       CALL cp_fm_struct_release(full_struct)
    2557           2 :       CALL cp_fm_struct_release(prod_struct)
    2558           2 :       CALL cp_fm_struct_release(vec_struct)
    2559           2 :       CALL cp_fm_struct_release(tmp_struct)
    2560           2 :       CALL cp_fm_struct_release(gsex_struct)
    2561           2 :       CALL cp_fm_release(work_fm)
    2562           2 :       CALL cp_fm_release(tmp_fm)
    2563           2 :       CALL cp_fm_release(vec_work)
    2564           2 :       CALL cp_fm_release(prod_work)
    2565           2 :       CALL cp_fm_release(gsex_fm)
    2566             : 
    2567          14 :    END SUBROUTINE get_os_amew_op
    2568             : 
    2569             : ! **************************************************************************************************
    2570             : !> \brief Computes the matrix elements of a one-body operator (given wrt AOs) in the basis of the
    2571             : !>        excited state AMEWs with ground state, singlet and triplet with Ms = -1,0,+1
    2572             : !> \param amew_op the operator in the basis of the AMEWs (array because could have x,y,z components)
    2573             : !> \param ao_op the operator in the basis of the atomic orbitals
    2574             : !> \param dbcsr_soc_package inherited from the main SOC routine
    2575             : !> \param donor_state ...
    2576             : !> \param eps_filter for dbcsr multiplication
    2577             : !> \param qs_env ...
    2578             : !> \note The ordering of the AMEWs is consistent with SOC and is gs, sg, tp(-1), tp(0). tp(+1)
    2579             : !>       We assume that the operator is spin-independent => only <0|0>, <0|S>, <S|S> and <T|T>
    2580             : !>       yield non-zero matrix elements
    2581             : !>       Only for spin-restricted calculations
    2582             : ! **************************************************************************************************
    2583           2 :    SUBROUTINE get_rcs_amew_op(amew_op, ao_op, dbcsr_soc_package, donor_state, eps_filter, qs_env)
    2584             : 
    2585             :       TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:), &
    2586             :          INTENT(OUT)                                     :: amew_op
    2587             :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: ao_op
    2588             :       TYPE(dbcsr_soc_package_type)                       :: dbcsr_soc_package
    2589             :       TYPE(donor_state_type), POINTER                    :: donor_state
    2590             :       REAL(dp), INTENT(IN)                               :: eps_filter
    2591             :       TYPE(qs_environment_type), POINTER                 :: qs_env
    2592             : 
    2593             :       INTEGER                                            :: dim_op, homo, i, isg, nao, ndo_mo, nex, &
    2594             :                                                             nsg, ntot, ntp
    2595             :       REAL(dp)                                           :: op, sqrt2
    2596           2 :       REAL(dp), ALLOCATABLE, DIMENSION(:)                :: diag, gs_diag, gsgs_op
    2597           2 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: domo_op, sggs_block
    2598             :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
    2599             :       TYPE(cp_fm_struct_type), POINTER                   :: full_struct, gsgs_struct, prod_struct, &
    2600             :                                                             sggs_struct, std_struct, tmp_struct, &
    2601             :                                                             vec_struct
    2602             :       TYPE(cp_fm_type)                                   :: gs_fm, prod_fm, sggs_fm, tmp_fm, vec_op, &
    2603             :                                                             work_fm
    2604             :       TYPE(cp_fm_type), POINTER                          :: gs_coeffs, mo_coeff, sg_coeffs
    2605           2 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrix_s
    2606             :       TYPE(dbcsr_type), POINTER                          :: ao_op_i, dbcsr_ovlp, dbcsr_prod, &
    2607             :                                                             dbcsr_sg, dbcsr_tmp, dbcsr_tp, &
    2608             :                                                             dbcsr_work
    2609           2 :       TYPE(mo_set_type), DIMENSION(:), POINTER           :: mos
    2610             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    2611             : 
    2612           2 :       NULLIFY (gs_coeffs, sg_coeffs, matrix_s, full_struct, prod_struct, vec_struct, blacs_env)
    2613           2 :       NULLIFY (para_env, mo_coeff, mos, gsgs_struct, std_struct, tmp_struct, sggs_struct)
    2614           2 :       NULLIFY (ao_op_i, dbcsr_tp, dbcsr_sg, dbcsr_ovlp, dbcsr_work, dbcsr_tmp, dbcsr_prod)
    2615             : 
    2616             : !  Initialization
    2617           2 :       gs_coeffs => donor_state%gs_coeffs
    2618           2 :       sg_coeffs => donor_state%sg_coeffs
    2619           2 :       nsg = SIZE(donor_state%sg_evals)
    2620           2 :       ntp = nsg; nex = nsg !all the same by construction, keep them separate for clarity
    2621           2 :       ntot = 1 + nsg + 3*ntp
    2622           2 :       ndo_mo = donor_state%ndo_mo
    2623           2 :       CALL get_qs_env(qs_env, matrix_s=matrix_s, para_env=para_env, blacs_env=blacs_env, mos=mos)
    2624           2 :       sqrt2 = SQRT(2.0_dp)
    2625           2 :       dim_op = SIZE(ao_op)
    2626             : 
    2627           2 :       dbcsr_sg => dbcsr_soc_package%dbcsr_sg
    2628           2 :       dbcsr_tp => dbcsr_soc_package%dbcsr_tp
    2629           2 :       dbcsr_work => dbcsr_soc_package%dbcsr_work
    2630           2 :       dbcsr_prod => dbcsr_soc_package%dbcsr_prod
    2631           2 :       dbcsr_ovlp => dbcsr_soc_package%dbcsr_ovlp
    2632           2 :       dbcsr_tmp => dbcsr_soc_package%dbcsr_tmp
    2633             : 
    2634             : !  Create the amew_op matrix
    2635             :       CALL cp_fm_struct_create(full_struct, context=blacs_env, para_env=para_env, &
    2636           2 :                                nrow_global=ntot, ncol_global=ntot)
    2637          12 :       ALLOCATE (amew_op(dim_op))
    2638           8 :       DO i = 1, dim_op
    2639           8 :          CALL cp_fm_create(amew_op(i), full_struct)
    2640             :       END DO !i
    2641             : 
    2642             : !  Deal with the GS-GS contribution <0|0> = 2*sum_j <phi_j|op|phi_j>
    2643           2 :       CALL get_mo_set(mos(1), mo_coeff=mo_coeff, nao=nao, homo=homo)
    2644             :       CALL cp_fm_struct_create(gsgs_struct, context=blacs_env, para_env=para_env, &
    2645           2 :                                nrow_global=homo, ncol_global=homo)
    2646           2 :       CALL cp_fm_get_info(mo_coeff, matrix_struct=std_struct)
    2647           2 :       CALL cp_fm_create(gs_fm, gsgs_struct)
    2648           2 :       CALL cp_fm_create(work_fm, std_struct)
    2649           6 :       ALLOCATE (gsgs_op(dim_op))
    2650           6 :       ALLOCATE (gs_diag(homo))
    2651             : 
    2652           8 :       DO i = 1, dim_op
    2653             : 
    2654           6 :          ao_op_i => ao_op(i)%matrix
    2655             : 
    2656           6 :          CALL cp_dbcsr_sm_fm_multiply(ao_op_i, mo_coeff, work_fm, ncol=homo)
    2657           6 :          CALL parallel_gemm('T', 'N', homo, homo, nao, 1.0_dp, mo_coeff, work_fm, 0.0_dp, gs_fm)
    2658           6 :          CALL cp_fm_get_diag(gs_fm, gs_diag)
    2659          62 :          gsgs_op(i) = 2.0_dp*SUM(gs_diag)
    2660             : 
    2661             :       END DO !i
    2662             : 
    2663           2 :       CALL cp_fm_release(gs_fm)
    2664           2 :       CALL cp_fm_release(work_fm)
    2665           2 :       CALL cp_fm_struct_release(gsgs_struct)
    2666           2 :       DEALLOCATE (gs_diag)
    2667             : 
    2668             : !  Create the work and helper fms
    2669           2 :       CALL cp_fm_get_info(gs_coeffs, matrix_struct=vec_struct)
    2670             :       CALL cp_fm_struct_create(prod_struct, context=blacs_env, para_env=para_env, &
    2671           2 :                                nrow_global=ndo_mo, ncol_global=ndo_mo)
    2672           2 :       CALL cp_fm_create(prod_fm, prod_struct)
    2673           2 :       CALL cp_fm_create(vec_op, vec_struct)
    2674             :       CALL cp_fm_struct_create(tmp_struct, context=blacs_env, para_env=para_env, &
    2675           2 :                                nrow_global=nex, ncol_global=nex)
    2676             :       CALL cp_fm_struct_create(sggs_struct, context=blacs_env, para_env=para_env, &
    2677           2 :                                nrow_global=ndo_mo*nsg, ncol_global=ndo_mo)
    2678           2 :       CALL cp_fm_create(tmp_fm, tmp_struct)
    2679           2 :       CALL cp_fm_create(work_fm, full_struct)
    2680           2 :       CALL cp_fm_create(sggs_fm, sggs_struct)
    2681           6 :       ALLOCATE (diag(ndo_mo))
    2682           8 :       ALLOCATE (domo_op(ndo_mo, ndo_mo))
    2683           6 :       ALLOCATE (sggs_block(ndo_mo, ndo_mo))
    2684             : 
    2685             : ! Iterate over the dimensions of the operator
    2686             : ! Note: operator matrices are asusmed symmetric, can only do upper half
    2687           8 :       DO i = 1, dim_op
    2688             : 
    2689           6 :          ao_op_i => ao_op(i)%matrix
    2690             : 
    2691             :          ! The GS-GS contribution
    2692           6 :          CALL cp_fm_set_element(amew_op(i), 1, 1, gsgs_op(i))
    2693             : 
    2694             :          ! Compute the operator in the donor MOs basis
    2695           6 :          CALL cp_dbcsr_sm_fm_multiply(ao_op_i, gs_coeffs, vec_op, ncol=ndo_mo)
    2696           6 :          CALL parallel_gemm('T', 'N', ndo_mo, ndo_mo, nao, 1.0_dp, gs_coeffs, vec_op, 0.0_dp, prod_fm)
    2697           6 :          CALL cp_fm_get_submatrix(prod_fm, domo_op)
    2698             : 
    2699             :          ! Compute the ground-state/singlet components. ao_op*gs_coeffs already stored in vec_op
    2700           6 :          CALL parallel_gemm('T', 'N', ndo_mo*nsg, ndo_mo, nao, 1.0_dp, sg_coeffs, vec_op, 0.0_dp, sggs_fm)
    2701          78 :          DO isg = 1, nsg
    2702             :             CALL cp_fm_get_submatrix(fm=sggs_fm, target_m=sggs_block, start_row=(isg - 1)*ndo_mo + 1, &
    2703          72 :                                      start_col=1, n_rows=ndo_mo, n_cols=ndo_mo)
    2704          72 :             diag(:) = get_diag(sggs_block)
    2705         288 :             op = sqrt2*SUM(diag)
    2706          78 :             CALL cp_fm_set_element(amew_op(i), 1, 1 + isg, op)
    2707             :          END DO
    2708             : 
    2709             :          ! do the singlet-singlet components
    2710             :          !start with the overlap
    2711             :          CALL dbcsr_multiply('N', 'N', 1.0_dp, matrix_s(1)%matrix, dbcsr_sg, 0.0_dp, &
    2712           6 :                              dbcsr_work, filter_eps=eps_filter)
    2713           6 :          CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sg, dbcsr_work, 0.0_dp, dbcsr_ovlp, filter_eps=eps_filter)
    2714             : 
    2715             :          !then the operator in the LR orbital basis
    2716           6 :          CALL dbcsr_multiply('N', 'N', 1.0_dp, ao_op_i, dbcsr_sg, 0.0_dp, dbcsr_work, filter_eps=eps_filter)
    2717           6 :          CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sg, dbcsr_work, 0.0_dp, dbcsr_prod, filter_eps=eps_filter)
    2718             : 
    2719             :          !use the soc routine, it is compatible
    2720             :          CALL rcs_amew_soc_elements(dbcsr_tmp, dbcsr_prod, dbcsr_ovlp, domo_op, pref_trace=-1.0_dp, &
    2721           6 :                                     pref_overall=1.0_dp, pref_diags=gsgs_op(i), symmetric=.TRUE.)
    2722             : 
    2723           6 :          CALL copy_dbcsr_to_fm(dbcsr_tmp, tmp_fm)
    2724             :          CALL cp_fm_to_fm_submat(msource=tmp_fm, mtarget=amew_op(i), nrow=nex, ncol=nex, &
    2725           6 :                                  s_firstrow=1, s_firstcol=1, t_firstrow=2, t_firstcol=2)
    2726             : 
    2727             :          ! compute the triplet-triplet components
    2728             :          !the overlap
    2729             :          CALL dbcsr_multiply('N', 'N', 1.0_dp, matrix_s(1)%matrix, dbcsr_tp, 0.0_dp, &
    2730           6 :                              dbcsr_work, filter_eps=eps_filter)
    2731           6 :          CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_tp, dbcsr_work, 0.0_dp, dbcsr_ovlp, filter_eps=eps_filter)
    2732             : 
    2733             :          !the operator in the LR orbital basis
    2734           6 :          CALL dbcsr_multiply('N', 'N', 1.0_dp, ao_op_i, dbcsr_sg, 0.0_dp, dbcsr_work, filter_eps=eps_filter)
    2735           6 :          CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sg, dbcsr_work, 0.0_dp, dbcsr_prod, filter_eps=eps_filter)
    2736             : 
    2737             :          CALL rcs_amew_soc_elements(dbcsr_tmp, dbcsr_prod, dbcsr_ovlp, domo_op, pref_trace=-1.0_dp, &
    2738           6 :                                     pref_overall=1.0_dp, pref_diags=gsgs_op(i), symmetric=.TRUE.)
    2739             : 
    2740           6 :          CALL copy_dbcsr_to_fm(dbcsr_tmp, tmp_fm)
    2741             :          !<T^-1|op|T^-1>
    2742             :          CALL cp_fm_to_fm_submat(msource=tmp_fm, mtarget=amew_op(i), nrow=nex, ncol=nex, &
    2743           6 :                                  s_firstrow=1, s_firstcol=1, t_firstrow=1 + nsg + 1, t_firstcol=1 + nsg + 1)
    2744             :          !<T^0|op|T^0>
    2745             :          CALL cp_fm_to_fm_submat(msource=tmp_fm, mtarget=amew_op(i), nrow=nex, ncol=nex, &
    2746             :                                  s_firstrow=1, s_firstcol=1, t_firstrow=1 + nsg + ntp + 1, &
    2747           6 :                                  t_firstcol=1 + nsg + ntp + 1)
    2748             :          !<T^-1|op|T^-1>
    2749             :          CALL cp_fm_to_fm_submat(msource=tmp_fm, mtarget=amew_op(i), nrow=nex, ncol=nex, &
    2750             :                                  s_firstrow=1, s_firstcol=1, t_firstrow=1 + nsg + 2*ntp + 1, &
    2751           6 :                                  t_firstcol=1 + nsg + 2*ntp + 1)
    2752             : 
    2753             :          ! Symmetrize the matrix (only upper triangle built)
    2754           8 :          CALL cp_fm_uplo_to_full(amew_op(i), work_fm)
    2755             : 
    2756             :       END DO !i
    2757             : 
    2758             : !  Clean-up
    2759           2 :       CALL cp_fm_release(prod_fm)
    2760           2 :       CALL cp_fm_release(work_fm)
    2761           2 :       CALL cp_fm_release(tmp_fm)
    2762           2 :       CALL cp_fm_release(vec_op)
    2763           2 :       CALL cp_fm_release(sggs_fm)
    2764           2 :       CALL cp_fm_struct_release(prod_struct)
    2765           2 :       CALL cp_fm_struct_release(full_struct)
    2766           2 :       CALL cp_fm_struct_release(tmp_struct)
    2767           2 :       CALL cp_fm_struct_release(sggs_struct)
    2768             : 
    2769          12 :    END SUBROUTINE get_rcs_amew_op
    2770             : 
    2771             : ! **************************************************************************************************
    2772             : !> \brief Computes the os SOC matrix elements between excited states AMEWs based on the LR orbitals
    2773             : !> \param amew_soc output dbcsr matrix with the SOC in the AMEW basis (needs to be fully resereved)
    2774             : !> \param lr_soc dbcsr matrix with the SOC wrt the LR orbitals
    2775             : !> \param lr_overlap dbcsr matrix with the excited states LR orbital overlap
    2776             : !> \param domo_soc the SOC in the basis of the donor MOs
    2777             : !> \param pref_diaga ...
    2778             : !> \param pref_diagb ...
    2779             : !> \param pref_tracea ...
    2780             : !> \param pref_traceb ...
    2781             : !> \param pref_diags see notes
    2782             : !> \param symmetric if the outcome is known to be symmetric, only elements with iex <= jex are done
    2783             : !> \param tracea_start the indices where to start in the trace part for alpha
    2784             : !> \param traceb_start the indices where to start in the trace part for beta
    2785             : !> \note For an excited states pair i,j, the AMEW SOC matrix element is:
    2786             : !>       soc_ij =   pref_diaga*SUM(alpha part of diag of lr_soc_ij)
    2787             : !>                + pref_diagb*SUM(beta part of diag of lr_soc_ij)
    2788             : !>                + pref_tracea*SUM(alpha part of lr_ovlp_ij*TRANSPOSE(domo_soc))
    2789             : !>                + pref_traceb*SUM(beta part of lr_ovlp_ij*TRANSPOSE(domo_soc))
    2790             : !>       optinally, one can add pref_diags*SUM(diag lr_ovlp_ij)
    2791             : ! **************************************************************************************************
    2792          20 :    SUBROUTINE os_amew_soc_elements(amew_soc, lr_soc, lr_overlap, domo_soc, pref_diaga, &
    2793             :                                    pref_diagb, pref_tracea, pref_traceb, pref_diags, &
    2794             :                                    symmetric, tracea_start, traceb_start)
    2795             : 
    2796             :       TYPE(dbcsr_type)                                   :: amew_soc, lr_soc, lr_overlap
    2797             :       REAL(dp), DIMENSION(:, :)                          :: domo_soc
    2798             :       REAL(dp)                                           :: pref_diaga, pref_diagb, pref_tracea, &
    2799             :                                                             pref_traceb
    2800             :       REAL(dp), OPTIONAL                                 :: pref_diags
    2801             :       LOGICAL, OPTIONAL                                  :: symmetric
    2802             :       INTEGER, DIMENSION(2), OPTIONAL                    :: tracea_start, traceb_start
    2803             : 
    2804             :       INTEGER                                            :: iex, jex, ndo_mo, ndo_so
    2805             :       INTEGER, DIMENSION(2)                              :: tas, tbs
    2806             :       LOGICAL                                            :: do_diags, found, my_symm
    2807             :       REAL(dp)                                           :: soc_elem
    2808          20 :       REAL(dp), ALLOCATABLE, DIMENSION(:)                :: diag
    2809          20 :       REAL(dp), DIMENSION(:, :), POINTER                 :: pblock
    2810             :       TYPE(dbcsr_iterator_type)                          :: iter
    2811             : 
    2812          20 :       ndo_so = SIZE(domo_soc, 1)
    2813          20 :       ndo_mo = ndo_so/2
    2814          60 :       ALLOCATE (diag(ndo_so))
    2815          20 :       my_symm = .FALSE.
    2816          20 :       IF (PRESENT(symmetric)) my_symm = symmetric
    2817          20 :       do_diags = .FALSE.
    2818          20 :       IF (PRESENT(pref_diags)) do_diags = .TRUE.
    2819             : 
    2820             :       !by default, alpha part is (1:ndo_mo,1:ndo_mo) and beta is (ndo_mo+1:ndo_so,ndo_mo+1:ndo_so)
    2821             :       !note: in some SF cases, that might change, mainly because the spin-flip LR-coeffs have
    2822             :       !inverse order, that is: the beta-coeffs in the alpha spot and the alpha coeffs in the
    2823             :       !beta spot
    2824          60 :       tas = 1
    2825          60 :       tbs = ndo_mo + 1
    2826          20 :       IF (PRESENT(tracea_start)) tas = tracea_start
    2827          20 :       IF (PRESENT(traceb_start)) tbs = traceb_start
    2828             : 
    2829          20 :       CALL dbcsr_set(amew_soc, 0.0_dp)
    2830             :       !loop over the excited states pairs as the block of amew_soc (which are all reserved)
    2831          20 :       CALL dbcsr_iterator_start(iter, amew_soc)
    2832        1460 :       DO WHILE (dbcsr_iterator_blocks_left(iter))
    2833             : 
    2834        1440 :          CALL dbcsr_iterator_next_block(iter, row=iex, column=jex)
    2835             : 
    2836        1440 :          IF (my_symm .AND. iex > jex) CYCLE
    2837             : 
    2838             :          !compute the soc matrix element
    2839         912 :          soc_elem = 0.0_dp
    2840         912 :          CALL dbcsr_get_block_p(lr_soc, iex, jex, pblock, found)
    2841         912 :          IF (found) THEN
    2842         444 :             diag(:) = get_diag(pblock)
    2843        3108 :             soc_elem = soc_elem + pref_diaga*SUM(diag(1:ndo_mo)) + pref_diagb*(SUM(diag(ndo_mo + 1:ndo_so)))
    2844             :          END IF
    2845             : 
    2846         912 :          CALL dbcsr_get_block_p(lr_overlap, iex, jex, pblock, found)
    2847         912 :          IF (found) THEN
    2848             :             soc_elem = soc_elem &
    2849             :                        + pref_tracea*SUM(pblock(tas(1):tas(1) + ndo_mo - 1, tas(2):tas(2) + ndo_mo - 1)* &
    2850             :                                          domo_soc(tas(1):tas(1) + ndo_mo - 1, tas(2):tas(2) + ndo_mo - 1)) &
    2851             :                        + pref_traceb*SUM(pblock(tbs(1):tbs(1) + ndo_mo - 1, tbs(2):tbs(2) + ndo_mo - 1)* &
    2852       12000 :                                          domo_soc(tbs(1):tbs(1) + ndo_mo - 1, tbs(2):tbs(2) + ndo_mo - 1))
    2853             : 
    2854         480 :             IF (do_diags) THEN
    2855         336 :                diag(:) = get_diag(pblock)
    2856        2352 :                soc_elem = soc_elem + pref_diags*SUM(diag)
    2857             :             END IF
    2858             :          END IF
    2859             : 
    2860         912 :          CALL dbcsr_get_block_p(amew_soc, iex, jex, pblock, found)
    2861        3284 :          pblock = soc_elem
    2862             : 
    2863             :       END DO
    2864          20 :       CALL dbcsr_iterator_stop(iter)
    2865             : 
    2866          60 :    END SUBROUTINE os_amew_soc_elements
    2867             : 
    2868             : ! **************************************************************************************************
    2869             : !> \brief Computes the rcs SOC matrix elements between excited states AMEWs based on the LR orbitals
    2870             : !> \param amew_soc output dbcsr matrix with the SOC in the AMEW basis (needs to be fully resereved)
    2871             : !> \param lr_soc dbcsr matrix with the SOC wrt the LR orbitals
    2872             : !> \param lr_overlap dbcsr matrix with the excited states LR orbital overlap
    2873             : !> \param domo_soc the SOC in the basis of the donor MOs
    2874             : !> \param pref_trace see notes
    2875             : !> \param pref_overall see notes
    2876             : !> \param pref_diags see notes
    2877             : !> \param symmetric if the outcome is known to be symmetric, only elements with iex <= jex are done
    2878             : !> \note For an excited states pair i,j, the AMEW SOC matrix element is:
    2879             : !>       soc_ij = pref_overall*(SUM(diag(lr_soc_ij)) + pref_trace*SUM(lr_overlap_ij*TRANSPOSE(domo_soc)))
    2880             : !>       optionally, the value pref_diags*SUM(diag(lr_overlap_ij)) can be added (before pref_overall)
    2881             : ! **************************************************************************************************
    2882         120 :    SUBROUTINE rcs_amew_soc_elements(amew_soc, lr_soc, lr_overlap, domo_soc, pref_trace, &
    2883             :                                     pref_overall, pref_diags, symmetric)
    2884             : 
    2885             :       TYPE(dbcsr_type)                                   :: amew_soc, lr_soc, lr_overlap
    2886             :       REAL(dp), DIMENSION(:, :)                          :: domo_soc
    2887             :       REAL(dp)                                           :: pref_trace, pref_overall
    2888             :       REAL(dp), OPTIONAL                                 :: pref_diags
    2889             :       LOGICAL, OPTIONAL                                  :: symmetric
    2890             : 
    2891             :       INTEGER                                            :: iex, jex
    2892             :       LOGICAL                                            :: do_diags, found, my_symm
    2893             :       REAL(dp)                                           :: soc_elem
    2894         120 :       REAL(dp), ALLOCATABLE, DIMENSION(:)                :: diag
    2895         120 :       REAL(dp), DIMENSION(:, :), POINTER                 :: pblock
    2896             :       TYPE(dbcsr_iterator_type)                          :: iter
    2897             : 
    2898         360 :       ALLOCATE (diag(SIZE(domo_soc, 1)))
    2899         120 :       my_symm = .FALSE.
    2900         120 :       IF (PRESENT(symmetric)) my_symm = symmetric
    2901         120 :       do_diags = .FALSE.
    2902         120 :       IF (PRESENT(pref_diags)) do_diags = .TRUE.
    2903             : 
    2904         120 :       CALL dbcsr_set(amew_soc, 0.0_dp)
    2905             :       !loop over the excited states pairs as the block of amew_soc (which are all reserved)
    2906         120 :       CALL dbcsr_iterator_start(iter, amew_soc)
    2907        2220 :       DO WHILE (dbcsr_iterator_blocks_left(iter))
    2908             : 
    2909        2100 :          CALL dbcsr_iterator_next_block(iter, row=iex, column=jex)
    2910             : 
    2911        2100 :          IF (my_symm .AND. iex > jex) CYCLE
    2912             : 
    2913             :          !compute the soc matrix element
    2914        1644 :          soc_elem = 0.0_dp
    2915        1644 :          CALL dbcsr_get_block_p(lr_soc, iex, jex, pblock, found)
    2916        1644 :          IF (found) THEN
    2917        1008 :             diag(:) = get_diag(pblock)
    2918        5328 :             soc_elem = soc_elem + SUM(diag)
    2919             :          END IF
    2920             : 
    2921        1644 :          CALL dbcsr_get_block_p(lr_overlap, iex, jex, pblock, found)
    2922        1644 :          IF (found) THEN
    2923       31050 :             soc_elem = soc_elem + pref_trace*SUM(pblock*TRANSPOSE(domo_soc))
    2924             : 
    2925        1158 :             IF (do_diags) THEN
    2926         432 :                diag(:) = get_diag(pblock)
    2927        2250 :                soc_elem = soc_elem + pref_diags*SUM(diag)
    2928             :             END IF
    2929             :          END IF
    2930             : 
    2931        1644 :          CALL dbcsr_get_block_p(amew_soc, iex, jex, pblock, found)
    2932        5508 :          pblock = pref_overall*soc_elem
    2933             : 
    2934             :       END DO
    2935         120 :       CALL dbcsr_iterator_stop(iter)
    2936             : 
    2937         360 :    END SUBROUTINE rcs_amew_soc_elements
    2938             : 
    2939             : ! **************************************************************************************************
    2940             : !> \brief Computes the dipole oscillator strengths in the AMEWs basis for SOC
    2941             : !> \param soc_evecs_cfm the complex AMEWs coefficients
    2942             : !> \param dbcsr_soc_package ...
    2943             : !> \param donor_state ...
    2944             : !> \param xas_tdp_env ...
    2945             : !> \param xas_tdp_control ...
    2946             : !> \param qs_env ...
    2947             : !> \param gs_coeffs the ground state coefficients, given for open-shell because in ROKS, the gs_coeffs
    2948             : !>                  are stored slightly differently within SOC for efficiency and code uniquness
    2949             : ! **************************************************************************************************
    2950           4 :    SUBROUTINE compute_soc_dipole_fosc(soc_evecs_cfm, dbcsr_soc_package, donor_state, xas_tdp_env, &
    2951             :                                       xas_tdp_control, qs_env, gs_coeffs)
    2952             : 
    2953             :       TYPE(cp_cfm_type), INTENT(IN)                      :: soc_evecs_cfm
    2954             :       TYPE(dbcsr_soc_package_type)                       :: dbcsr_soc_package
    2955             :       TYPE(donor_state_type), POINTER                    :: donor_state
    2956             :       TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
    2957             :       TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
    2958             :       TYPE(qs_environment_type), POINTER                 :: qs_env
    2959             :       TYPE(cp_fm_type), INTENT(IN), OPTIONAL             :: gs_coeffs
    2960             : 
    2961             :       CHARACTER(len=*), PARAMETER :: routineN = 'compute_soc_dipole_fosc'
    2962             : 
    2963           4 :       COMPLEX(dp), ALLOCATABLE, DIMENSION(:, :)          :: transdip
    2964             :       INTEGER                                            :: handle, i, nosc, ntot
    2965             :       LOGICAL                                            :: do_os, do_rcs
    2966           4 :       REAL(dp), ALLOCATABLE, DIMENSION(:)                :: osc_xyz
    2967           4 :       REAL(dp), DIMENSION(:), POINTER                    :: soc_evals
    2968           4 :       REAL(dp), DIMENSION(:, :), POINTER                 :: osc_str
    2969             :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
    2970             :       TYPE(cp_cfm_type)                                  :: dip_cfm, work1_cfm, work2_cfm
    2971             :       TYPE(cp_fm_struct_type), POINTER                   :: dip_struct, full_struct
    2972           4 :       TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:)        :: amew_dip
    2973             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    2974             : 
    2975           4 :       NULLIFY (para_env, blacs_env, dip_struct, full_struct, osc_str)
    2976           4 :       NULLIFY (soc_evals)
    2977             : 
    2978           4 :       CALL timeset(routineN, handle)
    2979             : 
    2980             :       !init
    2981           4 :       CALL get_qs_env(qs_env, para_env=para_env, blacs_env=blacs_env)
    2982           4 :       do_os = xas_tdp_control%do_spin_cons
    2983           4 :       do_rcs = xas_tdp_control%do_singlet
    2984           4 :       soc_evals => donor_state%soc_evals
    2985           4 :       nosc = SIZE(soc_evals)
    2986           4 :       ntot = nosc + 1 !because GS AMEW is in there
    2987          12 :       ALLOCATE (donor_state%soc_osc_str(nosc, 4))
    2988           4 :       osc_str => donor_state%soc_osc_str
    2989         596 :       osc_str(:, :) = 0.0_dp
    2990           4 :       IF (do_os .AND. .NOT. PRESENT(gs_coeffs)) CPABORT("Need to pass gs_coeffs for open-shell")
    2991             : 
    2992             :       !get some work arrays/matrix
    2993             :       CALL cp_fm_struct_create(dip_struct, context=blacs_env, para_env=para_env, &
    2994           4 :                                nrow_global=ntot, ncol_global=1)
    2995           4 :       CALL cp_cfm_get_info(soc_evecs_cfm, matrix_struct=full_struct)
    2996           4 :       CALL cp_cfm_create(dip_cfm, dip_struct)
    2997           4 :       CALL cp_cfm_create(work1_cfm, full_struct)
    2998           4 :       CALL cp_cfm_create(work2_cfm, full_struct)
    2999          12 :       ALLOCATE (transdip(ntot, 1))
    3000             : 
    3001             :       !get the dipole in the AMEW basis
    3002           4 :       IF (do_os) THEN
    3003             :          CALL get_os_amew_op(amew_dip, xas_tdp_env%dipmat, gs_coeffs, dbcsr_soc_package, &
    3004           2 :                              donor_state, xas_tdp_control%eps_filter, qs_env)
    3005             :       ELSE
    3006             :          CALL get_rcs_amew_op(amew_dip, xas_tdp_env%dipmat, dbcsr_soc_package, donor_state, &
    3007           2 :                               xas_tdp_control%eps_filter, qs_env)
    3008             :       END IF
    3009             : 
    3010          12 :       ALLOCATE (osc_xyz(nosc))
    3011          16 :       DO i = 1, 3 !cartesian coord x, y, z
    3012             : 
    3013             :          !Convert the real dipole into the cfm format for calculations
    3014          12 :          CALL cp_fm_to_cfm(msourcer=amew_dip(i), mtarget=work1_cfm)
    3015             : 
    3016             :          !compute amew_coeffs^dagger * amew_dip * amew_gs to get the transition moments
    3017             :          CALL parallel_gemm('C', 'N', ntot, ntot, ntot, (1.0_dp, 0.0_dp), soc_evecs_cfm, work1_cfm, &
    3018          12 :                             (0.0_dp, 0.0_dp), work2_cfm)
    3019             :          CALL parallel_gemm('N', 'N', ntot, 1, ntot, (1.0_dp, 0.0_dp), work2_cfm, soc_evecs_cfm, &
    3020          12 :                             (0.0_dp, 0.0_dp), dip_cfm)
    3021             : 
    3022          12 :          CALL cp_cfm_get_submatrix(dip_cfm, transdip)
    3023             : 
    3024             :          !transition dipoles are real numbers
    3025         444 :          osc_xyz(:) = REAL(transdip(2:ntot, 1))**2 + AIMAG(transdip(2:ntot, 1))**2
    3026         444 :          osc_str(:, 4) = osc_str(:, 4) + osc_xyz(:)
    3027         448 :          osc_str(:, i) = osc_xyz(:)
    3028             : 
    3029             :       END DO !i
    3030             : 
    3031             :       !multiply with appropriate prefac depending in the rep
    3032          20 :       DO i = 1, 4
    3033          20 :          IF (xas_tdp_control%dipole_form == xas_dip_len) THEN
    3034           0 :             osc_str(:, i) = 2.0_dp/3.0_dp*soc_evals(:)*osc_str(:, i)
    3035             :          ELSE
    3036        1184 :             osc_str(:, i) = 2.0_dp/3.0_dp/soc_evals(:)*osc_str(:, i)
    3037             :          END IF
    3038             :       END DO
    3039             : 
    3040             :       !clean-up
    3041           4 :       CALL cp_fm_struct_release(dip_struct)
    3042           4 :       CALL cp_cfm_release(work1_cfm)
    3043           4 :       CALL cp_cfm_release(work2_cfm)
    3044           4 :       CALL cp_cfm_release(dip_cfm)
    3045          16 :       DO i = 1, 3
    3046          16 :          CALL cp_fm_release(amew_dip(i))
    3047             :       END DO
    3048           4 :       DEALLOCATE (amew_dip, transdip)
    3049             : 
    3050           4 :       CALL timestop(handle)
    3051             : 
    3052          12 :    END SUBROUTINE compute_soc_dipole_fosc
    3053             : 
    3054             : ! **************************************************************************************************
    3055             : !> \brief Computes the quadrupole oscillator strengths in the AMEWs basis for SOC
    3056             : !> \param soc_evecs_cfm the complex AMEWs coefficients
    3057             : !> \param dbcsr_soc_package inherited from the main SOC routine
    3058             : !> \param donor_state ...
    3059             : !> \param xas_tdp_env ...
    3060             : !> \param xas_tdp_control ...
    3061             : !> \param qs_env ...
    3062             : !> \param gs_coeffs the ground state coefficients, given for open-shell because in ROKS, the gs_coeffs
    3063             : !>                  are stored slightly differently within SOC for efficiency and code uniquness
    3064             : ! **************************************************************************************************
    3065           0 :    SUBROUTINE compute_soc_quadrupole_fosc(soc_evecs_cfm, dbcsr_soc_package, donor_state, &
    3066             :                                           xas_tdp_env, xas_tdp_control, qs_env, gs_coeffs)
    3067             : 
    3068             :       TYPE(cp_cfm_type), INTENT(IN)                      :: soc_evecs_cfm
    3069             :       TYPE(dbcsr_soc_package_type)                       :: dbcsr_soc_package
    3070             :       TYPE(donor_state_type), POINTER                    :: donor_state
    3071             :       TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
    3072             :       TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
    3073             :       TYPE(qs_environment_type), POINTER                 :: qs_env
    3074             :       TYPE(cp_fm_type), INTENT(IN), OPTIONAL             :: gs_coeffs
    3075             : 
    3076             :       CHARACTER(len=*), PARAMETER :: routineN = 'compute_soc_quadrupole_fosc'
    3077             : 
    3078           0 :       COMPLEX(dp), ALLOCATABLE, DIMENSION(:)             :: trace
    3079             :       COMPLEX(dp), ALLOCATABLE, DIMENSION(:, :)          :: transquad
    3080             :       INTEGER                                            :: handle, i, nosc, ntot
    3081             :       LOGICAL                                            :: do_os, do_rcs
    3082           0 :       REAL(dp), DIMENSION(:), POINTER                    :: osc_str, soc_evals
    3083             :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
    3084             :       TYPE(cp_cfm_type)                                  :: quad_cfm, work1_cfm, work2_cfm
    3085             :       TYPE(cp_fm_struct_type), POINTER                   :: full_struct, quad_struct
    3086           0 :       TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:)        :: amew_quad
    3087             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    3088             : 
    3089           0 :       NULLIFY (para_env, blacs_env, quad_struct, full_struct, osc_str)
    3090           0 :       NULLIFY (soc_evals)
    3091             : 
    3092           0 :       CALL timeset(routineN, handle)
    3093             : 
    3094             :       !init
    3095           0 :       CALL get_qs_env(qs_env, para_env=para_env, blacs_env=blacs_env)
    3096           0 :       do_os = xas_tdp_control%do_spin_cons
    3097           0 :       do_rcs = xas_tdp_control%do_singlet
    3098           0 :       soc_evals => donor_state%soc_evals
    3099           0 :       nosc = SIZE(soc_evals)
    3100           0 :       ntot = nosc + 1 !because GS AMEW is in there
    3101           0 :       ALLOCATE (donor_state%soc_quad_osc_str(nosc))
    3102           0 :       osc_str => donor_state%soc_quad_osc_str
    3103           0 :       osc_str(:) = 0.0_dp
    3104           0 :       IF (do_os .AND. .NOT. PRESENT(gs_coeffs)) CPABORT("Need to pass gs_coeffs for open-shell")
    3105             : 
    3106             :       !get some work arrays/matrix
    3107             :       CALL cp_fm_struct_create(quad_struct, context=blacs_env, para_env=para_env, &
    3108           0 :                                nrow_global=ntot, ncol_global=1)
    3109           0 :       CALL cp_cfm_get_info(soc_evecs_cfm, matrix_struct=full_struct)
    3110           0 :       CALL cp_cfm_create(quad_cfm, quad_struct)
    3111           0 :       CALL cp_cfm_create(work1_cfm, full_struct)
    3112           0 :       CALL cp_cfm_create(work2_cfm, full_struct)
    3113           0 :       ALLOCATE (transquad(ntot, 1))
    3114           0 :       ALLOCATE (trace(nosc))
    3115           0 :       trace = (0.0_dp, 0.0_dp)
    3116             : 
    3117             :       !get the quadrupole in the AMEWs basis
    3118           0 :       IF (do_os) THEN
    3119             :          CALL get_os_amew_op(amew_quad, xas_tdp_env%quadmat, gs_coeffs, dbcsr_soc_package, &
    3120           0 :                              donor_state, xas_tdp_control%eps_filter, qs_env)
    3121             :       ELSE
    3122             :          CALL get_rcs_amew_op(amew_quad, xas_tdp_env%quadmat, dbcsr_soc_package, donor_state, &
    3123           0 :                               xas_tdp_control%eps_filter, qs_env)
    3124             :       END IF
    3125             : 
    3126           0 :       DO i = 1, 6 ! x2, xy, xz, y2, yz, z2
    3127             : 
    3128             :          !Convert the real quadrupole into a cfm for further calculation
    3129           0 :          CALL cp_fm_to_cfm(msourcer=amew_quad(i), mtarget=work1_cfm)
    3130             : 
    3131             :          !compute amew_coeffs^dagger * amew_quad * amew_gs to get the transition moments
    3132             :          CALL parallel_gemm('C', 'N', ntot, ntot, ntot, (1.0_dp, 0.0_dp), soc_evecs_cfm, work1_cfm, &
    3133           0 :                             (0.0_dp, 0.0_dp), work2_cfm)
    3134             :          CALL parallel_gemm('N', 'N', ntot, 1, ntot, (1.0_dp, 0.0_dp), work2_cfm, soc_evecs_cfm, &
    3135           0 :                             (0.0_dp, 0.0_dp), quad_cfm)
    3136             : 
    3137           0 :          CALL cp_cfm_get_submatrix(quad_cfm, transquad)
    3138             : 
    3139             :          !if x2, y2 or z2, need to keep track of trace
    3140           0 :          IF (i == 1 .OR. i == 4 .OR. i == 6) THEN
    3141           0 :             osc_str(:) = osc_str(:) + REAL(transquad(2:ntot, 1))**2 + AIMAG(transquad(2:ntot, 1))**2
    3142           0 :             trace(:) = trace(:) + transquad(2:ntot, 1)
    3143             : 
    3144             :             !if xy, xz, or yz, need to count twice (for yx, zx and zy)
    3145             :          ELSE
    3146           0 :             osc_str(:) = osc_str(:) + 2.0_dp*(REAL(transquad(2:ntot, 1))**2 + AIMAG(transquad(2:ntot, 1))**2)
    3147             :          END IF
    3148             : 
    3149             :       END DO !i
    3150             : 
    3151             :       !remove a third of the trace
    3152           0 :       osc_str(:) = osc_str(:) - 1._dp/3._dp*(REAL(trace(:))**2 + AIMAG(trace(:))**2)
    3153             : 
    3154             :       !multiply by the prefactor
    3155           0 :       osc_str(:) = osc_str(:)*1._dp/20._dp*a_fine**2*soc_evals(:)**3
    3156             : 
    3157             :       !clean-up
    3158           0 :       CALL cp_fm_struct_release(quad_struct)
    3159           0 :       CALL cp_cfm_release(work1_cfm)
    3160           0 :       CALL cp_cfm_release(work2_cfm)
    3161           0 :       CALL cp_cfm_release(quad_cfm)
    3162           0 :       CALL cp_fm_release(amew_quad)
    3163           0 :       DEALLOCATE (transquad, trace)
    3164             : 
    3165           0 :       CALL timestop(handle)
    3166             : 
    3167           0 :    END SUBROUTINE compute_soc_quadrupole_fosc
    3168             : 
    3169           0 : END MODULE xas_tdp_utils
    3170             :

Generated by: LCOV version 1.15