LCOV - code coverage report
Current view: top level - src - xas_tdp_utils.F (source / functions) Coverage Total Hit
Test: CP2K Regtests (git:936074a) Lines: 93.2 % 1317 1227
Test Date: 2025-12-04 06:27:48 Functions: 89.5 % 19 17

            Line data    Source code
       1              : !--------------------------------------------------------------------------------------------------!
       2              : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3              : !   Copyright 2000-2025 CP2K developers group <https://cp2k.org>                                   !
       4              : !                                                                                                  !
       5              : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6              : !--------------------------------------------------------------------------------------------------!
       7              : 
       8              : ! **************************************************************************************************
       9              : !> \brief Utilities for X-ray absorption spectroscopy using TDDFPT
      10              : !> \author AB (01.2018)
      11              : ! **************************************************************************************************
      12              : 
      13              : MODULE xas_tdp_utils
      14              :    USE cp_blacs_env,                    ONLY: cp_blacs_env_type
      15              :    USE cp_cfm_diag,                     ONLY: cp_cfm_heevd
      16              :    USE cp_cfm_types,                    ONLY: cp_cfm_create,&
      17              :                                               cp_cfm_get_info,&
      18              :                                               cp_cfm_get_submatrix,&
      19              :                                               cp_cfm_release,&
      20              :                                               cp_cfm_type,&
      21              :                                               cp_fm_to_cfm
      22              :    USE cp_dbcsr_api,                    ONLY: &
      23              :         dbcsr_add, dbcsr_copy, dbcsr_create, dbcsr_distribution_get, dbcsr_distribution_new, &
      24              :         dbcsr_distribution_release, dbcsr_distribution_type, dbcsr_finalize, dbcsr_get_block_p, &
      25              :         dbcsr_get_info, dbcsr_iterator_blocks_left, dbcsr_iterator_next_block, &
      26              :         dbcsr_iterator_start, dbcsr_iterator_stop, dbcsr_iterator_type, dbcsr_multiply, &
      27              :         dbcsr_p_type, dbcsr_put_block, dbcsr_release, dbcsr_set, dbcsr_type, &
      28              :         dbcsr_type_no_symmetry, dbcsr_type_symmetric
      29              :    USE cp_dbcsr_cholesky,               ONLY: cp_dbcsr_cholesky_decompose,&
      30              :                                               cp_dbcsr_cholesky_invert
      31              :    USE cp_dbcsr_contrib,                ONLY: dbcsr_reserve_all_blocks
      32              :    USE cp_dbcsr_diag,                   ONLY: cp_dbcsr_power
      33              :    USE cp_dbcsr_operations,             ONLY: copy_dbcsr_to_fm,&
      34              :                                               copy_fm_to_dbcsr,&
      35              :                                               cp_dbcsr_sm_fm_multiply,&
      36              :                                               dbcsr_allocate_matrix_set,&
      37              :                                               dbcsr_deallocate_matrix_set
      38              :    USE cp_fm_basic_linalg,              ONLY: cp_fm_column_scale,&
      39              :                                               cp_fm_scale,&
      40              :                                               cp_fm_transpose,&
      41              :                                               cp_fm_uplo_to_full
      42              :    USE cp_fm_diag,                      ONLY: choose_eigv_solver,&
      43              :                                               cp_fm_geeig
      44              :    USE cp_fm_struct,                    ONLY: cp_fm_struct_create,&
      45              :                                               cp_fm_struct_release,&
      46              :                                               cp_fm_struct_type
      47              :    USE cp_fm_types,                     ONLY: cp_fm_create,&
      48              :                                               cp_fm_get_diag,&
      49              :                                               cp_fm_get_info,&
      50              :                                               cp_fm_get_submatrix,&
      51              :                                               cp_fm_release,&
      52              :                                               cp_fm_set_element,&
      53              :                                               cp_fm_to_fm_submat,&
      54              :                                               cp_fm_type
      55              :    USE cp_log_handling,                 ONLY: cp_logger_get_default_io_unit
      56              :    USE input_constants,                 ONLY: ot_precond_full_single,&
      57              :                                               tddfpt_singlet,&
      58              :                                               tddfpt_spin_cons,&
      59              :                                               tddfpt_spin_flip,&
      60              :                                               tddfpt_triplet,&
      61              :                                               xas_dip_len
      62              :    USE kinds,                           ONLY: dp
      63              :    USE mathlib,                         ONLY: get_diag
      64              :    USE message_passing,                 ONLY: mp_para_env_type
      65              :    USE parallel_gemm_api,               ONLY: parallel_gemm
      66              :    USE physcon,                         ONLY: a_fine
      67              :    USE preconditioner_types,            ONLY: destroy_preconditioner,&
      68              :                                               init_preconditioner,&
      69              :                                               preconditioner_type
      70              :    USE qs_environment_types,            ONLY: get_qs_env,&
      71              :                                               qs_environment_type
      72              :    USE qs_mo_methods,                   ONLY: calculate_subspace_eigenvalues
      73              :    USE qs_mo_types,                     ONLY: get_mo_set,&
      74              :                                               mo_set_type
      75              :    USE qs_ot_eigensolver,               ONLY: ot_eigensolver
      76              :    USE xas_tdp_kernel,                  ONLY: kernel_coulomb_xc,&
      77              :                                               kernel_exchange
      78              :    USE xas_tdp_types,                   ONLY: donor_state_type,&
      79              :                                               xas_tdp_control_type,&
      80              :                                               xas_tdp_env_type
      81              : 
      82              : !$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num
      83              : #include "./base/base_uses.f90"
      84              : 
      85              :    IMPLICIT NONE
      86              :    PRIVATE
      87              : 
      88              :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'xas_tdp_utils'
      89              : 
      90              :    PUBLIC :: setup_xas_tdp_prob, solve_xas_tdp_prob, include_rcs_soc, &
      91              :              include_os_soc, rcs_amew_soc_elements
      92              : 
      93              :    !A helper type for SOC
      94              :    TYPE dbcsr_soc_package_type
      95              :       TYPE(dbcsr_type), POINTER     :: dbcsr_sg => NULL()
      96              :       TYPE(dbcsr_type), POINTER     :: dbcsr_tp => NULL()
      97              :       TYPE(dbcsr_type), POINTER     :: dbcsr_sc => NULL()
      98              :       TYPE(dbcsr_type), POINTER     :: dbcsr_sf => NULL()
      99              :       TYPE(dbcsr_type), POINTER     :: dbcsr_prod => NULL()
     100              :       TYPE(dbcsr_type), POINTER     :: dbcsr_ovlp => NULL()
     101              :       TYPE(dbcsr_type), POINTER     :: dbcsr_tmp => NULL()
     102              :       TYPE(dbcsr_type), POINTER     :: dbcsr_work => NULL()
     103              :    END TYPE dbcsr_soc_package_type
     104              : 
     105              : CONTAINS
     106              : 
     107              : ! **************************************************************************************************
     108              : !> \brief Builds the matrix that defines the XAS TDDFPT generalized eigenvalue problem to be solved
     109              : !>        for excitation energies omega. The problem has the form omega*G*C = M*C, where C contains
     110              : !>        the response orbitals coefficients. The matrix M and the metric G are stored in the given
     111              : !>        donor_state
     112              : !> \param donor_state the donor_state for which the problem is restricted
     113              : !> \param qs_env ...
     114              : !> \param xas_tdp_env ...
     115              : !> \param xas_tdp_control ...
     116              : !> \note  the matrix M is symmetric and has the form | M_d   M_o |
     117              : !>                                                   | M_o   M_d |,
     118              : !>       -In the SPIN-RESTRICTED case:
     119              : !>        depending on whether we consider singlet or triplet excitation, the diagonal (M_d) and
     120              : !>        off-diagonal (M_o) parts of M differ:
     121              : !>        - For singlet: M_d = A + 2B + C_aa + C_ab - D
     122              : !>                       M_o = 2B + C_aa + C_ab - E
     123              : !>        - For triplet: M_d = A + C_aa - C_ab - D
     124              : !>                       M_o = C_aa - C_ab - E
     125              : !>        where other subroutines computes the matrices A, B, E, D and G, which are:
     126              : !>        - A: the ground-state contribution: F_pq*delta_IJ - epsilon_IJ*S_pq
     127              : !>        - B: the Coulomb kernel ~(pI|Jq)
     128              : !>        - C: the xc kernel c_aa (double derivatibe wrt to n_alpha) and C_ab (wrt n_alpha and n_beta)
     129              : !>        - D: the on-digonal exact exchange kernel ~(pq|IJ)
     130              : !>        - E: the off-diagonal exact exchange kernel ~(pJ|Iq)
     131              : !>        - G: the metric  S_pq*delta_IJ
     132              : !>        For the xc functionals, C_aa + C_ab or C_aa - C_ab are stored in the same matrix
     133              : !>        In the above definitions, I,J label the donnor MOs and p,q the sgfs of the basis
     134              : !>
     135              : !>       -In the SPIN-UNRESTRICTED, spin-conserving case:
     136              : !>        the on- and off-diagonal elements of M are:
     137              : !>                     M_d = A + B + C -D
     138              : !>                     M_o = B + C - E
     139              : !>        where the submatrices A, B, C, D and E are:
     140              : !>        - A: the groun-state contribution: (F_pq*delta_IJ - epsilon_IJ*S_pq) * delta_ab
     141              : !>        - B: the Coulomb kernel: (pI_a|J_b q)
     142              : !>        - C: the xc kernel: (pI_a|fxc_ab|J_b q)
     143              : !>        - D: the on-diagonal exact-exchange kernel: (pq|I_a J_b) delta_ab
     144              : !>        - E: the off-diagonal exact-exchange kernel: (pJ_b|I_a q) delta_ab
     145              : !>        - G: the metric S_pq*delta_IJ*delta_ab
     146              : !>        p,q label the sgfs, I,J the donro MOs and a,b the spins
     147              : !>
     148              : !>       -In both above cases, the matrix M is always  projected onto the unperturbed unoccupied
     149              : !>        ground state: M <= Q * M * Q^T = (1 - SP) * M * (1 - PS)
     150              : !>
     151              : !>       -In the SPIN-FLIP case:
     152              : !>        Only the TDA is implemented, that is, there are only on-diagonal elements:
     153              : !>                    M_d = A + C - D
     154              : !>        where the submatrices A, C and D are:
     155              : !>        - A: the ground state-contribution: (F_pq*delta_IJ - epsilon_IJ*S_pq) * delta_ab, but here,
     156              : !>                                            the alph-alpha quadrant has the beta Fock matrix and
     157              : !>                                            the beta-beta quadrant has the alpha Fock matrix
     158              : !>        - C: the SF xc kernel: (pI_a|fxc|J_bq), fxc = 1/m * (vxc_a -vxc_b)
     159              : !>        - D: the on-diagonal exact-exchange kernel: (pq|I_a J_b) delta_ab
     160              : !>        To ensure that all excitation start from a given spin to the opposite, we then multiply
     161              : !>        by a Q projector where we swap the alpha-alpha and beta-beta spin-quadrants
     162              : !>
     163              : !>        All possibilities: TDA or full-TDDFT, singlet or triplet, xc or hybrid, etc are treated
     164              : !>        in the same routine to avoid recomputing stuff
     165              : !>        Under TDA, only the on-diagonal elements of M are computed
     166              : !>        In the case of non-TDA, one turns the problem Hermitian
     167              : ! **************************************************************************************************
     168           72 :    SUBROUTINE setup_xas_tdp_prob(donor_state, qs_env, xas_tdp_env, xas_tdp_control)
     169              : 
     170              :       TYPE(donor_state_type), POINTER                    :: donor_state
     171              :       TYPE(qs_environment_type), POINTER                 :: qs_env
     172              :       TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
     173              :       TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
     174              : 
     175              :       CHARACTER(len=*), PARAMETER :: routineN = 'setup_xas_tdp_prob'
     176              : 
     177              :       INTEGER                                            :: handle
     178           72 :       INTEGER, DIMENSION(:), POINTER                     :: submat_blk_size
     179              :       LOGICAL                                            :: do_coul, do_hfx, do_os, do_sc, do_sf, &
     180              :                                                             do_sg, do_tda, do_tp, do_xc
     181              :       REAL(dp)                                           :: eps_filter, sx
     182              :       TYPE(dbcsr_distribution_type), POINTER             :: submat_dist
     183           72 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: ex_ker, xc_ker
     184              :       TYPE(dbcsr_type)                                   :: matrix_a, matrix_a_sf, matrix_b, proj_Q, &
     185              :                                                             proj_Q_sf, work
     186              :       TYPE(dbcsr_type), POINTER :: matrix_c_sc, matrix_c_sf, matrix_c_sg, matrix_c_tp, matrix_d, &
     187              :          matrix_e_sc, sc_matrix_tdp, sf_matrix_tdp, sg_matrix_tdp, tp_matrix_tdp
     188              : 
     189           72 :       NULLIFY (sg_matrix_tdp, tp_matrix_tdp, submat_dist, submat_blk_size, matrix_c_sf)
     190           72 :       NULLIFY (matrix_c_sg, matrix_c_tp, matrix_c_sc, matrix_d, matrix_e_sc)
     191           72 :       NULLIFY (sc_matrix_tdp, sf_matrix_tdp, ex_ker, xc_ker)
     192              : 
     193           72 :       CALL timeset(routineN, handle)
     194              : 
     195              : !  Initialization
     196           72 :       do_os = xas_tdp_control%do_uks .OR. xas_tdp_control%do_roks
     197           72 :       do_sc = xas_tdp_control%do_spin_cons
     198           72 :       do_sf = xas_tdp_control%do_spin_flip
     199           72 :       do_sg = xas_tdp_control%do_singlet
     200           72 :       do_tp = xas_tdp_control%do_triplet
     201           72 :       do_xc = xas_tdp_control%do_xc
     202           72 :       do_hfx = xas_tdp_control%do_hfx
     203           72 :       do_coul = xas_tdp_control%do_coulomb
     204           72 :       do_tda = xas_tdp_control%tamm_dancoff
     205           72 :       sx = xas_tdp_control%sx
     206           72 :       eps_filter = xas_tdp_control%eps_filter
     207           72 :       IF (do_sc) THEN
     208           12 :          ALLOCATE (donor_state%sc_matrix_tdp)
     209           12 :          sc_matrix_tdp => donor_state%sc_matrix_tdp
     210              :       END IF
     211           72 :       IF (do_sf) THEN
     212            2 :          ALLOCATE (donor_state%sf_matrix_tdp)
     213            2 :          sf_matrix_tdp => donor_state%sf_matrix_tdp
     214              :       END IF
     215           72 :       IF (do_sg) THEN
     216           60 :          ALLOCATE (donor_state%sg_matrix_tdp)
     217           60 :          sg_matrix_tdp => donor_state%sg_matrix_tdp
     218              :       END IF
     219           72 :       IF (do_tp) THEN
     220            2 :          ALLOCATE (donor_state%tp_matrix_tdp)
     221            2 :          tp_matrix_tdp => donor_state%tp_matrix_tdp
     222              :       END IF
     223              : 
     224              : !  Get the dist and block size of all matrices A, B, C, etc
     225           72 :       CALL compute_submat_dist_and_blk_size(donor_state, do_os, qs_env)
     226           72 :       submat_dist => donor_state%dbcsr_dist
     227           72 :       submat_blk_size => donor_state%blk_size
     228              : 
     229              : !  Allocate and compute all the matrices A, B, C, etc we will need
     230              : 
     231              :       ! The projector(s) on the unoccupied unperturbed ground state 1-SP and associated work matrix
     232           72 :       IF (do_sg .OR. do_tp .OR. do_sc) THEN !spin-conserving
     233           72 :          CALL get_q_projector(proj_Q, donor_state, do_os, xas_tdp_env)
     234              :       END IF
     235           72 :       IF (do_sf) THEN !spin-flip
     236            2 :          CALL get_q_projector(proj_Q_sf, donor_state, do_os, xas_tdp_env, do_sf=.TRUE.)
     237              :       END IF
     238              :       CALL dbcsr_create(matrix=work, matrix_type=dbcsr_type_no_symmetry, dist=submat_dist, &
     239           72 :                         name="WORK", row_blk_size=submat_blk_size, col_blk_size=submat_blk_size)
     240              : 
     241              :       ! The ground state contribution(s)
     242           72 :       IF (do_sg .OR. do_tp .OR. do_sc) THEN !spin-conserving
     243           72 :          CALL build_gs_contribution(matrix_a, donor_state, do_os, qs_env)
     244              :       END IF
     245           72 :       IF (do_sf) THEN !spin-flip
     246            2 :          CALL build_gs_contribution(matrix_a_sf, donor_state, do_os, qs_env, do_sf=.TRUE.)
     247              :       END IF
     248              : 
     249              :       ! The Coulomb and XC kernels. Internal analysis to know which matrix to compute
     250           72 :       CALL dbcsr_allocate_matrix_set(xc_ker, 4)
     251           72 :       ALLOCATE (xc_ker(1)%matrix, xc_ker(2)%matrix, xc_ker(3)%matrix, xc_ker(4)%matrix)
     252           72 :       CALL kernel_coulomb_xc(matrix_b, xc_ker, donor_state, xas_tdp_env, xas_tdp_control, qs_env)
     253           72 :       matrix_c_sg => xc_ker(1)%matrix; matrix_c_tp => xc_ker(2)%matrix
     254           72 :       matrix_c_sc => xc_ker(3)%matrix; matrix_c_sf => xc_ker(4)%matrix
     255              : 
     256              :       ! The exact exchange. Internal analysis to know which matrices to compute
     257           72 :       CALL dbcsr_allocate_matrix_set(ex_ker, 2)
     258           72 :       ALLOCATE (ex_ker(1)%matrix, ex_ker(2)%matrix)
     259           72 :       CALL kernel_exchange(ex_ker, donor_state, xas_tdp_env, xas_tdp_control, qs_env)
     260           72 :       matrix_d => ex_ker(1)%matrix; matrix_e_sc => ex_ker(2)%matrix
     261              : 
     262              :       ! Build the metric G, also need its inverse in case of full-TDDFT
     263           72 :       IF (do_tda) THEN
     264          132 :          ALLOCATE (donor_state%metric(1))
     265           66 :          CALL build_metric(donor_state%metric, donor_state, qs_env, do_os)
     266              :       ELSE
     267           18 :          ALLOCATE (donor_state%metric(2))
     268            6 :          CALL build_metric(donor_state%metric, donor_state, qs_env, do_os, do_inv=.TRUE.)
     269              :       END IF
     270              : 
     271              : !  Build the eigenvalue problem, depending on the case (TDA, singlet, triplet, hfx, etc ...)
     272           72 :       IF (do_tda) THEN
     273              : 
     274           66 :          IF (do_sc) THEN ! open-shell spin-conserving under TDA
     275              : 
     276              :             ! The final matrix is M = A + B + C - D
     277           12 :             CALL dbcsr_copy(sc_matrix_tdp, matrix_a, name="OS MATRIX TDP")
     278           12 :             IF (do_coul) CALL dbcsr_add(sc_matrix_tdp, matrix_b, 1.0_dp, 1.0_dp)
     279              : 
     280           12 :             IF (do_xc) CALL dbcsr_add(sc_matrix_tdp, matrix_c_sc, 1.0_dp, 1.0_dp) !xc kernel
     281           12 :             IF (do_hfx) CALL dbcsr_add(sc_matrix_tdp, matrix_d, 1.0_dp, -1.0_dp*sx) !scaled hfx
     282              : 
     283              :             ! The product with the Q projector
     284           12 :             CALL dbcsr_multiply('N', 'N', 1.0_dp, proj_Q, sc_matrix_tdp, 0.0_dp, work, filter_eps=eps_filter)
     285           12 :             CALL dbcsr_multiply('N', 'T', 1.0_dp, work, proj_Q, 0.0_dp, sc_matrix_tdp, filter_eps=eps_filter)
     286              : 
     287              :          END IF !do_sc
     288              : 
     289           66 :          IF (do_sf) THEN ! open-shell spin-flip under TDA
     290              : 
     291              :             ! The final matrix is M = A + C - D
     292            2 :             CALL dbcsr_copy(sf_matrix_tdp, matrix_a_sf, name="OS MATRIX TDP")
     293              : 
     294            2 :             IF (do_xc) CALL dbcsr_add(sf_matrix_tdp, matrix_c_sf, 1.0_dp, 1.0_dp) !xc kernel
     295            2 :             IF (do_hfx) CALL dbcsr_add(sf_matrix_tdp, matrix_d, 1.0_dp, -1.0_dp*sx) !scaled hfx
     296              : 
     297              :             ! Take the product with the (spin-flip) Q projector
     298            2 :             CALL dbcsr_multiply('N', 'N', 1.0_dp, proj_Q_sf, sf_matrix_tdp, 0.0_dp, work, filter_eps=eps_filter)
     299            2 :             CALL dbcsr_multiply('N', 'T', 1.0_dp, work, proj_Q_sf, 0.0_dp, sf_matrix_tdp, filter_eps=eps_filter)
     300              : 
     301              :          END IF !do_sf
     302              : 
     303           66 :          IF (do_sg) THEN ! singlets under TDA
     304              : 
     305              :             ! The final matrix is M = A + 2B + (C_aa + C_ab) - D
     306           54 :             CALL dbcsr_copy(sg_matrix_tdp, matrix_a, name="SINGLET MATRIX TDP")
     307           54 :             IF (do_coul) CALL dbcsr_add(sg_matrix_tdp, matrix_b, 1.0_dp, 2.0_dp)
     308              : 
     309           54 :             IF (do_xc) CALL dbcsr_add(sg_matrix_tdp, matrix_c_sg, 1.0_dp, 1.0_dp) ! xc kernel
     310           54 :             IF (do_hfx) CALL dbcsr_add(sg_matrix_tdp, matrix_d, 1.0_dp, -1.0_dp*sx) ! scaled hfx
     311              : 
     312              :             ! Take the product with the Q projector:
     313           54 :             CALL dbcsr_multiply('N', 'N', 1.0_dp, proj_Q, sg_matrix_tdp, 0.0_dp, work, filter_eps=eps_filter)
     314           54 :             CALL dbcsr_multiply('N', 'T', 1.0_dp, work, proj_Q, 0.0_dp, sg_matrix_tdp, filter_eps=eps_filter)
     315              : 
     316              :          END IF !do_sg (TDA)
     317              : 
     318           66 :          IF (do_tp) THEN ! triplets under TDA
     319              : 
     320              :             ! The final matrix is M =  A + (C_aa - C_ab) - D
     321            2 :             CALL dbcsr_copy(tp_matrix_tdp, matrix_a, name="TRIPLET MATRIX TDP")
     322              : 
     323            2 :             IF (do_xc) CALL dbcsr_add(tp_matrix_tdp, matrix_c_tp, 1.0_dp, 1.0_dp) ! xc_kernel
     324            2 :             IF (do_hfx) CALL dbcsr_add(tp_matrix_tdp, matrix_d, 1.0_dp, -1.0_dp*sx) ! scaled hfx
     325              : 
     326              :             ! Take the product with the Q projector:
     327            2 :             CALL dbcsr_multiply('N', 'N', 1.0_dp, proj_Q, tp_matrix_tdp, 0.0_dp, work, filter_eps=eps_filter)
     328            2 :             CALL dbcsr_multiply('N', 'T', 1.0_dp, work, proj_Q, 0.0_dp, tp_matrix_tdp, filter_eps=eps_filter)
     329              : 
     330              :          END IF !do_tp (TDA)
     331              : 
     332              :       ELSE ! not TDA
     333              : 
     334              :          ! In the case of full-TDDFT, the problem is turned Hermitian with the help of auxiliary
     335              :          ! matrices AUX = (A-D+E)^(+-0.5) that are stored in donor_state
     336              :          CALL build_aux_matrix(1.0E-8_dp, sx, matrix_a, matrix_d, matrix_e_sc, do_hfx, proj_Q, &
     337            6 :                                work, donor_state, eps_filter, qs_env)
     338              : 
     339            6 :          IF (do_sc) THEN !full-TDDFT open-shell spin-conserving
     340              : 
     341              :             ! The final matrix is the sum of the on- and off-diagonal elements as in the description
     342              :             ! M = A + 2B + 2C - D - E
     343            0 :             CALL dbcsr_copy(sc_matrix_tdp, matrix_a, name="OS MATRIX TDP")
     344            0 :             IF (do_coul) CALL dbcsr_add(sc_matrix_tdp, matrix_b, 1.0_dp, 2.0_dp)
     345              : 
     346            0 :             IF (do_hfx) THEN !scaled hfx
     347            0 :                CALL dbcsr_add(sc_matrix_tdp, matrix_d, 1.0_dp, -1.0_dp*sx)
     348            0 :                CALL dbcsr_add(sc_matrix_tdp, matrix_e_sc, 1.0_dp, -1.0_dp*sx)
     349              :             END IF
     350            0 :             IF (do_xc) THEN
     351            0 :                CALL dbcsr_add(sc_matrix_tdp, matrix_c_sc, 1.0_dp, 2.0_dp)
     352              :             END IF
     353              : 
     354              :             ! Take the product with the Q projector
     355            0 :             CALL dbcsr_multiply('N', 'N', 1.0_dp, proj_Q, sc_matrix_tdp, 0.0_dp, work, filter_eps=eps_filter)
     356            0 :             CALL dbcsr_multiply('N', 'T', 1.0_dp, work, proj_Q, 0.0_dp, sc_matrix_tdp, filter_eps=eps_filter)
     357              : 
     358              :             ! Take the product with the inverse metric
     359              :             ! M <= G^-1 * M * G^-1
     360              :             CALL dbcsr_multiply('N', 'N', 1.0_dp, donor_state%metric(2)%matrix, sc_matrix_tdp, &
     361            0 :                                 0.0_dp, work, filter_eps=eps_filter)
     362              :             CALL dbcsr_multiply('N', 'N', 1.0_dp, work, donor_state%metric(2)%matrix, 0.0_dp, &
     363            0 :                                 sc_matrix_tdp, filter_eps=eps_filter)
     364              : 
     365              :          END IF
     366              : 
     367            6 :          IF (do_sg) THEN ! full-TDDFT singlets
     368              : 
     369              :             ! The final matrix is the sum of the on- and off-diagonal elements as in the description
     370              :             ! M = A + 4B + 2(C_aa + C_ab) - D - E
     371            6 :             CALL dbcsr_copy(sg_matrix_tdp, matrix_a, name="SINGLET MATRIX TDP")
     372            6 :             IF (do_coul) CALL dbcsr_add(sg_matrix_tdp, matrix_b, 1.0_dp, 4.0_dp)
     373              : 
     374            6 :             IF (do_hfx) THEN !scaled hfx
     375            6 :                CALL dbcsr_add(sg_matrix_tdp, matrix_d, 1.0_dp, -1.0_dp*sx)
     376            6 :                CALL dbcsr_add(sg_matrix_tdp, matrix_e_sc, 1.0_dp, -1.0_dp*sx)
     377              :             END IF
     378            6 :             IF (do_xc) THEN !xc kernel
     379            6 :                CALL dbcsr_add(sg_matrix_tdp, matrix_c_sg, 1.0_dp, 2.0_dp)
     380              :             END IF
     381              : 
     382              :             ! Take the product with the Q projector
     383            6 :             CALL dbcsr_multiply('N', 'N', 1.0_dp, proj_Q, sg_matrix_tdp, 0.0_dp, work, filter_eps=eps_filter)
     384            6 :             CALL dbcsr_multiply('N', 'T', 1.0_dp, work, proj_Q, 0.0_dp, sg_matrix_tdp, filter_eps=eps_filter)
     385              : 
     386              :             ! Take the product with the inverse metric
     387              :             ! M <= G^-1 * M * G^-1
     388              :             CALL dbcsr_multiply('N', 'N', 1.0_dp, donor_state%metric(2)%matrix, sg_matrix_tdp, &
     389            6 :                                 0.0_dp, work, filter_eps=eps_filter)
     390              :             CALL dbcsr_multiply('N', 'N', 1.0_dp, work, donor_state%metric(2)%matrix, 0.0_dp, &
     391            6 :                                 sg_matrix_tdp, filter_eps=eps_filter)
     392              : 
     393              :          END IF ! singlets
     394              : 
     395            6 :          IF (do_tp) THEN ! full-TDDFT triplets
     396              : 
     397              :             ! The final matrix is the sum of the on- and off-diagonal elements as in the description
     398              :             ! M = A + 2(C_aa - C_ab) - D - E
     399            0 :             CALL dbcsr_copy(tp_matrix_tdp, matrix_a, name="TRIPLET MATRIX TDP")
     400              : 
     401            0 :             IF (do_hfx) THEN !scaled hfx
     402            0 :                CALL dbcsr_add(tp_matrix_tdp, matrix_d, 1.0_dp, -1.0_dp*sx)
     403            0 :                CALL dbcsr_add(tp_matrix_tdp, matrix_e_sc, 1.0_dp, -1.0_dp*sx)
     404              :             END IF
     405            0 :             IF (do_xc) THEN
     406            0 :                CALL dbcsr_add(tp_matrix_tdp, matrix_c_tp, 1.0_dp, 2.0_dp)
     407              :             END IF
     408              : 
     409              :             ! Take the product with the Q projector
     410            0 :             CALL dbcsr_multiply('N', 'N', 1.0_dp, proj_Q, tp_matrix_tdp, 0.0_dp, work, filter_eps=eps_filter)
     411            0 :             CALL dbcsr_multiply('N', 'T', 1.0_dp, work, proj_Q, 0.0_dp, tp_matrix_tdp, filter_eps=eps_filter)
     412              : 
     413              :             ! Take the product with the inverse metric
     414              :             ! M <= G^-1 * M * G^-1
     415              :             CALL dbcsr_multiply('N', 'N', 1.0_dp, donor_state%metric(2)%matrix, tp_matrix_tdp, &
     416            0 :                                 0.0_dp, work, filter_eps=eps_filter)
     417              :             CALL dbcsr_multiply('N', 'N', 1.0_dp, work, donor_state%metric(2)%matrix, 0.0_dp, &
     418            0 :                                 tp_matrix_tdp, filter_eps=eps_filter)
     419              : 
     420              :          END IF ! triplets
     421              : 
     422              :       END IF ! test on TDA
     423              : 
     424              : !  Clean-up
     425           72 :       CALL dbcsr_release(matrix_a)
     426           72 :       CALL dbcsr_release(matrix_a_sf)
     427           72 :       CALL dbcsr_release(matrix_b)
     428           72 :       CALL dbcsr_release(proj_Q)
     429           72 :       CALL dbcsr_release(proj_Q_sf)
     430           72 :       CALL dbcsr_release(work)
     431           72 :       CALL dbcsr_deallocate_matrix_set(ex_ker)
     432           72 :       CALL dbcsr_deallocate_matrix_set(xc_ker)
     433              : 
     434           72 :       CALL timestop(handle)
     435              : 
     436           72 :    END SUBROUTINE setup_xas_tdp_prob
     437              : 
     438              : ! **************************************************************************************************
     439              : !> \brief Solves the XAS TDP generalized eigenvalue problem omega*C = matrix_tdp*C using standard
     440              : !>        full diagonalization methods. The problem is Hermitian (made that way even if not TDA)
     441              : !> \param donor_state ...
     442              : !> \param xas_tdp_control ...
     443              : !> \param xas_tdp_env ...
     444              : !> \param qs_env ...
     445              : !> \param ex_type whether we deal with singlets, triplets, spin-conserving open-shell or spin-flip
     446              : !> \note The computed eigenvalues and eigenvectors are stored in the donor_state
     447              : !>       The eigenvectors are the LR-coefficients. In case of TDA, c^- is stored. In the general
     448              : !>       case, the sum c^+ + c^- is stored.
     449              : !>      - Spin-restricted:
     450              : !>       In case both singlets and triplets are considered, this routine must be called twice. This
     451              : !>       is the choice that was made because the body of the routine is exactly the same in both cases
     452              : !>       Note that for singlet we solve for u = 1/sqrt(2)*(c_alpha + c_beta) = sqrt(2)*c
     453              : !>       and that for triplets we solve for v = 1/sqrt(2)*(c_alpha - c_beta) = sqrt(2)*c
     454              : !>      - Spin-unrestricted:
     455              : !>       The problem is solved for the LR coefficients c_pIa as they are (not linear combination)
     456              : !>       The routine might be called twice (once for spin-conservign, one for spin-flip)
     457              : ! **************************************************************************************************
     458           76 :    SUBROUTINE solve_xas_tdp_prob(donor_state, xas_tdp_control, xas_tdp_env, qs_env, ex_type)
     459              : 
     460              :       TYPE(donor_state_type), POINTER                    :: donor_state
     461              :       TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
     462              :       TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
     463              :       TYPE(qs_environment_type), POINTER                 :: qs_env
     464              :       INTEGER, INTENT(IN)                                :: ex_type
     465              : 
     466              :       CHARACTER(len=*), PARAMETER :: routineN = 'solve_xas_tdp_prob'
     467              : 
     468              :       INTEGER                                            :: first_ex, handle, i, imo, ispin, nao, &
     469              :                                                             ndo_mo, nelectron, nevals, nocc, nrow, &
     470              :                                                             nspins, ot_nevals
     471              :       LOGICAL                                            :: do_os, do_range, do_sf
     472              :       REAL(dp)                                           :: ot_elb
     473           76 :       REAL(dp), ALLOCATABLE, DIMENSION(:)                :: scaling, tmp_evals
     474           76 :       REAL(dp), DIMENSION(:), POINTER                    :: lr_evals
     475              :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
     476              :       TYPE(cp_fm_struct_type), POINTER                   :: ex_struct, fm_struct, ot_fm_struct
     477              :       TYPE(cp_fm_type)                                   :: c_diff, c_sum, lhs_matrix, rhs_matrix, &
     478              :                                                             work
     479              :       TYPE(cp_fm_type), POINTER                          :: lr_coeffs
     480              :       TYPE(dbcsr_type)                                   :: tmp_mat, tmp_mat2
     481              :       TYPE(dbcsr_type), POINTER                          :: matrix_tdp
     482              :       TYPE(mp_para_env_type), POINTER                    :: para_env
     483              : 
     484           76 :       CALL timeset(routineN, handle)
     485              : 
     486           76 :       NULLIFY (para_env, blacs_env, fm_struct, matrix_tdp)
     487           76 :       NULLIFY (ex_struct, lr_evals, lr_coeffs)
     488           76 :       CPASSERT(ASSOCIATED(xas_tdp_env))
     489              : 
     490           76 :       do_os = .FALSE.
     491           76 :       do_sf = .FALSE.
     492           76 :       IF (ex_type == tddfpt_spin_cons) THEN
     493           12 :          matrix_tdp => donor_state%sc_matrix_tdp
     494           12 :          do_os = .TRUE.
     495           64 :       ELSE IF (ex_type == tddfpt_spin_flip) THEN
     496            2 :          matrix_tdp => donor_state%sf_matrix_tdp
     497            2 :          do_os = .TRUE.
     498            2 :          do_sf = .TRUE.
     499           62 :       ELSE IF (ex_type == tddfpt_singlet) THEN
     500           60 :          matrix_tdp => donor_state%sg_matrix_tdp
     501            2 :       ELSE IF (ex_type == tddfpt_triplet) THEN
     502            2 :          matrix_tdp => donor_state%tp_matrix_tdp
     503              :       END IF
     504           76 :       CALL get_qs_env(qs_env=qs_env, para_env=para_env, blacs_env=blacs_env, nelectron_total=nelectron)
     505              : 
     506              : !     Initialization
     507           76 :       nspins = 1; IF (do_os) nspins = 2
     508           76 :       CALL cp_fm_get_info(donor_state%gs_coeffs, nrow_global=nao)
     509           76 :       CALL dbcsr_get_info(matrix_tdp, nfullrows_total=nrow)
     510           76 :       ndo_mo = donor_state%ndo_mo
     511           76 :       nocc = nelectron/2; IF (do_os) nocc = nelectron
     512           76 :       nocc = ndo_mo*nocc
     513              : 
     514              :       !solve by energy_range or number of states ?
     515           76 :       do_range = .FALSE.
     516           76 :       IF (xas_tdp_control%e_range > 0.0_dp) do_range = .TRUE.
     517              : 
     518              :       ! create the fm infrastructure
     519              :       CALL cp_fm_struct_create(fm_struct, context=blacs_env, nrow_global=nrow, &
     520           76 :                                para_env=para_env, ncol_global=nrow)
     521           76 :       CALL cp_fm_create(rhs_matrix, fm_struct)
     522           76 :       CALL cp_fm_create(work, fm_struct)
     523              : 
     524              : !     Test on TDA
     525           76 :       IF (xas_tdp_control%tamm_dancoff) THEN
     526              : 
     527           70 :          IF (xas_tdp_control%do_ot) THEN
     528              : 
     529              :             !need to precompute the number of evals for OT
     530            4 :             IF (do_range) THEN
     531              : 
     532              :                !in case of energy range criterion, use LUMO eigenvalues as estimate
     533            4 :                ot_elb = xas_tdp_env%lumo_evals(1)%array(1)
     534            4 :                IF (do_os) ot_elb = MIN(ot_elb, xas_tdp_env%lumo_evals(2)%array(1))
     535              : 
     536         1028 :                ot_nevals = COUNT(xas_tdp_env%lumo_evals(1)%array - ot_elb <= xas_tdp_control%e_range)
     537            4 :                IF (do_os) ot_nevals = ot_nevals + &
     538            0 :                                       COUNT(xas_tdp_env%lumo_evals(2)%array - ot_elb <= xas_tdp_control%e_range)
     539              : 
     540              :             ELSE
     541              : 
     542            0 :                ot_nevals = nspins*nao - nocc/ndo_mo
     543            0 :                IF (xas_tdp_control%n_excited > 0 .AND. xas_tdp_control%n_excited < ot_nevals) THEN
     544            0 :                   ot_nevals = xas_tdp_control%n_excited
     545              :                END IF
     546              :             END IF
     547            4 :             ot_nevals = ndo_mo*ot_nevals !as in input description, multiply by multiplicity of donor state
     548              : 
     549              : !           Organize results data
     550            4 :             first_ex = 1
     551           12 :             ALLOCATE (tmp_evals(ot_nevals))
     552              :             CALL cp_fm_struct_create(ot_fm_struct, context=blacs_env, para_env=para_env, &
     553            4 :                                      nrow_global=nrow, ncol_global=ot_nevals)
     554            4 :             CALL cp_fm_create(c_sum, ot_fm_struct)
     555              : 
     556              :             CALL xas_ot_solver(matrix_tdp, donor_state%metric(1)%matrix, c_sum, tmp_evals, ot_nevals, &
     557            4 :                                do_sf, donor_state, xas_tdp_env, xas_tdp_control, qs_env)
     558              : 
     559            8 :             CALL cp_fm_struct_release(ot_fm_struct)
     560              : 
     561              :          ELSE
     562              : 
     563              : !           Organize results data
     564           66 :             first_ex = nocc + 1 !where to find the first proper eigenvalue
     565          198 :             ALLOCATE (tmp_evals(nrow))
     566           66 :             CALL cp_fm_create(c_sum, fm_struct)
     567              : 
     568              : !           Get the main matrix_tdp as an fm
     569           66 :             CALL copy_dbcsr_to_fm(matrix_tdp, rhs_matrix)
     570              : 
     571              : !           Get the metric as a fm
     572           66 :             CALL cp_fm_create(lhs_matrix, fm_struct)
     573           66 :             CALL copy_dbcsr_to_fm(donor_state%metric(1)%matrix, lhs_matrix)
     574              : 
     575              :             !Diagonalisation (Cholesky decomposition). In TDA, c_sum = c^-
     576           66 :             CALL cp_fm_geeig(rhs_matrix, lhs_matrix, c_sum, tmp_evals, work)
     577              : 
     578              : !           TDA specific clean-up
     579          198 :             CALL cp_fm_release(lhs_matrix)
     580              : 
     581              :          END IF
     582              : 
     583              :       ELSE ! not TDA
     584              : 
     585              : !        Organize results data
     586            6 :          first_ex = nocc + 1
     587           18 :          ALLOCATE (tmp_evals(nrow))
     588            6 :          CALL cp_fm_create(c_sum, fm_struct)
     589              : 
     590              : !        Need to multiply the current matrix_tdp with the auxiliary matrix
     591              : !        tmp_mat =  (A-D+E)^0.5 * M * (A-D+E)^0.5
     592            6 :          CALL dbcsr_create(matrix=tmp_mat, template=matrix_tdp, matrix_type=dbcsr_type_no_symmetry)
     593            6 :          CALL dbcsr_create(matrix=tmp_mat2, template=matrix_tdp, matrix_type=dbcsr_type_no_symmetry)
     594              :          CALL dbcsr_multiply('N', 'N', 1.0_dp, donor_state%matrix_aux, matrix_tdp, &
     595            6 :                              0.0_dp, tmp_mat2, filter_eps=xas_tdp_control%eps_filter)
     596              :          CALL dbcsr_multiply('N', 'N', 1.0_dp, tmp_mat2, donor_state%matrix_aux, &
     597            6 :                              0.0_dp, tmp_mat, filter_eps=xas_tdp_control%eps_filter)
     598              : 
     599              : !        Get the matrix as a fm
     600            6 :          CALL copy_dbcsr_to_fm(tmp_mat, rhs_matrix)
     601              : 
     602              : !        Solve the "turned-Hermitian" eigenvalue problem
     603            6 :          CALL choose_eigv_solver(rhs_matrix, work, tmp_evals)
     604              : 
     605              : !        Currently, work = (A-D+E)^0.5 (c^+ - c^-) and tmp_evals = omega^2
     606              : !        Put tiny almost zero eigenvalues to zero (corresponding to occupied MOs)
     607          150 :          WHERE (tmp_evals < 1.0E-4_dp) tmp_evals = 0.0_dp
     608              : 
     609              : !        Retrieve c_diff = (c^+ - c^-) for normalization
     610              : !        (c^+ - c^-) = 1/omega^2 * M * (A-D+E)^0.5 * work
     611            6 :          CALL cp_fm_create(c_diff, fm_struct)
     612              :          CALL dbcsr_multiply('N', 'N', 1.0_dp, matrix_tdp, donor_state%matrix_aux, &
     613            6 :                              0.0_dp, tmp_mat, filter_eps=xas_tdp_control%eps_filter)
     614            6 :          CALL cp_dbcsr_sm_fm_multiply(tmp_mat, work, c_diff, ncol=nrow)
     615              : 
     616           12 :          ALLOCATE (scaling(nrow))
     617          150 :          scaling = 0.0_dp
     618          150 :          WHERE (ABS(tmp_evals) > 1.0E-8_dp) scaling = 1.0_dp/tmp_evals
     619            6 :          CALL cp_fm_column_scale(c_diff, scaling)
     620              : 
     621              : !        Normalize with the metric: c_diff * G * c_diff = +- 1
     622          150 :          scaling = 0.0_dp
     623            6 :          CALL get_normal_scaling(scaling, c_diff, donor_state)
     624            6 :          CALL cp_fm_column_scale(c_diff, scaling)
     625              : 
     626              : !        Get the actual eigenvalues
     627          150 :          tmp_evals = SQRT(tmp_evals)
     628              : 
     629              : !        Get c_sum = (c^+ + c^-), which appears in all transition density related expressions
     630              : !        c_sum = -1/omega G^-1 * (A-D+E) * (c^+ - c^-)
     631              :          CALL dbcsr_multiply('N', 'N', 1.0_dp, donor_state%matrix_aux, donor_state%matrix_aux, &
     632            6 :                              0.0_dp, tmp_mat2, filter_eps=xas_tdp_control%eps_filter)
     633              :          CALL dbcsr_multiply('N', 'N', 1.0_dp, donor_state%metric(2)%matrix, tmp_mat2, &
     634            6 :                              0.0_dp, tmp_mat, filter_eps=xas_tdp_control%eps_filter)
     635            6 :          CALL cp_dbcsr_sm_fm_multiply(tmp_mat, c_diff, c_sum, ncol=nrow)
     636          150 :          WHERE (tmp_evals /= 0) scaling = -1.0_dp/tmp_evals
     637            6 :          CALL cp_fm_column_scale(c_sum, scaling)
     638              : 
     639              : !        Full TDDFT specific clean-up
     640            6 :          CALL cp_fm_release(c_diff)
     641            6 :          CALL dbcsr_release(tmp_mat)
     642            6 :          CALL dbcsr_release(tmp_mat2)
     643           18 :          DEALLOCATE (scaling)
     644              : 
     645              :       END IF ! TDA
     646              : 
     647              : !     Full matrix clean-up
     648           76 :       CALL cp_fm_release(rhs_matrix)
     649           76 :       CALL cp_fm_release(work)
     650              : 
     651              : !  Reorganize the eigenvalues, we want a lr_evals array with the proper dimension and where the
     652              : !  first element is the first eval. Need a case study on do_range/ot
     653           76 :       IF (xas_tdp_control%do_ot) THEN
     654              : 
     655            4 :          nevals = ot_nevals
     656              : 
     657           72 :       ELSE IF (do_range) THEN
     658              : 
     659          188 :          WHERE (tmp_evals > tmp_evals(first_ex) + xas_tdp_control%e_range) tmp_evals = 0.0_dp
     660           96 :          nevals = MAXLOC(tmp_evals, 1) - nocc
     661              : 
     662              :       ELSE
     663              : 
     664              :          !Determine the number of evals to keep base on N_EXCITED
     665           68 :          nevals = nspins*nao - nocc/ndo_mo
     666           68 :          IF (xas_tdp_control%n_excited > 0 .AND. xas_tdp_control%n_excited < nevals) THEN
     667              :             nevals = xas_tdp_control%n_excited
     668              :          END IF
     669           68 :          nevals = ndo_mo*nevals !as in input description, multiply by # of donor MOs
     670              : 
     671              :       END IF
     672              : 
     673           76 :       xas_tdp_env%nvirt = nevals
     674          228 :       ALLOCATE (lr_evals(nevals))
     675         1196 :       lr_evals(:) = tmp_evals(first_ex:first_ex + nevals - 1)
     676              : 
     677              : !  Reorganize the eigenvectors in array of cp_fm so that each ndo_mo columns corresponds to an
     678              : !  excited state. Makes later calls to those easier and more efficient
     679              : !  In case of open-shell, we store the coeffs in the same logic as the matrix => first block where
     680              : !  the columns are the c_Ialpha and second block with columns as c_Ibeta
     681              :       CALL cp_fm_struct_create(ex_struct, nrow_global=nao, ncol_global=ndo_mo*nspins*nevals, &
     682           76 :                                para_env=para_env, context=blacs_env)
     683           76 :       ALLOCATE (lr_coeffs)
     684           76 :       CALL cp_fm_create(lr_coeffs, ex_struct)
     685              : 
     686         1196 :       DO i = 1, nevals
     687         2596 :          DO ispin = 1, nspins
     688         4208 :             DO imo = 1, ndo_mo
     689              : 
     690              :                CALL cp_fm_to_fm_submat(msource=c_sum, mtarget=lr_coeffs, &
     691              :                                        nrow=nao, ncol=1, s_firstrow=((ispin - 1)*ndo_mo + imo - 1)*nao + 1, &
     692              :                                        s_firstcol=first_ex + i - 1, t_firstrow=1, &
     693         3088 :                                        t_firstcol=(i - 1)*ndo_mo*nspins + (ispin - 1)*ndo_mo + imo)
     694              :             END DO !imo
     695              :          END DO !ispin
     696              :       END DO !istate
     697              : 
     698           76 :       IF (ex_type == tddfpt_spin_cons) THEN
     699           12 :          donor_state%sc_coeffs => lr_coeffs
     700           12 :          donor_state%sc_evals => lr_evals
     701           64 :       ELSE IF (ex_type == tddfpt_spin_flip) THEN
     702            2 :          donor_state%sf_coeffs => lr_coeffs
     703            2 :          donor_state%sf_evals => lr_evals
     704           62 :       ELSE IF (ex_type == tddfpt_singlet) THEN
     705           60 :          donor_state%sg_coeffs => lr_coeffs
     706           60 :          donor_State%sg_evals => lr_evals
     707            2 :       ELSE IF (ex_type == tddfpt_triplet) THEN
     708            2 :          donor_state%tp_coeffs => lr_coeffs
     709            2 :          donor_state%tp_evals => lr_evals
     710              :       END IF
     711              : 
     712              : !  Clean-up
     713           76 :       CALL cp_fm_release(c_sum)
     714           76 :       CALL cp_fm_struct_release(fm_struct)
     715           76 :       CALL cp_fm_struct_release(ex_struct)
     716              : 
     717              : !  Perform a partial clean-up of the donor_state
     718           76 :       CALL dbcsr_release(matrix_tdp)
     719              : 
     720           76 :       CALL timestop(handle)
     721              : 
     722          380 :    END SUBROUTINE solve_xas_tdp_prob
     723              : 
     724              : ! **************************************************************************************************
     725              : !> \brief An iterative solver based on OT for the TDA generalized eigV problem lambda Sx = Hx
     726              : !> \param matrix_tdp the RHS matrix (dbcsr)
     727              : !> \param metric the LHS matrix (dbcsr)
     728              : !> \param evecs the corresponding eigenvectors (fm)
     729              : !> \param evals the corresponding eigenvalues
     730              : !> \param neig the number of wanted eigenvalues
     731              : !> \param do_sf whther spin-flip TDDFT is on
     732              : !> \param donor_state ...
     733              : !> \param xas_tdp_env ...
     734              : !> \param xas_tdp_control ...
     735              : !> \param qs_env ...
     736              : ! **************************************************************************************************
     737            4 :    SUBROUTINE xas_ot_solver(matrix_tdp, metric, evecs, evals, neig, do_sf, donor_state, xas_tdp_env, &
     738              :                             xas_tdp_control, qs_env)
     739              : 
     740              :       TYPE(dbcsr_type), POINTER                          :: matrix_tdp, metric
     741              :       TYPE(cp_fm_type), INTENT(INOUT)                    :: evecs
     742              :       REAL(dp), DIMENSION(:)                             :: evals
     743              :       INTEGER, INTENT(IN)                                :: neig
     744              :       LOGICAL                                            :: do_sf
     745              :       TYPE(donor_state_type), POINTER                    :: donor_state
     746              :       TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
     747              :       TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
     748              :       TYPE(qs_environment_type), POINTER                 :: qs_env
     749              : 
     750              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'xas_ot_solver'
     751              : 
     752              :       INTEGER                                            :: handle, max_iter, ndo_mo, nelec_spin(2), &
     753              :                                                             nocc, nrow, output_unit
     754              :       LOGICAL                                            :: do_os
     755              :       REAL(dp)                                           :: eps_iter
     756              :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
     757              :       TYPE(cp_fm_struct_type), POINTER                   :: ortho_struct
     758              :       TYPE(cp_fm_type)                                   :: ortho_space
     759              :       TYPE(dbcsr_type), POINTER                          :: ot_prec
     760              :       TYPE(mp_para_env_type), POINTER                    :: para_env
     761              :       TYPE(preconditioner_type), POINTER                 :: precond
     762              : 
     763            4 :       NULLIFY (para_env, blacs_env, ortho_struct, ot_prec)
     764              : 
     765            4 :       CALL timeset(routineN, handle)
     766              : 
     767            4 :       output_unit = cp_logger_get_default_io_unit()
     768            4 :       IF (output_unit > 0) THEN
     769              :          WRITE (output_unit, "(/,T5,A)") &
     770            2 :             "Using OT eigensolver for diagonalization: "
     771              :       END IF
     772              : 
     773            4 :       do_os = xas_tdp_control%do_uks .OR. xas_tdp_control%do_roks
     774            4 :       ndo_mo = donor_state%ndo_mo
     775            4 :       CALL get_qs_env(qs_env, para_env=para_env, blacs_env=blacs_env, nelectron_spin=nelec_spin)
     776            4 :       CALL cp_fm_get_info(evecs, nrow_global=nrow)
     777            4 :       max_iter = xas_tdp_control%ot_max_iter
     778            4 :       eps_iter = xas_tdp_control%ot_eps_iter
     779            4 :       nocc = nelec_spin(1)/2*ndo_mo
     780            4 :       IF (do_os) nocc = SUM(nelec_spin)*ndo_mo
     781              : 
     782              : !  Initialize relevent matrices
     783            4 :       ALLOCATE (ot_prec)
     784            4 :       CALL dbcsr_create(ot_prec, template=matrix_tdp)
     785              :       CALL cp_fm_struct_create(ortho_struct, context=blacs_env, para_env=para_env, &
     786            4 :                                nrow_global=nrow, ncol_global=nocc)
     787            4 :       CALL cp_fm_create(ortho_space, ortho_struct)
     788              : 
     789              :       CALL prep_for_ot(evecs, ortho_space, ot_prec, neig, do_sf, donor_state, xas_tdp_env, &
     790            4 :                        xas_tdp_control, qs_env)
     791              : 
     792              : !  Prepare the preconditioner
     793            4 :       ALLOCATE (precond)
     794            4 :       CALL init_preconditioner(precond, para_env, blacs_env)
     795            4 :       precond%in_use = ot_precond_full_single ! because applying this conditioner is only a mm
     796            4 :       precond%dbcsr_matrix => ot_prec
     797              : 
     798              : !  Actually solving the eigenvalue problem
     799              :       CALL ot_eigensolver(matrix_h=matrix_tdp, matrix_s=metric, matrix_c_fm=evecs, &
     800              :                           eps_gradient=eps_iter, iter_max=max_iter, silent=.FALSE., &
     801              :                           ot_settings=xas_tdp_control%ot_settings, &
     802              :                           matrix_orthogonal_space_fm=ortho_space, &
     803            4 :                           preconditioner=precond)
     804            4 :       CALL calculate_subspace_eigenvalues(evecs, matrix_tdp, evals_arg=evals)
     805              : 
     806              : !  Clean-up
     807            4 :       CALL cp_fm_struct_release(ortho_struct)
     808            4 :       CALL cp_fm_release(ortho_space)
     809            4 :       CALL dbcsr_release(ot_prec)
     810            4 :       CALL destroy_preconditioner(precond)
     811            4 :       DEALLOCATE (precond)
     812              : 
     813            4 :       CALL timestop(handle)
     814              : 
     815            4 :    END SUBROUTINE xas_ot_solver
     816              : 
     817              : ! **************************************************************************************************
     818              : !> \brief Prepares all required matrices for the OT eigensolver (precond, ortho space and guesses)
     819              : !> \param guess the guess eigenvectors absed on LUMOs, in fm format
     820              : !> \param ortho the orthogonal space in fm format (occupied MOs)
     821              : !> \param precond the OT preconditioner in DBCSR format
     822              : !> \param neig ...
     823              : !> \param do_sf ...
     824              : !> \param donor_state ...
     825              : !> \param xas_tdp_env ...
     826              : !> \param xas_tdp_control ...
     827              : !> \param qs_env ...
     828              : !> \note Matrices are allocate before entry
     829              : ! **************************************************************************************************
     830            8 :    SUBROUTINE prep_for_ot(guess, ortho, precond, neig, do_sf, donor_state, xas_tdp_env, &
     831              :                           xas_tdp_control, qs_env)
     832              : 
     833              :       TYPE(cp_fm_type), INTENT(IN)                       :: guess, ortho
     834              :       TYPE(dbcsr_type)                                   :: precond
     835              :       INTEGER                                            :: neig
     836              :       LOGICAL                                            :: do_sf
     837              :       TYPE(donor_state_type), POINTER                    :: donor_state
     838              :       TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
     839              :       TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
     840              :       TYPE(qs_environment_type), POINTER                 :: qs_env
     841              : 
     842              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'prep_for_ot'
     843              : 
     844              :       INTEGER :: handle, i, iblk, ido_mo, ispin, jblk, maxel, minel, nao, natom, ndo_mo, &
     845              :          nelec_spin(2), nhomo(2), nlumo(2), nspins, start_block, start_col, start_row
     846              :       LOGICAL                                            :: do_os, found
     847            4 :       REAL(dp), DIMENSION(:, :), POINTER                 :: pblock
     848              :       TYPE(cp_fm_type), POINTER                          :: mo_coeff
     849              :       TYPE(dbcsr_iterator_type)                          :: iter
     850            4 :       TYPE(mo_set_type), DIMENSION(:), POINTER           :: mos
     851              : 
     852            4 :       NULLIFY (mos, mo_coeff, pblock)
     853              : 
     854              :       !REMINDER on the organization of the xas_tdp matrix. It is DBCSR format, with a super bock
     855              :       !structure. First block structure is spin quadrants: upper left is alpha-alpha spin and lower
     856              :       !right is beta-beta spin. Then each quadrants is divided in a ndo_mo x ndo_mo grid (1x1 for 1s,
     857              :       !2s, 3x3 for 2p). Each block in this grid has the normal DBCSR structure and dist, simply
     858              :       !replicated. The resulting eigenvectors follow the same logic.
     859              : 
     860            4 :       CALL timeset(routineN, handle)
     861              : 
     862            4 :       do_os = xas_tdp_control%do_uks .OR. xas_tdp_control%do_roks
     863            0 :       nspins = 1; IF (do_os) nspins = 2
     864            4 :       ndo_mo = donor_state%ndo_mo
     865            4 :       CALL cp_fm_get_info(xas_tdp_env%lumo_evecs(1), nrow_global=nao)
     866            4 :       CALL get_qs_env(qs_env, natom=natom, nelectron_spin=nelec_spin)
     867              : 
     868              :       !Compute the number of guesses for each spins
     869            4 :       IF (do_os) THEN
     870            0 :          minel = MINLOC(nelec_spin, 1)
     871            0 :          maxel = 3 - minel
     872            0 :          nlumo(minel) = (neig/ndo_mo + nelec_spin(maxel) - nelec_spin(minel))/2
     873            0 :          nlumo(maxel) = neig/ndo_mo - nlumo(minel)
     874              :       ELSE
     875            4 :          nlumo(1) = neig/ndo_mo
     876              :       END IF
     877              : 
     878              :       !Building the guess vectors based on the LUMOs. Copy LUMOs into approriate spin/do_mo
     879              :       !quadrant/block. Order within a block does not matter
     880              :       !Note: in spin-flip, the upper left quadrant is for beta-alpha transition, so guess are alpha LUMOs
     881              :       start_row = 0
     882              :       start_col = 0
     883            8 :       DO ispin = 1, nspins
     884           12 :          DO ido_mo = 1, ndo_mo
     885              : 
     886              :             CALL cp_fm_to_fm_submat(msource=xas_tdp_env%lumo_evecs(ispin), mtarget=guess, &
     887              :                                     nrow=nao, ncol=nlumo(ispin), s_firstrow=1, s_firstcol=1, &
     888            4 :                                     t_firstrow=start_row + 1, t_firstcol=start_col + 1)
     889              : 
     890            4 :             start_row = start_row + nao
     891            8 :             start_col = start_col + nlumo(ispin)
     892              : 
     893              :          END DO
     894              :       END DO
     895              : 
     896              :       !Build the orthogonal space according to the same principles, but based on occupied MOs
     897              :       !Note: in spin-flip, the upper left quadrant is for beta-alpha transition, so ortho space is beta HOMOs
     898            4 :       CALL get_qs_env(qs_env, mos=mos)
     899            4 :       nhomo = 0
     900            8 :       DO ispin = 1, nspins
     901            8 :          CALL get_mo_set(mos(ispin), homo=nhomo(ispin))
     902              :       END DO
     903              : 
     904              :       start_row = 0
     905              :       start_col = 0
     906            8 :       DO i = 1, nspins
     907            4 :          ispin = i; IF (do_sf) ispin = 3 - i
     908            4 :          CALL get_mo_set(mos(ispin), mo_coeff=mo_coeff)
     909              : 
     910           12 :          DO ido_mo = 1, ndo_mo
     911              : 
     912              :             CALL cp_fm_to_fm_submat(msource=mo_coeff, mtarget=ortho, nrow=nao, ncol=nhomo(ispin), &
     913              :                                     s_firstrow=1, s_firstcol=1, &
     914            4 :                                     t_firstrow=start_row + 1, t_firstcol=start_col + 1)
     915              : 
     916            4 :             start_row = start_row + nao
     917            8 :             start_col = start_col + nhomo(ispin)
     918              : 
     919              :          END DO
     920              :       END DO
     921              : 
     922              :       !Build the preconditioner. Copy the "canonical" pre-computed matrix into the proper spin/do_mo
     923              :       !quadrants/blocks. The end matrix is purely block diagonal
     924            8 :       DO ispin = 1, nspins
     925              : 
     926            4 :          CALL dbcsr_iterator_start(iter, xas_tdp_env%ot_prec(ispin)%matrix)
     927         9316 :          DO WHILE (dbcsr_iterator_blocks_left(iter))
     928              : 
     929         9312 :             CALL dbcsr_iterator_next_block(iter, row=iblk, column=jblk)
     930              : 
     931         9312 :             CALL dbcsr_get_block_p(xas_tdp_env%ot_prec(ispin)%matrix, iblk, jblk, pblock, found)
     932              : 
     933         9316 :             IF (found) THEN
     934              : 
     935         9312 :                start_block = (ispin - 1)*ndo_mo*natom
     936        18624 :                DO ido_mo = 1, ndo_mo
     937         9312 :                   CALL dbcsr_put_block(precond, start_block + iblk, start_block + jblk, pblock)
     938              : 
     939        18624 :                   start_block = start_block + natom
     940              : 
     941              :                END DO
     942              :             END IF
     943              : 
     944              :          END DO !dbcsr iter
     945           12 :          CALL dbcsr_iterator_stop(iter)
     946              :       END DO
     947              : 
     948            4 :       CALL dbcsr_finalize(precond)
     949              : 
     950            4 :       CALL timestop(handle)
     951              : 
     952            4 :    END SUBROUTINE prep_for_ot
     953              : 
     954              : ! **************************************************************************************************
     955              : !> \brief Returns the scaling to apply to normalize the LR eigenvectors.
     956              : !> \param scaling the scaling array to apply
     957              : !> \param lr_coeffs the linear response coefficients as a fm
     958              : !> \param donor_state ...
     959              : !> \note The LR coeffs are normalized when c^T G c = +- 1, G is the metric, c = c^- for TDA and
     960              : !>       c = c^+ - c^- for the full problem
     961              : ! **************************************************************************************************
     962            6 :    SUBROUTINE get_normal_scaling(scaling, lr_coeffs, donor_state)
     963              : 
     964              :       REAL(dp), ALLOCATABLE, DIMENSION(:)                :: scaling
     965              :       TYPE(cp_fm_type), INTENT(IN)                       :: lr_coeffs
     966              :       TYPE(donor_state_type), POINTER                    :: donor_state
     967              : 
     968              :       INTEGER                                            :: nrow, nscal, nvals
     969              :       REAL(dp), ALLOCATABLE, DIMENSION(:)                :: diag
     970              :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
     971              :       TYPE(cp_fm_struct_type), POINTER                   :: norm_struct, work_struct
     972              :       TYPE(cp_fm_type)                                   :: fm_norm, work
     973              :       TYPE(mp_para_env_type), POINTER                    :: para_env
     974              : 
     975            6 :       NULLIFY (para_env, blacs_env, norm_struct, work_struct)
     976              : 
     977              : !  Creating the matrix structures and initializing the work matrices
     978              :       CALL cp_fm_get_info(lr_coeffs, context=blacs_env, para_env=para_env, &
     979            6 :                           matrix_struct=work_struct, ncol_global=nvals, nrow_global=nrow)
     980              :       CALL cp_fm_struct_create(norm_struct, para_env=para_env, context=blacs_env, &
     981            6 :                                nrow_global=nvals, ncol_global=nvals)
     982              : 
     983            6 :       CALL cp_fm_create(work, work_struct)
     984            6 :       CALL cp_fm_create(fm_norm, norm_struct)
     985              : 
     986              : !  Taking c^T * G * C
     987            6 :       CALL cp_dbcsr_sm_fm_multiply(donor_state%metric(1)%matrix, lr_coeffs, work, ncol=nvals)
     988            6 :       CALL parallel_gemm('T', 'N', nvals, nvals, nrow, 1.0_dp, lr_coeffs, work, 0.0_dp, fm_norm)
     989              : 
     990              : !  Computing the needed scaling
     991           18 :       ALLOCATE (diag(nvals))
     992            6 :       CALL cp_fm_get_diag(fm_norm, diag)
     993          150 :       WHERE (ABS(diag) > 1.0E-8_dp) diag = 1.0_dp/SQRT(ABS(diag))
     994              : 
     995            6 :       nscal = SIZE(scaling)
     996          150 :       scaling(1:nscal) = diag(1:nscal)
     997              : 
     998              : !  Clean-up
     999            6 :       CALL cp_fm_release(work)
    1000            6 :       CALL cp_fm_release(fm_norm)
    1001            6 :       CALL cp_fm_struct_release(norm_struct)
    1002              : 
    1003           18 :    END SUBROUTINE get_normal_scaling
    1004              : 
    1005              : ! **************************************************************************************************
    1006              : !> \brief This subroutine computes the row/column block structure as well as the dbcsr ditrinution
    1007              : !>        for the submatrices making up the generalized XAS TDP eigenvalue problem. They all share
    1008              : !>        the same properties, which are based on the replication of the KS matrix. Stored in the
    1009              : !>        donor_state_type
    1010              : !> \param donor_state ...
    1011              : !> \param do_os whether this is a open-shell calculation
    1012              : !> \param qs_env ...
    1013              : ! **************************************************************************************************
    1014           72 :    SUBROUTINE compute_submat_dist_and_blk_size(donor_state, do_os, qs_env)
    1015              : 
    1016              :       TYPE(donor_state_type), POINTER                    :: donor_state
    1017              :       LOGICAL, INTENT(IN)                                :: do_os
    1018              :       TYPE(qs_environment_type), POINTER                 :: qs_env
    1019              : 
    1020              :       INTEGER                                            :: group, i, nao, nblk_row, ndo_mo, nspins, &
    1021              :                                                             scol_dist, srow_dist
    1022           72 :       INTEGER, DIMENSION(:), POINTER                     :: col_dist, col_dist_sub, row_blk_size, &
    1023           72 :                                                             row_dist, row_dist_sub, submat_blk_size
    1024           72 :       INTEGER, DIMENSION(:, :), POINTER                  :: pgrid
    1025              :       TYPE(dbcsr_distribution_type), POINTER             :: dbcsr_dist, submat_dist
    1026           72 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrix_ks
    1027              : 
    1028           72 :       NULLIFY (matrix_ks, dbcsr_dist, row_blk_size, row_dist, col_dist, pgrid, col_dist_sub)
    1029           72 :       NULLIFY (row_dist_sub, submat_dist, submat_blk_size)
    1030              : 
    1031              : !  The submatrices are indexed by M_{pi sig,qj tau}, where p,q label basis functions and i,j donor
    1032              : !  MOs and sig,tau the spins. For each spin and donor MOs combination, one has a submatrix of the
    1033              : !  size of the KS matrix (nao x nao) with the same dbcsr block structure
    1034              : 
    1035              : !  Initialization
    1036           72 :       ndo_mo = donor_state%ndo_mo
    1037           72 :       CALL get_qs_env(qs_env=qs_env, matrix_ks=matrix_ks, dbcsr_dist=dbcsr_dist)
    1038           72 :       CALL dbcsr_get_info(matrix_ks(1)%matrix, row_blk_size=row_blk_size)
    1039              :       CALL dbcsr_distribution_get(dbcsr_dist, row_dist=row_dist, col_dist=col_dist, group=group, &
    1040           72 :                                   pgrid=pgrid)
    1041          718 :       nao = SUM(row_blk_size)
    1042           72 :       nblk_row = SIZE(row_blk_size)
    1043           72 :       srow_dist = SIZE(row_dist)
    1044           72 :       scol_dist = SIZE(col_dist)
    1045           72 :       nspins = 1; IF (do_os) nspins = 2
    1046              : 
    1047              : !  Creation if submatrix block size and col/row distribution
    1048          216 :       ALLOCATE (submat_blk_size(ndo_mo*nspins*nblk_row))
    1049          216 :       ALLOCATE (row_dist_sub(ndo_mo*nspins*srow_dist))
    1050          216 :       ALLOCATE (col_dist_sub(ndo_mo*nspins*scol_dist))
    1051              : 
    1052          168 :       DO i = 1, ndo_mo*nspins
    1053         1576 :          submat_blk_size((i - 1)*nblk_row + 1:i*nblk_row) = row_blk_size
    1054         1576 :          row_dist_sub((i - 1)*srow_dist + 1:i*srow_dist) = row_dist
    1055         1648 :          col_dist_sub((i - 1)*scol_dist + 1:i*scol_dist) = col_dist
    1056              :       END DO
    1057              : 
    1058              : !  Create the submatrix dbcsr distribution
    1059           72 :       ALLOCATE (submat_dist)
    1060              :       CALL dbcsr_distribution_new(submat_dist, group=group, pgrid=pgrid, row_dist=row_dist_sub, &
    1061           72 :                                   col_dist=col_dist_sub)
    1062              : 
    1063           72 :       donor_state%dbcsr_dist => submat_dist
    1064           72 :       donor_state%blk_size => submat_blk_size
    1065              : 
    1066              : !  Clean-up
    1067           72 :       DEALLOCATE (col_dist_sub, row_dist_sub)
    1068              : 
    1069          216 :    END SUBROUTINE compute_submat_dist_and_blk_size
    1070              : 
    1071              : ! **************************************************************************************************
    1072              : !> \brief Returns the projector on the unperturbed unoccupied ground state Q = 1 - SP on the block
    1073              : !>        diagonal of a matrix with the standard size and distribution.
    1074              : !> \param proj_Q the matrix with the projector
    1075              : !> \param donor_state ...
    1076              : !> \param do_os whether it is open-shell calculation
    1077              : !> \param xas_tdp_env ...
    1078              : !> \param do_sf whether the projector should be prepared for spin-flip excitations
    1079              : !> \note In the spin-flip case, the alpha spins are sent to beta and vice-versa. The structure of
    1080              : !>       the projector is changed by swapping the alpha-alpha with the beta-beta block, which
    1081              : !>       naturally take the spin change into account. Only for open-shell.
    1082              : ! **************************************************************************************************
    1083           74 :    SUBROUTINE get_q_projector(proj_Q, donor_state, do_os, xas_tdp_env, do_sf)
    1084              : 
    1085              :       TYPE(dbcsr_type), INTENT(INOUT)                    :: proj_Q
    1086              :       TYPE(donor_state_type), POINTER                    :: donor_state
    1087              :       LOGICAL, INTENT(IN)                                :: do_os
    1088              :       TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
    1089              :       LOGICAL, INTENT(IN), OPTIONAL                      :: do_sf
    1090              : 
    1091              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'get_q_projector'
    1092              : 
    1093              :       INTEGER                                            :: handle, iblk, imo, ispin, jblk, &
    1094              :                                                             nblk_row, ndo_mo, nspins
    1095           74 :       INTEGER, DIMENSION(:), POINTER                     :: blk_size_q, row_blk_size
    1096              :       LOGICAL                                            :: found_block, my_dosf
    1097           74 :       REAL(dp), DIMENSION(:, :), POINTER                 :: work_block
    1098              :       TYPE(dbcsr_distribution_type), POINTER             :: dist_q
    1099              :       TYPE(dbcsr_iterator_type)                          :: iter
    1100              :       TYPE(dbcsr_type), POINTER                          :: one_sp
    1101              : 
    1102           74 :       NULLIFY (work_block, one_sp, row_blk_size, dist_q, blk_size_q)
    1103              : 
    1104           74 :       CALL timeset(routineN, handle)
    1105              : 
    1106              : !  Initialization
    1107           74 :       nspins = 1; IF (do_os) nspins = 2
    1108           74 :       ndo_mo = donor_state%ndo_mo
    1109           74 :       one_sp => xas_tdp_env%q_projector(1)%matrix
    1110           74 :       CALL dbcsr_get_info(one_sp, row_blk_size=row_blk_size)
    1111           74 :       nblk_row = SIZE(row_blk_size)
    1112           74 :       my_dosf = .FALSE.
    1113           74 :       IF (PRESENT(do_sf)) my_dosf = do_sf
    1114           74 :       dist_q => donor_state%dbcsr_dist
    1115           74 :       blk_size_q => donor_state%blk_size
    1116              : 
    1117              :       ! the projector is not symmetric
    1118              :       CALL dbcsr_create(matrix=proj_Q, name="PROJ Q", matrix_type=dbcsr_type_no_symmetry, dist=dist_q, &
    1119           74 :                         row_blk_size=blk_size_q, col_blk_size=blk_size_q)
    1120              : 
    1121              : !  Fill the projector by looping over 1-SP and duplicating blocks. (all on the spin-MO block diagonal)
    1122          162 :       DO ispin = 1, nspins
    1123           88 :          one_sp => xas_tdp_env%q_projector(ispin)%matrix
    1124              : 
    1125              :          !if spin-flip, swap the alpha-alpha and beta-beta blocks
    1126           88 :          IF (my_dosf) one_sp => xas_tdp_env%q_projector(3 - ispin)%matrix
    1127              : 
    1128           88 :          CALL dbcsr_iterator_start(iter, one_sp)
    1129        19282 :          DO WHILE (dbcsr_iterator_blocks_left(iter))
    1130              : 
    1131        19194 :             CALL dbcsr_iterator_next_block(iter, row=iblk, column=jblk)
    1132              : 
    1133              :             ! get the block
    1134        19194 :             CALL dbcsr_get_block_p(one_sp, iblk, jblk, work_block, found_block)
    1135              : 
    1136        19194 :             IF (found_block) THEN
    1137              : 
    1138        38398 :                DO imo = 1, ndo_mo
    1139              :                   CALL dbcsr_put_block(proj_Q, ((ispin - 1)*ndo_mo + imo - 1)*nblk_row + iblk, &
    1140        38398 :                                        ((ispin - 1)*ndo_mo + imo - 1)*nblk_row + jblk, work_block)
    1141              :                END DO
    1142              : 
    1143              :             END IF
    1144        19194 :             NULLIFY (work_block)
    1145              : 
    1146              :          END DO !iterator
    1147          250 :          CALL dbcsr_iterator_stop(iter)
    1148              :       END DO !ispin
    1149              : 
    1150           74 :       CALL dbcsr_finalize(proj_Q)
    1151              : 
    1152           74 :       CALL timestop(handle)
    1153              : 
    1154           74 :    END SUBROUTINE get_q_projector
    1155              : 
    1156              : ! **************************************************************************************************
    1157              : !> \brief Builds the matrix containing the ground state contribution to the matrix_tdp (aka matrix A)
    1158              : !>         => A_{pis,qjt} = (F_pq*delta_ij - epsilon_ij*S_pq) delta_st, where:
    1159              : !>         F is the KS matrix
    1160              : !>         S is the overlap matrix
    1161              : !>         epsilon_ij is the donor MO eigenvalue
    1162              : !>         i,j labels the MOs, p,q the AOs and s,t the spins
    1163              : !> \param matrix_a  pointer to a DBCSR matrix containing A
    1164              : !> \param donor_state ...
    1165              : !> \param do_os ...
    1166              : !> \param qs_env ...
    1167              : !> \param do_sf whether the ground state contribution should accommodate spin-flip
    1168              : !> \note Even localized non-canonical MOs are diagonalized in their subsapce => eps_ij = eps_ii*delta_ij
    1169              : !>       Use GW2X corrected evals as eps_ii. If not GW2X correction, these are the default KS energies
    1170              : ! **************************************************************************************************
    1171           74 :    SUBROUTINE build_gs_contribution(matrix_a, donor_state, do_os, qs_env, do_sf)
    1172              : 
    1173              :       TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix_a
    1174              :       TYPE(donor_state_type), POINTER                    :: donor_state
    1175              :       LOGICAL, INTENT(IN)                                :: do_os
    1176              :       TYPE(qs_environment_type), POINTER                 :: qs_env
    1177              :       LOGICAL, INTENT(IN), OPTIONAL                      :: do_sf
    1178              : 
    1179              :       CHARACTER(len=*), PARAMETER :: routineN = 'build_gs_contribution'
    1180              : 
    1181              :       INTEGER                                            :: handle, iblk, imo, ispin, jblk, &
    1182              :                                                             nblk_row, ndo_mo, nspins
    1183           74 :       INTEGER, DIMENSION(:), POINTER                     :: blk_size_a, row_blk_size
    1184              :       LOGICAL                                            :: found_block, my_dosf
    1185           74 :       REAL(dp), DIMENSION(:, :), POINTER                 :: work_block
    1186              :       TYPE(dbcsr_distribution_type), POINTER             :: dbcsr_dist, dist_a
    1187              :       TYPE(dbcsr_iterator_type)                          :: iter
    1188           74 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: m_ks, matrix_ks, matrix_s
    1189              :       TYPE(dbcsr_type)                                   :: work_matrix
    1190              : 
    1191           74 :       NULLIFY (matrix_ks, dbcsr_dist, row_blk_size, work_block, matrix_s, m_ks)
    1192           74 :       NULLIFY (dist_a, blk_size_a)
    1193              : 
    1194              :       !  Note: matrix A is symmetric and block diagonal. If ADMM, the ks matrix is the total one,
    1195              :       !        and it is corrected for eigenvalues (done at xas_tdp_init)
    1196              : 
    1197           74 :       CALL timeset(routineN, handle)
    1198              : 
    1199              : !  Initialization
    1200           74 :       nspins = 1; IF (do_os) nspins = 2
    1201           74 :       ndo_mo = donor_state%ndo_mo
    1202           74 :       CALL get_qs_env(qs_env=qs_env, matrix_ks=matrix_ks, matrix_s=matrix_s, dbcsr_dist=dbcsr_dist)
    1203           74 :       CALL dbcsr_get_info(matrix_s(1)%matrix, row_blk_size=row_blk_size)
    1204           74 :       nblk_row = SIZE(row_blk_size)
    1205           74 :       dist_a => donor_state%dbcsr_dist
    1206           74 :       blk_size_a => donor_state%blk_size
    1207              : 
    1208              : !  Prepare the KS matrix pointer
    1209          236 :       ALLOCATE (m_ks(nspins))
    1210           74 :       m_ks(1)%matrix => matrix_ks(1)%matrix
    1211           74 :       IF (do_os) m_ks(2)%matrix => matrix_ks(2)%matrix
    1212              : 
    1213              : ! If spin-flip, swap the KS alpha-alpha and beta-beta quadrants.
    1214           74 :       my_dosf = .FALSE.
    1215           74 :       IF (PRESENT(do_sf)) my_dosf = do_sf
    1216            2 :       IF (my_dosf .AND. do_os) THEN
    1217            2 :          m_ks(1)%matrix => matrix_ks(2)%matrix
    1218            2 :          m_ks(2)%matrix => matrix_ks(1)%matrix
    1219              :       END IF
    1220              : 
    1221              : !  Creating the symmetric matrix A (and work)
    1222              :       CALL dbcsr_create(matrix=matrix_a, name="MATRIX A", matrix_type=dbcsr_type_symmetric, &
    1223           74 :                         dist=dist_a, row_blk_size=blk_size_a, col_blk_size=blk_size_a)
    1224              :       CALL dbcsr_create(matrix=work_matrix, name="WORK MAT", matrix_type=dbcsr_type_symmetric, &
    1225           74 :                         dist=dist_a, row_blk_size=blk_size_a, col_blk_size=blk_size_a)
    1226              : 
    1227          162 :       DO ispin = 1, nspins
    1228              : 
    1229              : !     Loop over the blocks of KS and put them on the spin-MO block diagonal of matrix A
    1230           88 :          CALL dbcsr_iterator_start(iter, m_ks(ispin)%matrix)
    1231         9504 :          DO WHILE (dbcsr_iterator_blocks_left(iter))
    1232              : 
    1233         9416 :             CALL dbcsr_iterator_next_block(iter, row=iblk, column=jblk)
    1234              : 
    1235              : !           Get the block
    1236         9416 :             CALL dbcsr_get_block_p(m_ks(ispin)%matrix, iblk, jblk, work_block, found_block)
    1237              : 
    1238         9416 :             IF (found_block) THEN
    1239              : 
    1240              : !              The KS matrix only appears on diagonal of matrix A => loop over II donor MOs
    1241        18842 :                DO imo = 1, ndo_mo
    1242              : 
    1243              : !                 Put the block as it is
    1244              :                   CALL dbcsr_put_block(matrix_a, ((ispin - 1)*ndo_mo + imo - 1)*nblk_row + iblk, &
    1245        18842 :                                        ((ispin - 1)*ndo_mo + imo - 1)*nblk_row + jblk, work_block)
    1246              : 
    1247              :                END DO !imo
    1248              :             END IF !found_block
    1249         9416 :             NULLIFY (work_block)
    1250              :          END DO ! iteration on KS blocks
    1251           88 :          CALL dbcsr_iterator_stop(iter)
    1252              : 
    1253              : !     Loop over the blocks of S and put them on the block diagonal of work
    1254              : 
    1255           88 :          CALL dbcsr_iterator_start(iter, matrix_s(1)%matrix)
    1256         9504 :          DO WHILE (dbcsr_iterator_blocks_left(iter))
    1257              : 
    1258         9416 :             CALL dbcsr_iterator_next_block(iter, row=iblk, column=jblk)
    1259              : 
    1260              : !           Get the block
    1261         9416 :             CALL dbcsr_get_block_p(matrix_s(1)%matrix, iblk, jblk, work_block, found_block)
    1262              : 
    1263         9416 :             IF (found_block) THEN
    1264              : 
    1265              : !              Add S matrix on block diagonal as epsilon_ii*S_pq
    1266        18842 :                DO imo = 1, ndo_mo
    1267              : 
    1268              :                   CALL dbcsr_put_block(work_matrix, ((ispin - 1)*ndo_mo + imo - 1)*nblk_row + iblk, &
    1269              :                                        ((ispin - 1)*ndo_mo + imo - 1)*nblk_row + jblk, &
    1270       242160 :                                        donor_state%gw2x_evals(imo, ispin)*work_block)
    1271              :                END DO !imo
    1272              :             END IF !found block
    1273         9416 :             NULLIFY (work_block)
    1274              :          END DO ! iteration on S blocks
    1275          338 :          CALL dbcsr_iterator_stop(iter)
    1276              : 
    1277              :       END DO !ispin
    1278           74 :       CALL dbcsr_finalize(matrix_a)
    1279           74 :       CALL dbcsr_finalize(work_matrix)
    1280              : 
    1281              : !  Take matrix_a = matrix_a - work
    1282           74 :       CALL dbcsr_add(matrix_a, work_matrix, 1.0_dp, -1.0_dp)
    1283           74 :       CALL dbcsr_finalize(matrix_a)
    1284              : 
    1285              : !  Clean-up
    1286           74 :       CALL dbcsr_release(work_matrix)
    1287           74 :       DEALLOCATE (m_ks)
    1288              : 
    1289           74 :       CALL timestop(handle)
    1290              : 
    1291           74 :    END SUBROUTINE build_gs_contribution
    1292              : 
    1293              : ! **************************************************************************************************
    1294              : !> \brief Creates the metric (aka  matrix G) needed for the generalized eigenvalue problem and inverse
    1295              : !>         => G_{pis,qjt} = S_pq*delta_ij*delta_st
    1296              : !> \param matrix_g dbcsr matrix containing G
    1297              : !> \param donor_state ...
    1298              : !> \param qs_env ...
    1299              : !> \param do_os if open-shell calculation
    1300              : !> \param do_inv if the inverse of G should be computed
    1301              : ! **************************************************************************************************
    1302          216 :    SUBROUTINE build_metric(matrix_g, donor_state, qs_env, do_os, do_inv)
    1303              : 
    1304              :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrix_g
    1305              :       TYPE(donor_state_type), POINTER                    :: donor_state
    1306              :       TYPE(qs_environment_type), POINTER                 :: qs_env
    1307              :       LOGICAL, INTENT(IN)                                :: do_os
    1308              :       LOGICAL, INTENT(IN), OPTIONAL                      :: do_inv
    1309              : 
    1310              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'build_metric'
    1311              : 
    1312              :       INTEGER                                            :: handle, i, iblk, jblk, nao, nblk_row, &
    1313              :                                                             ndo_mo, nspins
    1314           72 :       INTEGER, DIMENSION(:), POINTER                     :: blk_size_g, row_blk_size
    1315              :       LOGICAL                                            :: found_block, my_do_inv
    1316           72 :       REAL(dp), DIMENSION(:, :), POINTER                 :: work_block
    1317              :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
    1318              :       TYPE(dbcsr_distribution_type), POINTER             :: dist_g
    1319              :       TYPE(dbcsr_iterator_type)                          :: iter
    1320           72 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrix_s
    1321              :       TYPE(dbcsr_type)                                   :: matrix_sinv
    1322              :       TYPE(mp_para_env_type), POINTER                    :: para_env
    1323              : 
    1324           72 :       NULLIFY (matrix_s, row_blk_size, work_block, para_env, blacs_env, dist_g, blk_size_g)
    1325              : 
    1326           72 :       CALL timeset(routineN, handle)
    1327              : 
    1328              : !  Initialization
    1329           72 :       nspins = 1; IF (do_os) nspins = 2
    1330           72 :       ndo_mo = donor_state%ndo_mo
    1331           72 :       CALL get_qs_env(qs_env=qs_env, matrix_s=matrix_s)
    1332           72 :       CALL dbcsr_get_info(matrix_s(1)%matrix, row_blk_size=row_blk_size, nfullrows_total=nao)
    1333           72 :       nblk_row = SIZE(row_blk_size)
    1334           72 :       my_do_inv = .FALSE.
    1335           72 :       IF (PRESENT(do_inv)) my_do_inv = do_inv
    1336           72 :       dist_g => donor_state%dbcsr_dist
    1337           72 :       blk_size_g => donor_state%blk_size
    1338              : 
    1339              : !  Creating the symmetric  matrices G and G^-1 with the right size and distribution
    1340           72 :       ALLOCATE (matrix_g(1)%matrix)
    1341              :       CALL dbcsr_create(matrix=matrix_g(1)%matrix, name="MATRIX G", matrix_type=dbcsr_type_symmetric, &
    1342           72 :                         dist=dist_g, row_blk_size=blk_size_g, col_blk_size=blk_size_g)
    1343              : 
    1344              : !  Fill the matrices G by looping over the block of S and putting them on the diagonal
    1345           72 :       CALL dbcsr_iterator_start(iter, matrix_s(1)%matrix)
    1346         9449 :       DO WHILE (dbcsr_iterator_blocks_left(iter))
    1347              : 
    1348         9377 :          CALL dbcsr_iterator_next_block(iter, row=iblk, column=jblk)
    1349              : 
    1350              : !        Get the block
    1351         9377 :          CALL dbcsr_get_block_p(matrix_s(1)%matrix, iblk, jblk, work_block, found_block)
    1352              : 
    1353         9377 :          IF (found_block) THEN
    1354              : 
    1355              : !           Go over the diagonal of G => donor MOs ii, spin ss
    1356        18797 :             DO i = 1, ndo_mo*nspins
    1357        18797 :                CALL dbcsr_put_block(matrix_g(1)%matrix, (i - 1)*nblk_row + iblk, (i - 1)*nblk_row + jblk, work_block)
    1358              :             END DO
    1359              : 
    1360              :          END IF
    1361         9377 :          NULLIFY (work_block)
    1362              : 
    1363              :       END DO ! dbcsr_iterator
    1364           72 :       CALL dbcsr_iterator_stop(iter)
    1365              : 
    1366              : !  Finalize
    1367           72 :       CALL dbcsr_finalize(matrix_g(1)%matrix)
    1368              : 
    1369              : !  If the inverse of G is required, do the same as above with the inverse
    1370           72 :       IF (my_do_inv) THEN
    1371              : 
    1372            6 :          CPASSERT(SIZE(matrix_g) == 2)
    1373              : 
    1374              :          ! Create the matrix
    1375            6 :          ALLOCATE (matrix_g(2)%matrix)
    1376              :          CALL dbcsr_create(matrix=matrix_g(2)%matrix, name="MATRIX GINV", &
    1377              :                            matrix_type=dbcsr_type_symmetric, dist=dist_g, &
    1378            6 :                            row_blk_size=blk_size_g, col_blk_size=blk_size_g)
    1379              : 
    1380              :          ! Invert the overlap matrix
    1381            6 :          CALL get_qs_env(qs_env, para_env=para_env, blacs_env=blacs_env)
    1382            6 :          CALL dbcsr_copy(matrix_sinv, matrix_s(1)%matrix)
    1383            6 :          CALL cp_dbcsr_cholesky_decompose(matrix_sinv, para_env=para_env, blacs_env=blacs_env)
    1384            6 :          CALL cp_dbcsr_cholesky_invert(matrix_sinv, para_env=para_env, blacs_env=blacs_env, uplo_to_full=.TRUE.)
    1385              : 
    1386              : !     Fill the matrices G^-1 by looping over the block of S^-1 and putting them on the diagonal
    1387            6 :          CALL dbcsr_iterator_start(iter, matrix_sinv)
    1388           24 :          DO WHILE (dbcsr_iterator_blocks_left(iter))
    1389              : 
    1390           18 :             CALL dbcsr_iterator_next_block(iter, row=iblk, column=jblk)
    1391              : 
    1392              : !           Get the block
    1393           18 :             CALL dbcsr_get_block_p(matrix_sinv, iblk, jblk, work_block, found_block)
    1394              : 
    1395           18 :             IF (found_block) THEN
    1396              : 
    1397              : !              Go over the diagonal of G => donor MOs ii spin ss
    1398           36 :                DO i = 1, ndo_mo*nspins
    1399           36 :                   CALL dbcsr_put_block(matrix_g(2)%matrix, (i - 1)*nblk_row + iblk, (i - 1)*nblk_row + jblk, work_block)
    1400              :                END DO
    1401              : 
    1402              :             END IF
    1403           18 :             NULLIFY (work_block)
    1404              : 
    1405              :          END DO ! dbcsr_iterator
    1406            6 :          CALL dbcsr_iterator_stop(iter)
    1407              : 
    1408              :          !  Finalize
    1409            6 :          CALL dbcsr_finalize(matrix_g(2)%matrix)
    1410              : 
    1411              :          !  Clean-up
    1412            6 :          CALL dbcsr_release(matrix_sinv)
    1413              :       END IF !do_inv
    1414              : 
    1415           72 :       CALL timestop(handle)
    1416              : 
    1417           72 :    END SUBROUTINE build_metric
    1418              : 
    1419              : ! **************************************************************************************************
    1420              : !> \brief Builds the auxiliary matrix (A-D+E)^+0.5 needed for the transofrmation of the
    1421              : !>        full-TDDFT problem into an Hermitian one
    1422              : !> \param threshold a threshold for allowed negative eigenvalues
    1423              : !> \param sx the amount of exact exchange
    1424              : !> \param matrix_a the ground state contribution matrix A
    1425              : !> \param matrix_d the on-diagonal exchange kernel matrix (ab|IJ)
    1426              : !> \param matrix_e the off-diagonal exchange kernel matrix (aJ|Ib)
    1427              : !> \param do_hfx if exact exchange is included
    1428              : !> \param proj_Q ...
    1429              : !> \param work ...
    1430              : !> \param donor_state ...
    1431              : !> \param eps_filter for the dbcsr multiplication
    1432              : !> \param qs_env ...
    1433              : ! **************************************************************************************************
    1434            6 :    SUBROUTINE build_aux_matrix(threshold, sx, matrix_a, matrix_d, matrix_e, do_hfx, proj_Q, &
    1435              :                                work, donor_state, eps_filter, qs_env)
    1436              : 
    1437              :       REAL(dp), INTENT(IN)                               :: threshold, sx
    1438              :       TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix_a, matrix_d, matrix_e
    1439              :       LOGICAL, INTENT(IN)                                :: do_hfx
    1440              :       TYPE(dbcsr_type), INTENT(INOUT)                    :: proj_Q, work
    1441              :       TYPE(donor_state_type), POINTER                    :: donor_state
    1442              :       REAL(dp), INTENT(IN)                               :: eps_filter
    1443              :       TYPE(qs_environment_type), POINTER                 :: qs_env
    1444              : 
    1445              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'build_aux_matrix'
    1446              : 
    1447              :       INTEGER                                            :: handle, ndep
    1448              :       REAL(dp)                                           :: evals(2)
    1449              :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
    1450              :       TYPE(dbcsr_type)                                   :: tmp_mat
    1451              :       TYPE(mp_para_env_type), POINTER                    :: para_env
    1452              : 
    1453            6 :       NULLIFY (blacs_env, para_env)
    1454              : 
    1455            6 :       CALL timeset(routineN, handle)
    1456              : 
    1457            6 :       CALL dbcsr_copy(tmp_mat, matrix_a)
    1458            6 :       IF (do_hfx) THEN
    1459            6 :          CALL dbcsr_add(tmp_mat, matrix_d, 1.0_dp, -1.0_dp*sx) !scaled hfx
    1460            6 :          CALL dbcsr_add(tmp_mat, matrix_e, 1.0_dp, 1.0_dp*sx)
    1461              :       END IF
    1462              : 
    1463              :       ! Take the product with the Q projector:
    1464            6 :       CALL dbcsr_multiply('N', 'N', 1.0_dp, proj_Q, tmp_mat, 0.0_dp, work, filter_eps=eps_filter)
    1465            6 :       CALL dbcsr_multiply('N', 'T', 1.0_dp, work, proj_Q, 0.0_dp, tmp_mat, filter_eps=eps_filter)
    1466              : 
    1467              :       ! Actually computing and storing the auxiliary matrix
    1468            6 :       ALLOCATE (donor_state%matrix_aux)
    1469            6 :       CALL dbcsr_create(matrix=donor_state%matrix_aux, template=matrix_a, name="MAT AUX")
    1470              : 
    1471            6 :       CALL get_qs_env(qs_env, para_env=para_env, blacs_env=blacs_env)
    1472              : 
    1473              :       ! good quality sqrt
    1474            6 :       CALL cp_dbcsr_power(tmp_mat, 0.5_dp, threshold, ndep, para_env, blacs_env, eigenvalues=evals)
    1475              : 
    1476            6 :       CALL dbcsr_copy(donor_state%matrix_aux, tmp_mat)
    1477              : 
    1478              :       ! Warn the user if matrix not positive semi-definite
    1479            6 :       IF (evals(1) < 0.0_dp .AND. ABS(evals(1)) > threshold) THEN
    1480            0 :          CPWARN("The full TDDFT problem might not have been soundly turned Hermitian. Try TDA.")
    1481              :       END IF
    1482              : 
    1483              :       ! clean-up
    1484            6 :       CALL dbcsr_release(tmp_mat)
    1485              : 
    1486            6 :       CALL timestop(handle)
    1487              : 
    1488            6 :    END SUBROUTINE build_aux_matrix
    1489              : 
    1490              : ! **************************************************************************************************
    1491              : !> \brief Includes the SOC effects on the precomputed spin-conserving and spin-flip excitations
    1492              : !>        from an open-shell calculation (UKS or ROKS). This is a perturbative treatment
    1493              : !> \param donor_state ...
    1494              : !> \param xas_tdp_env ...
    1495              : !> \param xas_tdp_control ...
    1496              : !> \param qs_env ...
    1497              : !> \note Using AMEWs, build an hermitian matrix with all excited states SOC coupling + the
    1498              : !>       excitation energies on the diagonal. Then diagonalize it to get the new excitation
    1499              : !>       energies and corresponding linear combinations of lr_coeffs.
    1500              : !>       The AMEWs are normalized
    1501              : !>       Only for open-shell calculations
    1502              : ! **************************************************************************************************
    1503            2 :    SUBROUTINE include_os_soc(donor_state, xas_tdp_env, xas_tdp_control, qs_env)
    1504              : 
    1505              :       TYPE(donor_state_type), POINTER                    :: donor_state
    1506              :       TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
    1507              :       TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
    1508              :       TYPE(qs_environment_type), POINTER                 :: qs_env
    1509              : 
    1510              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'include_os_soc'
    1511              : 
    1512              :       INTEGER                                            :: group, handle, homo, iex, isc, isf, nao, &
    1513              :                                                             ndo_mo, ndo_so, nex, npcols, nprows, &
    1514              :                                                             nsc, nsf, ntot, tas(2), tbs(2)
    1515            2 :       INTEGER, DIMENSION(:), POINTER                     :: col_blk_size, col_dist, row_blk_size, &
    1516            2 :                                                             row_dist, row_dist_new
    1517            2 :       INTEGER, DIMENSION(:, :), POINTER                  :: pgrid
    1518              :       LOGICAL                                            :: do_roks, do_uks
    1519              :       REAL(dp)                                           :: eps_filter, gs_sum, soc
    1520            2 :       REAL(dp), ALLOCATABLE, DIMENSION(:)                :: diag, tmp_evals
    1521            2 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: domo_soc_x, domo_soc_y, domo_soc_z, &
    1522            2 :                                                             gsex_block
    1523            2 :       REAL(dp), DIMENSION(:), POINTER                    :: sc_evals, sf_evals
    1524              :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
    1525              :       TYPE(cp_cfm_type)                                  :: evecs_cfm, pert_cfm
    1526              :       TYPE(cp_fm_struct_type), POINTER                   :: full_struct, gsex_struct, prod_struct, &
    1527              :                                                             vec_struct, work_struct
    1528              :       TYPE(cp_fm_type)                                   :: gsex_fm, img_fm, prod_work, real_fm, &
    1529              :                                                             vec_soc_x, vec_soc_y, vec_soc_z, &
    1530              :                                                             vec_work, work_fm
    1531              :       TYPE(cp_fm_type), POINTER                          :: gs_coeffs, mo_coeff, sc_coeffs, sf_coeffs
    1532              :       TYPE(dbcsr_distribution_type), POINTER             :: coeffs_dist, dbcsr_dist, prod_dist
    1533            2 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrix_s
    1534              :       TYPE(dbcsr_soc_package_type)                       :: dbcsr_soc_package
    1535              :       TYPE(dbcsr_type), POINTER                          :: dbcsr_ovlp, dbcsr_prod, dbcsr_sc, &
    1536              :                                                             dbcsr_sf, dbcsr_tmp, dbcsr_work, &
    1537              :                                                             orb_soc_x, orb_soc_y, orb_soc_z
    1538            2 :       TYPE(mo_set_type), DIMENSION(:), POINTER           :: mos
    1539              :       TYPE(mp_para_env_type), POINTER                    :: para_env
    1540              : 
    1541            2 :       NULLIFY (gs_coeffs, sc_coeffs, sf_coeffs, matrix_s, orb_soc_x, orb_soc_y, orb_soc_z, mos)
    1542            2 :       NULLIFY (full_struct, para_env, blacs_env, mo_coeff, sc_evals, sf_evals, vec_struct, prod_struct)
    1543            2 :       NULLIFY (work_struct, gsex_struct, col_dist, row_dist)
    1544            2 :       NULLIFY (col_blk_size, row_blk_size, row_dist_new, pgrid, dbcsr_sc, dbcsr_sf, dbcsr_work)
    1545            2 :       NULLIFY (dbcsr_tmp, dbcsr_ovlp, dbcsr_prod)
    1546              : 
    1547            2 :       CALL timeset(routineN, handle)
    1548              : 
    1549              : ! Initialization
    1550            2 :       sc_coeffs => donor_state%sc_coeffs
    1551            2 :       sf_coeffs => donor_state%sf_coeffs
    1552            2 :       sc_evals => donor_state%sc_evals
    1553            2 :       sf_evals => donor_state%sf_evals
    1554            2 :       nsc = SIZE(sc_evals)
    1555            2 :       nsf = SIZE(sf_evals)
    1556            2 :       ntot = 1 + nsc + nsf
    1557            2 :       nex = nsc !by contrutciotn nsc == nsf, but keep 2 counts for clarity
    1558            2 :       ndo_mo = donor_state%ndo_mo
    1559            2 :       ndo_so = 2*ndo_mo
    1560            2 :       CALL get_qs_env(qs_env, para_env=para_env, blacs_env=blacs_env, mos=mos, matrix_s=matrix_s)
    1561            2 :       CALL dbcsr_get_info(matrix_s(1)%matrix, nfullrows_total=nao)
    1562            2 :       orb_soc_x => xas_tdp_env%orb_soc(1)%matrix
    1563            2 :       orb_soc_y => xas_tdp_env%orb_soc(2)%matrix
    1564            2 :       orb_soc_z => xas_tdp_env%orb_soc(3)%matrix
    1565            2 :       do_roks = xas_tdp_control%do_roks
    1566            2 :       do_uks = xas_tdp_control%do_uks
    1567            2 :       eps_filter = xas_tdp_control%eps_filter
    1568              : 
    1569              :       ! For the GS coeffs, we use the same structure both for ROKS and UKS here => allows us to write
    1570              :       ! general code later on, and not use IF (do_roks) statements every second line
    1571            2 :       IF (do_uks) gs_coeffs => donor_state%gs_coeffs
    1572            2 :       IF (do_roks) THEN
    1573              :          CALL cp_fm_struct_create(vec_struct, context=blacs_env, para_env=para_env, &
    1574            0 :                                   nrow_global=nao, ncol_global=ndo_so)
    1575            0 :          ALLOCATE (gs_coeffs)
    1576            0 :          CALL cp_fm_create(gs_coeffs, vec_struct)
    1577              : 
    1578              :          ! only alpha donor MOs are stored, need to copy them intoboth the alpha and the beta slot
    1579              :          CALL cp_fm_to_fm_submat(msource=donor_state%gs_coeffs, mtarget=gs_coeffs, nrow=nao, &
    1580              :                                  ncol=ndo_mo, s_firstrow=1, s_firstcol=1, t_firstrow=1, &
    1581            0 :                                  t_firstcol=1)
    1582              :          CALL cp_fm_to_fm_submat(msource=donor_state%gs_coeffs, mtarget=gs_coeffs, nrow=nao, &
    1583              :                                  ncol=ndo_mo, s_firstrow=1, s_firstcol=1, t_firstrow=1, &
    1584            0 :                                  t_firstcol=ndo_mo + 1)
    1585              : 
    1586            0 :          CALL cp_fm_struct_release(vec_struct)
    1587              :       END IF
    1588              : 
    1589              : ! Creating the real and the imaginary part of the SOC perturbation matrix
    1590              :       CALL cp_fm_struct_create(full_struct, context=blacs_env, para_env=para_env, &
    1591            2 :                                nrow_global=ntot, ncol_global=ntot)
    1592            2 :       CALL cp_fm_create(real_fm, full_struct, set_zero=.TRUE.)
    1593            2 :       CALL cp_fm_create(img_fm, full_struct, set_zero=.TRUE.)
    1594              : 
    1595              : ! Put the excitation energies on the diagonal of the real  matrix. Element 1,1 is the ground state
    1596           26 :       DO isc = 1, nsc
    1597           26 :          CALL cp_fm_set_element(real_fm, 1 + isc, 1 + isc, sc_evals(isc))
    1598              :       END DO
    1599           26 :       DO isf = 1, nsf
    1600           26 :          CALL cp_fm_set_element(real_fm, 1 + nsc + isf, 1 + nsc + isf, sf_evals(isf))
    1601              :       END DO
    1602              : 
    1603              : ! Create the bdcsr machinery
    1604            2 :       CALL get_qs_env(qs_env, dbcsr_dist=dbcsr_dist)
    1605              :       CALL dbcsr_distribution_get(dbcsr_dist, group=group, row_dist=row_dist, pgrid=pgrid, &
    1606            2 :                                   npcols=npcols, nprows=nprows)
    1607            8 :       ALLOCATE (col_dist(nex), row_dist_new(nex))
    1608           26 :       DO iex = 1, nex
    1609           24 :          col_dist(iex) = MODULO(npcols - iex, npcols)
    1610           26 :          row_dist_new(iex) = MODULO(nprows - iex, nprows)
    1611              :       END DO
    1612            2 :       ALLOCATE (coeffs_dist, prod_dist)
    1613              :       CALL dbcsr_distribution_new(coeffs_dist, group=group, pgrid=pgrid, row_dist=row_dist, &
    1614            2 :                                   col_dist=col_dist)
    1615              :       CALL dbcsr_distribution_new(prod_dist, group=group, pgrid=pgrid, row_dist=row_dist_new, &
    1616            2 :                                   col_dist=col_dist)
    1617              : 
    1618              :       !Create the matrices
    1619            4 :       ALLOCATE (col_blk_size(nex))
    1620           26 :       col_blk_size = ndo_so
    1621            2 :       CALL dbcsr_get_info(matrix_s(1)%matrix, row_blk_size=row_blk_size)
    1622              : 
    1623            2 :       ALLOCATE (dbcsr_sc, dbcsr_sf, dbcsr_work, dbcsr_ovlp, dbcsr_tmp, dbcsr_prod)
    1624              :       CALL dbcsr_create(matrix=dbcsr_sc, name="SPIN CONS", matrix_type=dbcsr_type_no_symmetry, &
    1625            2 :                         dist=coeffs_dist, row_blk_size=row_blk_size, col_blk_size=col_blk_size)
    1626              :       CALL dbcsr_create(matrix=dbcsr_sf, name="SPIN FLIP", matrix_type=dbcsr_type_no_symmetry, &
    1627            2 :                         dist=coeffs_dist, row_blk_size=row_blk_size, col_blk_size=col_blk_size)
    1628              :       CALL dbcsr_create(matrix=dbcsr_work, name="WORK", matrix_type=dbcsr_type_no_symmetry, &
    1629            2 :                         dist=coeffs_dist, row_blk_size=row_blk_size, col_blk_size=col_blk_size)
    1630              :       CALL dbcsr_create(matrix=dbcsr_prod, name="PROD", matrix_type=dbcsr_type_no_symmetry, &
    1631            2 :                         dist=prod_dist, row_blk_size=col_blk_size, col_blk_size=col_blk_size)
    1632              :       CALL dbcsr_create(matrix=dbcsr_ovlp, name="OVLP", matrix_type=dbcsr_type_no_symmetry, &
    1633            2 :                         dist=prod_dist, row_blk_size=col_blk_size, col_blk_size=col_blk_size)
    1634              : 
    1635           26 :       col_blk_size = 1
    1636              :       CALL dbcsr_create(matrix=dbcsr_tmp, name="TMP", matrix_type=dbcsr_type_no_symmetry, &
    1637            2 :                         dist=prod_dist, row_blk_size=col_blk_size, col_blk_size=col_blk_size)
    1638            2 :       CALL dbcsr_reserve_all_blocks(dbcsr_tmp)
    1639              : 
    1640            2 :       dbcsr_soc_package%dbcsr_sc => dbcsr_sc
    1641            2 :       dbcsr_soc_package%dbcsr_sf => dbcsr_sf
    1642            2 :       dbcsr_soc_package%dbcsr_work => dbcsr_work
    1643            2 :       dbcsr_soc_package%dbcsr_ovlp => dbcsr_ovlp
    1644            2 :       dbcsr_soc_package%dbcsr_prod => dbcsr_prod
    1645            2 :       dbcsr_soc_package%dbcsr_tmp => dbcsr_tmp
    1646              : 
    1647              :       !Filling the coeffs matrices by copying from the stored fms
    1648            2 :       CALL copy_fm_to_dbcsr(sc_coeffs, dbcsr_sc)
    1649            2 :       CALL copy_fm_to_dbcsr(sf_coeffs, dbcsr_sf)
    1650              : 
    1651              : ! Precompute what we can before looping over excited states.
    1652              :       ! Need to compute the scalar: sum_i sum_sigma <phi^0_i,sigma|SOC|phi^0_i,sigma>, where all
    1653              :       ! occupied MOs are taken into account
    1654              : 
    1655              :       !start with the alpha MOs
    1656            2 :       CALL get_mo_set(mos(1), mo_coeff=mo_coeff, homo=homo)
    1657            6 :       ALLOCATE (diag(homo))
    1658            2 :       CALL cp_fm_get_info(mo_coeff, matrix_struct=vec_struct)
    1659              :       CALL cp_fm_struct_create(prod_struct, context=blacs_env, para_env=para_env, &
    1660            2 :                                nrow_global=homo, ncol_global=homo)
    1661            2 :       CALL cp_fm_create(vec_work, vec_struct)
    1662            2 :       CALL cp_fm_create(prod_work, prod_struct)
    1663              : 
    1664              :       ! <alpha|SOC_z|alpha> => spin integration yields +1
    1665            2 :       CALL cp_dbcsr_sm_fm_multiply(orb_soc_z, mo_coeff, vec_work, ncol=homo)
    1666            2 :       CALL parallel_gemm('T', 'N', homo, homo, nao, 1.0_dp, mo_coeff, vec_work, 0.0_dp, prod_work)
    1667            2 :       CALL cp_fm_get_diag(prod_work, diag)
    1668           20 :       gs_sum = SUM(diag)
    1669              : 
    1670            2 :       CALL cp_fm_release(vec_work)
    1671            2 :       CALL cp_fm_release(prod_work)
    1672            2 :       CALL cp_fm_struct_release(prod_struct)
    1673            2 :       DEALLOCATE (diag)
    1674            2 :       NULLIFY (vec_struct)
    1675              : 
    1676              :       ! Now do the same with the beta gs coeffs
    1677            2 :       CALL get_mo_set(mos(2), mo_coeff=mo_coeff, homo=homo)
    1678            6 :       ALLOCATE (diag(homo))
    1679            2 :       CALL cp_fm_get_info(mo_coeff, matrix_struct=vec_struct)
    1680              :       CALL cp_fm_struct_create(prod_struct, context=blacs_env, para_env=para_env, &
    1681            2 :                                nrow_global=homo, ncol_global=homo)
    1682            2 :       CALL cp_fm_create(vec_work, vec_struct)
    1683            2 :       CALL cp_fm_create(prod_work, prod_struct)
    1684              : 
    1685              :       ! <beta|SOC_z|beta> => spin integration yields -1
    1686            2 :       CALL cp_dbcsr_sm_fm_multiply(orb_soc_z, mo_coeff, vec_work, ncol=homo)
    1687            2 :       CALL parallel_gemm('T', 'N', homo, homo, nao, 1.0_dp, mo_coeff, vec_work, 0.0_dp, prod_work)
    1688            2 :       CALL cp_fm_get_diag(prod_work, diag)
    1689           20 :       gs_sum = gs_sum - SUM(diag) ! -1 because of spin integration
    1690              : 
    1691            2 :       CALL cp_fm_release(vec_work)
    1692            2 :       CALL cp_fm_release(prod_work)
    1693            2 :       CALL cp_fm_struct_release(prod_struct)
    1694            2 :       DEALLOCATE (diag)
    1695              : 
    1696              :       ! Need to compute: <phi^0_Isigma|SOC|phi^0_Jtau> for the donor MOs
    1697              : 
    1698              :       CALL cp_fm_struct_create(vec_struct, context=blacs_env, para_env=para_env, &
    1699            2 :                                nrow_global=nao, ncol_global=ndo_so)
    1700              :       CALL cp_fm_struct_create(prod_struct, context=blacs_env, para_env=para_env, &
    1701            2 :                                nrow_global=ndo_so, ncol_global=ndo_so)
    1702            2 :       CALL cp_fm_create(vec_soc_x, vec_struct) ! for SOC_x*gs_coeffs
    1703            2 :       CALL cp_fm_create(vec_soc_y, vec_struct) ! for SOC_y*gs_coeffs
    1704            2 :       CALL cp_fm_create(vec_soc_z, vec_struct) ! for SOC_z*gs_coeffs
    1705            2 :       CALL cp_fm_create(prod_work, prod_struct)
    1706            6 :       ALLOCATE (diag(ndo_so))
    1707              : 
    1708           16 :       ALLOCATE (domo_soc_x(ndo_so, ndo_so), domo_soc_y(ndo_so, ndo_so), domo_soc_z(ndo_so, ndo_so))
    1709              : 
    1710            2 :       CALL cp_dbcsr_sm_fm_multiply(orb_soc_x, gs_coeffs, vec_soc_x, ncol=ndo_so)
    1711            2 :       CALL parallel_gemm('T', 'N', ndo_so, ndo_so, nao, 1.0_dp, gs_coeffs, vec_soc_x, 0.0_dp, prod_work)
    1712            2 :       CALL cp_fm_get_submatrix(prod_work, domo_soc_x)
    1713              : 
    1714            2 :       CALL cp_dbcsr_sm_fm_multiply(orb_soc_y, gs_coeffs, vec_soc_y, ncol=ndo_so)
    1715            2 :       CALL parallel_gemm('T', 'N', ndo_so, ndo_so, nao, 1.0_dp, gs_coeffs, vec_soc_y, 0.0_dp, prod_work)
    1716            2 :       CALL cp_fm_get_submatrix(prod_work, domo_soc_y)
    1717              : 
    1718            2 :       CALL cp_dbcsr_sm_fm_multiply(orb_soc_z, gs_coeffs, vec_soc_z, ncol=ndo_so)
    1719            2 :       CALL parallel_gemm('T', 'N', ndo_so, ndo_so, nao, 1.0_dp, gs_coeffs, vec_soc_z, 0.0_dp, prod_work)
    1720            2 :       CALL cp_fm_get_submatrix(prod_work, domo_soc_z)
    1721              : 
    1722              :       ! some more useful work matrices
    1723              :       CALL cp_fm_struct_create(work_struct, context=blacs_env, para_env=para_env, &
    1724            2 :                                nrow_global=nex, ncol_global=nex)
    1725            2 :       CALL cp_fm_create(work_fm, work_struct, set_zero=.TRUE.)
    1726              : 
    1727              : !  Looping over the excited states, computing the SOC and filling the perturbation matrix
    1728              : !  There are 3 loops to do: sc-sc, sc-sf and sf-sf
    1729              : !  The final perturbation matrix is Hermitian, only the upper diagonal is needed
    1730              : 
    1731              :       !need some work matrices for the GS stuff
    1732              :       CALL cp_fm_struct_create(gsex_struct, context=blacs_env, para_env=para_env, &
    1733            2 :                                nrow_global=nex*ndo_so, ncol_global=ndo_so)
    1734            2 :       CALL cp_fm_create(gsex_fm, gsex_struct)
    1735            6 :       ALLOCATE (gsex_block(ndo_so, ndo_so))
    1736              : 
    1737              : !  Start with ground-state/spin-conserving SOC:
    1738              :       !  <Psi_0|SOC|Psi_Jsc> = sum_k,sigma <phi^0_k,sigma|SOC|phi^Jsc_k,sigma>
    1739              : 
    1740              :       !compute -sc_coeffs*SOC_Z*gs_coeffs, minus sign because SOC_z antisymmetric
    1741            2 :       CALL parallel_gemm('T', 'N', nex*ndo_so, ndo_so, nao, -1.0_dp, sc_coeffs, vec_soc_z, 0.0_dp, gsex_fm)
    1742              : 
    1743           26 :       DO isc = 1, nsc
    1744              :          CALL cp_fm_get_submatrix(fm=gsex_fm, target_m=gsex_block, start_row=(isc - 1)*ndo_so + 1, &
    1745           24 :                                   start_col=1, n_rows=ndo_so, n_cols=ndo_so)
    1746           24 :          diag(:) = get_diag(gsex_block)
    1747          168 :          soc = SUM(diag(1:ndo_mo)) - SUM(diag(ndo_mo + 1:ndo_so)) !minus sign because of spin integration
    1748              : 
    1749              :          !purely imaginary contribution
    1750           26 :          CALL cp_fm_set_element(img_fm, 1, 1 + isc, soc)
    1751              :       END DO !isc
    1752              : 
    1753              : !  Then ground-state/spin-flip SOC:
    1754              :       !<Psi_0|SOC|Psi_Jsf> = sum_k,sigma <phi^0_k,sigma|SOC|phi^Jsc_k,tau>   sigma != tau
    1755              : 
    1756              :       !compute  -sc_coeffs*SOC_x*gs_coeffs, imaginary contribution
    1757            2 :       CALL parallel_gemm('T', 'N', nex*ndo_so, ndo_so, nao, -1.0_dp, sc_coeffs, vec_soc_x, 0.0_dp, gsex_fm)
    1758              : 
    1759           26 :       DO isf = 1, nsf
    1760              :          CALL cp_fm_get_submatrix(fm=gsex_fm, target_m=gsex_block, start_row=(isf - 1)*ndo_so + 1, &
    1761           24 :                                   start_col=1, n_rows=ndo_so, n_cols=ndo_so)
    1762           24 :          diag(:) = get_diag(gsex_block)
    1763          168 :          soc = SUM(diag) !alpha and beta parts are simply added due to spin integration
    1764           26 :          CALL cp_fm_set_element(img_fm, 1, 1 + nsc + isf, soc)
    1765              :       END DO !isf
    1766              : 
    1767              :       !compute -sc_coeffs*SOC_y*gs_coeffs, real contribution
    1768            2 :       CALL parallel_gemm('T', 'N', nex*ndo_so, ndo_so, nao, -1.0_dp, sc_coeffs, vec_soc_y, 0.0_dp, gsex_fm)
    1769              : 
    1770           26 :       DO isf = 1, nsf
    1771              :          CALL cp_fm_get_submatrix(fm=gsex_fm, target_m=gsex_block, start_row=(isf - 1)*ndo_so + 1, &
    1772           24 :                                   start_col=1, n_rows=ndo_so, n_cols=ndo_so)
    1773           24 :          diag(:) = get_diag(gsex_block)
    1774           96 :          soc = SUM(diag(1:ndo_mo)) ! alpha-beta
    1775           96 :          soc = soc - SUM(diag(ndo_mo + 1:ndo_so)) !beta-alpha
    1776           26 :          CALL cp_fm_set_element(real_fm, 1, 1 + nsc + isf, soc)
    1777              :       END DO !isf
    1778              : 
    1779              :       !ground-state cleanup
    1780            2 :       CALL cp_fm_release(gsex_fm)
    1781            2 :       CALL cp_fm_release(vec_soc_x)
    1782            2 :       CALL cp_fm_release(vec_soc_y)
    1783            2 :       CALL cp_fm_release(vec_soc_z)
    1784            2 :       CALL cp_fm_release(prod_work)
    1785            2 :       CALL cp_fm_struct_release(gsex_struct)
    1786            2 :       CALL cp_fm_struct_release(prod_struct)
    1787            2 :       CALL cp_fm_struct_release(vec_struct)
    1788            2 :       DEALLOCATE (gsex_block)
    1789              : 
    1790              : !  Then spin-conserving/spin-conserving SOC
    1791              : !  <Psi_Isc|SOC|Psi_Jsc> =
    1792              : !  sum_k,sigma [<psi^Isc_k,sigma|SOC|psi^Jsc_k,sigma> + <psi^Isc_k,sigma|psi^Jsc_k,sigma> * gs_sum]
    1793              : !  - sum_k,l,sigma <psi^0_k,sigma|SOC|psi^0_l,sigma> * <psi^Isc_l,sigma|psi^Jsc_k,sigma>
    1794              : 
    1795              :       !Same spin integration => only SOC_z matters, and the contribution is purely imaginary
    1796            2 :       CALL dbcsr_multiply('N', 'N', 1.0_dp, orb_soc_z, dbcsr_sc, 0.0_dp, dbcsr_work, filter_eps=eps_filter)
    1797            2 :       CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sc, dbcsr_work, 0.0_dp, dbcsr_prod, filter_eps=eps_filter)
    1798              : 
    1799              :       !the overlap as well
    1800              :       CALL dbcsr_multiply('N', 'N', 1.0_dp, matrix_s(1)%matrix, dbcsr_sc, 0.0_dp, dbcsr_work, &
    1801            2 :                           filter_eps=eps_filter)
    1802            2 :       CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sc, dbcsr_work, 0.0_dp, dbcsr_ovlp, filter_eps=eps_filter)
    1803              : 
    1804              :       CALL os_amew_soc_elements(dbcsr_tmp, dbcsr_prod, dbcsr_ovlp, domo_soc_z, pref_diaga=1.0_dp, &
    1805              :                                 pref_diagb=-1.0_dp, pref_tracea=-1.0_dp, pref_traceb=1.0_dp, &
    1806            2 :                                 pref_diags=gs_sum, symmetric=.TRUE.)
    1807              : 
    1808            2 :       CALL copy_dbcsr_to_fm(dbcsr_tmp, work_fm)
    1809              :       CALL cp_fm_to_fm_submat(msource=work_fm, mtarget=img_fm, nrow=nex, ncol=nex, s_firstrow=1, &
    1810            2 :                               s_firstcol=1, t_firstrow=2, t_firstcol=2)
    1811              : 
    1812              : !  Then spin-flip/spin-flip SOC
    1813              : !  <Psi_Isf|SOC|Psi_Jsf> =
    1814              : !  sum_k,sigma [<psi^Isf_k,tau|SOC|psi^Jsf_k,tau> + <psi^Isf_k,tau|psi^Jsf_k,tau> * gs_sum]
    1815              : !  - sum_k,l,sigma <psi^0_k,sigma|SOC|psi^0_l,sigma> * <psi^Isf_l,tau|psi^Jsf_k,tau> , tau != sigma
    1816              : 
    1817              :       !Same spin integration => only SOC_z matters, and the contribution is purely imaginary
    1818            2 :       CALL dbcsr_multiply('N', 'N', 1.0_dp, orb_soc_z, dbcsr_sf, 0.0_dp, dbcsr_work, filter_eps=eps_filter)
    1819            2 :       CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sf, dbcsr_work, 0.0_dp, dbcsr_prod, filter_eps=eps_filter)
    1820              : 
    1821              :       !the overlap as well
    1822              :       CALL dbcsr_multiply('N', 'N', 1.0_dp, matrix_s(1)%matrix, dbcsr_sf, 0.0_dp, &
    1823            2 :                           dbcsr_work, filter_eps=eps_filter)
    1824            2 :       CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sf, dbcsr_work, 0.0_dp, dbcsr_ovlp, filter_eps=eps_filter)
    1825              : 
    1826              :       !note: the different prefactors are derived from the fact that because of spin-flip, we have
    1827              :       !alpha-gs and beta-lr interaction
    1828              :       CALL os_amew_soc_elements(dbcsr_tmp, dbcsr_prod, dbcsr_ovlp, domo_soc_z, pref_diaga=-1.0_dp, &
    1829              :                                 pref_diagb=1.0_dp, pref_tracea=-1.0_dp, pref_traceb=1.0_dp, &
    1830            2 :                                 pref_diags=gs_sum, symmetric=.TRUE.)
    1831              : 
    1832            2 :       CALL copy_dbcsr_to_fm(dbcsr_tmp, work_fm)
    1833              :       CALL cp_fm_to_fm_submat(msource=work_fm, mtarget=img_fm, nrow=nex, ncol=nex, s_firstrow=1, &
    1834            2 :                               s_firstcol=1, t_firstrow=1 + nsc + 1, t_firstcol=1 + nsc + 1)
    1835              : 
    1836              : !  Finally the spin-conserving/spin-flip interaction
    1837              : ! <Psi_Isc|SOC|Psi_Jsf> =   sum_k,sigma <psi^Isc_k,sigma|SOC|psi^Isf_k,tau>
    1838              : !                           - sum_k,l,sigma <psi^0_k,tau|SOC|psi^0_l,sigma
    1839              : 
    1840            2 :       tas(1) = ndo_mo + 1; tbs(1) = 1
    1841            2 :       tas(2) = 1; tbs(2) = ndo_mo + 1
    1842              : 
    1843              :       !the overlap
    1844              :       CALL dbcsr_multiply('N', 'N', 1.0_dp, matrix_s(1)%matrix, dbcsr_sf, 0.0_dp, &
    1845            2 :                           dbcsr_work, filter_eps=eps_filter)
    1846            2 :       CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sc, dbcsr_work, 0.0_dp, dbcsr_ovlp, filter_eps=eps_filter)
    1847              : 
    1848              :       !start with the imaginary contribution
    1849            2 :       CALL dbcsr_multiply('N', 'N', 1.0_dp, orb_soc_x, dbcsr_sc, 0.0_dp, dbcsr_work, filter_eps=eps_filter)
    1850            2 :       CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sf, dbcsr_work, 0.0_dp, dbcsr_prod, filter_eps=eps_filter)
    1851              : 
    1852              :       CALL os_amew_soc_elements(dbcsr_tmp, dbcsr_prod, dbcsr_ovlp, domo_soc_x, pref_diaga=1.0_dp, &
    1853              :                                 pref_diagb=1.0_dp, pref_tracea=-1.0_dp, pref_traceb=-1.0_dp, &
    1854            2 :                                 tracea_start=tas, traceb_start=tbs)
    1855              : 
    1856            2 :       CALL copy_dbcsr_to_fm(dbcsr_tmp, work_fm)
    1857              :       CALL cp_fm_to_fm_submat(msource=work_fm, mtarget=img_fm, nrow=nex, ncol=nex, s_firstrow=1, &
    1858            2 :                               s_firstcol=1, t_firstrow=2, t_firstcol=1 + nsc + 1)
    1859              : 
    1860              :       !follow up with the real contribution
    1861            2 :       CALL dbcsr_multiply('N', 'N', 1.0_dp, orb_soc_y, dbcsr_sf, 0.0_dp, dbcsr_work, filter_eps=eps_filter)
    1862            2 :       CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sc, dbcsr_work, 0.0_dp, dbcsr_prod, filter_eps=eps_filter)
    1863              : 
    1864              :       CALL os_amew_soc_elements(dbcsr_tmp, dbcsr_prod, dbcsr_ovlp, domo_soc_y, pref_diaga=1.0_dp, &
    1865              :                                 pref_diagb=-1.0_dp, pref_tracea=1.0_dp, pref_traceb=-1.0_dp, &
    1866            2 :                                 tracea_start=tas, traceb_start=tbs)
    1867              : 
    1868            2 :       CALL copy_dbcsr_to_fm(dbcsr_tmp, work_fm)
    1869              :       CALL cp_fm_to_fm_submat(msource=work_fm, mtarget=real_fm, nrow=nex, ncol=nex, s_firstrow=1, &
    1870            2 :                               s_firstcol=1, t_firstrow=2, t_firstcol=1 + nsc + 1)
    1871              : 
    1872              : !  Setting up the complex Hermitian perturbed matrix
    1873            2 :       CALL cp_cfm_create(pert_cfm, full_struct)
    1874            2 :       CALL cp_fm_to_cfm(real_fm, img_fm, pert_cfm)
    1875              : 
    1876            2 :       CALL cp_fm_release(real_fm)
    1877            2 :       CALL cp_fm_release(img_fm)
    1878              : 
    1879              : !  Diagonalize the perturbed matrix
    1880            6 :       ALLOCATE (tmp_evals(ntot))
    1881            2 :       CALL cp_cfm_create(evecs_cfm, full_struct)
    1882            2 :       CALL cp_cfm_heevd(pert_cfm, evecs_cfm, tmp_evals)
    1883              : 
    1884              :       !shift the energies such that the GS has zero and store all that in soc_evals (\wo the GS)
    1885            6 :       ALLOCATE (donor_state%soc_evals(ntot - 1))
    1886           50 :       donor_state%soc_evals(:) = tmp_evals(2:ntot) - tmp_evals(1)
    1887              : 
    1888              : !  The SOC dipole oscillator strengths
    1889              :       CALL compute_soc_dipole_fosc(evecs_cfm, dbcsr_soc_package, donor_state, xas_tdp_env, &
    1890            2 :                                    xas_tdp_control, qs_env, gs_coeffs=gs_coeffs)
    1891              : 
    1892              : !  And quadrupole
    1893            2 :       IF (xas_tdp_control%do_quad) THEN
    1894              :          CALL compute_soc_quadrupole_fosc(evecs_cfm, dbcsr_soc_package, donor_state, xas_tdp_env, &
    1895            0 :                                           xas_tdp_control, qs_env, gs_coeffs=gs_coeffs)
    1896              :       END IF
    1897              : 
    1898              : ! Clean-up
    1899            2 :       CALL cp_cfm_release(pert_cfm)
    1900            2 :       CALL cp_cfm_release(evecs_cfm)
    1901            2 :       CALL cp_fm_struct_release(full_struct)
    1902            2 :       IF (do_roks) THEN
    1903            0 :          CALL cp_fm_release(gs_coeffs)
    1904            0 :          DEALLOCATE (gs_coeffs)
    1905              :       END IF
    1906            2 :       CALL dbcsr_distribution_release(coeffs_dist)
    1907            2 :       CALL dbcsr_distribution_release(prod_dist)
    1908            2 :       CALL dbcsr_release(dbcsr_sc)
    1909            2 :       CALL dbcsr_release(dbcsr_sf)
    1910            2 :       CALL dbcsr_release(dbcsr_prod)
    1911            2 :       CALL dbcsr_release(dbcsr_ovlp)
    1912            2 :       CALL dbcsr_release(dbcsr_tmp)
    1913            2 :       CALL dbcsr_release(dbcsr_work)
    1914            2 :       CALL cp_fm_release(work_fm)
    1915            2 :       CALL cp_fm_struct_release(work_struct)
    1916            2 :       DEALLOCATE (coeffs_dist, prod_dist, col_dist, col_blk_size, row_dist_new)
    1917            2 :       DEALLOCATE (dbcsr_sc, dbcsr_sf, dbcsr_work, dbcsr_prod, dbcsr_ovlp, dbcsr_tmp)
    1918              : 
    1919            2 :       CALL timestop(handle)
    1920              : 
    1921           26 :    END SUBROUTINE include_os_soc
    1922              : 
    1923              : ! **************************************************************************************************
    1924              : !> \brief Includes the SOC effects on the precomputed restricted closed-shell singlet and triplet
    1925              : !>        excitations. This is a perturbative treatmnent
    1926              : !> \param donor_state ...
    1927              : !> \param xas_tdp_env ...
    1928              : !> \param xas_tdp_control ...
    1929              : !> \param qs_env ...
    1930              : !> \note Using AMEWs, build an hermitian matrix with all excited states SOC coupling + the
    1931              : !>       excitation energies on the diagonal. Then diagonalize it to get the new excitation
    1932              : !>       energies and corresponding linear combinations of lr_coeffs.
    1933              : !>       The AMEWs are normalized
    1934              : !>       Only for spin-restricted calculations
    1935              : !>       The ms=-1,+1 triplets are not explicitely computed in the first place. Assume they have
    1936              : !>       the same energy as the ms=0 triplets and apply the spin raising and lowering operators
    1937              : !>       on the latter to get their AMEWs => this is the qusi-degenerate perturbation theory
    1938              : !>       approach by Neese (QDPT)
    1939              : ! **************************************************************************************************
    1940            2 :    SUBROUTINE include_rcs_soc(donor_state, xas_tdp_env, xas_tdp_control, qs_env)
    1941              : 
    1942              :       TYPE(donor_state_type), POINTER                    :: donor_state
    1943              :       TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
    1944              :       TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
    1945              :       TYPE(qs_environment_type), POINTER                 :: qs_env
    1946              : 
    1947              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'include_rcs_soc'
    1948              : 
    1949              :       INTEGER                                            :: group, handle, iex, isg, itp, nao, &
    1950              :                                                             ndo_mo, nex, npcols, nprows, nsg, &
    1951              :                                                             ntot, ntp
    1952            2 :       INTEGER, DIMENSION(:), POINTER                     :: col_blk_size, col_dist, row_blk_size, &
    1953            2 :                                                             row_dist, row_dist_new
    1954            2 :       INTEGER, DIMENSION(:, :), POINTER                  :: pgrid
    1955              :       REAL(dp)                                           :: eps_filter, soc_gst, sqrt2
    1956            2 :       REAL(dp), ALLOCATABLE, DIMENSION(:)                :: diag, tmp_evals
    1957            2 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: domo_soc_x, domo_soc_y, domo_soc_z, &
    1958            2 :                                                             gstp_block
    1959            2 :       REAL(dp), DIMENSION(:), POINTER                    :: sg_evals, tp_evals
    1960              :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
    1961              :       TYPE(cp_cfm_type)                                  :: evecs_cfm, hami_cfm
    1962              :       TYPE(cp_fm_struct_type), POINTER                   :: full_struct, gstp_struct, prod_struct, &
    1963              :                                                             vec_struct, work_struct
    1964              :       TYPE(cp_fm_type)                                   :: gstp_fm, img_fm, prod_fm, real_fm, &
    1965              :                                                             tmp_fm, vec_soc_x, vec_soc_y, &
    1966              :                                                             vec_soc_z, work_fm
    1967              :       TYPE(cp_fm_type), POINTER                          :: gs_coeffs, sg_coeffs, tp_coeffs
    1968              :       TYPE(dbcsr_distribution_type), POINTER             :: coeffs_dist, dbcsr_dist, prod_dist
    1969            2 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrix_s
    1970              :       TYPE(dbcsr_soc_package_type)                       :: dbcsr_soc_package
    1971              :       TYPE(dbcsr_type), POINTER                          :: dbcsr_ovlp, dbcsr_prod, dbcsr_sg, &
    1972              :                                                             dbcsr_tmp, dbcsr_tp, dbcsr_work, &
    1973              :                                                             orb_soc_x, orb_soc_y, orb_soc_z
    1974              :       TYPE(mp_para_env_type), POINTER                    :: para_env
    1975              : 
    1976            2 :       NULLIFY (sg_coeffs, tp_coeffs, gs_coeffs, sg_evals, tp_evals, full_struct)
    1977            2 :       NULLIFY (para_env, blacs_env, prod_struct, vec_struct, orb_soc_y, orb_soc_z)
    1978            2 :       NULLIFY (matrix_s, orb_soc_x)
    1979            2 :       NULLIFY (work_struct, dbcsr_dist, coeffs_dist, prod_dist, pgrid)
    1980            2 :       NULLIFY (col_dist, row_dist, col_blk_size, row_blk_size, row_dist_new, gstp_struct)
    1981            2 :       NULLIFY (dbcsr_tp, dbcsr_sg, dbcsr_prod, dbcsr_work, dbcsr_ovlp, dbcsr_tmp)
    1982              : 
    1983            2 :       CALL timeset(routineN, handle)
    1984              : 
    1985              : !  Initialization
    1986            2 :       CPASSERT(ASSOCIATED(xas_tdp_control))
    1987            2 :       gs_coeffs => donor_state%gs_coeffs
    1988            2 :       sg_coeffs => donor_state%sg_coeffs
    1989            2 :       tp_coeffs => donor_state%tp_coeffs
    1990            2 :       sg_evals => donor_state%sg_evals
    1991            2 :       tp_evals => donor_state%tp_evals
    1992            2 :       nsg = SIZE(sg_evals)
    1993            2 :       ntp = SIZE(tp_evals)
    1994            2 :       ntot = 1 + nsg + 3*ntp
    1995            2 :       ndo_mo = donor_state%ndo_mo
    1996            2 :       CALL get_qs_env(qs_env, matrix_s=matrix_s)
    1997            2 :       CALL dbcsr_get_info(matrix_s(1)%matrix, nfullrows_total=nao)
    1998            2 :       orb_soc_x => xas_tdp_env%orb_soc(1)%matrix
    1999            2 :       orb_soc_y => xas_tdp_env%orb_soc(2)%matrix
    2000            2 :       orb_soc_z => xas_tdp_env%orb_soc(3)%matrix
    2001              :       !by construction nsg == ntp, keep those separate for more code clarity though
    2002            2 :       CPASSERT(nsg == ntp)
    2003            2 :       nex = nsg
    2004            2 :       eps_filter = xas_tdp_control%eps_filter
    2005              : 
    2006              : !  Creating the real part and imaginary part of the final SOC fm
    2007            2 :       CALL get_qs_env(qs_env, para_env=para_env, blacs_env=blacs_env)
    2008              :       CALL cp_fm_struct_create(full_struct, context=blacs_env, para_env=para_env, &
    2009            2 :                                nrow_global=ntot, ncol_global=ntot)
    2010            2 :       CALL cp_fm_create(real_fm, full_struct, set_zero=.TRUE.)
    2011            2 :       CALL cp_fm_create(img_fm, full_struct, set_zero=.TRUE.)
    2012              : 
    2013              : !  Put the excitation energies on the diagonal of the real matrix
    2014           26 :       DO isg = 1, nsg
    2015           26 :          CALL cp_fm_set_element(real_fm, 1 + isg, 1 + isg, sg_evals(isg))
    2016              :       END DO
    2017           26 :       DO itp = 1, ntp
    2018              :          ! first T^-1, then T^0, then T^+1
    2019           24 :          CALL cp_fm_set_element(real_fm, 1 + itp + nsg, 1 + itp + nsg, tp_evals(itp))
    2020           24 :          CALL cp_fm_set_element(real_fm, 1 + itp + ntp + nsg, 1 + itp + ntp + nsg, tp_evals(itp))
    2021           26 :          CALL cp_fm_set_element(real_fm, 1 + itp + 2*ntp + nsg, 1 + itp + 2*ntp + nsg, tp_evals(itp))
    2022              :       END DO
    2023              : 
    2024              : !  Create the dbcsr machinery (for fast MM, the core of this routine)
    2025            2 :       CALL get_qs_env(qs_env, dbcsr_dist=dbcsr_dist)
    2026              :       CALL dbcsr_distribution_get(dbcsr_dist, group=group, row_dist=row_dist, pgrid=pgrid, &
    2027            2 :                                   npcols=npcols, nprows=nprows)
    2028            8 :       ALLOCATE (col_dist(nex), row_dist_new(nex))
    2029           26 :       DO iex = 1, nex
    2030           24 :          col_dist(iex) = MODULO(npcols - iex, npcols)
    2031           26 :          row_dist_new(iex) = MODULO(nprows - iex, nprows)
    2032              :       END DO
    2033            2 :       ALLOCATE (coeffs_dist, prod_dist)
    2034              :       CALL dbcsr_distribution_new(coeffs_dist, group=group, pgrid=pgrid, row_dist=row_dist, &
    2035            2 :                                   col_dist=col_dist)
    2036              :       CALL dbcsr_distribution_new(prod_dist, group=group, pgrid=pgrid, row_dist=row_dist_new, &
    2037            2 :                                   col_dist=col_dist)
    2038              : 
    2039              :       !Create the matrices
    2040            4 :       ALLOCATE (col_blk_size(nex))
    2041           26 :       col_blk_size = ndo_mo
    2042            2 :       CALL dbcsr_get_info(matrix_s(1)%matrix, row_blk_size=row_blk_size)
    2043              : 
    2044            2 :       ALLOCATE (dbcsr_sg, dbcsr_tp, dbcsr_work, dbcsr_ovlp, dbcsr_tmp, dbcsr_prod)
    2045              :       CALL dbcsr_create(matrix=dbcsr_sg, name="SINGLETS", matrix_type=dbcsr_type_no_symmetry, &
    2046            2 :                         dist=coeffs_dist, row_blk_size=row_blk_size, col_blk_size=col_blk_size)
    2047              :       CALL dbcsr_create(matrix=dbcsr_tp, name="TRIPLETS", matrix_type=dbcsr_type_no_symmetry, &
    2048            2 :                         dist=coeffs_dist, row_blk_size=row_blk_size, col_blk_size=col_blk_size)
    2049              :       CALL dbcsr_create(matrix=dbcsr_work, name="WORK", matrix_type=dbcsr_type_no_symmetry, &
    2050            2 :                         dist=coeffs_dist, row_blk_size=row_blk_size, col_blk_size=col_blk_size)
    2051              :       CALL dbcsr_create(matrix=dbcsr_prod, name="PROD", matrix_type=dbcsr_type_no_symmetry, &
    2052            2 :                         dist=prod_dist, row_blk_size=col_blk_size, col_blk_size=col_blk_size)
    2053              :       CALL dbcsr_create(matrix=dbcsr_ovlp, name="OVLP", matrix_type=dbcsr_type_no_symmetry, &
    2054            2 :                         dist=prod_dist, row_blk_size=col_blk_size, col_blk_size=col_blk_size)
    2055              : 
    2056           26 :       col_blk_size = 1
    2057              :       CALL dbcsr_create(matrix=dbcsr_tmp, name="TMP", matrix_type=dbcsr_type_no_symmetry, &
    2058            2 :                         dist=prod_dist, row_blk_size=col_blk_size, col_blk_size=col_blk_size)
    2059            2 :       CALL dbcsr_reserve_all_blocks(dbcsr_tmp)
    2060              : 
    2061            2 :       dbcsr_soc_package%dbcsr_sg => dbcsr_sg
    2062            2 :       dbcsr_soc_package%dbcsr_tp => dbcsr_tp
    2063            2 :       dbcsr_soc_package%dbcsr_work => dbcsr_work
    2064            2 :       dbcsr_soc_package%dbcsr_ovlp => dbcsr_ovlp
    2065            2 :       dbcsr_soc_package%dbcsr_prod => dbcsr_prod
    2066            2 :       dbcsr_soc_package%dbcsr_tmp => dbcsr_tmp
    2067              : 
    2068              :       !Filling the coeffs matrices by copying from the stored fms
    2069            2 :       CALL copy_fm_to_dbcsr(sg_coeffs, dbcsr_sg)
    2070            2 :       CALL copy_fm_to_dbcsr(tp_coeffs, dbcsr_tp)
    2071              : 
    2072              : !  Create the work and helper fms
    2073            2 :       CALL cp_fm_get_info(gs_coeffs, matrix_struct=vec_struct)
    2074              :       CALL cp_fm_struct_create(prod_struct, context=blacs_env, para_env=para_env, &
    2075            2 :                                nrow_global=ndo_mo, ncol_global=ndo_mo)
    2076            2 :       CALL cp_fm_create(prod_fm, prod_struct)
    2077            2 :       CALL cp_fm_create(vec_soc_x, vec_struct)
    2078            2 :       CALL cp_fm_create(vec_soc_y, vec_struct)
    2079            2 :       CALL cp_fm_create(vec_soc_z, vec_struct)
    2080              :       CALL cp_fm_struct_create(work_struct, context=blacs_env, para_env=para_env, &
    2081            2 :                                nrow_global=nex, ncol_global=nex)
    2082            2 :       CALL cp_fm_create(work_fm, work_struct)
    2083            2 :       CALL cp_fm_create(tmp_fm, work_struct)
    2084            6 :       ALLOCATE (diag(ndo_mo))
    2085              : 
    2086              : !  Precompute everything we can before looping over excited states
    2087            2 :       sqrt2 = SQRT(2.0_dp)
    2088              : 
    2089              :       ! The subset of the donor MOs matrix elements: <phi_I^0|Hsoc|phi_J^0> (kept as global array, small)
    2090           16 :       ALLOCATE (domo_soc_x(ndo_mo, ndo_mo), domo_soc_y(ndo_mo, ndo_mo), domo_soc_z(ndo_mo, ndo_mo))
    2091              : 
    2092            2 :       CALL cp_dbcsr_sm_fm_multiply(orb_soc_x, gs_coeffs, vec_soc_x, ncol=ndo_mo)
    2093            2 :       CALL parallel_gemm('T', 'N', ndo_mo, ndo_mo, nao, 1.0_dp, gs_coeffs, vec_soc_x, 0.0_dp, prod_fm)
    2094            2 :       CALL cp_fm_get_submatrix(prod_fm, domo_soc_x)
    2095              : 
    2096            2 :       CALL cp_dbcsr_sm_fm_multiply(orb_soc_y, gs_coeffs, vec_soc_y, ncol=ndo_mo)
    2097            2 :       CALL parallel_gemm('T', 'N', ndo_mo, ndo_mo, nao, 1.0_dp, gs_coeffs, vec_soc_y, 0.0_dp, prod_fm)
    2098            2 :       CALL cp_fm_get_submatrix(prod_fm, domo_soc_y)
    2099              : 
    2100            2 :       CALL cp_dbcsr_sm_fm_multiply(orb_soc_z, gs_coeffs, vec_soc_z, ncol=ndo_mo)
    2101            2 :       CALL parallel_gemm('T', 'N', ndo_mo, ndo_mo, nao, 1.0_dp, gs_coeffs, vec_soc_z, 0.0_dp, prod_fm)
    2102            2 :       CALL cp_fm_get_submatrix(prod_fm, domo_soc_z)
    2103              : 
    2104              : !  Only have SOC between singlet-triplet triplet-triplet and ground_state-triplet, the resulting
    2105              : !  matrix is Hermitian i.e. the real part is symmetric and the imaginary part is anti-symmetric.
    2106              : !  Can only fill upper half
    2107              : 
    2108              :       !Start with the ground state/triplet SOC, SOC*gs_coeffs already computed above
    2109              :       !note: we are computing <0|H|T>, but have SOC*gs_coeffs instead of gs_coeffs*SOC in store. Since
    2110              :       !      the SOC Hamiltonian is anti-symmetric, a - signs pops up in the gemms below
    2111              : 
    2112              :       CALL cp_fm_struct_create(gstp_struct, context=blacs_env, para_env=para_env, &
    2113            2 :                                nrow_global=ntp*ndo_mo, ncol_global=ndo_mo)
    2114            2 :       CALL cp_fm_create(gstp_fm, gstp_struct)
    2115            6 :       ALLOCATE (gstp_block(ndo_mo, ndo_mo))
    2116              : 
    2117              :       !gs-triplet with Ms=+-1, imaginary part
    2118            2 :       CALL parallel_gemm('T', 'N', ndo_mo*ntp, ndo_mo, nao, -1.0_dp, tp_coeffs, vec_soc_x, 0.0_dp, gstp_fm)
    2119              : 
    2120           26 :       DO itp = 1, ntp
    2121              :          CALL cp_fm_get_submatrix(fm=gstp_fm, target_m=gstp_block, start_row=(itp - 1)*ndo_mo + 1, &
    2122           24 :                                   start_col=1, n_rows=ndo_mo, n_cols=ndo_mo)
    2123           24 :          diag(:) = get_diag(gstp_block)
    2124           96 :          soc_gst = SUM(diag)
    2125           24 :          CALL cp_fm_set_element(img_fm, 1, 1 + nsg + itp, -1.0_dp*soc_gst) ! <0|H_x|T^-1>
    2126           26 :          CALL cp_fm_set_element(img_fm, 1, 1 + nsg + 2*ntp + itp, soc_gst) ! <0|H_x|T^+1>
    2127              :       END DO
    2128              : 
    2129              :       !gs-triplet with Ms=+-1, real part
    2130            2 :       CALL parallel_gemm('T', 'N', ndo_mo*ntp, ndo_mo, nao, -1.0_dp, tp_coeffs, vec_soc_y, 0.0_dp, gstp_fm)
    2131              : 
    2132           26 :       DO itp = 1, ntp
    2133              :          CALL cp_fm_get_submatrix(fm=gstp_fm, target_m=gstp_block, start_row=(itp - 1)*ndo_mo + 1, &
    2134           24 :                                   start_col=1, n_rows=ndo_mo, n_cols=ndo_mo)
    2135           24 :          diag(:) = get_diag(gstp_block)
    2136           96 :          soc_gst = SUM(diag)
    2137           24 :          CALL cp_fm_set_element(real_fm, 1, 1 + nsg + itp, -1.0_dp*soc_gst) ! <0|H_y|T^-1>
    2138           26 :          CALL cp_fm_set_element(real_fm, 1, 1 + nsg + 2*ntp + itp, -1.0_dp*soc_gst) ! <0|H_y|T^+1>
    2139              :       END DO
    2140              : 
    2141              :       !gs-triplet with Ms=0, purely imaginary
    2142            2 :       CALL parallel_gemm('T', 'N', ndo_mo*ntp, ndo_mo, nao, -1.0_dp, tp_coeffs, vec_soc_z, 0.0_dp, gstp_fm)
    2143              : 
    2144           26 :       DO itp = 1, ntp
    2145              :          CALL cp_fm_get_submatrix(fm=gstp_fm, target_m=gstp_block, start_row=(itp - 1)*ndo_mo + 1, &
    2146           24 :                                   start_col=1, n_rows=ndo_mo, n_cols=ndo_mo)
    2147           24 :          diag(:) = get_diag(gstp_block)
    2148           96 :          soc_gst = sqrt2*SUM(diag)
    2149           26 :          CALL cp_fm_set_element(img_fm, 1, 1 + nsg + ntp + itp, soc_gst)
    2150              :       END DO
    2151              : 
    2152              :       !gs clean-up
    2153            2 :       CALL cp_fm_release(prod_fm)
    2154            2 :       CALL cp_fm_release(vec_soc_x)
    2155            2 :       CALL cp_fm_release(vec_soc_y)
    2156            2 :       CALL cp_fm_release(vec_soc_z)
    2157            2 :       CALL cp_fm_release(gstp_fm)
    2158            2 :       CALL cp_fm_struct_release(gstp_struct)
    2159            2 :       CALL cp_fm_struct_release(prod_struct)
    2160            2 :       DEALLOCATE (gstp_block)
    2161              : 
    2162              :       !Now do the singlet-triplet SOC
    2163              :       !start by computing the singlet-triplet overlap
    2164              :       CALL dbcsr_multiply('N', 'N', 1.0_dp, matrix_s(1)%matrix, dbcsr_tp, 0.0_dp, &
    2165            2 :                           dbcsr_work, filter_eps=eps_filter)
    2166            2 :       CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sg, dbcsr_work, 0.0_dp, dbcsr_ovlp, filter_eps=eps_filter)
    2167              : 
    2168              :       !singlet-triplet with Ms=+-1, imaginary part
    2169            2 :       CALL dbcsr_multiply('N', 'N', 1.0_dp, orb_soc_x, dbcsr_tp, 0.0_dp, dbcsr_work, filter_eps=eps_filter)
    2170            2 :       CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sg, dbcsr_work, 0.0_dp, dbcsr_prod, filter_eps=eps_filter)
    2171              : 
    2172              :       CALL rcs_amew_soc_elements(dbcsr_tmp, dbcsr_prod, dbcsr_ovlp, domo_soc_x, &
    2173            2 :                                  pref_trace=-1.0_dp, pref_overall=-0.5_dp*sqrt2)
    2174              : 
    2175              :       !<S|H_x|T^-1>
    2176            2 :       CALL copy_dbcsr_to_fm(dbcsr_tmp, tmp_fm)
    2177              :       CALL cp_fm_to_fm_submat(msource=tmp_fm, mtarget=img_fm, nrow=nex, ncol=nex, &
    2178              :                               s_firstrow=1, s_firstcol=1, t_firstrow=2, &
    2179            2 :                               t_firstcol=1 + nsg + 1)
    2180              : 
    2181              :       !<S|H_x|T^+1> takes a minus sign
    2182            2 :       CALL cp_fm_scale(-1.0_dp, tmp_fm)
    2183              :       CALL cp_fm_to_fm_submat(msource=tmp_fm, mtarget=img_fm, nrow=nex, ncol=nex, &
    2184              :                               s_firstrow=1, s_firstcol=1, t_firstrow=2, &
    2185            2 :                               t_firstcol=1 + nsg + 2*ntp + 1)
    2186              : 
    2187              :       !singlet-triplet with Ms=+-1, real part
    2188            2 :       CALL dbcsr_multiply('N', 'N', 1.0_dp, orb_soc_y, dbcsr_tp, 0.0_dp, dbcsr_work, filter_eps=eps_filter)
    2189            2 :       CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sg, dbcsr_work, 0.0_dp, dbcsr_prod, filter_eps=eps_filter)
    2190              : 
    2191              :       CALL rcs_amew_soc_elements(dbcsr_tmp, dbcsr_prod, dbcsr_ovlp, domo_soc_y, &
    2192            2 :                                  pref_trace=-1.0_dp, pref_overall=-0.5_dp*sqrt2)
    2193              : 
    2194              :       !<S|H_y|T^-1>
    2195            2 :       CALL copy_dbcsr_to_fm(dbcsr_tmp, tmp_fm)
    2196              :       CALL cp_fm_to_fm_submat(msource=tmp_fm, mtarget=real_fm, nrow=nex, ncol=nex, &
    2197              :                               s_firstrow=1, s_firstcol=1, t_firstrow=2, &
    2198            2 :                               t_firstcol=1 + nsg + 1)
    2199              : 
    2200              :       !<S|H_y|T^+1>
    2201              :       CALL cp_fm_to_fm_submat(msource=tmp_fm, mtarget=real_fm, nrow=nex, ncol=nex, &
    2202              :                               s_firstrow=1, s_firstcol=1, t_firstrow=2, &
    2203            2 :                               t_firstcol=1 + nsg + 2*ntp + 1)
    2204              : 
    2205              :       !singlet-triplet with Ms=0, purely imaginary
    2206            2 :       CALL dbcsr_multiply('N', 'N', 1.0_dp, orb_soc_z, dbcsr_tp, 0.0_dp, dbcsr_work, filter_eps=eps_filter)
    2207            2 :       CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sg, dbcsr_work, 0.0_dp, dbcsr_prod, filter_eps=eps_filter)
    2208              : 
    2209              :       CALL rcs_amew_soc_elements(dbcsr_tmp, dbcsr_prod, dbcsr_ovlp, domo_soc_z, &
    2210            2 :                                  pref_trace=-1.0_dp, pref_overall=1.0_dp)
    2211              : 
    2212              :       !<S|H_z|T^0>
    2213            2 :       CALL copy_dbcsr_to_fm(dbcsr_tmp, tmp_fm)
    2214              :       CALL cp_fm_to_fm_submat(msource=tmp_fm, mtarget=img_fm, nrow=nex, ncol=nex, &
    2215              :                               s_firstrow=1, s_firstcol=1, t_firstrow=2, &
    2216            2 :                               t_firstcol=1 + nsg + ntp + 1)
    2217              : 
    2218              :       !Now the triplet-triplet SOC
    2219              :       !start by computing the overlap
    2220              :       CALL dbcsr_multiply('N', 'N', 1.0_dp, matrix_s(1)%matrix, dbcsr_tp, 0.0_dp, &
    2221            2 :                           dbcsr_work, filter_eps=eps_filter)
    2222            2 :       CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_tp, dbcsr_work, 0.0_dp, dbcsr_ovlp, filter_eps=eps_filter)
    2223              : 
    2224              :       !Ms=0 to Ms=+-1 SOC, imaginary part
    2225            2 :       CALL dbcsr_multiply('N', 'N', 1.0_dp, orb_soc_x, dbcsr_tp, 0.0_dp, dbcsr_work, filter_eps=eps_filter)
    2226            2 :       CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_tp, dbcsr_work, 0.0_dp, dbcsr_prod, filter_eps=eps_filter)
    2227              : 
    2228              :       CALL rcs_amew_soc_elements(dbcsr_tmp, dbcsr_prod, dbcsr_ovlp, domo_soc_x, &
    2229            2 :                                  pref_trace=1.0_dp, pref_overall=-0.5_dp*sqrt2)
    2230              : 
    2231              :       !<T^0|H_x|T^+1>
    2232            2 :       CALL copy_dbcsr_to_fm(dbcsr_tmp, tmp_fm)
    2233              :       CALL cp_fm_to_fm_submat(msource=tmp_fm, mtarget=img_fm, nrow=nex, ncol=nex, &
    2234              :                               s_firstrow=1, s_firstcol=1, t_firstrow=1 + nsg + ntp + 1, &
    2235            2 :                               t_firstcol=1 + nsg + 2*ntp + 1)
    2236              : 
    2237              :       !<T^-1|H_x|T^0>, takes a minus sign and a transpose (because computed <T^0|H_x|T^-1>)
    2238            2 :       CALL cp_fm_transpose(tmp_fm, work_fm)
    2239            2 :       CALL cp_fm_scale(-1.0_dp, work_fm)
    2240              :       CALL cp_fm_to_fm_submat(msource=work_fm, mtarget=img_fm, nrow=nex, ncol=nex, &
    2241              :                               s_firstrow=1, s_firstcol=1, t_firstrow=1 + nsg + 1, &
    2242            2 :                               t_firstcol=1 + nsg + ntp + 1)
    2243              : 
    2244              :       !Ms=0 to Ms=+-1 SOC, real part
    2245            2 :       CALL dbcsr_multiply('N', 'N', 1.0_dp, orb_soc_y, dbcsr_tp, 0.0_dp, dbcsr_work, filter_eps=eps_filter)
    2246            2 :       CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_tp, dbcsr_work, 0.0_dp, dbcsr_prod, filter_eps=eps_filter)
    2247              : 
    2248              :       CALL rcs_amew_soc_elements(dbcsr_tmp, dbcsr_prod, dbcsr_ovlp, domo_soc_y, &
    2249            2 :                                  pref_trace=1.0_dp, pref_overall=0.5_dp*sqrt2)
    2250              : 
    2251              :       !<T^0|H_y|T^+1>
    2252            2 :       CALL copy_dbcsr_to_fm(dbcsr_tmp, tmp_fm)
    2253              :       CALL cp_fm_to_fm_submat(msource=tmp_fm, mtarget=real_fm, nrow=nex, ncol=nex, &
    2254              :                               s_firstrow=1, s_firstcol=1, t_firstrow=1 + nsg + ntp + 1, &
    2255            2 :                               t_firstcol=1 + nsg + 2*ntp + 1)
    2256              : 
    2257              :       !<T^-1|H_y|T^0>, takes a minus sign and a transpose
    2258            2 :       CALL cp_fm_transpose(tmp_fm, work_fm)
    2259            2 :       CALL cp_fm_scale(-1.0_dp, work_fm)
    2260              :       CALL cp_fm_to_fm_submat(msource=work_fm, mtarget=real_fm, nrow=nex, ncol=nex, &
    2261              :                               s_firstrow=1, s_firstcol=1, t_firstrow=1 + nsg + 1, &
    2262            2 :                               t_firstcol=1 + nsg + ntp + 1)
    2263              : 
    2264              :       !Ms=1 to Ms=1 and Ms=-1 to Ms=-1 SOC, purely imaginary
    2265            2 :       CALL dbcsr_multiply('N', 'N', 1.0_dp, orb_soc_z, dbcsr_tp, 0.0_dp, dbcsr_work, filter_eps=eps_filter)
    2266            2 :       CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_tp, dbcsr_work, 0.0_dp, dbcsr_prod, filter_eps=eps_filter)
    2267              : 
    2268              :       CALL rcs_amew_soc_elements(dbcsr_tmp, dbcsr_prod, dbcsr_ovlp, domo_soc_z, &
    2269            2 :                                  pref_trace=1.0_dp, pref_overall=1.0_dp)
    2270              : 
    2271              :       !<T^+1|H_z|T^+1>
    2272            2 :       CALL copy_dbcsr_to_fm(dbcsr_tmp, tmp_fm)
    2273              :       CALL cp_fm_to_fm_submat(msource=tmp_fm, mtarget=img_fm, nrow=nex, ncol=nex, &
    2274              :                               s_firstrow=1, s_firstcol=1, t_firstrow=1 + nsg + 2*ntp + 1, &
    2275            2 :                               t_firstcol=1 + nsg + 2*ntp + 1)
    2276              : 
    2277              :       !<T^-1|H_z|T^-1>, takes a minus sign
    2278            2 :       CALL cp_fm_scale(-1.0_dp, tmp_fm)
    2279              :       CALL cp_fm_to_fm_submat(msource=tmp_fm, mtarget=img_fm, nrow=nex, ncol=nex, &
    2280              :                               s_firstrow=1, s_firstcol=1, t_firstrow=1 + nsg + 1, &
    2281            2 :                               t_firstcol=1 + nsg + 1)
    2282              : 
    2283              : !  Intermediate clean-up
    2284            2 :       CALL cp_fm_struct_release(work_struct)
    2285            2 :       CALL cp_fm_release(work_fm)
    2286            2 :       CALL cp_fm_release(tmp_fm)
    2287            2 :       DEALLOCATE (diag, domo_soc_x, domo_soc_y, domo_soc_z)
    2288              : 
    2289              : !  Set-up the complex hermitian perturbation matrix
    2290            2 :       CALL cp_cfm_create(hami_cfm, full_struct)
    2291            2 :       CALL cp_fm_to_cfm(real_fm, img_fm, hami_cfm)
    2292              : 
    2293            2 :       CALL cp_fm_release(real_fm)
    2294            2 :       CALL cp_fm_release(img_fm)
    2295              : 
    2296              : !  Diagonalize the Hamiltonian
    2297            6 :       ALLOCATE (tmp_evals(ntot))
    2298            2 :       CALL cp_cfm_create(evecs_cfm, full_struct)
    2299            2 :       CALL cp_cfm_heevd(hami_cfm, evecs_cfm, tmp_evals)
    2300              : 
    2301              :       !  Adjust the energies so the GS has zero, and store in the donor_state (without the GS)
    2302            6 :       ALLOCATE (donor_state%soc_evals(ntot - 1))
    2303           98 :       donor_state%soc_evals(:) = tmp_evals(2:ntot) - tmp_evals(1)
    2304              : 
    2305              : !  Compute the dipole oscillator strengths
    2306              :       CALL compute_soc_dipole_fosc(evecs_cfm, dbcsr_soc_package, donor_state, xas_tdp_env, &
    2307            2 :                                    xas_tdp_control, qs_env)
    2308              : 
    2309              : !  And the quadrupole (if needed)
    2310            2 :       IF (xas_tdp_control%do_quad) THEN
    2311              :          CALL compute_soc_quadrupole_fosc(evecs_cfm, dbcsr_soc_package, donor_state, xas_tdp_env, &
    2312            0 :                                           xas_tdp_control, qs_env)
    2313              :       END IF
    2314              : 
    2315              : !  Clean-up
    2316            2 :       CALL cp_fm_struct_release(full_struct)
    2317            2 :       CALL cp_cfm_release(hami_cfm)
    2318            2 :       CALL cp_cfm_release(evecs_cfm)
    2319            2 :       CALL dbcsr_distribution_release(coeffs_dist)
    2320            2 :       CALL dbcsr_distribution_release(prod_dist)
    2321            2 :       CALL dbcsr_release(dbcsr_sg)
    2322            2 :       CALL dbcsr_release(dbcsr_tp)
    2323            2 :       CALL dbcsr_release(dbcsr_prod)
    2324            2 :       CALL dbcsr_release(dbcsr_ovlp)
    2325            2 :       CALL dbcsr_release(dbcsr_tmp)
    2326            2 :       CALL dbcsr_release(dbcsr_work)
    2327            2 :       DEALLOCATE (coeffs_dist, prod_dist, col_dist, col_blk_size, row_dist_new)
    2328            2 :       DEALLOCATE (dbcsr_sg, dbcsr_tp, dbcsr_work, dbcsr_prod, dbcsr_ovlp, dbcsr_tmp)
    2329              : 
    2330            2 :       CALL timestop(handle)
    2331              : 
    2332           20 :    END SUBROUTINE include_rcs_soc
    2333              : 
    2334              : ! **************************************************************************************************
    2335              : !> \brief Computes the matrix elements of a one-body operator (given wrt AOs) in the basis of the
    2336              : !>        excited state AMEWs with ground state, for the open-shell case
    2337              : !> \param amew_op the operator in the basis of the AMEWs (array because could have x,y,z components)
    2338              : !> \param ao_op the operator in the basis of the atomic orbitals
    2339              : !> \param gs_coeffs the coefficient of the GS donor MOs. Ecplicitely passed because of special
    2340              : !>                  format in the ROKS case (see include_os_soc routine)
    2341              : !> \param dbcsr_soc_package inhertited from the main SOC routine
    2342              : !> \param donor_state ...
    2343              : !> \param eps_filter ...
    2344              : !> \param qs_env ...
    2345              : !> \note The ordering of the AMEWs is consistent with SOC and is gs, sc, sf
    2346              : !>       We assume that the operator is spin-independent => only <0|0>, <0|sc>, <sc|sc> and <sf|sf>
    2347              : !>       yield non-zero matrix elements
    2348              : !>       Only for open-shell calculations
    2349              : ! **************************************************************************************************
    2350            2 :    SUBROUTINE get_os_amew_op(amew_op, ao_op, gs_coeffs, dbcsr_soc_package, donor_state, &
    2351              :                              eps_filter, qs_env)
    2352              : 
    2353              :       TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:), &
    2354              :          INTENT(OUT)                                     :: amew_op
    2355              :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: ao_op
    2356              :       TYPE(cp_fm_type), INTENT(IN)                       :: gs_coeffs
    2357              :       TYPE(dbcsr_soc_package_type)                       :: dbcsr_soc_package
    2358              :       TYPE(donor_state_type), POINTER                    :: donor_state
    2359              :       REAL(dp), INTENT(IN)                               :: eps_filter
    2360              :       TYPE(qs_environment_type), POINTER                 :: qs_env
    2361              : 
    2362              :       INTEGER                                            :: dim_op, homo, i, isc, nao, ndo_mo, &
    2363              :                                                             ndo_so, nex, nsc, nsf, ntot
    2364              :       REAL(dp)                                           :: op
    2365            2 :       REAL(dp), ALLOCATABLE, DIMENSION(:)                :: diag, gsgs_op
    2366            2 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: domo_op, gsex_block, tmp
    2367              :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
    2368              :       TYPE(cp_fm_struct_type), POINTER                   :: full_struct, gsex_struct, prod_struct, &
    2369              :                                                             tmp_struct, vec_struct
    2370              :       TYPE(cp_fm_type)                                   :: gsex_fm, prod_work, tmp_fm, vec_work, &
    2371              :                                                             work_fm
    2372              :       TYPE(cp_fm_type), POINTER                          :: mo_coeff, sc_coeffs, sf_coeffs
    2373            2 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrix_s
    2374              :       TYPE(dbcsr_type), POINTER                          :: ao_op_i, dbcsr_ovlp, dbcsr_prod, &
    2375              :                                                             dbcsr_sc, dbcsr_sf, dbcsr_tmp, &
    2376              :                                                             dbcsr_work
    2377            2 :       TYPE(mo_set_type), DIMENSION(:), POINTER           :: mos
    2378              :       TYPE(mp_para_env_type), POINTER                    :: para_env
    2379              : 
    2380            2 :       NULLIFY (matrix_s, para_env, blacs_env, full_struct, vec_struct, prod_struct, mos)
    2381            2 :       NULLIFY (mo_coeff, ao_op_i, tmp_struct)
    2382            2 :       NULLIFY (dbcsr_sc, dbcsr_sf, dbcsr_ovlp, dbcsr_work, dbcsr_tmp, dbcsr_prod)
    2383              : 
    2384              : !  Iinitialization
    2385            2 :       dim_op = SIZE(ao_op)
    2386            2 :       sc_coeffs => donor_state%sc_coeffs
    2387            2 :       sf_coeffs => donor_state%sf_coeffs
    2388            2 :       nsc = SIZE(donor_state%sc_evals)
    2389            2 :       nsf = SIZE(donor_state%sf_evals)
    2390            2 :       nex = nsc
    2391            2 :       ntot = 1 + nsc + nsf
    2392            2 :       ndo_mo = donor_state%ndo_mo
    2393            2 :       ndo_so = 2*donor_state%ndo_mo !open-shell => nspins = 2
    2394            2 :       CALL get_qs_env(qs_env, matrix_s=matrix_s, para_env=para_env, blacs_env=blacs_env, mos=mos)
    2395            2 :       CALL dbcsr_get_info(matrix_s(1)%matrix, nfullrows_total=nao)
    2396              : 
    2397            2 :       dbcsr_sc => dbcsr_soc_package%dbcsr_sc
    2398            2 :       dbcsr_sf => dbcsr_soc_package%dbcsr_sf
    2399            2 :       dbcsr_work => dbcsr_soc_package%dbcsr_work
    2400            2 :       dbcsr_tmp => dbcsr_soc_package%dbcsr_tmp
    2401            2 :       dbcsr_prod => dbcsr_soc_package%dbcsr_prod
    2402            2 :       dbcsr_ovlp => dbcsr_soc_package%dbcsr_ovlp
    2403              : 
    2404              : !  Create the amew_op matrix set
    2405              :       CALL cp_fm_struct_create(full_struct, context=blacs_env, para_env=para_env, &
    2406            2 :                                nrow_global=ntot, ncol_global=ntot)
    2407           12 :       ALLOCATE (amew_op(dim_op))
    2408            8 :       DO i = 1, dim_op
    2409            8 :          CALL cp_fm_create(amew_op(i), full_struct, set_zero=.TRUE.)
    2410              :       END DO
    2411              : 
    2412              : !  Before looping, need to evaluate sum_j,sigma <phi^0_j,sgima|op|phi^0_j,sigma>, for each dimension
    2413              : !  of the operator
    2414            6 :       ALLOCATE (gsgs_op(dim_op))
    2415              : 
    2416              :       !start with the alpha MOs
    2417            2 :       CALL get_mo_set(mos(1), mo_coeff=mo_coeff, homo=homo)
    2418            6 :       ALLOCATE (diag(homo))
    2419            2 :       CALL cp_fm_get_info(mo_coeff, matrix_struct=vec_struct)
    2420              :       CALL cp_fm_struct_create(prod_struct, context=blacs_env, para_env=para_env, &
    2421            2 :                                nrow_global=homo, ncol_global=homo)
    2422            2 :       CALL cp_fm_create(vec_work, vec_struct, set_zero=.TRUE.)
    2423            2 :       CALL cp_fm_create(prod_work, prod_struct, set_zero=.TRUE.)
    2424              : 
    2425            8 :       DO i = 1, dim_op
    2426              : 
    2427            6 :          ao_op_i => ao_op(i)%matrix
    2428              : 
    2429            6 :          CALL cp_dbcsr_sm_fm_multiply(ao_op_i, mo_coeff, vec_work, ncol=homo)
    2430            6 :          CALL parallel_gemm('T', 'N', homo, homo, nao, 1.0_dp, mo_coeff, vec_work, 0.0_dp, prod_work)
    2431            6 :          CALL cp_fm_get_diag(prod_work, diag)
    2432           62 :          gsgs_op(i) = SUM(diag)
    2433              : 
    2434              :       END DO !i
    2435              : 
    2436            2 :       CALL cp_fm_release(vec_work)
    2437            2 :       CALL cp_fm_release(prod_work)
    2438            2 :       CALL cp_fm_struct_release(prod_struct)
    2439            2 :       DEALLOCATE (diag)
    2440            2 :       NULLIFY (vec_struct)
    2441              : 
    2442              :       !then beta orbitals
    2443            2 :       CALL get_mo_set(mos(2), mo_coeff=mo_coeff, homo=homo)
    2444            6 :       ALLOCATE (diag(homo))
    2445            2 :       CALL cp_fm_get_info(mo_coeff, matrix_struct=vec_struct)
    2446              :       CALL cp_fm_struct_create(prod_struct, context=blacs_env, para_env=para_env, &
    2447            2 :                                nrow_global=homo, ncol_global=homo)
    2448            2 :       CALL cp_fm_create(vec_work, vec_struct)
    2449            2 :       CALL cp_fm_create(prod_work, prod_struct)
    2450              : 
    2451            8 :       DO i = 1, dim_op
    2452              : 
    2453            6 :          ao_op_i => ao_op(i)%matrix
    2454              : 
    2455            6 :          CALL cp_dbcsr_sm_fm_multiply(ao_op_i, mo_coeff, vec_work, ncol=homo)
    2456            6 :          CALL parallel_gemm('T', 'N', homo, homo, nao, 1.0_dp, mo_coeff, vec_work, 0.0_dp, prod_work)
    2457            6 :          CALL cp_fm_get_diag(prod_work, diag)
    2458           62 :          gsgs_op(i) = gsgs_op(i) + SUM(diag)
    2459              : 
    2460              :       END DO !i
    2461              : 
    2462            2 :       CALL cp_fm_release(vec_work)
    2463            2 :       CALL cp_fm_release(prod_work)
    2464            2 :       CALL cp_fm_struct_release(prod_struct)
    2465            2 :       DEALLOCATE (diag)
    2466            2 :       NULLIFY (vec_struct)
    2467              : 
    2468              : !  Before looping over excited AMEWs, define some work matrices and structures
    2469              :       CALL cp_fm_struct_create(vec_struct, context=blacs_env, para_env=para_env, &
    2470            2 :                                nrow_global=nao, ncol_global=ndo_so)
    2471              :       CALL cp_fm_struct_create(prod_struct, context=blacs_env, para_env=para_env, &
    2472            2 :                                nrow_global=ndo_so, ncol_global=ndo_so)
    2473              :       CALL cp_fm_struct_create(gsex_struct, context=blacs_env, para_env=para_env, &
    2474            2 :                                nrow_global=ndo_so*nex, ncol_global=ndo_so)
    2475              :       CALL cp_fm_struct_create(tmp_struct, context=blacs_env, para_env=para_env, &
    2476            2 :                                nrow_global=nex, ncol_global=nex)
    2477            2 :       CALL cp_fm_create(vec_work, vec_struct) !for op*|phi>
    2478            2 :       CALL cp_fm_create(prod_work, prod_struct) !for any <phi|op|phi>
    2479            2 :       CALL cp_fm_create(work_fm, full_struct)
    2480            2 :       CALL cp_fm_create(gsex_fm, gsex_struct)
    2481            2 :       CALL cp_fm_create(tmp_fm, tmp_struct)
    2482            6 :       ALLOCATE (diag(ndo_so))
    2483            8 :       ALLOCATE (domo_op(ndo_so, ndo_so))
    2484            6 :       ALLOCATE (tmp(ndo_so, ndo_so))
    2485            6 :       ALLOCATE (gsex_block(ndo_so, ndo_so))
    2486              : 
    2487              : !  Loop over the dimensions of the operator
    2488            8 :       DO i = 1, dim_op
    2489              : 
    2490            6 :          ao_op_i => ao_op(i)%matrix
    2491              : 
    2492              :          !put the gs-gs contribution
    2493            6 :          CALL cp_fm_set_element(amew_op(i), 1, 1, gsgs_op(i))
    2494              : 
    2495              :          !  Precompute what we can before looping over excited states
    2496              :          ! Need the operator in the donor MOs basis <phi^0_I,sigma|op_i|phi^0_J,tau>
    2497            6 :          CALL cp_dbcsr_sm_fm_multiply(ao_op_i, gs_coeffs, vec_work, ncol=ndo_so)
    2498            6 :          CALL parallel_gemm('T', 'N', ndo_so, ndo_so, nao, 1.0_dp, gs_coeffs, vec_work, 0.0_dp, prod_work)
    2499            6 :          CALL cp_fm_get_submatrix(prod_work, domo_op)
    2500              : 
    2501              :          !  Do the ground-state/spin-conserving operator
    2502            6 :          CALL parallel_gemm('T', 'N', ndo_so*nsc, ndo_so, nao, 1.0_dp, sc_coeffs, vec_work, 0.0_dp, gsex_fm)
    2503           78 :          DO isc = 1, nsc
    2504              :             CALL cp_fm_get_submatrix(fm=gsex_fm, target_m=gsex_block, start_row=(isc - 1)*ndo_so + 1, &
    2505           72 :                                      start_col=1, n_rows=ndo_so, n_cols=ndo_so)
    2506           72 :             diag(:) = get_diag(gsex_block)
    2507          504 :             op = SUM(diag)
    2508           78 :             CALL cp_fm_set_element(amew_op(i), 1, 1 + isc, op)
    2509              :          END DO !isc
    2510              : 
    2511              :          !  The spin-conserving/spin-conserving operator
    2512              :          !overlap
    2513              :          CALL dbcsr_multiply('N', 'N', 1.0_dp, matrix_s(1)%matrix, dbcsr_sc, 0.0_dp, &
    2514            6 :                              dbcsr_work, filter_eps=eps_filter)
    2515            6 :          CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sc, dbcsr_work, 0.0_dp, dbcsr_ovlp, filter_eps=eps_filter)
    2516              : 
    2517              :          !operator in SC LR-orbital basis
    2518            6 :          CALL dbcsr_multiply('N', 'N', 1.0_dp, ao_op_i, dbcsr_sc, 0.0_dp, dbcsr_work, filter_eps=eps_filter)
    2519            6 :          CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sc, dbcsr_work, 0.0_dp, dbcsr_prod, filter_eps=eps_filter)
    2520              : 
    2521              :          CALL os_amew_soc_elements(dbcsr_tmp, dbcsr_prod, dbcsr_ovlp, domo_op, pref_diaga=1.0_dp, &
    2522              :                                    pref_diagb=1.0_dp, pref_tracea=-1.0_dp, pref_traceb=-1.0_dp, &
    2523            6 :                                    pref_diags=gsgs_op(i), symmetric=.TRUE.)
    2524              : 
    2525            6 :          CALL copy_dbcsr_to_fm(dbcsr_tmp, tmp_fm)
    2526              :          CALL cp_fm_to_fm_submat(msource=tmp_fm, mtarget=amew_op(i), nrow=nex, ncol=nex, &
    2527            6 :                                  s_firstrow=1, s_firstcol=1, t_firstrow=2, t_firstcol=2)
    2528              : 
    2529              :          !  The spin-flip/spin-flip operator
    2530              :          !overlap
    2531              :          CALL dbcsr_multiply('N', 'N', 1.0_dp, matrix_s(1)%matrix, dbcsr_sf, 0.0_dp, &
    2532            6 :                              dbcsr_work, filter_eps=eps_filter)
    2533            6 :          CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sf, dbcsr_work, 0.0_dp, dbcsr_ovlp, filter_eps=eps_filter)
    2534              : 
    2535              :          !operator in SF LR-orbital basis
    2536            6 :          CALL dbcsr_multiply('N', 'N', 1.0_dp, ao_op_i, dbcsr_sf, 0.0_dp, dbcsr_work, filter_eps=eps_filter)
    2537            6 :          CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sf, dbcsr_work, 0.0_dp, dbcsr_prod, filter_eps=eps_filter)
    2538              : 
    2539              :          !need to reorganize the domo_op array by swapping the alpha-alpha and the beta-beta quarter
    2540           78 :          tmp(1:ndo_mo, 1:ndo_mo) = domo_op(ndo_mo + 1:ndo_so, ndo_mo + 1:ndo_so)
    2541           78 :          tmp(ndo_mo + 1:ndo_so, ndo_mo + 1:ndo_so) = domo_op(1:ndo_mo, 1:ndo_mo)
    2542              : 
    2543              :          CALL os_amew_soc_elements(dbcsr_tmp, dbcsr_prod, dbcsr_ovlp, tmp, pref_diaga=1.0_dp, &
    2544              :                                    pref_diagb=1.0_dp, pref_tracea=-1.0_dp, pref_traceb=-1.0_dp, &
    2545            6 :                                    pref_diags=gsgs_op(i), symmetric=.TRUE.)
    2546              : 
    2547            6 :          CALL copy_dbcsr_to_fm(dbcsr_tmp, tmp_fm)
    2548              :          CALL cp_fm_to_fm_submat(msource=tmp_fm, mtarget=amew_op(i), nrow=nex, ncol=nex, &
    2549            6 :                                  s_firstrow=1, s_firstcol=1, t_firstrow=1 + nsc + 1, t_firstcol=1 + nsc + 1)
    2550              : 
    2551              :          !Symmetry => only upper diag explicitly built
    2552            8 :          CALL cp_fm_uplo_to_full(amew_op(i), work_fm)
    2553              : 
    2554              :       END DO !i
    2555              : 
    2556              : !  Clean-up
    2557            2 :       CALL cp_fm_struct_release(full_struct)
    2558            2 :       CALL cp_fm_struct_release(prod_struct)
    2559            2 :       CALL cp_fm_struct_release(vec_struct)
    2560            2 :       CALL cp_fm_struct_release(tmp_struct)
    2561            2 :       CALL cp_fm_struct_release(gsex_struct)
    2562            2 :       CALL cp_fm_release(work_fm)
    2563            2 :       CALL cp_fm_release(tmp_fm)
    2564            2 :       CALL cp_fm_release(vec_work)
    2565            2 :       CALL cp_fm_release(prod_work)
    2566            2 :       CALL cp_fm_release(gsex_fm)
    2567              : 
    2568           12 :    END SUBROUTINE get_os_amew_op
    2569              : 
    2570              : ! **************************************************************************************************
    2571              : !> \brief Computes the matrix elements of a one-body operator (given wrt AOs) in the basis of the
    2572              : !>        excited state AMEWs with ground state, singlet and triplet with Ms = -1,0,+1
    2573              : !> \param amew_op the operator in the basis of the AMEWs (array because could have x,y,z components)
    2574              : !> \param ao_op the operator in the basis of the atomic orbitals
    2575              : !> \param dbcsr_soc_package inherited from the main SOC routine
    2576              : !> \param donor_state ...
    2577              : !> \param eps_filter for dbcsr multiplication
    2578              : !> \param qs_env ...
    2579              : !> \note The ordering of the AMEWs is consistent with SOC and is gs, sg, tp(-1), tp(0). tp(+1)
    2580              : !>       We assume that the operator is spin-independent => only <0|0>, <0|S>, <S|S> and <T|T>
    2581              : !>       yield non-zero matrix elements
    2582              : !>       Only for spin-restricted calculations
    2583              : ! **************************************************************************************************
    2584            2 :    SUBROUTINE get_rcs_amew_op(amew_op, ao_op, dbcsr_soc_package, donor_state, eps_filter, qs_env)
    2585              : 
    2586              :       TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:), &
    2587              :          INTENT(OUT)                                     :: amew_op
    2588              :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: ao_op
    2589              :       TYPE(dbcsr_soc_package_type)                       :: dbcsr_soc_package
    2590              :       TYPE(donor_state_type), POINTER                    :: donor_state
    2591              :       REAL(dp), INTENT(IN)                               :: eps_filter
    2592              :       TYPE(qs_environment_type), POINTER                 :: qs_env
    2593              : 
    2594              :       INTEGER                                            :: dim_op, homo, i, isg, nao, ndo_mo, nex, &
    2595              :                                                             nsg, ntot, ntp
    2596              :       REAL(dp)                                           :: op, sqrt2
    2597            2 :       REAL(dp), ALLOCATABLE, DIMENSION(:)                :: diag, gs_diag, gsgs_op
    2598            2 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: domo_op, sggs_block
    2599              :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
    2600              :       TYPE(cp_fm_struct_type), POINTER                   :: full_struct, gsgs_struct, prod_struct, &
    2601              :                                                             sggs_struct, std_struct, tmp_struct, &
    2602              :                                                             vec_struct
    2603              :       TYPE(cp_fm_type)                                   :: gs_fm, prod_fm, sggs_fm, tmp_fm, vec_op, &
    2604              :                                                             work_fm
    2605              :       TYPE(cp_fm_type), POINTER                          :: gs_coeffs, mo_coeff, sg_coeffs
    2606            2 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrix_s
    2607              :       TYPE(dbcsr_type), POINTER                          :: ao_op_i, dbcsr_ovlp, dbcsr_prod, &
    2608              :                                                             dbcsr_sg, dbcsr_tmp, dbcsr_tp, &
    2609              :                                                             dbcsr_work
    2610            2 :       TYPE(mo_set_type), DIMENSION(:), POINTER           :: mos
    2611              :       TYPE(mp_para_env_type), POINTER                    :: para_env
    2612              : 
    2613            2 :       NULLIFY (gs_coeffs, sg_coeffs, matrix_s, full_struct, prod_struct, vec_struct, blacs_env)
    2614            2 :       NULLIFY (para_env, mo_coeff, mos, gsgs_struct, std_struct, tmp_struct, sggs_struct)
    2615            2 :       NULLIFY (ao_op_i, dbcsr_tp, dbcsr_sg, dbcsr_ovlp, dbcsr_work, dbcsr_tmp, dbcsr_prod)
    2616              : 
    2617              : !  Initialization
    2618            2 :       gs_coeffs => donor_state%gs_coeffs
    2619            2 :       sg_coeffs => donor_state%sg_coeffs
    2620            2 :       nsg = SIZE(donor_state%sg_evals)
    2621            2 :       ntp = nsg; nex = nsg !all the same by construction, keep them separate for clarity
    2622            2 :       ntot = 1 + nsg + 3*ntp
    2623            2 :       ndo_mo = donor_state%ndo_mo
    2624            2 :       CALL get_qs_env(qs_env, matrix_s=matrix_s, para_env=para_env, blacs_env=blacs_env, mos=mos)
    2625            2 :       sqrt2 = SQRT(2.0_dp)
    2626            2 :       dim_op = SIZE(ao_op)
    2627              : 
    2628            2 :       dbcsr_sg => dbcsr_soc_package%dbcsr_sg
    2629            2 :       dbcsr_tp => dbcsr_soc_package%dbcsr_tp
    2630            2 :       dbcsr_work => dbcsr_soc_package%dbcsr_work
    2631            2 :       dbcsr_prod => dbcsr_soc_package%dbcsr_prod
    2632            2 :       dbcsr_ovlp => dbcsr_soc_package%dbcsr_ovlp
    2633            2 :       dbcsr_tmp => dbcsr_soc_package%dbcsr_tmp
    2634              : 
    2635              : !  Create the amew_op matrix
    2636              :       CALL cp_fm_struct_create(full_struct, context=blacs_env, para_env=para_env, &
    2637            2 :                                nrow_global=ntot, ncol_global=ntot)
    2638           12 :       ALLOCATE (amew_op(dim_op))
    2639            8 :       DO i = 1, dim_op
    2640            8 :          CALL cp_fm_create(amew_op(i), full_struct, set_zero=.TRUE.)
    2641              :       END DO !i
    2642              : 
    2643              : !  Deal with the GS-GS contribution <0|0> = 2*sum_j <phi_j|op|phi_j>
    2644            2 :       CALL get_mo_set(mos(1), mo_coeff=mo_coeff, nao=nao, homo=homo)
    2645              :       CALL cp_fm_struct_create(gsgs_struct, context=blacs_env, para_env=para_env, &
    2646            2 :                                nrow_global=homo, ncol_global=homo)
    2647            2 :       CALL cp_fm_get_info(mo_coeff, matrix_struct=std_struct)
    2648            2 :       CALL cp_fm_create(gs_fm, gsgs_struct)
    2649            2 :       CALL cp_fm_create(work_fm, std_struct)
    2650            6 :       ALLOCATE (gsgs_op(dim_op))
    2651            6 :       ALLOCATE (gs_diag(homo))
    2652              : 
    2653            8 :       DO i = 1, dim_op
    2654              : 
    2655            6 :          ao_op_i => ao_op(i)%matrix
    2656              : 
    2657            6 :          CALL cp_dbcsr_sm_fm_multiply(ao_op_i, mo_coeff, work_fm, ncol=homo)
    2658            6 :          CALL parallel_gemm('T', 'N', homo, homo, nao, 1.0_dp, mo_coeff, work_fm, 0.0_dp, gs_fm)
    2659            6 :          CALL cp_fm_get_diag(gs_fm, gs_diag)
    2660           62 :          gsgs_op(i) = 2.0_dp*SUM(gs_diag)
    2661              : 
    2662              :       END DO !i
    2663              : 
    2664            2 :       CALL cp_fm_release(gs_fm)
    2665            2 :       CALL cp_fm_release(work_fm)
    2666            2 :       CALL cp_fm_struct_release(gsgs_struct)
    2667            2 :       DEALLOCATE (gs_diag)
    2668              : 
    2669              : !  Create the work and helper fms
    2670            2 :       CALL cp_fm_get_info(gs_coeffs, matrix_struct=vec_struct)
    2671              :       CALL cp_fm_struct_create(prod_struct, context=blacs_env, para_env=para_env, &
    2672            2 :                                nrow_global=ndo_mo, ncol_global=ndo_mo)
    2673            2 :       CALL cp_fm_create(prod_fm, prod_struct)
    2674            2 :       CALL cp_fm_create(vec_op, vec_struct)
    2675              :       CALL cp_fm_struct_create(tmp_struct, context=blacs_env, para_env=para_env, &
    2676            2 :                                nrow_global=nex, ncol_global=nex)
    2677              :       CALL cp_fm_struct_create(sggs_struct, context=blacs_env, para_env=para_env, &
    2678            2 :                                nrow_global=ndo_mo*nsg, ncol_global=ndo_mo)
    2679            2 :       CALL cp_fm_create(tmp_fm, tmp_struct)
    2680            2 :       CALL cp_fm_create(work_fm, full_struct)
    2681            2 :       CALL cp_fm_create(sggs_fm, sggs_struct)
    2682            6 :       ALLOCATE (diag(ndo_mo))
    2683            8 :       ALLOCATE (domo_op(ndo_mo, ndo_mo))
    2684            6 :       ALLOCATE (sggs_block(ndo_mo, ndo_mo))
    2685              : 
    2686              : ! Iterate over the dimensions of the operator
    2687              : ! Note: operator matrices are asusmed symmetric, can only do upper half
    2688            8 :       DO i = 1, dim_op
    2689              : 
    2690            6 :          ao_op_i => ao_op(i)%matrix
    2691              : 
    2692              :          ! The GS-GS contribution
    2693            6 :          CALL cp_fm_set_element(amew_op(i), 1, 1, gsgs_op(i))
    2694              : 
    2695              :          ! Compute the operator in the donor MOs basis
    2696            6 :          CALL cp_dbcsr_sm_fm_multiply(ao_op_i, gs_coeffs, vec_op, ncol=ndo_mo)
    2697            6 :          CALL parallel_gemm('T', 'N', ndo_mo, ndo_mo, nao, 1.0_dp, gs_coeffs, vec_op, 0.0_dp, prod_fm)
    2698            6 :          CALL cp_fm_get_submatrix(prod_fm, domo_op)
    2699              : 
    2700              :          ! Compute the ground-state/singlet components. ao_op*gs_coeffs already stored in vec_op
    2701            6 :          CALL parallel_gemm('T', 'N', ndo_mo*nsg, ndo_mo, nao, 1.0_dp, sg_coeffs, vec_op, 0.0_dp, sggs_fm)
    2702           78 :          DO isg = 1, nsg
    2703              :             CALL cp_fm_get_submatrix(fm=sggs_fm, target_m=sggs_block, start_row=(isg - 1)*ndo_mo + 1, &
    2704           72 :                                      start_col=1, n_rows=ndo_mo, n_cols=ndo_mo)
    2705           72 :             diag(:) = get_diag(sggs_block)
    2706          288 :             op = sqrt2*SUM(diag)
    2707           78 :             CALL cp_fm_set_element(amew_op(i), 1, 1 + isg, op)
    2708              :          END DO
    2709              : 
    2710              :          ! do the singlet-singlet components
    2711              :          !start with the overlap
    2712              :          CALL dbcsr_multiply('N', 'N', 1.0_dp, matrix_s(1)%matrix, dbcsr_sg, 0.0_dp, &
    2713            6 :                              dbcsr_work, filter_eps=eps_filter)
    2714            6 :          CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sg, dbcsr_work, 0.0_dp, dbcsr_ovlp, filter_eps=eps_filter)
    2715              : 
    2716              :          !then the operator in the LR orbital basis
    2717            6 :          CALL dbcsr_multiply('N', 'N', 1.0_dp, ao_op_i, dbcsr_sg, 0.0_dp, dbcsr_work, filter_eps=eps_filter)
    2718            6 :          CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sg, dbcsr_work, 0.0_dp, dbcsr_prod, filter_eps=eps_filter)
    2719              : 
    2720              :          !use the soc routine, it is compatible
    2721              :          CALL rcs_amew_soc_elements(dbcsr_tmp, dbcsr_prod, dbcsr_ovlp, domo_op, pref_trace=-1.0_dp, &
    2722            6 :                                     pref_overall=1.0_dp, pref_diags=gsgs_op(i), symmetric=.TRUE.)
    2723              : 
    2724            6 :          CALL copy_dbcsr_to_fm(dbcsr_tmp, tmp_fm)
    2725              :          CALL cp_fm_to_fm_submat(msource=tmp_fm, mtarget=amew_op(i), nrow=nex, ncol=nex, &
    2726            6 :                                  s_firstrow=1, s_firstcol=1, t_firstrow=2, t_firstcol=2)
    2727              : 
    2728              :          ! compute the triplet-triplet components
    2729              :          !the overlap
    2730              :          CALL dbcsr_multiply('N', 'N', 1.0_dp, matrix_s(1)%matrix, dbcsr_tp, 0.0_dp, &
    2731            6 :                              dbcsr_work, filter_eps=eps_filter)
    2732            6 :          CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_tp, dbcsr_work, 0.0_dp, dbcsr_ovlp, filter_eps=eps_filter)
    2733              : 
    2734              :          !the operator in the LR orbital basis
    2735            6 :          CALL dbcsr_multiply('N', 'N', 1.0_dp, ao_op_i, dbcsr_sg, 0.0_dp, dbcsr_work, filter_eps=eps_filter)
    2736            6 :          CALL dbcsr_multiply('T', 'N', 1.0_dp, dbcsr_sg, dbcsr_work, 0.0_dp, dbcsr_prod, filter_eps=eps_filter)
    2737              : 
    2738              :          CALL rcs_amew_soc_elements(dbcsr_tmp, dbcsr_prod, dbcsr_ovlp, domo_op, pref_trace=-1.0_dp, &
    2739            6 :                                     pref_overall=1.0_dp, pref_diags=gsgs_op(i), symmetric=.TRUE.)
    2740              : 
    2741            6 :          CALL copy_dbcsr_to_fm(dbcsr_tmp, tmp_fm)
    2742              :          !<T^-1|op|T^-1>
    2743              :          CALL cp_fm_to_fm_submat(msource=tmp_fm, mtarget=amew_op(i), nrow=nex, ncol=nex, &
    2744            6 :                                  s_firstrow=1, s_firstcol=1, t_firstrow=1 + nsg + 1, t_firstcol=1 + nsg + 1)
    2745              :          !<T^0|op|T^0>
    2746              :          CALL cp_fm_to_fm_submat(msource=tmp_fm, mtarget=amew_op(i), nrow=nex, ncol=nex, &
    2747              :                                  s_firstrow=1, s_firstcol=1, t_firstrow=1 + nsg + ntp + 1, &
    2748            6 :                                  t_firstcol=1 + nsg + ntp + 1)
    2749              :          !<T^-1|op|T^-1>
    2750              :          CALL cp_fm_to_fm_submat(msource=tmp_fm, mtarget=amew_op(i), nrow=nex, ncol=nex, &
    2751              :                                  s_firstrow=1, s_firstcol=1, t_firstrow=1 + nsg + 2*ntp + 1, &
    2752            6 :                                  t_firstcol=1 + nsg + 2*ntp + 1)
    2753              : 
    2754              :          ! Symmetrize the matrix (only upper triangle built)
    2755            8 :          CALL cp_fm_uplo_to_full(amew_op(i), work_fm)
    2756              : 
    2757              :       END DO !i
    2758              : 
    2759              : !  Clean-up
    2760            2 :       CALL cp_fm_release(prod_fm)
    2761            2 :       CALL cp_fm_release(work_fm)
    2762            2 :       CALL cp_fm_release(tmp_fm)
    2763            2 :       CALL cp_fm_release(vec_op)
    2764            2 :       CALL cp_fm_release(sggs_fm)
    2765            2 :       CALL cp_fm_struct_release(prod_struct)
    2766            2 :       CALL cp_fm_struct_release(full_struct)
    2767            2 :       CALL cp_fm_struct_release(tmp_struct)
    2768            2 :       CALL cp_fm_struct_release(sggs_struct)
    2769              : 
    2770           12 :    END SUBROUTINE get_rcs_amew_op
    2771              : 
    2772              : ! **************************************************************************************************
    2773              : !> \brief Computes the os SOC matrix elements between excited states AMEWs based on the LR orbitals
    2774              : !> \param amew_soc output dbcsr matrix with the SOC in the AMEW basis (needs to be fully resereved)
    2775              : !> \param lr_soc dbcsr matrix with the SOC wrt the LR orbitals
    2776              : !> \param lr_overlap dbcsr matrix with the excited states LR orbital overlap
    2777              : !> \param domo_soc the SOC in the basis of the donor MOs
    2778              : !> \param pref_diaga ...
    2779              : !> \param pref_diagb ...
    2780              : !> \param pref_tracea ...
    2781              : !> \param pref_traceb ...
    2782              : !> \param pref_diags see notes
    2783              : !> \param symmetric if the outcome is known to be symmetric, only elements with iex <= jex are done
    2784              : !> \param tracea_start the indices where to start in the trace part for alpha
    2785              : !> \param traceb_start the indices where to start in the trace part for beta
    2786              : !> \note For an excited states pair i,j, the AMEW SOC matrix element is:
    2787              : !>       soc_ij =   pref_diaga*SUM(alpha part of diag of lr_soc_ij)
    2788              : !>                + pref_diagb*SUM(beta part of diag of lr_soc_ij)
    2789              : !>                + pref_tracea*SUM(alpha part of lr_ovlp_ij*TRANSPOSE(domo_soc))
    2790              : !>                + pref_traceb*SUM(beta part of lr_ovlp_ij*TRANSPOSE(domo_soc))
    2791              : !>       optinally, one can add pref_diags*SUM(diag lr_ovlp_ij)
    2792              : ! **************************************************************************************************
    2793           20 :    SUBROUTINE os_amew_soc_elements(amew_soc, lr_soc, lr_overlap, domo_soc, pref_diaga, &
    2794              :                                    pref_diagb, pref_tracea, pref_traceb, pref_diags, &
    2795              :                                    symmetric, tracea_start, traceb_start)
    2796              : 
    2797              :       TYPE(dbcsr_type)                                   :: amew_soc, lr_soc, lr_overlap
    2798              :       REAL(dp), DIMENSION(:, :)                          :: domo_soc
    2799              :       REAL(dp)                                           :: pref_diaga, pref_diagb, pref_tracea, &
    2800              :                                                             pref_traceb
    2801              :       REAL(dp), OPTIONAL                                 :: pref_diags
    2802              :       LOGICAL, OPTIONAL                                  :: symmetric
    2803              :       INTEGER, DIMENSION(2), OPTIONAL                    :: tracea_start, traceb_start
    2804              : 
    2805              :       INTEGER                                            :: iex, jex, ndo_mo, ndo_so
    2806              :       INTEGER, DIMENSION(2)                              :: tas, tbs
    2807              :       LOGICAL                                            :: do_diags, found, my_symm
    2808              :       REAL(dp)                                           :: soc_elem
    2809           20 :       REAL(dp), ALLOCATABLE, DIMENSION(:)                :: diag
    2810           20 :       REAL(dp), DIMENSION(:, :), POINTER                 :: pblock
    2811              :       TYPE(dbcsr_iterator_type)                          :: iter
    2812              : 
    2813           20 :       ndo_so = SIZE(domo_soc, 1)
    2814           20 :       ndo_mo = ndo_so/2
    2815           60 :       ALLOCATE (diag(ndo_so))
    2816           20 :       my_symm = .FALSE.
    2817           20 :       IF (PRESENT(symmetric)) my_symm = symmetric
    2818           20 :       do_diags = .FALSE.
    2819           20 :       IF (PRESENT(pref_diags)) do_diags = .TRUE.
    2820              : 
    2821              :       !by default, alpha part is (1:ndo_mo,1:ndo_mo) and beta is (ndo_mo+1:ndo_so,ndo_mo+1:ndo_so)
    2822              :       !note: in some SF cases, that might change, mainly because the spin-flip LR-coeffs have
    2823              :       !inverse order, that is: the beta-coeffs in the alpha spot and the alpha coeffs in the
    2824              :       !beta spot
    2825           60 :       tas = 1
    2826           60 :       tbs = ndo_mo + 1
    2827           20 :       IF (PRESENT(tracea_start)) tas = tracea_start
    2828           20 :       IF (PRESENT(traceb_start)) tbs = traceb_start
    2829              : 
    2830           20 :       CALL dbcsr_set(amew_soc, 0.0_dp)
    2831              :       !loop over the excited states pairs as the block of amew_soc (which are all reserved)
    2832           20 :       CALL dbcsr_iterator_start(iter, amew_soc)
    2833         1460 :       DO WHILE (dbcsr_iterator_blocks_left(iter))
    2834              : 
    2835         1440 :          CALL dbcsr_iterator_next_block(iter, row=iex, column=jex)
    2836              : 
    2837         1440 :          IF (my_symm .AND. iex > jex) CYCLE
    2838              : 
    2839              :          !compute the soc matrix element
    2840          912 :          soc_elem = 0.0_dp
    2841          912 :          CALL dbcsr_get_block_p(lr_soc, iex, jex, pblock, found)
    2842          912 :          IF (found) THEN
    2843          444 :             diag(:) = get_diag(pblock)
    2844         3108 :             soc_elem = soc_elem + pref_diaga*SUM(diag(1:ndo_mo)) + pref_diagb*(SUM(diag(ndo_mo + 1:ndo_so)))
    2845              :          END IF
    2846              : 
    2847          912 :          CALL dbcsr_get_block_p(lr_overlap, iex, jex, pblock, found)
    2848          912 :          IF (found) THEN
    2849              :             soc_elem = soc_elem &
    2850              :                        + pref_tracea*SUM(pblock(tas(1):tas(1) + ndo_mo - 1, tas(2):tas(2) + ndo_mo - 1)* &
    2851              :                                          domo_soc(tas(1):tas(1) + ndo_mo - 1, tas(2):tas(2) + ndo_mo - 1)) &
    2852              :                        + pref_traceb*SUM(pblock(tbs(1):tbs(1) + ndo_mo - 1, tbs(2):tbs(2) + ndo_mo - 1)* &
    2853        12000 :                                          domo_soc(tbs(1):tbs(1) + ndo_mo - 1, tbs(2):tbs(2) + ndo_mo - 1))
    2854              : 
    2855          480 :             IF (do_diags) THEN
    2856          336 :                diag(:) = get_diag(pblock)
    2857         2352 :                soc_elem = soc_elem + pref_diags*SUM(diag)
    2858              :             END IF
    2859              :          END IF
    2860              : 
    2861          912 :          CALL dbcsr_get_block_p(amew_soc, iex, jex, pblock, found)
    2862         3284 :          pblock = soc_elem
    2863              : 
    2864              :       END DO
    2865           20 :       CALL dbcsr_iterator_stop(iter)
    2866              : 
    2867           60 :    END SUBROUTINE os_amew_soc_elements
    2868              : 
    2869              : ! **************************************************************************************************
    2870              : !> \brief Computes the rcs SOC matrix elements between excited states AMEWs based on the LR orbitals
    2871              : !> \param amew_soc output dbcsr matrix with the SOC in the AMEW basis (needs to be fully resereved)
    2872              : !> \param lr_soc dbcsr matrix with the SOC wrt the LR orbitals
    2873              : !> \param lr_overlap dbcsr matrix with the excited states LR orbital overlap
    2874              : !> \param domo_soc the SOC in the basis of the donor MOs
    2875              : !> \param pref_trace see notes
    2876              : !> \param pref_overall see notes
    2877              : !> \param pref_diags see notes
    2878              : !> \param symmetric if the outcome is known to be symmetric, only elements with iex <= jex are done
    2879              : !> \note For an excited states pair i,j, the AMEW SOC matrix element is:
    2880              : !>       soc_ij = pref_overall*(SUM(diag(lr_soc_ij)) + pref_trace*SUM(lr_overlap_ij*TRANSPOSE(domo_soc)))
    2881              : !>       optionally, the value pref_diags*SUM(diag(lr_overlap_ij)) can be added (before pref_overall)
    2882              : ! **************************************************************************************************
    2883          120 :    SUBROUTINE rcs_amew_soc_elements(amew_soc, lr_soc, lr_overlap, domo_soc, pref_trace, &
    2884              :                                     pref_overall, pref_diags, symmetric)
    2885              : 
    2886              :       TYPE(dbcsr_type)                                   :: amew_soc, lr_soc, lr_overlap
    2887              :       REAL(dp), DIMENSION(:, :)                          :: domo_soc
    2888              :       REAL(dp)                                           :: pref_trace, pref_overall
    2889              :       REAL(dp), OPTIONAL                                 :: pref_diags
    2890              :       LOGICAL, OPTIONAL                                  :: symmetric
    2891              : 
    2892              :       INTEGER                                            :: iex, jex
    2893              :       LOGICAL                                            :: do_diags, found, my_symm
    2894              :       REAL(dp)                                           :: soc_elem
    2895          120 :       REAL(dp), ALLOCATABLE, DIMENSION(:)                :: diag
    2896          120 :       REAL(dp), DIMENSION(:, :), POINTER                 :: pblock
    2897              :       TYPE(dbcsr_iterator_type)                          :: iter
    2898              : 
    2899          360 :       ALLOCATE (diag(SIZE(domo_soc, 1)))
    2900          120 :       my_symm = .FALSE.
    2901          120 :       IF (PRESENT(symmetric)) my_symm = symmetric
    2902          120 :       do_diags = .FALSE.
    2903          120 :       IF (PRESENT(pref_diags)) do_diags = .TRUE.
    2904              : 
    2905          120 :       CALL dbcsr_set(amew_soc, 0.0_dp)
    2906              :       !loop over the excited states pairs as the block of amew_soc (which are all reserved)
    2907          120 :       CALL dbcsr_iterator_start(iter, amew_soc)
    2908         2220 :       DO WHILE (dbcsr_iterator_blocks_left(iter))
    2909              : 
    2910         2100 :          CALL dbcsr_iterator_next_block(iter, row=iex, column=jex)
    2911              : 
    2912         2100 :          IF (my_symm .AND. iex > jex) CYCLE
    2913              : 
    2914              :          !compute the soc matrix element
    2915         1644 :          soc_elem = 0.0_dp
    2916         1644 :          CALL dbcsr_get_block_p(lr_soc, iex, jex, pblock, found)
    2917         1644 :          IF (found) THEN
    2918         1008 :             diag(:) = get_diag(pblock)
    2919         5328 :             soc_elem = soc_elem + SUM(diag)
    2920              :          END IF
    2921              : 
    2922         1644 :          CALL dbcsr_get_block_p(lr_overlap, iex, jex, pblock, found)
    2923         1644 :          IF (found) THEN
    2924        31050 :             soc_elem = soc_elem + pref_trace*SUM(pblock*TRANSPOSE(domo_soc))
    2925              : 
    2926         1158 :             IF (do_diags) THEN
    2927          432 :                diag(:) = get_diag(pblock)
    2928         2250 :                soc_elem = soc_elem + pref_diags*SUM(diag)
    2929              :             END IF
    2930              :          END IF
    2931              : 
    2932         1644 :          CALL dbcsr_get_block_p(amew_soc, iex, jex, pblock, found)
    2933         5508 :          pblock = pref_overall*soc_elem
    2934              : 
    2935              :       END DO
    2936          120 :       CALL dbcsr_iterator_stop(iter)
    2937              : 
    2938          360 :    END SUBROUTINE rcs_amew_soc_elements
    2939              : 
    2940              : ! **************************************************************************************************
    2941              : !> \brief Computes the dipole oscillator strengths in the AMEWs basis for SOC
    2942              : !> \param soc_evecs_cfm the complex AMEWs coefficients
    2943              : !> \param dbcsr_soc_package ...
    2944              : !> \param donor_state ...
    2945              : !> \param xas_tdp_env ...
    2946              : !> \param xas_tdp_control ...
    2947              : !> \param qs_env ...
    2948              : !> \param gs_coeffs the ground state coefficients, given for open-shell because in ROKS, the gs_coeffs
    2949              : !>                  are stored slightly differently within SOC for efficiency and code uniquness
    2950              : ! **************************************************************************************************
    2951            4 :    SUBROUTINE compute_soc_dipole_fosc(soc_evecs_cfm, dbcsr_soc_package, donor_state, xas_tdp_env, &
    2952              :                                       xas_tdp_control, qs_env, gs_coeffs)
    2953              : 
    2954              :       TYPE(cp_cfm_type), INTENT(IN)                      :: soc_evecs_cfm
    2955              :       TYPE(dbcsr_soc_package_type)                       :: dbcsr_soc_package
    2956              :       TYPE(donor_state_type), POINTER                    :: donor_state
    2957              :       TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
    2958              :       TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
    2959              :       TYPE(qs_environment_type), POINTER                 :: qs_env
    2960              :       TYPE(cp_fm_type), INTENT(IN), OPTIONAL             :: gs_coeffs
    2961              : 
    2962              :       CHARACTER(len=*), PARAMETER :: routineN = 'compute_soc_dipole_fosc'
    2963              : 
    2964            4 :       COMPLEX(dp), ALLOCATABLE, DIMENSION(:, :)          :: transdip
    2965              :       INTEGER                                            :: handle, i, nosc, ntot
    2966              :       LOGICAL                                            :: do_os, do_rcs
    2967            4 :       REAL(dp), ALLOCATABLE, DIMENSION(:)                :: osc_xyz
    2968            4 :       REAL(dp), DIMENSION(:), POINTER                    :: soc_evals
    2969            4 :       REAL(dp), DIMENSION(:, :), POINTER                 :: osc_str
    2970              :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
    2971              :       TYPE(cp_cfm_type)                                  :: dip_cfm, work1_cfm, work2_cfm
    2972              :       TYPE(cp_fm_struct_type), POINTER                   :: dip_struct, full_struct
    2973            4 :       TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:)        :: amew_dip
    2974              :       TYPE(mp_para_env_type), POINTER                    :: para_env
    2975              : 
    2976            4 :       NULLIFY (para_env, blacs_env, dip_struct, full_struct, osc_str)
    2977            4 :       NULLIFY (soc_evals)
    2978              : 
    2979            4 :       CALL timeset(routineN, handle)
    2980              : 
    2981              :       !init
    2982            4 :       CALL get_qs_env(qs_env, para_env=para_env, blacs_env=blacs_env)
    2983            4 :       do_os = xas_tdp_control%do_spin_cons
    2984            4 :       do_rcs = xas_tdp_control%do_singlet
    2985            4 :       soc_evals => donor_state%soc_evals
    2986            4 :       nosc = SIZE(soc_evals)
    2987            4 :       ntot = nosc + 1 !because GS AMEW is in there
    2988           12 :       ALLOCATE (donor_state%soc_osc_str(nosc, 4))
    2989            4 :       osc_str => donor_state%soc_osc_str
    2990          596 :       osc_str(:, :) = 0.0_dp
    2991            4 :       IF (do_os .AND. .NOT. PRESENT(gs_coeffs)) CPABORT("Need to pass gs_coeffs for open-shell")
    2992              : 
    2993              :       !get some work arrays/matrix
    2994              :       CALL cp_fm_struct_create(dip_struct, context=blacs_env, para_env=para_env, &
    2995            4 :                                nrow_global=ntot, ncol_global=1)
    2996            4 :       CALL cp_cfm_get_info(soc_evecs_cfm, matrix_struct=full_struct)
    2997            4 :       CALL cp_cfm_create(dip_cfm, dip_struct, set_zero=.TRUE.)
    2998            4 :       CALL cp_cfm_create(work1_cfm, full_struct, set_zero=.TRUE.)
    2999            4 :       CALL cp_cfm_create(work2_cfm, full_struct, set_zero=.TRUE.)
    3000           12 :       ALLOCATE (transdip(ntot, 1))
    3001              : 
    3002              :       !get the dipole in the AMEW basis
    3003            4 :       IF (do_os) THEN
    3004              :          CALL get_os_amew_op(amew_dip, xas_tdp_env%dipmat, gs_coeffs, dbcsr_soc_package, &
    3005            2 :                              donor_state, xas_tdp_control%eps_filter, qs_env)
    3006              :       ELSE
    3007              :          CALL get_rcs_amew_op(amew_dip, xas_tdp_env%dipmat, dbcsr_soc_package, donor_state, &
    3008            2 :                               xas_tdp_control%eps_filter, qs_env)
    3009              :       END IF
    3010              : 
    3011           12 :       ALLOCATE (osc_xyz(nosc))
    3012           16 :       DO i = 1, 3 !cartesian coord x, y, z
    3013              : 
    3014              :          !Convert the real dipole into the cfm format for calculations
    3015           12 :          CALL cp_fm_to_cfm(msourcer=amew_dip(i), mtarget=work1_cfm)
    3016              : 
    3017              :          !compute amew_coeffs^dagger * amew_dip * amew_gs to get the transition moments
    3018              :          CALL parallel_gemm('C', 'N', ntot, ntot, ntot, (1.0_dp, 0.0_dp), soc_evecs_cfm, work1_cfm, &
    3019           12 :                             (0.0_dp, 0.0_dp), work2_cfm)
    3020              :          CALL parallel_gemm('N', 'N', ntot, 1, ntot, (1.0_dp, 0.0_dp), work2_cfm, soc_evecs_cfm, &
    3021           12 :                             (0.0_dp, 0.0_dp), dip_cfm)
    3022              : 
    3023           12 :          CALL cp_cfm_get_submatrix(dip_cfm, transdip)
    3024              : 
    3025              :          !transition dipoles are real numbers
    3026          444 :          osc_xyz(:) = REAL(transdip(2:ntot, 1))**2 + AIMAG(transdip(2:ntot, 1))**2
    3027          444 :          osc_str(:, 4) = osc_str(:, 4) + osc_xyz(:)
    3028          448 :          osc_str(:, i) = osc_xyz(:)
    3029              : 
    3030              :       END DO !i
    3031              : 
    3032              :       !multiply with appropriate prefac depending in the rep
    3033           20 :       DO i = 1, 4
    3034           20 :          IF (xas_tdp_control%dipole_form == xas_dip_len) THEN
    3035            0 :             osc_str(:, i) = 2.0_dp/3.0_dp*soc_evals(:)*osc_str(:, i)
    3036              :          ELSE
    3037         1184 :             osc_str(:, i) = 2.0_dp/3.0_dp/soc_evals(:)*osc_str(:, i)
    3038              :          END IF
    3039              :       END DO
    3040              : 
    3041              :       !clean-up
    3042            4 :       CALL cp_fm_struct_release(dip_struct)
    3043            4 :       CALL cp_cfm_release(work1_cfm)
    3044            4 :       CALL cp_cfm_release(work2_cfm)
    3045            4 :       CALL cp_cfm_release(dip_cfm)
    3046           16 :       DO i = 1, 3
    3047           16 :          CALL cp_fm_release(amew_dip(i))
    3048              :       END DO
    3049            4 :       DEALLOCATE (amew_dip, transdip)
    3050              : 
    3051            4 :       CALL timestop(handle)
    3052              : 
    3053            8 :    END SUBROUTINE compute_soc_dipole_fosc
    3054              : 
    3055              : ! **************************************************************************************************
    3056              : !> \brief Computes the quadrupole oscillator strengths in the AMEWs basis for SOC
    3057              : !> \param soc_evecs_cfm the complex AMEWs coefficients
    3058              : !> \param dbcsr_soc_package inherited from the main SOC routine
    3059              : !> \param donor_state ...
    3060              : !> \param xas_tdp_env ...
    3061              : !> \param xas_tdp_control ...
    3062              : !> \param qs_env ...
    3063              : !> \param gs_coeffs the ground state coefficients, given for open-shell because in ROKS, the gs_coeffs
    3064              : !>                  are stored slightly differently within SOC for efficiency and code uniquness
    3065              : ! **************************************************************************************************
    3066            0 :    SUBROUTINE compute_soc_quadrupole_fosc(soc_evecs_cfm, dbcsr_soc_package, donor_state, &
    3067              :                                           xas_tdp_env, xas_tdp_control, qs_env, gs_coeffs)
    3068              : 
    3069              :       TYPE(cp_cfm_type), INTENT(IN)                      :: soc_evecs_cfm
    3070              :       TYPE(dbcsr_soc_package_type)                       :: dbcsr_soc_package
    3071              :       TYPE(donor_state_type), POINTER                    :: donor_state
    3072              :       TYPE(xas_tdp_env_type), POINTER                    :: xas_tdp_env
    3073              :       TYPE(xas_tdp_control_type), POINTER                :: xas_tdp_control
    3074              :       TYPE(qs_environment_type), POINTER                 :: qs_env
    3075              :       TYPE(cp_fm_type), INTENT(IN), OPTIONAL             :: gs_coeffs
    3076              : 
    3077              :       CHARACTER(len=*), PARAMETER :: routineN = 'compute_soc_quadrupole_fosc'
    3078              : 
    3079            0 :       COMPLEX(dp), ALLOCATABLE, DIMENSION(:)             :: trace
    3080              :       COMPLEX(dp), ALLOCATABLE, DIMENSION(:, :)          :: transquad
    3081              :       INTEGER                                            :: handle, i, nosc, ntot
    3082              :       LOGICAL                                            :: do_os, do_rcs
    3083            0 :       REAL(dp), DIMENSION(:), POINTER                    :: osc_str, soc_evals
    3084              :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
    3085              :       TYPE(cp_cfm_type)                                  :: quad_cfm, work1_cfm, work2_cfm
    3086              :       TYPE(cp_fm_struct_type), POINTER                   :: full_struct, quad_struct
    3087            0 :       TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:)        :: amew_quad
    3088              :       TYPE(mp_para_env_type), POINTER                    :: para_env
    3089              : 
    3090            0 :       NULLIFY (para_env, blacs_env, quad_struct, full_struct, osc_str)
    3091            0 :       NULLIFY (soc_evals)
    3092              : 
    3093            0 :       CALL timeset(routineN, handle)
    3094              : 
    3095              :       !init
    3096            0 :       CALL get_qs_env(qs_env, para_env=para_env, blacs_env=blacs_env)
    3097            0 :       do_os = xas_tdp_control%do_spin_cons
    3098            0 :       do_rcs = xas_tdp_control%do_singlet
    3099            0 :       soc_evals => donor_state%soc_evals
    3100            0 :       nosc = SIZE(soc_evals)
    3101            0 :       ntot = nosc + 1 !because GS AMEW is in there
    3102            0 :       ALLOCATE (donor_state%soc_quad_osc_str(nosc))
    3103            0 :       osc_str => donor_state%soc_quad_osc_str
    3104            0 :       osc_str(:) = 0.0_dp
    3105            0 :       IF (do_os .AND. .NOT. PRESENT(gs_coeffs)) CPABORT("Need to pass gs_coeffs for open-shell")
    3106              : 
    3107              :       !get some work arrays/matrix
    3108              :       CALL cp_fm_struct_create(quad_struct, context=blacs_env, para_env=para_env, &
    3109            0 :                                nrow_global=ntot, ncol_global=1)
    3110            0 :       CALL cp_cfm_get_info(soc_evecs_cfm, matrix_struct=full_struct)
    3111            0 :       CALL cp_cfm_create(quad_cfm, quad_struct)
    3112            0 :       CALL cp_cfm_create(work1_cfm, full_struct)
    3113            0 :       CALL cp_cfm_create(work2_cfm, full_struct)
    3114            0 :       ALLOCATE (transquad(ntot, 1))
    3115            0 :       ALLOCATE (trace(nosc))
    3116            0 :       trace = (0.0_dp, 0.0_dp)
    3117              : 
    3118              :       !get the quadrupole in the AMEWs basis
    3119            0 :       IF (do_os) THEN
    3120              :          CALL get_os_amew_op(amew_quad, xas_tdp_env%quadmat, gs_coeffs, dbcsr_soc_package, &
    3121            0 :                              donor_state, xas_tdp_control%eps_filter, qs_env)
    3122              :       ELSE
    3123              :          CALL get_rcs_amew_op(amew_quad, xas_tdp_env%quadmat, dbcsr_soc_package, donor_state, &
    3124            0 :                               xas_tdp_control%eps_filter, qs_env)
    3125              :       END IF
    3126              : 
    3127            0 :       DO i = 1, 6 ! x2, xy, xz, y2, yz, z2
    3128              : 
    3129              :          !Convert the real quadrupole into a cfm for further calculation
    3130            0 :          CALL cp_fm_to_cfm(msourcer=amew_quad(i), mtarget=work1_cfm)
    3131              : 
    3132              :          !compute amew_coeffs^dagger * amew_quad * amew_gs to get the transition moments
    3133              :          CALL parallel_gemm('C', 'N', ntot, ntot, ntot, (1.0_dp, 0.0_dp), soc_evecs_cfm, work1_cfm, &
    3134            0 :                             (0.0_dp, 0.0_dp), work2_cfm)
    3135              :          CALL parallel_gemm('N', 'N', ntot, 1, ntot, (1.0_dp, 0.0_dp), work2_cfm, soc_evecs_cfm, &
    3136            0 :                             (0.0_dp, 0.0_dp), quad_cfm)
    3137              : 
    3138            0 :          CALL cp_cfm_get_submatrix(quad_cfm, transquad)
    3139              : 
    3140              :          !if x2, y2 or z2, need to keep track of trace
    3141            0 :          IF (i == 1 .OR. i == 4 .OR. i == 6) THEN
    3142            0 :             osc_str(:) = osc_str(:) + REAL(transquad(2:ntot, 1))**2 + AIMAG(transquad(2:ntot, 1))**2
    3143            0 :             trace(:) = trace(:) + transquad(2:ntot, 1)
    3144              : 
    3145              :             !if xy, xz, or yz, need to count twice (for yx, zx and zy)
    3146              :          ELSE
    3147            0 :             osc_str(:) = osc_str(:) + 2.0_dp*(REAL(transquad(2:ntot, 1))**2 + AIMAG(transquad(2:ntot, 1))**2)
    3148              :          END IF
    3149              : 
    3150              :       END DO !i
    3151              : 
    3152              :       !remove a third of the trace
    3153            0 :       osc_str(:) = osc_str(:) - 1._dp/3._dp*(REAL(trace(:))**2 + AIMAG(trace(:))**2)
    3154              : 
    3155              :       !multiply by the prefactor
    3156            0 :       osc_str(:) = osc_str(:)*1._dp/20._dp*a_fine**2*soc_evals(:)**3
    3157              : 
    3158              :       !clean-up
    3159            0 :       CALL cp_fm_struct_release(quad_struct)
    3160            0 :       CALL cp_cfm_release(work1_cfm)
    3161            0 :       CALL cp_cfm_release(work2_cfm)
    3162            0 :       CALL cp_cfm_release(quad_cfm)
    3163            0 :       CALL cp_fm_release(amew_quad)
    3164            0 :       DEALLOCATE (transquad, trace)
    3165              : 
    3166            0 :       CALL timestop(handle)
    3167              : 
    3168            0 :    END SUBROUTINE compute_soc_quadrupole_fosc
    3169              : 
    3170            0 : END MODULE xas_tdp_utils
    3171              : 
        

Generated by: LCOV version 2.0-1