LCOV - code coverage report
Current view: top level - src - bse_util.F (source / functions) Coverage Total Hit
Test: CP2K Regtests (git:936074a) Lines: 84.6 % 722 611
Test Date: 2025-12-04 06:27:48 Functions: 93.3 % 15 14

            Line data    Source code
       1              : !--------------------------------------------------------------------------------------------------!
       2              : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3              : !   Copyright 2000-2025 CP2K developers group <https://cp2k.org>                                   !
       4              : !                                                                                                  !
       5              : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6              : !--------------------------------------------------------------------------------------------------!
       7              : 
       8              : ! **************************************************************************************************
       9              : !> \brief Auxiliary routines for GW + Bethe-Salpeter for computing electronic excitations
      10              : !> \par History
      11              : !>      11.2023 created [Maximilian Graml]
      12              : ! **************************************************************************************************
      13              : MODULE bse_util
      14              :    USE atomic_kind_types,               ONLY: atomic_kind_type
      15              :    USE cell_types,                      ONLY: cell_type
      16              :    USE cp_blacs_env,                    ONLY: cp_blacs_env_type
      17              :    USE cp_control_types,                ONLY: dft_control_type
      18              :    USE cp_dbcsr_api,                    ONLY: dbcsr_create,&
      19              :                                               dbcsr_init_p,&
      20              :                                               dbcsr_p_type,&
      21              :                                               dbcsr_set,&
      22              :                                               dbcsr_type_symmetric
      23              :    USE cp_dbcsr_cp2k_link,              ONLY: cp_dbcsr_alloc_block_from_nbl
      24              :    USE cp_dbcsr_operations,             ONLY: cp_dbcsr_sm_fm_multiply,&
      25              :                                               dbcsr_allocate_matrix_set,&
      26              :                                               dbcsr_deallocate_matrix_set
      27              :    USE cp_fm_basic_linalg,              ONLY: cp_fm_trace,&
      28              :                                               cp_fm_uplo_to_full
      29              :    USE cp_fm_cholesky,                  ONLY: cp_fm_cholesky_decompose,&
      30              :                                               cp_fm_cholesky_invert
      31              :    USE cp_fm_struct,                    ONLY: cp_fm_struct_create,&
      32              :                                               cp_fm_struct_release,&
      33              :                                               cp_fm_struct_type
      34              :    USE cp_fm_types,                     ONLY: cp_fm_create,&
      35              :                                               cp_fm_get_info,&
      36              :                                               cp_fm_release,&
      37              :                                               cp_fm_set_all,&
      38              :                                               cp_fm_to_fm_submat,&
      39              :                                               cp_fm_to_fm_submat_general,&
      40              :                                               cp_fm_type
      41              :    USE cp_log_handling,                 ONLY: cp_get_default_logger,&
      42              :                                               cp_logger_type
      43              :    USE cp_output_handling,              ONLY: cp_print_key_finished_output,&
      44              :                                               cp_print_key_unit_nr
      45              :    USE cp_realspace_grid_cube,          ONLY: cp_pw_to_cube
      46              :    USE input_constants,                 ONLY: bse_screening_alpha,&
      47              :                                               bse_screening_rpa,&
      48              :                                               bse_screening_tdhf,&
      49              :                                               use_mom_ref_coac
      50              :    USE input_section_types,             ONLY: section_vals_type
      51              :    USE kinds,                           ONLY: default_path_length,&
      52              :                                               dp,&
      53              :                                               int_8
      54              :    USE message_passing,                 ONLY: mp_para_env_type,&
      55              :                                               mp_request_type
      56              :    USE moments_utils,                   ONLY: get_reference_point
      57              :    USE mp2_types,                       ONLY: integ_mat_buffer_type,&
      58              :                                               mp2_type
      59              :    USE parallel_gemm_api,               ONLY: parallel_gemm
      60              :    USE particle_list_types,             ONLY: particle_list_type
      61              :    USE particle_types,                  ONLY: particle_type
      62              :    USE physcon,                         ONLY: evolt
      63              :    USE pw_env_types,                    ONLY: pw_env_get,&
      64              :                                               pw_env_type
      65              :    USE pw_poisson_types,                ONLY: pw_poisson_type
      66              :    USE pw_pool_types,                   ONLY: pw_pool_p_type,&
      67              :                                               pw_pool_type
      68              :    USE pw_types,                        ONLY: pw_c1d_gs_type,&
      69              :                                               pw_r3d_rs_type
      70              :    USE qs_collocate_density,            ONLY: calculate_wavefunction
      71              :    USE qs_environment_types,            ONLY: get_qs_env,&
      72              :                                               qs_environment_type
      73              :    USE qs_kind_types,                   ONLY: qs_kind_type
      74              :    USE qs_mo_types,                     ONLY: get_mo_set,&
      75              :                                               mo_set_type
      76              :    USE qs_moments,                      ONLY: build_local_moment_matrix
      77              :    USE qs_neighbor_list_types,          ONLY: neighbor_list_set_p_type
      78              :    USE qs_subsys_types,                 ONLY: qs_subsys_get,&
      79              :                                               qs_subsys_type
      80              :    USE rpa_communication,               ONLY: communicate_buffer
      81              :    USE util,                            ONLY: sort,&
      82              :                                               sort_unique
      83              : #include "./base/base_uses.f90"
      84              : 
      85              :    IMPLICIT NONE
      86              : 
      87              :    PRIVATE
      88              : 
      89              :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'bse_util'
      90              : 
      91              :    PUBLIC :: mult_B_with_W, fm_general_add_bse, truncate_fm, &
      92              :              deallocate_matrices_bse, comp_eigvec_coeff_BSE, sort_excitations, &
      93              :              estimate_BSE_resources, filter_eigvec_contrib, truncate_BSE_matrices, &
      94              :              determine_cutoff_indices, adapt_BSE_input_params, get_multipoles_mo, &
      95              :              reshuffle_eigvec, print_bse_nto_cubes, trace_exciton_descr
      96              : 
      97              : CONTAINS
      98              : 
      99              : ! **************************************************************************************************
     100              : !> \brief Multiplies B-matrix (RI-3c-Integrals) with W (screening) to obtain \bar{B}
     101              : !> \param fm_mat_S_ij_bse ...
     102              : !> \param fm_mat_S_ia_bse ...
     103              : !> \param fm_mat_S_bar_ia_bse ...
     104              : !> \param fm_mat_S_bar_ij_bse ...
     105              : !> \param fm_mat_Q_static_bse_gemm ...
     106              : !> \param dimen_RI ...
     107              : !> \param homo ...
     108              : !> \param virtual ...
     109              : ! **************************************************************************************************
     110          180 :    SUBROUTINE mult_B_with_W(fm_mat_S_ij_bse, fm_mat_S_ia_bse, fm_mat_S_bar_ia_bse, &
     111              :                             fm_mat_S_bar_ij_bse, fm_mat_Q_static_bse_gemm, &
     112              :                             dimen_RI, homo, virtual)
     113              : 
     114              :       TYPE(cp_fm_type), INTENT(IN)                       :: fm_mat_S_ij_bse, fm_mat_S_ia_bse
     115              :       TYPE(cp_fm_type), INTENT(OUT)                      :: fm_mat_S_bar_ia_bse, fm_mat_S_bar_ij_bse
     116              :       TYPE(cp_fm_type), INTENT(IN)                       :: fm_mat_Q_static_bse_gemm
     117              :       INTEGER, INTENT(IN)                                :: dimen_RI, homo, virtual
     118              : 
     119              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'mult_B_with_W'
     120              : 
     121              :       INTEGER                                            :: handle, i_global, iiB, info_chol, &
     122              :                                                             j_global, jjB, ncol_local, nrow_local
     123           30 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
     124              :       TYPE(cp_fm_type)                                   :: fm_work
     125              : 
     126           30 :       CALL timeset(routineN, handle)
     127              : 
     128           30 :       CALL cp_fm_create(fm_mat_S_bar_ia_bse, fm_mat_S_ia_bse%matrix_struct)
     129           30 :       CALL cp_fm_set_all(fm_mat_S_bar_ia_bse, 0.0_dp)
     130              : 
     131           30 :       CALL cp_fm_create(fm_mat_S_bar_ij_bse, fm_mat_S_ij_bse%matrix_struct)
     132           30 :       CALL cp_fm_set_all(fm_mat_S_bar_ij_bse, 0.0_dp)
     133              : 
     134           30 :       CALL cp_fm_create(fm_work, fm_mat_Q_static_bse_gemm%matrix_struct)
     135           30 :       CALL cp_fm_set_all(fm_work, 0.0_dp)
     136              : 
     137              :       ! get info of fm_mat_Q_static_bse and compute ((1+Q(0))^-1-1)
     138              :       CALL cp_fm_get_info(matrix=fm_mat_Q_static_bse_gemm, &
     139              :                           nrow_local=nrow_local, &
     140              :                           ncol_local=ncol_local, &
     141              :                           row_indices=row_indices, &
     142           30 :                           col_indices=col_indices)
     143              : 
     144         2446 :       DO jjB = 1, ncol_local
     145         2416 :          j_global = col_indices(jjB)
     146       101008 :          DO iiB = 1, nrow_local
     147        98562 :             i_global = row_indices(iiB)
     148       100978 :             IF (j_global == i_global .AND. i_global <= dimen_RI) THEN
     149         1208 :                fm_mat_Q_static_bse_gemm%local_data(iiB, jjB) = fm_mat_Q_static_bse_gemm%local_data(iiB, jjB) + 1.0_dp
     150              :             END IF
     151              :          END DO
     152              :       END DO
     153              : 
     154              :       ! calculate Trace(Log(Matrix)) as Log(DET(Matrix)) via cholesky decomposition
     155           30 :       CALL cp_fm_cholesky_decompose(matrix=fm_mat_Q_static_bse_gemm, n=dimen_RI, info_out=info_chol)
     156              : 
     157           30 :       IF (info_chol /= 0) THEN
     158            0 :          CALL cp_abort(__LOCATION__, 'Cholesky decomposition failed for static polarization in BSE')
     159              :       END IF
     160              : 
     161              :       ! calculate [1+Q(i0)]^-1
     162           30 :       CALL cp_fm_cholesky_invert(fm_mat_Q_static_bse_gemm)
     163              : 
     164              :       ! symmetrize the result
     165           30 :       CALL cp_fm_uplo_to_full(fm_mat_Q_static_bse_gemm, fm_work)
     166              : 
     167              :       CALL parallel_gemm(transa="N", transb="N", m=dimen_RI, n=homo**2, k=dimen_RI, alpha=1.0_dp, &
     168              :                          matrix_a=fm_mat_Q_static_bse_gemm, matrix_b=fm_mat_S_ij_bse, beta=0.0_dp, &
     169           30 :                          matrix_c=fm_mat_S_bar_ij_bse)
     170              : 
     171              :       ! fm_mat_S_bar_ia_bse has a different blacs_env as fm_mat_S_ij_bse since we take
     172              :       ! fm_mat_S_ia_bse from RPA. Therefore, we also need a different fm_mat_Q_static_bse_gemm
     173              :       CALL parallel_gemm(transa="N", transb="N", m=dimen_RI, n=homo*virtual, k=dimen_RI, alpha=1.0_dp, &
     174              :                          matrix_a=fm_mat_Q_static_bse_gemm, matrix_b=fm_mat_S_ia_bse, beta=0.0_dp, &
     175           30 :                          matrix_c=fm_mat_S_bar_ia_bse)
     176              : 
     177           30 :       CALL cp_fm_release(fm_work)
     178              : 
     179           30 :       CALL timestop(handle)
     180              : 
     181           30 :    END SUBROUTINE mult_B_with_W
     182              : 
     183              : ! **************************************************************************************************
     184              : !> \brief Adds and reorders full matrices with a combined index structure, e.g. adding W_ij,ab
     185              : !> to A_ia, jb which needs MPI communication.
     186              : !> \param fm_out ...
     187              : !> \param fm_in ...
     188              : !> \param beta ...
     189              : !> \param nrow_secidx_in ...
     190              : !> \param ncol_secidx_in ...
     191              : !> \param nrow_secidx_out ...
     192              : !> \param ncol_secidx_out ...
     193              : !> \param unit_nr ...
     194              : !> \param reordering ...
     195              : !> \param mp2_env ...
     196              : ! **************************************************************************************************
     197          434 :    SUBROUTINE fm_general_add_bse(fm_out, fm_in, beta, nrow_secidx_in, ncol_secidx_in, &
     198              :                                  nrow_secidx_out, ncol_secidx_out, unit_nr, reordering, mp2_env)
     199              : 
     200              :       TYPE(cp_fm_type), INTENT(INOUT)                    :: fm_out
     201              :       TYPE(cp_fm_type), INTENT(IN)                       :: fm_in
     202              :       REAL(kind=dp)                                      :: beta
     203              :       INTEGER, INTENT(IN)                                :: nrow_secidx_in, ncol_secidx_in, &
     204              :                                                             nrow_secidx_out, ncol_secidx_out
     205              :       INTEGER                                            :: unit_nr
     206              :       INTEGER, DIMENSION(4)                              :: reordering
     207              :       TYPE(mp2_type), INTENT(IN)                         :: mp2_env
     208              : 
     209              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'fm_general_add_bse'
     210              : 
     211              :       INTEGER :: col_idx_loc, dummy, handle, handle2, i_entry_rec, idx_col_out, idx_row_out, ii, &
     212              :          iproc, jj, ncol_block_in, ncol_block_out, ncol_local_in, ncol_local_out, nprocs, &
     213              :          nrow_block_in, nrow_block_out, nrow_local_in, nrow_local_out, proc_send, row_idx_loc, &
     214              :          send_pcol, send_prow
     215          434 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: entry_counter, num_entries_rec, &
     216              :                                                             num_entries_send
     217              :       INTEGER, DIMENSION(4)                              :: indices_in
     218          434 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices_in, col_indices_out, &
     219          434 :                                                             row_indices_in, row_indices_out
     220              :       TYPE(integ_mat_buffer_type), ALLOCATABLE, &
     221          434 :          DIMENSION(:)                                    :: buffer_rec, buffer_send
     222              :       TYPE(mp_para_env_type), POINTER                    :: para_env_out
     223          434 :       TYPE(mp_request_type), DIMENSION(:, :), POINTER    :: req_array
     224              : 
     225          434 :       CALL timeset(routineN, handle)
     226          434 :       CALL timeset(routineN//"_1_setup", handle2)
     227              : 
     228          434 :       para_env_out => fm_out%matrix_struct%para_env
     229              :       ! A_iajb
     230              :       ! We start by moving data from local parts of W_ijab to the full matrix A_iajb using buffers
     231              :       CALL cp_fm_get_info(matrix=fm_out, &
     232              :                           nrow_local=nrow_local_out, &
     233              :                           ncol_local=ncol_local_out, &
     234              :                           row_indices=row_indices_out, &
     235              :                           col_indices=col_indices_out, &
     236              :                           nrow_block=nrow_block_out, &
     237          434 :                           ncol_block=ncol_block_out)
     238              : 
     239         1302 :       ALLOCATE (num_entries_rec(0:para_env_out%num_pe - 1))
     240         1302 :       ALLOCATE (num_entries_send(0:para_env_out%num_pe - 1))
     241              : 
     242         1302 :       num_entries_rec(:) = 0
     243         1302 :       num_entries_send(:) = 0
     244              : 
     245          434 :       dummy = 0
     246              : 
     247              :       CALL cp_fm_get_info(matrix=fm_in, &
     248              :                           nrow_local=nrow_local_in, &
     249              :                           ncol_local=ncol_local_in, &
     250              :                           row_indices=row_indices_in, &
     251              :                           col_indices=col_indices_in, &
     252              :                           nrow_block=nrow_block_in, &
     253          434 :                           ncol_block=ncol_block_in)
     254              : 
     255          434 :       IF (unit_nr > 0 .AND. mp2_env%bse%bse_debug_print) THEN
     256           89 :          WRITE (unit_nr, '(T2,A10,T13,A14,A10,T71,I10)') 'BSE|DEBUG|', 'Row number of ', fm_out%name, &
     257          178 :             fm_out%matrix_struct%nrow_global
     258           89 :          WRITE (unit_nr, '(T2,A10,T13,A17,A10,T71,I10)') 'BSE|DEBUG|', 'Column number of ', fm_out%name, &
     259          178 :             fm_out%matrix_struct%ncol_global
     260              : 
     261           89 :          WRITE (unit_nr, '(T2,A10,T13,A18,A10,T71,I10)') 'BSE|DEBUG|', 'Row block size of ', fm_out%name, nrow_block_out
     262           89 :          WRITE (unit_nr, '(T2,A10,T13,A21,A10,T71,I10)') 'BSE|DEBUG|', 'Column block size of ', fm_out%name, ncol_block_out
     263              : 
     264           89 :          WRITE (unit_nr, '(T2,A10,T13,A14,A10,T71,I10)') 'BSE|DEBUG|', 'Row number of ', fm_in%name, &
     265          178 :             fm_in%matrix_struct%nrow_global
     266           89 :          WRITE (unit_nr, '(T2,A10,T13,A17,A10,T71,I10)') 'BSE|DEBUG|', 'Column number of ', fm_in%name, &
     267          178 :             fm_in%matrix_struct%ncol_global
     268              : 
     269           89 :          WRITE (unit_nr, '(T2,A10,T13,A18,A10,T71,I10)') 'BSE|DEBUG|', 'Row block size of ', fm_in%name, nrow_block_in
     270           89 :          WRITE (unit_nr, '(T2,A10,T13,A21,A10,T71,I10)') 'BSE|DEBUG|', 'Column block size of ', fm_in%name, ncol_block_in
     271              :       END IF
     272              : 
     273              :       ! Use scalapack wrapper to find process index in fm_out
     274              :       ! To that end, we obtain the global index in fm_out from the level indices
     275          434 :       indices_in(:) = 0
     276         8755 :       DO row_idx_loc = 1, nrow_local_in
     277         8321 :          indices_in(1) = (row_indices_in(row_idx_loc) - 1)/nrow_secidx_in + 1
     278         8321 :          indices_in(2) = MOD(row_indices_in(row_idx_loc) - 1, nrow_secidx_in) + 1
     279        66443 :          DO col_idx_loc = 1, ncol_local_in
     280        57688 :             indices_in(3) = (col_indices_in(col_idx_loc) - 1)/ncol_secidx_in + 1
     281        57688 :             indices_in(4) = MOD(col_indices_in(col_idx_loc) - 1, ncol_secidx_in) + 1
     282              : 
     283        57688 :             idx_row_out = indices_in(reordering(2)) + (indices_in(reordering(1)) - 1)*nrow_secidx_out
     284        57688 :             idx_col_out = indices_in(reordering(4)) + (indices_in(reordering(3)) - 1)*ncol_secidx_out
     285              : 
     286        57688 :             send_prow = fm_out%matrix_struct%g2p_row(idx_row_out)
     287        57688 :             send_pcol = fm_out%matrix_struct%g2p_col(idx_col_out)
     288              : 
     289        57688 :             proc_send = fm_out%matrix_struct%context%blacs2mpi(send_prow, send_pcol)
     290              : 
     291        66009 :             num_entries_send(proc_send) = num_entries_send(proc_send) + 1
     292              : 
     293              :          END DO
     294              :       END DO
     295              : 
     296          434 :       CALL timestop(handle2)
     297              : 
     298          434 :       CALL timeset(routineN//"_2_comm_entry_nums", handle2)
     299          434 :       IF (unit_nr > 0 .AND. mp2_env%bse%bse_debug_print) THEN
     300           89 :          WRITE (unit_nr, '(T2,A10,T13,A27)') 'BSE|DEBUG|', 'Communicating entry numbers'
     301              :       END IF
     302              : 
     303          434 :       CALL para_env_out%alltoall(num_entries_send, num_entries_rec, 1)
     304              : 
     305          434 :       CALL timestop(handle2)
     306              : 
     307          434 :       CALL timeset(routineN//"_3_alloc_buffer", handle2)
     308          434 :       IF (unit_nr > 0 .AND. mp2_env%bse%bse_debug_print) THEN
     309           89 :          WRITE (unit_nr, '(T2,A10,T13,A18)') 'BSE|DEBUG|', 'Allocating buffers'
     310              :       END IF
     311              : 
     312              :       ! Buffers for entries and their indices
     313         2170 :       ALLOCATE (buffer_rec(0:para_env_out%num_pe - 1))
     314         2170 :       ALLOCATE (buffer_send(0:para_env_out%num_pe - 1))
     315              : 
     316              :       ! allocate data message and corresponding indices
     317         1302 :       DO iproc = 0, para_env_out%num_pe - 1
     318              : 
     319         2470 :          ALLOCATE (buffer_rec(iproc)%msg(num_entries_rec(iproc)))
     320        58990 :          buffer_rec(iproc)%msg = 0.0_dp
     321              : 
     322              :       END DO
     323              : 
     324         1302 :       DO iproc = 0, para_env_out%num_pe - 1
     325              : 
     326         2470 :          ALLOCATE (buffer_send(iproc)%msg(num_entries_send(iproc)))
     327        58990 :          buffer_send(iproc)%msg = 0.0_dp
     328              : 
     329              :       END DO
     330              : 
     331         1302 :       DO iproc = 0, para_env_out%num_pe - 1
     332              : 
     333         2470 :          ALLOCATE (buffer_rec(iproc)%indx(num_entries_rec(iproc), 2))
     334       118414 :          buffer_rec(iproc)%indx = 0
     335              : 
     336              :       END DO
     337              : 
     338         1302 :       DO iproc = 0, para_env_out%num_pe - 1
     339              : 
     340         2470 :          ALLOCATE (buffer_send(iproc)%indx(num_entries_send(iproc), 2))
     341       118414 :          buffer_send(iproc)%indx = 0
     342              : 
     343              :       END DO
     344              : 
     345          434 :       CALL timestop(handle2)
     346              : 
     347          434 :       CALL timeset(routineN//"_4_buf_from_fmin_"//fm_out%name, handle2)
     348          434 :       IF (unit_nr > 0 .AND. mp2_env%bse%bse_debug_print) THEN
     349           89 :          WRITE (unit_nr, '(T2,A10,T13,A18,A10,A13)') 'BSE|DEBUG|', 'Writing data from ', fm_in%name, ' into buffers'
     350              :       END IF
     351              : 
     352         1302 :       ALLOCATE (entry_counter(0:para_env_out%num_pe - 1))
     353         1302 :       entry_counter(:) = 0
     354              : 
     355              :       ! Now we can write the actual data and indices to the send-buffer
     356         8755 :       DO row_idx_loc = 1, nrow_local_in
     357         8321 :          indices_in(1) = (row_indices_in(row_idx_loc) - 1)/nrow_secidx_in + 1
     358         8321 :          indices_in(2) = MOD(row_indices_in(row_idx_loc) - 1, nrow_secidx_in) + 1
     359        66443 :          DO col_idx_loc = 1, ncol_local_in
     360        57688 :             indices_in(3) = (col_indices_in(col_idx_loc) - 1)/ncol_secidx_in + 1
     361        57688 :             indices_in(4) = MOD(col_indices_in(col_idx_loc) - 1, ncol_secidx_in) + 1
     362              : 
     363        57688 :             idx_row_out = indices_in(reordering(2)) + (indices_in(reordering(1)) - 1)*nrow_secidx_out
     364        57688 :             idx_col_out = indices_in(reordering(4)) + (indices_in(reordering(3)) - 1)*ncol_secidx_out
     365              : 
     366        57688 :             send_prow = fm_out%matrix_struct%g2p_row(idx_row_out)
     367        57688 :             send_pcol = fm_out%matrix_struct%g2p_col(idx_col_out)
     368              : 
     369        57688 :             proc_send = fm_out%matrix_struct%context%blacs2mpi(send_prow, send_pcol)
     370        57688 :             entry_counter(proc_send) = entry_counter(proc_send) + 1
     371              : 
     372              :             buffer_send(proc_send)%msg(entry_counter(proc_send)) = &
     373        57688 :                fm_in%local_data(row_idx_loc, col_idx_loc)
     374              : 
     375        57688 :             buffer_send(proc_send)%indx(entry_counter(proc_send), 1) = idx_row_out
     376        66009 :             buffer_send(proc_send)%indx(entry_counter(proc_send), 2) = idx_col_out
     377              : 
     378              :          END DO
     379              :       END DO
     380              : 
     381         6510 :       ALLOCATE (req_array(1:para_env_out%num_pe, 4))
     382              : 
     383          434 :       CALL timestop(handle2)
     384              : 
     385          434 :       CALL timeset(routineN//"_5_comm_buffer", handle2)
     386          434 :       IF (unit_nr > 0 .AND. mp2_env%bse%bse_debug_print) THEN
     387           89 :          WRITE (unit_nr, '(T2,A10,T13,A21)') 'BSE|DEBUG|', 'Communicating buffers'
     388              :       END IF
     389              : 
     390              :       ! communicate the buffer
     391              :       CALL communicate_buffer(para_env_out, num_entries_rec, num_entries_send, buffer_rec, &
     392          434 :                               buffer_send, req_array)
     393              : 
     394          434 :       CALL timestop(handle2)
     395              : 
     396          434 :       CALL timeset(routineN//"_6_buffer_to_fmout"//fm_out%name, handle2)
     397          434 :       IF (unit_nr > 0 .AND. mp2_env%bse%bse_debug_print) THEN
     398           89 :          WRITE (unit_nr, '(T2,A10,T13,A24,A10)') 'BSE|DEBUG|', 'Writing from buffers to ', fm_out%name
     399              :       END IF
     400              : 
     401              :       ! fill fm_out with the entries from buffer_rec, i.e. buffer_rec are parts of fm_in
     402          434 :       nprocs = para_env_out%num_pe
     403              : 
     404              : !$OMP PARALLEL DO DEFAULT(NONE) &
     405              : !$OMP SHARED(fm_out, nprocs, num_entries_rec, buffer_rec, beta) &
     406          434 : !$OMP PRIVATE(iproc, i_entry_rec, ii, jj)
     407              :       DO iproc = 0, nprocs - 1
     408              :          DO i_entry_rec = 1, num_entries_rec(iproc)
     409              :             ii = fm_out%matrix_struct%g2l_row(buffer_rec(iproc)%indx(i_entry_rec, 1))
     410              :             jj = fm_out%matrix_struct%g2l_col(buffer_rec(iproc)%indx(i_entry_rec, 2))
     411              : 
     412              :             fm_out%local_data(ii, jj) = fm_out%local_data(ii, jj) + beta*buffer_rec(iproc)%msg(i_entry_rec)
     413              :          END DO
     414              :       END DO
     415              : !$OMP END PARALLEL DO
     416              : 
     417          434 :       CALL timestop(handle2)
     418              : 
     419          434 :       CALL timeset(routineN//"_7_cleanup", handle2)
     420          434 :       IF (unit_nr > 0 .AND. mp2_env%bse%bse_debug_print) THEN
     421           89 :          WRITE (unit_nr, '(T2,A10,T13,A41)') 'BSE|DEBUG|', 'Starting cleanup of communication buffers'
     422              :       END IF
     423              : 
     424              :       !Clean up all the arrays from the communication process
     425         1302 :       DO iproc = 0, para_env_out%num_pe - 1
     426          868 :          DEALLOCATE (buffer_rec(iproc)%msg)
     427          868 :          DEALLOCATE (buffer_rec(iproc)%indx)
     428          868 :          DEALLOCATE (buffer_send(iproc)%msg)
     429         1302 :          DEALLOCATE (buffer_send(iproc)%indx)
     430              :       END DO
     431         2170 :       DEALLOCATE (buffer_rec, buffer_send)
     432          434 :       DEALLOCATE (req_array)
     433          434 :       DEALLOCATE (entry_counter)
     434          434 :       DEALLOCATE (num_entries_rec, num_entries_send)
     435              : 
     436          434 :       CALL timestop(handle2)
     437          434 :       CALL timestop(handle)
     438              : 
     439         3038 :    END SUBROUTINE fm_general_add_bse
     440              : 
     441              : ! **************************************************************************************************
     442              : !> \brief Routine for truncating a full matrix as given by the energy cutoffs in the input file.
     443              : !>  Logic: Matrices have some dimension dimen_RI x nrow_in*ncol_in  for the incoming (untruncated) matrix
     444              : !>  and dimen_RI x nrow_out*ncol_out for the truncated matrix. The truncation is done by resorting the indices
     445              : !>  via parallel communication.
     446              : !> \param fm_out ...
     447              : !> \param fm_in ...
     448              : !> \param ncol_in ...
     449              : !> \param nrow_out ...
     450              : !> \param ncol_out ...
     451              : !> \param unit_nr ...
     452              : !> \param mp2_env ...
     453              : !> \param nrow_offset ...
     454              : !> \param ncol_offset ...
     455              : ! **************************************************************************************************
     456           90 :    SUBROUTINE truncate_fm(fm_out, fm_in, ncol_in, &
     457              :                           nrow_out, ncol_out, unit_nr, mp2_env, &
     458              :                           nrow_offset, ncol_offset)
     459              : 
     460              :       TYPE(cp_fm_type), INTENT(INOUT)                    :: fm_out
     461              :       TYPE(cp_fm_type), INTENT(IN)                       :: fm_in
     462              :       INTEGER                                            :: ncol_in, nrow_out, ncol_out, unit_nr
     463              :       TYPE(mp2_type), INTENT(INOUT)                      :: mp2_env
     464              :       INTEGER, INTENT(IN), OPTIONAL                      :: nrow_offset, ncol_offset
     465              : 
     466              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'truncate_fm'
     467              : 
     468              :       INTEGER :: col_idx_loc, dummy, handle, handle2, i_entry_rec, idx_col_first, idx_col_in, &
     469              :          idx_col_out, idx_col_sec, idx_row_in, ii, iproc, jj, ncol_block_in, ncol_block_out, &
     470              :          ncol_local_in, ncol_local_out, nprocs, nrow_block_in, nrow_block_out, nrow_local_in, &
     471              :          nrow_local_out, proc_send, row_idx_loc, send_pcol, send_prow
     472           90 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: entry_counter, num_entries_rec, &
     473              :                                                             num_entries_send
     474           90 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices_in, col_indices_out, &
     475           90 :                                                             row_indices_in, row_indices_out
     476              :       LOGICAL                                            :: correct_ncol, correct_nrow
     477              :       TYPE(integ_mat_buffer_type), ALLOCATABLE, &
     478           90 :          DIMENSION(:)                                    :: buffer_rec, buffer_send
     479              :       TYPE(mp_para_env_type), POINTER                    :: para_env_out
     480           90 :       TYPE(mp_request_type), DIMENSION(:, :), POINTER    :: req_array
     481              : 
     482           90 :       CALL timeset(routineN, handle)
     483           90 :       CALL timeset(routineN//"_1_setup", handle2)
     484              : 
     485           90 :       correct_nrow = .FALSE.
     486           90 :       correct_ncol = .FALSE.
     487              :       !In case of truncation in the occupied space, we need to correct the interval of indices
     488           90 :       IF (PRESENT(nrow_offset)) THEN
     489           60 :          correct_nrow = .TRUE.
     490              :       END IF
     491           90 :       IF (PRESENT(ncol_offset)) THEN
     492           30 :          correct_ncol = .TRUE.
     493              :       END IF
     494              : 
     495           90 :       para_env_out => fm_out%matrix_struct%para_env
     496              : 
     497              :       CALL cp_fm_get_info(matrix=fm_out, &
     498              :                           nrow_local=nrow_local_out, &
     499              :                           ncol_local=ncol_local_out, &
     500              :                           row_indices=row_indices_out, &
     501              :                           col_indices=col_indices_out, &
     502              :                           nrow_block=nrow_block_out, &
     503           90 :                           ncol_block=ncol_block_out)
     504              : 
     505          270 :       ALLOCATE (num_entries_rec(0:para_env_out%num_pe - 1))
     506          270 :       ALLOCATE (num_entries_send(0:para_env_out%num_pe - 1))
     507              : 
     508          270 :       num_entries_rec(:) = 0
     509          270 :       num_entries_send(:) = 0
     510              : 
     511           90 :       dummy = 0
     512              : 
     513              :       CALL cp_fm_get_info(matrix=fm_in, &
     514              :                           nrow_local=nrow_local_in, &
     515              :                           ncol_local=ncol_local_in, &
     516              :                           row_indices=row_indices_in, &
     517              :                           col_indices=col_indices_in, &
     518              :                           nrow_block=nrow_block_in, &
     519           90 :                           ncol_block=ncol_block_in)
     520              : 
     521           90 :       IF (unit_nr > 0 .AND. mp2_env%bse%bse_debug_print) THEN
     522            9 :          WRITE (unit_nr, '(T2,A10,T13,A14,A10,T71,I10)') 'BSE|DEBUG|', 'Row number of ', fm_out%name, &
     523           18 :             fm_out%matrix_struct%nrow_global
     524            9 :          WRITE (unit_nr, '(T2,A10,T13,A17,A10,T71,I10)') 'BSE|DEBUG|', 'Column number of ', fm_out%name, &
     525           18 :             fm_out%matrix_struct%ncol_global
     526              : 
     527            9 :          WRITE (unit_nr, '(T2,A10,T13,A18,A10,T71,I10)') 'BSE|DEBUG|', 'Row block size of ', fm_out%name, nrow_block_out
     528            9 :          WRITE (unit_nr, '(T2,A10,T13,A21,A10,T71,I10)') 'BSE|DEBUG|', 'Column block size of ', fm_out%name, ncol_block_out
     529              : 
     530            9 :          WRITE (unit_nr, '(T2,A10,T13,A14,A10,T71,I10)') 'BSE|DEBUG|', 'Row number of ', fm_in%name, &
     531           18 :             fm_in%matrix_struct%nrow_global
     532            9 :          WRITE (unit_nr, '(T2,A10,T13,A17,A10,T71,I10)') 'BSE|DEBUG|', 'Column number of ', fm_in%name, &
     533           18 :             fm_in%matrix_struct%ncol_global
     534              : 
     535            9 :          WRITE (unit_nr, '(T2,A10,T13,A18,A10,T71,I10)') 'BSE|DEBUG|', 'Row block size of ', fm_in%name, nrow_block_in
     536            9 :          WRITE (unit_nr, '(T2,A10,T13,A21,A10,T71,I10)') 'BSE|DEBUG|', 'Column block size of ', fm_in%name, ncol_block_in
     537              :       END IF
     538              : 
     539              :       ! We find global indices in S with nrow_in and ncol_in for truncation
     540         6854 :       DO col_idx_loc = 1, ncol_local_in
     541         6764 :          idx_col_in = col_indices_in(col_idx_loc)
     542              : 
     543         6764 :          idx_col_first = (idx_col_in - 1)/ncol_in + 1
     544         6764 :          idx_col_sec = MOD(idx_col_in - 1, ncol_in) + 1
     545              : 
     546              :          ! If occupied orbitals are included, these have to be handled differently
     547              :          ! due to their reversed indexing
     548         6764 :          IF (correct_nrow) THEN
     549         2604 :             idx_col_first = idx_col_first - nrow_offset + 1
     550         2604 :             IF (idx_col_first <= 0) CYCLE
     551              :          ELSE
     552         4160 :             IF (idx_col_first > nrow_out) EXIT
     553              :          END IF
     554         6764 :          IF (correct_ncol) THEN
     555          450 :             idx_col_sec = idx_col_sec - ncol_offset + 1
     556          450 :             IF (idx_col_sec <= 0) CYCLE
     557              :          ELSE
     558         6314 :             IF (idx_col_sec > ncol_out) CYCLE
     559              :          END IF
     560              : 
     561         5970 :          idx_col_out = idx_col_sec + (idx_col_first - 1)*ncol_out
     562              : 
     563       251114 :          DO row_idx_loc = 1, nrow_local_in
     564       245054 :             idx_row_in = row_indices_in(row_idx_loc)
     565              : 
     566       245054 :             send_prow = fm_out%matrix_struct%g2p_row(idx_row_in)
     567       245054 :             send_pcol = fm_out%matrix_struct%g2p_col(idx_col_out)
     568              : 
     569       245054 :             proc_send = fm_out%matrix_struct%context%blacs2mpi(send_prow, send_pcol)
     570              : 
     571       251818 :             num_entries_send(proc_send) = num_entries_send(proc_send) + 1
     572              : 
     573              :          END DO
     574              :       END DO
     575              : 
     576           90 :       CALL timestop(handle2)
     577              : 
     578           90 :       CALL timeset(routineN//"_2_comm_entry_nums", handle2)
     579           90 :       IF (unit_nr > 0 .AND. mp2_env%bse%bse_debug_print) THEN
     580            9 :          WRITE (unit_nr, '(T2,A10,T13,A27)') 'BSE|DEBUG|', 'Communicating entry numbers'
     581              :       END IF
     582              : 
     583           90 :       CALL para_env_out%alltoall(num_entries_send, num_entries_rec, 1)
     584              : 
     585           90 :       CALL timestop(handle2)
     586              : 
     587           90 :       CALL timeset(routineN//"_3_alloc_buffer", handle2)
     588           90 :       IF (unit_nr > 0 .AND. mp2_env%bse%bse_debug_print) THEN
     589            9 :          WRITE (unit_nr, '(T2,A10,T13,A18)') 'BSE|DEBUG|', 'Allocating buffers'
     590              :       END IF
     591              : 
     592              :       ! Buffers for entries and their indices
     593          450 :       ALLOCATE (buffer_rec(0:para_env_out%num_pe - 1))
     594          450 :       ALLOCATE (buffer_send(0:para_env_out%num_pe - 1))
     595              : 
     596              :       ! allocate data message and corresponding indices
     597          270 :       DO iproc = 0, para_env_out%num_pe - 1
     598              : 
     599          480 :          ALLOCATE (buffer_rec(iproc)%msg(num_entries_rec(iproc)))
     600       245324 :          buffer_rec(iproc)%msg = 0.0_dp
     601              : 
     602              :       END DO
     603              : 
     604          270 :       DO iproc = 0, para_env_out%num_pe - 1
     605              : 
     606          480 :          ALLOCATE (buffer_send(iproc)%msg(num_entries_send(iproc)))
     607       245324 :          buffer_send(iproc)%msg = 0.0_dp
     608              : 
     609              :       END DO
     610              : 
     611          270 :       DO iproc = 0, para_env_out%num_pe - 1
     612              : 
     613          480 :          ALLOCATE (buffer_rec(iproc)%indx(num_entries_rec(iproc), 2))
     614       490738 :          buffer_rec(iproc)%indx = 0
     615              : 
     616              :       END DO
     617              : 
     618          270 :       DO iproc = 0, para_env_out%num_pe - 1
     619              : 
     620          480 :          ALLOCATE (buffer_send(iproc)%indx(num_entries_send(iproc), 2))
     621       490738 :          buffer_send(iproc)%indx = 0
     622              : 
     623              :       END DO
     624              : 
     625           90 :       CALL timestop(handle2)
     626              : 
     627           90 :       CALL timeset(routineN//"_4_buf_from_fmin_"//fm_out%name, handle2)
     628           90 :       IF (unit_nr > 0 .AND. mp2_env%bse%bse_debug_print) THEN
     629            9 :          WRITE (unit_nr, '(T2,A10,T13,A18,A10,A13)') 'BSE|DEBUG|', 'Writing data from ', fm_in%name, ' into buffers'
     630              :       END IF
     631              : 
     632          270 :       ALLOCATE (entry_counter(0:para_env_out%num_pe - 1))
     633          270 :       entry_counter(:) = 0
     634              : 
     635              :       ! Now we can write the actual data and indices to the send-buffer
     636         6854 :       DO col_idx_loc = 1, ncol_local_in
     637         6764 :          idx_col_in = col_indices_in(col_idx_loc)
     638              : 
     639         6764 :          idx_col_first = (idx_col_in - 1)/ncol_in + 1
     640         6764 :          idx_col_sec = MOD(idx_col_in - 1, ncol_in) + 1
     641              : 
     642              :          ! If occupied orbitals are included, these have to be handled differently
     643              :          ! due to their reversed indexing
     644         6764 :          IF (correct_nrow) THEN
     645         2604 :             idx_col_first = idx_col_first - nrow_offset + 1
     646         2604 :             IF (idx_col_first <= 0) CYCLE
     647              :          ELSE
     648         4160 :             IF (idx_col_first > nrow_out) EXIT
     649              :          END IF
     650         6764 :          IF (correct_ncol) THEN
     651          450 :             idx_col_sec = idx_col_sec - ncol_offset + 1
     652          450 :             IF (idx_col_sec <= 0) CYCLE
     653              :          ELSE
     654         6314 :             IF (idx_col_sec > ncol_out) CYCLE
     655              :          END IF
     656              : 
     657         5970 :          idx_col_out = idx_col_sec + (idx_col_first - 1)*ncol_out
     658              : 
     659       251114 :          DO row_idx_loc = 1, nrow_local_in
     660       245054 :             idx_row_in = row_indices_in(row_idx_loc)
     661              : 
     662       245054 :             send_prow = fm_out%matrix_struct%g2p_row(idx_row_in)
     663              : 
     664       245054 :             send_pcol = fm_out%matrix_struct%g2p_col(idx_col_out)
     665              : 
     666       245054 :             proc_send = fm_out%matrix_struct%context%blacs2mpi(send_prow, send_pcol)
     667       245054 :             entry_counter(proc_send) = entry_counter(proc_send) + 1
     668              : 
     669              :             buffer_send(proc_send)%msg(entry_counter(proc_send)) = &
     670       245054 :                fm_in%local_data(row_idx_loc, col_idx_loc)
     671              :             !No need to create row_out, since it is identical to incoming
     672              :             !We dont change the RI index for any fm_mat_XX_BSE
     673       245054 :             buffer_send(proc_send)%indx(entry_counter(proc_send), 1) = idx_row_in
     674       251818 :             buffer_send(proc_send)%indx(entry_counter(proc_send), 2) = idx_col_out
     675              : 
     676              :          END DO
     677              :       END DO
     678              : 
     679         1350 :       ALLOCATE (req_array(1:para_env_out%num_pe, 4))
     680              : 
     681           90 :       CALL timestop(handle2)
     682              : 
     683           90 :       CALL timeset(routineN//"_5_comm_buffer", handle2)
     684           90 :       IF (unit_nr > 0 .AND. mp2_env%bse%bse_debug_print) THEN
     685            9 :          WRITE (unit_nr, '(T2,A10,T13,A21)') 'BSE|DEBUG|', 'Communicating buffers'
     686              :       END IF
     687              : 
     688              :       ! communicate the buffer
     689              :       CALL communicate_buffer(para_env_out, num_entries_rec, num_entries_send, buffer_rec, &
     690           90 :                               buffer_send, req_array)
     691              : 
     692           90 :       CALL timestop(handle2)
     693              : 
     694           90 :       CALL timeset(routineN//"_6_buffer_to_fmout"//fm_out%name, handle2)
     695           90 :       IF (unit_nr > 0 .AND. mp2_env%bse%bse_debug_print) THEN
     696            9 :          WRITE (unit_nr, '(T2,A10,T13,A24,A10)') 'BSE|DEBUG|', 'Writing from buffers to ', fm_out%name
     697              :       END IF
     698              : 
     699              :       ! fill fm_out with the entries from buffer_rec, i.e. buffer_rec are parts of fm_in
     700           90 :       nprocs = para_env_out%num_pe
     701              : 
     702              : !$OMP PARALLEL DO DEFAULT(NONE) &
     703              : !$OMP SHARED(fm_out, nprocs, num_entries_rec, buffer_rec) &
     704           90 : !$OMP PRIVATE(iproc, i_entry_rec, ii, jj)
     705              :       DO iproc = 0, nprocs - 1
     706              :          DO i_entry_rec = 1, num_entries_rec(iproc)
     707              :             ii = fm_out%matrix_struct%g2l_row(buffer_rec(iproc)%indx(i_entry_rec, 1))
     708              :             jj = fm_out%matrix_struct%g2l_col(buffer_rec(iproc)%indx(i_entry_rec, 2))
     709              : 
     710              :             fm_out%local_data(ii, jj) = fm_out%local_data(ii, jj) + buffer_rec(iproc)%msg(i_entry_rec)
     711              :          END DO
     712              :       END DO
     713              : !$OMP END PARALLEL DO
     714              : 
     715           90 :       CALL timestop(handle2)
     716              : 
     717           90 :       CALL timeset(routineN//"_7_cleanup", handle2)
     718           90 :       IF (unit_nr > 0 .AND. mp2_env%bse%bse_debug_print) THEN
     719            9 :          WRITE (unit_nr, '(T2,A10,T13,A41)') 'BSE|DEBUG|', 'Starting cleanup of communication buffers'
     720              :       END IF
     721              : 
     722              :       !Clean up all the arrays from the communication process
     723          270 :       DO iproc = 0, para_env_out%num_pe - 1
     724          180 :          DEALLOCATE (buffer_rec(iproc)%msg)
     725          180 :          DEALLOCATE (buffer_rec(iproc)%indx)
     726          180 :          DEALLOCATE (buffer_send(iproc)%msg)
     727          270 :          DEALLOCATE (buffer_send(iproc)%indx)
     728              :       END DO
     729          450 :       DEALLOCATE (buffer_rec, buffer_send)
     730           90 :       DEALLOCATE (req_array)
     731           90 :       DEALLOCATE (entry_counter)
     732           90 :       DEALLOCATE (num_entries_rec, num_entries_send)
     733              : 
     734           90 :       CALL timestop(handle2)
     735           90 :       CALL timestop(handle)
     736              : 
     737          720 :    END SUBROUTINE truncate_fm
     738              : 
     739              : ! **************************************************************************************************
     740              : !> \brief ...
     741              : !> \param fm_mat_S_bar_ia_bse ...
     742              : !> \param fm_mat_S_bar_ij_bse ...
     743              : !> \param fm_mat_S_trunc ...
     744              : !> \param fm_mat_S_ij_trunc ...
     745              : !> \param fm_mat_S_ab_trunc ...
     746              : !> \param fm_mat_Q_static_bse_gemm ...
     747              : !> \param mp2_env ...
     748              : ! **************************************************************************************************
     749           30 :    SUBROUTINE deallocate_matrices_bse(fm_mat_S_bar_ia_bse, fm_mat_S_bar_ij_bse, &
     750              :                                       fm_mat_S_trunc, fm_mat_S_ij_trunc, fm_mat_S_ab_trunc, &
     751              :                                       fm_mat_Q_static_bse_gemm, mp2_env)
     752              : 
     753              :       TYPE(cp_fm_type), INTENT(INOUT) :: fm_mat_S_bar_ia_bse, fm_mat_S_bar_ij_bse, fm_mat_S_trunc, &
     754              :          fm_mat_S_ij_trunc, fm_mat_S_ab_trunc, fm_mat_Q_static_bse_gemm
     755              :       TYPE(mp2_type)                                     :: mp2_env
     756              : 
     757              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'deallocate_matrices_bse'
     758              : 
     759              :       INTEGER                                            :: handle
     760              : 
     761           30 :       CALL timeset(routineN, handle)
     762              : 
     763           30 :       CALL cp_fm_release(fm_mat_S_bar_ia_bse)
     764           30 :       CALL cp_fm_release(fm_mat_S_bar_ij_bse)
     765           30 :       CALL cp_fm_release(fm_mat_S_trunc)
     766           30 :       CALL cp_fm_release(fm_mat_S_ij_trunc)
     767           30 :       CALL cp_fm_release(fm_mat_S_ab_trunc)
     768           30 :       CALL cp_fm_release(fm_mat_Q_static_bse_gemm)
     769           30 :       IF (mp2_env%bse%do_nto_analysis) THEN
     770            4 :          DEALLOCATE (mp2_env%bse%bse_nto_state_list_final)
     771              :       END IF
     772              : 
     773           30 :       CALL timestop(handle)
     774              : 
     775           30 :    END SUBROUTINE deallocate_matrices_bse
     776              : 
     777              : ! **************************************************************************************************
     778              : !> \brief Routine for computing the coefficients of the eigenvectors of the BSE matrix from a
     779              : !>  multiplication with the eigenvalues
     780              : !> \param fm_work ...
     781              : !> \param eig_vals ...
     782              : !> \param beta ...
     783              : !> \param gamma ...
     784              : !> \param do_transpose ...
     785              : ! **************************************************************************************************
     786           72 :    SUBROUTINE comp_eigvec_coeff_BSE(fm_work, eig_vals, beta, gamma, do_transpose)
     787              : 
     788              :       TYPE(cp_fm_type), INTENT(INOUT)                    :: fm_work
     789              :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:), &
     790              :          INTENT(IN)                                      :: eig_vals
     791              :       REAL(KIND=dp), INTENT(IN)                          :: beta
     792              :       REAL(KIND=dp), INTENT(IN), OPTIONAL                :: gamma
     793              :       LOGICAL, INTENT(IN), OPTIONAL                      :: do_transpose
     794              : 
     795              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'comp_eigvec_coeff_BSE'
     796              : 
     797              :       INTEGER                                            :: handle, i_row_global, ii, j_col_global, &
     798              :                                                             jj, ncol_local, nrow_local
     799           36 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
     800              :       LOGICAL                                            :: my_do_transpose
     801              :       REAL(KIND=dp)                                      :: coeff, my_gamma
     802              : 
     803           36 :       CALL timeset(routineN, handle)
     804              : 
     805           36 :       IF (PRESENT(gamma)) THEN
     806           36 :          my_gamma = gamma
     807              :       ELSE
     808              :          my_gamma = 2.0_dp
     809              :       END IF
     810              : 
     811           36 :       IF (PRESENT(do_transpose)) THEN
     812           36 :          my_do_transpose = do_transpose
     813              :       ELSE
     814              :          my_do_transpose = .FALSE.
     815              :       END IF
     816              : 
     817              :       CALL cp_fm_get_info(matrix=fm_work, &
     818              :                           nrow_local=nrow_local, &
     819              :                           ncol_local=ncol_local, &
     820              :                           row_indices=row_indices, &
     821           36 :                           col_indices=col_indices)
     822              : 
     823           36 :       IF (my_do_transpose) THEN
     824         1764 :          DO jj = 1, ncol_local
     825         1728 :             j_col_global = col_indices(jj)
     826        43236 :             DO ii = 1, nrow_local
     827        41472 :                coeff = (eig_vals(j_col_global)**beta)/my_gamma
     828        43200 :                fm_work%local_data(ii, jj) = fm_work%local_data(ii, jj)*coeff
     829              :             END DO
     830              :          END DO
     831              :       ELSE
     832            0 :          DO jj = 1, ncol_local
     833            0 :             DO ii = 1, nrow_local
     834            0 :                i_row_global = row_indices(ii)
     835            0 :                coeff = (eig_vals(i_row_global)**beta)/my_gamma
     836            0 :                fm_work%local_data(ii, jj) = fm_work%local_data(ii, jj)*coeff
     837              :             END DO
     838              :          END DO
     839              :       END IF
     840              : 
     841           36 :       CALL timestop(handle)
     842              : 
     843           36 :    END SUBROUTINE comp_eigvec_coeff_BSE
     844              : 
     845              : ! **************************************************************************************************
     846              : !> \brief ...
     847              : !> \param idx_prim ...
     848              : !> \param idx_sec ...
     849              : !> \param eigvec_entries ...
     850              : ! **************************************************************************************************
     851         1166 :    SUBROUTINE sort_excitations(idx_prim, idx_sec, eigvec_entries)
     852              : 
     853              :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: idx_prim, idx_sec
     854              :       REAL(kind=dp), ALLOCATABLE, DIMENSION(:)           :: eigvec_entries
     855              : 
     856              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'sort_excitations'
     857              : 
     858              :       INTEGER                                            :: handle, ii, kk, num_entries, num_mults
     859         1166 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: idx_prim_work, idx_sec_work, tmp_index
     860              :       LOGICAL                                            :: unique_entries
     861         1166 :       REAL(kind=dp), ALLOCATABLE, DIMENSION(:)           :: eigvec_entries_work
     862              : 
     863         1166 :       CALL timeset(routineN, handle)
     864              : 
     865         1166 :       num_entries = SIZE(idx_prim)
     866              : 
     867         3048 :       ALLOCATE (tmp_index(num_entries))
     868              : 
     869         1166 :       CALL sort(idx_prim, num_entries, tmp_index)
     870              : 
     871         1882 :       ALLOCATE (idx_sec_work(num_entries))
     872         3048 :       ALLOCATE (eigvec_entries_work(num_entries))
     873              : 
     874         3280 :       DO ii = 1, num_entries
     875         2114 :          idx_sec_work(ii) = idx_sec(tmp_index(ii))
     876         3280 :          eigvec_entries_work(ii) = eigvec_entries(tmp_index(ii))
     877              :       END DO
     878              : 
     879         1166 :       DEALLOCATE (tmp_index)
     880         1166 :       DEALLOCATE (idx_sec)
     881         1166 :       DEALLOCATE (eigvec_entries)
     882              : 
     883         1166 :       CALL MOVE_ALLOC(idx_sec_work, idx_sec)
     884         1166 :       CALL MOVE_ALLOC(eigvec_entries_work, eigvec_entries)
     885              : 
     886              :       !Now check for multiple entries in first idx to check necessity of sorting in second idx
     887         1166 :       CALL sort_unique(idx_prim, unique_entries)
     888         1166 :       IF (.NOT. unique_entries) THEN
     889          540 :          ALLOCATE (idx_prim_work(num_entries))
     890         1608 :          idx_prim_work(:) = idx_prim(:)
     891              :          ! Find duplicate entries in idx_prim
     892         1608 :          DO ii = 1, num_entries
     893         1338 :             IF (idx_prim_work(ii) == 0) CYCLE
     894         5364 :             num_mults = COUNT(idx_prim_work == idx_prim_work(ii))
     895          834 :             IF (num_mults > 1) THEN
     896              :                !Set all duplicate entries to 0
     897         1308 :                idx_prim_work(ii:ii + num_mults - 1) = 0
     898              :                !Start sorting in secondary index
     899         1206 :                ALLOCATE (idx_sec_work(num_mults))
     900         1206 :                ALLOCATE (eigvec_entries_work(num_mults))
     901         1308 :                idx_sec_work(:) = idx_sec(ii:ii + num_mults - 1)
     902         1308 :                eigvec_entries_work(:) = eigvec_entries(ii:ii + num_mults - 1)
     903          804 :                ALLOCATE (tmp_index(num_mults))
     904          402 :                CALL sort(idx_sec_work, num_mults, tmp_index)
     905              : 
     906              :                !Now write newly sorted indices to original arrays
     907         1308 :                DO kk = ii, ii + num_mults - 1
     908          906 :                   idx_sec(kk) = idx_sec_work(kk - ii + 1)
     909         1308 :                   eigvec_entries(kk) = eigvec_entries_work(tmp_index(kk - ii + 1))
     910              :                END DO
     911              :                !Deallocate work arrays
     912          402 :                DEALLOCATE (tmp_index)
     913          402 :                DEALLOCATE (idx_sec_work)
     914          402 :                DEALLOCATE (eigvec_entries_work)
     915              :             END IF
     916         1608 :             idx_prim_work(ii) = idx_prim(ii)
     917              :          END DO
     918          270 :          DEALLOCATE (idx_prim_work)
     919              :       END IF
     920              : 
     921         1166 :       CALL timestop(handle)
     922              : 
     923         3498 :    END SUBROUTINE sort_excitations
     924              : 
     925              : ! **************************************************************************************************
     926              : !> \brief Roughly estimates the needed runtime and memory during the BSE run
     927              : !> \param homo_red ...
     928              : !> \param virtual_red ...
     929              : !> \param unit_nr ...
     930              : !> \param bse_abba ...
     931              : !> \param para_env ...
     932              : !> \param diag_runtime_est ...
     933              : ! **************************************************************************************************
     934           30 :    SUBROUTINE estimate_BSE_resources(homo_red, virtual_red, unit_nr, bse_abba, &
     935              :                                      para_env, diag_runtime_est)
     936              : 
     937              :       INTEGER                                            :: homo_red, virtual_red, unit_nr
     938              :       LOGICAL                                            :: bse_abba
     939              :       TYPE(mp_para_env_type), POINTER                    :: para_env
     940              :       REAL(KIND=dp)                                      :: diag_runtime_est
     941              : 
     942              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'estimate_BSE_resources'
     943              : 
     944              :       INTEGER                                            :: handle, num_BSE_matrices
     945              :       INTEGER(KIND=int_8)                                :: full_dim
     946              :       REAL(KIND=dp)                                      :: mem_est, mem_est_per_rank
     947              : 
     948           30 :       CALL timeset(routineN, handle)
     949              : 
     950              :       ! Number of matrices with size of A in TDA is 2 (A itself and W_ijab)
     951           30 :       num_BSE_matrices = 2
     952              :       ! With the full diagonalization of ABBA, we need several auxiliary matrices in the process
     953              :       ! The maximum number is 2 + 2 + 6 (additional B and C matrix as well as 6 matrices to create C)
     954           30 :       IF (bse_abba) THEN
     955           18 :          num_BSE_matrices = 10
     956              :       END IF
     957              : 
     958           30 :       full_dim = (INT(homo_red, KIND=int_8)**2*INT(virtual_red, KIND=int_8)**2)*INT(num_BSE_matrices, KIND=int_8)
     959           30 :       mem_est = REAL(8*full_dim, KIND=dp)/REAL(1024**3, KIND=dp)
     960           30 :       mem_est_per_rank = REAL(mem_est/para_env%num_pe, KIND=dp)
     961              : 
     962           30 :       IF (unit_nr > 0) THEN
     963              :          ! WRITE (unit_nr, '(T2,A4,T7,A40,T68,F13.3)') 'BSE|', 'Total peak memory estimate from BSE [GB]', &
     964              :          !    mem_est
     965           15 :          WRITE (unit_nr, '(T2,A4,T7,A40,T68,ES13.3)') 'BSE|', 'Total peak memory estimate from BSE [GB]', &
     966           30 :             mem_est
     967           15 :          WRITE (unit_nr, '(T2,A4,T7,A47,T68,F13.3)') 'BSE|', 'Peak memory estimate per MPI rank from BSE [GB]', &
     968           30 :             mem_est_per_rank
     969           15 :          WRITE (unit_nr, '(T2,A4)') 'BSE|'
     970              :       END IF
     971              :       ! Rough estimation of diagonalization runtimes. Baseline was a full BSE Naphthalene
     972              :       ! run with 11000x11000 entries in A/B/C, which took 10s on 32 ranks
     973              :       diag_runtime_est = REAL(INT(homo_red, KIND=int_8)*INT(virtual_red, KIND=int_8)/11000_int_8, KIND=dp)**3* &
     974           30 :                          10*32/REAL(para_env%num_pe, KIND=dp)
     975              : 
     976           30 :       CALL timestop(handle)
     977              : 
     978           30 :    END SUBROUTINE estimate_BSE_resources
     979              : 
     980              : ! **************************************************************************************************
     981              : !> \brief Filters eigenvector entries above a given threshold to describe excitations in the
     982              : !> singleparticle basis
     983              : !> \param fm_eigvec ...
     984              : !> \param idx_homo ...
     985              : !> \param idx_virt ...
     986              : !> \param eigvec_entries ...
     987              : !> \param i_exc ...
     988              : !> \param virtual ...
     989              : !> \param num_entries ...
     990              : !> \param mp2_env ...
     991              : ! **************************************************************************************************
     992         1166 :    SUBROUTINE filter_eigvec_contrib(fm_eigvec, idx_homo, idx_virt, eigvec_entries, &
     993              :                                     i_exc, virtual, num_entries, mp2_env)
     994              : 
     995              :       TYPE(cp_fm_type), INTENT(IN)                       :: fm_eigvec
     996              :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: idx_homo, idx_virt
     997              :       REAL(kind=dp), ALLOCATABLE, DIMENSION(:)           :: eigvec_entries
     998              :       INTEGER                                            :: i_exc, virtual, num_entries
     999              :       TYPE(mp2_type), INTENT(INOUT)                      :: mp2_env
    1000              : 
    1001              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'filter_eigvec_contrib'
    1002              : 
    1003              :       INTEGER                                            :: eigvec_idx, handle, ii, iproc, jj, kk, &
    1004              :                                                             ncol_local, nrow_local, &
    1005              :                                                             num_entries_local
    1006              :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: num_entries_to_comm
    1007         1166 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
    1008              :       REAL(KIND=dp)                                      :: eigvec_entry
    1009              :       TYPE(integ_mat_buffer_type), ALLOCATABLE, &
    1010         1166 :          DIMENSION(:)                                    :: buffer_entries
    1011              :       TYPE(mp_para_env_type), POINTER                    :: para_env
    1012              : 
    1013         1166 :       CALL timeset(routineN, handle)
    1014              : 
    1015         1166 :       para_env => fm_eigvec%matrix_struct%para_env
    1016              : 
    1017              :       CALL cp_fm_get_info(matrix=fm_eigvec, &
    1018              :                           nrow_local=nrow_local, &
    1019              :                           ncol_local=ncol_local, &
    1020              :                           row_indices=row_indices, &
    1021         1166 :                           col_indices=col_indices)
    1022              : 
    1023         3498 :       ALLOCATE (num_entries_to_comm(0:para_env%num_pe - 1))
    1024         3498 :       num_entries_to_comm(:) = 0
    1025              : 
    1026        56494 :       DO jj = 1, ncol_local
    1027              :          !First check if i is localized on this proc
    1028        55328 :          IF (col_indices(jj) /= i_exc) THEN
    1029              :             CYCLE
    1030              :          END IF
    1031        29996 :          DO ii = 1, nrow_local
    1032        27664 :             eigvec_idx = row_indices(ii)
    1033        27664 :             eigvec_entry = fm_eigvec%local_data(ii, jj)
    1034        82992 :             IF (ABS(eigvec_entry) > mp2_env%bse%eps_x) THEN
    1035         1057 :                num_entries_to_comm(para_env%mepos) = num_entries_to_comm(para_env%mepos) + 1
    1036              :             END IF
    1037              :          END DO
    1038              :       END DO
    1039              : 
    1040              :       !Gather number of entries of other processes
    1041         1166 :       CALL para_env%sum(num_entries_to_comm)
    1042              : 
    1043         1166 :       num_entries_local = num_entries_to_comm(para_env%mepos)
    1044              : 
    1045         5830 :       ALLOCATE (buffer_entries(0:para_env%num_pe - 1))
    1046              : 
    1047         3498 :       DO iproc = 0, para_env%num_pe - 1
    1048         5832 :          ALLOCATE (buffer_entries(iproc)%msg(num_entries_to_comm(iproc)))
    1049         5832 :          ALLOCATE (buffer_entries(iproc)%indx(num_entries_to_comm(iproc), 2))
    1050         4446 :          buffer_entries(iproc)%msg = 0.0_dp
    1051        12390 :          buffer_entries(iproc)%indx = 0
    1052              :       END DO
    1053              : 
    1054              :       kk = 1
    1055        56494 :       DO jj = 1, ncol_local
    1056              :          !First check if i is localized on this proc
    1057        55328 :          IF (col_indices(jj) /= i_exc) THEN
    1058              :             CYCLE
    1059              :          END IF
    1060        29996 :          DO ii = 1, nrow_local
    1061        27664 :             eigvec_idx = row_indices(ii)
    1062        27664 :             eigvec_entry = fm_eigvec%local_data(ii, jj)
    1063        82992 :             IF (ABS(eigvec_entry) > mp2_env%bse%eps_x) THEN
    1064         1057 :                buffer_entries(para_env%mepos)%indx(kk, 1) = (eigvec_idx - 1)/virtual + 1
    1065         1057 :                buffer_entries(para_env%mepos)%indx(kk, 2) = MOD(eigvec_idx - 1, virtual) + 1
    1066         1057 :                buffer_entries(para_env%mepos)%msg(kk) = eigvec_entry
    1067         1057 :                kk = kk + 1
    1068              :             END IF
    1069              :          END DO
    1070              :       END DO
    1071              : 
    1072         3498 :       DO iproc = 0, para_env%num_pe - 1
    1073         2332 :          CALL para_env%sum(buffer_entries(iproc)%msg)
    1074         3498 :          CALL para_env%sum(buffer_entries(iproc)%indx)
    1075              :       END DO
    1076              : 
    1077              :       !Now sum up gathered information
    1078         3498 :       num_entries = SUM(num_entries_to_comm)
    1079         3048 :       ALLOCATE (idx_homo(num_entries))
    1080         1882 :       ALLOCATE (idx_virt(num_entries))
    1081         3048 :       ALLOCATE (eigvec_entries(num_entries))
    1082              : 
    1083         1166 :       kk = 1
    1084         3498 :       DO iproc = 0, para_env%num_pe - 1
    1085         3498 :          IF (num_entries_to_comm(iproc) /= 0) THEN
    1086         3282 :             DO ii = 1, num_entries_to_comm(iproc)
    1087         2114 :                idx_homo(kk) = buffer_entries(iproc)%indx(ii, 1)
    1088         2114 :                idx_virt(kk) = buffer_entries(iproc)%indx(ii, 2)
    1089         2114 :                eigvec_entries(kk) = buffer_entries(iproc)%msg(ii)
    1090         3282 :                kk = kk + 1
    1091              :             END DO
    1092              :          END IF
    1093              :       END DO
    1094              : 
    1095              :       !Deallocate all the used arrays
    1096         3498 :       DO iproc = 0, para_env%num_pe - 1
    1097         2332 :          DEALLOCATE (buffer_entries(iproc)%msg)
    1098         3498 :          DEALLOCATE (buffer_entries(iproc)%indx)
    1099              :       END DO
    1100         4664 :       DEALLOCATE (buffer_entries)
    1101         1166 :       DEALLOCATE (num_entries_to_comm)
    1102         1166 :       NULLIFY (row_indices)
    1103         1166 :       NULLIFY (col_indices)
    1104              : 
    1105              :       !Now sort the results according to the involved singleparticle orbitals
    1106              :       ! (homo first, then virtual)
    1107         1166 :       CALL sort_excitations(idx_homo, idx_virt, eigvec_entries)
    1108              : 
    1109         1166 :       CALL timestop(handle)
    1110              : 
    1111         1166 :    END SUBROUTINE filter_eigvec_contrib
    1112              : 
    1113              : ! **************************************************************************************************
    1114              : !> \brief Reads cutoffs for BSE from mp2_env and compares to energies in Eigenval to extract
    1115              : !>        reduced homo/virtual and
    1116              : !> \param Eigenval array (1d) with energies, can be e.g. from GW or DFT
    1117              : !> \param homo Total number of occupied orbitals
    1118              : !> \param virtual Total number of unoccupied orbitals
    1119              : !> \param homo_red Total number of occupied orbitals to include after cutoff
    1120              : !> \param virt_red Total number of unoccupied orbitals to include after ctuoff
    1121              : !> \param homo_incl First occupied index to include after cutoff
    1122              : !> \param virt_incl Last unoccupied index to include after cutoff
    1123              : !> \param mp2_env ...
    1124              : ! **************************************************************************************************
    1125           60 :    SUBROUTINE determine_cutoff_indices(Eigenval, &
    1126              :                                        homo, virtual, &
    1127              :                                        homo_red, virt_red, &
    1128              :                                        homo_incl, virt_incl, &
    1129              :                                        mp2_env)
    1130              : 
    1131              :       REAL(KIND=dp), DIMENSION(:), INTENT(IN)            :: Eigenval
    1132              :       INTEGER, INTENT(IN)                                :: homo, virtual
    1133              :       INTEGER, INTENT(OUT)                               :: homo_red, virt_red, homo_incl, virt_incl
    1134              :       TYPE(mp2_type), INTENT(INOUT)                      :: mp2_env
    1135              : 
    1136              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'determine_cutoff_indices'
    1137              : 
    1138              :       INTEGER                                            :: handle, i_homo, j_virt
    1139              : 
    1140           60 :       CALL timeset(routineN, handle)
    1141              :       ! Determine index in homo and virtual for truncation
    1142              :       ! Uses indices of outermost orbitals within energy range (-mp2_env%bse%bse_cutoff_occ,mp2_env%bse%bse_cutoff_empty)
    1143           60 :       IF (mp2_env%bse%bse_cutoff_occ > 0 .OR. mp2_env%bse%bse_cutoff_empty > 0) THEN
    1144              :          IF (-mp2_env%bse%bse_cutoff_occ < Eigenval(1) - Eigenval(homo) &
    1145           60 :              .OR. mp2_env%bse%bse_cutoff_occ < 0) THEN
    1146           60 :             homo_red = homo
    1147           60 :             homo_incl = 1
    1148              :          ELSE
    1149            0 :             homo_incl = 1
    1150            0 :             DO i_homo = 1, homo
    1151            0 :                IF (Eigenval(i_homo) - Eigenval(homo) > -mp2_env%bse%bse_cutoff_occ) THEN
    1152            0 :                   homo_incl = i_homo
    1153            0 :                   EXIT
    1154              :                END IF
    1155              :             END DO
    1156            0 :             homo_red = homo - homo_incl + 1
    1157              :          END IF
    1158              : 
    1159              :          IF (mp2_env%bse%bse_cutoff_empty > Eigenval(homo + virtual) - Eigenval(homo + 1) &
    1160           60 :              .OR. mp2_env%bse%bse_cutoff_empty < 0) THEN
    1161            0 :             virt_red = virtual
    1162            0 :             virt_incl = virtual
    1163              :          ELSE
    1164           60 :             virt_incl = homo + 1
    1165          764 :             DO j_virt = 1, virtual
    1166          764 :                IF (Eigenval(homo + j_virt) - Eigenval(homo + 1) > mp2_env%bse%bse_cutoff_empty) THEN
    1167           60 :                   virt_incl = j_virt - 1
    1168           60 :                   EXIT
    1169              :                END IF
    1170              :             END DO
    1171           60 :             virt_red = virt_incl
    1172              :          END IF
    1173              :       ELSE
    1174            0 :          homo_red = homo
    1175            0 :          virt_red = virtual
    1176            0 :          homo_incl = 1
    1177            0 :          virt_incl = virtual
    1178              :       END IF
    1179              : 
    1180           60 :       CALL timestop(handle)
    1181              : 
    1182           60 :    END SUBROUTINE determine_cutoff_indices
    1183              : 
    1184              : ! **************************************************************************************************
    1185              : !> \brief Determines indices within the given energy cutoffs and truncates Eigenvalues and matrices
    1186              : !> \param fm_mat_S_ia_bse ...
    1187              : !> \param fm_mat_S_ij_bse ...
    1188              : !> \param fm_mat_S_ab_bse ...
    1189              : !> \param fm_mat_S_trunc ...
    1190              : !> \param fm_mat_S_ij_trunc ...
    1191              : !> \param fm_mat_S_ab_trunc ...
    1192              : !> \param Eigenval_scf ...
    1193              : !> \param Eigenval ...
    1194              : !> \param Eigenval_reduced ...
    1195              : !> \param homo ...
    1196              : !> \param virtual ...
    1197              : !> \param dimen_RI ...
    1198              : !> \param unit_nr ...
    1199              : !> \param bse_lev_virt ...
    1200              : !> \param homo_red ...
    1201              : !> \param virt_red ...
    1202              : !> \param mp2_env ...
    1203              : ! **************************************************************************************************
    1204           60 :    SUBROUTINE truncate_BSE_matrices(fm_mat_S_ia_bse, fm_mat_S_ij_bse, fm_mat_S_ab_bse, &
    1205              :                                     fm_mat_S_trunc, fm_mat_S_ij_trunc, fm_mat_S_ab_trunc, &
    1206           30 :                                     Eigenval_scf, Eigenval, Eigenval_reduced, &
    1207              :                                     homo, virtual, dimen_RI, unit_nr, &
    1208              :                                     bse_lev_virt, &
    1209              :                                     homo_red, virt_red, &
    1210              :                                     mp2_env)
    1211              : 
    1212              :       TYPE(cp_fm_type), INTENT(IN)                       :: fm_mat_S_ia_bse, fm_mat_S_ij_bse, &
    1213              :                                                             fm_mat_S_ab_bse
    1214              :       TYPE(cp_fm_type), INTENT(INOUT)                    :: fm_mat_S_trunc, fm_mat_S_ij_trunc, &
    1215              :                                                             fm_mat_S_ab_trunc
    1216              :       REAL(KIND=dp), DIMENSION(:)                        :: Eigenval_scf, Eigenval
    1217              :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: Eigenval_reduced
    1218              :       INTEGER, INTENT(IN)                                :: homo, virtual, dimen_RI, unit_nr, &
    1219              :                                                             bse_lev_virt
    1220              :       INTEGER, INTENT(OUT)                               :: homo_red, virt_red
    1221              :       TYPE(mp2_type), INTENT(INOUT)                      :: mp2_env
    1222              : 
    1223              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'truncate_BSE_matrices'
    1224              : 
    1225              :       INTEGER                                            :: handle, homo_incl, virt_incl
    1226              :       TYPE(cp_blacs_env_type), POINTER                   :: context
    1227              :       TYPE(cp_fm_struct_type), POINTER                   :: fm_struct_ab, fm_struct_ia, fm_struct_ij
    1228              :       TYPE(mp_para_env_type), POINTER                    :: para_env
    1229              : 
    1230           30 :       CALL timeset(routineN, handle)
    1231              : 
    1232              :       ! Determine index in homo and virtual for truncation
    1233              :       ! Uses indices of outermost orbitals within energy range (-mp2_env%bse%bse_cutoff_occ,mp2_env%bse%bse_cutoff_empty)
    1234              : 
    1235              :       CALL determine_cutoff_indices(Eigenval_scf, &
    1236              :                                     homo, virtual, &
    1237              :                                     homo_red, virt_red, &
    1238              :                                     homo_incl, virt_incl, &
    1239           30 :                                     mp2_env)
    1240              : 
    1241           30 :       IF (unit_nr > 0) THEN
    1242           15 :          IF (mp2_env%bse%bse_cutoff_occ > 0) THEN
    1243           14 :             WRITE (unit_nr, '(T2,A4,T7,A29,T71,F10.3)') 'BSE|', 'Cutoff occupied orbitals [eV]', &
    1244           28 :                mp2_env%bse%bse_cutoff_occ*evolt
    1245              :          ELSE
    1246            1 :             WRITE (unit_nr, '(T2,A4,T7,A37)') 'BSE|', 'No cutoff given for occupied orbitals'
    1247              :          END IF
    1248           15 :          IF (mp2_env%bse%bse_cutoff_empty > 0) THEN
    1249           15 :             WRITE (unit_nr, '(T2,A4,T7,A26,T71,F10.3)') 'BSE|', 'Cutoff empty orbitals [eV]', &
    1250           30 :                mp2_env%bse%bse_cutoff_empty*evolt
    1251              :          ELSE
    1252            0 :             WRITE (unit_nr, '(T2,A4,T7,A34)') 'BSE|', 'No cutoff given for empty orbitals'
    1253              :          END IF
    1254           15 :          WRITE (unit_nr, '(T2,A4,T7,A20,T71,I10)') 'BSE|', 'First occupied index', homo_incl
    1255           15 :          WRITE (unit_nr, '(T2,A4,T7,A32,T71,I10)') 'BSE|', 'Last empty index (not MO index!)', virt_incl
    1256           15 :          WRITE (unit_nr, '(T2,A4,T7,A35,T71,F10.3)') 'BSE|', 'Energy of first occupied index [eV]', Eigenval(homo_incl)*evolt
    1257           15 :          WRITE (unit_nr, '(T2,A4,T7,A31,T71,F10.3)') 'BSE|', 'Energy of last empty index [eV]', Eigenval(homo + virt_incl)*evolt
    1258           15 :          WRITE (unit_nr, '(T2,A4,T7,A54,T71,F10.3)') 'BSE|', 'Energy difference of first occupied index to HOMO [eV]', &
    1259           30 :             -(Eigenval(homo_incl) - Eigenval(homo))*evolt
    1260           15 :          WRITE (unit_nr, '(T2,A4,T7,A50,T71,F10.3)') 'BSE|', 'Energy difference of last empty index to LUMO [eV]', &
    1261           30 :             (Eigenval(homo + virt_incl) - Eigenval(homo + 1))*evolt
    1262           15 :          WRITE (unit_nr, '(T2,A4,T7,A35,T71,I10)') 'BSE|', 'Number of GW-corrected occupied MOs', mp2_env%ri_g0w0%corr_mos_occ
    1263           15 :          WRITE (unit_nr, '(T2,A4,T7,A32,T71,I10)') 'BSE|', 'Number of GW-corrected empty MOs', mp2_env%ri_g0w0%corr_mos_virt
    1264           15 :          WRITE (unit_nr, '(T2,A4)') 'BSE|'
    1265              :       END IF
    1266           30 :       IF (unit_nr > 0) THEN
    1267           15 :          IF (homo - homo_incl + 1 > mp2_env%ri_g0w0%corr_mos_occ) THEN
    1268            0 :             CPABORT("Number of GW-corrected occupied MOs too small for chosen BSE cutoff")
    1269              :          END IF
    1270           15 :          IF (virt_incl > mp2_env%ri_g0w0%corr_mos_virt) THEN
    1271            0 :             CPABORT("Number of GW-corrected virtual MOs too small for chosen BSE cutoff")
    1272              :          END IF
    1273              :       END IF
    1274              :       !Truncate full fm_S matrices
    1275              :       !Allocate new truncated matrices of proper size
    1276           30 :       para_env => fm_mat_S_ia_bse%matrix_struct%para_env
    1277           30 :       context => fm_mat_S_ia_bse%matrix_struct%context
    1278              : 
    1279           30 :       CALL cp_fm_struct_create(fm_struct_ia, para_env, context, dimen_RI, homo_red*virt_red)
    1280           30 :       CALL cp_fm_struct_create(fm_struct_ij, para_env, context, dimen_RI, homo_red*homo_red)
    1281           30 :       CALL cp_fm_struct_create(fm_struct_ab, para_env, context, dimen_RI, virt_red*virt_red)
    1282              : 
    1283           30 :       CALL cp_fm_create(fm_mat_S_trunc, fm_struct_ia, name="fm_S_trunc", set_zero=.TRUE.)
    1284           30 :       CALL cp_fm_create(fm_mat_S_ij_trunc, fm_struct_ij, name="fm_S_ij_trunc", set_zero=.TRUE.)
    1285           30 :       CALL cp_fm_create(fm_mat_S_ab_trunc, fm_struct_ab, name="fm_S_ab_trunc", set_zero=.TRUE.)
    1286              : 
    1287              :       !Copy parts of original matrices to truncated ones
    1288           30 :       IF (mp2_env%bse%bse_cutoff_occ > 0 .OR. mp2_env%bse%bse_cutoff_empty > 0) THEN
    1289              :          !Truncate eigenvals
    1290           90 :          ALLOCATE (Eigenval_reduced(homo_red + virt_red))
    1291              :          ! Include USE_KS_ENERGIES input
    1292           30 :          IF (mp2_env%bse%use_ks_energies) THEN
    1293            0 :             Eigenval_reduced(:) = Eigenval_scf(homo_incl:homo + virt_incl)
    1294              :          ELSE
    1295          496 :             Eigenval_reduced(:) = Eigenval(homo_incl:homo + virt_incl)
    1296              :          END IF
    1297              : 
    1298              :          CALL truncate_fm(fm_mat_S_trunc, fm_mat_S_ia_bse, virtual, &
    1299              :                           homo_red, virt_red, unit_nr, mp2_env, &
    1300           30 :                           nrow_offset=homo_incl)
    1301              :          CALL truncate_fm(fm_mat_S_ij_trunc, fm_mat_S_ij_bse, homo, &
    1302              :                           homo_red, homo_red, unit_nr, mp2_env, &
    1303           30 :                           homo_incl, homo_incl)
    1304              :          CALL truncate_fm(fm_mat_S_ab_trunc, fm_mat_S_ab_bse, bse_lev_virt, &
    1305           30 :                           virt_red, virt_red, unit_nr, mp2_env)
    1306              : 
    1307              :       ELSE
    1308            0 :          IF (unit_nr > 0) THEN
    1309            0 :             WRITE (unit_nr, '(T2,A4,T7,A37)') 'BSE|', 'No truncation of BSE matrices applied'
    1310            0 :             WRITE (unit_nr, '(T2,A4)') 'BSE|'
    1311              :          END IF
    1312            0 :          ALLOCATE (Eigenval_reduced(homo_red + virt_red))
    1313              :          ! Include USE_KS_ENERGIES input
    1314            0 :          IF (mp2_env%bse%use_ks_energies) THEN
    1315            0 :             Eigenval_reduced(:) = Eigenval_scf(:)
    1316              :          ELSE
    1317            0 :             Eigenval_reduced(:) = Eigenval(:)
    1318              :          END IF
    1319              :          CALL cp_fm_to_fm_submat_general(fm_mat_S_ia_bse, fm_mat_S_trunc, dimen_RI, homo_red*virt_red, &
    1320            0 :                                          1, 1, 1, 1, context)
    1321              :          CALL cp_fm_to_fm_submat_general(fm_mat_S_ij_bse, fm_mat_S_ij_trunc, dimen_RI, homo_red*homo_red, &
    1322            0 :                                          1, 1, 1, 1, context)
    1323              :          CALL cp_fm_to_fm_submat_general(fm_mat_S_ab_bse, fm_mat_S_ab_trunc, dimen_RI, virt_red*virt_red, &
    1324            0 :                                          1, 1, 1, 1, context)
    1325              :       END IF
    1326              : 
    1327           30 :       CALL cp_fm_struct_release(fm_struct_ia)
    1328           30 :       CALL cp_fm_struct_release(fm_struct_ij)
    1329           30 :       CALL cp_fm_struct_release(fm_struct_ab)
    1330              : 
    1331           30 :       NULLIFY (para_env)
    1332           30 :       NULLIFY (context)
    1333              : 
    1334           30 :       CALL timestop(handle)
    1335              : 
    1336           30 :    END SUBROUTINE truncate_BSE_matrices
    1337              : 
    1338              : ! **************************************************************************************************
    1339              : !> \brief ...
    1340              : !> \param fm_eigvec ...
    1341              : !> \param fm_eigvec_reshuffled ...
    1342              : !> \param homo ...
    1343              : !> \param virtual ...
    1344              : !> \param n_exc ...
    1345              : !> \param do_transpose ...
    1346              : !> \param unit_nr ...
    1347              : !> \param mp2_env ...
    1348              : ! **************************************************************************************************
    1349          900 :    SUBROUTINE reshuffle_eigvec(fm_eigvec, fm_eigvec_reshuffled, homo, virtual, n_exc, do_transpose, &
    1350              :                                unit_nr, mp2_env)
    1351              : 
    1352              :       TYPE(cp_fm_type), INTENT(IN)                       :: fm_eigvec
    1353              :       TYPE(cp_fm_type), INTENT(INOUT)                    :: fm_eigvec_reshuffled
    1354              :       INTEGER, INTENT(IN)                                :: homo, virtual, n_exc
    1355              :       LOGICAL, INTENT(IN)                                :: do_transpose
    1356              :       INTEGER, INTENT(IN)                                :: unit_nr
    1357              :       TYPE(mp2_type), INTENT(INOUT)                      :: mp2_env
    1358              : 
    1359              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'reshuffle_eigvec'
    1360              : 
    1361              :       INTEGER                                            :: handle, my_m_col, my_n_row
    1362              :       INTEGER, DIMENSION(4)                              :: reordering
    1363              :       TYPE(cp_fm_struct_type), POINTER                   :: fm_struct_eigvec_col, &
    1364              :                                                             fm_struct_eigvec_reshuffled
    1365              :       TYPE(cp_fm_type)                                   :: fm_eigvec_col
    1366              : 
    1367          300 :       CALL timeset(routineN, handle)
    1368              : 
    1369              :       ! Define reordering:
    1370              :       ! (ia,11) to (a1,i1) for transposition
    1371              :       ! (ia,11) to (i1,a1) for default
    1372          300 :       IF (do_transpose) THEN
    1373           50 :          reordering = [2, 3, 1, 4]
    1374           50 :          my_n_row = virtual
    1375           50 :          my_m_col = homo
    1376              :       ELSE
    1377          250 :          reordering = [1, 3, 2, 4]
    1378          250 :          my_n_row = homo
    1379          250 :          my_m_col = virtual
    1380              :       END IF
    1381              : 
    1382              :       CALL cp_fm_struct_create(fm_struct_eigvec_col, &
    1383              :                                fm_eigvec%matrix_struct%para_env, fm_eigvec%matrix_struct%context, &
    1384          300 :                                homo*virtual, 1)
    1385              :       CALL cp_fm_struct_create(fm_struct_eigvec_reshuffled, &
    1386              :                                fm_eigvec%matrix_struct%para_env, fm_eigvec%matrix_struct%context, &
    1387          300 :                                my_n_row, my_m_col)
    1388              : 
    1389              :       ! Resort indices
    1390          300 :       CALL cp_fm_create(fm_eigvec_col, fm_struct_eigvec_col, name="BSE_column_vector")
    1391          300 :       CALL cp_fm_set_all(fm_eigvec_col, 0.0_dp)
    1392          300 :       CALL cp_fm_create(fm_eigvec_reshuffled, fm_struct_eigvec_reshuffled, name="BSE_reshuffled_eigenvector")
    1393          300 :       CALL cp_fm_set_all(fm_eigvec_reshuffled, 0.0_dp)
    1394              :       ! Fill matrix
    1395              :       CALL cp_fm_to_fm_submat(fm_eigvec, fm_eigvec_col, &
    1396              :                               homo*virtual, 1, &
    1397              :                               1, n_exc, &
    1398          300 :                               1, 1)
    1399              :       ! Reshuffle
    1400              :       CALL fm_general_add_bse(fm_eigvec_reshuffled, fm_eigvec_col, 1.0_dp, &
    1401              :                               virtual, 1, &
    1402              :                               1, 1, &
    1403          300 :                               unit_nr, reordering, mp2_env)
    1404              : 
    1405          300 :       CALL cp_fm_release(fm_eigvec_col)
    1406          300 :       CALL cp_fm_struct_release(fm_struct_eigvec_col)
    1407          300 :       CALL cp_fm_struct_release(fm_struct_eigvec_reshuffled)
    1408              : 
    1409          300 :       CALL timestop(handle)
    1410              : 
    1411          300 :    END SUBROUTINE reshuffle_eigvec
    1412              : 
    1413              : ! **************************************************************************************************
    1414              : !> \brief Borrowed from the tddfpt module with slight adaptions
    1415              : !> \param qs_env ...
    1416              : !> \param mos ...
    1417              : !> \param istate ...
    1418              : !> \param info_approximation ...
    1419              : !> \param stride ...
    1420              : !> \param append_cube ...
    1421              : !> \param print_section ...
    1422              : ! **************************************************************************************************
    1423            0 :    SUBROUTINE print_bse_nto_cubes(qs_env, mos, istate, info_approximation, &
    1424              :                                   stride, append_cube, print_section)
    1425              : 
    1426              :       TYPE(qs_environment_type), POINTER                 :: qs_env
    1427              :       TYPE(mo_set_type), DIMENSION(:), INTENT(IN)        :: mos
    1428              :       INTEGER, INTENT(IN)                                :: istate
    1429              :       CHARACTER(LEN=10)                                  :: info_approximation
    1430              :       INTEGER, DIMENSION(:), POINTER                     :: stride
    1431              :       LOGICAL, INTENT(IN)                                :: append_cube
    1432              :       TYPE(section_vals_type), POINTER                   :: print_section
    1433              : 
    1434              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'print_bse_nto_cubes'
    1435              : 
    1436              :       CHARACTER(LEN=default_path_length)                 :: filename, info_approx_trunc, &
    1437              :                                                             my_pos_cube, title
    1438              :       INTEGER                                            :: handle, i, iset, nmo, unit_nr_cube
    1439              :       LOGICAL                                            :: mpi_io
    1440            0 :       TYPE(atomic_kind_type), DIMENSION(:), POINTER      :: atomic_kind_set
    1441              :       TYPE(cell_type), POINTER                           :: cell
    1442              :       TYPE(cp_fm_type), POINTER                          :: mo_coeff
    1443              :       TYPE(cp_logger_type), POINTER                      :: logger
    1444              :       TYPE(dft_control_type), POINTER                    :: dft_control
    1445              :       TYPE(particle_list_type), POINTER                  :: particles
    1446            0 :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
    1447              :       TYPE(pw_c1d_gs_type)                               :: wf_g
    1448              :       TYPE(pw_env_type), POINTER                         :: pw_env
    1449            0 :       TYPE(pw_pool_p_type), DIMENSION(:), POINTER        :: pw_pools
    1450              :       TYPE(pw_pool_type), POINTER                        :: auxbas_pw_pool
    1451              :       TYPE(pw_r3d_rs_type)                               :: wf_r
    1452            0 :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
    1453              :       TYPE(qs_subsys_type), POINTER                      :: subsys
    1454              : 
    1455            0 :       logger => cp_get_default_logger()
    1456            0 :       CALL timeset(routineN, handle)
    1457              : 
    1458            0 :       CALL get_qs_env(qs_env=qs_env, dft_control=dft_control, pw_env=pw_env)
    1459            0 :       CALL pw_env_get(pw_env, auxbas_pw_pool=auxbas_pw_pool, pw_pools=pw_pools)
    1460            0 :       CALL auxbas_pw_pool%create_pw(wf_r)
    1461            0 :       CALL auxbas_pw_pool%create_pw(wf_g)
    1462              : 
    1463            0 :       CALL get_qs_env(qs_env, subsys=subsys)
    1464            0 :       CALL qs_subsys_get(subsys, particles=particles)
    1465              : 
    1466            0 :       my_pos_cube = "REWIND"
    1467            0 :       IF (append_cube) THEN
    1468            0 :          my_pos_cube = "APPEND"
    1469              :       END IF
    1470              : 
    1471              :       CALL get_qs_env(qs_env=qs_env, &
    1472              :                       atomic_kind_set=atomic_kind_set, &
    1473              :                       qs_kind_set=qs_kind_set, &
    1474              :                       cell=cell, &
    1475            0 :                       particle_set=particle_set)
    1476              : 
    1477            0 :       DO iset = 1, 2
    1478            0 :          CALL get_mo_set(mo_set=mos(iset), mo_coeff=mo_coeff, nmo=nmo)
    1479            0 :          DO i = 1, nmo
    1480              :             CALL calculate_wavefunction(mo_coeff, i, wf_r, wf_g, atomic_kind_set, qs_kind_set, &
    1481            0 :                                         cell, dft_control, particle_set, pw_env)
    1482            0 :             IF (iset == 1) THEN
    1483            0 :                WRITE (filename, '(A6,I3.3,A5,I2.2,a11)') "_NEXC_", istate, "_NTO_", i, "_Hole_State"
    1484            0 :             ELSEIF (iset == 2) THEN
    1485            0 :                WRITE (filename, '(A6,I3.3,A5,I2.2,a15)') "_NEXC_", istate, "_NTO_", i, "_Particle_State"
    1486              :             END IF
    1487            0 :             info_approx_trunc = TRIM(ADJUSTL(info_approximation))
    1488            0 :             info_approx_trunc = info_approx_trunc(2:LEN_TRIM(info_approx_trunc) - 1)
    1489            0 :             filename = TRIM(info_approx_trunc)//TRIM(filename)
    1490            0 :             mpi_io = .TRUE.
    1491              :             unit_nr_cube = cp_print_key_unit_nr(logger, print_section, '', extension=".cube", &
    1492              :                                                 middle_name=TRIM(filename), file_position=my_pos_cube, &
    1493            0 :                                                 log_filename=.FALSE., ignore_should_output=.TRUE., mpi_io=mpi_io)
    1494            0 :             IF (iset == 1) THEN
    1495            0 :                WRITE (title, *) "Natural Transition Orbital Hole State", i
    1496            0 :             ELSEIF (iset == 2) THEN
    1497            0 :                WRITE (title, *) "Natural Transition Orbital Particle State", i
    1498              :             END IF
    1499            0 :             CALL cp_pw_to_cube(wf_r, unit_nr_cube, title, particles=particles, stride=stride, mpi_io=mpi_io)
    1500              :             CALL cp_print_key_finished_output(unit_nr_cube, logger, print_section, '', &
    1501            0 :                                               ignore_should_output=.TRUE., mpi_io=mpi_io)
    1502              :          END DO
    1503              :       END DO
    1504              : 
    1505            0 :       CALL auxbas_pw_pool%give_back_pw(wf_g)
    1506            0 :       CALL auxbas_pw_pool%give_back_pw(wf_r)
    1507              : 
    1508            0 :       CALL timestop(handle)
    1509            0 :    END SUBROUTINE print_bse_nto_cubes
    1510              : 
    1511              : ! **************************************************************************************************
    1512              : !> \brief Checks BSE input section and adapts them if necessary
    1513              : !> \param homo ...
    1514              : !> \param virtual ...
    1515              : !> \param unit_nr ...
    1516              : !> \param mp2_env ...
    1517              : !> \param qs_env ...
    1518              : ! **************************************************************************************************
    1519           30 :    SUBROUTINE adapt_BSE_input_params(homo, virtual, unit_nr, mp2_env, qs_env)
    1520              : 
    1521              :       INTEGER, INTENT(IN)                                :: homo, virtual, unit_nr
    1522              :       TYPE(mp2_type)                                     :: mp2_env
    1523              :       TYPE(qs_environment_type), POINTER                 :: qs_env
    1524              : 
    1525              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'adapt_BSE_input_params'
    1526              : 
    1527              :       INTEGER                                            :: handle, i, j, n, ndim_periodic_cell, &
    1528              :                                                             ndim_periodic_poisson, &
    1529              :                                                             num_state_list_exceptions
    1530              :       TYPE(cell_type), POINTER                           :: cell_ref
    1531              :       TYPE(pw_env_type), POINTER                         :: pw_env
    1532              :       TYPE(pw_poisson_type), POINTER                     :: poisson_env
    1533              : 
    1534           30 :       CALL timeset(routineN, handle)
    1535              :       ! Get environment infos for later usage
    1536           30 :       NULLIFY (pw_env, cell_ref, poisson_env)
    1537           30 :       CALL get_qs_env(qs_env, pw_env=pw_env, cell_ref=cell_ref)
    1538           30 :       CALL pw_env_get(pw_env, poisson_env=poisson_env)
    1539          120 :       ndim_periodic_poisson = COUNT(poisson_env%parameters%periodic == 1)
    1540          120 :       ndim_periodic_cell = SUM(cell_ref%perd(1:3)) ! Borrowed from cell_methods.F/write_cell_low
    1541              : 
    1542              :       ! Handle negative NUM_PRINT_EXC
    1543           30 :       IF (mp2_env%bse%num_print_exc < 0 .OR. &
    1544              :           mp2_env%bse%num_print_exc > homo*virtual) THEN
    1545            2 :          mp2_env%bse%num_print_exc = homo*virtual
    1546            2 :          IF (unit_nr > 0) THEN
    1547              :             CALL cp_hint(__LOCATION__, &
    1548              :                          "Keyword NUM_PRINT_EXC is either negative or too large. "// &
    1549            1 :                          "Printing all computed excitations.")
    1550              :          END IF
    1551              :       END IF
    1552              : 
    1553              :       ! Default to NUM_PRINT_EXC if too large or negative,
    1554              :       ! but only if NTOs are called - would be confusing for the user otherwise
    1555              :       ! Prepare and adapt user inputs for NTO analysis
    1556              :       ! Logic: Explicit state list overrides NUM_PRINT_EXC_NTOS
    1557              :       !        If only NUM_PRINT_EXC_NTOS is given, we write the array 1,...,NUM_PRINT_EXC_NTOS to
    1558              :       !        bse_nto_state_list
    1559           30 :       IF (mp2_env%bse%do_nto_analysis) THEN
    1560            4 :          IF (mp2_env%bse%explicit_nto_list) THEN
    1561            0 :             IF (mp2_env%bse%num_print_exc_ntos > 0) THEN
    1562            0 :                IF (unit_nr > 0) THEN
    1563              :                   CALL cp_hint(__LOCATION__, &
    1564              :                                "Keywords NUM_PRINT_EXC_NTOS and STATE_LIST are both given in input. "// &
    1565            0 :                                "Overriding NUM_PRINT_EXC_NTOS.")
    1566              :                END IF
    1567              :             END IF
    1568              :             ! Check if all states are within the range
    1569              :             ! Count them and initialize new array afterwards
    1570            0 :             num_state_list_exceptions = 0
    1571            0 :             DO i = 1, SIZE(mp2_env%bse%bse_nto_state_list)
    1572            0 :                IF (mp2_env%bse%bse_nto_state_list(i) < 1 .OR. &
    1573            0 :                    mp2_env%bse%bse_nto_state_list(i) > mp2_env%bse%num_print_exc) THEN
    1574            0 :                   num_state_list_exceptions = num_state_list_exceptions + 1
    1575              :                END IF
    1576              :             END DO
    1577            0 :             IF (num_state_list_exceptions > 0) THEN
    1578            0 :                IF (unit_nr > 0) THEN
    1579              :                   CALL cp_hint(__LOCATION__, &
    1580              :                                "STATE_LIST contains indices outside the range of included excitation levels. "// &
    1581            0 :                                "Ignoring these states.")
    1582              :                END IF
    1583              :             END IF
    1584            0 :             n = SIZE(mp2_env%bse%bse_nto_state_list) - num_state_list_exceptions
    1585            0 :             ALLOCATE (mp2_env%bse%bse_nto_state_list_final(n))
    1586            0 :             mp2_env%bse%bse_nto_state_list_final(:) = 0
    1587              :             i = 1
    1588            0 :             DO j = 1, SIZE(mp2_env%bse%bse_nto_state_list)
    1589            0 :                IF (mp2_env%bse%bse_nto_state_list(j) >= 1 .AND. &
    1590            0 :                    mp2_env%bse%bse_nto_state_list(j) <= mp2_env%bse%num_print_exc) THEN
    1591            0 :                   mp2_env%bse%bse_nto_state_list_final(i) = mp2_env%bse%bse_nto_state_list(j)
    1592            0 :                   i = i + 1
    1593              :                END IF
    1594              :             END DO
    1595              : 
    1596            0 :             mp2_env%bse%num_print_exc_ntos = SIZE(mp2_env%bse%bse_nto_state_list_final)
    1597              :          ELSE
    1598            4 :             IF (mp2_env%bse%num_print_exc_ntos > mp2_env%bse%num_print_exc .OR. &
    1599              :                 mp2_env%bse%num_print_exc_ntos < 0) THEN
    1600            4 :                mp2_env%bse%num_print_exc_ntos = mp2_env%bse%num_print_exc
    1601              :             END IF
    1602           12 :             ALLOCATE (mp2_env%bse%bse_nto_state_list_final(mp2_env%bse%num_print_exc_ntos))
    1603          104 :             DO i = 1, mp2_env%bse%num_print_exc_ntos
    1604          104 :                mp2_env%bse%bse_nto_state_list_final(i) = i
    1605              :             END DO
    1606              :          END IF
    1607              :       END IF
    1608              : 
    1609              :       ! Takes care of triplet states, when oscillator strengths are 0
    1610           30 :       IF (mp2_env%bse%bse_spin_config /= 0 .AND. &
    1611              :           mp2_env%bse%eps_nto_osc_str > 0) THEN
    1612            0 :          IF (unit_nr > 0) THEN
    1613              :             CALL cp_warn(__LOCATION__, &
    1614              :                          "Cannot apply EPS_OSC_STR for Triplet excitations. "// &
    1615            0 :                          "Resetting EPS_OSC_STR to default.")
    1616              :          END IF
    1617            0 :          mp2_env%bse%eps_nto_osc_str = -1.0_dp
    1618              :       END IF
    1619              : 
    1620              :       ! Take care of number for computed exciton descriptors
    1621           30 :       IF (mp2_env%bse%num_print_exc_descr < 0 .OR. &
    1622              :           mp2_env%bse%num_print_exc_descr > mp2_env%bse%num_print_exc) THEN
    1623            4 :          IF (unit_nr > 0) THEN
    1624              :             CALL cp_hint(__LOCATION__, &
    1625              :                          "Keyword NUM_PRINT_EXC_DESCR is either negative or too large. "// &
    1626            2 :                          "Printing exciton descriptors up to NUM_PRINT_EXC.")
    1627              :          END IF
    1628            4 :          mp2_env%bse%num_print_exc_descr = mp2_env%bse%num_print_exc
    1629              :       END IF
    1630              : 
    1631              :       ! Handle screening factor options
    1632           30 :       IF (mp2_env%BSE%screening_factor > 0.0_dp) THEN
    1633            2 :          IF (mp2_env%BSE%screening_method /= bse_screening_alpha) THEN
    1634            0 :             IF (unit_nr > 0) THEN
    1635              :                CALL cp_warn(__LOCATION__, &
    1636              :                             "Screening factor is only supported for &SCREENING_IN_W ALPHA. "// &
    1637            0 :                             "Resetting SCREENING_IN_W to ALPHA.")
    1638              :             END IF
    1639            0 :             mp2_env%BSE%screening_method = bse_screening_alpha
    1640              :          END IF
    1641            2 :          IF (mp2_env%BSE%screening_factor > 1.0_dp) THEN
    1642            0 :             IF (unit_nr > 0) THEN
    1643              :                CALL cp_warn(__LOCATION__, &
    1644            0 :                             "Screening factor is larger than 1.0. ")
    1645              :             END IF
    1646              :          END IF
    1647              :       END IF
    1648              : 
    1649           30 :       IF (mp2_env%BSE%screening_factor < 0.0_dp .AND. &
    1650              :           mp2_env%BSE%screening_method == bse_screening_alpha) THEN
    1651            0 :          IF (unit_nr > 0) THEN
    1652              :             CALL cp_warn(__LOCATION__, &
    1653            0 :                          "Screening factor is negative. Defaulting to 0.25")
    1654              :          END IF
    1655            0 :          mp2_env%BSE%screening_factor = 0.25_dp
    1656              :       END IF
    1657              : 
    1658           30 :       IF (mp2_env%BSE%screening_factor == 0.0_dp) THEN
    1659              :          ! Use RPA internally in this case
    1660            0 :          mp2_env%BSE%screening_method = bse_screening_rpa
    1661              :       END IF
    1662           30 :       IF (mp2_env%BSE%screening_factor == 1.0_dp) THEN
    1663              :          ! Use TDHF internally in this case
    1664            0 :          mp2_env%BSE%screening_method = bse_screening_tdhf
    1665              :       END IF
    1666              : 
    1667              :       ! Add warning for usage of KS energies
    1668           30 :       IF (mp2_env%bse%use_ks_energies) THEN
    1669            0 :          IF (unit_nr > 0) THEN
    1670              :             CALL cp_warn(__LOCATION__, &
    1671              :                          "Using KS energies for BSE calculations. Therefore, no quantities "// &
    1672            0 :                          "of the preceeding GW calculation enter the BSE.")
    1673              :          END IF
    1674              :       END IF
    1675              : 
    1676              :       ! Add warning if periodic calculation is invoked
    1677           30 :       IF (ndim_periodic_poisson /= 0) THEN
    1678            0 :          IF (unit_nr > 0) THEN
    1679              :             CALL cp_warn(__LOCATION__, &
    1680              :                          "Poisson solver should be invoked by PERIODIC NONE. "// &
    1681              :                          "The applied length gauge might give misleading results for "// &
    1682            0 :                          "oscillator strengths.")
    1683              :          END IF
    1684              :       END IF
    1685           30 :       IF (ndim_periodic_cell /= 0) THEN
    1686            0 :          IF (unit_nr > 0) THEN
    1687              :             CALL cp_warn(__LOCATION__, &
    1688              :                          "CELL in SUBSYS should be invoked with PERIODIC NONE. "// &
    1689              :                          "The applied length gauge might give misleading results for "// &
    1690            0 :                          "oscillator strengths.")
    1691              :          END IF
    1692              :       END IF
    1693              : 
    1694           30 :       CALL timestop(handle)
    1695           30 :    END SUBROUTINE adapt_BSE_input_params
    1696              : 
    1697              : ! **************************************************************************************************
    1698              : 
    1699              : ! **************************************************************************************************
    1700              : !> \brief ...
    1701              : !> \param fm_multipole_ai_trunc ...
    1702              : !> \param fm_multipole_ij_trunc ...
    1703              : !> \param fm_multipole_ab_trunc ...
    1704              : !> \param qs_env ...
    1705              : !> \param mo_coeff ...
    1706              : !> \param rpoint ...
    1707              : !> \param n_moments ...
    1708              : !> \param homo_red ...
    1709              : !> \param virtual_red ...
    1710              : !> \param context_BSE ...
    1711              : ! **************************************************************************************************
    1712           36 :    SUBROUTINE get_multipoles_mo(fm_multipole_ai_trunc, fm_multipole_ij_trunc, fm_multipole_ab_trunc, &
    1713           36 :                                 qs_env, mo_coeff, rpoint, n_moments, &
    1714              :                                 homo_red, virtual_red, context_BSE)
    1715              : 
    1716              :       TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:), &
    1717              :          INTENT(INOUT)                                   :: fm_multipole_ai_trunc, &
    1718              :                                                             fm_multipole_ij_trunc, &
    1719              :                                                             fm_multipole_ab_trunc
    1720              :       TYPE(qs_environment_type), POINTER                 :: qs_env
    1721              :       TYPE(cp_fm_type), DIMENSION(:), INTENT(IN)         :: mo_coeff
    1722              :       REAL(dp), ALLOCATABLE, DIMENSION(:), INTENT(INOUT) :: rpoint
    1723              :       INTEGER, INTENT(IN)                                :: n_moments, homo_red, virtual_red
    1724              :       TYPE(cp_blacs_env_type), POINTER                   :: context_BSE
    1725              : 
    1726              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'get_multipoles_mo'
    1727              : 
    1728              :       INTEGER                                            :: handle, idir, n_multipole, n_occ, &
    1729              :                                                             n_virt, nao, nmo_mp2
    1730           36 :       REAL(KIND=dp), DIMENSION(:), POINTER               :: ref_point
    1731              :       TYPE(cp_fm_struct_type), POINTER :: fm_struct_mp_ab_trunc, fm_struct_mp_ai_trunc, &
    1732              :          fm_struct_mp_ij_trunc, fm_struct_multipoles_ao, fm_struct_nao_nmo, fm_struct_nmo_nmo
    1733              :       TYPE(cp_fm_type)                                   :: fm_work
    1734           36 :       TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:)        :: fm_multipole_per_dir
    1735           36 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrix_multipole, matrix_s
    1736           36 :       TYPE(mo_set_type), DIMENSION(:), POINTER           :: mos
    1737              :       TYPE(mp_para_env_type), POINTER                    :: para_env_BSE
    1738              :       TYPE(neighbor_list_set_p_type), DIMENSION(:), &
    1739           36 :          POINTER                                         :: sab_orb
    1740              : 
    1741           36 :       CALL timeset(routineN, handle)
    1742              : 
    1743              :       !First, we calculate the AO dipoles
    1744           36 :       NULLIFY (sab_orb, matrix_s)
    1745              :       CALL get_qs_env(qs_env, &
    1746              :                       mos=mos, &
    1747              :                       matrix_s=matrix_s, &
    1748           36 :                       sab_orb=sab_orb)
    1749              : 
    1750              :       ! Use the same blacs environment as for the MO coefficients to ensure correct multiplication dbcsr x fm later on
    1751           36 :       fm_struct_multipoles_ao => mos(1)%mo_coeff%matrix_struct
    1752              :       ! BSE has different contexts and blacsenvs
    1753           36 :       para_env_BSE => context_BSE%para_env
    1754              :       ! Get size of multipole tensor
    1755           36 :       n_multipole = (6 + 11*n_moments + 6*n_moments**2 + n_moments**3)/6 - 1
    1756           36 :       NULLIFY (matrix_multipole)
    1757           36 :       CALL dbcsr_allocate_matrix_set(matrix_multipole, n_multipole)
    1758          252 :       ALLOCATE (fm_multipole_per_dir(n_multipole))
    1759          180 :       DO idir = 1, n_multipole
    1760          144 :          CALL dbcsr_init_p(matrix_multipole(idir)%matrix)
    1761              :          CALL dbcsr_create(matrix_multipole(idir)%matrix, name="ao_multipole", &
    1762          144 :                            template=matrix_s(1)%matrix, matrix_type=dbcsr_type_symmetric)
    1763          144 :          CALL cp_dbcsr_alloc_block_from_nbl(matrix_multipole(idir)%matrix, sab_orb)
    1764          180 :          CALL dbcsr_set(matrix_multipole(idir)%matrix, 0._dp)
    1765              :       END DO
    1766              : 
    1767           36 :       CALL get_reference_point(rpoint, qs_env=qs_env, reference=use_mom_ref_coac, ref_point=ref_point)
    1768              : 
    1769           36 :       CALL build_local_moment_matrix(qs_env, matrix_multipole, n_moments, ref_point=rpoint)
    1770              : 
    1771           36 :       NULLIFY (sab_orb)
    1772              : 
    1773              :       ! Now we transform them to MO
    1774              :       ! n_occ is the number of occupied MOs, nao the number of all AOs
    1775              :       ! Writing homo to n_occ instead if nmo,
    1776              :       ! takes care of ADDED_MOS, which would overwrite nmo of qs_env-mos, if invoked
    1777           36 :       CALL get_mo_set(mo_set=mos(1), homo=n_occ, nao=nao)
    1778              :       ! Takes into account removed nullspace values from SVD
    1779           36 :       nmo_mp2 = mo_coeff(1)%matrix_struct%ncol_global
    1780           36 :       n_virt = nmo_mp2 - n_occ
    1781              : 
    1782              :       ! At the end, we need four different layouts of matrices in this multiplication, e.g. for a dipole:
    1783              :       ! D_pq = full multipole matrix for occupied and unoccupied
    1784              :       ! Final result:D_pq= C_{mu p}        <\mu|\vec{r}|\nu>        C_{\nu q}              EQ.I
    1785              :       !                   \_______/         \___________/          \______/
    1786              :       !                    fm_coeff            matrix_multipole              fm_coeff
    1787              :       !                    (EQ.Ia)             (EQ.Ib)              (EQ.Ia)
    1788              :       ! Intermediate work matrices:
    1789              :       ! fm_work =                 <\mu|\vec{r}|\nu>        C_{\nu q}              EQ.II
    1790              : 
    1791              :       ! Struct for the full multipole matrix
    1792              :       CALL cp_fm_struct_create(fm_struct_nao_nmo, &
    1793              :                                fm_struct_multipoles_ao%para_env, fm_struct_multipoles_ao%context, &
    1794           36 :                                nao, nmo_mp2)
    1795              :       CALL cp_fm_struct_create(fm_struct_nmo_nmo, &
    1796              :                                fm_struct_multipoles_ao%para_env, fm_struct_multipoles_ao%context, &
    1797           36 :                                nmo_mp2, nmo_mp2)
    1798              : 
    1799              :       ! At the very end, we copy the multipoles corresponding to truncated BSE indices in i and a
    1800              :       CALL cp_fm_struct_create(fm_struct_mp_ai_trunc, para_env_BSE, &
    1801           36 :                                context_BSE, virtual_red, homo_red)
    1802              :       CALL cp_fm_struct_create(fm_struct_mp_ij_trunc, para_env_BSE, &
    1803           36 :                                context_BSE, homo_red, homo_red)
    1804              :       CALL cp_fm_struct_create(fm_struct_mp_ab_trunc, para_env_BSE, &
    1805           36 :                                context_BSE, virtual_red, virtual_red)
    1806          180 :       DO idir = 1, n_multipole
    1807              :          CALL cp_fm_create(fm_multipole_ai_trunc(idir), matrix_struct=fm_struct_mp_ai_trunc, &
    1808          144 :                            name="dipoles_mo_ai_trunc")
    1809          144 :          CALL cp_fm_set_all(fm_multipole_ai_trunc(idir), 0.0_dp)
    1810              :          CALL cp_fm_create(fm_multipole_ij_trunc(idir), matrix_struct=fm_struct_mp_ij_trunc, &
    1811          144 :                            name="dipoles_mo_ij_trunc")
    1812          144 :          CALL cp_fm_set_all(fm_multipole_ij_trunc(idir), 0.0_dp)
    1813              :          CALL cp_fm_create(fm_multipole_ab_trunc(idir), matrix_struct=fm_struct_mp_ab_trunc, &
    1814          144 :                            name="dipoles_mo_ab_trunc")
    1815          180 :          CALL cp_fm_set_all(fm_multipole_ab_trunc(idir), 0.0_dp)
    1816              :       END DO
    1817              : 
    1818              :       ! Need another temporary matrix to store intermediate result from right multiplication
    1819              :       ! D = C_{mu a}        <\mu|\vec{r}|\nu>        C_{\nu i}
    1820           36 :       CALL cp_fm_create(fm_work, matrix_struct=fm_struct_nao_nmo, name="multipole_work")
    1821           36 :       CALL cp_fm_set_all(fm_work, 0.0_dp)
    1822              : 
    1823          180 :       DO idir = 1, n_multipole
    1824              :          ! Create the full multipole matrix per direction
    1825          144 :          CALL cp_fm_create(fm_multipole_per_dir(idir), matrix_struct=fm_struct_nmo_nmo, name="multipoles_mo")
    1826          144 :          CALL cp_fm_set_all(fm_multipole_per_dir(idir), 0.0_dp)
    1827              :          ! Fill final (MO) multipole matrix
    1828              :          CALL cp_dbcsr_sm_fm_multiply(matrix_multipole(idir)%matrix, mo_coeff(1), &
    1829          144 :                                       fm_work, ncol=nmo_mp2)
    1830              :          ! Now obtain the multipoles by the final multiplication;
    1831              :          ! We do that inside the loop to obtain multipoles per axis for print
    1832          144 :          CALL parallel_gemm('T', 'N', nmo_mp2, nmo_mp2, nao, 1.0_dp, mo_coeff(1), fm_work, 0.0_dp, fm_multipole_per_dir(idir))
    1833              : 
    1834              :          ! Truncate full matrix to the BSE indices
    1835              :          ! D_ai
    1836              :          CALL cp_fm_to_fm_submat_general(fm_multipole_per_dir(idir), &
    1837              :                                          fm_multipole_ai_trunc(idir), &
    1838              :                                          virtual_red, &
    1839              :                                          homo_red, &
    1840              :                                          n_occ + 1, &
    1841              :                                          n_occ - homo_red + 1, &
    1842              :                                          1, &
    1843              :                                          1, &
    1844          144 :                                          fm_multipole_per_dir(idir)%matrix_struct%context)
    1845              :          ! D_ij
    1846              :          CALL cp_fm_to_fm_submat_general(fm_multipole_per_dir(idir), &
    1847              :                                          fm_multipole_ij_trunc(idir), &
    1848              :                                          homo_red, &
    1849              :                                          homo_red, &
    1850              :                                          n_occ - homo_red + 1, &
    1851              :                                          n_occ - homo_red + 1, &
    1852              :                                          1, &
    1853              :                                          1, &
    1854          144 :                                          fm_multipole_per_dir(idir)%matrix_struct%context)
    1855              :          ! D_ab
    1856              :          CALL cp_fm_to_fm_submat_general(fm_multipole_per_dir(idir), &
    1857              :                                          fm_multipole_ab_trunc(idir), &
    1858              :                                          virtual_red, &
    1859              :                                          virtual_red, &
    1860              :                                          n_occ + 1, &
    1861              :                                          n_occ + 1, &
    1862              :                                          1, &
    1863              :                                          1, &
    1864          180 :                                          fm_multipole_per_dir(idir)%matrix_struct%context)
    1865              :       END DO
    1866              : 
    1867              :       !Release matrices and structs
    1868           36 :       NULLIFY (fm_struct_multipoles_ao)
    1869           36 :       CALL cp_fm_struct_release(fm_struct_mp_ai_trunc)
    1870           36 :       CALL cp_fm_struct_release(fm_struct_mp_ij_trunc)
    1871           36 :       CALL cp_fm_struct_release(fm_struct_mp_ab_trunc)
    1872           36 :       CALL cp_fm_struct_release(fm_struct_nao_nmo)
    1873           36 :       CALL cp_fm_struct_release(fm_struct_nmo_nmo)
    1874          180 :       DO idir = 1, n_multipole
    1875          180 :          CALL cp_fm_release(fm_multipole_per_dir(idir))
    1876              :       END DO
    1877           36 :       DEALLOCATE (fm_multipole_per_dir)
    1878           36 :       CALL cp_fm_release(fm_work)
    1879           36 :       CALL dbcsr_deallocate_matrix_set(matrix_multipole)
    1880              : 
    1881           36 :       CALL timestop(handle)
    1882              : 
    1883          108 :    END SUBROUTINE get_multipoles_mo
    1884              : 
    1885              : ! **************************************************************************************************
    1886              : !> \brief Computes trace of form Tr{A^T B C} for exciton descriptors
    1887              : !> \param fm_A Full Matrix, typically X or Y, in format homo x virtual
    1888              : !> \param fm_B ...
    1889              : !> \param fm_C ...
    1890              : !> \param alpha ...
    1891              : ! **************************************************************************************************
    1892        11520 :    SUBROUTINE trace_exciton_descr(fm_A, fm_B, fm_C, alpha)
    1893              : 
    1894              :       TYPE(cp_fm_type), INTENT(IN)                       :: fm_A, fm_B, fm_C
    1895              :       REAL(KIND=dp), INTENT(OUT)                         :: alpha
    1896              : 
    1897              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'trace_exciton_descr'
    1898              : 
    1899              :       INTEGER                                            :: handle, ncol_A, ncol_B, ncol_C, nrow_A, &
    1900              :                                                             nrow_B, nrow_C
    1901              :       TYPE(cp_fm_type)                                   :: fm_work_ia
    1902              : 
    1903         1920 :       CALL timeset(routineN, handle)
    1904              : 
    1905         1920 :       CALL cp_fm_create(fm_work_ia, fm_A%matrix_struct)
    1906         1920 :       CALL cp_fm_get_info(fm_A, nrow_global=nrow_A, ncol_global=ncol_A)
    1907         1920 :       CALL cp_fm_get_info(fm_B, nrow_global=nrow_B, ncol_global=ncol_B)
    1908         1920 :       CALL cp_fm_get_info(fm_C, nrow_global=nrow_C, ncol_global=ncol_C)
    1909              : 
    1910              :       ! Check matrix sizes
    1911         1920 :       CPASSERT(nrow_A == nrow_B .AND. ncol_A == ncol_C .AND. ncol_B == nrow_C)
    1912              : 
    1913         1920 :       CALL cp_fm_set_all(fm_work_ia, 0.0_dp)
    1914              : 
    1915              :       CALL parallel_gemm("N", "N", nrow_A, ncol_A, nrow_C, 1.0_dp, &
    1916         1920 :                          fm_B, fm_C, 0.0_dp, fm_work_ia)
    1917              : 
    1918         1920 :       CALL cp_fm_trace(fm_A, fm_work_ia, alpha)
    1919              : 
    1920         1920 :       CALL cp_fm_release(fm_work_ia)
    1921              : 
    1922         1920 :       CALL timestop(handle)
    1923              : 
    1924         1920 :    END SUBROUTINE trace_exciton_descr
    1925              : 
    1926              : END MODULE bse_util
        

Generated by: LCOV version 2.0-1