LCOV - code coverage report
Current view: top level - src - bse_util.F (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:b1f098b) Lines: 434 545 79.6 %
Date: 2024-05-05 06:30:09 Functions: 10 11 90.9 %

          Line data    Source code
       1             : !--------------------------------------------------------------------------------------------------!
       2             : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3             : !   Copyright 2000-2024 CP2K developers group <https://cp2k.org>                                   !
       4             : !                                                                                                  !
       5             : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6             : !--------------------------------------------------------------------------------------------------!
       7             : 
       8             : ! **************************************************************************************************
       9             : !> \brief Auxiliary routines for GW + Bethe-Salpeter for computing electronic excitations
      10             : !> \par History
      11             : !>      11.2023 created [Maximilian Graml]
      12             : ! **************************************************************************************************
      13             : MODULE bse_util
      14             :    USE cp_blacs_env,                    ONLY: cp_blacs_env_type
      15             :    USE cp_fm_basic_linalg,              ONLY: cp_fm_upper_to_full
      16             :    USE cp_fm_cholesky,                  ONLY: cp_fm_cholesky_decompose,&
      17             :                                               cp_fm_cholesky_invert
      18             :    USE cp_fm_struct,                    ONLY: cp_fm_struct_create,&
      19             :                                               cp_fm_struct_release,&
      20             :                                               cp_fm_struct_type
      21             :    USE cp_fm_types,                     ONLY: cp_fm_create,&
      22             :                                               cp_fm_get_info,&
      23             :                                               cp_fm_indxg2l,&
      24             :                                               cp_fm_indxg2p,&
      25             :                                               cp_fm_release,&
      26             :                                               cp_fm_set_all,&
      27             :                                               cp_fm_to_fm_submat_general,&
      28             :                                               cp_fm_type
      29             :    USE kinds,                           ONLY: dp
      30             :    USE message_passing,                 ONLY: mp_para_env_type,&
      31             :                                               mp_request_type
      32             :    USE mp2_types,                       ONLY: integ_mat_buffer_type,&
      33             :                                               mp2_type
      34             :    USE parallel_gemm_api,               ONLY: parallel_gemm
      35             :    USE physcon,                         ONLY: evolt
      36             :    USE rpa_communication,               ONLY: communicate_buffer
      37             :    USE util,                            ONLY: sort,&
      38             :                                               sort_unique
      39             : #include "./base/base_uses.f90"
      40             : 
      41             :    IMPLICIT NONE
      42             : 
      43             :    PRIVATE
      44             : 
      45             :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'bse_util'
      46             : 
      47             :    PUBLIC :: mult_B_with_W, fm_general_add_bse, truncate_fm, fm_write_thresh, print_BSE_start_flag, &
      48             :              deallocate_matrices_bse, comp_eigvec_coeff_BSE, sort_excitations, &
      49             :              estimate_BSE_resources, filter_eigvec_contrib, truncate_BSE_matrices
      50             : 
      51             : CONTAINS
      52             : 
      53             : ! **************************************************************************************************
      54             : !> \brief Multiplies B-matrix (RI-3c-Integrals) with W (screening) to obtain \bar{B}
      55             : !> \param fm_mat_S_ij_bse ...
      56             : !> \param fm_mat_S_ia_bse ...
      57             : !> \param fm_mat_S_bar_ia_bse ...
      58             : !> \param fm_mat_S_bar_ij_bse ...
      59             : !> \param fm_mat_Q_static_bse_gemm ...
      60             : !> \param dimen_RI ...
      61             : !> \param homo ...
      62             : !> \param virtual ...
      63             : ! **************************************************************************************************
      64          24 :    SUBROUTINE mult_B_with_W(fm_mat_S_ij_bse, fm_mat_S_ia_bse, fm_mat_S_bar_ia_bse, &
      65             :                             fm_mat_S_bar_ij_bse, fm_mat_Q_static_bse_gemm, &
      66             :                             dimen_RI, homo, virtual)
      67             : 
      68             :       TYPE(cp_fm_type), INTENT(IN)                       :: fm_mat_S_ij_bse, fm_mat_S_ia_bse
      69             :       TYPE(cp_fm_type), INTENT(OUT)                      :: fm_mat_S_bar_ia_bse, fm_mat_S_bar_ij_bse
      70             :       TYPE(cp_fm_type), INTENT(IN)                       :: fm_mat_Q_static_bse_gemm
      71             :       INTEGER, INTENT(IN)                                :: dimen_RI, homo, virtual
      72             : 
      73             :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'mult_B_with_W'
      74             : 
      75             :       INTEGER                                            :: handle, i_global, iiB, info_chol, &
      76             :                                                             j_global, jjB, ncol_local, nrow_local
      77           4 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
      78             :       TYPE(cp_fm_type)                                   :: fm_work
      79             : 
      80           4 :       CALL timeset(routineN, handle)
      81             : 
      82           4 :       CALL cp_fm_create(fm_mat_S_bar_ia_bse, fm_mat_S_ia_bse%matrix_struct)
      83           4 :       CALL cp_fm_set_all(fm_mat_S_bar_ia_bse, 0.0_dp)
      84             : 
      85           4 :       CALL cp_fm_create(fm_mat_S_bar_ij_bse, fm_mat_S_ij_bse%matrix_struct)
      86           4 :       CALL cp_fm_set_all(fm_mat_S_bar_ij_bse, 0.0_dp)
      87             : 
      88           4 :       CALL cp_fm_create(fm_work, fm_mat_Q_static_bse_gemm%matrix_struct)
      89           4 :       CALL cp_fm_set_all(fm_work, 0.0_dp)
      90             : 
      91             :       ! get info of fm_mat_Q_static_bse and compute ((1+Q(0))^-1-1)
      92             :       CALL cp_fm_get_info(matrix=fm_mat_Q_static_bse_gemm, &
      93             :                           nrow_local=nrow_local, &
      94             :                           ncol_local=ncol_local, &
      95             :                           row_indices=row_indices, &
      96           4 :                           col_indices=col_indices)
      97             : 
      98         336 :       DO jjB = 1, ncol_local
      99         332 :          j_global = col_indices(jjB)
     100       14114 :          DO iiB = 1, nrow_local
     101       13778 :             i_global = row_indices(iiB)
     102       14110 :             IF (j_global == i_global .AND. i_global <= dimen_RI) THEN
     103         166 :                fm_mat_Q_static_bse_gemm%local_data(iiB, jjB) = fm_mat_Q_static_bse_gemm%local_data(iiB, jjB) + 1.0_dp
     104             :             END IF
     105             :          END DO
     106             :       END DO
     107             : 
     108             :       ! calculate Trace(Log(Matrix)) as Log(DET(Matrix)) via cholesky decomposition
     109           4 :       CALL cp_fm_cholesky_decompose(matrix=fm_mat_Q_static_bse_gemm, n=dimen_RI, info_out=info_chol)
     110             : 
     111           4 :       CPASSERT(info_chol == 0)
     112             : 
     113             :       ! calculate [1+Q(i0)]^-1
     114           4 :       CALL cp_fm_cholesky_invert(fm_mat_Q_static_bse_gemm)
     115             : 
     116             :       ! symmetrize the result
     117           4 :       CALL cp_fm_upper_to_full(fm_mat_Q_static_bse_gemm, fm_work)
     118             : 
     119             :       CALL parallel_gemm(transa="N", transb="N", m=dimen_RI, n=homo**2, k=dimen_RI, alpha=1.0_dp, &
     120             :                          matrix_a=fm_mat_Q_static_bse_gemm, matrix_b=fm_mat_S_ij_bse, beta=0.0_dp, &
     121           4 :                          matrix_c=fm_mat_S_bar_ij_bse)
     122             : 
     123             :       ! fm_mat_S_bar_ia_bse has a different blacs_env as fm_mat_S_ij_bse since we take
     124             :       ! fm_mat_S_ia_bse from RPA. Therefore, we also need a different fm_mat_Q_static_bse_gemm
     125             :       CALL parallel_gemm(transa="N", transb="N", m=dimen_RI, n=homo*virtual, k=dimen_RI, alpha=1.0_dp, &
     126             :                          matrix_a=fm_mat_Q_static_bse_gemm, matrix_b=fm_mat_S_ia_bse, beta=0.0_dp, &
     127           4 :                          matrix_c=fm_mat_S_bar_ia_bse)
     128             : 
     129           4 :       CALL cp_fm_release(fm_work)
     130             : 
     131           4 :       CALL timestop(handle)
     132             : 
     133           4 :    END SUBROUTINE
     134             : 
     135             : ! **************************************************************************************************
     136             : !> \brief Adds and reorders full matrices with a combined index structure, e.g. adding W_ij,ab
     137             : !> to A_ia, which needs MPI communication.
     138             : !> \param fm_out ...
     139             : !> \param fm_in ...
     140             : !> \param beta ...
     141             : !> \param nrow_secidx_in ...
     142             : !> \param ncol_secidx_in ...
     143             : !> \param nrow_secidx_out ...
     144             : !> \param ncol_secidx_out ...
     145             : !> \param unit_nr ...
     146             : !> \param reordering ...
     147             : !> \param mp2_env ...
     148             : ! **************************************************************************************************
     149           6 :    SUBROUTINE fm_general_add_bse(fm_out, fm_in, beta, nrow_secidx_in, ncol_secidx_in, &
     150             :                                  nrow_secidx_out, ncol_secidx_out, unit_nr, reordering, mp2_env)
     151             : 
     152             :       TYPE(cp_fm_type), INTENT(INOUT)                    :: fm_out, fm_in
     153             :       REAL(kind=dp)                                      :: beta
     154             :       INTEGER, INTENT(IN)                                :: nrow_secidx_in, ncol_secidx_in, &
     155             :                                                             nrow_secidx_out, ncol_secidx_out
     156             :       INTEGER                                            :: unit_nr
     157             :       INTEGER, DIMENSION(4)                              :: reordering
     158             :       TYPE(mp2_type), INTENT(INOUT)                      :: mp2_env
     159             : 
     160             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'fm_general_add_bse'
     161             : 
     162             :       INTEGER :: col_idx_loc, dummy, handle, handle2, i_entry_rec, idx_col_out, idx_row_out, ii, &
     163             :          iproc, jj, ncol_block_in, ncol_block_out, ncol_local_in, ncol_local_out, npcol, nprocs, &
     164             :          nprow, nrow_block_in, nrow_block_out, nrow_local_in, nrow_local_out, proc_send, &
     165             :          row_idx_loc, send_pcol, send_prow
     166           6 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: entry_counter, num_entries_rec, &
     167             :                                                             num_entries_send
     168             :       INTEGER, DIMENSION(4)                              :: indices_in
     169           6 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices_in, col_indices_out, &
     170           6 :                                                             row_indices_in, row_indices_out
     171             :       TYPE(integ_mat_buffer_type), ALLOCATABLE, &
     172           6 :          DIMENSION(:)                                    :: buffer_rec, buffer_send
     173             :       TYPE(mp_para_env_type), POINTER                    :: para_env_out
     174           6 :       TYPE(mp_request_type), DIMENSION(:, :), POINTER    :: req_array
     175             : 
     176           6 :       CALL timeset(routineN, handle)
     177           6 :       CALL timeset(routineN//"_1_setup", handle2)
     178             : 
     179           6 :       para_env_out => fm_out%matrix_struct%para_env
     180             :       ! A_iajb
     181             :       ! We start by moving data from local parts of W_ijab to the full matrix A_iajb using buffers
     182             :       CALL cp_fm_get_info(matrix=fm_out, &
     183             :                           nrow_local=nrow_local_out, &
     184             :                           ncol_local=ncol_local_out, &
     185             :                           row_indices=row_indices_out, &
     186             :                           col_indices=col_indices_out, &
     187             :                           nrow_block=nrow_block_out, &
     188           6 :                           ncol_block=ncol_block_out)
     189             : 
     190           6 :       nprow = fm_out%matrix_struct%context%num_pe(1)
     191           6 :       npcol = fm_out%matrix_struct%context%num_pe(2)
     192             : 
     193          18 :       ALLOCATE (num_entries_rec(0:para_env_out%num_pe - 1))
     194          18 :       ALLOCATE (num_entries_send(0:para_env_out%num_pe - 1))
     195             : 
     196          18 :       num_entries_rec(:) = 0
     197          18 :       num_entries_send(:) = 0
     198             : 
     199           6 :       dummy = 0
     200             : 
     201             :       CALL cp_fm_get_info(matrix=fm_in, &
     202             :                           nrow_local=nrow_local_in, &
     203             :                           ncol_local=ncol_local_in, &
     204             :                           row_indices=row_indices_in, &
     205             :                           col_indices=col_indices_in, &
     206             :                           nrow_block=nrow_block_in, &
     207           6 :                           ncol_block=ncol_block_in)
     208             : 
     209           6 :       IF (unit_nr > 0 .AND. mp2_env%ri_g0w0%bse_debug_print) THEN
     210           0 :          WRITE (unit_nr, '(T2,A10,T13,A14,A10,T71,I10)') 'BSE|DEBUG|', 'Row number of ', fm_out%name, &
     211           0 :             fm_out%matrix_struct%nrow_global
     212           0 :          WRITE (unit_nr, '(T2,A10,T13,A17,A10,T71,I10)') 'BSE|DEBUG|', 'Column number of ', fm_out%name, &
     213           0 :             fm_out%matrix_struct%ncol_global
     214             : 
     215           0 :          WRITE (unit_nr, '(T2,A10,T13,A18,A10,T71,I10)') 'BSE|DEBUG|', 'Row block size of ', fm_out%name, nrow_block_out
     216           0 :          WRITE (unit_nr, '(T2,A10,T13,A21,A10,T71,I10)') 'BSE|DEBUG|', 'Column block size of ', fm_out%name, ncol_block_out
     217             : 
     218           0 :          WRITE (unit_nr, '(T2,A10,T13,A14,A10,T71,I10)') 'BSE|DEBUG|', 'Row number of ', fm_in%name, &
     219           0 :             fm_in%matrix_struct%nrow_global
     220           0 :          WRITE (unit_nr, '(T2,A10,T13,A17,A10,T71,I10)') 'BSE|DEBUG|', 'Column number of ', fm_in%name, &
     221           0 :             fm_in%matrix_struct%ncol_global
     222             : 
     223           0 :          WRITE (unit_nr, '(T2,A10,T13,A18,A10,T71,I10)') 'BSE|DEBUG|', 'Row block size of ', fm_in%name, nrow_block_in
     224           0 :          WRITE (unit_nr, '(T2,A10,T13,A21,A10,T71,I10)') 'BSE|DEBUG|', 'Column block size of ', fm_in%name, ncol_block_in
     225             :       END IF
     226             : 
     227             :       ! Use scalapack wrapper to find process index in fm_out
     228             :       ! To that end, we obtain the global index in fm_out from the level indices
     229           6 :       indices_in(:) = 0
     230          86 :       DO row_idx_loc = 1, nrow_local_in
     231          80 :          indices_in(1) = (row_indices_in(row_idx_loc) - 1)/nrow_secidx_in + 1
     232          80 :          indices_in(2) = MOD(row_indices_in(row_idx_loc) - 1, nrow_secidx_in) + 1
     233        6998 :          DO col_idx_loc = 1, ncol_local_in
     234        6912 :             indices_in(3) = (col_indices_in(col_idx_loc) - 1)/ncol_secidx_in + 1
     235        6912 :             indices_in(4) = MOD(col_indices_in(col_idx_loc) - 1, ncol_secidx_in) + 1
     236             : 
     237        6912 :             idx_row_out = indices_in(reordering(2)) + (indices_in(reordering(1)) - 1)*nrow_secidx_out
     238        6912 :             idx_col_out = indices_in(reordering(4)) + (indices_in(reordering(3)) - 1)*ncol_secidx_out
     239             : 
     240             :             send_prow = cp_fm_indxg2p(idx_row_out, nrow_block_out, dummy, &
     241        6912 :                                       fm_out%matrix_struct%first_p_pos(1), nprow)
     242             : 
     243             :             send_pcol = cp_fm_indxg2p(idx_col_out, ncol_block_out, dummy, &
     244        6912 :                                       fm_out%matrix_struct%first_p_pos(2), npcol)
     245             : 
     246        6912 :             proc_send = fm_out%matrix_struct%context%blacs2mpi(send_prow, send_pcol)
     247             : 
     248        6992 :             num_entries_send(proc_send) = num_entries_send(proc_send) + 1
     249             : 
     250             :          END DO
     251             :       END DO
     252             : 
     253           6 :       CALL timestop(handle2)
     254             : 
     255           6 :       CALL timeset(routineN//"_2_comm_entry_nums", handle2)
     256           6 :       IF (unit_nr > 0 .AND. mp2_env%ri_g0w0%bse_debug_print) THEN
     257           0 :          WRITE (unit_nr, '(T2,A10,T13,A27)') 'BSE|DEBUG|', 'Communicating entry numbers'
     258             :       END IF
     259             : 
     260           6 :       CALL para_env_out%alltoall(num_entries_send, num_entries_rec, 1)
     261             : 
     262           6 :       CALL timestop(handle2)
     263             : 
     264           6 :       CALL timeset(routineN//"_3_alloc_buffer", handle2)
     265           6 :       IF (unit_nr > 0 .AND. mp2_env%ri_g0w0%bse_debug_print) THEN
     266           0 :          WRITE (unit_nr, '(T2,A10,T13,A18)') 'BSE|DEBUG|', 'Allocating buffers'
     267             :       END IF
     268             : 
     269             :       ! Buffers for entries and their indices
     270          30 :       ALLOCATE (buffer_rec(0:para_env_out%num_pe - 1))
     271          30 :       ALLOCATE (buffer_send(0:para_env_out%num_pe - 1))
     272             : 
     273             :       ! allocate data message and corresponding indices
     274          18 :       DO iproc = 0, para_env_out%num_pe - 1
     275             : 
     276          30 :          ALLOCATE (buffer_rec(iproc)%msg(num_entries_rec(iproc)))
     277        6930 :          buffer_rec(iproc)%msg = 0.0_dp
     278             : 
     279             :       END DO
     280             : 
     281          18 :       DO iproc = 0, para_env_out%num_pe - 1
     282             : 
     283          30 :          ALLOCATE (buffer_send(iproc)%msg(num_entries_send(iproc)))
     284        6930 :          buffer_send(iproc)%msg = 0.0_dp
     285             : 
     286             :       END DO
     287             : 
     288          18 :       DO iproc = 0, para_env_out%num_pe - 1
     289             : 
     290          30 :          ALLOCATE (buffer_rec(iproc)%indx(num_entries_rec(iproc), 2))
     291       13866 :          buffer_rec(iproc)%indx = 0
     292             : 
     293             :       END DO
     294             : 
     295          18 :       DO iproc = 0, para_env_out%num_pe - 1
     296             : 
     297          30 :          ALLOCATE (buffer_send(iproc)%indx(num_entries_send(iproc), 2))
     298       13866 :          buffer_send(iproc)%indx = 0
     299             : 
     300             :       END DO
     301             : 
     302           6 :       CALL timestop(handle2)
     303             : 
     304           6 :       CALL timeset(routineN//"_4_buf_from_fmin_"//fm_out%name, handle2)
     305           6 :       IF (unit_nr > 0 .AND. mp2_env%ri_g0w0%bse_debug_print) THEN
     306           0 :          WRITE (unit_nr, '(T2,A10,T13,A18,A10,A13)') 'BSE|DEBUG|', 'Writing data from ', fm_in%name, ' into buffers'
     307             :       END IF
     308             : 
     309          18 :       ALLOCATE (entry_counter(0:para_env_out%num_pe - 1))
     310          18 :       entry_counter(:) = 0
     311             : 
     312             :       ! Now we can write the actual data and indices to the send-buffer
     313          86 :       DO row_idx_loc = 1, nrow_local_in
     314          80 :          indices_in(1) = (row_indices_in(row_idx_loc) - 1)/nrow_secidx_in + 1
     315          80 :          indices_in(2) = MOD(row_indices_in(row_idx_loc) - 1, nrow_secidx_in) + 1
     316        6998 :          DO col_idx_loc = 1, ncol_local_in
     317        6912 :             indices_in(3) = (col_indices_in(col_idx_loc) - 1)/ncol_secidx_in + 1
     318        6912 :             indices_in(4) = MOD(col_indices_in(col_idx_loc) - 1, ncol_secidx_in) + 1
     319             : 
     320        6912 :             idx_row_out = indices_in(reordering(2)) + (indices_in(reordering(1)) - 1)*nrow_secidx_out
     321        6912 :             idx_col_out = indices_in(reordering(4)) + (indices_in(reordering(3)) - 1)*ncol_secidx_out
     322             : 
     323             :             send_prow = cp_fm_indxg2p(idx_row_out, nrow_block_out, dummy, &
     324        6912 :                                       fm_out%matrix_struct%first_p_pos(1), nprow)
     325             : 
     326             :             send_pcol = cp_fm_indxg2p(idx_col_out, ncol_block_out, dummy, &
     327        6912 :                                       fm_out%matrix_struct%first_p_pos(2), npcol)
     328             : 
     329        6912 :             proc_send = fm_out%matrix_struct%context%blacs2mpi(send_prow, send_pcol)
     330        6912 :             entry_counter(proc_send) = entry_counter(proc_send) + 1
     331             : 
     332             :             buffer_send(proc_send)%msg(entry_counter(proc_send)) = &
     333        6912 :                fm_in%local_data(row_idx_loc, col_idx_loc)
     334             : 
     335        6912 :             buffer_send(proc_send)%indx(entry_counter(proc_send), 1) = idx_row_out
     336        6992 :             buffer_send(proc_send)%indx(entry_counter(proc_send), 2) = idx_col_out
     337             : 
     338             :          END DO
     339             :       END DO
     340             : 
     341          90 :       ALLOCATE (req_array(1:para_env_out%num_pe, 4))
     342             : 
     343           6 :       CALL timestop(handle2)
     344             : 
     345           6 :       CALL timeset(routineN//"_5_comm_buffer", handle2)
     346           6 :       IF (unit_nr > 0 .AND. mp2_env%ri_g0w0%bse_debug_print) THEN
     347           0 :          WRITE (unit_nr, '(T2,A10,T13,A21)') 'BSE|DEBUG|', 'Communicating buffers'
     348             :       END IF
     349             : 
     350             :       ! communicate the buffer
     351             :       CALL communicate_buffer(para_env_out, num_entries_rec, num_entries_send, buffer_rec, &
     352           6 :                               buffer_send, req_array)
     353             : 
     354           6 :       CALL timestop(handle2)
     355             : 
     356           6 :       CALL timeset(routineN//"_6_buffer_to_fmout"//fm_out%name, handle2)
     357           6 :       IF (unit_nr > 0 .AND. mp2_env%ri_g0w0%bse_debug_print) THEN
     358           0 :          WRITE (unit_nr, '(T2,A10,T13,A24,A10)') 'BSE|DEBUG|', 'Writing from buffers to ', fm_out%name
     359             :       END IF
     360             : 
     361             :       ! fill fm_out with the entries from buffer_rec, i.e. buffer_rec are parts of fm_in
     362           6 :       nprocs = para_env_out%num_pe
     363             : 
     364             : !$OMP PARALLEL DO DEFAULT(NONE) &
     365             : !$OMP SHARED(fm_out, nprocs, nrow_block_out, ncol_block_out, &
     366             : !$OMP          num_entries_rec, buffer_rec, beta, dummy, nprow, npcol) &
     367           6 : !$OMP PRIVATE(iproc, i_entry_rec, ii, jj)
     368             :       DO iproc = 0, nprocs - 1
     369             :          DO i_entry_rec = 1, num_entries_rec(iproc)
     370             :             ii = cp_fm_indxg2l(buffer_rec(iproc)%indx(i_entry_rec, 1), nrow_block_out, &
     371             :                                dummy, dummy, nprow)
     372             :             jj = cp_fm_indxg2l(buffer_rec(iproc)%indx(i_entry_rec, 2), ncol_block_out, &
     373             :                                dummy, dummy, npcol)
     374             : 
     375             :             fm_out%local_data(ii, jj) = fm_out%local_data(ii, jj) + beta*buffer_rec(iproc)%msg(i_entry_rec)
     376             :          END DO
     377             :       END DO
     378             : !$OMP END PARALLEL DO
     379             : 
     380           6 :       CALL timestop(handle2)
     381             : 
     382           6 :       CALL timeset(routineN//"_7_cleanup", handle2)
     383           6 :       IF (unit_nr > 0 .AND. mp2_env%ri_g0w0%bse_debug_print) THEN
     384           0 :          WRITE (unit_nr, '(T2,A10,T13,A41)') 'BSE|DEBUG|', 'Starting cleanup of communication buffers'
     385             :       END IF
     386             : 
     387             :       !Clean up all the arrays from the communication process
     388          18 :       DO iproc = 0, para_env_out%num_pe - 1
     389          12 :          DEALLOCATE (buffer_rec(iproc)%msg)
     390          12 :          DEALLOCATE (buffer_rec(iproc)%indx)
     391          12 :          DEALLOCATE (buffer_send(iproc)%msg)
     392          18 :          DEALLOCATE (buffer_send(iproc)%indx)
     393             :       END DO
     394          30 :       DEALLOCATE (buffer_rec, buffer_send)
     395           6 :       DEALLOCATE (req_array)
     396           6 :       DEALLOCATE (entry_counter)
     397           6 :       DEALLOCATE (num_entries_rec, num_entries_send)
     398             : 
     399           6 :       CALL timestop(handle2)
     400           6 :       CALL timestop(handle)
     401             : 
     402          42 :    END SUBROUTINE fm_general_add_bse
     403             : 
     404             : ! **************************************************************************************************
     405             : !> \brief Routine for truncating a full matrix as given by the energy cutoffs in the input file.
     406             : !>  Logic: Matrices have some dimension dimen_RI x nrow_in*ncol_in  for the incoming (untruncated) matrix
     407             : !>  and dimen_RI x nrow_out*ncol_out for the truncated matrix. The truncation is done by resorting the indices
     408             : !>  via parallel communication.
     409             : !> \param fm_out ...
     410             : !> \param fm_in ...
     411             : !> \param ncol_in ...
     412             : !> \param nrow_out ...
     413             : !> \param ncol_out ...
     414             : !> \param unit_nr ...
     415             : !> \param mp2_env ...
     416             : !> \param nrow_offset ...
     417             : !> \param ncol_offset ...
     418             : ! **************************************************************************************************
     419          12 :    SUBROUTINE truncate_fm(fm_out, fm_in, ncol_in, &
     420             :                           nrow_out, ncol_out, unit_nr, mp2_env, &
     421             :                           nrow_offset, ncol_offset)
     422             : 
     423             :       TYPE(cp_fm_type), INTENT(INOUT)                    :: fm_out
     424             :       TYPE(cp_fm_type), INTENT(IN)                       :: fm_in
     425             :       INTEGER                                            :: ncol_in, nrow_out, ncol_out, unit_nr
     426             :       TYPE(mp2_type), INTENT(INOUT)                      :: mp2_env
     427             :       INTEGER, INTENT(IN), OPTIONAL                      :: nrow_offset, ncol_offset
     428             : 
     429             :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'truncate_fm'
     430             : 
     431             :       INTEGER :: col_idx_loc, dummy, handle, handle2, i_entry_rec, idx_col_first, idx_col_in, &
     432             :          idx_col_out, idx_col_sec, idx_row_in, ii, iproc, jj, ncol_block_in, ncol_block_out, &
     433             :          ncol_local_in, ncol_local_out, npcol, nprocs, nprow, nrow_block_in, nrow_block_out, &
     434             :          nrow_local_in, nrow_local_out, proc_send, row_idx_loc, send_pcol, send_prow
     435          12 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: entry_counter, num_entries_rec, &
     436             :                                                             num_entries_send
     437          12 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices_in, col_indices_out, &
     438          12 :                                                             row_indices_in, row_indices_out
     439             :       LOGICAL                                            :: correct_ncol, correct_nrow
     440             :       TYPE(integ_mat_buffer_type), ALLOCATABLE, &
     441          12 :          DIMENSION(:)                                    :: buffer_rec, buffer_send
     442             :       TYPE(mp_para_env_type), POINTER                    :: para_env_out
     443          12 :       TYPE(mp_request_type), DIMENSION(:, :), POINTER    :: req_array
     444             : 
     445          12 :       CALL timeset(routineN, handle)
     446          12 :       CALL timeset(routineN//"_1_setup", handle2)
     447             : 
     448          12 :       correct_nrow = .FALSE.
     449          12 :       correct_ncol = .FALSE.
     450             :       !In case of truncation in the occupied space, we need to correct the interval of indices
     451          12 :       IF (PRESENT(nrow_offset)) THEN
     452           8 :          correct_nrow = .TRUE.
     453             :       END IF
     454          12 :       IF (PRESENT(ncol_offset)) THEN
     455           4 :          correct_ncol = .TRUE.
     456             :       END IF
     457             : 
     458          12 :       para_env_out => fm_out%matrix_struct%para_env
     459             : 
     460             :       CALL cp_fm_get_info(matrix=fm_out, &
     461             :                           nrow_local=nrow_local_out, &
     462             :                           ncol_local=ncol_local_out, &
     463             :                           row_indices=row_indices_out, &
     464             :                           col_indices=col_indices_out, &
     465             :                           nrow_block=nrow_block_out, &
     466          12 :                           ncol_block=ncol_block_out)
     467             : 
     468          12 :       nprow = fm_out%matrix_struct%context%num_pe(1)
     469          12 :       npcol = fm_out%matrix_struct%context%num_pe(2)
     470             : 
     471          36 :       ALLOCATE (num_entries_rec(0:para_env_out%num_pe - 1))
     472          36 :       ALLOCATE (num_entries_send(0:para_env_out%num_pe - 1))
     473             : 
     474          36 :       num_entries_rec(:) = 0
     475          36 :       num_entries_send(:) = 0
     476             : 
     477          12 :       dummy = 0
     478             : 
     479             :       CALL cp_fm_get_info(matrix=fm_in, &
     480             :                           nrow_local=nrow_local_in, &
     481             :                           ncol_local=ncol_local_in, &
     482             :                           row_indices=row_indices_in, &
     483             :                           col_indices=col_indices_in, &
     484             :                           nrow_block=nrow_block_in, &
     485          12 :                           ncol_block=ncol_block_in)
     486             : 
     487          12 :       IF (unit_nr > 0 .AND. mp2_env%ri_g0w0%bse_debug_print) THEN
     488           0 :          WRITE (unit_nr, '(T2,A10,T13,A14,A10,T71,I10)') 'BSE|DEBUG|', 'Row number of ', fm_out%name, &
     489           0 :             fm_out%matrix_struct%nrow_global
     490           0 :          WRITE (unit_nr, '(T2,A10,T13,A17,A10,T71,I10)') 'BSE|DEBUG|', 'Column number of ', fm_out%name, &
     491           0 :             fm_out%matrix_struct%ncol_global
     492             : 
     493           0 :          WRITE (unit_nr, '(T2,A10,T13,A18,A10,T71,I10)') 'BSE|DEBUG|', 'Row block size of ', fm_out%name, nrow_block_out
     494           0 :          WRITE (unit_nr, '(T2,A10,T13,A21,A10,T71,I10)') 'BSE|DEBUG|', 'Column block size of ', fm_out%name, ncol_block_out
     495             : 
     496           0 :          WRITE (unit_nr, '(T2,A10,T13,A14,A10,T71,I10)') 'BSE|DEBUG|', 'Row number of ', fm_in%name, &
     497           0 :             fm_in%matrix_struct%nrow_global
     498           0 :          WRITE (unit_nr, '(T2,A10,T13,A17,A10,T71,I10)') 'BSE|DEBUG|', 'Column number of ', fm_in%name, &
     499           0 :             fm_in%matrix_struct%ncol_global
     500             : 
     501           0 :          WRITE (unit_nr, '(T2,A10,T13,A18,A10,T71,I10)') 'BSE|DEBUG|', 'Row block size of ', fm_in%name, nrow_block_in
     502           0 :          WRITE (unit_nr, '(T2,A10,T13,A21,A10,T71,I10)') 'BSE|DEBUG|', 'Column block size of ', fm_in%name, ncol_block_in
     503             :       END IF
     504             : 
     505             :       ! We find global indices in S with nrow_in and ncol_in for truncation
     506        1292 :       DO col_idx_loc = 1, ncol_local_in
     507        1284 :          idx_col_in = col_indices_in(col_idx_loc)
     508             : 
     509        1284 :          idx_col_first = (idx_col_in - 1)/ncol_in + 1
     510        1284 :          idx_col_sec = MOD(idx_col_in - 1, ncol_in) + 1
     511             : 
     512             :          ! If occupied orbitals are included, these have to be handled differently
     513             :          ! due to their reversed indexing
     514        1284 :          IF (correct_nrow) THEN
     515         368 :             idx_col_first = idx_col_first - nrow_offset + 1
     516         368 :             IF (idx_col_first .LE. 0) CYCLE
     517             :          ELSE
     518         916 :             IF (idx_col_first > nrow_out) EXIT
     519             :          END IF
     520        1280 :          IF (correct_ncol) THEN
     521          64 :             idx_col_sec = idx_col_sec - ncol_offset + 1
     522          64 :             IF (idx_col_sec .LE. 0) CYCLE
     523             :          ELSE
     524        1216 :             IF (idx_col_sec > ncol_out) CYCLE
     525             :          END IF
     526             : 
     527         832 :          idx_col_out = idx_col_sec + (idx_col_first - 1)*ncol_out
     528             : 
     529       35372 :          DO row_idx_loc = 1, nrow_local_in
     530       34528 :             idx_row_in = row_indices_in(row_idx_loc)
     531             : 
     532             :             send_prow = cp_fm_indxg2p(idx_row_in, nrow_block_out, dummy, &
     533       34528 :                                       fm_out%matrix_struct%first_p_pos(1), nprow)
     534             : 
     535             :             send_pcol = cp_fm_indxg2p(idx_col_out, ncol_block_out, dummy, &
     536       34528 :                                       fm_out%matrix_struct%first_p_pos(2), npcol)
     537             : 
     538       34528 :             proc_send = fm_out%matrix_struct%context%blacs2mpi(send_prow, send_pcol)
     539             : 
     540       35808 :             num_entries_send(proc_send) = num_entries_send(proc_send) + 1
     541             : 
     542             :          END DO
     543             :       END DO
     544             : 
     545          12 :       CALL timestop(handle2)
     546             : 
     547          12 :       CALL timeset(routineN//"_2_comm_entry_nums", handle2)
     548          12 :       IF (unit_nr > 0 .AND. mp2_env%ri_g0w0%bse_debug_print) THEN
     549           0 :          WRITE (unit_nr, '(T2,A10,T13,A27)') 'BSE|DEBUG|', 'Communicating entry numbers'
     550             :       END IF
     551             : 
     552          12 :       CALL para_env_out%alltoall(num_entries_send, num_entries_rec, 1)
     553             : 
     554          12 :       CALL timestop(handle2)
     555             : 
     556          12 :       CALL timeset(routineN//"_3_alloc_buffer", handle2)
     557          12 :       IF (unit_nr > 0 .AND. mp2_env%ri_g0w0%bse_debug_print) THEN
     558           0 :          WRITE (unit_nr, '(T2,A10,T13,A18)') 'BSE|DEBUG|', 'Allocating buffers'
     559             :       END IF
     560             : 
     561             :       ! Buffers for entries and their indices
     562          60 :       ALLOCATE (buffer_rec(0:para_env_out%num_pe - 1))
     563          60 :       ALLOCATE (buffer_send(0:para_env_out%num_pe - 1))
     564             : 
     565             :       ! allocate data message and corresponding indices
     566          36 :       DO iproc = 0, para_env_out%num_pe - 1
     567             : 
     568          64 :          ALLOCATE (buffer_rec(iproc)%msg(num_entries_rec(iproc)))
     569       34564 :          buffer_rec(iproc)%msg = 0.0_dp
     570             : 
     571             :       END DO
     572             : 
     573          36 :       DO iproc = 0, para_env_out%num_pe - 1
     574             : 
     575          64 :          ALLOCATE (buffer_send(iproc)%msg(num_entries_send(iproc)))
     576       34564 :          buffer_send(iproc)%msg = 0.0_dp
     577             : 
     578             :       END DO
     579             : 
     580          36 :       DO iproc = 0, para_env_out%num_pe - 1
     581             : 
     582          64 :          ALLOCATE (buffer_rec(iproc)%indx(num_entries_rec(iproc), 2))
     583       69140 :          buffer_rec(iproc)%indx = 0
     584             : 
     585             :       END DO
     586             : 
     587          36 :       DO iproc = 0, para_env_out%num_pe - 1
     588             : 
     589          64 :          ALLOCATE (buffer_send(iproc)%indx(num_entries_send(iproc), 2))
     590       69140 :          buffer_send(iproc)%indx = 0
     591             : 
     592             :       END DO
     593             : 
     594          12 :       CALL timestop(handle2)
     595             : 
     596          12 :       CALL timeset(routineN//"_4_buf_from_fmin_"//fm_out%name, handle2)
     597          12 :       IF (unit_nr > 0 .AND. mp2_env%ri_g0w0%bse_debug_print) THEN
     598           0 :          WRITE (unit_nr, '(T2,A10,T13,A18,A10,A13)') 'BSE|DEBUG|', 'Writing data from ', fm_in%name, ' into buffers'
     599             :       END IF
     600             : 
     601          36 :       ALLOCATE (entry_counter(0:para_env_out%num_pe - 1))
     602          36 :       entry_counter(:) = 0
     603             : 
     604             :       ! Now we can write the actual data and indices to the send-buffer
     605        1292 :       DO col_idx_loc = 1, ncol_local_in
     606        1284 :          idx_col_in = col_indices_in(col_idx_loc)
     607             : 
     608        1284 :          idx_col_first = (idx_col_in - 1)/ncol_in + 1
     609        1284 :          idx_col_sec = MOD(idx_col_in - 1, ncol_in) + 1
     610             : 
     611             :          ! If occupied orbitals are included, these have to be handled differently
     612             :          ! due to their reversed indexing
     613        1284 :          IF (correct_nrow) THEN
     614         368 :             idx_col_first = idx_col_first - nrow_offset + 1
     615         368 :             IF (idx_col_first .LE. 0) CYCLE
     616             :          ELSE
     617         916 :             IF (idx_col_first > nrow_out) EXIT
     618             :          END IF
     619        1280 :          IF (correct_ncol) THEN
     620          64 :             idx_col_sec = idx_col_sec - ncol_offset + 1
     621          64 :             IF (idx_col_sec .LE. 0) CYCLE
     622             :          ELSE
     623        1216 :             IF (idx_col_sec > ncol_out) CYCLE
     624             :          END IF
     625             : 
     626         832 :          idx_col_out = idx_col_sec + (idx_col_first - 1)*ncol_out
     627             : 
     628       35372 :          DO row_idx_loc = 1, nrow_local_in
     629       34528 :             idx_row_in = row_indices_in(row_idx_loc)
     630             : 
     631             :             send_prow = cp_fm_indxg2p(idx_row_in, nrow_block_out, dummy, &
     632       34528 :                                       fm_out%matrix_struct%first_p_pos(1), nprow)
     633             : 
     634             :             send_pcol = cp_fm_indxg2p(idx_col_out, ncol_block_out, dummy, &
     635       34528 :                                       fm_out%matrix_struct%first_p_pos(2), npcol)
     636             : 
     637       34528 :             proc_send = fm_out%matrix_struct%context%blacs2mpi(send_prow, send_pcol)
     638       34528 :             entry_counter(proc_send) = entry_counter(proc_send) + 1
     639             : 
     640             :             buffer_send(proc_send)%msg(entry_counter(proc_send)) = &
     641       34528 :                fm_in%local_data(row_idx_loc, col_idx_loc)
     642             :             !No need to create row_out, since it is identical to incoming
     643             :             !We dont change the RI index for any fm_mat_XX_BSE
     644       34528 :             buffer_send(proc_send)%indx(entry_counter(proc_send), 1) = idx_row_in
     645       35808 :             buffer_send(proc_send)%indx(entry_counter(proc_send), 2) = idx_col_out
     646             : 
     647             :          END DO
     648             :       END DO
     649             : 
     650         180 :       ALLOCATE (req_array(1:para_env_out%num_pe, 4))
     651             : 
     652          12 :       CALL timestop(handle2)
     653             : 
     654          12 :       CALL timeset(routineN//"_5_comm_buffer", handle2)
     655          12 :       IF (unit_nr > 0 .AND. mp2_env%ri_g0w0%bse_debug_print) THEN
     656           0 :          WRITE (unit_nr, '(T2,A10,T13,A21)') 'BSE|DEBUG|', 'Communicating buffers'
     657             :       END IF
     658             : 
     659             :       ! communicate the buffer
     660             :       CALL communicate_buffer(para_env_out, num_entries_rec, num_entries_send, buffer_rec, &
     661          12 :                               buffer_send, req_array)
     662             : 
     663          12 :       CALL timestop(handle2)
     664             : 
     665          12 :       CALL timeset(routineN//"_6_buffer_to_fmout"//fm_out%name, handle2)
     666          12 :       IF (unit_nr > 0 .AND. mp2_env%ri_g0w0%bse_debug_print) THEN
     667           0 :          WRITE (unit_nr, '(T2,A10,T13,A24,A10)') 'BSE|DEBUG|', 'Writing from buffers to ', fm_out%name
     668             :       END IF
     669             : 
     670             :       ! fill fm_out with the entries from buffer_rec, i.e. buffer_rec are parts of fm_in
     671          12 :       nprocs = para_env_out%num_pe
     672             : 
     673             : !$OMP PARALLEL DO DEFAULT(NONE) &
     674             : !$OMP SHARED(fm_out, nprocs, nrow_block_out, ncol_block_out, &
     675             : !$OMP          num_entries_rec, buffer_rec, nprow, npcol, dummy) &
     676          12 : !$OMP PRIVATE(iproc, i_entry_rec, ii, jj)
     677             :       DO iproc = 0, nprocs - 1
     678             :          DO i_entry_rec = 1, num_entries_rec(iproc)
     679             :             ii = cp_fm_indxg2l(buffer_rec(iproc)%indx(i_entry_rec, 1), nrow_block_out, &
     680             :                                dummy, dummy, nprow)
     681             :             jj = cp_fm_indxg2l(buffer_rec(iproc)%indx(i_entry_rec, 2), ncol_block_out, &
     682             :                                dummy, dummy, npcol)
     683             : 
     684             :             fm_out%local_data(ii, jj) = fm_out%local_data(ii, jj) + buffer_rec(iproc)%msg(i_entry_rec)
     685             :          END DO
     686             :       END DO
     687             : !$OMP END PARALLEL DO
     688             : 
     689          12 :       CALL timestop(handle2)
     690             : 
     691          12 :       CALL timeset(routineN//"_7_cleanup", handle2)
     692          12 :       IF (unit_nr > 0 .AND. mp2_env%ri_g0w0%bse_debug_print) THEN
     693           0 :          WRITE (unit_nr, '(T2,A10,T13,A41)') 'BSE|DEBUG|', 'Starting cleanup of communication buffers'
     694             :       END IF
     695             : 
     696             :       !Clean up all the arrays from the communication process
     697          36 :       DO iproc = 0, para_env_out%num_pe - 1
     698          24 :          DEALLOCATE (buffer_rec(iproc)%msg)
     699          24 :          DEALLOCATE (buffer_rec(iproc)%indx)
     700          24 :          DEALLOCATE (buffer_send(iproc)%msg)
     701          36 :          DEALLOCATE (buffer_send(iproc)%indx)
     702             :       END DO
     703          60 :       DEALLOCATE (buffer_rec, buffer_send)
     704          12 :       DEALLOCATE (req_array)
     705          12 :       DEALLOCATE (entry_counter)
     706          12 :       DEALLOCATE (num_entries_rec, num_entries_send)
     707             : 
     708          12 :       CALL timestop(handle2)
     709          12 :       CALL timestop(handle)
     710             : 
     711          96 :    END SUBROUTINE truncate_fm
     712             : 
     713             : ! **************************************************************************************************
     714             : !> \brief Debug function to write elements of a full matrix to file, if they are larger than a given threshold
     715             : !> \param fm ...
     716             : !> \param thresh ...
     717             : !> \param header ...
     718             : !> \param unit_nr ...
     719             : !> \param abs_vals ...
     720             : ! **************************************************************************************************
     721           0 :    SUBROUTINE fm_write_thresh(fm, thresh, header, unit_nr, abs_vals)
     722             : 
     723             :       TYPE(cp_fm_type), INTENT(IN)                       :: fm
     724             :       REAL(KIND=dp), INTENT(IN)                          :: thresh
     725             :       CHARACTER(LEN=*), INTENT(IN)                       :: header
     726             :       INTEGER, INTENT(IN)                                :: unit_nr
     727             :       LOGICAL, OPTIONAL                                  :: abs_vals
     728             : 
     729             :       CHARACTER(LEN=*), PARAMETER :: my_footer = " | ENDING WRITING OF MATRIX", &
     730             :          routineN = 'fm_write_thresh'
     731             : 
     732             :       INTEGER                                            :: handle, i, j, ncol_local, nrow_local
     733           0 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
     734             :       LOGICAL                                            :: my_abs_vals
     735             : 
     736           0 :       CALL timeset(routineN, handle)
     737             : 
     738           0 :       IF (PRESENT(abs_vals)) THEN
     739           0 :          my_abs_vals = abs_vals
     740             :       ELSE
     741             :          my_abs_vals = .FALSE.
     742             :       END IF
     743             : 
     744             :       CALL cp_fm_get_info(matrix=fm, &
     745             :                           nrow_local=nrow_local, &
     746             :                           ncol_local=ncol_local, &
     747             :                           row_indices=row_indices, &
     748           0 :                           col_indices=col_indices)
     749             : 
     750           0 :       IF (unit_nr > 0) THEN
     751           0 :          WRITE (unit_nr, *) header
     752             :       END IF
     753           0 :       IF (my_abs_vals) THEN
     754           0 :          DO i = 1, nrow_local
     755           0 :             DO j = 1, ncol_local
     756           0 :                IF (ABS(fm%local_data(i, j)) > thresh) THEN
     757           0 :                   WRITE (unit_nr, "(A7,T10,I5,T20,I5,T30,F13.5)") header, row_indices(i), col_indices(j), &
     758           0 :                      ABS(fm%local_data(i, j))
     759             :                END IF
     760             :             END DO
     761             :          END DO
     762             :       ELSE
     763           0 :          DO i = 1, nrow_local
     764           0 :             DO j = 1, ncol_local
     765           0 :                IF (ABS(fm%local_data(i, j)) > thresh) THEN
     766           0 :                   WRITE (unit_nr, "(A7,T10,I5,T20,I5,T30,F13.5)") header, row_indices(i), col_indices(j), &
     767           0 :                      fm%local_data(i, j)
     768             :                END IF
     769             :             END DO
     770             :          END DO
     771             :       END IF
     772           0 :       CALL fm%matrix_struct%para_env%sync()
     773           0 :       IF (unit_nr > 0) THEN
     774           0 :          WRITE (unit_nr, *) my_footer
     775             :       END IF
     776             : 
     777           0 :       CALL timestop(handle)
     778             : 
     779           0 :    END SUBROUTINE
     780             : 
     781             : ! **************************************************************************************************
     782             : !> \brief ...
     783             : !> \param bse_tda ...
     784             : !> \param bse_abba ...
     785             : !> \param unit_nr ...
     786             : ! **************************************************************************************************
     787           4 :    SUBROUTINE print_BSE_start_flag(bse_tda, bse_abba, unit_nr)
     788             : 
     789             :       LOGICAL, INTENT(IN)                                :: bse_tda, bse_abba
     790             :       INTEGER, INTENT(IN)                                :: unit_nr
     791             : 
     792             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'print_BSE_start_flag'
     793             : 
     794             :       INTEGER                                            :: handle
     795             : 
     796           4 :       CALL timeset(routineN, handle)
     797             : 
     798           4 :       IF (unit_nr > 0) THEN
     799           2 :          WRITE (unit_nr, *) ' '
     800           2 :          WRITE (unit_nr, '(T2,A79)') '*******************************************************************************'
     801           2 :          WRITE (unit_nr, '(T2,A79)') '**                                                                           **'
     802           2 :          WRITE (unit_nr, '(T2,A79)') '**           Bethe Salpeter equation (BSE) for excitation energies           **'
     803           2 :          IF (bse_tda .AND. bse_abba) THEN
     804           0 :             WRITE (unit_nr, '(T2,A79)') '**          solved with and without Tamm-Dancoff approximation (TDA)         **'
     805           2 :          ELSE IF (bse_tda) THEN
     806           1 :             WRITE (unit_nr, '(T2,A79)') '**                solved with Tamm-Dancoff approximation (TDA)               **'
     807             :          ELSE
     808           1 :             WRITE (unit_nr, '(T2,A79)') '**               solved without Tamm-Dancoff approximation (TDA)             **'
     809             :          END IF
     810             : 
     811           2 :          WRITE (unit_nr, '(T2,A79)') '**                                                                           **'
     812           2 :          WRITE (unit_nr, '(T2,A79)') '*******************************************************************************'
     813           2 :          WRITE (unit_nr, *) ' '
     814             :       END IF
     815             : 
     816           4 :       CALL timestop(handle)
     817             : 
     818           4 :    END SUBROUTINE
     819             : 
     820             : ! **************************************************************************************************
     821             : !> \brief ...
     822             : !> \param fm_mat_S_bar_ia_bse ...
     823             : !> \param fm_mat_S_bar_ij_bse ...
     824             : !> \param fm_mat_S_trunc ...
     825             : !> \param fm_mat_S_ij_trunc ...
     826             : !> \param fm_mat_S_ab_trunc ...
     827             : !> \param fm_mat_Q_static_bse ...
     828             : !> \param fm_mat_Q_static_bse_gemm ...
     829             : ! **************************************************************************************************
     830           4 :    SUBROUTINE deallocate_matrices_bse(fm_mat_S_bar_ia_bse, fm_mat_S_bar_ij_bse, &
     831             :                                       fm_mat_S_trunc, fm_mat_S_ij_trunc, fm_mat_S_ab_trunc, &
     832             :                                       fm_mat_Q_static_bse, fm_mat_Q_static_bse_gemm)
     833             : 
     834             :       TYPE(cp_fm_type), INTENT(INOUT) :: fm_mat_S_bar_ia_bse, fm_mat_S_bar_ij_bse, fm_mat_S_trunc, &
     835             :          fm_mat_S_ij_trunc, fm_mat_S_ab_trunc, fm_mat_Q_static_bse, fm_mat_Q_static_bse_gemm
     836             : 
     837             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'deallocate_matrices_bse'
     838             : 
     839             :       INTEGER                                            :: handle
     840             : 
     841           4 :       CALL timeset(routineN, handle)
     842             : 
     843           4 :       CALL cp_fm_release(fm_mat_S_bar_ia_bse)
     844           4 :       CALL cp_fm_release(fm_mat_S_bar_ij_bse)
     845           4 :       CALL cp_fm_release(fm_mat_S_trunc)
     846           4 :       CALL cp_fm_release(fm_mat_S_ij_trunc)
     847           4 :       CALL cp_fm_release(fm_mat_S_ab_trunc)
     848           4 :       CALL cp_fm_release(fm_mat_Q_static_bse)
     849           4 :       CALL cp_fm_release(fm_mat_Q_static_bse_gemm)
     850             : 
     851           4 :       CALL timestop(handle)
     852             : 
     853           4 :    END SUBROUTINE deallocate_matrices_bse
     854             : 
     855             : ! **************************************************************************************************
     856             : !> \brief Routine for computing the coefficients of the eigenvectors of the BSE matrix from a
     857             : !>  multiplication with the eigenvalues
     858             : !> \param fm_work ...
     859             : !> \param eig_vals ...
     860             : !> \param beta ...
     861             : !> \param gamma ...
     862             : !> \param do_transpose ...
     863             : ! **************************************************************************************************
     864           8 :    SUBROUTINE comp_eigvec_coeff_BSE(fm_work, eig_vals, beta, gamma, do_transpose)
     865             : 
     866             :       TYPE(cp_fm_type), INTENT(INOUT)                    :: fm_work
     867             :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:), &
     868             :          INTENT(IN)                                      :: eig_vals
     869             :       REAL(KIND=dp), INTENT(IN)                          :: beta
     870             :       REAL(KIND=dp), INTENT(IN), OPTIONAL                :: gamma
     871             :       LOGICAL, INTENT(IN), OPTIONAL                      :: do_transpose
     872             : 
     873             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'comp_eigvec_coeff_BSE'
     874             : 
     875             :       INTEGER                                            :: handle, i_row_global, ii, j_col_global, &
     876             :                                                             jj, ncol_local, nrow_local
     877           4 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
     878             :       LOGICAL                                            :: my_do_transpose
     879             :       REAL(KIND=dp)                                      :: coeff, my_gamma
     880             : 
     881           4 :       CALL timeset(routineN, handle)
     882             : 
     883           4 :       IF (PRESENT(gamma)) THEN
     884           4 :          my_gamma = gamma
     885             :       ELSE
     886             :          my_gamma = 2.0_dp
     887             :       END IF
     888             : 
     889           4 :       IF (PRESENT(do_transpose)) THEN
     890           4 :          my_do_transpose = do_transpose
     891             :       ELSE
     892             :          my_do_transpose = .FALSE.
     893             :       END IF
     894             : 
     895             :       CALL cp_fm_get_info(matrix=fm_work, &
     896             :                           nrow_local=nrow_local, &
     897             :                           ncol_local=ncol_local, &
     898             :                           row_indices=row_indices, &
     899           4 :                           col_indices=col_indices)
     900             : 
     901           4 :       IF (my_do_transpose) THEN
     902         196 :          DO jj = 1, ncol_local
     903         192 :             j_col_global = col_indices(jj)
     904        4804 :             DO ii = 1, nrow_local
     905        4608 :                coeff = (eig_vals(j_col_global)**beta)/my_gamma
     906        4800 :                fm_work%local_data(ii, jj) = fm_work%local_data(ii, jj)*coeff
     907             :             END DO
     908             :          END DO
     909             :       ELSE
     910           0 :          DO jj = 1, ncol_local
     911           0 :             DO ii = 1, nrow_local
     912           0 :                i_row_global = row_indices(ii)
     913           0 :                coeff = (eig_vals(i_row_global)**beta)/my_gamma
     914           0 :                fm_work%local_data(ii, jj) = fm_work%local_data(ii, jj)*coeff
     915             :             END DO
     916             :          END DO
     917             :       END IF
     918             : 
     919           4 :       CALL timestop(handle)
     920             : 
     921           4 :    END SUBROUTINE
     922             : 
     923             : ! **************************************************************************************************
     924             : !> \brief ...
     925             : !> \param idx_prim ...
     926             : !> \param idx_sec ...
     927             : !> \param eigvec_entries ...
     928             : ! **************************************************************************************************
     929          40 :    SUBROUTINE sort_excitations(idx_prim, idx_sec, eigvec_entries)
     930             : 
     931             :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: idx_prim, idx_sec
     932             :       REAL(kind=dp), ALLOCATABLE, DIMENSION(:)           :: eigvec_entries
     933             : 
     934             :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'sort_excitations'
     935             : 
     936             :       INTEGER                                            :: handle, ii, kk, num_entries, num_mults
     937          40 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: idx_prim_work, idx_sec_work, tmp_index
     938             :       LOGICAL                                            :: unique_entries
     939          40 :       REAL(kind=dp), ALLOCATABLE, DIMENSION(:)           :: eigvec_entries_work
     940             : 
     941          40 :       CALL timeset(routineN, handle)
     942             : 
     943          40 :       num_entries = SIZE(idx_prim)
     944             : 
     945         120 :       ALLOCATE (tmp_index(num_entries))
     946             : 
     947          40 :       CALL sort(idx_prim, num_entries, tmp_index)
     948             : 
     949          80 :       ALLOCATE (idx_sec_work(num_entries))
     950         120 :       ALLOCATE (eigvec_entries_work(num_entries))
     951             : 
     952         104 :       DO ii = 1, num_entries
     953          64 :          idx_sec_work(ii) = idx_sec(tmp_index(ii))
     954         104 :          eigvec_entries_work(ii) = eigvec_entries(tmp_index(ii))
     955             :       END DO
     956             : 
     957          40 :       DEALLOCATE (tmp_index)
     958          40 :       DEALLOCATE (idx_sec)
     959          40 :       DEALLOCATE (eigvec_entries)
     960             : 
     961          40 :       CALL MOVE_ALLOC(idx_sec_work, idx_sec)
     962          40 :       CALL MOVE_ALLOC(eigvec_entries_work, eigvec_entries)
     963             : 
     964             :       !Now check for multiple entries in first idx to check necessity of sorting in second idx
     965          40 :       CALL sort_unique(idx_prim, unique_entries)
     966          40 :       IF (.NOT. unique_entries) THEN
     967           0 :          ALLOCATE (idx_prim_work(num_entries))
     968           0 :          idx_prim_work(:) = idx_prim(:)
     969             :          ! Find duplicate entries in idx_prim
     970           0 :          DO ii = 1, num_entries
     971           0 :             IF (idx_prim_work(ii) == 0) CYCLE
     972           0 :             num_mults = COUNT(idx_prim_work == idx_prim_work(ii))
     973           0 :             IF (num_mults > 1) THEN
     974             :                !Set all duplicate entries to 0
     975           0 :                idx_prim_work(ii:ii + num_mults - 1) = 0
     976             :                !Start sorting in secondary index
     977           0 :                ALLOCATE (idx_sec_work(num_mults))
     978           0 :                ALLOCATE (eigvec_entries_work(num_mults))
     979           0 :                idx_sec_work(:) = idx_sec(ii:ii + num_mults - 1)
     980           0 :                eigvec_entries_work(:) = eigvec_entries(ii:ii + num_mults - 1)
     981           0 :                ALLOCATE (tmp_index(num_mults))
     982           0 :                CALL sort(idx_sec_work, num_mults, tmp_index)
     983             : 
     984             :                !Now write newly sorted indices to original arrays
     985           0 :                DO kk = ii, ii + num_mults - 1
     986           0 :                   idx_sec(kk) = idx_sec_work(kk - ii + 1)
     987           0 :                   eigvec_entries(kk) = eigvec_entries_work(tmp_index(kk - ii + 1))
     988             :                END DO
     989             :                !Deallocate work arrays
     990           0 :                DEALLOCATE (tmp_index)
     991           0 :                DEALLOCATE (idx_sec_work)
     992           0 :                DEALLOCATE (eigvec_entries_work)
     993             :             END IF
     994           0 :             idx_prim_work(ii) = idx_prim(ii)
     995             :          END DO
     996           0 :          DEALLOCATE (idx_prim_work)
     997             :       END IF
     998             : 
     999          40 :       CALL timestop(handle)
    1000             : 
    1001         120 :    END SUBROUTINE sort_excitations
    1002             : 
    1003             : ! **************************************************************************************************
    1004             : !> \brief Roughly estimates the needed runtime and memory during the BSE run
    1005             : !> \param homo_red ...
    1006             : !> \param virtual_red ...
    1007             : !> \param unit_nr ...
    1008             : !> \param bse_abba ...
    1009             : !> \param para_env ...
    1010             : !> \param diag_runtime_est ...
    1011             : ! **************************************************************************************************
    1012           4 :    SUBROUTINE estimate_BSE_resources(homo_red, virtual_red, unit_nr, bse_abba, &
    1013             :                                      para_env, diag_runtime_est)
    1014             : 
    1015             :       INTEGER                                            :: homo_red, virtual_red, unit_nr
    1016             :       LOGICAL                                            :: bse_abba
    1017             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    1018             :       REAL(KIND=dp)                                      :: diag_runtime_est
    1019             : 
    1020             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'estimate_BSE_resources'
    1021             : 
    1022             :       INTEGER                                            :: handle, num_BSE_matrices
    1023             :       REAL(KIND=dp)                                      :: mem_est, mem_est_per_rank
    1024             : 
    1025           4 :       CALL timeset(routineN, handle)
    1026             : 
    1027             :       ! Number of matrices with size of A in TDA is 2 (A itself and W_ijab)
    1028           4 :       num_BSE_matrices = 2
    1029             :       ! With the full diagonalization of ABBA, we need several auxiliary matrices in the process
    1030             :       ! The maximum number is 2 + 2 + 6 (additional B and C matrix as well as 6 matrices to create C)
    1031           4 :       IF (bse_abba) THEN
    1032           2 :          num_BSE_matrices = 10
    1033             :       END IF
    1034           4 :       mem_est = REAL(8*(homo_red**2*virtual_red**2)*num_BSE_matrices, KIND=dp)/REAL(1024**3, KIND=dp)
    1035           4 :       mem_est_per_rank = REAL(mem_est/para_env%num_pe, KIND=dp)
    1036           4 :       IF (unit_nr > 0) THEN
    1037           2 :          WRITE (unit_nr, '(T2,A4,T7,A40,T68,F13.3)') 'BSE|', 'Total peak memory estimate from BSE [GB]', &
    1038           4 :             mem_est
    1039           2 :          WRITE (unit_nr, '(T2,A4,T7,A47,T68,F13.3)') 'BSE|', 'Peak memory estimate per MPI rank from BSE [GB]', &
    1040           4 :             mem_est_per_rank
    1041           2 :          WRITE (unit_nr, '(T2,A4)') 'BSE|'
    1042             :       END IF
    1043             :       ! Rough estimation of diagonalization runtimes. Baseline was a full BSE Naphthalene
    1044             :       ! run with 11000x11000 entries in A/B/C, which took 10s on 32 ranks
    1045           4 :       diag_runtime_est = REAL(homo_red*virtual_red/11000, KIND=dp)**3*10*32/REAL(para_env%num_pe, KIND=dp)
    1046             : 
    1047           4 :       CALL timestop(handle)
    1048             : 
    1049           4 :    END SUBROUTINE estimate_BSE_resources
    1050             : 
    1051             : ! **************************************************************************************************
    1052             : !> \brief Filters eigenvector entries above a given threshold to describe excitations in the
    1053             : !> singleparticle basis
    1054             : !> \param fm_eigvec ...
    1055             : !> \param idx_homo ...
    1056             : !> \param idx_virt ...
    1057             : !> \param eigvec_entries ...
    1058             : !> \param i_exc ...
    1059             : !> \param virtual ...
    1060             : !> \param num_entries ...
    1061             : !> \param mp2_env ...
    1062             : ! **************************************************************************************************
    1063          40 :    SUBROUTINE filter_eigvec_contrib(fm_eigvec, idx_homo, idx_virt, eigvec_entries, &
    1064             :                                     i_exc, virtual, num_entries, mp2_env)
    1065             : 
    1066             :       TYPE(cp_fm_type), INTENT(IN)                       :: fm_eigvec
    1067             :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: idx_homo, idx_virt
    1068             :       REAL(kind=dp), ALLOCATABLE, DIMENSION(:)           :: eigvec_entries
    1069             :       INTEGER                                            :: i_exc, virtual, num_entries
    1070             :       TYPE(mp2_type), INTENT(INOUT)                      :: mp2_env
    1071             : 
    1072             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'filter_eigvec_contrib'
    1073             : 
    1074             :       INTEGER                                            :: eigvec_idx, handle, ii, iproc, jj, kk, &
    1075             :                                                             ncol_local, nrow_local, &
    1076             :                                                             num_entries_local
    1077             :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: num_entries_to_comm
    1078          40 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
    1079             :       REAL(KIND=dp)                                      :: eigvec_entry
    1080             :       TYPE(integ_mat_buffer_type), ALLOCATABLE, &
    1081          40 :          DIMENSION(:)                                    :: buffer_entries
    1082             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    1083             : 
    1084          40 :       CALL timeset(routineN, handle)
    1085             : 
    1086          40 :       para_env => fm_eigvec%matrix_struct%para_env
    1087             : 
    1088             :       CALL cp_fm_get_info(matrix=fm_eigvec, &
    1089             :                           nrow_local=nrow_local, &
    1090             :                           ncol_local=ncol_local, &
    1091             :                           row_indices=row_indices, &
    1092          40 :                           col_indices=col_indices)
    1093             : 
    1094         120 :       ALLOCATE (num_entries_to_comm(0:para_env%num_pe - 1))
    1095         120 :       num_entries_to_comm(:) = 0
    1096             : 
    1097        1960 :       DO jj = 1, ncol_local
    1098             :          !First check if i is localized on this proc
    1099        1920 :          IF (col_indices(jj) /= i_exc) THEN
    1100             :             CYCLE
    1101             :          END IF
    1102        1040 :          DO ii = 1, nrow_local
    1103         960 :             eigvec_idx = row_indices(ii)
    1104         960 :             eigvec_entry = fm_eigvec%local_data(ii, jj)/SQRT(2.0_dp)
    1105        2880 :             IF (ABS(eigvec_entry) > mp2_env%ri_g0w0%eps_x) THEN
    1106          32 :                num_entries_to_comm(para_env%mepos) = num_entries_to_comm(para_env%mepos) + 1
    1107             :             END IF
    1108             :          END DO
    1109             :       END DO
    1110             : 
    1111             :       !Gather number of entries of other processes
    1112          40 :       CALL para_env%sum(num_entries_to_comm)
    1113             : 
    1114          40 :       num_entries_local = num_entries_to_comm(para_env%mepos)
    1115             : 
    1116         200 :       ALLOCATE (buffer_entries(0:para_env%num_pe - 1))
    1117             : 
    1118         120 :       DO iproc = 0, para_env%num_pe - 1
    1119         216 :          ALLOCATE (buffer_entries(iproc)%msg(num_entries_to_comm(iproc)))
    1120         216 :          ALLOCATE (buffer_entries(iproc)%indx(num_entries_to_comm(iproc), 2))
    1121         144 :          buffer_entries(iproc)%msg = 0.0_dp
    1122         408 :          buffer_entries(iproc)%indx = 0
    1123             :       END DO
    1124             : 
    1125             :       kk = 1
    1126        1960 :       DO jj = 1, ncol_local
    1127             :          !First check if i is localized on this proc
    1128        1920 :          IF (col_indices(jj) /= i_exc) THEN
    1129             :             CYCLE
    1130             :          END IF
    1131        1040 :          DO ii = 1, nrow_local
    1132         960 :             eigvec_idx = row_indices(ii)
    1133         960 :             eigvec_entry = fm_eigvec%local_data(ii, jj)/SQRT(2.0_dp)
    1134        2880 :             IF (ABS(eigvec_entry) > mp2_env%ri_g0w0%eps_x) THEN
    1135          32 :                buffer_entries(para_env%mepos)%indx(kk, 1) = (eigvec_idx - 1)/virtual + 1
    1136          32 :                buffer_entries(para_env%mepos)%indx(kk, 2) = MOD(eigvec_idx - 1, virtual) + 1
    1137          32 :                buffer_entries(para_env%mepos)%msg(kk) = eigvec_entry
    1138          32 :                kk = kk + 1
    1139             :             END IF
    1140             :          END DO
    1141             :       END DO
    1142             : 
    1143         120 :       DO iproc = 0, para_env%num_pe - 1
    1144          80 :          CALL para_env%sum(buffer_entries(iproc)%msg)
    1145         120 :          CALL para_env%sum(buffer_entries(iproc)%indx)
    1146             :       END DO
    1147             : 
    1148             :       !Now sum up gathered information
    1149         120 :       num_entries = SUM(num_entries_to_comm)
    1150         120 :       ALLOCATE (idx_homo(num_entries))
    1151          80 :       ALLOCATE (idx_virt(num_entries))
    1152         120 :       ALLOCATE (eigvec_entries(num_entries))
    1153             : 
    1154          40 :       kk = 1
    1155         120 :       DO iproc = 0, para_env%num_pe - 1
    1156         120 :          IF (num_entries_to_comm(iproc) /= 0) THEN
    1157         120 :             DO ii = 1, num_entries_to_comm(iproc)
    1158          64 :                idx_homo(kk) = buffer_entries(iproc)%indx(ii, 1)
    1159          64 :                idx_virt(kk) = buffer_entries(iproc)%indx(ii, 2)
    1160          64 :                eigvec_entries(kk) = buffer_entries(iproc)%msg(ii)
    1161         120 :                kk = kk + 1
    1162             :             END DO
    1163             :          END IF
    1164             :       END DO
    1165             : 
    1166             :       !Deallocate all the used arrays
    1167         120 :       DO iproc = 0, para_env%num_pe - 1
    1168          80 :          DEALLOCATE (buffer_entries(iproc)%msg)
    1169         120 :          DEALLOCATE (buffer_entries(iproc)%indx)
    1170             :       END DO
    1171         160 :       DEALLOCATE (buffer_entries)
    1172          40 :       DEALLOCATE (num_entries_to_comm)
    1173          40 :       NULLIFY (row_indices)
    1174          40 :       NULLIFY (col_indices)
    1175             : 
    1176             :       !Now sort the results according to the involved singleparticle orbitals
    1177             :       ! (homo first, then virtual)
    1178          40 :       CALL sort_excitations(idx_homo, idx_virt, eigvec_entries)
    1179             : 
    1180          40 :       CALL timestop(handle)
    1181             : 
    1182          40 :    END SUBROUTINE
    1183             : 
    1184             : ! **************************************************************************************************
    1185             : !> \brief Determines indices within the given energy cutoffs and truncates Eigenvalues and matrices
    1186             : !> \param fm_mat_S_ia_bse ...
    1187             : !> \param fm_mat_S_ij_bse ...
    1188             : !> \param fm_mat_S_ab_bse ...
    1189             : !> \param fm_mat_S_trunc ...
    1190             : !> \param fm_mat_S_ij_trunc ...
    1191             : !> \param fm_mat_S_ab_trunc ...
    1192             : !> \param Eigenval ...
    1193             : !> \param Eigenval_reduced ...
    1194             : !> \param homo ...
    1195             : !> \param virtual ...
    1196             : !> \param dimen_RI ...
    1197             : !> \param unit_nr ...
    1198             : !> \param homo_red ...
    1199             : !> \param virt_red ...
    1200             : !> \param mp2_env ...
    1201             : ! **************************************************************************************************
    1202          20 :    SUBROUTINE truncate_BSE_matrices(fm_mat_S_ia_bse, fm_mat_S_ij_bse, fm_mat_S_ab_bse, &
    1203             :                                     fm_mat_S_trunc, fm_mat_S_ij_trunc, fm_mat_S_ab_trunc, &
    1204           4 :                                     Eigenval, Eigenval_reduced, &
    1205             :                                     homo, virtual, dimen_RI, unit_nr, &
    1206             :                                     homo_red, virt_red, &
    1207             :                                     mp2_env)
    1208             : 
    1209             :       TYPE(cp_fm_type), INTENT(IN)                       :: fm_mat_S_ia_bse, fm_mat_S_ij_bse, &
    1210             :                                                             fm_mat_S_ab_bse
    1211             :       TYPE(cp_fm_type), INTENT(INOUT)                    :: fm_mat_S_trunc, fm_mat_S_ij_trunc, &
    1212             :                                                             fm_mat_S_ab_trunc
    1213             :       REAL(KIND=dp), DIMENSION(:)                        :: Eigenval
    1214             :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: Eigenval_reduced
    1215             :       INTEGER, INTENT(IN)                                :: homo, virtual, dimen_RI, unit_nr
    1216             :       INTEGER, INTENT(OUT)                               :: homo_red, virt_red
    1217             :       TYPE(mp2_type), INTENT(INOUT)                      :: mp2_env
    1218             : 
    1219             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'truncate_BSE_matrices'
    1220             : 
    1221             :       INTEGER                                            :: handle, homo_incl, i_homo, j_virt, &
    1222             :                                                             virt_incl
    1223             :       TYPE(cp_blacs_env_type), POINTER                   :: context
    1224             :       TYPE(cp_fm_struct_type), POINTER                   :: fm_struct_ab, fm_struct_ia, fm_struct_ij
    1225             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    1226             : 
    1227           4 :       CALL timeset(routineN, handle)
    1228             : 
    1229             :       ! Determine index in homo and virtual for truncation
    1230             :       ! Uses indices of outermost orbitals within energy range (-mp2_env%ri_g0w0%bse_cutoff_occ,mp2_env%ri_g0w0%bse_cutoff_virt)
    1231           4 :       IF (mp2_env%ri_g0w0%bse_cutoff_occ > 0 .OR. mp2_env%ri_g0w0%bse_cutoff_virt > 0) THEN
    1232             :          IF (-mp2_env%ri_g0w0%bse_cutoff_occ .LT. Eigenval(1) - Eigenval(homo) &
    1233           4 :              .OR. mp2_env%ri_g0w0%bse_cutoff_occ < 0) THEN
    1234           4 :             homo_red = homo
    1235           4 :             homo_incl = 1
    1236             :          ELSE
    1237           0 :             homo_incl = 1
    1238           0 :             DO i_homo = 1, homo
    1239           0 :                IF (Eigenval(i_homo) - Eigenval(homo) .GT. -mp2_env%ri_g0w0%bse_cutoff_occ) THEN
    1240           0 :                   homo_incl = i_homo
    1241           0 :                   EXIT
    1242             :                END IF
    1243             :             END DO
    1244           0 :             homo_red = homo - homo_incl + 1
    1245             :          END IF
    1246             : 
    1247             :          IF (mp2_env%ri_g0w0%bse_cutoff_virt .GT. Eigenval(homo + virtual) - Eigenval(homo + 1) &
    1248           4 :              .OR. mp2_env%ri_g0w0%bse_cutoff_virt < 0) THEN
    1249           0 :             virt_red = virtual
    1250           0 :             virt_incl = virtual
    1251             :          ELSE
    1252           4 :             virt_incl = homo + 1
    1253          52 :             DO j_virt = 1, virtual
    1254          52 :                IF (Eigenval(homo + j_virt) - Eigenval(homo + 1) .GT. mp2_env%ri_g0w0%bse_cutoff_virt) THEN
    1255           4 :                   virt_incl = j_virt - 1
    1256           4 :                   EXIT
    1257             :                END IF
    1258             :             END DO
    1259           4 :             virt_red = virt_incl
    1260             :          END IF
    1261             :       ELSE
    1262           0 :          homo_red = homo
    1263           0 :          virt_red = virtual
    1264           0 :          homo_incl = 1
    1265           0 :          virt_incl = virtual
    1266             :       END IF
    1267           4 :       IF (unit_nr > 0) THEN
    1268           2 :          IF (mp2_env%ri_g0w0%bse_cutoff_occ > 0) THEN
    1269           2 :             WRITE (unit_nr, '(T2,A4,T7,A29,T71,F10.3)') 'BSE|', 'Cutoff occupied orbitals [eV]', &
    1270           4 :                mp2_env%ri_g0w0%bse_cutoff_occ*evolt
    1271             :          ELSE
    1272           0 :             WRITE (unit_nr, '(T2,A4,T7,A37)') 'BSE|', 'No cutoff given for occupied orbitals'
    1273             :          END IF
    1274           2 :          IF (mp2_env%ri_g0w0%bse_cutoff_virt > 0) THEN
    1275           2 :             WRITE (unit_nr, '(T2,A4,T7,A28,T71,F10.3)') 'BSE|', 'Cutoff virtual orbitals [eV]', &
    1276           4 :                mp2_env%ri_g0w0%bse_cutoff_virt*evolt
    1277             :          ELSE
    1278           0 :             WRITE (unit_nr, '(T2,A4,T7,A36)') 'BSE|', 'No cutoff given for virtual orbitals'
    1279             :          END IF
    1280           2 :          WRITE (unit_nr, '(T2,A4,T7,A20,T71,I10)') 'BSE|', 'First occupied index', homo_incl
    1281           2 :          WRITE (unit_nr, '(T2,A4,T7,A34,T71,I10)') 'BSE|', 'Last virtual index (not MO index!)', virt_incl
    1282           2 :          WRITE (unit_nr, '(T2,A4,T7,A35,T71,F10.3)') 'BSE|', 'Energy of first occupied index [eV]', Eigenval(homo_incl)*evolt
    1283           2 :          WRITE (unit_nr, '(T2,A4,T7,A33,T71,F10.3)') 'BSE|', 'Energy of last virtual index [eV]', Eigenval(homo + virt_incl)*evolt
    1284           2 :          WRITE (unit_nr, '(T2,A4,T7,A54,T71,F10.3)') 'BSE|', 'Energy difference of first occupied index to HOMO [eV]', &
    1285           4 :             -(Eigenval(homo_incl) - Eigenval(homo))*evolt
    1286           2 :          WRITE (unit_nr, '(T2,A4,T7,A52,T71,F10.3)') 'BSE|', 'Energy difference of last virtual index to LUMO [eV]', &
    1287           4 :             (Eigenval(homo + virt_incl) - Eigenval(homo + 1))*evolt
    1288           2 :          WRITE (unit_nr, '(T2,A4,T7,A35,T71,I10)') 'BSE|', 'Number of GW-corrected occupied MOs', mp2_env%ri_g0w0%corr_mos_occ
    1289           2 :          WRITE (unit_nr, '(T2,A4,T7,A34,T71,I10)') 'BSE|', 'Number of GW-corrected virtual MOs', mp2_env%ri_g0w0%corr_mos_virt
    1290           2 :          WRITE (unit_nr, '(T2,A4)') 'BSE|'
    1291             :       END IF
    1292           4 :       IF (unit_nr > 0) THEN
    1293           2 :          IF (homo - homo_incl + 1 > mp2_env%ri_g0w0%corr_mos_occ) THEN
    1294           0 :             CPABORT("Number of GW-corrected occupied MOs too small for chosen BSE cutoff")
    1295             :          END IF
    1296           2 :          IF (virt_incl > mp2_env%ri_g0w0%corr_mos_virt) THEN
    1297           0 :             CPABORT("Number of GW-corrected virtual MOs too small for chosen BSE cutoff")
    1298             :          END IF
    1299             :       END IF
    1300             :       !Truncate full fm_S matrices
    1301             :       !Allocate new truncated matrices of proper size
    1302           4 :       para_env => fm_mat_S_ia_bse%matrix_struct%para_env
    1303           4 :       context => fm_mat_S_ia_bse%matrix_struct%context
    1304             : 
    1305           4 :       CALL cp_fm_struct_create(fm_struct_ia, para_env, context, dimen_RI, homo_red*virt_red)
    1306           4 :       CALL cp_fm_struct_create(fm_struct_ij, para_env, context, dimen_RI, homo_red*homo_red)
    1307           4 :       CALL cp_fm_struct_create(fm_struct_ab, para_env, context, dimen_RI, virt_red*virt_red)
    1308             : 
    1309           4 :       CALL cp_fm_create(fm_mat_S_trunc, fm_struct_ia, "fm_S_trunc")
    1310           4 :       CALL cp_fm_create(fm_mat_S_ij_trunc, fm_struct_ij, "fm_S_ij_trunc")
    1311           4 :       CALL cp_fm_create(fm_mat_S_ab_trunc, fm_struct_ab, "fm_S_ab_trunc")
    1312             : 
    1313             :       !Copy parts of original matrices to truncated ones
    1314           4 :       IF (mp2_env%ri_g0w0%bse_cutoff_occ > 0 .OR. mp2_env%ri_g0w0%bse_cutoff_virt > 0) THEN
    1315             :          !Truncate eigenvals
    1316          12 :          ALLOCATE (Eigenval_reduced(homo_red + virt_red))
    1317          68 :          Eigenval_reduced(:) = Eigenval(homo_incl:homo + virt_incl)
    1318             : 
    1319             :          CALL truncate_fm(fm_mat_S_trunc, fm_mat_S_ia_bse, virtual, &
    1320             :                           homo_red, virt_red, unit_nr, mp2_env, &
    1321           4 :                           nrow_offset=homo_incl)
    1322             :          CALL truncate_fm(fm_mat_S_ij_trunc, fm_mat_S_ij_bse, homo, &
    1323             :                           homo_red, homo_red, unit_nr, mp2_env, &
    1324           4 :                           homo_incl, homo_incl)
    1325             :          CALL truncate_fm(fm_mat_S_ab_trunc, fm_mat_S_ab_bse, virtual, &
    1326           4 :                           virt_red, virt_red, unit_nr, mp2_env)
    1327             : 
    1328             :       ELSE
    1329           0 :          IF (unit_nr > 0) THEN
    1330           0 :             WRITE (unit_nr, '(T2,A4,T7,A37)') 'BSE|', 'No truncation of BSE matrices applied'
    1331           0 :             WRITE (unit_nr, '(T2,A4)') 'BSE|'
    1332             :          END IF
    1333           0 :          ALLOCATE (Eigenval_reduced(homo_red + virt_red))
    1334           0 :          Eigenval_reduced(:) = Eigenval(:)
    1335             :          CALL cp_fm_to_fm_submat_general(fm_mat_S_ia_bse, fm_mat_S_trunc, dimen_RI, homo_red*virt_red, &
    1336           0 :                                          1, 1, 1, 1, context)
    1337             :          CALL cp_fm_to_fm_submat_general(fm_mat_S_ij_bse, fm_mat_S_ij_trunc, dimen_RI, homo_red*homo_red, &
    1338           0 :                                          1, 1, 1, 1, context)
    1339             :          CALL cp_fm_to_fm_submat_general(fm_mat_S_ab_bse, fm_mat_S_ab_trunc, dimen_RI, virt_red*virt_red, &
    1340           0 :                                          1, 1, 1, 1, context)
    1341             :       END IF
    1342             : 
    1343           4 :       CALL cp_fm_struct_release(fm_struct_ia)
    1344           4 :       CALL cp_fm_struct_release(fm_struct_ij)
    1345           4 :       CALL cp_fm_struct_release(fm_struct_ab)
    1346             : 
    1347           4 :       NULLIFY (para_env)
    1348           4 :       NULLIFY (context)
    1349             : 
    1350           4 :       CALL timestop(handle)
    1351             : 
    1352           4 :    END SUBROUTINE truncate_BSE_matrices
    1353             : 
    1354             : END MODULE

Generated by: LCOV version 1.15