LCOV - code coverage report
Current view: top level - src - gw_large_cell_gamma_ri_rs.F (source / functions) Coverage Total Hit
Test: CP2K Regtests (git:06f838d) Lines: 98.5 % 481 474
Test Date: 2026-06-05 07:04:50 Functions: 100.0 % 16 16

            Line data    Source code
       1              : !--------------------------------------------------------------------------------------------------!
       2              : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3              : !   Copyright 2000-2026 CP2K developers group <https://cp2k.org>                                   !
       4              : !                                                                                                  !
       5              : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6              : !--------------------------------------------------------------------------------------------------!
       7              : 
       8              : ! **************************************************************************************************
       9              : !> \brief GW using RI-RS Approximation
      10              : !> \par History
      11              : !>      04.2026 created [Ritaj Tyagi]
      12              : ! **************************************************************************************************
      13              : MODULE gw_large_cell_Gamma_ri_rs
      14              :    USE atomic_kind_types,               ONLY: atomic_kind_type
      15              :    USE basis_set_types,                 ONLY: gto_basis_set_type
      16              :    USE cell_types,                      ONLY: cell_type,&
      17              :                                               pbc
      18              :    USE cp_dbcsr_api,                    ONLY: &
      19              :         dbcsr_add, dbcsr_binary_read, dbcsr_binary_write, dbcsr_copy, dbcsr_create, &
      20              :         dbcsr_distribution_get, dbcsr_distribution_new, dbcsr_distribution_release, &
      21              :         dbcsr_distribution_type, dbcsr_finalize, dbcsr_get_block_p, dbcsr_get_info, &
      22              :         dbcsr_iterator_blocks_left, dbcsr_iterator_next_block, dbcsr_iterator_start, &
      23              :         dbcsr_iterator_stop, dbcsr_iterator_type, dbcsr_multiply, dbcsr_p_type, dbcsr_put_block, &
      24              :         dbcsr_release, dbcsr_scale, dbcsr_set, dbcsr_type, dbcsr_type_no_symmetry
      25              :    USE cp_dbcsr_operations,             ONLY: copy_dbcsr_to_fm,&
      26              :                                               copy_fm_to_dbcsr,&
      27              :                                               dbcsr_deallocate_matrix_set
      28              :    USE cp_files,                        ONLY: close_file,&
      29              :                                               open_file
      30              :    USE cp_fm_types,                     ONLY: cp_fm_create,&
      31              :                                               cp_fm_release,&
      32              :                                               cp_fm_type
      33              :    USE cp_log_handling,                 ONLY: cp_get_default_logger,&
      34              :                                               cp_logger_type
      35              :    USE cp_output_handling,              ONLY: cp_p_file,&
      36              :                                               cp_print_key_should_output
      37              :    USE gw_integrals,                    ONLY: build_3c_integral_block
      38              :    USE gw_large_cell_gamma,             ONLY: G_occ_vir,&
      39              :                                               compute_QP_energies,&
      40              :                                               delete_unnecessary_files,&
      41              :                                               fill_fm_Sigma_c_Gamma_time,&
      42              :                                               get_W_MIC,&
      43              :                                               multiply_fm_W_MIC_time_with_Minv_Gamma
      44              :    USE gw_utils,                        ONLY: de_init_bs_env
      45              :    USE input_section_types,             ONLY: section_vals_type
      46              :    USE kinds,                           ONLY: dp
      47              :    USE machine,                         ONLY: m_walltime
      48              :    USE message_passing,                 ONLY: mp_para_env_type
      49              :    USE mp2_ri_2c,                       ONLY: RI_2c_integral_mat
      50              :    USE orbital_pointers,                ONLY: indco,&
      51              :                                               ncoset
      52              :    USE particle_types,                  ONLY: particle_type
      53              :    USE post_scf_bandstructure_types,    ONLY: post_scf_bandstructure_type
      54              :    USE qs_environment_types,            ONLY: get_qs_env,&
      55              :                                               qs_environment_type
      56              :    USE qs_kind_types,                   ONLY: get_qs_kind,&
      57              :                                               qs_kind_type
      58              : #include "./base/base_uses.f90"
      59              : 
      60              :    IMPLICIT NONE
      61              : 
      62              :    PRIVATE
      63              : 
      64              :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'gw_large_cell_Gamma_ri_rs'
      65              : 
      66              :    PUBLIC :: gw_calc_large_cell_Gamma_ri_rs
      67              : 
      68              : CONTAINS
      69              : 
      70              : ! **************************************************************************************************
      71              : !> \brief GW calculation using RI-RS formalism
      72              : !> \param qs_env ...
      73              : !> \param bs_env ...
      74              : ! **************************************************************************************************
      75              : 
      76           10 :    SUBROUTINE gw_calc_large_cell_Gamma_ri_rs(qs_env, bs_env)
      77              : 
      78              :       TYPE(qs_environment_type), POINTER                 :: qs_env
      79              :       TYPE(post_scf_bandstructure_type), POINTER         :: bs_env
      80              : 
      81              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'gw_calc_large_cell_Gamma_ri_rs'
      82              : 
      83              :       INTEGER                                            :: handle
      84           10 :       TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:)        :: fm_Sigma_x_Gamma, fm_W_MIC_time
      85           10 :       TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:, :, :)  :: fm_Sigma_c_Gamma_time
      86              : 
      87           10 :       CALL timeset(routineN, handle)
      88              : 
      89              :        !!========================================================================
      90              :        !! 1. Grid Generation for RI-RS
      91              :        !!    (Modified Lebedev grids from Ivan Duchemin and Xavier Blase)
      92              :        !!    Generate flattened 1D array of grid points for RI-RS.
      93              :        !!    Equation: r_g(k) = R_A + r_g(A)
      94              :        !!========================================================================
      95           10 :       CALL ri_rs_grid_assembler(qs_env, bs_env, bs_env%ri_rs%grid_points)
      96              : 
      97              :        !!========================================================================
      98              :        !! 2. Atomic Basis Evaluation
      99              :        !!    Compute values of spherical atomic basis functions at grid points.
     100              :        !!    Expression: Φ_μl = Φ_μ(r_l) (mat_phi_mu_l)
     101              :        !!========================================================================
     102              :       CALL atomic_basis_at_grid_point(qs_env, bs_env, bs_env%ri_rs%grid_points, &
     103           10 :                                       bs_env%ri_rs%mat_phi_mu_l)
     104              : 
     105              :        !!========================================================================
     106              :        !! 3. Compute RI-RS Coefficients (Z_lp)
     107              :        !!    Solve the regularized system for each atom P, where the grid domain
     108              :        !!    is restricted to r_l within a cutoff distance of atom P:
     109              :        !!    a. D_ll' = [ Σ_μ Φ_μ(r_l) Φ_μ(r_l') ]^2 (Equation 13)
     110              :        !!    b. D_lP  = Σ_{μν} Φ_μ(r_l) Φ_ν(r_l) (μν|P) (Equation 15)
     111              :        !!    c. Conditioning:
     112              :        !!       Dvec_l   = 1 / sqrt(D_ll)  (Diagonal scaling vector)
     113              :        !!       D'_ll' = Dvec_l * D_ll' * Dvec_l' + λδ_ll'
     114              :        !!       D'_lP  = Dvec_l * D_lP
     115              :        !!    d. Solve: Σ_l' D'_ll' * Z'_l'P = D'_lP (Equation 14)
     116              :        !!    e. Rescale: Z_lP = Z'_lP * Dvec_l   (Z_lP stored in mat_Z_lP)
     117              :        !!========================================================================
     118              :       CALL compute_coeff_Z_lP(qs_env, bs_env, bs_env%ri_rs%grid_points, &
     119           10 :                               bs_env%ri_rs%mat_phi_mu_l, bs_env%ri_rs%mat_Z_lP)
     120              : 
     121              :        !!========================================================================
     122              :        !! 4. Compute Independent-Particle Polarizability (χ)
     123              :        !!    G^occ_µλ(i|τ|,k=0) = sum_n^occ C_µn(k=0) e^(-|(ϵ_nk=0-ϵ_F)τ|) C_λn(k=0)
     124              :        !!    G^vir_µλ(i|τ|,k=0) = sum_n^vir C_µn(k=0) e^(-|(ϵ_nk=0-ϵ_F)τ|) C_λn(k=0)
     125              :        !!    G^occ_ll'(i|τ|,k=0) = sum_µν Φ_µ(r_l) G^occ_µν Φ_ν(r_l')
     126              :        !!    G^vir_ll'(i|τ|,k=0) = sum_µν Φ_µ(r_l) G^vir_µν Φ_ν(r_l')
     127              :        !!    χ_ll'(iτ,k=0)       = G^occ_ll'(i|τ|,k=0) * G^vir_ll'(i|τ|,k=0)
     128              :        !!    χ_PQ(iτ,k=0)        = sum_ll' Z_lP χ_ll'(iτ,k=0) Z_l'Q
     129              :        !!========================================================================
     130              :       CALL get_mat_chi_Gamma_tau(bs_env, bs_env%mat_chi_Gamma_tau, &
     131           10 :                                  bs_env%ri_rs%mat_phi_mu_l, bs_env%ri_rs%mat_Z_lP)
     132              : 
     133              :        !!========================================================================
     134              :        !! 5. Compute Screened Interaction (W^MIC)
     135              :        !!    χ_PQ(iτ,k=0) -> χ_PQ(iω,k) -> ε_PQ(iω,k) -> W_PQ(iω,k) -> W^MIC_PQ(iτ)
     136              :        !!========================================================================
     137           10 :       CALL get_W_MIC(bs_env, qs_env, bs_env%mat_chi_Gamma_tau, fm_W_MIC_time)
     138              : 
     139              :        !!========================================================================
     140              :        !! 6. Compute Exact Exchange Self-Energy (Σ^x)
     141              :        !!    D_µν          = sum_n^occ C_µn(k=0) C_νn(k=0)
     142              :        !!    D_ll'         = sum_µν Φ_µ(r_l) D_µν Φ_ν(r_l')
     143              :        !!    V^trunc_ll'   = sum_PQ Z_lP V^trunc_PQ Z_l'Q
     144              :        !!    Σ^x_ll'       = D_ll' * V^trunc_ll'
     145              :        !!    Σ^x_λσ(k=0)   = -sum_ll' Φ_λ(r_l) Σ^x_ll' Φ_σ(r_l')
     146              :        !!========================================================================
     147              :       CALL compute_Sigma_x(bs_env, qs_env, bs_env%ri_rs%mat_phi_mu_l, &
     148           10 :                            bs_env%ri_rs%mat_Z_lP, fm_Sigma_x_Gamma)
     149              : 
     150              :        !!========================================================================
     151              :        !! 7. Compute Correlation Self-Energy (Σ^c)
     152              :        !!    W^MIC_ll'(iτ,k=0) = sum_PQ Z_lP W^MIC_PQ(iτ) Z_l'Q
     153              :        !!    Σ^c_ll'(iτ,k=0)   = -G^occ_ll'(i|τ|,k=0) * W^MIC_ll'(iτ,k=0), for τ < 0
     154              :        !!    Σ^c_ll'(iτ,k=0)   =  G^vir_ll'(i|τ|,k=0) * W^MIC_ll'(iτ,k=0), for τ > 0
     155              :        !!    Σ^c_λσ(iτ,k=0)    = sum_ll' Φ_λ(r_l) Σ^c_ll'(iτ,k=0) Φ_σ(r_l')
     156              :        !!========================================================================
     157              :       CALL compute_Sigma_c(bs_env, fm_W_MIC_time, bs_env%ri_rs%mat_phi_mu_l, &
     158           10 :                            bs_env%ri_rs%mat_Z_lP, fm_Sigma_c_Gamma_time)
     159              : 
     160              :        !!========================================================================
     161              :        !! 8. Compute Quasiparticle Energies
     162              :        !!    Σ^c_λσ(iτ,k=0) -> Σ^c_nn(ϵ,k)
     163              :        !!    ϵ_nk^GW = ϵ_nk^DFT + Σ^c_nn(ϵ,k) + Σ^x_nn(k) - v^xc_nn(k)
     164              :        !!========================================================================
     165           10 :       CALL compute_QP_energies(bs_env, qs_env, fm_Sigma_x_Gamma, fm_Sigma_c_Gamma_time)
     166              : 
     167           10 :       CALL de_init_bs_env(bs_env)
     168              : 
     169           10 :       CALL timestop(handle)
     170              : 
     171           10 :    END SUBROUTINE gw_calc_large_cell_Gamma_ri_rs
     172              : 
     173              : ! **************************************************************************************************
     174              : !> \brief Compute grid points for RI-RS
     175              : !>        Right now based on Ivan and Xavier implementation
     176              : !>        JCP 150, 174120 (2019), JCTC 17, 2383 (2021)
     177              : !> \param qs_env ...
     178              : !> \param bs_env ...
     179              : !> \param ri_rs_grid_points ...
     180              : ! **************************************************************************************************
     181              : 
     182           10 :    SUBROUTINE ri_rs_grid_assembler(qs_env, bs_env, ri_rs_grid_points)
     183              : 
     184              :       TYPE(qs_environment_type), POINTER                 :: qs_env
     185              :       TYPE(post_scf_bandstructure_type), POINTER         :: bs_env
     186              :       REAL(KIND=dp), ALLOCATABLE, INTENT(OUT)            :: ri_rs_grid_points(:, :)
     187              : 
     188              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'ri_rs_grid_assembler'
     189              : 
     190              :       INTEGER                                            :: atom_idx, end_idx, handle, ikind, j, &
     191              :                                                             natom, nkind, start_idx, &
     192              :                                                             total_grid_npts
     193           10 :       INTEGER, ALLOCATABLE                               :: atom_to_kind(:), ri_rs_grid_offsets(:)
     194              :       REAL(KIND=dp)                                      :: atomic_center(3)
     195           10 :       TYPE(atomic_kind_type), DIMENSION(:), POINTER      :: atomic_kind_set
     196           10 :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
     197              : 
     198           10 :       CALL timeset(routineN, handle)
     199              : 
     200              :        !! Get the information about the atoms in the system
     201           10 :       CALL get_qs_env(qs_env, atomic_kind_set=atomic_kind_set, particle_set=particle_set)
     202              : 
     203           10 :       nkind = SIZE(atomic_kind_set)
     204           10 :       natom = SIZE(particle_set)
     205              : 
     206              :        !! 1. Build the grid cache
     207           10 :       CALL build_grid_cache(bs_env, atomic_kind_set)
     208              : 
     209              :        !! 2. Calculate grid counts and offsets
     210           30 :       ALLOCATE (ri_rs_grid_offsets(natom + 1))
     211           30 :       ALLOCATE (atom_to_kind(natom))
     212              : 
     213           10 :       total_grid_npts = 0
     214           28 :       DO ikind = 1, nkind
     215           56 :          DO j = 1, SIZE(atomic_kind_set(ikind)%atom_list)
     216           28 :             atom_idx = atomic_kind_set(ikind)%atom_list(j)
     217           28 :             atom_to_kind(atom_idx) = ikind
     218           28 :             ri_rs_grid_offsets(atom_idx) = total_grid_npts + 1
     219           46 :             total_grid_npts = total_grid_npts + bs_env%ri_rs%grid_cache(ikind)%npts
     220              :          END DO
     221              :       END DO
     222              : 
     223           10 :       ri_rs_grid_offsets(natom + 1) = total_grid_npts + 1
     224              : 
     225           10 :       IF (bs_env%unit_nr > 0) THEN
     226              :          WRITE (bs_env%unit_nr, FMT="(T2,A,T69,I12)") &
     227            5 :             'Total grid points used for RI-RS:', total_grid_npts
     228            5 :          WRITE (bs_env%unit_nr, "(A)") ' '
     229              :       END IF
     230              : 
     231              :        !! 3. Allocate the global ri_rs_grid arrays
     232           30 :       ALLOCATE (ri_rs_grid_points(3, total_grid_npts))
     233              : 
     234              :        !! 4. Parallelize the grid generation loop
     235              :       !$OMP PARALLEL DO DEFAULT(NONE) &
     236              :       !$OMP SHARED(ri_rs_grid_points, ri_rs_grid_offsets, atom_to_kind, &
     237              :       !$OMP        particle_set, bs_env, natom) &
     238              :       !$OMP PRIVATE(atom_idx, ikind, atomic_center, start_idx, end_idx) &
     239           10 :       !$OMP SCHEDULE(DYNAMIC, 1)
     240              :       DO atom_idx = 1, natom
     241              :          ikind = atom_to_kind(atom_idx)
     242              :          atomic_center(:) = particle_set(atom_idx)%r(:)
     243              : 
     244              :          start_idx = ri_rs_grid_offsets(atom_idx)
     245              :          end_idx = ri_rs_grid_offsets(atom_idx + 1) - 1
     246              : 
     247              :           !! Shift the cached origin grid by the atom's center
     248              :          ri_rs_grid_points(1, start_idx:end_idx) = bs_env%ri_rs%grid_cache(ikind)%raw_points(1, :) + atomic_center(1)
     249              :          ri_rs_grid_points(2, start_idx:end_idx) = bs_env%ri_rs%grid_cache(ikind)%raw_points(2, :) + atomic_center(2)
     250              :          ri_rs_grid_points(3, start_idx:end_idx) = bs_env%ri_rs%grid_cache(ikind)%raw_points(3, :) + atomic_center(3)
     251              : 
     252              :       END DO
     253              :       !$OMP END PARALLEL DO
     254              : 
     255              :        !! 5. Cleanup memory
     256           10 :       IF (ALLOCATED(bs_env%ri_rs%grid_cache)) THEN
     257           28 :          DO ikind = 1, nkind
     258           28 :             IF (ALLOCATED(bs_env%ri_rs%grid_cache(ikind)%raw_points)) DEALLOCATE (bs_env%ri_rs%grid_cache(ikind)%raw_points)
     259              :          END DO
     260           28 :          DEALLOCATE (bs_env%ri_rs%grid_cache)
     261              :       END IF
     262              : 
     263           10 :       DEALLOCATE (atom_to_kind, ri_rs_grid_offsets)
     264           10 :       CALL timestop(handle)
     265              : 
     266           20 :    END SUBROUTINE ri_rs_grid_assembler
     267              : 
     268              : ! **************************************************************************************************
     269              : !> \brief Reads grids from .ion files and stores them in memory based on grid_select
     270              : !> \param bs_env ...
     271              : !> \param atomic_kind_set ...
     272              : ! **************************************************************************************************
     273              : 
     274           10 :    SUBROUTINE build_grid_cache(bs_env, atomic_kind_set)
     275              : 
     276              :       TYPE(post_scf_bandstructure_type), POINTER         :: bs_env
     277              :       TYPE(atomic_kind_type), DIMENSION(:), POINTER      :: atomic_kind_set
     278              : 
     279              :       CHARACTER(LEN=256)                                 :: atom_sym, filename, full_path, line, &
     280              :                                                             suffix
     281              :       INTEGER                                            :: colon_idx, i, ierr, ikind, iunit, nkind
     282              :       REAL(KIND=dp)                                      :: pt(3)
     283              : 
     284              :       ! Determine the file suffix based on the user's choice
     285           10 :       IF (bs_env%ri_rs%grid_select == 1) THEN
     286           10 :          suffix = "_def2-tzvp-rs.ion"
     287            0 :       ELSE IF (bs_env%ri_rs%grid_select == 2) THEN
     288            0 :          suffix = "_cc-pvtz-rs.ion"
     289              :       ELSE
     290            0 :          CPABORT("Unknown grid_select value. Valid options are 1 (def2-TZVPP) or 2 (cc-pVTZ).")
     291              :       END IF
     292              : 
     293           10 :       nkind = SIZE(atomic_kind_set)
     294           48 :       IF (.NOT. ALLOCATED(bs_env%ri_rs%grid_cache)) ALLOCATE (bs_env%ri_rs%grid_cache(nkind))
     295              : 
     296           28 :       DO ikind = 1, nkind
     297           18 :          atom_sym = TRIM(atomic_kind_set(ikind)%element_symbol)
     298           18 :          filename = TRIM(atom_sym)//TRIM(suffix)
     299              : 
     300           18 :          full_path = "ri_rs_grid/"//TRIM(filename)
     301              : 
     302              :          CALL open_file(file_name=TRIM(full_path), unit_number=iunit, &
     303           18 :                         file_action="READ", file_status="OLD")
     304              : 
     305              :          ! Parse the preamble for 'n points'
     306           18 :          bs_env%ri_rs%grid_cache(ikind)%npts = 0
     307              :          DO
     308          612 :             READ (iunit, '(A)', IOSTAT=ierr) line
     309          612 :             IF (ierr /= 0) EXIT
     310          612 :             IF (INDEX(line, 'n points') > 0) THEN
     311           18 :                colon_idx = INDEX(line, ':')
     312           18 :                READ (line(colon_idx + 1:), *) bs_env%ri_rs%grid_cache(ikind)%npts
     313           18 :                EXIT
     314              :             END IF
     315              :          END DO
     316              : 
     317              :          ! Allocate the cache array for this specific element
     318           54 :          ALLOCATE (bs_env%ri_rs%grid_cache(ikind)%raw_points(3, bs_env%ri_rs%grid_cache(ikind)%npts))
     319              : 
     320              :          ! Fast-forward to the <grid_points> tag
     321           18 :          REWIND (iunit)
     322              :          DO
     323          648 :             READ (iunit, '(A)', IOSTAT=ierr) line
     324          648 :             IF (ierr /= 0) EXIT
     325          648 :             IF (INDEX(line, '<grid_points>') > 0) EXIT
     326              :          END DO
     327              : 
     328              :          ! Read the raw grid coordinates
     329         4466 :          DO i = 1, bs_env%ri_rs%grid_cache(ikind)%npts
     330         4448 :             READ (iunit, *, IOSTAT=ierr) pt(1), pt(2), pt(3)
     331         4448 :             IF (ierr /= 0) THEN
     332            0 :                CPABORT("Unexpected EOF in grid file ")
     333              :             END IF
     334        17810 :             bs_env%ri_rs%grid_cache(ikind)%raw_points(:, i) = pt(:)
     335              :          END DO
     336              : 
     337           28 :          CALL close_file(unit_number=iunit)
     338              :       END DO
     339              : 
     340           10 :    END SUBROUTINE build_grid_cache
     341              : 
     342              : ! **************************************************************************************************
     343              : !> \brief Evaluates atomic basis functions on a real-space grid and builds a sparse DBCSR matrix.
     344              : !> \param qs_env ...
     345              : !> \param bs_env ...
     346              : !> \param ri_rs_grid_points ...
     347              : !> \param mat_phi_mu_l ...
     348              : ! **************************************************************************************************
     349              : 
     350           10 :    SUBROUTINE atomic_basis_at_grid_point(qs_env, bs_env, ri_rs_grid_points, mat_phi_mu_l)
     351              : 
     352              :       TYPE(qs_environment_type), POINTER                 :: qs_env
     353              :       TYPE(post_scf_bandstructure_type), POINTER         :: bs_env
     354              :       REAL(KIND=dp), ALLOCATABLE, INTENT(INOUT)          :: ri_rs_grid_points(:, :)
     355              :       TYPE(dbcsr_type), INTENT(OUT)                      :: mat_phi_mu_l
     356              : 
     357              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'atomic_basis_at_grid_point'
     358              : 
     359              :       INTEGER :: c_size, chunk_size, dimen_ORB, handle, i, i_blk, iatom, natom, npcol, nprow, &
     360              :          num_grid_chunks, r_end, r_start, total_grid_npts
     361              :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: first_sgf
     362           10 :       INTEGER, DIMENSION(:), POINTER                     :: c_blk_sizes, col_dist, col_dist_ks, &
     363           10 :                                                             r_blk_sizes, row_dist, row_dist_ks
     364           10 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: atom_col_buffer
     365           10 :       TYPE(atomic_kind_type), DIMENSION(:), POINTER      :: atomic_kind_set
     366              :       TYPE(cell_type), POINTER                           :: cell
     367              :       TYPE(dbcsr_distribution_type)                      :: dist
     368              :       TYPE(dbcsr_distribution_type), POINTER             :: dbcsr_dist_ks
     369              :       TYPE(mp_para_env_type), POINTER                    :: para_env
     370           10 :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
     371           10 :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
     372              : 
     373           10 :       CALL timeset(routineN, handle)
     374              : 
     375              :       ! Setup Grid Blocking
     376              :       ! Right now using 256
     377           10 :       chunk_size = bs_env%ri_rs%chunk_size_dbcsr
     378              : 
     379              :       ! Extract environment variables
     380              :       CALL get_qs_env(qs_env, cell=cell, atomic_kind_set=atomic_kind_set, &
     381              :                       qs_kind_set=qs_kind_set, particle_set=particle_set, &
     382           10 :                       para_env=para_env)
     383              : 
     384           10 :       natom = SIZE(particle_set)
     385           10 :       total_grid_npts = SIZE(ri_rs_grid_points, 2)
     386              : 
     387              :       ! Map the starting indices of spherical gaussian functions (SGF) for each atom
     388           30 :       ALLOCATE (first_sgf(natom + 1))
     389           10 :       CALL get_basis_offsets(particle_set, qs_kind_set, first_sgf, dimen_ORB)
     390              : 
     391              :       ! =========================================================================
     392              :       ! 1. SETUP DBCSR MATRIX TOPOLOGY
     393              :       ! =========================================================================
     394              : 
     395              :       ! A. Define Column Block Sizes (1 Block = 1 Atom's full basis set)
     396           30 :       ALLOCATE (c_blk_sizes(natom))
     397           38 :       DO iatom = 1, natom
     398           38 :          c_blk_sizes(iatom) = first_sgf(iatom + 1) - first_sgf(iatom)
     399              :       END DO
     400              : 
     401              :       ! B. Define Row Block Sizes (Grid chunks of max size 256)
     402           10 :       num_grid_chunks = CEILING(REAL(total_grid_npts, KIND=dp)/REAL(chunk_size, KIND=dp))
     403           30 :       ALLOCATE (r_blk_sizes(num_grid_chunks))
     404           40 :       r_blk_sizes = chunk_size
     405           10 :       IF (MOD(total_grid_npts, chunk_size) /= 0) THEN
     406           10 :          r_blk_sizes(num_grid_chunks) = MOD(total_grid_npts, chunk_size)
     407              :       END IF
     408              : 
     409              :       ! C. Fetch CP2K's Default Process Grid Configuration
     410           10 :       CALL get_qs_env(qs_env, dbcsr_dist=dbcsr_dist_ks)
     411           10 :       CALL dbcsr_distribution_get(dbcsr_dist_ks, row_dist=row_dist_ks, col_dist=col_dist_ks)
     412              : 
     413              :       ! D. Build Custom Mappings using Round-Robin across the 2D process grid
     414              :       !    (MAXVAL + 1 accounts for 0-based process indexing in DBCSR)
     415           38 :       nprow = MAXVAL(row_dist_ks) + 1
     416           38 :       npcol = MAXVAL(col_dist_ks) + 1
     417              : 
     418           20 :       ALLOCATE (row_dist(num_grid_chunks))
     419           40 :       DO i = 1, num_grid_chunks
     420           40 :          row_dist(i) = MOD(i - 1, nprow)
     421              :       END DO
     422              : 
     423           20 :       ALLOCATE (col_dist(natom))
     424           38 :       DO i = 1, natom
     425           38 :          col_dist(i) = MOD(i - 1, npcol)
     426              :       END DO
     427              : 
     428              :       ! E. Create the DBCSR Distribution and Initialize the Matrix
     429              :       CALL dbcsr_distribution_new(dist, template=dbcsr_dist_ks, &
     430           10 :                                   row_dist=row_dist, col_dist=col_dist)
     431              : 
     432              :       CALL dbcsr_create(mat_phi_mu_l, name="phi_val_sparse", dist=dist, &
     433              :                         matrix_type=dbcsr_type_no_symmetry, &
     434           10 :                         row_blk_size=r_blk_sizes, col_blk_size=c_blk_sizes)
     435              : 
     436              :       ! =========================================================================
     437              :       ! 2. STREAM DATA DIRECTLY INTO SPARSE MATRIX
     438              :       ! =========================================================================
     439              :       ! Iterate over the atoms assigned to this specific MPI rank
     440           10 :       DO iatom = para_env%mepos + 1, natom, para_env%num_pe
     441              : 
     442           14 :          c_size = c_blk_sizes(iatom)
     443              : 
     444              :          ! Allocate a temporary dense buffer just for this specific atom
     445           56 :          ALLOCATE (atom_col_buffer(total_grid_npts, c_size))
     446           14 :          atom_col_buffer = 0.0_dp
     447              : 
     448              :          ! Evaluate the basis functions on the grid
     449              :          CALL fill_phi_for_atom(atom_col_buffer, ri_rs_grid_points, total_grid_npts, &
     450           14 :                                 iatom, particle_set, qs_kind_set, cell)
     451              : 
     452              :          ! Slice the dense column into chunks and insert into DBCSR
     453           56 :          DO i_blk = 1, num_grid_chunks
     454           42 :             r_start = (i_blk - 1)*chunk_size + 1
     455           42 :             r_end = MIN(i_blk*chunk_size, total_grid_npts)
     456              : 
     457              :             ! Apply dynamic sparsity filtering: Only store blocks with physical significance
     458        23914 :             IF (MAXVAL(ABS(atom_col_buffer(r_start:r_end, 1:c_size))) > bs_env%eps_filter) THEN
     459              :                CALL dbcsr_put_block(mat_phi_mu_l, row=i_blk, col=iatom, &
     460           42 :                                     block=atom_col_buffer(r_start:r_end, 1:c_size))
     461              :             END IF
     462              :          END DO
     463              : 
     464           14 :          DEALLOCATE (atom_col_buffer)
     465              : 
     466              :       END DO
     467              : 
     468              :       ! Finalize triggers internal MPI communication to route blocks to their correct 2D process owners
     469           10 :       CALL dbcsr_finalize(mat_phi_mu_l)
     470              : 
     471              :       ! -------------------------------------------------------------------------
     472              :       ! CLEANUP
     473              :       ! -------------------------------------------------------------------------
     474           10 :       DEALLOCATE (first_sgf, r_blk_sizes, c_blk_sizes, row_dist, col_dist)
     475           10 :       CALL dbcsr_distribution_release(dist)
     476              : 
     477           10 :       CALL timestop(handle)
     478              : 
     479           40 :    END SUBROUTINE atomic_basis_at_grid_point
     480              : 
     481              : ! **************************************************************************************************
     482              : !> \brief Compute value of all basis function for single atom across all grid points
     483              : !> \param phi_val ...
     484              : !> \param ri_rs_grid ...
     485              : !> \param npts ...
     486              : !> \param iatom ...
     487              : !> \param particle_set ...
     488              : !> \param qs_kind_set ...
     489              : !> \param cell ...
     490              : ! **************************************************************************************************
     491              : 
     492           14 :    SUBROUTINE fill_phi_for_atom(phi_val, ri_rs_grid, npts, iatom, &
     493              :                                 particle_set, qs_kind_set, cell)
     494              : 
     495              :       REAL(KIND=dp), INTENT(INOUT)                       :: phi_val(:, :)
     496              :       INTEGER, INTENT(IN)                                :: npts
     497              :       REAL(KIND=dp), INTENT(IN)                          :: ri_rs_grid(3, npts)
     498              :       INTEGER, INTENT(IN)                                :: iatom
     499              :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
     500              :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
     501              :       TYPE(cell_type), POINTER                           :: cell
     502              : 
     503              :       INTEGER                                            :: first_sgf, i_pt, ico, iend_co, ikind, &
     504              :                                                             ipgf, iset, isgf, ishell, istart_co, &
     505              :                                                             l, last_sgf, lx, ly, lz, n_cart_total, &
     506              :                                                             row_idx
     507              :       REAL(KIND=dp)                                      :: alpha, dist_vec(3), exp_val, poly, r2, &
     508              :                                                             r_atom(3), weight
     509              :       TYPE(gto_basis_set_type), POINTER                  :: orb_basis_set
     510              : 
     511              :       ! Get Atom Info
     512           14 :       ikind = particle_set(iatom)%atomic_kind%kind_number
     513           14 :       CALL get_qs_kind(qs_kind_set(ikind), basis_set=orb_basis_set, basis_type="ORB")
     514           14 :       IF (.NOT. ASSOCIATED(orb_basis_set)) RETURN
     515              : 
     516           56 :       r_atom = particle_set(iatom)%r
     517              : 
     518              :       !$OMP PARALLEL DO DEFAULT(NONE) &
     519              :       !$OMP SHARED(phi_val, ri_rs_grid, npts, orb_basis_set, r_atom, cell, ncoset, indco) &
     520              :       !$OMP PRIVATE(i_pt, dist_vec, r2, iset, n_cart_total, ishell, l, istart_co, iend_co, &
     521              :       !$OMP         first_sgf, last_sgf, ipgf, alpha, exp_val, isgf, ico, row_idx, weight, &
     522              :       !$OMP         lx, ly, lz, poly) &
     523           14 :       !$OMP SCHEDULE(STATIC)
     524              :       DO i_pt = 1, npts
     525              :          dist_vec = pbc(ri_rs_grid(:, i_pt) - r_atom, cell)
     526              :          r2 = DOT_PRODUCT(dist_vec, dist_vec)
     527              : 
     528              :          DO iset = 1, orb_basis_set%nset
     529              :             n_cart_total = ncoset(orb_basis_set%lmax(iset))
     530              : 
     531              :             DO ishell = 1, orb_basis_set%nshell(iset)
     532              :                l = orb_basis_set%l(ishell, iset)
     533              :                istart_co = ncoset(l - 1) + 1
     534              :                iend_co = ncoset(l)
     535              : 
     536              :                first_sgf = orb_basis_set%first_sgf(ishell, iset)
     537              :                last_sgf = orb_basis_set%last_sgf(ishell, iset)
     538              : 
     539              :                DO ipgf = 1, orb_basis_set%npgf(iset)
     540              :                   alpha = orb_basis_set%zet(ipgf, iset)
     541              :                   exp_val = EXP(-alpha*r2)
     542              : 
     543              :                   DO isgf = first_sgf, last_sgf
     544              :                      DO ico = istart_co, iend_co
     545              :                         row_idx = (ipgf - 1)*n_cart_total + ico
     546              :                         weight = orb_basis_set%sphi(row_idx, isgf)
     547              :                         lx = indco(1, ico)
     548              :                         ly = indco(2, ico)
     549              :                         lz = indco(3, ico)
     550              :                         poly = (dist_vec(1)**lx)*(dist_vec(2)**ly)*(dist_vec(3)**lz)
     551              : 
     552              :                         phi_val(i_pt, isgf) = phi_val(i_pt, isgf) + (weight*poly*exp_val)
     553              : 
     554              :                      END DO
     555              :                   END DO
     556              :                END DO
     557              :             END DO
     558              :          END DO
     559              :       END DO
     560              :       !$OMP END PARALLEL DO
     561              : 
     562              :    END SUBROUTINE fill_phi_for_atom
     563              : 
     564              : ! **************************************************************************************************
     565              : !> \brief Helper for OMP threads to fill phi_val column values
     566              : !> \param particle_set ...
     567              : !> \param qs_kind_set ...
     568              : !> \param first_sgf ...
     569              : !> \param total_sgf ...
     570              : ! **************************************************************************************************
     571              : 
     572           10 :    SUBROUTINE get_basis_offsets(particle_set, qs_kind_set, first_sgf, total_sgf)
     573              : 
     574              :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
     575              :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
     576              :       INTEGER, INTENT(OUT)                               :: first_sgf(:), total_sgf
     577              : 
     578              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'get_basis_offsets'
     579              : 
     580              :       INTEGER                                            :: handle, iatom, ikind, nsgf
     581              : 
     582           10 :       CALL timeset(routineN, handle)
     583              : 
     584           10 :       total_sgf = 0
     585           38 :       DO iatom = 1, SIZE(particle_set)
     586           28 :          first_sgf(iatom) = total_sgf + 1
     587           28 :          ikind = particle_set(iatom)%atomic_kind%kind_number
     588           28 :          CALL get_qs_kind(qs_kind_set(ikind), nsgf=nsgf, basis_type="ORB")
     589           38 :          total_sgf = total_sgf + nsgf
     590              :       END DO
     591           10 :       first_sgf(SIZE(particle_set) + 1) = total_sgf + 1
     592              : 
     593           10 :       CALL timestop(handle)
     594              : 
     595           10 :    END SUBROUTINE get_basis_offsets
     596              : 
     597              : ! **************************************************************************************************
     598              : !> \brief Compute RI-RS Coefficients (Z_lP)
     599              : !> \param qs_env ...
     600              : !> \param bs_env ...
     601              : !> \param ri_rs_grid_points ...
     602              : !> \param mat_phi_mu_l ...
     603              : !> \param mat_Z_lP ...
     604              : ! **************************************************************************************************
     605              : 
     606           10 :    SUBROUTINE compute_coeff_Z_lP(qs_env, bs_env, ri_rs_grid_points, mat_phi_mu_l, mat_Z_lP)
     607              : 
     608              :       ! Arguments
     609              :       TYPE(qs_environment_type), POINTER                 :: qs_env
     610              :       TYPE(post_scf_bandstructure_type), POINTER         :: bs_env
     611              :       REAL(KIND=dp), ALLOCATABLE, INTENT(INOUT)          :: ri_rs_grid_points(:, :)
     612              :       TYPE(dbcsr_type), INTENT(INOUT)                    :: mat_phi_mu_l
     613              :       TYPE(dbcsr_type), INTENT(OUT)                      :: mat_Z_lP
     614              : 
     615              :       CHARACTER(LEN=*), PARAMETER :: key = 'PROPERTIES%BANDSTRUCTURE%GW%PRINT%RESTART', &
     616              :          routineN = 'compute_coeff_Z_lP'
     617              : 
     618              :       INTEGER :: atom_P, chunk_size, col, current_chunk_size, global_i, global_j, group_handle, &
     619              :          handle, i, i_blk, info, istart_ao, j, j_ri, l, loc_idx, max_ao_size, max_loc_ri, &
     620              :          n_ao_total, n_grid_total, n_loc_ri, n_local_grid, natom, num_grid_chunks, r_end, r_start, &
     621              :          row
     622           10 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: local_grid_idx, map_global_to_local, &
     623           10 :                                                             row_offset
     624           10 :       INTEGER, DIMENSION(:), POINTER                     :: col_dist_phi, col_dist_ri, r_blk_sizes, &
     625           10 :                                                             ri_blk_sizes, row_dist_grid
     626              :       REAL(KIND=dp)                                      :: cutoff_ri, dist, t1
     627           10 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: d_vec_local
     628           10 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: D_local, d_lp_local, int_2d_local, &
     629           10 :                                                             phi_global, phi_local, rho_pair_local, &
     630           10 :                                                             Z_blk, Z_global_temp
     631           10 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :)     :: int_3c_local
     632              :       REAL(KIND=dp), DIMENSION(3)                        :: pos_P
     633           10 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: block_ptr
     634              :       TYPE(cp_logger_type), POINTER                      :: logger
     635              :       TYPE(dbcsr_distribution_type)                      :: dist_phi, dist_Z
     636              :       TYPE(dbcsr_iterator_type)                          :: iter
     637              :       TYPE(mp_para_env_type), POINTER                    :: para_env
     638           10 :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
     639              :       TYPE(section_vals_type), POINTER                   :: input
     640              : 
     641           10 :       CALL timeset(routineN, handle)
     642              : 
     643              :       ! Restrict integration domain to a localized sphere around the atom
     644           10 :       cutoff_ri = bs_env%ri_rs%cutoff_radius_ri_rs
     645              : 
     646           10 :       t1 = m_walltime()
     647              : 
     648           10 :       CALL get_qs_env(qs_env, para_env=para_env, particle_set=particle_set, input=input)
     649              : 
     650           10 :       natom = SIZE(bs_env%i_RI_start_from_atom)
     651           10 :       n_ao_total = bs_env%i_ao_end_from_atom(natom)
     652           10 :       n_grid_total = SIZE(ri_rs_grid_points, 2)
     653              : 
     654              :       ! =========================================================================
     655              :       ! 1. SETUP DBCSR TOPOLOGY & EXACT OFFSETS
     656              :       ! =========================================================================
     657           10 :       CALL dbcsr_get_info(mat_phi_mu_l, row_blk_size=r_blk_sizes, distribution=dist_phi)
     658           10 :       CALL dbcsr_distribution_get(dist_phi, row_dist=row_dist_grid, col_dist=col_dist_phi, group=group_handle)
     659              : 
     660           10 :       chunk_size = r_blk_sizes(1)
     661           10 :       num_grid_chunks = SIZE(r_blk_sizes)
     662              : 
     663           30 :       ALLOCATE (row_offset(num_grid_chunks))
     664           10 :       row_offset(1) = 0
     665           30 :       DO i_blk = 2, num_grid_chunks
     666           30 :          row_offset(i_blk) = row_offset(i_blk - 1) + r_blk_sizes(i_blk - 1)
     667              :       END DO
     668              : 
     669           40 :       ALLOCATE (ri_blk_sizes(natom), col_dist_ri(natom))
     670           38 :       DO atom_P = 1, natom
     671           28 :          ri_blk_sizes(atom_P) = bs_env%i_RI_end_from_atom(atom_P) - bs_env%i_RI_start_from_atom(atom_P) + 1
     672          118 :          col_dist_ri(atom_P) = MOD(atom_P - 1, MAXVAL(col_dist_phi) + 1)
     673              :       END DO
     674              : 
     675           10 :       CALL dbcsr_distribution_new(dist_Z, template=dist_phi, row_dist=row_dist_grid, col_dist=col_dist_ri)
     676              : 
     677           10 :       IF (bs_env%ri_rs%Z_lP_exists) THEN
     678              :          CALL dbcsr_binary_read(filepath=TRIM(bs_env%prefix)//"Z_lP.matrix", &
     679              :                                 distribution=dist_Z, &
     680            2 :                                 matrix_new=mat_Z_lP)
     681            2 :          IF (bs_env%unit_nr > 0) THEN
     682              :             WRITE (bs_env%unit_nr, '(T2,A,T57,A,F7.1,A)') &
     683            1 :                'Read Z_lP from file ', ' Execution time', m_walltime() - t1, ' s'
     684            1 :             WRITE (bs_env%unit_nr, '(A)') ' '
     685              :          END IF
     686              :       ELSE
     687              : 
     688              :          CALL dbcsr_create(mat_Z_lP, name="mat_Z_lP", dist=dist_Z, &
     689              :                            matrix_type=dbcsr_type_no_symmetry, &
     690            8 :                            row_blk_size=r_blk_sizes, col_blk_size=ri_blk_sizes)
     691              : 
     692            8 :          max_ao_size = 0
     693           30 :          DO j = 1, SIZE(bs_env%i_ao_start_from_atom)
     694           30 :             max_ao_size = MAX(max_ao_size, bs_env%i_ao_end_from_atom(j) - bs_env%i_ao_start_from_atom(j) + 1)
     695              :          END DO
     696           30 :          max_loc_ri = MAXVAL(ri_blk_sizes)
     697              : 
     698              :          ! Global mapping array: 4 bytes * N_grid_total (~4MB per million grid points)
     699           24 :          ALLOCATE (map_global_to_local(n_grid_total))
     700              : 
     701              :          ! =========================================================================
     702              :          ! 2. PRE-COMPUTE: EXTRACT FULL DBCSR TO DENSE GLOBAL ARRAY
     703              :          ! =========================================================================
     704           32 :          ALLOCATE (phi_global(n_grid_total, n_ao_total))
     705            8 :          phi_global = 0.0_dp
     706              : 
     707            8 :          CALL dbcsr_iterator_start(iter, mat_phi_mu_l)
     708           41 :          DO WHILE (dbcsr_iterator_blocks_left(iter))
     709           33 :             CALL dbcsr_iterator_next_block(iter, row, col, block_ptr)
     710              : 
     711           33 :             IF (col < 1 .OR. col > natom) CYCLE
     712           33 :             istart_ao = bs_env%i_ao_start_from_atom(col)
     713              : 
     714          134 :             DO j = 1, SIZE(block_ptr, 2)
     715           93 :                global_j = istart_ao + j - 1
     716           93 :                IF (global_j < 1 .OR. global_j > n_ao_total) CYCLE
     717              : 
     718        19614 :                DO i = 1, SIZE(block_ptr, 1)
     719        19488 :                   global_i = row_offset(row) + i
     720        19488 :                   IF (global_i < 1 .OR. global_i > n_grid_total) CYCLE
     721              : 
     722              :                   ! Insert directly into the global dense array
     723        19581 :                   phi_global(global_i, global_j) = block_ptr(i, j)
     724              :                END DO
     725              :             END DO
     726              :          END DO
     727            8 :          CALL dbcsr_iterator_stop(iter)
     728              : 
     729            8 :          CALL para_env%sum(phi_global)
     730              : 
     731              :          ! =========================================================================
     732              :          ! 3. MPI LOOP OVER ATOMS (Fully independent, no MPI barriers inside)
     733              :          ! This distributes the N_atoms among the available MPI processors.
     734              :          ! =========================================================================
     735            8 :          DO atom_P = para_env%mepos + 1, natom, para_env%num_pe
     736              : 
     737           11 :             n_loc_ri = ri_blk_sizes(atom_P)
     738           44 :             pos_P(:) = particle_set(atom_P)%r(:)
     739              : 
     740              :             ! ---------------------------------------------------------------------
     741              :             ! A. Determine Local Grid Domain based on cutoff_ri
     742              :             ! ---------------------------------------------------------------------
     743           11 :             n_local_grid = 0
     744         6827 :             DO l = 1, n_grid_total
     745        27264 :                dist = SQRT(SUM((ri_rs_grid_points(1:3, l) - pos_P(1:3))**2))
     746         6827 :                IF (dist <= cutoff_ri) n_local_grid = n_local_grid + 1
     747              :             END DO
     748              : 
     749           33 :             ALLOCATE (local_grid_idx(n_local_grid))
     750           11 :             map_global_to_local = 0
     751              : 
     752           11 :             n_local_grid = 0
     753         6827 :             DO l = 1, n_grid_total
     754        27264 :                dist = SQRT(SUM((ri_rs_grid_points(1:3, l) - pos_P(1:3))**2))
     755         6827 :                IF (dist <= cutoff_ri) THEN
     756         6816 :                   n_local_grid = n_local_grid + 1
     757         6816 :                   local_grid_idx(n_local_grid) = l
     758         6816 :                   map_global_to_local(l) = n_local_grid
     759              :                END IF
     760              :             END DO
     761              : 
     762              :             ! ---------------------------------------------------------------------
     763              :             ! B. Smart Fetch of Local Phi (MPI-Safe & Bounds-Checked)
     764              :             ! ---------------------------------------------------------------------
     765           44 :             ALLOCATE (phi_local(n_local_grid, n_ao_total))
     766              : 
     767              :             !$OMP PARALLEL DO DEFAULT(NONE) &
     768              :             !$OMP SHARED(n_ao_total, n_local_grid, local_grid_idx, phi_local, phi_global) &
     769              :             !$OMP PRIVATE(global_j, loc_idx, global_i) &
     770           11 :             !$OMP SCHEDULE(STATIC)
     771              :             DO global_j = 1, n_ao_total
     772              :                DO loc_idx = 1, n_local_grid
     773              :                   global_i = local_grid_idx(loc_idx)
     774              :                   phi_local(loc_idx, global_j) = phi_global(global_i, global_j)
     775              :                END DO
     776              :             END DO
     777              :             !$OMP END PARALLEL DO
     778              : 
     779              :             ! ---------------------------------------------------------------------
     780              :             ! C. Build and Balance Local LHS Matrix (D_local)
     781              :             ! ---------------------------------------------------------------------
     782           66 :             ALLOCATE (D_local(n_local_grid, n_local_grid), d_vec_local(n_local_grid))
     783           11 :             D_local = 0.0_dp
     784              : 
     785           11 :             CALL dsyrk("L", "N", n_local_grid, n_ao_total, 1.0_dp, phi_local, n_local_grid, 0.0_dp, D_local, n_local_grid)
     786              : 
     787              :             !$OMP PARALLEL DO DEFAULT(NONE) &
     788              :             !$OMP SHARED(n_local_grid, D_local, d_vec_local, bs_env) &
     789              :             !$OMP PRIVATE(i) &
     790           11 :             !$OMP SCHEDULE(STATIC)
     791              :             DO i = 1, n_local_grid
     792              :                D_local(i, i) = D_local(i, i)**2
     793              :                d_vec_local(i) = 1.0_dp/SQRT(MAX(D_local(i, i), 1.0E-16_dp))
     794              :                D_local(i, i) = (D_local(i, i)*d_vec_local(i)**2) + bs_env%ri_rs%tikhonov
     795              :             END DO
     796              :             !$OMP END PARALLEL DO
     797              : 
     798              :             !$OMP PARALLEL DO DEFAULT(NONE) &
     799              :             !$OMP SHARED(n_local_grid, D_local, d_vec_local) &
     800              :             !$OMP PRIVATE(j, i) &
     801           11 :             !$OMP SCHEDULE(DYNAMIC)
     802              :             DO j = 1, n_local_grid
     803              :                DO i = j + 1, n_local_grid
     804              :                   D_local(i, j) = D_local(i, j)**2
     805              :                   D_local(i, j) = D_local(i, j)*d_vec_local(i)*d_vec_local(j)
     806              :                   D_local(j, i) = D_local(i, j)
     807              :                END DO
     808              :             END DO
     809              :             !$OMP END PARALLEL DO
     810              : 
     811              :             ! ---------------------------------------------------------------------
     812              :             ! D. Build Local RHS Matrix (d_lp_local)
     813              :             ! ---------------------------------------------------------------------
     814           44 :             ALLOCATE (d_lp_local(n_local_grid, n_loc_ri))
     815           44 :             ALLOCATE (rho_pair_local(n_local_grid, max_ao_size*max_ao_size))
     816           55 :             ALLOCATE (int_3c_local(max_ao_size, max_ao_size, n_loc_ri))
     817           44 :             ALLOCATE (int_2d_local(max_ao_size*max_ao_size, n_loc_ri))
     818              : 
     819           11 :             d_lp_local = 0.0_dp
     820              : 
     821              :             CALL compute_d_lp(qs_env, bs_env, phi_local, d_lp_local, n_local_grid, &
     822           11 :                               n_loc_ri, atom_P, rho_pair_local, int_3c_local, int_2d_local, max_ao_size)
     823              : 
     824              :             !$OMP PARALLEL DO DEFAULT(NONE) &
     825              :             !$OMP SHARED(n_loc_ri, n_local_grid, d_lp_local, d_vec_local) &
     826              :             !$OMP PRIVATE(j_ri, i) &
     827           11 :             !$OMP SCHEDULE(STATIC)
     828              :             DO j_ri = 1, n_loc_ri
     829              :                DO i = 1, n_local_grid
     830              :                   d_lp_local(i, j_ri) = d_lp_local(i, j_ri)*d_vec_local(i)
     831              :                END DO
     832              :             END DO
     833              :             !$OMP END PARALLEL DO
     834              : 
     835              :             ! ---------------------------------------------------------------------
     836              :             ! E. Fast Local LAPACK Solve
     837              :             ! ---------------------------------------------------------------------
     838              : 
     839           11 :             CALL dpotrf('L', n_local_grid, D_local, n_local_grid, info)
     840              : 
     841           11 :             CALL dpotrs('L', n_local_grid, n_loc_ri, D_local, n_local_grid, d_lp_local, n_local_grid, info)
     842              : 
     843              :             !$OMP PARALLEL DO DEFAULT(NONE) &
     844              :             !$OMP SHARED(n_loc_ri, n_local_grid, d_lp_local, d_vec_local) &
     845              :             !$OMP PRIVATE(j_ri, i) &
     846           11 :             !$OMP SCHEDULE(STATIC)
     847              :             DO j_ri = 1, n_loc_ri
     848              :                DO i = 1, n_local_grid
     849              :                   d_lp_local(i, j_ri) = d_lp_local(i, j_ri)*d_vec_local(i)
     850              :                END DO
     851              :             END DO
     852              :             !$OMP END PARALLEL DO
     853              : 
     854              :             ! ---------------------------------------------------------------------
     855              :             ! F. Scatter Local Solution Back to Global DBCSR Matrix
     856              :             ! ---------------------------------------------------------------------
     857           44 :             ALLOCATE (Z_global_temp(n_grid_total, n_loc_ri))
     858           11 :             Z_global_temp = 0.0_dp
     859              : 
     860              :             !$OMP PARALLEL DO DEFAULT(NONE) &
     861              :             !$OMP SHARED(n_local_grid, local_grid_idx, Z_global_temp, n_loc_ri, d_lp_local) &
     862              :             !$OMP PRIVATE(loc_idx, global_i) &
     863           11 :             !$OMP SCHEDULE(STATIC)
     864              :             DO loc_idx = 1, n_local_grid
     865              :                global_i = local_grid_idx(loc_idx)
     866              :                Z_global_temp(global_i, 1:n_loc_ri) = d_lp_local(loc_idx, 1:n_loc_ri)
     867              :             END DO
     868              :             !$OMP END PARALLEL DO
     869              : 
     870           77 :             ALLOCATE (Z_blk(MAXVAL(r_blk_sizes), n_loc_ri))
     871              : 
     872           44 :             DO i_blk = 1, num_grid_chunks
     873           33 :                r_start = row_offset(i_blk) + 1
     874           33 :                r_end = row_offset(i_blk) + r_blk_sizes(i_blk)
     875           33 :                current_chunk_size = r_blk_sizes(i_blk)
     876              : 
     877           33 :                Z_blk = 0.0_dp
     878        53376 :                Z_blk(1:current_chunk_size, 1:n_loc_ri) = Z_global_temp(r_start:r_end, 1:n_loc_ri)
     879              : 
     880        53387 :                IF (MAXVAL(ABS(Z_blk(1:current_chunk_size, 1:n_loc_ri))) > bs_env%eps_filter) THEN
     881              :                   CALL dbcsr_put_block(mat_Z_lP, row=i_blk, col=atom_P, &
     882           33 :                                        block=Z_blk(1:current_chunk_size, 1:n_loc_ri))
     883              :                END IF
     884              :             END DO
     885              : 
     886           11 :             DEALLOCATE (Z_global_temp, Z_blk)
     887           11 :             DEALLOCATE (D_local, d_vec_local, d_lp_local)
     888           11 :             DEALLOCATE (rho_pair_local, int_3c_local, int_2d_local)
     889              : 
     890           11 :             DEALLOCATE (local_grid_idx, phi_local)
     891              : 
     892              :          END DO
     893              : 
     894              :          ! Clean up the massive global array once the loop finishes
     895            8 :          DEALLOCATE (phi_global)
     896              : 
     897            8 :          CALL dbcsr_finalize(mat_Z_lP)
     898              : 
     899            8 :          IF (bs_env%unit_nr > 0) THEN
     900              :             WRITE (bs_env%unit_nr, '(T2,A,T57,A,F7.1,A)') &
     901            4 :                'Computed Z_lP ', ' Execution time', m_walltime() - t1, ' s'
     902            4 :             WRITE (bs_env%unit_nr, '(A)') ' '
     903              :          END IF
     904              : 
     905            8 :          logger => cp_get_default_logger()
     906              : 
     907            8 :          IF (BTEST(cp_print_key_should_output(logger%iter_info, input, key), cp_p_file)) THEN
     908            0 :             CALL dbcsr_binary_write(matrix=mat_Z_lP, filepath=TRIM(bs_env%prefix)//"Z_lP.matrix")
     909              :          END IF
     910              : 
     911           16 :          DEALLOCATE (map_global_to_local)
     912              : 
     913              :       END IF
     914              : 
     915           10 :       DEALLOCATE (row_offset, ri_blk_sizes, col_dist_ri)
     916           10 :       CALL dbcsr_distribution_release(dist_Z)
     917              : 
     918           10 :       DEALLOCATE (ri_rs_grid_points)
     919              : 
     920           10 :       CALL timestop(handle)
     921              : 
     922           40 :    END SUBROUTINE compute_coeff_Z_lP
     923              : 
     924              : ! **************************************************************************************************
     925              : !> \brief Computes the dense localized Right-Hand Side (RHS) matrix d_lp
     926              : !> \param qs_env ...
     927              : !> \param bs_env ...
     928              : !> \param phi_val ...
     929              : !> \param d_lp ...
     930              : !> \param n_grid_total ...
     931              : !> \param n_loc_ri ...
     932              : !> \param atom_P ...
     933              : !> \param rho_pair ...
     934              : !> \param int_3c ...
     935              : !> \param int_2d ...
     936              : !> \param max_ao_size ...
     937              : ! **************************************************************************************************
     938              : 
     939           44 :    SUBROUTINE compute_d_lp(qs_env, bs_env, phi_val, d_lp, n_grid_total, n_loc_ri, atom_P, &
     940           11 :                            rho_pair, int_3c, int_2d, max_ao_size)
     941              : 
     942              :       TYPE(qs_environment_type), POINTER                 :: qs_env
     943              :       TYPE(post_scf_bandstructure_type), POINTER         :: bs_env
     944              :       REAL(KIND=dp), DIMENSION(:, :), INTENT(IN)         :: phi_val
     945              :       REAL(KIND=dp), DIMENSION(:, :), INTENT(INOUT)      :: d_lp
     946              :       INTEGER, INTENT(IN)                                :: n_grid_total, n_loc_ri, atom_P
     947              :       REAL(KIND=dp), DIMENSION(:, :), INTENT(INOUT)      :: rho_pair
     948              :       REAL(KIND=dp), DIMENSION(:, :, :), INTENT(INOUT)   :: int_3c
     949              :       REAL(KIND=dp), DIMENSION(:, :), INTENT(INOUT)      :: int_2d
     950              :       INTEGER, INTENT(IN)                                :: max_ao_size
     951              : 
     952              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'compute_d_lp'
     953              : 
     954              :       INTEGER                                            :: atom_j, atom_k, handle, j, jk_idx, &
     955              :                                                             jsize, jstart, k, ksize, kstart, l, ri
     956              : 
     957           11 :       CALL timeset(routineN, handle)
     958              : 
     959           42 :       DO atom_j = 1, SIZE(bs_env%i_ao_start_from_atom)
     960           31 :          jstart = bs_env%i_ao_start_from_atom(atom_j)
     961           31 :          jsize = bs_env%i_ao_end_from_atom(atom_j) - jstart + 1
     962              : 
     963          131 :          DO atom_k = 1, SIZE(bs_env%i_ao_start_from_atom)
     964           89 :             kstart = bs_env%i_ao_start_from_atom(atom_k)
     965           89 :             ksize = bs_env%i_ao_end_from_atom(atom_k) - kstart + 1
     966              : 
     967         7794 :             int_3c(1:jsize, 1:ksize, 1:n_loc_ri) = 0.0_dp
     968              : 
     969              :             ! Compute B_{μν,P} = (μν|P)
     970              :             CALL build_3c_integral_block(int_3c(1:jsize, 1:ksize, 1:n_loc_ri), &
     971              :                                          qs_env, bs_env%ri_metric, &
     972              :                                          basis_j=bs_env%basis_set_AO, &
     973              :                                          basis_k=bs_env%basis_set_AO, &
     974              :                                          basis_i=bs_env%basis_set_RI, &
     975           89 :                                          atom_k=atom_k, atom_j=atom_j, atom_i=atom_P)
     976              : 
     977              :             ! Build Pair Density: ρ(l, μν) = Φ_μ(r_l) \times Φ_ν(r_l)
     978              :             !$OMP PARALLEL DO DEFAULT(NONE) &
     979              :             !$OMP SHARED(ksize, jsize, n_grid_total, rho_pair, phi_val, jstart, kstart) &
     980              :             !$OMP PRIVATE(k, j, l, jk_idx) &
     981           89 :             !$OMP COLLAPSE(2)
     982              :             DO k = 1, ksize
     983              :                DO j = 1, jsize
     984              :                   jk_idx = (k - 1)*jsize + j
     985              :                   DO l = 1, n_grid_total
     986              :                      rho_pair(l, jk_idx) = phi_val(l, jstart + j - 1)*phi_val(l, kstart + k - 1)
     987              :                   END DO
     988              :                END DO
     989              :             END DO
     990              :             !$OMP END PARALLEL DO
     991              : 
     992              :             ! Flatten 3D B_{μν, P} tensor to 2D B_{(μν), P} matrix for BLAS
     993              :             !$OMP PARALLEL DO DEFAULT(NONE) &
     994              :             !$OMP SHARED(n_loc_ri, ksize, jsize, int_2d, int_3c) &
     995              :             !$OMP PRIVATE(ri, k, j, jk_idx) &
     996           89 :             !$OMP COLLAPSE(2)
     997              :             DO ri = 1, n_loc_ri
     998              :                DO k = 1, ksize
     999              :                   DO j = 1, jsize
    1000              :                      jk_idx = (k - 1)*jsize + j
    1001              :                      int_2d(jk_idx, ri) = int_3c(j, k, ri)
    1002              :                   END DO
    1003              :                END DO
    1004              :             END DO
    1005              :             !$OMP END PARALLEL DO
    1006              : 
    1007              :             ! Perform Contraction: d_{l, P} += ρ(l, μν) \times B_{(μν), P}
    1008              :             CALL dgemm("N", "N", n_grid_total, n_loc_ri, jsize*ksize, &
    1009              :                        1.0_dp, rho_pair, n_grid_total, &
    1010              :                        int_2d, max_ao_size*max_ao_size, &
    1011          120 :                        1.0_dp, d_lp(1, 1), n_grid_total)
    1012              :          END DO
    1013              :       END DO
    1014              : 
    1015           11 :       CALL timestop(handle)
    1016              : 
    1017           11 :    END SUBROUTINE compute_d_lp
    1018              : 
    1019              : ! **************************************************************************************************
    1020              : !> \brief Computes the χ(iτ, k=0) matrix
    1021              : !> \param bs_env ...
    1022              : !> \param mat_chi_Gamma_tau ...
    1023              : !> \param mat_phi_mu_l ...
    1024              : !> \param mat_Z_lP ...
    1025              : ! **************************************************************************************************
    1026              : 
    1027           10 :    SUBROUTINE get_mat_chi_Gamma_tau(bs_env, mat_chi_Gamma_tau, mat_phi_mu_l, mat_Z_lP)
    1028              : 
    1029              :       TYPE(post_scf_bandstructure_type), POINTER         :: bs_env
    1030              :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: mat_chi_Gamma_tau
    1031              :       TYPE(dbcsr_type), INTENT(INOUT)                    :: mat_phi_mu_l, mat_Z_lP
    1032              : 
    1033              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'get_mat_chi_Gamma_tau'
    1034              : 
    1035              :       INTEGER                                            :: handle, i, i_t, ispin, npcol
    1036           10 :       INTEGER, DIMENSION(:), POINTER                     :: blk_ao, blk_grid, dist_col_ao, &
    1037           10 :                                                             dist_col_grid, dist_row_grid
    1038              :       REAL(KIND=dp)                                      :: t1, tau
    1039              :       TYPE(dbcsr_distribution_type)                      :: dist_grid_grid, dist_phi
    1040              :       TYPE(dbcsr_type)                                   :: matrix_chi_grid, matrix_chi_grid_spin, &
    1041              :                                                             matrix_G_occ_grid, matrix_G_vir_grid
    1042              : 
    1043           10 :       CALL timeset(routineN, handle)
    1044              : 
    1045              :       ! =========================================================================
    1046              :       ! 1. SETUP CORE TOPOLOGIES
    1047              :       ! =========================================================================
    1048           10 :       CALL dbcsr_get_info(mat_phi_mu_l, distribution=dist_phi, row_blk_size=blk_grid, col_blk_size=blk_ao)
    1049           10 :       CALL dbcsr_distribution_get(dist_phi, row_dist=dist_row_grid, col_dist=dist_col_ao)
    1050              : 
    1051              :       ! Determine number of MPI process columns
    1052           38 :       npcol = MAXVAL(dist_col_ao) + 1
    1053              : 
    1054              :       ! Build a perfectly safe column distribution for the Grid dimension
    1055           30 :       ALLOCATE (dist_col_grid(SIZE(blk_grid)))
    1056           40 :       DO i = 1, SIZE(blk_grid)
    1057           40 :          dist_col_grid(i) = MOD(i - 1, npcol)
    1058              :       END DO
    1059              : 
    1060              :       CALL dbcsr_distribution_new(dist_grid_grid, template=dist_phi, &
    1061           10 :                                   row_dist=dist_row_grid, col_dist=dist_col_grid)
    1062              : 
    1063           10 :       CALL dbcsr_create(matrix_G_occ_grid, "G_occ_grid", dist_grid_grid, dbcsr_type_no_symmetry, blk_grid, blk_grid)
    1064           10 :       CALL dbcsr_create(matrix_G_vir_grid, "G_vir_grid", dist_grid_grid, dbcsr_type_no_symmetry, blk_grid, blk_grid)
    1065           10 :       CALL dbcsr_create(matrix_chi_grid, "chi_grid", dist_grid_grid, dbcsr_type_no_symmetry, blk_grid, blk_grid)
    1066           10 :       CALL dbcsr_create(matrix_chi_grid_spin, "chi_grid_spin", dist_grid_grid, dbcsr_type_no_symmetry, blk_grid, blk_grid)
    1067              : 
    1068              :       ! =========================================================================
    1069              :       ! 2. MAIN IMAGINARY TIME LOOP
    1070              :       ! =========================================================================
    1071          120 :       DO i_t = 1, bs_env%num_time_freq_points
    1072          110 :          t1 = m_walltime()
    1073              : 
    1074          110 :          tau = bs_env%imag_time_points(i_t)
    1075          110 :          CALL dbcsr_set(matrix_chi_grid, 0.0_dp)
    1076              : 
    1077              :          ! ----------------------------------------------------------------------
    1078              :          ! A. SPIN LOOP (Allocations safely encapsulated in wrappers)
    1079              :          ! ----------------------------------------------------------------------
    1080          240 :          DO ispin = 1, bs_env%n_spin
    1081              : 
    1082              :             ! G^occ_µλ(i|τ|,k=0) = sum_n^occ C_µn(k=0) e^(-|(ϵ_nk=0-ϵ_F)τ|) C_λn(k=0)
    1083              :             ! G^occ_ll'(i|τ|,k=0) = sum_µν Φ_µ(r_l) G^occ_µν Φ_ν(r_l')
    1084              :             CALL build_G_grid(bs_env, tau, ispin, .TRUE., .FALSE., mat_phi_mu_l, &
    1085          130 :                               matrix_G_occ_grid, bs_env%eps_filter)
    1086              : 
    1087              :             ! G^vir_µλ(i|τ|,k=0) = sum_n^vir C_µn(k=0) e^(-|(ϵ_nk=0-ϵ_F)τ|) C_λn(k=0)
    1088              :             ! G^vir_ll'(i|τ|,k=0) = sum_µν Φ_µ(r_l) G^vir_µν Φ_ν(r_l')
    1089              :             CALL build_G_grid(bs_env, tau, ispin, .FALSE., .TRUE., mat_phi_mu_l, &
    1090          130 :                               matrix_G_vir_grid, bs_env%eps_filter)
    1091              : 
    1092              :             ! -------------------------------------------------------------------
    1093              :             ! B. ELEMENT-WISE HADAMARD PRODUCT
    1094              :             ! -------------------------------------------------------------------
    1095              :             ! χ_ll'(iτ,k=0) = G^occ_ll'(i|τ|,k=0) * G^vir_ll'(i|τ|,k=0)
    1096          130 :             CALL hadamard_product(matrix_G_occ_grid, matrix_G_vir_grid, matrix_chi_grid_spin, bs_env%spin_degeneracy)
    1097              : 
    1098              :             ! Accumulate spin contributions
    1099          240 :             CALL dbcsr_add(matrix_chi_grid, matrix_chi_grid_spin, 1.0_dp, 1.0_dp)
    1100              : 
    1101              :          END DO ! ispin
    1102              : 
    1103              :          ! ----------------------------------------------------------------------
    1104              :          ! C. TRANSFORM TO AUXILIARY BASIS & EXPORT DIRECTLY
    1105              :          ! χ_aux        = Z^T * χ_grid * Z
    1106              :          ! χ_PQ(iτ,k=0) = sum_ll' Z_lP χ_ll'(iτ,k=0) Z_l'Q
    1107              :          ! Result is dumped directly into the final array mat_chi_Gamma_tau!
    1108              :          ! ----------------------------------------------------------------------
    1109              :          CALL contract_A_B_A("T", "N", mat_Z_lP, matrix_chi_grid, &
    1110          110 :                              mat_chi_Gamma_tau(i_t)%matrix, bs_env%eps_filter)
    1111              : 
    1112          120 :          IF (bs_env%unit_nr > 0) THEN
    1113              :             WRITE (bs_env%unit_nr, '(T2,A,I13,A,I3,A,F7.1,A)') &
    1114           55 :                'Computed χ(iτ,k=0) for time point', i_t, ' /', bs_env%num_time_freq_points, &
    1115          110 :                ',    Execution time', m_walltime() - t1, ' s'
    1116              :          END IF
    1117              : 
    1118              :       END DO ! i_t
    1119              : 
    1120              :       ! =========================================================================
    1121              :       ! 3. FINAL CLEANUP
    1122              :       ! =========================================================================
    1123           10 :       CALL dbcsr_release(matrix_G_occ_grid)
    1124           10 :       CALL dbcsr_release(matrix_G_vir_grid)
    1125           10 :       CALL dbcsr_release(matrix_chi_grid)
    1126           10 :       CALL dbcsr_release(matrix_chi_grid_spin)
    1127           10 :       CALL dbcsr_distribution_release(dist_grid_grid)
    1128           10 :       DEALLOCATE (dist_col_grid)
    1129              : 
    1130           10 :       IF (bs_env%unit_nr > 0) WRITE (bs_env%unit_nr, '(A)') ' '
    1131              : 
    1132           10 :       CALL timestop(handle)
    1133              : 
    1134           20 :    END SUBROUTINE get_mat_chi_Gamma_tau
    1135              : 
    1136              : ! **************************************************************************************************
    1137              : !> \brief Computes Green's Function in grid basis
    1138              : !> \param bs_env ...
    1139              : !> \param tau ...
    1140              : !> \param ispin ...
    1141              : !> \param occ ...
    1142              : !> \param vir ...
    1143              : !> \param mat_phi_mu_l ...
    1144              : !> \param matrix_G_grid ...
    1145              : !> \param eps_filter ...
    1146              : ! **************************************************************************************************
    1147              : 
    1148         1064 :    SUBROUTINE build_G_grid(bs_env, tau, ispin, occ, vir, mat_phi_mu_l, matrix_G_grid, eps_filter)
    1149              : 
    1150              :       TYPE(post_scf_bandstructure_type), POINTER         :: bs_env
    1151              :       REAL(KIND=dp), INTENT(IN)                          :: tau
    1152              :       INTEGER, INTENT(IN)                                :: ispin
    1153              :       LOGICAL, INTENT(IN)                                :: occ, vir
    1154              :       TYPE(dbcsr_type), INTENT(INOUT)                    :: mat_phi_mu_l, matrix_G_grid
    1155              :       REAL(KIND=dp), INTENT(IN)                          :: eps_filter
    1156              : 
    1157              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'build_G_grid'
    1158              : 
    1159              :       INTEGER                                            :: handle
    1160          532 :       INTEGER, DIMENSION(:), POINTER                     :: blk_ao, dist_row_ao
    1161              :       TYPE(cp_fm_type), POINTER                          :: fm_G
    1162              :       TYPE(dbcsr_distribution_type)                      :: dist_ao_ao
    1163              :       TYPE(dbcsr_type)                                   :: matrix_G_ao
    1164              : 
    1165          532 :       CALL timeset(routineN, handle)
    1166              : 
    1167              :       ! 1. Select the correct FM matrix based on occ/vir flags
    1168          532 :       IF (occ) THEN
    1169          272 :          fm_G => bs_env%fm_Gocc
    1170              :       ELSE
    1171          260 :          fm_G => bs_env%fm_Gvir
    1172              :       END IF
    1173              : 
    1174              :       ! 2. Compute Dense FM Green's Function
    1175              :       ! G^occ/vir_µλ(i|τ|,k=0) = sum_G^occ/vir_µλn^occ/vir C_µn(k=0) e^(-|(ϵ_nk=0-ϵ_F)τ|) C_λn(k=0)
    1176          532 :       CALL G_occ_vir(bs_env, tau, fm_G, ispin, occ=occ, vir=vir)
    1177              : 
    1178              :       ! 3. Setup AO DBCSR Topology and Create Matrix dynamically
    1179          532 :       CALL setup_square_topology(mat_phi_mu_l, 'COL', dist_ao_ao, blk_ao, dist_row_ao)
    1180              : 
    1181              :       CALL dbcsr_create(matrix_G_ao, name="G_ao", dist=dist_ao_ao, &
    1182              :                         matrix_type=dbcsr_type_no_symmetry, &
    1183          532 :                         row_blk_size=blk_ao, col_blk_size=blk_ao)
    1184              : 
    1185              :       ! 4. Convert FM to Sparse DBCSR
    1186          532 :       CALL copy_fm_to_dbcsr(fm_G, matrix_G_ao, keep_sparsity=.FALSE.)
    1187              : 
    1188              :       ! 5. Transform to Grid Basis: G_grid = phi * G_ao * phi^T
    1189              :       ! G^occ/vir_ll'(i|τ|,k=0) = sum_µν Φ_µ(r_l) G^occ/vir_µν Φ_ν(r_l')
    1190          532 :       CALL contract_A_B_A("N", "T", mat_phi_mu_l, matrix_G_ao, matrix_G_grid, eps_filter)
    1191              : 
    1192              :       ! 6. Release AO matrix and topology
    1193          532 :       CALL release_dbcsr_topology_and_matrices(dist=dist_ao_ao, mapped_dist=dist_row_ao, m1=matrix_G_ao)
    1194              : 
    1195          532 :       CALL timestop(handle)
    1196              : 
    1197          532 :    END SUBROUTINE build_G_grid
    1198              : 
    1199              : ! **************************************************************************************************
    1200              : !> \brief Generalized routine to compute OUT = A * B * A^T  OR  OUT = A^T * B * A using DBCSR
    1201              : !> \param transA_left ...
    1202              : !> \param transA_right ...
    1203              : !> \param matrix_A ...
    1204              : !> \param matrix_B ...
    1205              : !> \param matrix_out ...
    1206              : !> \param eps_filter ...
    1207              : ! **************************************************************************************************
    1208              : 
    1209         1034 :    SUBROUTINE contract_A_B_A(transA_left, transA_right, matrix_A, matrix_B, matrix_out, eps_filter)
    1210              : 
    1211              :       CHARACTER(LEN=1), INTENT(IN)                       :: transA_left, transA_right
    1212              :       TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix_A, matrix_B, matrix_out
    1213              :       REAL(KIND=dp), INTENT(IN)                          :: eps_filter
    1214              : 
    1215              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'contract_A_B_A'
    1216              : 
    1217              :       INTEGER                                            :: handle
    1218              :       TYPE(dbcsr_type)                                   :: matrix_tmp
    1219              : 
    1220         1034 :       CALL timeset(routineN, handle)
    1221              : 
    1222         1034 :       CALL dbcsr_create(matrix_tmp, template=matrix_A)
    1223              : 
    1224         1034 :       IF (transA_left == "N" .AND. transA_right == "T") THEN
    1225              :          ! Path 1: Out = A * B * A^T
    1226              :          CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_A, matrix_B, &
    1227          652 :                              0.0_dp, matrix_tmp, filter_eps=eps_filter)
    1228              :          CALL dbcsr_multiply("N", "T", 1.0_dp, matrix_tmp, matrix_A, &
    1229          652 :                              0.0_dp, matrix_out, filter_eps=eps_filter)
    1230              : 
    1231          382 :       ELSE IF (transA_left == "T" .AND. transA_right == "N") THEN
    1232              :          ! Path 2: Out = A^T * B * A
    1233              :          CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_B, matrix_A, &
    1234          382 :                              0.0_dp, matrix_tmp, filter_eps=eps_filter)
    1235              :          CALL dbcsr_multiply("T", "N", 1.0_dp, matrix_A, matrix_tmp, &
    1236          382 :                              0.0_dp, matrix_out, filter_eps=eps_filter)
    1237              :       ELSE
    1238            0 :          CPABORT("Unsupported transposition pair in contract_A_B_A")
    1239              :       END IF
    1240              : 
    1241         1034 :       CALL dbcsr_release(matrix_tmp)
    1242              : 
    1243         1034 :       CALL timestop(handle)
    1244              : 
    1245         1034 :    END SUBROUTINE contract_A_B_A
    1246              : 
    1247              : ! **************************************************************************************************
    1248              : !> \brief Computes C = A ◦ B (Element-wise Hadamard product) for sparse DBCSR matrices.
    1249              : !> \param matrix_A ...
    1250              : !> \param matrix_B ...
    1251              : !> \param matrix_C ...
    1252              : !> \param fac (Scaling factor applied to the product)
    1253              : ! **************************************************************************************************
    1254              : 
    1255          804 :    SUBROUTINE hadamard_product(matrix_A, matrix_B, matrix_C, fac)
    1256              : 
    1257              :       TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix_A, matrix_B, matrix_C
    1258              :       REAL(KIND=dp), INTENT(IN)                          :: fac
    1259              : 
    1260              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'hadamard_product'
    1261              : 
    1262              :       INTEGER                                            :: col, handle, row
    1263              :       LOGICAL                                            :: found
    1264          402 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: blk_B, blk_C
    1265              :       TYPE(dbcsr_iterator_type)                          :: iter
    1266              : 
    1267          402 :       CALL timeset(routineN, handle)
    1268              : 
    1269          402 :       CALL dbcsr_copy(matrix_C, matrix_A)
    1270              : 
    1271          402 :       CALL dbcsr_iterator_start(iter, matrix_C)
    1272         2211 :       DO WHILE (dbcsr_iterator_blocks_left(iter))
    1273         1809 :          CALL dbcsr_iterator_next_block(iter, row, col, blk_C)
    1274              : 
    1275         1809 :          CALL dbcsr_get_block_p(matrix_B, row, col, blk_B, found)
    1276              : 
    1277         2211 :          IF (found) THEN
    1278    159523682 :             blk_C(:, :) = fac*blk_C(:, :)*blk_B(:, :)
    1279              :          ELSE
    1280              :             ! If B is sparse here, the product is zero
    1281            0 :             blk_C(:, :) = 0.0_dp
    1282              :          END IF
    1283              :       END DO
    1284          402 :       CALL dbcsr_iterator_stop(iter)
    1285              : 
    1286          402 :       CALL timestop(handle)
    1287              : 
    1288          402 :    END SUBROUTINE hadamard_product
    1289              : 
    1290              : ! **************************************************************************************************
    1291              : !> \brief Computes the exact exchange part of the GW self-energy
    1292              : !> \param bs_env ...
    1293              : !> \param qs_env ...
    1294              : !> \param mat_phi_mu_l ...
    1295              : !> \param mat_Z_lP ...
    1296              : !> \param fm_Sigma_x_Gamma ...
    1297              : ! **************************************************************************************************
    1298              : 
    1299           10 :    SUBROUTINE compute_Sigma_x(bs_env, qs_env, mat_phi_mu_l, mat_Z_lP, fm_Sigma_x_Gamma)
    1300              : 
    1301              :       TYPE(post_scf_bandstructure_type), POINTER         :: bs_env
    1302              :       TYPE(qs_environment_type), POINTER                 :: qs_env
    1303              :       TYPE(dbcsr_type), INTENT(INOUT)                    :: mat_phi_mu_l, mat_Z_lP
    1304              :       TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:)        :: fm_Sigma_x_Gamma
    1305              : 
    1306              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'compute_Sigma_x'
    1307              : 
    1308              :       INTEGER                                            :: handle, ispin
    1309           10 :       INTEGER, DIMENSION(:), POINTER                     :: blk_aux, blk_grid, dist_col_grid, &
    1310           10 :                                                             dist_row_aux
    1311              :       REAL(KIND=dp)                                      :: t1
    1312           10 :       TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:, :)     :: fm_Vtr_Gamma
    1313              :       TYPE(dbcsr_distribution_type)                      :: dist_aux_aux, dist_grid_grid
    1314              :       TYPE(dbcsr_type)                                   :: mat_Sigma_x_Gamma, matrix_D_grid, &
    1315              :                                                             matrix_Sigma_x_grid, matrix_V_aux, &
    1316              :                                                             matrix_V_grid
    1317              : 
    1318           10 :       CALL timeset(routineN, handle)
    1319              : 
    1320           10 :       t1 = m_walltime()
    1321              : 
    1322           42 :       ALLOCATE (fm_Sigma_x_Gamma(bs_env%n_spin))
    1323           22 :       DO ispin = 1, bs_env%n_spin
    1324           22 :          CALL cp_fm_create(fm_Sigma_x_Gamma(ispin), bs_env%fm_s_Gamma%matrix_struct)
    1325              :       END DO
    1326              : 
    1327           10 :       CALL dbcsr_create(mat_Sigma_x_Gamma, template=bs_env%mat_ao_ao%matrix)
    1328              : 
    1329              :       ! =========================================================================
    1330              :       ! 1. SETUP CORE TOPOLOGIES
    1331              :       ! =========================================================================
    1332           10 :       CALL setup_square_topology(mat_phi_mu_l, 'ROW', dist_grid_grid, blk_grid, dist_col_grid)
    1333           10 :       CALL setup_square_topology(mat_Z_lP, 'COL', dist_aux_aux, blk_aux, dist_row_aux)
    1334              : 
    1335              :       ! =========================================================================
    1336              :       ! 2. COMPUTE V^tr_ll'
    1337              :       ! =========================================================================
    1338              :       CALL RI_2c_integral_mat(qs_env, fm_Vtr_Gamma, bs_env%fm_RI_RI, bs_env%n_RI, &
    1339           10 :                               bs_env%trunc_coulomb, do_kpoints=.FALSE.)
    1340              : 
    1341              :       ! M^-1 * V^tr * M^-1 directly modifies fm_Vtr_Gamma(:, 1)
    1342           10 :       CALL multiply_fm_W_MIC_time_with_Minv_Gamma(bs_env, qs_env, fm_Vtr_Gamma(:, 1))
    1343              : 
    1344           10 :       CALL dbcsr_create(matrix_V_aux, "V_aux", dist_aux_aux, dbcsr_type_no_symmetry, blk_aux, blk_aux)
    1345           10 :       CALL dbcsr_create(matrix_V_grid, "V_grid", dist_grid_grid, dbcsr_type_no_symmetry, blk_grid, blk_grid)
    1346              : 
    1347           10 :       CALL copy_fm_to_dbcsr(fm_Vtr_Gamma(1, 1), matrix_V_aux, keep_sparsity=.FALSE.)
    1348              : 
    1349              :       ! V^tr_ll' = sum_PQ Z_lP V^trunc_PQ Z_l'Q
    1350           10 :       CALL contract_A_B_A("N", "T", mat_Z_lP, matrix_V_aux, matrix_V_grid, bs_env%eps_filter)
    1351           10 :       CALL dbcsr_release(matrix_V_aux)
    1352              : 
    1353              :       ! =========================================================================
    1354              :       ! 3. SPIN LOOP FOR EXACT EXCHANGE
    1355              :       ! =========================================================================
    1356           22 :       DO ispin = 1, bs_env%n_spin
    1357              : 
    1358              :          ! Density matrix on grid is essentially G_occ at tau = 0.0
    1359              :          ! D_µν  = sum_n^occ C_µn(k=0) C_νn(k=0)
    1360              :          ! D_ll' = sum_µν Φ_µ(r_l) D_µν Φ_ν(r_l')
    1361           12 :          CALL dbcsr_create(matrix_D_grid, "D_grid", dist_grid_grid, dbcsr_type_no_symmetry, blk_grid, blk_grid)
    1362           12 :          CALL build_G_grid(bs_env, 0.0_dp, ispin, .TRUE., .FALSE., mat_phi_mu_l, matrix_D_grid, bs_env%eps_filter)
    1363              : 
    1364              :          ! Element-wise Hadamard product: Σ^x_grid = D_grid ◦ V_grid
    1365              :          ! Σ^x_ll' = D_ll' * V^tr_ll'
    1366           12 :          CALL dbcsr_create(matrix_Sigma_x_grid, template=matrix_V_grid)
    1367           12 :          CALL hadamard_product(matrix_D_grid, matrix_V_grid, matrix_Sigma_x_grid, 1.0_dp)
    1368              : 
    1369           12 :          CALL dbcsr_release(matrix_D_grid)
    1370              : 
    1371              :          ! Transform back to AO basis: Σ^x_ao = -1.0 * phi^T * Σ^x_grid * phi
    1372              :          ! Σ^x_λσ(k=0)   = -sum_ll' Φ_λ(r_l) Σ^x_ll' Φ_σ(r_l')
    1373           12 :          CALL contract_A_B_A("T", "N", mat_phi_mu_l, matrix_Sigma_x_grid, mat_Sigma_x_Gamma, bs_env%eps_filter)
    1374           12 :          CALL dbcsr_scale(mat_Sigma_x_Gamma, -1.0_dp)
    1375              : 
    1376           12 :          CALL dbcsr_release(matrix_Sigma_x_grid)
    1377              : 
    1378              :          ! Data I/O and Export to CP2K Full Matrices
    1379           22 :          CALL copy_dbcsr_to_fm(mat_Sigma_x_Gamma, fm_Sigma_x_Gamma(ispin))
    1380              : 
    1381              :       END DO ! ispin
    1382              : 
    1383           10 :       IF (bs_env%unit_nr > 0) THEN
    1384              :          WRITE (bs_env%unit_nr, '(T2,A,T58,A,F7.1,A)') &
    1385            5 :             'Computed Σ^x(k=0),', ' Execution time', m_walltime() - t1, ' s'
    1386            5 :          WRITE (bs_env%unit_nr, '(A)') ' '
    1387              :       END IF
    1388              : 
    1389              :       ! =========================================================================
    1390              :       ! 4. CLEANUP
    1391              :       ! =========================================================================
    1392              :       CALL release_dbcsr_topology_and_matrices(dist=dist_grid_grid, mapped_dist=dist_col_grid, &
    1393           10 :                                                m1=mat_Sigma_x_Gamma, m2=matrix_V_grid)
    1394           10 :       CALL release_dbcsr_topology_and_matrices(dist=dist_aux_aux, mapped_dist=dist_row_aux)
    1395              : 
    1396           10 :       CALL cp_fm_release(fm_Vtr_Gamma)
    1397              : 
    1398           10 :       CALL timestop(handle)
    1399              : 
    1400           30 :    END SUBROUTINE compute_Sigma_x
    1401              : 
    1402              : ! **************************************************************************************************
    1403              : !> \brief Computes the correlation part of the GW self-energy
    1404              : !> \param bs_env ...
    1405              : !> \param fm_W_MIC_time ...
    1406              : !> \param mat_phi_mu_l ...
    1407              : !> \param mat_Z_lP ...
    1408              : !> \param fm_Sigma_c_Gamma_time ...
    1409              : ! **************************************************************************************************
    1410              : 
    1411           10 :    SUBROUTINE compute_Sigma_c(bs_env, fm_W_MIC_time, mat_phi_mu_l, mat_Z_lP, fm_Sigma_c_Gamma_time)
    1412              : 
    1413              :       TYPE(post_scf_bandstructure_type), POINTER         :: bs_env
    1414              :       TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:)        :: fm_W_MIC_time
    1415              :       TYPE(dbcsr_type), INTENT(INOUT)                    :: mat_phi_mu_l, mat_Z_lP
    1416              :       TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:, :, :)  :: fm_Sigma_c_Gamma_time
    1417              : 
    1418              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'compute_Sigma_c'
    1419              : 
    1420              :       INTEGER                                            :: handle, i_t, ispin
    1421           10 :       INTEGER, DIMENSION(:), POINTER                     :: blk_aux, blk_grid, dist_col_grid, &
    1422           10 :                                                             dist_row_aux
    1423              :       REAL(KIND=dp)                                      :: t1, tau
    1424              :       TYPE(dbcsr_distribution_type)                      :: dist_aux_aux, dist_grid_grid
    1425           10 :       TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER       :: mat_Sigma_neg_tau, mat_Sigma_pos_tau
    1426              :       TYPE(dbcsr_type) :: matrix_G_occ_grid, matrix_G_vir_grid, matrix_Sigma_neg_grid, &
    1427              :          matrix_Sigma_pos_grid, matrix_W_aux, matrix_W_grid
    1428              : 
    1429           10 :       CALL timeset(routineN, handle)
    1430              : 
    1431              :       ! =========================================================================
    1432              :       ! 1. SETUP CORE TOPOLOGIES AND PRE-ALLOCATE OUTPUT ARRAYS
    1433              :       ! =========================================================================
    1434           10 :       CALL setup_square_topology(mat_phi_mu_l, 'ROW', dist_grid_grid, blk_grid, dist_col_grid)
    1435           10 :       CALL setup_square_topology(mat_Z_lP, 'COL', dist_aux_aux, blk_aux, dist_row_aux)
    1436              : 
    1437              :       ! Pre-allocate local DBCSR matrices to act as targets for final output
    1438           10 :       NULLIFY (mat_Sigma_neg_tau, mat_Sigma_pos_tau)
    1439          182 :       ALLOCATE (mat_Sigma_neg_tau(bs_env%num_time_freq_points, bs_env%n_spin))
    1440          182 :       ALLOCATE (mat_Sigma_pos_tau(bs_env%num_time_freq_points, bs_env%n_spin))
    1441              : 
    1442          120 :       DO i_t = 1, bs_env%num_time_freq_points
    1443          250 :          DO ispin = 1, bs_env%n_spin
    1444          130 :             ALLOCATE (mat_Sigma_neg_tau(i_t, ispin)%matrix)
    1445          130 :             ALLOCATE (mat_Sigma_pos_tau(i_t, ispin)%matrix)
    1446          130 :             CALL dbcsr_create(mat_Sigma_neg_tau(i_t, ispin)%matrix, template=bs_env%mat_ao_ao%matrix)
    1447          240 :             CALL dbcsr_create(mat_Sigma_pos_tau(i_t, ispin)%matrix, template=bs_env%mat_ao_ao%matrix)
    1448              :          END DO
    1449              :       END DO
    1450              : 
    1451              :       ! =========================================================================
    1452              :       ! 2. MAIN IMAGINARY TIME LOOP
    1453              :       ! =========================================================================
    1454          120 :       DO i_t = 1, bs_env%num_time_freq_points
    1455          110 :          tau = bs_env%imag_time_points(i_t)
    1456              : 
    1457              :          ! -------------------------------------------------------------------
    1458              :          ! Compute W_grid = Z * W_aux * Z^T
    1459              :          ! -------------------------------------------------------------------
    1460          110 :          CALL dbcsr_create(matrix_W_aux, "W_aux", dist_aux_aux, dbcsr_type_no_symmetry, blk_aux, blk_aux)
    1461          110 :          CALL dbcsr_create(matrix_W_grid, "W_grid", dist_grid_grid, dbcsr_type_no_symmetry, blk_grid, blk_grid)
    1462              : 
    1463          110 :          CALL copy_fm_to_dbcsr(fm_W_MIC_time(i_t), matrix_W_aux, keep_sparsity=.FALSE.)
    1464              : 
    1465              :          ! W^MIC_ll'(iτ,k=0) = sum_PQ Z_lP W^MIC_PQ(iτ) Z_l'Q
    1466          110 :          CALL contract_A_B_A("N", "T", mat_Z_lP, matrix_W_aux, matrix_W_grid, bs_env%eps_filter)
    1467              : 
    1468          110 :          CALL dbcsr_release(matrix_W_aux) ! Clean up aux basis immediately
    1469              : 
    1470          240 :          DO ispin = 1, bs_env%n_spin
    1471          130 :             t1 = m_walltime()
    1472              : 
    1473              :             ! -------------------------------------------------------------------
    1474              :             ! A. Transform Green's Functions to the Grid
    1475              :             ! -------------------------------------------------------------------
    1476          130 :             CALL dbcsr_create(matrix_G_occ_grid, "G_occ_grid", dist_grid_grid, dbcsr_type_no_symmetry, blk_grid, blk_grid)
    1477          130 :             CALL dbcsr_create(matrix_G_vir_grid, "G_vir_grid", dist_grid_grid, dbcsr_type_no_symmetry, blk_grid, blk_grid)
    1478              : 
    1479              :             ! G^occ_µλ(i|τ|,k=0) = sum_G^occ_µλn^occ C_µn(k=0) e^(-|(ϵ_nk=0-ϵ_F)τ|) C_λn(k=0)
    1480              :             ! G^occ_ll'(i|τ|,k=0) = sum_µν Φ_µ(r_l) G^occ_µν Φ_ν(r_l')
    1481          130 :             CALL build_G_grid(bs_env, tau, ispin, .TRUE., .FALSE., mat_phi_mu_l, matrix_G_occ_grid, bs_env%eps_filter)
    1482              : 
    1483              :             ! G^vir_µλ(i|τ|,k=0) = sum_n^vir C_µn(k=0) e^(-|(ϵ_nk=0-ϵ_F)τ|) C_λn(k=0)
    1484              :             ! G^vir_ll'(i|τ|,k=0) = sum_µν Φ_µ(r_l) G^vir_µν Φ_ν(r_l')
    1485          130 :             CALL build_G_grid(bs_env, tau, ispin, .FALSE., .TRUE., mat_phi_mu_l, matrix_G_vir_grid, bs_env%eps_filter)
    1486              : 
    1487              :             ! -------------------------------------------------------------------
    1488              :             ! B. Element-wise Hadamard Products for Sigma_c on Grid
    1489              :             ! Σ_neg_grid = G_occ_grid ◦ W_grid
    1490              :             ! Σ_pos_grid = G_vir_grid ◦ W_grid
    1491              :             ! -------------------------------------------------------------------
    1492          130 :             CALL dbcsr_create(matrix_Sigma_neg_grid, template=matrix_W_grid)
    1493          130 :             CALL dbcsr_create(matrix_Sigma_pos_grid, template=matrix_W_grid)
    1494              : 
    1495              :             ! Σ^c_ll'(iτ,k=0)   = -G^occ_ll'(i|τ|,k=0) * W^MIC_ll'(iτ,k=0), for τ < 0
    1496          130 :             CALL hadamard_product(matrix_G_occ_grid, matrix_W_grid, matrix_Sigma_neg_grid, 1.0_dp)
    1497              : 
    1498              :             ! Σ^c_ll'(iτ,k=0)   =  G^vir_ll'(i|τ|,k=0) * W^MIC_ll'(iτ,k=0), for τ > 0
    1499          130 :             CALL hadamard_product(matrix_G_vir_grid, matrix_W_grid, matrix_Sigma_pos_grid, 1.0_dp)
    1500              : 
    1501              :             ! Instantly purge massive G_grid arrays to save memory
    1502          130 :             CALL dbcsr_release(matrix_G_occ_grid)
    1503          130 :             CALL dbcsr_release(matrix_G_vir_grid)
    1504              : 
    1505              :             ! -------------------------------------------------------------------
    1506              :             ! C. Transform Sigma back to AO Basis
    1507              :             ! Σ_AO = phi^T * Σ_grid * phi
    1508              :             ! -------------------------------------------------------------------
    1509              : 
    1510              :             ! Σ^c_λσ(iτ,k=0)    = sum_ll' Φ_λ(r_l) Σ^c_ll'(iτ,k=0) Φ_σ(r_l'), for τ < 0
    1511              :             CALL contract_A_B_A("T", "N", mat_phi_mu_l, matrix_Sigma_neg_grid, &
    1512          130 :                                 mat_Sigma_neg_tau(i_t, ispin)%matrix, bs_env%eps_filter)
    1513          130 :             CALL dbcsr_scale(mat_Sigma_neg_tau(i_t, ispin)%matrix, -1.0_dp)
    1514              : 
    1515              :             ! Σ^c_λσ(iτ,k=0)    = sum_ll' Φ_λ(r_l) Σ^c_ll'(iτ,k=0) Φ_σ(r_l'), for τ > 0
    1516              :             CALL contract_A_B_A("T", "N", mat_phi_mu_l, matrix_Sigma_pos_grid, &
    1517          130 :                                 mat_Sigma_pos_tau(i_t, ispin)%matrix, bs_env%eps_filter)
    1518              : 
    1519              :             ! Purge Grid Sigma arrays
    1520          130 :             CALL dbcsr_release(matrix_Sigma_neg_grid)
    1521          130 :             CALL dbcsr_release(matrix_Sigma_pos_grid)
    1522              : 
    1523          240 :             IF (bs_env%unit_nr > 0) THEN
    1524              :                WRITE (bs_env%unit_nr, '(T2,A,I10,A,I3,A,F7.1,A)') &
    1525           65 :                   'Computed Σ^c(iτ,k=0) for time point ', i_t, ' /', bs_env%num_time_freq_points, &
    1526          130 :                   ',    Execution time', m_walltime() - t1, ' s'
    1527              :             END IF
    1528              : 
    1529              :          END DO ! ispin
    1530              : 
    1531              :          ! Release the W_grid for this time point
    1532          120 :          CALL dbcsr_release(matrix_W_grid)
    1533              : 
    1534              :       END DO ! i_t
    1535              : 
    1536           10 :       IF (bs_env%unit_nr > 0) WRITE (bs_env%unit_nr, '(A)') ' '
    1537              : 
    1538              :       ! -------------------------------------------------------------------------
    1539              :       ! 3. FINALIZE AND CLEANUP
    1540              :       ! -------------------------------------------------------------------------
    1541              :       CALL fill_fm_Sigma_c_Gamma_time(fm_Sigma_c_Gamma_time, bs_env, &
    1542           10 :                                       mat_Sigma_pos_tau, mat_Sigma_neg_tau)
    1543              : 
    1544           10 :       CALL cp_fm_release(fm_W_MIC_time)
    1545              : 
    1546           10 :       CALL dbcsr_deallocate_matrix_set(mat_Sigma_neg_tau)
    1547           10 :       CALL dbcsr_deallocate_matrix_set(mat_Sigma_pos_tau)
    1548              : 
    1549           10 :       CALL release_dbcsr_topology_and_matrices(dist=dist_grid_grid, mapped_dist=dist_col_grid)
    1550           10 :       CALL release_dbcsr_topology_and_matrices(dist=dist_aux_aux, mapped_dist=dist_row_aux)
    1551              : 
    1552           10 :       CALL delete_unnecessary_files(bs_env)
    1553           10 :       CALL timestop(handle)
    1554              : 
    1555           10 :    END SUBROUTINE compute_Sigma_c
    1556              : 
    1557              : ! **************************************************************************************************
    1558              : !> \brief DBCSR Topology Generation
    1559              : !> \param matrix_template ...
    1560              : !> \param dim_type ...
    1561              : !> \param square_dist ...
    1562              : !> \param blk_sizes ...
    1563              : !> \param mapped_dist ...
    1564              : ! **************************************************************************************************
    1565              : 
    1566          572 :    SUBROUTINE setup_square_topology(matrix_template, dim_type, square_dist, blk_sizes, mapped_dist)
    1567              : 
    1568              :       TYPE(dbcsr_type), INTENT(IN)                       :: matrix_template
    1569              :       CHARACTER(LEN=*), INTENT(IN)                       :: dim_type
    1570              :       TYPE(dbcsr_distribution_type), INTENT(OUT)         :: square_dist
    1571              :       INTEGER, DIMENSION(:), INTENT(OUT), POINTER        :: blk_sizes, mapped_dist
    1572              : 
    1573              :       INTEGER                                            :: i, np
    1574          572 :       INTEGER, DIMENSION(:), POINTER                     :: col_blk, col_dist, row_blk, row_dist
    1575              :       TYPE(dbcsr_distribution_type)                      :: dist_template
    1576              : 
    1577              :       CALL dbcsr_get_info(matrix_template, distribution=dist_template, &
    1578          572 :                           row_blk_size=row_blk, col_blk_size=col_blk)
    1579          572 :       CALL dbcsr_distribution_get(dist_template, row_dist=row_dist, col_dist=col_dist)
    1580              : 
    1581          572 :       IF (TRIM(dim_type) == 'ROW') THEN
    1582              :          ! Creates ROW x ROW (e.g., Grid x Grid from mat_phi_mu_l)
    1583           20 :          blk_sizes => row_blk
    1584           76 :          np = MAXVAL(col_dist) + 1  ! npcol
    1585           60 :          ALLOCATE (mapped_dist(SIZE(blk_sizes)))
    1586           80 :          DO i = 1, SIZE(blk_sizes)
    1587           80 :             mapped_dist(i) = MOD(i - 1, np)
    1588              :          END DO
    1589              :          CALL dbcsr_distribution_new(square_dist, template=dist_template, &
    1590           20 :                                      row_dist=row_dist, col_dist=mapped_dist)
    1591              : 
    1592          552 :       ELSE IF (TRIM(dim_type) == 'COL') THEN
    1593              :          ! Creates COL x COL (e.g., Aux x Aux from mat_Z_lP)
    1594          552 :          blk_sizes => col_blk
    1595         2208 :          np = MAXVAL(row_dist) + 1  ! nprow
    1596         1656 :          ALLOCATE (mapped_dist(SIZE(blk_sizes)))
    1597         2040 :          DO i = 1, SIZE(blk_sizes)
    1598         2040 :             mapped_dist(i) = MOD(i - 1, np)
    1599              :          END DO
    1600              :          CALL dbcsr_distribution_new(square_dist, template=dist_template, &
    1601          552 :                                      row_dist=mapped_dist, col_dist=col_dist)
    1602              :       END IF
    1603              : 
    1604          572 :    END SUBROUTINE setup_square_topology
    1605              : 
    1606              : ! **************************************************************************************************
    1607              : !> \brief DBCSR matrices deallocation
    1608              : !> \param dist ...
    1609              : !> \param mapped_dist    ...
    1610              : !> \param m1 ...
    1611              : !> \param m2 ...
    1612              : !> \param m3 ...
    1613              : !> \param m4 ...
    1614              : ! **************************************************************************************************
    1615              : 
    1616          572 :    SUBROUTINE release_dbcsr_topology_and_matrices(dist, mapped_dist, m1, m2, m3, m4)
    1617              : 
    1618              :       TYPE(dbcsr_distribution_type), INTENT(INOUT), &
    1619              :          OPTIONAL                                        :: dist
    1620              :       INTEGER, DIMENSION(:), INTENT(INOUT), OPTIONAL, &
    1621              :          POINTER                                         :: mapped_dist
    1622              :       TYPE(dbcsr_type), INTENT(INOUT), OPTIONAL          :: m1, m2, m3, m4
    1623              : 
    1624          572 :       IF (PRESENT(dist)) CALL dbcsr_distribution_release(dist)
    1625          572 :       IF (PRESENT(mapped_dist)) THEN
    1626          572 :          IF (ASSOCIATED(mapped_dist)) THEN
    1627          572 :             DEALLOCATE (mapped_dist)
    1628              :             NULLIFY (mapped_dist)
    1629              :          END IF
    1630              :       END IF
    1631          572 :       IF (PRESENT(m1)) CALL dbcsr_release(m1)
    1632          572 :       IF (PRESENT(m2)) CALL dbcsr_release(m2)
    1633          572 :       IF (PRESENT(m3)) CALL dbcsr_release(m3)
    1634          572 :       IF (PRESENT(m4)) CALL dbcsr_release(m4)
    1635              : 
    1636          572 :    END SUBROUTINE release_dbcsr_topology_and_matrices
    1637              : 
    1638              : END MODULE gw_large_cell_Gamma_ri_rs
        

Generated by: LCOV version 2.0-1