Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2026 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief GW using RI-RS Approximation for molecules
10 : !> \par History
11 : !> 04.2026 created [Ritaj Tyagi]
12 : ! **************************************************************************************************
13 : MODULE gw_large_cell_Gamma_ri_rs
14 : USE atomic_kind_types, ONLY: atomic_kind_type
15 : USE basis_set_types, ONLY: gto_basis_set_type
16 : USE cell_types, ONLY: cell_type,&
17 : get_cell,&
18 : pbc
19 : USE constants_operator, ONLY: operator_coulomb
20 : USE cp_blacs_env, ONLY: cp_blacs_env_create,&
21 : cp_blacs_env_release,&
22 : cp_blacs_env_type
23 : USE cp_dbcsr_api, ONLY: &
24 : dbcsr_add, dbcsr_binary_read, dbcsr_binary_write, dbcsr_copy, dbcsr_create, &
25 : dbcsr_deallocate_matrix, dbcsr_distribution_get, dbcsr_distribution_new, &
26 : dbcsr_distribution_release, dbcsr_distribution_type, dbcsr_finalize, dbcsr_get_block_p, &
27 : dbcsr_get_info, dbcsr_iterator_blocks_left, dbcsr_iterator_next_block, &
28 : dbcsr_iterator_start, dbcsr_iterator_stop, dbcsr_iterator_type, dbcsr_multiply, &
29 : dbcsr_p_type, dbcsr_put_block, dbcsr_release, dbcsr_scale, dbcsr_set, dbcsr_type, &
30 : dbcsr_type_no_symmetry
31 : USE cp_dbcsr_contrib, ONLY: dbcsr_reserve_all_blocks
32 : USE cp_dbcsr_operations, ONLY: copy_dbcsr_to_fm,&
33 : copy_fm_to_dbcsr,&
34 : dbcsr_deallocate_matrix_set,&
35 : max_elements_per_block
36 : USE cp_fm_basic_linalg, ONLY: cp_fm_scale_and_add,&
37 : cp_fm_uplo_to_full
38 : USE cp_fm_cholesky, ONLY: cp_fm_cholesky_decompose,&
39 : cp_fm_cholesky_invert
40 : USE cp_fm_diag, ONLY: cp_fm_power
41 : USE cp_fm_struct, ONLY: cp_fm_struct_type
42 : USE cp_fm_types, ONLY: cp_fm_create,&
43 : cp_fm_get_info,&
44 : cp_fm_release,&
45 : cp_fm_set_all,&
46 : cp_fm_to_fm,&
47 : cp_fm_type
48 : USE cp_log_handling, ONLY: cp_get_default_logger,&
49 : cp_logger_type
50 : USE cp_output_handling, ONLY: cp_p_file,&
51 : cp_print_key_should_output
52 : USE gw_integrals, ONLY: build_3c_integral_block_ctx,&
53 : gw_3c_ctx_create,&
54 : gw_3c_ctx_release,&
55 : gw_3c_ctx_type,&
56 : gw_3c_ws_create,&
57 : gw_3c_ws_release,&
58 : gw_3c_ws_type
59 : USE gw_large_cell_gamma, ONLY: &
60 : Fourier_transform_w_to_t, G_occ_vir, compute_QP_energies, compute_fm_chi_Gamma_freq, &
61 : create_fm_W_MIC_time, delete_unnecessary_files, fill_fm_Sigma_c_Gamma_time, fm_write, &
62 : get_W_MIC, multiply_fm_W_MIC_time_with_Minv_Gamma
63 : USE gw_non_periodic_ri_rs, ONLY: get_basis_offsets,&
64 : precompute_ri_rs_radii,&
65 : ri_rs_grid_assembler,&
66 : solve_D_lp_distributed
67 : USE gw_utils, ONLY: de_init_bs_env
68 : USE input_constants, ONLY: rtp_method_bse
69 : USE input_section_types, ONLY: section_vals_type
70 : USE kinds, ONLY: dp
71 : USE kpoint_coulomb_2c, ONLY: build_2c_coulomb_matrix_kp
72 : USE machine, ONLY: m_walltime
73 : USE message_passing, ONLY: mp_para_env_type
74 : USE mp2_ri_2c, ONLY: RI_2c_integral_mat
75 : USE orbital_pointers, ONLY: indco,&
76 : ncoset
77 : USE parallel_gemm_api, ONLY: parallel_gemm
78 : USE particle_types, ONLY: particle_type
79 : USE post_scf_bandstructure_types, ONLY: post_scf_bandstructure_type
80 : USE qs_environment_types, ONLY: get_qs_env,&
81 : qs_environment_type
82 : USE qs_kind_types, ONLY: get_qs_kind,&
83 : qs_kind_type
84 : #include "./base/base_uses.f90"
85 :
86 : IMPLICIT NONE
87 :
88 : PRIVATE
89 :
90 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'gw_large_cell_Gamma_ri_rs'
91 :
92 : PUBLIC :: gw_calc_large_cell_Gamma_ri_rs
93 :
94 : CONTAINS
95 :
96 : ! **************************************************************************************************
97 : !> \brief GW calculation using RI-RS formalism for molecules
98 : !> \param qs_env ...
99 : !> \param bs_env ...
100 : ! **************************************************************************************************
101 :
102 0 : SUBROUTINE gw_calc_large_cell_Gamma_ri_rs(qs_env, bs_env)
103 :
104 : TYPE(qs_environment_type), POINTER :: qs_env
105 : TYPE(post_scf_bandstructure_type), POINTER :: bs_env
106 :
107 : CHARACTER(LEN=*), PARAMETER :: routineN = 'gw_calc_large_cell_Gamma_ri_rs'
108 :
109 : INTEGER :: handle
110 0 : TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:) :: fm_Sigma_x_Gamma, fm_W_time
111 0 : TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:, :, :) :: fm_Sigma_c_Gamma_time
112 :
113 0 : CALL timeset(routineN, handle)
114 :
115 : !!========================================================================
116 : !! 0. Precompute AO and RI Radii
117 : !! Per-atom cutoff radii from the most diffuse Gaussian primitives in
118 : !! the AO and RI auxiliary basis sets. Stored in bs_env%ri_rs%
119 : !! radius_ao_per_atom and radius_ri_per_atom, used for sphere-cutoff
120 : !! and phi_local screening.
121 : !!========================================================================
122 0 : CALL precompute_ri_rs_radii(qs_env, bs_env)
123 :
124 : !!========================================================================
125 : !! 1. Grid Generation for RI-RS
126 : !! (Modified Lebedev grids from Ivan Duchemin and Xavier Blase)
127 : !! Generate flattened 1D array of grid points for RI-RS.
128 : !! Equation: r_g(k) = R_A + r_g(A)
129 : !!========================================================================
130 0 : CALL ri_rs_grid_assembler(qs_env, bs_env, bs_env%ri_rs%grid_points)
131 :
132 : !!========================================================================
133 : !! 2. Atomic Basis Evaluation
134 : !! Compute values of spherical atomic basis functions at grid points.
135 : !! Expression: Φ_μl = Φ_μ(r_l) (mat_phi_mu_l)
136 : !!========================================================================
137 : CALL atomic_basis_at_grid_point(qs_env, bs_env, bs_env%ri_rs%grid_points, &
138 0 : bs_env%ri_rs%mat_phi_mu_l)
139 :
140 : !!========================================================================
141 : !! 3. Compute RI-RS Coefficients (Z_lp)
142 : !! Solve the regularized system for each atom P, where the grid domain
143 : !! is restricted to r_l within a cutoff distance of atom P:
144 : !! a. D_ll' = [ Σ_μ Φ_μ(r_l) Φ_μ(r_l') ]^2 (Equation 13)
145 : !! b. D_lP = Σ_{μν} Φ_μ(r_l) Φ_ν(r_l) (μν|P) (Equation 15)
146 : !! c. Conditioning:
147 : !! Dvec_l = 1 / sqrt(D_ll) (Diagonal scaling vector)
148 : !! D'_ll' = Dvec_l * D_ll' * Dvec_l' + λδ_ll'
149 : !! D'_lP = Dvec_l * D_lP
150 : !! d. Solve: Σ_l' D'_ll' * Z'_l'P = D'_lP (Equation 14)
151 : !! e. Rescale: Z_lP = Z'_lP * Dvec_l (Z_lP stored in mat_Z_lP)
152 : !!========================================================================
153 : CALL compute_coeff_Z_lP(qs_env, bs_env, bs_env%ri_rs%grid_points, &
154 0 : bs_env%ri_rs%mat_phi_mu_l, bs_env%ri_rs%mat_Z_lP)
155 :
156 : !!========================================================================
157 : !! 4. Compute Independent-Particle Polarizability (χ)
158 : !! G^occ_µλ(i|τ|) = sum_n^occ C_µn e^(-|(ϵ_n-ϵ_F)τ|) C_λn
159 : !! G^vir_µλ(i|τ|) = sum_n^vir C_µn e^(-|(ϵ_n-ϵ_F)τ|) C_λn
160 : !! G^occ_ll'(i|τ|) = sum_µν Φ_µ(r_l) G^occ_µν Φ_ν(r_l')
161 : !! G^vir_ll'(i|τ|) = sum_µν Φ_µ(r_l) G^vir_µν Φ_ν(r_l')
162 : !! χ_ll'(iτ) = G^occ_ll'(i|τ|) * G^vir_ll'(i|τ|)
163 : !! χ_PQ(iτ) = sum_ll' Z_lP χ_ll'(iτ) Z_l'Q
164 : !!========================================================================
165 : CALL get_mat_chi_Gamma_tau(bs_env, bs_env%mat_chi_Gamma_tau, &
166 0 : bs_env%ri_rs%mat_phi_mu_l, bs_env%ri_rs%mat_Z_lP)
167 :
168 : !!========================================================================
169 : !! 5. Compute Screened Interaction (W^MIC)
170 : !! χ_PQ(iτ) -> χ_PQ(iω) -> ε_PQ(iω) -> W_PQ(iω) -> W^MIC_PQ(iτ)
171 : !!========================================================================
172 0 : CALL get_W_MIC(bs_env, qs_env, bs_env%mat_chi_Gamma_tau, fm_W_time)
173 :
174 : !!========================================================================
175 : !! 6. Compute Exact Exchange Self-Energy (Σ^x)
176 : !! D_µν = sum_n^occ C_µn C_νn
177 : !! D_ll' = sum_µν Φ_µ(r_l) D_µν Φ_ν(r_l')
178 : !! V^trunc_ll' = sum_PQ Z_lP V^trunc_PQ Z_l'Q
179 : !! Σ^x_ll' = D_ll' * V^trunc_ll'
180 : !! Σ^x_λσ(k=0) = -sum_ll' Φ_λ(r_l) Σ^x_ll' Φ_σ(r_l')
181 : !!========================================================================
182 : CALL compute_Sigma_x(bs_env, qs_env, bs_env%ri_rs%mat_phi_mu_l, &
183 0 : bs_env%ri_rs%mat_Z_lP, fm_Sigma_x_Gamma)
184 :
185 : !!========================================================================
186 : !! 7. Compute Correlation Self-Energy (Σ^c)
187 : !! W^MIC_ll'(iτ) = sum_PQ Z_lP W^MIC_PQ(iτ) Z_l'Q
188 : !! Σ^c_ll'(iτ) = -G^occ_ll'(i|τ|) * W^MIC_ll'(iτ), for τ < 0
189 : !! Σ^c_ll'(iτ) = G^vir_ll'(i|τ|) * W^MIC_ll'(iτ), for τ > 0
190 : !! Σ^c_λσ(iτ) = sum_ll' Φ_λ(r_l) Σ^c_ll'(iτ) Φ_σ(r_l')
191 : !!========================================================================
192 : CALL compute_Sigma_c(bs_env, fm_W_time, bs_env%ri_rs%mat_phi_mu_l, &
193 0 : bs_env%ri_rs%mat_Z_lP, fm_Sigma_c_Gamma_time)
194 :
195 : !!========================================================================
196 : !! 8. Compute Quasiparticle Energies
197 : !! Σ^c_λσ(iτ) -> Σ^c_nn(ϵ)
198 : !! ϵ_nk^GW = ϵ_nk^DFT + Σ^c_nn(ϵ) + Σ^x_nn - v^xc_nn
199 : !!========================================================================
200 0 : CALL compute_QP_energies(bs_env, qs_env, fm_Sigma_x_Gamma, fm_Sigma_c_Gamma_time)
201 :
202 0 : CALL de_init_bs_env(bs_env)
203 :
204 0 : CALL timestop(handle)
205 :
206 0 : END SUBROUTINE gw_calc_large_cell_Gamma_ri_rs
207 :
208 : ! **************************************************************************************************
209 : !> \brief Evaluates atomic basis functions on a real-space grid and builds a sparse DBCSR matrix.
210 : !> \param qs_env ...
211 : !> \param bs_env ...
212 : !> \param ri_rs_grid_points ...
213 : !> \param mat_phi_mu_l ...
214 : ! **************************************************************************************************
215 :
216 0 : SUBROUTINE atomic_basis_at_grid_point(qs_env, bs_env, ri_rs_grid_points, mat_phi_mu_l)
217 :
218 : TYPE(qs_environment_type), POINTER :: qs_env
219 : TYPE(post_scf_bandstructure_type), POINTER :: bs_env
220 : REAL(KIND=dp), ALLOCATABLE, INTENT(INOUT) :: ri_rs_grid_points(:, :)
221 : TYPE(dbcsr_type), INTENT(OUT) :: mat_phi_mu_l
222 :
223 : CHARACTER(LEN=*), PARAMETER :: routineN = 'atomic_basis_at_grid_point'
224 :
225 : INTEGER :: c_size, chunk_size, dimen_ORB, handle, i, i_blk, iatom, natom, npcol, nprow, &
226 : num_grid_chunks, r_end, r_start, total_grid_npts
227 : INTEGER, ALLOCATABLE, DIMENSION(:) :: first_sgf
228 0 : INTEGER, DIMENSION(:), POINTER :: c_blk_sizes, col_dist, col_dist_ks, &
229 0 : r_blk_sizes, row_dist, row_dist_ks
230 0 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: atom_col_buffer
231 0 : TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
232 : TYPE(cell_type), POINTER :: cell
233 : TYPE(dbcsr_distribution_type) :: dist
234 : TYPE(dbcsr_distribution_type), POINTER :: dbcsr_dist_ks
235 : TYPE(mp_para_env_type), POINTER :: para_env
236 0 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
237 0 : TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
238 :
239 0 : CALL timeset(routineN, handle)
240 :
241 : ! Setup Grid Blocking
242 0 : chunk_size = max_elements_per_block
243 :
244 : ! Extract environment variables
245 : CALL get_qs_env(qs_env, cell=cell, atomic_kind_set=atomic_kind_set, &
246 : qs_kind_set=qs_kind_set, particle_set=particle_set, &
247 0 : para_env=para_env)
248 :
249 0 : natom = SIZE(particle_set)
250 0 : total_grid_npts = SIZE(ri_rs_grid_points, 2)
251 :
252 : ! Map the starting indices of spherical gaussian functions (SGF) for each atom
253 0 : ALLOCATE (first_sgf(natom + 1))
254 0 : CALL get_basis_offsets(particle_set, qs_kind_set, first_sgf, dimen_ORB)
255 :
256 : ! =========================================================================
257 : ! 1. SETUP DBCSR MATRIX TOPOLOGY
258 : ! =========================================================================
259 :
260 : ! A. Define Column Block Sizes (1 Block = 1 Atom's full basis set)
261 0 : ALLOCATE (c_blk_sizes(natom))
262 0 : DO iatom = 1, natom
263 0 : c_blk_sizes(iatom) = first_sgf(iatom + 1) - first_sgf(iatom)
264 : END DO
265 :
266 : ! B. Define Row Block Sizes (Grid chunks of max size 256)
267 0 : num_grid_chunks = CEILING(REAL(total_grid_npts, KIND=dp)/REAL(chunk_size, KIND=dp))
268 0 : ALLOCATE (r_blk_sizes(num_grid_chunks))
269 0 : r_blk_sizes = chunk_size
270 0 : IF (MOD(total_grid_npts, chunk_size) /= 0) THEN
271 0 : r_blk_sizes(num_grid_chunks) = MOD(total_grid_npts, chunk_size)
272 : END IF
273 :
274 : ! C. Fetch CP2K's Default Process Grid Configuration
275 0 : CALL get_qs_env(qs_env, dbcsr_dist=dbcsr_dist_ks)
276 0 : CALL dbcsr_distribution_get(dbcsr_dist_ks, row_dist=row_dist_ks, col_dist=col_dist_ks)
277 :
278 : ! D. Build Custom Mappings using Round-Robin across the 2D process grid
279 : ! (MAXVAL + 1 accounts for 0-based process indexing in DBCSR)
280 0 : nprow = MAXVAL(row_dist_ks) + 1
281 0 : npcol = MAXVAL(col_dist_ks) + 1
282 :
283 0 : ALLOCATE (row_dist(num_grid_chunks))
284 0 : DO i = 1, num_grid_chunks
285 0 : row_dist(i) = MOD(i - 1, nprow)
286 : END DO
287 :
288 0 : ALLOCATE (col_dist(natom))
289 0 : DO i = 1, natom
290 0 : col_dist(i) = MOD(i - 1, npcol)
291 : END DO
292 :
293 : ! E. Create the DBCSR Distribution and Initialize the Matrix
294 : CALL dbcsr_distribution_new(dist, template=dbcsr_dist_ks, &
295 0 : row_dist=row_dist, col_dist=col_dist)
296 :
297 : CALL dbcsr_create(mat_phi_mu_l, name="phi_val_sparse", dist=dist, &
298 : matrix_type=dbcsr_type_no_symmetry, &
299 0 : row_blk_size=r_blk_sizes, col_blk_size=c_blk_sizes)
300 :
301 : ! =========================================================================
302 : ! 2. STREAM DATA DIRECTLY INTO SPARSE MATRIX
303 : ! =========================================================================
304 : ! Iterate over the atoms assigned to this specific MPI rank
305 0 : DO iatom = para_env%mepos + 1, natom, para_env%num_pe
306 :
307 0 : c_size = c_blk_sizes(iatom)
308 :
309 : ! Allocate a temporary dense buffer just for this specific atom
310 0 : ALLOCATE (atom_col_buffer(total_grid_npts, c_size))
311 0 : atom_col_buffer = 0.0_dp
312 :
313 : ! Evaluate the basis functions on the grid. Skip grid points outside the spatial
314 : ! extent of the most diffuse AO Gaussian on iatom; beyond that radius the contribution
315 : ! is guaranteed below eps_filter.
316 : CALL fill_phi_for_atom(atom_col_buffer, ri_rs_grid_points, total_grid_npts, &
317 : iatom, particle_set, qs_kind_set, cell, &
318 0 : r2_threshold=bs_env%ri_rs%radius_ao_per_atom(iatom)**2)
319 :
320 : ! Slice the dense column into chunks and insert into DBCSR
321 0 : DO i_blk = 1, num_grid_chunks
322 0 : r_start = (i_blk - 1)*chunk_size + 1
323 0 : r_end = MIN(i_blk*chunk_size, total_grid_npts)
324 :
325 : ! Apply dynamic sparsity filtering: Only store blocks with physical significance
326 0 : IF (MAXVAL(ABS(atom_col_buffer(r_start:r_end, 1:c_size))) > bs_env%eps_filter) THEN
327 : CALL dbcsr_put_block(mat_phi_mu_l, row=i_blk, col=iatom, &
328 0 : block=atom_col_buffer(r_start:r_end, 1:c_size))
329 : END IF
330 : END DO
331 :
332 0 : DEALLOCATE (atom_col_buffer)
333 :
334 : END DO
335 :
336 : ! Finalize triggers internal MPI communication to route blocks to their correct 2D process owners
337 0 : CALL dbcsr_finalize(mat_phi_mu_l)
338 :
339 0 : IF (bs_env%unit_nr > 0) THEN
340 0 : WRITE (bs_env%unit_nr, *) "Done with evaluation of phi"
341 : END IF
342 :
343 : ! -------------------------------------------------------------------------
344 : ! CLEANUP
345 : ! -------------------------------------------------------------------------
346 0 : DEALLOCATE (first_sgf, r_blk_sizes, c_blk_sizes, row_dist, col_dist)
347 0 : CALL dbcsr_distribution_release(dist)
348 :
349 0 : CALL timestop(handle)
350 :
351 0 : END SUBROUTINE atomic_basis_at_grid_point
352 :
353 : ! **************************************************************************************************
354 : !> \brief Compute value of all basis functions for a single atom across all grid points.
355 : !> Sums contributions from periodic images of `iatom` (loop over (ix, iy, iz) cells gated
356 : !> by `cell%perd`). Each per-image squared distance is compared against `r2_threshold`
357 : !> (per-atom AO Gaussian extent²); images beyond that radius contribute below eps_filter
358 : !> and are skipped.
359 : !> \param phi_val ...
360 : !> \param ri_rs_grid ...
361 : !> \param npts ...
362 : !> \param iatom ...
363 : !> \param particle_set ...
364 : !> \param qs_kind_set ...
365 : !> \param cell ...
366 : !> \param r2_threshold per-image squared-distance threshold; CYCLE if r² > r2_threshold. Pass
367 : !> HUGE(1.0_dp) to disable.
368 : ! **************************************************************************************************
369 :
370 0 : SUBROUTINE fill_phi_for_atom(phi_val, ri_rs_grid, npts, iatom, &
371 : particle_set, qs_kind_set, cell, r2_threshold)
372 :
373 : REAL(KIND=dp), INTENT(INOUT) :: phi_val(:, :)
374 : INTEGER, INTENT(IN) :: npts
375 : REAL(KIND=dp), INTENT(IN) :: ri_rs_grid(3, npts)
376 : INTEGER, INTENT(IN) :: iatom
377 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
378 : TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
379 : TYPE(cell_type), POINTER :: cell
380 : REAL(KIND=dp), INTENT(IN) :: r2_threshold
381 :
382 : INTEGER :: first_sgf, i_pt, ico, iend_co, ikind, ipgf, iset, isgf, ishell, istart_co, ix, &
383 : ix_max, ix_min, iy, iy_max, iy_min, iz, iz_max, iz_min, l, last_sgf, lx, ly, lz, &
384 : n_cart_total, row_idx
385 : REAL(KIND=dp) :: alpha, cell_vector(3), dist_vec(3), &
386 : dist_vec_raw(3), exp_val, poly, r2, &
387 : r_atom(3), weight
388 : REAL(KIND=dp), DIMENSION(3, 3) :: hmat
389 : TYPE(gto_basis_set_type), POINTER :: orb_basis_set
390 :
391 : ! Get Atom Info
392 0 : ikind = particle_set(iatom)%atomic_kind%kind_number
393 0 : CALL get_qs_kind(qs_kind_set(ikind), basis_set=orb_basis_set, basis_type="ORB")
394 0 : CALL get_cell(cell=cell, h=hmat)
395 :
396 0 : IF (.NOT. ASSOCIATED(orb_basis_set)) RETURN
397 :
398 0 : IF (cell%perd(1) == 1) THEN; ix_min = -1; ix_max = 1; ELSE; ix_min = 0; ix_max = 0; END IF
399 0 : IF (cell%perd(2) == 1) THEN; iy_min = -1; iy_max = 1; ELSE; iy_min = 0; iy_max = 0; END IF
400 0 : IF (cell%perd(3) == 1) THEN; iz_min = -1; iz_max = 1; ELSE; iz_min = 0; iz_max = 0; END IF
401 :
402 0 : r_atom = particle_set(iatom)%r
403 :
404 : !$OMP PARALLEL DO DEFAULT(NONE) &
405 : !$OMP SHARED(phi_val, ri_rs_grid, npts, orb_basis_set, r_atom, hmat, &
406 : !$OMP ncoset, indco, cell, ix_min, ix_max, &
407 : !$OMP iy_min, iy_max, iz_min, iz_max, r2_threshold) &
408 : !$OMP PRIVATE(i_pt, dist_vec_raw, ix, iy, iz, cell_vector, dist_vec, r2, iset, &
409 : !$OMP n_cart_total, ishell, l, istart_co, iend_co, first_sgf, last_sgf, &
410 : !$OMP ipgf, alpha, exp_val, isgf, ico, row_idx, weight, lx, ly, lz, poly) &
411 0 : !$OMP SCHEDULE(DYNAMIC)
412 :
413 : DO i_pt = 1, npts
414 :
415 : dist_vec_raw = ri_rs_grid(:, i_pt) - r_atom
416 :
417 : DO ix = ix_min, ix_max
418 : DO iy = iy_min, iy_max
419 : DO iz = iz_min, iz_max
420 :
421 : cell_vector(1:3) = MATMUL(hmat, REAL([ix, iy, iz], dp))
422 :
423 : dist_vec = dist_vec_raw - cell_vector
424 :
425 : r2 = DOT_PRODUCT(dist_vec, dist_vec)
426 :
427 : IF (r2 > r2_threshold) CYCLE
428 :
429 : DO iset = 1, orb_basis_set%nset
430 : n_cart_total = ncoset(orb_basis_set%lmax(iset))
431 :
432 : DO ishell = 1, orb_basis_set%nshell(iset)
433 : l = orb_basis_set%l(ishell, iset)
434 : istart_co = ncoset(l - 1) + 1
435 : iend_co = ncoset(l)
436 :
437 : first_sgf = orb_basis_set%first_sgf(ishell, iset)
438 : last_sgf = orb_basis_set%last_sgf(ishell, iset)
439 :
440 : DO ipgf = 1, orb_basis_set%npgf(iset)
441 : alpha = orb_basis_set%zet(ipgf, iset)
442 : exp_val = EXP(-alpha*r2)
443 :
444 : DO isgf = first_sgf, last_sgf
445 : DO ico = istart_co, iend_co
446 : row_idx = (ipgf - 1)*n_cart_total + ico
447 : weight = orb_basis_set%sphi(row_idx, isgf)
448 : lx = indco(1, ico)
449 : ly = indco(2, ico)
450 : lz = indco(3, ico)
451 : poly = (dist_vec(1)**lx)*(dist_vec(2)**ly)*(dist_vec(3)**lz)
452 :
453 : phi_val(i_pt, isgf) = phi_val(i_pt, isgf) + (weight*poly*exp_val)
454 :
455 : END DO
456 : END DO
457 : END DO
458 : END DO
459 : END DO
460 : END DO
461 : END DO
462 : END DO
463 : END DO
464 : !$OMP END PARALLEL DO
465 :
466 : END SUBROUTINE fill_phi_for_atom
467 :
468 : ! **************************************************************************************************
469 : !> \brief Compute RI-RS Coefficients (Z_lP)
470 : !> \param qs_env ...
471 : !> \param bs_env ...
472 : !> \param ri_rs_grid_points ...
473 : !> \param mat_phi_mu_l ...
474 : !> \param mat_Z_lP ...
475 : ! **************************************************************************************************
476 :
477 0 : SUBROUTINE compute_coeff_Z_lP(qs_env, bs_env, ri_rs_grid_points, mat_phi_mu_l, mat_Z_lP)
478 :
479 : ! Arguments
480 : TYPE(qs_environment_type), POINTER :: qs_env
481 : TYPE(post_scf_bandstructure_type), POINTER :: bs_env
482 : REAL(KIND=dp), ALLOCATABLE, INTENT(INOUT) :: ri_rs_grid_points(:, :)
483 : TYPE(dbcsr_type), INTENT(INOUT) :: mat_phi_mu_l
484 : TYPE(dbcsr_type), INTENT(OUT) :: mat_Z_lP
485 :
486 : CHARACTER(LEN=*), PARAMETER :: key = 'PROPERTIES%BANDSTRUCTURE%GW%PRINT%RESTART', &
487 : routineN = 'compute_coeff_Z_lP'
488 :
489 : INTEGER :: atom_j_mepos, atom_j_stride, atom_P, atom_P_start, atom_P_stride, col_end, &
490 : col_start, current_chunk_size, g, group_handle, handle, i, i_blk, ikind, info, j, j_ri, &
491 : l, loc_idx, loc_ptr, max_ao_size, max_loc_ri, my_group, n_ao_total, n_grid_total, &
492 : n_groups, n_loc_ri, n_local_grid, n_procs_per_atom, natom, nkind, num_grid_chunks, &
493 : P_loop_atom, r_end, r_start, source_atom
494 0 : INTEGER, ALLOCATABLE, DIMENSION(:) :: local_grid_idx, row_offset
495 0 : INTEGER, DIMENSION(:), POINTER :: col_dist_phi, col_dist_ri, r_blk_sizes, &
496 0 : ri_blk_sizes, row_dist_grid
497 : REAL(KIND=dp) :: cutoff_ri, cutoff_ri_2, d_sP, dist2_min, &
498 : r2_threshold, r_c, t1, t2, t3
499 0 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: cutoff_ri_per_atom, cutoff_ri_per_kind, &
500 0 : d_vec_local
501 0 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: D_local, d_lp_local, phi_local, &
502 0 : sphere_grid, Z_blk
503 : REAL(KIND=dp), DIMENSION(3) :: dist_vec_raw, pos_P
504 0 : TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
505 : TYPE(cell_type), POINTER :: cell
506 : TYPE(cp_blacs_env_type), POINTER :: blacs_env_sub
507 : TYPE(cp_fm_struct_type), POINTER :: fm_struct_b, fm_struct_D
508 : TYPE(cp_fm_type) :: fm_b, fm_D
509 : TYPE(cp_logger_type), POINTER :: logger
510 : TYPE(dbcsr_distribution_type) :: dist_phi, dist_Z
511 0 : TYPE(gw_3c_ctx_type) :: ctx_3c
512 : TYPE(mp_para_env_type), POINTER :: para_env, para_env_sub
513 0 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
514 0 : TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
515 : TYPE(section_vals_type), POINTER :: input
516 :
517 0 : CALL timeset(routineN, handle)
518 :
519 0 : t1 = m_walltime()
520 :
521 : CALL get_qs_env(qs_env, para_env=para_env, particle_set=particle_set, input=input, &
522 0 : cell=cell, qs_kind_set=qs_kind_set, atomic_kind_set=atomic_kind_set)
523 :
524 : ! ---------------------------------------------------------------------
525 : ! Subgroup setup. Default G=1 keeps the single-rank BLAS path; G>1 splits
526 : ! ranks into atom-groups so the Cholesky on D_local distributes across G
527 : ! ranks (memory ~1/G) and the compute_d_lp build also splits across the
528 : ! subgroup. G=1 leaves para_env_sub / blacs_env_sub NULL — no subgroup
529 : ! comms created, atom_P loop uses per-rank round-robin, compute_d_lp runs
530 : ! its full atom_j range on each rank, no allreduce.
531 : ! ---------------------------------------------------------------------
532 0 : n_procs_per_atom = MIN(bs_env%ri_rs%n_procs_per_atom_z_lp, para_env%num_pe)
533 0 : IF (n_procs_per_atom < 1) n_procs_per_atom = 1
534 :
535 0 : NULLIFY (para_env_sub, blacs_env_sub)
536 0 : IF (n_procs_per_atom > 1) THEN
537 0 : n_groups = para_env%num_pe/n_procs_per_atom
538 0 : my_group = MIN(para_env%mepos/n_procs_per_atom, n_groups - 1)
539 0 : ALLOCATE (para_env_sub)
540 0 : CALL para_env_sub%from_split(para_env, my_group)
541 0 : CALL cp_blacs_env_create(blacs_env=blacs_env_sub, para_env=para_env_sub)
542 0 : atom_P_start = my_group + 1
543 0 : atom_P_stride = n_groups
544 0 : atom_j_mepos = para_env_sub%mepos
545 0 : atom_j_stride = para_env_sub%num_pe
546 : ELSE
547 0 : atom_P_start = para_env%mepos + 1
548 0 : atom_P_stride = para_env%num_pe
549 0 : atom_j_mepos = 0
550 0 : atom_j_stride = 1
551 : END IF
552 :
553 0 : natom = SIZE(bs_env%i_RI_start_from_atom)
554 0 : n_ao_total = bs_env%i_ao_end_from_atom(natom)
555 0 : n_grid_total = SIZE(ri_rs_grid_points, 2)
556 :
557 : ! =========================================================================
558 : ! 1. SETUP DBCSR TOPOLOGY & EXACT OFFSETS
559 : ! =========================================================================
560 0 : CALL dbcsr_get_info(mat_phi_mu_l, row_blk_size=r_blk_sizes, distribution=dist_phi)
561 0 : CALL dbcsr_distribution_get(dist_phi, row_dist=row_dist_grid, col_dist=col_dist_phi, group=group_handle)
562 :
563 0 : num_grid_chunks = SIZE(r_blk_sizes)
564 :
565 0 : ALLOCATE (row_offset(num_grid_chunks))
566 0 : row_offset(1) = 0
567 0 : DO i_blk = 2, num_grid_chunks
568 0 : row_offset(i_blk) = row_offset(i_blk - 1) + r_blk_sizes(i_blk - 1)
569 : END DO
570 :
571 0 : ALLOCATE (ri_blk_sizes(natom), col_dist_ri(natom))
572 0 : DO atom_P = 1, natom
573 0 : ri_blk_sizes(atom_P) = bs_env%i_RI_end_from_atom(atom_P) - bs_env%i_RI_start_from_atom(atom_P) + 1
574 0 : col_dist_ri(atom_P) = MOD(atom_P - 1, MAXVAL(col_dist_phi) + 1)
575 : END DO
576 :
577 0 : CALL dbcsr_distribution_new(dist_Z, template=dist_phi, row_dist=row_dist_grid, col_dist=col_dist_ri)
578 :
579 0 : IF (bs_env%ri_rs%Z_lP_exists) THEN
580 : CALL dbcsr_binary_read(filepath=TRIM(bs_env%prefix)//"Z_lP.matrix", &
581 : distribution=dist_Z, &
582 0 : matrix_new=mat_Z_lP)
583 0 : IF (bs_env%unit_nr > 0) THEN
584 : WRITE (bs_env%unit_nr, '(T2,A,T57,A,F7.1,A)') &
585 0 : 'Read Z_lP from file ', ' Execution time', m_walltime() - t1, ' s'
586 0 : WRITE (bs_env%unit_nr, '(A)') ' '
587 : END IF
588 : ELSE
589 :
590 : CALL dbcsr_create(mat_Z_lP, name="mat_Z_lP", dist=dist_Z, &
591 : matrix_type=dbcsr_type_no_symmetry, &
592 0 : row_blk_size=r_blk_sizes, col_blk_size=ri_blk_sizes)
593 :
594 0 : max_ao_size = 0
595 0 : DO j = 1, SIZE(bs_env%i_ao_start_from_atom)
596 0 : max_ao_size = MAX(max_ao_size, bs_env%i_ao_end_from_atom(j) - bs_env%i_ao_start_from_atom(j) + 1)
597 : END DO
598 0 : max_loc_ri = MAXVAL(ri_blk_sizes)
599 :
600 : ! Per-atom RI-RS integration sphere:
601 : ! cutoff_ri(P) = r_c + r_AO(P)
602 : ! where r_c is the truncated-Coulomb cutoff of the RI metric. The
603 : ! CUTOFF_RADIUS_RI_RS keyword (when > 0) overrides the entire cutoff calculation.
604 0 : nkind = SIZE(atomic_kind_set)
605 0 : ALLOCATE (cutoff_ri_per_atom(natom))
606 :
607 0 : IF (bs_env%ri_rs%cutoff_radius_ri_rs > 0.0_dp) THEN
608 0 : cutoff_ri_per_atom(:) = bs_env%ri_rs%cutoff_radius_ri_rs
609 : ELSE
610 0 : r_c = bs_env%ri_metric%cutoff_radius
611 0 : DO P_loop_atom = 1, natom
612 : cutoff_ri_per_atom(P_loop_atom) = &
613 0 : r_c + bs_env%ri_rs%radius_ao_per_atom(P_loop_atom)
614 : END DO
615 : END IF
616 :
617 0 : ALLOCATE (cutoff_ri_per_kind(nkind))
618 0 : cutoff_ri_per_kind(:) = 0.0_dp
619 0 : IF (bs_env%unit_nr > 0) THEN
620 0 : DO P_loop_atom = 1, natom
621 0 : ikind = particle_set(P_loop_atom)%atomic_kind%kind_number
622 : cutoff_ri_per_kind(ikind) = MAX(cutoff_ri_per_kind(ikind), &
623 0 : cutoff_ri_per_atom(P_loop_atom))
624 : END DO
625 0 : WRITE (bs_env%unit_nr, '(T2,A)') 'Per-kind maximum RI-RS sphere cutoff (Bohr):'
626 0 : WRITE (bs_env%unit_nr, '(T4,A4,A14)') 'Kind', 'max cutoff_ri'
627 0 : DO ikind = 1, nkind
628 : WRITE (bs_env%unit_nr, '(T4,A4,F14.4)') &
629 0 : atomic_kind_set(ikind)%element_symbol, &
630 0 : cutoff_ri_per_kind(ikind)
631 : END DO
632 0 : WRITE (bs_env%unit_nr, '(A)') ' '
633 0 : DEALLOCATE (cutoff_ri_per_kind)
634 : END IF
635 :
636 : ! Shared 3c-integral context: hoists libint / t_c_g0 / md_ftable / contracted
637 : ! sphi tables out of the per-triple call so compute_d_lp threads only allocate
638 : ! a lightweight per-thread workspace. MPI-collective; must be outside any
639 : ! OMP region.
640 : CALL gw_3c_ctx_create(ctx_3c, qs_env, bs_env%ri_metric, &
641 : basis_j=bs_env%basis_set_AO, basis_k=bs_env%basis_set_AO, &
642 0 : basis_i=bs_env%basis_set_RI)
643 :
644 : ! =========================================================================
645 : ! 2. MPI LOOP OVER ATOMS (Fully independent, no MPI barriers inside)
646 : ! phi_local for each atom_P's cutoff sphere is built on the fly via
647 : ! fill_phi_for_atom — no dense replicated phi_global, no allreduce.
648 : ! =========================================================================
649 0 : DO atom_P = atom_P_start, natom, atom_P_stride
650 :
651 0 : n_loc_ri = ri_blk_sizes(atom_P)
652 0 : pos_P(:) = particle_set(atom_P)%r(:)
653 :
654 0 : cutoff_ri = cutoff_ri_per_atom(atom_P)
655 0 : cutoff_ri_2 = cutoff_ri**2
656 :
657 : ! ---------------------------------------------------------------------
658 : ! A. Determine Local Grid Domain based on cutoff_ri (PBC distance)
659 : ! ---------------------------------------------------------------------
660 0 : n_local_grid = 0
661 0 : DO l = 1, n_grid_total
662 0 : dist_vec_raw = pbc(ri_rs_grid_points(1:3, l), pos_P(1:3), cell)
663 0 : dist2_min = DOT_PRODUCT(dist_vec_raw, dist_vec_raw)
664 0 : IF (dist2_min <= cutoff_ri_2) n_local_grid = n_local_grid + 1
665 : END DO
666 :
667 0 : ALLOCATE (local_grid_idx(n_local_grid))
668 :
669 0 : n_local_grid = 0
670 0 : DO l = 1, n_grid_total
671 0 : dist_vec_raw = pbc(ri_rs_grid_points(1:3, l), pos_P(1:3), cell)
672 0 : dist2_min = DOT_PRODUCT(dist_vec_raw, dist_vec_raw)
673 0 : IF (dist2_min <= cutoff_ri_2) THEN
674 0 : n_local_grid = n_local_grid + 1
675 0 : local_grid_idx(n_local_grid) = l
676 : END IF
677 : END DO
678 :
679 : ! ---------------------------------------------------------------------
680 : ! B. Build phi_local on the fly via fill_phi_for_atom.
681 : ! Only source atoms whose AO basis can reach the cutoff sphere of
682 : ! atom_P (MIC distance) contribute; the rest are pruned. The
683 : ! periodic fill_phi_for_atom sums over (ix, iy, iz) images of
684 : ! source_atom internally.
685 : ! ---------------------------------------------------------------------
686 0 : ALLOCATE (sphere_grid(3, n_local_grid))
687 0 : DO loc_idx = 1, n_local_grid
688 0 : sphere_grid(:, loc_idx) = ri_rs_grid_points(:, local_grid_idx(loc_idx))
689 : END DO
690 :
691 0 : ALLOCATE (phi_local(n_local_grid, n_ao_total))
692 0 : phi_local = 0.0_dp
693 :
694 0 : DO source_atom = 1, natom
695 0 : dist_vec_raw = pbc(particle_set(source_atom)%r(:), pos_P(:), cell)
696 0 : d_sP = NORM2(dist_vec_raw)
697 0 : IF (d_sP > bs_env%ri_rs%radius_ao_per_atom(source_atom) + cutoff_ri) CYCLE
698 :
699 0 : col_start = bs_env%i_ao_start_from_atom(source_atom)
700 0 : col_end = bs_env%i_ao_end_from_atom(source_atom)
701 0 : r2_threshold = bs_env%ri_rs%radius_ao_per_atom(source_atom)**2
702 :
703 : CALL fill_phi_for_atom(phi_local(:, col_start:col_end), sphere_grid, &
704 : n_local_grid, source_atom, particle_set, qs_kind_set, &
705 0 : cell, r2_threshold)
706 : END DO
707 :
708 0 : DEALLOCATE (sphere_grid)
709 :
710 : ! ---------------------------------------------------------------------
711 : ! C. Build Local RHS Matrix (d_lp_local) first so the subgroup-
712 : ! distributed compute_d_lp + allreduce is not entangled with the LHS
713 : ! build. compute_d_lp does not depend on D_local or d_vec_local.
714 : ! ---------------------------------------------------------------------
715 0 : ALLOCATE (d_lp_local(n_local_grid, n_loc_ri))
716 0 : d_lp_local = 0.0_dp
717 :
718 0 : t2 = m_walltime()
719 :
720 : CALL compute_d_lp(bs_env, ctx_3c, cell, phi_local, d_lp_local, n_local_grid, &
721 0 : n_loc_ri, atom_P, max_ao_size, atom_j_mepos, atom_j_stride)
722 :
723 : ! Reduce per-subgroup-rank partials into the replicated d_lp_local.
724 : ! Skipped for G=1 (BLAS path): each rank has the full sum locally.
725 0 : IF (n_procs_per_atom > 1) THEN
726 0 : CALL para_env_sub%sum(d_lp_local)
727 : END IF
728 :
729 0 : t3 = m_walltime()
730 :
731 : ! ---------------------------------------------------------------------
732 : ! D. Build d_vec_local (Jacobi diagonal) + LHS — BLAS or ScaLAPACK
733 : ! ---------------------------------------------------------------------
734 0 : ALLOCATE (d_vec_local(n_local_grid))
735 :
736 0 : IF (n_procs_per_atom == 1) THEN
737 : ! BLAS path: build D_local densely, compute d_vec as side-effect
738 : ! of the Jacobi step (preserves bit-identical arithmetic with the
739 : ! previous branch).
740 0 : ALLOCATE (D_local(n_local_grid, n_local_grid))
741 0 : D_local = 0.0_dp
742 :
743 : CALL dsyrk("L", "N", n_local_grid, n_ao_total, 1.0_dp, phi_local, &
744 0 : n_local_grid, 0.0_dp, D_local, n_local_grid)
745 :
746 : !$OMP PARALLEL DO DEFAULT(NONE) &
747 : !$OMP SHARED(n_local_grid, D_local, d_vec_local, bs_env) &
748 : !$OMP PRIVATE(i) &
749 0 : !$OMP SCHEDULE(STATIC)
750 : DO i = 1, n_local_grid
751 : D_local(i, i) = D_local(i, i)**2
752 : d_vec_local(i) = 1.0_dp/SQRT(MAX(D_local(i, i), 1.0E-16_dp))
753 : D_local(i, i) = (D_local(i, i)*d_vec_local(i)**2) + bs_env%ri_rs%tikhonov
754 : END DO
755 : !$OMP END PARALLEL DO
756 :
757 : !$OMP PARALLEL DO DEFAULT(NONE) &
758 : !$OMP SHARED(n_local_grid, D_local, d_vec_local) &
759 : !$OMP PRIVATE(j, i) &
760 0 : !$OMP SCHEDULE(DYNAMIC)
761 : DO j = 1, n_local_grid
762 : DO i = j + 1, n_local_grid
763 : D_local(i, j) = D_local(i, j)**2
764 : D_local(i, j) = D_local(i, j)*d_vec_local(i)*d_vec_local(j)
765 : D_local(j, i) = D_local(i, j)
766 : END DO
767 : END DO
768 : !$OMP END PARALLEL DO
769 : ELSE
770 : ! ScaLAPACK path: d_vec computed directly from phi (= 1/||phi_i||^2);
771 : ! solve_D_lp_distributed builds D block-cyclic internally with
772 : ! the squared+scaled values, so no dense D_local on this rank.
773 : !$OMP PARALLEL DO DEFAULT(NONE) &
774 : !$OMP SHARED(n_local_grid, n_ao_total, phi_local, d_vec_local) &
775 : !$OMP PRIVATE(i, j) &
776 0 : !$OMP SCHEDULE(STATIC)
777 : DO i = 1, n_local_grid
778 : d_vec_local(i) = 0.0_dp
779 : DO j = 1, n_ao_total
780 : d_vec_local(i) = d_vec_local(i) + phi_local(i, j)*phi_local(i, j)
781 : END DO
782 : d_vec_local(i) = 1.0_dp/MAX(d_vec_local(i), 1.0E-16_dp)
783 : END DO
784 : !$OMP END PARALLEL DO
785 : END IF
786 :
787 : ! ---------------------------------------------------------------------
788 : ! E. Pre-scale d_lp by d_vec
789 : ! ---------------------------------------------------------------------
790 : !$OMP PARALLEL DO DEFAULT(NONE) &
791 : !$OMP SHARED(n_loc_ri, n_local_grid, d_lp_local, d_vec_local) &
792 : !$OMP PRIVATE(j_ri, i) &
793 0 : !$OMP SCHEDULE(STATIC)
794 : DO j_ri = 1, n_loc_ri
795 : DO i = 1, n_local_grid
796 : d_lp_local(i, j_ri) = d_lp_local(i, j_ri)*d_vec_local(i)
797 : END DO
798 : END DO
799 : !$OMP END PARALLEL DO
800 :
801 : ! ---------------------------------------------------------------------
802 : ! F. Solve — BLAS dpotrf/dpotrs or ScaLAPACK pdpotrf/pdpotrs
803 : ! ---------------------------------------------------------------------
804 0 : IF (n_procs_per_atom == 1) THEN
805 0 : CALL dpotrf('L', n_local_grid, D_local, n_local_grid, info)
806 : CALL dpotrs('L', n_local_grid, n_loc_ri, D_local, n_local_grid, &
807 0 : d_lp_local, n_local_grid, info)
808 0 : DEALLOCATE (D_local)
809 : ELSE
810 : CALL solve_D_lp_distributed(phi_local, d_vec_local, d_lp_local, &
811 : n_local_grid, n_ao_total, n_loc_ri, &
812 : bs_env%ri_rs%tikhonov, &
813 : para_env_sub, blacs_env_sub, &
814 0 : fm_struct_D, fm_struct_b, fm_D, fm_b, info)
815 : END IF
816 :
817 : ! ---------------------------------------------------------------------
818 : ! G. Post-scale solution by d_vec (common to both paths)
819 : ! ---------------------------------------------------------------------
820 : !$OMP PARALLEL DO DEFAULT(NONE) &
821 : !$OMP SHARED(n_loc_ri, n_local_grid, d_lp_local, d_vec_local) &
822 : !$OMP PRIVATE(j_ri, i) &
823 0 : !$OMP SCHEDULE(STATIC)
824 : DO j_ri = 1, n_loc_ri
825 : DO i = 1, n_local_grid
826 : d_lp_local(i, j_ri) = d_lp_local(i, j_ri)*d_vec_local(i)
827 : END DO
828 : END DO
829 : !$OMP END PARALLEL DO
830 :
831 : ! ---------------------------------------------------------------------
832 : ! H. Scatter Local Solution Back to Global DBCSR Matrix.
833 : ! Under ScaLAPACK (G>1) the d_lp_local solution is identical on all
834 : ! G subgroup ranks (gathered via cp_fm_get_submatrix); only the
835 : ! subgroup root writes to mat_Z_lP so each atom column is emitted
836 : ! exactly once. DBCSR routes blocks to their global owner on finalize.
837 : ! local_grid_idx is ascending (built by the ordered scan above), so
838 : ! a single walking pointer over chunks works.
839 : ! ---------------------------------------------------------------------
840 0 : IF (n_procs_per_atom == 1 .OR. para_env_sub%mepos == 0) THEN
841 0 : ALLOCATE (Z_blk(MAXVAL(r_blk_sizes), n_loc_ri))
842 0 : loc_ptr = 1
843 :
844 0 : DO i_blk = 1, num_grid_chunks
845 0 : r_start = row_offset(i_blk) + 1
846 0 : r_end = row_offset(i_blk) + r_blk_sizes(i_blk)
847 0 : current_chunk_size = r_blk_sizes(i_blk)
848 :
849 0 : Z_blk = 0.0_dp
850 :
851 0 : DO WHILE (loc_ptr <= n_local_grid)
852 0 : g = local_grid_idx(loc_ptr)
853 0 : IF (g > r_end) EXIT
854 0 : Z_blk(g - r_start + 1, 1:n_loc_ri) = d_lp_local(loc_ptr, 1:n_loc_ri)
855 0 : loc_ptr = loc_ptr + 1
856 : END DO
857 :
858 0 : IF (MAXVAL(ABS(Z_blk(1:current_chunk_size, 1:n_loc_ri))) > bs_env%eps_filter) THEN
859 : CALL dbcsr_put_block(mat_Z_lP, row=i_blk, col=atom_P, &
860 0 : block=Z_blk(1:current_chunk_size, 1:n_loc_ri))
861 : END IF
862 : END DO
863 :
864 0 : DEALLOCATE (Z_blk)
865 : END IF
866 :
867 0 : DEALLOCATE (d_vec_local, d_lp_local)
868 0 : DEALLOCATE (local_grid_idx, phi_local)
869 :
870 : END DO
871 :
872 0 : DEALLOCATE (cutoff_ri_per_atom)
873 0 : CALL gw_3c_ctx_release(ctx_3c)
874 :
875 0 : CALL dbcsr_finalize(mat_Z_lP)
876 :
877 0 : IF (bs_env%unit_nr > 0) THEN
878 : WRITE (bs_env%unit_nr, '(T2,A,T57,A,F7.1,A)') &
879 0 : 'Computed Z_lP ', ' Execution time', m_walltime() - t1, ' s'
880 0 : WRITE (bs_env%unit_nr, '(A)') ' '
881 : END IF
882 :
883 0 : logger => cp_get_default_logger()
884 :
885 0 : IF (BTEST(cp_print_key_should_output(logger%iter_info, input, key), cp_p_file)) THEN
886 0 : CALL dbcsr_binary_write(matrix=mat_Z_lP, filepath=TRIM(bs_env%prefix)//"Z_lP.matrix")
887 : END IF
888 :
889 : END IF
890 :
891 0 : DEALLOCATE (row_offset, ri_blk_sizes, col_dist_ri)
892 0 : CALL dbcsr_distribution_release(dist_Z)
893 :
894 0 : IF (n_procs_per_atom > 1) THEN
895 0 : CALL cp_blacs_env_release(blacs_env_sub)
896 0 : CALL para_env_sub%free()
897 0 : DEALLOCATE (para_env_sub)
898 : END IF
899 :
900 0 : DEALLOCATE (ri_rs_grid_points)
901 :
902 0 : CALL timestop(handle)
903 :
904 0 : END SUBROUTINE compute_coeff_Z_lP
905 :
906 : ! **************************************************************************************************
907 : !> \brief Computes the dense localized RHS d_lp(l,P) = Σ_{μν,R,S} Φ_μ(r_l)·Φ_ν(r_l)·(μν|P) for one
908 : !> RI atom P. OMP-threaded over (atom_j, atom_k) AO-pair blocks: per thread, sweep all
909 : !> (cell_R, cell_S) periodic images of (atom_j, atom_k) about atom_P at cell (0,0,0); each
910 : !> 3c block is built by build_3c_integral_block_ctx (cached libint / sphi tables in ctx,
911 : !> kind-radius triangle screen → `screened` short-circuits negligible image triples), and
912 : !> grid-chunked pair densities are contracted into a private d_lp partial that is reduced
913 : !> into d_lp at the end of the parallel region.
914 : !> \param bs_env ...
915 : !> \param ctx shared 3c-integral context (gw_3c_ctx_create)
916 : !> \param cell ...
917 : !> \param phi_val Φ_μ(r_l) on the local-sphere grid (n_grid_total × n_ao)
918 : !> \param d_lp output (n_grid_total × n_loc_ri), zeroed by the caller, accumulated here
919 : !> \param n_grid_total number of local-sphere grid rows
920 : !> \param n_loc_ri number of RI functions of atom_P
921 : !> \param atom_P RI atom (pinned to cell (0,0,0))
922 : !> \param max_ao_size ...
923 : !> \param atom_j_mepos ...
924 : !> \param atom_j_stride ...
925 : ! **************************************************************************************************
926 :
927 0 : SUBROUTINE compute_d_lp(bs_env, ctx, cell, phi_val, d_lp, n_grid_total, n_loc_ri, atom_P, &
928 : max_ao_size, atom_j_mepos, atom_j_stride)
929 :
930 : TYPE(post_scf_bandstructure_type), POINTER :: bs_env
931 : TYPE(gw_3c_ctx_type), INTENT(IN) :: ctx
932 : TYPE(cell_type), POINTER :: cell
933 : REAL(KIND=dp), DIMENSION(:, :), INTENT(IN) :: phi_val
934 : INTEGER, INTENT(IN) :: n_grid_total, n_loc_ri
935 : REAL(KIND=dp), INTENT(INOUT) :: d_lp(n_grid_total, n_loc_ri)
936 : INTEGER, INTENT(IN) :: atom_P, max_ao_size, atom_j_mepos, &
937 : atom_j_stride
938 :
939 : CHARACTER(LEN=*), PARAMETER :: routineN = 'compute_d_lp'
940 : INTEGER, PARAMETER :: grid_chunk = 1024
941 :
942 : INTEGER :: atom_j, atom_k, c, handle, ix_max, ix_min, ix_R, ix_S, iy_max, iy_min, iy_R, &
943 : iy_S, iz_max, iz_min, iz_R, iz_S, j, jk_idx, jsize, jstart, k, ksize, kstart, l, l0, &
944 : natom, ri
945 : INTEGER, DIMENSION(3) :: cell_R_vec, cell_S_vec
946 : LOGICAL :: any_kept, screened
947 0 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: int_2d_prv, rho_chunk
948 0 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :) :: int_3c_prv, int_3c_sum
949 0 : TYPE(gw_3c_ws_type) :: ws
950 :
951 0 : CALL timeset(routineN, handle)
952 :
953 0 : natom = SIZE(bs_env%i_ao_start_from_atom)
954 :
955 0 : IF (cell%perd(1) == 1) THEN; ix_min = -1; ix_max = 1; ELSE; ix_min = 0; ix_max = 0; END IF
956 0 : IF (cell%perd(2) == 1) THEN; iy_min = -1; iy_max = 1; ELSE; iy_min = 0; iy_max = 0; END IF
957 0 : IF (cell%perd(3) == 1) THEN; iz_min = -1; iz_max = 1; ELSE; iz_min = 0; iz_max = 0; END IF
958 :
959 : !$OMP PARALLEL DEFAULT(NONE) &
960 : !$OMP SHARED(bs_env, ctx, phi_val, d_lp, n_grid_total, n_loc_ri, atom_P, max_ao_size, &
961 : !$OMP natom, ix_min, ix_max, iy_min, iy_max, iz_min, iz_max, &
962 : !$OMP atom_j_mepos, atom_j_stride) &
963 : !$OMP PRIVATE(any_kept, atom_j, atom_k, c, j, jk_idx, jsize, jstart, k, ksize, kstart, l, &
964 : !$OMP l0, ri, ix_R, iy_R, iz_R, ix_S, iy_S, iz_S, cell_R_vec, cell_S_vec, &
965 0 : !$OMP screened, int_2d_prv, rho_chunk, int_3c_prv, int_3c_sum, ws)
966 :
967 : CALL gw_3c_ws_create(ws, ctx)
968 : ALLOCATE (int_3c_prv(max_ao_size, max_ao_size, n_loc_ri))
969 : ALLOCATE (int_3c_sum(max_ao_size, max_ao_size, n_loc_ri))
970 : ALLOCATE (int_2d_prv(max_ao_size*max_ao_size, n_loc_ri))
971 : ALLOCATE (rho_chunk(grid_chunk, max_ao_size*max_ao_size))
972 :
973 : ! atom_P pinned at cell (0,0,0); enumerate (atom_j, cell_R) × (atom_k, cell_S). The ctx
974 : ! integral builder's kind_radius triangle screen sets screened=.TRUE. for the bulk of
975 : ! image triples (one or both AO atoms beyond the truncated-Coulomb reach of atom_P),
976 : ! so the 27 × 27 = 729 candidate cells collapse to "adjacent cells" in practice.
977 : ! MPI-stride atom_j over the subgroup (atom_j_stride = 1 for the BLAS path, > 1 for the
978 : ! ScaLAPACK path). COLLAPSE(2) dropped because the outer stride is non-unit under
979 : ! ScaLAPACK; the inner atom_k loop carries enough work for DYNAMIC.
980 : !$OMP DO SCHEDULE(DYNAMIC) REDUCTION(+:d_lp)
981 : DO atom_j = atom_j_mepos + 1, natom, atom_j_stride
982 : DO atom_k = 1, natom
983 : jstart = bs_env%i_ao_start_from_atom(atom_j)
984 : jsize = bs_env%i_ao_end_from_atom(atom_j) - jstart + 1
985 : kstart = bs_env%i_ao_start_from_atom(atom_k)
986 : ksize = bs_env%i_ao_end_from_atom(atom_k) - kstart + 1
987 :
988 : int_3c_sum(1:jsize, 1:ksize, 1:n_loc_ri) = 0.0_dp
989 : any_kept = .FALSE.
990 :
991 : DO ix_R = ix_min, ix_max
992 : DO iy_R = iy_min, iy_max
993 : DO iz_R = iz_min, iz_max
994 : cell_R_vec = [ix_R, iy_R, iz_R]
995 : DO ix_S = ix_min, ix_max
996 : DO iy_S = iy_min, iy_max
997 : DO iz_S = iz_min, iz_max
998 : cell_S_vec = [ix_S, iy_S, iz_S]
999 :
1000 : int_3c_prv(1:jsize, 1:ksize, 1:n_loc_ri) = 0.0_dp
1001 :
1002 : CALL build_3c_integral_block_ctx( &
1003 : int_3c_prv(1:jsize, 1:ksize, 1:n_loc_ri), ctx, ws, &
1004 : atom_j=atom_j, atom_k=atom_k, atom_i=atom_P, &
1005 : cell_j=cell_R_vec, cell_k=cell_S_vec, cell_i=[0, 0, 0], &
1006 : screened=screened)
1007 : IF (screened) CYCLE
1008 :
1009 : any_kept = .TRUE.
1010 : int_3c_sum(1:jsize, 1:ksize, 1:n_loc_ri) = &
1011 : int_3c_sum(1:jsize, 1:ksize, 1:n_loc_ri) + &
1012 : int_3c_prv(1:jsize, 1:ksize, 1:n_loc_ri)
1013 : END DO
1014 : END DO
1015 : END DO
1016 : END DO
1017 : END DO
1018 : END DO
1019 :
1020 : IF (.NOT. any_kept) CYCLE
1021 :
1022 : ! Flatten 3D B_{μν,P} → 2D B_{(μν),P}
1023 : DO ri = 1, n_loc_ri
1024 : DO k = 1, ksize
1025 : DO j = 1, jsize
1026 : jk_idx = (k - 1)*jsize + j
1027 : int_2d_prv(jk_idx, ri) = int_3c_sum(j, k, ri)
1028 : END DO
1029 : END DO
1030 : END DO
1031 :
1032 : ! Pair density ρ(l,μν) = Φ_μ(r_l)Φ_ν(r_l) in grid chunks, contracted on the fly:
1033 : ! d_{l,P} += ρ(l,μν) B_{(μν),P} (dgemm runs serially inside the parallel region)
1034 : DO l0 = 1, n_grid_total, grid_chunk
1035 : c = MIN(grid_chunk, n_grid_total - l0 + 1)
1036 : DO k = 1, ksize
1037 : DO j = 1, jsize
1038 : jk_idx = (k - 1)*jsize + j
1039 : DO l = 1, c
1040 : rho_chunk(l, jk_idx) = phi_val(l0 + l - 1, jstart + j - 1)* &
1041 : phi_val(l0 + l - 1, kstart + k - 1)
1042 : END DO
1043 : END DO
1044 : END DO
1045 : CALL dgemm("N", "N", c, n_loc_ri, jsize*ksize, &
1046 : 1.0_dp, rho_chunk, grid_chunk, &
1047 : int_2d_prv, max_ao_size*max_ao_size, &
1048 : 1.0_dp, d_lp(l0, 1), n_grid_total)
1049 : END DO
1050 : END DO
1051 : END DO
1052 : !$OMP END DO
1053 :
1054 : DEALLOCATE (int_3c_prv, int_3c_sum, int_2d_prv, rho_chunk)
1055 : CALL gw_3c_ws_release(ws)
1056 :
1057 : !$OMP END PARALLEL
1058 :
1059 0 : CALL timestop(handle)
1060 :
1061 0 : END SUBROUTINE compute_d_lp
1062 :
1063 : ! **************************************************************************************************
1064 : !> \brief Computes the χ(iτ, k=0) matrix
1065 : !> \param bs_env ...
1066 : !> \param mat_chi_Gamma_tau ...
1067 : !> \param mat_phi_mu_l ...
1068 : !> \param mat_Z_lP ...
1069 : ! **************************************************************************************************
1070 :
1071 0 : SUBROUTINE get_mat_chi_Gamma_tau(bs_env, mat_chi_Gamma_tau, mat_phi_mu_l, mat_Z_lP)
1072 :
1073 : TYPE(post_scf_bandstructure_type), POINTER :: bs_env
1074 : TYPE(dbcsr_p_type), DIMENSION(:), POINTER :: mat_chi_Gamma_tau
1075 : TYPE(dbcsr_type), INTENT(INOUT) :: mat_phi_mu_l, mat_Z_lP
1076 :
1077 : CHARACTER(LEN=*), PARAMETER :: routineN = 'get_mat_chi_Gamma_tau'
1078 :
1079 : INTEGER :: handle, i, i_t, ispin, npcol
1080 0 : INTEGER, DIMENSION(:), POINTER :: blk_ao, blk_grid, dist_col_ao, &
1081 0 : dist_col_grid, dist_row_grid
1082 : REAL(KIND=dp) :: t1, tau
1083 : TYPE(dbcsr_distribution_type) :: dist_grid_grid, dist_phi
1084 : TYPE(dbcsr_type) :: matrix_chi_grid, matrix_chi_grid_spin, &
1085 : matrix_G_occ_grid, matrix_G_vir_grid
1086 :
1087 0 : CALL timeset(routineN, handle)
1088 :
1089 : ! =========================================================================
1090 : ! 1. SETUP CORE TOPOLOGIES
1091 : ! =========================================================================
1092 0 : CALL dbcsr_get_info(mat_phi_mu_l, distribution=dist_phi, row_blk_size=blk_grid, col_blk_size=blk_ao)
1093 0 : CALL dbcsr_distribution_get(dist_phi, row_dist=dist_row_grid, col_dist=dist_col_ao)
1094 :
1095 : ! Determine number of MPI process columns
1096 0 : npcol = MAXVAL(dist_col_ao) + 1
1097 :
1098 : ! Build a perfectly safe column distribution for the Grid dimension
1099 0 : ALLOCATE (dist_col_grid(SIZE(blk_grid)))
1100 0 : DO i = 1, SIZE(blk_grid)
1101 0 : dist_col_grid(i) = MOD(i - 1, npcol)
1102 : END DO
1103 :
1104 : CALL dbcsr_distribution_new(dist_grid_grid, template=dist_phi, &
1105 0 : row_dist=dist_row_grid, col_dist=dist_col_grid)
1106 :
1107 0 : CALL dbcsr_create(matrix_G_occ_grid, "G_occ_grid", dist_grid_grid, dbcsr_type_no_symmetry, blk_grid, blk_grid)
1108 0 : CALL dbcsr_create(matrix_G_vir_grid, "G_vir_grid", dist_grid_grid, dbcsr_type_no_symmetry, blk_grid, blk_grid)
1109 0 : CALL dbcsr_create(matrix_chi_grid, "chi_grid", dist_grid_grid, dbcsr_type_no_symmetry, blk_grid, blk_grid)
1110 0 : CALL dbcsr_create(matrix_chi_grid_spin, "chi_grid_spin", dist_grid_grid, dbcsr_type_no_symmetry, blk_grid, blk_grid)
1111 :
1112 : ! =========================================================================
1113 : ! 2. MAIN IMAGINARY TIME LOOP
1114 : ! =========================================================================
1115 0 : DO i_t = 1, bs_env%num_time_freq_points
1116 0 : t1 = m_walltime()
1117 :
1118 0 : tau = bs_env%imag_time_points(i_t)
1119 0 : CALL dbcsr_set(matrix_chi_grid, 0.0_dp)
1120 :
1121 : ! ----------------------------------------------------------------------
1122 : ! A. SPIN LOOP (Allocations safely encapsulated in wrappers)
1123 : ! ----------------------------------------------------------------------
1124 0 : DO ispin = 1, bs_env%n_spin
1125 :
1126 : ! G^occ_µλ(i|τ|,k=0) = sum_n^occ C_µn(k=0) e^(-|(ϵ_nk=0-ϵ_F)τ|) C_λn(k=0)
1127 : ! G^occ_ll'(i|τ|,k=0) = sum_µν Φ_µ(r_l) G^occ_µν Φ_ν(r_l')
1128 : CALL build_G_grid(bs_env, tau, ispin, .TRUE., .FALSE., mat_phi_mu_l, &
1129 0 : matrix_G_occ_grid, bs_env%eps_filter)
1130 :
1131 : ! G^vir_µλ(i|τ|,k=0) = sum_n^vir C_µn(k=0) e^(-|(ϵ_nk=0-ϵ_F)τ|) C_λn(k=0)
1132 : ! G^vir_ll'(i|τ|,k=0) = sum_µν Φ_µ(r_l) G^vir_µν Φ_ν(r_l')
1133 : CALL build_G_grid(bs_env, tau, ispin, .FALSE., .TRUE., mat_phi_mu_l, &
1134 0 : matrix_G_vir_grid, bs_env%eps_filter)
1135 :
1136 : ! -------------------------------------------------------------------
1137 : ! B. ELEMENT-WISE HADAMARD PRODUCT
1138 : ! -------------------------------------------------------------------
1139 : ! χ_ll'(iτ,k=0) = G^occ_ll'(i|τ|,k=0) * G^vir_ll'(i|τ|,k=0)
1140 0 : CALL hadamard_product(matrix_G_occ_grid, matrix_G_vir_grid, matrix_chi_grid_spin, bs_env%spin_degeneracy)
1141 :
1142 : ! Accumulate spin contributions
1143 0 : CALL dbcsr_add(matrix_chi_grid, matrix_chi_grid_spin, 1.0_dp, 1.0_dp)
1144 :
1145 : END DO ! ispin
1146 :
1147 : ! ----------------------------------------------------------------------
1148 : ! C. TRANSFORM TO AUXILIARY BASIS & EXPORT DIRECTLY
1149 : ! χ_aux = Z^T * χ_grid * Z
1150 : ! χ_PQ(iτ,k=0) = sum_ll' Z_lP χ_ll'(iτ,k=0) Z_l'Q
1151 : ! Result is dumped directly into the final array mat_chi_Gamma_tau!
1152 : ! ----------------------------------------------------------------------
1153 : CALL contract_A_B_A("T", "N", mat_Z_lP, matrix_chi_grid, &
1154 0 : mat_chi_Gamma_tau(i_t)%matrix, bs_env%eps_filter)
1155 :
1156 0 : IF (bs_env%unit_nr > 0) THEN
1157 : WRITE (bs_env%unit_nr, '(T2,A,I13,A,I3,A,F7.1,A)') &
1158 0 : 'Computed χ(iτ,k=0) for time point', i_t, ' /', bs_env%num_time_freq_points, &
1159 0 : ', Execution time', m_walltime() - t1, ' s'
1160 : END IF
1161 :
1162 : END DO ! i_t
1163 :
1164 : ! =========================================================================
1165 : ! 3. FINAL CLEANUP
1166 : ! =========================================================================
1167 0 : CALL dbcsr_release(matrix_G_occ_grid)
1168 0 : CALL dbcsr_release(matrix_G_vir_grid)
1169 0 : CALL dbcsr_release(matrix_chi_grid)
1170 0 : CALL dbcsr_release(matrix_chi_grid_spin)
1171 0 : CALL dbcsr_distribution_release(dist_grid_grid)
1172 0 : DEALLOCATE (dist_col_grid)
1173 :
1174 0 : IF (bs_env%unit_nr > 0) WRITE (bs_env%unit_nr, '(A)') ' '
1175 :
1176 0 : CALL timestop(handle)
1177 :
1178 0 : END SUBROUTINE get_mat_chi_Gamma_tau
1179 :
1180 : ! **************************************************************************************************
1181 : !> \brief Computes Green's Function in grid basis
1182 : !> \param bs_env ...
1183 : !> \param tau ...
1184 : !> \param ispin ...
1185 : !> \param occ ...
1186 : !> \param vir ...
1187 : !> \param mat_phi_mu_l ...
1188 : !> \param matrix_G_grid ...
1189 : !> \param eps_filter ...
1190 : ! **************************************************************************************************
1191 :
1192 0 : SUBROUTINE build_G_grid(bs_env, tau, ispin, occ, vir, mat_phi_mu_l, matrix_G_grid, eps_filter)
1193 :
1194 : TYPE(post_scf_bandstructure_type), POINTER :: bs_env
1195 : REAL(KIND=dp), INTENT(IN) :: tau
1196 : INTEGER, INTENT(IN) :: ispin
1197 : LOGICAL, INTENT(IN) :: occ, vir
1198 : TYPE(dbcsr_type), INTENT(INOUT) :: mat_phi_mu_l, matrix_G_grid
1199 : REAL(KIND=dp), INTENT(IN) :: eps_filter
1200 :
1201 : CHARACTER(LEN=*), PARAMETER :: routineN = 'build_G_grid'
1202 :
1203 : INTEGER :: handle
1204 0 : INTEGER, DIMENSION(:), POINTER :: blk_ao, dist_row_ao
1205 : TYPE(cp_fm_type), POINTER :: fm_G
1206 : TYPE(dbcsr_distribution_type) :: dist_ao_ao
1207 : TYPE(dbcsr_type) :: matrix_G_ao
1208 :
1209 0 : CALL timeset(routineN, handle)
1210 :
1211 : ! 1. Select the correct FM matrix based on occ/vir flags
1212 0 : IF (occ) THEN
1213 0 : fm_G => bs_env%fm_Gocc
1214 : ELSE
1215 0 : fm_G => bs_env%fm_Gvir
1216 : END IF
1217 :
1218 : ! 2. Compute Dense FM Green's Function
1219 : ! G^occ/vir_µλ(i|τ|,k=0) = sum_G^occ/vir_µλn^occ/vir C_µn(k=0) e^(-|(ϵ_nk=0-ϵ_F)τ|) C_λn(k=0)
1220 0 : CALL G_occ_vir(bs_env, tau, fm_G, ispin, occ=occ, vir=vir)
1221 :
1222 : ! 3. Setup AO DBCSR Topology and Create Matrix dynamically
1223 0 : CALL setup_square_topology(mat_phi_mu_l, 'COL', dist_ao_ao, blk_ao, dist_row_ao)
1224 :
1225 : CALL dbcsr_create(matrix_G_ao, name="G_ao", dist=dist_ao_ao, &
1226 : matrix_type=dbcsr_type_no_symmetry, &
1227 0 : row_blk_size=blk_ao, col_blk_size=blk_ao)
1228 :
1229 : ! 4. Convert FM to Sparse DBCSR
1230 0 : CALL copy_fm_to_dbcsr(fm_G, matrix_G_ao, keep_sparsity=.FALSE.)
1231 :
1232 : ! 5. Transform to Grid Basis: G_grid = phi * G_ao * phi^T
1233 : ! G^occ/vir_ll'(i|τ|,k=0) = sum_µν Φ_µ(r_l) G^occ/vir_µν Φ_ν(r_l')
1234 0 : CALL contract_A_B_A("N", "T", mat_phi_mu_l, matrix_G_ao, matrix_G_grid, eps_filter)
1235 :
1236 : ! 6. Release AO matrix and topology
1237 0 : CALL release_dbcsr_topology_and_matrices(dist=dist_ao_ao, mapped_dist=dist_row_ao, m1=matrix_G_ao)
1238 :
1239 0 : CALL timestop(handle)
1240 :
1241 0 : END SUBROUTINE build_G_grid
1242 :
1243 : ! **************************************************************************************************
1244 : !> \brief Generalized routine to compute OUT = A * B * A^T OR OUT = A^T * B * A using DBCSR
1245 : !> \param transA_left ...
1246 : !> \param transA_right ...
1247 : !> \param matrix_A ...
1248 : !> \param matrix_B ...
1249 : !> \param matrix_out ...
1250 : !> \param eps_filter ...
1251 : ! **************************************************************************************************
1252 :
1253 0 : SUBROUTINE contract_A_B_A(transA_left, transA_right, matrix_A, matrix_B, matrix_out, eps_filter)
1254 :
1255 : CHARACTER(LEN=1), INTENT(IN) :: transA_left, transA_right
1256 : TYPE(dbcsr_type), INTENT(INOUT) :: matrix_A, matrix_B, matrix_out
1257 : REAL(KIND=dp), INTENT(IN) :: eps_filter
1258 :
1259 : CHARACTER(LEN=*), PARAMETER :: routineN = 'contract_A_B_A'
1260 :
1261 : INTEGER :: handle
1262 : TYPE(dbcsr_type) :: matrix_tmp
1263 :
1264 0 : CALL timeset(routineN, handle)
1265 :
1266 0 : CALL dbcsr_create(matrix_tmp, template=matrix_A)
1267 :
1268 0 : IF (transA_left == "N" .AND. transA_right == "T") THEN
1269 : ! Path 1: Out = A * B * A^T
1270 : CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_A, matrix_B, &
1271 0 : 0.0_dp, matrix_tmp, filter_eps=eps_filter)
1272 : CALL dbcsr_multiply("N", "T", 1.0_dp, matrix_tmp, matrix_A, &
1273 0 : 0.0_dp, matrix_out, filter_eps=eps_filter)
1274 :
1275 0 : ELSE IF (transA_left == "T" .AND. transA_right == "N") THEN
1276 : ! Path 2: Out = A^T * B * A
1277 : CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_B, matrix_A, &
1278 0 : 0.0_dp, matrix_tmp, filter_eps=eps_filter)
1279 : CALL dbcsr_multiply("T", "N", 1.0_dp, matrix_A, matrix_tmp, &
1280 0 : 0.0_dp, matrix_out, filter_eps=eps_filter)
1281 : ELSE
1282 0 : CPABORT("Unsupported transposition pair in contract_A_B_A")
1283 : END IF
1284 :
1285 0 : CALL dbcsr_release(matrix_tmp)
1286 :
1287 0 : CALL timestop(handle)
1288 :
1289 0 : END SUBROUTINE contract_A_B_A
1290 :
1291 : ! **************************************************************************************************
1292 : !> \brief Computes C = A ◦ B (Element-wise Hadamard product) for sparse DBCSR matrices.
1293 : !> \param matrix_A ...
1294 : !> \param matrix_B ...
1295 : !> \param matrix_C ...
1296 : !> \param fac (Scaling factor applied to the product)
1297 : ! **************************************************************************************************
1298 :
1299 0 : SUBROUTINE hadamard_product(matrix_A, matrix_B, matrix_C, fac)
1300 :
1301 : TYPE(dbcsr_type), INTENT(INOUT) :: matrix_A, matrix_B, matrix_C
1302 : REAL(KIND=dp), INTENT(IN) :: fac
1303 :
1304 : CHARACTER(LEN=*), PARAMETER :: routineN = 'hadamard_product'
1305 :
1306 : INTEGER :: col, handle, row
1307 : LOGICAL :: found
1308 0 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: blk_B, blk_C
1309 : TYPE(dbcsr_iterator_type) :: iter
1310 :
1311 0 : CALL timeset(routineN, handle)
1312 :
1313 0 : CALL dbcsr_copy(matrix_C, matrix_A)
1314 :
1315 0 : CALL dbcsr_iterator_start(iter, matrix_C)
1316 0 : DO WHILE (dbcsr_iterator_blocks_left(iter))
1317 0 : CALL dbcsr_iterator_next_block(iter, row, col, blk_C)
1318 :
1319 0 : CALL dbcsr_get_block_p(matrix_B, row, col, blk_B, found)
1320 :
1321 0 : IF (found) THEN
1322 0 : blk_C(:, :) = fac*blk_C(:, :)*blk_B(:, :)
1323 : ELSE
1324 : ! If B is sparse here, the product is zero
1325 0 : blk_C(:, :) = 0.0_dp
1326 : END IF
1327 : END DO
1328 0 : CALL dbcsr_iterator_stop(iter)
1329 :
1330 0 : CALL timestop(handle)
1331 :
1332 0 : END SUBROUTINE hadamard_product
1333 :
1334 : ! **************************************************************************************************
1335 : !> \brief Compute screened Coulomb interaction matrix
1336 : !> \param bs_env ...
1337 : !> \param qs_env ...
1338 : !> \param mat_chi_Gamma_tau ...
1339 : !> \param fm_W_time ...
1340 : ! **************************************************************************************************
1341 :
1342 0 : SUBROUTINE compute_W(bs_env, qs_env, mat_chi_Gamma_tau, fm_W_time)
1343 : TYPE(post_scf_bandstructure_type), POINTER :: bs_env
1344 : TYPE(qs_environment_type), POINTER :: qs_env
1345 : TYPE(dbcsr_p_type), DIMENSION(:), POINTER :: mat_chi_Gamma_tau
1346 : TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:) :: fm_W_time
1347 :
1348 : CHARACTER(LEN=*), PARAMETER :: routineN = 'compute_W'
1349 :
1350 : INTEGER :: handle, i_t, j_w
1351 : REAL(KIND=dp) :: t1
1352 : TYPE(cp_fm_type) :: fm_M_inv_V_sqrt, fm_V, fm_V_sqrt
1353 :
1354 0 : CALL timeset(routineN, handle)
1355 :
1356 0 : t1 = m_walltime()
1357 :
1358 0 : CALL create_fm_W_MIC_time(bs_env, fm_W_time)
1359 :
1360 : ! 1. Allocate V and M matrices
1361 0 : CALL cp_fm_create(fm_V, bs_env%fm_RI_RI%matrix_struct)
1362 0 : CALL cp_fm_create(fm_V_sqrt, bs_env%fm_RI_RI%matrix_struct)
1363 0 : CALL cp_fm_create(fm_M_inv_V_sqrt, bs_env%fm_RI_RI%matrix_struct)
1364 :
1365 : ! Compute V and M^-1 * V^0.5
1366 0 : CALL compute_V_MinvVsqrt(bs_env, qs_env, fm_V, fm_V_sqrt, fm_M_inv_V_sqrt)
1367 :
1368 : ! 2. Loop over frequencies
1369 0 : DO j_w = 1, bs_env%num_time_freq_points
1370 : ! Fourier transformation of χ_PQ(iτ) to χ_PQ(iω_j)
1371 0 : CALL compute_fm_chi_Gamma_freq(bs_env, bs_env%fm_chi_Gamma_freq, j_w, mat_chi_Gamma_tau)
1372 :
1373 : ! ε(iω_j) = Id - V^0.5*M^-1*χ(iω_j)*M^-1*V^0.5
1374 : ! W(iω_j) = V^0.5*(ε^-1(iω_j)-Id)*V^0.5
1375 : CALL compute_fm_W_freq(bs_env, bs_env%fm_chi_Gamma_freq, fm_V_sqrt, &
1376 0 : fm_M_inv_V_sqrt, bs_env%fm_W_MIC_freq)
1377 :
1378 : ! Fourier transform from W_PQ^MIC(iω_j) to W_PQ^MIC(iτ)
1379 0 : CALL Fourier_transform_w_to_t(bs_env, fm_W_time, bs_env%fm_W_MIC_freq, j_w)
1380 : END DO
1381 :
1382 : ! M^-1*W^MIC(iτ)*M^-1
1383 0 : CALL multiply_fm_W_MIC_time_with_Minv_Gamma(bs_env, qs_env, fm_W_time)
1384 :
1385 0 : IF (bs_env%unit_nr > 0) THEN
1386 : WRITE (bs_env%unit_nr, '(T2,A,T55,A,F10.1,A)') &
1387 0 : 'Computed W(iτ),', ' Execution time', m_walltime() - t1, ' s'
1388 : END IF
1389 :
1390 0 : CALL dbcsr_deallocate_matrix_set(mat_chi_Gamma_tau)
1391 :
1392 : ! Cleanup
1393 0 : CALL cp_fm_release(fm_V)
1394 0 : CALL cp_fm_release(fm_V_sqrt)
1395 0 : CALL cp_fm_release(fm_M_inv_V_sqrt)
1396 :
1397 : ! Marek : Fourier transform W^MIC(itau) back to get it at a specific im.frequency point - iomega = 0
1398 0 : IF (bs_env%rtp_method == rtp_method_bse) THEN
1399 0 : t1 = m_walltime()
1400 0 : CALL cp_fm_create(bs_env%fm_W_MIC_freq_zero, bs_env%fm_W_MIC_freq%matrix_struct)
1401 : ! Set to zero
1402 0 : CALL cp_fm_set_all(bs_env%fm_W_MIC_freq_zero, 0.0_dp)
1403 : ! Sum over all times
1404 0 : DO i_t = 1, bs_env%num_time_freq_points
1405 : ! Add the relevant structure with correct weight
1406 : CALL cp_fm_scale_and_add(1.0_dp, bs_env%fm_W_MIC_freq_zero, &
1407 0 : bs_env%imag_time_weights_freq_zero(i_t), fm_W_time(i_t))
1408 : END DO
1409 : ! Done, save to file
1410 0 : CALL fm_write(bs_env%fm_W_MIC_freq_zero, 0, "W_freq_rtp", qs_env)
1411 : ! Report calculation
1412 0 : IF (bs_env%unit_nr > 0) THEN
1413 : WRITE (bs_env%unit_nr, '(T2,A,T55,A,F10.1,A)') &
1414 0 : 'Computed W(0),', ' Execution time', m_walltime() - t1, ' s'
1415 : END IF
1416 : END IF
1417 :
1418 0 : IF (bs_env%unit_nr > 0) WRITE (bs_env%unit_nr, '(A)') ' '
1419 :
1420 0 : CALL timestop(handle)
1421 :
1422 0 : END SUBROUTINE compute_W
1423 :
1424 : ! **************************************************************************************************
1425 : !> \brief Computes V, V^0.5, and M^-1 * V^0.5
1426 : !> \param bs_env ...
1427 : !> \param qs_env ...
1428 : !> \param fm_V ...
1429 : !> \param fm_V_sqrt ...
1430 : !> \param fm_Minv_Vsqrt ...
1431 : ! **************************************************************************************************
1432 :
1433 0 : SUBROUTINE compute_V_MinvVsqrt(bs_env, qs_env, fm_V, fm_V_sqrt, fm_Minv_Vsqrt)
1434 : TYPE(post_scf_bandstructure_type), POINTER :: bs_env
1435 : TYPE(qs_environment_type), POINTER :: qs_env
1436 : TYPE(cp_fm_type), INTENT(INOUT) :: fm_V, fm_V_sqrt, fm_Minv_Vsqrt
1437 :
1438 : CHARACTER(LEN=*), PARAMETER :: routineN = 'compute_V_MinvVsqrt'
1439 :
1440 : INTEGER :: handle, info, n_RI, ndep
1441 0 : TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
1442 : TYPE(cell_type), POINTER :: cell
1443 : TYPE(cp_fm_type) :: fm_work
1444 0 : TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:, :) :: fm_M
1445 0 : TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: mat_V_kp
1446 0 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
1447 0 : TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
1448 :
1449 0 : CALL timeset(routineN, handle)
1450 :
1451 0 : n_RI = bs_env%n_RI
1452 0 : CALL cp_fm_create(fm_work, fm_V%matrix_struct)
1453 :
1454 : ! -----------------------------------------------------------------------
1455 : ! 1. Build Coulomb Matrix V(k=0) using the kp-routine but only for ikp=1
1456 : ! -----------------------------------------------------------------------
1457 : CALL get_qs_env(qs_env=qs_env, particle_set=particle_set, cell=cell, &
1458 0 : qs_kind_set=qs_kind_set, atomic_kind_set=atomic_kind_set)
1459 :
1460 0 : ALLOCATE (mat_V_kp(1:1, 1:2))
1461 0 : NULLIFY (mat_V_kp(1, 1)%matrix, mat_V_kp(1, 2)%matrix)
1462 0 : ALLOCATE (mat_V_kp(1, 1)%matrix, mat_V_kp(1, 2)%matrix)
1463 :
1464 0 : CALL dbcsr_create(mat_V_kp(1, 1)%matrix, template=bs_env%mat_RI_RI%matrix)
1465 0 : CALL dbcsr_reserve_all_blocks(mat_V_kp(1, 1)%matrix)
1466 0 : CALL dbcsr_set(mat_V_kp(1, 1)%matrix, 0.0_dp)
1467 :
1468 : ! Dummy imaginary part just to satisfy the routine
1469 0 : CALL dbcsr_create(mat_V_kp(1, 2)%matrix, template=bs_env%mat_RI_RI%matrix)
1470 0 : CALL dbcsr_reserve_all_blocks(mat_V_kp(1, 2)%matrix)
1471 0 : CALL dbcsr_set(mat_V_kp(1, 2)%matrix, 0.0_dp)
1472 :
1473 0 : bs_env%kpoints_chi_eps_W%nkp_grid = bs_env%nkp_grid_chi_eps_W_orig
1474 :
1475 : CALL build_2c_coulomb_matrix_kp(mat_V_kp, bs_env%kpoints_chi_eps_W, "RI_AUX", cell, &
1476 : particle_set, qs_kind_set, atomic_kind_set, &
1477 0 : bs_env%size_lattice_sum_V, operator_coulomb, 1, 1)
1478 :
1479 : ! Copy real part to fm_V
1480 0 : CALL copy_dbcsr_to_fm(mat_V_kp(1, 1)%matrix, fm_V)
1481 :
1482 0 : CALL dbcsr_deallocate_matrix(mat_V_kp(1, 1)%matrix)
1483 0 : CALL dbcsr_deallocate_matrix(mat_V_kp(1, 2)%matrix)
1484 0 : DEALLOCATE (mat_V_kp)
1485 :
1486 : ! -----------------------------------------------------------------------
1487 : ! 2. Get RI-Metric Matrix M(k=0)
1488 : ! -----------------------------------------------------------------------
1489 : CALL RI_2c_integral_mat(qs_env, fm_M, fm_V, n_RI, bs_env%ri_metric, &
1490 0 : do_kpoints=.FALSE., regularization_RI=bs_env%regularization_RI)
1491 :
1492 : ! -----------------------------------------------------------------------
1493 : ! 3. M -> M^-1
1494 : ! -----------------------------------------------------------------------
1495 0 : CALL cp_fm_cholesky_decompose(fm_M(1, 1), info_out=info)
1496 0 : IF (info == 0) THEN
1497 0 : CALL cp_fm_cholesky_invert(fm_M(1, 1))
1498 0 : CALL cp_fm_uplo_to_full(fm_M(1, 1), fm_work)
1499 : ELSE
1500 : ! Fallback if Cholesky fails due to conditioning
1501 0 : CALL cp_fm_power(fm_M(1, 1), fm_work, -1.0_dp, bs_env%eps_eigval_mat_RI, ndep)
1502 0 : CALL cp_fm_to_fm(fm_work, fm_M(1, 1))
1503 : END IF
1504 :
1505 : ! -----------------------------------------------------------------------
1506 : ! 4. V -> V^0.5
1507 : ! -----------------------------------------------------------------------
1508 0 : CALL cp_fm_to_fm(fm_V, fm_V_sqrt)
1509 0 : CALL cp_fm_cholesky_decompose(fm_V_sqrt, info_out=info)
1510 0 : IF (info == 0) THEN
1511 0 : CALL clean_lower_part(fm_V_sqrt)
1512 : ELSE
1513 0 : CALL cp_fm_power(fm_V, fm_V_sqrt, 0.5_dp, bs_env%eps_eigval_mat_RI, ndep)
1514 : END IF
1515 :
1516 : ! -----------------------------------------------------------------------
1517 : ! 5. M^-1 * V^0.5
1518 : ! -----------------------------------------------------------------------
1519 : CALL parallel_gemm("N", "T", n_RI, n_RI, n_RI, 1.0_dp, fm_M(1, 1), fm_V_sqrt, &
1520 0 : 0.0_dp, fm_Minv_Vsqrt)
1521 :
1522 0 : CALL cp_fm_release(fm_M)
1523 0 : CALL cp_fm_release(fm_work)
1524 :
1525 0 : CALL timestop(handle)
1526 :
1527 0 : END SUBROUTINE compute_V_MinvVsqrt
1528 :
1529 : ! **************************************************************************************************
1530 : !> \brief Computes W(iω) from χ_PQ(iω_j)
1531 : !> \param bs_env ...
1532 : !> \param fm_chi_freq_j ...
1533 : !> \param fm_V_sqrt ...
1534 : !> \param fm_Minv_Vsqrt ...
1535 : !> \param fm_W_freq_j ...
1536 : ! **************************************************************************************************
1537 :
1538 0 : SUBROUTINE compute_fm_W_freq(bs_env, fm_chi_freq_j, fm_V_sqrt, fm_Minv_Vsqrt, fm_W_freq_j)
1539 : TYPE(post_scf_bandstructure_type), POINTER :: bs_env
1540 : TYPE(cp_fm_type), INTENT(IN) :: fm_chi_freq_j, fm_V_sqrt, fm_Minv_Vsqrt
1541 : TYPE(cp_fm_type), INTENT(INOUT) :: fm_W_freq_j
1542 :
1543 : CHARACTER(LEN=*), PARAMETER :: routineN = 'compute_fm_W_freq'
1544 :
1545 : INTEGER :: handle, info, n_RI, ndep
1546 : TYPE(cp_fm_type) :: fm_eps_freq_j, fm_work
1547 :
1548 0 : CALL timeset(routineN, handle)
1549 :
1550 0 : n_RI = bs_env%n_RI
1551 :
1552 0 : CALL cp_fm_create(fm_eps_freq_j, fm_chi_freq_j%matrix_struct)
1553 0 : CALL cp_fm_create(fm_work, fm_chi_freq_j%matrix_struct)
1554 :
1555 : ! -----------------------------------------------------------------------
1556 : ! 1. ε(iω_j) = Id - (M^-1 * V^0.5)^T * χ(iω_j) * (M^-1 * V^0.5)
1557 : ! -----------------------------------------------------------------------
1558 : ! work = χ(iω_j) * (M^-1 * V^0.5)
1559 : CALL parallel_gemm('N', 'N', n_RI, n_RI, n_RI, 1.0_dp, &
1560 0 : fm_chi_freq_j, fm_Minv_Vsqrt, 0.0_dp, fm_work)
1561 :
1562 : ! eps_work = (M^-1 * V^0.5)^T * work
1563 : CALL parallel_gemm('T', 'N', n_RI, n_RI, n_RI, 1.0_dp, &
1564 0 : fm_Minv_Vsqrt, fm_work, 0.0_dp, fm_eps_freq_j)
1565 :
1566 : ! ε(iω_j) = Id - eps_work --> -eps_work + Id
1567 0 : CALL fm_add_on_diag(fm_eps_freq_j, 1.0_dp)
1568 :
1569 : ! Force perfect symmetry before Cholesky to avoid info != 0 due to GEMM noise
1570 0 : CALL cp_fm_uplo_to_full(fm_eps_freq_j, fm_work)
1571 :
1572 : ! -----------------------------------------------------------------------
1573 : ! 2. W(iω_j) = V^0.5^T * (ε^-1(iω_j) - Id) * V^0.5
1574 : ! -----------------------------------------------------------------------
1575 :
1576 : ! a) Cholesky decomposition of ε(iω_j)
1577 0 : CALL cp_fm_cholesky_decompose(fm_eps_freq_j, info_out=info)
1578 :
1579 : ! b) Inversion
1580 0 : IF (info == 0) THEN
1581 0 : CALL cp_fm_cholesky_invert(fm_eps_freq_j)
1582 0 : CALL cp_fm_uplo_to_full(fm_eps_freq_j, fm_work)
1583 : ELSE
1584 : ! Fallback to expensive diagonalization if Cholesky fails
1585 0 : CALL cp_fm_power(fm_eps_freq_j, fm_work, -1.0_dp, bs_env%eps_eigval_mat_RI, ndep)
1586 0 : CALL cp_fm_to_fm(fm_work, fm_eps_freq_j)
1587 : END IF
1588 :
1589 : ! c) ε^-1(iω_j) - Id
1590 0 : CALL fm_add_on_diag(fm_eps_freq_j, -1.0_dp)
1591 :
1592 : ! d) work = (ε^-1(iω_j) - Id) * V^0.5
1593 : CALL parallel_gemm('N', 'N', n_RI, n_RI, n_RI, 1.0_dp, fm_eps_freq_j, fm_V_sqrt, &
1594 0 : 0.0_dp, fm_work)
1595 :
1596 : ! e) W(iw) = V^0.5^T * work
1597 : CALL parallel_gemm('T', 'N', n_RI, n_RI, n_RI, 1.0_dp, fm_V_sqrt, fm_work, &
1598 0 : 0.0_dp, fm_W_freq_j)
1599 :
1600 : ! Cleanup
1601 0 : CALL cp_fm_release(fm_work)
1602 0 : CALL cp_fm_release(fm_eps_freq_j)
1603 :
1604 0 : CALL timestop(handle)
1605 :
1606 0 : END SUBROUTINE compute_fm_W_freq
1607 :
1608 : ! **************************************************************************************************
1609 : !> \brief Adds a real scalar value to the diagonal of a real full matrix (fm)
1610 : !> \param fm ...
1611 : !> \param alpha ...
1612 : ! **************************************************************************************************
1613 :
1614 0 : SUBROUTINE fm_add_on_diag(fm, alpha)
1615 : TYPE(cp_fm_type), INTENT(INOUT) :: fm
1616 : REAL(KIND=dp), INTENT(IN) :: alpha
1617 :
1618 : CHARACTER(LEN=*), PARAMETER :: routineN = 'fm_add_on_diag'
1619 :
1620 : INTEGER :: handle, i_global, i_row, j_col, &
1621 : j_global, ncol_local, nrow_local
1622 0 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1623 :
1624 0 : CALL timeset(routineN, handle)
1625 :
1626 : CALL cp_fm_get_info(matrix=fm, &
1627 : nrow_local=nrow_local, &
1628 : ncol_local=ncol_local, &
1629 : row_indices=row_indices, &
1630 0 : col_indices=col_indices)
1631 :
1632 0 : DO j_col = 1, ncol_local
1633 0 : j_global = col_indices(j_col)
1634 0 : DO i_row = 1, nrow_local
1635 0 : i_global = row_indices(i_row)
1636 0 : IF (j_global == i_global) THEN
1637 0 : fm%local_data(i_row, j_col) = fm%local_data(i_row, j_col) + alpha
1638 : END IF
1639 : END DO
1640 : END DO
1641 :
1642 0 : CALL timestop(handle)
1643 :
1644 0 : END SUBROUTINE fm_add_on_diag
1645 :
1646 : ! **************************************************************************************************
1647 : !> \brief Zeroes out the strictly lower triangular part of a real matrix
1648 : !> \param fm_mat ...
1649 : ! **************************************************************************************************
1650 0 : SUBROUTINE clean_lower_part(fm_mat)
1651 : TYPE(cp_fm_type) :: fm_mat
1652 :
1653 : CHARACTER(LEN=*), PARAMETER :: routineN = 'clean_lower_part'
1654 :
1655 : INTEGER :: handle, i_row, j_col, j_global, &
1656 : ncol_local, nrow_local
1657 0 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1658 :
1659 0 : CALL timeset(routineN, handle)
1660 :
1661 : CALL cp_fm_get_info(matrix=fm_mat, &
1662 : nrow_local=nrow_local, ncol_local=ncol_local, &
1663 0 : row_indices=row_indices, col_indices=col_indices)
1664 :
1665 0 : DO j_col = 1, ncol_local
1666 0 : j_global = col_indices(j_col)
1667 0 : DO i_row = 1, nrow_local
1668 0 : IF (j_global < row_indices(i_row)) fm_mat%local_data(i_row, j_col) = 0.0_dp
1669 : END DO
1670 : END DO
1671 :
1672 0 : CALL timestop(handle)
1673 :
1674 0 : END SUBROUTINE clean_lower_part
1675 :
1676 : ! **************************************************************************************************
1677 : !> \brief Computes the exact exchange part of the GW self-energy
1678 : !> \param bs_env ...
1679 : !> \param qs_env ...
1680 : !> \param mat_phi_mu_l ...
1681 : !> \param mat_Z_lP ...
1682 : !> \param fm_Sigma_x_Gamma ...
1683 : ! **************************************************************************************************
1684 :
1685 0 : SUBROUTINE compute_Sigma_x(bs_env, qs_env, mat_phi_mu_l, mat_Z_lP, fm_Sigma_x_Gamma)
1686 :
1687 : TYPE(post_scf_bandstructure_type), POINTER :: bs_env
1688 : TYPE(qs_environment_type), POINTER :: qs_env
1689 : TYPE(dbcsr_type), INTENT(INOUT) :: mat_phi_mu_l, mat_Z_lP
1690 : TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:) :: fm_Sigma_x_Gamma
1691 :
1692 : CHARACTER(LEN=*), PARAMETER :: routineN = 'compute_Sigma_x'
1693 :
1694 : INTEGER :: handle, ispin
1695 0 : INTEGER, DIMENSION(:), POINTER :: blk_aux, blk_grid, dist_col_grid, &
1696 0 : dist_row_aux
1697 : REAL(KIND=dp) :: t1
1698 0 : TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:, :) :: fm_Vtr_Gamma
1699 : TYPE(dbcsr_distribution_type) :: dist_aux_aux, dist_grid_grid
1700 : TYPE(dbcsr_type) :: mat_Sigma_x_Gamma, matrix_D_grid, &
1701 : matrix_Sigma_x_grid, matrix_V_aux, &
1702 : matrix_V_grid
1703 :
1704 0 : CALL timeset(routineN, handle)
1705 :
1706 0 : t1 = m_walltime()
1707 :
1708 0 : ALLOCATE (fm_Sigma_x_Gamma(bs_env%n_spin))
1709 0 : DO ispin = 1, bs_env%n_spin
1710 0 : CALL cp_fm_create(fm_Sigma_x_Gamma(ispin), bs_env%fm_s_Gamma%matrix_struct)
1711 : END DO
1712 :
1713 0 : CALL dbcsr_create(mat_Sigma_x_Gamma, template=bs_env%mat_ao_ao%matrix)
1714 :
1715 : ! =========================================================================
1716 : ! 1. SETUP CORE TOPOLOGIES
1717 : ! =========================================================================
1718 0 : CALL setup_square_topology(mat_phi_mu_l, 'ROW', dist_grid_grid, blk_grid, dist_col_grid)
1719 0 : CALL setup_square_topology(mat_Z_lP, 'COL', dist_aux_aux, blk_aux, dist_row_aux)
1720 :
1721 : ! =========================================================================
1722 : ! 2. COMPUTE V^tr_ll'
1723 : ! =========================================================================
1724 : CALL RI_2c_integral_mat(qs_env, fm_Vtr_Gamma, bs_env%fm_RI_RI, bs_env%n_RI, &
1725 0 : bs_env%trunc_coulomb, do_kpoints=.FALSE.)
1726 :
1727 : ! M^-1 * V^tr * M^-1 directly modifies fm_Vtr_Gamma(:, 1)
1728 0 : CALL multiply_fm_W_MIC_time_with_Minv_Gamma(bs_env, qs_env, fm_Vtr_Gamma(:, 1))
1729 :
1730 0 : CALL dbcsr_create(matrix_V_aux, "V_aux", dist_aux_aux, dbcsr_type_no_symmetry, blk_aux, blk_aux)
1731 0 : CALL dbcsr_create(matrix_V_grid, "V_grid", dist_grid_grid, dbcsr_type_no_symmetry, blk_grid, blk_grid)
1732 :
1733 0 : CALL copy_fm_to_dbcsr(fm_Vtr_Gamma(1, 1), matrix_V_aux, keep_sparsity=.FALSE.)
1734 :
1735 : ! V^tr_ll' = sum_PQ Z_lP V^trunc_PQ Z_l'Q
1736 0 : CALL contract_A_B_A("N", "T", mat_Z_lP, matrix_V_aux, matrix_V_grid, bs_env%eps_filter)
1737 0 : CALL dbcsr_release(matrix_V_aux)
1738 :
1739 : ! =========================================================================
1740 : ! 3. SPIN LOOP FOR EXACT EXCHANGE
1741 : ! =========================================================================
1742 0 : DO ispin = 1, bs_env%n_spin
1743 :
1744 : ! Density matrix on grid is essentially G_occ at tau = 0.0
1745 : ! D_µν = sum_n^occ C_µn(k=0) C_νn(k=0)
1746 : ! D_ll' = sum_µν Φ_µ(r_l) D_µν Φ_ν(r_l')
1747 0 : CALL dbcsr_create(matrix_D_grid, "D_grid", dist_grid_grid, dbcsr_type_no_symmetry, blk_grid, blk_grid)
1748 0 : CALL build_G_grid(bs_env, 0.0_dp, ispin, .TRUE., .FALSE., mat_phi_mu_l, matrix_D_grid, bs_env%eps_filter)
1749 :
1750 : ! Element-wise Hadamard product: Σ^x_grid = D_grid ◦ V_grid
1751 : ! Σ^x_ll' = D_ll' * V^tr_ll'
1752 0 : CALL dbcsr_create(matrix_Sigma_x_grid, template=matrix_V_grid)
1753 0 : CALL hadamard_product(matrix_D_grid, matrix_V_grid, matrix_Sigma_x_grid, 1.0_dp)
1754 :
1755 0 : CALL dbcsr_release(matrix_D_grid)
1756 :
1757 : ! Transform back to AO basis: Σ^x_ao = -1.0 * phi^T * Σ^x_grid * phi
1758 : ! Σ^x_λσ(k=0) = -sum_ll' Φ_λ(r_l) Σ^x_ll' Φ_σ(r_l')
1759 0 : CALL contract_A_B_A("T", "N", mat_phi_mu_l, matrix_Sigma_x_grid, mat_Sigma_x_Gamma, bs_env%eps_filter)
1760 0 : CALL dbcsr_scale(mat_Sigma_x_Gamma, -1.0_dp)
1761 :
1762 0 : CALL dbcsr_release(matrix_Sigma_x_grid)
1763 :
1764 : ! Data I/O and Export to CP2K Full Matrices
1765 0 : CALL copy_dbcsr_to_fm(mat_Sigma_x_Gamma, fm_Sigma_x_Gamma(ispin))
1766 :
1767 : END DO ! ispin
1768 :
1769 0 : IF (bs_env%unit_nr > 0) THEN
1770 : WRITE (bs_env%unit_nr, '(T2,A,T58,A,F7.1,A)') &
1771 0 : 'Computed Σ^x(k=0),', ' Execution time', m_walltime() - t1, ' s'
1772 0 : WRITE (bs_env%unit_nr, '(A)') ' '
1773 : END IF
1774 :
1775 : ! =========================================================================
1776 : ! 4. CLEANUP
1777 : ! =========================================================================
1778 : CALL release_dbcsr_topology_and_matrices(dist=dist_grid_grid, mapped_dist=dist_col_grid, &
1779 0 : m1=mat_Sigma_x_Gamma, m2=matrix_V_grid)
1780 0 : CALL release_dbcsr_topology_and_matrices(dist=dist_aux_aux, mapped_dist=dist_row_aux)
1781 :
1782 0 : CALL cp_fm_release(fm_Vtr_Gamma)
1783 :
1784 0 : CALL timestop(handle)
1785 :
1786 0 : END SUBROUTINE compute_Sigma_x
1787 :
1788 : ! **************************************************************************************************
1789 : !> \brief Computes the correlation part of the GW self-energy
1790 : !> \param bs_env ...
1791 : !> \param fm_W_time ...
1792 : !> \param mat_phi_mu_l ...
1793 : !> \param mat_Z_lP ...
1794 : !> \param fm_Sigma_c_Gamma_time ...
1795 : ! **************************************************************************************************
1796 :
1797 0 : SUBROUTINE compute_Sigma_c(bs_env, fm_W_time, mat_phi_mu_l, mat_Z_lP, fm_Sigma_c_Gamma_time)
1798 :
1799 : TYPE(post_scf_bandstructure_type), POINTER :: bs_env
1800 : TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:) :: fm_W_time
1801 : TYPE(dbcsr_type), INTENT(INOUT) :: mat_phi_mu_l, mat_Z_lP
1802 : TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:, :, :) :: fm_Sigma_c_Gamma_time
1803 :
1804 : CHARACTER(LEN=*), PARAMETER :: routineN = 'compute_Sigma_c'
1805 :
1806 : INTEGER :: handle, i_t, ispin
1807 0 : INTEGER, DIMENSION(:), POINTER :: blk_aux, blk_grid, dist_col_grid, &
1808 0 : dist_row_aux
1809 : REAL(KIND=dp) :: t1, tau
1810 : TYPE(dbcsr_distribution_type) :: dist_aux_aux, dist_grid_grid
1811 0 : TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: mat_Sigma_neg_tau, mat_Sigma_pos_tau
1812 : TYPE(dbcsr_type) :: matrix_G_occ_grid, matrix_G_vir_grid, matrix_Sigma_neg_grid, &
1813 : matrix_Sigma_pos_grid, matrix_W_aux, matrix_W_grid
1814 :
1815 0 : CALL timeset(routineN, handle)
1816 :
1817 : ! =========================================================================
1818 : ! 1. SETUP CORE TOPOLOGIES AND PRE-ALLOCATE OUTPUT ARRAYS
1819 : ! =========================================================================
1820 0 : CALL setup_square_topology(mat_phi_mu_l, 'ROW', dist_grid_grid, blk_grid, dist_col_grid)
1821 0 : CALL setup_square_topology(mat_Z_lP, 'COL', dist_aux_aux, blk_aux, dist_row_aux)
1822 :
1823 : ! Pre-allocate local DBCSR matrices to act as targets for final output
1824 0 : NULLIFY (mat_Sigma_neg_tau, mat_Sigma_pos_tau)
1825 0 : ALLOCATE (mat_Sigma_neg_tau(bs_env%num_time_freq_points, bs_env%n_spin))
1826 0 : ALLOCATE (mat_Sigma_pos_tau(bs_env%num_time_freq_points, bs_env%n_spin))
1827 :
1828 0 : DO i_t = 1, bs_env%num_time_freq_points
1829 0 : DO ispin = 1, bs_env%n_spin
1830 0 : ALLOCATE (mat_Sigma_neg_tau(i_t, ispin)%matrix)
1831 0 : ALLOCATE (mat_Sigma_pos_tau(i_t, ispin)%matrix)
1832 0 : CALL dbcsr_create(mat_Sigma_neg_tau(i_t, ispin)%matrix, template=bs_env%mat_ao_ao%matrix)
1833 0 : CALL dbcsr_create(mat_Sigma_pos_tau(i_t, ispin)%matrix, template=bs_env%mat_ao_ao%matrix)
1834 : END DO
1835 : END DO
1836 :
1837 : ! =========================================================================
1838 : ! 2. MAIN IMAGINARY TIME LOOP
1839 : ! =========================================================================
1840 0 : DO i_t = 1, bs_env%num_time_freq_points
1841 0 : tau = bs_env%imag_time_points(i_t)
1842 :
1843 : ! -------------------------------------------------------------------
1844 : ! Compute W_grid = Z * W_aux * Z^T
1845 : ! -------------------------------------------------------------------
1846 0 : CALL dbcsr_create(matrix_W_aux, "W_aux", dist_aux_aux, dbcsr_type_no_symmetry, blk_aux, blk_aux)
1847 0 : CALL dbcsr_create(matrix_W_grid, "W_grid", dist_grid_grid, dbcsr_type_no_symmetry, blk_grid, blk_grid)
1848 :
1849 0 : CALL copy_fm_to_dbcsr(fm_W_time(i_t), matrix_W_aux, keep_sparsity=.FALSE.)
1850 :
1851 : ! W^MIC_ll'(iτ,k=0) = sum_PQ Z_lP W^MIC_PQ(iτ) Z_l'Q
1852 0 : CALL contract_A_B_A("N", "T", mat_Z_lP, matrix_W_aux, matrix_W_grid, bs_env%eps_filter)
1853 :
1854 0 : CALL dbcsr_release(matrix_W_aux) ! Clean up aux basis immediately
1855 :
1856 0 : DO ispin = 1, bs_env%n_spin
1857 0 : t1 = m_walltime()
1858 :
1859 : ! -------------------------------------------------------------------
1860 : ! A. Transform Green's Functions to the Grid
1861 : ! -------------------------------------------------------------------
1862 0 : CALL dbcsr_create(matrix_G_occ_grid, "G_occ_grid", dist_grid_grid, dbcsr_type_no_symmetry, blk_grid, blk_grid)
1863 0 : CALL dbcsr_create(matrix_G_vir_grid, "G_vir_grid", dist_grid_grid, dbcsr_type_no_symmetry, blk_grid, blk_grid)
1864 :
1865 : ! G^occ_µλ(i|τ|,k=0) = sum_G^occ_µλn^occ C_µn(k=0) e^(-|(ϵ_nk=0-ϵ_F)τ|) C_λn(k=0)
1866 : ! G^occ_ll'(i|τ|,k=0) = sum_µν Φ_µ(r_l) G^occ_µν Φ_ν(r_l')
1867 0 : CALL build_G_grid(bs_env, tau, ispin, .TRUE., .FALSE., mat_phi_mu_l, matrix_G_occ_grid, bs_env%eps_filter)
1868 :
1869 : ! G^vir_µλ(i|τ|,k=0) = sum_n^vir C_µn(k=0) e^(-|(ϵ_nk=0-ϵ_F)τ|) C_λn(k=0)
1870 : ! G^vir_ll'(i|τ|,k=0) = sum_µν Φ_µ(r_l) G^vir_µν Φ_ν(r_l')
1871 0 : CALL build_G_grid(bs_env, tau, ispin, .FALSE., .TRUE., mat_phi_mu_l, matrix_G_vir_grid, bs_env%eps_filter)
1872 :
1873 : ! -------------------------------------------------------------------
1874 : ! B. Element-wise Hadamard Products for Sigma_c on Grid
1875 : ! Σ_neg_grid = G_occ_grid ◦ W_grid
1876 : ! Σ_pos_grid = G_vir_grid ◦ W_grid
1877 : ! -------------------------------------------------------------------
1878 0 : CALL dbcsr_create(matrix_Sigma_neg_grid, template=matrix_W_grid)
1879 0 : CALL dbcsr_create(matrix_Sigma_pos_grid, template=matrix_W_grid)
1880 :
1881 : ! Σ^c_ll'(iτ,k=0) = -G^occ_ll'(i|τ|,k=0) * W^MIC_ll'(iτ,k=0), for τ < 0
1882 0 : CALL hadamard_product(matrix_G_occ_grid, matrix_W_grid, matrix_Sigma_neg_grid, 1.0_dp)
1883 :
1884 : ! Σ^c_ll'(iτ,k=0) = G^vir_ll'(i|τ|,k=0) * W^MIC_ll'(iτ,k=0), for τ > 0
1885 0 : CALL hadamard_product(matrix_G_vir_grid, matrix_W_grid, matrix_Sigma_pos_grid, 1.0_dp)
1886 :
1887 : ! Instantly purge massive G_grid arrays to save memory
1888 0 : CALL dbcsr_release(matrix_G_occ_grid)
1889 0 : CALL dbcsr_release(matrix_G_vir_grid)
1890 :
1891 : ! -------------------------------------------------------------------
1892 : ! C. Transform Sigma back to AO Basis
1893 : ! Σ_AO = phi^T * Σ_grid * phi
1894 : ! -------------------------------------------------------------------
1895 :
1896 : ! Σ^c_λσ(iτ,k=0) = sum_ll' Φ_λ(r_l) Σ^c_ll'(iτ,k=0) Φ_σ(r_l'), for τ < 0
1897 : CALL contract_A_B_A("T", "N", mat_phi_mu_l, matrix_Sigma_neg_grid, &
1898 0 : mat_Sigma_neg_tau(i_t, ispin)%matrix, bs_env%eps_filter)
1899 0 : CALL dbcsr_scale(mat_Sigma_neg_tau(i_t, ispin)%matrix, -1.0_dp)
1900 :
1901 : ! Σ^c_λσ(iτ,k=0) = sum_ll' Φ_λ(r_l) Σ^c_ll'(iτ,k=0) Φ_σ(r_l'), for τ > 0
1902 : CALL contract_A_B_A("T", "N", mat_phi_mu_l, matrix_Sigma_pos_grid, &
1903 0 : mat_Sigma_pos_tau(i_t, ispin)%matrix, bs_env%eps_filter)
1904 :
1905 : ! Purge Grid Sigma arrays
1906 0 : CALL dbcsr_release(matrix_Sigma_neg_grid)
1907 0 : CALL dbcsr_release(matrix_Sigma_pos_grid)
1908 :
1909 0 : IF (bs_env%unit_nr > 0) THEN
1910 : WRITE (bs_env%unit_nr, '(T2,A,I10,A,I3,A,F7.1,A)') &
1911 0 : 'Computed Σ^c(iτ,k=0) for time point ', i_t, ' /', bs_env%num_time_freq_points, &
1912 0 : ', Execution time', m_walltime() - t1, ' s'
1913 : END IF
1914 :
1915 : END DO ! ispin
1916 :
1917 : ! Release the W_grid for this time point
1918 0 : CALL dbcsr_release(matrix_W_grid)
1919 :
1920 : END DO ! i_t
1921 :
1922 0 : IF (bs_env%unit_nr > 0) WRITE (bs_env%unit_nr, '(A)') ' '
1923 :
1924 : ! -------------------------------------------------------------------------
1925 : ! 3. FINALIZE AND CLEANUP
1926 : ! -------------------------------------------------------------------------
1927 : CALL fill_fm_Sigma_c_Gamma_time(fm_Sigma_c_Gamma_time, bs_env, &
1928 0 : mat_Sigma_pos_tau, mat_Sigma_neg_tau)
1929 :
1930 0 : CALL cp_fm_release(fm_W_time)
1931 :
1932 0 : CALL dbcsr_deallocate_matrix_set(mat_Sigma_neg_tau)
1933 0 : CALL dbcsr_deallocate_matrix_set(mat_Sigma_pos_tau)
1934 :
1935 0 : CALL release_dbcsr_topology_and_matrices(dist=dist_grid_grid, mapped_dist=dist_col_grid)
1936 0 : CALL release_dbcsr_topology_and_matrices(dist=dist_aux_aux, mapped_dist=dist_row_aux)
1937 :
1938 0 : CALL delete_unnecessary_files(bs_env)
1939 0 : CALL timestop(handle)
1940 :
1941 0 : END SUBROUTINE compute_Sigma_c
1942 :
1943 : ! **************************************************************************************************
1944 : !> \brief DBCSR Topology Generation
1945 : !> \param matrix_template ...
1946 : !> \param dim_type ...
1947 : !> \param square_dist ...
1948 : !> \param blk_sizes ...
1949 : !> \param mapped_dist ...
1950 : ! **************************************************************************************************
1951 :
1952 0 : SUBROUTINE setup_square_topology(matrix_template, dim_type, square_dist, blk_sizes, mapped_dist)
1953 :
1954 : TYPE(dbcsr_type), INTENT(IN) :: matrix_template
1955 : CHARACTER(LEN=*), INTENT(IN) :: dim_type
1956 : TYPE(dbcsr_distribution_type), INTENT(OUT) :: square_dist
1957 : INTEGER, DIMENSION(:), INTENT(OUT), POINTER :: blk_sizes, mapped_dist
1958 :
1959 : INTEGER :: i, np
1960 0 : INTEGER, DIMENSION(:), POINTER :: col_blk, col_dist, row_blk, row_dist
1961 : TYPE(dbcsr_distribution_type) :: dist_template
1962 :
1963 : CALL dbcsr_get_info(matrix_template, distribution=dist_template, &
1964 0 : row_blk_size=row_blk, col_blk_size=col_blk)
1965 0 : CALL dbcsr_distribution_get(dist_template, row_dist=row_dist, col_dist=col_dist)
1966 :
1967 0 : IF (TRIM(dim_type) == 'ROW') THEN
1968 : ! Creates ROW x ROW (e.g., Grid x Grid from mat_phi_mu_l)
1969 0 : blk_sizes => row_blk
1970 0 : np = MAXVAL(col_dist) + 1 ! npcol
1971 0 : ALLOCATE (mapped_dist(SIZE(blk_sizes)))
1972 0 : DO i = 1, SIZE(blk_sizes)
1973 0 : mapped_dist(i) = MOD(i - 1, np)
1974 : END DO
1975 : CALL dbcsr_distribution_new(square_dist, template=dist_template, &
1976 0 : row_dist=row_dist, col_dist=mapped_dist)
1977 :
1978 0 : ELSE IF (TRIM(dim_type) == 'COL') THEN
1979 : ! Creates COL x COL (e.g., Aux x Aux from mat_Z_lP)
1980 0 : blk_sizes => col_blk
1981 0 : np = MAXVAL(row_dist) + 1 ! nprow
1982 0 : ALLOCATE (mapped_dist(SIZE(blk_sizes)))
1983 0 : DO i = 1, SIZE(blk_sizes)
1984 0 : mapped_dist(i) = MOD(i - 1, np)
1985 : END DO
1986 : CALL dbcsr_distribution_new(square_dist, template=dist_template, &
1987 0 : row_dist=mapped_dist, col_dist=col_dist)
1988 : END IF
1989 :
1990 0 : END SUBROUTINE setup_square_topology
1991 :
1992 : ! **************************************************************************************************
1993 : !> \brief DBCSR matrices deallocation
1994 : !> \param dist ...
1995 : !> \param mapped_dist ...
1996 : !> \param m1 ...
1997 : !> \param m2 ...
1998 : !> \param m3 ...
1999 : !> \param m4 ...
2000 : ! **************************************************************************************************
2001 :
2002 0 : SUBROUTINE release_dbcsr_topology_and_matrices(dist, mapped_dist, m1, m2, m3, m4)
2003 :
2004 : TYPE(dbcsr_distribution_type), INTENT(INOUT), &
2005 : OPTIONAL :: dist
2006 : INTEGER, DIMENSION(:), INTENT(INOUT), OPTIONAL, &
2007 : POINTER :: mapped_dist
2008 : TYPE(dbcsr_type), INTENT(INOUT), OPTIONAL :: m1, m2, m3, m4
2009 :
2010 0 : IF (PRESENT(dist)) CALL dbcsr_distribution_release(dist)
2011 0 : IF (PRESENT(mapped_dist)) THEN
2012 0 : IF (ASSOCIATED(mapped_dist)) THEN
2013 0 : DEALLOCATE (mapped_dist)
2014 : NULLIFY (mapped_dist)
2015 : END IF
2016 : END IF
2017 0 : IF (PRESENT(m1)) CALL dbcsr_release(m1)
2018 0 : IF (PRESENT(m2)) CALL dbcsr_release(m2)
2019 0 : IF (PRESENT(m3)) CALL dbcsr_release(m3)
2020 0 : IF (PRESENT(m4)) CALL dbcsr_release(m4)
2021 :
2022 0 : END SUBROUTINE release_dbcsr_topology_and_matrices
2023 :
2024 : END MODULE gw_large_cell_Gamma_ri_rs
|