Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2026 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief GW using RI-RS Approximation
10 : !> \par History
11 : !> 04.2026 created [Ritaj Tyagi]
12 : ! **************************************************************************************************
13 : MODULE gw_large_cell_Gamma_ri_rs
14 : USE atomic_kind_types, ONLY: atomic_kind_type
15 : USE basis_set_types, ONLY: gto_basis_set_type
16 : USE cell_types, ONLY: cell_type,&
17 : pbc
18 : USE cp_dbcsr_api, ONLY: &
19 : dbcsr_add, dbcsr_binary_read, dbcsr_binary_write, dbcsr_copy, dbcsr_create, &
20 : dbcsr_distribution_get, dbcsr_distribution_new, dbcsr_distribution_release, &
21 : dbcsr_distribution_type, dbcsr_finalize, dbcsr_get_block_p, dbcsr_get_info, &
22 : dbcsr_iterator_blocks_left, dbcsr_iterator_next_block, dbcsr_iterator_start, &
23 : dbcsr_iterator_stop, dbcsr_iterator_type, dbcsr_multiply, dbcsr_p_type, dbcsr_put_block, &
24 : dbcsr_release, dbcsr_scale, dbcsr_set, dbcsr_type, dbcsr_type_no_symmetry
25 : USE cp_dbcsr_operations, ONLY: copy_dbcsr_to_fm,&
26 : copy_fm_to_dbcsr,&
27 : dbcsr_deallocate_matrix_set
28 : USE cp_files, ONLY: close_file,&
29 : open_file
30 : USE cp_fm_types, ONLY: cp_fm_create,&
31 : cp_fm_release,&
32 : cp_fm_type
33 : USE cp_log_handling, ONLY: cp_get_default_logger,&
34 : cp_logger_type
35 : USE cp_output_handling, ONLY: cp_p_file,&
36 : cp_print_key_should_output
37 : USE gw_integrals, ONLY: build_3c_integral_block
38 : USE gw_large_cell_gamma, ONLY: G_occ_vir,&
39 : compute_QP_energies,&
40 : delete_unnecessary_files,&
41 : fill_fm_Sigma_c_Gamma_time,&
42 : get_W_MIC,&
43 : multiply_fm_W_MIC_time_with_Minv_Gamma
44 : USE gw_utils, ONLY: de_init_bs_env
45 : USE input_section_types, ONLY: section_vals_type
46 : USE kinds, ONLY: dp
47 : USE machine, ONLY: m_walltime
48 : USE message_passing, ONLY: mp_para_env_type
49 : USE mp2_ri_2c, ONLY: RI_2c_integral_mat
50 : USE orbital_pointers, ONLY: indco,&
51 : ncoset
52 : USE particle_types, ONLY: particle_type
53 : USE post_scf_bandstructure_types, ONLY: post_scf_bandstructure_type
54 : USE qs_environment_types, ONLY: get_qs_env,&
55 : qs_environment_type
56 : USE qs_kind_types, ONLY: get_qs_kind,&
57 : qs_kind_type
58 : #include "./base/base_uses.f90"
59 :
60 : IMPLICIT NONE
61 :
62 : PRIVATE
63 :
64 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'gw_large_cell_Gamma_ri_rs'
65 :
66 : PUBLIC :: gw_calc_large_cell_Gamma_ri_rs
67 :
68 : CONTAINS
69 :
70 : ! **************************************************************************************************
71 : !> \brief GW calculation using RI-RS formalism
72 : !> \param qs_env ...
73 : !> \param bs_env ...
74 : ! **************************************************************************************************
75 :
76 10 : SUBROUTINE gw_calc_large_cell_Gamma_ri_rs(qs_env, bs_env)
77 :
78 : TYPE(qs_environment_type), POINTER :: qs_env
79 : TYPE(post_scf_bandstructure_type), POINTER :: bs_env
80 :
81 : CHARACTER(LEN=*), PARAMETER :: routineN = 'gw_calc_large_cell_Gamma_ri_rs'
82 :
83 : INTEGER :: handle
84 10 : TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:) :: fm_Sigma_x_Gamma, fm_W_MIC_time
85 10 : TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:, :, :) :: fm_Sigma_c_Gamma_time
86 :
87 10 : CALL timeset(routineN, handle)
88 :
89 : !!========================================================================
90 : !! 1. Grid Generation for RI-RS
91 : !! (Modified Lebedev grids from Ivan Duchemin and Xavier Blase)
92 : !! Generate flattened 1D array of grid points for RI-RS.
93 : !! Equation: r_g(k) = R_A + r_g(A)
94 : !!========================================================================
95 10 : CALL ri_rs_grid_assembler(qs_env, bs_env, bs_env%ri_rs%grid_points)
96 :
97 : !!========================================================================
98 : !! 2. Atomic Basis Evaluation
99 : !! Compute values of spherical atomic basis functions at grid points.
100 : !! Expression: Φ_μl = Φ_μ(r_l) (mat_phi_mu_l)
101 : !!========================================================================
102 : CALL atomic_basis_at_grid_point(qs_env, bs_env, bs_env%ri_rs%grid_points, &
103 10 : bs_env%ri_rs%mat_phi_mu_l)
104 :
105 : !!========================================================================
106 : !! 3. Compute RI-RS Coefficients (Z_lp)
107 : !! Solve the regularized system for each atom P, where the grid domain
108 : !! is restricted to r_l within a cutoff distance of atom P:
109 : !! a. D_ll' = [ Σ_μ Φ_μ(r_l) Φ_μ(r_l') ]^2 (Equation 13)
110 : !! b. D_lP = Σ_{μν} Φ_μ(r_l) Φ_ν(r_l) (μν|P) (Equation 15)
111 : !! c. Conditioning:
112 : !! Dvec_l = 1 / sqrt(D_ll) (Diagonal scaling vector)
113 : !! D'_ll' = Dvec_l * D_ll' * Dvec_l' + λδ_ll'
114 : !! D'_lP = Dvec_l * D_lP
115 : !! d. Solve: Σ_l' D'_ll' * Z'_l'P = D'_lP (Equation 14)
116 : !! e. Rescale: Z_lP = Z'_lP * Dvec_l (Z_lP stored in mat_Z_lP)
117 : !!========================================================================
118 : CALL compute_coeff_Z_lP(qs_env, bs_env, bs_env%ri_rs%grid_points, &
119 10 : bs_env%ri_rs%mat_phi_mu_l, bs_env%ri_rs%mat_Z_lP)
120 :
121 : !!========================================================================
122 : !! 4. Compute Independent-Particle Polarizability (χ)
123 : !! G^occ_µλ(i|τ|,k=0) = sum_n^occ C_µn(k=0) e^(-|(ϵ_nk=0-ϵ_F)τ|) C_λn(k=0)
124 : !! G^vir_µλ(i|τ|,k=0) = sum_n^vir C_µn(k=0) e^(-|(ϵ_nk=0-ϵ_F)τ|) C_λn(k=0)
125 : !! G^occ_ll'(i|τ|,k=0) = sum_µν Φ_µ(r_l) G^occ_µν Φ_ν(r_l')
126 : !! G^vir_ll'(i|τ|,k=0) = sum_µν Φ_µ(r_l) G^vir_µν Φ_ν(r_l')
127 : !! χ_ll'(iτ,k=0) = G^occ_ll'(i|τ|,k=0) * G^vir_ll'(i|τ|,k=0)
128 : !! χ_PQ(iτ,k=0) = sum_ll' Z_lP χ_ll'(iτ,k=0) Z_l'Q
129 : !!========================================================================
130 : CALL get_mat_chi_Gamma_tau(bs_env, bs_env%mat_chi_Gamma_tau, &
131 10 : bs_env%ri_rs%mat_phi_mu_l, bs_env%ri_rs%mat_Z_lP)
132 :
133 : !!========================================================================
134 : !! 5. Compute Screened Interaction (W^MIC)
135 : !! χ_PQ(iτ,k=0) -> χ_PQ(iω,k) -> ε_PQ(iω,k) -> W_PQ(iω,k) -> W^MIC_PQ(iτ)
136 : !!========================================================================
137 10 : CALL get_W_MIC(bs_env, qs_env, bs_env%mat_chi_Gamma_tau, fm_W_MIC_time)
138 :
139 : !!========================================================================
140 : !! 6. Compute Exact Exchange Self-Energy (Σ^x)
141 : !! D_µν = sum_n^occ C_µn(k=0) C_νn(k=0)
142 : !! D_ll' = sum_µν Φ_µ(r_l) D_µν Φ_ν(r_l')
143 : !! V^trunc_ll' = sum_PQ Z_lP V^trunc_PQ Z_l'Q
144 : !! Σ^x_ll' = D_ll' * V^trunc_ll'
145 : !! Σ^x_λσ(k=0) = -sum_ll' Φ_λ(r_l) Σ^x_ll' Φ_σ(r_l')
146 : !!========================================================================
147 : CALL compute_Sigma_x(bs_env, qs_env, bs_env%ri_rs%mat_phi_mu_l, &
148 10 : bs_env%ri_rs%mat_Z_lP, fm_Sigma_x_Gamma)
149 :
150 : !!========================================================================
151 : !! 7. Compute Correlation Self-Energy (Σ^c)
152 : !! W^MIC_ll'(iτ,k=0) = sum_PQ Z_lP W^MIC_PQ(iτ) Z_l'Q
153 : !! Σ^c_ll'(iτ,k=0) = -G^occ_ll'(i|τ|,k=0) * W^MIC_ll'(iτ,k=0), for τ < 0
154 : !! Σ^c_ll'(iτ,k=0) = G^vir_ll'(i|τ|,k=0) * W^MIC_ll'(iτ,k=0), for τ > 0
155 : !! Σ^c_λσ(iτ,k=0) = sum_ll' Φ_λ(r_l) Σ^c_ll'(iτ,k=0) Φ_σ(r_l')
156 : !!========================================================================
157 : CALL compute_Sigma_c(bs_env, fm_W_MIC_time, bs_env%ri_rs%mat_phi_mu_l, &
158 10 : bs_env%ri_rs%mat_Z_lP, fm_Sigma_c_Gamma_time)
159 :
160 : !!========================================================================
161 : !! 8. Compute Quasiparticle Energies
162 : !! Σ^c_λσ(iτ,k=0) -> Σ^c_nn(ϵ,k)
163 : !! ϵ_nk^GW = ϵ_nk^DFT + Σ^c_nn(ϵ,k) + Σ^x_nn(k) - v^xc_nn(k)
164 : !!========================================================================
165 10 : CALL compute_QP_energies(bs_env, qs_env, fm_Sigma_x_Gamma, fm_Sigma_c_Gamma_time)
166 :
167 10 : CALL de_init_bs_env(bs_env)
168 :
169 10 : CALL timestop(handle)
170 :
171 10 : END SUBROUTINE gw_calc_large_cell_Gamma_ri_rs
172 :
173 : ! **************************************************************************************************
174 : !> \brief Compute grid points for RI-RS
175 : !> Right now based on Ivan and Xavier implementation
176 : !> JCP 150, 174120 (2019), JCTC 17, 2383 (2021)
177 : !> \param qs_env ...
178 : !> \param bs_env ...
179 : !> \param ri_rs_grid_points ...
180 : ! **************************************************************************************************
181 :
182 10 : SUBROUTINE ri_rs_grid_assembler(qs_env, bs_env, ri_rs_grid_points)
183 :
184 : TYPE(qs_environment_type), POINTER :: qs_env
185 : TYPE(post_scf_bandstructure_type), POINTER :: bs_env
186 : REAL(KIND=dp), ALLOCATABLE, INTENT(OUT) :: ri_rs_grid_points(:, :)
187 :
188 : CHARACTER(LEN=*), PARAMETER :: routineN = 'ri_rs_grid_assembler'
189 :
190 : INTEGER :: atom_idx, end_idx, handle, ikind, j, &
191 : natom, nkind, start_idx, &
192 : total_grid_npts
193 10 : INTEGER, ALLOCATABLE :: atom_to_kind(:), ri_rs_grid_offsets(:)
194 : REAL(KIND=dp) :: atomic_center(3)
195 10 : TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
196 10 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
197 :
198 10 : CALL timeset(routineN, handle)
199 :
200 : !! Get the information about the atoms in the system
201 10 : CALL get_qs_env(qs_env, atomic_kind_set=atomic_kind_set, particle_set=particle_set)
202 :
203 10 : nkind = SIZE(atomic_kind_set)
204 10 : natom = SIZE(particle_set)
205 :
206 : !! 1. Build the grid cache
207 10 : CALL build_grid_cache(bs_env, atomic_kind_set)
208 :
209 : !! 2. Calculate grid counts and offsets
210 30 : ALLOCATE (ri_rs_grid_offsets(natom + 1))
211 30 : ALLOCATE (atom_to_kind(natom))
212 :
213 10 : total_grid_npts = 0
214 28 : DO ikind = 1, nkind
215 56 : DO j = 1, SIZE(atomic_kind_set(ikind)%atom_list)
216 28 : atom_idx = atomic_kind_set(ikind)%atom_list(j)
217 28 : atom_to_kind(atom_idx) = ikind
218 28 : ri_rs_grid_offsets(atom_idx) = total_grid_npts + 1
219 46 : total_grid_npts = total_grid_npts + bs_env%ri_rs%grid_cache(ikind)%npts
220 : END DO
221 : END DO
222 :
223 10 : ri_rs_grid_offsets(natom + 1) = total_grid_npts + 1
224 :
225 10 : IF (bs_env%unit_nr > 0) THEN
226 : WRITE (bs_env%unit_nr, FMT="(T2,A,T69,I12)") &
227 5 : 'Total grid points used for RI-RS:', total_grid_npts
228 5 : WRITE (bs_env%unit_nr, "(A)") ' '
229 : END IF
230 :
231 : !! 3. Allocate the global ri_rs_grid arrays
232 30 : ALLOCATE (ri_rs_grid_points(3, total_grid_npts))
233 :
234 : !! 4. Parallelize the grid generation loop
235 : !$OMP PARALLEL DO DEFAULT(NONE) &
236 : !$OMP SHARED(ri_rs_grid_points, ri_rs_grid_offsets, atom_to_kind, &
237 : !$OMP particle_set, bs_env, natom) &
238 : !$OMP PRIVATE(atom_idx, ikind, atomic_center, start_idx, end_idx) &
239 10 : !$OMP SCHEDULE(DYNAMIC, 1)
240 : DO atom_idx = 1, natom
241 : ikind = atom_to_kind(atom_idx)
242 : atomic_center(:) = particle_set(atom_idx)%r(:)
243 :
244 : start_idx = ri_rs_grid_offsets(atom_idx)
245 : end_idx = ri_rs_grid_offsets(atom_idx + 1) - 1
246 :
247 : !! Shift the cached origin grid by the atom's center
248 : ri_rs_grid_points(1, start_idx:end_idx) = bs_env%ri_rs%grid_cache(ikind)%raw_points(1, :) + atomic_center(1)
249 : ri_rs_grid_points(2, start_idx:end_idx) = bs_env%ri_rs%grid_cache(ikind)%raw_points(2, :) + atomic_center(2)
250 : ri_rs_grid_points(3, start_idx:end_idx) = bs_env%ri_rs%grid_cache(ikind)%raw_points(3, :) + atomic_center(3)
251 :
252 : END DO
253 : !$OMP END PARALLEL DO
254 :
255 : !! 5. Cleanup memory
256 10 : IF (ALLOCATED(bs_env%ri_rs%grid_cache)) THEN
257 28 : DO ikind = 1, nkind
258 28 : IF (ALLOCATED(bs_env%ri_rs%grid_cache(ikind)%raw_points)) DEALLOCATE (bs_env%ri_rs%grid_cache(ikind)%raw_points)
259 : END DO
260 28 : DEALLOCATE (bs_env%ri_rs%grid_cache)
261 : END IF
262 :
263 10 : DEALLOCATE (atom_to_kind, ri_rs_grid_offsets)
264 10 : CALL timestop(handle)
265 :
266 20 : END SUBROUTINE ri_rs_grid_assembler
267 :
268 : ! **************************************************************************************************
269 : !> \brief Reads grids from .ion files and stores them in memory based on grid_select
270 : !> \param bs_env ...
271 : !> \param atomic_kind_set ...
272 : ! **************************************************************************************************
273 :
274 10 : SUBROUTINE build_grid_cache(bs_env, atomic_kind_set)
275 :
276 : TYPE(post_scf_bandstructure_type), POINTER :: bs_env
277 : TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
278 :
279 : CHARACTER(LEN=256) :: atom_sym, filename, full_path, line, &
280 : suffix
281 : INTEGER :: colon_idx, i, ierr, ikind, iunit, nkind
282 : REAL(KIND=dp) :: pt(3)
283 :
284 : ! Determine the file suffix based on the user's choice
285 10 : IF (bs_env%ri_rs%grid_select == 1) THEN
286 10 : suffix = "_def2-tzvp-rs.ion"
287 0 : ELSE IF (bs_env%ri_rs%grid_select == 2) THEN
288 0 : suffix = "_cc-pvtz-rs.ion"
289 : ELSE
290 0 : CPABORT("Unknown grid_select value. Valid options are 1 (def2-TZVPP) or 2 (cc-pVTZ).")
291 : END IF
292 :
293 10 : nkind = SIZE(atomic_kind_set)
294 48 : IF (.NOT. ALLOCATED(bs_env%ri_rs%grid_cache)) ALLOCATE (bs_env%ri_rs%grid_cache(nkind))
295 :
296 28 : DO ikind = 1, nkind
297 18 : atom_sym = TRIM(atomic_kind_set(ikind)%element_symbol)
298 18 : filename = TRIM(atom_sym)//TRIM(suffix)
299 :
300 18 : full_path = "ri_rs_grid/"//TRIM(filename)
301 :
302 : CALL open_file(file_name=TRIM(full_path), unit_number=iunit, &
303 18 : file_action="READ", file_status="OLD")
304 :
305 : ! Parse the preamble for 'n points'
306 18 : bs_env%ri_rs%grid_cache(ikind)%npts = 0
307 : DO
308 612 : READ (iunit, '(A)', IOSTAT=ierr) line
309 612 : IF (ierr /= 0) EXIT
310 612 : IF (INDEX(line, 'n points') > 0) THEN
311 18 : colon_idx = INDEX(line, ':')
312 18 : READ (line(colon_idx + 1:), *) bs_env%ri_rs%grid_cache(ikind)%npts
313 18 : EXIT
314 : END IF
315 : END DO
316 :
317 : ! Allocate the cache array for this specific element
318 54 : ALLOCATE (bs_env%ri_rs%grid_cache(ikind)%raw_points(3, bs_env%ri_rs%grid_cache(ikind)%npts))
319 :
320 : ! Fast-forward to the <grid_points> tag
321 18 : REWIND (iunit)
322 : DO
323 648 : READ (iunit, '(A)', IOSTAT=ierr) line
324 648 : IF (ierr /= 0) EXIT
325 648 : IF (INDEX(line, '<grid_points>') > 0) EXIT
326 : END DO
327 :
328 : ! Read the raw grid coordinates
329 4466 : DO i = 1, bs_env%ri_rs%grid_cache(ikind)%npts
330 4448 : READ (iunit, *, IOSTAT=ierr) pt(1), pt(2), pt(3)
331 4448 : IF (ierr /= 0) THEN
332 0 : CPABORT("Unexpected EOF in grid file ")
333 : END IF
334 17810 : bs_env%ri_rs%grid_cache(ikind)%raw_points(:, i) = pt(:)
335 : END DO
336 :
337 28 : CALL close_file(unit_number=iunit)
338 : END DO
339 :
340 10 : END SUBROUTINE build_grid_cache
341 :
342 : ! **************************************************************************************************
343 : !> \brief Evaluates atomic basis functions on a real-space grid and builds a sparse DBCSR matrix.
344 : !> \param qs_env ...
345 : !> \param bs_env ...
346 : !> \param ri_rs_grid_points ...
347 : !> \param mat_phi_mu_l ...
348 : ! **************************************************************************************************
349 :
350 10 : SUBROUTINE atomic_basis_at_grid_point(qs_env, bs_env, ri_rs_grid_points, mat_phi_mu_l)
351 :
352 : TYPE(qs_environment_type), POINTER :: qs_env
353 : TYPE(post_scf_bandstructure_type), POINTER :: bs_env
354 : REAL(KIND=dp), ALLOCATABLE, INTENT(INOUT) :: ri_rs_grid_points(:, :)
355 : TYPE(dbcsr_type), INTENT(OUT) :: mat_phi_mu_l
356 :
357 : CHARACTER(LEN=*), PARAMETER :: routineN = 'atomic_basis_at_grid_point'
358 :
359 : INTEGER :: c_size, chunk_size, dimen_ORB, handle, i, i_blk, iatom, natom, npcol, nprow, &
360 : num_grid_chunks, r_end, r_start, total_grid_npts
361 : INTEGER, ALLOCATABLE, DIMENSION(:) :: first_sgf
362 10 : INTEGER, DIMENSION(:), POINTER :: c_blk_sizes, col_dist, col_dist_ks, &
363 10 : r_blk_sizes, row_dist, row_dist_ks
364 10 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: atom_col_buffer
365 10 : TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
366 : TYPE(cell_type), POINTER :: cell
367 : TYPE(dbcsr_distribution_type) :: dist
368 : TYPE(dbcsr_distribution_type), POINTER :: dbcsr_dist_ks
369 : TYPE(mp_para_env_type), POINTER :: para_env
370 10 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
371 10 : TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
372 :
373 10 : CALL timeset(routineN, handle)
374 :
375 : ! Setup Grid Blocking
376 : ! Right now using 256
377 10 : chunk_size = bs_env%ri_rs%chunk_size_dbcsr
378 :
379 : ! Extract environment variables
380 : CALL get_qs_env(qs_env, cell=cell, atomic_kind_set=atomic_kind_set, &
381 : qs_kind_set=qs_kind_set, particle_set=particle_set, &
382 10 : para_env=para_env)
383 :
384 10 : natom = SIZE(particle_set)
385 10 : total_grid_npts = SIZE(ri_rs_grid_points, 2)
386 :
387 : ! Map the starting indices of spherical gaussian functions (SGF) for each atom
388 30 : ALLOCATE (first_sgf(natom + 1))
389 10 : CALL get_basis_offsets(particle_set, qs_kind_set, first_sgf, dimen_ORB)
390 :
391 : ! =========================================================================
392 : ! 1. SETUP DBCSR MATRIX TOPOLOGY
393 : ! =========================================================================
394 :
395 : ! A. Define Column Block Sizes (1 Block = 1 Atom's full basis set)
396 30 : ALLOCATE (c_blk_sizes(natom))
397 38 : DO iatom = 1, natom
398 38 : c_blk_sizes(iatom) = first_sgf(iatom + 1) - first_sgf(iatom)
399 : END DO
400 :
401 : ! B. Define Row Block Sizes (Grid chunks of max size 256)
402 10 : num_grid_chunks = CEILING(REAL(total_grid_npts, KIND=dp)/REAL(chunk_size, KIND=dp))
403 30 : ALLOCATE (r_blk_sizes(num_grid_chunks))
404 40 : r_blk_sizes = chunk_size
405 10 : IF (MOD(total_grid_npts, chunk_size) /= 0) THEN
406 10 : r_blk_sizes(num_grid_chunks) = MOD(total_grid_npts, chunk_size)
407 : END IF
408 :
409 : ! C. Fetch CP2K's Default Process Grid Configuration
410 10 : CALL get_qs_env(qs_env, dbcsr_dist=dbcsr_dist_ks)
411 10 : CALL dbcsr_distribution_get(dbcsr_dist_ks, row_dist=row_dist_ks, col_dist=col_dist_ks)
412 :
413 : ! D. Build Custom Mappings using Round-Robin across the 2D process grid
414 : ! (MAXVAL + 1 accounts for 0-based process indexing in DBCSR)
415 38 : nprow = MAXVAL(row_dist_ks) + 1
416 38 : npcol = MAXVAL(col_dist_ks) + 1
417 :
418 20 : ALLOCATE (row_dist(num_grid_chunks))
419 40 : DO i = 1, num_grid_chunks
420 40 : row_dist(i) = MOD(i - 1, nprow)
421 : END DO
422 :
423 20 : ALLOCATE (col_dist(natom))
424 38 : DO i = 1, natom
425 38 : col_dist(i) = MOD(i - 1, npcol)
426 : END DO
427 :
428 : ! E. Create the DBCSR Distribution and Initialize the Matrix
429 : CALL dbcsr_distribution_new(dist, template=dbcsr_dist_ks, &
430 10 : row_dist=row_dist, col_dist=col_dist)
431 :
432 : CALL dbcsr_create(mat_phi_mu_l, name="phi_val_sparse", dist=dist, &
433 : matrix_type=dbcsr_type_no_symmetry, &
434 10 : row_blk_size=r_blk_sizes, col_blk_size=c_blk_sizes)
435 :
436 : ! =========================================================================
437 : ! 2. STREAM DATA DIRECTLY INTO SPARSE MATRIX
438 : ! =========================================================================
439 : ! Iterate over the atoms assigned to this specific MPI rank
440 10 : DO iatom = para_env%mepos + 1, natom, para_env%num_pe
441 :
442 14 : c_size = c_blk_sizes(iatom)
443 :
444 : ! Allocate a temporary dense buffer just for this specific atom
445 56 : ALLOCATE (atom_col_buffer(total_grid_npts, c_size))
446 14 : atom_col_buffer = 0.0_dp
447 :
448 : ! Evaluate the basis functions on the grid
449 : CALL fill_phi_for_atom(atom_col_buffer, ri_rs_grid_points, total_grid_npts, &
450 14 : iatom, particle_set, qs_kind_set, cell)
451 :
452 : ! Slice the dense column into chunks and insert into DBCSR
453 56 : DO i_blk = 1, num_grid_chunks
454 42 : r_start = (i_blk - 1)*chunk_size + 1
455 42 : r_end = MIN(i_blk*chunk_size, total_grid_npts)
456 :
457 : ! Apply dynamic sparsity filtering: Only store blocks with physical significance
458 23914 : IF (MAXVAL(ABS(atom_col_buffer(r_start:r_end, 1:c_size))) > bs_env%eps_filter) THEN
459 : CALL dbcsr_put_block(mat_phi_mu_l, row=i_blk, col=iatom, &
460 42 : block=atom_col_buffer(r_start:r_end, 1:c_size))
461 : END IF
462 : END DO
463 :
464 14 : DEALLOCATE (atom_col_buffer)
465 :
466 : END DO
467 :
468 : ! Finalize triggers internal MPI communication to route blocks to their correct 2D process owners
469 10 : CALL dbcsr_finalize(mat_phi_mu_l)
470 :
471 : ! -------------------------------------------------------------------------
472 : ! CLEANUP
473 : ! -------------------------------------------------------------------------
474 10 : DEALLOCATE (first_sgf, r_blk_sizes, c_blk_sizes, row_dist, col_dist)
475 10 : CALL dbcsr_distribution_release(dist)
476 :
477 10 : CALL timestop(handle)
478 :
479 40 : END SUBROUTINE atomic_basis_at_grid_point
480 :
481 : ! **************************************************************************************************
482 : !> \brief Compute value of all basis function for single atom across all grid points
483 : !> \param phi_val ...
484 : !> \param ri_rs_grid ...
485 : !> \param npts ...
486 : !> \param iatom ...
487 : !> \param particle_set ...
488 : !> \param qs_kind_set ...
489 : !> \param cell ...
490 : ! **************************************************************************************************
491 :
492 14 : SUBROUTINE fill_phi_for_atom(phi_val, ri_rs_grid, npts, iatom, &
493 : particle_set, qs_kind_set, cell)
494 :
495 : REAL(KIND=dp), INTENT(INOUT) :: phi_val(:, :)
496 : INTEGER, INTENT(IN) :: npts
497 : REAL(KIND=dp), INTENT(IN) :: ri_rs_grid(3, npts)
498 : INTEGER, INTENT(IN) :: iatom
499 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
500 : TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
501 : TYPE(cell_type), POINTER :: cell
502 :
503 : INTEGER :: first_sgf, i_pt, ico, iend_co, ikind, &
504 : ipgf, iset, isgf, ishell, istart_co, &
505 : l, last_sgf, lx, ly, lz, n_cart_total, &
506 : row_idx
507 : REAL(KIND=dp) :: alpha, dist_vec(3), exp_val, poly, r2, &
508 : r_atom(3), weight
509 : TYPE(gto_basis_set_type), POINTER :: orb_basis_set
510 :
511 : ! Get Atom Info
512 14 : ikind = particle_set(iatom)%atomic_kind%kind_number
513 14 : CALL get_qs_kind(qs_kind_set(ikind), basis_set=orb_basis_set, basis_type="ORB")
514 14 : IF (.NOT. ASSOCIATED(orb_basis_set)) RETURN
515 :
516 56 : r_atom = particle_set(iatom)%r
517 :
518 : !$OMP PARALLEL DO DEFAULT(NONE) &
519 : !$OMP SHARED(phi_val, ri_rs_grid, npts, orb_basis_set, r_atom, cell, ncoset, indco) &
520 : !$OMP PRIVATE(i_pt, dist_vec, r2, iset, n_cart_total, ishell, l, istart_co, iend_co, &
521 : !$OMP first_sgf, last_sgf, ipgf, alpha, exp_val, isgf, ico, row_idx, weight, &
522 : !$OMP lx, ly, lz, poly) &
523 14 : !$OMP SCHEDULE(STATIC)
524 : DO i_pt = 1, npts
525 : dist_vec = pbc(ri_rs_grid(:, i_pt) - r_atom, cell)
526 : r2 = DOT_PRODUCT(dist_vec, dist_vec)
527 :
528 : DO iset = 1, orb_basis_set%nset
529 : n_cart_total = ncoset(orb_basis_set%lmax(iset))
530 :
531 : DO ishell = 1, orb_basis_set%nshell(iset)
532 : l = orb_basis_set%l(ishell, iset)
533 : istart_co = ncoset(l - 1) + 1
534 : iend_co = ncoset(l)
535 :
536 : first_sgf = orb_basis_set%first_sgf(ishell, iset)
537 : last_sgf = orb_basis_set%last_sgf(ishell, iset)
538 :
539 : DO ipgf = 1, orb_basis_set%npgf(iset)
540 : alpha = orb_basis_set%zet(ipgf, iset)
541 : exp_val = EXP(-alpha*r2)
542 :
543 : DO isgf = first_sgf, last_sgf
544 : DO ico = istart_co, iend_co
545 : row_idx = (ipgf - 1)*n_cart_total + ico
546 : weight = orb_basis_set%sphi(row_idx, isgf)
547 : lx = indco(1, ico)
548 : ly = indco(2, ico)
549 : lz = indco(3, ico)
550 : poly = (dist_vec(1)**lx)*(dist_vec(2)**ly)*(dist_vec(3)**lz)
551 :
552 : phi_val(i_pt, isgf) = phi_val(i_pt, isgf) + (weight*poly*exp_val)
553 :
554 : END DO
555 : END DO
556 : END DO
557 : END DO
558 : END DO
559 : END DO
560 : !$OMP END PARALLEL DO
561 :
562 : END SUBROUTINE fill_phi_for_atom
563 :
564 : ! **************************************************************************************************
565 : !> \brief Helper for OMP threads to fill phi_val column values
566 : !> \param particle_set ...
567 : !> \param qs_kind_set ...
568 : !> \param first_sgf ...
569 : !> \param total_sgf ...
570 : ! **************************************************************************************************
571 :
572 10 : SUBROUTINE get_basis_offsets(particle_set, qs_kind_set, first_sgf, total_sgf)
573 :
574 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
575 : TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
576 : INTEGER, INTENT(OUT) :: first_sgf(:), total_sgf
577 :
578 : CHARACTER(LEN=*), PARAMETER :: routineN = 'get_basis_offsets'
579 :
580 : INTEGER :: handle, iatom, ikind, nsgf
581 :
582 10 : CALL timeset(routineN, handle)
583 :
584 10 : total_sgf = 0
585 38 : DO iatom = 1, SIZE(particle_set)
586 28 : first_sgf(iatom) = total_sgf + 1
587 28 : ikind = particle_set(iatom)%atomic_kind%kind_number
588 28 : CALL get_qs_kind(qs_kind_set(ikind), nsgf=nsgf, basis_type="ORB")
589 38 : total_sgf = total_sgf + nsgf
590 : END DO
591 10 : first_sgf(SIZE(particle_set) + 1) = total_sgf + 1
592 :
593 10 : CALL timestop(handle)
594 :
595 10 : END SUBROUTINE get_basis_offsets
596 :
597 : ! **************************************************************************************************
598 : !> \brief Compute RI-RS Coefficients (Z_lP)
599 : !> \param qs_env ...
600 : !> \param bs_env ...
601 : !> \param ri_rs_grid_points ...
602 : !> \param mat_phi_mu_l ...
603 : !> \param mat_Z_lP ...
604 : ! **************************************************************************************************
605 :
606 10 : SUBROUTINE compute_coeff_Z_lP(qs_env, bs_env, ri_rs_grid_points, mat_phi_mu_l, mat_Z_lP)
607 :
608 : ! Arguments
609 : TYPE(qs_environment_type), POINTER :: qs_env
610 : TYPE(post_scf_bandstructure_type), POINTER :: bs_env
611 : REAL(KIND=dp), ALLOCATABLE, INTENT(INOUT) :: ri_rs_grid_points(:, :)
612 : TYPE(dbcsr_type), INTENT(INOUT) :: mat_phi_mu_l
613 : TYPE(dbcsr_type), INTENT(OUT) :: mat_Z_lP
614 :
615 : CHARACTER(LEN=*), PARAMETER :: key = 'PROPERTIES%BANDSTRUCTURE%GW%PRINT%RESTART', &
616 : routineN = 'compute_coeff_Z_lP'
617 :
618 : INTEGER :: atom_P, chunk_size, col, current_chunk_size, global_i, global_j, group_handle, &
619 : handle, i, i_blk, info, istart_ao, j, j_ri, l, loc_idx, max_ao_size, max_loc_ri, &
620 : n_ao_total, n_grid_total, n_loc_ri, n_local_grid, natom, num_grid_chunks, r_end, r_start, &
621 : row
622 10 : INTEGER, ALLOCATABLE, DIMENSION(:) :: local_grid_idx, map_global_to_local, &
623 10 : row_offset
624 10 : INTEGER, DIMENSION(:), POINTER :: col_dist_phi, col_dist_ri, r_blk_sizes, &
625 10 : ri_blk_sizes, row_dist_grid
626 : REAL(KIND=dp) :: cutoff_ri, dist, t1
627 10 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: d_vec_local
628 10 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: D_local, d_lp_local, int_2d_local, &
629 10 : phi_global, phi_local, rho_pair_local, &
630 10 : Z_blk, Z_global_temp
631 10 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :) :: int_3c_local
632 : REAL(KIND=dp), DIMENSION(3) :: pos_P
633 10 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: block_ptr
634 : TYPE(cp_logger_type), POINTER :: logger
635 : TYPE(dbcsr_distribution_type) :: dist_phi, dist_Z
636 : TYPE(dbcsr_iterator_type) :: iter
637 : TYPE(mp_para_env_type), POINTER :: para_env
638 10 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
639 : TYPE(section_vals_type), POINTER :: input
640 :
641 10 : CALL timeset(routineN, handle)
642 :
643 : ! Restrict integration domain to a localized sphere around the atom
644 10 : cutoff_ri = bs_env%ri_rs%cutoff_radius_ri_rs
645 :
646 10 : t1 = m_walltime()
647 :
648 10 : CALL get_qs_env(qs_env, para_env=para_env, particle_set=particle_set, input=input)
649 :
650 10 : natom = SIZE(bs_env%i_RI_start_from_atom)
651 10 : n_ao_total = bs_env%i_ao_end_from_atom(natom)
652 10 : n_grid_total = SIZE(ri_rs_grid_points, 2)
653 :
654 : ! =========================================================================
655 : ! 1. SETUP DBCSR TOPOLOGY & EXACT OFFSETS
656 : ! =========================================================================
657 10 : CALL dbcsr_get_info(mat_phi_mu_l, row_blk_size=r_blk_sizes, distribution=dist_phi)
658 10 : CALL dbcsr_distribution_get(dist_phi, row_dist=row_dist_grid, col_dist=col_dist_phi, group=group_handle)
659 :
660 10 : chunk_size = r_blk_sizes(1)
661 10 : num_grid_chunks = SIZE(r_blk_sizes)
662 :
663 30 : ALLOCATE (row_offset(num_grid_chunks))
664 10 : row_offset(1) = 0
665 30 : DO i_blk = 2, num_grid_chunks
666 30 : row_offset(i_blk) = row_offset(i_blk - 1) + r_blk_sizes(i_blk - 1)
667 : END DO
668 :
669 40 : ALLOCATE (ri_blk_sizes(natom), col_dist_ri(natom))
670 38 : DO atom_P = 1, natom
671 28 : ri_blk_sizes(atom_P) = bs_env%i_RI_end_from_atom(atom_P) - bs_env%i_RI_start_from_atom(atom_P) + 1
672 118 : col_dist_ri(atom_P) = MOD(atom_P - 1, MAXVAL(col_dist_phi) + 1)
673 : END DO
674 :
675 10 : CALL dbcsr_distribution_new(dist_Z, template=dist_phi, row_dist=row_dist_grid, col_dist=col_dist_ri)
676 :
677 10 : IF (bs_env%ri_rs%Z_lP_exists) THEN
678 : CALL dbcsr_binary_read(filepath=TRIM(bs_env%prefix)//"Z_lP.matrix", &
679 : distribution=dist_Z, &
680 2 : matrix_new=mat_Z_lP)
681 2 : IF (bs_env%unit_nr > 0) THEN
682 : WRITE (bs_env%unit_nr, '(T2,A,T57,A,F7.1,A)') &
683 1 : 'Read Z_lP from file ', ' Execution time', m_walltime() - t1, ' s'
684 1 : WRITE (bs_env%unit_nr, '(A)') ' '
685 : END IF
686 : ELSE
687 :
688 : CALL dbcsr_create(mat_Z_lP, name="mat_Z_lP", dist=dist_Z, &
689 : matrix_type=dbcsr_type_no_symmetry, &
690 8 : row_blk_size=r_blk_sizes, col_blk_size=ri_blk_sizes)
691 :
692 8 : max_ao_size = 0
693 30 : DO j = 1, SIZE(bs_env%i_ao_start_from_atom)
694 30 : max_ao_size = MAX(max_ao_size, bs_env%i_ao_end_from_atom(j) - bs_env%i_ao_start_from_atom(j) + 1)
695 : END DO
696 30 : max_loc_ri = MAXVAL(ri_blk_sizes)
697 :
698 : ! Global mapping array: 4 bytes * N_grid_total (~4MB per million grid points)
699 24 : ALLOCATE (map_global_to_local(n_grid_total))
700 :
701 : ! =========================================================================
702 : ! 2. PRE-COMPUTE: EXTRACT FULL DBCSR TO DENSE GLOBAL ARRAY
703 : ! =========================================================================
704 32 : ALLOCATE (phi_global(n_grid_total, n_ao_total))
705 8 : phi_global = 0.0_dp
706 :
707 8 : CALL dbcsr_iterator_start(iter, mat_phi_mu_l)
708 41 : DO WHILE (dbcsr_iterator_blocks_left(iter))
709 33 : CALL dbcsr_iterator_next_block(iter, row, col, block_ptr)
710 :
711 33 : IF (col < 1 .OR. col > natom) CYCLE
712 33 : istart_ao = bs_env%i_ao_start_from_atom(col)
713 :
714 134 : DO j = 1, SIZE(block_ptr, 2)
715 93 : global_j = istart_ao + j - 1
716 93 : IF (global_j < 1 .OR. global_j > n_ao_total) CYCLE
717 :
718 19614 : DO i = 1, SIZE(block_ptr, 1)
719 19488 : global_i = row_offset(row) + i
720 19488 : IF (global_i < 1 .OR. global_i > n_grid_total) CYCLE
721 :
722 : ! Insert directly into the global dense array
723 19581 : phi_global(global_i, global_j) = block_ptr(i, j)
724 : END DO
725 : END DO
726 : END DO
727 8 : CALL dbcsr_iterator_stop(iter)
728 :
729 8 : CALL para_env%sum(phi_global)
730 :
731 : ! =========================================================================
732 : ! 3. MPI LOOP OVER ATOMS (Fully independent, no MPI barriers inside)
733 : ! This distributes the N_atoms among the available MPI processors.
734 : ! =========================================================================
735 8 : DO atom_P = para_env%mepos + 1, natom, para_env%num_pe
736 :
737 11 : n_loc_ri = ri_blk_sizes(atom_P)
738 44 : pos_P(:) = particle_set(atom_P)%r(:)
739 :
740 : ! ---------------------------------------------------------------------
741 : ! A. Determine Local Grid Domain based on cutoff_ri
742 : ! ---------------------------------------------------------------------
743 11 : n_local_grid = 0
744 6827 : DO l = 1, n_grid_total
745 27264 : dist = SQRT(SUM((ri_rs_grid_points(1:3, l) - pos_P(1:3))**2))
746 6827 : IF (dist <= cutoff_ri) n_local_grid = n_local_grid + 1
747 : END DO
748 :
749 33 : ALLOCATE (local_grid_idx(n_local_grid))
750 11 : map_global_to_local = 0
751 :
752 11 : n_local_grid = 0
753 6827 : DO l = 1, n_grid_total
754 27264 : dist = SQRT(SUM((ri_rs_grid_points(1:3, l) - pos_P(1:3))**2))
755 6827 : IF (dist <= cutoff_ri) THEN
756 6816 : n_local_grid = n_local_grid + 1
757 6816 : local_grid_idx(n_local_grid) = l
758 6816 : map_global_to_local(l) = n_local_grid
759 : END IF
760 : END DO
761 :
762 : ! ---------------------------------------------------------------------
763 : ! B. Smart Fetch of Local Phi (MPI-Safe & Bounds-Checked)
764 : ! ---------------------------------------------------------------------
765 44 : ALLOCATE (phi_local(n_local_grid, n_ao_total))
766 :
767 : !$OMP PARALLEL DO DEFAULT(NONE) &
768 : !$OMP SHARED(n_ao_total, n_local_grid, local_grid_idx, phi_local, phi_global) &
769 : !$OMP PRIVATE(global_j, loc_idx, global_i) &
770 11 : !$OMP SCHEDULE(STATIC)
771 : DO global_j = 1, n_ao_total
772 : DO loc_idx = 1, n_local_grid
773 : global_i = local_grid_idx(loc_idx)
774 : phi_local(loc_idx, global_j) = phi_global(global_i, global_j)
775 : END DO
776 : END DO
777 : !$OMP END PARALLEL DO
778 :
779 : ! ---------------------------------------------------------------------
780 : ! C. Build and Balance Local LHS Matrix (D_local)
781 : ! ---------------------------------------------------------------------
782 66 : ALLOCATE (D_local(n_local_grid, n_local_grid), d_vec_local(n_local_grid))
783 11 : D_local = 0.0_dp
784 :
785 11 : CALL dsyrk("L", "N", n_local_grid, n_ao_total, 1.0_dp, phi_local, n_local_grid, 0.0_dp, D_local, n_local_grid)
786 :
787 : !$OMP PARALLEL DO DEFAULT(NONE) &
788 : !$OMP SHARED(n_local_grid, D_local, d_vec_local, bs_env) &
789 : !$OMP PRIVATE(i) &
790 11 : !$OMP SCHEDULE(STATIC)
791 : DO i = 1, n_local_grid
792 : D_local(i, i) = D_local(i, i)**2
793 : d_vec_local(i) = 1.0_dp/SQRT(MAX(D_local(i, i), 1.0E-16_dp))
794 : D_local(i, i) = (D_local(i, i)*d_vec_local(i)**2) + bs_env%ri_rs%tikhonov
795 : END DO
796 : !$OMP END PARALLEL DO
797 :
798 : !$OMP PARALLEL DO DEFAULT(NONE) &
799 : !$OMP SHARED(n_local_grid, D_local, d_vec_local) &
800 : !$OMP PRIVATE(j, i) &
801 11 : !$OMP SCHEDULE(DYNAMIC)
802 : DO j = 1, n_local_grid
803 : DO i = j + 1, n_local_grid
804 : D_local(i, j) = D_local(i, j)**2
805 : D_local(i, j) = D_local(i, j)*d_vec_local(i)*d_vec_local(j)
806 : D_local(j, i) = D_local(i, j)
807 : END DO
808 : END DO
809 : !$OMP END PARALLEL DO
810 :
811 : ! ---------------------------------------------------------------------
812 : ! D. Build Local RHS Matrix (d_lp_local)
813 : ! ---------------------------------------------------------------------
814 44 : ALLOCATE (d_lp_local(n_local_grid, n_loc_ri))
815 44 : ALLOCATE (rho_pair_local(n_local_grid, max_ao_size*max_ao_size))
816 55 : ALLOCATE (int_3c_local(max_ao_size, max_ao_size, n_loc_ri))
817 44 : ALLOCATE (int_2d_local(max_ao_size*max_ao_size, n_loc_ri))
818 :
819 11 : d_lp_local = 0.0_dp
820 :
821 : CALL compute_d_lp(qs_env, bs_env, phi_local, d_lp_local, n_local_grid, &
822 11 : n_loc_ri, atom_P, rho_pair_local, int_3c_local, int_2d_local, max_ao_size)
823 :
824 : !$OMP PARALLEL DO DEFAULT(NONE) &
825 : !$OMP SHARED(n_loc_ri, n_local_grid, d_lp_local, d_vec_local) &
826 : !$OMP PRIVATE(j_ri, i) &
827 11 : !$OMP SCHEDULE(STATIC)
828 : DO j_ri = 1, n_loc_ri
829 : DO i = 1, n_local_grid
830 : d_lp_local(i, j_ri) = d_lp_local(i, j_ri)*d_vec_local(i)
831 : END DO
832 : END DO
833 : !$OMP END PARALLEL DO
834 :
835 : ! ---------------------------------------------------------------------
836 : ! E. Fast Local LAPACK Solve
837 : ! ---------------------------------------------------------------------
838 :
839 11 : CALL dpotrf('L', n_local_grid, D_local, n_local_grid, info)
840 :
841 11 : CALL dpotrs('L', n_local_grid, n_loc_ri, D_local, n_local_grid, d_lp_local, n_local_grid, info)
842 :
843 : !$OMP PARALLEL DO DEFAULT(NONE) &
844 : !$OMP SHARED(n_loc_ri, n_local_grid, d_lp_local, d_vec_local) &
845 : !$OMP PRIVATE(j_ri, i) &
846 11 : !$OMP SCHEDULE(STATIC)
847 : DO j_ri = 1, n_loc_ri
848 : DO i = 1, n_local_grid
849 : d_lp_local(i, j_ri) = d_lp_local(i, j_ri)*d_vec_local(i)
850 : END DO
851 : END DO
852 : !$OMP END PARALLEL DO
853 :
854 : ! ---------------------------------------------------------------------
855 : ! F. Scatter Local Solution Back to Global DBCSR Matrix
856 : ! ---------------------------------------------------------------------
857 44 : ALLOCATE (Z_global_temp(n_grid_total, n_loc_ri))
858 11 : Z_global_temp = 0.0_dp
859 :
860 : !$OMP PARALLEL DO DEFAULT(NONE) &
861 : !$OMP SHARED(n_local_grid, local_grid_idx, Z_global_temp, n_loc_ri, d_lp_local) &
862 : !$OMP PRIVATE(loc_idx, global_i) &
863 11 : !$OMP SCHEDULE(STATIC)
864 : DO loc_idx = 1, n_local_grid
865 : global_i = local_grid_idx(loc_idx)
866 : Z_global_temp(global_i, 1:n_loc_ri) = d_lp_local(loc_idx, 1:n_loc_ri)
867 : END DO
868 : !$OMP END PARALLEL DO
869 :
870 77 : ALLOCATE (Z_blk(MAXVAL(r_blk_sizes), n_loc_ri))
871 :
872 44 : DO i_blk = 1, num_grid_chunks
873 33 : r_start = row_offset(i_blk) + 1
874 33 : r_end = row_offset(i_blk) + r_blk_sizes(i_blk)
875 33 : current_chunk_size = r_blk_sizes(i_blk)
876 :
877 33 : Z_blk = 0.0_dp
878 53376 : Z_blk(1:current_chunk_size, 1:n_loc_ri) = Z_global_temp(r_start:r_end, 1:n_loc_ri)
879 :
880 53387 : IF (MAXVAL(ABS(Z_blk(1:current_chunk_size, 1:n_loc_ri))) > bs_env%eps_filter) THEN
881 : CALL dbcsr_put_block(mat_Z_lP, row=i_blk, col=atom_P, &
882 33 : block=Z_blk(1:current_chunk_size, 1:n_loc_ri))
883 : END IF
884 : END DO
885 :
886 11 : DEALLOCATE (Z_global_temp, Z_blk)
887 11 : DEALLOCATE (D_local, d_vec_local, d_lp_local)
888 11 : DEALLOCATE (rho_pair_local, int_3c_local, int_2d_local)
889 :
890 11 : DEALLOCATE (local_grid_idx, phi_local)
891 :
892 : END DO
893 :
894 : ! Clean up the massive global array once the loop finishes
895 8 : DEALLOCATE (phi_global)
896 :
897 8 : CALL dbcsr_finalize(mat_Z_lP)
898 :
899 8 : IF (bs_env%unit_nr > 0) THEN
900 : WRITE (bs_env%unit_nr, '(T2,A,T57,A,F7.1,A)') &
901 4 : 'Computed Z_lP ', ' Execution time', m_walltime() - t1, ' s'
902 4 : WRITE (bs_env%unit_nr, '(A)') ' '
903 : END IF
904 :
905 8 : logger => cp_get_default_logger()
906 :
907 8 : IF (BTEST(cp_print_key_should_output(logger%iter_info, input, key), cp_p_file)) THEN
908 0 : CALL dbcsr_binary_write(matrix=mat_Z_lP, filepath=TRIM(bs_env%prefix)//"Z_lP.matrix")
909 : END IF
910 :
911 16 : DEALLOCATE (map_global_to_local)
912 :
913 : END IF
914 :
915 10 : DEALLOCATE (row_offset, ri_blk_sizes, col_dist_ri)
916 10 : CALL dbcsr_distribution_release(dist_Z)
917 :
918 10 : DEALLOCATE (ri_rs_grid_points)
919 :
920 10 : CALL timestop(handle)
921 :
922 40 : END SUBROUTINE compute_coeff_Z_lP
923 :
924 : ! **************************************************************************************************
925 : !> \brief Computes the dense localized Right-Hand Side (RHS) matrix d_lp
926 : !> \param qs_env ...
927 : !> \param bs_env ...
928 : !> \param phi_val ...
929 : !> \param d_lp ...
930 : !> \param n_grid_total ...
931 : !> \param n_loc_ri ...
932 : !> \param atom_P ...
933 : !> \param rho_pair ...
934 : !> \param int_3c ...
935 : !> \param int_2d ...
936 : !> \param max_ao_size ...
937 : ! **************************************************************************************************
938 :
939 44 : SUBROUTINE compute_d_lp(qs_env, bs_env, phi_val, d_lp, n_grid_total, n_loc_ri, atom_P, &
940 11 : rho_pair, int_3c, int_2d, max_ao_size)
941 :
942 : TYPE(qs_environment_type), POINTER :: qs_env
943 : TYPE(post_scf_bandstructure_type), POINTER :: bs_env
944 : REAL(KIND=dp), DIMENSION(:, :), INTENT(IN) :: phi_val
945 : REAL(KIND=dp), DIMENSION(:, :), INTENT(INOUT) :: d_lp
946 : INTEGER, INTENT(IN) :: n_grid_total, n_loc_ri, atom_P
947 : REAL(KIND=dp), DIMENSION(:, :), INTENT(INOUT) :: rho_pair
948 : REAL(KIND=dp), DIMENSION(:, :, :), INTENT(INOUT) :: int_3c
949 : REAL(KIND=dp), DIMENSION(:, :), INTENT(INOUT) :: int_2d
950 : INTEGER, INTENT(IN) :: max_ao_size
951 :
952 : CHARACTER(LEN=*), PARAMETER :: routineN = 'compute_d_lp'
953 :
954 : INTEGER :: atom_j, atom_k, handle, j, jk_idx, &
955 : jsize, jstart, k, ksize, kstart, l, ri
956 :
957 11 : CALL timeset(routineN, handle)
958 :
959 42 : DO atom_j = 1, SIZE(bs_env%i_ao_start_from_atom)
960 31 : jstart = bs_env%i_ao_start_from_atom(atom_j)
961 31 : jsize = bs_env%i_ao_end_from_atom(atom_j) - jstart + 1
962 :
963 131 : DO atom_k = 1, SIZE(bs_env%i_ao_start_from_atom)
964 89 : kstart = bs_env%i_ao_start_from_atom(atom_k)
965 89 : ksize = bs_env%i_ao_end_from_atom(atom_k) - kstart + 1
966 :
967 7794 : int_3c(1:jsize, 1:ksize, 1:n_loc_ri) = 0.0_dp
968 :
969 : ! Compute B_{μν,P} = (μν|P)
970 : CALL build_3c_integral_block(int_3c(1:jsize, 1:ksize, 1:n_loc_ri), &
971 : qs_env, bs_env%ri_metric, &
972 : basis_j=bs_env%basis_set_AO, &
973 : basis_k=bs_env%basis_set_AO, &
974 : basis_i=bs_env%basis_set_RI, &
975 89 : atom_k=atom_k, atom_j=atom_j, atom_i=atom_P)
976 :
977 : ! Build Pair Density: ρ(l, μν) = Φ_μ(r_l) \times Φ_ν(r_l)
978 : !$OMP PARALLEL DO DEFAULT(NONE) &
979 : !$OMP SHARED(ksize, jsize, n_grid_total, rho_pair, phi_val, jstart, kstart) &
980 : !$OMP PRIVATE(k, j, l, jk_idx) &
981 89 : !$OMP COLLAPSE(2)
982 : DO k = 1, ksize
983 : DO j = 1, jsize
984 : jk_idx = (k - 1)*jsize + j
985 : DO l = 1, n_grid_total
986 : rho_pair(l, jk_idx) = phi_val(l, jstart + j - 1)*phi_val(l, kstart + k - 1)
987 : END DO
988 : END DO
989 : END DO
990 : !$OMP END PARALLEL DO
991 :
992 : ! Flatten 3D B_{μν, P} tensor to 2D B_{(μν), P} matrix for BLAS
993 : !$OMP PARALLEL DO DEFAULT(NONE) &
994 : !$OMP SHARED(n_loc_ri, ksize, jsize, int_2d, int_3c) &
995 : !$OMP PRIVATE(ri, k, j, jk_idx) &
996 89 : !$OMP COLLAPSE(2)
997 : DO ri = 1, n_loc_ri
998 : DO k = 1, ksize
999 : DO j = 1, jsize
1000 : jk_idx = (k - 1)*jsize + j
1001 : int_2d(jk_idx, ri) = int_3c(j, k, ri)
1002 : END DO
1003 : END DO
1004 : END DO
1005 : !$OMP END PARALLEL DO
1006 :
1007 : ! Perform Contraction: d_{l, P} += ρ(l, μν) \times B_{(μν), P}
1008 : CALL dgemm("N", "N", n_grid_total, n_loc_ri, jsize*ksize, &
1009 : 1.0_dp, rho_pair, n_grid_total, &
1010 : int_2d, max_ao_size*max_ao_size, &
1011 120 : 1.0_dp, d_lp(1, 1), n_grid_total)
1012 : END DO
1013 : END DO
1014 :
1015 11 : CALL timestop(handle)
1016 :
1017 11 : END SUBROUTINE compute_d_lp
1018 :
1019 : ! **************************************************************************************************
1020 : !> \brief Computes the χ(iτ, k=0) matrix
1021 : !> \param bs_env ...
1022 : !> \param mat_chi_Gamma_tau ...
1023 : !> \param mat_phi_mu_l ...
1024 : !> \param mat_Z_lP ...
1025 : ! **************************************************************************************************
1026 :
1027 10 : SUBROUTINE get_mat_chi_Gamma_tau(bs_env, mat_chi_Gamma_tau, mat_phi_mu_l, mat_Z_lP)
1028 :
1029 : TYPE(post_scf_bandstructure_type), POINTER :: bs_env
1030 : TYPE(dbcsr_p_type), DIMENSION(:), POINTER :: mat_chi_Gamma_tau
1031 : TYPE(dbcsr_type), INTENT(INOUT) :: mat_phi_mu_l, mat_Z_lP
1032 :
1033 : CHARACTER(LEN=*), PARAMETER :: routineN = 'get_mat_chi_Gamma_tau'
1034 :
1035 : INTEGER :: handle, i, i_t, ispin, npcol
1036 10 : INTEGER, DIMENSION(:), POINTER :: blk_ao, blk_grid, dist_col_ao, &
1037 10 : dist_col_grid, dist_row_grid
1038 : REAL(KIND=dp) :: t1, tau
1039 : TYPE(dbcsr_distribution_type) :: dist_grid_grid, dist_phi
1040 : TYPE(dbcsr_type) :: matrix_chi_grid, matrix_chi_grid_spin, &
1041 : matrix_G_occ_grid, matrix_G_vir_grid
1042 :
1043 10 : CALL timeset(routineN, handle)
1044 :
1045 : ! =========================================================================
1046 : ! 1. SETUP CORE TOPOLOGIES
1047 : ! =========================================================================
1048 10 : CALL dbcsr_get_info(mat_phi_mu_l, distribution=dist_phi, row_blk_size=blk_grid, col_blk_size=blk_ao)
1049 10 : CALL dbcsr_distribution_get(dist_phi, row_dist=dist_row_grid, col_dist=dist_col_ao)
1050 :
1051 : ! Determine number of MPI process columns
1052 38 : npcol = MAXVAL(dist_col_ao) + 1
1053 :
1054 : ! Build a perfectly safe column distribution for the Grid dimension
1055 30 : ALLOCATE (dist_col_grid(SIZE(blk_grid)))
1056 40 : DO i = 1, SIZE(blk_grid)
1057 40 : dist_col_grid(i) = MOD(i - 1, npcol)
1058 : END DO
1059 :
1060 : CALL dbcsr_distribution_new(dist_grid_grid, template=dist_phi, &
1061 10 : row_dist=dist_row_grid, col_dist=dist_col_grid)
1062 :
1063 10 : CALL dbcsr_create(matrix_G_occ_grid, "G_occ_grid", dist_grid_grid, dbcsr_type_no_symmetry, blk_grid, blk_grid)
1064 10 : CALL dbcsr_create(matrix_G_vir_grid, "G_vir_grid", dist_grid_grid, dbcsr_type_no_symmetry, blk_grid, blk_grid)
1065 10 : CALL dbcsr_create(matrix_chi_grid, "chi_grid", dist_grid_grid, dbcsr_type_no_symmetry, blk_grid, blk_grid)
1066 10 : CALL dbcsr_create(matrix_chi_grid_spin, "chi_grid_spin", dist_grid_grid, dbcsr_type_no_symmetry, blk_grid, blk_grid)
1067 :
1068 : ! =========================================================================
1069 : ! 2. MAIN IMAGINARY TIME LOOP
1070 : ! =========================================================================
1071 120 : DO i_t = 1, bs_env%num_time_freq_points
1072 110 : t1 = m_walltime()
1073 :
1074 110 : tau = bs_env%imag_time_points(i_t)
1075 110 : CALL dbcsr_set(matrix_chi_grid, 0.0_dp)
1076 :
1077 : ! ----------------------------------------------------------------------
1078 : ! A. SPIN LOOP (Allocations safely encapsulated in wrappers)
1079 : ! ----------------------------------------------------------------------
1080 240 : DO ispin = 1, bs_env%n_spin
1081 :
1082 : ! G^occ_µλ(i|τ|,k=0) = sum_n^occ C_µn(k=0) e^(-|(ϵ_nk=0-ϵ_F)τ|) C_λn(k=0)
1083 : ! G^occ_ll'(i|τ|,k=0) = sum_µν Φ_µ(r_l) G^occ_µν Φ_ν(r_l')
1084 : CALL build_G_grid(bs_env, tau, ispin, .TRUE., .FALSE., mat_phi_mu_l, &
1085 130 : matrix_G_occ_grid, bs_env%eps_filter)
1086 :
1087 : ! G^vir_µλ(i|τ|,k=0) = sum_n^vir C_µn(k=0) e^(-|(ϵ_nk=0-ϵ_F)τ|) C_λn(k=0)
1088 : ! G^vir_ll'(i|τ|,k=0) = sum_µν Φ_µ(r_l) G^vir_µν Φ_ν(r_l')
1089 : CALL build_G_grid(bs_env, tau, ispin, .FALSE., .TRUE., mat_phi_mu_l, &
1090 130 : matrix_G_vir_grid, bs_env%eps_filter)
1091 :
1092 : ! -------------------------------------------------------------------
1093 : ! B. ELEMENT-WISE HADAMARD PRODUCT
1094 : ! -------------------------------------------------------------------
1095 : ! χ_ll'(iτ,k=0) = G^occ_ll'(i|τ|,k=0) * G^vir_ll'(i|τ|,k=0)
1096 130 : CALL hadamard_product(matrix_G_occ_grid, matrix_G_vir_grid, matrix_chi_grid_spin, bs_env%spin_degeneracy)
1097 :
1098 : ! Accumulate spin contributions
1099 240 : CALL dbcsr_add(matrix_chi_grid, matrix_chi_grid_spin, 1.0_dp, 1.0_dp)
1100 :
1101 : END DO ! ispin
1102 :
1103 : ! ----------------------------------------------------------------------
1104 : ! C. TRANSFORM TO AUXILIARY BASIS & EXPORT DIRECTLY
1105 : ! χ_aux = Z^T * χ_grid * Z
1106 : ! χ_PQ(iτ,k=0) = sum_ll' Z_lP χ_ll'(iτ,k=0) Z_l'Q
1107 : ! Result is dumped directly into the final array mat_chi_Gamma_tau!
1108 : ! ----------------------------------------------------------------------
1109 : CALL contract_A_B_A("T", "N", mat_Z_lP, matrix_chi_grid, &
1110 110 : mat_chi_Gamma_tau(i_t)%matrix, bs_env%eps_filter)
1111 :
1112 120 : IF (bs_env%unit_nr > 0) THEN
1113 : WRITE (bs_env%unit_nr, '(T2,A,I13,A,I3,A,F7.1,A)') &
1114 55 : 'Computed χ(iτ,k=0) for time point', i_t, ' /', bs_env%num_time_freq_points, &
1115 110 : ', Execution time', m_walltime() - t1, ' s'
1116 : END IF
1117 :
1118 : END DO ! i_t
1119 :
1120 : ! =========================================================================
1121 : ! 3. FINAL CLEANUP
1122 : ! =========================================================================
1123 10 : CALL dbcsr_release(matrix_G_occ_grid)
1124 10 : CALL dbcsr_release(matrix_G_vir_grid)
1125 10 : CALL dbcsr_release(matrix_chi_grid)
1126 10 : CALL dbcsr_release(matrix_chi_grid_spin)
1127 10 : CALL dbcsr_distribution_release(dist_grid_grid)
1128 10 : DEALLOCATE (dist_col_grid)
1129 :
1130 10 : IF (bs_env%unit_nr > 0) WRITE (bs_env%unit_nr, '(A)') ' '
1131 :
1132 10 : CALL timestop(handle)
1133 :
1134 20 : END SUBROUTINE get_mat_chi_Gamma_tau
1135 :
1136 : ! **************************************************************************************************
1137 : !> \brief Computes Green's Function in grid basis
1138 : !> \param bs_env ...
1139 : !> \param tau ...
1140 : !> \param ispin ...
1141 : !> \param occ ...
1142 : !> \param vir ...
1143 : !> \param mat_phi_mu_l ...
1144 : !> \param matrix_G_grid ...
1145 : !> \param eps_filter ...
1146 : ! **************************************************************************************************
1147 :
1148 1064 : SUBROUTINE build_G_grid(bs_env, tau, ispin, occ, vir, mat_phi_mu_l, matrix_G_grid, eps_filter)
1149 :
1150 : TYPE(post_scf_bandstructure_type), POINTER :: bs_env
1151 : REAL(KIND=dp), INTENT(IN) :: tau
1152 : INTEGER, INTENT(IN) :: ispin
1153 : LOGICAL, INTENT(IN) :: occ, vir
1154 : TYPE(dbcsr_type), INTENT(INOUT) :: mat_phi_mu_l, matrix_G_grid
1155 : REAL(KIND=dp), INTENT(IN) :: eps_filter
1156 :
1157 : CHARACTER(LEN=*), PARAMETER :: routineN = 'build_G_grid'
1158 :
1159 : INTEGER :: handle
1160 532 : INTEGER, DIMENSION(:), POINTER :: blk_ao, dist_row_ao
1161 : TYPE(cp_fm_type), POINTER :: fm_G
1162 : TYPE(dbcsr_distribution_type) :: dist_ao_ao
1163 : TYPE(dbcsr_type) :: matrix_G_ao
1164 :
1165 532 : CALL timeset(routineN, handle)
1166 :
1167 : ! 1. Select the correct FM matrix based on occ/vir flags
1168 532 : IF (occ) THEN
1169 272 : fm_G => bs_env%fm_Gocc
1170 : ELSE
1171 260 : fm_G => bs_env%fm_Gvir
1172 : END IF
1173 :
1174 : ! 2. Compute Dense FM Green's Function
1175 : ! G^occ/vir_µλ(i|τ|,k=0) = sum_G^occ/vir_µλn^occ/vir C_µn(k=0) e^(-|(ϵ_nk=0-ϵ_F)τ|) C_λn(k=0)
1176 532 : CALL G_occ_vir(bs_env, tau, fm_G, ispin, occ=occ, vir=vir)
1177 :
1178 : ! 3. Setup AO DBCSR Topology and Create Matrix dynamically
1179 532 : CALL setup_square_topology(mat_phi_mu_l, 'COL', dist_ao_ao, blk_ao, dist_row_ao)
1180 :
1181 : CALL dbcsr_create(matrix_G_ao, name="G_ao", dist=dist_ao_ao, &
1182 : matrix_type=dbcsr_type_no_symmetry, &
1183 532 : row_blk_size=blk_ao, col_blk_size=blk_ao)
1184 :
1185 : ! 4. Convert FM to Sparse DBCSR
1186 532 : CALL copy_fm_to_dbcsr(fm_G, matrix_G_ao, keep_sparsity=.FALSE.)
1187 :
1188 : ! 5. Transform to Grid Basis: G_grid = phi * G_ao * phi^T
1189 : ! G^occ/vir_ll'(i|τ|,k=0) = sum_µν Φ_µ(r_l) G^occ/vir_µν Φ_ν(r_l')
1190 532 : CALL contract_A_B_A("N", "T", mat_phi_mu_l, matrix_G_ao, matrix_G_grid, eps_filter)
1191 :
1192 : ! 6. Release AO matrix and topology
1193 532 : CALL release_dbcsr_topology_and_matrices(dist=dist_ao_ao, mapped_dist=dist_row_ao, m1=matrix_G_ao)
1194 :
1195 532 : CALL timestop(handle)
1196 :
1197 532 : END SUBROUTINE build_G_grid
1198 :
1199 : ! **************************************************************************************************
1200 : !> \brief Generalized routine to compute OUT = A * B * A^T OR OUT = A^T * B * A using DBCSR
1201 : !> \param transA_left ...
1202 : !> \param transA_right ...
1203 : !> \param matrix_A ...
1204 : !> \param matrix_B ...
1205 : !> \param matrix_out ...
1206 : !> \param eps_filter ...
1207 : ! **************************************************************************************************
1208 :
1209 1034 : SUBROUTINE contract_A_B_A(transA_left, transA_right, matrix_A, matrix_B, matrix_out, eps_filter)
1210 :
1211 : CHARACTER(LEN=1), INTENT(IN) :: transA_left, transA_right
1212 : TYPE(dbcsr_type), INTENT(INOUT) :: matrix_A, matrix_B, matrix_out
1213 : REAL(KIND=dp), INTENT(IN) :: eps_filter
1214 :
1215 : CHARACTER(LEN=*), PARAMETER :: routineN = 'contract_A_B_A'
1216 :
1217 : INTEGER :: handle
1218 : TYPE(dbcsr_type) :: matrix_tmp
1219 :
1220 1034 : CALL timeset(routineN, handle)
1221 :
1222 1034 : CALL dbcsr_create(matrix_tmp, template=matrix_A)
1223 :
1224 1034 : IF (transA_left == "N" .AND. transA_right == "T") THEN
1225 : ! Path 1: Out = A * B * A^T
1226 : CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_A, matrix_B, &
1227 652 : 0.0_dp, matrix_tmp, filter_eps=eps_filter)
1228 : CALL dbcsr_multiply("N", "T", 1.0_dp, matrix_tmp, matrix_A, &
1229 652 : 0.0_dp, matrix_out, filter_eps=eps_filter)
1230 :
1231 382 : ELSE IF (transA_left == "T" .AND. transA_right == "N") THEN
1232 : ! Path 2: Out = A^T * B * A
1233 : CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_B, matrix_A, &
1234 382 : 0.0_dp, matrix_tmp, filter_eps=eps_filter)
1235 : CALL dbcsr_multiply("T", "N", 1.0_dp, matrix_A, matrix_tmp, &
1236 382 : 0.0_dp, matrix_out, filter_eps=eps_filter)
1237 : ELSE
1238 0 : CPABORT("Unsupported transposition pair in contract_A_B_A")
1239 : END IF
1240 :
1241 1034 : CALL dbcsr_release(matrix_tmp)
1242 :
1243 1034 : CALL timestop(handle)
1244 :
1245 1034 : END SUBROUTINE contract_A_B_A
1246 :
1247 : ! **************************************************************************************************
1248 : !> \brief Computes C = A ◦ B (Element-wise Hadamard product) for sparse DBCSR matrices.
1249 : !> \param matrix_A ...
1250 : !> \param matrix_B ...
1251 : !> \param matrix_C ...
1252 : !> \param fac (Scaling factor applied to the product)
1253 : ! **************************************************************************************************
1254 :
1255 804 : SUBROUTINE hadamard_product(matrix_A, matrix_B, matrix_C, fac)
1256 :
1257 : TYPE(dbcsr_type), INTENT(INOUT) :: matrix_A, matrix_B, matrix_C
1258 : REAL(KIND=dp), INTENT(IN) :: fac
1259 :
1260 : CHARACTER(LEN=*), PARAMETER :: routineN = 'hadamard_product'
1261 :
1262 : INTEGER :: col, handle, row
1263 : LOGICAL :: found
1264 402 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: blk_B, blk_C
1265 : TYPE(dbcsr_iterator_type) :: iter
1266 :
1267 402 : CALL timeset(routineN, handle)
1268 :
1269 402 : CALL dbcsr_copy(matrix_C, matrix_A)
1270 :
1271 402 : CALL dbcsr_iterator_start(iter, matrix_C)
1272 2211 : DO WHILE (dbcsr_iterator_blocks_left(iter))
1273 1809 : CALL dbcsr_iterator_next_block(iter, row, col, blk_C)
1274 :
1275 1809 : CALL dbcsr_get_block_p(matrix_B, row, col, blk_B, found)
1276 :
1277 2211 : IF (found) THEN
1278 159523682 : blk_C(:, :) = fac*blk_C(:, :)*blk_B(:, :)
1279 : ELSE
1280 : ! If B is sparse here, the product is zero
1281 0 : blk_C(:, :) = 0.0_dp
1282 : END IF
1283 : END DO
1284 402 : CALL dbcsr_iterator_stop(iter)
1285 :
1286 402 : CALL timestop(handle)
1287 :
1288 402 : END SUBROUTINE hadamard_product
1289 :
1290 : ! **************************************************************************************************
1291 : !> \brief Computes the exact exchange part of the GW self-energy
1292 : !> \param bs_env ...
1293 : !> \param qs_env ...
1294 : !> \param mat_phi_mu_l ...
1295 : !> \param mat_Z_lP ...
1296 : !> \param fm_Sigma_x_Gamma ...
1297 : ! **************************************************************************************************
1298 :
1299 10 : SUBROUTINE compute_Sigma_x(bs_env, qs_env, mat_phi_mu_l, mat_Z_lP, fm_Sigma_x_Gamma)
1300 :
1301 : TYPE(post_scf_bandstructure_type), POINTER :: bs_env
1302 : TYPE(qs_environment_type), POINTER :: qs_env
1303 : TYPE(dbcsr_type), INTENT(INOUT) :: mat_phi_mu_l, mat_Z_lP
1304 : TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:) :: fm_Sigma_x_Gamma
1305 :
1306 : CHARACTER(LEN=*), PARAMETER :: routineN = 'compute_Sigma_x'
1307 :
1308 : INTEGER :: handle, ispin
1309 10 : INTEGER, DIMENSION(:), POINTER :: blk_aux, blk_grid, dist_col_grid, &
1310 10 : dist_row_aux
1311 : REAL(KIND=dp) :: t1
1312 10 : TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:, :) :: fm_Vtr_Gamma
1313 : TYPE(dbcsr_distribution_type) :: dist_aux_aux, dist_grid_grid
1314 : TYPE(dbcsr_type) :: mat_Sigma_x_Gamma, matrix_D_grid, &
1315 : matrix_Sigma_x_grid, matrix_V_aux, &
1316 : matrix_V_grid
1317 :
1318 10 : CALL timeset(routineN, handle)
1319 :
1320 10 : t1 = m_walltime()
1321 :
1322 42 : ALLOCATE (fm_Sigma_x_Gamma(bs_env%n_spin))
1323 22 : DO ispin = 1, bs_env%n_spin
1324 22 : CALL cp_fm_create(fm_Sigma_x_Gamma(ispin), bs_env%fm_s_Gamma%matrix_struct)
1325 : END DO
1326 :
1327 10 : CALL dbcsr_create(mat_Sigma_x_Gamma, template=bs_env%mat_ao_ao%matrix)
1328 :
1329 : ! =========================================================================
1330 : ! 1. SETUP CORE TOPOLOGIES
1331 : ! =========================================================================
1332 10 : CALL setup_square_topology(mat_phi_mu_l, 'ROW', dist_grid_grid, blk_grid, dist_col_grid)
1333 10 : CALL setup_square_topology(mat_Z_lP, 'COL', dist_aux_aux, blk_aux, dist_row_aux)
1334 :
1335 : ! =========================================================================
1336 : ! 2. COMPUTE V^tr_ll'
1337 : ! =========================================================================
1338 : CALL RI_2c_integral_mat(qs_env, fm_Vtr_Gamma, bs_env%fm_RI_RI, bs_env%n_RI, &
1339 10 : bs_env%trunc_coulomb, do_kpoints=.FALSE.)
1340 :
1341 : ! M^-1 * V^tr * M^-1 directly modifies fm_Vtr_Gamma(:, 1)
1342 10 : CALL multiply_fm_W_MIC_time_with_Minv_Gamma(bs_env, qs_env, fm_Vtr_Gamma(:, 1))
1343 :
1344 10 : CALL dbcsr_create(matrix_V_aux, "V_aux", dist_aux_aux, dbcsr_type_no_symmetry, blk_aux, blk_aux)
1345 10 : CALL dbcsr_create(matrix_V_grid, "V_grid", dist_grid_grid, dbcsr_type_no_symmetry, blk_grid, blk_grid)
1346 :
1347 10 : CALL copy_fm_to_dbcsr(fm_Vtr_Gamma(1, 1), matrix_V_aux, keep_sparsity=.FALSE.)
1348 :
1349 : ! V^tr_ll' = sum_PQ Z_lP V^trunc_PQ Z_l'Q
1350 10 : CALL contract_A_B_A("N", "T", mat_Z_lP, matrix_V_aux, matrix_V_grid, bs_env%eps_filter)
1351 10 : CALL dbcsr_release(matrix_V_aux)
1352 :
1353 : ! =========================================================================
1354 : ! 3. SPIN LOOP FOR EXACT EXCHANGE
1355 : ! =========================================================================
1356 22 : DO ispin = 1, bs_env%n_spin
1357 :
1358 : ! Density matrix on grid is essentially G_occ at tau = 0.0
1359 : ! D_µν = sum_n^occ C_µn(k=0) C_νn(k=0)
1360 : ! D_ll' = sum_µν Φ_µ(r_l) D_µν Φ_ν(r_l')
1361 12 : CALL dbcsr_create(matrix_D_grid, "D_grid", dist_grid_grid, dbcsr_type_no_symmetry, blk_grid, blk_grid)
1362 12 : CALL build_G_grid(bs_env, 0.0_dp, ispin, .TRUE., .FALSE., mat_phi_mu_l, matrix_D_grid, bs_env%eps_filter)
1363 :
1364 : ! Element-wise Hadamard product: Σ^x_grid = D_grid ◦ V_grid
1365 : ! Σ^x_ll' = D_ll' * V^tr_ll'
1366 12 : CALL dbcsr_create(matrix_Sigma_x_grid, template=matrix_V_grid)
1367 12 : CALL hadamard_product(matrix_D_grid, matrix_V_grid, matrix_Sigma_x_grid, 1.0_dp)
1368 :
1369 12 : CALL dbcsr_release(matrix_D_grid)
1370 :
1371 : ! Transform back to AO basis: Σ^x_ao = -1.0 * phi^T * Σ^x_grid * phi
1372 : ! Σ^x_λσ(k=0) = -sum_ll' Φ_λ(r_l) Σ^x_ll' Φ_σ(r_l')
1373 12 : CALL contract_A_B_A("T", "N", mat_phi_mu_l, matrix_Sigma_x_grid, mat_Sigma_x_Gamma, bs_env%eps_filter)
1374 12 : CALL dbcsr_scale(mat_Sigma_x_Gamma, -1.0_dp)
1375 :
1376 12 : CALL dbcsr_release(matrix_Sigma_x_grid)
1377 :
1378 : ! Data I/O and Export to CP2K Full Matrices
1379 22 : CALL copy_dbcsr_to_fm(mat_Sigma_x_Gamma, fm_Sigma_x_Gamma(ispin))
1380 :
1381 : END DO ! ispin
1382 :
1383 10 : IF (bs_env%unit_nr > 0) THEN
1384 : WRITE (bs_env%unit_nr, '(T2,A,T58,A,F7.1,A)') &
1385 5 : 'Computed Σ^x(k=0),', ' Execution time', m_walltime() - t1, ' s'
1386 5 : WRITE (bs_env%unit_nr, '(A)') ' '
1387 : END IF
1388 :
1389 : ! =========================================================================
1390 : ! 4. CLEANUP
1391 : ! =========================================================================
1392 : CALL release_dbcsr_topology_and_matrices(dist=dist_grid_grid, mapped_dist=dist_col_grid, &
1393 10 : m1=mat_Sigma_x_Gamma, m2=matrix_V_grid)
1394 10 : CALL release_dbcsr_topology_and_matrices(dist=dist_aux_aux, mapped_dist=dist_row_aux)
1395 :
1396 10 : CALL cp_fm_release(fm_Vtr_Gamma)
1397 :
1398 10 : CALL timestop(handle)
1399 :
1400 30 : END SUBROUTINE compute_Sigma_x
1401 :
1402 : ! **************************************************************************************************
1403 : !> \brief Computes the correlation part of the GW self-energy
1404 : !> \param bs_env ...
1405 : !> \param fm_W_MIC_time ...
1406 : !> \param mat_phi_mu_l ...
1407 : !> \param mat_Z_lP ...
1408 : !> \param fm_Sigma_c_Gamma_time ...
1409 : ! **************************************************************************************************
1410 :
1411 10 : SUBROUTINE compute_Sigma_c(bs_env, fm_W_MIC_time, mat_phi_mu_l, mat_Z_lP, fm_Sigma_c_Gamma_time)
1412 :
1413 : TYPE(post_scf_bandstructure_type), POINTER :: bs_env
1414 : TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:) :: fm_W_MIC_time
1415 : TYPE(dbcsr_type), INTENT(INOUT) :: mat_phi_mu_l, mat_Z_lP
1416 : TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:, :, :) :: fm_Sigma_c_Gamma_time
1417 :
1418 : CHARACTER(LEN=*), PARAMETER :: routineN = 'compute_Sigma_c'
1419 :
1420 : INTEGER :: handle, i_t, ispin
1421 10 : INTEGER, DIMENSION(:), POINTER :: blk_aux, blk_grid, dist_col_grid, &
1422 10 : dist_row_aux
1423 : REAL(KIND=dp) :: t1, tau
1424 : TYPE(dbcsr_distribution_type) :: dist_aux_aux, dist_grid_grid
1425 10 : TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: mat_Sigma_neg_tau, mat_Sigma_pos_tau
1426 : TYPE(dbcsr_type) :: matrix_G_occ_grid, matrix_G_vir_grid, matrix_Sigma_neg_grid, &
1427 : matrix_Sigma_pos_grid, matrix_W_aux, matrix_W_grid
1428 :
1429 10 : CALL timeset(routineN, handle)
1430 :
1431 : ! =========================================================================
1432 : ! 1. SETUP CORE TOPOLOGIES AND PRE-ALLOCATE OUTPUT ARRAYS
1433 : ! =========================================================================
1434 10 : CALL setup_square_topology(mat_phi_mu_l, 'ROW', dist_grid_grid, blk_grid, dist_col_grid)
1435 10 : CALL setup_square_topology(mat_Z_lP, 'COL', dist_aux_aux, blk_aux, dist_row_aux)
1436 :
1437 : ! Pre-allocate local DBCSR matrices to act as targets for final output
1438 10 : NULLIFY (mat_Sigma_neg_tau, mat_Sigma_pos_tau)
1439 182 : ALLOCATE (mat_Sigma_neg_tau(bs_env%num_time_freq_points, bs_env%n_spin))
1440 182 : ALLOCATE (mat_Sigma_pos_tau(bs_env%num_time_freq_points, bs_env%n_spin))
1441 :
1442 120 : DO i_t = 1, bs_env%num_time_freq_points
1443 250 : DO ispin = 1, bs_env%n_spin
1444 130 : ALLOCATE (mat_Sigma_neg_tau(i_t, ispin)%matrix)
1445 130 : ALLOCATE (mat_Sigma_pos_tau(i_t, ispin)%matrix)
1446 130 : CALL dbcsr_create(mat_Sigma_neg_tau(i_t, ispin)%matrix, template=bs_env%mat_ao_ao%matrix)
1447 240 : CALL dbcsr_create(mat_Sigma_pos_tau(i_t, ispin)%matrix, template=bs_env%mat_ao_ao%matrix)
1448 : END DO
1449 : END DO
1450 :
1451 : ! =========================================================================
1452 : ! 2. MAIN IMAGINARY TIME LOOP
1453 : ! =========================================================================
1454 120 : DO i_t = 1, bs_env%num_time_freq_points
1455 110 : tau = bs_env%imag_time_points(i_t)
1456 :
1457 : ! -------------------------------------------------------------------
1458 : ! Compute W_grid = Z * W_aux * Z^T
1459 : ! -------------------------------------------------------------------
1460 110 : CALL dbcsr_create(matrix_W_aux, "W_aux", dist_aux_aux, dbcsr_type_no_symmetry, blk_aux, blk_aux)
1461 110 : CALL dbcsr_create(matrix_W_grid, "W_grid", dist_grid_grid, dbcsr_type_no_symmetry, blk_grid, blk_grid)
1462 :
1463 110 : CALL copy_fm_to_dbcsr(fm_W_MIC_time(i_t), matrix_W_aux, keep_sparsity=.FALSE.)
1464 :
1465 : ! W^MIC_ll'(iτ,k=0) = sum_PQ Z_lP W^MIC_PQ(iτ) Z_l'Q
1466 110 : CALL contract_A_B_A("N", "T", mat_Z_lP, matrix_W_aux, matrix_W_grid, bs_env%eps_filter)
1467 :
1468 110 : CALL dbcsr_release(matrix_W_aux) ! Clean up aux basis immediately
1469 :
1470 240 : DO ispin = 1, bs_env%n_spin
1471 130 : t1 = m_walltime()
1472 :
1473 : ! -------------------------------------------------------------------
1474 : ! A. Transform Green's Functions to the Grid
1475 : ! -------------------------------------------------------------------
1476 130 : CALL dbcsr_create(matrix_G_occ_grid, "G_occ_grid", dist_grid_grid, dbcsr_type_no_symmetry, blk_grid, blk_grid)
1477 130 : CALL dbcsr_create(matrix_G_vir_grid, "G_vir_grid", dist_grid_grid, dbcsr_type_no_symmetry, blk_grid, blk_grid)
1478 :
1479 : ! G^occ_µλ(i|τ|,k=0) = sum_G^occ_µλn^occ C_µn(k=0) e^(-|(ϵ_nk=0-ϵ_F)τ|) C_λn(k=0)
1480 : ! G^occ_ll'(i|τ|,k=0) = sum_µν Φ_µ(r_l) G^occ_µν Φ_ν(r_l')
1481 130 : CALL build_G_grid(bs_env, tau, ispin, .TRUE., .FALSE., mat_phi_mu_l, matrix_G_occ_grid, bs_env%eps_filter)
1482 :
1483 : ! G^vir_µλ(i|τ|,k=0) = sum_n^vir C_µn(k=0) e^(-|(ϵ_nk=0-ϵ_F)τ|) C_λn(k=0)
1484 : ! G^vir_ll'(i|τ|,k=0) = sum_µν Φ_µ(r_l) G^vir_µν Φ_ν(r_l')
1485 130 : CALL build_G_grid(bs_env, tau, ispin, .FALSE., .TRUE., mat_phi_mu_l, matrix_G_vir_grid, bs_env%eps_filter)
1486 :
1487 : ! -------------------------------------------------------------------
1488 : ! B. Element-wise Hadamard Products for Sigma_c on Grid
1489 : ! Σ_neg_grid = G_occ_grid ◦ W_grid
1490 : ! Σ_pos_grid = G_vir_grid ◦ W_grid
1491 : ! -------------------------------------------------------------------
1492 130 : CALL dbcsr_create(matrix_Sigma_neg_grid, template=matrix_W_grid)
1493 130 : CALL dbcsr_create(matrix_Sigma_pos_grid, template=matrix_W_grid)
1494 :
1495 : ! Σ^c_ll'(iτ,k=0) = -G^occ_ll'(i|τ|,k=0) * W^MIC_ll'(iτ,k=0), for τ < 0
1496 130 : CALL hadamard_product(matrix_G_occ_grid, matrix_W_grid, matrix_Sigma_neg_grid, 1.0_dp)
1497 :
1498 : ! Σ^c_ll'(iτ,k=0) = G^vir_ll'(i|τ|,k=0) * W^MIC_ll'(iτ,k=0), for τ > 0
1499 130 : CALL hadamard_product(matrix_G_vir_grid, matrix_W_grid, matrix_Sigma_pos_grid, 1.0_dp)
1500 :
1501 : ! Instantly purge massive G_grid arrays to save memory
1502 130 : CALL dbcsr_release(matrix_G_occ_grid)
1503 130 : CALL dbcsr_release(matrix_G_vir_grid)
1504 :
1505 : ! -------------------------------------------------------------------
1506 : ! C. Transform Sigma back to AO Basis
1507 : ! Σ_AO = phi^T * Σ_grid * phi
1508 : ! -------------------------------------------------------------------
1509 :
1510 : ! Σ^c_λσ(iτ,k=0) = sum_ll' Φ_λ(r_l) Σ^c_ll'(iτ,k=0) Φ_σ(r_l'), for τ < 0
1511 : CALL contract_A_B_A("T", "N", mat_phi_mu_l, matrix_Sigma_neg_grid, &
1512 130 : mat_Sigma_neg_tau(i_t, ispin)%matrix, bs_env%eps_filter)
1513 130 : CALL dbcsr_scale(mat_Sigma_neg_tau(i_t, ispin)%matrix, -1.0_dp)
1514 :
1515 : ! Σ^c_λσ(iτ,k=0) = sum_ll' Φ_λ(r_l) Σ^c_ll'(iτ,k=0) Φ_σ(r_l'), for τ > 0
1516 : CALL contract_A_B_A("T", "N", mat_phi_mu_l, matrix_Sigma_pos_grid, &
1517 130 : mat_Sigma_pos_tau(i_t, ispin)%matrix, bs_env%eps_filter)
1518 :
1519 : ! Purge Grid Sigma arrays
1520 130 : CALL dbcsr_release(matrix_Sigma_neg_grid)
1521 130 : CALL dbcsr_release(matrix_Sigma_pos_grid)
1522 :
1523 240 : IF (bs_env%unit_nr > 0) THEN
1524 : WRITE (bs_env%unit_nr, '(T2,A,I10,A,I3,A,F7.1,A)') &
1525 65 : 'Computed Σ^c(iτ,k=0) for time point ', i_t, ' /', bs_env%num_time_freq_points, &
1526 130 : ', Execution time', m_walltime() - t1, ' s'
1527 : END IF
1528 :
1529 : END DO ! ispin
1530 :
1531 : ! Release the W_grid for this time point
1532 120 : CALL dbcsr_release(matrix_W_grid)
1533 :
1534 : END DO ! i_t
1535 :
1536 10 : IF (bs_env%unit_nr > 0) WRITE (bs_env%unit_nr, '(A)') ' '
1537 :
1538 : ! -------------------------------------------------------------------------
1539 : ! 3. FINALIZE AND CLEANUP
1540 : ! -------------------------------------------------------------------------
1541 : CALL fill_fm_Sigma_c_Gamma_time(fm_Sigma_c_Gamma_time, bs_env, &
1542 10 : mat_Sigma_pos_tau, mat_Sigma_neg_tau)
1543 :
1544 10 : CALL cp_fm_release(fm_W_MIC_time)
1545 :
1546 10 : CALL dbcsr_deallocate_matrix_set(mat_Sigma_neg_tau)
1547 10 : CALL dbcsr_deallocate_matrix_set(mat_Sigma_pos_tau)
1548 :
1549 10 : CALL release_dbcsr_topology_and_matrices(dist=dist_grid_grid, mapped_dist=dist_col_grid)
1550 10 : CALL release_dbcsr_topology_and_matrices(dist=dist_aux_aux, mapped_dist=dist_row_aux)
1551 :
1552 10 : CALL delete_unnecessary_files(bs_env)
1553 10 : CALL timestop(handle)
1554 :
1555 10 : END SUBROUTINE compute_Sigma_c
1556 :
1557 : ! **************************************************************************************************
1558 : !> \brief DBCSR Topology Generation
1559 : !> \param matrix_template ...
1560 : !> \param dim_type ...
1561 : !> \param square_dist ...
1562 : !> \param blk_sizes ...
1563 : !> \param mapped_dist ...
1564 : ! **************************************************************************************************
1565 :
1566 572 : SUBROUTINE setup_square_topology(matrix_template, dim_type, square_dist, blk_sizes, mapped_dist)
1567 :
1568 : TYPE(dbcsr_type), INTENT(IN) :: matrix_template
1569 : CHARACTER(LEN=*), INTENT(IN) :: dim_type
1570 : TYPE(dbcsr_distribution_type), INTENT(OUT) :: square_dist
1571 : INTEGER, DIMENSION(:), INTENT(OUT), POINTER :: blk_sizes, mapped_dist
1572 :
1573 : INTEGER :: i, np
1574 572 : INTEGER, DIMENSION(:), POINTER :: col_blk, col_dist, row_blk, row_dist
1575 : TYPE(dbcsr_distribution_type) :: dist_template
1576 :
1577 : CALL dbcsr_get_info(matrix_template, distribution=dist_template, &
1578 572 : row_blk_size=row_blk, col_blk_size=col_blk)
1579 572 : CALL dbcsr_distribution_get(dist_template, row_dist=row_dist, col_dist=col_dist)
1580 :
1581 572 : IF (TRIM(dim_type) == 'ROW') THEN
1582 : ! Creates ROW x ROW (e.g., Grid x Grid from mat_phi_mu_l)
1583 20 : blk_sizes => row_blk
1584 76 : np = MAXVAL(col_dist) + 1 ! npcol
1585 60 : ALLOCATE (mapped_dist(SIZE(blk_sizes)))
1586 80 : DO i = 1, SIZE(blk_sizes)
1587 80 : mapped_dist(i) = MOD(i - 1, np)
1588 : END DO
1589 : CALL dbcsr_distribution_new(square_dist, template=dist_template, &
1590 20 : row_dist=row_dist, col_dist=mapped_dist)
1591 :
1592 552 : ELSE IF (TRIM(dim_type) == 'COL') THEN
1593 : ! Creates COL x COL (e.g., Aux x Aux from mat_Z_lP)
1594 552 : blk_sizes => col_blk
1595 2208 : np = MAXVAL(row_dist) + 1 ! nprow
1596 1656 : ALLOCATE (mapped_dist(SIZE(blk_sizes)))
1597 2040 : DO i = 1, SIZE(blk_sizes)
1598 2040 : mapped_dist(i) = MOD(i - 1, np)
1599 : END DO
1600 : CALL dbcsr_distribution_new(square_dist, template=dist_template, &
1601 552 : row_dist=mapped_dist, col_dist=col_dist)
1602 : END IF
1603 :
1604 572 : END SUBROUTINE setup_square_topology
1605 :
1606 : ! **************************************************************************************************
1607 : !> \brief DBCSR matrices deallocation
1608 : !> \param dist ...
1609 : !> \param mapped_dist ...
1610 : !> \param m1 ...
1611 : !> \param m2 ...
1612 : !> \param m3 ...
1613 : !> \param m4 ...
1614 : ! **************************************************************************************************
1615 :
1616 572 : SUBROUTINE release_dbcsr_topology_and_matrices(dist, mapped_dist, m1, m2, m3, m4)
1617 :
1618 : TYPE(dbcsr_distribution_type), INTENT(INOUT), &
1619 : OPTIONAL :: dist
1620 : INTEGER, DIMENSION(:), INTENT(INOUT), OPTIONAL, &
1621 : POINTER :: mapped_dist
1622 : TYPE(dbcsr_type), INTENT(INOUT), OPTIONAL :: m1, m2, m3, m4
1623 :
1624 572 : IF (PRESENT(dist)) CALL dbcsr_distribution_release(dist)
1625 572 : IF (PRESENT(mapped_dist)) THEN
1626 572 : IF (ASSOCIATED(mapped_dist)) THEN
1627 572 : DEALLOCATE (mapped_dist)
1628 : NULLIFY (mapped_dist)
1629 : END IF
1630 : END IF
1631 572 : IF (PRESENT(m1)) CALL dbcsr_release(m1)
1632 572 : IF (PRESENT(m2)) CALL dbcsr_release(m2)
1633 572 : IF (PRESENT(m3)) CALL dbcsr_release(m3)
1634 572 : IF (PRESENT(m4)) CALL dbcsr_release(m4)
1635 :
1636 572 : END SUBROUTINE release_dbcsr_topology_and_matrices
1637 :
1638 : END MODULE gw_large_cell_Gamma_ri_rs
|