Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2026 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : MODULE qs_fb_filter_matrix_methods
9 :
10 : USE atomic_kind_types, ONLY: atomic_kind_type,&
11 : get_atomic_kind
12 : USE cp_dbcsr_api, ONLY: dbcsr_create,&
13 : dbcsr_distribution_type,&
14 : dbcsr_finalize,&
15 : dbcsr_get_info,&
16 : dbcsr_get_stored_coordinates,&
17 : dbcsr_put_block,&
18 : dbcsr_type,&
19 : dbcsr_type_no_symmetry
20 : USE input_constants, ONLY: smear_fermi_dirac
21 : USE kinds, ONLY: default_string_length,&
22 : dp,&
23 : int_8
24 : USE message_passing, ONLY: mp_para_env_type
25 : USE particle_types, ONLY: particle_type
26 : USE qs_fb_atomic_halo_types, ONLY: fb_atomic_halo_create,&
27 : fb_atomic_halo_get,&
28 : fb_atomic_halo_list_get,&
29 : fb_atomic_halo_list_obj,&
30 : fb_atomic_halo_nullify,&
31 : fb_atomic_halo_obj,&
32 : fb_atomic_halo_release,&
33 : fb_atomic_halo_set
34 : USE qs_fb_atomic_matrix_methods, ONLY: fb_atmatrix_calc_size,&
35 : fb_atmatrix_construct,&
36 : fb_atmatrix_construct_2,&
37 : fb_atmatrix_generate_com_pairs_2
38 : USE qs_fb_com_tasks_types, ONLY: &
39 : TASK_COST, TASK_DEST, TASK_N_RECORDS, TASK_PAIR, TASK_SRC, &
40 : fb_com_atom_pairs_calc_buffer_sizes, fb_com_atom_pairs_create, fb_com_atom_pairs_decode, &
41 : fb_com_atom_pairs_distribute_blks, fb_com_atom_pairs_gather_blks, fb_com_atom_pairs_get, &
42 : fb_com_atom_pairs_has_data, fb_com_atom_pairs_init, fb_com_atom_pairs_nullify, &
43 : fb_com_atom_pairs_obj, fb_com_atom_pairs_release, fb_com_tasks_build_atom_pairs, &
44 : fb_com_tasks_create, fb_com_tasks_encode_pair, fb_com_tasks_nullify, fb_com_tasks_obj, &
45 : fb_com_tasks_release, fb_com_tasks_set, fb_com_tasks_transpose_dest_src
46 : USE qs_fb_matrix_data_types, ONLY: fb_matrix_data_add,&
47 : fb_matrix_data_create,&
48 : fb_matrix_data_has_data,&
49 : fb_matrix_data_nullify,&
50 : fb_matrix_data_obj,&
51 : fb_matrix_data_release
52 : USE qs_fb_trial_fns_types, ONLY: fb_trial_fns_get,&
53 : fb_trial_fns_obj
54 : USE smearing_utils, ONLY: SmearFixed,&
55 : SmearOcc
56 : USE string_utilities, ONLY: compress,&
57 : uppercase
58 : #include "./base/base_uses.f90"
59 :
60 : IMPLICIT NONE
61 :
62 : PRIVATE
63 :
64 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'qs_fb_filter_matrix_methods'
65 :
66 : PUBLIC :: fb_fltrmat_build, &
67 : fb_fltrmat_build_2
68 :
69 : CONTAINS
70 :
71 : ! **************************************************************************************************
72 : !> \brief Build the filter matrix, with MPI communications happening at each
73 : !> step. Less efficient on communication, but more efficient on
74 : !> memory usage (compared to fb_fltrmat_build_2)
75 : !> \param H_mat : DBCSR system KS matrix
76 : !> \param S_mat : DBCSR system overlap matrix
77 : !> \param atomic_halos : list of all local atomic halos, each halo gives
78 : !> one atomic matrix and contributes to one blk
79 : !> col to the filter matrix
80 : !> \param trial_fns : the trial functions to be used to shrink the
81 : !> size of the new "filtered" basis
82 : !> \param para_env : cp2k parallel environment
83 : !> \param particle_set : set of all particles in the system
84 : !> \param fermi_level : the fermi level used for defining the filter
85 : !> function, which is a Fermi-Dirac distribution
86 : !> function
87 : !> \param filter_temp : the filter temperature used for defining the
88 : !> filter function
89 : !> \param name : name given to the filter matrix
90 : !> \param filter_mat : DBCSR format filter matrix
91 : !> \param tolerance : anything less than tolerance is treated as zero
92 : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
93 : ! **************************************************************************************************
94 64 : SUBROUTINE fb_fltrmat_build(H_mat, &
95 : S_mat, &
96 : atomic_halos, &
97 : trial_fns, &
98 : para_env, &
99 : particle_set, &
100 : fermi_level, &
101 : filter_temp, &
102 : name, &
103 : filter_mat, &
104 : tolerance)
105 : TYPE(dbcsr_type), POINTER :: H_mat, S_mat
106 : TYPE(fb_atomic_halo_list_obj), INTENT(IN) :: atomic_halos
107 : TYPE(fb_trial_fns_obj), INTENT(IN) :: trial_fns
108 : TYPE(mp_para_env_type), POINTER :: para_env
109 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
110 : REAL(KIND=dp), INTENT(IN) :: fermi_level, filter_temp
111 : CHARACTER(LEN=*), INTENT(IN) :: name
112 : TYPE(dbcsr_type), POINTER :: filter_mat
113 : REAL(KIND=dp), INTENT(IN) :: tolerance
114 :
115 : CHARACTER(LEN=*), PARAMETER :: routineN = 'fb_fltrmat_build'
116 :
117 : CHARACTER(LEN=32) :: symmetry_string
118 : CHARACTER(LEN=default_string_length) :: name_string
119 : INTEGER :: handle, iblkcol, ihalo, ikind, &
120 : max_nhalos, nblkcols_total, nhalos
121 64 : INTEGER, DIMENSION(:), POINTER :: col_blk_size, dummy_halo_atoms, ntfns, &
122 64 : row_blk_size
123 : LOGICAL :: send_data_only
124 : TYPE(atomic_kind_type), POINTER :: atomic_kind
125 : TYPE(dbcsr_distribution_type) :: dbcsr_dist
126 : TYPE(fb_atomic_halo_obj) :: dummy_atomic_halo
127 64 : TYPE(fb_atomic_halo_obj), DIMENSION(:), POINTER :: halos
128 :
129 64 : CALL timeset(routineN, handle)
130 :
131 64 : NULLIFY (halos, atomic_kind, ntfns, dummy_halo_atoms, row_blk_size, col_blk_size)
132 64 : CALL fb_atomic_halo_nullify(dummy_atomic_halo)
133 :
134 : ! filter_mat must be of a dissassociated status (i.e. brand new)
135 64 : CPASSERT(.NOT. ASSOCIATED(filter_mat))
136 :
137 : ! get trial function information
138 : CALL fb_trial_fns_get(trial_fns=trial_fns, &
139 64 : nfunctions=ntfns)
140 :
141 : ! calculate the row_blk_size and col_blk_size arrays for
142 : ! constructing the filter matrix in DBCSR format
143 : ! row_blk_size for the filter matrix is the same as H or S
144 : CALL dbcsr_get_info(H_mat, &
145 : nblkcols_total=nblkcols_total, &
146 : row_blk_size=row_blk_size, &
147 64 : distribution=dbcsr_dist)
148 192 : ALLOCATE (col_blk_size(nblkcols_total))
149 576 : col_blk_size = 0
150 576 : DO iblkcol = 1, nblkcols_total
151 512 : atomic_kind => particle_set(iblkcol)%atomic_kind
152 : CALL get_atomic_kind(atomic_kind=atomic_kind, &
153 512 : kind_number=ikind)
154 576 : col_blk_size(iblkcol) = ntfns(ikind)
155 : END DO
156 : ! DO NOT deallocate cbs if gift=.TRUE. as col_blk_sizes will only point to cbs
157 64 : name_string = name
158 64 : CALL compress(name_string)
159 64 : CALL uppercase(name_string)
160 : ! the filter matrix is non-square and is always non-symmetric
161 64 : symmetry_string = dbcsr_type_no_symmetry
162 : ! create empty filter matrix
163 64 : ALLOCATE (filter_mat)
164 : CALL dbcsr_create(matrix=filter_mat, &
165 : name=name_string, &
166 : dist=dbcsr_dist, &
167 : matrix_type=symmetry_string, &
168 : row_blk_size=row_blk_size, &
169 64 : col_blk_size=col_blk_size)
170 64 : DEALLOCATE (col_blk_size)
171 :
172 : CALL fb_atomic_halo_list_get(atomic_halos=atomic_halos, &
173 : nhalos=nhalos, &
174 : max_nhalos=max_nhalos, &
175 64 : halos=halos)
176 :
177 : ! create dummy empty atomic halo
178 64 : CALL fb_atomic_halo_create(dummy_atomic_halo)
179 64 : ALLOCATE (dummy_halo_atoms(0))
180 : CALL fb_atomic_halo_set(atomic_halo=dummy_atomic_halo, &
181 : owner_atom=0, &
182 : owner_id_in_halo=0, &
183 : natoms=0, &
184 : halo_atoms=dummy_halo_atoms, &
185 : nelectrons=0, &
186 64 : sorted=.TRUE.)
187 :
188 64 : send_data_only = .FALSE.
189 :
190 320 : DO ihalo = 1, max_nhalos
191 256 : IF (ihalo > nhalos) THEN
192 : send_data_only = .TRUE.
193 : END IF
194 : ! construct the filter matrix block by block
195 320 : IF (send_data_only) THEN
196 : CALL fb_fltrmat_add_blkcol(H_mat, &
197 : S_mat, &
198 : dummy_atomic_halo, &
199 : trial_fns, &
200 : para_env, &
201 : particle_set, &
202 : fermi_level, &
203 : filter_temp, &
204 : filter_mat, &
205 0 : tolerance)
206 : ELSE
207 : CALL fb_fltrmat_add_blkcol(H_mat, &
208 : S_mat, &
209 : halos(ihalo), &
210 : trial_fns, &
211 : para_env, &
212 : particle_set, &
213 : fermi_level, &
214 : filter_temp, &
215 : filter_mat, &
216 256 : tolerance)
217 : END IF ! send_data_only
218 : END DO
219 :
220 : ! finalise the filter matrix
221 64 : CALL dbcsr_finalize(filter_mat)
222 :
223 : ! cleanup
224 64 : CALL fb_atomic_halo_release(dummy_atomic_halo)
225 :
226 64 : CALL timestop(handle)
227 :
228 192 : END SUBROUTINE fb_fltrmat_build
229 :
230 : ! **************************************************************************************************
231 : !> \brief Build the filter matrix, with MPI communications grouped together.
232 : !> More effcient on communication, less efficient on memory (compared
233 : !> to fb_fltrmat_build)
234 : !> \param H_mat : DBCSR system KS matrix
235 : !> \param S_mat : DBCSR system overlap matrix
236 : !> \param atomic_halos : list of all local atomic halos, each halo gives
237 : !> one atomic matrix and contributes to one blk
238 : !> col to the filter matrix
239 : !> \param trial_fns : the trial functions to be used to shrink the
240 : !> size of the new "filtered" basis
241 : !> \param para_env : cp2k parallel environment
242 : !> \param particle_set : set of all particles in the system
243 : !> \param fermi_level : the fermi level used for defining the filter
244 : !> function, which is a Fermi-Dirac distribution
245 : !> function
246 : !> \param filter_temp : the filter temperature used for defining the
247 : !> filter function
248 : !> \param name : name given to the filter matrix
249 : !> \param filter_mat : DBCSR format filter matrix
250 : !> \param tolerance : anything less than tolerance is treated as zero
251 : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
252 : ! **************************************************************************************************
253 16 : SUBROUTINE fb_fltrmat_build_2(H_mat, &
254 : S_mat, &
255 : atomic_halos, &
256 : trial_fns, &
257 : para_env, &
258 : particle_set, &
259 : fermi_level, &
260 : filter_temp, &
261 : name, &
262 : filter_mat, &
263 : tolerance)
264 : TYPE(dbcsr_type), POINTER :: H_mat, S_mat
265 : TYPE(fb_atomic_halo_list_obj), INTENT(IN) :: atomic_halos
266 : TYPE(fb_trial_fns_obj), INTENT(IN) :: trial_fns
267 : TYPE(mp_para_env_type), POINTER :: para_env
268 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
269 : REAL(KIND=dp), INTENT(IN) :: fermi_level, filter_temp
270 : CHARACTER(LEN=*), INTENT(IN) :: name
271 : TYPE(dbcsr_type), POINTER :: filter_mat
272 : REAL(KIND=dp), INTENT(IN) :: tolerance
273 :
274 : CHARACTER(LEN=*), PARAMETER :: routineN = 'fb_fltrmat_build_2'
275 :
276 : CHARACTER(LEN=default_string_length) :: name_string
277 : INTEGER :: handle, iblkcol, ihalo, ikind, &
278 : natoms_global, natoms_in_halo, &
279 : nblkcols_total, nblks_recv, nhalos, &
280 : nmax
281 16 : INTEGER, DIMENSION(:), POINTER :: col_blk_size, ntfns, row_blk_size
282 : LOGICAL :: check_ok
283 : TYPE(atomic_kind_type), POINTER :: atomic_kind
284 : TYPE(dbcsr_distribution_type) :: dbcsr_dist
285 16 : TYPE(fb_atomic_halo_obj), DIMENSION(:), POINTER :: halos
286 : TYPE(fb_com_atom_pairs_obj) :: atmatrix_blks_recv, atmatrix_blks_send, &
287 : filter_mat_blks_recv, &
288 : filter_mat_blks_send
289 : TYPE(fb_matrix_data_obj) :: filter_mat_data, H_mat_data, S_mat_data
290 :
291 16 : CALL timeset(routineN, handle)
292 :
293 16 : NULLIFY (halos, atomic_kind, row_blk_size, col_blk_size, ntfns)
294 :
295 : ! filter_mat must be of a dissassociated status (i.e. brand new)
296 16 : check_ok = .NOT. ASSOCIATED(filter_mat)
297 16 : CPASSERT(check_ok)
298 :
299 : ! get total number of atoms
300 16 : natoms_global = SIZE(particle_set)
301 :
302 : ! get trial function information
303 : CALL fb_trial_fns_get(trial_fns=trial_fns, &
304 16 : nfunctions=ntfns)
305 :
306 : ! calculate the row_blk_size and col_blk_size arrays for
307 : ! constructing the filter matrix in DBCSR format
308 : ! row_blk_size for the filter matrix is the same as H or S
309 : CALL dbcsr_get_info(H_mat, &
310 : nblkcols_total=nblkcols_total, &
311 : row_blk_size=row_blk_size, &
312 16 : distribution=dbcsr_dist)
313 48 : ALLOCATE (col_blk_size(nblkcols_total))
314 144 : col_blk_size = 0
315 144 : DO iblkcol = 1, nblkcols_total
316 128 : atomic_kind => particle_set(iblkcol)%atomic_kind
317 : CALL get_atomic_kind(atomic_kind=atomic_kind, &
318 128 : kind_number=ikind)
319 144 : col_blk_size(iblkcol) = ntfns(ikind)
320 : END DO
321 : ! DO NOT deallocate cbs if gift=.TRUE. as col_blk_sizes will only point to cbs
322 16 : name_string = name
323 16 : CALL compress(name_string)
324 16 : CALL uppercase(name_string)
325 : ! create empty filter matrix (it is always non-symmetric as it is non-square)
326 16 : ALLOCATE (filter_mat)
327 : CALL dbcsr_create(matrix=filter_mat, &
328 : name=name_string, &
329 : dist=dbcsr_dist, &
330 : matrix_type=dbcsr_type_no_symmetry, &
331 : row_blk_size=row_blk_size, &
332 16 : col_blk_size=col_blk_size)
333 16 : DEALLOCATE (col_blk_size)
334 :
335 : ! get all the blocks required for constructing atomic matrics, and
336 : ! store it in a fb_matrix_data object
337 16 : CALL fb_matrix_data_nullify(H_mat_data)
338 16 : CALL fb_matrix_data_nullify(S_mat_data)
339 16 : CALL fb_com_atom_pairs_nullify(atmatrix_blks_send)
340 16 : CALL fb_com_atom_pairs_nullify(atmatrix_blks_recv)
341 16 : CALL fb_com_atom_pairs_create(atmatrix_blks_send)
342 16 : CALL fb_com_atom_pairs_create(atmatrix_blks_recv)
343 : ! H matrix
344 : CALL fb_atmatrix_generate_com_pairs_2(H_mat, &
345 : atomic_halos, &
346 : para_env, &
347 : atmatrix_blks_send, &
348 16 : atmatrix_blks_recv)
349 : CALL fb_com_atom_pairs_get(atom_pairs=atmatrix_blks_recv, &
350 16 : npairs=nblks_recv)
351 : CALL fb_matrix_data_create(H_mat_data, &
352 : nblks_recv, &
353 16 : natoms_global)
354 : CALL fb_com_atom_pairs_gather_blks(H_mat, &
355 : atmatrix_blks_send, &
356 : atmatrix_blks_recv, &
357 : para_env, &
358 16 : H_mat_data)
359 : ! S matrix
360 : CALL fb_atmatrix_generate_com_pairs_2(S_mat, &
361 : atomic_halos, &
362 : para_env, &
363 : atmatrix_blks_send, &
364 16 : atmatrix_blks_recv)
365 : CALL fb_com_atom_pairs_get(atom_pairs=atmatrix_blks_recv, &
366 16 : npairs=nblks_recv)
367 : CALL fb_matrix_data_create(S_mat_data, &
368 : nblks_recv, &
369 16 : natoms_global)
370 : CALL fb_com_atom_pairs_gather_blks(S_mat, &
371 : atmatrix_blks_send, &
372 : atmatrix_blks_recv, &
373 : para_env, &
374 16 : S_mat_data)
375 : ! cleanup
376 16 : CALL fb_com_atom_pairs_release(atmatrix_blks_send)
377 16 : CALL fb_com_atom_pairs_release(atmatrix_blks_recv)
378 :
379 : ! make filter matrix blocks one by one and store in an
380 : ! matrix_data_obj
381 16 : CALL fb_matrix_data_nullify(filter_mat_data)
382 : CALL fb_atomic_halo_list_get(atomic_halos=atomic_halos, &
383 : nhalos=nhalos, &
384 16 : halos=halos)
385 16 : nmax = 0
386 80 : DO ihalo = 1, nhalos
387 : CALL fb_atomic_halo_get(atomic_halo=halos(ihalo), &
388 64 : natoms=natoms_in_halo)
389 80 : nmax = nmax + natoms_in_halo
390 : END DO
391 : CALL fb_matrix_data_create(filter_mat_data, &
392 : nmax, &
393 16 : natoms_global)
394 80 : DO ihalo = 1, nhalos
395 : CALL fb_fltrmat_add_blkcol_2(H_mat, &
396 : S_mat, &
397 : H_mat_data, &
398 : S_mat_data, &
399 : halos(ihalo), &
400 : trial_fns, &
401 : particle_set, &
402 : fermi_level, &
403 : filter_temp, &
404 : filter_mat_data, &
405 80 : tolerance)
406 : END DO
407 : ! clean up
408 16 : CALL fb_matrix_data_release(H_mat_data)
409 16 : CALL fb_matrix_data_release(S_mat_data)
410 :
411 : ! distribute the relevant blocks from the matrix_data_obj to DBCSR
412 : ! filter matrix
413 16 : CALL fb_com_atom_pairs_nullify(filter_mat_blks_send)
414 16 : CALL fb_com_atom_pairs_nullify(filter_mat_blks_recv)
415 16 : CALL fb_com_atom_pairs_create(filter_mat_blks_send)
416 16 : CALL fb_com_atom_pairs_create(filter_mat_blks_recv)
417 : CALL fb_fltrmat_generate_com_pairs_2(filter_mat, &
418 : atomic_halos, &
419 : para_env, &
420 : filter_mat_blks_send, &
421 16 : filter_mat_blks_recv)
422 : CALL fb_com_atom_pairs_distribute_blks(filter_mat_data, &
423 : filter_mat_blks_send, &
424 : filter_mat_blks_recv, &
425 : para_env, &
426 16 : filter_mat)
427 : ! cleanup
428 16 : CALL fb_com_atom_pairs_release(filter_mat_blks_send)
429 16 : CALL fb_com_atom_pairs_release(filter_mat_blks_recv)
430 16 : CALL fb_matrix_data_release(filter_mat_data)
431 :
432 : ! finalise matrix
433 16 : CALL dbcsr_finalize(filter_mat)
434 :
435 16 : CALL timestop(handle)
436 :
437 96 : END SUBROUTINE fb_fltrmat_build_2
438 :
439 : ! **************************************************************************************************
440 : !> \brief Add a computed blocks in one column to the filter matrix. This
441 : !> version is used by fb_fltrmat_build, for the case where MPI
442 : !> communications are done at each step
443 : !> It does not finalise the filter matrix
444 : !> \param H_mat : DBCSR system KS matrix
445 : !> \param S_mat : DBCSR system overlap matrix
446 : !> \param atomic_halo : the halo that contributes to the blk
447 : !> col of the filter matrix
448 : !> \param trial_fns ...
449 : !> \param para_env : cp2k parallel environment
450 : !> \param particle_set : set of all particles in the system
451 : !> \param fermi_level : the fermi level used for defining the filter
452 : !> function, which is a Fermi-Dirac distribution
453 : !> function
454 : !> \param filter_temp : the filter temperature used for defining the
455 : !> filter function
456 : !> \param filter_mat : DBCSR format filter matrix
457 : !> \param tolerance : anything smaller than tolerance is treated as zero
458 : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
459 : ! **************************************************************************************************
460 256 : SUBROUTINE fb_fltrmat_add_blkcol(H_mat, &
461 : S_mat, &
462 : atomic_halo, &
463 : trial_fns, &
464 : para_env, &
465 : particle_set, &
466 : fermi_level, &
467 : filter_temp, &
468 : filter_mat, &
469 : tolerance)
470 : TYPE(dbcsr_type), POINTER :: H_mat, S_mat
471 : TYPE(fb_atomic_halo_obj), INTENT(IN) :: atomic_halo
472 : TYPE(fb_trial_fns_obj), INTENT(IN) :: trial_fns
473 : TYPE(mp_para_env_type), POINTER :: para_env
474 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
475 : REAL(KIND=dp), INTENT(IN) :: fermi_level, filter_temp
476 : TYPE(dbcsr_type), POINTER :: filter_mat
477 : REAL(KIND=dp), INTENT(IN) :: tolerance
478 :
479 : CHARACTER(LEN=*), PARAMETER :: routineN = 'fb_fltrmat_add_blkcol'
480 :
481 : INTEGER :: handle, handle_mpi, iatom_global, iatom_in_halo, ind, ipair, ipe, itrial, &
482 : jatom_global, jatom_in_halo, jkind, natoms_global, natoms_in_halo, ncols_atmatrix, &
483 : ncols_blk, nrows_atmatrix, nrows_blk, numprocs, pe, recv_encode, send_encode, stat
484 256 : INTEGER(KIND=int_8), DIMENSION(:), POINTER :: pairs_recv, pairs_send
485 256 : INTEGER, ALLOCATABLE, DIMENSION(:) :: atomic_H_blk_col_start, atomic_H_blk_row_start, &
486 256 : atomic_S_blk_col_start, atomic_S_blk_row_start, col_block_size_data, ind_in_halo, &
487 256 : recv_disps, recv_pair_count, recv_pair_disps, recv_sizes, send_disps, send_pair_count, &
488 256 : send_pair_disps, send_sizes
489 256 : INTEGER, DIMENSION(:), POINTER :: halo_atoms, ntfns, row_block_size_data
490 256 : INTEGER, DIMENSION(:, :), POINTER :: tfns
491 256 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:), TARGET :: recv_buf, send_buf
492 256 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: atomic_filter_mat, atomic_H, atomic_S
493 256 : REAL(kind=dp), DIMENSION(:), POINTER :: vector
494 : TYPE(atomic_kind_type), POINTER :: atomic_kind
495 : TYPE(fb_com_atom_pairs_obj) :: com_pairs_recv, com_pairs_send
496 :
497 256 : CALL timeset(routineN, handle)
498 :
499 256 : NULLIFY (atomic_kind, halo_atoms, ntfns, pairs_send, pairs_recv, &
500 256 : row_block_size_data, tfns)
501 256 : CALL fb_com_atom_pairs_nullify(com_pairs_send)
502 256 : CALL fb_com_atom_pairs_nullify(com_pairs_recv)
503 :
504 : ! ----------------------------------------------------------------------
505 : ! Get communication buffers ready
506 : ! ----------------------------------------------------------------------
507 :
508 : ! generate send and recv atom pairs
509 256 : CALL fb_com_atom_pairs_create(com_pairs_send)
510 256 : CALL fb_com_atom_pairs_create(com_pairs_recv)
511 : CALL fb_fltrmat_generate_com_pairs(filter_mat, &
512 : atomic_halo, &
513 : para_env, &
514 : com_pairs_send, &
515 256 : com_pairs_recv)
516 : CALL fb_com_atom_pairs_get(atom_pairs=com_pairs_send, &
517 : natoms_encode=send_encode, &
518 256 : pairs=pairs_send)
519 : CALL fb_com_atom_pairs_get(atom_pairs=com_pairs_recv, &
520 : natoms_encode=recv_encode, &
521 256 : pairs=pairs_recv)
522 :
523 : ! get para_env info
524 256 : numprocs = para_env%num_pe
525 : ! me = para_env%mepos + 1 ! my process id, starting counting from 1
526 :
527 : ! obtain trail function information
528 : CALL fb_trial_fns_get(trial_fns=trial_fns, &
529 : nfunctions=ntfns, &
530 256 : functions=tfns)
531 :
532 : ! obtain row and col block size data for filter matrix
533 256 : CALL dbcsr_get_info(H_mat, row_blk_size=row_block_size_data)
534 256 : natoms_global = SIZE(particle_set)
535 768 : ALLOCATE (col_block_size_data(natoms_global))
536 2304 : DO jatom_global = 1, natoms_global
537 2048 : atomic_kind => particle_set(jatom_global)%atomic_kind
538 2048 : CALL get_atomic_kind(atomic_kind=atomic_kind, kind_number=jkind)
539 2304 : col_block_size_data(jatom_global) = ntfns(jkind)
540 : END DO
541 :
542 : ! allocate temporary arrays for send
543 768 : ALLOCATE (send_sizes(numprocs))
544 512 : ALLOCATE (send_disps(numprocs))
545 512 : ALLOCATE (send_pair_count(numprocs))
546 512 : ALLOCATE (send_pair_disps(numprocs))
547 : ! setup send buffer sizes
548 : CALL fb_com_atom_pairs_calc_buffer_sizes(com_pairs_send, &
549 : numprocs, &
550 : row_block_size_data, &
551 : col_block_size_data, &
552 : send_sizes, &
553 : send_disps, &
554 : send_pair_count, &
555 256 : send_pair_disps)
556 : ! allocate send buffer
557 1280 : ALLOCATE (send_buf(SUM(send_sizes)))
558 :
559 : ! allocate temporary array for recv
560 512 : ALLOCATE (recv_sizes(numprocs))
561 512 : ALLOCATE (recv_disps(numprocs))
562 512 : ALLOCATE (recv_pair_count(numprocs))
563 512 : ALLOCATE (recv_pair_disps(numprocs))
564 : ! setup recv buffer sizes
565 : CALL fb_com_atom_pairs_calc_buffer_sizes(com_pairs_recv, &
566 : numprocs, &
567 : row_block_size_data, &
568 : col_block_size_data, &
569 : recv_sizes, &
570 : recv_disps, &
571 : recv_pair_count, &
572 256 : recv_pair_disps)
573 : ! allocate recv buffer
574 1280 : ALLOCATE (recv_buf(SUM(recv_sizes)))
575 :
576 : ! ----------------------------------------------------------------------
577 : ! Construct atomic filter matrix for this atomic_halo
578 : ! ----------------------------------------------------------------------
579 :
580 : CALL fb_atomic_halo_get(atomic_halo=atomic_halo, &
581 : natoms=natoms_in_halo, &
582 256 : halo_atoms=halo_atoms)
583 :
584 : ! construct atomic matrix for H for atomic_halo
585 : ALLOCATE (atomic_H_blk_row_start(natoms_in_halo + 1), &
586 : atomic_H_blk_col_start(natoms_in_halo + 1), &
587 1024 : STAT=stat)
588 256 : CPASSERT(stat == 0)
589 : CALL fb_atmatrix_calc_size(H_mat, &
590 : atomic_halo, &
591 : nrows_atmatrix, &
592 : ncols_atmatrix, &
593 : atomic_H_blk_row_start, &
594 256 : atomic_H_blk_col_start)
595 :
596 1024 : ALLOCATE (atomic_H(nrows_atmatrix, ncols_atmatrix))
597 : CALL fb_atmatrix_construct(H_mat, &
598 : atomic_halo, &
599 : para_env, &
600 : atomic_H, &
601 : atomic_H_blk_row_start, &
602 256 : atomic_H_blk_col_start)
603 :
604 : ! construct atomic matrix for S for atomic_halo
605 : ALLOCATE (atomic_S_blk_row_start(natoms_in_halo + 1), &
606 : atomic_S_blk_col_start(natoms_in_halo + 1), &
607 1024 : STAT=stat)
608 256 : CPASSERT(stat == 0)
609 : CALL fb_atmatrix_calc_size(S_mat, &
610 : atomic_halo, &
611 : nrows_atmatrix, &
612 : ncols_atmatrix, &
613 : atomic_S_blk_row_start, &
614 256 : atomic_S_blk_col_start)
615 1024 : ALLOCATE (atomic_S(nrows_atmatrix, ncols_atmatrix))
616 : CALL fb_atmatrix_construct(S_mat, &
617 : atomic_halo, &
618 : para_env, &
619 : atomic_S, &
620 : atomic_S_blk_row_start, &
621 256 : atomic_S_blk_col_start)
622 :
623 : ! construct the atomic filter matrix
624 768 : ALLOCATE (atomic_filter_mat(nrows_atmatrix, ncols_atmatrix))
625 : ! calculate atomic filter matrix only if it is non-zero sized
626 256 : IF (nrows_atmatrix > 0 .AND. ncols_atmatrix > 0) THEN
627 : CALL fb_fltrmat_build_atomic_fltrmat(atomic_H, &
628 : atomic_S, &
629 : fermi_level, &
630 : filter_temp, &
631 : atomic_filter_mat, &
632 256 : tolerance)
633 : END IF
634 :
635 : ! ----------------------------------------------------------------------
636 : ! Construct filter matrix blocks and add to the correct locations
637 : ! in send_buffer
638 : ! ----------------------------------------------------------------------
639 :
640 : ! preconstruct iatom_global to iatom_in_halo map
641 512 : ALLOCATE (ind_in_halo(natoms_global))
642 2304 : ind_in_halo = 0
643 2304 : DO iatom_in_halo = 1, natoms_in_halo
644 2048 : iatom_global = halo_atoms(iatom_in_halo)
645 2304 : ind_in_halo(iatom_global) = iatom_in_halo
646 : END DO
647 :
648 : ! initialise send buffer
649 106752 : IF (SIZE(send_buf) > 0) send_buf = 0.0_dp
650 : ! assign values
651 768 : DO ipe = 1, numprocs
652 512 : send_sizes(ipe) = 0
653 2816 : DO ipair = 1, send_pair_count(ipe)
654 : CALL fb_com_atom_pairs_decode(pairs_send(send_pair_disps(ipe) + ipair), &
655 : pe, iatom_global, jatom_global, &
656 2048 : send_encode)
657 2048 : iatom_in_halo = ind_in_halo(iatom_global)
658 2048 : CPASSERT(iatom_in_halo > 0)
659 2048 : jatom_in_halo = ind_in_halo(jatom_global)
660 2048 : CPASSERT(jatom_in_halo > 0)
661 2048 : atomic_kind => particle_set(jatom_global)%atomic_kind
662 : CALL get_atomic_kind(atomic_kind=atomic_kind, &
663 2048 : kind_number=jkind)
664 2048 : nrows_blk = row_block_size_data(iatom_global)
665 2048 : ncols_blk = ntfns(jkind)
666 :
667 : ! do it column-wise one trial function at a time
668 10240 : DO itrial = 1, ntfns(jkind)
669 8192 : ind = send_disps(ipe) + send_sizes(ipe) + (itrial - 1)*nrows_blk
670 : CALL dgemv("N", &
671 : nrows_blk, &
672 : ncols_atmatrix, &
673 : 1.0_dp, &
674 : atomic_filter_mat( &
675 : atomic_H_blk_row_start(iatom_in_halo): &
676 : atomic_H_blk_row_start(iatom_in_halo + 1) - 1, &
677 : 1:ncols_atmatrix &
678 : ), &
679 : nrows_blk, &
680 : atomic_S( &
681 : 1:nrows_atmatrix, &
682 : atomic_S_blk_col_start(jatom_in_halo) + &
683 : tfns(itrial, jkind) - 1 &
684 : ), &
685 : 1, &
686 : 0.0_dp, &
687 : send_buf(ind + 1:ind + nrows_blk), &
688 23865344 : 1)
689 : END DO ! itrial
690 6656 : send_sizes(ipe) = send_sizes(ipe) + nrows_blk*ncols_blk
691 : END DO ! ipair
692 : END DO ! ipe
693 :
694 256 : DEALLOCATE (atomic_H)
695 256 : DEALLOCATE (atomic_H_blk_row_start)
696 256 : DEALLOCATE (atomic_S)
697 256 : DEALLOCATE (atomic_S_blk_row_start)
698 256 : DEALLOCATE (atomic_filter_mat)
699 256 : DEALLOCATE (ind_in_halo)
700 :
701 : ! ----------------------------------------------------------------------
702 : ! Do communication
703 : ! ----------------------------------------------------------------------
704 :
705 256 : CALL timeset("fb_fltrmat_add_blkcol_mpi", handle_mpi)
706 :
707 : CALL para_env%alltoall(send_buf, send_sizes, send_disps, &
708 256 : recv_buf, recv_sizes, recv_disps)
709 :
710 256 : CALL timestop(handle_mpi)
711 :
712 256 : DEALLOCATE (send_buf)
713 256 : DEALLOCATE (send_sizes)
714 256 : DEALLOCATE (send_disps)
715 256 : DEALLOCATE (send_pair_count)
716 256 : DEALLOCATE (send_pair_disps)
717 :
718 : ! ----------------------------------------------------------------------
719 : ! Unpack the recv buffer and add the blocks to correct parts of
720 : ! the DBCSR filter matrix
721 : ! ----------------------------------------------------------------------
722 :
723 768 : DO ipe = 1, numprocs
724 512 : recv_sizes(ipe) = 0
725 2816 : DO ipair = 1, recv_pair_count(ipe)
726 : CALL fb_com_atom_pairs_decode(pairs_recv(recv_pair_disps(ipe) + ipair), &
727 : pe, iatom_global, jatom_global, &
728 2048 : recv_encode)
729 2048 : nrows_blk = row_block_size_data(iatom_global)
730 2048 : ncols_blk = col_block_size_data(jatom_global)
731 2048 : ind = recv_disps(ipe) + recv_sizes(ipe)
732 2048 : vector => recv_buf((ind + 1):(ind + nrows_blk*ncols_blk))
733 : CALL dbcsr_put_block(filter_mat, &
734 : iatom_global, jatom_global, &
735 6144 : block=RESHAPE(vector, [nrows_blk, ncols_blk]))
736 4608 : recv_sizes(ipe) = recv_sizes(ipe) + nrows_blk*ncols_blk
737 : END DO ! ipair
738 : END DO ! ipe
739 :
740 : ! cleanup rest of the temporary arrays
741 256 : DEALLOCATE (recv_buf)
742 256 : DEALLOCATE (recv_sizes)
743 256 : DEALLOCATE (recv_pair_count)
744 256 : DEALLOCATE (recv_pair_disps)
745 :
746 256 : CALL fb_com_atom_pairs_release(com_pairs_send)
747 256 : CALL fb_com_atom_pairs_release(com_pairs_recv)
748 :
749 : ! cannot finalise the matrix until all blocks has been added
750 :
751 256 : CALL timestop(handle)
752 :
753 1792 : END SUBROUTINE fb_fltrmat_add_blkcol
754 :
755 : ! **************************************************************************************************
756 : !> \brief Computed blocks in one filter matrix column. This version is used by
757 : !> fb_fltrmat_build_2, where MPI communication is done collectively
758 : !> \param H_mat : DBCSR system KS matrix
759 : !> \param S_mat : DBCSR system overlap matrix
760 : !> \param H_mat_data : local storage of the relevant H_mat matrix blocks
761 : !> \param S_mat_data : local storage of the relevant S_mat matrix blocks
762 : !> \param atomic_halo : the halo that contributes to the blk
763 : !> col of the filter matrix
764 : !> \param trial_fns : trial functions data
765 : !> \param particle_set : set of all particles in the system
766 : !> \param fermi_level : the fermi level used for defining the filter
767 : !> function, which is a Fermi-Dirac distribution
768 : !> function
769 : !> \param filter_temp : the filter temperature used for defining the
770 : !> filter function
771 : !> \param filter_mat_data : local storage for the the computed filter matrix
772 : !> blocks
773 : !> \param tolerance : anything less than this is regarded as zero
774 : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
775 : ! **************************************************************************************************
776 64 : SUBROUTINE fb_fltrmat_add_blkcol_2(H_mat, &
777 : S_mat, &
778 : H_mat_data, &
779 : S_mat_data, &
780 : atomic_halo, &
781 : trial_fns, &
782 : particle_set, &
783 : fermi_level, &
784 : filter_temp, &
785 : filter_mat_data, &
786 : tolerance)
787 : TYPE(dbcsr_type), POINTER :: H_mat, S_mat
788 : TYPE(fb_matrix_data_obj), INTENT(IN) :: H_mat_data, S_mat_data
789 : TYPE(fb_atomic_halo_obj), INTENT(IN) :: atomic_halo
790 : TYPE(fb_trial_fns_obj), INTENT(IN) :: trial_fns
791 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
792 : REAL(KIND=dp), INTENT(IN) :: fermi_level, filter_temp
793 : TYPE(fb_matrix_data_obj), INTENT(INOUT) :: filter_mat_data
794 : REAL(KIND=dp), INTENT(IN) :: tolerance
795 :
796 : CHARACTER(LEN=*), PARAMETER :: routineN = 'fb_fltrmat_add_blkcol_2'
797 :
798 : INTEGER :: handle, iatom_global, iatom_in_halo, itrial, jatom_global, jatom_in_halo, jkind, &
799 : natoms_global, natoms_in_halo, ncols_atmatrix, ncols_blk, ncols_blk_max, nrows_atmatrix, &
800 : nrows_blk, nrows_blk_max, stat
801 64 : INTEGER, ALLOCATABLE, DIMENSION(:) :: atomic_H_blk_col_start, atomic_H_blk_row_start, &
802 64 : atomic_S_blk_col_start, atomic_S_blk_row_start, col_block_size_data
803 64 : INTEGER, DIMENSION(:), POINTER :: halo_atoms, ntfns, row_block_size_data
804 64 : INTEGER, DIMENSION(:, :), POINTER :: tfns
805 : LOGICAL :: check_ok
806 64 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: atomic_filter_mat, atomic_H, atomic_S, &
807 64 : mat_blk
808 : TYPE(atomic_kind_type), POINTER :: atomic_kind
809 :
810 64 : CALL timeset(routineN, handle)
811 :
812 64 : NULLIFY (atomic_kind, halo_atoms, ntfns, row_block_size_data, tfns)
813 :
814 64 : check_ok = fb_matrix_data_has_data(H_mat_data)
815 64 : CPASSERT(check_ok)
816 64 : check_ok = fb_matrix_data_has_data(S_mat_data)
817 64 : CPASSERT(check_ok)
818 :
819 : ! obtain trial function information
820 : CALL fb_trial_fns_get(trial_fns=trial_fns, &
821 : nfunctions=ntfns, &
822 64 : functions=tfns)
823 :
824 : ! obtain row and col block size data for filter matrix
825 64 : CALL dbcsr_get_info(H_mat, row_blk_size=row_block_size_data)
826 64 : natoms_global = SIZE(particle_set)
827 192 : ALLOCATE (col_block_size_data(natoms_global))
828 576 : DO jatom_global = 1, natoms_global
829 512 : atomic_kind => particle_set(jatom_global)%atomic_kind
830 512 : CALL get_atomic_kind(atomic_kind=atomic_kind, kind_number=jkind)
831 576 : col_block_size_data(jatom_global) = ntfns(jkind)
832 : END DO
833 :
834 : ! ----------------------------------------------------------------------
835 : ! Construct atomic filter matrix for this atomic_halo
836 : ! ----------------------------------------------------------------------
837 :
838 : CALL fb_atomic_halo_get(atomic_halo=atomic_halo, &
839 : natoms=natoms_in_halo, &
840 64 : halo_atoms=halo_atoms)
841 :
842 : ! construct atomic matrix for H for atomic_halo
843 : ALLOCATE (atomic_H_blk_row_start(natoms_in_halo + 1), &
844 : atomic_H_blk_col_start(natoms_in_halo + 1), &
845 256 : STAT=stat)
846 64 : CPASSERT(stat == 0)
847 : CALL fb_atmatrix_calc_size(H_mat, &
848 : atomic_halo, &
849 : nrows_atmatrix, &
850 : ncols_atmatrix, &
851 : atomic_H_blk_row_start, &
852 64 : atomic_H_blk_col_start)
853 256 : ALLOCATE (atomic_H(nrows_atmatrix, ncols_atmatrix))
854 : CALL fb_atmatrix_construct_2(H_mat_data, &
855 : atomic_halo, &
856 : atomic_H, &
857 : atomic_H_blk_row_start, &
858 64 : atomic_H_blk_col_start)
859 :
860 : ! construct atomic matrix for S for atomic_halo
861 : ALLOCATE (atomic_S_blk_row_start(natoms_in_halo + 1), &
862 : atomic_S_blk_col_start(natoms_in_halo + 1), &
863 256 : STAT=stat)
864 64 : CPASSERT(stat == 0)
865 : CALL fb_atmatrix_calc_size(S_mat, &
866 : atomic_halo, &
867 : nrows_atmatrix, &
868 : ncols_atmatrix, &
869 : atomic_S_blk_row_start, &
870 64 : atomic_S_blk_col_start)
871 256 : ALLOCATE (atomic_S(nrows_atmatrix, ncols_atmatrix))
872 : CALL fb_atmatrix_construct_2(S_mat_data, &
873 : atomic_halo, &
874 : atomic_S, &
875 : atomic_S_blk_row_start, &
876 64 : atomic_S_blk_col_start)
877 :
878 : ! construct the atomic filter matrix
879 192 : ALLOCATE (atomic_filter_mat(nrows_atmatrix, ncols_atmatrix))
880 : ! calculate atomic filter matrix only if it is non-zero sized
881 64 : IF (nrows_atmatrix > 0 .AND. ncols_atmatrix > 0) THEN
882 : CALL fb_fltrmat_build_atomic_fltrmat(atomic_H, &
883 : atomic_S, &
884 : fermi_level, &
885 : filter_temp, &
886 : atomic_filter_mat, &
887 64 : tolerance)
888 : END IF
889 :
890 : ! ----------------------------------------------------------------------
891 : ! Construct filter matrix block and add to filter_mat_data
892 : ! ----------------------------------------------------------------------
893 :
894 : CALL fb_atomic_halo_get(atomic_halo=atomic_halo, &
895 : owner_atom=jatom_global, &
896 64 : owner_id_in_halo=jatom_in_halo)
897 576 : nrows_blk_max = MAXVAL(row_block_size_data)
898 128 : ncols_blk_max = MAXVAL(ntfns)
899 256 : ALLOCATE (mat_blk(nrows_blk_max, ncols_blk_max))
900 3648 : mat_blk(:, :) = 0.0_dp
901 576 : DO iatom_in_halo = 1, natoms_in_halo
902 512 : iatom_global = halo_atoms(iatom_in_halo)
903 512 : atomic_kind => particle_set(jatom_global)%atomic_kind
904 : CALL get_atomic_kind(atomic_kind=atomic_kind, &
905 512 : kind_number=jkind)
906 512 : nrows_blk = row_block_size_data(iatom_global)
907 512 : ncols_blk = ntfns(jkind)
908 :
909 : ! ALLOCATE(mat_blk(nrows_blk,ncols_blk) STAT=stat)
910 : ! CPPostcondition(stat==0, cp_failure_level, routineP,failure)
911 :
912 : ! do it column-wise one trial function at a time
913 2560 : DO itrial = 1, ntfns(jkind)
914 : CALL dgemv("N", &
915 : nrows_blk, &
916 : ncols_atmatrix, &
917 : 1.0_dp, &
918 : atomic_filter_mat( &
919 : atomic_H_blk_row_start(iatom_in_halo): &
920 : atomic_H_blk_row_start(iatom_in_halo + 1) - 1, &
921 : 1:ncols_atmatrix &
922 : ), &
923 : nrows_blk, &
924 : atomic_S( &
925 : 1:nrows_atmatrix, &
926 : atomic_S_blk_col_start(jatom_in_halo) + &
927 : tfns(itrial, jkind) - 1 &
928 : ), &
929 : 1, &
930 : 0.0_dp, &
931 : mat_blk( &
932 : 1:nrows_blk, &
933 : itrial), &
934 5966336 : 1)
935 : END DO ! itrial
936 : CALL fb_matrix_data_add(filter_mat_data, &
937 : iatom_global, &
938 : jatom_global, &
939 1088 : mat_blk(1:nrows_blk, 1:ncols_blk))
940 : ! DEALLOCATE(mat_blk, STAT=stat)
941 : ! CPPostcondition(stat==0, cp_failure_level, routineP,failure)
942 : END DO ! iatom_in_halo
943 64 : DEALLOCATE (mat_blk)
944 :
945 : ! clean up
946 64 : DEALLOCATE (atomic_H)
947 64 : DEALLOCATE (atomic_H_blk_row_start)
948 64 : DEALLOCATE (atomic_S)
949 64 : DEALLOCATE (atomic_S_blk_row_start)
950 64 : DEALLOCATE (atomic_filter_mat)
951 :
952 64 : CALL timestop(handle)
953 :
954 384 : END SUBROUTINE fb_fltrmat_add_blkcol_2
955 :
956 : ! **************************************************************************************************
957 : !> \brief generate the list of blocks (atom pairs) to be sent and received
958 : !> in order to construct the filter matrix for each atomic halo.
959 : !> This version is for use with fb_fltrmat_build, where MPI
960 : !> communications are done at each step
961 : !> \param filter_mat : DBCSR formatted filter matrix
962 : !> \param atomic_halo : the halo that contributes to a blk
963 : !> col of the filter matrix
964 : !> \param para_env : cp2k parallel environment
965 : !> \param atom_pairs_send : list of blocks to be sent
966 : !> \param atom_pairs_recv : list of blocks to be received
967 : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
968 : ! **************************************************************************************************
969 256 : SUBROUTINE fb_fltrmat_generate_com_pairs(filter_mat, &
970 : atomic_halo, &
971 : para_env, &
972 : atom_pairs_send, &
973 : atom_pairs_recv)
974 : TYPE(dbcsr_type), POINTER :: filter_mat
975 : TYPE(fb_atomic_halo_obj), INTENT(IN) :: atomic_halo
976 : TYPE(mp_para_env_type), POINTER :: para_env
977 : TYPE(fb_com_atom_pairs_obj), INTENT(INOUT) :: atom_pairs_send, atom_pairs_recv
978 :
979 : CHARACTER(LEN=*), PARAMETER :: routineN = 'fb_fltrmat_generate_com_pairs'
980 :
981 : INTEGER :: dest, handle, iatom_global, &
982 : iatom_in_halo, itask, jatom_global, &
983 : natoms_in_halo, nblkrows_total, &
984 : ntasks_send
985 256 : INTEGER(KIND=int_8), DIMENSION(:, :), POINTER :: tasks_send
986 256 : INTEGER, DIMENSION(:), POINTER :: halo_atoms
987 : TYPE(fb_com_tasks_obj) :: com_tasks_recv, com_tasks_send
988 :
989 256 : CALL timeset(routineN, handle)
990 :
991 256 : NULLIFY (tasks_send)
992 256 : CALL fb_com_tasks_nullify(com_tasks_send)
993 256 : CALL fb_com_tasks_nullify(com_tasks_recv)
994 :
995 : ! initialise atom_pairs_send and atom_pairs_recv
996 256 : IF (fb_com_atom_pairs_has_data(atom_pairs_send)) THEN
997 256 : CALL fb_com_atom_pairs_init(atom_pairs_send)
998 : ELSE
999 0 : CALL fb_com_atom_pairs_create(atom_pairs_send)
1000 : END IF
1001 256 : IF (fb_com_atom_pairs_has_data(atom_pairs_recv)) THEN
1002 256 : CALL fb_com_atom_pairs_init(atom_pairs_recv)
1003 : ELSE
1004 0 : CALL fb_com_atom_pairs_create(atom_pairs_recv)
1005 : END IF
1006 :
1007 : ! The total number of filter matrix blocks each processor is going
1008 : ! to construct equals to the total number of halo atoms in all of
1009 : ! the atomic halos local to the processor. The number of send
1010 : ! tasks will not exceed this. We do one halo (col) at a time, and
1011 : ! each call of this subroutine will only work on one filter matrix
1012 : ! col corresponding to atomic_halo.
1013 :
1014 : ! The col atom block index for each filter matrix block are the
1015 : ! owner atom of each halo. The row atom block index for each
1016 : ! filter matrix block corresponding to each col are the halo atoms
1017 : ! of the corresponding halos. Filter matrix is non-symmetric: it
1018 : ! is non-square, because the blocks themselves are non-square
1019 :
1020 : CALL fb_atomic_halo_get(atomic_halo=atomic_halo, &
1021 : owner_atom=jatom_global, &
1022 : natoms=natoms_in_halo, &
1023 256 : halo_atoms=halo_atoms)
1024 256 : ntasks_send = natoms_in_halo
1025 :
1026 : ! allocate send tasks
1027 768 : ALLOCATE (tasks_send(TASK_N_RECORDS, ntasks_send))
1028 :
1029 : ! Get the total number of atoms, this can be obtained from the
1030 : ! total number of block rows in the DBCSR filter matrix. We
1031 : ! assumes that before calling this subroutine, the filter_mat has
1032 : ! already been created and initialised: i.e. using
1033 : ! dbcsr_create_new. Even if the matrix is at the moment empty,
1034 : ! the attribute nblkrows_total is already assigned from the dbcsr
1035 : ! distribution data
1036 : CALL dbcsr_get_info(filter_mat, &
1037 256 : nblkrows_total=nblkrows_total)
1038 :
1039 : ! source is always the local processor
1040 : ASSOCIATE (src => para_env%mepos)
1041 : ! construct send tasks
1042 256 : itask = 1
1043 2304 : DO iatom_in_halo = 1, natoms_in_halo
1044 2048 : iatom_global = halo_atoms(iatom_in_halo)
1045 : ! find where the constructed block of filter matrix belongs to
1046 : CALL dbcsr_get_stored_coordinates(filter_mat, &
1047 : iatom_global, &
1048 : jatom_global, &
1049 2048 : processor=dest)
1050 : ! create the send tasks
1051 2048 : tasks_send(TASK_DEST, itask) = dest
1052 2048 : tasks_send(TASK_SRC, itask) = src
1053 : CALL fb_com_tasks_encode_pair(tasks_send(TASK_PAIR, itask), &
1054 : iatom_global, jatom_global, &
1055 2048 : nblkrows_total)
1056 : ! calculation of cost not implemented at the moment
1057 2048 : tasks_send(TASK_COST, itask) = 0
1058 4352 : itask = itask + 1
1059 : END DO ! iatom_in_halo
1060 : END ASSOCIATE
1061 :
1062 256 : CALL fb_com_tasks_create(com_tasks_recv)
1063 256 : CALL fb_com_tasks_create(com_tasks_send)
1064 :
1065 : CALL fb_com_tasks_set(com_tasks=com_tasks_send, &
1066 : task_dim=TASK_N_RECORDS, &
1067 : ntasks=ntasks_send, &
1068 : nencode=nblkrows_total, &
1069 256 : tasks=tasks_send)
1070 :
1071 : ! generate the recv task list (tasks_recv) from the send task list
1072 : CALL fb_com_tasks_transpose_dest_src(com_tasks_recv, "<", com_tasks_send, &
1073 256 : para_env)
1074 :
1075 : ! task lists are now complete, now construct the atom_pairs_send
1076 : ! and atom_pairs_recv from the tasks lists
1077 : CALL fb_com_tasks_build_atom_pairs(com_tasks=com_tasks_send, &
1078 : atom_pairs=atom_pairs_send, &
1079 : natoms_encode=nblkrows_total, &
1080 256 : send_or_recv="send")
1081 : CALL fb_com_tasks_build_atom_pairs(com_tasks=com_tasks_recv, &
1082 : atom_pairs=atom_pairs_recv, &
1083 : natoms_encode=nblkrows_total, &
1084 256 : send_or_recv="recv")
1085 :
1086 : ! cleanup
1087 256 : CALL fb_com_tasks_release(com_tasks_recv)
1088 256 : CALL fb_com_tasks_release(com_tasks_send)
1089 :
1090 256 : CALL timestop(handle)
1091 :
1092 768 : END SUBROUTINE fb_fltrmat_generate_com_pairs
1093 :
1094 : ! **************************************************************************************************
1095 : !> \brief generate the list of blocks (atom pairs) to be sent and received
1096 : !> in order to construct the filter matrix for each atomic halo.
1097 : !> This version is for use with fb_fltrmat_build_2, where MPI
1098 : !> communications are done collectively.
1099 : !> \param filter_mat : DBCSR formatted filter matrix
1100 : !> \param atomic_halos : set of all local atomic halos contributing to the
1101 : !> filter matrix
1102 : !> \param para_env : cp2k parallel environment
1103 : !> \param atom_pairs_send : list of blocks to be sent
1104 : !> \param atom_pairs_recv : list of blocks to be received
1105 : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
1106 : ! **************************************************************************************************
1107 16 : SUBROUTINE fb_fltrmat_generate_com_pairs_2(filter_mat, &
1108 : atomic_halos, &
1109 : para_env, &
1110 : atom_pairs_send, &
1111 : atom_pairs_recv)
1112 : TYPE(dbcsr_type), POINTER :: filter_mat
1113 : TYPE(fb_atomic_halo_list_obj), INTENT(IN) :: atomic_halos
1114 : TYPE(mp_para_env_type), POINTER :: para_env
1115 : TYPE(fb_com_atom_pairs_obj), INTENT(INOUT) :: atom_pairs_send, atom_pairs_recv
1116 :
1117 : CHARACTER(LEN=*), PARAMETER :: routineN = 'fb_fltrmat_generate_com_pairs_2'
1118 :
1119 : INTEGER :: dest, handle, iatom_global, iatom_in_halo, iatom_stored, ihalo, itask, &
1120 : jatom_global, jatom_stored, natoms_in_halo, nblkrows_total, nhalos, ntasks_send
1121 16 : INTEGER(KIND=int_8), DIMENSION(:, :), POINTER :: tasks_send
1122 16 : INTEGER, DIMENSION(:), POINTER :: halo_atoms
1123 : LOGICAL :: transpose
1124 16 : TYPE(fb_atomic_halo_obj), DIMENSION(:), POINTER :: halos
1125 : TYPE(fb_com_tasks_obj) :: com_tasks_recv, com_tasks_send
1126 :
1127 16 : CALL timeset(routineN, handle)
1128 :
1129 16 : NULLIFY (tasks_send)
1130 16 : CALL fb_com_tasks_nullify(com_tasks_send)
1131 16 : CALL fb_com_tasks_nullify(com_tasks_recv)
1132 :
1133 : ! initialise atom_pairs_send and atom_pairs_recv
1134 16 : IF (fb_com_atom_pairs_has_data(atom_pairs_send)) THEN
1135 16 : CALL fb_com_atom_pairs_init(atom_pairs_send)
1136 : ELSE
1137 0 : CALL fb_com_atom_pairs_create(atom_pairs_send)
1138 : END IF
1139 16 : IF (fb_com_atom_pairs_has_data(atom_pairs_recv)) THEN
1140 16 : CALL fb_com_atom_pairs_init(atom_pairs_recv)
1141 : ELSE
1142 0 : CALL fb_com_atom_pairs_create(atom_pairs_recv)
1143 : END IF
1144 :
1145 : ! The col atom block index for each filter matrix block are the
1146 : ! owner atom of each halo. The row atom block index for each
1147 : ! filter matrix block corresponding to each col are the halo atoms
1148 : ! of the corresponding halos. Filter matrix is non-symmetric: it
1149 : ! is non-square, because the blocks themselves are non-square
1150 :
1151 : CALL fb_atomic_halo_list_get(atomic_halos=atomic_halos, &
1152 : nhalos=nhalos, &
1153 16 : halos=halos)
1154 :
1155 : ! estimate the maximum number of blocks (i.e. atom paris) to send
1156 16 : ntasks_send = 0
1157 80 : DO ihalo = 1, nhalos
1158 : CALL fb_atomic_halo_get(atomic_halo=halos(ihalo), &
1159 64 : natoms=natoms_in_halo)
1160 80 : ntasks_send = ntasks_send + natoms_in_halo
1161 : END DO ! ihalo
1162 :
1163 : ! allocate send tasks
1164 48 : ALLOCATE (tasks_send(TASK_N_RECORDS, ntasks_send))
1165 :
1166 : ! Get the total number of atoms. This can be obtained from the
1167 : ! total number of block rows in the DBCSR filter matrix. We
1168 : ! assumes that before calling this subroutine, the filter_mat has
1169 : ! already been created and initialised: i.e. using
1170 : ! dbcsr_create_new. Even if the matrix is at the moment empty,
1171 : ! the attribute nblkrows_total is already assigned from the dbcsr
1172 : ! distribution data
1173 : CALL dbcsr_get_info(filter_mat, &
1174 16 : nblkrows_total=nblkrows_total)
1175 :
1176 : ! source is always the local processor
1177 : ASSOCIATE (src => para_env%mepos)
1178 : ! construct send tasks
1179 16 : itask = 1
1180 80 : DO ihalo = 1, nhalos
1181 : CALL fb_atomic_halo_get(atomic_halo=halos(ihalo), &
1182 : owner_atom=jatom_global, &
1183 : natoms=natoms_in_halo, &
1184 64 : halo_atoms=halo_atoms)
1185 592 : DO iatom_in_halo = 1, natoms_in_halo
1186 512 : iatom_global = halo_atoms(iatom_in_halo)
1187 512 : iatom_stored = iatom_global
1188 512 : jatom_stored = jatom_global
1189 512 : transpose = .FALSE.
1190 : ! find where the constructed block of filter matrix belongs to
1191 : CALL dbcsr_get_stored_coordinates(filter_mat, &
1192 : iatom_stored, &
1193 : jatom_stored, &
1194 512 : processor=dest)
1195 : ! create the send tasks
1196 512 : tasks_send(TASK_DEST, itask) = dest
1197 512 : tasks_send(TASK_SRC, itask) = src
1198 : CALL fb_com_tasks_encode_pair(tasks_send(TASK_PAIR, itask), &
1199 : iatom_global, jatom_global, &
1200 512 : nblkrows_total)
1201 : ! calculation of cost not implemented at the moment
1202 512 : tasks_send(TASK_COST, itask) = 0
1203 1088 : itask = itask + 1
1204 : END DO ! iatom_in_halo
1205 : END DO ! ihalo
1206 : END ASSOCIATE
1207 :
1208 : ! get the actual number of tasks
1209 16 : ntasks_send = itask - 1
1210 :
1211 16 : CALL fb_com_tasks_create(com_tasks_send)
1212 : CALL fb_com_tasks_set(com_tasks=com_tasks_send, &
1213 : task_dim=TASK_N_RECORDS, &
1214 : ntasks=ntasks_send, &
1215 : nencode=nblkrows_total, &
1216 16 : tasks=tasks_send)
1217 :
1218 : ! generate the recv task list (tasks_recv) from the send task list
1219 16 : CALL fb_com_tasks_create(com_tasks_recv)
1220 : CALL fb_com_tasks_transpose_dest_src(com_tasks_recv, "<", com_tasks_send, &
1221 16 : para_env)
1222 :
1223 : ! task lists are now complete, now construct the atom_pairs_send
1224 : ! and atom_pairs_recv from the tasks lists
1225 : CALL fb_com_tasks_build_atom_pairs(com_tasks=com_tasks_send, &
1226 : atom_pairs=atom_pairs_send, &
1227 : natoms_encode=nblkrows_total, &
1228 16 : send_or_recv="send")
1229 : CALL fb_com_tasks_build_atom_pairs(com_tasks=com_tasks_recv, &
1230 : atom_pairs=atom_pairs_recv, &
1231 : natoms_encode=nblkrows_total, &
1232 16 : send_or_recv="recv")
1233 :
1234 : ! cleanup
1235 16 : CALL fb_com_tasks_release(com_tasks_recv)
1236 16 : CALL fb_com_tasks_release(com_tasks_send)
1237 :
1238 16 : CALL timestop(handle)
1239 :
1240 48 : END SUBROUTINE fb_fltrmat_generate_com_pairs_2
1241 :
1242 : ! **************************************************************************************************
1243 : !> \brief Build the atomic filter matrix for each atomic halo
1244 : !> \param atomic_H : atomic KS matrix
1245 : !> \param atomic_S : atomic overlap matrix
1246 : !> \param fermi_level : fermi level used to construct the Fermi-Dirac
1247 : !> filter function
1248 : !> \param filter_temp : temperature used to construct the Fermi-Dirac
1249 : !> filter function
1250 : !> \param atomic_filter_mat : the atomic filter matrix
1251 : !> \param tolerance : anything smaller than tolerance is treated as zero
1252 : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
1253 : ! **************************************************************************************************
1254 960 : SUBROUTINE fb_fltrmat_build_atomic_fltrmat(atomic_H, &
1255 320 : atomic_S, &
1256 : fermi_level, &
1257 : filter_temp, &
1258 320 : atomic_filter_mat, &
1259 : tolerance)
1260 : REAL(KIND=dp), DIMENSION(:, :), INTENT(IN) :: atomic_H, atomic_S
1261 : REAL(KIND=dp), INTENT(IN) :: fermi_level, filter_temp
1262 : REAL(KIND=dp), DIMENSION(:, :), INTENT(OUT) :: atomic_filter_mat
1263 : REAL(KIND=dp), INTENT(IN) :: tolerance
1264 :
1265 : CHARACTER(LEN=*), PARAMETER :: routineN = 'fb_fltrmat_build_atomic_fltrmat'
1266 :
1267 : INTEGER :: handle, handle_dgemm, handle_dsygv, ii, &
1268 : info, jj, mat_dim, work_array_size
1269 : LOGICAL :: check_ok
1270 320 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: eigenvalues, filter_function, work
1271 320 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: atomic_S_copy, eigenvectors, &
1272 320 : filtered_eigenvectors
1273 :
1274 320 : CALL timeset(routineN, handle)
1275 :
1276 : ! This subroutine assumes atomic_filter_mat is not zero size, in
1277 : ! other words, it really has to be constructed, instead of just
1278 : ! being a dummy
1279 :
1280 : check_ok = SIZE(atomic_filter_mat, 1) > 0 .AND. &
1281 320 : SIZE(atomic_filter_mat, 2) > 0
1282 0 : CPASSERT(check_ok)
1283 :
1284 : ! initialise
1285 3494720 : atomic_filter_mat = 0.0_dp
1286 320 : mat_dim = SIZE(atomic_H, 1)
1287 :
1288 : ! diagonalise using LAPACK
1289 960 : ALLOCATE (eigenvalues(mat_dim))
1290 : ! get optimal work array size
1291 320 : ALLOCATE (work(1))
1292 : ! dsygv will overwrite part of atomic_H and atomic_S, thus need to copy them
1293 1280 : ALLOCATE (atomic_S_copy(SIZE(atomic_S, 1), SIZE(atomic_S, 2)))
1294 3494720 : atomic_S_copy(:, :) = atomic_S(:, :)
1295 1280 : ALLOCATE (eigenvectors(SIZE(atomic_H, 1), SIZE(atomic_H, 2)))
1296 3494720 : eigenvectors(:, :) = atomic_H(:, :)
1297 :
1298 320 : CALL timeset("fb_atomic_filter_dsygv", handle_dsygv)
1299 :
1300 320 : info = 0
1301 : CALL dsygv(1, 'V', 'U', &
1302 : mat_dim, eigenvectors, mat_dim, &
1303 : atomic_S_copy, mat_dim, eigenvalues, &
1304 320 : work, -1, info)
1305 320 : work_array_size = NINT(work(1))
1306 : ! now allocate work array
1307 320 : DEALLOCATE (work)
1308 960 : ALLOCATE (work(work_array_size))
1309 1131840 : work = 0.0_dp
1310 : ! do calculation
1311 3494720 : atomic_S_copy(:, :) = atomic_S(:, :)
1312 3494720 : eigenvectors(:, :) = atomic_H(:, :)
1313 320 : info = 0
1314 : CALL dsygv(1, 'V', 'U', &
1315 : mat_dim, eigenvectors, mat_dim, &
1316 : atomic_S_copy, mat_dim, eigenvalues, &
1317 320 : work, work_array_size, info)
1318 : ! check if diagonalisation is successful
1319 320 : IF (info /= 0) THEN
1320 0 : WRITE (*, *) "DSYGV ERROR MESSAGE: ", info
1321 0 : CPABORT("DSYGV failed")
1322 : END IF
1323 :
1324 320 : CALL timestop(handle_dsygv)
1325 :
1326 320 : DEALLOCATE (work)
1327 320 : DEALLOCATE (atomic_S_copy)
1328 :
1329 : ! first get the filter function
1330 960 : ALLOCATE (filter_function(mat_dim))
1331 33600 : filter_function = 0.0_dp
1332 : CALL fb_fltrmat_fermi_dirac_mu(filter_function, &
1333 : eigenvalues, &
1334 : filter_temp, &
1335 320 : fermi_level)
1336 320 : DEALLOCATE (eigenvalues)
1337 :
1338 : ! atomic_H has the eigenvectors, construct the version of it
1339 : ! filtered through the filter function
1340 1280 : ALLOCATE (filtered_eigenvectors(mat_dim, mat_dim))
1341 33600 : DO jj = 1, mat_dim
1342 3494720 : DO ii = 1, mat_dim
1343 : filtered_eigenvectors(ii, jj) = &
1344 3494400 : filter_function(jj)*eigenvectors(ii, jj)
1345 : END DO ! ii
1346 : END DO ! jj
1347 :
1348 320 : DEALLOCATE (filter_function)
1349 :
1350 320 : CALL timeset("fb_atomic_filter_dgemm", handle_dgemm)
1351 :
1352 : ! construct atomic filter matrix
1353 : CALL dgemm("N", &
1354 : "T", &
1355 : mat_dim, &
1356 : mat_dim, &
1357 : mat_dim, &
1358 : 1.0_dp, &
1359 : filtered_eigenvectors, &
1360 : mat_dim, &
1361 : eigenvectors, &
1362 : mat_dim, &
1363 : 0.0_dp, &
1364 : atomic_filter_mat, &
1365 320 : mat_dim)
1366 :
1367 320 : CALL timestop(handle_dgemm)
1368 :
1369 : ! remove small negative terms due to numerical error, the filter
1370 : ! matrix must not be negative definite
1371 33600 : DO jj = 1, SIZE(atomic_filter_mat, 2)
1372 3494720 : DO ii = 1, SIZE(atomic_filter_mat, 1)
1373 3494400 : IF (ABS(atomic_filter_mat(ii, jj)) < tolerance) THEN
1374 53568 : atomic_filter_mat(ii, jj) = 0.0_dp
1375 : END IF
1376 : END DO
1377 : END DO
1378 :
1379 320 : DEALLOCATE (filtered_eigenvectors)
1380 320 : DEALLOCATE (eigenvectors)
1381 :
1382 320 : CALL timestop(handle)
1383 :
1384 960 : END SUBROUTINE fb_fltrmat_build_atomic_fltrmat
1385 :
1386 : ! **************************************************************************************************
1387 : !> \brief get values of Fermi-Dirac distribution based on a given fermi
1388 : !> level at a given set of energy eigenvalues
1389 : !> \param f : the Fermi-Dirac distribution function values
1390 : !> \param eigenvals : set of energy eigenvalues
1391 : !> \param T : temperature
1392 : !> \param mu : the fermi level
1393 : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
1394 : ! **************************************************************************************************
1395 320 : SUBROUTINE fb_fltrmat_fermi_dirac_mu(f, eigenvals, T, mu)
1396 : REAL(KIND=dp), DIMENSION(:), INTENT(OUT) :: f
1397 : REAL(KIND=dp), DIMENSION(:), INTENT(IN) :: eigenvals
1398 : REAL(KIND=dp), INTENT(IN) :: T, mu
1399 :
1400 : REAL(KIND=dp) :: kTS, ne
1401 :
1402 : ! we want fermi function max at 1, so maxocc = 1 here
1403 :
1404 320 : CALL SmearOcc(f, ne, kTS, eigenvals, mu, T, 1.0_dp, smear_fermi_dirac)
1405 320 : END SUBROUTINE fb_fltrmat_fermi_dirac_mu
1406 :
1407 : ! **************************************************************************************************
1408 : !> \brief get values of Fermi-Dirac distribution based on a given electron
1409 : !> number at a given set of energy eigenvales
1410 : !> \param f : the Fermi-Dirac distribution function values
1411 : !> \param eigenvals : set of energy eigenvalues
1412 : !> \param T : temperature
1413 : !> \param ne : number of electrons
1414 : !> \param maxocc : maximum occupancy per orbital
1415 : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
1416 : ! **************************************************************************************************
1417 0 : SUBROUTINE fb_fltrmat_fermi_dirac_ne(f, eigenvals, T, ne, maxocc)
1418 : REAL(KIND=dp), DIMENSION(:), INTENT(OUT) :: f
1419 : REAL(KIND=dp), DIMENSION(:), INTENT(IN) :: eigenvals
1420 : REAL(KIND=dp), INTENT(IN) :: T, ne, maxocc
1421 :
1422 : REAL(KIND=dp) :: kTS, mu
1423 :
1424 : ! mu is the calculated fermi level
1425 : ! kTS is the calculated entropic contribution to the energy i.e. -TS
1426 : ! kTS = kT*[f ln f + (1-f) ln (1-f)]
1427 :
1428 0 : CALL SmearFixed(f, mu, kTS, eigenvals, ne, T, maxocc, smear_fermi_dirac)
1429 0 : END SUBROUTINE fb_fltrmat_fermi_dirac_ne
1430 :
1431 : END MODULE qs_fb_filter_matrix_methods
|