Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2026 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief Methods for mixed CDFT calculations
10 : !> \par History
11 : !> Separated CDFT routines from mixed_environment_utils
12 : !> \author Nico Holmberg [01.2017]
13 : ! **************************************************************************************************
14 : MODULE mixed_cdft_methods
15 : USE ao_util, ONLY: exp_radius_very_extended
16 : USE atomic_kind_types, ONLY: atomic_kind_type,&
17 : get_atomic_kind
18 : USE cell_types, ONLY: cell_type,&
19 : pbc
20 : USE cp_array_utils, ONLY: cp_1d_i_p_type,&
21 : cp_1d_r_p_type,&
22 : cp_2d_r_p_type
23 : USE cp_control_types, ONLY: dft_control_type
24 : USE cp_dbcsr_api, ONLY: &
25 : dbcsr_add, dbcsr_copy, dbcsr_create, dbcsr_init_p, dbcsr_p_type, dbcsr_release, &
26 : dbcsr_release_p, dbcsr_scale, dbcsr_type
27 : USE cp_dbcsr_diag, ONLY: cp_dbcsr_syevd
28 : USE cp_dbcsr_operations, ONLY: cp_dbcsr_sm_fm_multiply
29 : USE cp_fm_basic_linalg, ONLY: cp_fm_column_scale,&
30 : cp_fm_invert,&
31 : cp_fm_transpose
32 : USE cp_fm_struct, ONLY: cp_fm_struct_create,&
33 : cp_fm_struct_release,&
34 : cp_fm_struct_type
35 : USE cp_fm_types, ONLY: cp_fm_create,&
36 : cp_fm_get_info,&
37 : cp_fm_release,&
38 : cp_fm_set_all,&
39 : cp_fm_to_fm,&
40 : cp_fm_type,&
41 : cp_fm_write_formatted
42 : USE cp_log_handling, ONLY: cp_get_default_logger,&
43 : cp_logger_type,&
44 : cp_to_string
45 : USE cp_output_handling, ONLY: cp_print_key_finished_output,&
46 : cp_print_key_unit_nr
47 : USE cp_realspace_grid_cube, ONLY: cp_pw_to_cube
48 : USE cp_subsys_types, ONLY: cp_subsys_get,&
49 : cp_subsys_type
50 : USE cp_units, ONLY: cp_unit_from_cp2k
51 : USE force_env_types, ONLY: force_env_get,&
52 : force_env_type,&
53 : use_qmmm,&
54 : use_qmmmx,&
55 : use_qs_force
56 : USE grid_api, ONLY: GRID_FUNC_AB,&
57 : collocate_pgf_product
58 : USE hirshfeld_methods, ONLY: create_shape_function
59 : USE hirshfeld_types, ONLY: hirshfeld_type
60 : USE input_constants, ONLY: &
61 : becke_cutoff_element, becke_cutoff_global, cdft_alpha_constraint, cdft_beta_constraint, &
62 : cdft_charge_constraint, cdft_magnetization_constraint, mix_cdft, mixed_cdft_parallel, &
63 : mixed_cdft_parallel_nobuild, mixed_cdft_serial, outer_scf_becke_constraint
64 : USE input_section_types, ONLY: section_get_lval,&
65 : section_vals_get,&
66 : section_vals_get_subs_vals,&
67 : section_vals_type,&
68 : section_vals_val_get
69 : USE kinds, ONLY: default_path_length,&
70 : dp
71 : USE machine, ONLY: m_walltime
72 : USE mathlib, ONLY: diamat_all
73 : USE memory_utilities, ONLY: reallocate
74 : USE message_passing, ONLY: mp_request_type,&
75 : mp_testall,&
76 : mp_waitall
77 : USE mixed_cdft_types, ONLY: mixed_cdft_result_type_set,&
78 : mixed_cdft_settings_type,&
79 : mixed_cdft_type,&
80 : mixed_cdft_type_create,&
81 : mixed_cdft_work_type_release
82 : USE mixed_cdft_utils, ONLY: &
83 : hfun_zero, map_permutation_to_states, mixed_cdft_assemble_block_diag, &
84 : mixed_cdft_diagonalize_blocks, mixed_cdft_get_blocks, mixed_cdft_init_structures, &
85 : mixed_cdft_parse_settings, mixed_cdft_print_couplings, mixed_cdft_read_block_diag, &
86 : mixed_cdft_redistribute_arrays, mixed_cdft_release_work, mixed_cdft_transfer_settings
87 : USE mixed_environment_types, ONLY: get_mixed_env,&
88 : mixed_environment_type,&
89 : set_mixed_env
90 : USE parallel_gemm_api, ONLY: parallel_gemm
91 : USE particle_list_types, ONLY: particle_list_type
92 : USE particle_types, ONLY: particle_type
93 : USE pw_env_types, ONLY: pw_env_get,&
94 : pw_env_type
95 : USE pw_methods, ONLY: pw_copy,&
96 : pw_scale,&
97 : pw_zero
98 : USE pw_pool_types, ONLY: pw_pool_type
99 : USE qs_cdft_types, ONLY: cdft_control_type
100 : USE qs_energy_types, ONLY: qs_energy_type
101 : USE qs_environment_types, ONLY: get_qs_env,&
102 : qs_environment_type
103 : USE qs_kind_types, ONLY: qs_kind_type
104 : USE qs_mo_io, ONLY: read_mo_set_from_restart,&
105 : wfn_restart_file_name
106 : USE qs_mo_methods, ONLY: make_basis_simple,&
107 : make_basis_sm
108 : USE qs_mo_types, ONLY: allocate_mo_set,&
109 : deallocate_mo_set,&
110 : mo_set_type,&
111 : set_mo_set
112 : USE realspace_grid_types, ONLY: realspace_grid_type,&
113 : rs_grid_zero,&
114 : transfer_rs2pw
115 : USE util, ONLY: sort
116 : #include "./base/base_uses.f90"
117 :
118 : IMPLICIT NONE
119 :
120 : PRIVATE
121 :
122 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'mixed_cdft_methods'
123 : LOGICAL, PARAMETER, PRIVATE :: debug_this_module = .FALSE.
124 :
125 : PUBLIC :: mixed_cdft_init, &
126 : mixed_cdft_build_weight, &
127 : mixed_cdft_calculate_coupling
128 :
129 : CONTAINS
130 :
131 : ! **************************************************************************************************
132 : !> \brief Initialize a mixed CDFT calculation
133 : !> \param force_env the force_env that holds the CDFT states
134 : !> \param calculate_forces determines if forces should be calculated
135 : !> \par History
136 : !> 01.2016 created [Nico Holmberg]
137 : ! **************************************************************************************************
138 530 : SUBROUTINE mixed_cdft_init(force_env, calculate_forces)
139 : TYPE(force_env_type), POINTER :: force_env
140 : LOGICAL, INTENT(IN) :: calculate_forces
141 :
142 : CHARACTER(len=*), PARAMETER :: routineN = 'mixed_cdft_init'
143 :
144 : INTEGER :: et_freq, handle, iforce_eval, iounit, &
145 : mixing_type, nforce_eval
146 : LOGICAL :: explicit, is_parallel, is_qmmm
147 : TYPE(cp_logger_type), POINTER :: logger
148 : TYPE(cp_subsys_type), POINTER :: subsys_mix
149 : TYPE(force_env_type), POINTER :: force_env_qs
150 : TYPE(mixed_cdft_settings_type) :: settings
151 : TYPE(mixed_cdft_type), POINTER :: mixed_cdft
152 : TYPE(mixed_environment_type), POINTER :: mixed_env
153 : TYPE(particle_list_type), POINTER :: particles_mix
154 : TYPE(section_vals_type), POINTER :: force_env_section, mapping_section, &
155 : md_section, mixed_section, &
156 : print_section, root_section
157 :
158 530 : NULLIFY (subsys_mix, force_env_qs, force_env_section, print_section, &
159 530 : root_section, mixed_section, md_section, mixed_env, mixed_cdft, &
160 530 : mapping_section)
161 :
162 : NULLIFY (settings%grid_span, settings%npts, settings%cutoff, settings%rel_cutoff, &
163 : settings%spherical, settings%rs_dims, settings%odd, settings%atoms, &
164 : settings%coeffs, settings%si, settings%sr, &
165 : settings%cutoffs, settings%radii)
166 :
167 530 : is_qmmm = .FALSE.
168 1060 : logger => cp_get_default_logger()
169 530 : CPASSERT(ASSOCIATED(force_env))
170 530 : nforce_eval = SIZE(force_env%sub_force_env)
171 530 : CALL timeset(routineN, handle)
172 530 : CALL force_env_get(force_env=force_env, force_env_section=force_env_section)
173 530 : mixed_env => force_env%mixed_env
174 530 : mixed_section => section_vals_get_subs_vals(force_env_section, "MIXED")
175 530 : print_section => section_vals_get_subs_vals(force_env_section, "MIXED%MIXED_CDFT%PRINT%PROGRAM_RUN_INFO")
176 530 : iounit = cp_print_key_unit_nr(logger, print_section, '', extension='.mixedLog')
177 : ! Check if a mixed CDFT calculation is requested
178 530 : CALL section_vals_val_get(mixed_section, "MIXING_TYPE", i_val=mixing_type)
179 530 : IF (mixing_type == mix_cdft .AND. .NOT. ASSOCIATED(mixed_env%cdft_control)) THEN
180 78 : mixed_env%do_mixed_cdft = .TRUE.
181 156 : IF (mixed_env%do_mixed_cdft) THEN
182 : ! Sanity check
183 78 : IF (nforce_eval < 2) &
184 : CALL cp_abort(__LOCATION__, &
185 0 : "Mixed CDFT calculation requires at least 2 force_evals.")
186 78 : mapping_section => section_vals_get_subs_vals(mixed_section, "MAPPING")
187 78 : CALL section_vals_get(mapping_section, explicit=explicit)
188 : ! The sub_force_envs must share the same geometrical structure
189 78 : IF (explicit) &
190 0 : CPABORT("Please disable section &MAPPING for mixed CDFT calculations")
191 78 : CALL section_vals_val_get(mixed_section, "MIXED_CDFT%COUPLING", i_val=et_freq)
192 78 : IF (et_freq < 0) THEN
193 0 : mixed_env%do_mixed_et = .FALSE.
194 : ELSE
195 78 : mixed_env%do_mixed_et = .TRUE.
196 78 : IF (et_freq == 0) THEN
197 0 : mixed_env%et_freq = 1
198 : ELSE
199 78 : mixed_env%et_freq = et_freq
200 : END IF
201 : END IF
202 : ! Start initializing the mixed_cdft type
203 : ! First determine if the calculation is pure DFT or QMMM and find the qs force_env
204 258 : DO iforce_eval = 1, nforce_eval
205 180 : IF (.NOT. ASSOCIATED(force_env%sub_force_env(iforce_eval)%force_env)) CYCLE
206 296 : SELECT CASE (force_env%sub_force_env(iforce_eval)%force_env%in_use)
207 : CASE (use_qs_force)
208 142 : force_env_qs => force_env%sub_force_env(iforce_eval)%force_env
209 : CASE (use_qmmm)
210 12 : is_qmmm = .TRUE.
211 : ! This is really the container for QMMM
212 12 : force_env_qs => force_env%sub_force_env(iforce_eval)%force_env
213 : CASE (use_qmmmx)
214 0 : CPABORT("No force mixing allowed for mixed CDFT QM/MM")
215 : CASE DEFAULT
216 : CALL cp_abort(__LOCATION__, &
217 : "Only use_qs_force and use_qmmm are "// &
218 154 : "supported for mixed_cdft_init")
219 : END SELECT
220 232 : CPASSERT(ASSOCIATED(force_env_qs))
221 : END DO
222 : ! Get infos about the mixed subsys
223 78 : IF (.NOT. is_qmmm) THEN
224 : CALL force_env_get(force_env=force_env, &
225 70 : subsys=subsys_mix)
226 : CALL cp_subsys_get(subsys=subsys_mix, &
227 70 : particles=particles_mix)
228 : ELSE
229 : CALL get_qs_env(force_env_qs%qmmm_env%qs_env, &
230 8 : cp_subsys=subsys_mix)
231 : CALL cp_subsys_get(subsys=subsys_mix, &
232 8 : particles=particles_mix)
233 : END IF
234 : ! Init mixed_cdft_type
235 78 : ALLOCATE (mixed_cdft)
236 78 : CALL mixed_cdft_type_create(mixed_cdft)
237 78 : mixed_cdft%first_iteration = .TRUE.
238 : ! Determine what run type to use
239 78 : IF (mixed_env%ngroups == 1) THEN
240 : ! States treated in serial, possibly copying CDFT weight function and gradients from state to state
241 52 : mixed_cdft%run_type = mixed_cdft_serial
242 26 : ELSE IF (mixed_env%ngroups == 2) THEN
243 26 : CALL section_vals_val_get(mixed_section, "MIXED_CDFT%PARALLEL_BUILD", l_val=is_parallel)
244 26 : IF (is_parallel) THEN
245 : ! Treat states in parallel, build weight function and gradients in parallel before SCF process
246 24 : mixed_cdft%run_type = mixed_cdft_parallel
247 24 : IF (.NOT. nforce_eval == 2) &
248 : CALL cp_abort(__LOCATION__, &
249 0 : "Parallel mode mixed CDFT calculation supports only 2 force_evals.")
250 : ELSE
251 : ! Treat states in parallel, but each states builds its own weight function and gradients
252 2 : mixed_cdft%run_type = mixed_cdft_parallel_nobuild
253 : END IF
254 : ELSE
255 0 : mixed_cdft%run_type = mixed_cdft_parallel_nobuild
256 : END IF
257 : ! Store QMMM flag
258 78 : mixed_env%do_mixed_qmmm_cdft = is_qmmm
259 : ! Setup dynamic load balancing
260 78 : CALL section_vals_val_get(mixed_section, "MIXED_CDFT%DLB", l_val=mixed_cdft%dlb)
261 78 : mixed_cdft%dlb = mixed_cdft%dlb .AND. calculate_forces ! disable if forces are not needed
262 78 : mixed_cdft%dlb = mixed_cdft%dlb .AND. (mixed_cdft%run_type == mixed_cdft_parallel) ! disable if not parallel
263 78 : IF (mixed_cdft%dlb) THEN
264 40 : ALLOCATE (mixed_cdft%dlb_control)
265 4 : NULLIFY (mixed_cdft%dlb_control%weight, mixed_cdft%dlb_control%gradients, &
266 4 : mixed_cdft%dlb_control%cavity, mixed_cdft%dlb_control%target_list, &
267 4 : mixed_cdft%dlb_control%bo, mixed_cdft%dlb_control%expected_work, &
268 4 : mixed_cdft%dlb_control%prediction_error, mixed_cdft%dlb_control%sendbuff, &
269 4 : mixed_cdft%dlb_control%recvbuff, mixed_cdft%dlb_control%recv_work_repl, &
270 4 : mixed_cdft%dlb_control%recv_info)
271 : CALL section_vals_val_get(mixed_section, "MIXED_CDFT%LOAD_SCALE", &
272 4 : r_val=mixed_cdft%dlb_control%load_scale)
273 : CALL section_vals_val_get(mixed_section, "MIXED_CDFT%VERY_OVERLOADED", &
274 4 : r_val=mixed_cdft%dlb_control%very_overloaded)
275 : CALL section_vals_val_get(mixed_section, "MIXED_CDFT%MORE_WORK", &
276 4 : i_val=mixed_cdft%dlb_control%more_work)
277 : END IF
278 : ! Metric/Wavefunction overlap method/Lowdin orthogonalization/CDFT-CI
279 78 : mixed_cdft%calculate_metric = .FALSE.
280 78 : mixed_cdft%wfn_overlap_method = .FALSE.
281 78 : mixed_cdft%use_lowdin = .FALSE.
282 78 : mixed_cdft%do_ci = .FALSE.
283 78 : mixed_cdft%nonortho_coupling = .FALSE.
284 78 : mixed_cdft%identical_constraints = .TRUE.
285 78 : mixed_cdft%block_diagonalize = .FALSE.
286 78 : IF (mixed_env%do_mixed_et) THEN
287 : CALL section_vals_val_get(mixed_section, "MIXED_CDFT%METRIC", &
288 78 : l_val=mixed_cdft%calculate_metric)
289 : CALL section_vals_val_get(mixed_section, "MIXED_CDFT%WFN_OVERLAP", &
290 78 : l_val=mixed_cdft%wfn_overlap_method)
291 : CALL section_vals_val_get(mixed_section, "MIXED_CDFT%LOWDIN", &
292 78 : l_val=mixed_cdft%use_lowdin)
293 : CALL section_vals_val_get(mixed_section, "MIXED_CDFT%CI", &
294 78 : l_val=mixed_cdft%do_ci)
295 : CALL section_vals_val_get(mixed_section, "MIXED_CDFT%NONORTHOGONAL_COUPLING", &
296 78 : l_val=mixed_cdft%nonortho_coupling)
297 : CALL section_vals_val_get(mixed_section, "MIXED_CDFT%BLOCK_DIAGONALIZE", &
298 78 : l_val=mixed_cdft%block_diagonalize)
299 : END IF
300 : ! Inversion method
301 78 : CALL section_vals_val_get(mixed_section, "MIXED_CDFT%EPS_SVD", r_val=mixed_cdft%eps_svd)
302 78 : IF (mixed_cdft%eps_svd < 0.0_dp .OR. mixed_cdft%eps_svd > 1.0_dp) &
303 0 : CPABORT("Illegal value for EPS_SVD. Value must be between 0.0 and 1.0.")
304 : ! MD related settings
305 78 : CALL force_env_get(force_env, root_section=root_section)
306 78 : md_section => section_vals_get_subs_vals(root_section, "MOTION%MD")
307 78 : CALL section_vals_val_get(md_section, "TIMESTEP", r_val=mixed_cdft%sim_dt)
308 78 : CALL section_vals_val_get(md_section, "STEP_START_VAL", i_val=mixed_cdft%sim_step)
309 78 : mixed_cdft%sim_step = mixed_cdft%sim_step - 1 ! to get the first step correct
310 78 : mixed_cdft%sim_dt = cp_unit_from_cp2k(mixed_cdft%sim_dt, "fs")
311 : ! Parse constraint settings from the individual force_evals and check consistency
312 : CALL mixed_cdft_parse_settings(force_env, mixed_env, mixed_cdft, &
313 78 : settings, natom=SIZE(particles_mix%els))
314 : ! Transfer settings to mixed_cdft
315 78 : CALL mixed_cdft_transfer_settings(force_env, mixed_cdft, settings)
316 : ! Initilize necessary structures
317 78 : CALL mixed_cdft_init_structures(force_env, force_env_qs, mixed_env, mixed_cdft, settings)
318 : ! Write information about the mixed CDFT calculation
319 78 : IF (iounit > 0) THEN
320 39 : WRITE (iounit, *) ""
321 : WRITE (iounit, FMT="(T2,A,T71)") &
322 39 : "MIXED_CDFT| Activating mixed CDFT calculation"
323 : WRITE (iounit, FMT="(T2,A,T71,I10)") &
324 39 : "MIXED_CDFT| Number of CDFT states: ", nforce_eval
325 51 : SELECT CASE (mixed_cdft%run_type)
326 : CASE (mixed_cdft_parallel)
327 : WRITE (iounit, FMT="(T2,A,T71)") &
328 12 : "MIXED_CDFT| CDFT states calculation mode: parallel with build"
329 : WRITE (iounit, FMT="(T2,A,T71)") &
330 12 : "MIXED_CDFT| Becke constraint is first built using all available processors"
331 : WRITE (iounit, FMT="(T2,A,T71)") &
332 12 : " and then copied to both states with their own processor groups"
333 : CASE (mixed_cdft_serial)
334 : WRITE (iounit, FMT="(T2,A,T71)") &
335 26 : "MIXED_CDFT| CDFT states calculation mode: serial"
336 26 : IF (mixed_cdft%identical_constraints) THEN
337 : WRITE (iounit, FMT="(T2,A,T71)") &
338 25 : "MIXED_CDFT| The constraints are built before the SCF procedure of the first"
339 : WRITE (iounit, FMT="(T2,A,T71)") &
340 25 : " CDFT state and subsequently copied to the other states"
341 : ELSE
342 : WRITE (iounit, FMT="(T2,A,T71)") &
343 1 : "MIXED_CDFT| The constraints are separately built for all CDFT states"
344 : END IF
345 : CASE (mixed_cdft_parallel_nobuild)
346 : WRITE (iounit, FMT="(T2,A,T71)") &
347 1 : "MIXED_CDFT| CDFT states calculation mode: parallel without build"
348 : WRITE (iounit, FMT="(T2,A,T71)") &
349 1 : "MIXED_CDFT| The constraints are separately built for all CDFT states"
350 : CASE DEFAULT
351 39 : CPABORT("Unknown mixed CDFT run type.")
352 : END SELECT
353 : WRITE (iounit, FMT="(T2,A,T71,L10)") &
354 39 : "MIXED_CDFT| Calculating electronic coupling between states: ", mixed_env%do_mixed_et
355 : WRITE (iounit, FMT="(T2,A,T71,L10)") &
356 39 : "MIXED_CDFT| Calculating electronic coupling reliability metric: ", mixed_cdft%calculate_metric
357 : WRITE (iounit, FMT="(T2,A,T71,L10)") &
358 39 : "MIXED_CDFT| Configuration interaction (CDFT-CI) was requested: ", mixed_cdft%do_ci
359 : WRITE (iounit, FMT="(T2,A,T71,L10)") &
360 39 : "MIXED_CDFT| Block diagonalizing the mixed CDFT Hamiltonian: ", mixed_cdft%block_diagonalize
361 39 : IF (mixed_cdft%run_type == mixed_cdft_parallel) THEN
362 : WRITE (iounit, FMT="(T2,A,T71,L10)") &
363 12 : "MIXED_CDFT| Dynamic load balancing enabled: ", mixed_cdft%dlb
364 12 : IF (mixed_cdft%dlb) THEN
365 2 : WRITE (iounit, FMT="(T2,A,T71)") "MIXED_CDFT| Dynamic load balancing parameters:"
366 : WRITE (iounit, FMT="(T2,A,T71,F10.2)") &
367 2 : "MIXED_CDFT| load_scale:", mixed_cdft%dlb_control%load_scale
368 : WRITE (iounit, FMT="(T2,A,T71,F10.2)") &
369 2 : "MIXED_CDFT| very_overloaded:", mixed_cdft%dlb_control%very_overloaded
370 : WRITE (iounit, FMT="(T2,A,T71,I10)") &
371 2 : "MIXED_CDFT| more_work:", mixed_cdft%dlb_control%more_work
372 : END IF
373 : END IF
374 39 : IF (mixed_env%do_mixed_et) THEN
375 39 : IF (mixed_cdft%eps_svd == 0.0_dp) THEN
376 31 : WRITE (iounit, FMT="(T2,A,T71)") "MIXED_CDFT| Matrix inversions calculated with LU decomposition."
377 : ELSE
378 8 : WRITE (iounit, FMT="(T2,A,T71)") "MIXED_CDFT| Matrix inversions calculated with SVD decomposition."
379 8 : WRITE (iounit, FMT="(T2,A,T71,ES10.2)") "MIXED_CDFT| EPS_SVD:", mixed_cdft%eps_svd
380 : END IF
381 : END IF
382 : END IF
383 78 : CALL set_mixed_env(mixed_env, cdft_control=mixed_cdft)
384 : END IF
385 : END IF
386 : CALL cp_print_key_finished_output(iounit, logger, force_env_section, &
387 530 : "MIXED%MIXED_CDFT%PRINT%PROGRAM_RUN_INFO")
388 530 : CALL timestop(handle)
389 :
390 5300 : END SUBROUTINE mixed_cdft_init
391 :
392 : ! **************************************************************************************************
393 : !> \brief Driver routine to handle the build of CDFT weight/gradient in parallel and serial modes
394 : !> \param force_env the force_env that holds the CDFT states
395 : !> \param calculate_forces if forces should be calculated
396 : !> \param iforce_eval index of the currently active CDFT state (serial mode only)
397 : !> \par History
398 : !> 01.2017 created [Nico Holmberg]
399 : ! **************************************************************************************************
400 286 : SUBROUTINE mixed_cdft_build_weight(force_env, calculate_forces, iforce_eval)
401 : TYPE(force_env_type), POINTER :: force_env
402 : LOGICAL, INTENT(IN) :: calculate_forces
403 : INTEGER, INTENT(IN), OPTIONAL :: iforce_eval
404 :
405 : TYPE(mixed_cdft_type), POINTER :: mixed_cdft
406 :
407 286 : NULLIFY (mixed_cdft)
408 286 : CPASSERT(ASSOCIATED(force_env))
409 286 : CALL get_mixed_env(force_env%mixed_env, cdft_control=mixed_cdft)
410 286 : CPASSERT(ASSOCIATED(mixed_cdft))
411 286 : IF (.NOT. PRESENT(iforce_eval)) THEN
412 136 : SELECT CASE (mixed_cdft%run_type)
413 : CASE (mixed_cdft_parallel)
414 36 : CALL mixed_cdft_build_weight_parallel(force_env, calculate_forces)
415 : CASE (mixed_cdft_parallel_nobuild)
416 100 : CALL mixed_cdft_set_flags(force_env)
417 : CASE DEFAULT
418 : ! Do nothing
419 : END SELECT
420 : ELSE
421 334 : SELECT CASE (mixed_cdft%run_type)
422 : CASE (mixed_cdft_serial)
423 186 : CALL mixed_cdft_transfer_weight(force_env, calculate_forces, iforce_eval)
424 : CASE DEFAULT
425 : ! Do nothing
426 : END SELECT
427 : END IF
428 :
429 286 : END SUBROUTINE mixed_cdft_build_weight
430 :
431 : ! **************************************************************************************************
432 : !> \brief Build CDFT weight and gradient on 2N processors and copy it to two N processor subgroups
433 : !> \param force_env the force_env that holds the CDFT states
434 : !> \param calculate_forces if forces should be calculted
435 : !> \par History
436 : !> 01.2016 created [Nico Holmberg]
437 : ! **************************************************************************************************
438 36 : SUBROUTINE mixed_cdft_build_weight_parallel(force_env, calculate_forces)
439 : TYPE(force_env_type), POINTER :: force_env
440 : LOGICAL, INTENT(IN) :: calculate_forces
441 :
442 : CHARACTER(len=*), PARAMETER :: routineN = 'mixed_cdft_build_weight_parallel'
443 :
444 : INTEGER :: handle, handle2, i, iforce_eval, ind, INDEX(6), iounit, j, lb_min, &
445 : my_special_work, natom, nforce_eval, recv_offset, ub_max
446 : INTEGER, DIMENSION(2, 3) :: bo
447 36 : INTEGER, DIMENSION(:), POINTER :: lb, sendbuffer_i, ub
448 : REAL(KIND=dp) :: t1, t2
449 : TYPE(cdft_control_type), POINTER :: cdft_control, cdft_control_target
450 : TYPE(cp_logger_type), POINTER :: logger
451 : TYPE(cp_subsys_type), POINTER :: subsys_mix
452 : TYPE(dft_control_type), POINTER :: dft_control
453 : TYPE(force_env_type), POINTER :: force_env_qs
454 : TYPE(mixed_cdft_type), POINTER :: mixed_cdft
455 : TYPE(mixed_environment_type), POINTER :: mixed_env
456 36 : TYPE(mp_request_type), DIMENSION(:), POINTER :: req_total
457 : TYPE(particle_list_type), POINTER :: particles_mix
458 : TYPE(pw_env_type), POINTER :: pw_env
459 : TYPE(pw_pool_type), POINTER :: auxbas_pw_pool, mixed_auxbas_pw_pool
460 : TYPE(qs_environment_type), POINTER :: qs_env
461 : TYPE(section_vals_type), POINTER :: force_env_section, print_section
462 :
463 : TYPE buffers
464 : INTEGER :: imap(6)
465 : INTEGER, DIMENSION(:), &
466 : POINTER :: iv => null()
467 : REAL(KIND=dp), POINTER, &
468 : DIMENSION(:, :, :) :: r3 => null()
469 : REAL(KIND=dp), POINTER, &
470 : DIMENSION(:, :, :, :) :: r4 => null()
471 : END TYPE buffers
472 36 : TYPE(buffers), DIMENSION(:), POINTER :: recvbuffer
473 :
474 36 : NULLIFY (subsys_mix, force_env_qs, particles_mix, force_env_section, print_section, &
475 36 : mixed_env, mixed_cdft, pw_env, auxbas_pw_pool, mixed_auxbas_pw_pool, &
476 36 : qs_env, dft_control, sendbuffer_i, lb, ub, req_total, recvbuffer, &
477 36 : cdft_control, cdft_control_target)
478 :
479 72 : logger => cp_get_default_logger()
480 36 : CPASSERT(ASSOCIATED(force_env))
481 36 : nforce_eval = SIZE(force_env%sub_force_env)
482 36 : CALL timeset(routineN, handle)
483 36 : t1 = m_walltime()
484 : ! Get infos about the mixed subsys
485 : CALL force_env_get(force_env=force_env, &
486 : subsys=subsys_mix, &
487 36 : force_env_section=force_env_section)
488 : CALL cp_subsys_get(subsys=subsys_mix, &
489 36 : particles=particles_mix)
490 108 : DO iforce_eval = 1, nforce_eval
491 72 : IF (.NOT. ASSOCIATED(force_env%sub_force_env(iforce_eval)%force_env)) CYCLE
492 36 : SELECT CASE (force_env%sub_force_env(iforce_eval)%force_env%in_use)
493 : CASE (use_qs_force)
494 : force_env_qs => force_env%sub_force_env(iforce_eval)%force_env
495 : CASE (use_qmmm)
496 0 : force_env_qs => force_env%sub_force_env(iforce_eval)%force_env
497 : CASE DEFAULT
498 : CALL cp_abort(__LOCATION__, &
499 : "Only use_qs_force and use_qmmm are "// &
500 72 : "supported for mixed_cdft_build_weight_parallel")
501 : END SELECT
502 : END DO
503 36 : IF (.NOT. force_env%mixed_env%do_mixed_qmmm_cdft) THEN
504 : CALL force_env_get(force_env=force_env_qs, &
505 : qs_env=qs_env, &
506 28 : subsys=subsys_mix)
507 : CALL cp_subsys_get(subsys=subsys_mix, &
508 28 : particles=particles_mix)
509 : ELSE
510 8 : qs_env => force_env_qs%qmmm_env%qs_env
511 8 : CALL get_qs_env(qs_env, cp_subsys=subsys_mix)
512 : CALL cp_subsys_get(subsys=subsys_mix, &
513 8 : particles=particles_mix)
514 : END IF
515 36 : mixed_env => force_env%mixed_env
516 36 : print_section => section_vals_get_subs_vals(force_env_section, "MIXED%MIXED_CDFT%PRINT%PROGRAM_RUN_INFO")
517 36 : iounit = cp_print_key_unit_nr(logger, print_section, '', extension='.mixedLog')
518 36 : CALL get_mixed_env(mixed_env, cdft_control=mixed_cdft)
519 36 : CPASSERT(ASSOCIATED(mixed_cdft))
520 36 : cdft_control => mixed_cdft%cdft_control
521 36 : CPASSERT(ASSOCIATED(cdft_control))
522 : ! Calculate the Becke weight function and gradient on all np processors
523 36 : CALL pw_env_get(pw_env=mixed_cdft%pw_env, auxbas_pw_pool=mixed_auxbas_pw_pool)
524 36 : natom = SIZE(particles_mix%els)
525 36 : CALL mixed_becke_constraint(force_env, calculate_forces)
526 : ! Start replicating the working arrays on both np/2 processor groups
527 36 : mixed_cdft%sim_step = mixed_cdft%sim_step + 1
528 36 : CALL get_qs_env(qs_env, pw_env=pw_env, dft_control=dft_control)
529 36 : cdft_control_target => dft_control%qs_control%cdft_control
530 36 : CPASSERT(dft_control%qs_control%cdft)
531 36 : CPASSERT(ASSOCIATED(cdft_control_target))
532 36 : CALL pw_env_get(pw_env, auxbas_pw_pool=auxbas_pw_pool)
533 360 : bo = auxbas_pw_pool%pw_grid%bounds_local
534 : ! First determine the size of the arrays along the confinement dir
535 36 : IF (mixed_cdft%is_special) THEN
536 : my_special_work = 2
537 : ELSE
538 36 : my_special_work = 1
539 : END IF
540 216 : ALLOCATE (recvbuffer(SIZE(mixed_cdft%source_list)))
541 252 : ALLOCATE (req_total(my_special_work*SIZE(mixed_cdft%dest_list) + SIZE(mixed_cdft%source_list)))
542 144 : ALLOCATE (lb(SIZE(mixed_cdft%source_list)), ub(SIZE(mixed_cdft%source_list)))
543 36 : IF (cdft_control%becke_control%cavity_confine) THEN
544 : ! Gaussian confinement => the bounds depend on the processor and need to be communicated
545 34 : ALLOCATE (sendbuffer_i(2))
546 204 : sendbuffer_i = cdft_control%becke_control%confine_bounds
547 102 : DO i = 1, SIZE(mixed_cdft%source_list)
548 68 : ALLOCATE (recvbuffer(i)%iv(2))
549 : CALL force_env%para_env%irecv(msgout=recvbuffer(i)%iv, source=mixed_cdft%source_list(i), &
550 102 : request=req_total(i))
551 : END DO
552 68 : DO i = 1, my_special_work
553 136 : DO j = 1, SIZE(mixed_cdft%dest_list)
554 68 : ind = j + (i - 1)*SIZE(mixed_cdft%dest_list) + SIZE(mixed_cdft%source_list)
555 : CALL force_env%para_env%isend(msgin=sendbuffer_i, &
556 : dest=mixed_cdft%dest_list(j) + (i - 1)*force_env%para_env%num_pe/2, &
557 102 : request=req_total(ind))
558 : END DO
559 : END DO
560 34 : CALL mp_waitall(req_total)
561 : ! Find the largest/smallest value of ub/lb
562 34 : DEALLOCATE (sendbuffer_i)
563 34 : lb_min = HUGE(0)
564 34 : ub_max = -HUGE(0)
565 102 : DO i = 1, SIZE(mixed_cdft%source_list)
566 68 : lb(i) = recvbuffer(i)%iv(1)
567 68 : ub(i) = recvbuffer(i)%iv(2)
568 68 : IF (lb(i) < lb_min) lb_min = lb(i)
569 68 : IF (ub(i) > ub_max) ub_max = ub(i)
570 102 : DEALLOCATE (recvbuffer(i)%iv)
571 : END DO
572 : ! Take into account the grids already communicated during dlb
573 34 : IF (mixed_cdft%dlb) THEN
574 16 : IF (ANY(mixed_cdft%dlb_control%recv_work_repl)) THEN
575 24 : DO j = 1, SIZE(mixed_cdft%dlb_control%recv_work_repl)
576 24 : IF (mixed_cdft%dlb_control%recv_work_repl(j)) THEN
577 16 : DO i = 1, SIZE(mixed_cdft%dlb_control%recvbuff(j)%buffs)
578 16 : IF (LBOUND(mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%weight, 3) &
579 0 : < lb_min) lb_min = LBOUND(mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%weight, 3)
580 16 : IF (UBOUND(mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%weight, 3) &
581 8 : > ub_max) ub_max = UBOUND(mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%weight, 3)
582 : END DO
583 : END IF
584 : END DO
585 : END IF
586 : END IF
587 : ELSE
588 : ! No confinement
589 2 : ub_max = bo(2, 3)
590 2 : lb_min = bo(1, 3)
591 6 : lb = lb_min
592 6 : ub = ub_max
593 : END IF
594 : ! Determine the sender specific indices of grid slices that are to be received
595 36 : CALL timeset(routineN//"_comm", handle2)
596 108 : DO j = 1, SIZE(recvbuffer)
597 72 : ind = j + (j/2)
598 108 : IF (mixed_cdft%is_special) THEN
599 : recvbuffer(j)%imap = [mixed_cdft%source_list_bo(1, j), mixed_cdft%source_list_bo(2, j), &
600 : mixed_cdft%source_list_bo(3, j), mixed_cdft%source_list_bo(4, j), &
601 0 : lb(j), ub(j)]
602 72 : ELSE IF (mixed_cdft%is_pencil) THEN
603 0 : recvbuffer(j)%imap = [bo(1, 1), bo(2, 1), mixed_cdft%recv_bo(ind), mixed_cdft%recv_bo(ind + 1), lb(j), ub(j)]
604 : ELSE
605 504 : recvbuffer(j)%imap = [mixed_cdft%recv_bo(ind), mixed_cdft%recv_bo(ind + 1), bo(1, 2), bo(2, 2), lb(j), ub(j)]
606 : END IF
607 : END DO
608 36 : IF (mixed_cdft%dlb .AND. .NOT. mixed_cdft%is_special) THEN
609 8 : IF (mixed_cdft%dlb_control%recv_work_repl(1) .OR. mixed_cdft%dlb_control%recv_work_repl(2)) THEN
610 24 : DO j = 1, 2
611 16 : recv_offset = 0
612 16 : IF (mixed_cdft%dlb_control%recv_work_repl(j)) &
613 16 : recv_offset = SUM(mixed_cdft%dlb_control%recv_info(j)%target_list(2, :))
614 24 : IF (mixed_cdft%is_pencil) THEN
615 0 : recvbuffer(j)%imap(1) = recvbuffer(j)%imap(1) + recv_offset
616 : ELSE
617 16 : recvbuffer(j)%imap(3) = recvbuffer(j)%imap(3) + recv_offset
618 : END IF
619 : END DO
620 : END IF
621 : END IF
622 : ! Transfer the arrays one-by-one and deallocate shared storage
623 : ! Start with the weight function
624 108 : DO j = 1, SIZE(mixed_cdft%source_list)
625 : ALLOCATE (recvbuffer(j)%r3(recvbuffer(j)%imap(1):recvbuffer(j)%imap(2), &
626 : recvbuffer(j)%imap(3):recvbuffer(j)%imap(4), &
627 360 : recvbuffer(j)%imap(5):recvbuffer(j)%imap(6)))
628 :
629 : CALL force_env%para_env%irecv(msgout=recvbuffer(j)%r3, source=mixed_cdft%source_list(j), &
630 108 : request=req_total(j))
631 : END DO
632 72 : DO i = 1, my_special_work
633 144 : DO j = 1, SIZE(mixed_cdft%dest_list)
634 72 : ind = j + (i - 1)*SIZE(mixed_cdft%dest_list) + SIZE(mixed_cdft%source_list)
635 108 : IF (mixed_cdft%is_special) THEN
636 : CALL force_env%para_env%isend(msgin=mixed_cdft%sendbuff(j)%weight, &
637 : dest=mixed_cdft%dest_list(j) + (i - 1)*force_env%para_env%num_pe/2, &
638 0 : request=req_total(ind))
639 : ELSE
640 : CALL force_env%para_env%isend(msgin=mixed_cdft%weight, dest=mixed_cdft%dest_list(j), &
641 72 : request=req_total(ind))
642 : END IF
643 : END DO
644 : END DO
645 36 : CALL mp_waitall(req_total)
646 36 : IF (mixed_cdft%is_special) THEN
647 0 : DO j = 1, SIZE(mixed_cdft%dest_list)
648 0 : DEALLOCATE (mixed_cdft%sendbuff(j)%weight)
649 : END DO
650 : ELSE
651 36 : DEALLOCATE (mixed_cdft%weight)
652 : END IF
653 : ! In principle, we could reduce the memory footprint of becke_pot by only transferring the nonzero portion
654 : ! of the potential, but this would require a custom integrate_v_rspace
655 36 : ALLOCATE (cdft_control_target%group(1)%weight)
656 36 : CALL auxbas_pw_pool%create_pw(cdft_control_target%group(1)%weight)
657 36 : CALL pw_zero(cdft_control_target%group(1)%weight)
658 : ! Assemble the recved slices
659 108 : DO j = 1, SIZE(mixed_cdft%source_list)
660 : cdft_control_target%group(1)%weight%array(recvbuffer(j)%imap(1):recvbuffer(j)%imap(2), &
661 : recvbuffer(j)%imap(3):recvbuffer(j)%imap(4), &
662 7517624 : recvbuffer(j)%imap(5):recvbuffer(j)%imap(6)) = recvbuffer(j)%r3
663 : END DO
664 : ! Do the same for slices sent during dlb
665 36 : IF (mixed_cdft%dlb) THEN
666 16 : IF (ANY(mixed_cdft%dlb_control%recv_work_repl)) THEN
667 24 : DO j = 1, SIZE(mixed_cdft%dlb_control%recv_work_repl)
668 24 : IF (mixed_cdft%dlb_control%recv_work_repl(j)) THEN
669 16 : DO i = 1, SIZE(mixed_cdft%dlb_control%recvbuff(j)%buffs)
670 : index = [LBOUND(mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%weight, 1), &
671 : UBOUND(mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%weight, 1), &
672 : LBOUND(mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%weight, 2), &
673 : UBOUND(mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%weight, 2), &
674 : LBOUND(mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%weight, 3), &
675 104 : UBOUND(mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%weight, 3)]
676 : cdft_control_target%group(1)%weight%array(INDEX(1):INDEX(2), &
677 : INDEX(3):INDEX(4), &
678 : INDEX(5):INDEX(6)) = &
679 14096 : mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%weight
680 16 : DEALLOCATE (mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%weight)
681 : END DO
682 : END IF
683 : END DO
684 : END IF
685 : END IF
686 : ! Gaussian confinement cavity
687 36 : IF (cdft_control%becke_control%cavity_confine) THEN
688 102 : DO j = 1, SIZE(mixed_cdft%source_list)
689 : CALL force_env%para_env%irecv(msgout=recvbuffer(j)%r3, source=mixed_cdft%source_list(j), &
690 102 : request=req_total(j))
691 : END DO
692 68 : DO i = 1, my_special_work
693 136 : DO j = 1, SIZE(mixed_cdft%dest_list)
694 68 : ind = j + (i - 1)*SIZE(mixed_cdft%dest_list) + SIZE(mixed_cdft%source_list)
695 102 : IF (mixed_cdft%is_special) THEN
696 : CALL force_env%para_env%isend(msgin=mixed_cdft%sendbuff(j)%cavity, &
697 : dest=mixed_cdft%dest_list(j) + (i - 1)*force_env%para_env%num_pe/2, &
698 0 : request=req_total(ind))
699 : ELSE
700 : CALL force_env%para_env%isend(msgin=mixed_cdft%cavity, dest=mixed_cdft%dest_list(j), &
701 68 : request=req_total(ind))
702 : END IF
703 : END DO
704 : END DO
705 34 : CALL mp_waitall(req_total)
706 34 : IF (mixed_cdft%is_special) THEN
707 0 : DO j = 1, SIZE(mixed_cdft%dest_list)
708 0 : DEALLOCATE (mixed_cdft%sendbuff(j)%cavity)
709 : END DO
710 : ELSE
711 34 : DEALLOCATE (mixed_cdft%cavity)
712 : END IF
713 : ! We only need the nonzero part of the confinement cavity
714 : ALLOCATE (cdft_control_target%becke_control%cavity_mat(bo(1, 1):bo(2, 1), &
715 : bo(1, 2):bo(2, 2), &
716 170 : lb_min:ub_max))
717 3504226 : cdft_control_target%becke_control%cavity_mat = 0.0_dp
718 :
719 102 : DO j = 1, SIZE(mixed_cdft%source_list)
720 : cdft_control_target%becke_control%cavity_mat(recvbuffer(j)%imap(1):recvbuffer(j)%imap(2), &
721 : recvbuffer(j)%imap(3):recvbuffer(j)%imap(4), &
722 7136554 : recvbuffer(j)%imap(5):recvbuffer(j)%imap(6)) = recvbuffer(j)%r3
723 : END DO
724 34 : IF (mixed_cdft%dlb) THEN
725 16 : IF (ANY(mixed_cdft%dlb_control%recv_work_repl)) THEN
726 24 : DO j = 1, SIZE(mixed_cdft%dlb_control%recv_work_repl)
727 24 : IF (mixed_cdft%dlb_control%recv_work_repl(j)) THEN
728 16 : DO i = 1, SIZE(mixed_cdft%dlb_control%recvbuff(j)%buffs)
729 : index = [LBOUND(mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%cavity, 1), &
730 : UBOUND(mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%cavity, 1), &
731 : LBOUND(mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%cavity, 2), &
732 : UBOUND(mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%cavity, 2), &
733 : LBOUND(mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%cavity, 3), &
734 104 : UBOUND(mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%cavity, 3)]
735 : cdft_control_target%becke_control%cavity_mat(INDEX(1):INDEX(2), &
736 : INDEX(3):INDEX(4), &
737 : INDEX(5):INDEX(6)) = &
738 14096 : mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%cavity
739 16 : DEALLOCATE (mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%cavity)
740 : END DO
741 : END IF
742 : END DO
743 : END IF
744 : END IF
745 : END IF
746 108 : DO j = 1, SIZE(mixed_cdft%source_list)
747 108 : DEALLOCATE (recvbuffer(j)%r3)
748 : END DO
749 36 : IF (calculate_forces) THEN
750 : ! Gradients of the weight function
751 72 : DO j = 1, SIZE(mixed_cdft%source_list)
752 : ALLOCATE (recvbuffer(j)%r4(3*natom, recvbuffer(j)%imap(1):recvbuffer(j)%imap(2), &
753 : recvbuffer(j)%imap(3):recvbuffer(j)%imap(4), &
754 288 : recvbuffer(j)%imap(5):recvbuffer(j)%imap(6)))
755 : CALL force_env%para_env%irecv(msgout=recvbuffer(j)%r4, source=mixed_cdft%source_list(j), &
756 72 : request=req_total(j))
757 : END DO
758 48 : DO i = 1, my_special_work
759 96 : DO j = 1, SIZE(mixed_cdft%dest_list)
760 48 : ind = j + (i - 1)*SIZE(mixed_cdft%dest_list) + SIZE(mixed_cdft%source_list)
761 72 : IF (mixed_cdft%is_special) THEN
762 : CALL force_env%para_env%isend(msgin=mixed_cdft%sendbuff(j)%gradients, &
763 : dest=mixed_cdft%dest_list(j) + (i - 1)*force_env%para_env%num_pe/2, &
764 0 : request=req_total(ind))
765 : ELSE
766 : CALL force_env%para_env%isend(msgin=cdft_control%group(1)%gradients, dest=mixed_cdft%dest_list(j), &
767 48 : request=req_total(ind))
768 : END IF
769 : END DO
770 : END DO
771 24 : CALL mp_waitall(req_total)
772 24 : IF (mixed_cdft%is_special) THEN
773 0 : DO j = 1, SIZE(mixed_cdft%dest_list)
774 0 : DEALLOCATE (mixed_cdft%sendbuff(j)%gradients)
775 : END DO
776 0 : DEALLOCATE (mixed_cdft%sendbuff)
777 : ELSE
778 24 : DEALLOCATE (cdft_control%group(1)%gradients)
779 : END IF
780 : ALLOCATE (cdft_control_target%group(1)%gradients(3*natom, bo(1, 1):bo(2, 1), &
781 144 : bo(1, 2):bo(2, 2), lb_min:ub_max))
782 72 : DO j = 1, SIZE(mixed_cdft%source_list)
783 : cdft_control_target%group(1)%gradients(:, recvbuffer(j)%imap(1):recvbuffer(j)%imap(2), &
784 : recvbuffer(j)%imap(3):recvbuffer(j)%imap(4), &
785 39235744 : recvbuffer(j)%imap(5):recvbuffer(j)%imap(6)) = recvbuffer(j)%r4
786 72 : DEALLOCATE (recvbuffer(j)%r4)
787 : END DO
788 24 : IF (mixed_cdft%dlb) THEN
789 16 : IF (ANY(mixed_cdft%dlb_control%recv_work_repl)) THEN
790 24 : DO j = 1, SIZE(mixed_cdft%dlb_control%recv_work_repl)
791 24 : IF (mixed_cdft%dlb_control%recv_work_repl(j)) THEN
792 16 : DO i = 1, SIZE(mixed_cdft%dlb_control%recvbuff(j)%buffs)
793 : index = [LBOUND(mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%gradients, 2), &
794 : UBOUND(mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%gradients, 2), &
795 : LBOUND(mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%gradients, 3), &
796 : UBOUND(mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%gradients, 3), &
797 : LBOUND(mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%gradients, 4), &
798 104 : UBOUND(mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%gradients, 4)]
799 : cdft_control_target%group(1)%gradients(:, INDEX(1):INDEX(2), &
800 : INDEX(3):INDEX(4), &
801 : INDEX(5):INDEX(6)) = &
802 90896 : mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%gradients
803 16 : DEALLOCATE (mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%gradients)
804 : END DO
805 : END IF
806 : END DO
807 : END IF
808 : END IF
809 : END IF
810 : ! Clean up remaining temporaries
811 36 : IF (mixed_cdft%dlb) THEN
812 16 : IF (ANY(mixed_cdft%dlb_control%recv_work_repl)) THEN
813 24 : DO j = 1, SIZE(mixed_cdft%dlb_control%recv_work_repl)
814 24 : IF (mixed_cdft%dlb_control%recv_work_repl(j)) THEN
815 8 : IF (ASSOCIATED(mixed_cdft%dlb_control%recv_info(j)%target_list)) &
816 8 : DEALLOCATE (mixed_cdft%dlb_control%recv_info(j)%target_list)
817 8 : DEALLOCATE (mixed_cdft%dlb_control%recvbuff(j)%buffs)
818 : END IF
819 : END DO
820 8 : DEALLOCATE (mixed_cdft%dlb_control%recv_info, mixed_cdft%dlb_control%recvbuff)
821 : END IF
822 8 : IF (ASSOCIATED(mixed_cdft%dlb_control%target_list)) &
823 4 : DEALLOCATE (mixed_cdft%dlb_control%target_list)
824 8 : DEALLOCATE (mixed_cdft%dlb_control%recv_work_repl)
825 : END IF
826 36 : DEALLOCATE (recvbuffer)
827 36 : DEALLOCATE (req_total)
828 36 : DEALLOCATE (lb)
829 36 : DEALLOCATE (ub)
830 36 : CALL timestop(handle2)
831 : ! Set some flags so the weight is not rebuilt during SCF
832 36 : cdft_control_target%external_control = .TRUE.
833 36 : cdft_control_target%need_pot = .FALSE.
834 36 : cdft_control_target%transfer_pot = .FALSE.
835 : ! Store the bound indices for force calculation
836 36 : IF (calculate_forces) THEN
837 24 : cdft_control_target%becke_control%confine_bounds(2) = ub_max
838 24 : cdft_control_target%becke_control%confine_bounds(1) = lb_min
839 : END IF
840 : CALL pw_scale(cdft_control_target%group(1)%weight, &
841 36 : cdft_control_target%group(1)%weight%pw_grid%dvol)
842 : ! Set flags for ET coupling calculation
843 36 : IF (mixed_env%do_mixed_et) THEN
844 36 : IF (MODULO(mixed_cdft%sim_step, mixed_env%et_freq) == 0) THEN
845 36 : dft_control%qs_control%cdft_control%do_et = .TRUE.
846 36 : dft_control%qs_control%cdft_control%calculate_metric = mixed_cdft%calculate_metric
847 : ELSE
848 0 : dft_control%qs_control%cdft_control%do_et = .FALSE.
849 0 : dft_control%qs_control%cdft_control%calculate_metric = .FALSE.
850 : END IF
851 : END IF
852 36 : t2 = m_walltime()
853 36 : IF (iounit > 0) THEN
854 18 : WRITE (iounit, '(A)') ' '
855 18 : WRITE (iounit, '(T2,A,F6.1,A)') 'MIXED_CDFT| Becke constraint built in ', t2 - t1, ' seconds'
856 18 : WRITE (iounit, '(A)') ' '
857 : END IF
858 : CALL cp_print_key_finished_output(iounit, logger, force_env_section, &
859 36 : "MIXED%MIXED_CDFT%PRINT%PROGRAM_RUN_INFO")
860 36 : CALL timestop(handle)
861 :
862 108 : END SUBROUTINE mixed_cdft_build_weight_parallel
863 :
864 : ! **************************************************************************************************
865 : !> \brief Transfer CDFT weight/gradient between force_evals
866 : !> \param force_env the force_env that holds the CDFT sub_force_envs
867 : !> \param calculate_forces if forces should be computed
868 : !> \param iforce_eval index of the currently active CDFT state
869 : !> \par History
870 : !> 01.2017 created [Nico Holmberg]
871 : ! **************************************************************************************************
872 296 : SUBROUTINE mixed_cdft_transfer_weight(force_env, calculate_forces, iforce_eval)
873 : TYPE(force_env_type), POINTER :: force_env
874 : LOGICAL, INTENT(IN) :: calculate_forces
875 : INTEGER, INTENT(IN) :: iforce_eval
876 :
877 : CHARACTER(len=*), PARAMETER :: routineN = 'mixed_cdft_transfer_weight'
878 :
879 : INTEGER :: bounds_of(8), handle, iatom, igroup, &
880 : jforce_eval, nforce_eval
881 : LOGICAL, SAVE :: first_call = .TRUE.
882 : TYPE(cdft_control_type), POINTER :: cdft_control_source, cdft_control_target
883 : TYPE(dft_control_type), POINTER :: dft_control_source, dft_control_target
884 : TYPE(force_env_type), POINTER :: force_env_qs_source, force_env_qs_target
885 : TYPE(mixed_cdft_type), POINTER :: mixed_cdft
886 : TYPE(mixed_environment_type), POINTER :: mixed_env
887 : TYPE(pw_env_type), POINTER :: pw_env_source, pw_env_target
888 : TYPE(pw_pool_type), POINTER :: auxbas_pw_pool_source, &
889 : auxbas_pw_pool_target
890 : TYPE(qs_environment_type), POINTER :: qs_env_source, qs_env_target
891 :
892 148 : NULLIFY (mixed_cdft, dft_control_source, dft_control_target, force_env_qs_source, &
893 148 : force_env_qs_target, pw_env_source, pw_env_target, auxbas_pw_pool_source, &
894 148 : auxbas_pw_pool_target, qs_env_source, qs_env_target, mixed_env, &
895 148 : cdft_control_source, cdft_control_target)
896 148 : mixed_env => force_env%mixed_env
897 148 : CALL get_mixed_env(mixed_env, cdft_control=mixed_cdft)
898 148 : CALL timeset(routineN, handle)
899 148 : IF (iforce_eval == 1) THEN
900 : jforce_eval = 1
901 : ELSE
902 86 : jforce_eval = iforce_eval - 1
903 : END IF
904 148 : nforce_eval = SIZE(force_env%sub_force_env)
905 296 : SELECT CASE (force_env%sub_force_env(jforce_eval)%force_env%in_use)
906 : CASE (use_qs_force, use_qmmm)
907 148 : force_env_qs_source => force_env%sub_force_env(jforce_eval)%force_env
908 148 : force_env_qs_target => force_env%sub_force_env(iforce_eval)%force_env
909 : CASE DEFAULT
910 : CALL cp_abort(__LOCATION__, &
911 : "Only use_qs_force and use_qmmm are "// &
912 148 : "supported for mixed_cdft_transfer_weight")
913 : END SELECT
914 148 : IF (.NOT. force_env%mixed_env%do_mixed_qmmm_cdft) THEN
915 : CALL force_env_get(force_env=force_env_qs_source, &
916 132 : qs_env=qs_env_source)
917 : CALL force_env_get(force_env=force_env_qs_target, &
918 132 : qs_env=qs_env_target)
919 : ELSE
920 16 : qs_env_source => force_env_qs_source%qmmm_env%qs_env
921 16 : qs_env_target => force_env_qs_target%qmmm_env%qs_env
922 : END IF
923 148 : IF (iforce_eval == 1) THEN
924 : ! The first force_eval builds the weight function and gradients in qs_cdft_methods.F
925 : ! Set some flags so the constraint is saved if the constraint definitions are identical in all CDFT states
926 62 : CALL get_qs_env(qs_env_source, dft_control=dft_control_source)
927 62 : cdft_control_source => dft_control_source%qs_control%cdft_control
928 62 : cdft_control_source%external_control = .FALSE.
929 62 : cdft_control_source%need_pot = .TRUE.
930 62 : IF (mixed_cdft%identical_constraints) THEN
931 60 : cdft_control_source%transfer_pot = .TRUE.
932 : ELSE
933 2 : cdft_control_source%transfer_pot = .FALSE.
934 : END IF
935 62 : mixed_cdft%sim_step = mixed_cdft%sim_step + 1
936 : ELSE
937 : ! Transfer the constraint from the ith force_eval to the i+1th
938 : CALL get_qs_env(qs_env_source, dft_control=dft_control_source, &
939 86 : pw_env=pw_env_source)
940 86 : CALL pw_env_get(pw_env_source, auxbas_pw_pool=auxbas_pw_pool_source)
941 86 : cdft_control_source => dft_control_source%qs_control%cdft_control
942 : CALL get_qs_env(qs_env_target, dft_control=dft_control_target, &
943 86 : pw_env=pw_env_target)
944 86 : CALL pw_env_get(pw_env_target, auxbas_pw_pool=auxbas_pw_pool_target)
945 86 : cdft_control_target => dft_control_target%qs_control%cdft_control
946 : ! The constraint can be transferred only when the constraint defitions are identical in all CDFT states
947 86 : IF (mixed_cdft%identical_constraints) THEN
948 : ! Weight function
949 170 : DO igroup = 1, SIZE(cdft_control_target%group)
950 86 : ALLOCATE (cdft_control_target%group(igroup)%weight)
951 86 : CALL auxbas_pw_pool_target%create_pw(cdft_control_target%group(igroup)%weight)
952 : ! We have ensured that the grids are consistent => no danger in using explicit copy
953 86 : CALL pw_copy(cdft_control_source%group(igroup)%weight, cdft_control_target%group(igroup)%weight)
954 86 : CALL auxbas_pw_pool_source%give_back_pw(cdft_control_source%group(igroup)%weight)
955 170 : DEALLOCATE (cdft_control_source%group(igroup)%weight)
956 : END DO
957 : ! Cavity
958 84 : IF (cdft_control_source%type == outer_scf_becke_constraint) THEN
959 78 : IF (cdft_control_source%becke_control%cavity_confine) THEN
960 72 : CALL auxbas_pw_pool_target%create_pw(cdft_control_target%becke_control%cavity)
961 72 : CALL pw_copy(cdft_control_source%becke_control%cavity, cdft_control_target%becke_control%cavity)
962 72 : CALL auxbas_pw_pool_source%give_back_pw(cdft_control_source%becke_control%cavity)
963 : END IF
964 : END IF
965 : ! Gradients
966 84 : IF (calculate_forces) THEN
967 40 : DO igroup = 1, SIZE(cdft_control_source%group)
968 : bounds_of = [LBOUND(cdft_control_source%group(igroup)%gradients, 1), &
969 : UBOUND(cdft_control_source%group(igroup)%gradients, 1), &
970 : LBOUND(cdft_control_source%group(igroup)%gradients, 2), &
971 : UBOUND(cdft_control_source%group(igroup)%gradients, 2), &
972 : LBOUND(cdft_control_source%group(igroup)%gradients, 3), &
973 : UBOUND(cdft_control_source%group(igroup)%gradients, 3), &
974 : LBOUND(cdft_control_source%group(igroup)%gradients, 4), &
975 340 : UBOUND(cdft_control_source%group(igroup)%gradients, 4)]
976 : ALLOCATE (cdft_control_target%group(igroup)% &
977 : gradients(bounds_of(1):bounds_of(2), bounds_of(3):bounds_of(4), &
978 120 : bounds_of(5):bounds_of(6), bounds_of(7):bounds_of(8)))
979 17857864 : cdft_control_target%group(igroup)%gradients = cdft_control_source%group(igroup)%gradients
980 40 : DEALLOCATE (cdft_control_source%group(igroup)%gradients)
981 : END DO
982 : END IF
983 : ! Atomic weight functions needed for CDFT charges
984 84 : IF (cdft_control_source%atomic_charges) THEN
985 18 : IF (.NOT. ASSOCIATED(cdft_control_target%charge)) &
986 10 : ALLOCATE (cdft_control_target%charge(cdft_control_target%natoms))
987 54 : DO iatom = 1, cdft_control_target%natoms
988 36 : CALL auxbas_pw_pool_target%create_pw(cdft_control_target%charge(iatom))
989 36 : CALL pw_copy(cdft_control_source%charge(iatom), cdft_control_target%charge(iatom))
990 54 : CALL auxbas_pw_pool_source%give_back_pw(cdft_control_source%charge(iatom))
991 : END DO
992 : END IF
993 : ! Set some flags so the weight is not rebuilt during SCF
994 84 : cdft_control_target%external_control = .FALSE.
995 84 : cdft_control_target%need_pot = .FALSE.
996 : ! For states i+1 < nforce_eval, prevent deallocation of constraint
997 84 : IF (iforce_eval == nforce_eval) THEN
998 60 : cdft_control_target%transfer_pot = .FALSE.
999 : ELSE
1000 24 : cdft_control_target%transfer_pot = .TRUE.
1001 : END IF
1002 84 : cdft_control_target%first_iteration = .FALSE.
1003 : ELSE
1004 : ! Force rebuild of constraint and dont save it
1005 2 : cdft_control_target%external_control = .FALSE.
1006 2 : cdft_control_target%need_pot = .TRUE.
1007 2 : cdft_control_target%transfer_pot = .FALSE.
1008 2 : IF (first_call) THEN
1009 2 : cdft_control_target%first_iteration = .TRUE.
1010 : ELSE
1011 0 : cdft_control_target%first_iteration = .FALSE.
1012 : END IF
1013 : END IF
1014 : END IF
1015 : ! Set flags for ET coupling calculation
1016 148 : IF (mixed_env%do_mixed_et) THEN
1017 148 : IF (MODULO(mixed_cdft%sim_step, mixed_env%et_freq) == 0) THEN
1018 148 : IF (iforce_eval == 1) THEN
1019 62 : cdft_control_source%do_et = .TRUE.
1020 62 : cdft_control_source%calculate_metric = mixed_cdft%calculate_metric
1021 : ELSE
1022 86 : cdft_control_target%do_et = .TRUE.
1023 86 : cdft_control_target%calculate_metric = mixed_cdft%calculate_metric
1024 : END IF
1025 : ELSE
1026 0 : IF (iforce_eval == 1) THEN
1027 0 : cdft_control_source%do_et = .FALSE.
1028 0 : cdft_control_source%calculate_metric = .FALSE.
1029 : ELSE
1030 0 : cdft_control_target%do_et = .FALSE.
1031 0 : cdft_control_target%calculate_metric = .FALSE.
1032 : END IF
1033 : END IF
1034 : END IF
1035 148 : IF (iforce_eval == nforce_eval .AND. first_call) first_call = .FALSE.
1036 148 : CALL timestop(handle)
1037 :
1038 148 : END SUBROUTINE mixed_cdft_transfer_weight
1039 :
1040 : ! **************************************************************************************************
1041 : !> \brief In case CDFT states are treated in parallel, sets flags so that each CDFT state
1042 : !> builds their own weight functions and gradients
1043 : !> \param force_env the force_env that holds the CDFT sub_force_envs
1044 : !> \par History
1045 : !> 09.2018 created [Nico Holmberg]
1046 : ! **************************************************************************************************
1047 4 : SUBROUTINE mixed_cdft_set_flags(force_env)
1048 : TYPE(force_env_type), POINTER :: force_env
1049 :
1050 : CHARACTER(len=*), PARAMETER :: routineN = 'mixed_cdft_set_flags'
1051 :
1052 : INTEGER :: handle, iforce_eval, nforce_eval
1053 : LOGICAL, SAVE :: first_call = .TRUE.
1054 : TYPE(cdft_control_type), POINTER :: cdft_control
1055 : TYPE(dft_control_type), POINTER :: dft_control
1056 : TYPE(force_env_type), POINTER :: force_env_qs
1057 : TYPE(mixed_cdft_type), POINTER :: mixed_cdft
1058 : TYPE(mixed_environment_type), POINTER :: mixed_env
1059 : TYPE(qs_environment_type), POINTER :: qs_env
1060 :
1061 2 : NULLIFY (mixed_cdft, dft_control, force_env_qs, qs_env, mixed_env, cdft_control)
1062 2 : mixed_env => force_env%mixed_env
1063 2 : CALL get_mixed_env(mixed_env, cdft_control=mixed_cdft)
1064 2 : CALL timeset(routineN, handle)
1065 2 : nforce_eval = SIZE(force_env%sub_force_env)
1066 6 : DO iforce_eval = 1, nforce_eval
1067 4 : IF (.NOT. ASSOCIATED(force_env%sub_force_env(iforce_eval)%force_env)) CYCLE
1068 2 : SELECT CASE (force_env%sub_force_env(iforce_eval)%force_env%in_use)
1069 : CASE (use_qs_force, use_qmmm)
1070 0 : force_env_qs => force_env%sub_force_env(iforce_eval)%force_env
1071 : CASE DEFAULT
1072 : CALL cp_abort(__LOCATION__, &
1073 : "Only use_qs_force and use_qmmm are "// &
1074 2 : "supported for mixed_cdft_set_flags")
1075 : END SELECT
1076 2 : IF (.NOT. force_env%mixed_env%do_mixed_qmmm_cdft) THEN
1077 2 : CALL force_env_get(force_env=force_env_qs, qs_env=qs_env)
1078 : ELSE
1079 0 : qs_env => force_env_qs%qmmm_env%qs_env
1080 : END IF
1081 : ! All force_evals build the weight function and gradients in qs_cdft_methods.F
1082 : ! Update flags to match run type
1083 2 : CALL get_qs_env(qs_env, dft_control=dft_control)
1084 2 : cdft_control => dft_control%qs_control%cdft_control
1085 2 : cdft_control%external_control = .FALSE.
1086 2 : cdft_control%need_pot = .TRUE.
1087 2 : cdft_control%transfer_pot = .FALSE.
1088 2 : IF (first_call) THEN
1089 2 : cdft_control%first_iteration = .TRUE.
1090 : ELSE
1091 0 : cdft_control%first_iteration = .FALSE.
1092 : END IF
1093 : ! Set flags for ET coupling calculation
1094 4 : IF (mixed_env%do_mixed_et) THEN
1095 2 : IF (MODULO(mixed_cdft%sim_step, mixed_env%et_freq) == 0) THEN
1096 2 : cdft_control%do_et = .TRUE.
1097 2 : cdft_control%calculate_metric = mixed_cdft%calculate_metric
1098 : ELSE
1099 0 : cdft_control%do_et = .FALSE.
1100 0 : cdft_control%calculate_metric = .FALSE.
1101 : END IF
1102 : END IF
1103 : END DO
1104 2 : mixed_cdft%sim_step = mixed_cdft%sim_step + 1
1105 2 : IF (first_call) first_call = .FALSE.
1106 2 : CALL timestop(handle)
1107 :
1108 2 : END SUBROUTINE mixed_cdft_set_flags
1109 :
1110 : ! **************************************************************************************************
1111 : !> \brief Driver routine to calculate the electronic coupling(s) between CDFT states.
1112 : !> \param force_env the force_env that holds the CDFT states
1113 : !> \par History
1114 : !> 02.15 created [Nico Holmberg]
1115 : ! **************************************************************************************************
1116 200 : SUBROUTINE mixed_cdft_calculate_coupling(force_env)
1117 : TYPE(force_env_type), POINTER :: force_env
1118 :
1119 : CHARACTER(len=*), PARAMETER :: routineN = 'mixed_cdft_calculate_coupling'
1120 :
1121 : INTEGER :: handle
1122 :
1123 100 : CPASSERT(ASSOCIATED(force_env))
1124 100 : CALL timeset(routineN, handle)
1125 : ! Move needed arrays from individual CDFT states to the mixed CDFT env
1126 100 : CALL mixed_cdft_redistribute_arrays(force_env)
1127 : ! Calculate the mixed CDFT Hamiltonian and overlap matrices.
1128 : ! All work matrices defined in the wavefunction basis get deallocated on exit.
1129 : ! Any analyses which depend on these work matrices are performed within.
1130 100 : CALL mixed_cdft_interaction_matrices(force_env)
1131 : ! Calculate eletronic couplings between states (Lowdin/rotation)
1132 100 : CALL mixed_cdft_calculate_coupling_low(force_env)
1133 : ! Print out couplings
1134 100 : CALL mixed_cdft_print_couplings(force_env)
1135 : ! Block diagonalize the mixed CDFT Hamiltonian matrix
1136 100 : CALL mixed_cdft_block_diag(force_env)
1137 : ! CDFT Configuration Interaction
1138 100 : CALL mixed_cdft_configuration_interaction(force_env)
1139 : ! Clean up
1140 100 : CALL mixed_cdft_release_work(force_env)
1141 100 : CALL timestop(handle)
1142 :
1143 100 : END SUBROUTINE mixed_cdft_calculate_coupling
1144 :
1145 : ! **************************************************************************************************
1146 : !> \brief Routine to calculate the mixed CDFT Hamiltonian and overlap matrices.
1147 : !> \param force_env the force_env that holds the CDFT states
1148 : !> \par History
1149 : !> 11.17 created [Nico Holmberg]
1150 : ! **************************************************************************************************
1151 100 : SUBROUTINE mixed_cdft_interaction_matrices(force_env)
1152 : TYPE(force_env_type), POINTER :: force_env
1153 :
1154 : CHARACTER(len=*), PARAMETER :: routineN = 'mixed_cdft_interaction_matrices'
1155 :
1156 : INTEGER :: check_ao(2), check_mo(2), handle, iforce_eval, ipermutation, ispin, istate, ivar, &
1157 : j, jstate, k, moeigvalunit, mounit, nao, ncol_local, nforce_eval, nmo, npermutations, &
1158 : nrow_local, nspins, nvar
1159 : INTEGER, ALLOCATABLE, DIMENSION(:) :: ncol_mo, nrow_mo
1160 100 : INTEGER, ALLOCATABLE, DIMENSION(:, :) :: homo
1161 : LOGICAL :: nelectron_mismatch, print_mo, &
1162 : print_mo_eigval, should_scale, &
1163 : uniform_occupation
1164 : REAL(KIND=dp) :: c(2), eps_occupied, nelectron_tot, &
1165 : sum_a(2), sum_b(2)
1166 100 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: coupling_nonortho, eigenv, energy, Sda
1167 100 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: H_mat, S_det, S_mat, strength, tmp_mat, &
1168 100 : W_diagonal, Wad, Wda
1169 100 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :) :: a, b
1170 100 : REAL(KIND=dp), DIMENSION(:), POINTER :: mo_eigval
1171 : TYPE(cp_fm_struct_type), POINTER :: fm_struct_mo, mo_mo_fmstruct
1172 : TYPE(cp_fm_type) :: inverse_mat, Tinverse, tmp2
1173 100 : TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:) :: mo_overlap
1174 100 : TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:, :, :) :: w_matrix_mo
1175 100 : TYPE(cp_fm_type), DIMENSION(:, :), POINTER :: mixed_mo_coeff
1176 : TYPE(cp_logger_type), POINTER :: logger
1177 100 : TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: density_matrix, density_matrix_diff, &
1178 100 : w_matrix
1179 : TYPE(dbcsr_type), POINTER :: mixed_matrix_s
1180 : TYPE(dft_control_type), POINTER :: dft_control
1181 : TYPE(mixed_cdft_type), POINTER :: mixed_cdft
1182 : TYPE(mixed_environment_type), POINTER :: mixed_env
1183 : TYPE(qs_energy_type), POINTER :: energy_qs
1184 : TYPE(qs_environment_type), POINTER :: qs_env
1185 : TYPE(section_vals_type), POINTER :: force_env_section, mixed_cdft_section, &
1186 : print_section
1187 :
1188 100 : NULLIFY (force_env_section, print_section, mixed_cdft_section, &
1189 100 : mixed_env, mixed_cdft, qs_env, dft_control, fm_struct_mo, &
1190 100 : density_matrix_diff, mo_mo_fmstruct, &
1191 100 : mixed_mo_coeff, mixed_matrix_s, &
1192 100 : density_matrix, energy_qs, w_matrix, mo_eigval)
1193 200 : logger => cp_get_default_logger()
1194 100 : CPASSERT(ASSOCIATED(force_env))
1195 100 : CALL timeset(routineN, handle)
1196 : CALL force_env_get(force_env=force_env, &
1197 100 : force_env_section=force_env_section)
1198 100 : mixed_env => force_env%mixed_env
1199 100 : nforce_eval = SIZE(force_env%sub_force_env)
1200 100 : print_section => section_vals_get_subs_vals(force_env_section, "MIXED%MIXED_CDFT%PRINT%PROGRAM_RUN_INFO")
1201 100 : IF (section_get_lval(print_section, "MO_OVERLAP_MATRIX")) THEN
1202 2 : print_mo = .TRUE.
1203 2 : mounit = cp_print_key_unit_nr(logger, print_section, extension='.moOverlap', on_file=.TRUE.)
1204 : ELSE
1205 : print_mo = .FALSE.
1206 : END IF
1207 100 : IF (section_get_lval(print_section, "MO_OVERLAP_EIGENVALUES")) THEN
1208 14 : print_mo_eigval = .TRUE.
1209 14 : moeigvalunit = cp_print_key_unit_nr(logger, print_section, extension='.moOverlapEigval', on_file=.TRUE.)
1210 : ELSE
1211 : print_mo_eigval = .FALSE.
1212 : END IF
1213 100 : CALL get_mixed_env(mixed_env, cdft_control=mixed_cdft)
1214 : ! Get redistributed work matrices
1215 100 : CPASSERT(ASSOCIATED(mixed_cdft))
1216 100 : CPASSERT(ASSOCIATED(mixed_cdft%matrix%mixed_mo_coeff))
1217 100 : CPASSERT(ASSOCIATED(mixed_cdft%matrix%w_matrix))
1218 100 : CPASSERT(ASSOCIATED(mixed_cdft%matrix%mixed_matrix_s))
1219 100 : mixed_mo_coeff => mixed_cdft%matrix%mixed_mo_coeff
1220 100 : w_matrix => mixed_cdft%matrix%w_matrix
1221 100 : mixed_matrix_s => mixed_cdft%matrix%mixed_matrix_s
1222 100 : IF (mixed_cdft%calculate_metric) THEN
1223 14 : CPASSERT(ASSOCIATED(mixed_cdft%matrix%density_matrix))
1224 14 : density_matrix => mixed_cdft%matrix%density_matrix
1225 : END IF
1226 : ! Get number of weight functions per state
1227 100 : nvar = SIZE(w_matrix, 2)
1228 100 : nspins = SIZE(mixed_mo_coeff, 2)
1229 : ! Check that the number of MOs/AOs is equal in every CDFT state
1230 400 : ALLOCATE (nrow_mo(nspins), ncol_mo(nspins))
1231 294 : DO ispin = 1, nspins
1232 194 : CALL cp_fm_get_info(mixed_mo_coeff(1, ispin), ncol_global=check_mo(1), nrow_global=check_ao(1))
1233 536 : DO iforce_eval = 2, nforce_eval
1234 242 : CALL cp_fm_get_info(mixed_mo_coeff(iforce_eval, ispin), ncol_global=check_mo(2), nrow_global=check_ao(2))
1235 242 : IF (check_ao(1) /= check_ao(2)) &
1236 : CALL cp_abort(__LOCATION__, &
1237 0 : "The number of atomic orbitals must be the same in every CDFT state.")
1238 242 : IF (check_mo(1) /= check_mo(2)) &
1239 : CALL cp_abort(__LOCATION__, &
1240 194 : "The number of molecular orbitals must be the same in every CDFT state.")
1241 : END DO
1242 : END DO
1243 : ! Allocate work
1244 100 : npermutations = nforce_eval*(nforce_eval - 1)/2 ! Size of upper triangular part
1245 1426 : ALLOCATE (w_matrix_mo(nforce_eval, nforce_eval, nvar))
1246 782 : ALLOCATE (mo_overlap(npermutations), S_det(npermutations, nspins))
1247 800 : ALLOCATE (a(nspins, nvar, npermutations), b(nspins, nvar, npermutations))
1248 100 : a = 0.0_dp
1249 100 : b = 0.0_dp
1250 100 : IF (mixed_cdft%calculate_metric) THEN
1251 106 : ALLOCATE (density_matrix_diff(npermutations, nspins))
1252 42 : DO ispin = 1, nspins
1253 78 : DO ipermutation = 1, npermutations
1254 36 : NULLIFY (density_matrix_diff(ipermutation, ispin)%matrix)
1255 36 : CALL dbcsr_init_p(density_matrix_diff(ipermutation, ispin)%matrix)
1256 36 : CALL map_permutation_to_states(nforce_eval, ipermutation, istate, jstate)
1257 : CALL dbcsr_copy(density_matrix_diff(ipermutation, ispin)%matrix, &
1258 64 : density_matrix(istate, ispin)%matrix, name="DENSITY_MATRIX")
1259 : END DO
1260 : END DO
1261 : END IF
1262 : ! Check for uniform occupations
1263 100 : uniform_occupation = .NOT. ALLOCATED(mixed_cdft%occupations)
1264 100 : should_scale = .FALSE.
1265 100 : IF (.NOT. uniform_occupation) THEN
1266 56 : ALLOCATE (homo(nforce_eval, nspins))
1267 14 : mixed_cdft_section => section_vals_get_subs_vals(force_env_section, "MIXED%MIXED_CDFT")
1268 14 : CALL section_vals_val_get(mixed_cdft_section, "EPS_OCCUPIED", r_val=eps_occupied)
1269 14 : IF (eps_occupied > 1.0_dp .OR. eps_occupied < 0.0_dp) &
1270 : CALL cp_abort(__LOCATION__, &
1271 0 : "Keyword EPS_OCCUPIED only accepts values between 0.0 and 1.0")
1272 14 : IF (mixed_cdft%eps_svd == 0.0_dp) &
1273 : CALL cp_warn(__LOCATION__, &
1274 : "The usage of SVD based matrix inversions with fractionally occupied "// &
1275 0 : "orbitals is strongly recommended to screen nearly orthogonal states.")
1276 28 : CALL section_vals_val_get(mixed_cdft_section, "SCALE_WITH_OCCUPATION_NUMBERS", l_val=should_scale)
1277 : END IF
1278 : ! Start the actual calculation
1279 294 : DO ispin = 1, nspins
1280 : ! Create the MOxMO fm struct (mo_mo_fm_pools%struct)
1281 : ! The number of MOs/AOs is equal to the number of columns/rows of mo_coeff(:,:)%matrix
1282 194 : NULLIFY (fm_struct_mo, mo_mo_fmstruct)
1283 194 : CALL cp_fm_get_info(mixed_mo_coeff(1, ispin), ncol_global=ncol_mo(ispin), nrow_global=nrow_mo(ispin))
1284 194 : nao = nrow_mo(ispin)
1285 194 : IF (uniform_occupation) THEN
1286 166 : nmo = ncol_mo(ispin)
1287 : ELSE
1288 28 : nmo = ncol_mo(ispin)
1289 : ! Find indices of highest (fractionally) occupied molecular orbital
1290 84 : homo(:, ispin) = nmo
1291 84 : DO istate = 1, nforce_eval
1292 152 : DO j = nmo, 1, -1
1293 124 : IF (mixed_cdft%occupations(istate, ispin)%array(j) >= eps_occupied) THEN
1294 56 : homo(istate, ispin) = j
1295 56 : EXIT
1296 : END IF
1297 : END DO
1298 : END DO
1299 : ! Make matrices square by using the largest homo and emit warning if a state has fewer occupied MOs
1300 : ! Although it would be possible to handle the nonsquare situation as well,
1301 : ! all CDFT states should be in the same spin state for meaningful results
1302 84 : nmo = MAXVAL(homo(:, ispin))
1303 : ! Also check that the number of electrons is conserved (using a fixed sensible threshold)
1304 28 : nelectron_mismatch = .FALSE.
1305 92 : nelectron_tot = SUM(mixed_cdft%occupations(1, ispin)%array(1:nmo))
1306 56 : DO istate = 2, nforce_eval
1307 92 : IF (ABS(SUM(mixed_cdft%occupations(istate, ispin)%array(1:nmo)) - nelectron_tot) > 1.0E-4_dp) &
1308 28 : nelectron_mismatch = .TRUE.
1309 : END DO
1310 84 : IF (ANY(homo(:, ispin) /= nmo)) THEN
1311 0 : IF (ispin == 1) THEN
1312 : CALL cp_warn(__LOCATION__, &
1313 : "The number of occupied alpha MOs is not constant across all CDFT states. "// &
1314 0 : "Calculation proceeds but the results will likely be meaningless.")
1315 : ELSE
1316 : CALL cp_warn(__LOCATION__, &
1317 : "The number of occupied beta MOs is not constant across all CDFT states. "// &
1318 0 : "Calculation proceeds but the results will likely be meaningless.")
1319 : END IF
1320 28 : ELSE IF (nelectron_mismatch) THEN
1321 0 : IF (ispin == 1) THEN
1322 : CALL cp_warn(__LOCATION__, &
1323 : "The number of alpha electrons is not constant across all CDFT states. "// &
1324 0 : "Calculation proceeds but the results will likely be meaningless.")
1325 : ELSE
1326 : CALL cp_warn(__LOCATION__, &
1327 : "The number of beta electrons is not constant across all CDFT states. "// &
1328 0 : "Calculation proceeds but the results will likely be meaningless.")
1329 : END IF
1330 : END IF
1331 : END IF
1332 : CALL cp_fm_struct_create(fm_struct_mo, nrow_global=nao, ncol_global=nmo, &
1333 194 : context=mixed_cdft%blacs_env, para_env=force_env%para_env)
1334 : CALL cp_fm_struct_create(mo_mo_fmstruct, nrow_global=nmo, ncol_global=nmo, &
1335 194 : context=mixed_cdft%blacs_env, para_env=force_env%para_env)
1336 : ! Allocate work
1337 : CALL cp_fm_create(matrix=tmp2, matrix_struct=fm_struct_mo, &
1338 194 : name="ET_TMP_"//TRIM(ADJUSTL(cp_to_string(ispin)))//"_MATRIX")
1339 194 : CALL cp_fm_struct_release(fm_struct_mo)
1340 : CALL cp_fm_create(matrix=inverse_mat, matrix_struct=mo_mo_fmstruct, &
1341 194 : name="INVERSE_"//TRIM(ADJUSTL(cp_to_string(ispin)))//"_MATRIX")
1342 : CALL cp_fm_create(matrix=Tinverse, matrix_struct=mo_mo_fmstruct, &
1343 194 : name="T_INVERSE_"//TRIM(ADJUSTL(cp_to_string(ispin)))//"_MATRIX")
1344 552 : DO istate = 1, npermutations
1345 : CALL cp_fm_create(matrix=mo_overlap(istate), matrix_struct=mo_mo_fmstruct, &
1346 : name="MO_OVERLAP_"//TRIM(ADJUSTL(cp_to_string(istate)))//"_"// &
1347 552 : TRIM(ADJUSTL(cp_to_string(ispin)))//"_MATRIX")
1348 : END DO
1349 392 : DO ivar = 1, nvar
1350 836 : DO istate = 1, nforce_eval
1351 1810 : DO jstate = 1, nforce_eval
1352 1168 : IF (istate == jstate) CYCLE
1353 : CALL cp_fm_create(matrix=w_matrix_mo(istate, jstate, ivar), matrix_struct=mo_mo_fmstruct, &
1354 : name="W_"//TRIM(ADJUSTL(cp_to_string(istate)))//"_"// &
1355 : TRIM(ADJUSTL(cp_to_string(jstate)))//"_"// &
1356 1612 : TRIM(ADJUSTL(cp_to_string(ivar)))//"_MATRIX")
1357 : END DO
1358 : END DO
1359 : END DO
1360 194 : CALL cp_fm_struct_release(mo_mo_fmstruct)
1361 : ! Remove empty MOs and (possibly) scale rest with occupation numbers
1362 194 : IF (.NOT. uniform_occupation) THEN
1363 84 : DO iforce_eval = 1, nforce_eval
1364 56 : CALL cp_fm_to_fm(mixed_mo_coeff(iforce_eval, ispin), tmp2, nmo, 1, 1)
1365 56 : CALL cp_fm_release(mixed_mo_coeff(iforce_eval, ispin))
1366 : CALL cp_fm_create(mixed_mo_coeff(iforce_eval, ispin), &
1367 : matrix_struct=tmp2%matrix_struct, &
1368 : name="MO_COEFF_"//TRIM(ADJUSTL(cp_to_string(iforce_eval)))//"_" &
1369 56 : //TRIM(ADJUSTL(cp_to_string(ispin)))//"_MATRIX")
1370 56 : CALL cp_fm_to_fm(tmp2, mixed_mo_coeff(iforce_eval, ispin))
1371 56 : IF (should_scale) &
1372 : CALL cp_fm_column_scale(mixed_mo_coeff(iforce_eval, ispin), &
1373 40 : mixed_cdft%occupations(iforce_eval, ispin)%array(1:nmo))
1374 84 : DEALLOCATE (mixed_cdft%occupations(iforce_eval, ispin)%array)
1375 : END DO
1376 : END IF
1377 : ! calculate the MO overlaps (C_j)^T S C_i
1378 194 : ipermutation = 0
1379 630 : DO istate = 1, nforce_eval
1380 988 : DO jstate = istate + 1, nforce_eval
1381 358 : ipermutation = ipermutation + 1
1382 : CALL cp_dbcsr_sm_fm_multiply(mixed_matrix_s, mixed_mo_coeff(istate, ispin), &
1383 358 : tmp2, nmo, 1.0_dp, 0.0_dp)
1384 : CALL parallel_gemm('T', 'N', nmo, nmo, nao, 1.0_dp, &
1385 : mixed_mo_coeff(jstate, ispin), &
1386 358 : tmp2, 0.0_dp, mo_overlap(ipermutation))
1387 358 : IF (print_mo) &
1388 : CALL cp_fm_write_formatted(mo_overlap(ipermutation), mounit, &
1389 : "# MO overlap matrix (step "//TRIM(ADJUSTL(cp_to_string(mixed_cdft%sim_step)))// &
1390 : "): CDFT states "//TRIM(ADJUSTL(cp_to_string(istate)))//" and "// &
1391 : TRIM(ADJUSTL(cp_to_string(jstate)))//" (spin "// &
1392 440 : TRIM(ADJUSTL(cp_to_string(ispin)))//")")
1393 : END DO
1394 : END DO
1395 : ! calculate the MO-representations of the restraint matrices of all CDFT states
1396 392 : DO ivar = 1, nvar
1397 836 : DO jstate = 1, nforce_eval
1398 1810 : DO istate = 1, nforce_eval
1399 1168 : IF (istate == jstate) CYCLE
1400 : ! State i: (C_j)^T W_i C_i
1401 : CALL cp_dbcsr_sm_fm_multiply(w_matrix(istate, ivar)%matrix, &
1402 : mixed_mo_coeff(istate, ispin), &
1403 724 : tmp2, nmo, 1.0_dp, 0.0_dp)
1404 : CALL parallel_gemm('T', 'N', nmo, nmo, nao, 1.0_dp, &
1405 : mixed_mo_coeff(jstate, ispin), &
1406 1612 : tmp2, 0.0_dp, w_matrix_mo(istate, jstate, ivar))
1407 : END DO
1408 : END DO
1409 : END DO
1410 552 : DO ipermutation = 1, npermutations
1411 : ! Invert and calculate determinant of MO overlaps
1412 358 : CALL map_permutation_to_states(nforce_eval, ipermutation, istate, jstate)
1413 358 : IF (print_mo_eigval) THEN
1414 28 : NULLIFY (mo_eigval)
1415 : CALL cp_fm_invert(mo_overlap(ipermutation), inverse_mat, &
1416 : S_det(ipermutation, ispin), eps_svd=mixed_cdft%eps_svd, &
1417 28 : eigval=mo_eigval)
1418 28 : IF (moeigvalunit > 0) THEN
1419 14 : IF (mixed_cdft%eps_svd == 0.0_dp) THEN
1420 : WRITE (moeigvalunit, '(A,I2,A,I2,A,I1,A)') &
1421 0 : "# MO Overlap matrix eigenvalues for CDFT states ", istate, " and ", jstate, &
1422 0 : " (spin ", ispin, ")"
1423 : ELSE
1424 : WRITE (moeigvalunit, '(A,I2,A,I2,A,I1,A)') &
1425 14 : "# MO Overlap matrix singular values for CDFT states ", istate, " and ", jstate, &
1426 28 : " (spin ", ispin, ")"
1427 : END IF
1428 14 : WRITE (moeigvalunit, '(A1, A9, A12)') "#", "Index", ADJUSTL("Value")
1429 46 : DO j = 1, SIZE(mo_eigval)
1430 46 : WRITE (moeigvalunit, '(I10, F12.8)') j, mo_eigval(j)
1431 : END DO
1432 : END IF
1433 28 : DEALLOCATE (mo_eigval)
1434 : ELSE
1435 : CALL cp_fm_invert(mo_overlap(ipermutation), inverse_mat, &
1436 330 : S_det(ipermutation, ispin), eps_svd=mixed_cdft%eps_svd)
1437 : END IF
1438 358 : CALL cp_fm_get_info(inverse_mat, nrow_local=nrow_local, ncol_local=ncol_local)
1439 : ! Calculate <Psi_i | w_j(r) | Psi_j> for ivar:th constraint
1440 924 : DO j = 1, ncol_local
1441 1437 : DO k = 1, nrow_local
1442 1597 : DO ivar = 1, nvar
1443 : b(ispin, ivar, ipermutation) = b(ispin, ivar, ipermutation) + &
1444 : w_matrix_mo(jstate, istate, ivar)%local_data(k, j)* &
1445 1031 : inverse_mat%local_data(k, j)
1446 : END DO
1447 : END DO
1448 : END DO
1449 : ! Calculate <Psi_j | w_i(r) | Psi_i> for ivar:th constraint
1450 358 : CALL cp_fm_transpose(inverse_mat, Tinverse)
1451 924 : DO j = 1, ncol_local
1452 1437 : DO k = 1, nrow_local
1453 1597 : DO ivar = 1, nvar
1454 : a(ispin, ivar, ipermutation) = a(ispin, ivar, ipermutation) + &
1455 : w_matrix_mo(istate, jstate, ivar)%local_data(k, j)* &
1456 1031 : Tinverse%local_data(k, j)
1457 : END DO
1458 : END DO
1459 : END DO
1460 : ! Handle different constraint types
1461 720 : DO ivar = 1, nvar
1462 362 : SELECT CASE (mixed_cdft%constraint_type(ivar, istate))
1463 : CASE (cdft_charge_constraint)
1464 : ! No action needed
1465 : CASE (cdft_magnetization_constraint)
1466 0 : IF (ispin == 2) a(ispin, ivar, ipermutation) = -a(ispin, ivar, ipermutation)
1467 : CASE (cdft_alpha_constraint)
1468 : ! Constraint applied to alpha electrons only, set integrals involving beta to zero
1469 4 : IF (ispin == 2) a(ispin, ivar, ipermutation) = 0.0_dp
1470 : CASE (cdft_beta_constraint)
1471 : ! Constraint applied to beta electrons only, set integrals involving alpha to zero
1472 4 : IF (ispin == 1) a(ispin, ivar, ipermutation) = 0.0_dp
1473 : CASE DEFAULT
1474 362 : CPABORT("Unknown constraint type.")
1475 : END SELECT
1476 358 : SELECT CASE (mixed_cdft%constraint_type(ivar, jstate))
1477 : CASE (cdft_charge_constraint)
1478 : ! No action needed
1479 : CASE (cdft_magnetization_constraint)
1480 0 : IF (ispin == 2) b(ispin, ivar, ipermutation) = -b(ispin, ivar, ipermutation)
1481 : CASE (cdft_alpha_constraint)
1482 : ! Constraint applied to alpha electrons only, set integrals involving beta to zero
1483 4 : IF (ispin == 2) b(ispin, ivar, ipermutation) = 0.0_dp
1484 : CASE (cdft_beta_constraint)
1485 : ! Constraint applied to beta electrons only, set integrals involving alpha to zero
1486 4 : IF (ispin == 1) b(ispin, ivar, ipermutation) = 0.0_dp
1487 : CASE DEFAULT
1488 362 : CPABORT("Unknown constraint type.")
1489 : END SELECT
1490 : END DO
1491 : ! Compute density matrix difference P = P_j - P_i
1492 358 : IF (mixed_cdft%calculate_metric) &
1493 : CALL dbcsr_add(density_matrix_diff(ipermutation, ispin)%matrix, &
1494 36 : density_matrix(jstate, ispin)%matrix, -1.0_dp, 1.0_dp)
1495 : !
1496 1082 : CALL force_env%para_env%sum(a(ispin, :, ipermutation))
1497 1992 : CALL force_env%para_env%sum(b(ispin, :, ipermutation))
1498 : END DO
1499 : ! Release work
1500 194 : CALL cp_fm_release(tmp2)
1501 392 : DO ivar = 1, nvar
1502 836 : DO jstate = 1, nforce_eval
1503 1810 : DO istate = 1, nforce_eval
1504 1168 : IF (istate == jstate) CYCLE
1505 1612 : CALL cp_fm_release(w_matrix_mo(istate, jstate, ivar))
1506 : END DO
1507 : END DO
1508 : END DO
1509 552 : DO ipermutation = 1, npermutations
1510 552 : CALL cp_fm_release(mo_overlap(ipermutation))
1511 : END DO
1512 194 : CALL cp_fm_release(Tinverse)
1513 294 : CALL cp_fm_release(inverse_mat)
1514 : END DO
1515 100 : DEALLOCATE (mo_overlap)
1516 100 : DEALLOCATE (w_matrix_mo)
1517 100 : IF (.NOT. uniform_occupation) THEN
1518 14 : DEALLOCATE (homo)
1519 14 : DEALLOCATE (mixed_cdft%occupations)
1520 : END IF
1521 100 : IF (print_mo) &
1522 : CALL cp_print_key_finished_output(mounit, logger, force_env_section, &
1523 2 : "MIXED%MIXED_CDFT%PRINT%PROGRAM_RUN_INFO", on_file=.TRUE.)
1524 100 : IF (print_mo_eigval) &
1525 : CALL cp_print_key_finished_output(moeigvalunit, logger, force_env_section, &
1526 14 : "MIXED%MIXED_CDFT%PRINT%PROGRAM_RUN_INFO", on_file=.TRUE.)
1527 : ! solve eigenstates for the projector matrix
1528 400 : ALLOCATE (Wda(nvar, npermutations))
1529 300 : ALLOCATE (Sda(npermutations))
1530 104 : IF (.NOT. mixed_cdft%identical_constraints) ALLOCATE (Wad(nvar, npermutations))
1531 282 : DO ipermutation = 1, npermutations
1532 182 : IF (nspins == 2) THEN
1533 176 : Sda(ipermutation) = ABS(S_det(ipermutation, 1)*S_det(ipermutation, 2))
1534 : ELSE
1535 6 : Sda(ipermutation) = S_det(ipermutation, 1)**2
1536 : END IF
1537 : ! Finalize <Psi_j | w_i(r) | Psi_i> by multiplication with Sda
1538 466 : DO ivar = 1, nvar
1539 366 : IF (mixed_cdft%identical_constraints) THEN
1540 : Wda(ivar, ipermutation) = (SUM(a(:, ivar, ipermutation)) + SUM(b(:, ivar, ipermutation)))* &
1541 898 : Sda(ipermutation)/2.0_dp
1542 : ELSE
1543 6 : Wda(ivar, ipermutation) = SUM(a(:, ivar, ipermutation))*Sda(ipermutation)
1544 6 : Wad(ivar, ipermutation) = SUM(b(:, ivar, ipermutation))*Sda(ipermutation)
1545 : END IF
1546 : END DO
1547 : END DO
1548 100 : DEALLOCATE (a, b, S_det)
1549 : ! Transfer info about the constraint calculations
1550 800 : ALLOCATE (W_diagonal(nvar, nforce_eval), strength(nvar, nforce_eval), energy(nforce_eval))
1551 100 : W_diagonal = 0.0_dp
1552 324 : DO iforce_eval = 1, nforce_eval
1553 552 : strength(:, iforce_eval) = mixed_env%strength(iforce_eval, :)
1554 : END DO
1555 100 : energy = 0.0_dp
1556 324 : DO iforce_eval = 1, nforce_eval
1557 224 : IF (.NOT. ASSOCIATED(force_env%sub_force_env(iforce_eval)%force_env)) CYCLE
1558 186 : IF (force_env%mixed_env%do_mixed_qmmm_cdft) THEN
1559 24 : qs_env => force_env%sub_force_env(iforce_eval)%force_env%qmmm_env%qs_env
1560 : ELSE
1561 162 : CALL force_env_get(force_env%sub_force_env(iforce_eval)%force_env, qs_env=qs_env)
1562 : END IF
1563 186 : CALL get_qs_env(qs_env, energy=energy_qs, dft_control=dft_control)
1564 286 : IF (force_env%sub_force_env(iforce_eval)%force_env%para_env%is_source()) THEN
1565 226 : W_diagonal(:, iforce_eval) = dft_control%qs_control%cdft_control%value(:)
1566 150 : energy(iforce_eval) = energy_qs%total
1567 : END IF
1568 : END DO
1569 100 : CALL force_env%para_env%sum(W_diagonal)
1570 100 : CALL force_env%para_env%sum(energy)
1571 : CALL mixed_cdft_result_type_set(mixed_cdft%results, Wda=Wda, W_diagonal=W_diagonal, &
1572 100 : energy=energy, strength=strength)
1573 100 : IF (.NOT. mixed_cdft%identical_constraints) CALL mixed_cdft_result_type_set(mixed_cdft%results, Wad=Wad)
1574 : ! Construct S
1575 400 : ALLOCATE (S_mat(nforce_eval, nforce_eval))
1576 324 : DO istate = 1, nforce_eval
1577 324 : S_mat(istate, istate) = 1.0_dp
1578 : END DO
1579 282 : DO ipermutation = 1, npermutations
1580 182 : CALL map_permutation_to_states(nforce_eval, ipermutation, istate, jstate)
1581 182 : S_mat(istate, jstate) = Sda(ipermutation)
1582 282 : S_mat(jstate, istate) = Sda(ipermutation)
1583 : END DO
1584 100 : CALL mixed_cdft_result_type_set(mixed_cdft%results, S=S_mat)
1585 : ! Invert S via eigendecomposition and compute S^-(1/2)
1586 400 : ALLOCATE (eigenv(nforce_eval), tmp_mat(nforce_eval, nforce_eval))
1587 100 : CALL diamat_all(S_mat, eigenv, .TRUE.)
1588 100 : tmp_mat = 0.0_dp
1589 324 : DO istate = 1, nforce_eval
1590 224 : IF (eigenv(istate) < 1.0e-14_dp) THEN
1591 : ! Safeguard against division with 0 and negative numbers
1592 10 : eigenv(istate) = 1.0e-14_dp
1593 : CALL cp_warn(__LOCATION__, &
1594 : "The overlap matrix is numerically nearly singular. "// &
1595 10 : "Calculation proceeds but the results might be meaningless.")
1596 : END IF
1597 324 : tmp_mat(istate, istate) = 1.0_dp/SQRT(eigenv(istate))
1598 : END DO
1599 5940 : tmp_mat(:, :) = MATMUL(tmp_mat, TRANSPOSE(S_mat))
1600 10356 : S_mat(:, :) = MATMUL(S_mat, tmp_mat) ! S^(-1/2)
1601 100 : CALL mixed_cdft_result_type_set(mixed_cdft%results, S_minushalf=S_mat)
1602 100 : DEALLOCATE (eigenv, tmp_mat, S_mat)
1603 : ! Construct nonorthogonal diabatic Hamiltonian matrix H''
1604 300 : ALLOCATE (H_mat(nforce_eval, nforce_eval))
1605 118 : IF (mixed_cdft%nonortho_coupling) ALLOCATE (coupling_nonortho(npermutations))
1606 324 : DO istate = 1, nforce_eval
1607 324 : H_mat(istate, istate) = energy(istate)
1608 : END DO
1609 282 : DO ipermutation = 1, npermutations
1610 182 : CALL map_permutation_to_states(nforce_eval, ipermutation, istate, jstate)
1611 182 : sum_a = 0.0_dp
1612 182 : sum_b = 0.0_dp
1613 366 : DO ivar = 1, nvar
1614 : ! V_J * <Psi_J | w_J(r) | Psi_J>
1615 184 : sum_b(1) = sum_b(1) + strength(ivar, jstate)*W_diagonal(ivar, jstate)
1616 : ! V_I * <Psi_I | w_I(r) | Psi_I>
1617 184 : sum_a(1) = sum_a(1) + strength(ivar, istate)*W_diagonal(ivar, istate)
1618 366 : IF (mixed_cdft%identical_constraints) THEN
1619 : ! V_J * W_IJ
1620 182 : sum_b(2) = sum_b(2) + strength(ivar, jstate)*Wda(ivar, ipermutation)
1621 : ! V_I * W_JI
1622 182 : sum_a(2) = sum_a(2) + strength(ivar, istate)*Wda(ivar, ipermutation)
1623 : ELSE
1624 : ! V_J * W_IJ
1625 2 : sum_b(2) = sum_b(2) + strength(ivar, jstate)*Wad(ivar, ipermutation)
1626 : ! V_I * W_JI
1627 2 : sum_a(2) = sum_a(2) + strength(ivar, istate)*Wda(ivar, ipermutation)
1628 : END IF
1629 : END DO
1630 : ! Denote F_X = <Psi_X | H_X + V_X*w_X(r) | Psi_X> = E_X + V_X*<Psi_X | w_X(r) | Psi_X>
1631 : ! H_IJ = F_J*S_IJ - V_J * W_IJ
1632 182 : c(1) = (energy(jstate) + sum_b(1))*Sda(ipermutation) - sum_b(2)
1633 : ! H_JI = F_I*S_JI - V_I * W_JI
1634 182 : c(2) = (energy(istate) + sum_a(1))*Sda(ipermutation) - sum_a(2)
1635 : ! H''(I,J) = 0.5*(H_IJ+H_JI) = H''(J,I)
1636 182 : H_mat(istate, jstate) = (c(1) + c(2))*0.5_dp
1637 182 : H_mat(jstate, istate) = H_mat(istate, jstate)
1638 464 : IF (mixed_cdft%nonortho_coupling) coupling_nonortho(ipermutation) = H_mat(istate, jstate)
1639 : END DO
1640 100 : CALL mixed_cdft_result_type_set(mixed_cdft%results, H=H_mat)
1641 100 : DEALLOCATE (H_mat, W_diagonal, Wda, strength, energy, Sda)
1642 100 : IF (ALLOCATED(Wad)) DEALLOCATE (Wad)
1643 100 : IF (mixed_cdft%nonortho_coupling) THEN
1644 18 : CALL mixed_cdft_result_type_set(mixed_cdft%results, nonortho=coupling_nonortho)
1645 18 : DEALLOCATE (coupling_nonortho)
1646 : END IF
1647 : ! Compute metric to assess reliability of coupling
1648 100 : IF (mixed_cdft%calculate_metric) CALL mixed_cdft_calculate_metric(force_env, mixed_cdft, density_matrix_diff, ncol_mo)
1649 : ! Compute coupling also with the wavefunction overlap method, see Migliore2009
1650 : ! Requires the unconstrained KS ground state wavefunction as input
1651 100 : IF (mixed_cdft%wfn_overlap_method) THEN
1652 8 : IF (.NOT. uniform_occupation) &
1653 : CALL cp_abort(__LOCATION__, &
1654 0 : "Wavefunction overlap method supports only uniformly occupied MOs.")
1655 8 : CALL mixed_cdft_wfn_overlap_method(force_env, mixed_cdft, ncol_mo, nrow_mo)
1656 : END IF
1657 : ! Release remaining work
1658 100 : DEALLOCATE (nrow_mo, ncol_mo)
1659 100 : CALL mixed_cdft_work_type_release(mixed_cdft%matrix)
1660 100 : CALL timestop(handle)
1661 :
1662 300 : END SUBROUTINE mixed_cdft_interaction_matrices
1663 :
1664 : ! **************************************************************************************************
1665 : !> \brief Routine to calculate the CDFT electronic couplings.
1666 : !> \param force_env the force_env that holds the CDFT states
1667 : !> \par History
1668 : !> 11.17 created [Nico Holmberg]
1669 : ! **************************************************************************************************
1670 100 : SUBROUTINE mixed_cdft_calculate_coupling_low(force_env)
1671 : TYPE(force_env_type), POINTER :: force_env
1672 :
1673 : CHARACTER(len=*), PARAMETER :: routineN = 'mixed_cdft_calculate_coupling_low'
1674 :
1675 : INTEGER :: handle, ipermutation, istate, jstate, &
1676 : nforce_eval, npermutations, nvar
1677 : LOGICAL :: use_lowdin, use_rotation
1678 100 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: coupling_lowdin, coupling_rotation, &
1679 100 : eigenv
1680 100 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: tmp_mat, W_mat
1681 : TYPE(mixed_cdft_type), POINTER :: mixed_cdft
1682 :
1683 100 : NULLIFY (mixed_cdft)
1684 100 : CPASSERT(ASSOCIATED(force_env))
1685 100 : CALL get_mixed_env(force_env%mixed_env, cdft_control=mixed_cdft)
1686 100 : CALL timeset(routineN, handle)
1687 100 : CPASSERT(ASSOCIATED(mixed_cdft))
1688 100 : CPASSERT(ALLOCATED(mixed_cdft%results%W_diagonal))
1689 100 : CPASSERT(ALLOCATED(mixed_cdft%results%Wda))
1690 100 : CPASSERT(ALLOCATED(mixed_cdft%results%S_minushalf))
1691 100 : CPASSERT(ALLOCATED(mixed_cdft%results%H))
1692 : ! Decide which methods to use for computing the coupling
1693 : ! Default behavior is to use rotation when a single constraint is active, otherwise uses Lowdin orthogonalization
1694 : ! The latter can also be explicitly requested when a single constraint is active
1695 : ! Possibly computes the coupling additionally with the wavefunction overlap method
1696 100 : nforce_eval = SIZE(mixed_cdft%results%H, 1)
1697 100 : nvar = SIZE(mixed_cdft%results%Wda, 1)
1698 100 : npermutations = nforce_eval*(nforce_eval - 1)/2
1699 400 : ALLOCATE (tmp_mat(nforce_eval, nforce_eval))
1700 100 : IF (nvar == 1 .AND. mixed_cdft%identical_constraints) THEN
1701 96 : use_rotation = .TRUE.
1702 96 : use_lowdin = mixed_cdft%use_lowdin
1703 : ELSE
1704 : use_rotation = .FALSE.
1705 : use_lowdin = .TRUE.
1706 : END IF
1707 : ! Calculate coupling by rotating the CDFT states to eigenstates of the weight matrix W (single constraint only)
1708 : IF (use_rotation) THEN
1709 : ! Construct W
1710 480 : ALLOCATE (W_mat(nforce_eval, nforce_eval), coupling_rotation(npermutations))
1711 288 : ALLOCATE (eigenv(nforce_eval))
1712 : ! W_mat(i, i) = N_i where N_i is the value of the constraint in state i
1713 312 : DO istate = 1, nforce_eval
1714 528 : W_mat(istate, istate) = SUM(mixed_cdft%results%W_diagonal(:, istate))
1715 : END DO
1716 : ! W_mat(i, j) = <Psi_i|w(r)|Psi_j>
1717 274 : DO ipermutation = 1, npermutations
1718 178 : CALL map_permutation_to_states(nforce_eval, ipermutation, istate, jstate)
1719 356 : W_mat(istate, jstate) = SUM(mixed_cdft%results%Wda(:, ipermutation))
1720 452 : W_mat(jstate, istate) = W_mat(istate, jstate)
1721 : END DO
1722 : ! Solve generalized eigenvalue equation WV = SVL
1723 : ! Convert to standard eigenvalue problem via symmetric orthogonalisation
1724 5036 : tmp_mat(:, :) = MATMUL(W_mat, mixed_cdft%results%S_minushalf) ! W * S^(-1/2)
1725 5036 : W_mat(:, :) = MATMUL(mixed_cdft%results%S_minushalf, tmp_mat) ! W' = S^(-1/2) * W * S^(-1/2)
1726 96 : CALL diamat_all(W_mat, eigenv, .TRUE.) ! Solve W'V' = AV'
1727 9188 : tmp_mat(:, :) = MATMUL(mixed_cdft%results%S_minushalf, W_mat) ! Reverse transformation V = S^(-1/2) V'
1728 : ! Construct final, orthogonal diabatic Hamiltonian matrix H
1729 9188 : W_mat(:, :) = MATMUL(mixed_cdft%results%H, tmp_mat) ! H'' * V
1730 10168 : W_mat(:, :) = MATMUL(TRANSPOSE(tmp_mat), W_mat) ! H = V^T * H'' * V
1731 274 : DO ipermutation = 1, npermutations
1732 178 : CALL map_permutation_to_states(nforce_eval, ipermutation, istate, jstate)
1733 274 : coupling_rotation(ipermutation) = W_mat(istate, jstate)
1734 : END DO
1735 96 : CALL mixed_cdft_result_type_set(mixed_cdft%results, rotation=coupling_rotation)
1736 96 : DEALLOCATE (W_mat, coupling_rotation, eigenv)
1737 : END IF
1738 : ! Calculate coupling by Lowdin orthogonalization
1739 100 : IF (use_lowdin) THEN
1740 60 : ALLOCATE (coupling_lowdin(npermutations))
1741 780 : tmp_mat(:, :) = MATMUL(mixed_cdft%results%H, mixed_cdft%results%S_minushalf) ! H'' * S^(-1/2)
1742 : ! Final orthogonal diabatic Hamiltonian matrix H
1743 580 : tmp_mat(:, :) = MATMUL(mixed_cdft%results%S_minushalf, tmp_mat) ! H = S^(-1/2) * H'' * S^(-1/2)
1744 40 : DO ipermutation = 1, npermutations
1745 20 : CALL map_permutation_to_states(nforce_eval, ipermutation, istate, jstate)
1746 40 : coupling_lowdin(ipermutation) = tmp_mat(istate, jstate)
1747 : END DO
1748 20 : CALL mixed_cdft_result_type_set(mixed_cdft%results, lowdin=coupling_lowdin)
1749 20 : DEALLOCATE (coupling_lowdin)
1750 : END IF
1751 100 : DEALLOCATE (tmp_mat)
1752 100 : CALL timestop(handle)
1753 :
1754 200 : END SUBROUTINE mixed_cdft_calculate_coupling_low
1755 :
1756 : ! **************************************************************************************************
1757 : !> \brief Performs a configuration interaction calculation in the basis spanned by the CDFT states.
1758 : !> \param force_env the force_env that holds the CDFT states
1759 : !> \par History
1760 : !> 11.17 created [Nico Holmberg]
1761 : ! **************************************************************************************************
1762 100 : SUBROUTINE mixed_cdft_configuration_interaction(force_env)
1763 : TYPE(force_env_type), POINTER :: force_env
1764 :
1765 : CHARACTER(len=*), PARAMETER :: routineN = 'mixed_cdft_configuration_interaction'
1766 :
1767 : INTEGER :: handle, info, iounit, istate, ivar, &
1768 : nforce_eval, work_array_size
1769 100 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: eigenv, work
1770 100 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: H_mat, H_mat_copy, S_mat, S_mat_copy
1771 : REAL(KIND=dp), EXTERNAL :: dnrm2
1772 : TYPE(cp_logger_type), POINTER :: logger
1773 : TYPE(mixed_cdft_type), POINTER :: mixed_cdft
1774 : TYPE(section_vals_type), POINTER :: force_env_section, print_section
1775 :
1776 : EXTERNAL :: dsygv
1777 :
1778 100 : NULLIFY (logger, force_env_section, print_section, mixed_cdft)
1779 :
1780 100 : CPASSERT(ASSOCIATED(force_env))
1781 100 : CALL get_mixed_env(force_env%mixed_env, cdft_control=mixed_cdft)
1782 100 : CPASSERT(ASSOCIATED(mixed_cdft))
1783 :
1784 100 : IF (.NOT. mixed_cdft%do_ci) RETURN
1785 :
1786 20 : logger => cp_get_default_logger()
1787 20 : CALL timeset(routineN, handle)
1788 : CALL force_env_get(force_env=force_env, &
1789 20 : force_env_section=force_env_section)
1790 20 : print_section => section_vals_get_subs_vals(force_env_section, "MIXED%MIXED_CDFT%PRINT%PROGRAM_RUN_INFO")
1791 20 : iounit = cp_print_key_unit_nr(logger, print_section, '', extension='.mixedLog')
1792 :
1793 20 : CPASSERT(ALLOCATED(mixed_cdft%results%S))
1794 20 : CPASSERT(ALLOCATED(mixed_cdft%results%H))
1795 20 : nforce_eval = SIZE(mixed_cdft%results%S, 1)
1796 120 : ALLOCATE (S_mat(nforce_eval, nforce_eval), H_mat(nforce_eval, nforce_eval))
1797 60 : ALLOCATE (eigenv(nforce_eval))
1798 168 : S_mat(:, :) = mixed_cdft%results%S(:, :)
1799 168 : H_mat(:, :) = mixed_cdft%results%H(:, :)
1800 : ! Workspace query
1801 20 : ALLOCATE (work(1))
1802 20 : info = 0
1803 100 : ALLOCATE (H_mat_copy(nforce_eval, nforce_eval), S_mat_copy(nforce_eval, nforce_eval))
1804 168 : H_mat_copy(:, :) = H_mat(:, :) ! Need explicit copies because dsygv destroys original values
1805 168 : S_mat_copy(:, :) = S_mat(:, :)
1806 20 : CALL dsygv(1, 'V', 'U', nforce_eval, H_mat_copy, nforce_eval, S_mat_copy, nforce_eval, eigenv, work, -1, info)
1807 20 : work_array_size = NINT(work(1))
1808 20 : DEALLOCATE (H_mat_copy, S_mat_copy)
1809 : ! Allocate work array
1810 20 : DEALLOCATE (work)
1811 60 : ALLOCATE (work(work_array_size))
1812 20 : work = 0.0_dp
1813 : ! Solve Hc = eSc
1814 20 : info = 0
1815 20 : CALL dsygv(1, 'V', 'U', nforce_eval, H_mat, nforce_eval, S_mat, nforce_eval, eigenv, work, work_array_size, info)
1816 20 : IF (info /= 0) THEN
1817 0 : IF (info > nforce_eval) THEN
1818 0 : CPABORT("Matrix S is not positive definite")
1819 : ELSE
1820 0 : CPABORT("Diagonalization of H matrix failed.")
1821 : END IF
1822 : END IF
1823 : ! dsygv returns eigenvectors (stored in columns of H_mat) that are normalized to H^T * S * H = I
1824 : ! Renormalize eigenvectors to H^T * H = I
1825 64 : DO ivar = 1, nforce_eval
1826 168 : H_mat(:, ivar) = H_mat(:, ivar)/dnrm2(nforce_eval, H_mat(:, ivar), 1)
1827 : END DO
1828 20 : DEALLOCATE (work)
1829 20 : IF (iounit > 0) THEN
1830 10 : WRITE (iounit, '(/,T3,A)') '------------------ CDFT Configuration Interaction (CDFT-CI) ------------------'
1831 32 : DO ivar = 1, nforce_eval
1832 22 : IF (ivar == 1) THEN
1833 10 : WRITE (iounit, '(T3,A,T58,(3X,F20.14))') 'Ground state energy:', eigenv(ivar)
1834 : ELSE
1835 12 : WRITE (iounit, '(/,T3,A,I2,A,T58,(3X,F20.14))') 'Excited state (', ivar - 1, ' ) energy:', eigenv(ivar)
1836 : END IF
1837 58 : DO istate = 1, nforce_eval, 2
1838 48 : IF (istate == 1) THEN
1839 : WRITE (iounit, '(T3,A,T54,(3X,2F12.6))') &
1840 22 : 'Expansion coefficients:', H_mat(istate, ivar), H_mat(istate + 1, ivar)
1841 4 : ELSE IF (istate < nforce_eval) THEN
1842 4 : WRITE (iounit, '(T54,(3X,2F12.6))') H_mat(istate, ivar), H_mat(istate + 1, ivar)
1843 : ELSE
1844 0 : WRITE (iounit, '(T54,(3X,F12.6))') H_mat(istate, ivar)
1845 : END IF
1846 : END DO
1847 : END DO
1848 : WRITE (iounit, '(T3,A)') &
1849 10 : '------------------------------------------------------------------------------'
1850 : END IF
1851 20 : DEALLOCATE (S_mat, H_mat, eigenv)
1852 : CALL cp_print_key_finished_output(iounit, logger, force_env_section, &
1853 20 : "MIXED%MIXED_CDFT%PRINT%PROGRAM_RUN_INFO")
1854 20 : CALL timestop(handle)
1855 :
1856 20 : END SUBROUTINE mixed_cdft_configuration_interaction
1857 : ! **************************************************************************************************
1858 : !> \brief Block diagonalizes the mixed CDFT Hamiltonian matrix.
1859 : !> \param force_env the force_env that holds the CDFT states
1860 : !> \par History
1861 : !> 11.17 created [Nico Holmberg]
1862 : !> 01.18 added recursive diagonalization
1863 : !> split to subroutines [Nico Holmberg]
1864 : ! **************************************************************************************************
1865 100 : SUBROUTINE mixed_cdft_block_diag(force_env)
1866 : TYPE(force_env_type), POINTER :: force_env
1867 :
1868 : CHARACTER(len=*), PARAMETER :: routineN = 'mixed_cdft_block_diag'
1869 :
1870 : INTEGER :: handle, i, iounit, irecursion, j, n, &
1871 : nblk, nforce_eval, nrecursion
1872 : LOGICAL :: ignore_excited
1873 100 : TYPE(cp_1d_i_p_type), ALLOCATABLE, DIMENSION(:) :: blocks
1874 100 : TYPE(cp_1d_r_p_type), ALLOCATABLE, DIMENSION(:) :: eigenvalues
1875 100 : TYPE(cp_2d_r_p_type), ALLOCATABLE, DIMENSION(:) :: H_block, S_block
1876 : TYPE(cp_logger_type), POINTER :: logger
1877 : TYPE(mixed_cdft_type), POINTER :: mixed_cdft
1878 : TYPE(section_vals_type), POINTER :: force_env_section, print_section
1879 :
1880 : EXTERNAL :: dsygv
1881 :
1882 100 : NULLIFY (logger, force_env_section, print_section, mixed_cdft)
1883 :
1884 100 : CPASSERT(ASSOCIATED(force_env))
1885 100 : CALL get_mixed_env(force_env%mixed_env, cdft_control=mixed_cdft)
1886 100 : CPASSERT(ASSOCIATED(mixed_cdft))
1887 :
1888 100 : IF (.NOT. mixed_cdft%block_diagonalize) RETURN
1889 :
1890 8 : logger => cp_get_default_logger()
1891 8 : CALL timeset(routineN, handle)
1892 :
1893 8 : CPASSERT(ALLOCATED(mixed_cdft%results%S))
1894 8 : CPASSERT(ALLOCATED(mixed_cdft%results%H))
1895 8 : nforce_eval = SIZE(mixed_cdft%results%S, 1)
1896 :
1897 : CALL force_env_get(force_env=force_env, &
1898 8 : force_env_section=force_env_section)
1899 8 : print_section => section_vals_get_subs_vals(force_env_section, "MIXED%MIXED_CDFT%PRINT%PROGRAM_RUN_INFO")
1900 8 : iounit = cp_print_key_unit_nr(logger, print_section, '', extension='.mixedLog')
1901 : ! Read block definitions from input
1902 8 : CALL mixed_cdft_read_block_diag(force_env, blocks, ignore_excited, nrecursion)
1903 8 : nblk = SIZE(blocks)
1904 : ! Start block diagonalization
1905 18 : DO irecursion = 1, nrecursion
1906 : ! Print block definitions
1907 10 : IF (iounit > 0 .AND. irecursion == 1) THEN
1908 4 : WRITE (iounit, '(/,T3,A)') '-------------------------- CDFT BLOCK DIAGONALIZATION ------------------------'
1909 4 : WRITE (iounit, '(T3,A)') 'Block diagonalizing the mixed CDFT Hamiltonian'
1910 4 : WRITE (iounit, '(T3,A,I3)') 'Number of blocks:', nblk
1911 4 : WRITE (iounit, '(T3,A,L3)') 'Ignoring excited states within blocks:', ignore_excited
1912 4 : WRITE (iounit, '(/,T3,A)') 'List of CDFT states for each block'
1913 14 : DO i = 1, nblk
1914 33 : WRITE (iounit, '(T6,A,I3,A,6I3)') 'Block', i, ':', (blocks(i)%array(j), j=1, SIZE(blocks(i)%array))
1915 : END DO
1916 : END IF
1917 : ! Recursive diagonalization: update counters and references
1918 10 : IF (irecursion > 1) THEN
1919 2 : nblk = nblk/2
1920 10 : ALLOCATE (blocks(nblk))
1921 2 : j = 1
1922 6 : DO i = 1, nblk
1923 4 : NULLIFY (blocks(i)%array)
1924 4 : ALLOCATE (blocks(i)%array(2))
1925 12 : blocks(i)%array = [j, j + 1]
1926 6 : j = j + 2
1927 : END DO
1928 : ! Print info
1929 2 : IF (iounit > 0) THEN
1930 1 : WRITE (iounit, '(/, T3,A)') 'Recursive block diagonalization of the mixed CDFT Hamiltonian'
1931 1 : WRITE (iounit, '(T6,A)') 'Block diagonalization is continued until only two matrix blocks remain.'
1932 1 : WRITE (iounit, '(T6,A)') 'The new blocks are formed by collecting pairs of blocks from the previous'
1933 1 : WRITE (iounit, '(T6,A)') 'block diagonalized matrix in ascending order.'
1934 1 : WRITE (iounit, '(/,T3,A,I3,A,I3)') 'Recursion step:', irecursion - 1, ' of ', nrecursion - 1
1935 1 : WRITE (iounit, '(/,T3,A)') 'List of old block indices for each new block'
1936 3 : DO i = 1, nblk
1937 7 : WRITE (iounit, '(T6,A,I3,A,6I3)') 'Block', i, ':', (blocks(i)%array(j), j=1, SIZE(blocks(i)%array))
1938 : END DO
1939 : END IF
1940 : END IF
1941 : ! Get the Hamiltonian and overlap matrices of each block
1942 10 : CALL mixed_cdft_get_blocks(mixed_cdft, blocks, H_block, S_block)
1943 : ! Diagonalize blocks
1944 10 : CALL mixed_cdft_diagonalize_blocks(blocks, H_block, S_block, eigenvalues)
1945 : ! Assemble the block diagonalized matrices
1946 10 : IF (ignore_excited) THEN
1947 8 : n = nblk
1948 : ELSE
1949 2 : n = nforce_eval
1950 : END IF
1951 10 : CALL mixed_cdft_assemble_block_diag(mixed_cdft, blocks, H_block, eigenvalues, n, iounit)
1952 : ! Deallocate work
1953 34 : DO i = 1, nblk
1954 24 : DEALLOCATE (H_block(i)%array)
1955 24 : DEALLOCATE (S_block(i)%array)
1956 24 : DEALLOCATE (eigenvalues(i)%array)
1957 34 : DEALLOCATE (blocks(i)%array)
1958 : END DO
1959 18 : DEALLOCATE (H_block, S_block, eigenvalues, blocks)
1960 : END DO ! recursion
1961 8 : IF (iounit > 0) &
1962 : WRITE (iounit, '(T3,A)') &
1963 4 : '------------------------------------------------------------------------------'
1964 : CALL cp_print_key_finished_output(iounit, logger, force_env_section, &
1965 8 : "MIXED%MIXED_CDFT%PRINT%PROGRAM_RUN_INFO")
1966 8 : CALL timestop(handle)
1967 :
1968 100 : END SUBROUTINE mixed_cdft_block_diag
1969 : ! **************************************************************************************************
1970 : !> \brief Routine to calculate the CDFT electronic coupling reliability metric
1971 : !> \param force_env the force_env that holds the CDFT states
1972 : !> \param mixed_cdft the mixed_cdft env
1973 : !> \param density_matrix_diff array holding difference density matrices (P_j - P_i) for every CDFT
1974 : !> state permutation
1975 : !> \param ncol_mo the number of MOs per spin
1976 : !> \par History
1977 : !> 11.17 created [Nico Holmberg]
1978 : ! **************************************************************************************************
1979 14 : SUBROUTINE mixed_cdft_calculate_metric(force_env, mixed_cdft, density_matrix_diff, ncol_mo)
1980 : TYPE(force_env_type), POINTER :: force_env
1981 : TYPE(mixed_cdft_type), POINTER :: mixed_cdft
1982 : TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: density_matrix_diff
1983 : INTEGER, DIMENSION(:) :: ncol_mo
1984 :
1985 : CHARACTER(len=*), PARAMETER :: routineN = 'mixed_cdft_calculate_metric'
1986 :
1987 : INTEGER :: handle, ipermutation, ispin, j, &
1988 : nforce_eval, npermutations, nspins
1989 14 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: evals
1990 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: metric
1991 : TYPE(dbcsr_type) :: e_vectors
1992 :
1993 14 : CALL timeset(routineN, handle)
1994 14 : nforce_eval = SIZE(mixed_cdft%results%H, 1)
1995 14 : npermutations = nforce_eval*(nforce_eval - 1)/2
1996 14 : nspins = SIZE(density_matrix_diff, 2)
1997 56 : ALLOCATE (metric(npermutations, nspins))
1998 14 : metric = 0.0_dp
1999 14 : CALL dbcsr_create(e_vectors, template=density_matrix_diff(1, 1)%matrix)
2000 42 : DO ispin = 1, nspins
2001 84 : ALLOCATE (evals(ncol_mo(ispin)))
2002 64 : DO ipermutation = 1, npermutations
2003 : ! Take into account doubly occupied orbitals without LSD
2004 36 : IF (nspins == 1) &
2005 0 : CALL dbcsr_scale(density_matrix_diff(ipermutation, 1)%matrix, alpha_scalar=0.5_dp)
2006 : ! Diagonalize difference density matrix
2007 : CALL cp_dbcsr_syevd(density_matrix_diff(ipermutation, ispin)%matrix, e_vectors, evals, &
2008 36 : para_env=force_env%para_env, blacs_env=mixed_cdft%blacs_env)
2009 36 : CALL dbcsr_release_p(density_matrix_diff(ipermutation, ispin)%matrix)
2010 100 : DO j = 1, ncol_mo(ispin)
2011 72 : metric(ipermutation, ispin) = metric(ipermutation, ispin) + (evals(j)**2 - evals(j)**4)
2012 : END DO
2013 : END DO
2014 42 : DEALLOCATE (evals)
2015 : END DO
2016 14 : CALL dbcsr_release(e_vectors)
2017 14 : DEALLOCATE (density_matrix_diff)
2018 78 : metric(:, :) = metric(:, :)/4.0_dp
2019 14 : CALL mixed_cdft_result_type_set(mixed_cdft%results, metric=metric)
2020 14 : DEALLOCATE (metric)
2021 14 : CALL timestop(handle)
2022 :
2023 28 : END SUBROUTINE mixed_cdft_calculate_metric
2024 :
2025 : ! **************************************************************************************************
2026 : !> \brief Routine to calculate the electronic coupling according to the wavefunction overlap method
2027 : !> \param force_env the force_env that holds the CDFT states
2028 : !> \param mixed_cdft the mixed_cdft env
2029 : !> \param ncol_mo the number of MOs per spin
2030 : !> \param nrow_mo the number of AOs per spin
2031 : !> \par History
2032 : !> 11.17 created [Nico Holmberg]
2033 : ! **************************************************************************************************
2034 8 : SUBROUTINE mixed_cdft_wfn_overlap_method(force_env, mixed_cdft, ncol_mo, nrow_mo)
2035 : TYPE(force_env_type), POINTER :: force_env
2036 : TYPE(mixed_cdft_type), POINTER :: mixed_cdft
2037 : INTEGER, DIMENSION(:) :: ncol_mo, nrow_mo
2038 :
2039 : CHARACTER(len=*), PARAMETER :: routineN = 'mixed_cdft_wfn_overlap_method'
2040 :
2041 : CHARACTER(LEN=default_path_length) :: file_name
2042 : INTEGER :: handle, ipermutation, ispin, istate, &
2043 : jstate, nao, nforce_eval, nmo, &
2044 : npermutations, nspins
2045 : LOGICAL :: exist, natom_mismatch
2046 : REAL(KIND=dp) :: energy_diff, maxocc, Sda
2047 8 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: coupling_wfn
2048 8 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :) :: overlaps
2049 8 : TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
2050 : TYPE(cp_fm_struct_type), POINTER :: mo_mo_fmstruct
2051 : TYPE(cp_fm_type) :: inverse_mat, mo_overlap_wfn, mo_tmp
2052 8 : TYPE(cp_fm_type), DIMENSION(:, :), POINTER :: mixed_mo_coeff
2053 : TYPE(cp_logger_type), POINTER :: logger
2054 : TYPE(cp_subsys_type), POINTER :: subsys_mix
2055 : TYPE(dbcsr_type), POINTER :: mixed_matrix_s
2056 8 : TYPE(mo_set_type), ALLOCATABLE, DIMENSION(:) :: mo_set
2057 8 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
2058 8 : TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
2059 : TYPE(section_vals_type), POINTER :: force_env_section, mixed_cdft_section
2060 :
2061 8 : NULLIFY (mixed_cdft_section, subsys_mix, particle_set, qs_kind_set, atomic_kind_set, &
2062 8 : mixed_mo_coeff, mixed_matrix_s, force_env_section)
2063 16 : logger => cp_get_default_logger()
2064 :
2065 8 : CALL timeset(routineN, handle)
2066 8 : nforce_eval = SIZE(mixed_cdft%results%H, 1)
2067 8 : npermutations = nforce_eval*(nforce_eval - 1)/2
2068 8 : nspins = SIZE(nrow_mo)
2069 8 : mixed_mo_coeff => mixed_cdft%matrix%mixed_mo_coeff
2070 8 : mixed_matrix_s => mixed_cdft%matrix%mixed_matrix_s
2071 : CALL force_env_get(force_env=force_env, &
2072 8 : force_env_section=force_env_section)
2073 : ! Create mo_set for input wfn
2074 40 : ALLOCATE (mo_set(nspins))
2075 8 : IF (nspins == 2) THEN
2076 8 : maxocc = 1.0_dp
2077 : ELSE
2078 0 : maxocc = 2.0_dp
2079 : END IF
2080 24 : DO ispin = 1, nspins
2081 16 : nao = nrow_mo(ispin)
2082 16 : nmo = ncol_mo(ispin)
2083 : ! Only OT with fully occupied orbitals is implicitly supported
2084 : CALL allocate_mo_set(mo_set(ispin), nao=nao, nmo=nmo, nelectron=INT(maxocc*nmo), &
2085 : n_el_f=REAL(maxocc*nmo, dp), maxocc=maxocc, &
2086 16 : flexible_electron_count=0.0_dp)
2087 16 : CALL set_mo_set(mo_set(ispin), uniform_occupation=.TRUE., homo=nmo)
2088 16 : ALLOCATE (mo_set(ispin)%mo_coeff)
2089 : CALL cp_fm_create(matrix=mo_set(ispin)%mo_coeff, &
2090 : matrix_struct=mixed_mo_coeff(1, ispin)%matrix_struct, &
2091 16 : name="GS_MO_COEFF"//TRIM(ADJUSTL(cp_to_string(ispin)))//"MATRIX")
2092 48 : ALLOCATE (mo_set(ispin)%eigenvalues(nmo))
2093 40 : ALLOCATE (mo_set(ispin)%occupation_numbers(nmo))
2094 : END DO
2095 : ! Read wfn file (note we assume that the basis set is the same)
2096 8 : IF (force_env%mixed_env%do_mixed_qmmm_cdft) &
2097 : ! This really shouldnt be a problem?
2098 : CALL cp_abort(__LOCATION__, &
2099 0 : "QMMM + wavefunction overlap method not supported.")
2100 8 : CALL force_env_get(force_env=force_env, subsys=subsys_mix)
2101 8 : mixed_cdft_section => section_vals_get_subs_vals(force_env_section, "MIXED%MIXED_CDFT")
2102 8 : CALL cp_subsys_get(subsys_mix, atomic_kind_set=atomic_kind_set, particle_set=particle_set)
2103 8 : CPASSERT(ASSOCIATED(mixed_cdft%qs_kind_set))
2104 8 : IF (force_env%para_env%is_source()) &
2105 4 : CALL wfn_restart_file_name(file_name, exist, mixed_cdft_section, logger)
2106 8 : CALL force_env%para_env%bcast(exist)
2107 8 : CALL force_env%para_env%bcast(file_name)
2108 8 : IF (.NOT. exist) &
2109 : CALL cp_abort(__LOCATION__, &
2110 : "User requested to restart the wavefunction from the file named: "// &
2111 : TRIM(file_name)//". This file does not exist. Please check the existence of"// &
2112 : " the file or change properly the value of the keyword WFN_RESTART_FILE_NAME in"// &
2113 0 : " section FORCE_EVAL\MIXED\MIXED_CDFT.")
2114 : CALL read_mo_set_from_restart(mo_array=mo_set, qs_kind_set=mixed_cdft%qs_kind_set, particle_set=particle_set, &
2115 : para_env=force_env%para_env, id_nr=0, multiplicity=mixed_cdft%multiplicity, &
2116 : dft_section=mixed_cdft_section, natom_mismatch=natom_mismatch, &
2117 8 : cdft=.TRUE.)
2118 8 : IF (natom_mismatch) &
2119 : CALL cp_abort(__LOCATION__, &
2120 0 : "Restart wfn file has a wrong number of atoms")
2121 : ! Orthonormalize wfn
2122 24 : DO ispin = 1, nspins
2123 24 : IF (mixed_cdft%has_unit_metric) THEN
2124 0 : CALL make_basis_simple(mo_set(ispin)%mo_coeff, ncol_mo(ispin))
2125 : ELSE
2126 16 : CALL make_basis_sm(mo_set(ispin)%mo_coeff, ncol_mo(ispin), mixed_matrix_s)
2127 : END IF
2128 : END DO
2129 : ! Calculate MO overlaps between reference state (R) and CDFT state pairs I/J
2130 24 : ALLOCATE (coupling_wfn(npermutations))
2131 32 : ALLOCATE (overlaps(2, npermutations, nspins))
2132 8 : overlaps = 0.0_dp
2133 24 : DO ispin = 1, nspins
2134 : ! Allocate work
2135 16 : nao = nrow_mo(ispin)
2136 16 : nmo = ncol_mo(ispin)
2137 : CALL cp_fm_struct_create(mo_mo_fmstruct, nrow_global=nmo, ncol_global=nmo, &
2138 16 : context=mixed_cdft%blacs_env, para_env=force_env%para_env)
2139 : CALL cp_fm_create(matrix=mo_overlap_wfn, matrix_struct=mo_mo_fmstruct, &
2140 16 : name="MO_OVERLAP_MATRIX_WFN")
2141 : CALL cp_fm_create(matrix=inverse_mat, matrix_struct=mo_mo_fmstruct, &
2142 16 : name="INVERSE_MO_OVERLAP_MATRIX_WFN")
2143 16 : CALL cp_fm_struct_release(mo_mo_fmstruct)
2144 : CALL cp_fm_create(matrix=mo_tmp, &
2145 : matrix_struct=mixed_mo_coeff(1, ispin)%matrix_struct, &
2146 16 : name="OVERLAP_MO_COEFF_WFN")
2147 40 : DO ipermutation = 1, npermutations
2148 24 : CALL map_permutation_to_states(nforce_eval, ipermutation, istate, jstate)
2149 : ! S*C_r
2150 : CALL cp_dbcsr_sm_fm_multiply(mixed_matrix_s, mo_set(ispin)%mo_coeff, &
2151 24 : mo_tmp, nmo, 1.0_dp, 0.0_dp)
2152 : ! C_i^T * (S*C_r)
2153 24 : CALL cp_fm_set_all(mo_overlap_wfn, alpha=0.0_dp)
2154 : CALL parallel_gemm('T', 'N', nmo, nmo, nao, 1.0_dp, &
2155 : mixed_mo_coeff(istate, ispin), &
2156 24 : mo_tmp, 0.0_dp, mo_overlap_wfn)
2157 24 : CALL cp_fm_invert(mo_overlap_wfn, inverse_mat, overlaps(1, ipermutation, ispin), eps_svd=mixed_cdft%eps_svd)
2158 : ! C_j^T * (S*C_r)
2159 24 : CALL cp_fm_set_all(mo_overlap_wfn, alpha=0.0_dp)
2160 : CALL parallel_gemm('T', 'N', nmo, nmo, nao, 1.0_dp, &
2161 : mixed_mo_coeff(jstate, ispin), &
2162 24 : mo_tmp, 0.0_dp, mo_overlap_wfn)
2163 64 : CALL cp_fm_invert(mo_overlap_wfn, inverse_mat, overlaps(2, ipermutation, ispin), eps_svd=mixed_cdft%eps_svd)
2164 : END DO
2165 16 : CALL cp_fm_release(mo_overlap_wfn)
2166 16 : CALL cp_fm_release(inverse_mat)
2167 16 : CALL cp_fm_release(mo_tmp)
2168 56 : CALL deallocate_mo_set(mo_set(ispin))
2169 : END DO
2170 8 : DEALLOCATE (mo_set)
2171 20 : DO ipermutation = 1, npermutations
2172 12 : CALL map_permutation_to_states(nforce_eval, ipermutation, istate, jstate)
2173 12 : IF (nspins == 2) THEN
2174 12 : overlaps(1, ipermutation, 1) = ABS(overlaps(1, ipermutation, 1)*overlaps(1, ipermutation, 2)) ! A in eq. 12c
2175 12 : overlaps(2, ipermutation, 1) = ABS(overlaps(2, ipermutation, 1)*overlaps(2, ipermutation, 2)) ! B in eq. 12c
2176 : ELSE
2177 0 : overlaps(1, ipermutation, 1) = overlaps(1, ipermutation, 1)**2
2178 0 : overlaps(2, ipermutation, 1) = overlaps(2, ipermutation, 1)**2
2179 : END IF
2180 : ! Calculate coupling using eq. 12c
2181 : ! The coupling is singular if A = B (i.e. states I/J are identical or charge in ground state is fully delocalized)
2182 32 : IF (ABS(overlaps(1, ipermutation, 1) - overlaps(2, ipermutation, 1)) <= 1.0e-14_dp) THEN
2183 : CALL cp_warn(__LOCATION__, &
2184 : "Coupling between states is singular and set to zero. "// &
2185 : "Potential causes: coupling is computed between identical CDFT states or the spin/charge "// &
2186 2 : "density is fully delocalized in the unconstrained ground state.")
2187 2 : coupling_wfn(ipermutation) = 0.0_dp
2188 : ELSE
2189 10 : energy_diff = mixed_cdft%results%energy(jstate) - mixed_cdft%results%energy(istate)
2190 10 : Sda = mixed_cdft%results%S(istate, jstate)
2191 : coupling_wfn(ipermutation) = ABS((overlaps(1, ipermutation, 1)*overlaps(2, ipermutation, 1)/ &
2192 : (overlaps(1, ipermutation, 1)**2 - overlaps(2, ipermutation, 1)**2))* &
2193 : (energy_diff)/(1.0_dp - Sda**2)* &
2194 : (1.0_dp - (overlaps(1, ipermutation, 1)**2 + overlaps(2, ipermutation, 1)**2)/ &
2195 : (2.0_dp*overlaps(1, ipermutation, 1)*overlaps(2, ipermutation, 1))* &
2196 10 : Sda))
2197 : END IF
2198 : END DO
2199 8 : DEALLOCATE (overlaps)
2200 8 : CALL mixed_cdft_result_type_set(mixed_cdft%results, wfn=coupling_wfn)
2201 8 : DEALLOCATE (coupling_wfn)
2202 8 : CALL timestop(handle)
2203 :
2204 16 : END SUBROUTINE mixed_cdft_wfn_overlap_method
2205 :
2206 : ! **************************************************************************************************
2207 : !> \brief Becke constraint adapted to mixed calculations, details in qs_cdft_methods.F
2208 : !> \param force_env the force_env that holds the CDFT states
2209 : !> \param calculate_forces determines if forces should be calculted
2210 : !> \par History
2211 : !> 02.2016 created [Nico Holmberg]
2212 : !> 03.2016 added dynamic load balancing (dlb)
2213 : !> changed pw_p_type data types to rank-3 reals to accommodate dlb
2214 : !> and to reduce overall memory footprint
2215 : !> split to subroutines [Nico Holmberg]
2216 : !> 04.2016 introduced mixed grid mapping [Nico Holmberg]
2217 : ! **************************************************************************************************
2218 36 : SUBROUTINE mixed_becke_constraint(force_env, calculate_forces)
2219 : TYPE(force_env_type), POINTER :: force_env
2220 : LOGICAL, INTENT(IN) :: calculate_forces
2221 :
2222 : CHARACTER(len=*), PARAMETER :: routineN = 'mixed_becke_constraint'
2223 :
2224 : INTEGER :: handle
2225 36 : INTEGER, ALLOCATABLE, DIMENSION(:) :: catom
2226 : LOGICAL :: in_memory, store_vectors
2227 36 : LOGICAL, ALLOCATABLE, DIMENSION(:) :: is_constraint
2228 36 : REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: coefficients
2229 36 : REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :) :: position_vecs, R12
2230 36 : REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :) :: pair_dist_vecs
2231 : TYPE(cp_logger_type), POINTER :: logger
2232 : TYPE(mixed_cdft_type), POINTER :: mixed_cdft
2233 : TYPE(mixed_environment_type), POINTER :: mixed_env
2234 :
2235 36 : NULLIFY (mixed_env, mixed_cdft)
2236 36 : store_vectors = .TRUE.
2237 36 : logger => cp_get_default_logger()
2238 36 : CALL timeset(routineN, handle)
2239 36 : mixed_env => force_env%mixed_env
2240 36 : CALL get_mixed_env(mixed_env, cdft_control=mixed_cdft)
2241 : CALL mixed_becke_constraint_init(force_env, mixed_cdft, calculate_forces, &
2242 : is_constraint, in_memory, store_vectors, &
2243 : R12, position_vecs, pair_dist_vecs, &
2244 36 : coefficients, catom)
2245 : CALL mixed_becke_constraint_low(force_env, mixed_cdft, in_memory, &
2246 : is_constraint, store_vectors, R12, &
2247 : position_vecs, pair_dist_vecs, &
2248 36 : coefficients, catom)
2249 36 : CALL timestop(handle)
2250 :
2251 36 : END SUBROUTINE mixed_becke_constraint
2252 : ! **************************************************************************************************
2253 : !> \brief Initialize the mixed Becke constraint calculation
2254 : !> \param force_env the force_env that holds the CDFT states
2255 : !> \param mixed_cdft container for structures needed to build the mixed CDFT constraint
2256 : !> \param calculate_forces determines if forces should be calculted
2257 : !> \param is_constraint a list used to determine which atoms in the system define the constraint
2258 : !> \param in_memory decides whether to build the weight function gradients in parallel before solving
2259 : !> the CDFT states or later during the SCF procedure of the individual states
2260 : !> \param store_vectors should temporary arrays be stored in memory to accelerate the calculation
2261 : !> \param R12 temporary array holding the pairwise atomic distances
2262 : !> \param position_vecs temporary array holding the pbc corrected atomic position vectors
2263 : !> \param pair_dist_vecs temporary array holding the pairwise displament vectors
2264 : !> \param coefficients array that determines how atoms should be summed to form the constraint
2265 : !> \param catom temporary array to map the global index of constraint atoms to their position
2266 : !> in a list that holds only constraint atoms
2267 : !> \par History
2268 : !> 03.2016 created [Nico Holmberg]
2269 : ! **************************************************************************************************
2270 36 : SUBROUTINE mixed_becke_constraint_init(force_env, mixed_cdft, calculate_forces, &
2271 : is_constraint, in_memory, store_vectors, &
2272 : R12, position_vecs, pair_dist_vecs, coefficients, &
2273 : catom)
2274 : TYPE(force_env_type), POINTER :: force_env
2275 : TYPE(mixed_cdft_type), POINTER :: mixed_cdft
2276 : LOGICAL, INTENT(IN) :: calculate_forces
2277 : LOGICAL, ALLOCATABLE, DIMENSION(:), INTENT(OUT) :: is_constraint
2278 : LOGICAL, INTENT(OUT) :: in_memory
2279 : LOGICAL, INTENT(IN) :: store_vectors
2280 : REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :), &
2281 : INTENT(out) :: R12, position_vecs
2282 : REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :), &
2283 : INTENT(out) :: pair_dist_vecs
2284 : REAL(kind=dp), ALLOCATABLE, DIMENSION(:), &
2285 : INTENT(OUT) :: coefficients
2286 : INTEGER, ALLOCATABLE, DIMENSION(:), INTENT(out) :: catom
2287 :
2288 : CHARACTER(len=*), PARAMETER :: routineN = 'mixed_becke_constraint_init'
2289 :
2290 : CHARACTER(len=2) :: element_symbol
2291 : INTEGER :: atom_a, bounds(2), handle, i, iatom, iex, iforce_eval, ikind, iounit, ithread, j, &
2292 : jatom, katom, my_work, my_work_size, natom, nforce_eval, nkind, np(3), npme, nthread, &
2293 : numexp, offset_dlb, unit_nr
2294 : INTEGER, DIMENSION(2, 3) :: bo, bo_conf
2295 36 : INTEGER, DIMENSION(:), POINTER :: atom_list, cores, stride
2296 : LOGICAL :: build, mpi_io
2297 : REAL(kind=dp) :: alpha, chi, coef, ircov, jrcov, ra(3), &
2298 : radius, uij
2299 : REAL(kind=dp), DIMENSION(3) :: cell_v, dist_vec, dr, r, r1, shift
2300 36 : REAL(KIND=dp), DIMENSION(:), POINTER :: radii_list
2301 36 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: pab
2302 36 : TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
2303 : TYPE(cdft_control_type), POINTER :: cdft_control
2304 : TYPE(cell_type), POINTER :: cell
2305 : TYPE(cp_logger_type), POINTER :: logger
2306 : TYPE(cp_subsys_type), POINTER :: subsys_mix
2307 : TYPE(force_env_type), POINTER :: force_env_qs
2308 : TYPE(hirshfeld_type), POINTER :: cavity_env
2309 : TYPE(particle_list_type), POINTER :: particles
2310 36 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
2311 : TYPE(pw_pool_type), POINTER :: auxbas_pw_pool
2312 36 : TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
2313 : TYPE(realspace_grid_type), POINTER :: rs_cavity
2314 : TYPE(section_vals_type), POINTER :: force_env_section, print_section
2315 :
2316 36 : NULLIFY (pab, cell, force_env_qs, particle_set, force_env_section, print_section, &
2317 36 : qs_kind_set, particles, subsys_mix, rs_cavity, cavity_env, auxbas_pw_pool, &
2318 36 : atomic_kind_set, radii_list, cdft_control)
2319 72 : logger => cp_get_default_logger()
2320 36 : nforce_eval = SIZE(force_env%sub_force_env)
2321 36 : CALL timeset(routineN, handle)
2322 36 : CALL force_env_get(force_env=force_env, force_env_section=force_env_section)
2323 36 : IF (.NOT. force_env%mixed_env%do_mixed_qmmm_cdft) THEN
2324 : CALL force_env_get(force_env=force_env, &
2325 : subsys=subsys_mix, &
2326 28 : cell=cell)
2327 : CALL cp_subsys_get(subsys=subsys_mix, &
2328 : particles=particles, &
2329 28 : particle_set=particle_set)
2330 : ELSE
2331 24 : DO iforce_eval = 1, nforce_eval
2332 16 : IF (.NOT. ASSOCIATED(force_env%sub_force_env(iforce_eval)%force_env)) CYCLE
2333 24 : force_env_qs => force_env%sub_force_env(iforce_eval)%force_env
2334 : END DO
2335 : CALL get_qs_env(force_env_qs%qmmm_env%qs_env, &
2336 : cp_subsys=subsys_mix, &
2337 8 : cell=cell)
2338 : CALL cp_subsys_get(subsys=subsys_mix, &
2339 : particles=particles, &
2340 8 : particle_set=particle_set)
2341 : END IF
2342 36 : natom = SIZE(particles%els)
2343 36 : print_section => section_vals_get_subs_vals(force_env_section, "MIXED%MIXED_CDFT%PRINT%PROGRAM_RUN_INFO")
2344 36 : cdft_control => mixed_cdft%cdft_control
2345 36 : CPASSERT(ASSOCIATED(cdft_control))
2346 36 : IF (.NOT. ASSOCIATED(cdft_control%becke_control%cutoffs)) THEN
2347 24 : CALL cp_subsys_get(subsys_mix, atomic_kind_set=atomic_kind_set)
2348 72 : ALLOCATE (cdft_control%becke_control%cutoffs(natom))
2349 30 : SELECT CASE (cdft_control%becke_control%cutoff_type)
2350 : CASE (becke_cutoff_global)
2351 18 : cdft_control%becke_control%cutoffs(:) = cdft_control%becke_control%rglobal
2352 : CASE (becke_cutoff_element)
2353 18 : IF (.NOT. SIZE(atomic_kind_set) == SIZE(cdft_control%becke_control%cutoffs_tmp)) &
2354 : CALL cp_abort(__LOCATION__, &
2355 : "Size of keyword BECKE_CONSTRAINT\ELEMENT_CUTOFFS does "// &
2356 0 : "not match number of atomic kinds in the input coordinate file.")
2357 54 : DO ikind = 1, SIZE(atomic_kind_set)
2358 36 : CALL get_atomic_kind(atomic_kind_set(ikind), natom=katom, atom_list=atom_list)
2359 90 : DO iatom = 1, katom
2360 36 : atom_a = atom_list(iatom)
2361 72 : cdft_control%becke_control%cutoffs(atom_a) = cdft_control%becke_control%cutoffs_tmp(ikind)
2362 : END DO
2363 : END DO
2364 42 : DEALLOCATE (cdft_control%becke_control%cutoffs_tmp)
2365 : END SELECT
2366 : END IF
2367 36 : build = .FALSE.
2368 36 : IF (cdft_control%becke_control%adjust .AND. &
2369 : .NOT. ASSOCIATED(cdft_control%becke_control%aij)) THEN
2370 72 : ALLOCATE (cdft_control%becke_control%aij(natom, natom))
2371 18 : build = .TRUE.
2372 : END IF
2373 108 : ALLOCATE (catom(cdft_control%natoms))
2374 : IF (cdft_control%save_pot .OR. &
2375 : cdft_control%becke_control%cavity_confine .OR. &
2376 36 : cdft_control%becke_control%should_skip .OR. &
2377 : mixed_cdft%first_iteration) THEN
2378 108 : ALLOCATE (is_constraint(natom))
2379 36 : is_constraint = .FALSE.
2380 : END IF
2381 36 : in_memory = calculate_forces .AND. cdft_control%becke_control%in_memory
2382 36 : IF (in_memory .NEQV. calculate_forces) &
2383 : CALL cp_abort(__LOCATION__, &
2384 : "The flag BECKE_CONSTRAINT\IN_MEMORY must be activated "// &
2385 0 : "for the calculation of mixed CDFT forces")
2386 108 : IF (in_memory .OR. mixed_cdft%first_iteration) ALLOCATE (coefficients(natom))
2387 108 : DO i = 1, cdft_control%natoms
2388 72 : catom(i) = cdft_control%atoms(i)
2389 : IF (cdft_control%save_pot .OR. &
2390 : cdft_control%becke_control%cavity_confine .OR. &
2391 72 : cdft_control%becke_control%should_skip .OR. &
2392 : mixed_cdft%first_iteration) &
2393 72 : is_constraint(catom(i)) = .TRUE.
2394 72 : IF (in_memory .OR. mixed_cdft%first_iteration) &
2395 108 : coefficients(catom(i)) = cdft_control%group(1)%coeff(i)
2396 : END DO
2397 36 : CALL pw_env_get(pw_env=mixed_cdft%pw_env, auxbas_pw_pool=auxbas_pw_pool)
2398 360 : bo = auxbas_pw_pool%pw_grid%bounds_local
2399 144 : np = auxbas_pw_pool%pw_grid%npts
2400 144 : dr = auxbas_pw_pool%pw_grid%dr
2401 144 : shift = -REAL(MODULO(np, 2), dp)*dr/2.0_dp
2402 36 : IF (store_vectors) THEN
2403 108 : IF (in_memory) ALLOCATE (pair_dist_vecs(3, natom, natom))
2404 108 : ALLOCATE (position_vecs(3, natom))
2405 : END IF
2406 144 : DO i = 1, 3
2407 144 : cell_v(i) = cell%hmat(i, i)
2408 : END DO
2409 144 : ALLOCATE (R12(natom, natom))
2410 72 : DO iatom = 1, natom - 1
2411 108 : DO jatom = iatom + 1, natom
2412 144 : r = particle_set(iatom)%r
2413 144 : r1 = particle_set(jatom)%r
2414 144 : DO i = 1, 3
2415 108 : r(i) = MODULO(r(i), cell%hmat(i, i)) - cell%hmat(i, i)/2._dp
2416 144 : r1(i) = MODULO(r1(i), cell%hmat(i, i)) - cell%hmat(i, i)/2._dp
2417 : END DO
2418 144 : dist_vec = (r - r1) - ANINT((r - r1)/cell_v)*cell_v
2419 36 : IF (store_vectors) THEN
2420 144 : position_vecs(:, iatom) = r(:)
2421 144 : IF (iatom == 1 .AND. jatom == natom) position_vecs(:, jatom) = r1(:)
2422 36 : IF (in_memory) THEN
2423 96 : pair_dist_vecs(:, iatom, jatom) = dist_vec(:)
2424 96 : pair_dist_vecs(:, jatom, iatom) = -dist_vec(:)
2425 : END IF
2426 : END IF
2427 144 : R12(iatom, jatom) = SQRT(DOT_PRODUCT(dist_vec, dist_vec))
2428 36 : R12(jatom, iatom) = R12(iatom, jatom)
2429 72 : IF (build) THEN
2430 : CALL get_atomic_kind(atomic_kind=particle_set(iatom)%atomic_kind, &
2431 18 : kind_number=ikind)
2432 18 : ircov = cdft_control%becke_control%radii(ikind)
2433 : CALL get_atomic_kind(atomic_kind=particle_set(jatom)%atomic_kind, &
2434 18 : kind_number=ikind)
2435 18 : jrcov = cdft_control%becke_control%radii(ikind)
2436 18 : IF (ircov /= jrcov) THEN
2437 18 : chi = ircov/jrcov
2438 18 : uij = (chi - 1.0_dp)/(chi + 1.0_dp)
2439 18 : cdft_control%becke_control%aij(iatom, jatom) = uij/(uij**2 - 1.0_dp)
2440 18 : IF (cdft_control%becke_control%aij(iatom, jatom) &
2441 : > 0.5_dp) THEN
2442 0 : cdft_control%becke_control%aij(iatom, jatom) = 0.5_dp
2443 18 : ELSE IF (cdft_control%becke_control%aij(iatom, jatom) &
2444 : < -0.5_dp) THEN
2445 0 : cdft_control%becke_control%aij(iatom, jatom) = -0.5_dp
2446 : END IF
2447 : ELSE
2448 0 : cdft_control%becke_control%aij(iatom, jatom) = 0.0_dp
2449 : END IF
2450 : cdft_control%becke_control%aij(jatom, iatom) = &
2451 18 : -cdft_control%becke_control%aij(iatom, jatom)
2452 : END IF
2453 : END DO
2454 : END DO
2455 : ! Dump some additional information about the calculation
2456 36 : IF (mixed_cdft%first_iteration) THEN
2457 24 : print_section => section_vals_get_subs_vals(force_env_section, "MIXED%MIXED_CDFT%PRINT%PROGRAM_RUN_INFO")
2458 24 : iounit = cp_print_key_unit_nr(logger, print_section, '', extension='.mixedLog')
2459 24 : IF (iounit > 0) THEN
2460 : WRITE (iounit, '(/,T3,A,T66)') &
2461 12 : '-------------------------- Becke atomic parameters ---------------------------'
2462 12 : IF (cdft_control%becke_control%adjust) THEN
2463 : WRITE (iounit, '(T3,A,A)') &
2464 9 : 'Atom Element Coefficient', ' Cutoff (angstrom) CDFT Radius (angstrom)'
2465 27 : DO iatom = 1, natom
2466 : CALL get_atomic_kind(atomic_kind=particle_set(iatom)%atomic_kind, &
2467 : element_symbol=element_symbol, &
2468 18 : kind_number=ikind)
2469 18 : ircov = cp_unit_from_cp2k(cdft_control%becke_control%radii(ikind), "angstrom")
2470 18 : IF (is_constraint(iatom)) THEN
2471 18 : coef = coefficients(iatom)
2472 : ELSE
2473 0 : coef = 0.0_dp
2474 : END IF
2475 : WRITE (iounit, "(i6,T14,A2,T22,F8.3,T44,F8.3,T73,F8.3)") &
2476 18 : iatom, ADJUSTR(element_symbol), coef, &
2477 18 : cp_unit_from_cp2k(cdft_control%becke_control%cutoffs(iatom), "angstrom"), &
2478 63 : ircov
2479 : END DO
2480 : ELSE
2481 : WRITE (iounit, '(T3,A,A)') &
2482 3 : 'Atom Element Coefficient', ' Cutoff (angstrom)'
2483 9 : DO iatom = 1, natom
2484 : CALL get_atomic_kind(atomic_kind=particle_set(iatom)%atomic_kind, &
2485 6 : element_symbol=element_symbol)
2486 6 : IF (is_constraint(iatom)) THEN
2487 6 : coef = coefficients(iatom)
2488 : ELSE
2489 0 : coef = 0.0_dp
2490 : END IF
2491 : WRITE (iounit, "(i6,T14,A2,T22,F8.3,T44,F8.3)") &
2492 6 : iatom, ADJUSTR(element_symbol), coef, &
2493 15 : cp_unit_from_cp2k(cdft_control%becke_control%cutoffs(iatom), "angstrom")
2494 : END DO
2495 : END IF
2496 : WRITE (iounit, '(T3,A)') &
2497 12 : '------------------------------------------------------------------------------'
2498 : END IF
2499 : CALL cp_print_key_finished_output(iounit, logger, force_env_section, &
2500 24 : "MIXED%MIXED_CDFT%PRINT%PROGRAM_RUN_INFO")
2501 24 : mixed_cdft%first_iteration = .FALSE.
2502 : END IF
2503 :
2504 36 : IF (cdft_control%becke_control%cavity_confine) THEN
2505 34 : CPASSERT(ASSOCIATED(mixed_cdft%qs_kind_set))
2506 34 : cavity_env => cdft_control%becke_control%cavity_env
2507 34 : qs_kind_set => mixed_cdft%qs_kind_set
2508 34 : CALL cp_subsys_get(subsys_mix, atomic_kind_set=atomic_kind_set)
2509 34 : nkind = SIZE(qs_kind_set)
2510 34 : IF (.NOT. ASSOCIATED(cavity_env%kind_shape_fn)) THEN
2511 22 : IF (ASSOCIATED(cdft_control%becke_control%radii)) THEN
2512 54 : ALLOCATE (radii_list(SIZE(cdft_control%becke_control%radii)))
2513 54 : DO ikind = 1, SIZE(cdft_control%becke_control%radii)
2514 54 : IF (cavity_env%use_bohr) THEN
2515 0 : radii_list(ikind) = cdft_control%becke_control%radii(ikind)
2516 : ELSE
2517 36 : radii_list(ikind) = cp_unit_from_cp2k(cdft_control%becke_control%radii(ikind), "angstrom")
2518 : END IF
2519 : END DO
2520 : END IF
2521 : CALL create_shape_function(cavity_env, qs_kind_set, atomic_kind_set, &
2522 : radius=cdft_control%becke_control%rcavity, &
2523 22 : radii_list=radii_list)
2524 22 : IF (ASSOCIATED(radii_list)) &
2525 18 : DEALLOCATE (radii_list)
2526 : END IF
2527 34 : NULLIFY (rs_cavity)
2528 : CALL pw_env_get(pw_env=mixed_cdft%pw_env, auxbas_rs_grid=rs_cavity, &
2529 34 : auxbas_pw_pool=auxbas_pw_pool)
2530 : ! be careful in parallel nsmax is chosen with multigrid in mind!
2531 34 : CALL rs_grid_zero(rs_cavity)
2532 34 : ALLOCATE (pab(1, 1))
2533 34 : nthread = 1
2534 34 : ithread = 0
2535 102 : DO ikind = 1, SIZE(atomic_kind_set)
2536 68 : numexp = cavity_env%kind_shape_fn(ikind)%numexp
2537 68 : IF (numexp <= 0) CYCLE
2538 68 : CALL get_atomic_kind(atomic_kind_set(ikind), natom=katom, atom_list=atom_list)
2539 204 : ALLOCATE (cores(katom))
2540 136 : DO iex = 1, numexp
2541 68 : alpha = cavity_env%kind_shape_fn(ikind)%zet(iex)
2542 68 : coef = cavity_env%kind_shape_fn(ikind)%coef(iex)
2543 68 : npme = 0
2544 136 : cores = 0
2545 136 : DO iatom = 1, katom
2546 136 : IF (rs_cavity%desc%parallel .AND. .NOT. rs_cavity%desc%distributed) THEN
2547 : ! replicated realspace grid, split the atoms up between procs
2548 68 : IF (MODULO(iatom, rs_cavity%desc%group_size) == rs_cavity%desc%my_pos) THEN
2549 34 : npme = npme + 1
2550 34 : cores(npme) = iatom
2551 : END IF
2552 : ELSE
2553 0 : npme = npme + 1
2554 0 : cores(npme) = iatom
2555 : END IF
2556 : END DO
2557 170 : DO j = 1, npme
2558 34 : iatom = cores(j)
2559 34 : atom_a = atom_list(iatom)
2560 34 : pab(1, 1) = coef
2561 34 : IF (store_vectors) THEN
2562 136 : ra(:) = position_vecs(:, atom_a) + cell_v(:)/2._dp
2563 : ELSE
2564 0 : ra(:) = pbc(particle_set(atom_a)%r, cell)
2565 : END IF
2566 102 : IF (is_constraint(atom_a)) THEN
2567 : radius = exp_radius_very_extended(la_min=0, la_max=0, lb_min=0, lb_max=0, &
2568 : ra=ra, rb=ra, rp=ra, &
2569 : zetp=alpha, eps=mixed_cdft%eps_rho_rspace, &
2570 : pab=pab, o1=0, o2=0, & ! without map_consistent
2571 34 : prefactor=1.0_dp, cutoff=0.0_dp)
2572 :
2573 : CALL collocate_pgf_product(0, alpha, 0, 0, 0.0_dp, 0, ra, &
2574 : [0.0_dp, 0.0_dp, 0.0_dp], 1.0_dp, pab, 0, 0, &
2575 : rs_cavity, &
2576 : radius=radius, ga_gb_function=GRID_FUNC_AB, &
2577 : use_subpatch=.TRUE., &
2578 34 : subpatch_pattern=0)
2579 : END IF
2580 : END DO
2581 : END DO
2582 170 : DEALLOCATE (cores)
2583 : END DO
2584 34 : DEALLOCATE (pab)
2585 34 : CALL auxbas_pw_pool%create_pw(cdft_control%becke_control%cavity)
2586 34 : CALL transfer_rs2pw(rs_cavity, cdft_control%becke_control%cavity)
2587 : CALL hfun_zero(cdft_control%becke_control%cavity%array, &
2588 : cdft_control%becke_control%eps_cavity, &
2589 34 : just_zero=.FALSE., bounds=bounds, work=my_work)
2590 34 : IF (bounds(2) < bo(2, 3)) THEN
2591 8 : bounds(2) = bounds(2) - 1
2592 : ELSE
2593 26 : bounds(2) = bo(2, 3)
2594 : END IF
2595 34 : IF (bounds(1) > bo(1, 3)) THEN
2596 : ! In the special case bounds(1) == bounds(2) == bo(2, 3), after this check
2597 : ! bounds(1) > bounds(2) and the subsequent gradient allocation (:, :, :, bounds(1):bounds(2))
2598 : ! will correctly allocate a 0-sized array
2599 8 : bounds(1) = bounds(1) + 1
2600 : ELSE
2601 26 : bounds(1) = bo(1, 3)
2602 : END IF
2603 34 : IF (bounds(1) > bounds(2)) THEN
2604 0 : my_work_size = 0
2605 : ELSE
2606 34 : my_work_size = (bounds(2) - bounds(1) + 1)
2607 34 : IF (mixed_cdft%is_pencil .OR. mixed_cdft%is_special) THEN
2608 0 : my_work_size = my_work_size*(bo(2, 2) - bo(1, 2) + 1)
2609 : ELSE
2610 34 : my_work_size = my_work_size*(bo(2, 1) - bo(1, 1) + 1)
2611 : END IF
2612 : END IF
2613 102 : cdft_control%becke_control%confine_bounds = bounds
2614 34 : IF (cdft_control%becke_control%print_cavity) THEN
2615 : CALL hfun_zero(cdft_control%becke_control%cavity%array, &
2616 0 : cdft_control%becke_control%eps_cavity, just_zero=.TRUE.)
2617 : NULLIFY (stride)
2618 0 : ALLOCATE (stride(3))
2619 0 : stride = [2, 2, 2]
2620 0 : mpi_io = .TRUE.
2621 : unit_nr = cp_print_key_unit_nr(logger, print_section, "", &
2622 : middle_name="BECKE_CAVITY", &
2623 : extension=".cube", file_position="REWIND", &
2624 0 : log_filename=.FALSE., mpi_io=mpi_io)
2625 0 : IF (force_env%para_env%is_source() .AND. unit_nr < 1) &
2626 : CALL cp_abort(__LOCATION__, &
2627 0 : "Please turn on PROGRAM_RUN_INFO to print cavity")
2628 : CALL cp_pw_to_cube(cdft_control%becke_control%cavity, &
2629 : unit_nr, "CAVITY", particles=particles, &
2630 0 : stride=stride, mpi_io=mpi_io)
2631 0 : CALL cp_print_key_finished_output(unit_nr, logger, print_section, '', mpi_io=mpi_io)
2632 0 : DEALLOCATE (stride)
2633 : END IF
2634 : END IF
2635 36 : bo_conf = bo
2636 36 : IF (cdft_control%becke_control%cavity_confine) &
2637 102 : bo_conf(:, 3) = cdft_control%becke_control%confine_bounds
2638 : ! Load balance
2639 36 : IF (mixed_cdft%dlb) &
2640 : CALL mixed_becke_constraint_dlb(force_env, mixed_cdft, my_work, &
2641 8 : my_work_size, natom, bo, bo_conf)
2642 : ! The bounds have been finalized => time to allocate storage for working matrices
2643 36 : offset_dlb = 0
2644 36 : IF (mixed_cdft%dlb) THEN
2645 8 : IF (mixed_cdft%dlb_control%send_work .AND. .NOT. mixed_cdft%is_special) &
2646 8 : offset_dlb = SUM(mixed_cdft%dlb_control%target_list(2, :))
2647 : END IF
2648 36 : IF (cdft_control%becke_control%cavity_confine) THEN
2649 : ! Get rid of the zero part of the confinement cavity (cr3d -> real(:,:,:))
2650 34 : IF (mixed_cdft%is_special) THEN
2651 0 : ALLOCATE (mixed_cdft%sendbuff(SIZE(mixed_cdft%dest_list)))
2652 0 : DO i = 1, SIZE(mixed_cdft%dest_list)
2653 : ALLOCATE (mixed_cdft%sendbuff(i)%cavity(mixed_cdft%dest_list_bo(1, i):mixed_cdft%dest_list_bo(2, i), &
2654 0 : bo(1, 2):bo(2, 2), bo_conf(1, 3):bo_conf(2, 3)))
2655 : mixed_cdft%sendbuff(i)%cavity = cdft_control%becke_control%cavity%array(mixed_cdft%dest_list_bo(1, i): &
2656 : mixed_cdft%dest_list_bo(2, i), &
2657 : bo(1, 2):bo(2, 2), &
2658 0 : bo_conf(1, 3):bo_conf(2, 3))
2659 : END DO
2660 34 : ELSE IF (mixed_cdft%is_pencil) THEN
2661 0 : ALLOCATE (mixed_cdft%cavity(bo(1, 1) + offset_dlb:bo(2, 1), bo(1, 2):bo(2, 2), bo_conf(1, 3):bo_conf(2, 3)))
2662 : mixed_cdft%cavity = cdft_control%becke_control%cavity%array(bo(1, 1) + offset_dlb:bo(2, 1), &
2663 : bo(1, 2):bo(2, 2), &
2664 0 : bo_conf(1, 3):bo_conf(2, 3))
2665 : ELSE
2666 170 : ALLOCATE (mixed_cdft%cavity(bo(1, 1):bo(2, 1), bo(1, 2) + offset_dlb:bo(2, 2), bo_conf(1, 3):bo_conf(2, 3)))
2667 : mixed_cdft%cavity = cdft_control%becke_control%cavity%array(bo(1, 1):bo(2, 1), &
2668 : bo(1, 2) + offset_dlb:bo(2, 2), &
2669 3568226 : bo_conf(1, 3):bo_conf(2, 3))
2670 : END IF
2671 34 : CALL auxbas_pw_pool%give_back_pw(cdft_control%becke_control%cavity)
2672 : END IF
2673 36 : IF (mixed_cdft%is_special) THEN
2674 0 : DO i = 1, SIZE(mixed_cdft%dest_list)
2675 : ALLOCATE (mixed_cdft%sendbuff(i)%weight(mixed_cdft%dest_list_bo(1, i):mixed_cdft%dest_list_bo(2, i), &
2676 0 : bo(1, 2):bo(2, 2), bo_conf(1, 3):bo_conf(2, 3)))
2677 0 : mixed_cdft%sendbuff(i)%weight = 0.0_dp
2678 : END DO
2679 36 : ELSE IF (mixed_cdft%is_pencil) THEN
2680 0 : ALLOCATE (mixed_cdft%weight(bo(1, 1) + offset_dlb:bo(2, 1), bo(1, 2):bo(2, 2), bo_conf(1, 3):bo_conf(2, 3)))
2681 0 : mixed_cdft%weight = 0.0_dp
2682 : ELSE
2683 180 : ALLOCATE (mixed_cdft%weight(bo(1, 1):bo(2, 1), bo(1, 2) + offset_dlb:bo(2, 2), bo_conf(1, 3):bo_conf(2, 3)))
2684 1879397 : mixed_cdft%weight = 0.0_dp
2685 : END IF
2686 36 : IF (in_memory) THEN
2687 24 : IF (mixed_cdft%is_special) THEN
2688 0 : DO i = 1, SIZE(mixed_cdft%dest_list)
2689 : ALLOCATE (mixed_cdft%sendbuff(i)%gradients(3*natom, mixed_cdft%dest_list_bo(1, i): &
2690 : mixed_cdft%dest_list_bo(2, i), &
2691 : bo(1, 2):bo(2, 2), &
2692 0 : bo_conf(1, 3):bo_conf(2, 3)))
2693 0 : mixed_cdft%sendbuff(i)%gradients = 0.0_dp
2694 : END DO
2695 24 : ELSE IF (mixed_cdft%is_pencil) THEN
2696 : ALLOCATE (cdft_control%group(1)%gradients(3*natom, bo(1, 1) + offset_dlb:bo(2, 1), &
2697 : bo(1, 2):bo(2, 2), &
2698 0 : bo_conf(1, 3):bo_conf(2, 3)))
2699 0 : cdft_control%group(1)%gradients = 0.0_dp
2700 : ELSE
2701 : ALLOCATE (cdft_control%group(1)%gradients(3*natom, bo(1, 1):bo(2, 1), &
2702 : bo(1, 2) + offset_dlb:bo(2, 2), &
2703 144 : bo_conf(1, 3):bo_conf(2, 3)))
2704 9808936 : cdft_control%group(1)%gradients = 0.0_dp
2705 : END IF
2706 : END IF
2707 :
2708 36 : CALL timestop(handle)
2709 :
2710 36 : END SUBROUTINE mixed_becke_constraint_init
2711 :
2712 : ! **************************************************************************************************
2713 : !> \brief Setup load balancing for mixed Becke calculation
2714 : !> \param force_env the force_env that holds the CDFT states
2715 : !> \param mixed_cdft container for structures needed to build the mixed CDFT constraint
2716 : !> \param my_work an estimate of the work per processor
2717 : !> \param my_work_size size of the smallest array slice per processor. overloaded processors will
2718 : !> redistribute works as integer multiples of this value.
2719 : !> \param natom the total number of atoms
2720 : !> \param bo bounds of the realspace grid that holds the electron density
2721 : !> \param bo_conf same as bo, but bounds along z-direction have been compacted with confinement
2722 : !> \par History
2723 : !> 03.2016 created [Nico Holmberg]
2724 : ! **************************************************************************************************
2725 8 : SUBROUTINE mixed_becke_constraint_dlb(force_env, mixed_cdft, my_work, &
2726 : my_work_size, natom, bo, bo_conf)
2727 : TYPE(force_env_type), POINTER :: force_env
2728 : TYPE(mixed_cdft_type), POINTER :: mixed_cdft
2729 : INTEGER, INTENT(IN) :: my_work, my_work_size, natom
2730 : INTEGER, DIMENSION(2, 3) :: bo, bo_conf
2731 :
2732 : CHARACTER(len=*), PARAMETER :: routineN = 'mixed_becke_constraint_dlb'
2733 : INTEGER, PARAMETER :: should_deallocate = 7000, &
2734 : uninitialized = -7000
2735 :
2736 : INTEGER :: actually_sent, exhausted_work, handle, i, ind, iounit, ispecial, j, max_targets, &
2737 : more_work, my_pos, my_special_work, my_target, no_overloaded, no_underloaded, nsend, &
2738 : nsend_limit, nsend_max, offset, offset_proc, offset_special, send_total, tags(2)
2739 8 : INTEGER, DIMENSION(:), POINTER :: buffsize, cumulative_work, expected_work, load_imbalance, &
2740 16 : nrecv, nsend_proc, sendbuffer, should_warn, tmp, work_index, work_size
2741 8 : INTEGER, DIMENSION(:, :), POINTER :: targets, tmp_bo
2742 : LOGICAL :: consistent
2743 16 : LOGICAL, DIMENSION(:), POINTER :: mask_recv, mask_send, touched
2744 : REAL(kind=dp) :: average_work, load_scale, &
2745 : very_overloaded, work_factor
2746 8 : REAL(KIND=dp), DIMENSION(:, :, :), POINTER :: cavity
2747 : TYPE(cdft_control_type), POINTER :: cdft_control
2748 : TYPE(cp_logger_type), POINTER :: logger
2749 40 : TYPE(mp_request_type), DIMENSION(4) :: req
2750 8 : TYPE(mp_request_type), DIMENSION(:), POINTER :: req_recv, req_total
2751 : TYPE(section_vals_type), POINTER :: force_env_section, print_section
2752 :
2753 : TYPE buffers
2754 : LOGICAL, POINTER, DIMENSION(:) :: bv
2755 : INTEGER, POINTER, DIMENSION(:) :: iv
2756 : END TYPE buffers
2757 16 : TYPE(buffers), POINTER, DIMENSION(:) :: recvbuffer, sbuff
2758 : CHARACTER(len=2) :: dummy
2759 :
2760 16 : logger => cp_get_default_logger()
2761 8 : CALL timeset(routineN, handle)
2762 8 : mixed_cdft%dlb_control%recv_work = .FALSE.
2763 8 : mixed_cdft%dlb_control%send_work = .FALSE.
2764 8 : NULLIFY (expected_work, work_index, load_imbalance, work_size, &
2765 8 : cumulative_work, sendbuffer, buffsize, req_recv, req_total, &
2766 8 : tmp, nrecv, nsend_proc, targets, tmp_bo, touched, &
2767 8 : mask_recv, mask_send, cavity, recvbuffer, sbuff, force_env_section, &
2768 8 : print_section, cdft_control)
2769 8 : CALL force_env_get(force_env=force_env, force_env_section=force_env_section)
2770 8 : print_section => section_vals_get_subs_vals(force_env_section, "MIXED%MIXED_CDFT%PRINT%PROGRAM_RUN_INFO")
2771 8 : iounit = cp_print_key_unit_nr(logger, print_section, '', extension='.mixedLog')
2772 8 : cdft_control => mixed_cdft%cdft_control
2773 : ! These numerical values control data redistribution and are system sensitive
2774 : ! Currently they are not refined during run time which may cause crashes
2775 : ! However, using too many processors or a confinement cavity that is too large relative to the
2776 : ! total system volume are more likely culprits.
2777 8 : load_scale = mixed_cdft%dlb_control%load_scale
2778 8 : very_overloaded = mixed_cdft%dlb_control%very_overloaded
2779 8 : more_work = mixed_cdft%dlb_control%more_work
2780 8 : max_targets = 40
2781 8 : work_factor = 0.8_dp
2782 : ! Reset targets/sources
2783 8 : IF (mixed_cdft%is_special) THEN
2784 0 : DEALLOCATE (mixed_cdft%dest_list, mixed_cdft%dest_list_bo, &
2785 0 : mixed_cdft%source_list, mixed_cdft%source_list_bo)
2786 : ALLOCATE (mixed_cdft%dest_list(SIZE(mixed_cdft%dest_list_save)), &
2787 : mixed_cdft%dest_list_bo(SIZE(mixed_cdft%dest_bo_save, 1), SIZE(mixed_cdft%dest_bo_save, 2)), &
2788 : mixed_cdft%source_list(SIZE(mixed_cdft%source_list_save)), &
2789 0 : mixed_cdft%source_list_bo(SIZE(mixed_cdft%source_bo_save, 1), SIZE(mixed_cdft%source_bo_save, 2)))
2790 0 : mixed_cdft%dest_list = mixed_cdft%dest_list_save
2791 0 : mixed_cdft%source_list = mixed_cdft%source_list_save
2792 0 : mixed_cdft%dest_list_bo = mixed_cdft%dest_bo_save
2793 0 : mixed_cdft%source_list_bo = mixed_cdft%source_bo_save
2794 : END IF
2795 : ALLOCATE (mixed_cdft%dlb_control%expected_work(force_env%para_env%num_pe), &
2796 : expected_work(force_env%para_env%num_pe), &
2797 48 : work_size(force_env%para_env%num_pe))
2798 : IF (debug_this_module) THEN
2799 : ALLOCATE (should_warn(force_env%para_env%num_pe))
2800 : should_warn = 0
2801 : END IF
2802 24 : expected_work = 0
2803 8 : expected_work(force_env%para_env%mepos + 1) = my_work
2804 24 : work_size = 0
2805 8 : work_size(force_env%para_env%mepos + 1) = my_work_size
2806 8 : IF (ASSOCIATED(mixed_cdft%dlb_control%prediction_error)) THEN
2807 4 : IF (mixed_cdft%is_pencil .OR. mixed_cdft%is_special) THEN
2808 : work_size(force_env%para_env%mepos + 1) = work_size(force_env%para_env%mepos + 1) - &
2809 : NINT(REAL(mixed_cdft%dlb_control% &
2810 : prediction_error(force_env%para_env%mepos + 1), dp)/ &
2811 0 : REAL(bo(2, 1) - bo(1, 1) + 1, dp))
2812 : ELSE
2813 : work_size(force_env%para_env%mepos + 1) = work_size(force_env%para_env%mepos + 1) - &
2814 : NINT(REAL(mixed_cdft%dlb_control% &
2815 : prediction_error(force_env%para_env%mepos + 1), dp)/ &
2816 4 : REAL(bo(2, 2) - bo(1, 2) + 1, dp))
2817 : END IF
2818 : END IF
2819 40 : CALL force_env%para_env%sum(expected_work)
2820 40 : CALL force_env%para_env%sum(work_size)
2821 : ! We store the unsorted expected work to refine the estimate on subsequent calls to this routine
2822 40 : mixed_cdft%dlb_control%expected_work = expected_work
2823 : ! Take into account the prediction error of the last step
2824 8 : IF (ASSOCIATED(mixed_cdft%dlb_control%prediction_error)) &
2825 20 : expected_work = expected_work - mixed_cdft%dlb_control%prediction_error
2826 : !
2827 24 : average_work = REAL(SUM(expected_work), dp)/REAL(force_env%para_env%num_pe, dp)
2828 : ALLOCATE (work_index(force_env%para_env%num_pe), &
2829 : load_imbalance(force_env%para_env%num_pe), &
2830 48 : targets(2, force_env%para_env%num_pe))
2831 40 : load_imbalance = expected_work - NINT(average_work)
2832 8 : no_overloaded = 0
2833 8 : no_underloaded = 0
2834 56 : targets = 0
2835 : ! Convert the load imbalance to a multiple of the actual work size
2836 24 : DO i = 1, force_env%para_env%num_pe
2837 24 : IF (load_imbalance(i) > 0) THEN
2838 8 : no_overloaded = no_overloaded + 1
2839 : ! Allow heavily overloaded processors to dump more data since most likely they have a lot of 'real' work
2840 8 : IF (expected_work(i) > NINT(very_overloaded*average_work)) THEN
2841 0 : load_imbalance(i) = (CEILING(REAL(load_imbalance(i), dp)/REAL(work_size(i), dp)) + more_work)*work_size(i)
2842 : ELSE
2843 8 : load_imbalance(i) = CEILING(REAL(load_imbalance(i), dp)/REAL(work_size(i), dp))*work_size(i)
2844 : END IF
2845 : ELSE
2846 : ! Allow the underloaded processors to take load_scale amount of additional work
2847 : ! otherwise we may be unable to exhaust all overloaded processors
2848 8 : load_imbalance(i) = NINT(load_imbalance(i)*load_scale)
2849 8 : no_underloaded = no_underloaded + 1
2850 : END IF
2851 : END DO
2852 8 : CALL sort(expected_work, force_env%para_env%num_pe, indices=work_index)
2853 : ! Redistribute work in order from the most overloaded processors to the most underloaded processors
2854 : ! Each underloaded processor is limited to one overloaded processor
2855 8 : IF (load_imbalance(force_env%para_env%mepos + 1) > 0) THEN
2856 4 : offset = 0
2857 4 : mixed_cdft%dlb_control%send_work = .TRUE.
2858 : ! Build up the total amount of work that needs redistribution
2859 12 : ALLOCATE (cumulative_work(force_env%para_env%num_pe))
2860 12 : cumulative_work = 0
2861 4 : DO i = force_env%para_env%num_pe, force_env%para_env%num_pe - no_overloaded + 1, -1
2862 4 : IF (work_index(i) == force_env%para_env%mepos + 1) THEN
2863 : EXIT
2864 : ELSE
2865 0 : offset = offset + load_imbalance(work_index(i))
2866 0 : IF (i == force_env%para_env%num_pe) THEN
2867 0 : cumulative_work(i) = load_imbalance(work_index(i))
2868 : ELSE
2869 0 : cumulative_work(i) = cumulative_work(i + 1) + load_imbalance(work_index(i))
2870 : END IF
2871 : END IF
2872 : END DO
2873 4 : my_pos = i
2874 4 : j = force_env%para_env%num_pe
2875 4 : nsend_max = load_imbalance(work_index(j))/work_size(work_index(j))
2876 4 : exhausted_work = 0
2877 : ! Determine send offset by going through all processors that are more overloaded than my_pos
2878 4 : DO i = 1, no_underloaded
2879 4 : IF (my_pos == force_env%para_env%num_pe) EXIT
2880 0 : nsend = -load_imbalance(work_index(i))/work_size(work_index(j))
2881 0 : IF (nsend < 1) nsend = 1
2882 0 : nsend_max = nsend_max - nsend
2883 0 : IF (nsend_max < 0) nsend = nsend + nsend_max
2884 0 : exhausted_work = exhausted_work + nsend*work_size(work_index(j))
2885 0 : offset = offset - nsend*work_size(work_index(j))
2886 0 : IF (offset < 0) EXIT
2887 4 : IF (exhausted_work == cumulative_work(j)) THEN
2888 0 : j = j - 1
2889 0 : nsend_max = load_imbalance(work_index(j))/work_size(work_index(j))
2890 : END IF
2891 : END DO
2892 : ! Underloaded processors were fully exhausted: rewind index
2893 : ! Load balancing will fail if this happens on multiple processors
2894 4 : IF (i > no_underloaded) THEN
2895 0 : i = no_underloaded
2896 : END IF
2897 4 : my_target = i
2898 4 : DEALLOCATE (cumulative_work)
2899 : ! Determine how much and who to send slices of my grid points
2900 4 : nsend_max = load_imbalance(force_env%para_env%mepos + 1)/work_size(force_env%para_env%mepos + 1)
2901 : ! This the actual number of available array slices
2902 4 : IF (mixed_cdft%is_pencil .OR. mixed_cdft%is_special) THEN
2903 0 : nsend_limit = bo(2, 1) - bo(1, 1) + 1
2904 : ELSE
2905 4 : nsend_limit = bo(2, 2) - bo(1, 2) + 1
2906 : END IF
2907 4 : IF (.NOT. mixed_cdft%is_special) THEN
2908 4 : ALLOCATE (mixed_cdft%dlb_control%target_list(3, max_targets))
2909 : ELSE
2910 0 : ALLOCATE (mixed_cdft%dlb_control%target_list(3 + 2*SIZE(mixed_cdft%dest_list), max_targets))
2911 0 : ALLOCATE (touched(SIZE(mixed_cdft%dest_list)))
2912 0 : touched = .FALSE.
2913 : END IF
2914 644 : mixed_cdft%dlb_control%target_list = uninitialized
2915 4 : i = 1
2916 4 : ispecial = 1
2917 4 : offset_special = 0
2918 4 : targets(1, my_pos) = my_target
2919 4 : send_total = 0
2920 : ! Main loop. Note, we actually allow my_pos to offload more slices than nsend_max
2921 : DO
2922 4 : nsend = -load_imbalance(work_index(my_target))/work_size(force_env%para_env%mepos + 1)
2923 4 : IF (nsend < 1) nsend = 1 ! send at least one block
2924 : ! Prevent over redistribution: leave at least (1-work_factor)*nsend_limit slices to my_pos
2925 4 : IF (nsend > NINT(work_factor*nsend_limit - send_total)) THEN
2926 : nsend = NINT(work_factor*nsend_limit - send_total)
2927 : IF (debug_this_module) &
2928 : should_warn(force_env%para_env%mepos + 1) = 1
2929 : END IF
2930 4 : mixed_cdft%dlb_control%target_list(1, i) = work_index(my_target) - 1 ! This is the actual processor rank
2931 4 : IF (mixed_cdft%is_special) THEN
2932 0 : mixed_cdft%dlb_control%target_list(2, i) = 0
2933 0 : actually_sent = nsend
2934 0 : DO j = ispecial, SIZE(mixed_cdft%dest_list)
2935 0 : mixed_cdft%dlb_control%target_list(2, i) = mixed_cdft%dlb_control%target_list(2, i) + 1
2936 0 : touched(j) = .TRUE.
2937 0 : IF (nsend < mixed_cdft%dest_list_bo(2, j) - mixed_cdft%dest_list_bo(1, j) + 1) THEN
2938 0 : mixed_cdft%dlb_control%target_list(3 + 2*j - 1, i) = mixed_cdft%dest_list_bo(1, j)
2939 0 : mixed_cdft%dlb_control%target_list(3 + 2*j, i) = mixed_cdft%dest_list_bo(1, j) + nsend - 1
2940 0 : mixed_cdft%dest_list_bo(1, j) = mixed_cdft%dest_list_bo(1, j) + nsend
2941 0 : nsend = 0
2942 0 : EXIT
2943 : ELSE
2944 0 : mixed_cdft%dlb_control%target_list(3 + 2*j - 1, i) = mixed_cdft%dest_list_bo(1, j)
2945 0 : mixed_cdft%dlb_control%target_list(3 + 2*j, i) = mixed_cdft%dest_list_bo(2, j)
2946 0 : nsend = nsend - (mixed_cdft%dest_list_bo(2, j) - mixed_cdft%dest_list_bo(1, j) + 1)
2947 0 : mixed_cdft%dest_list_bo(1:2, j) = should_deallocate
2948 : END IF
2949 0 : IF (nsend <= 0) EXIT
2950 : END DO
2951 0 : IF (mixed_cdft%dest_list_bo(1, ispecial) == should_deallocate) ispecial = j + 1
2952 0 : actually_sent = actually_sent - nsend
2953 0 : nsend_max = nsend_max - actually_sent
2954 0 : send_total = send_total + actually_sent
2955 : ELSE
2956 4 : mixed_cdft%dlb_control%target_list(2, i) = nsend
2957 4 : nsend_max = nsend_max - nsend
2958 4 : send_total = send_total + nsend
2959 : END IF
2960 4 : IF (nsend_max < 0) nsend_max = 0
2961 4 : IF (nsend_max == 0) EXIT
2962 0 : IF (my_target /= no_underloaded) THEN
2963 0 : my_target = my_target + 1
2964 : ELSE
2965 : ! If multiple processors execute this block load balancing will fail
2966 0 : mixed_cdft%dlb_control%target_list(2, i) = mixed_cdft%dlb_control%target_list(2, i) + nsend_max
2967 0 : nsend_max = 0
2968 0 : EXIT
2969 : END IF
2970 0 : i = i + 1
2971 0 : IF (i > max_targets) &
2972 : CALL cp_abort(__LOCATION__, &
2973 4 : "Load balancing error: increase max_targets")
2974 : END DO
2975 4 : IF (.NOT. mixed_cdft%is_special) THEN
2976 4 : CALL reallocate(mixed_cdft%dlb_control%target_list, 1, 3, 1, i)
2977 : ELSE
2978 0 : CALL reallocate(mixed_cdft%dlb_control%target_list, 1, 3 + 2*SIZE(mixed_cdft%dest_list), 1, i)
2979 : END IF
2980 4 : targets(2, my_pos) = my_target
2981 : ! Equalize the load on the target processors
2982 4 : IF (.NOT. mixed_cdft%is_special) THEN
2983 4 : IF (send_total > NINT(work_factor*nsend_limit)) send_total = NINT(work_factor*nsend_limit) - 1
2984 4 : nsend = NINT(REAL(send_total, dp)/REAL(SIZE(mixed_cdft%dlb_control%target_list, 2), dp))
2985 8 : mixed_cdft%dlb_control%target_list(2, :) = nsend
2986 : END IF
2987 : ELSE
2988 4 : DO i = 1, no_underloaded
2989 4 : IF (work_index(i) == force_env%para_env%mepos + 1) EXIT
2990 : END DO
2991 : my_pos = i
2992 : END IF
2993 104 : CALL force_env%para_env%sum(targets)
2994 : IF (debug_this_module) THEN
2995 : CALL force_env%para_env%sum(should_warn)
2996 : IF (ANY(should_warn == 1)) &
2997 : CALL cp_warn(__LOCATION__, &
2998 : "MIXED_CDFT DLB: Attempted to redistribute more array"// &
2999 : " slices than actually available. Leaving a fraction of the total"// &
3000 : " slices on the overloaded processor. Perhaps you have set LOAD_SCALE too high?")
3001 : DEALLOCATE (should_warn)
3002 : END IF
3003 : ! check that there is one-to-one mapping between over- and underloaded processors
3004 8 : IF (force_env%para_env%is_source()) THEN
3005 4 : consistent = .TRUE.
3006 4 : DO i = force_env%para_env%num_pe - 1, force_env%para_env%num_pe - no_overloaded + 1, -1
3007 0 : IF (targets(1, i) > no_underloaded) consistent = .FALSE.
3008 4 : IF (targets(1, i) > targets(2, i + 1)) THEN
3009 : CYCLE
3010 : ELSE
3011 0 : consistent = .FALSE.
3012 : END IF
3013 : END DO
3014 4 : IF (.NOT. consistent) THEN
3015 : IF (debug_this_module .AND. iounit > 0) THEN
3016 : DO i = force_env%para_env%num_pe - 1, force_env%para_env%num_pe - no_overloaded + 1, -1
3017 : WRITE (iounit, '(A,I8,I8,I8,I8,I8)') &
3018 : 'load balancing info', load_imbalance(i), work_index(i), &
3019 : work_size(i), targets(1, i), targets(2, i)
3020 : END DO
3021 : END IF
3022 : CALL cp_abort(__LOCATION__, &
3023 : "Load balancing error: too much data to redistribute."// &
3024 : " Increase LOAD_SCALE or change the number of processors."// &
3025 : " If the confinement cavity occupies a large volume relative"// &
3026 0 : " to the total system volume, it might be worth disabling DLB.")
3027 : END IF
3028 : END IF
3029 : ! Tell the target processors which grid points they should compute
3030 8 : IF (my_pos <= no_underloaded) THEN
3031 4 : DO i = force_env%para_env%num_pe, force_env%para_env%num_pe - no_overloaded + 1, -1
3032 4 : IF (targets(1, i) <= my_pos .AND. targets(2, i) >= my_pos) THEN
3033 4 : mixed_cdft%dlb_control%recv_work = .TRUE.
3034 4 : mixed_cdft%dlb_control%my_source = work_index(i) - 1
3035 4 : EXIT
3036 : END IF
3037 : END DO
3038 4 : IF (mixed_cdft%dlb_control%recv_work) THEN
3039 4 : IF (.NOT. mixed_cdft%is_special) THEN
3040 4 : ALLOCATE (mixed_cdft%dlb_control%bo(12))
3041 : CALL force_env%para_env%irecv(msgout=mixed_cdft%dlb_control%bo, source=mixed_cdft%dlb_control%my_source, &
3042 4 : request=req(1))
3043 4 : CALL req(1)%wait()
3044 12 : mixed_cdft%dlb_control%my_dest_repl = [mixed_cdft%dlb_control%bo(11), mixed_cdft%dlb_control%bo(12)]
3045 12 : mixed_cdft%dlb_control%dest_tags_repl = [mixed_cdft%dlb_control%bo(9), mixed_cdft%dlb_control%bo(10)]
3046 : ALLOCATE (mixed_cdft%dlb_control%cavity(mixed_cdft%dlb_control%bo(1):mixed_cdft%dlb_control%bo(2), &
3047 : mixed_cdft%dlb_control%bo(3):mixed_cdft%dlb_control%bo(4), &
3048 20 : mixed_cdft%dlb_control%bo(7):mixed_cdft%dlb_control%bo(8)))
3049 : ALLOCATE (mixed_cdft%dlb_control%weight(mixed_cdft%dlb_control%bo(1):mixed_cdft%dlb_control%bo(2), &
3050 : mixed_cdft%dlb_control%bo(3):mixed_cdft%dlb_control%bo(4), &
3051 20 : mixed_cdft%dlb_control%bo(7):mixed_cdft%dlb_control%bo(8)))
3052 : ALLOCATE (mixed_cdft%dlb_control%gradients(3*natom, &
3053 : mixed_cdft%dlb_control%bo(1):mixed_cdft%dlb_control%bo(2), &
3054 : mixed_cdft%dlb_control%bo(3):mixed_cdft%dlb_control%bo(4), &
3055 24 : mixed_cdft%dlb_control%bo(7):mixed_cdft%dlb_control%bo(8)))
3056 22724 : mixed_cdft%dlb_control%gradients = 0.0_dp
3057 3524 : mixed_cdft%dlb_control%weight = 0.0_dp
3058 : CALL force_env%para_env%irecv(msgout=mixed_cdft%dlb_control%cavity, source=mixed_cdft%dlb_control%my_source, &
3059 4 : request=req(1))
3060 4 : CALL req(1)%wait()
3061 4 : DEALLOCATE (mixed_cdft%dlb_control%bo)
3062 : ELSE
3063 0 : ALLOCATE (buffsize(1))
3064 : CALL force_env%para_env%irecv(msgout=buffsize, source=mixed_cdft%dlb_control%my_source, &
3065 0 : request=req(1))
3066 0 : CALL req(1)%wait()
3067 0 : ALLOCATE (mixed_cdft%dlb_control%bo(12*buffsize(1)))
3068 : CALL force_env%para_env%irecv(msgout=mixed_cdft%dlb_control%bo, source=mixed_cdft%dlb_control%my_source, &
3069 0 : request=req(1))
3070 0 : ALLOCATE (mixed_cdft%dlb_control%sendbuff(buffsize(1)))
3071 0 : ALLOCATE (req_recv(buffsize(1)))
3072 0 : DEALLOCATE (buffsize)
3073 0 : CALL req(1)%wait()
3074 0 : DO j = 1, SIZE(mixed_cdft%dlb_control%sendbuff)
3075 : ALLOCATE (mixed_cdft%dlb_control%sendbuff(j)%cavity(mixed_cdft%dlb_control%bo(12*(j - 1) + 1): &
3076 : mixed_cdft%dlb_control%bo(12*(j - 1) + 2), &
3077 : mixed_cdft%dlb_control%bo(12*(j - 1) + 3): &
3078 : mixed_cdft%dlb_control%bo(12*(j - 1) + 4), &
3079 : mixed_cdft%dlb_control%bo(12*(j - 1) + 7): &
3080 0 : mixed_cdft%dlb_control%bo(12*(j - 1) + 8)))
3081 : CALL force_env%para_env%irecv(msgout=mixed_cdft%dlb_control%sendbuff(j)%cavity, &
3082 : source=mixed_cdft%dlb_control%my_source, &
3083 0 : request=req_recv(j))
3084 : ALLOCATE (mixed_cdft%dlb_control%sendbuff(j)%weight(mixed_cdft%dlb_control%bo(12*(j - 1) + 1): &
3085 : mixed_cdft%dlb_control%bo(12*(j - 1) + 2), &
3086 : mixed_cdft%dlb_control%bo(12*(j - 1) + 3): &
3087 : mixed_cdft%dlb_control%bo(12*(j - 1) + 4), &
3088 : mixed_cdft%dlb_control%bo(12*(j - 1) + 7): &
3089 0 : mixed_cdft%dlb_control%bo(12*(j - 1) + 8)))
3090 : ALLOCATE (mixed_cdft%dlb_control%sendbuff(j)%gradients(3*natom, &
3091 : mixed_cdft%dlb_control%bo(12*(j - 1) + 1): &
3092 : mixed_cdft%dlb_control%bo(12*(j - 1) + 2), &
3093 : mixed_cdft%dlb_control%bo(12*(j - 1) + 3): &
3094 : mixed_cdft%dlb_control%bo(12*(j - 1) + 4), &
3095 : mixed_cdft%dlb_control%bo(12*(j - 1) + 7): &
3096 0 : mixed_cdft%dlb_control%bo(12*(j - 1) + 8)))
3097 0 : mixed_cdft%dlb_control%sendbuff(j)%weight = 0.0_dp
3098 0 : mixed_cdft%dlb_control%sendbuff(j)%gradients = 0.0_dp
3099 : mixed_cdft%dlb_control%sendbuff(j)%tag = [mixed_cdft%dlb_control%bo(12*(j - 1) + 9), &
3100 0 : mixed_cdft%dlb_control%bo(12*(j - 1) + 10)]
3101 : mixed_cdft%dlb_control%sendbuff(j)%rank = [mixed_cdft%dlb_control%bo(12*(j - 1) + 11), &
3102 0 : mixed_cdft%dlb_control%bo(12*(j - 1) + 12)]
3103 : END DO
3104 0 : CALL mp_waitall(req_recv)
3105 0 : DEALLOCATE (req_recv)
3106 : END IF
3107 : END IF
3108 : ELSE
3109 4 : IF (.NOT. mixed_cdft%is_special) THEN
3110 4 : offset = 0
3111 4 : ALLOCATE (sendbuffer(12))
3112 4 : send_total = 0
3113 8 : DO i = 1, SIZE(mixed_cdft%dlb_control%target_list, 2)
3114 : tags = [(i - 1)*3 + 1 + force_env%para_env%mepos*6*max_targets, &
3115 12 : (i - 1)*3 + 1 + 3*max_targets + force_env%para_env%mepos*6*max_targets] ! Unique communicator tags
3116 4 : mixed_cdft%dlb_control%target_list(3, i) = tags(1)
3117 4 : IF (mixed_cdft%is_pencil) THEN
3118 : sendbuffer = [bo_conf(1, 1) + offset, &
3119 : bo_conf(1, 1) + offset + (mixed_cdft%dlb_control%target_list(2, i) - 1), &
3120 : bo_conf(1, 2), bo_conf(2, 2), bo(1, 3), bo(2, 3), bo_conf(1, 3), bo_conf(2, 3), &
3121 0 : tags(1), tags(2), mixed_cdft%dest_list(1), mixed_cdft%dest_list(2)]
3122 : ELSE
3123 : sendbuffer = [bo_conf(1, 1), bo_conf(2, 1), &
3124 : bo_conf(1, 2) + offset, &
3125 : bo_conf(1, 2) + offset + (mixed_cdft%dlb_control%target_list(2, i) - 1), &
3126 : bo(1, 3), bo(2, 3), bo_conf(1, 3), bo_conf(2, 3), tags(1), tags(2), &
3127 52 : mixed_cdft%dest_list(1), mixed_cdft%dest_list(2)]
3128 : END IF
3129 4 : send_total = send_total + mixed_cdft%dlb_control%target_list(2, i) - 1
3130 : CALL force_env%para_env%isend(msgin=sendbuffer, dest=mixed_cdft%dlb_control%target_list(1, i), &
3131 4 : request=req(1))
3132 4 : CALL req(1)%wait()
3133 4 : IF (mixed_cdft%is_pencil) THEN
3134 : ALLOCATE (cavity(bo_conf(1, 1) + offset: &
3135 : bo_conf(1, 1) + offset + (mixed_cdft%dlb_control%target_list(2, i) - 1), &
3136 0 : bo_conf(1, 2):bo_conf(2, 2), bo_conf(1, 3):bo_conf(2, 3)))
3137 : cavity = cdft_control%becke_control%cavity%array(bo_conf(1, 1) + offset: &
3138 : bo_conf(1, 1) + offset + &
3139 : (mixed_cdft%dlb_control%target_list(2, i) - 1), &
3140 : bo_conf(1, 2):bo_conf(2, 2), &
3141 0 : bo_conf(1, 3):bo_conf(2, 3))
3142 : ELSE
3143 : ALLOCATE (cavity(bo_conf(1, 1):bo_conf(2, 1), &
3144 : bo_conf(1, 2) + offset: &
3145 : bo_conf(1, 2) + offset + (mixed_cdft%dlb_control%target_list(2, i) - 1), &
3146 20 : bo_conf(1, 3):bo_conf(2, 3)))
3147 : cavity = cdft_control%becke_control%cavity%array(bo_conf(1, 1):bo_conf(2, 1), &
3148 : bo_conf(1, 2) + offset: &
3149 : bo_conf(1, 2) + offset + &
3150 : (mixed_cdft%dlb_control%target_list(2, i) - 1), &
3151 7044 : bo_conf(1, 3):bo_conf(2, 3))
3152 : END IF
3153 : CALL force_env%para_env%isend(msgin=cavity, &
3154 : dest=mixed_cdft%dlb_control%target_list(1, i), &
3155 4 : request=req(1))
3156 4 : CALL req(1)%wait()
3157 4 : offset = offset + mixed_cdft%dlb_control%target_list(2, i)
3158 8 : DEALLOCATE (cavity)
3159 : END DO
3160 4 : IF (mixed_cdft%is_pencil) THEN
3161 0 : mixed_cdft%dlb_control%distributed(1) = bo_conf(1, 1)
3162 0 : mixed_cdft%dlb_control%distributed(2) = bo_conf(1, 1) + offset - 1
3163 : ELSE
3164 4 : mixed_cdft%dlb_control%distributed(1) = bo_conf(1, 2)
3165 4 : mixed_cdft%dlb_control%distributed(2) = bo_conf(1, 2) + offset - 1
3166 : END IF
3167 4 : DEALLOCATE (sendbuffer)
3168 : ELSE
3169 0 : ALLOCATE (buffsize(1))
3170 0 : DO i = 1, SIZE(mixed_cdft%dlb_control%target_list, 2)
3171 0 : buffsize = mixed_cdft%dlb_control%target_list(2, i)
3172 : ! Unique communicator tags (dont actually need these, should be removed)
3173 : tags = [(i - 1)*3 + 1 + force_env%para_env%mepos*6*max_targets, &
3174 0 : (i - 1)*3 + 1 + 3*max_targets + force_env%para_env%mepos*6*max_targets]
3175 0 : DO j = 4, SIZE(mixed_cdft%dlb_control%target_list, 1)
3176 0 : IF (mixed_cdft%dlb_control%target_list(j, i) > uninitialized) EXIT
3177 : END DO
3178 0 : offset_special = j
3179 0 : offset_proc = j - 4 - (j - 4)/2
3180 : CALL force_env%para_env%isend(msgin=buffsize, &
3181 : dest=mixed_cdft%dlb_control%target_list(1, i), &
3182 0 : request=req(1))
3183 0 : CALL req(1)%wait()
3184 0 : ALLOCATE (sendbuffer(12*buffsize(1)))
3185 0 : DO j = 1, buffsize(1)
3186 : sendbuffer(12*(j - 1) + 1:12*(j - 1) + 12) = [mixed_cdft%dlb_control%target_list(offset_special + 2*(j - 1), i), &
3187 : mixed_cdft%dlb_control%target_list(offset_special + 2*j - 1, i), &
3188 : bo_conf(1, 2), bo_conf(2, 2), bo(1, 3), bo(2, 3), &
3189 : bo_conf(1, 3), bo_conf(2, 3), tags(1), tags(2), &
3190 : mixed_cdft%dest_list(j + offset_proc), &
3191 0 : mixed_cdft%dest_list(j + offset_proc) + force_env%para_env%num_pe/2]
3192 : END DO
3193 : CALL force_env%para_env%isend(msgin=sendbuffer, &
3194 : dest=mixed_cdft%dlb_control%target_list(1, i), &
3195 0 : request=req(1))
3196 0 : CALL req(1)%wait()
3197 0 : DEALLOCATE (sendbuffer)
3198 0 : DO j = 1, buffsize(1)
3199 : ALLOCATE (cavity(mixed_cdft%dlb_control%target_list(offset_special + 2*(j - 1), i): &
3200 : mixed_cdft%dlb_control%target_list(offset_special + 2*j - 1, i), &
3201 0 : bo_conf(1, 2):bo_conf(2, 2), bo_conf(1, 3):bo_conf(2, 3)))
3202 : cavity = cdft_control%becke_control%cavity%array(LBOUND(cavity, 1):UBOUND(cavity, 1), &
3203 : bo_conf(1, 2):bo_conf(2, 2), &
3204 0 : bo_conf(1, 3):bo_conf(2, 3))
3205 : CALL force_env%para_env%isend(msgin=cavity, &
3206 : dest=mixed_cdft%dlb_control%target_list(1, i), &
3207 0 : request=req(1))
3208 0 : CALL req(1)%wait()
3209 0 : DEALLOCATE (cavity)
3210 : END DO
3211 : END DO
3212 0 : DEALLOCATE (buffsize)
3213 : END IF
3214 : END IF
3215 8 : DEALLOCATE (expected_work, work_size, load_imbalance, work_index, targets)
3216 : ! Once calculated, data defined on the distributed grid points is sent directly to the processors that own the
3217 : ! grid points after the constraint is copied onto the two processor groups, instead of sending the data back to
3218 : ! the original owner
3219 8 : IF (mixed_cdft%is_special) THEN
3220 0 : my_special_work = 2
3221 0 : ALLOCATE (mask_send(SIZE(mixed_cdft%dest_list)), mask_recv(SIZE(mixed_cdft%source_list)))
3222 0 : ALLOCATE (nsend_proc(SIZE(mixed_cdft%dest_list)), nrecv(SIZE(mixed_cdft%source_list)))
3223 0 : nrecv = 0
3224 0 : nsend_proc = 0
3225 0 : mask_recv = .FALSE.
3226 0 : mask_send = .FALSE.
3227 : ELSE
3228 : my_special_work = 1
3229 : END IF
3230 40 : ALLOCATE (recvbuffer(SIZE(mixed_cdft%source_list)), sbuff(SIZE(mixed_cdft%dest_list)))
3231 56 : ALLOCATE (req_total(my_special_work*SIZE(mixed_cdft%source_list) + (my_special_work**2)*SIZE(mixed_cdft%dest_list)))
3232 24 : ALLOCATE (mixed_cdft%dlb_control%recv_work_repl(SIZE(mixed_cdft%source_list)))
3233 24 : DO i = 1, SIZE(mixed_cdft%source_list)
3234 16 : NULLIFY (recvbuffer(i)%bv, recvbuffer(i)%iv)
3235 16 : ALLOCATE (recvbuffer(i)%bv(1), recvbuffer(i)%iv(3))
3236 : CALL force_env%para_env%irecv(msgout=recvbuffer(i)%bv, &
3237 : source=mixed_cdft%source_list(i), &
3238 16 : request=req_total(i), tag=1)
3239 16 : IF (mixed_cdft%is_special) &
3240 : CALL force_env%para_env%irecv(msgout=recvbuffer(i)%iv, &
3241 : source=mixed_cdft%source_list(i), &
3242 : request=req_total(i + SIZE(mixed_cdft%source_list)), &
3243 8 : tag=2)
3244 : END DO
3245 16 : DO i = 1, my_special_work
3246 32 : DO j = 1, SIZE(mixed_cdft%dest_list)
3247 16 : IF (i == 1) THEN
3248 16 : NULLIFY (sbuff(j)%iv, sbuff(j)%bv)
3249 16 : ALLOCATE (sbuff(j)%bv(1))
3250 32 : sbuff(j)%bv = mixed_cdft%dlb_control%send_work
3251 16 : IF (mixed_cdft%is_special) THEN
3252 0 : ALLOCATE (sbuff(j)%iv(3))
3253 0 : sbuff(j)%iv(1:2) = mixed_cdft%dest_list_bo(1:2, j)
3254 0 : sbuff(j)%iv(3) = 0
3255 0 : IF (sbuff(j)%iv(1) == should_deallocate) mask_send(j) = .TRUE.
3256 0 : IF (mixed_cdft%dlb_control%send_work) THEN
3257 0 : sbuff(j)%bv = touched(j)
3258 0 : IF (touched(j)) THEN
3259 0 : nsend = 0
3260 0 : DO ispecial = 1, SIZE(mixed_cdft%dlb_control%target_list, 2)
3261 0 : IF (mixed_cdft%dlb_control%target_list(4 + 2*(j - 1), ispecial) /= uninitialized) &
3262 0 : nsend = nsend + 1
3263 : END DO
3264 0 : sbuff(j)%iv(3) = nsend
3265 0 : nsend_proc(j) = nsend
3266 : END IF
3267 : END IF
3268 : END IF
3269 : END IF
3270 16 : ind = j + (i - 1)*SIZE(mixed_cdft%dest_list) + my_special_work*SIZE(mixed_cdft%source_list)
3271 : CALL force_env%para_env%isend(msgin=sbuff(j)%bv, &
3272 : dest=mixed_cdft%dest_list(j) + (i - 1)*force_env%para_env%num_pe/2, &
3273 16 : request=req_total(ind), tag=1)
3274 16 : IF (mixed_cdft%is_special) &
3275 : CALL force_env%para_env%isend(msgin=sbuff(j)%iv, &
3276 : dest=mixed_cdft%dest_list(j) + (i - 1)*force_env%para_env%num_pe/2, &
3277 8 : request=req_total(ind + 2*SIZE(mixed_cdft%dest_list)), tag=2)
3278 : END DO
3279 : END DO
3280 8 : CALL mp_waitall(req_total)
3281 8 : DEALLOCATE (req_total)
3282 24 : DO i = 1, SIZE(mixed_cdft%source_list)
3283 16 : mixed_cdft%dlb_control%recv_work_repl(i) = recvbuffer(i)%bv(1)
3284 16 : IF (mixed_cdft%is_special .AND. mixed_cdft%dlb_control%recv_work_repl(i)) THEN
3285 0 : mixed_cdft%source_list_bo(1:2, i) = recvbuffer(i)%iv(1:2)
3286 0 : nrecv(i) = recvbuffer(i)%iv(3)
3287 0 : IF (recvbuffer(i)%iv(1) == should_deallocate) mask_recv(i) = .TRUE.
3288 : END IF
3289 16 : DEALLOCATE (recvbuffer(i)%bv)
3290 24 : IF (ASSOCIATED(recvbuffer(i)%iv)) DEALLOCATE (recvbuffer(i)%iv)
3291 : END DO
3292 24 : DO j = 1, SIZE(mixed_cdft%dest_list)
3293 16 : DEALLOCATE (sbuff(j)%bv)
3294 24 : IF (ASSOCIATED(sbuff(j)%iv)) DEALLOCATE (sbuff(j)%iv)
3295 : END DO
3296 8 : DEALLOCATE (recvbuffer)
3297 : ! For some reason if debug_this_module is true and is_special is false, the deallocate statement
3298 : ! on line 3433 gets executed no matter what (gfortran 5.3.0 bug?). Printing out the variable seems to fix it...
3299 : IF (debug_this_module) THEN
3300 : WRITE (dummy, *) mixed_cdft%is_special
3301 : END IF
3302 :
3303 8 : IF (.NOT. mixed_cdft%is_special) THEN
3304 8 : IF (mixed_cdft%dlb_control%send_work) THEN
3305 32 : ALLOCATE (req_total(COUNT(mixed_cdft%dlb_control%recv_work_repl) + 2))
3306 4 : ALLOCATE (sendbuffer(6))
3307 4 : IF (mixed_cdft%is_pencil) THEN
3308 : sendbuffer = [SIZE(mixed_cdft%dlb_control%target_list, 2), bo_conf(1, 3), bo_conf(2, 3), &
3309 0 : bo_conf(1, 1), bo_conf(1, 2), bo_conf(2, 2)]
3310 : ELSE
3311 : sendbuffer = [SIZE(mixed_cdft%dlb_control%target_list, 2), bo_conf(1, 3), bo_conf(2, 3), &
3312 28 : bo_conf(1, 2), bo_conf(1, 1), bo_conf(2, 1)]
3313 : END IF
3314 8 : ELSE IF (ANY(mixed_cdft%dlb_control%recv_work_repl)) THEN
3315 24 : ALLOCATE (req_total(COUNT(mixed_cdft%dlb_control%recv_work_repl)))
3316 : END IF
3317 16 : IF (ANY(mixed_cdft%dlb_control%recv_work_repl)) THEN
3318 24 : ALLOCATE (mixed_cdft%dlb_control%recv_info(2))
3319 8 : NULLIFY (mixed_cdft%dlb_control%recv_info(1)%target_list, mixed_cdft%dlb_control%recv_info(2)%target_list)
3320 24 : ALLOCATE (mixed_cdft%dlb_control%recvbuff(2))
3321 8 : NULLIFY (mixed_cdft%dlb_control%recvbuff(1)%buffs, mixed_cdft%dlb_control%recvbuff(2)%buffs)
3322 : END IF
3323 : ! First communicate which grid points were distributed
3324 8 : IF (mixed_cdft%dlb_control%send_work) THEN
3325 12 : ind = COUNT(mixed_cdft%dlb_control%recv_work_repl) + 1
3326 12 : DO i = 1, 2
3327 : CALL force_env%para_env%isend(msgin=sendbuffer, &
3328 : dest=mixed_cdft%dest_list(i), &
3329 8 : request=req_total(ind))
3330 12 : ind = ind + 1
3331 : END DO
3332 : END IF
3333 16 : IF (ANY(mixed_cdft%dlb_control%recv_work_repl)) THEN
3334 8 : ind = 1
3335 24 : DO i = 1, 2
3336 24 : IF (mixed_cdft%dlb_control%recv_work_repl(i)) THEN
3337 8 : ALLOCATE (mixed_cdft%dlb_control%recv_info(i)%matrix_info(6))
3338 : CALL force_env%para_env%irecv(msgout=mixed_cdft%dlb_control%recv_info(i)%matrix_info, &
3339 : source=mixed_cdft%source_list(i), &
3340 8 : request=req_total(ind))
3341 8 : ind = ind + 1
3342 : END IF
3343 : END DO
3344 : END IF
3345 8 : IF (ASSOCIATED(req_total)) THEN
3346 8 : CALL mp_waitall(req_total)
3347 : END IF
3348 : ! Now communicate which processor handles which grid points
3349 8 : IF (mixed_cdft%dlb_control%send_work) THEN
3350 12 : ind = COUNT(mixed_cdft%dlb_control%recv_work_repl) + 1
3351 12 : DO i = 1, 2
3352 8 : IF (i == 2) &
3353 8 : mixed_cdft%dlb_control%target_list(3, :) = mixed_cdft%dlb_control%target_list(3, :) + 3*max_targets
3354 : CALL force_env%para_env%isend(msgin=mixed_cdft%dlb_control%target_list, &
3355 : dest=mixed_cdft%dest_list(i), &
3356 8 : request=req_total(ind))
3357 12 : ind = ind + 1
3358 : END DO
3359 : END IF
3360 16 : IF (ANY(mixed_cdft%dlb_control%recv_work_repl)) THEN
3361 8 : ind = 1
3362 24 : DO i = 1, 2
3363 24 : IF (mixed_cdft%dlb_control%recv_work_repl(i)) THEN
3364 : ALLOCATE (mixed_cdft%dlb_control%recv_info(i)% &
3365 24 : target_list(3, mixed_cdft%dlb_control%recv_info(i)%matrix_info(1)))
3366 : CALL force_env%para_env%irecv(msgout=mixed_cdft%dlb_control%recv_info(i)%target_list, &
3367 : source=mixed_cdft%source_list(i), &
3368 8 : request=req_total(ind))
3369 8 : ind = ind + 1
3370 : END IF
3371 : END DO
3372 : END IF
3373 8 : IF (ASSOCIATED(req_total)) THEN
3374 8 : CALL mp_waitall(req_total)
3375 8 : DEALLOCATE (req_total)
3376 : END IF
3377 8 : IF (ASSOCIATED(sendbuffer)) DEALLOCATE (sendbuffer)
3378 : ELSE
3379 0 : IF (mixed_cdft%dlb_control%send_work) THEN
3380 0 : ALLOCATE (req_total(COUNT(mixed_cdft%dlb_control%recv_work_repl) + 2*COUNT(touched)))
3381 0 : ELSE IF (ANY(mixed_cdft%dlb_control%recv_work_repl)) THEN
3382 0 : ALLOCATE (req_total(COUNT(mixed_cdft%dlb_control%recv_work_repl)))
3383 : END IF
3384 0 : IF (mixed_cdft%dlb_control%send_work) THEN
3385 0 : ind = COUNT(mixed_cdft%dlb_control%recv_work_repl)
3386 0 : DO j = 1, SIZE(mixed_cdft%dest_list)
3387 0 : IF (touched(j)) THEN
3388 0 : ALLOCATE (sbuff(j)%iv(4 + 3*nsend_proc(j)))
3389 0 : sbuff(j)%iv(1:4) = [bo_conf(1, 2), bo_conf(2, 2), bo_conf(1, 3), bo_conf(2, 3)]
3390 0 : offset = 5
3391 0 : DO i = 1, SIZE(mixed_cdft%dlb_control%target_list, 2)
3392 0 : IF (mixed_cdft%dlb_control%target_list(4 + 2*(j - 1), i) /= uninitialized) THEN
3393 : sbuff(j)%iv(offset:offset + 2) = [mixed_cdft%dlb_control%target_list(1, i), &
3394 : mixed_cdft%dlb_control%target_list(4 + 2*(j - 1), i), &
3395 0 : mixed_cdft%dlb_control%target_list(4 + 2*j - 1, i)]
3396 0 : offset = offset + 3
3397 : END IF
3398 : END DO
3399 0 : DO ispecial = 1, my_special_work
3400 : CALL force_env%para_env%isend(msgin=sbuff(j)%iv, &
3401 : dest=mixed_cdft%dest_list(j) + (ispecial - 1)*force_env%para_env%num_pe/2, &
3402 0 : request=req_total(ind + ispecial))
3403 : END DO
3404 0 : ind = ind + my_special_work
3405 : END IF
3406 : END DO
3407 : END IF
3408 0 : IF (ANY(mixed_cdft%dlb_control%recv_work_repl)) THEN
3409 0 : ALLOCATE (mixed_cdft%dlb_control%recv_info(SIZE(mixed_cdft%source_list)))
3410 0 : ALLOCATE (mixed_cdft%dlb_control%recvbuff(SIZE(mixed_cdft%source_list)))
3411 0 : ind = 1
3412 0 : DO j = 1, SIZE(mixed_cdft%source_list)
3413 : NULLIFY (mixed_cdft%dlb_control%recv_info(j)%target_list, &
3414 0 : mixed_cdft%dlb_control%recvbuff(j)%buffs)
3415 0 : IF (mixed_cdft%dlb_control%recv_work_repl(j)) THEN
3416 0 : ALLOCATE (mixed_cdft%dlb_control%recv_info(j)%matrix_info(4 + 3*nrecv(j)))
3417 : CALL force_env%para_env%irecv(mixed_cdft%dlb_control%recv_info(j)%matrix_info, &
3418 : source=mixed_cdft%source_list(j), &
3419 0 : request=req_total(ind))
3420 0 : ind = ind + 1
3421 : END IF
3422 : END DO
3423 : END IF
3424 0 : IF (ASSOCIATED(req_total)) THEN
3425 0 : CALL mp_waitall(req_total)
3426 0 : DEALLOCATE (req_total)
3427 : END IF
3428 0 : IF (ANY(mask_send)) THEN
3429 : ALLOCATE (tmp(SIZE(mixed_cdft%dest_list) - COUNT(mask_send)), &
3430 0 : tmp_bo(2, SIZE(mixed_cdft%dest_list) - COUNT(mask_send)))
3431 0 : i = 1
3432 0 : DO j = 1, SIZE(mixed_cdft%dest_list)
3433 0 : IF (.NOT. mask_send(j)) THEN
3434 0 : tmp(i) = mixed_cdft%dest_list(j)
3435 0 : tmp_bo(1:2, i) = mixed_cdft%dest_list_bo(1:2, j)
3436 0 : i = i + 1
3437 : END IF
3438 : END DO
3439 0 : DEALLOCATE (mixed_cdft%dest_list, mixed_cdft%dest_list_bo)
3440 0 : ALLOCATE (mixed_cdft%dest_list(SIZE(tmp)), mixed_cdft%dest_list_bo(2, SIZE(tmp)))
3441 0 : mixed_cdft%dest_list = tmp
3442 0 : mixed_cdft%dest_list_bo = tmp_bo
3443 0 : DEALLOCATE (tmp, tmp_bo)
3444 : END IF
3445 0 : IF (ANY(mask_recv)) THEN
3446 : ALLOCATE (tmp(SIZE(mixed_cdft%source_list) - COUNT(mask_recv)), &
3447 0 : tmp_bo(4, SIZE(mixed_cdft%source_list) - COUNT(mask_recv)))
3448 0 : i = 1
3449 0 : DO j = 1, SIZE(mixed_cdft%source_list)
3450 0 : IF (.NOT. mask_recv(j)) THEN
3451 0 : tmp(i) = mixed_cdft%source_list(j)
3452 0 : tmp_bo(1:4, i) = mixed_cdft%source_list_bo(1:4, j)
3453 0 : i = i + 1
3454 : END IF
3455 : END DO
3456 0 : DEALLOCATE (mixed_cdft%source_list, mixed_cdft%source_list_bo)
3457 0 : ALLOCATE (mixed_cdft%source_list(SIZE(tmp)), mixed_cdft%source_list_bo(4, SIZE(tmp)))
3458 0 : mixed_cdft%source_list = tmp
3459 0 : mixed_cdft%source_list_bo = tmp_bo
3460 0 : DEALLOCATE (tmp, tmp_bo)
3461 : END IF
3462 0 : DEALLOCATE (mask_recv, mask_send)
3463 0 : DEALLOCATE (nsend_proc, nrecv)
3464 0 : IF (mixed_cdft%dlb_control%send_work) THEN
3465 0 : DO j = 1, SIZE(mixed_cdft%dest_list)
3466 0 : IF (touched(j)) DEALLOCATE (sbuff(j)%iv)
3467 : END DO
3468 0 : IF (ASSOCIATED(touched)) DEALLOCATE (touched)
3469 : END IF
3470 : END IF
3471 8 : DEALLOCATE (sbuff)
3472 : CALL cp_print_key_finished_output(iounit, logger, force_env_section, &
3473 8 : "MIXED%MIXED_CDFT%PRINT%PROGRAM_RUN_INFO")
3474 8 : CALL timestop(handle)
3475 :
3476 16 : END SUBROUTINE mixed_becke_constraint_dlb
3477 :
3478 : ! **************************************************************************************************
3479 : !> \brief Low level routine to build mixed Becke constraint and gradients
3480 : !> \param force_env the force_env that holds the CDFT states
3481 : !> \param mixed_cdft container for structures needed to build the mixed CDFT constraint
3482 : !> \param in_memory decides whether to build the weight function gradients in parallel before solving
3483 : !> the CDFT states or later during the SCF procedure of the individual states
3484 : !> \param is_constraint a list used to determine which atoms in the system define the constraint
3485 : !> \param store_vectors should temporary arrays be stored in memory to accelerate the calculation
3486 : !> \param R12 temporary array holding the pairwise atomic distances
3487 : !> \param position_vecs temporary array holding the pbc corrected atomic position vectors
3488 : !> \param pair_dist_vecs temporary array holding the pairwise displament vectors
3489 : !> \param coefficients array that determines how atoms should be summed to form the constraint
3490 : !> \param catom temporary array to map the global index of constraint atoms to their position
3491 : !> in a list that holds only constraint atoms
3492 : !> \par History
3493 : !> 03.2016 created [Nico Holmberg]
3494 : ! **************************************************************************************************
3495 36 : SUBROUTINE mixed_becke_constraint_low(force_env, mixed_cdft, in_memory, &
3496 : is_constraint, store_vectors, R12, position_vecs, &
3497 : pair_dist_vecs, coefficients, catom)
3498 : TYPE(force_env_type), POINTER :: force_env
3499 : TYPE(mixed_cdft_type), POINTER :: mixed_cdft
3500 : LOGICAL, INTENT(IN) :: in_memory
3501 : LOGICAL, ALLOCATABLE, DIMENSION(:), INTENT(INOUT) :: is_constraint
3502 : LOGICAL, INTENT(IN) :: store_vectors
3503 : REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :), &
3504 : INTENT(INOUT) :: R12, position_vecs
3505 : REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :), &
3506 : INTENT(INOUT) :: pair_dist_vecs
3507 : REAL(kind=dp), ALLOCATABLE, DIMENSION(:), &
3508 : INTENT(INOUT) :: coefficients
3509 : INTEGER, ALLOCATABLE, DIMENSION(:), INTENT(INOUT) :: catom
3510 :
3511 : CHARACTER(len=*), PARAMETER :: routineN = 'mixed_becke_constraint_low'
3512 :
3513 : INTEGER :: handle, i, iatom, icomm, iforce_eval, index, iounit, ip, ispecial, iwork, j, &
3514 : jatom, jcomm, k, my_special_work, my_work, natom, nbuffs, nforce_eval, np(3), &
3515 : nsent_total, nskipped, nwork, offset, offset_repl
3516 36 : INTEGER, DIMENSION(:), POINTER :: work, work_dlb
3517 36 : INTEGER, DIMENSION(:, :), POINTER :: nsent
3518 : LOGICAL :: completed_recv, should_communicate
3519 36 : LOGICAL, ALLOCATABLE, DIMENSION(:) :: skip_me
3520 36 : LOGICAL, ALLOCATABLE, DIMENSION(:, :) :: completed
3521 : REAL(kind=dp) :: dist1, dist2, dmyexp, my1, my1_homo, &
3522 : myexp, sum_cell_f_all, &
3523 : sum_cell_f_constr, th, tmp_const
3524 36 : REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: cell_functions, distances, ds_dR_i, &
3525 36 : ds_dR_j
3526 36 : REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :) :: d_sum_const_dR, d_sum_Pm_dR, &
3527 36 : distance_vecs, dP_i_dRi
3528 36 : REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :) :: dP_i_dRj
3529 : REAL(kind=dp), DIMENSION(3) :: cell_v, dist_vec, dmy_dR_i, dmy_dR_j, &
3530 : dr, dr1_r2, dr_i_dR, dr_ij_dR, &
3531 : dr_j_dR, grid_p, r, r1, shift
3532 36 : REAL(kind=dp), DIMENSION(:), POINTER :: cutoffs
3533 36 : REAL(KIND=dp), DIMENSION(:, :, :), POINTER :: cavity, weight
3534 36 : REAL(KIND=dp), DIMENSION(:, :, :, :), POINTER :: gradients
3535 : TYPE(cdft_control_type), POINTER :: cdft_control
3536 : TYPE(cell_type), POINTER :: cell
3537 : TYPE(cp_logger_type), POINTER :: logger
3538 : TYPE(cp_subsys_type), POINTER :: subsys_mix
3539 : TYPE(force_env_type), POINTER :: force_env_qs
3540 36 : TYPE(mp_request_type), DIMENSION(:), POINTER :: req_recv, req_total
3541 36 : TYPE(mp_request_type), DIMENSION(:, :), POINTER :: req_send
3542 : TYPE(particle_list_type), POINTER :: particles
3543 36 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
3544 : TYPE(pw_pool_type), POINTER :: auxbas_pw_pool
3545 : TYPE(section_vals_type), POINTER :: force_env_section, print_section
3546 :
3547 72 : logger => cp_get_default_logger()
3548 36 : NULLIFY (work, req_recv, req_send, work_dlb, nsent, cutoffs, cavity, &
3549 36 : weight, gradients, cell, subsys_mix, force_env_qs, &
3550 36 : particle_set, particles, auxbas_pw_pool, force_env_section, &
3551 36 : print_section, cdft_control)
3552 36 : CALL timeset(routineN, handle)
3553 36 : nforce_eval = SIZE(force_env%sub_force_env)
3554 36 : CALL force_env_get(force_env=force_env, force_env_section=force_env_section)
3555 36 : print_section => section_vals_get_subs_vals(force_env_section, "MIXED%MIXED_CDFT%PRINT%PROGRAM_RUN_INFO")
3556 36 : iounit = cp_print_key_unit_nr(logger, print_section, '', extension='.mixedLog')
3557 36 : IF (.NOT. force_env%mixed_env%do_mixed_qmmm_cdft) THEN
3558 : CALL force_env_get(force_env=force_env, &
3559 : subsys=subsys_mix, &
3560 28 : cell=cell)
3561 : CALL cp_subsys_get(subsys=subsys_mix, &
3562 : particles=particles, &
3563 28 : particle_set=particle_set)
3564 : ELSE
3565 24 : DO iforce_eval = 1, nforce_eval
3566 16 : IF (.NOT. ASSOCIATED(force_env%sub_force_env(iforce_eval)%force_env)) CYCLE
3567 24 : force_env_qs => force_env%sub_force_env(iforce_eval)%force_env
3568 : END DO
3569 : CALL get_qs_env(force_env_qs%qmmm_env%qs_env, &
3570 : cp_subsys=subsys_mix, &
3571 8 : cell=cell)
3572 : CALL cp_subsys_get(subsys=subsys_mix, &
3573 : particles=particles, &
3574 8 : particle_set=particle_set)
3575 : END IF
3576 36 : natom = SIZE(particles%els)
3577 36 : cdft_control => mixed_cdft%cdft_control
3578 36 : CALL pw_env_get(pw_env=mixed_cdft%pw_env, auxbas_pw_pool=auxbas_pw_pool)
3579 144 : np = auxbas_pw_pool%pw_grid%npts
3580 144 : dr = auxbas_pw_pool%pw_grid%dr
3581 144 : shift = -REAL(MODULO(np, 2), dp)*dr/2.0_dp
3582 180 : ALLOCATE (cell_functions(natom), skip_me(natom))
3583 36 : IF (store_vectors) THEN
3584 72 : ALLOCATE (distances(natom))
3585 108 : ALLOCATE (distance_vecs(3, natom))
3586 : END IF
3587 36 : IF (in_memory) THEN
3588 24 : ALLOCATE (ds_dR_j(3))
3589 24 : ALLOCATE (ds_dR_i(3))
3590 72 : ALLOCATE (d_sum_Pm_dR(3, natom))
3591 48 : ALLOCATE (d_sum_const_dR(3, natom))
3592 96 : ALLOCATE (dP_i_dRj(3, natom, natom))
3593 48 : ALLOCATE (dP_i_dRi(3, natom))
3594 24 : th = 1.0e-8_dp
3595 : END IF
3596 36 : IF (mixed_cdft%dlb) THEN
3597 32 : ALLOCATE (work(force_env%para_env%num_pe), work_dlb(force_env%para_env%num_pe))
3598 24 : work = 0
3599 24 : work_dlb = 0
3600 : END IF
3601 36 : my_work = 1
3602 36 : my_special_work = 1
3603 : ! Load balancing: allocate storage for receiving buffers and post recv requests
3604 36 : IF (mixed_cdft%dlb) THEN
3605 8 : IF (mixed_cdft%dlb_control%recv_work) THEN
3606 4 : my_work = 2
3607 4 : IF (.NOT. mixed_cdft%is_special) THEN
3608 40 : ALLOCATE (req_send(2, 3))
3609 : ELSE
3610 0 : ALLOCATE (req_send(2, 3*SIZE(mixed_cdft%dlb_control%sendbuff)))
3611 : END IF
3612 : END IF
3613 16 : IF (ANY(mixed_cdft%dlb_control%recv_work_repl)) THEN
3614 8 : IF (.NOT. mixed_cdft%is_special) THEN
3615 8 : offset_repl = 0
3616 8 : IF (mixed_cdft%dlb_control%recv_work_repl(1) .AND. mixed_cdft%dlb_control%recv_work_repl(2)) THEN
3617 : ALLOCATE (req_recv(3*(SIZE(mixed_cdft%dlb_control%recv_info(1)%target_list, 2) + &
3618 0 : SIZE(mixed_cdft%dlb_control%recv_info(2)%target_list, 2))))
3619 0 : offset_repl = 3*SIZE(mixed_cdft%dlb_control%recv_info(1)%target_list, 2)
3620 8 : ELSE IF (mixed_cdft%dlb_control%recv_work_repl(1)) THEN
3621 0 : ALLOCATE (req_recv(3*(SIZE(mixed_cdft%dlb_control%recv_info(1)%target_list, 2))))
3622 : ELSE
3623 48 : ALLOCATE (req_recv(3*(SIZE(mixed_cdft%dlb_control%recv_info(2)%target_list, 2))))
3624 : END IF
3625 : ELSE
3626 0 : nbuffs = 0
3627 0 : offset_repl = 1
3628 0 : DO j = 1, SIZE(mixed_cdft%dlb_control%recv_work_repl)
3629 0 : IF (mixed_cdft%dlb_control%recv_work_repl(j)) THEN
3630 0 : nbuffs = nbuffs + (SIZE(mixed_cdft%dlb_control%recv_info(j)%matrix_info) - 4)/3
3631 : END IF
3632 : END DO
3633 0 : ALLOCATE (req_recv(3*nbuffs))
3634 : END IF
3635 24 : DO j = 1, SIZE(mixed_cdft%dlb_control%recv_work_repl)
3636 24 : IF (mixed_cdft%dlb_control%recv_work_repl(j)) THEN
3637 8 : IF (.NOT. mixed_cdft%is_special) THEN
3638 8 : offset = 0
3639 8 : index = j + (j/2)
3640 64 : ALLOCATE (mixed_cdft%dlb_control%recvbuff(j)%buffs(SIZE(mixed_cdft%dlb_control%recv_info(j)%target_list, 2)))
3641 16 : DO i = 1, SIZE(mixed_cdft%dlb_control%recv_info(j)%target_list, 2)
3642 8 : IF (mixed_cdft%is_pencil) THEN
3643 : ALLOCATE (mixed_cdft%dlb_control%recvbuff(j)%buffs(i)% &
3644 : weight(mixed_cdft%dlb_control%recv_info(j)%matrix_info(4) + offset: &
3645 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(4) + offset + &
3646 : (mixed_cdft%dlb_control%recv_info(j)%target_list(2, i) - 1), &
3647 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(5): &
3648 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(6), &
3649 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(2): &
3650 0 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(3)))
3651 : ALLOCATE (mixed_cdft%dlb_control%recvbuff(j)%buffs(i)% &
3652 : cavity(mixed_cdft%dlb_control%recv_info(j)%matrix_info(4) + offset: &
3653 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(4) + offset + &
3654 : (mixed_cdft%dlb_control%recv_info(j)%target_list(2, i) - 1), &
3655 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(5): &
3656 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(6), &
3657 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(2): &
3658 0 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(3)))
3659 : ALLOCATE (mixed_cdft%dlb_control%recvbuff(j)%buffs(i)% &
3660 : gradients(3*natom, &
3661 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(4) + offset: &
3662 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(4) + offset + &
3663 : (mixed_cdft%dlb_control%recv_info(j)%target_list(2, i) - 1), &
3664 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(5): &
3665 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(6), &
3666 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(2): &
3667 0 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(3)))
3668 : ELSE
3669 : ALLOCATE (mixed_cdft%dlb_control%recvbuff(j)%buffs(i)% &
3670 : weight(mixed_cdft%dlb_control%recv_info(j)%matrix_info(5): &
3671 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(6), &
3672 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(4) + offset: &
3673 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(4) + offset + &
3674 : (mixed_cdft%dlb_control%recv_info(j)%target_list(2, i) - 1), &
3675 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(2): &
3676 40 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(3)))
3677 : ALLOCATE (mixed_cdft%dlb_control%recvbuff(j)%buffs(i)% &
3678 : cavity(mixed_cdft%dlb_control%recv_info(j)%matrix_info(5): &
3679 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(6), &
3680 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(4) + offset: &
3681 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(4) + offset + &
3682 : (mixed_cdft%dlb_control%recv_info(j)%target_list(2, i) - 1), &
3683 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(2): &
3684 40 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(3)))
3685 : ALLOCATE (mixed_cdft%dlb_control%recvbuff(j)%buffs(i)% &
3686 : gradients(3*natom, &
3687 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(5): &
3688 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(6), &
3689 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(4) + offset: &
3690 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(4) + offset + &
3691 : (mixed_cdft%dlb_control%recv_info(j)%target_list(2, i) - 1), &
3692 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(2): &
3693 48 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(3)))
3694 : END IF
3695 :
3696 : CALL force_env%para_env%irecv(msgout=mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%cavity, &
3697 : source=mixed_cdft%dlb_control%recv_info(j)%target_list(1, i), &
3698 : request=req_recv(3*(i - 1) + (j - 1)*offset_repl + 1), &
3699 8 : tag=mixed_cdft%dlb_control%recv_info(j)%target_list(3, i))
3700 : CALL force_env%para_env%irecv(msgout=mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%weight, &
3701 : source=mixed_cdft%dlb_control%recv_info(j)%target_list(1, i), &
3702 : request=req_recv(3*(i - 1) + (j - 1)*offset_repl + 2), &
3703 8 : tag=mixed_cdft%dlb_control%recv_info(j)%target_list(3, i) + 1)
3704 : CALL force_env%para_env%irecv(msgout=mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%gradients, &
3705 : source=mixed_cdft%dlb_control%recv_info(j)%target_list(1, i), &
3706 : request=req_recv(3*(i - 1) + (j - 1)*offset_repl + 3), &
3707 8 : tag=mixed_cdft%dlb_control%recv_info(j)%target_list(3, i) + 2)
3708 16 : offset = offset + mixed_cdft%dlb_control%recv_info(j)%target_list(2, i)
3709 : END DO
3710 8 : DEALLOCATE (mixed_cdft%dlb_control%recv_info(j)%matrix_info)
3711 : ELSE
3712 : ALLOCATE (mixed_cdft%dlb_control%recvbuff(j)% &
3713 0 : buffs((SIZE(mixed_cdft%dlb_control%recv_info(j)%matrix_info) - 4)/3))
3714 0 : index = 6
3715 0 : DO i = 1, SIZE(mixed_cdft%dlb_control%recvbuff(j)%buffs)
3716 : ALLOCATE (mixed_cdft%dlb_control%recvbuff(j)%buffs(i)% &
3717 : weight(mixed_cdft%dlb_control%recv_info(j)%matrix_info(index): &
3718 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(index + 1), &
3719 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(1): &
3720 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(2), &
3721 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(3): &
3722 0 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(4)))
3723 : ALLOCATE (mixed_cdft%dlb_control%recvbuff(j)%buffs(i)% &
3724 : cavity(mixed_cdft%dlb_control%recv_info(j)%matrix_info(index): &
3725 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(index + 1), &
3726 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(1): &
3727 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(2), &
3728 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(3): &
3729 0 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(4)))
3730 : ALLOCATE (mixed_cdft%dlb_control%recvbuff(j)%buffs(i)% &
3731 : gradients(3*natom, mixed_cdft%dlb_control%recv_info(j)%matrix_info(index): &
3732 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(index + 1), &
3733 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(1): &
3734 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(2), &
3735 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(3): &
3736 0 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(4)))
3737 : CALL force_env%para_env%irecv(msgout=mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%cavity, &
3738 : source=mixed_cdft%dlb_control%recv_info(j)%matrix_info(index - 1), &
3739 0 : request=req_recv(offset_repl), tag=1)
3740 : CALL force_env%para_env%irecv(msgout=mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%weight, &
3741 : source=mixed_cdft%dlb_control%recv_info(j)%matrix_info(index - 1), &
3742 0 : request=req_recv(offset_repl + 1), tag=2)
3743 : CALL force_env%para_env%irecv(msgout=mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%gradients, &
3744 : source=mixed_cdft%dlb_control%recv_info(j)%matrix_info(index - 1), &
3745 0 : request=req_recv(offset_repl + 2), tag=3)
3746 0 : index = index + 3
3747 0 : offset_repl = offset_repl + 3
3748 : END DO
3749 0 : DEALLOCATE (mixed_cdft%dlb_control%recv_info(j)%matrix_info)
3750 : END IF
3751 : END IF
3752 : END DO
3753 : END IF
3754 : END IF
3755 36 : cutoffs => cdft_control%becke_control%cutoffs
3756 36 : should_communicate = .FALSE.
3757 144 : DO i = 1, 3
3758 144 : cell_v(i) = cell%hmat(i, i)
3759 : END DO
3760 76 : DO iwork = my_work, 1, -1
3761 40 : IF (iwork == 2) THEN
3762 4 : IF (.NOT. mixed_cdft%is_special) THEN
3763 4 : cavity => mixed_cdft%dlb_control%cavity
3764 4 : weight => mixed_cdft%dlb_control%weight
3765 4 : gradients => mixed_cdft%dlb_control%gradients
3766 4 : ALLOCATE (completed(2, 3), nsent(2, 3))
3767 : ELSE
3768 0 : my_special_work = SIZE(mixed_cdft%dlb_control%sendbuff)
3769 0 : ALLOCATE (completed(2, 3*my_special_work), nsent(2, 3*my_special_work))
3770 : END IF
3771 4 : completed = .FALSE.
3772 40 : nsent = 0
3773 : ELSE
3774 36 : IF (.NOT. mixed_cdft%is_special) THEN
3775 36 : weight => mixed_cdft%weight
3776 36 : cavity => mixed_cdft%cavity
3777 36 : gradients => cdft_control%group(1)%gradients
3778 : ELSE
3779 0 : my_special_work = SIZE(mixed_cdft%dest_list)
3780 : END IF
3781 : END IF
3782 116 : DO ispecial = 1, my_special_work
3783 40 : nwork = 0
3784 40 : IF (mixed_cdft%is_special) THEN
3785 0 : IF (iwork == 1) THEN
3786 0 : weight => mixed_cdft%sendbuff(ispecial)%weight
3787 0 : cavity => mixed_cdft%sendbuff(ispecial)%cavity
3788 0 : gradients => mixed_cdft%sendbuff(ispecial)%gradients
3789 : ELSE
3790 0 : weight => mixed_cdft%dlb_control%sendbuff(ispecial)%weight
3791 0 : cavity => mixed_cdft%dlb_control%sendbuff(ispecial)%cavity
3792 0 : gradients => mixed_cdft%dlb_control%sendbuff(ispecial)%gradients
3793 : END IF
3794 : END IF
3795 1021 : DO k = LBOUND(weight, 1), UBOUND(weight, 1)
3796 901 : IF (mixed_cdft%dlb .AND. mixed_cdft%is_pencil .AND. .NOT. mixed_cdft%is_special) THEN
3797 0 : IF (mixed_cdft%dlb_control%send_work) THEN
3798 0 : IF (k >= mixed_cdft%dlb_control%distributed(1) .AND. &
3799 : k <= mixed_cdft%dlb_control%distributed(2)) THEN
3800 : CYCLE
3801 : END IF
3802 : END IF
3803 : END IF
3804 41952 : DO j = LBOUND(weight, 2), UBOUND(weight, 2)
3805 39209 : IF (mixed_cdft%dlb .AND. .NOT. mixed_cdft%is_pencil .AND. .NOT. mixed_cdft%is_special) THEN
3806 6400 : IF (mixed_cdft%dlb_control%send_work) THEN
3807 3120 : IF (j >= mixed_cdft%dlb_control%distributed(1) .AND. &
3808 : j <= mixed_cdft%dlb_control%distributed(2)) THEN
3809 : CYCLE
3810 : END IF
3811 : END IF
3812 : END IF
3813 : ! Check if any of the buffers have become available for deallocation
3814 39209 : IF (should_communicate) THEN
3815 1768 : DO icomm = 1, SIZE(nsent, 2)
3816 4420 : DO jcomm = 1, SIZE(nsent, 1)
3817 2652 : IF (nsent(jcomm, icomm) == 1) CYCLE
3818 470 : completed(jcomm, icomm) = req_send(jcomm, icomm)%test()
3819 470 : IF (completed(jcomm, icomm)) THEN
3820 24 : nsent(jcomm, icomm) = nsent(jcomm, icomm) + 1
3821 24 : nsent_total = nsent_total + 1
3822 24 : IF (nsent_total == SIZE(nsent, 1)*SIZE(nsent, 2)) should_communicate = .FALSE.
3823 : END IF
3824 2262 : IF (ALL(completed(:, icomm))) THEN
3825 12 : IF (MODULO(icomm, 3) == 1) THEN
3826 4 : IF (.NOT. mixed_cdft%is_special) THEN
3827 4 : DEALLOCATE (mixed_cdft%dlb_control%cavity)
3828 : ELSE
3829 0 : DEALLOCATE (mixed_cdft%dlb_control%sendbuff((icomm - 1)/3 + 1)%cavity)
3830 : END IF
3831 8 : ELSE IF (MODULO(icomm, 3) == 2) THEN
3832 4 : IF (.NOT. mixed_cdft%is_special) THEN
3833 4 : DEALLOCATE (mixed_cdft%dlb_control%weight)
3834 : ELSE
3835 0 : DEALLOCATE (mixed_cdft%dlb_control%sendbuff((icomm - 1)/3 + 1)%weight)
3836 : END IF
3837 : ELSE
3838 4 : IF (.NOT. mixed_cdft%is_special) THEN
3839 4 : DEALLOCATE (mixed_cdft%dlb_control%gradients)
3840 : ELSE
3841 0 : DEALLOCATE (mixed_cdft%dlb_control%sendbuff((icomm - 1)/3 + 1)%gradients)
3842 : END IF
3843 : END IF
3844 : END IF
3845 : END DO
3846 : END DO
3847 : END IF
3848 : ! Poll to prevent starvation
3849 39209 : IF (ASSOCIATED(req_recv)) &
3850 6400 : completed_recv = mp_testall(req_recv)
3851 : !
3852 1926389 : DO i = LBOUND(weight, 3), UBOUND(weight, 3)
3853 1807861 : IF (cdft_control%becke_control%cavity_confine) THEN
3854 1716736 : IF (cavity(k, j, i) < cdft_control%becke_control%eps_cavity) CYCLE
3855 : END IF
3856 988401 : grid_p(1) = k*dr(1) + shift(1)
3857 988401 : grid_p(2) = j*dr(2) + shift(2)
3858 988401 : grid_p(3) = i*dr(3) + shift(3)
3859 988401 : nskipped = 0
3860 2965203 : cell_functions = 1.0_dp
3861 988401 : skip_me = .FALSE.
3862 988401 : IF (store_vectors) distances = 0.0_dp
3863 988401 : IF (in_memory) THEN
3864 675116 : d_sum_Pm_dR = 0.0_dp
3865 675116 : d_sum_const_dR = 0.0_dp
3866 675116 : dP_i_dRi = 0.0_dp
3867 : END IF
3868 2554568 : DO iatom = 1, natom
3869 1976802 : IF (skip_me(iatom)) THEN
3870 65541 : cell_functions(iatom) = 0.0_dp
3871 65541 : IF (cdft_control%becke_control%should_skip) THEN
3872 37642 : IF (is_constraint(iatom)) nskipped = nskipped + 1
3873 37642 : IF (nskipped == cdft_control%natoms) THEN
3874 0 : IF (in_memory) THEN
3875 0 : IF (cdft_control%becke_control%cavity_confine) THEN
3876 0 : cavity(k, j, i) = 0.0_dp
3877 : END IF
3878 : END IF
3879 : EXIT
3880 : END IF
3881 : END IF
3882 : CYCLE
3883 : END IF
3884 1911261 : IF (store_vectors) THEN
3885 1911261 : IF (distances(iatom) == 0.0_dp) THEN
3886 6889176 : r = position_vecs(:, iatom)
3887 6889176 : dist_vec = (r - grid_p) - ANINT((r - grid_p)/cell_v)*cell_v
3888 6889176 : dist1 = SQRT(DOT_PRODUCT(dist_vec, dist_vec))
3889 6889176 : distance_vecs(:, iatom) = dist_vec
3890 1722294 : distances(iatom) = dist1
3891 : ELSE
3892 755868 : dist_vec = distance_vecs(:, iatom)
3893 : dist1 = distances(iatom)
3894 : END IF
3895 : ELSE
3896 0 : r = particle_set(iatom)%r
3897 0 : DO ip = 1, 3
3898 0 : r(ip) = MODULO(r(ip), cell%hmat(ip, ip)) - cell%hmat(ip, ip)/2._dp
3899 : END DO
3900 0 : dist_vec = (r - grid_p) - ANINT((r - grid_p)/cell_v)*cell_v
3901 0 : dist1 = SQRT(DOT_PRODUCT(dist_vec, dist_vec))
3902 : END IF
3903 2489027 : IF (dist1 <= cutoffs(iatom)) THEN
3904 509029 : IF (in_memory) THEN
3905 : IF (dist1 <= th) dist1 = th
3906 1184700 : dr_i_dR(:) = dist_vec(:)/dist1
3907 : END IF
3908 1527087 : DO jatom = 1, natom
3909 1527087 : IF (jatom /= iatom) THEN
3910 509029 : IF (jatom < iatom) THEN
3911 254521 : IF (.NOT. skip_me(jatom)) CYCLE
3912 : END IF
3913 320062 : IF (store_vectors) THEN
3914 320062 : IF (distances(jatom) == 0.0_dp) THEN
3915 1018032 : r1 = position_vecs(:, jatom)
3916 1018032 : dist_vec = (r1 - grid_p) - ANINT((r1 - grid_p)/cell_v)*cell_v
3917 1018032 : dist2 = SQRT(DOT_PRODUCT(dist_vec, dist_vec))
3918 1018032 : distance_vecs(:, jatom) = dist_vec
3919 254508 : distances(jatom) = dist2
3920 : ELSE
3921 262216 : dist_vec = distance_vecs(:, jatom)
3922 : dist2 = distances(jatom)
3923 : END IF
3924 : ELSE
3925 0 : r1 = particle_set(jatom)%r
3926 0 : DO ip = 1, 3
3927 0 : r1(ip) = MODULO(r1(ip), cell%hmat(ip, ip)) - cell%hmat(ip, ip)/2._dp
3928 : END DO
3929 0 : dist_vec = (r1 - grid_p) - ANINT((r1 - grid_p)/cell_v)*cell_v
3930 0 : dist2 = SQRT(DOT_PRODUCT(dist_vec, dist_vec))
3931 : END IF
3932 320062 : IF (in_memory) THEN
3933 191129 : IF (store_vectors) THEN
3934 764516 : dr1_r2 = pair_dist_vecs(:, iatom, jatom)
3935 : ELSE
3936 0 : dr1_r2 = (r - r1) - ANINT((r - r1)/cell_v)*cell_v
3937 : END IF
3938 : IF (dist2 <= th) dist2 = th
3939 191129 : tmp_const = (R12(iatom, jatom)**3)
3940 764516 : dr_ij_dR(:) = dr1_r2(:)/tmp_const
3941 : !derivativ w.r.t. Rj
3942 764516 : dr_j_dR = dist_vec(:)/dist2
3943 764516 : dmy_dR_j(:) = -(dr_j_dR(:)/R12(iatom, jatom) - (dist1 - dist2)*dr_ij_dR(:))
3944 : !derivativ w.r.t. Ri
3945 764516 : dmy_dR_i(:) = dr_i_dR(:)/R12(iatom, jatom) - (dist1 - dist2)*dr_ij_dR(:)
3946 : END IF
3947 320062 : my1 = (dist1 - dist2)/R12(iatom, jatom)
3948 320062 : IF (cdft_control%becke_control%adjust) THEN
3949 166839 : my1_homo = my1
3950 : my1 = my1 + &
3951 166839 : cdft_control%becke_control%aij(iatom, jatom)*(1.0_dp - my1**2)
3952 : END IF
3953 320062 : myexp = 1.5_dp*my1 - 0.5_dp*my1**3
3954 320062 : IF (in_memory) THEN
3955 191129 : dmyexp = 1.5_dp - 1.5_dp*my1**2
3956 : tmp_const = (1.5_dp**2)*dmyexp*(1 - myexp**2)* &
3957 191129 : (1.0_dp - ((1.5_dp*myexp - 0.5_dp*(myexp**3))**2))
3958 :
3959 764516 : ds_dR_i(:) = -0.5_dp*tmp_const*dmy_dR_i(:)
3960 764516 : ds_dR_j(:) = -0.5_dp*tmp_const*dmy_dR_j(:)
3961 191129 : IF (cdft_control%becke_control%adjust) THEN
3962 102514 : tmp_const = 1.0_dp - 2.0_dp*my1_homo*cdft_control%becke_control%aij(iatom, jatom)
3963 410056 : ds_dR_i(:) = ds_dR_i(:)*tmp_const
3964 410056 : ds_dR_j(:) = ds_dR_j(:)*tmp_const
3965 : END IF
3966 : END IF
3967 320062 : myexp = 1.5_dp*myexp - 0.5_dp*myexp**3
3968 320062 : myexp = 1.5_dp*myexp - 0.5_dp*myexp**3
3969 320062 : tmp_const = 0.5_dp*(1.0_dp - myexp)
3970 320062 : cell_functions(iatom) = cell_functions(iatom)*tmp_const
3971 320062 : IF (in_memory) THEN
3972 191129 : IF (ABS(tmp_const) <= th) tmp_const = tmp_const + th
3973 764516 : dP_i_dRi(:, iatom) = dP_i_dRi(:, iatom) + ds_dR_i(:)/tmp_const
3974 764516 : dP_i_dRj(:, iatom, jatom) = ds_dR_j(:)/tmp_const
3975 : END IF
3976 :
3977 320062 : IF (dist2 <= cutoffs(jatom)) THEN
3978 188967 : tmp_const = 0.5_dp*(1.0_dp + myexp)
3979 188967 : cell_functions(jatom) = cell_functions(jatom)*tmp_const
3980 188967 : IF (in_memory) THEN
3981 105046 : IF (ABS(tmp_const) <= th) tmp_const = tmp_const + th
3982 420184 : dP_i_dRj(:, jatom, iatom) = -ds_dR_i(:)/tmp_const
3983 420184 : dP_i_dRi(:, jatom) = dP_i_dRi(:, jatom) - ds_dR_j(:)/tmp_const
3984 : END IF
3985 : ELSE
3986 131095 : skip_me(jatom) = .TRUE.
3987 : END IF
3988 : END IF
3989 : END DO
3990 509029 : IF (in_memory) THEN
3991 1184700 : dP_i_dRi(:, iatom) = cell_functions(iatom)*dP_i_dRi(:, iatom)
3992 1184700 : d_sum_Pm_dR(:, iatom) = d_sum_Pm_dR(:, iatom) + dP_i_dRi(:, iatom)
3993 296175 : IF (is_constraint(iatom)) &
3994 : d_sum_const_dR(:, iatom) = d_sum_const_dR(:, iatom) + dP_i_dRi(:, iatom)* &
3995 1184700 : coefficients(iatom)
3996 888525 : DO jatom = 1, natom
3997 888525 : IF (jatom /= iatom) THEN
3998 296175 : IF (jatom < iatom) THEN
3999 148094 : IF (.NOT. skip_me(jatom)) THEN
4000 420184 : dP_i_dRj(:, iatom, jatom) = cell_functions(iatom)*dP_i_dRj(:, iatom, jatom)
4001 420184 : d_sum_Pm_dR(:, jatom) = d_sum_Pm_dR(:, jatom) + dP_i_dRj(:, iatom, jatom)
4002 105046 : IF (is_constraint(iatom)) &
4003 : d_sum_const_dR(:, jatom) = d_sum_const_dR(:, jatom) + &
4004 : dP_i_dRj(:, iatom, jatom)* &
4005 420184 : coefficients(iatom)
4006 : CYCLE
4007 : END IF
4008 : END IF
4009 764516 : dP_i_dRj(:, iatom, jatom) = cell_functions(iatom)*dP_i_dRj(:, iatom, jatom)
4010 764516 : d_sum_Pm_dR(:, jatom) = d_sum_Pm_dR(:, jatom) + dP_i_dRj(:, iatom, jatom)
4011 191129 : IF (is_constraint(iatom)) &
4012 : d_sum_const_dR(:, jatom) = d_sum_const_dR(:, jatom) + dP_i_dRj(:, iatom, jatom)* &
4013 764516 : coefficients(iatom)
4014 : END IF
4015 : END DO
4016 : END IF
4017 : ELSE
4018 1402232 : cell_functions(iatom) = 0.0_dp
4019 1402232 : skip_me(iatom) = .TRUE.
4020 1402232 : IF (cdft_control%becke_control%should_skip) THEN
4021 858928 : IF (is_constraint(iatom)) nskipped = nskipped + 1
4022 858928 : IF (nskipped == cdft_control%natoms) THEN
4023 410635 : IF (in_memory) THEN
4024 252800 : IF (cdft_control%becke_control%cavity_confine) THEN
4025 252800 : cavity(k, j, i) = 0.0_dp
4026 : END IF
4027 : END IF
4028 : EXIT
4029 : END IF
4030 : END IF
4031 : END IF
4032 : END DO
4033 988401 : IF (nskipped == cdft_control%natoms) CYCLE
4034 : sum_cell_f_constr = 0.0_dp
4035 1733298 : DO ip = 1, cdft_control%natoms
4036 : sum_cell_f_constr = sum_cell_f_constr + cell_functions(catom(ip))* &
4037 1733298 : cdft_control%group(1)%coeff(ip)
4038 : END DO
4039 577766 : sum_cell_f_all = 0.0_dp
4040 577766 : nwork = nwork + 1
4041 1733298 : DO ip = 1, natom
4042 1733298 : sum_cell_f_all = sum_cell_f_all + cell_functions(ip)
4043 : END DO
4044 577766 : IF (in_memory) THEN
4045 1266948 : DO iatom = 1, natom
4046 1266948 : IF (ABS(sum_cell_f_all) > 0.0_dp) THEN
4047 : gradients(3*(iatom - 1) + 1:3*(iatom - 1) + 3, k, j, i) = &
4048 : d_sum_const_dR(:, iatom)/sum_cell_f_all - sum_cell_f_constr* &
4049 1529032 : d_sum_Pm_dR(:, iatom)/(sum_cell_f_all**2)
4050 : END IF
4051 : END DO
4052 : END IF
4053 577766 : IF (ABS(sum_cell_f_all) > 0.000001) &
4054 359271 : weight(k, j, i) = sum_cell_f_constr/sum_cell_f_all
4055 : END DO ! i
4056 : END DO ! j
4057 : END DO ! k
4058 : ! Load balancing: post send requests
4059 80 : IF (iwork == 2) THEN
4060 4 : IF (.NOT. mixed_cdft%is_special) THEN
4061 12 : DO i = 1, SIZE(req_send, 1)
4062 : CALL force_env%para_env%isend(msgin=mixed_cdft%dlb_control%cavity, &
4063 : dest=mixed_cdft%dlb_control%my_dest_repl(i), &
4064 : request=req_send(i, 1), &
4065 8 : tag=mixed_cdft%dlb_control%dest_tags_repl(i))
4066 : CALL force_env%para_env%isend(msgin=mixed_cdft%dlb_control%weight, &
4067 : dest=mixed_cdft%dlb_control%my_dest_repl(i), &
4068 : request=req_send(i, 2), &
4069 8 : tag=mixed_cdft%dlb_control%dest_tags_repl(i) + 1)
4070 : CALL force_env%para_env%isend(msgin=mixed_cdft%dlb_control%gradients, &
4071 : dest=mixed_cdft%dlb_control%my_dest_repl(i), &
4072 : request=req_send(i, 3), &
4073 12 : tag=mixed_cdft%dlb_control%dest_tags_repl(i) + 2)
4074 : END DO
4075 : should_communicate = .TRUE.
4076 : nsent_total = 0
4077 : ELSE
4078 0 : DO i = 1, SIZE(req_send, 1)
4079 : CALL force_env%para_env%isend(msgin=mixed_cdft%dlb_control%sendbuff(ispecial)%cavity, &
4080 : dest=mixed_cdft%dlb_control%sendbuff(ispecial)%rank(i), &
4081 0 : request=req_send(i, 3*(ispecial - 1) + 1), tag=1)
4082 : CALL force_env%para_env%isend(msgin=mixed_cdft%dlb_control%sendbuff(ispecial)%weight, &
4083 : dest=mixed_cdft%dlb_control%sendbuff(ispecial)%rank(i), &
4084 0 : request=req_send(i, 3*(ispecial - 1) + 2), tag=2)
4085 : CALL force_env%para_env%isend(msgin=mixed_cdft%dlb_control%sendbuff(ispecial)%gradients, &
4086 : dest=mixed_cdft%dlb_control%sendbuff(ispecial)%rank(i), &
4087 0 : request=req_send(i, 3*(ispecial - 1) + 3), tag=3)
4088 : END DO
4089 0 : IF (ispecial == my_special_work) THEN
4090 0 : should_communicate = .TRUE.
4091 0 : nsent_total = 0
4092 : END IF
4093 : END IF
4094 4 : work(mixed_cdft%dlb_control%my_source + 1) = work(mixed_cdft%dlb_control%my_source + 1) + nwork
4095 4 : work_dlb(force_env%para_env%mepos + 1) = work_dlb(force_env%para_env%mepos + 1) + nwork
4096 : ELSE
4097 36 : IF (mixed_cdft%dlb) work(force_env%para_env%mepos + 1) = work(force_env%para_env%mepos + 1) + nwork
4098 36 : IF (mixed_cdft%dlb) work_dlb(force_env%para_env%mepos + 1) = work_dlb(force_env%para_env%mepos + 1) + nwork
4099 : END IF
4100 : END DO ! ispecial
4101 : END DO ! iwork
4102 : ! Load balancing: wait for communication and deallocate sending buffers
4103 36 : IF (mixed_cdft%dlb) THEN
4104 16 : IF (mixed_cdft%dlb_control%recv_work .AND. &
4105 : ANY(mixed_cdft%dlb_control%recv_work_repl)) THEN
4106 48 : ALLOCATE (req_total(SIZE(req_recv) + SIZE(req_send, 1)*SIZE(req_send, 2)))
4107 4 : index = SIZE(req_recv)
4108 28 : req_total(1:index) = req_recv
4109 16 : DO i = 1, SIZE(req_send, 2)
4110 40 : DO j = 1, SIZE(req_send, 1)
4111 24 : index = index + 1
4112 36 : req_total(index) = req_send(j, i)
4113 : END DO
4114 : END DO
4115 4 : CALL mp_waitall(req_total)
4116 4 : DEALLOCATE (req_total)
4117 4 : IF (ASSOCIATED(mixed_cdft%dlb_control%cavity)) &
4118 0 : DEALLOCATE (mixed_cdft%dlb_control%cavity)
4119 4 : IF (ASSOCIATED(mixed_cdft%dlb_control%weight)) &
4120 0 : DEALLOCATE (mixed_cdft%dlb_control%weight)
4121 4 : IF (ASSOCIATED(mixed_cdft%dlb_control%gradients)) &
4122 0 : DEALLOCATE (mixed_cdft%dlb_control%gradients)
4123 4 : IF (mixed_cdft%is_special) THEN
4124 0 : DO j = 1, SIZE(mixed_cdft%dlb_control%sendbuff)
4125 0 : IF (ASSOCIATED(mixed_cdft%dlb_control%sendbuff(j)%cavity)) &
4126 0 : DEALLOCATE (mixed_cdft%dlb_control%sendbuff(j)%cavity)
4127 0 : IF (ASSOCIATED(mixed_cdft%dlb_control%sendbuff(j)%weight)) &
4128 0 : DEALLOCATE (mixed_cdft%dlb_control%sendbuff(j)%weight)
4129 0 : IF (ASSOCIATED(mixed_cdft%dlb_control%sendbuff(j)%gradients)) &
4130 0 : DEALLOCATE (mixed_cdft%dlb_control%sendbuff(j)%gradients)
4131 : END DO
4132 0 : DEALLOCATE (mixed_cdft%dlb_control%sendbuff)
4133 : END IF
4134 4 : DEALLOCATE (req_send, req_recv)
4135 4 : ELSE IF (mixed_cdft%dlb_control%recv_work) THEN
4136 0 : IF (should_communicate) THEN
4137 0 : CALL mp_waitall(req_send)
4138 : END IF
4139 0 : IF (ASSOCIATED(mixed_cdft%dlb_control%cavity)) &
4140 0 : DEALLOCATE (mixed_cdft%dlb_control%cavity)
4141 0 : IF (ASSOCIATED(mixed_cdft%dlb_control%weight)) &
4142 0 : DEALLOCATE (mixed_cdft%dlb_control%weight)
4143 0 : IF (ASSOCIATED(mixed_cdft%dlb_control%gradients)) &
4144 0 : DEALLOCATE (mixed_cdft%dlb_control%gradients)
4145 0 : IF (mixed_cdft%is_special) THEN
4146 0 : DO j = 1, SIZE(mixed_cdft%dlb_control%sendbuff)
4147 0 : IF (ASSOCIATED(mixed_cdft%dlb_control%sendbuff(j)%cavity)) &
4148 0 : DEALLOCATE (mixed_cdft%dlb_control%sendbuff(j)%cavity)
4149 0 : IF (ASSOCIATED(mixed_cdft%dlb_control%sendbuff(j)%weight)) &
4150 0 : DEALLOCATE (mixed_cdft%dlb_control%sendbuff(j)%weight)
4151 0 : IF (ASSOCIATED(mixed_cdft%dlb_control%sendbuff(j)%gradients)) &
4152 0 : DEALLOCATE (mixed_cdft%dlb_control%sendbuff(j)%gradients)
4153 : END DO
4154 0 : DEALLOCATE (mixed_cdft%dlb_control%sendbuff)
4155 : END IF
4156 0 : DEALLOCATE (req_send)
4157 8 : ELSE IF (ANY(mixed_cdft%dlb_control%recv_work_repl)) THEN
4158 4 : CALL mp_waitall(req_recv)
4159 4 : DEALLOCATE (req_recv)
4160 : END IF
4161 : END IF
4162 36 : IF (mixed_cdft%dlb) THEN
4163 40 : CALL force_env%para_env%sum(work)
4164 40 : CALL force_env%para_env%sum(work_dlb)
4165 8 : IF (.NOT. ASSOCIATED(mixed_cdft%dlb_control%prediction_error)) &
4166 12 : ALLOCATE (mixed_cdft%dlb_control%prediction_error(force_env%para_env%num_pe))
4167 40 : mixed_cdft%dlb_control%prediction_error = mixed_cdft%dlb_control%expected_work - work
4168 : IF (debug_this_module .AND. iounit > 0) THEN
4169 : DO i = 1, SIZE(work, 1)
4170 : WRITE (iounit, '(A,I10,I10,I10)') &
4171 : 'Work', work(i), work_dlb(i), mixed_cdft%dlb_control%expected_work(i)
4172 : END DO
4173 : END IF
4174 8 : DEALLOCATE (work, work_dlb, mixed_cdft%dlb_control%expected_work)
4175 : END IF
4176 36 : NULLIFY (gradients, weight, cavity)
4177 36 : IF (ALLOCATED(coefficients)) &
4178 36 : DEALLOCATE (coefficients)
4179 36 : IF (in_memory) THEN
4180 24 : DEALLOCATE (ds_dR_j)
4181 24 : DEALLOCATE (ds_dR_i)
4182 24 : DEALLOCATE (d_sum_Pm_dR)
4183 24 : DEALLOCATE (d_sum_const_dR)
4184 24 : DEALLOCATE (dP_i_dRj)
4185 24 : DEALLOCATE (dP_i_dRi)
4186 24 : NULLIFY (gradients)
4187 24 : IF (store_vectors) THEN
4188 24 : DEALLOCATE (pair_dist_vecs)
4189 : END IF
4190 : END IF
4191 36 : NULLIFY (cutoffs)
4192 36 : IF (ALLOCATED(is_constraint)) &
4193 36 : DEALLOCATE (is_constraint)
4194 36 : DEALLOCATE (catom)
4195 36 : DEALLOCATE (R12)
4196 36 : DEALLOCATE (cell_functions)
4197 36 : DEALLOCATE (skip_me)
4198 36 : IF (ALLOCATED(completed)) &
4199 4 : DEALLOCATE (completed)
4200 36 : IF (ASSOCIATED(nsent)) &
4201 4 : DEALLOCATE (nsent)
4202 36 : IF (store_vectors) THEN
4203 36 : DEALLOCATE (distances)
4204 36 : DEALLOCATE (distance_vecs)
4205 36 : DEALLOCATE (position_vecs)
4206 : END IF
4207 36 : IF (ASSOCIATED(req_send)) &
4208 0 : DEALLOCATE (req_send)
4209 36 : IF (ASSOCIATED(req_recv)) &
4210 0 : DEALLOCATE (req_recv)
4211 : CALL cp_print_key_finished_output(iounit, logger, force_env_section, &
4212 36 : "MIXED%MIXED_CDFT%PRINT%PROGRAM_RUN_INFO")
4213 36 : CALL timestop(handle)
4214 :
4215 72 : END SUBROUTINE mixed_becke_constraint_low
4216 :
4217 : END MODULE mixed_cdft_methods
|