Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2025 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : MODULE cp2k_runs
10 : USE atom, ONLY: atom_code
11 : USE bibliography, ONLY: Hutter2014,&
12 : cite_reference
13 : USE bsse, ONLY: do_bsse_calculation
14 : USE cell_opt, ONLY: cp_cell_opt
15 : USE cp2k_debug, ONLY: cp2k_debug_energy_and_forces
16 : USE cp2k_info, ONLY: compile_date,&
17 : compile_revision,&
18 : cp2k_version,&
19 : cp2k_year
20 : USE cp_control_types, ONLY: dft_control_type
21 : USE cp_dbcsr_api, ONLY: dbcsr_finalize_lib,&
22 : dbcsr_init_lib,&
23 : dbcsr_print_config,&
24 : dbcsr_print_statistics
25 : USE cp_dbcsr_cp2k_link, ONLY: cp_dbcsr_config
26 : USE cp_files, ONLY: close_file,&
27 : open_file
28 : USE cp_log_handling, ONLY: cp_get_default_logger,&
29 : cp_logger_get_default_io_unit,&
30 : cp_logger_type,&
31 : cp_logger_would_log,&
32 : cp_note_level
33 : USE cp_output_handling, ONLY: cp_add_iter_level,&
34 : cp_print_key_finished_output,&
35 : cp_print_key_unit_nr,&
36 : cp_rm_iter_level
37 : USE cp_parser_methods, ONLY: parser_search_string
38 : USE cp_parser_types, ONLY: cp_parser_type,&
39 : parser_create,&
40 : parser_release
41 : USE cp_units, ONLY: cp_unit_set_create,&
42 : cp_unit_set_release,&
43 : cp_unit_set_type,&
44 : export_units_as_xml
45 : USE dbm_api, ONLY: dbm_library_print_stats
46 : USE environment, ONLY: cp2k_finalize,&
47 : cp2k_init,&
48 : cp2k_read,&
49 : cp2k_setup
50 : USE f77_interface, ONLY: create_force_env,&
51 : destroy_force_env,&
52 : f77_default_para_env => default_para_env,&
53 : f_env_add_defaults,&
54 : f_env_rm_defaults,&
55 : f_env_type
56 : USE farming_methods, ONLY: do_deadlock,&
57 : do_nothing,&
58 : do_wait,&
59 : farming_parse_input,&
60 : get_next_job
61 : USE farming_types, ONLY: deallocate_farming_env,&
62 : farming_env_type,&
63 : init_farming_env,&
64 : job_finished,&
65 : job_running
66 : USE force_env_methods, ONLY: force_env_calc_energy_force
67 : USE force_env_types, ONLY: force_env_get,&
68 : force_env_type
69 : USE geo_opt, ONLY: cp_geo_opt
70 : USE global_types, ONLY: global_environment_type,&
71 : globenv_create,&
72 : globenv_release
73 : USE grid_api, ONLY: grid_library_print_stats,&
74 : grid_library_set_config
75 : USE input_constants, ONLY: &
76 : bsse_run, cell_opt_run, debug_run, do_atom, do_band, do_cp2k, do_embed, do_farming, &
77 : do_fist, do_ipi, do_mixed, do_nnp, do_opt_basis, do_optimize_input, do_qmmm, do_qs, &
78 : do_sirius, do_swarm, do_tamc, do_test, do_tree_mc, do_tree_mc_ana, driver_run, ehrenfest, &
79 : energy_force_run, energy_run, geo_opt_run, linear_response_run, mol_dyn_run, mon_car_run, &
80 : negf_run, none_run, pint_run, real_time_propagation, rtp_method_bse, tree_mc_run, vib_anal
81 : USE input_cp2k, ONLY: create_cp2k_root_section
82 : USE input_cp2k_check, ONLY: check_cp2k_input
83 : USE input_cp2k_global, ONLY: create_global_section
84 : USE input_cp2k_read, ONLY: read_input
85 : USE input_keyword_types, ONLY: keyword_release
86 : USE input_parsing, ONLY: section_vals_parse
87 : USE input_section_types, ONLY: &
88 : section_release, section_type, section_vals_create, section_vals_get_subs_vals, &
89 : section_vals_release, section_vals_retain, section_vals_type, section_vals_val_get, &
90 : section_vals_write, write_section_xml
91 : USE ipi_driver, ONLY: run_driver
92 : USE kinds, ONLY: default_path_length,&
93 : default_string_length,&
94 : dp,&
95 : int_8
96 : USE library_tests, ONLY: lib_test
97 : USE machine, ONLY: default_output_unit,&
98 : m_chdir,&
99 : m_flush,&
100 : m_getcwd,&
101 : m_memory,&
102 : m_memory_max,&
103 : m_walltime
104 : USE mc_run, ONLY: do_mon_car
105 : USE md_run, ONLY: qs_mol_dyn
106 : USE message_passing, ONLY: mp_any_source,&
107 : mp_comm_type,&
108 : mp_para_env_release,&
109 : mp_para_env_type
110 : USE mscfg_methods, ONLY: do_mol_loop,&
111 : loop_over_molecules
112 : USE neb_methods, ONLY: neb
113 : USE negf_methods, ONLY: do_negf
114 : USE offload_api, ONLY: offload_get_chosen_device,&
115 : offload_get_device_count
116 : USE optimize_basis, ONLY: run_optimize_basis
117 : USE optimize_input, ONLY: run_optimize_input
118 : USE pint_methods, ONLY: do_pint_run
119 : USE qs_environment_types, ONLY: get_qs_env
120 : USE qs_linres_module, ONLY: linres_calculation
121 : USE reference_manager, ONLY: export_references_as_xml
122 : USE rt_bse, ONLY: run_propagation_bse
123 : USE rt_propagation, ONLY: rt_prop_setup
124 : USE swarm, ONLY: run_swarm
125 : USE tamc_run, ONLY: qs_tamc
126 : USE tmc_setup, ONLY: do_analyze_files,&
127 : do_tmc
128 : USE vibrational_analysis, ONLY: vb_anal
129 : #include "../base/base_uses.f90"
130 :
131 : IMPLICIT NONE
132 :
133 : PRIVATE
134 :
135 : PUBLIC :: write_xml_file, run_input
136 :
137 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp2k_runs'
138 :
139 : CONTAINS
140 :
! **************************************************************************************************
!> \brief performs an instance of a cp2k run
!> \param input_declaration declaration of the input structure; handed to the input reader,
!>        the input checker and to the individual program/run-type drivers
!> \param input_file_name name of the file to be opened for input
!> \param output_unit unit to which output should be written
!> \param mpi_comm communicator of this run; it is copied into a para_env that is allocated
!>        at entry and released again before returning
!> \param initial_variables key-value list of initial preprocessor variables
!> \author Joost VandeVondele
!> \note
!>      mpi_comm should be a valid communicator
!>      output_unit should be writeable by at least the lowest rank of the mpi group
!>
!>      recursive because a given run_type might need to be able to perform
!>      another cp2k_run as part of its job (e.g. farming, classical equilibration, ...)
!>
!>      the idea is that a cp2k instance should be able to run with just three
!>      arguments, i.e. a given input file, output unit, mpi communicator.
!>      giving these three to cp2k_run should produce a valid run.
!>      the only task of the PROGRAM cp2k is to create valid instances of the
!>      above arguments. Ideally, anything that is called afterwards should be
!>      able to run simultaneously / multithreaded / sequential / parallel / ...
!>      and able to fail safe
!>
!>      overall flow: initialise DBCSR, read and check the input, dispatch on
!>      GLOBAL%PROGRAM_NAME, dispatch on globenv%run_type_id, print library
!>      statistics and the peak memory, and finally tear everything down again
! **************************************************************************************************
   RECURSIVE SUBROUTINE cp2k_run(input_declaration, input_file_name, output_unit, mpi_comm, initial_variables)
      TYPE(section_type), POINTER                        :: input_declaration
      CHARACTER(LEN=*), INTENT(IN)                       :: input_file_name
      INTEGER, INTENT(IN)                                :: output_unit

      CLASS(mp_comm_type)                                :: mpi_comm
      CHARACTER(len=default_path_length), &
         DIMENSION(:, :), INTENT(IN)                     :: initial_variables

      INTEGER                                            :: f_env_handle, grid_backend, ierr, &
                                                            iter_level, method_name_id, &
                                                            new_env_id, prog_name_id, run_type_id
#if defined(__DBCSR_ACC)
      INTEGER, TARGET                                    :: offload_chosen_device
#endif
      INTEGER, POINTER                                   :: active_device_id
      INTEGER(KIND=int_8)                                :: m_memory_max_mpi
      LOGICAL                                            :: echo_input, grid_apply_cutoff, &
                                                            grid_validate
      TYPE(cp_logger_type), POINTER                      :: logger, sublogger
      TYPE(mp_para_env_type), POINTER                    :: para_env
      TYPE(dft_control_type), POINTER                    :: dft_control
      TYPE(f_env_type), POINTER                          :: f_env
      TYPE(force_env_type), POINTER                      :: force_env
      TYPE(global_environment_type), POINTER             :: globenv
      TYPE(section_vals_type), POINTER                   :: glob_section, input_file, root_section

      NULLIFY (para_env, f_env, dft_control, active_device_id)
      ! private para_env for this run, released at the very end of the routine
      ALLOCATE (para_env)
      para_env = mpi_comm

      ! active_device_id stays disassociated unless an offload device is available
#if defined(__DBCSR_ACC)
      IF (offload_get_device_count() > 0) THEN
         offload_chosen_device = offload_get_chosen_device()
         active_device_id => offload_chosen_device
      END IF
#endif
      CALL dbcsr_init_lib(mpi_comm%get_handle(), io_unit=output_unit, &
                          accdrv_active_device_id=active_device_id)

      NULLIFY (globenv, force_env)

      CALL cite_reference(Hutter2014)

      ! parse the input
      input_file => read_input(input_declaration, input_file_name, initial_variables=initial_variables, &
                               para_env=para_env)

      CALL para_env%sync()

      glob_section => section_vals_get_subs_vals(input_file, "GLOBAL")
      CALL section_vals_val_get(glob_section, "ECHO_INPUT", l_val=echo_input)
      logger => cp_get_default_logger()
      IF (echo_input) THEN
         CALL section_vals_write(input_file, &
                                 unit_nr=cp_logger_get_default_io_unit(logger), &
                                 hide_root=.TRUE., hide_defaults=.FALSE.)
      END IF

      CALL check_cp2k_input(input_declaration, input_file, para_env=para_env, output_unit=output_unit)
      ! root_section is an alias of input_file; no extra reference is taken here
      root_section => input_file
      CALL section_vals_val_get(input_file, "GLOBAL%PROGRAM_NAME", &
                                i_val=prog_name_id)
      ! NOTE(review): run_type_id is read here but not used further down in this routine;
      ! the run-type dispatch below is based on globenv%run_type_id
      CALL section_vals_val_get(input_file, "GLOBAL%RUN_TYPE", &
                                i_val=run_type_id)
      CALL section_vals_val_get(root_section, "FORCE_EVAL%METHOD", i_val=method_name_id)

      IF (prog_name_id /= do_cp2k) THEN
         ! initial setup (cp2k does it in the creation of the force_env)
         CALL globenv_create(globenv)
         ! extra reference on the input: it is released once after the program dispatch
         ! and once more (through root_section) in the final clean-up
         CALL section_vals_retain(input_file)
         CALL cp2k_init(para_env, output_unit, globenv, input_file_name=input_file_name)
         CALL cp2k_read(root_section, para_env, globenv)
         CALL cp2k_setup(root_section, para_env, globenv)
      END IF

      CALL cp_dbcsr_config(root_section)
      IF (output_unit > 0 .AND. &
          cp_logger_would_log(logger, cp_note_level)) THEN
         CALL dbcsr_print_config(unit_nr=output_unit)
         WRITE (UNIT=output_unit, FMT='()')
      END IF

      ! Configure the grid library.
      CALL section_vals_val_get(root_section, "GLOBAL%GRID%BACKEND", i_val=grid_backend)
      CALL section_vals_val_get(root_section, "GLOBAL%GRID%VALIDATE", l_val=grid_validate)
      CALL section_vals_val_get(root_section, "GLOBAL%GRID%APPLY_CUTOFF", l_val=grid_apply_cutoff)

      CALL grid_library_set_config(backend=grid_backend, &
                                   validate=grid_validate, &
                                   apply_cutoff=grid_apply_cutoff)

      ! first dispatch: which program has been requested
      SELECT CASE (prog_name_id)
      CASE (do_atom)
         globenv%run_type_id = none_run
         CALL atom_code(root_section)
      CASE (do_optimize_input)
         CALL run_optimize_input(input_declaration, root_section, para_env)
      CASE (do_swarm)
         CALL run_swarm(input_declaration, root_section, para_env, globenv, input_file_name)
      CASE (do_farming) ! TODO: refactor cp2k's startup code
         ! DBCSR is shut down around the farming run and brought up again afterwards;
         ! presumably because the nested cp2k_run calls initialise and finalise it themselves
         CALL dbcsr_finalize_lib()
         CALL farming_run(input_declaration, root_section, para_env, initial_variables)
         CALL dbcsr_init_lib(mpi_comm%get_handle(), io_unit=output_unit, &
                             accdrv_active_device_id=active_device_id)
      CASE (do_opt_basis)
         CALL run_optimize_basis(input_declaration, root_section, para_env)
         globenv%run_type_id = none_run
      CASE (do_cp2k)
         CALL create_force_env(new_env_id, &
                               input_declaration=input_declaration, &
                               input_path=input_file_name, &
                               output_path="__STD_OUT__", mpi_comm=para_env, &
                               output_unit=output_unit, &
                               owns_out_unit=.FALSE., &
                               input=input_file, ierr=ierr)
         CPASSERT(ierr == 0)
         CALL f_env_add_defaults(new_env_id, f_env, handle=f_env_handle)
         force_env => f_env%force_env
         ! in this branch globenv is the one owned by the force_env
         CALL force_env_get(force_env, globenv=globenv)
      CASE (do_test)
         CALL lib_test(root_section, para_env, globenv)
      CASE (do_tree_mc) ! TMC entry point
         CALL do_tmc(input_declaration, root_section, para_env, globenv)
      CASE (do_tree_mc_ana)
         CALL do_analyze_files(input_declaration, root_section, para_env)
      CASE default
         CPABORT("Unknown GLOBAL%PROGRAM_NAME requested in cp2k_run")
      END SELECT
      CALL section_vals_release(input_file)

      ! second dispatch: which kind of run has to be performed
      SELECT CASE (globenv%run_type_id)
      CASE (pint_run)
         CALL do_pint_run(para_env, root_section, input_declaration, globenv)
      CASE (none_run, tree_mc_run)
         ! do nothing
      CASE (driver_run)
         CALL run_driver(force_env, globenv)
      CASE (energy_run, energy_force_run)
         IF (method_name_id /= do_qs .AND. &
             method_name_id /= do_sirius .AND. &
             method_name_id /= do_qmmm .AND. &
             method_name_id /= do_mixed .AND. &
             method_name_id /= do_nnp .AND. &
             method_name_id /= do_embed .AND. &
             method_name_id /= do_fist .AND. &
             method_name_id /= do_ipi) &
            CPABORT("Energy/Force run not available for all methods ")

         sublogger => cp_get_default_logger()
         CALL cp_add_iter_level(sublogger%iter_info, "JUST_ENERGY", &
                                n_rlevel_new=iter_level)

         ! loop over molecules to generate a molecular guess
         ! this procedure is initiated here to avoid passing globenv deep down
         ! the subroutine stack
         IF (do_mol_loop(force_env=force_env)) &
            CALL loop_over_molecules(globenv, force_env)

         SELECT CASE (globenv%run_type_id)
         CASE (energy_run)
            CALL force_env_calc_energy_force(force_env, calc_force=.FALSE.)
         CASE (energy_force_run)
            CALL force_env_calc_energy_force(force_env, calc_force=.TRUE.)
         CASE default
            CPABORT("Unexpected run type in the energy/force branch of cp2k_run")
         END SELECT
         CALL cp_rm_iter_level(sublogger%iter_info, level_name="JUST_ENERGY", n_rlevel_att=iter_level)
      CASE (mol_dyn_run)
         CALL qs_mol_dyn(force_env, globenv)
      CASE (geo_opt_run)
         CALL cp_geo_opt(force_env, globenv)
      CASE (cell_opt_run)
         CALL cp_cell_opt(force_env, globenv)
      CASE (mon_car_run)
         CALL do_mon_car(force_env, globenv, input_declaration, input_file_name)
      CASE (do_tamc)
         CALL qs_tamc(force_env, globenv)
      CASE (real_time_propagation)
         IF (method_name_id /= do_qs) &
            CPABORT("Real time propagation needs METHOD QS. ")
         CALL get_qs_env(force_env%qs_env, dft_control=dft_control)
         dft_control%rtp_control%fixed_ions = .TRUE.
         SELECT CASE (dft_control%rtp_control%rtp_method)
         CASE (rtp_method_bse)
            ! Run the TD-BSE method
            CALL run_propagation_bse(force_env%qs_env, force_env)
         CASE default
            ! Run the TDDFT method
            CALL rt_prop_setup(force_env)
         END SELECT
      CASE (ehrenfest)
         IF (method_name_id /= do_qs) &
            CPABORT("Ehrenfest dynamics needs METHOD QS ")
         CALL get_qs_env(force_env%qs_env, dft_control=dft_control)
         ! same MD driver as mol_dyn_run, but with the ions flagged as not fixed
         dft_control%rtp_control%fixed_ions = .FALSE.
         CALL qs_mol_dyn(force_env, globenv)
      CASE (bsse_run)
         CALL do_bsse_calculation(force_env, globenv)
      CASE (linear_response_run)
         IF (method_name_id /= do_qs .AND. &
             method_name_id /= do_qmmm) &
            CPABORT("Property calculations by Linear Response only within the QS or QMMM program ")
         ! The Ground State is needed, it can be read from Restart
         CALL force_env_calc_energy_force(force_env, calc_force=.FALSE., linres=.TRUE.)
         CALL linres_calculation(force_env)
      CASE (debug_run)
         SELECT CASE (method_name_id)
         CASE (do_qs, do_qmmm, do_fist)
            CALL cp2k_debug_energy_and_forces(force_env)
         CASE DEFAULT
            CPABORT("Debug run available only with QS, FIST, and QMMM program ")
         END SELECT
      CASE (vib_anal)
         CALL vb_anal(root_section, input_declaration, para_env, globenv)
      CASE (do_band)
         CALL neb(root_section, input_declaration, para_env, globenv)
      CASE (negf_run)
         CALL do_negf(force_env)
      CASE default
         CPABORT("Run type not handled by cp2k_run")
      END SELECT

      !sample peak memory
      CALL m_memory()

      CALL dbcsr_print_statistics()
      CALL dbm_library_print_stats(mpi_comm=mpi_comm, output_unit=output_unit)
      CALL grid_library_print_stats(mpi_comm=mpi_comm, output_unit=output_unit)

      ! maximum of the per-process peak over all ranks, printed in MiB rounded up
      m_memory_max_mpi = m_memory_max
      CALL mpi_comm%max(m_memory_max_mpi)
      IF (output_unit > 0) THEN
         WRITE (output_unit, *)
         WRITE (output_unit, '(T2,"MEMORY| Estimated peak process memory [MiB]",T73,I8)') &
            (m_memory_max_mpi + (1024*1024) - 1)/(1024*1024)
      END IF

      IF (prog_name_id == do_cp2k) THEN
         ! re-attach force_env and globenv before the f_env is torn down; a globenv that
         ! differs from the one attached to the force_env is released first
         f_env%force_env => force_env ! for mc
         IF (ASSOCIATED(force_env%globenv)) THEN
            IF (.NOT. ASSOCIATED(force_env%globenv, globenv)) THEN
               CALL globenv_release(force_env%globenv) !mc
            END IF
         END IF
         force_env%globenv => globenv !mc
         CALL f_env_rm_defaults(f_env, ierr=ierr, &
                                handle=f_env_handle)
         CPASSERT(ierr == 0)
         CALL destroy_force_env(new_env_id, ierr=ierr)
         CPASSERT(ierr == 0)
      ELSE
         ! counterpart of the cp2k_init/cp2k_read/cp2k_setup sequence done at the start
         CALL cp2k_finalize(root_section, para_env, globenv)
         CPASSERT(globenv%ref_count == 1)
         CALL section_vals_release(root_section)
         CALL globenv_release(globenv)
      END IF

      CALL dbcsr_finalize_lib()

      CALL mp_para_env_release(para_env)

   END SUBROUTINE cp2k_run
428 :
429 : ! **************************************************************************************************
430 : !> \brief performs a farming run that performs several independent cp2k_runs
431 : !> \param input_declaration ...
432 : !> \param root_section ...
433 : !> \param para_env ...
434 : !> \param initial_variables ...
435 : !> \author Joost VandeVondele
436 : !> \note
437 : !> needs to be part of this module as the cp2k_run -> farming_run -> cp2k_run
438 : !> calling style creates a hard circular dependency
439 : ! **************************************************************************************************
440 24 : RECURSIVE SUBROUTINE farming_run(input_declaration, root_section, para_env, initial_variables)
441 : TYPE(section_type), POINTER :: input_declaration
442 : TYPE(section_vals_type), POINTER :: root_section
443 : TYPE(mp_para_env_type), POINTER :: para_env
444 : CHARACTER(len=default_path_length), DIMENSION(:, :), INTENT(IN) :: initial_variables
445 :
446 : CHARACTER(len=*), PARAMETER :: routineN = 'farming_run'
447 : INTEGER, PARAMETER :: minion_status_done = -3, &
448 : minion_status_wait = -4
449 :
450 : CHARACTER(len=7) :: label
451 : CHARACTER(LEN=default_path_length) :: output_file
452 : CHARACTER(LEN=default_string_length) :: str
453 : INTEGER :: dest, handle, i, i_job_to_restart, ierr, ijob, ijob_current, &
454 : ijob_end, ijob_start, iunit, n_jobs_to_run, new_output_unit, &
455 : new_rank, ngroups, num_minions, output_unit, primus_minion, &
456 : minion_rank, source, tag, todo
457 24 : INTEGER, DIMENSION(:), POINTER :: group_distribution, &
458 24 : captain_minion_partition, &
459 24 : minion_distribution, &
460 24 : minion_status
461 : LOGICAL :: found, captain, minion
462 : REAL(KIND=dp) :: t1, t2
463 24 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: waittime
464 : TYPE(cp_logger_type), POINTER :: logger
465 : TYPE(cp_parser_type), POINTER :: my_parser
466 : TYPE(cp_unit_set_type) :: default_units
467 : TYPE(farming_env_type), POINTER :: farming_env
468 : TYPE(section_type), POINTER :: g_section
469 : TYPE(section_vals_type), POINTER :: g_data
470 : TYPE(mp_comm_type) :: minion_group, new_group
471 :
472 : ! the primus of all minions, talks to the captain on topics concerning all minions
473 24 : CALL timeset(routineN, handle)
474 24 : NULLIFY (my_parser, g_section, g_data)
475 :
476 24 : logger => cp_get_default_logger()
477 : output_unit = cp_print_key_unit_nr(logger, root_section, "FARMING%PROGRAM_RUN_INFO", &
478 24 : extension=".log")
479 :
480 24 : IF (output_unit > 0) WRITE (output_unit, FMT="(T2,A)") "FARMING| Hi, welcome on this farm!"
481 :
482 24 : ALLOCATE (farming_env)
483 24 : CALL init_farming_env(farming_env)
484 : ! remember where we started
485 24 : CALL m_getcwd(farming_env%cwd)
486 24 : CALL farming_parse_input(farming_env, root_section, para_env)
487 :
488 : ! the full mpi group is first split in a minion group and a captain group, the latter being at most 1 process
489 24 : minion = .TRUE.
490 24 : captain = .FALSE.
491 24 : IF (farming_env%captain_minion) THEN
492 4 : IF (output_unit > 0) WRITE (output_unit, FMT="(T2,A)") "FARMING| Using a Captain-Minion setup"
493 :
494 4 : ALLOCATE (captain_minion_partition(0:1))
495 12 : captain_minion_partition = (/1, para_env%num_pe - 1/)
496 12 : ALLOCATE (group_distribution(0:para_env%num_pe - 1))
497 :
498 : CALL minion_group%from_split(para_env, ngroups, group_distribution, &
499 4 : n_subgroups=2, group_partition=captain_minion_partition)
500 4 : DEALLOCATE (captain_minion_partition)
501 4 : DEALLOCATE (group_distribution)
502 4 : num_minions = minion_group%num_pe
503 4 : minion_rank = minion_group%mepos
504 :
505 4 : IF (para_env%mepos == 0) THEN
506 2 : minion = .FALSE.
507 2 : captain = .TRUE.
508 : ! on the captain node, num_minions corresponds to the size of the captain group
509 2 : CPASSERT(num_minions == 1)
510 2 : num_minions = para_env%num_pe - 1
511 2 : minion_rank = -1
512 : END IF
513 4 : CPASSERT(num_minions == para_env%num_pe - 1)
514 : ELSE
515 : ! all processes are minions
516 20 : IF (output_unit > 0) WRITE (output_unit, FMT="(T2,A)") "FARMING| Using a Minion-only setup"
517 20 : CALL minion_group%from_dup(para_env)
518 20 : num_minions = minion_group%num_pe
519 20 : minion_rank = minion_group%mepos
520 : END IF
521 24 : IF (output_unit > 0) WRITE (output_unit, FMT="(T2,A,I0)") "FARMING| Number of Minions ", num_minions
522 :
523 : ! keep track of which para_env rank is which minion/captain
524 72 : ALLOCATE (minion_distribution(0:para_env%num_pe - 1))
525 72 : minion_distribution = 0
526 24 : minion_distribution(para_env%mepos) = minion_rank
527 120 : CALL para_env%sum(minion_distribution)
528 : ! we do have a primus inter pares
529 24 : primus_minion = 0
530 48 : DO i = 1, para_env%num_pe - 1
531 48 : IF (minion_distribution(i) == 0) primus_minion = i
532 : END DO
533 :
534 : ! split the current communicator for the minions
535 : ! in a new_group, new_size and new_rank according to the number of groups required according to the input
536 72 : ALLOCATE (group_distribution(0:num_minions - 1))
537 68 : group_distribution = -1
538 24 : IF (minion) THEN
539 22 : IF (farming_env%group_size_wish_set) THEN
540 4 : farming_env%group_size_wish = MIN(farming_env%group_size_wish, para_env%num_pe)
541 : CALL new_group%from_split(minion_group, ngroups, group_distribution, &
542 4 : subgroup_min_size=farming_env%group_size_wish, stride=farming_env%stride)
543 18 : ELSE IF (farming_env%ngroup_wish_set) THEN
544 18 : IF (ASSOCIATED(farming_env%group_partition)) THEN
545 : CALL new_group%from_split(minion_group, ngroups, group_distribution, &
546 : n_subgroups=farming_env%ngroup_wish, &
547 0 : group_partition=farming_env%group_partition, stride=farming_env%stride)
548 : ELSE
549 : CALL new_group%from_split(minion_group, ngroups, group_distribution, &
550 18 : n_subgroups=farming_env%ngroup_wish, stride=farming_env%stride)
551 : END IF
552 : ELSE
553 0 : CPABORT("must set either group_size_wish or ngroup_wish")
554 : END IF
555 22 : new_rank = new_group%mepos
556 : END IF
557 :
558 : ! transfer the info about the minion group distribution to the captain
559 24 : IF (farming_env%captain_minion) THEN
560 4 : IF (para_env%mepos == primus_minion) THEN
561 2 : tag = 1
562 4 : CALL para_env%send(group_distribution, 0, tag)
563 2 : tag = 2
564 2 : CALL para_env%send(ngroups, 0, tag)
565 : END IF
566 4 : IF (para_env%mepos == 0) THEN
567 2 : tag = 1
568 6 : CALL para_env%recv(group_distribution, primus_minion, tag)
569 2 : tag = 2
570 2 : CALL para_env%recv(ngroups, primus_minion, tag)
571 : END IF
572 : END IF
573 :
574 : ! write info on group distribution
575 24 : IF (output_unit > 0) THEN
576 12 : WRITE (output_unit, FMT="(T2,A,T71,I10)") "FARMING| Number of created MPI (Minion) groups:", ngroups
577 12 : WRITE (output_unit, FMT="(T2,A)", ADVANCE="NO") "FARMING| MPI (Minion) process to group correspondence:"
578 34 : DO i = 0, num_minions - 1
579 22 : IF (MODULO(i, 4) == 0) WRITE (output_unit, *)
580 : WRITE (output_unit, FMT='(A3,I6,A3,I6,A1)', ADVANCE="NO") &
581 34 : " (", i, " : ", group_distribution(i), ")"
582 : END DO
583 12 : WRITE (output_unit, *)
584 12 : CALL m_flush(output_unit)
585 : END IF
586 :
587 : ! protect about too many jobs being run in single go. Not more jobs are allowed than the number in the input file
588 : ! and determine the future restart point
589 24 : IF (farming_env%cycle) THEN
590 2 : n_jobs_to_run = farming_env%max_steps*ngroups
591 2 : i_job_to_restart = MODULO(farming_env%restart_n + n_jobs_to_run - 1, farming_env%njobs) + 1
592 : ELSE
593 22 : n_jobs_to_run = MIN(farming_env%njobs, farming_env%max_steps*ngroups)
594 22 : n_jobs_to_run = MIN(n_jobs_to_run, farming_env%njobs - farming_env%restart_n + 1)
595 22 : i_job_to_restart = n_jobs_to_run + farming_env%restart_n
596 : END IF
597 :
598 : ! and write the restart now, that's the point where the next job starts, even if this one is running
599 : iunit = cp_print_key_unit_nr(logger, root_section, "FARMING%RESTART", &
600 24 : extension=".restart")
601 24 : IF (iunit > 0) THEN
602 12 : WRITE (iunit, *) i_job_to_restart
603 : END IF
604 24 : CALL cp_print_key_finished_output(iunit, logger, root_section, "FARMING%RESTART")
605 :
606 : ! this is the job range to be executed.
607 24 : ijob_start = farming_env%restart_n
608 24 : ijob_end = ijob_start + n_jobs_to_run - 1
609 24 : IF (output_unit > 0 .AND. ijob_end - ijob_start < 0) THEN
610 0 : WRITE (output_unit, FMT="(T2,A)") "FARMING| --- WARNING --- NO JOBS NEED EXECUTION ? "
611 0 : WRITE (output_unit, FMT="(T2,A)") "FARMING| is the cycle keyword required ?"
612 0 : WRITE (output_unit, FMT="(T2,A)") "FARMING| or is a stray RESTART file present ?"
613 0 : WRITE (output_unit, FMT="(T2,A)") "FARMING| or is the group_size requested smaller than the number of CPUs?"
614 : END IF
615 :
616 : ! actual executions of the jobs in two different modes
617 24 : IF (farming_env%captain_minion) THEN
618 4 : IF (minion) THEN
619 : ! keep on doing work until captain has decided otherwise
620 2 : todo = do_wait
621 : DO
622 20 : IF (new_rank == 0) THEN
623 : ! the head minion tells the captain he's done or ready to start
624 : ! the message tells what has been done lately
625 20 : tag = 1
626 20 : dest = 0
627 20 : CALL para_env%send(todo, dest, tag)
628 :
629 : ! gets the new todo item
630 20 : tag = 2
631 20 : source = 0
632 20 : CALL para_env%recv(todo, source, tag)
633 :
634 : ! and informs his peer minions
635 20 : CALL new_group%bcast(todo, 0)
636 : ELSE
637 0 : CALL new_group%bcast(todo, 0)
638 : END IF
639 :
640 : ! if the todo is do_nothing we are flagged to quit. Otherwise it is the job number
641 0 : SELECT CASE (todo)
642 : CASE (do_wait, do_deadlock)
643 : ! go for a next round, but we first wait a bit
644 0 : t1 = m_walltime()
645 : DO
646 0 : t2 = m_walltime()
647 0 : IF (t2 - t1 > farming_env%wait_time) EXIT
648 : END DO
649 : CASE (do_nothing)
650 18 : EXIT
651 : CASE (1:)
652 20 : CALL execute_job(todo)
653 : END SELECT
654 : END DO
655 : ELSE ! captain
656 6 : ALLOCATE (minion_status(0:ngroups - 1))
657 4 : minion_status = minion_status_wait
658 2 : ijob_current = ijob_start - 1
659 :
660 20 : DO
661 24 : IF (ALL(minion_status == minion_status_done)) EXIT
662 :
663 : ! who's the next minion waiting for work
664 20 : tag = 1
665 20 : source = mp_any_source
666 20 : CALL para_env%recv(todo, source, tag) ! updates source
667 20 : IF (todo > 0) THEN
668 18 : farming_env%Job(todo)%status = job_finished
669 18 : IF (output_unit > 0) THEN
670 18 : WRITE (output_unit, FMT=*) "Job finished: ", todo
671 18 : CALL m_flush(output_unit)
672 : END IF
673 : END IF
674 :
675 : ! get the next job in line, this could be do_nothing, if we're finished
676 20 : CALL get_next_job(farming_env, ijob_start, ijob_end, ijob_current, todo)
677 20 : dest = source
678 20 : tag = 2
679 20 : CALL para_env%send(todo, dest, tag)
680 :
681 22 : IF (todo > 0) THEN
682 18 : farming_env%Job(todo)%status = job_running
683 18 : IF (output_unit > 0) THEN
684 18 : WRITE (output_unit, FMT=*) "Job: ", todo, " Dir: ", TRIM(farming_env%Job(todo)%cwd), &
685 36 : " assigned to group ", group_distribution(minion_distribution(dest))
686 18 : CALL m_flush(output_unit)
687 : END IF
688 : ELSE
689 2 : IF (todo == do_nothing) THEN
690 2 : minion_status(group_distribution(minion_distribution(dest))) = minion_status_done
691 2 : IF (output_unit > 0) THEN
692 2 : WRITE (output_unit, FMT=*) "group done: ", group_distribution(minion_distribution(dest))
693 2 : CALL m_flush(output_unit)
694 : END IF
695 : END IF
696 2 : IF (todo == do_deadlock) THEN
697 0 : IF (output_unit > 0) THEN
698 0 : WRITE (output_unit, FMT=*) ""
699 0 : WRITE (output_unit, FMT=*) "FARMING JOB DEADLOCKED ... CIRCULAR DEPENDENCIES"
700 0 : WRITE (output_unit, FMT=*) ""
701 0 : CALL m_flush(output_unit)
702 : END IF
703 0 : CPASSERT(todo .NE. do_deadlock)
704 : END IF
705 : END IF
706 :
707 : END DO
708 :
709 2 : DEALLOCATE (minion_status)
710 :
711 : END IF
712 : ELSE
713 : ! this is the non-captain-minion mode way of executing the jobs
714 : ! the i-th job in the input is always executed by the MODULO(i-1,ngroups)-th group
715 : ! (needed for cyclic runs, we don't want two groups working on the same job)
716 20 : IF (output_unit > 0) THEN
717 10 : IF (ijob_end - ijob_start >= 0) THEN
718 10 : WRITE (output_unit, FMT="(T2,A)") "FARMING| List of jobs : "
719 81 : DO ijob = ijob_start, ijob_end
720 71 : i = MODULO(ijob - 1, farming_env%njobs) + 1
721 71 : WRITE (output_unit, FMT=*) "Job: ", i, " Dir: ", TRIM(farming_env%Job(i)%cwd), " Input: ", &
722 152 : TRIM(farming_env%Job(i)%input), " MPI group:", MODULO(i - 1, ngroups)
723 : END DO
724 : END IF
725 10 : CALL m_flush(output_unit)
726 : END IF
727 :
728 162 : DO ijob = ijob_start, ijob_end
729 142 : i = MODULO(ijob - 1, farming_env%njobs) + 1
730 : ! this farms out the jobs
731 162 : IF (MODULO(i - 1, ngroups) == group_distribution(minion_rank)) THEN
732 104 : IF (output_unit > 0) THEN
733 54 : WRITE (output_unit, FMT="(T2,A,I5.5,A)", ADVANCE="NO") " Running Job ", i, &
734 108 : " in "//TRIM(farming_env%Job(i)%cwd)//"."
735 54 : CALL m_flush(output_unit)
736 : END IF
737 104 : CALL execute_job(i)
738 104 : IF (output_unit > 0) THEN
739 54 : WRITE (output_unit, FMT="(A)") " Done, output in "//TRIM(output_file)
740 54 : CALL m_flush(output_unit)
741 : END IF
742 : END IF
743 : END DO
744 : END IF
745 :
746 : ! keep information about how long each process has to wait
747 : ! i.e. the load imbalance
748 24 : t1 = m_walltime()
749 24 : CALL para_env%sync()
750 24 : t2 = m_walltime()
751 72 : ALLOCATE (waittime(0:para_env%num_pe - 1))
752 72 : waittime = 0.0_dp
753 24 : waittime(para_env%mepos) = t2 - t1
754 24 : CALL para_env%sum(waittime)
755 24 : IF (output_unit > 0) THEN
756 12 : WRITE (output_unit, '(T2,A)') "Process idle times [s] at the end of the run"
757 36 : DO i = 0, para_env%num_pe - 1
758 : WRITE (output_unit, FMT='(A2,I6,A3,F8.3,A1)', ADVANCE="NO") &
759 24 : " (", i, " : ", waittime(i), ")"
760 36 : IF (MOD(i + 1, 4) == 0) WRITE (output_unit, '(A)') ""
761 : END DO
762 12 : CALL m_flush(output_unit)
763 : END IF
764 24 : DEALLOCATE (waittime)
765 :
766 : ! give back the communicators of the split groups
767 24 : IF (minion) CALL new_group%free()
768 24 : CALL minion_group%free()
769 :
770 : ! and message passing deallocate structures
771 24 : DEALLOCATE (group_distribution)
772 24 : DEALLOCATE (minion_distribution)
773 :
774 : ! clean the farming env
775 24 : CALL deallocate_farming_env(farming_env)
776 :
777 : CALL cp_print_key_finished_output(output_unit, logger, root_section, &
778 24 : "FARMING%PROGRAM_RUN_INFO")
779 :
780 288 : CALL timestop(handle)
781 :
782 : CONTAINS
! **************************************************************************************************
!> \brief Runs one farming job: changes into the job directory, runs cp2k_run on the job input
!>        and changes back to the farming working directory.
!> \param i index of the job in farming_env%Job
!> \note
!>      Works on variables of the enclosing farming_run (farming_env, new_rank, new_group,
!>      output_file, new_output_unit, ...). Only the process with new_rank == 0 determines
!>      output_file and opens it; all other processes hand a negative unit to cp2k_run.
! **************************************************************************************************
RECURSIVE SUBROUTINE execute_job(i)
   INTEGER, INTENT(IN)                                :: i

   ! change to the working directory of the job
   CALL m_chdir(TRIM(farming_env%Job(i)%cwd), ierr)
   IF (ierr /= 0) THEN
      CPABORT("Failed to change dir to: "//TRIM(farming_env%Job(i)%cwd))
   END IF

   ! generate a fresh call to cp2k_run
   IF (new_rank == 0) THEN

      IF (farming_env%Job(i)%output == "") THEN
         ! no output file requested for this job: start from a generated default name ...
         WRITE (output_file, '(A12,I5.5)') "FARMING_OUT_", i

         ! ... and refine it from the &GLOBAL section of the job input, if there is one
         ALLOCATE (my_parser)
         CALL parser_create(my_parser, file_name=TRIM(farming_env%Job(i)%input))
         label = "&GLOBAL"
         CALL parser_search_string(my_parser, label, ignore_case=.TRUE., found=found)
         IF (found) THEN
            CALL create_global_section(g_section)
            CALL section_vals_create(g_data, g_section)
            CALL cp_unit_set_create(default_units, "OUTPUT")
            CALL section_vals_parse(g_data, my_parser, default_units)
            CALL cp_unit_set_release(default_units)
            ! a non-empty PROJECT gives <project>.out ...
            CALL section_vals_val_get(g_data, "PROJECT", c_val=str)
            IF (str /= "") output_file = TRIM(str)//".out"
            ! ... and a non-empty OUTPUT_FILE_NAME takes precedence over it
            CALL section_vals_val_get(g_data, "OUTPUT_FILE_NAME", c_val=str)
            IF (str /= "") output_file = str
            CALL section_vals_release(g_data)
            CALL section_release(g_section)
         END IF
         CALL parser_release(my_parser)
         DEALLOCATE (my_parser)
      ELSE
         output_file = farming_env%Job(i)%output
      END IF

      CALL open_file(file_name=TRIM(output_file), &
                     file_action="WRITE", &
                     file_status="UNKNOWN", &
                     file_position="APPEND", &
                     unit_number=new_output_unit)
   ELSE
      ! this unit should be negative, otherwise all processors that get a default unit
      ! start writing output (to the same file, adding to confusion).
      ! error handling should be careful, asking for a local output unit if required
      new_output_unit = -1
   END IF

   CALL cp2k_run(input_declaration, TRIM(farming_env%Job(i)%input), new_output_unit, new_group, initial_variables)

   IF (new_rank == 0) CALL close_file(unit_number=new_output_unit)

   ! change back to the original working directory; report the directory on failure,
   ! in the same way as for the job directory above
   CALL m_chdir(TRIM(farming_env%cwd), ierr)
   IF (ierr /= 0) THEN
      CPABORT("Failed to change dir to: "//TRIM(farming_env%cwd))
   END IF

END SUBROUTINE execute_job
848 : END SUBROUTINE farming_run
849 :
! **************************************************************************************************
!> \brief Writes the file "cp2k_input.xml": an XML prolog and the CP2K version/build information,
!>        followed by the references, the units and every subsection of the CP2K root input
!>        section.
! **************************************************************************************************
SUBROUTINE write_xml_file()

   INTEGER                                            :: isection, xml_unit
   TYPE(section_type), POINTER                        :: root_section

   ! build the description of the complete input
   NULLIFY (root_section)
   CALL create_cp2k_root_section(root_section)
   ! NOTE(review): releases keyword 0 of the root section before the export; presumably this
   ! is the default keyword of the section - confirm
   CALL keyword_release(root_section%keywords(0)%keyword)

   CALL open_file(file_name="cp2k_input.xml", &
                  file_status="REPLACE", &
                  file_action="WRITE", &
                  unit_number=xml_unit)

   WRITE (UNIT=xml_unit, FMT="(A)") '<?xml version="1.0" encoding="utf-8"?>'

   ! opening tag and build information, one record per element
   WRITE (UNIT=xml_unit, FMT="(A)") "<CP2K_INPUT>"
   WRITE (UNIT=xml_unit, FMT="(A)") " <CP2K_VERSION>"//TRIM(cp2k_version)//"</CP2K_VERSION>"
   WRITE (UNIT=xml_unit, FMT="(A)") " <CP2K_YEAR>"//TRIM(cp2k_year)//"</CP2K_YEAR>"
   WRITE (UNIT=xml_unit, FMT="(A)") " <COMPILE_DATE>"//TRIM(compile_date)//"</COMPILE_DATE>"
   WRITE (UNIT=xml_unit, FMT="(A)") " <COMPILE_REVISION>"//TRIM(compile_revision)//"</COMPILE_REVISION>"

   CALL export_references_as_xml(xml_unit)
   CALL export_units_as_xml(xml_unit)

   ! the input structure itself: all subsections of the root section at level 1
   DO isection = 1, root_section%n_subsections
      CALL write_section_xml(root_section%subsections(isection)%section, 1, xml_unit)
   END DO

   WRITE (UNIT=xml_unit, FMT="(A)") "</CP2K_INPUT>"

   CALL close_file(unit_number=xml_unit)
   CALL section_release(root_section)

END SUBROUTINE write_xml_file
888 :
! **************************************************************************************************
!> \brief runs the given input
!> \param input_declaration declaration of the input, passed on to cp2k_run
!> \param input_file_path the path of the input file
!> \param output_file_path path of the output file (to which it is appended)
!>        if it is "__STD_OUT__" the default_output_unit is used.
!>        If the file is already connected to a unit, that unit is used; otherwise the file
!>        is opened in append mode for the duration of the run and closed afterwards.
!> \param initial_variables key-value list of initial preprocessor variables
!> \param mpi_comm the mpi communicator to be used for this environment
!>        it will not be freed
!> \author fawzi
!> \note
!>      moved here because of circular dependencies
! **************************************************************************************************
SUBROUTINE run_input(input_declaration, input_file_path, output_file_path, initial_variables, mpi_comm)
   TYPE(section_type), POINTER                        :: input_declaration
   CHARACTER(len=*), INTENT(in)                       :: input_file_path, output_file_path
   CHARACTER(len=default_path_length), &
      DIMENSION(:, :), INTENT(IN)                     :: initial_variables
   TYPE(mp_comm_type), INTENT(in), OPTIONAL           :: mpi_comm

   INTEGER                                            :: unit_nr
   LOGICAL                                            :: opened_here
   TYPE(mp_para_env_type), POINTER                    :: para_env

   ! set at run time (not in the declaration) to avoid an implicit SAVE
   opened_here = .FALSE.

   IF (PRESENT(mpi_comm)) THEN
      ALLOCATE (para_env)
      para_env = mpi_comm
   ELSE
      para_env => f77_default_para_env
      CALL para_env%retain()
   END IF

   ! only the source process gets a valid output unit
   IF (para_env%is_source()) THEN
      IF (output_file_path == "__STD_OUT__") THEN
         unit_nr = default_output_unit
      ELSE
         ! NUMBER= yields -1 when the file is not connected to any unit
         INQUIRE (FILE=output_file_path, NUMBER=unit_nr)
         IF (unit_nr < 0) THEN
            ! not opened by the caller: open it here so that the output is appended to the
            ! requested file instead of being run with a negative (invalid) unit
            CALL open_file(file_name=TRIM(output_file_path), &
                           file_action="WRITE", &
                           file_status="UNKNOWN", &
                           file_position="APPEND", &
                           unit_number=unit_nr)
            opened_here = .TRUE.
         END IF
      END IF
   ELSE
      unit_nr = -1
   END IF

   CALL cp2k_run(input_declaration, input_file_path, unit_nr, para_env, initial_variables)

   ! close only what was opened here; units owned by the caller are left untouched
   IF (opened_here) CALL close_file(unit_number=unit_nr)

   CALL mp_para_env_release(para_env)
END SUBROUTINE run_input
931 :
932 : END MODULE cp2k_runs
|