Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2026 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : MODULE cp2k_runs
10 : USE atom, ONLY: atom_code
11 : USE bibliography, ONLY: Hutter2014,&
12 : cite_reference
13 : USE bsse, ONLY: do_bsse_calculation
14 : USE cell_opt, ONLY: cp_cell_opt
15 : USE cp2k_debug, ONLY: cp2k_debug_energy_and_forces
16 : USE cp2k_info, ONLY: compile_date,&
17 : compile_revision,&
18 : cp2k_version,&
19 : cp2k_year
20 : USE cp_control_types, ONLY: dft_control_type
21 : USE cp_dbcsr_api, ONLY: dbcsr_finalize_lib,&
22 : dbcsr_init_lib,&
23 : dbcsr_print_config,&
24 : dbcsr_print_statistics
25 : USE cp_dbcsr_cp2k_link, ONLY: cp_dbcsr_config
26 : USE cp_files, ONLY: close_file,&
27 : open_file
28 : USE cp_log_handling, ONLY: cp_get_default_logger,&
29 : cp_logger_type,&
30 : cp_logger_would_log,&
31 : cp_note_level
32 : USE cp_output_handling, ONLY: cp_add_iter_level,&
33 : cp_print_key_finished_output,&
34 : cp_print_key_unit_nr,&
35 : cp_rm_iter_level
36 : USE cp_parser_methods, ONLY: parser_search_string
37 : USE cp_parser_types, ONLY: cp_parser_type,&
38 : parser_create,&
39 : parser_release
40 : USE cp_units, ONLY: cp_unit_set_create,&
41 : cp_unit_set_release,&
42 : cp_unit_set_type,&
43 : export_units_as_xml
44 : USE dbm_api, ONLY: dbm_library_print_stats
45 : USE environment, ONLY: cp2k_finalize,&
46 : cp2k_init,&
47 : cp2k_read,&
48 : cp2k_setup
49 : USE f77_interface, ONLY: create_force_env,&
50 : destroy_force_env,&
51 : f77_default_para_env => default_para_env,&
52 : f_env_add_defaults,&
53 : f_env_rm_defaults,&
54 : f_env_type
55 : USE farming_methods, ONLY: do_deadlock,&
56 : do_nothing,&
57 : do_wait,&
58 : farming_parse_input,&
59 : get_next_job
60 : USE farming_types, ONLY: deallocate_farming_env,&
61 : farming_env_type,&
62 : init_farming_env,&
63 : job_finished,&
64 : job_running
65 : USE force_env_methods, ONLY: force_env_calc_energy_force
66 : USE force_env_types, ONLY: force_env_get,&
67 : force_env_type
68 : USE geo_opt, ONLY: cp_geo_opt
69 : USE global_types, ONLY: global_environment_type,&
70 : globenv_create,&
71 : globenv_release
72 : USE grid_api, ONLY: grid_library_print_stats,&
73 : grid_library_set_config
74 : USE input_constants, ONLY: &
75 : bsse_run, cell_opt_run, debug_run, do_atom, do_band, do_cp2k, do_embed, do_farming, &
76 : do_fist, do_ipi, do_mixed, do_nnp, do_opt_basis, do_optimize_input, do_qmmm, do_qs, &
77 : do_sirius, do_swarm, do_tamc, do_test, do_tree_mc, do_tree_mc_ana, driver_run, ehrenfest, &
78 : energy_force_run, energy_run, geo_opt_run, linear_response_run, mimic_run, mol_dyn_run, &
79 : mon_car_run, negf_run, none_run, pint_run, real_time_propagation, rtp_method_bse, &
80 : tree_mc_run, vib_anal
81 : USE input_cp2k, ONLY: create_cp2k_root_section
82 : USE input_cp2k_check, ONLY: check_cp2k_input
83 : USE input_cp2k_global, ONLY: create_global_section
84 : USE input_cp2k_read, ONLY: read_input
85 : USE input_keyword_types, ONLY: keyword_release
86 : USE input_parsing, ONLY: section_vals_parse
87 : USE input_section_types, ONLY: &
88 : section_release, section_type, section_vals_create, section_vals_get_subs_vals, &
89 : section_vals_release, section_vals_retain, section_vals_type, section_vals_val_get, &
90 : section_vals_write, write_section_xml
91 : USE ipi_driver, ONLY: run_driver
92 : USE kinds, ONLY: default_path_length,&
93 : default_string_length,&
94 : dp,&
95 : int_8
96 : USE library_tests, ONLY: lib_test
97 : USE machine, ONLY: default_output_unit,&
98 : m_chdir,&
99 : m_flush,&
100 : m_getcwd,&
101 : m_memory,&
102 : m_memory_max,&
103 : m_walltime
104 : USE mc_run, ONLY: do_mon_car
105 : USE md_run, ONLY: qs_mol_dyn
106 : USE message_passing, ONLY: mp_any_source,&
107 : mp_comm_type,&
108 : mp_para_env_release,&
109 : mp_para_env_type
110 : USE mimic_loop, ONLY: do_mimic_loop
111 : USE mscfg_methods, ONLY: do_mol_loop,&
112 : loop_over_molecules
113 : USE neb_methods, ONLY: neb
114 : USE negf_methods, ONLY: do_negf
115 : USE offload_api, ONLY: offload_get_chosen_device,&
116 : offload_get_device_count,&
117 : offload_mempool_stats_print
118 : USE optimize_basis, ONLY: run_optimize_basis
119 : USE optimize_input, ONLY: run_optimize_input
120 : USE pint_methods, ONLY: do_pint_run
121 : USE qs_environment_types, ONLY: get_qs_env
122 : USE qs_linres_module, ONLY: linres_calculation
123 : USE reference_manager, ONLY: export_references_as_xml
124 : USE rt_bse, ONLY: run_propagation_bse
125 : USE rt_propagation, ONLY: rt_prop_setup
126 : USE swarm, ONLY: run_swarm
127 : USE tamc_run, ONLY: qs_tamc
128 : USE tmc_setup, ONLY: do_analyze_files,&
129 : do_tmc
130 : USE vibrational_analysis, ONLY: vb_anal
131 : #include "../base/base_uses.f90"
132 :
133 : IMPLICIT NONE
134 :
135 : PRIVATE
136 :
137 : PUBLIC :: write_xml_file, run_input
138 :
139 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp2k_runs'
140 :
141 : CONTAINS
142 :
! **************************************************************************************************
!> \brief performs an instance of a cp2k run
!> \param input_declaration declaration of the input structure; it is handed to the input reader,
!>                          the input checker and to the programs that set up further runs themselves
!> \param input_file_name name of the file to be opened for input
!> \param output_unit unit to which output should be written
!> \param mpi_comm communicator of the run; it is copied into a locally owned para_env and is
!>                 also used to initialize DBCSR and to reduce/print the final statistics
!> \param initial_variables key-value list of initial preprocessor variables
!> \author Joost VandeVondele
!> \note
!>      mpi_comm should be a valid communicator
!>      output_unit should be writeable by at least the lowest rank of the mpi group
!>
!>      recursive because a given run_type might need to be able to perform
!>      another cp2k_run as part of its job (e.g. farming, classical equilibration, ...)
!>
!>      the idea is that a cp2k instance should be able to run with just three
!>      arguments, i.e. a given input file, output unit, mpi communicator.
!>      giving these three to cp2k_run should produce a valid run.
!>      the only task of the PROGRAM cp2k is to create valid instances of the
!>      above arguments. Ideally, anything that is called afterwards should be
!>      able to run simultaneously / multithreaded / sequential / parallel / ...
!>      and able to fail safe
! **************************************************************************************************
RECURSIVE SUBROUTINE cp2k_run(input_declaration, input_file_name, output_unit, mpi_comm, initial_variables)
   TYPE(section_type), POINTER                        :: input_declaration
   CHARACTER(LEN=*), INTENT(IN)                       :: input_file_name
   INTEGER, INTENT(IN)                                :: output_unit

   CLASS(mp_comm_type)                                 :: mpi_comm
   CHARACTER(len=default_path_length), &
      DIMENSION(:, :), INTENT(IN)                     :: initial_variables

   INTEGER                                            :: f_env_handle, grid_backend, ierr, &
                                                         iter_level, method_name_id, &
                                                         new_env_id, prog_name_id, run_type_id
#if defined(__DBCSR_ACC)
   INTEGER, TARGET                                    :: offload_chosen_device
#endif
   INTEGER, POINTER                                   :: active_device_id
   INTEGER(KIND=int_8)                                :: m_memory_max_mpi
   LOGICAL                                            :: echo_input, grid_apply_cutoff, &
                                                         grid_validate
   TYPE(cp_logger_type), POINTER                      :: logger, sublogger
   TYPE(mp_para_env_type), POINTER                    :: para_env
   TYPE(dft_control_type), POINTER                    :: dft_control
   TYPE(f_env_type), POINTER                          :: f_env
   TYPE(force_env_type), POINTER                      :: force_env
   TYPE(global_environment_type), POINTER             :: globenv
   TYPE(section_vals_type), POINTER                   :: glob_section, input_file, root_section

   ! Wrap the communicator in a locally owned para_env; it is released at the very end.
   NULLIFY (para_env, f_env, dft_control, active_device_id)
   ALLOCATE (para_env)
   para_env = mpi_comm

   ! active_device_id stays disassociated unless the code was built with __DBCSR_ACC
   ! and at least one offload device is reported.
#if defined(__DBCSR_ACC)
   IF (offload_get_device_count() > 0) THEN
      offload_chosen_device = offload_get_chosen_device()
      active_device_id => offload_chosen_device
   END IF
#endif
   CALL dbcsr_init_lib(mpi_comm%get_handle(), io_unit=output_unit, &
                       accdrv_active_device_id=active_device_id)

   NULLIFY (globenv, force_env)

   CALL cite_reference(Hutter2014)

   ! Parse the input
   input_file => read_input(input_declaration, input_file_name, &
                            initial_variables=initial_variables, &
                            para_env=para_env)

   CALL para_env%sync()

   logger => cp_get_default_logger()

   ! Optionally echo the parsed input (including defaults) on ranks that own an output unit
   glob_section => section_vals_get_subs_vals(input_file, "GLOBAL")
   CALL section_vals_val_get(glob_section, "ECHO_INPUT", l_val=echo_input)
   IF (echo_input .AND. (output_unit > 0)) THEN
      CALL section_vals_write(input_file, &
                              unit_nr=output_unit, &
                              hide_root=.TRUE., &
                              hide_defaults=.FALSE.)
   END IF

   CALL check_cp2k_input(input_declaration, input_file, para_env=para_env, output_unit=output_unit)
   root_section => input_file
   CALL section_vals_val_get(input_file, "GLOBAL%PROGRAM_NAME", &
                             i_val=prog_name_id)
   CALL section_vals_val_get(input_file, "GLOBAL%RUN_TYPE", &
                             i_val=run_type_id)
   CALL section_vals_val_get(root_section, "FORCE_EVAL%METHOD", i_val=method_name_id)

   IF (prog_name_id /= do_cp2k) THEN
      ! initial setup (cp2k does it in the creation of the force_env)
      CALL globenv_create(globenv)
      ! extra reference on the input: it is released once right after the program dispatch
      ! below and once more in the final cleanup of this branch
      CALL section_vals_retain(input_file)
      CALL cp2k_init(para_env, output_unit, globenv, input_file_name=input_file_name)
      CALL cp2k_read(root_section, para_env, globenv)
      CALL cp2k_setup(root_section, para_env, globenv)
   END IF

   CALL cp_dbcsr_config(root_section)
   IF (output_unit > 0 .AND. &
       cp_logger_would_log(logger, cp_note_level)) THEN
      CALL dbcsr_print_config(unit_nr=output_unit)
      WRITE (UNIT=output_unit, FMT='()')
   END IF

   ! Configure the grid library.
   CALL section_vals_val_get(root_section, "GLOBAL%GRID%BACKEND", i_val=grid_backend)
   CALL section_vals_val_get(root_section, "GLOBAL%GRID%VALIDATE", l_val=grid_validate)
   CALL section_vals_val_get(root_section, "GLOBAL%GRID%APPLY_CUTOFF", l_val=grid_apply_cutoff)

   CALL grid_library_set_config(backend=grid_backend, &
                                validate=grid_validate, &
                                apply_cutoff=grid_apply_cutoff)

   ! Dispatch on PROGRAM_NAME. Programs that do all their work here set (or rely on)
   ! globenv%run_type_id so that the RUN_TYPE dispatch further down has nothing left to do.
   SELECT CASE (prog_name_id)
   CASE (do_atom)
      globenv%run_type_id = none_run
      CALL atom_code(root_section)
   CASE (do_optimize_input)
      CALL run_optimize_input(input_declaration, root_section, para_env)
   CASE (do_swarm)
      CALL run_swarm(input_declaration, root_section, para_env, globenv, input_file_name)
   CASE (do_farming) ! TODO: refactor cp2k's startup code
      ! DBCSR is shut down for the duration of the farming run and re-initialized afterwards
      ! (presumably because every farmed job initializes DBCSR itself through cp2k_run).
      CALL dbcsr_finalize_lib()
      CALL farming_run(input_declaration, root_section, para_env, initial_variables)
      CALL dbcsr_init_lib(mpi_comm%get_handle(), io_unit=output_unit, &
                          accdrv_active_device_id=active_device_id)
   CASE (do_opt_basis)
      CALL run_optimize_basis(input_declaration, root_section, para_env)
      globenv%run_type_id = none_run
   CASE (do_cp2k)
      ! the force_env is created from the already parsed input; its globenv is the one
      ! used for the RUN_TYPE dispatch below
      CALL create_force_env(new_env_id, &
                            input_declaration=input_declaration, &
                            input_path=input_file_name, &
                            output_path="__STD_OUT__", mpi_comm=para_env, &
                            output_unit=output_unit, &
                            owns_out_unit=.FALSE., &
                            input=input_file, ierr=ierr)
      CPASSERT(ierr == 0)
      CALL f_env_add_defaults(new_env_id, f_env, handle=f_env_handle)
      force_env => f_env%force_env
      CALL force_env_get(force_env, globenv=globenv)
   CASE (do_test)
      CALL lib_test(root_section, para_env, globenv)
   CASE (do_tree_mc) ! TMC entry point
      CALL do_tmc(input_declaration, root_section, para_env, globenv)
   CASE (do_tree_mc_ana)
      CALL do_analyze_files(input_declaration, root_section, para_env)
   CASE default
      CPABORT("Unknown or unsupported PROGRAM_NAME")
   END SELECT
   CALL section_vals_release(input_file)

   ! Dispatch on the run type stored in the global environment
   SELECT CASE (globenv%run_type_id)
   CASE (pint_run)
      CALL do_pint_run(para_env, root_section, input_declaration, globenv)
   CASE (none_run, tree_mc_run)
      ! do nothing
   CASE (driver_run)
      CALL run_driver(force_env, globenv)
   CASE (energy_run, energy_force_run)
      IF (method_name_id /= do_qs .AND. &
          method_name_id /= do_sirius .AND. &
          method_name_id /= do_qmmm .AND. &
          method_name_id /= do_mixed .AND. &
          method_name_id /= do_nnp .AND. &
          method_name_id /= do_embed .AND. &
          method_name_id /= do_fist .AND. &
          method_name_id /= do_ipi) &
         CPABORT("Energy/Force run not available for all methods ")

      sublogger => cp_get_default_logger()
      CALL cp_add_iter_level(sublogger%iter_info, "JUST_ENERGY", &
                             n_rlevel_new=iter_level)

      ! loop over molecules to generate a molecular guess
      ! this procedure is initiated here to avoid passing globenv deep down
      ! the subroutine stack
      IF (do_mol_loop(force_env=force_env)) &
         CALL loop_over_molecules(globenv, force_env)

      SELECT CASE (globenv%run_type_id)
      CASE (energy_run)
         CALL force_env_calc_energy_force(force_env, calc_force=.FALSE.)
      CASE (energy_force_run)
         CALL force_env_calc_energy_force(force_env, calc_force=.TRUE.)
      CASE default
         CPABORT("Unexpected RUN_TYPE for an energy/force calculation")
      END SELECT
      CALL cp_rm_iter_level(sublogger%iter_info, level_name="JUST_ENERGY", n_rlevel_att=iter_level)
   CASE (mol_dyn_run)
      CALL qs_mol_dyn(force_env, globenv)
   CASE (geo_opt_run)
      CALL cp_geo_opt(force_env, globenv)
   CASE (cell_opt_run)
      CALL cp_cell_opt(force_env, globenv)
   CASE (mon_car_run)
      CALL do_mon_car(force_env, globenv, input_declaration, input_file_name)
   CASE (do_tamc)
      CALL qs_tamc(force_env, globenv)
   CASE (real_time_propagation)
      IF (method_name_id /= do_qs) &
         CPABORT("Real time propagation needs METHOD QS. ")
      CALL get_qs_env(force_env%qs_env, dft_control=dft_control)
      dft_control%rtp_control%fixed_ions = .TRUE.
      SELECT CASE (dft_control%rtp_control%rtp_method)
      CASE (rtp_method_bse)
         ! Run the TD-BSE method
         CALL run_propagation_bse(force_env%qs_env, force_env)
      CASE default
         ! Run the TDDFT method
         CALL rt_prop_setup(force_env)
      END SELECT
   CASE (ehrenfest)
      IF (method_name_id /= do_qs) &
         CPABORT("Ehrenfest dynamics needs METHOD QS ")
      CALL get_qs_env(force_env%qs_env, dft_control=dft_control)
      dft_control%rtp_control%fixed_ions = .FALSE.
      CALL qs_mol_dyn(force_env, globenv)
   CASE (bsse_run)
      CALL do_bsse_calculation(force_env, globenv)
   CASE (linear_response_run)
      IF (method_name_id /= do_qs .AND. &
          method_name_id /= do_qmmm) &
         CPABORT("Property calculations by Linear Response only within the QS or QMMM program ")
      ! The Ground State is needed, it can be read from Restart
      CALL force_env_calc_energy_force(force_env, calc_force=.FALSE., linres=.TRUE.)
      CALL linres_calculation(force_env)
   CASE (debug_run)
      SELECT CASE (method_name_id)
      CASE (do_qs, do_qmmm, do_fist)
         CALL cp2k_debug_energy_and_forces(force_env)
      CASE DEFAULT
         CPABORT("Debug run available only with QS, FIST, and QMMM program ")
      END SELECT
   CASE (vib_anal)
      CALL vb_anal(root_section, input_declaration, para_env, globenv)
   CASE (do_band)
      CALL neb(root_section, input_declaration, para_env, globenv)
   CASE (negf_run)
      CALL do_negf(force_env)
   CASE (mimic_run)
      CALL do_mimic_loop(force_env)
   CASE default
      CPABORT("Unknown or unsupported RUN_TYPE")
   END SELECT

   ! Sample peak memory
   CALL m_memory()

   CALL dbcsr_print_statistics()
   CALL dbm_library_print_stats(mpi_comm=mpi_comm, output_unit=output_unit)
   CALL grid_library_print_stats(mpi_comm=mpi_comm, output_unit=output_unit)
   CALL offload_mempool_stats_print(mpi_comm=mpi_comm, output_unit=output_unit)

   ! Report the maximum over all ranks, rounded up to whole MiB
   m_memory_max_mpi = m_memory_max
   CALL mpi_comm%max(m_memory_max_mpi)
   IF (output_unit > 0) THEN
      WRITE (output_unit, *)
      WRITE (output_unit, '(T2,"MEMORY| Estimated peak process memory [MiB]",T73,I8)') &
         (m_memory_max_mpi + (1024*1024) - 1)/(1024*1024)
   END IF

   IF (prog_name_id == do_cp2k) THEN
      ! Re-attach force_env and globenv to their owners before tearing the force_env down;
      ! a different globenv still hanging on the force_env is released first.
      f_env%force_env => force_env ! for mc
      IF (ASSOCIATED(force_env%globenv)) THEN
         IF (.NOT. ASSOCIATED(force_env%globenv, globenv)) THEN
            CALL globenv_release(force_env%globenv) !mc
         END IF
      END IF
      force_env%globenv => globenv !mc
      CALL f_env_rm_defaults(f_env, ierr=ierr, &
                             handle=f_env_handle)
      CPASSERT(ierr == 0)
      CALL destroy_force_env(new_env_id, ierr=ierr)
      CPASSERT(ierr == 0)
   ELSE
      ! Counterpart of the setup done above for the programs other than cp2k
      CALL cp2k_finalize(root_section, para_env, globenv)
      CPASSERT(globenv%ref_count == 1)
      CALL section_vals_release(root_section)
      CALL globenv_release(globenv)
   END IF

   CALL dbcsr_finalize_lib()

   CALL mp_para_env_release(para_env)

END SUBROUTINE cp2k_run
436 :
437 : ! **************************************************************************************************
438 : !> \brief performs a farming run that performs several independent cp2k_runs
439 : !> \param input_declaration ...
440 : !> \param root_section ...
441 : !> \param para_env ...
442 : !> \param initial_variables ...
443 : !> \author Joost VandeVondele
444 : !> \note
445 : !> needs to be part of this module as the cp2k_run -> farming_run -> cp2k_run
446 : !> calling style creates a hard circular dependency
447 : ! **************************************************************************************************
448 24 : RECURSIVE SUBROUTINE farming_run(input_declaration, root_section, para_env, initial_variables)
449 : TYPE(section_type), POINTER :: input_declaration
450 : TYPE(section_vals_type), POINTER :: root_section
451 : TYPE(mp_para_env_type), POINTER :: para_env
452 : CHARACTER(len=default_path_length), DIMENSION(:, :), INTENT(IN) :: initial_variables
453 :
454 : CHARACTER(len=*), PARAMETER :: routineN = 'farming_run'
455 : INTEGER, PARAMETER :: minion_status_done = -3, &
456 : minion_status_wait = -4
457 :
458 : CHARACTER(len=7) :: label
459 : CHARACTER(LEN=default_path_length) :: output_file
460 : CHARACTER(LEN=default_string_length) :: str
461 : INTEGER :: dest, handle, i, i_job_to_restart, ierr, ijob, ijob_current, &
462 : ijob_end, ijob_start, iunit, n_jobs_to_run, new_output_unit, &
463 : new_rank, ngroups, num_minions, output_unit, primus_minion, &
464 : minion_rank, source, tag, todo
465 24 : INTEGER, DIMENSION(:), POINTER :: group_distribution, &
466 24 : captain_minion_partition, &
467 24 : minion_distribution, &
468 24 : minion_status
469 : LOGICAL :: found, captain, minion
470 : REAL(KIND=dp) :: t1, t2
471 24 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: waittime
472 : TYPE(cp_logger_type), POINTER :: logger
473 : TYPE(cp_parser_type), POINTER :: my_parser
474 : TYPE(cp_unit_set_type) :: default_units
475 : TYPE(farming_env_type), POINTER :: farming_env
476 : TYPE(section_type), POINTER :: g_section
477 : TYPE(section_vals_type), POINTER :: g_data
478 : TYPE(mp_comm_type) :: minion_group, new_group
479 :
480 : ! the primus of all minions, talks to the captain on topics concerning all minions
481 24 : CALL timeset(routineN, handle)
482 24 : NULLIFY (my_parser, g_section, g_data)
483 :
484 24 : logger => cp_get_default_logger()
485 : output_unit = cp_print_key_unit_nr(logger, root_section, "FARMING%PROGRAM_RUN_INFO", &
486 24 : extension=".log")
487 :
488 24 : IF (output_unit > 0) WRITE (output_unit, FMT="(T2,A)") "FARMING| Hi, welcome on this farm!"
489 :
490 24 : ALLOCATE (farming_env)
491 24 : CALL init_farming_env(farming_env)
492 : ! remember where we started
493 24 : CALL m_getcwd(farming_env%cwd)
494 24 : CALL farming_parse_input(farming_env, root_section, para_env)
495 :
496 : ! the full mpi group is first split in a minion group and a captain group, the latter being at most 1 process
497 24 : minion = .TRUE.
498 24 : captain = .FALSE.
499 24 : IF (farming_env%captain_minion) THEN
500 4 : IF (output_unit > 0) WRITE (output_unit, FMT="(T2,A)") "FARMING| Using a Captain-Minion setup"
501 :
502 4 : ALLOCATE (captain_minion_partition(0:1))
503 12 : captain_minion_partition = [1, para_env%num_pe - 1]
504 12 : ALLOCATE (group_distribution(0:para_env%num_pe - 1))
505 :
506 : CALL minion_group%from_split(para_env, ngroups, group_distribution, &
507 4 : n_subgroups=2, group_partition=captain_minion_partition)
508 4 : DEALLOCATE (captain_minion_partition)
509 4 : DEALLOCATE (group_distribution)
510 4 : num_minions = minion_group%num_pe
511 4 : minion_rank = minion_group%mepos
512 :
513 4 : IF (para_env%mepos == 0) THEN
514 2 : minion = .FALSE.
515 2 : captain = .TRUE.
516 : ! on the captain node, num_minions corresponds to the size of the captain group
517 2 : CPASSERT(num_minions == 1)
518 2 : num_minions = para_env%num_pe - 1
519 2 : minion_rank = -1
520 : END IF
521 4 : CPASSERT(num_minions == para_env%num_pe - 1)
522 : ELSE
523 : ! all processes are minions
524 20 : IF (output_unit > 0) WRITE (output_unit, FMT="(T2,A)") "FARMING| Using a Minion-only setup"
525 20 : CALL minion_group%from_dup(para_env)
526 20 : num_minions = minion_group%num_pe
527 20 : minion_rank = minion_group%mepos
528 : END IF
529 24 : IF (output_unit > 0) WRITE (output_unit, FMT="(T2,A,I0)") "FARMING| Number of Minions ", num_minions
530 :
531 : ! keep track of which para_env rank is which minion/captain
532 72 : ALLOCATE (minion_distribution(0:para_env%num_pe - 1))
533 72 : minion_distribution = 0
534 24 : minion_distribution(para_env%mepos) = minion_rank
535 120 : CALL para_env%sum(minion_distribution)
536 : ! we do have a primus inter pares
537 24 : primus_minion = 0
538 48 : DO i = 1, para_env%num_pe - 1
539 48 : IF (minion_distribution(i) == 0) primus_minion = i
540 : END DO
541 :
542 : ! split the current communicator for the minions
543 : ! in a new_group, new_size and new_rank according to the number of groups required according to the input
544 72 : ALLOCATE (group_distribution(0:num_minions - 1))
545 68 : group_distribution = -1
546 24 : IF (minion) THEN
547 22 : IF (farming_env%group_size_wish_set) THEN
548 4 : farming_env%group_size_wish = MIN(farming_env%group_size_wish, para_env%num_pe)
549 : CALL new_group%from_split(minion_group, ngroups, group_distribution, &
550 4 : subgroup_min_size=farming_env%group_size_wish, stride=farming_env%stride)
551 18 : ELSE IF (farming_env%ngroup_wish_set) THEN
552 18 : IF (ASSOCIATED(farming_env%group_partition)) THEN
553 : CALL new_group%from_split(minion_group, ngroups, group_distribution, &
554 : n_subgroups=farming_env%ngroup_wish, &
555 0 : group_partition=farming_env%group_partition, stride=farming_env%stride)
556 : ELSE
557 : CALL new_group%from_split(minion_group, ngroups, group_distribution, &
558 18 : n_subgroups=farming_env%ngroup_wish, stride=farming_env%stride)
559 : END IF
560 : ELSE
561 0 : CPABORT("must set either group_size_wish or ngroup_wish")
562 : END IF
563 22 : new_rank = new_group%mepos
564 : END IF
565 :
566 : ! transfer the info about the minion group distribution to the captain
567 24 : IF (farming_env%captain_minion) THEN
568 4 : IF (para_env%mepos == primus_minion) THEN
569 2 : tag = 1
570 4 : CALL para_env%send(group_distribution, 0, tag)
571 2 : tag = 2
572 2 : CALL para_env%send(ngroups, 0, tag)
573 : END IF
574 4 : IF (para_env%mepos == 0) THEN
575 2 : tag = 1
576 6 : CALL para_env%recv(group_distribution, primus_minion, tag)
577 2 : tag = 2
578 2 : CALL para_env%recv(ngroups, primus_minion, tag)
579 : END IF
580 : END IF
581 :
582 : ! write info on group distribution
583 24 : IF (output_unit > 0) THEN
584 12 : WRITE (output_unit, FMT="(T2,A,T71,I10)") "FARMING| Number of created MPI (Minion) groups:", ngroups
585 12 : WRITE (output_unit, FMT="(T2,A)", ADVANCE="NO") "FARMING| MPI (Minion) process to group correspondence:"
586 34 : DO i = 0, num_minions - 1
587 22 : IF (MODULO(i, 4) == 0) WRITE (output_unit, *)
588 : WRITE (output_unit, FMT='(A3,I6,A3,I6,A1)', ADVANCE="NO") &
589 34 : " (", i, " : ", group_distribution(i), ")"
590 : END DO
591 12 : WRITE (output_unit, *)
592 12 : CALL m_flush(output_unit)
593 : END IF
594 :
595 : ! protect about too many jobs being run in single go. Not more jobs are allowed than the number in the input file
596 : ! and determine the future restart point
597 24 : IF (farming_env%cycle) THEN
598 2 : n_jobs_to_run = farming_env%max_steps*ngroups
599 2 : i_job_to_restart = MODULO(farming_env%restart_n + n_jobs_to_run - 1, farming_env%njobs) + 1
600 : ELSE
601 22 : n_jobs_to_run = MIN(farming_env%njobs, farming_env%max_steps*ngroups)
602 22 : n_jobs_to_run = MIN(n_jobs_to_run, farming_env%njobs - farming_env%restart_n + 1)
603 22 : i_job_to_restart = n_jobs_to_run + farming_env%restart_n
604 : END IF
605 :
606 : ! and write the restart now, that's the point where the next job starts, even if this one is running
607 : iunit = cp_print_key_unit_nr(logger, root_section, "FARMING%RESTART", &
608 24 : extension=".restart")
609 24 : IF (iunit > 0) THEN
610 12 : WRITE (iunit, *) i_job_to_restart
611 : END IF
612 24 : CALL cp_print_key_finished_output(iunit, logger, root_section, "FARMING%RESTART")
613 :
614 : ! this is the job range to be executed.
615 24 : ijob_start = farming_env%restart_n
616 24 : ijob_end = ijob_start + n_jobs_to_run - 1
617 24 : IF (output_unit > 0 .AND. ijob_end - ijob_start < 0) THEN
618 0 : WRITE (output_unit, FMT="(T2,A)") "FARMING| --- WARNING --- NO JOBS NEED EXECUTION ? "
619 0 : WRITE (output_unit, FMT="(T2,A)") "FARMING| is the cycle keyword required ?"
620 0 : WRITE (output_unit, FMT="(T2,A)") "FARMING| or is a stray RESTART file present ?"
621 0 : WRITE (output_unit, FMT="(T2,A)") "FARMING| or is the group_size requested smaller than the number of CPUs?"
622 : END IF
623 :
624 : ! actual executions of the jobs in two different modes
625 24 : IF (farming_env%captain_minion) THEN
626 4 : IF (minion) THEN
627 : ! keep on doing work until captain has decided otherwise
628 2 : todo = do_wait
629 : DO
630 20 : IF (new_rank == 0) THEN
631 : ! the head minion tells the captain he's done or ready to start
632 : ! the message tells what has been done lately
633 20 : tag = 1
634 20 : dest = 0
635 20 : CALL para_env%send(todo, dest, tag)
636 :
637 : ! gets the new todo item
638 20 : tag = 2
639 20 : source = 0
640 20 : CALL para_env%recv(todo, source, tag)
641 :
642 : ! and informs his peer minions
643 20 : CALL new_group%bcast(todo, 0)
644 : ELSE
645 0 : CALL new_group%bcast(todo, 0)
646 : END IF
647 :
648 : ! if the todo is do_nothing we are flagged to quit. Otherwise it is the job number
649 0 : SELECT CASE (todo)
650 : CASE (do_wait, do_deadlock)
651 : ! go for a next round, but we first wait a bit
652 0 : t1 = m_walltime()
653 : DO
654 0 : t2 = m_walltime()
655 0 : IF (t2 - t1 > farming_env%wait_time) EXIT
656 : END DO
657 : CASE (do_nothing)
658 18 : EXIT
659 : CASE (1:)
660 20 : CALL execute_job(todo)
661 : END SELECT
662 : END DO
663 : ELSE ! captain
664 6 : ALLOCATE (minion_status(0:ngroups - 1))
665 4 : minion_status = minion_status_wait
666 2 : ijob_current = ijob_start - 1
667 :
668 20 : DO
669 24 : IF (ALL(minion_status == minion_status_done)) EXIT
670 :
671 : ! who's the next minion waiting for work
672 20 : tag = 1
673 20 : source = mp_any_source
674 20 : CALL para_env%recv(todo, source, tag) ! updates source
675 20 : IF (todo > 0) THEN
676 18 : farming_env%Job(todo)%status = job_finished
677 18 : IF (output_unit > 0) THEN
678 18 : WRITE (output_unit, FMT=*) "Job finished: ", todo
679 18 : CALL m_flush(output_unit)
680 : END IF
681 : END IF
682 :
683 : ! get the next job in line, this could be do_nothing, if we're finished
684 20 : CALL get_next_job(farming_env, ijob_start, ijob_end, ijob_current, todo)
685 20 : dest = source
686 20 : tag = 2
687 20 : CALL para_env%send(todo, dest, tag)
688 :
689 22 : IF (todo > 0) THEN
690 18 : farming_env%Job(todo)%status = job_running
691 18 : IF (output_unit > 0) THEN
692 18 : WRITE (output_unit, FMT=*) "Job: ", todo, " Dir: ", TRIM(farming_env%Job(todo)%cwd), &
693 36 : " assigned to group ", group_distribution(minion_distribution(dest))
694 18 : CALL m_flush(output_unit)
695 : END IF
696 : ELSE
697 2 : IF (todo == do_nothing) THEN
698 2 : minion_status(group_distribution(minion_distribution(dest))) = minion_status_done
699 2 : IF (output_unit > 0) THEN
700 2 : WRITE (output_unit, FMT=*) "group done: ", group_distribution(minion_distribution(dest))
701 2 : CALL m_flush(output_unit)
702 : END IF
703 : END IF
704 2 : IF (todo == do_deadlock) THEN
705 0 : IF (output_unit > 0) THEN
706 0 : WRITE (output_unit, FMT=*) ""
707 0 : WRITE (output_unit, FMT=*) "FARMING JOB DEADLOCKED ... CIRCULAR DEPENDENCIES"
708 0 : WRITE (output_unit, FMT=*) ""
709 0 : CALL m_flush(output_unit)
710 : END IF
711 0 : CPASSERT(todo /= do_deadlock)
712 : END IF
713 : END IF
714 :
715 : END DO
716 :
717 2 : DEALLOCATE (minion_status)
718 :
719 : END IF
720 : ELSE
721 : ! this is the non-captain-minion mode way of executing the jobs
722 : ! the i-th job in the input is always executed by the MODULO(i-1,ngroups)-th group
723 : ! (needed for cyclic runs, we don't want two groups working on the same job)
724 20 : IF (output_unit > 0) THEN
725 10 : IF (ijob_end - ijob_start >= 0) THEN
726 10 : WRITE (output_unit, FMT="(T2,A)") "FARMING| List of jobs : "
727 81 : DO ijob = ijob_start, ijob_end
728 71 : i = MODULO(ijob - 1, farming_env%njobs) + 1
729 71 : WRITE (output_unit, FMT=*) "Job: ", i, " Dir: ", TRIM(farming_env%Job(i)%cwd), " Input: ", &
730 152 : TRIM(farming_env%Job(i)%input), " MPI group:", MODULO(i - 1, ngroups)
731 : END DO
732 : END IF
733 10 : CALL m_flush(output_unit)
734 : END IF
735 :
736 162 : DO ijob = ijob_start, ijob_end
737 142 : i = MODULO(ijob - 1, farming_env%njobs) + 1
738 : ! this farms out the jobs
739 162 : IF (MODULO(i - 1, ngroups) == group_distribution(minion_rank)) THEN
740 104 : IF (output_unit > 0) THEN
741 54 : WRITE (output_unit, FMT="(T2,A,I5.5,A)", ADVANCE="NO") " Running Job ", i, &
742 108 : " in "//TRIM(farming_env%Job(i)%cwd)//"."
743 54 : CALL m_flush(output_unit)
744 : END IF
745 104 : CALL execute_job(i)
746 104 : IF (output_unit > 0) THEN
747 54 : WRITE (output_unit, FMT="(A)") " Done, output in "//TRIM(output_file)
748 54 : CALL m_flush(output_unit)
749 : END IF
750 : END IF
751 : END DO
752 : END IF
753 :
754 : ! keep information about how long each process has to wait
755 : ! i.e. the load imbalance
756 24 : t1 = m_walltime()
757 24 : CALL para_env%sync()
758 24 : t2 = m_walltime()
759 72 : ALLOCATE (waittime(0:para_env%num_pe - 1))
760 72 : waittime = 0.0_dp
761 24 : waittime(para_env%mepos) = t2 - t1
762 24 : CALL para_env%sum(waittime)
763 24 : IF (output_unit > 0) THEN
764 12 : WRITE (output_unit, '(T2,A)') "Process idle times [s] at the end of the run"
765 36 : DO i = 0, para_env%num_pe - 1
766 : WRITE (output_unit, FMT='(A2,I6,A3,F8.3,A1)', ADVANCE="NO") &
767 24 : " (", i, " : ", waittime(i), ")"
768 36 : IF (MOD(i + 1, 4) == 0) WRITE (output_unit, '(A)') ""
769 : END DO
770 12 : CALL m_flush(output_unit)
771 : END IF
772 24 : DEALLOCATE (waittime)
773 :
774 : ! give back the communicators of the split groups
775 24 : IF (minion) CALL new_group%free()
776 24 : CALL minion_group%free()
777 :
778 : ! and deallocate the message passing structures
779 24 : DEALLOCATE (group_distribution)
780 24 : DEALLOCATE (minion_distribution)
781 :
782 : ! clean the farming env
783 24 : CALL deallocate_farming_env(farming_env)
784 :
785 : CALL cp_print_key_finished_output(output_unit, logger, root_section, &
786 24 : "FARMING%PROGRAM_RUN_INFO")
787 :
788 288 : CALL timestop(handle)
789 :
790 : CONTAINS
! **************************************************************************************************
!> \brief Executes farming job number i: changes into the working directory of the job,
!>        determines and opens the output file on the process with new_rank == 0, calls
!>        cp2k_run on the input of the job and changes back to the farming working directory.
!> \param i index of the job in farming_env%Job
!> \note  Works on host-associated variables of farming_run (farming_env, new_rank, new_group,
!>        output_file, new_output_unit, my_parser, ...). Aborts if either directory change fails.
! **************************************************************************************************
RECURSIVE SUBROUTINE execute_job(i)
   INTEGER, INTENT(IN)                                :: i

   ! change to the working directory of this job
   CALL m_chdir(TRIM(farming_env%Job(i)%cwd), ierr)
   IF (ierr /= 0) &
      CPABORT("Failed to change dir to: "//TRIM(farming_env%Job(i)%cwd))

   ! generate a fresh call to cp2k_run
   IF (new_rank == 0) THEN

      IF (farming_env%Job(i)%output == "") THEN
         ! no output file was given for this job: start from a generated default name ...
         WRITE (output_file, '(A12,I5.5)') "FARMING_OUT_", i
         ! ... and override it with what the &GLOBAL section of the job input provides
         ALLOCATE (my_parser)
         CALL parser_create(my_parser, file_name=TRIM(farming_env%Job(i)%input))
         label = "&GLOBAL"
         CALL parser_search_string(my_parser, label, ignore_case=.TRUE., found=found)
         IF (found) THEN
            CALL create_global_section(g_section)
            CALL section_vals_create(g_data, g_section)
            CALL cp_unit_set_create(default_units, "OUTPUT")
            CALL section_vals_parse(g_data, my_parser, default_units)
            CALL cp_unit_set_release(default_units)
            ! a non-empty PROJECT gives <PROJECT>.out; a non-empty OUTPUT_FILE_NAME is
            ! checked afterwards and therefore wins over PROJECT
            CALL section_vals_val_get(g_data, "PROJECT", &
                                      c_val=str)
            IF (str /= "") output_file = TRIM(str)//".out"
            CALL section_vals_val_get(g_data, "OUTPUT_FILE_NAME", &
                                      c_val=str)
            IF (str /= "") output_file = str
            CALL section_vals_release(g_data)
            CALL section_release(g_section)
         END IF
         CALL parser_release(my_parser)
         DEALLOCATE (my_parser)
      ELSE
         output_file = farming_env%Job(i)%output
      END IF

      CALL open_file(file_name=TRIM(output_file), &
                     file_action="WRITE", &
                     file_status="UNKNOWN", &
                     file_position="APPEND", &
                     unit_number=new_output_unit)
   ELSE
      ! this unit should be negative, otherwise all processors that get a default unit
      ! start writing output (to the same file, adding to confusion).
      ! error handling should be careful, asking for a local output unit if required
      new_output_unit = -1
   END IF

   CALL cp2k_run(input_declaration, TRIM(farming_env%Job(i)%input), new_output_unit, &
                 new_group, initial_variables)

   ! only the process that opened the output file closes it again
   IF (new_rank == 0) CALL close_file(unit_number=new_output_unit)

   ! change back to the original working directory; report the directory on failure,
   ! consistent with the error handling of the first directory change
   CALL m_chdir(TRIM(farming_env%cwd), ierr)
   IF (ierr /= 0) &
      CPABORT("Failed to change dir to: "//TRIM(farming_env%cwd))

END SUBROUTINE execute_job
856 : END SUBROUTINE farming_run
857 :
! **************************************************************************************************
!> \brief Writes the file "cp2k_input.xml": an XML declaration, the CP2K version, year, compile
!>        date and compile revision, followed by the output of export_references_as_xml and
!>        export_units_as_xml and one write_section_xml call per subsection of the root section.
! **************************************************************************************************
SUBROUTINE write_xml_file()

   INTEGER                                            :: isub, xml_unit
   TYPE(section_type), POINTER                        :: root_section

   ! build the root input section and release its keyword with index 0
   NULLIFY (root_section)
   CALL create_cp2k_root_section(root_section)
   CALL keyword_release(root_section%keywords(0)%keyword)

   CALL open_file(file_name="cp2k_input.xml", &
                  file_status="REPLACE", &
                  file_action="WRITE", &
                  unit_number=xml_unit)

   WRITE (UNIT=xml_unit, FMT="(A)") '<?xml version="1.0" encoding="utf-8"?>'

   ! opening tag and build information, one record per statement
   WRITE (UNIT=xml_unit, FMT="(A)") "<CP2K_INPUT>"
   WRITE (UNIT=xml_unit, FMT="(A)") " <CP2K_VERSION>"//TRIM(cp2k_version)//"</CP2K_VERSION>"
   WRITE (UNIT=xml_unit, FMT="(A)") " <CP2K_YEAR>"//TRIM(cp2k_year)//"</CP2K_YEAR>"
   WRITE (UNIT=xml_unit, FMT="(A)") " <COMPILE_DATE>"//TRIM(compile_date)//"</COMPILE_DATE>"
   WRITE (UNIT=xml_unit, FMT="(A)") " <COMPILE_REVISION>"//TRIM(compile_revision)//"</COMPILE_REVISION>"

   CALL export_references_as_xml(xml_unit)
   CALL export_units_as_xml(xml_unit)

   ! one XML block per subsection of the root section
   DO isub = 1, root_section%n_subsections
      CALL write_section_xml(root_section%subsections(isub)%section, 1, xml_unit)
   END DO

   ! closing tag, then clean up
   WRITE (UNIT=xml_unit, FMT="(A)") "</CP2K_INPUT>"
   CALL close_file(unit_number=xml_unit)
   CALL section_release(root_section)

END SUBROUTINE write_xml_file
896 :
! **************************************************************************************************
!> \brief Runs the given input by calling cp2k_run with a parallel environment and an output unit
!>        chosen here.
!> \param input_declaration passed on unchanged to cp2k_run
!> \param input_file_path the path of the input file
!> \param output_file_path path of the output file; if it is "__STD_OUT__" the
!>        default_output_unit is used, otherwise the unit number connected to that file is
!>        looked up with INQUIRE. Only the source process of the parallel environment gets a
!>        unit, all other processes use -1.
!> \param initial_variables key-value list of initial preprocessor variables
!> \param mpi_comm the mpi communicator to be used for this environment; if absent the
!>        default parallel environment of the f77 interface is retained and used
!> \author fawzi
!> \note
!>      moved here because of circular dependencies
! **************************************************************************************************
SUBROUTINE run_input(input_declaration, input_file_path, output_file_path, initial_variables, mpi_comm)
   TYPE(section_type), POINTER                        :: input_declaration
   CHARACTER(len=*), INTENT(in)                       :: input_file_path, output_file_path
   CHARACTER(len=default_path_length), &
      DIMENSION(:, :), INTENT(IN)                     :: initial_variables
   TYPE(mp_comm_type), INTENT(in), OPTIONAL           :: mpi_comm

   INTEGER                                            :: out_unit
   TYPE(mp_para_env_type), POINTER                    :: run_env

   ! pick the parallel environment: the default one unless a communicator was handed in
   IF (.NOT. PRESENT(mpi_comm)) THEN
      run_env => f77_default_para_env
      CALL run_env%retain()
   ELSE
      ALLOCATE (run_env)
      run_env = mpi_comm
   END IF

   ! every process starts without an output unit, the source process then selects one
   out_unit = -1
   IF (run_env%is_source()) THEN
      IF (output_file_path /= "__STD_OUT__") THEN
         INQUIRE (FILE=output_file_path, NUMBER=out_unit)
      ELSE
         out_unit = default_output_unit
      END IF
   END IF

   CALL cp2k_run(input_declaration, input_file_path, out_unit, run_env, initial_variables)
   CALL mp_para_env_release(run_env)
END SUBROUTINE run_input
939 :
940 : END MODULE cp2k_runs
|