Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2025 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : MODULE cp2k_runs
10 : USE atom, ONLY: atom_code
11 : USE bibliography, ONLY: Hutter2014,&
12 : cite_reference
13 : USE bsse, ONLY: do_bsse_calculation
14 : USE cell_opt, ONLY: cp_cell_opt
15 : USE cp2k_debug, ONLY: cp2k_debug_energy_and_forces
16 : USE cp2k_info, ONLY: compile_date,&
17 : compile_revision,&
18 : cp2k_version,&
19 : cp2k_year
20 : USE cp_control_types, ONLY: dft_control_type
21 : USE cp_dbcsr_api, ONLY: dbcsr_finalize_lib,&
22 : dbcsr_init_lib,&
23 : dbcsr_print_config,&
24 : dbcsr_print_statistics
25 : USE cp_dbcsr_cp2k_link, ONLY: cp_dbcsr_config
26 : USE cp_files, ONLY: close_file,&
27 : open_file
28 : USE cp_log_handling, ONLY: cp_get_default_logger,&
29 : cp_logger_type,&
30 : cp_logger_would_log,&
31 : cp_note_level
32 : USE cp_output_handling, ONLY: cp_add_iter_level,&
33 : cp_print_key_finished_output,&
34 : cp_print_key_unit_nr,&
35 : cp_rm_iter_level
36 : USE cp_parser_methods, ONLY: parser_search_string
37 : USE cp_parser_types, ONLY: cp_parser_type,&
38 : parser_create,&
39 : parser_release
40 : USE cp_units, ONLY: cp_unit_set_create,&
41 : cp_unit_set_release,&
42 : cp_unit_set_type,&
43 : export_units_as_xml
44 : USE dbm_api, ONLY: dbm_library_print_stats
45 : USE environment, ONLY: cp2k_finalize,&
46 : cp2k_init,&
47 : cp2k_read,&
48 : cp2k_setup
49 : USE f77_interface, ONLY: create_force_env,&
50 : destroy_force_env,&
51 : f77_default_para_env => default_para_env,&
52 : f_env_add_defaults,&
53 : f_env_rm_defaults,&
54 : f_env_type
55 : USE farming_methods, ONLY: do_deadlock,&
56 : do_nothing,&
57 : do_wait,&
58 : farming_parse_input,&
59 : get_next_job
60 : USE farming_types, ONLY: deallocate_farming_env,&
61 : farming_env_type,&
62 : init_farming_env,&
63 : job_finished,&
64 : job_running
65 : USE force_env_methods, ONLY: force_env_calc_energy_force
66 : USE force_env_types, ONLY: force_env_get,&
67 : force_env_type
68 : USE geo_opt, ONLY: cp_geo_opt
69 : USE global_types, ONLY: global_environment_type,&
70 : globenv_create,&
71 : globenv_release
72 : USE grid_api, ONLY: grid_library_print_stats,&
73 : grid_library_set_config
74 : USE input_constants, ONLY: &
75 : bsse_run, cell_opt_run, debug_run, do_atom, do_band, do_cp2k, do_embed, do_farming, &
76 : do_fist, do_ipi, do_mixed, do_nnp, do_opt_basis, do_optimize_input, do_qmmm, do_qs, &
77 : do_sirius, do_swarm, do_tamc, do_test, do_tree_mc, do_tree_mc_ana, driver_run, ehrenfest, &
78 : energy_force_run, energy_run, geo_opt_run, linear_response_run, mol_dyn_run, mon_car_run, &
79 : negf_run, none_run, pint_run, real_time_propagation, rtp_method_bse, tree_mc_run, vib_anal
80 : USE input_cp2k, ONLY: create_cp2k_root_section
81 : USE input_cp2k_check, ONLY: check_cp2k_input
82 : USE input_cp2k_global, ONLY: create_global_section
83 : USE input_cp2k_read, ONLY: read_input
84 : USE input_keyword_types, ONLY: keyword_release
85 : USE input_parsing, ONLY: section_vals_parse
86 : USE input_section_types, ONLY: &
87 : section_release, section_type, section_vals_create, section_vals_get_subs_vals, &
88 : section_vals_release, section_vals_retain, section_vals_type, section_vals_val_get, &
89 : section_vals_write, write_section_xml
90 : USE ipi_driver, ONLY: run_driver
91 : USE kinds, ONLY: default_path_length,&
92 : default_string_length,&
93 : dp,&
94 : int_8
95 : USE library_tests, ONLY: lib_test
96 : USE machine, ONLY: default_output_unit,&
97 : m_chdir,&
98 : m_flush,&
99 : m_getcwd,&
100 : m_memory,&
101 : m_memory_max,&
102 : m_walltime
103 : USE mc_run, ONLY: do_mon_car
104 : USE md_run, ONLY: qs_mol_dyn
105 : USE message_passing, ONLY: mp_any_source,&
106 : mp_comm_type,&
107 : mp_para_env_release,&
108 : mp_para_env_type
109 : USE mscfg_methods, ONLY: do_mol_loop,&
110 : loop_over_molecules
111 : USE neb_methods, ONLY: neb
112 : USE negf_methods, ONLY: do_negf
113 : USE offload_api, ONLY: offload_get_chosen_device,&
114 : offload_get_device_count,&
115 : offload_mempool_stats_print
116 : USE optimize_basis, ONLY: run_optimize_basis
117 : USE optimize_input, ONLY: run_optimize_input
118 : USE pint_methods, ONLY: do_pint_run
119 : USE qs_environment_types, ONLY: get_qs_env
120 : USE qs_linres_module, ONLY: linres_calculation
121 : USE reference_manager, ONLY: export_references_as_xml
122 : USE rt_bse, ONLY: run_propagation_bse
123 : USE rt_propagation, ONLY: rt_prop_setup
124 : USE swarm, ONLY: run_swarm
125 : USE tamc_run, ONLY: qs_tamc
126 : USE tmc_setup, ONLY: do_analyze_files,&
127 : do_tmc
128 : USE vibrational_analysis, ONLY: vb_anal
129 : #include "../base/base_uses.f90"
130 :
131 : IMPLICIT NONE
132 :
133 : PRIVATE
134 :
135 : PUBLIC :: write_xml_file, run_input
136 :
137 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp2k_runs'
138 :
139 : CONTAINS
140 :
! **************************************************************************************************
!> \brief performs an instance of a cp2k run
!> \param input_declaration declaration of the input structure; used to parse and check the
!>        input file and handed on to the selected program / run type
!> \param input_file_name name of the file to be opened for input
!> \param output_unit unit to which output should be written
!> \param mpi_comm communicator of the run; it is copied into a para_env that is allocated
!>        and released inside this routine
!> \param initial_variables key-value list of initial preprocessor variables
!> \author Joost VandeVondele
!> \note
!>      mpi_comm should be a valid communicator
!>      output_unit should be writeable by at least the lowest rank of the mpi group
!>
!>      recursive because a given run_type might need to be able to perform
!>      another cp2k_run as part of its job (e.g. farming, classical equilibration, ...)
!>
!>      the idea is that a cp2k instance should be able to run with just three
!>      arguments, i.e. a given input file, output unit, mpi communicator.
!>      giving these three to cp2k_run should produce a valid run.
!>      the only task of the PROGRAM cp2k is to create valid instances of the
!>      above arguments. Ideally, anything that is called afterwards should be
!>      able to run simultaneously / multithreaded / sequential / parallel / ...
!>      and able to fail safe
! **************************************************************************************************
   RECURSIVE SUBROUTINE cp2k_run(input_declaration, input_file_name, output_unit, mpi_comm, initial_variables)
      TYPE(section_type), POINTER                        :: input_declaration
      CHARACTER(LEN=*), INTENT(IN)                       :: input_file_name
      INTEGER, INTENT(IN)                                :: output_unit

      CLASS(mp_comm_type)                                :: mpi_comm
      CHARACTER(len=default_path_length), &
         DIMENSION(:, :), INTENT(IN)                     :: initial_variables

      INTEGER                                            :: f_env_handle, grid_backend, ierr, &
                                                            iter_level, method_name_id, &
                                                            new_env_id, prog_name_id
#if defined(__DBCSR_ACC)
      INTEGER, TARGET                                    :: offload_chosen_device
#endif
      INTEGER, POINTER                                   :: active_device_id
      INTEGER(KIND=int_8)                                :: m_memory_max_mpi
      LOGICAL                                            :: echo_input, grid_apply_cutoff, &
                                                            grid_validate
      TYPE(cp_logger_type), POINTER                      :: logger, sublogger
      TYPE(mp_para_env_type), POINTER                    :: para_env
      TYPE(dft_control_type), POINTER                    :: dft_control
      TYPE(f_env_type), POINTER                          :: f_env
      TYPE(force_env_type), POINTER                      :: force_env
      TYPE(global_environment_type), POINTER             :: globenv
      TYPE(section_vals_type), POINTER                   :: glob_section, input_file, root_section

      ! Wrap the communicator in a para_env owned by this routine (released at the very end)
      NULLIFY (para_env, f_env, dft_control, active_device_id)
      ALLOCATE (para_env)
      para_env = mpi_comm

      ! active_device_id stays disassociated unless an offload device is available
#if defined(__DBCSR_ACC)
      IF (offload_get_device_count() > 0) THEN
         offload_chosen_device = offload_get_chosen_device()
         active_device_id => offload_chosen_device
      END IF
#endif
      CALL dbcsr_init_lib(mpi_comm%get_handle(), io_unit=output_unit, &
                          accdrv_active_device_id=active_device_id)

      NULLIFY (globenv, force_env)

      CALL cite_reference(Hutter2014)

      ! Parse the input
      input_file => read_input(input_declaration, input_file_name, &
                               initial_variables=initial_variables, &
                               para_env=para_env)

      CALL para_env%sync()

      logger => cp_get_default_logger()

      ! Optionally echo the parsed input (including defaults) to the output unit
      glob_section => section_vals_get_subs_vals(input_file, "GLOBAL")
      CALL section_vals_val_get(glob_section, "ECHO_INPUT", l_val=echo_input)
      IF (echo_input .AND. (output_unit > 0)) THEN
         CALL section_vals_write(input_file, &
                                 unit_nr=output_unit, &
                                 hide_root=.TRUE., &
                                 hide_defaults=.FALSE.)
      END IF

      CALL check_cp2k_input(input_declaration, input_file, para_env=para_env, output_unit=output_unit)
      root_section => input_file
      CALL section_vals_val_get(input_file, "GLOBAL%PROGRAM_NAME", &
                                i_val=prog_name_id)
      CALL section_vals_val_get(root_section, "FORCE_EVAL%METHOD", i_val=method_name_id)

      IF (prog_name_id /= do_cp2k) THEN
         ! initial setup (cp2k does it in the creation of the force_env)
         CALL globenv_create(globenv)
         ! extra reference on the input, given back via root_section in the final clean-up
         CALL section_vals_retain(input_file)
         CALL cp2k_init(para_env, output_unit, globenv, input_file_name=input_file_name)
         CALL cp2k_read(root_section, para_env, globenv)
         CALL cp2k_setup(root_section, para_env, globenv)
      END IF

      CALL cp_dbcsr_config(root_section)
      IF (output_unit > 0 .AND. &
          cp_logger_would_log(logger, cp_note_level)) THEN
         CALL dbcsr_print_config(unit_nr=output_unit)
         WRITE (UNIT=output_unit, FMT='()')
      END IF

      ! Configure the grid library.
      CALL section_vals_val_get(root_section, "GLOBAL%GRID%BACKEND", i_val=grid_backend)
      CALL section_vals_val_get(root_section, "GLOBAL%GRID%VALIDATE", l_val=grid_validate)
      CALL section_vals_val_get(root_section, "GLOBAL%GRID%APPLY_CUTOFF", l_val=grid_apply_cutoff)

      CALL grid_library_set_config(backend=grid_backend, &
                                   validate=grid_validate, &
                                   apply_cutoff=grid_apply_cutoff)

      ! First dispatch: on the program selected by GLOBAL%PROGRAM_NAME
      SELECT CASE (prog_name_id)
      CASE (do_atom)
         globenv%run_type_id = none_run
         CALL atom_code(root_section)
      CASE (do_optimize_input)
         CALL run_optimize_input(input_declaration, root_section, para_env)
      CASE (do_swarm)
         CALL run_swarm(input_declaration, root_section, para_env, globenv, input_file_name)
      CASE (do_farming) ! TODO: refactor cp2k's startup code
         ! DBCSR is finalized around the farming run and re-initialized afterwards
         CALL dbcsr_finalize_lib()
         CALL farming_run(input_declaration, root_section, para_env, initial_variables)
         CALL dbcsr_init_lib(mpi_comm%get_handle(), io_unit=output_unit, &
                             accdrv_active_device_id=active_device_id)
      CASE (do_opt_basis)
         CALL run_optimize_basis(input_declaration, root_section, para_env)
         globenv%run_type_id = none_run
      CASE (do_cp2k)
         CALL create_force_env(new_env_id, &
                               input_declaration=input_declaration, &
                               input_path=input_file_name, &
                               output_path="__STD_OUT__", mpi_comm=para_env, &
                               output_unit=output_unit, &
                               owns_out_unit=.FALSE., &
                               input=input_file, ierr=ierr)
         CPASSERT(ierr == 0)
         CALL f_env_add_defaults(new_env_id, f_env, handle=f_env_handle)
         force_env => f_env%force_env
         CALL force_env_get(force_env, globenv=globenv)
      CASE (do_test)
         CALL lib_test(root_section, para_env, globenv)
      CASE (do_tree_mc) ! TMC entry point
         CALL do_tmc(input_declaration, root_section, para_env, globenv)
      CASE (do_tree_mc_ana)
         CALL do_analyze_files(input_declaration, root_section, para_env)
      CASE default
         CPABORT("Unknown program name (GLOBAL%PROGRAM_NAME)")
      END SELECT
      CALL section_vals_release(input_file)

      ! Second dispatch: on the run type stored in the global environment
      SELECT CASE (globenv%run_type_id)
      CASE (pint_run)
         CALL do_pint_run(para_env, root_section, input_declaration, globenv)
      CASE (none_run, tree_mc_run)
         ! do nothing
      CASE (driver_run)
         CALL run_driver(force_env, globenv)
      CASE (energy_run, energy_force_run)
         IF (method_name_id /= do_qs .AND. &
             method_name_id /= do_sirius .AND. &
             method_name_id /= do_qmmm .AND. &
             method_name_id /= do_mixed .AND. &
             method_name_id /= do_nnp .AND. &
             method_name_id /= do_embed .AND. &
             method_name_id /= do_fist .AND. &
             method_name_id /= do_ipi) &
            CPABORT("Energy/Force run not available for all methods ")

         sublogger => cp_get_default_logger()
         CALL cp_add_iter_level(sublogger%iter_info, "JUST_ENERGY", &
                                n_rlevel_new=iter_level)

         ! loop over molecules to generate a molecular guess
         ! this procedure is initiated here to avoid passing globenv deep down
         ! the subroutine stack
         IF (do_mol_loop(force_env=force_env)) &
            CALL loop_over_molecules(globenv, force_env)

         SELECT CASE (globenv%run_type_id)
         CASE (energy_run)
            CALL force_env_calc_energy_force(force_env, calc_force=.FALSE.)
         CASE (energy_force_run)
            CALL force_env_calc_energy_force(force_env, calc_force=.TRUE.)
         CASE default
            CPABORT("Unexpected run type in the energy/force branch")
         END SELECT
         CALL cp_rm_iter_level(sublogger%iter_info, level_name="JUST_ENERGY", n_rlevel_att=iter_level)
      CASE (mol_dyn_run)
         CALL qs_mol_dyn(force_env, globenv)
      CASE (geo_opt_run)
         CALL cp_geo_opt(force_env, globenv)
      CASE (cell_opt_run)
         CALL cp_cell_opt(force_env, globenv)
      CASE (mon_car_run)
         CALL do_mon_car(force_env, globenv, input_declaration, input_file_name)
      CASE (do_tamc)
         CALL qs_tamc(force_env, globenv)
      CASE (real_time_propagation)
         IF (method_name_id /= do_qs) &
            CPABORT("Real time propagation needs METHOD QS. ")
         CALL get_qs_env(force_env%qs_env, dft_control=dft_control)
         dft_control%rtp_control%fixed_ions = .TRUE.
         SELECT CASE (dft_control%rtp_control%rtp_method)
         CASE (rtp_method_bse)
            ! Run the TD-BSE method
            CALL run_propagation_bse(force_env%qs_env, force_env)
         CASE default
            ! Run the TDDFT method
            CALL rt_prop_setup(force_env)
         END SELECT
      CASE (ehrenfest)
         IF (method_name_id /= do_qs) &
            CPABORT("Ehrenfest dynamics needs METHOD QS ")
         CALL get_qs_env(force_env%qs_env, dft_control=dft_control)
         dft_control%rtp_control%fixed_ions = .FALSE.
         CALL qs_mol_dyn(force_env, globenv)
      CASE (bsse_run)
         CALL do_bsse_calculation(force_env, globenv)
      CASE (linear_response_run)
         IF (method_name_id /= do_qs .AND. &
             method_name_id /= do_qmmm) &
            CPABORT("Property calculations by Linear Response only within the QS or QMMM program ")
         ! The Ground State is needed, it can be read from Restart
         CALL force_env_calc_energy_force(force_env, calc_force=.FALSE., linres=.TRUE.)
         CALL linres_calculation(force_env)
      CASE (debug_run)
         SELECT CASE (method_name_id)
         CASE (do_qs, do_qmmm, do_fist)
            CALL cp2k_debug_energy_and_forces(force_env)
         CASE DEFAULT
            CPABORT("Debug run available only with QS, FIST, and QMMM program ")
         END SELECT
      CASE (vib_anal)
         CALL vb_anal(root_section, input_declaration, para_env, globenv)
      CASE (do_band)
         CALL neb(root_section, input_declaration, para_env, globenv)
      CASE (negf_run)
         CALL do_negf(force_env)
      CASE default
         CPABORT("Unknown or unsupported run type")
      END SELECT

      ! Sample peak memory
      CALL m_memory()

      CALL dbcsr_print_statistics()
      CALL dbm_library_print_stats(mpi_comm=mpi_comm, output_unit=output_unit)
      CALL grid_library_print_stats(mpi_comm=mpi_comm, output_unit=output_unit)
      CALL offload_mempool_stats_print(mpi_comm=mpi_comm, output_unit=output_unit)

      ! Report the maximum over all ranks, rounded up to whole MiB
      m_memory_max_mpi = m_memory_max
      CALL mpi_comm%max(m_memory_max_mpi)
      IF (output_unit > 0) THEN
         WRITE (output_unit, *)
         WRITE (output_unit, '(T2,"MEMORY| Estimated peak process memory [MiB]",T73,I8)') &
            (m_memory_max_mpi + (1024*1024) - 1)/(1024*1024)
      END IF

      ! Clean-up mirrors the set-up: force_env path for do_cp2k, globenv path otherwise
      IF (prog_name_id == do_cp2k) THEN
         f_env%force_env => force_env ! for mc
         IF (ASSOCIATED(force_env%globenv)) THEN
            IF (.NOT. ASSOCIATED(force_env%globenv, globenv)) THEN
               CALL globenv_release(force_env%globenv) !mc
            END IF
         END IF
         force_env%globenv => globenv !mc
         CALL f_env_rm_defaults(f_env, ierr=ierr, &
                                handle=f_env_handle)
         CPASSERT(ierr == 0)
         CALL destroy_force_env(new_env_id, ierr=ierr)
         CPASSERT(ierr == 0)
      ELSE
         CALL cp2k_finalize(root_section, para_env, globenv)
         CPASSERT(globenv%ref_count == 1)
         CALL section_vals_release(root_section)
         CALL globenv_release(globenv)
      END IF

      CALL dbcsr_finalize_lib()

      CALL mp_para_env_release(para_env)

   END SUBROUTINE cp2k_run
432 :
433 : ! **************************************************************************************************
434 : !> \brief performs a farming run that performs several independent cp2k_runs
435 : !> \param input_declaration ...
436 : !> \param root_section ...
437 : !> \param para_env ...
438 : !> \param initial_variables ...
439 : !> \author Joost VandeVondele
440 : !> \note
441 : !> needs to be part of this module as the cp2k_run -> farming_run -> cp2k_run
442 : !> calling style creates a hard circular dependency
443 : ! **************************************************************************************************
444 24 : RECURSIVE SUBROUTINE farming_run(input_declaration, root_section, para_env, initial_variables)
445 : TYPE(section_type), POINTER :: input_declaration
446 : TYPE(section_vals_type), POINTER :: root_section
447 : TYPE(mp_para_env_type), POINTER :: para_env
448 : CHARACTER(len=default_path_length), DIMENSION(:, :), INTENT(IN) :: initial_variables
449 :
450 : CHARACTER(len=*), PARAMETER :: routineN = 'farming_run'
451 : INTEGER, PARAMETER :: minion_status_done = -3, &
452 : minion_status_wait = -4
453 :
454 : CHARACTER(len=7) :: label
455 : CHARACTER(LEN=default_path_length) :: output_file
456 : CHARACTER(LEN=default_string_length) :: str
457 : INTEGER :: dest, handle, i, i_job_to_restart, ierr, ijob, ijob_current, &
458 : ijob_end, ijob_start, iunit, n_jobs_to_run, new_output_unit, &
459 : new_rank, ngroups, num_minions, output_unit, primus_minion, &
460 : minion_rank, source, tag, todo
461 24 : INTEGER, DIMENSION(:), POINTER :: group_distribution, &
462 24 : captain_minion_partition, &
463 24 : minion_distribution, &
464 24 : minion_status
465 : LOGICAL :: found, captain, minion
466 : REAL(KIND=dp) :: t1, t2
467 24 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: waittime
468 : TYPE(cp_logger_type), POINTER :: logger
469 : TYPE(cp_parser_type), POINTER :: my_parser
470 : TYPE(cp_unit_set_type) :: default_units
471 : TYPE(farming_env_type), POINTER :: farming_env
472 : TYPE(section_type), POINTER :: g_section
473 : TYPE(section_vals_type), POINTER :: g_data
474 : TYPE(mp_comm_type) :: minion_group, new_group
475 :
476 : ! the primus of all minions, talks to the captain on topics concerning all minions
477 24 : CALL timeset(routineN, handle)
478 24 : NULLIFY (my_parser, g_section, g_data)
479 :
480 24 : logger => cp_get_default_logger()
481 : output_unit = cp_print_key_unit_nr(logger, root_section, "FARMING%PROGRAM_RUN_INFO", &
482 24 : extension=".log")
483 :
484 24 : IF (output_unit > 0) WRITE (output_unit, FMT="(T2,A)") "FARMING| Hi, welcome on this farm!"
485 :
486 24 : ALLOCATE (farming_env)
487 24 : CALL init_farming_env(farming_env)
488 : ! remember where we started
489 24 : CALL m_getcwd(farming_env%cwd)
490 24 : CALL farming_parse_input(farming_env, root_section, para_env)
491 :
492 : ! the full mpi group is first split in a minion group and a captain group, the latter being at most 1 process
493 24 : minion = .TRUE.
494 24 : captain = .FALSE.
495 24 : IF (farming_env%captain_minion) THEN
496 4 : IF (output_unit > 0) WRITE (output_unit, FMT="(T2,A)") "FARMING| Using a Captain-Minion setup"
497 :
498 4 : ALLOCATE (captain_minion_partition(0:1))
499 12 : captain_minion_partition = [1, para_env%num_pe - 1]
500 12 : ALLOCATE (group_distribution(0:para_env%num_pe - 1))
501 :
502 : CALL minion_group%from_split(para_env, ngroups, group_distribution, &
503 4 : n_subgroups=2, group_partition=captain_minion_partition)
504 4 : DEALLOCATE (captain_minion_partition)
505 4 : DEALLOCATE (group_distribution)
506 4 : num_minions = minion_group%num_pe
507 4 : minion_rank = minion_group%mepos
508 :
509 4 : IF (para_env%mepos == 0) THEN
510 2 : minion = .FALSE.
511 2 : captain = .TRUE.
512 : ! on the captain node, num_minions corresponds to the size of the captain group
513 2 : CPASSERT(num_minions == 1)
514 2 : num_minions = para_env%num_pe - 1
515 2 : minion_rank = -1
516 : END IF
517 4 : CPASSERT(num_minions == para_env%num_pe - 1)
518 : ELSE
519 : ! all processes are minions
520 20 : IF (output_unit > 0) WRITE (output_unit, FMT="(T2,A)") "FARMING| Using a Minion-only setup"
521 20 : CALL minion_group%from_dup(para_env)
522 20 : num_minions = minion_group%num_pe
523 20 : minion_rank = minion_group%mepos
524 : END IF
525 24 : IF (output_unit > 0) WRITE (output_unit, FMT="(T2,A,I0)") "FARMING| Number of Minions ", num_minions
526 :
527 : ! keep track of which para_env rank is which minion/captain
528 72 : ALLOCATE (minion_distribution(0:para_env%num_pe - 1))
529 72 : minion_distribution = 0
530 24 : minion_distribution(para_env%mepos) = minion_rank
531 120 : CALL para_env%sum(minion_distribution)
532 : ! we do have a primus inter pares
533 24 : primus_minion = 0
534 48 : DO i = 1, para_env%num_pe - 1
535 48 : IF (minion_distribution(i) == 0) primus_minion = i
536 : END DO
537 :
538 : ! split the current communicator for the minions
539 : ! in a new_group, new_size and new_rank according to the number of groups required according to the input
540 72 : ALLOCATE (group_distribution(0:num_minions - 1))
541 68 : group_distribution = -1
542 24 : IF (minion) THEN
543 22 : IF (farming_env%group_size_wish_set) THEN
544 4 : farming_env%group_size_wish = MIN(farming_env%group_size_wish, para_env%num_pe)
545 : CALL new_group%from_split(minion_group, ngroups, group_distribution, &
546 4 : subgroup_min_size=farming_env%group_size_wish, stride=farming_env%stride)
547 18 : ELSE IF (farming_env%ngroup_wish_set) THEN
548 18 : IF (ASSOCIATED(farming_env%group_partition)) THEN
549 : CALL new_group%from_split(minion_group, ngroups, group_distribution, &
550 : n_subgroups=farming_env%ngroup_wish, &
551 0 : group_partition=farming_env%group_partition, stride=farming_env%stride)
552 : ELSE
553 : CALL new_group%from_split(minion_group, ngroups, group_distribution, &
554 18 : n_subgroups=farming_env%ngroup_wish, stride=farming_env%stride)
555 : END IF
556 : ELSE
557 0 : CPABORT("must set either group_size_wish or ngroup_wish")
558 : END IF
559 22 : new_rank = new_group%mepos
560 : END IF
561 :
562 : ! transfer the info about the minion group distribution to the captain
563 24 : IF (farming_env%captain_minion) THEN
564 4 : IF (para_env%mepos == primus_minion) THEN
565 2 : tag = 1
566 4 : CALL para_env%send(group_distribution, 0, tag)
567 2 : tag = 2
568 2 : CALL para_env%send(ngroups, 0, tag)
569 : END IF
570 4 : IF (para_env%mepos == 0) THEN
571 2 : tag = 1
572 6 : CALL para_env%recv(group_distribution, primus_minion, tag)
573 2 : tag = 2
574 2 : CALL para_env%recv(ngroups, primus_minion, tag)
575 : END IF
576 : END IF
577 :
578 : ! write info on group distribution
579 24 : IF (output_unit > 0) THEN
580 12 : WRITE (output_unit, FMT="(T2,A,T71,I10)") "FARMING| Number of created MPI (Minion) groups:", ngroups
581 12 : WRITE (output_unit, FMT="(T2,A)", ADVANCE="NO") "FARMING| MPI (Minion) process to group correspondence:"
582 34 : DO i = 0, num_minions - 1
583 22 : IF (MODULO(i, 4) == 0) WRITE (output_unit, *)
584 : WRITE (output_unit, FMT='(A3,I6,A3,I6,A1)', ADVANCE="NO") &
585 34 : " (", i, " : ", group_distribution(i), ")"
586 : END DO
587 12 : WRITE (output_unit, *)
588 12 : CALL m_flush(output_unit)
589 : END IF
590 :
591 : ! protect about too many jobs being run in single go. Not more jobs are allowed than the number in the input file
592 : ! and determine the future restart point
593 24 : IF (farming_env%cycle) THEN
594 2 : n_jobs_to_run = farming_env%max_steps*ngroups
595 2 : i_job_to_restart = MODULO(farming_env%restart_n + n_jobs_to_run - 1, farming_env%njobs) + 1
596 : ELSE
597 22 : n_jobs_to_run = MIN(farming_env%njobs, farming_env%max_steps*ngroups)
598 22 : n_jobs_to_run = MIN(n_jobs_to_run, farming_env%njobs - farming_env%restart_n + 1)
599 22 : i_job_to_restart = n_jobs_to_run + farming_env%restart_n
600 : END IF
601 :
602 : ! and write the restart now, that's the point where the next job starts, even if this one is running
603 : iunit = cp_print_key_unit_nr(logger, root_section, "FARMING%RESTART", &
604 24 : extension=".restart")
605 24 : IF (iunit > 0) THEN
606 12 : WRITE (iunit, *) i_job_to_restart
607 : END IF
608 24 : CALL cp_print_key_finished_output(iunit, logger, root_section, "FARMING%RESTART")
609 :
610 : ! this is the job range to be executed.
611 24 : ijob_start = farming_env%restart_n
612 24 : ijob_end = ijob_start + n_jobs_to_run - 1
613 24 : IF (output_unit > 0 .AND. ijob_end - ijob_start < 0) THEN
614 0 : WRITE (output_unit, FMT="(T2,A)") "FARMING| --- WARNING --- NO JOBS NEED EXECUTION ? "
615 0 : WRITE (output_unit, FMT="(T2,A)") "FARMING| is the cycle keyword required ?"
616 0 : WRITE (output_unit, FMT="(T2,A)") "FARMING| or is a stray RESTART file present ?"
617 0 : WRITE (output_unit, FMT="(T2,A)") "FARMING| or is the group_size requested smaller than the number of CPUs?"
618 : END IF
619 :
620 : ! actual executions of the jobs in two different modes
621 24 : IF (farming_env%captain_minion) THEN
622 4 : IF (minion) THEN
623 : ! keep on doing work until captain has decided otherwise
624 2 : todo = do_wait
625 : DO
626 20 : IF (new_rank == 0) THEN
627 : ! the head minion tells the captain he's done or ready to start
628 : ! the message tells what has been done lately
629 20 : tag = 1
630 20 : dest = 0
631 20 : CALL para_env%send(todo, dest, tag)
632 :
633 : ! gets the new todo item
634 20 : tag = 2
635 20 : source = 0
636 20 : CALL para_env%recv(todo, source, tag)
637 :
638 : ! and informs his peer minions
639 20 : CALL new_group%bcast(todo, 0)
640 : ELSE
641 0 : CALL new_group%bcast(todo, 0)
642 : END IF
643 :
644 : ! if the todo is do_nothing we are flagged to quit. Otherwise it is the job number
645 0 : SELECT CASE (todo)
646 : CASE (do_wait, do_deadlock)
647 : ! go for a next round, but we first wait a bit
648 0 : t1 = m_walltime()
649 : DO
650 0 : t2 = m_walltime()
651 0 : IF (t2 - t1 > farming_env%wait_time) EXIT
652 : END DO
653 : CASE (do_nothing)
654 18 : EXIT
655 : CASE (1:)
656 20 : CALL execute_job(todo)
657 : END SELECT
658 : END DO
659 : ELSE ! captain
660 6 : ALLOCATE (minion_status(0:ngroups - 1))
661 4 : minion_status = minion_status_wait
662 2 : ijob_current = ijob_start - 1
663 :
664 20 : DO
665 24 : IF (ALL(minion_status == minion_status_done)) EXIT
666 :
667 : ! who's the next minion waiting for work
668 20 : tag = 1
669 20 : source = mp_any_source
670 20 : CALL para_env%recv(todo, source, tag) ! updates source
671 20 : IF (todo > 0) THEN
672 18 : farming_env%Job(todo)%status = job_finished
673 18 : IF (output_unit > 0) THEN
674 18 : WRITE (output_unit, FMT=*) "Job finished: ", todo
675 18 : CALL m_flush(output_unit)
676 : END IF
677 : END IF
678 :
679 : ! get the next job in line, this could be do_nothing, if we're finished
680 20 : CALL get_next_job(farming_env, ijob_start, ijob_end, ijob_current, todo)
681 20 : dest = source
682 20 : tag = 2
683 20 : CALL para_env%send(todo, dest, tag)
684 :
685 22 : IF (todo > 0) THEN
686 18 : farming_env%Job(todo)%status = job_running
687 18 : IF (output_unit > 0) THEN
688 18 : WRITE (output_unit, FMT=*) "Job: ", todo, " Dir: ", TRIM(farming_env%Job(todo)%cwd), &
689 36 : " assigned to group ", group_distribution(minion_distribution(dest))
690 18 : CALL m_flush(output_unit)
691 : END IF
692 : ELSE
693 2 : IF (todo == do_nothing) THEN
694 2 : minion_status(group_distribution(minion_distribution(dest))) = minion_status_done
695 2 : IF (output_unit > 0) THEN
696 2 : WRITE (output_unit, FMT=*) "group done: ", group_distribution(minion_distribution(dest))
697 2 : CALL m_flush(output_unit)
698 : END IF
699 : END IF
700 2 : IF (todo == do_deadlock) THEN
701 0 : IF (output_unit > 0) THEN
702 0 : WRITE (output_unit, FMT=*) ""
703 0 : WRITE (output_unit, FMT=*) "FARMING JOB DEADLOCKED ... CIRCULAR DEPENDENCIES"
704 0 : WRITE (output_unit, FMT=*) ""
705 0 : CALL m_flush(output_unit)
706 : END IF
707 0 : CPASSERT(todo /= do_deadlock)
708 : END IF
709 : END IF
710 :
711 : END DO
712 :
713 2 : DEALLOCATE (minion_status)
714 :
715 : END IF
716 : ELSE
717 : ! this is the non-captain-minion mode way of executing the jobs
718 : ! the i-th job in the input is always executed by the MODULO(i-1,ngroups)-th group
719 : ! (needed for cyclic runs, we don't want two groups working on the same job)
720 20 : IF (output_unit > 0) THEN
721 10 : IF (ijob_end - ijob_start >= 0) THEN
722 10 : WRITE (output_unit, FMT="(T2,A)") "FARMING| List of jobs : "
723 81 : DO ijob = ijob_start, ijob_end
724 71 : i = MODULO(ijob - 1, farming_env%njobs) + 1
725 71 : WRITE (output_unit, FMT=*) "Job: ", i, " Dir: ", TRIM(farming_env%Job(i)%cwd), " Input: ", &
726 152 : TRIM(farming_env%Job(i)%input), " MPI group:", MODULO(i - 1, ngroups)
727 : END DO
728 : END IF
729 10 : CALL m_flush(output_unit)
730 : END IF
731 :
732 162 : DO ijob = ijob_start, ijob_end
733 142 : i = MODULO(ijob - 1, farming_env%njobs) + 1
734 : ! this farms out the jobs
735 162 : IF (MODULO(i - 1, ngroups) == group_distribution(minion_rank)) THEN
736 104 : IF (output_unit > 0) THEN
737 54 : WRITE (output_unit, FMT="(T2,A,I5.5,A)", ADVANCE="NO") " Running Job ", i, &
738 108 : " in "//TRIM(farming_env%Job(i)%cwd)//"."
739 54 : CALL m_flush(output_unit)
740 : END IF
741 104 : CALL execute_job(i)
742 104 : IF (output_unit > 0) THEN
743 54 : WRITE (output_unit, FMT="(A)") " Done, output in "//TRIM(output_file)
744 54 : CALL m_flush(output_unit)
745 : END IF
746 : END IF
747 : END DO
748 : END IF
749 :
750 : ! keep information about how long each process has to wait
751 : ! i.e. the load imbalance
752 24 : t1 = m_walltime()
753 24 : CALL para_env%sync()
754 24 : t2 = m_walltime()
755 72 : ALLOCATE (waittime(0:para_env%num_pe - 1))
756 72 : waittime = 0.0_dp
757 24 : waittime(para_env%mepos) = t2 - t1
758 24 : CALL para_env%sum(waittime)
759 24 : IF (output_unit > 0) THEN
760 12 : WRITE (output_unit, '(T2,A)') "Process idle times [s] at the end of the run"
761 36 : DO i = 0, para_env%num_pe - 1
762 : WRITE (output_unit, FMT='(A2,I6,A3,F8.3,A1)', ADVANCE="NO") &
763 24 : " (", i, " : ", waittime(i), ")"
764 36 : IF (MOD(i + 1, 4) == 0) WRITE (output_unit, '(A)') ""
765 : END DO
766 12 : CALL m_flush(output_unit)
767 : END IF
768 24 : DEALLOCATE (waittime)
769 :
770 : ! give back the communicators of the split groups
771 24 : IF (minion) CALL new_group%free()
772 24 : CALL minion_group%free()
773 :
774 : ! and message passing deallocate structures
775 24 : DEALLOCATE (group_distribution)
776 24 : DEALLOCATE (minion_distribution)
777 :
778 : ! clean the farming env
779 24 : CALL deallocate_farming_env(farming_env)
780 :
781 : CALL cp_print_key_finished_output(output_unit, logger, root_section, &
782 24 : "FARMING%PROGRAM_RUN_INFO")
783 :
784 288 : CALL timestop(handle)
785 :
786 : CONTAINS
! **************************************************************************************************
!> \brief Runs a single farming job: changes into the job directory, selects the output
!>        file (on rank 0 of the job group), calls cp2k_run on the job input and finally
!>        changes back to the directory stored in farming_env%cwd.
!> \param i index of the job in farming_env%Job (only read, never modified)
!> \note  Internal procedure of farming_run: apart from i, every variable used here
!>        (farming_env, new_rank, new_group, output_file, new_output_unit, ierr, ...)
!>        is host-associated.
! **************************************************************************************************
      RECURSIVE SUBROUTINE execute_job(i)
         INTEGER, INTENT(IN)                                :: i

         ! change to the new working directory
         CALL m_chdir(TRIM(farming_env%Job(i)%cwd), ierr)
         IF (ierr /= 0) &
            CPABORT("Failed to change dir to: "//TRIM(farming_env%Job(i)%cwd))

         ! generate a fresh call to cp2k_run
         IF (new_rank == 0) THEN

            IF (farming_env%Job(i)%output == "") THEN
               ! no explicit output file was given for this job: start from a generated
               ! default name and refine it with the &GLOBAL section of the job input
               WRITE (output_file, '(A12,I5.5)') "FARMING_OUT_", i
               ALLOCATE (my_parser)
               CALL parser_create(my_parser, file_name=TRIM(farming_env%Job(i)%input))
               label = "&GLOBAL"
               CALL parser_search_string(my_parser, label, ignore_case=.TRUE., found=found)
               IF (found) THEN
                  CALL create_global_section(g_section)
                  CALL section_vals_create(g_data, g_section)
                  CALL cp_unit_set_create(default_units, "OUTPUT")
                  CALL section_vals_parse(g_data, my_parser, default_units)
                  CALL cp_unit_set_release(default_units)
                  ! a non-empty PROJECT gives "<project>.out" ...
                  CALL section_vals_val_get(g_data, "PROJECT", &
                                            c_val=str)
                  IF (str /= "") output_file = TRIM(str)//".out"
                  ! ... and a non-empty OUTPUT_FILE_NAME takes precedence over it
                  CALL section_vals_val_get(g_data, "OUTPUT_FILE_NAME", &
                                            c_val=str)
                  IF (str /= "") output_file = str
                  CALL section_vals_release(g_data)
                  CALL section_release(g_section)
               END IF
               CALL parser_release(my_parser)
               DEALLOCATE (my_parser)
            ELSE
               output_file = farming_env%Job(i)%output
            END IF

            CALL open_file(file_name=TRIM(output_file), &
                           file_action="WRITE", &
                           file_status="UNKNOWN", &
                           file_position="APPEND", &
                           unit_number=new_output_unit)
         ELSE
            ! this unit should be negative, otherwise all processors that get a default unit
            ! start writing output (to the same file, adding to confusion).
            ! error handling should be careful, asking for a local output unit if required
            new_output_unit = -1
         END IF

         CALL cp2k_run(input_declaration, TRIM(farming_env%Job(i)%input), new_output_unit, &
                       new_group, initial_variables)

         ! only rank 0 of the job group opened a file above
         IF (new_rank == 0) CALL close_file(unit_number=new_output_unit)

         ! change to the original working directory; report the failing path in the same
         ! way as for the job directory above
         CALL m_chdir(TRIM(farming_env%cwd), ierr)
         IF (ierr /= 0) &
            CPABORT("Failed to change dir to: "//TRIM(farming_env%cwd))

      END SUBROUTINE execute_job
   END SUBROUTINE farming_run
852 : END SUBROUTINE farming_run
853 :
! **************************************************************************************************
!> \brief Exports the CP2K input structure as XML to the file "cp2k_input.xml".
!>        The file is opened with status REPLACE and receives, in this order: the XML
!>        declaration, the opening CP2K_INPUT tag, version/year/compile information,
!>        the references, the units, every subsection of the root section and the
!>        closing CP2K_INPUT tag.
! **************************************************************************************************
   SUBROUTINE write_xml_file()

      INTEGER                                            :: isub, xml_unit
      TYPE(section_type), POINTER                        :: root_section

      ! build the root section of the input description
      NULLIFY (root_section)
      CALL create_cp2k_root_section(root_section)
      ! the keyword stored at index 0 of the root section is released before the export
      CALL keyword_release(root_section%keywords(0)%keyword)

      CALL open_file(file_name="cp2k_input.xml", &
                     file_action="WRITE", &
                     file_status="REPLACE", &
                     unit_number=xml_unit)

      WRITE (UNIT=xml_unit, FMT="(A)") '<?xml version="1.0" encoding="utf-8"?>'

      ! CP2K input structure: header, one record per WRITE statement
      WRITE (UNIT=xml_unit, FMT="(A)") "<CP2K_INPUT>"
      WRITE (UNIT=xml_unit, FMT="(A)") " <CP2K_VERSION>"//TRIM(cp2k_version)//"</CP2K_VERSION>"
      WRITE (UNIT=xml_unit, FMT="(A)") " <CP2K_YEAR>"//TRIM(cp2k_year)//"</CP2K_YEAR>"
      WRITE (UNIT=xml_unit, FMT="(A)") " <COMPILE_DATE>"//TRIM(compile_date)//"</COMPILE_DATE>"
      WRITE (UNIT=xml_unit, FMT="(A)") " <COMPILE_REVISION>"//TRIM(compile_revision)//"</COMPILE_REVISION>"

      CALL export_references_as_xml(xml_unit)
      CALL export_units_as_xml(xml_unit)

      ! every subsection of the root is written starting at level 1
      DO isub = 1, root_section%n_subsections
         CALL write_section_xml(root_section%subsections(isub)%section, 1, xml_unit)
      END DO

      WRITE (UNIT=xml_unit, FMT="(A)") "</CP2K_INPUT>"

      CALL close_file(unit_number=xml_unit)
      CALL section_release(root_section)

   END SUBROUTINE write_xml_file
892 :
! **************************************************************************************************
!> \brief runs the given input by forwarding it to cp2k_run
!> \param input_declaration declaration of the input, passed on to cp2k_run
!> \param input_file_path the path of the input file
!> \param output_file_path path of the output file;
!>        if it is "__STD_OUT__" the default_output_unit is used, otherwise the unit
!>        number connected to this file is looked up with INQUIRE. The file is not
!>        opened here, so it has to be connected by the caller beforehand
!>        (INQUIRE yields -1 for a file that is not connected to any unit).
!> \param initial_variables key-value list of initial preprocessor variables
!> \param mpi_comm the mpi communicator to be used for this environment;
!>        if absent, the default para_env of the f77 interface is used
!>        NOTE(review): the previous header stated that the communicator "will not be
!>        freed" - confirm against mp_para_env_release, which is called on both paths
!> \author fawzi
!> \note
!>      moved here because of circular dependencies
! **************************************************************************************************
   SUBROUTINE run_input(input_declaration, input_file_path, output_file_path, initial_variables, mpi_comm)
      TYPE(section_type), POINTER                        :: input_declaration
      CHARACTER(len=*), INTENT(in)                       :: input_file_path, output_file_path
      CHARACTER(len=default_path_length), &
         DIMENSION(:, :), INTENT(IN)                     :: initial_variables
      TYPE(mp_comm_type), INTENT(in), OPTIONAL           :: mpi_comm

      INTEGER                                            :: out_unit
      TYPE(mp_para_env_type), POINTER                    :: para_env

      ! pick the parallel environment: the default one (retained, so that the release
      ! below is balanced) or a fresh one built from the given communicator
      IF (.NOT. PRESENT(mpi_comm)) THEN
         para_env => f77_default_para_env
         CALL para_env%retain()
      ELSE
         ALLOCATE (para_env)
         para_env = mpi_comm
      END IF

      ! only the source rank gets a real output unit, all other ranks keep -1
      out_unit = -1
      IF (para_env%is_source()) THEN
         IF (output_file_path /= "__STD_OUT__") THEN
            INQUIRE (FILE=output_file_path, NUMBER=out_unit)
         ELSE
            out_unit = default_output_unit
         END IF
      END IF

      CALL cp2k_run(input_declaration, input_file_path, out_unit, para_env, initial_variables)
      CALL mp_para_env_release(para_env)
   END SUBROUTINE run_input
935 :
936 : END MODULE cp2k_runs
|