StartDate: 2021-11-25 19:08:21+00:00 CpuId: 64x Intel Xeon W 2000 / D-2100 (Skylake / Cascade Lake) {Skylake}, 14nm CommitSHA: 8b72b57cebd9ad3b81b52a55f3e8c59fd050a2ee CommitTime: 2021-11-25 15:57:49 +0100 CommitAuthor: Matthias Krack CommitSubject: Format string revised Trying to pull image cp2k-toolchain-mpich... success :-) Trying to pull image cp2k-perf-openmp... success :-) #################### Running Image cp2k-perf-openmp #################### ========== Fetching Git Commit ========== CommitSHA: 8b72b57cebd9ad3b81b52a55f3e8c59fd050a2ee CommitTime: 2021-11-25 15:57:49 +0100 CommitAuthor: Matthias Krack CommitSubject: Format string revised ========== Running Test ========== ========== Compiling CP2K ========== Compiling cp2k... done. ========== Running Performance Test ========== Running H2O-64.inp with 1 threads and 32 ranks... done. Running H2O-64.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.034 0.034 163.278 163.278 qs_mol_dyn_low 1 2.0 0.004 0.004 162.428 162.428 qs_forces 11 3.9 0.001 0.001 162.371 162.371 qs_energies 11 4.9 0.001 0.001 151.984 151.984 scf_env_do_scf 11 5.9 0.001 0.001 119.739 119.739 velocity_verlet 10 3.0 0.002 0.002 113.756 113.756 scf_env_do_scf_inner_loop 108 6.5 0.009 0.009 81.229 81.229 init_scf_loop 11 6.9 0.000 0.000 38.324 38.324 prepare_preconditioner 11 7.9 0.000 0.000 34.357 34.357 make_preconditioner 11 8.9 0.000 0.000 34.357 34.357 rebuild_ks_matrix 119 8.3 0.001 0.001 33.070 33.070 qs_ks_build_kohn_sham_matrix 119 9.3 0.018 0.018 33.069 33.069 make_full_inverse_cholesky 11 9.9 0.000 0.000 32.473 32.473 qs_ks_update_qs_env 119 7.6 0.001 0.001 30.848 30.848 qs_rho_update_rho 119 7.7 0.001 0.001 28.532 28.532 calculate_rho_elec 119 8.7 1.553 1.553 28.532 28.532 qs_scf_new_mos 108 7.5 0.001 0.001 27.920 27.920 qs_scf_loop_do_ot 108 8.5 0.001 0.001 27.919 27.919 ot_scf_mini 108 9.5 0.003 0.003 26.017 26.017 dbcsr_multiply_generic 2286 12.5 0.183 0.183 23.360 23.360 grid_collocate_task_list 119 9.7 22.313 22.313 22.313 22.313 sum_up_and_integrate 119 10.3 0.396 0.396 21.056 21.056 integrate_v_rspace 119 11.3 0.527 0.527 20.660 20.660 cp_fm_cholesky_invert 11 10.9 19.483 19.483 19.483 19.483 grid_integrate_task_list 119 12.3 17.633 17.633 17.633 17.633 init_scf_run 11 5.9 0.001 0.001 16.362 16.362 scf_env_initial_rho_setup 11 6.9 0.001 0.001 16.361 16.361 wfi_extrapolate 11 7.9 0.001 0.001 15.514 15.514 ot_mini 108 10.5 0.001 0.001 15.258 15.258 cp_gemm 81 9.0 0.000 0.000 15.096 15.096 cp_gemm_cosma 81 10.0 15.096 15.096 15.096 15.096 make_m2s 4572 13.5 0.067 0.067 13.012 13.012 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 10.967 10.967 qs_ot_get_derivative 108 11.5 0.002 0.002 7.937 7.937 pw_transfer 1439 11.6 0.093 0.093 7.863 7.863 fft_wrap_pw1pw2 1201 12.6 0.010 0.010 7.549 7.549 ot_diis_step 108 11.5 0.006 0.006 7.317 7.317 cp_fm_cholesky_decompose 22 10.9 7.147 7.147 7.147 7.147 make_images 4572 14.5 2.530 2.530 6.910 6.910 qs_ot_get_p 119 10.4 0.001 0.001 6.733 6.733 fft_wrap_pw1pw2_140 487 13.2 0.729 0.729 6.425 6.425 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 6.407 6.407 dbcsr_make_dense_low 5837 15.5 0.106 0.106 6.260 6.260 dbcsr_complete_redistribute 329 12.2 2.907 2.907 6.152 6.152 make_dense_data 5837 16.5 5.501 5.501 6.131 6.131 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 6.123 6.123 apply_single 119 13.6 0.001 0.001 6.122 6.122 dbcsr_copy 2102 12.0 0.294 0.294 5.980 5.980 qs_env_update_s_mstruct 11 6.9 0.000 0.000 5.893 5.893 multiply_cannon 2286 13.5 0.967 0.967 5.781 5.781 dbcsr_copy_into_existing 22 7.9 5.636 5.636 5.637 5.637 dbcsr_make_images_dense 3978 14.8 0.029 0.029 5.617 5.617 qs_create_task_list 11 7.9 0.000 0.000 5.351 5.351 generate_qs_task_list 11 8.9 3.647 3.647 5.351 5.351 qs_ot_p2m_diag 50 11.0 0.222 0.222 5.127 5.127 copy_dbcsr_to_fm 153 11.3 0.004 0.004 5.018 5.018 qs_energies_compute_matrix_w 11 5.9 0.000 0.000 4.752 4.752 calculate_w_matrix_ot 11 6.9 0.008 0.008 4.752 4.752 pw_poisson_solve 119 10.3 2.002 2.002 4.669 4.669 density_rs2pw 119 9.7 0.006 0.006 4.665 4.665 cp_dbcsr_syevd 50 12.0 0.004 0.004 4.553 4.553 build_core_hamiltonian_matrix 11 6.9 0.001 0.001 4.490 4.490 cp_fm_diag_elpa 50 13.0 0.000 0.000 4.392 4.392 cp_fm_diag_elpa_base 50 14.0 4.341 4.341 4.391 4.391 multiply_cannon_loop 2286 14.5 0.052 0.052 4.247 4.247 multiply_cannon_multrec 2286 15.5 4.121 4.121 4.194 4.194 transfer_dbcsr_to_fm 11 10.9 0.000 0.000 4.107 4.107 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 3.977 3.977 fft3d_s 1202 14.6 3.324 3.324 3.331 3.331 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.022 0.025 69.833 69.834 qs_mol_dyn_low 1 2.0 0.006 0.007 69.697 69.703 qs_forces 11 3.9 0.002 0.002 69.641 69.641 qs_energies 11 4.9 0.001 0.002 64.834 64.835 scf_env_do_scf 11 5.9 0.001 0.001 58.389 58.389 scf_env_do_scf_inner_loop 108 6.5 0.003 0.011 54.074 54.074 velocity_verlet 10 3.0 0.002 0.002 41.733 41.735 rebuild_ks_matrix 119 8.3 0.001 0.001 27.147 27.171 qs_ks_build_kohn_sham_matrix 119 9.3 0.022 0.023 27.146 27.171 qs_ks_update_qs_env 119 7.6 0.001 0.001 24.125 24.150 qs_rho_update_rho 119 7.7 0.001 0.001 21.420 21.430 calculate_rho_elec 119 8.7 0.048 0.049 21.419 21.430 sum_up_and_integrate 119 10.3 0.044 0.046 21.291 21.318 integrate_v_rspace 119 11.3 0.005 0.005 21.246 21.272 dbcsr_multiply_generic 2286 12.5 0.130 0.133 15.974 16.104 grid_collocate_task_list 119 9.7 15.375 16.066 15.375 16.066 grid_integrate_task_list 119 12.3 15.332 15.630 15.332 15.630 qs_scf_new_mos 108 7.5 0.001 0.001 13.043 13.073 qs_scf_loop_do_ot 108 8.5 0.001 0.001 13.042 13.072 ot_scf_mini 108 9.5 0.003 0.003 12.225 12.262 multiply_cannon 2286 13.5 0.225 0.231 10.614 10.843 multiply_cannon_loop 2286 14.5 0.216 0.228 9.628 9.972 mp_waitall_1 169478 16.3 7.902 8.256 7.902 8.256 ot_mini 108 10.5 0.001 0.001 7.187 7.222 rs_pw_transfer 974 11.9 0.016 0.017 6.406 7.206 density_rs2pw 119 9.7 0.009 0.010 5.479 6.309 pw_transfer 1439 11.6 0.141 0.151 5.530 5.614 multiply_cannon_metrocomm3 18288 15.5 0.078 0.082 5.093 5.387 fft_wrap_pw1pw2 1201 12.6 0.014 0.015 5.247 5.323 potential_pw2rs 119 12.3 0.010 0.010 4.909 4.920 fft_wrap_pw1pw2_140 487 13.2 0.551 0.580 4.589 4.747 init_scf_run 11 5.9 0.000 0.002 4.424 4.424 scf_env_initial_rho_setup 11 6.9 0.000 0.001 4.423 4.424 init_scf_loop 11 6.9 0.000 0.001 4.298 4.299 wfi_extrapolate 11 7.9 0.001 0.001 4.040 4.041 fft3d_ps 1201 14.6 2.157 2.278 3.859 3.924 make_m2s 4572 13.5 0.074 0.075 3.683 3.726 qs_ot_get_derivative 108 11.5 0.001 0.002 3.563 3.595 ot_diis_step 108 11.5 0.005 0.005 3.594 3.594 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 3.534 3.561 apply_single 119 13.6 0.001 0.001 3.534 3.561 multiply_cannon_multrec 18288 15.5 3.405 3.542 3.422 3.559 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 3.239 3.246 make_images 4572 14.5 0.184 0.188 3.008 3.057 mp_waitany 9880 13.7 2.072 2.868 2.072 2.868 rs_pw_transfer_RS2PW_140 130 11.5 0.574 0.619 1.932 2.743 rs_pw_transfer_PW2RS_140 130 13.9 1.176 1.242 2.471 2.500 mp_alltoall_d11v 2130 13.8 1.280 1.883 1.280 1.883 qs_ot_get_p 119 10.4 0.001 0.001 1.729 1.751 cp_gemm 81 9.0 0.000 0.000 1.609 1.614 cp_gemm_cosma 81 10.0 1.609 1.613 1.609 1.613 rs_gather_matrices 119 12.3 0.127 0.138 0.952 1.586 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 1.423 1.538 make_images_data 4572 15.5 0.062 0.068 1.387 1.463 qs_energies_init_hamiltonians 11 5.9 0.000 0.001 1.419 1.420 prepare_preconditioner 11 7.9 0.000 0.000 1.397 1.410 make_preconditioner 11 8.9 0.000 0.000 1.397 1.410 ------------------------------------------------------------------------------- Plot: name="H2O-64_timings_32omp", title="Timings of H2O-64 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32omp", name="rest", label="rest", y=77.48499999999999, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=22.313, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=19.483, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=17.633, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_gemm_cosma", label="cp_gemm_cosma", y=15.096, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=7.147, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=4.121, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 Plot: name="H2O-64_timings_32mpi", title="Timings of H2O-64 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32mpi", name="rest", label="rest", y=24.052999999999997, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=15.375, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=15.332, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_gemm_cosma", label="cp_gemm_cosma", y=1.609, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=3.405, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=7.902, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=2.157, yerr=0.0 Running H2O-64_nonortho.inp with 1 threads and 32 ranks... done. Running H2O-64_nonortho.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_nonortho_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.032 0.032 216.929 216.929 qs_mol_dyn_low 1 2.0 0.004 0.004 216.030 216.030 qs_forces 11 3.9 0.001 0.001 215.974 215.974 qs_energies 11 4.9 0.001 0.001 201.628 201.628 scf_env_do_scf 11 5.9 0.001 0.001 165.173 165.173 velocity_verlet 10 3.0 0.002 0.002 145.069 145.069 scf_env_do_scf_inner_loop 96 6.5 0.008 0.008 122.089 122.089 rebuild_ks_matrix 107 8.3 0.001 0.001 61.568 61.568 qs_ks_build_kohn_sham_matrix 107 9.3 0.016 0.016 61.567 61.567 qs_ks_update_qs_env 107 7.6 0.001 0.001 55.494 55.494 qs_rho_update_rho 107 7.7 0.001 0.001 54.716 54.716 calculate_rho_elec 107 8.7 1.390 1.390 54.716 54.716 sum_up_and_integrate 107 10.3 0.337 0.337 50.765 50.765 integrate_v_rspace 107 11.3 0.508 0.508 50.428 50.428 grid_collocate_task_list 107 9.7 49.294 49.294 49.294 49.294 grid_integrate_task_list 107 12.3 47.653 47.653 47.653 47.653 init_scf_loop 11 6.9 0.000 0.000 42.886 42.886 prepare_preconditioner 11 7.9 0.000 0.000 35.502 35.502 make_preconditioner 11 8.9 0.000 0.000 35.502 35.502 make_full_inverse_cholesky 11 9.9 0.001 0.001 33.445 33.445 qs_scf_new_mos 96 7.5 0.001 0.001 24.445 24.445 qs_scf_loop_do_ot 96 8.5 0.001 0.001 24.445 24.445 ot_scf_mini 96 9.5 0.003 0.003 22.746 22.746 dbcsr_multiply_generic 1966 12.4 0.159 0.159 21.152 21.152 cp_fm_cholesky_invert 11 10.9 19.963 19.963 19.963 19.963 init_scf_run 11 5.9 0.001 0.001 19.499 19.499 scf_env_initial_rho_setup 11 6.9 0.001 0.001 19.498 19.498 wfi_extrapolate 11 7.9 0.001 0.001 18.308 18.308 cp_gemm 81 9.0 0.000 0.000 14.994 14.994 cp_gemm_cosma 81 10.0 14.994 14.994 14.994 14.994 ot_mini 96 10.5 0.001 0.001 13.681 13.681 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 11.978 11.978 make_m2s 3932 13.4 0.056 0.056 11.818 11.818 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 7.652 7.652 cp_fm_cholesky_decompose 22 10.9 7.461 7.461 7.461 7.461 qs_ot_get_derivative 96 11.5 0.001 0.001 7.275 7.275 pw_transfer 1295 11.6 0.083 0.083 6.937 6.937 qs_env_update_s_mstruct 11 6.9 0.000 0.000 6.931 6.931 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 6.692 6.692 fft_wrap_pw1pw2 1081 12.6 0.009 0.009 6.660 6.660 make_images 3932 14.4 2.347 2.347 6.424 6.424 ot_diis_step 96 11.5 0.005 0.005 6.402 6.402 qs_create_task_list 11 7.9 0.000 0.000 6.393 6.393 generate_qs_task_list 11 8.9 4.715 4.715 6.393 6.393 dbcsr_complete_redistribute 317 12.2 2.901 2.901 6.226 6.226 dbcsr_copy 1855 11.9 0.264 0.264 6.094 6.094 dbcsr_copy_into_existing 22 7.9 5.774 5.774 5.775 5.775 fft_wrap_pw1pw2_140 439 13.2 0.596 0.596 5.636 5.636 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 5.541 5.541 apply_single 107 13.6 0.000 0.000 5.541 5.541 dbcsr_make_dense_low 4961 15.5 0.095 0.095 5.508 5.508 qs_ot_get_p 107 10.4 0.001 0.001 5.488 5.488 make_dense_data 4961 16.5 4.807 4.807 5.394 5.394 multiply_cannon 1966 13.4 0.910 0.910 5.248 5.248 copy_dbcsr_to_fm 147 11.2 0.003 0.003 5.069 5.069 dbcsr_make_images_dense 3386 14.7 0.024 0.024 4.952 4.952 qs_energies_compute_matrix_w 11 5.9 0.000 0.000 4.807 4.807 calculate_w_matrix_ot 11 6.9 0.008 0.008 4.807 4.807 build_core_hamiltonian_matrix 11 6.9 0.001 0.001 4.486 4.486 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_nonortho_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.011 135.875 135.876 qs_mol_dyn_low 1 2.0 0.005 0.005 135.743 135.750 qs_forces 11 3.9 0.002 0.002 135.688 135.688 qs_energies 11 4.9 0.001 0.002 126.435 126.439 scf_env_do_scf 11 5.9 0.001 0.001 115.935 115.937 scf_env_do_scf_inner_loop 96 6.5 0.003 0.010 107.472 107.473 velocity_verlet 10 3.0 0.002 0.003 84.325 84.327 rebuild_ks_matrix 107 8.3 0.001 0.001 60.652 60.693 qs_ks_build_kohn_sham_matrix 107 9.3 0.021 0.024 60.651 60.693 sum_up_and_integrate 107 10.3 0.044 0.048 54.851 54.884 integrate_v_rspace 107 11.3 0.004 0.006 54.807 54.839 qs_ks_update_qs_env 107 7.6 0.001 0.001 53.320 53.360 qs_rho_update_rho 107 7.7 0.001 0.001 51.558 51.578 calculate_rho_elec 107 8.7 0.043 0.045 51.557 51.578 grid_integrate_task_list 107 12.3 46.919 47.799 46.919 47.799 grid_collocate_task_list 107 9.7 43.420 44.301 43.420 44.301 dbcsr_multiply_generic 1966 12.4 0.121 0.125 17.142 17.211 qs_scf_new_mos 96 7.5 0.001 0.001 14.050 14.093 qs_scf_loop_do_ot 96 8.5 0.001 0.002 14.049 14.092 ot_scf_mini 96 9.5 0.003 0.003 13.130 13.159 multiply_cannon 1966 13.4 0.202 0.208 10.876 11.036 rs_pw_transfer 878 11.9 0.015 0.017 8.684 10.293 multiply_cannon_loop 1966 14.4 0.207 0.227 9.675 9.964 density_rs2pw 107 9.7 0.008 0.011 7.550 9.173 mp_waitall_1 146670 16.2 8.443 8.913 8.443 8.913 init_scf_loop 11 6.9 0.001 0.001 8.442 8.444 init_scf_run 11 5.9 0.000 0.002 8.215 8.216 scf_env_initial_rho_setup 11 6.9 0.000 0.001 8.215 8.215 ot_mini 96 10.5 0.001 0.002 7.827 7.858 wfi_extrapolate 11 7.9 0.001 0.002 7.595 7.596 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 7.559 7.569 mp_waitany 8968 13.7 4.313 5.972 4.313 5.972 multiply_cannon_metrocomm3 15728 15.4 0.073 0.078 5.291 5.616 pw_transfer 1295 11.6 0.131 0.140 5.522 5.580 rs_pw_transfer_RS2PW_140 118 11.5 0.425 0.440 3.967 5.569 fft_wrap_pw1pw2 1081 12.6 0.013 0.015 5.250 5.308 potential_pw2rs 107 12.3 0.009 0.010 5.076 5.093 mp_alltoall_d11v 1998 13.7 3.105 4.731 3.105 4.731 fft_wrap_pw1pw2_140 439 13.2 0.505 0.527 4.467 4.629 rs_gather_matrices 107 12.3 0.123 0.132 2.759 4.335 qs_ot_get_derivative 96 11.5 0.001 0.002 4.137 4.170 fft3d_ps 1081 14.6 2.066 2.204 3.933 3.990 make_m2s 3932 13.4 0.067 0.070 3.884 3.943 ot_diis_step 96 11.5 0.004 0.005 3.638 3.640 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 3.554 3.588 apply_single 107 13.6 0.001 0.001 3.554 3.588 multiply_cannon_multrec 15728 15.4 3.258 3.363 3.274 3.380 make_images 3932 14.4 0.167 0.173 3.267 3.337 ------------------------------------------------------------------------------- Plot: name="H2O-64_nonortho_timings_32omp", title="Timings of H2O-64_nonortho with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="rest", label="rest", y=77.564, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=49.294, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=47.653, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=19.963, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_gemm_cosma", label="cp_gemm_cosma", y=14.994, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=7.461, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 Plot: name="H2O-64_nonortho_timings_32mpi", title="Timings of H2O-64_nonortho with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="rest", label="rest", y=29.522000000000006, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=43.42, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=46.919, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_gemm_cosma", label="cp_gemm_cosma", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=3.258, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=8.443, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitany", label="mp_waitany", y=4.313, yerr=0.0 Running H2O-hyb.inp with 1 threads and 32 ranks... done. Running H2O-hyb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-hyb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.378 0.378 270.739 270.739 qs_energies 1 2.0 0.000 0.000 269.559 269.559 scf_env_do_scf 1 3.0 0.000 0.000 267.144 267.144 qs_ks_update_qs_env 8 5.0 0.000 0.000 250.404 250.404 rebuild_ks_matrix 7 6.0 0.000 0.000 250.303 250.303 qs_ks_build_kohn_sham_matrix 7 7.0 0.001 0.001 250.302 250.302 hfx_ks_matrix 7 8.0 0.000 0.000 167.987 167.987 integrate_four_center 7 9.0 2.109 2.109 167.960 167.960 scf_env_do_scf_inner_loop 7 4.0 0.001 0.001 157.082 157.082 integrate_four_center_main 7 10.0 1.440 1.440 156.581 156.581 integrate_four_center_bin 448 11.0 155.141 155.141 155.141 155.141 init_scf_loop 1 4.0 0.000 0.000 110.047 110.047 cp_gemm 129 10.3 0.000 0.000 67.680 67.680 cp_gemm_cosma 129 11.3 67.679 67.679 67.679 67.679 admm_mo_calc_rho_aux 7 8.0 0.000 0.000 38.786 38.786 admm_fit_mo_coeffs 7 9.0 0.000 0.000 37.111 37.111 admm_mo_merge_derivs 7 8.0 0.000 0.000 34.934 34.934 merge_mo_derivs_diag 7 9.0 0.021 0.021 34.934 34.934 purify_mo_diag 7 10.0 0.001 0.001 22.449 22.449 fit_mo_coeffs 7 10.0 0.000 0.000 14.663 14.663 prepare_preconditioner 1 5.0 0.000 0.000 13.063 13.063 make_preconditioner 1 6.0 0.000 0.000 13.063 13.063 integrate_four_center_load 7 10.0 0.001 0.001 8.903 8.903 hfx_load_balance 1 11.0 0.002 0.002 8.902 8.902 arnoldi_normal_ev 11 9.3 0.002 0.002 8.052 8.052 estimate_cond_num 1 7.0 0.000 0.000 7.981 7.981 build_subspace 28 9.5 0.013 0.013 7.955 7.955 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-hyb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.182 0.187 183.060 183.073 qs_energies 1 2.0 0.000 0.001 182.741 182.754 scf_env_do_scf 1 3.0 0.000 0.001 182.200 182.212 qs_ks_update_qs_env 8 5.0 0.000 0.000 179.449 179.461 rebuild_ks_matrix 7 6.0 0.000 0.000 179.436 179.449 qs_ks_build_kohn_sham_matrix 7 7.0 0.002 0.002 179.436 179.449 hfx_ks_matrix 7 8.0 0.000 0.000 167.702 167.713 integrate_four_center 7 9.0 0.086 0.395 167.686 167.698 integrate_four_center_main 7 10.0 0.004 0.005 153.834 157.403 integrate_four_center_bin 448 11.0 153.830 157.399 153.830 157.399 scf_env_do_scf_inner_loop 7 4.0 0.000 0.001 106.867 106.875 init_scf_loop 1 4.0 0.000 0.001 75.331 75.336 integrate_four_center_load 7 10.0 0.000 0.001 9.088 9.090 hfx_load_balance 1 11.0 0.001 0.001 9.088 9.090 mp_sync 70 11.3 3.980 7.039 3.980 7.039 cp_gemm 129 10.3 0.000 0.000 4.900 4.904 cp_gemm_cosma 129 11.3 4.899 4.904 4.899 4.904 hfx_load_balance_bin 1 12.0 4.354 4.547 4.354 4.547 hfx_load_balance_count 1 12.0 4.346 4.533 4.346 4.533 ------------------------------------------------------------------------------- Plot: name="H2O-hyb_timings_32omp", title="Timings of H2O-hyb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32omp", name="rest", label="rest", y=43.99199999999999, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_bin", label="integrate_four_center_bin", y=155.141, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="cp_gemm_cosma", label="cp_gemm_cosma", y=67.679, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center", label="integrate_four_center", y=2.109, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_main", label="integrate_four_center_main", y=1.44, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="CP2K", label="CP2K", y=0.378, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_count", label="hfx_load_balance_count", y=0.0, yerr=0.0 Plot: name="H2O-hyb_timings_32mpi", title="Timings of H2O-hyb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32mpi", name="rest", label="rest", y=11.37899999999999, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_bin", label="integrate_four_center_bin", y=153.83, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="cp_gemm_cosma", label="cp_gemm_cosma", y=4.899, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center", label="integrate_four_center", y=0.086, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_main", label="integrate_four_center_main", y=0.004, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="CP2K", label="CP2K", y=0.182, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="mp_sync", label="mp_sync", y=3.98, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=4.354, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_count", label="hfx_load_balance_count", y=4.346, yerr=0.0 Running GW_PBE_4benzene.inp with 1 threads and 32 ranks... done. Running GW_PBE_4benzene.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/GW_PBE_4benzene_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.015 0.015 401.403 401.403 qs_energies 1 2.0 0.000 0.000 400.936 400.936 mp2_main 1 3.0 0.000 0.000 394.979 394.979 mp2_gpw_main 1 4.0 0.000 0.000 394.529 394.529 rpa_ri_compute_en 1 5.0 0.000 0.000 379.622 379.622 rpa_num_int 1 6.0 0.000 0.000 379.598 379.598 compute_mat_P_omega 1 7.0 0.002 0.002 204.928 204.928 compute_mat_P_omega_contract 10 8.0 11.797 11.797 203.436 203.436 dbcsr_t_total 2336 9.6 0.015 0.015 194.470 194.470 cp_gemm 105 8.4 0.000 0.000 147.021 147.021 cp_gemm_cosma 105 9.4 147.020 147.020 147.020 147.020 dbcsr_t_contract 787 11.0 46.443 46.443 123.722 123.722 GW_matrix_operations 10 7.0 0.005 0.005 103.681 103.681 compute_mat_P_omega_calc_M_occ 250 9.0 11.797 11.797 78.316 78.316 dbcsr_tas_total 1149 12.2 0.049 0.049 71.273 71.273 dbcsr_tas_multiply 807 12.1 0.003 0.003 69.927 69.927 dbcsr_t_copy 1103 10.7 19.496 19.496 69.382 69.382 dbcsr_multiply_generic 837 15.8 0.125 0.125 57.139 57.139 dbcsr_tas_dbcsr 807 14.1 0.003 0.003 56.655 56.655 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 51.544 51.544 rpa_num_int_RPA_matrix_operati 10 7.0 0.000 0.000 49.675 49.675 contract_P_omega_with_mat_L 10 8.0 0.000 0.000 48.009 48.009 dbcsr_tas_mm_1N 524 15.1 0.002 0.002 44.756 44.756 multiply_cannon 837 16.8 18.106 18.106 44.394 44.394 dbcsr_tas_reserve_blocks_index 3261 13.7 7.092 7.092 26.451 26.451 dbcsr_tas_copy 574 11.4 16.561 16.561 23.935 23.935 multiply_cannon_loop 837 17.8 0.148 0.148 23.696 23.696 multiply_cannon_multrec 837 18.8 22.136 22.136 22.665 22.665 dbcsr_t_reserve_blocks_index 2280 12.5 1.335 1.335 20.377 20.377 dbcsr_t_reserve_blocks_index_a 2222 11.6 0.010 0.010 20.088 20.088 dbcsr_reserve_blocks 3717 14.7 18.655 18.655 19.025 19.025 compute_QP_energies 1 7.0 0.000 0.000 18.954 18.954 compute_self_energy_cubic_gw 1 8.0 0.095 0.095 18.953 18.953 compute_mat_P_omega_copy_M_occ 250 9.0 0.001 0.001 18.841 18.841 mp2_ri_gpw_compute_in 1 5.0 0.000 0.000 14.891 14.891 compute_mat_P_omega_copy_M_vir 250 9.0 0.002 0.002 13.883 13.883 dbcsr_t_copy_nocomm 251 12.0 10.877 10.877 13.202 13.202 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 11.737 11.737 make_m2s 1674 16.8 0.103 0.103 10.363 10.363 dbcsr_tas_mm_2 251 15.0 0.001 0.001 10.101 10.101 make_images 1674 17.8 4.770 4.770 9.775 9.775 cp_fm_cholesky_invert 10 8.0 8.619 8.619 8.619 8.619 ------------------------------------------------------------------------------- From /workspace/artifacts/GW_PBE_4benzene_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.009 57.823 57.825 qs_energies 1 2.0 0.001 0.001 57.694 57.700 mp2_main 1 3.0 0.001 0.001 56.307 56.313 mp2_gpw_main 1 4.0 0.000 0.001 56.254 56.260 rpa_ri_compute_en 1 5.0 0.000 0.000 54.200 54.207 rpa_num_int 1 6.0 0.000 0.001 54.193 54.200 dbcsr_t_total 2336 9.6 0.015 0.016 40.014 40.015 compute_mat_P_omega 1 7.0 0.001 0.002 39.047 39.058 compute_mat_P_omega_contract 10 8.0 0.732 0.757 38.773 38.778 dbcsr_t_contract 787 11.0 1.814 1.980 29.552 29.558 dbcsr_tas_total 1149 12.2 0.061 0.067 25.952 25.952 dbcsr_tas_multiply 807 12.1 0.003 0.003 25.810 25.812 dbcsr_tas_dbcsr 807 14.1 0.003 0.003 18.875 18.876 dbcsr_multiply_generic 837 15.8 0.067 0.072 15.707 16.700 compute_mat_P_omega_calc_M_occ 250 9.0 0.718 0.746 13.072 13.073 cp_gemm 105 8.4 0.000 0.000 10.858 10.875 cp_gemm_cosma 105 9.4 10.858 10.874 10.858 10.874 multiply_cannon 837 16.8 0.129 0.144 9.315 9.877 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 9.448 9.448 dbcsr_t_copy 1111 10.7 4.074 4.335 8.901 9.267 dbcsr_tas_mm_1N 524 15.1 0.003 0.003 8.348 9.206 multiply_cannon_loop 837 17.8 0.041 0.044 8.472 9.045 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 8.394 8.395 multiply_cannon_multrec 1386 17.8 6.670 7.238 6.916 7.462 dbcsr_tas_mm_2 251 15.0 0.002 0.002 7.162 7.163 mp_sync 8696 11.6 6.120 7.134 6.120 7.134 GW_matrix_operations 10 7.0 0.001 0.002 7.037 7.043 make_m2s 1674 16.8 0.042 0.045 5.498 6.084 make_images 1674 17.8 0.239 0.266 5.419 6.003 compute_QP_energies 1 7.0 0.000 0.001 4.016 4.016 compute_self_energy_cubic_gw 1 8.0 0.005 0.007 4.013 4.016 rpa_num_int_RPA_matrix_operati 10 7.0 0.000 0.000 3.742 3.756 contract_P_omega_with_mat_L 10 8.0 0.000 0.000 3.637 3.651 dbcsr_t_communicate_buffer 1098 11.7 0.090 0.097 3.140 3.269 mp_waitall_2 3776 14.7 2.932 3.165 2.932 3.165 make_images_data 1674 18.8 0.036 0.039 2.876 3.024 contract_cubic_gw 21 9.0 0.000 0.000 2.981 2.981 hybrid_alltoall_any 1724 19.5 2.264 2.580 2.773 2.914 dbcsr_t_reserve_blocks_index_a 2791 11.4 0.018 0.020 2.536 2.890 dbcsr_t_reserve_blocks_index 2849 12.4 0.105 0.114 2.531 2.887 dbcsr_tas_reserve_blocks_index 3300 13.8 0.262 0.284 2.482 2.831 make_images_pack 1674 18.8 2.113 2.598 2.127 2.610 dbcsr_reserve_blocks 3785 14.7 2.211 2.536 2.250 2.579 mp2_ri_gpw_compute_in 1 5.0 0.001 0.001 2.051 2.051 convert_to_new_pgrid 2421 14.1 0.017 0.018 1.795 1.939 dbcsr_copy 3323 15.8 1.733 1.878 1.762 1.907 mp_waitall_1 26582 19.0 1.357 1.795 1.357 1.795 compute_mat_P_omega_copy_M_vir 250 9.0 0.001 0.002 1.620 1.625 dbcsr_add_anytype 909 13.7 0.956 1.014 1.489 1.552 compute_mat_P_omega_copy_M_occ 250 9.0 0.001 0.001 1.440 1.445 dbcsr_tas_replicate 396 14.1 0.778 0.855 1.296 1.356 scf_env_do_scf 1 3.0 0.000 0.000 1.334 1.334 scf_env_do_scf_inner_loop 17 4.0 0.000 0.001 1.333 1.334 mp_max_i 2058 9.6 0.974 1.223 0.974 1.223 ------------------------------------------------------------------------------- Plot: name="GW_PBE_4benzene_timings_32omp", title="Timings of GW_PBE_4benzene with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="rest", label="rest", y=147.653, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="cp_gemm_cosma", label="cp_gemm_cosma", y=147.02, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbcsr_t_contract", label="dbcsr_t_contract", y=46.443, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=22.136, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbcsr_t_copy", label="dbcsr_t_copy", y=19.496, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbcsr_reserve_blocks", label="dbcsr_reserve_blocks", y=18.655, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 Plot: name="GW_PBE_4benzene_timings_32mpi", title="Timings of GW_PBE_4benzene with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="rest", label="rest", y=23.144000000000005, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="cp_gemm_cosma", label="cp_gemm_cosma", y=10.858, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbcsr_t_contract", label="dbcsr_t_contract", y=1.814, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=6.67, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbcsr_t_copy", label="dbcsr_t_copy", y=4.074, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbcsr_reserve_blocks", label="dbcsr_reserve_blocks", y=2.211, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=2.932, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_sync", label="mp_sync", y=6.12, yerr=0.0 Running diag_cu144_broy.inp with 1 threads and 32 ranks... done. Running diag_cu144_broy.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/diag_cu144_broy_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.098 0.098 184.969 184.969 qs_energies 1 2.0 0.000 0.000 183.324 183.324 scf_env_do_scf 1 3.0 0.000 0.000 173.269 173.269 scf_env_do_scf_inner_loop 15 4.0 0.002 0.002 173.268 173.268 qs_scf_new_mos 15 5.0 0.000 0.000 75.253 75.253 qs_ks_update_qs_env 15 5.0 0.000 0.000 68.124 68.124 rebuild_ks_matrix 15 6.0 0.000 0.000 67.765 67.765 qs_ks_build_kohn_sham_matrix 15 7.0 0.002 0.002 67.765 67.765 eigensolver 15 6.0 0.002 0.002 62.053 62.053 cp_fm_diag_elpa 15 7.0 0.000 0.000 49.058 49.058 cp_fm_diag_elpa_base 15 8.0 44.701 44.701 49.057 49.057 qs_vxc_create 15 8.0 0.037 0.037 43.735 43.735 calculate_dispersion_nonloc 15 9.0 8.658 8.658 38.087 38.087 pw_transfer 1191 9.8 0.089 0.089 25.693 25.693 fft_wrap_pw1pw2 1086 10.9 0.012 0.012 25.405 25.405 qs_rho_update_rho 16 5.0 0.000 0.000 23.945 23.945 calculate_rho_elec 16 6.0 0.339 0.339 23.945 23.945 sum_up_and_integrate 15 8.0 0.076 0.076 22.447 22.447 grid_collocate_task_list 16 7.0 22.432 22.432 22.432 22.432 integrate_v_rspace 15 9.0 0.033 0.033 22.370 22.370 grid_integrate_task_list 15 10.0 21.777 21.777 21.777 21.777 fft_wrap_pw1pw2_150 765 12.0 3.250 3.250 19.284 19.284 fft3d_s 1087 12.8 10.396 10.396 10.408 10.408 copy_dbcsr_to_fm 16 5.9 0.001 0.001 10.362 10.362 pw_scatter_s 585 13.0 10.138 10.138 10.138 10.138 dbcsr_complete_redistribute 46 8.3 3.390 3.390 9.343 9.343 cp_fm_cholesky_restore 45 7.0 8.792 8.792 8.792 8.792 cp_fm_upper_to_full 30 8.0 8.557 8.557 8.557 8.557 vdW_energy 15 10.0 7.755 7.755 7.755 7.755 gspace_mixing 14 5.0 0.272 0.272 7.239 7.239 broyden_mixing 14 6.0 6.514 6.514 6.515 6.515 fft_wrap_pw1pw2_200 197 11.5 0.328 0.328 5.877 5.877 xc_vxc_pw_create 15 9.0 1.492 1.492 5.610 5.610 init_scf_run 1 3.0 0.000 0.000 4.709 4.709 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 4.596 4.596 dbcsr_finalize 159 9.9 0.020 0.020 4.086 4.086 dbcsr_merge_all 91 11.1 0.068 0.068 3.937 3.937 ------------------------------------------------------------------------------- From /workspace/artifacts/diag_cu144_broy_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.024 0.026 84.231 84.232 qs_energies 1 2.0 0.000 0.001 83.828 83.829 scf_env_do_scf 1 3.0 0.000 0.000 78.790 78.791 scf_env_do_scf_inner_loop 15 4.0 0.001 0.002 78.790 78.790 qs_ks_update_qs_env 15 5.0 0.000 0.000 39.080 39.102 rebuild_ks_matrix 15 6.0 0.000 0.000 39.034 39.056 qs_ks_build_kohn_sham_matrix 15 7.0 0.004 0.004 39.034 39.056 sum_up_and_integrate 15 8.0 0.012 0.015 23.331 23.386 integrate_v_rspace 15 9.0 0.001 0.001 23.318 23.373 qs_rho_update_rho 16 5.0 0.000 0.000 22.727 22.730 calculate_rho_elec 16 6.0 0.011 0.012 22.727 22.730 grid_integrate_task_list 15 10.0 21.443 21.971 21.443 21.971 grid_collocate_task_list 16 7.0 20.886 21.371 20.886 21.371 qs_scf_new_mos 15 5.0 0.001 0.001 17.401 17.510 eigensolver 15 6.0 0.002 0.002 15.993 16.010 qs_vxc_create 15 8.0 0.001 0.001 15.180 15.194 calculate_dispersion_nonloc 15 9.0 1.386 1.421 12.313 12.329 pw_transfer 1191 9.8 0.130 0.153 11.461 11.554 cp_fm_diag_elpa 15 7.0 0.000 0.000 11.435 11.443 cp_fm_diag_elpa_base 15 8.0 11.206 11.241 11.430 11.434 fft_wrap_pw1pw2 1086 10.9 0.020 0.023 11.172 11.266 fft3d_ps 1086 12.9 4.944 5.125 8.400 8.623 fft_wrap_pw1pw2_150 765 12.0 0.643 0.677 7.403 7.459 cp_fm_cholesky_restore 45 7.0 4.332 4.383 4.332 4.383 fft_wrap_pw1pw2_200 197 11.5 0.363 0.390 3.616 3.683 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 3.153 3.153 build_core_hamiltonian_matrix 1 4.0 0.000 0.000 2.738 2.985 xc_vxc_pw_create 15 9.0 0.061 0.083 2.866 2.881 mp_alltoall_z22v 1086 14.9 2.031 2.442 2.031 2.442 rs_pw_transfer 158 9.4 0.002 0.003 1.744 2.288 vdW_energy 15 10.0 2.067 2.178 2.067 2.178 x_to_yz 585 14.0 0.875 0.920 1.934 2.081 density_rs2pw 16 7.0 0.002 0.002 1.690 2.068 build_core_ppnl 1 5.0 1.829 2.002 1.829 2.002 yz_to_x 501 13.7 0.518 0.577 1.490 1.776 ------------------------------------------------------------------------------- Plot: name="diag_cu144_broy_timings_32omp", title="Timings of diag_cu144_broy with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32omp", name="rest", label="rest", y=66.73299999999999, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=44.701, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=22.432, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=21.777, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_s", label="fft3d_s", y=10.396, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="pw_scatter_s", label="pw_scatter_s", y=10.138, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=8.792, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 Plot: name="diag_cu144_broy_timings_32mpi", title="Timings of diag_cu144_broy with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="rest", label="rest", y=21.419999999999995, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=11.206, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=20.886, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=21.443, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="pw_scatter_s", label="pw_scatter_s", y=0.0, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=4.332, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=4.944, yerr=0.0 Running bench_dftb.inp with 1 threads and 32 ranks... done. Running bench_dftb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/bench_dftb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.085 0.085 333.783 333.783 qs_energies 1 2.0 0.000 0.000 333.627 333.627 ls_scf 1 3.0 0.000 0.000 331.893 331.893 ls_scf_main 1 4.0 0.002 0.002 316.007 316.007 density_matrix_trs4 11 5.0 0.010 0.010 174.786 174.786 ls_scf_dm_to_ks 11 5.0 0.000 0.000 134.748 134.748 matrix_ls_to_qs 11 6.0 0.000 0.000 130.573 130.573 dbcsr_multiply_generic 185 6.1 0.502 0.502 111.627 111.627 dbcsr_copy_into_existing 11 7.0 81.019 81.019 81.020 81.020 multiply_cannon 185 7.1 3.117 3.117 75.722 75.722 multiply_cannon_loop 185 8.1 0.392 0.392 54.589 54.589 dbcsr_complete_redistribute 23 7.5 38.695 38.695 54.077 54.077 multiply_cannon_multrec 185 9.1 52.410 52.410 52.520 52.520 matrix_decluster 11 7.0 0.000 0.000 49.551 49.551 arnoldi_extremal 12 6.1 0.000 0.000 44.973 44.973 arnoldi_normal_ev 12 7.1 0.028 0.028 44.973 44.973 build_subspace 23 8.1 0.129 0.129 44.307 44.307 dbcsr_matrix_vector_mult 652 9.0 0.246 0.246 34.199 34.199 dbcsr_matrix_vector_mult_local 652 10.0 32.634 32.634 32.642 32.642 make_m2s 370 7.1 0.030 0.030 29.492 29.492 make_images 370 8.1 7.343 7.343 27.089 27.089 dbcsr_finalize 646 7.5 0.212 0.212 20.658 20.658 dbcsr_merge_all 597 8.5 3.253 3.253 18.571 18.571 setup_rec_index_2d 370 8.1 17.767 17.767 17.767 17.767 dbcsr_sort_indices 1103 9.9 16.389 16.389 16.389 16.389 ls_scf_init_scf 1 4.0 0.000 0.000 15.021 15.021 ls_scf_init_matrix_S 1 5.0 0.000 0.000 14.599 14.599 quick_finalize 395 10.0 0.481 0.481 13.946 13.946 matrix_sqrt_Newton_Schulz 1 6.0 0.001 0.001 13.765 13.765 tree_to_linear_d 110 9.4 13.122 13.122 13.122 13.122 dbcsr_special_finalize 370 9.1 0.002 0.002 12.852 12.852 dbcsr_dot_sd 144 6.3 8.765 8.765 8.766 8.766 dbcsr_new_transposed 2 7.0 0.129 0.129 8.345 8.345 dbcsr_redistribute 2 8.0 8.106 8.106 8.178 8.178 dbcsr_frobenius_norm 142 6.1 7.562 7.562 7.564 7.564 make_images_data 370 9.1 0.010 0.010 6.745 6.745 matrix_qs_to_ls 12 5.1 0.000 0.000 6.741 6.741 matrix_cluster 12 6.1 0.000 0.000 6.741 6.741 ------------------------------------------------------------------------------- From /workspace/artifacts/bench_dftb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.010 90.963 90.964 qs_energies 1 2.0 0.000 0.000 90.875 90.875 ls_scf 1 3.0 0.000 0.000 90.799 90.800 ls_scf_main 1 4.0 0.001 0.003 87.224 87.225 density_matrix_trs4 11 5.0 0.008 0.013 83.573 83.675 dbcsr_multiply_generic 185 6.1 0.071 0.086 78.403 78.673 multiply_cannon 185 7.1 0.039 0.044 65.235 66.338 multiply_cannon_loop 185 8.1 0.204 0.215 61.504 63.454 multiply_cannon_multrec 1480 9.1 41.064 44.131 41.533 44.600 mp_waitall_1 11936 10.3 18.001 20.176 18.001 20.176 multiply_cannon_metrocomm3 1480 9.1 0.017 0.020 10.735 14.577 make_m2s 370 7.1 0.033 0.036 8.673 8.774 make_images 370 8.1 0.690 0.727 8.556 8.659 multiply_cannon_metrocomm1 1480 9.1 0.010 0.012 4.272 6.774 calculate_norms 2960 9.1 4.687 4.945 4.687 4.945 mp_sum_l 1039 5.9 3.197 4.376 3.197 4.376 arnoldi_extremal 12 6.1 0.000 0.001 3.776 3.783 arnoldi_normal_ev 12 7.1 0.002 0.008 3.776 3.782 make_images_data 370 9.1 0.012 0.013 3.371 3.657 build_subspace 23 8.1 0.037 0.050 3.654 3.657 dbcsr_multiply_generic_mpsum_f 137 7.1 0.000 0.001 2.321 3.350 ls_scf_dm_to_ks 11 5.0 0.000 0.000 3.155 3.277 dbcsr_matrix_vector_mult 652 9.0 0.018 0.078 3.076 3.186 hybrid_alltoall_any 393 9.9 0.308 1.516 2.721 2.937 dbcsr_complete_redistribute 23 7.5 1.745 1.893 2.770 2.909 matrix_ls_to_qs 11 6.0 0.000 0.000 2.737 2.878 ls_scf_init_scf 1 4.0 0.000 0.000 2.716 2.717 dbcsr_matrix_vector_mult_local 652 10.0 2.507 2.705 2.511 2.710 ls_scf_init_matrix_S 1 5.0 0.000 0.000 2.679 2.689 matrix_decluster 11 7.0 0.000 0.000 2.491 2.633 make_images_pack 370 9.1 2.380 2.548 2.385 2.553 matrix_sqrt_Newton_Schulz 1 6.0 0.001 0.001 2.458 2.461 buffer_matrices_ensure_size 370 8.1 2.129 2.264 2.129 2.264 dbcsr_add_d 280 6.0 0.001 0.002 2.010 2.092 dbcsr_add_anytype 280 7.0 1.084 1.145 2.009 2.091 dbcsr_finalize 646 7.5 0.014 0.016 1.894 1.990 ------------------------------------------------------------------------------- Plot: name="bench_dftb_timings_32omp", title="Timings of bench_dftb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32omp", name="rest", label="rest", y=111.25800000000004, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=81.019, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=52.41, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=38.695, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=32.634, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="setup_rec_index_2d", label="setup_rec_index_2d", y=17.767, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="calculate_norms", label="calculate_norms", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="bench_dftb_timings_32mpi", title="Timings of bench_dftb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32mpi", name="rest", label="rest", y=19.762, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=41.064, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=1.745, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=2.507, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="setup_rec_index_2d", label="setup_rec_index_2d", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="calculate_norms", label="calculate_norms", y=4.687, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=3.197, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=18.001, yerr=0.0 Running dbcsr.inp with 1 threads and 32 ranks... done. Running dbcsr.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/dbcsr_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.005 0.005 111.026 111.026 lib_test 1 2.0 0.000 0.000 111.019 111.019 dbcsr_run_tests 3 3.0 0.003 0.003 111.019 111.019 test_multiplies_multiproc 3 4.0 0.001 0.001 91.614 91.614 dbcsr_redistribute 9 5.0 63.288 63.288 66.884 66.884 dbcsr_multiply_generic 9 5.0 0.001 0.001 22.963 22.963 dbcsr_make_random_matrix 9 4.0 14.100 14.100 19.318 19.318 multiply_cannon 9 6.0 0.002 0.002 16.436 16.436 multiply_cannon_loop 9 7.0 0.002 0.002 15.936 15.936 multiply_cannon_multrec 9 8.0 15.932 15.932 15.933 15.933 dbcsr_finalize 27 5.7 0.004 0.004 8.979 8.979 dbcsr_merge_all 18 6.5 3.221 3.221 8.267 8.267 mp_alltoall_d11v 27 6.0 3.267 3.267 3.267 3.267 tree_to_linear_d 9 7.0 3.164 3.164 3.164 3.164 dbcsr_data_release 975 7.6 2.414 2.414 2.414 2.414 ------------------------------------------------------------------------------- From /workspace/artifacts/dbcsr_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.003 0.005 26.776 26.777 lib_test 1 2.0 0.000 0.000 26.748 26.767 dbcsr_run_tests 3 3.0 0.000 0.001 26.747 26.766 test_multiplies_multiproc 3 4.0 0.000 0.001 25.617 25.688 dbcsr_multiply_generic 9 5.0 0.001 0.002 23.776 23.873 multiply_cannon 9 6.0 0.002 0.003 21.577 22.007 multiply_cannon_loop 9 7.0 0.004 0.004 21.131 21.562 multiply_cannon_multrec 72 8.0 17.864 18.737 17.865 18.739 mp_waitall_1 576 9.2 3.646 4.311 3.646 4.311 multiply_cannon_metrocomm1 72 8.0 0.002 0.002 2.855 3.758 dbcsr_make_random_matrix 9 4.0 0.876 0.923 1.099 1.154 mp_sum_l 310 2.7 0.550 1.150 0.550 1.150 dbcsr_multiply_generic_mpsum_f 9 6.0 0.000 0.000 0.546 1.146 multiply_cannon_metrocomm3 72 8.0 0.000 0.001 0.400 0.993 make_m2s 18 6.0 0.001 0.001 0.900 0.949 make_images 18 7.0 0.026 0.026 0.897 0.946 dbcsr_finalize 27 5.7 0.001 0.001 0.819 0.889 dbcsr_merge_all 18 6.5 0.133 0.150 0.736 0.812 dbcsr_redistribute 9 5.0 0.371 0.415 0.654 0.679 dbcsr_data_release 444 7.6 0.581 0.639 0.581 0.639 dbcsr_destroy 111 5.9 0.005 0.050 0.505 0.581 ------------------------------------------------------------------------------- Plot: name="dbcsr_timings_32omp", title="Timings of dbcsr with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32omp", name="rest", label="rest", y=8.804000000000002, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_redistribute", label="dbcsr_redistribute", y=63.288, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=15.932, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=14.1, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_alltoall_d11v", label="mp_alltoall_d11v", y=3.267, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_merge_all", label="dbcsr_merge_all", y=3.221, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_data_release", label="dbcsr_data_release", y=2.414, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 Plot: name="dbcsr_timings_32mpi", title="Timings of dbcsr with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32mpi", name="rest", label="rest", y=2.754999999999999, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_redistribute", label="dbcsr_redistribute", y=0.371, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=17.864, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=0.876, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_alltoall_d11v", label="mp_alltoall_d11v", y=0.0, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_merge_all", label="dbcsr_merge_all", y=0.133, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_data_release", label="dbcsr_data_release", y=0.581, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=3.646, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=0.55, yerr=0.0 Running MQAE_single_node.inp with 1 threads and 32 ranks... done. Running MQAE_single_node.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/MQAE_single_node_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.041 0.041 134.569 134.569 qs_mol_dyn_low 1 2.0 0.005 0.005 132.752 132.752 velocity_verlet 5 3.0 0.004 0.004 106.791 106.791 qmmm_el_coupling 6 3.8 0.000 0.000 62.313 62.313 qmmm_elec_with_gaussian 6 4.8 0.180 0.180 62.307 62.307 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 60.679 60.679 qmmm_elec_gaussian_low_G 6 6.8 59.142 59.142 59.142 59.142 qs_forces 6 3.8 0.001 0.001 55.406 55.406 qs_energies 6 4.8 0.000 0.000 49.315 49.315 scf_env_do_scf 6 5.8 0.000 0.000 45.478 45.478 scf_env_do_scf_inner_loop 39 6.8 0.003 0.003 38.115 38.115 rebuild_ks_matrix 45 8.4 0.000 0.000 37.822 37.822 qs_ks_build_kohn_sham_matrix 45 9.4 0.007 0.007 37.822 37.822 qs_ks_update_qs_env 45 7.8 0.000 0.000 32.448 32.448 pw_transfer 966 11.9 0.066 0.066 22.708 22.708 fft_wrap_pw1pw2 801 13.0 0.008 0.008 22.387 22.387 fft_wrap_pw1pw2_150 507 14.3 2.325 2.325 21.897 21.897 qs_vxc_create 45 10.4 0.001 0.001 20.734 20.734 xc_vxc_pw_create 45 11.4 4.386 4.386 20.733 20.733 pw_scatter_s 429 15.4 10.036 10.036 10.036 10.036 qs_rho_update_rho 45 7.9 0.000 0.000 9.983 9.983 calculate_rho_elec 45 8.9 0.883 0.883 9.983 9.983 xc_rho_set_and_dset_create 45 12.4 0.247 0.247 9.445 9.445 qmmm_forces 6 3.8 0.001 0.001 8.831 8.831 fft3d_s 802 15.0 8.734 8.734 8.744 8.744 pw_integral_ab 2539 7.4 8.465 8.465 8.465 8.465 qmmm_forces_with_gaussian 6 4.8 0.128 0.128 8.371 8.371 init_scf_loop 6 6.8 0.000 0.000 7.358 7.358 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 6.440 6.440 qs_ks_ddapc 45 10.4 0.001 0.001 6.349 6.349 fist_calc_energy_force 6 3.8 0.002 0.002 5.430 5.430 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 5.385 5.385 qmmm_forces_gaussian_low_G 6 6.8 5.367 5.367 5.367 5.367 pw_poisson_solve 51 9.9 2.212 2.212 5.082 5.082 grid_collocate_task_list 45 9.9 4.577 4.577 4.577 4.577 density_rs2pw 45 9.9 0.003 0.003 4.523 4.523 sum_up_and_integrate 45 10.4 0.223 0.223 4.256 4.256 force_nonbond 6 4.8 4.211 4.211 4.211 4.211 integrate_v_rspace 45 11.4 0.012 0.012 4.033 4.033 cp_ddapc_apply_CD 45 11.4 0.006 0.006 3.943 3.943 ------------------------------------------------------------------------------- From /workspace/artifacts/MQAE_single_node_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.032 0.034 85.736 85.738 qs_mol_dyn_low 1 2.0 0.005 0.006 84.166 84.260 qs_forces 6 3.8 0.001 0.001 61.670 61.670 qs_energies 6 4.8 0.001 0.001 58.782 58.782 scf_env_do_scf 6 5.8 0.000 0.001 57.278 57.278 scf_env_do_scf_inner_loop 113 6.2 0.002 0.009 54.999 55.000 rebuild_ks_matrix 119 8.1 0.000 0.000 40.569 40.588 qs_ks_build_kohn_sham_matrix 119 9.1 0.020 0.022 40.569 40.588 qs_ks_update_qs_env 119 7.3 0.001 0.001 38.132 38.150 velocity_verlet 5 3.0 0.002 0.003 35.334 35.339 pw_transfer 2446 11.8 0.274 0.305 25.755 26.071 fft_wrap_pw1pw2 2059 12.8 0.033 0.037 24.949 25.329 fft_wrap_pw1pw2_150 1321 14.0 2.200 2.387 24.248 24.514 qs_vxc_create 119 10.1 0.003 0.004 20.626 20.630 xc_vxc_pw_create 119 11.1 0.448 0.602 20.623 20.627 fft3d_ps 2059 14.8 11.216 12.151 18.710 19.229 qs_rho_update_rho 119 7.3 0.001 0.001 16.080 16.081 calculate_rho_elec 119 8.3 0.086 0.095 16.080 16.080 sum_up_and_integrate 119 10.1 0.084 0.091 14.593 14.638 integrate_v_rspace 119 11.1 0.004 0.005 14.510 14.558 qmmm_forces 6 3.8 0.003 0.003 12.410 12.411 qmmm_forces_with_gaussian 6 4.8 0.365 0.432 12.013 12.184 rs_pw_transfer 988 11.5 0.015 0.017 10.959 11.548 xc_rho_set_and_dset_create 119 12.1 0.505 0.601 9.838 10.233 density_rs2pw 119 9.3 0.011 0.012 9.608 10.081 qmmm_el_coupling 6 3.8 0.000 0.000 8.935 8.983 qmmm_elec_with_gaussian 6 4.8 0.356 0.448 8.932 8.979 potential_pw2rs 119 12.1 0.011 0.012 8.527 8.538 grid_collocate_task_list 119 9.3 6.196 6.675 6.196 6.675 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 5.784 5.896 grid_integrate_task_list 119 12.1 5.576 5.828 5.576 5.828 mp_alltoall_z22v 2059 16.8 4.456 5.767 4.456 5.767 qmmm_forces_gaussian_low_G 6 6.8 4.729 4.851 4.729 4.851 rs_pw_transfer_PW2RS_150 125 13.9 2.516 2.615 4.764 4.809 pw_restrict_s3 18 5.8 2.157 2.194 4.709 4.767 rs_pw_transfer_RS2PW_150 125 11.2 2.049 2.229 4.010 4.580 mp_waitany 4028 12.8 3.314 4.287 3.314 4.287 yz_to_x 964 15.3 1.159 1.290 3.366 4.274 x_to_yz 1095 16.3 1.831 2.045 4.081 4.229 qmmm_elec_with_gaussian:spline 6 5.8 0.000 0.000 3.898 3.953 pw_prolongate_s3 18 6.8 1.756 1.787 3.898 3.953 pw_integral_ab 2761 7.7 3.190 3.217 3.526 3.716 qs_scf_new_mos 113 7.2 0.001 0.001 3.615 3.625 qs_scf_loop_do_ot 113 8.2 0.001 0.001 3.615 3.625 ot_scf_mini 113 9.2 0.002 0.002 3.457 3.466 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 3.410 3.452 dbcsr_multiply_generic 2588 12.3 0.096 0.116 3.248 3.322 qs_ks_ddapc 119 10.1 0.002 0.003 2.827 2.964 qmmm_elec_gaussian_low_G 6 6.8 2.463 2.507 2.463 2.507 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 2.447 2.448 mp_sum_dm3 33 5.7 2.234 2.434 2.234 2.434 init_scf_loop 6 6.8 0.000 0.000 2.275 2.275 pw_gather_p 964 14.3 2.022 2.208 2.022 2.208 ot_mini 113 10.2 0.001 0.001 2.183 2.196 mp_waitall_1 188862 16.2 1.884 2.098 1.884 2.098 pw_scatter_p 1095 15.3 1.925 2.029 1.925 2.029 pw_derive 732 12.5 1.717 1.895 1.717 1.895 qs_ot_get_derivative 113 11.2 0.001 0.001 1.722 1.731 ------------------------------------------------------------------------------- Plot: name="MQAE_single_node_timings_32omp", title="Timings of MQAE_single_node with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32omp", name="rest", label="rest", y=38.24799999999999, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=59.142, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_scatter_s", label="pw_scatter_s", y=10.036, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_s", label="fft3d_s", y=8.734, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_integral_ab", label="pw_integral_ab", y=8.465, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=5.367, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=4.577, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=0.0, yerr=0.0 Plot: name="MQAE_single_node_timings_32mpi", title="Timings of MQAE_single_node with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32mpi", name="rest", label="rest", y=47.91, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=2.463, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_scatter_s", label="pw_scatter_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_integral_ab", label="pw_integral_ab", y=3.19, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=4.729, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=6.196, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=5.576, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=11.216, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=4.456, yerr=0.0 Summary: Performance test works fine. Status: OK Uploading artifacts... done EndDate: 2021-11-25 19:58:11+00:00