StartDate: 2021-07-30 19:18:24+00:00 CpuId: 64x Intel Xeon W 2000 / D-2100 (Skylake / Cascade Lake) {Skylake}, 14nm CommitSHA: 8ce8fc3d18617d26157ecbe8915d22e9bd667806 CommitTime: 2021-07-30 17:41:36 +0200 CommitAuthor: Matthias Krack CommitSubject: Update CRAY arch files Trying to pull image cp2k-toolchain-mpich... success :-) Trying to pull image cp2k-perf-openmp... success :-) #################### Running Image cp2k-perf-openmp #################### ========== Fetching Git Commit ========== CommitSHA: 8ce8fc3d18617d26157ecbe8915d22e9bd667806 CommitTime: 2021-07-30 17:41:36 +0200 CommitAuthor: Matthias Krack CommitSubject: Update CRAY arch files ========== Running Test ========== ========== Compiling CP2K ========== Compiling cp2k... done. ========== Running Performance Test ========== Running H2O-64.inp with 1 threads and 32 ranks... done. Running H2O-64.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.036 0.036 153.590 153.590 qs_mol_dyn_low 1 2.0 0.004 0.004 152.794 152.794 qs_forces 11 3.9 0.001 0.001 152.742 152.742 qs_energies 11 4.9 0.001 0.001 142.023 142.023 scf_env_do_scf 11 5.9 0.001 0.001 115.525 115.525 velocity_verlet 10 3.0 0.002 0.002 102.169 102.169 scf_env_do_scf_inner_loop 108 6.5 0.010 0.010 90.107 90.107 rebuild_ks_matrix 119 8.3 0.001 0.001 41.827 41.827 qs_ks_build_kohn_sham_matrix 119 9.3 0.020 0.020 41.826 41.826 qs_ks_update_qs_env 119 7.6 0.001 0.001 37.361 37.361 qs_rho_update_rho 119 7.7 0.001 0.001 36.977 36.977 calculate_rho_elec 119 8.7 1.554 1.554 36.976 36.976 grid_collocate_task_list 119 9.7 30.807 30.807 30.807 30.807 sum_up_and_integrate 119 10.3 0.396 0.396 30.021 30.021 integrate_v_rspace 119 11.3 0.153 0.153 29.625 29.625 grid_integrate_task_list 119 12.3 26.889 26.889 26.889 26.889 init_scf_loop 11 6.9 0.000 0.000 25.221 25.221 qs_scf_new_mos 108 7.5 0.001 0.001 23.165 23.165 qs_scf_loop_do_ot 108 8.5 0.001 0.001 23.164 23.164 dbcsr_multiply_generic 2286 12.5 0.162 0.162 22.024 22.024 ot_scf_mini 108 9.5 0.003 0.003 21.811 21.811 prepare_preconditioner 11 7.9 0.000 0.000 20.662 20.662 make_preconditioner 11 8.9 0.000 0.000 20.662 20.662 make_full_inverse_cholesky 11 9.9 0.000 0.000 18.637 18.637 ot_mini 108 10.5 0.001 0.001 14.391 14.391 init_scf_run 11 5.9 0.001 0.001 13.881 13.881 scf_env_initial_rho_setup 11 6.9 0.001 0.001 13.880 13.880 make_m2s 4572 13.5 0.060 0.060 13.493 13.493 wfi_extrapolate 11 7.9 0.001 0.001 13.035 13.035 cp_gemm 81 9.0 0.000 0.000 10.935 10.935 cp_gemm_cosma 81 10.0 10.935 10.935 10.935 10.935 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 8.748 8.748 pw_transfer 1439 11.6 0.107 0.107 7.886 7.886 ot_diis_step 108 11.5 0.005 0.005 7.637 7.637 fft_wrap_pw1pw2 1201 12.6 0.010 0.010 7.545 7.545 cp_fm_cholesky_decompose 22 10.9 7.313 7.313 7.313 7.313 make_images 4572 14.5 2.550 2.550 6.979 6.979 dbcsr_make_dense_low 5837 15.5 0.088 0.088 6.865 6.865 make_dense_data 5837 16.5 6.084 6.084 6.758 6.758 qs_ot_get_derivative 108 11.5 0.001 0.001 6.750 6.750 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 6.517 6.517 apply_single 119 13.6 0.001 0.001 6.516 6.516 dbcsr_complete_redistribute 329 12.2 2.865 2.865 6.373 6.373 fft_wrap_pw1pw2_140 487 13.2 0.613 0.613 6.370 6.370 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 6.335 6.335 dbcsr_make_images_dense 3978 14.8 0.024 0.024 6.118 6.118 copy_dbcsr_to_fm 153 11.3 0.003 0.003 5.225 5.225 cp_fm_cholesky_invert 11 10.9 4.936 4.936 4.936 4.936 qs_env_update_s_mstruct 11 6.9 0.000 0.000 4.755 4.755 dbcsr_copy 2102 12.0 0.309 0.309 4.711 4.711 pw_poisson_solve 119 10.3 1.975 1.975 4.710 4.710 density_rs2pw 119 9.7 0.006 0.006 4.616 4.616 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 4.381 4.381 dbcsr_copy_into_existing 22 7.9 4.357 4.357 4.357 4.357 multiply_cannon 2286 13.5 0.278 0.278 4.323 4.323 transfer_dbcsr_to_fm 11 10.9 0.000 0.000 4.315 4.315 qs_create_task_list 11 7.9 0.000 0.000 4.195 4.195 generate_qs_task_list 11 8.9 2.606 2.606 4.195 4.195 qs_ot_get_p 119 10.4 0.001 0.001 3.741 3.741 qs_energies_compute_matrix_w 11 5.9 0.000 0.000 3.687 3.687 calculate_w_matrix_ot 11 6.9 0.008 0.008 3.687 3.687 multiply_cannon_loop 2286 14.5 0.051 0.051 3.648 3.648 multiply_cannon_multrec 2286 15.5 3.527 3.527 3.596 3.596 fft3d_s 1202 14.6 3.418 3.418 3.425 3.425 build_core_hamiltonian_matrix 11 6.9 0.001 0.001 3.399 3.399 copy_fm_to_dbcsr 176 11.2 0.002 0.002 3.218 3.218 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.019 0.022 67.310 67.310 qs_mol_dyn_low 1 2.0 0.004 0.006 67.175 67.180 qs_forces 11 3.9 0.002 0.002 67.127 67.127 qs_energies 11 4.9 0.001 0.002 62.779 62.781 scf_env_do_scf 11 5.9 0.001 0.001 57.117 57.118 scf_env_do_scf_inner_loop 108 6.5 0.003 0.011 52.816 52.817 velocity_verlet 10 3.0 0.002 0.003 40.050 40.051 rebuild_ks_matrix 119 8.3 0.001 0.001 26.026 26.058 qs_ks_build_kohn_sham_matrix 119 9.3 0.021 0.022 26.026 26.057 qs_ks_update_qs_env 119 7.6 0.001 0.001 23.094 23.123 sum_up_and_integrate 119 10.3 0.052 0.055 19.752 19.777 qs_rho_update_rho 119 7.7 0.001 0.001 19.710 19.726 calculate_rho_elec 119 8.7 0.048 0.050 19.709 19.725 integrate_v_rspace 119 11.3 0.005 0.006 19.699 19.725 dbcsr_multiply_generic 2286 12.5 0.113 0.116 17.529 17.712 qs_scf_new_mos 108 7.5 0.001 0.001 14.212 14.240 qs_scf_loop_do_ot 108 8.5 0.001 0.001 14.211 14.239 grid_collocate_task_list 119 9.7 13.051 13.896 13.051 13.896 grid_integrate_task_list 119 12.3 13.134 13.837 13.134 13.837 ot_scf_mini 108 9.5 0.003 0.003 13.350 13.385 multiply_cannon 2286 13.5 0.212 0.219 11.863 12.122 multiply_cannon_loop 2286 14.5 0.196 0.211 10.718 11.056 mp_waitall_1 169478 16.3 8.858 9.082 8.858 9.082 ot_mini 108 10.5 0.001 0.001 7.986 8.018 rs_pw_transfer 974 11.9 0.017 0.018 6.960 7.665 density_rs2pw 119 9.7 0.007 0.008 6.040 6.729 pw_transfer 1439 11.6 0.146 0.151 6.094 6.157 multiply_cannon_metrocomm3 18288 15.5 0.062 0.066 5.671 5.879 fft_wrap_pw1pw2 1201 12.6 0.013 0.014 5.778 5.839 potential_pw2rs 119 12.3 0.009 0.009 5.291 5.298 fft_wrap_pw1pw2_140 487 13.2 0.566 0.596 4.965 5.127 fft3d_ps 1201 14.6 2.360 2.499 4.335 4.419 init_scf_loop 11 6.9 0.000 0.001 4.283 4.284 ot_diis_step 108 11.5 0.004 0.004 4.132 4.133 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 4.092 4.115 apply_single 119 13.6 0.001 0.001 4.091 4.115 multiply_cannon_multrec 18288 15.5 3.848 4.051 3.864 4.068 init_scf_run 11 5.9 0.000 0.002 3.968 3.968 scf_env_initial_rho_setup 11 6.9 0.000 0.001 3.968 3.968 make_m2s 4572 13.5 0.065 0.068 3.865 3.930 qs_ot_get_derivative 108 11.5 0.001 0.001 3.816 3.846 wfi_extrapolate 11 7.9 0.001 0.001 3.601 3.601 make_images 4572 14.5 0.166 0.169 3.218 3.292 mp_waitany 9880 13.7 2.447 3.184 2.447 3.184 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 3.155 3.163 rs_pw_transfer_RS2PW_140 130 11.5 0.547 0.579 2.225 2.942 rs_pw_transfer_PW2RS_140 130 13.9 1.279 1.316 2.617 2.657 mp_alltoall_d11v 2130 13.8 1.567 2.075 1.567 2.075 qs_ot_get_p 119 10.4 0.001 0.001 1.750 1.787 rs_gather_matrices 119 12.3 0.136 0.148 1.221 1.686 make_images_data 4572 15.5 0.052 0.057 1.481 1.589 prepare_preconditioner 11 7.9 0.000 0.000 1.502 1.511 make_preconditioner 11 8.9 0.000 0.000 1.502 1.511 mp_alltoall_z22v 1201 16.6 1.311 1.500 1.311 1.500 hybrid_alltoall_any 4725 16.4 0.112 0.391 1.297 1.387 make_full_inverse_cholesky 11 9.9 0.000 0.000 1.333 1.364 ------------------------------------------------------------------------------- Plot: name="H2O-64_timings_32omp", title="Timings of H2O-64 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32omp", name="rest", label="rest", y=68.035, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=30.807, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=26.889, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_gemm_cosma", label="cp_gemm_cosma", y=10.935, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=7.313, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="make_dense_data", label="make_dense_data", y=6.084, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=3.527, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="H2O-64_timings_32mpi", title="Timings of H2O-64 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32mpi", name="rest", label="rest", y=25.971999999999994, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=13.051, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=13.134, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_gemm_cosma", label="cp_gemm_cosma", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="make_dense_data", label="make_dense_data", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=3.848, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitany", label="mp_waitany", y=2.447, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=8.858, yerr=0.0 Running H2O-64_nonortho.inp with 1 threads and 32 ranks... done. Running H2O-64_nonortho.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_nonortho_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.032 0.032 177.343 177.343 qs_mol_dyn_low 1 2.0 0.004 0.004 176.577 176.577 qs_forces 11 3.9 0.001 0.001 176.526 176.526 qs_energies 11 4.9 0.001 0.001 164.778 164.778 scf_env_do_scf 11 5.9 0.001 0.001 135.401 135.401 velocity_verlet 10 3.0 0.002 0.002 117.099 117.099 scf_env_do_scf_inner_loop 96 6.5 0.009 0.009 107.963 107.963 rebuild_ks_matrix 107 8.3 0.001 0.001 54.886 54.886 qs_ks_build_kohn_sham_matrix 107 9.3 0.017 0.017 54.885 54.885 qs_rho_update_rho 107 7.7 0.001 0.001 50.176 50.176 calculate_rho_elec 107 8.7 1.398 1.398 50.176 50.176 qs_ks_update_qs_env 107 7.6 0.001 0.001 49.143 49.143 grid_collocate_task_list 107 9.7 44.430 44.430 44.430 44.430 sum_up_and_integrate 107 10.3 0.348 0.348 44.238 44.238 integrate_v_rspace 107 11.3 0.140 0.140 43.890 43.890 grid_integrate_task_list 107 12.3 41.373 41.373 41.373 41.373 init_scf_loop 11 6.9 0.000 0.000 27.237 27.237 prepare_preconditioner 11 7.9 0.000 0.000 20.662 20.662 make_preconditioner 11 8.9 0.000 0.000 20.662 20.662 qs_scf_new_mos 96 7.5 0.001 0.001 19.749 19.749 qs_scf_loop_do_ot 96 8.5 0.001 0.001 19.749 19.749 dbcsr_multiply_generic 1966 12.4 0.143 0.143 18.865 18.865 make_full_inverse_cholesky 11 9.9 0.000 0.000 18.583 18.583 ot_scf_mini 96 9.5 0.003 0.003 18.527 18.527 init_scf_run 11 5.9 0.001 0.001 15.674 15.674 scf_env_initial_rho_setup 11 6.9 0.001 0.001 15.673 15.673 wfi_extrapolate 11 7.9 0.001 0.001 14.680 14.680 ot_mini 96 10.5 0.001 0.001 12.066 12.066 make_m2s 3932 13.4 0.052 0.052 11.315 11.315 cp_gemm 81 9.0 0.000 0.000 10.698 10.698 cp_gemm_cosma 81 10.0 10.697 10.697 10.697 10.697 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 9.952 9.952 pw_transfer 1295 11.6 0.093 0.093 7.416 7.416 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 7.402 7.402 cp_fm_cholesky_decompose 22 10.9 7.267 7.267 7.267 7.267 fft_wrap_pw1pw2 1081 12.6 0.010 0.010 7.109 7.109 dbcsr_complete_redistribute 317 12.2 2.821 2.821 6.421 6.421 qs_ot_get_derivative 96 11.5 0.001 0.001 6.040 6.040 ot_diis_step 96 11.5 0.005 0.005 6.023 6.023 make_images 3932 14.4 2.114 2.114 6.018 6.018 fft_wrap_pw1pw2_140 439 13.2 0.636 0.636 6.011 6.011 qs_env_update_s_mstruct 11 6.9 0.000 0.000 5.980 5.980 dbcsr_make_dense_low 4961 15.5 0.079 0.079 5.603 5.603 make_dense_data 4961 16.5 4.921 4.921 5.508 5.508 qs_create_task_list 11 7.9 0.000 0.000 5.420 5.420 generate_qs_task_list 11 8.9 3.867 3.867 5.420 5.420 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 5.209 5.209 apply_single 107 13.6 0.000 0.000 5.209 5.209 copy_dbcsr_to_fm 147 11.2 0.003 0.003 5.182 5.182 dbcsr_make_images_dense 3386 14.7 0.021 0.021 4.947 4.947 cp_fm_cholesky_invert 11 10.9 4.923 4.923 4.923 4.923 dbcsr_copy 1855 11.9 0.285 0.285 4.647 4.647 density_rs2pw 107 9.7 0.005 0.005 4.348 4.348 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 4.343 4.343 dbcsr_copy_into_existing 22 7.9 4.319 4.319 4.320 4.320 pw_poisson_solve 107 10.3 1.890 1.890 4.313 4.313 transfer_dbcsr_to_fm 11 10.9 0.000 0.000 4.260 4.260 multiply_cannon 1966 13.4 0.244 0.244 3.859 3.859 qs_energies_compute_matrix_w 11 5.9 0.000 0.000 3.571 3.571 calculate_w_matrix_ot 11 6.9 0.008 0.008 3.571 3.571 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_nonortho_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.011 112.851 112.852 qs_mol_dyn_low 1 2.0 0.004 0.006 112.741 112.746 qs_forces 11 3.9 0.002 0.002 112.695 112.696 qs_energies 11 4.9 0.001 0.001 105.229 105.231 scf_env_do_scf 11 5.9 0.001 0.001 97.097 97.098 scf_env_do_scf_inner_loop 96 6.5 0.003 0.010 90.036 90.037 velocity_verlet 10 3.0 0.002 0.003 67.113 67.115 rebuild_ks_matrix 107 8.3 0.001 0.001 50.833 50.875 qs_ks_build_kohn_sham_matrix 107 9.3 0.019 0.020 50.832 50.875 sum_up_and_integrate 107 10.3 0.047 0.050 45.181 45.202 integrate_v_rspace 107 11.3 0.004 0.005 45.134 45.153 qs_ks_update_qs_env 107 7.6 0.001 0.001 44.751 44.791 qs_rho_update_rho 107 7.7 0.001 0.001 42.392 42.413 calculate_rho_elec 107 8.7 0.043 0.044 42.391 42.412 grid_integrate_task_list 107 12.3 38.610 39.307 38.610 39.307 grid_collocate_task_list 107 9.7 36.081 36.606 36.081 36.606 dbcsr_multiply_generic 1966 12.4 0.098 0.101 15.500 15.606 qs_scf_new_mos 96 7.5 0.001 0.001 12.360 12.380 qs_scf_loop_do_ot 96 8.5 0.001 0.001 12.359 12.380 ot_scf_mini 96 9.5 0.003 0.003 11.603 11.628 multiply_cannon 1966 13.4 0.183 0.194 10.579 10.707 multiply_cannon_loop 1966 14.4 0.171 0.183 9.587 9.873 mp_waitall_1 146670 16.2 7.977 8.296 7.977 8.296 rs_pw_transfer 878 11.9 0.015 0.016 6.774 7.806 init_scf_loop 11 6.9 0.000 0.001 7.043 7.044 ot_mini 96 10.5 0.001 0.001 6.900 6.925 density_rs2pw 107 9.7 0.007 0.007 5.764 6.788 init_scf_run 11 5.9 0.000 0.002 6.443 6.443 scf_env_initial_rho_setup 11 6.9 0.000 0.001 6.442 6.443 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 6.289 6.298 wfi_extrapolate 11 7.9 0.001 0.001 5.890 5.890 pw_transfer 1295 11.6 0.133 0.139 5.437 5.506 multiply_cannon_metrocomm3 15728 15.4 0.054 0.057 5.084 5.359 fft_wrap_pw1pw2 1081 12.6 0.012 0.013 5.151 5.218 potential_pw2rs 107 12.3 0.008 0.008 4.876 4.888 fft_wrap_pw1pw2_140 439 13.2 0.502 0.523 4.426 4.564 fft3d_ps 1081 14.6 2.108 2.241 3.852 3.922 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 3.650 3.704 apply_single 107 13.6 0.001 0.001 3.650 3.704 mp_waitany 8968 13.7 2.641 3.661 2.641 3.661 ot_diis_step 96 11.5 0.004 0.004 3.626 3.626 multiply_cannon_multrec 15728 15.4 3.436 3.586 3.450 3.600 make_m2s 3932 13.4 0.056 0.059 3.401 3.452 rs_pw_transfer_RS2PW_140 118 11.5 0.443 0.464 2.337 3.379 qs_ot_get_derivative 96 11.5 0.001 0.001 3.242 3.266 mp_alltoall_d11v 1998 13.7 1.918 3.005 1.918 3.005 make_images 3932 14.4 0.145 0.149 2.839 2.897 rs_gather_matrices 107 12.3 0.122 0.132 1.594 2.691 rs_pw_transfer_PW2RS_140 118 13.9 1.210 1.238 2.496 2.532 ------------------------------------------------------------------------------- Plot: name="H2O-64_nonortho_timings_32omp", title="Timings of H2O-64_nonortho with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="rest", label="rest", y=68.65299999999999, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=44.43, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=41.373, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_gemm_cosma", label="cp_gemm_cosma", y=10.697, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=7.267, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=4.923, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="H2O-64_nonortho_timings_32mpi", title="Timings of H2O-64_nonortho with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="rest", label="rest", y=24.10599999999998, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=36.081, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=38.61, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_gemm_cosma", label="cp_gemm_cosma", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=3.436, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitany", label="mp_waitany", y=2.641, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=7.977, yerr=0.0 Running H2O-hyb.inp with 1 threads and 32 ranks... done. Running H2O-hyb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-hyb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.404 0.404 203.214 203.214 qs_energies 1 2.0 0.000 0.000 201.944 201.944 scf_env_do_scf 1 3.0 0.000 0.000 199.460 199.460 qs_ks_update_qs_env 8 5.0 0.000 0.000 190.927 190.927 rebuild_ks_matrix 7 6.0 0.000 0.000 190.817 190.817 qs_ks_build_kohn_sham_matrix 7 7.0 0.002 0.002 190.817 190.817 hfx_ks_matrix 7 8.0 0.000 0.000 124.035 124.035 integrate_four_center 7 9.0 2.590 2.590 124.004 124.004 scf_env_do_scf_inner_loop 7 4.0 0.001 0.001 117.962 117.962 integrate_four_center_main 7 10.0 0.975 0.975 113.307 113.307 integrate_four_center_bin 444 11.0 112.332 112.332 112.332 112.332 init_scf_loop 1 4.0 0.000 0.000 81.484 81.484 cp_gemm 129 10.3 0.001 0.001 52.252 52.252 cp_gemm_cosma 129 11.3 52.251 52.251 52.251 52.251 admm_mo_calc_rho_aux 7 8.0 0.000 0.000 30.329 30.329 admm_fit_mo_coeffs 7 9.0 0.000 0.000 27.645 27.645 admm_mo_merge_derivs 7 8.0 0.000 0.000 26.246 26.246 merge_mo_derivs_diag 7 9.0 0.022 0.022 26.246 26.246 purify_mo_diag 7 10.0 0.001 0.001 14.604 14.604 fit_mo_coeffs 7 10.0 0.000 0.000 13.041 13.041 integrate_four_center_load 7 10.0 0.001 0.001 7.732 7.732 hfx_load_balance 1 11.0 0.007 0.007 7.731 7.731 calculate_rho_elec 15 7.4 0.192 0.192 6.156 6.156 grid_collocate_task_list 15 8.4 5.367 5.367 5.367 5.367 sum_up_and_integrate 7 8.0 0.042 0.042 4.753 4.753 integrate_v_rspace 14 9.0 0.014 0.014 4.700 4.700 grid_integrate_task_list 14 10.0 4.367 4.367 4.367 4.367 qs_vxc_create 14 8.0 0.000 0.000 4.277 4.277 xc_vxc_pw_create 14 9.0 0.195 0.195 4.276 4.276 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-hyb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.222 0.228 136.352 136.352 qs_energies 1 2.0 0.000 0.001 135.994 135.995 scf_env_do_scf 1 3.0 0.000 0.000 135.485 135.485 qs_ks_update_qs_env 8 5.0 0.000 0.000 132.695 132.695 rebuild_ks_matrix 7 6.0 0.000 0.000 132.681 132.682 qs_ks_build_kohn_sham_matrix 7 7.0 0.002 0.003 132.681 132.681 hfx_ks_matrix 7 8.0 0.000 0.001 122.576 122.578 integrate_four_center 7 9.0 0.181 0.485 122.560 122.561 integrate_four_center_main 7 10.0 0.004 0.005 110.504 113.981 integrate_four_center_bin 448 11.0 110.500 113.976 110.500 113.976 scf_env_do_scf_inner_loop 7 4.0 0.000 0.001 78.336 78.336 init_scf_loop 1 4.0 0.000 0.000 57.147 57.147 integrate_four_center_load 7 10.0 0.000 0.000 7.259 7.263 hfx_load_balance 1 11.0 0.001 0.001 7.259 7.263 mp_sync 70 11.3 3.859 5.732 3.859 5.732 hfx_load_balance_bin 1 12.0 3.570 3.637 3.570 3.637 hfx_load_balance_count 1 12.0 3.572 3.623 3.572 3.623 qs_vxc_create 14 8.0 0.000 0.000 3.468 3.468 xc_vxc_pw_create 14 9.0 0.020 0.021 3.468 3.468 cp_gemm 129 10.3 0.000 0.001 3.292 3.298 cp_gemm_cosma 129 11.3 3.292 3.298 3.292 3.298 xc_rho_set_and_dset_create 14 10.0 0.010 0.011 2.692 2.842 ------------------------------------------------------------------------------- Plot: name="H2O-hyb_timings_32omp", title="Timings of H2O-hyb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32omp", name="rest", label="rest", y=26.307000000000016, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_bin", label="integrate_four_center_bin", y=112.332, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="cp_gemm_cosma", label="cp_gemm_cosma", y=52.251, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=5.367, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=4.367, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center", label="integrate_four_center", y=2.59, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_count", label="hfx_load_balance_count", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=0.0, yerr=0.0 Plot: name="H2O-hyb_timings_32mpi", title="Timings of H2O-hyb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32mpi", name="rest", label="rest", y=11.378000000000014, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_bin", label="integrate_four_center_bin", y=110.5, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="cp_gemm_cosma", label="cp_gemm_cosma", y=3.292, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center", label="integrate_four_center", y=0.181, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_count", label="hfx_load_balance_count", y=3.572, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="mp_sync", label="mp_sync", y=3.859, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=3.57, yerr=0.0 Running GW_PBE_4benzene.inp with 1 threads and 32 ranks... done. Running GW_PBE_4benzene.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/GW_PBE_4benzene_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.014 0.014 328.168 328.168 qs_energies 1 2.0 0.000 0.000 327.692 327.692 mp2_main 1 3.0 0.000 0.000 323.139 323.139 mp2_gpw_main 1 4.0 0.000 0.000 322.963 322.963 rpa_ri_compute_en 1 5.0 0.000 0.000 304.601 304.601 rpa_num_int 1 6.0 0.000 0.000 304.574 304.574 compute_mat_P_omega 1 7.0 0.002 0.002 167.391 167.391 compute_mat_P_omega_contract 10 8.0 10.649 10.649 166.300 166.300 dbcsr_t_total 2336 9.6 0.016 0.016 157.451 157.451 cp_gemm 105 8.4 0.000 0.000 117.736 117.736 cp_gemm_cosma 105 9.4 117.736 117.736 117.736 117.736 dbcsr_t_contract 787 11.0 35.065 35.065 92.918 92.918 GW_matrix_operations 10 7.0 0.007 0.007 78.425 78.425 dbcsr_t_copy 1103 10.7 17.496 17.496 63.360 63.360 compute_mat_P_omega_calc_M_occ 250 9.0 10.639 10.639 61.566 61.566 dbcsr_tas_total 1149 12.2 0.045 0.045 51.928 51.928 dbcsr_tas_multiply 807 12.1 0.002 0.002 50.779 50.779 rpa_num_int_RPA_matrix_operati 10 7.0 0.000 0.000 40.458 40.458 contract_P_omega_with_mat_L 10 8.0 0.000 0.000 38.446 38.446 dbcsr_multiply_generic 837 15.8 0.133 0.133 37.250 37.250 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 37.184 37.184 dbcsr_tas_dbcsr 807 14.1 0.002 0.002 36.992 36.992 dbcsr_tas_reserve_blocks_index 3261 13.7 4.992 4.992 26.743 26.743 dbcsr_tas_mm_1N 524 15.1 0.002 0.002 25.848 25.848 multiply_cannon 837 16.8 0.447 0.447 23.430 23.430 dbcsr_tas_copy 574 11.4 15.826 15.826 22.968 22.968 dbcsr_reserve_blocks 3717 14.7 20.898 20.898 21.353 21.353 dbcsr_t_reserve_blocks_index 2280 12.5 1.260 1.260 20.940 20.940 multiply_cannon_loop 837 17.8 0.185 0.185 20.266 20.266 dbcsr_t_reserve_blocks_index_a 2222 11.6 0.012 0.012 19.570 19.570 multiply_cannon_multrec 837 18.8 18.176 18.176 18.983 18.983 mp2_ri_gpw_compute_in 1 5.0 0.000 0.000 18.347 18.347 compute_mat_P_omega_copy_M_occ 250 9.0 0.002 0.002 17.447 17.447 compute_QP_energies 1 7.0 0.000 0.000 16.472 16.472 compute_self_energy_cubic_gw 1 8.0 0.074 0.074 16.471 16.471 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 11.642 11.642 compute_mat_P_omega_copy_M_vir 250 9.0 0.002 0.002 11.292 11.292 make_m2s 1674 16.8 0.105 0.105 11.262 11.262 make_images 1674 17.8 5.105 5.105 10.782 10.782 dbcsr_t_copy_nocomm 251 12.0 8.231 8.231 10.513 10.513 dbcsr_tas_mm_2 251 15.0 0.001 0.001 9.835 9.835 dbcsr_finalize 9888 13.6 1.589 1.589 8.483 8.483 contract_cubic_gw 21 9.0 0.000 0.000 6.912 6.912 build_3c_integrals 5 6.0 2.714 2.714 6.704 6.704 mp2_ri_gpw_compute_in_copy_3c 6 6.0 0.512 0.512 6.671 6.671 ------------------------------------------------------------------------------- From /workspace/artifacts/GW_PBE_4benzene_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.005 0.009 57.023 57.024 qs_energies 1 2.0 0.000 0.000 56.914 56.921 mp2_main 1 3.0 0.000 0.001 55.442 55.448 mp2_gpw_main 1 4.0 0.000 0.001 55.385 55.391 rpa_ri_compute_en 1 5.0 0.000 0.000 53.512 53.519 rpa_num_int 1 6.0 0.001 0.001 53.504 53.511 dbcsr_t_total 2336 9.6 0.017 0.018 43.227 43.229 compute_mat_P_omega 1 7.0 0.001 0.002 42.353 42.359 compute_mat_P_omega_contract 10 8.0 0.782 0.801 42.131 42.136 dbcsr_t_contract 787 11.0 1.682 1.767 31.937 31.944 dbcsr_tas_total 1149 12.2 0.062 0.067 28.451 28.452 dbcsr_tas_multiply 807 12.1 0.002 0.003 28.344 28.346 dbcsr_tas_dbcsr 807 14.1 0.003 0.004 20.725 20.726 dbcsr_multiply_generic 837 15.8 0.066 0.070 17.300 18.499 compute_mat_P_omega_calc_M_occ 250 9.0 0.749 0.768 14.038 14.039 multiply_cannon 837 16.8 0.132 0.148 10.077 10.598 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 10.437 10.437 dbcsr_tas_mm_1N 524 15.1 0.003 0.003 9.150 10.246 dbcsr_t_copy 1111 10.7 4.157 4.388 9.617 10.002 multiply_cannon_loop 837 17.8 0.045 0.048 9.181 9.670 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 9.039 9.040 dbcsr_tas_mm_2 251 15.0 0.002 0.002 7.869 7.870 mp_sync 8696 11.6 6.742 7.851 6.742 7.851 multiply_cannon_multrec 1386 17.8 6.973 7.429 7.236 7.666 make_m2s 1674 16.8 0.045 0.049 6.207 7.036 make_images 1674 17.8 0.215 0.222 6.120 6.949 cp_gemm 105 8.4 0.000 0.000 6.411 6.420 cp_gemm_cosma 105 9.4 6.411 6.420 6.411 6.420 GW_matrix_operations 10 7.0 0.001 0.002 4.226 4.233 compute_QP_energies 1 7.0 0.000 0.000 4.159 4.159 compute_self_energy_cubic_gw 1 8.0 0.004 0.004 4.155 4.159 dbcsr_t_communicate_buffer 1098 11.7 0.080 0.085 3.696 3.866 mp_waitall_2 3776 14.7 3.488 3.766 3.488 3.766 make_images_data 1674 18.8 0.034 0.036 3.395 3.571 hybrid_alltoall_any 1724 19.5 2.627 2.958 3.255 3.454 contract_cubic_gw 21 9.0 0.000 0.000 3.201 3.201 dbcsr_t_reserve_blocks_index 2849 12.4 0.112 0.120 2.817 3.168 dbcsr_t_reserve_blocks_index_a 2791 11.4 0.022 0.025 2.771 3.122 dbcsr_tas_reserve_blocks_index 3300 13.8 0.202 0.216 2.757 3.100 make_images_pack 1674 18.8 2.283 3.031 2.298 3.046 dbcsr_reserve_blocks 3785 14.7 2.541 2.873 2.585 2.919 rpa_num_int_RPA_matrix_operati 10 7.0 0.000 0.000 2.411 2.420 contract_P_omega_with_mat_L 10 8.0 0.000 0.001 2.288 2.297 mp_waitall_1 26582 19.0 1.781 2.174 1.781 2.174 convert_to_new_pgrid 2421 14.1 0.019 0.021 2.002 2.160 dbcsr_copy 3323 15.8 1.928 2.095 1.958 2.123 mp2_ri_gpw_compute_in 1 5.0 0.001 0.001 1.871 1.871 compute_mat_P_omega_copy_M_vir 250 9.0 0.001 0.002 1.765 1.771 dbcsr_add_anytype 909 13.7 1.081 1.125 1.693 1.744 compute_mat_P_omega_copy_M_occ 250 9.0 0.001 0.002 1.556 1.561 scf_env_do_scf 1 3.0 0.000 0.000 1.414 1.414 scf_env_do_scf_inner_loop 17 4.0 0.001 0.002 1.414 1.414 dbcsr_tas_replicate 396 14.1 0.661 0.727 1.178 1.264 mp_sum_l 9176 14.6 0.953 1.201 0.953 1.201 dbcsr_finalize 10566 13.5 0.047 0.051 1.132 1.169 ------------------------------------------------------------------------------- Plot: name="GW_PBE_4benzene_timings_32omp", title="Timings of GW_PBE_4benzene with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="rest", label="rest", y=118.79700000000003, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="cp_gemm_cosma", label="cp_gemm_cosma", y=117.736, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbcsr_t_contract", label="dbcsr_t_contract", y=35.065, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbcsr_reserve_blocks", label="dbcsr_reserve_blocks", y=20.898, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=18.176, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbcsr_t_copy", label="dbcsr_t_copy", y=17.496, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 Plot: name="GW_PBE_4benzene_timings_32mpi", title="Timings of GW_PBE_4benzene with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="rest", label="rest", y=25.029000000000003, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="cp_gemm_cosma", label="cp_gemm_cosma", y=6.411, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbcsr_t_contract", label="dbcsr_t_contract", y=1.682, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbcsr_reserve_blocks", label="dbcsr_reserve_blocks", y=2.541, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=6.973, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbcsr_t_copy", label="dbcsr_t_copy", y=4.157, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=3.488, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_sync", label="mp_sync", y=6.742, yerr=0.0 Running diag_cu144_broy.inp with 1 threads and 32 ranks... done. Running diag_cu144_broy.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/diag_cu144_broy_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.110 0.110 172.922 172.922 qs_energies 1 2.0 0.000 0.000 171.226 171.226 scf_env_do_scf 1 3.0 0.000 0.000 161.673 161.673 scf_env_do_scf_inner_loop 15 4.0 0.002 0.002 161.673 161.673 qs_ks_update_qs_env 15 5.0 0.000 0.000 69.347 69.347 rebuild_ks_matrix 15 6.0 0.000 0.000 68.955 68.955 qs_ks_build_kohn_sham_matrix 15 7.0 0.003 0.003 68.955 68.955 qs_scf_new_mos 15 5.0 0.000 0.000 63.495 63.495 eigensolver 15 6.0 0.002 0.002 49.986 49.986 qs_vxc_create 15 8.0 0.040 0.040 45.254 45.254 calculate_dispersion_nonloc 15 9.0 9.178 9.178 40.604 40.604 cp_fm_diag_elpa 15 7.0 0.000 0.000 34.232 34.232 cp_fm_diag_elpa_base 15 8.0 29.007 29.007 34.231 34.231 pw_transfer 1191 9.8 0.099 0.099 27.852 27.852 fft_wrap_pw1pw2 1086 10.9 0.013 0.013 27.548 27.548 qs_rho_update_rho 16 5.0 0.000 0.000 22.388 22.388 calculate_rho_elec 16 6.0 0.345 0.345 22.388 22.388 sum_up_and_integrate 15 8.0 0.077 0.077 22.078 22.078 integrate_v_rspace 15 9.0 0.033 0.033 22.001 22.001 grid_integrate_task_list 15 10.0 21.349 21.349 21.349 21.349 fft_wrap_pw1pw2_150 765 12.0 3.406 3.406 21.022 21.022 grid_collocate_task_list 16 7.0 20.804 20.804 20.804 20.804 fft3d_s 1087 12.8 11.326 11.326 11.336 11.336 copy_dbcsr_to_fm 16 5.9 0.001 0.001 11.120 11.120 pw_scatter_s 585 13.0 11.020 11.020 11.020 11.020 cp_fm_cholesky_restore 45 7.0 10.707 10.707 10.707 10.707 cp_fm_upper_to_full 30 8.0 10.269 10.269 10.269 10.269 dbcsr_complete_redistribute 46 8.3 3.549 3.549 9.872 9.872 vdW_energy 15 10.0 8.270 8.270 8.270 8.270 gspace_mixing 14 5.0 0.277 0.277 7.584 7.584 broyden_mixing 14 6.0 6.862 6.862 6.862 6.862 fft_wrap_pw1pw2_200 197 11.5 0.358 0.358 6.264 6.264 init_scf_run 1 3.0 0.000 0.000 4.635 4.635 xc_vxc_pw_create 15 9.0 0.344 0.344 4.610 4.610 dbcsr_finalize 159 9.9 0.018 0.018 4.313 4.313 dbcsr_merge_all 91 11.1 0.090 0.090 4.174 4.174 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 4.035 4.035 mp_alltoall_d11v 186 9.2 3.544 3.544 3.544 3.544 ------------------------------------------------------------------------------- From /workspace/artifacts/diag_cu144_broy_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.014 0.016 84.451 84.452 qs_energies 1 2.0 0.000 0.000 84.110 84.111 scf_env_do_scf 1 3.0 0.000 0.000 79.485 79.486 scf_env_do_scf_inner_loop 15 4.0 0.002 0.002 79.485 79.486 qs_ks_update_qs_env 15 5.0 0.000 0.000 39.402 39.414 rebuild_ks_matrix 15 6.0 0.000 0.000 39.350 39.361 qs_ks_build_kohn_sham_matrix 15 7.0 0.004 0.005 39.350 39.361 sum_up_and_integrate 15 8.0 0.014 0.015 22.562 22.601 integrate_v_rspace 15 9.0 0.001 0.001 22.548 22.587 grid_integrate_task_list 15 10.0 20.916 21.535 20.916 21.535 qs_rho_update_rho 16 5.0 0.000 0.000 20.994 20.996 calculate_rho_elec 16 6.0 0.011 0.012 20.994 20.996 grid_collocate_task_list 16 7.0 19.118 19.698 19.118 19.698 qs_scf_new_mos 15 5.0 0.001 0.001 19.298 19.330 eigensolver 15 6.0 0.002 0.002 17.750 17.775 qs_vxc_create 15 8.0 0.001 0.001 16.225 16.252 pw_transfer 1191 9.8 0.127 0.135 13.121 13.239 calculate_dispersion_nonloc 15 9.0 1.118 1.164 13.150 13.179 fft_wrap_pw1pw2 1086 10.9 0.019 0.021 12.839 12.958 cp_fm_diag_elpa 15 7.0 0.000 0.000 12.861 12.870 cp_fm_diag_elpa_base 15 8.0 12.582 12.620 12.854 12.858 fft3d_ps 1086 12.9 5.505 5.758 9.821 10.070 fft_wrap_pw1pw2_150 765 12.0 0.652 0.702 8.683 8.726 cp_fm_cholesky_restore 45 7.0 4.637 4.693 4.637 4.693 fft_wrap_pw1pw2_200 197 11.5 0.361 0.383 3.984 4.090 mp_alltoall_z22v 1086 14.9 2.775 3.163 2.775 3.163 xc_vxc_pw_create 15 9.0 0.047 0.058 3.074 3.100 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 2.823 2.823 build_core_hamiltonian_matrix 1 4.0 0.000 0.000 2.428 2.667 x_to_yz 585 14.0 0.994 1.027 2.398 2.659 rs_pw_transfer 158 9.4 0.003 0.003 1.716 2.182 vdW_energy 15 10.0 1.993 2.093 1.993 2.093 yz_to_x 501 13.7 0.512 0.564 1.883 2.063 density_rs2pw 16 7.0 0.001 0.002 1.691 1.937 build_core_ppnl 1 5.0 1.646 1.817 1.646 1.817 ------------------------------------------------------------------------------- Plot: name="diag_cu144_broy_timings_32omp", title="Timings of diag_cu144_broy with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32omp", name="rest", label="rest", y=68.709, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=29.007, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=21.349, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=20.804, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_s", label="fft3d_s", y=11.326, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="pw_scatter_s", label="pw_scatter_s", y=11.02, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=10.707, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 Plot: name="diag_cu144_broy_timings_32mpi", title="Timings of diag_cu144_broy with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="rest", label="rest", y=21.69299999999999, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=12.582, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=20.916, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=19.118, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="pw_scatter_s", label="pw_scatter_s", y=0.0, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=4.637, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=5.505, yerr=0.0 Running bench_dftb.inp with 1 threads and 32 ranks... done. Running bench_dftb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/bench_dftb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.088 0.088 264.510 264.510 qs_energies 1 2.0 0.000 0.000 264.349 264.349 ls_scf 1 3.0 0.000 0.000 262.453 262.453 ls_scf_main 1 4.0 0.002 0.002 250.508 250.508 density_matrix_trs4 11 5.0 0.012 0.012 133.768 133.768 ls_scf_dm_to_ks 11 5.0 0.000 0.000 110.021 110.021 matrix_ls_to_qs 11 6.0 0.000 0.000 105.674 105.674 dbcsr_multiply_generic 185 6.1 0.486 0.486 90.969 90.969 dbcsr_copy_into_existing 11 7.0 58.898 58.898 58.898 58.898 multiply_cannon 185 7.1 0.335 0.335 53.701 53.701 dbcsr_complete_redistribute 23 7.5 37.012 37.012 51.402 51.402 matrix_decluster 11 7.0 0.000 0.000 46.775 46.775 multiply_cannon_loop 185 8.1 0.398 0.398 36.473 36.473 multiply_cannon_multrec 185 9.1 34.255 34.255 34.301 34.301 make_m2s 370 7.1 0.031 0.031 31.087 31.087 make_images 370 8.1 7.054 7.054 28.624 28.624 arnoldi_extremal 12 6.1 0.000 0.000 21.079 21.079 arnoldi_normal_ev 12 7.1 0.024 0.024 21.079 21.079 build_subspace 23 8.1 0.125 0.125 20.489 20.489 dbcsr_finalize 646 7.5 0.213 0.213 19.701 19.701 dbcsr_matrix_vector_mult 652 9.0 0.197 0.197 19.671 19.671 dbcsr_matrix_vector_mult_local 652 10.0 18.518 18.518 18.540 18.540 dbcsr_merge_all 597 8.5 3.548 3.548 17.884 17.884 dbcsr_sort_indices 1103 9.9 17.638 17.638 17.638 17.638 setup_rec_index_2d 370 8.1 16.751 16.751 16.751 16.751 quick_finalize 395 10.0 0.549 0.549 15.099 15.099 dbcsr_special_finalize 370 9.1 0.003 0.003 13.914 13.914 tree_to_linear_d 110 9.4 12.018 12.018 12.018 12.018 ls_scf_init_scf 1 4.0 0.000 0.000 11.064 11.064 ls_scf_init_matrix_S 1 5.0 0.000 0.000 10.597 10.597 matrix_sqrt_Newton_Schulz 1 6.0 0.001 0.001 9.758 9.758 dbcsr_dot_sd 144 6.3 9.700 9.700 9.702 9.702 dbcsr_frobenius_norm 142 6.1 7.655 7.655 7.658 7.658 make_images_data 370 9.1 0.012 0.012 7.458 7.458 matrix_qs_to_ls 12 5.1 0.000 0.000 6.967 6.967 matrix_cluster 12 6.1 0.000 0.000 6.967 6.967 hybrid_alltoall_any 393 9.9 5.344 5.344 6.133 6.133 dbcsr_new_transposed 2 7.0 0.133 0.133 6.022 6.022 dbcsr_redistribute 2 8.0 5.774 5.774 5.848 5.848 dbcsr_add_d 280 6.0 0.001 0.001 5.766 5.766 dbcsr_add_anytype 280 7.0 1.617 1.617 5.765 5.765 ------------------------------------------------------------------------------- From /workspace/artifacts/bench_dftb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.013 0.016 94.897 94.898 qs_energies 1 2.0 0.000 0.000 94.796 94.796 ls_scf 1 3.0 0.000 0.000 94.724 94.725 ls_scf_main 1 4.0 0.001 0.003 90.898 90.898 density_matrix_trs4 11 5.0 0.009 0.014 87.379 87.460 dbcsr_multiply_generic 185 6.1 0.070 0.085 82.284 82.613 multiply_cannon 185 7.1 0.045 0.049 68.974 69.774 multiply_cannon_loop 185 8.1 0.231 0.248 65.163 66.849 multiply_cannon_multrec 1480 9.1 42.259 44.579 42.711 45.040 mp_waitall_1 11936 10.3 20.333 22.464 20.333 22.464 multiply_cannon_metrocomm3 1480 9.1 0.020 0.022 12.191 15.168 make_m2s 370 7.1 0.034 0.038 9.180 9.260 make_images 370 8.1 0.637 0.664 9.063 9.146 multiply_cannon_metrocomm1 1480 9.1 0.011 0.014 4.808 6.864 calculate_norms 2960 9.1 5.139 5.370 5.139 5.370 make_images_data 370 9.1 0.013 0.013 3.739 4.053 mp_sum_l 1039 5.9 2.842 3.758 2.842 3.758 arnoldi_extremal 12 6.1 0.001 0.001 3.616 3.628 arnoldi_normal_ev 12 7.1 0.002 0.007 3.616 3.628 build_subspace 23 8.1 0.039 0.053 3.488 3.491 hybrid_alltoall_any 393 9.9 0.326 1.688 3.114 3.306 ls_scf_dm_to_ks 11 5.0 0.000 0.000 3.023 3.091 ls_scf_init_scf 1 4.0 0.000 0.000 2.945 2.946 dbcsr_matrix_vector_mult 652 9.0 0.017 0.070 2.861 2.930 ls_scf_init_matrix_S 1 5.0 0.000 0.000 2.907 2.918 dbcsr_complete_redistribute 23 7.5 1.644 1.789 2.673 2.785 make_images_pack 370 9.1 2.454 2.778 2.460 2.784 matrix_ls_to_qs 11 6.0 0.000 0.000 2.628 2.739 dbcsr_multiply_generic_mpsum_f 137 7.1 0.001 0.001 1.887 2.711 matrix_sqrt_Newton_Schulz 1 6.0 0.001 0.001 2.664 2.668 matrix_decluster 11 7.0 0.000 0.000 2.388 2.498 buffer_matrices_ensure_size 370 8.1 2.330 2.432 2.330 2.432 dbcsr_add_d 280 6.0 0.002 0.002 2.130 2.252 dbcsr_add_anytype 280 7.0 1.179 1.283 2.128 2.251 dbcsr_matrix_vector_mult_local 652 10.0 2.134 2.230 2.138 2.235 dbcsr_finalize 646 7.5 0.014 0.015 1.880 1.951 ------------------------------------------------------------------------------- Plot: name="bench_dftb_timings_32omp", title="Timings of bench_dftb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32omp", name="rest", label="rest", y=98.189, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=58.898, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=37.012, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=34.255, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=18.518, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_sort_indices", label="dbcsr_sort_indices", y=17.638, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="make_images_pack", label="make_images_pack", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="calculate_norms", label="calculate_norms", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="bench_dftb_timings_32mpi", title="Timings of bench_dftb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32mpi", name="rest", label="rest", y=18.092, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=1.644, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=42.259, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=2.134, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_sort_indices", label="dbcsr_sort_indices", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="make_images_pack", label="make_images_pack", y=2.454, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=2.842, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="calculate_norms", label="calculate_norms", y=5.139, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=20.333, yerr=0.0 Running dbcsr.inp with 1 threads and 32 ranks... done. Running dbcsr.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/dbcsr_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.005 0.005 105.705 105.705 lib_test 1 2.0 0.000 0.000 105.699 105.699 dbcsr_run_tests 3 3.0 0.004 0.004 105.699 105.699 test_multiplies_multiproc 3 4.0 0.001 0.001 83.867 83.867 dbcsr_redistribute 9 5.0 54.455 54.455 58.047 58.047 dbcsr_multiply_generic 9 5.0 0.001 0.001 23.924 23.924 dbcsr_make_random_matrix 9 4.0 15.784 15.784 21.738 21.738 multiply_cannon 9 6.0 0.002 0.002 16.737 16.737 multiply_cannon_loop 9 7.0 0.006 0.006 16.232 16.232 multiply_cannon_multrec 9 8.0 16.224 16.224 16.225 16.225 dbcsr_finalize 27 5.7 0.004 0.004 10.108 10.108 dbcsr_merge_all 18 6.5 3.560 3.560 9.321 9.321 tree_to_linear_d 9 7.0 3.635 3.635 3.635 3.635 mp_alltoall_d11v 27 6.0 3.249 3.249 3.249 3.249 dbcsr_data_release 975 7.6 2.552 2.552 2.552 2.552 make_m2s 18 6.0 0.001 0.001 2.388 2.388 make_images 18 7.0 0.738 0.738 2.305 2.305 ------------------------------------------------------------------------------- From /workspace/artifacts/dbcsr_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.003 0.005 26.668 26.668 lib_test 1 2.0 0.000 0.000 26.635 26.658 dbcsr_run_tests 3 3.0 0.000 0.001 26.634 26.657 test_multiplies_multiproc 3 4.0 0.001 0.002 25.572 25.649 dbcsr_multiply_generic 9 5.0 0.001 0.002 23.793 23.898 multiply_cannon 9 6.0 0.002 0.003 21.425 21.915 multiply_cannon_loop 9 7.0 0.004 0.004 20.960 21.411 multiply_cannon_multrec 72 8.0 17.494 18.038 17.495 18.039 mp_waitall_1 576 9.2 3.874 4.670 3.874 4.670 multiply_cannon_metrocomm1 72 8.0 0.002 0.002 3.110 4.008 multiply_cannon_metrocomm3 72 8.0 0.000 0.001 0.342 1.121 mp_sum_l 310 2.7 0.501 1.111 0.501 1.111 dbcsr_multiply_generic_mpsum_f 9 6.0 0.000 0.000 0.497 1.106 dbcsr_finalize 27 5.7 0.001 0.001 0.958 1.059 dbcsr_make_random_matrix 9 4.0 0.799 0.828 1.015 1.059 make_m2s 18 6.0 0.001 0.001 0.941 1.004 make_images 18 7.0 0.022 0.023 0.938 1.001 dbcsr_merge_all 18 6.5 0.161 0.183 0.827 0.941 dbcsr_data_release 444 7.6 0.738 0.851 0.738 0.851 dbcsr_redistribute 9 5.0 0.404 0.463 0.708 0.747 dbcsr_destroy 111 5.9 0.006 0.060 0.616 0.723 make_images_data 18 8.0 0.001 0.001 0.472 0.576 dbcsr_data_copy_aa2 18 7.5 0.493 0.559 0.493 0.559 ------------------------------------------------------------------------------- Plot: name="dbcsr_timings_32omp", title="Timings of dbcsr with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32omp", name="rest", label="rest", y=9.494999999999976, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_redistribute", label="dbcsr_redistribute", y=54.455, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=16.224, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=15.784, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="tree_to_linear_d", label="tree_to_linear_d", y=3.635, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_merge_all", label="dbcsr_merge_all", y=3.56, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_data_release", label="dbcsr_data_release", y=2.552, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="dbcsr_timings_32mpi", title="Timings of dbcsr with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32mpi", name="rest", label="rest", y=2.696999999999999, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_redistribute", label="dbcsr_redistribute", y=0.404, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=17.494, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=0.799, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="tree_to_linear_d", label="tree_to_linear_d", y=0.0, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_merge_all", label="dbcsr_merge_all", y=0.161, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_data_release", label="dbcsr_data_release", y=0.738, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=0.501, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=3.874, yerr=0.0 Running MQAE_single_node.inp with 1 threads and 32 ranks... done. Running MQAE_single_node.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/MQAE_single_node_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.046 0.046 149.563 149.563 qs_mol_dyn_low 1 2.0 0.004 0.004 147.815 147.815 velocity_verlet 5 3.0 0.004 0.004 120.453 120.453 qmmm_el_coupling 6 3.8 0.000 0.000 72.195 72.195 qmmm_elec_with_gaussian 6 4.8 0.196 0.196 72.189 72.189 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 70.184 70.184 qmmm_elec_gaussian_low_G 6 6.8 68.478 68.478 68.478 68.478 qs_forces 6 3.8 0.001 0.001 52.717 52.717 qs_energies 6 4.8 0.001 0.001 46.775 46.775 scf_env_do_scf 6 5.8 0.000 0.000 43.443 43.443 scf_env_do_scf_inner_loop 39 6.8 0.003 0.003 37.809 37.809 rebuild_ks_matrix 45 8.4 0.000 0.000 37.051 37.051 qs_ks_build_kohn_sham_matrix 45 9.4 0.007 0.007 37.051 37.051 qs_ks_update_qs_env 45 7.8 0.000 0.000 31.693 31.693 pw_transfer 966 11.9 0.074 0.074 24.612 24.612 fft_wrap_pw1pw2 801 13.0 0.009 0.009 24.257 24.257 fft_wrap_pw1pw2_150 507 14.3 2.571 2.571 23.694 23.694 qs_vxc_create 45 10.4 0.001 0.001 18.707 18.707 xc_vxc_pw_create 45 11.4 1.056 1.056 18.706 18.706 fist_calc_energy_force 6 3.8 0.002 0.002 12.974 12.974 force_nonbond 6 4.8 11.619 11.619 11.619 11.619 pw_scatter_s 429 15.4 10.936 10.936 10.936 10.936 qs_rho_update_rho 45 7.9 0.000 0.000 10.374 10.374 calculate_rho_elec 45 8.9 0.911 0.911 10.373 10.373 xc_rho_set_and_dset_create 45 12.4 0.224 0.224 10.121 10.121 pw_integral_ab 2539 7.4 9.917 9.917 9.917 9.917 qmmm_forces 6 3.8 0.001 0.001 9.309 9.309 fft3d_s 802 15.0 9.215 9.215 9.225 9.225 qmmm_forces_with_gaussian 6 4.8 0.131 0.131 8.785 8.785 qs_ks_ddapc 45 10.4 0.001 0.001 6.842 6.842 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 6.418 6.418 init_scf_loop 6 6.8 0.000 0.000 5.629 5.629 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 5.371 5.371 pw_poisson_solve 51 9.9 2.292 2.292 5.369 5.369 qmmm_forces_gaussian_low_G 6 6.8 5.324 5.324 5.324 5.324 density_rs2pw 45 9.9 0.002 0.002 4.810 4.810 grid_collocate_task_list 45 9.9 4.652 4.652 4.652 4.652 sum_up_and_integrate 45 10.4 0.238 0.238 4.320 4.320 cp_ddapc_apply_CD 45 11.4 0.006 0.006 4.154 4.154 integrate_v_rspace 45 11.4 0.011 0.011 4.082 4.082 ------------------------------------------------------------------------------- From /workspace/artifacts/MQAE_single_node_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.034 0.037 93.273 93.274 qs_mol_dyn_low 1 2.0 0.004 0.004 91.886 91.960 qs_forces 6 3.8 0.001 0.001 66.215 66.215 qs_energies 6 4.8 0.000 0.000 63.243 63.244 scf_env_do_scf 6 5.8 0.000 0.001 61.669 61.670 scf_env_do_scf_inner_loop 113 6.2 0.002 0.009 59.164 59.165 rebuild_ks_matrix 119 8.1 0.000 0.000 44.280 44.293 qs_ks_build_kohn_sham_matrix 119 9.1 0.022 0.023 44.279 44.293 qs_ks_update_qs_env 119 7.3 0.001 0.001 41.670 41.682 velocity_verlet 5 3.0 0.002 0.003 39.544 39.548 pw_transfer 2446 11.8 0.279 0.296 29.334 29.515 fft_wrap_pw1pw2 2059 12.8 0.032 0.034 28.456 28.647 fft_wrap_pw1pw2_150 1321 14.0 2.374 2.615 27.522 27.767 qs_vxc_create 119 10.1 0.004 0.005 22.752 22.761 xc_vxc_pw_create 119 11.1 0.377 0.474 22.748 22.756 fft3d_ps 2059 14.8 12.476 13.633 21.498 21.787 qs_rho_update_rho 119 7.3 0.001 0.001 17.321 17.323 calculate_rho_elec 119 8.3 0.085 0.094 17.321 17.322 sum_up_and_integrate 119 10.1 0.095 0.105 15.548 15.581 integrate_v_rspace 119 11.1 0.005 0.006 15.452 15.480 qmmm_forces 6 3.8 0.003 0.003 14.577 14.577 qmmm_forces_with_gaussian 6 4.8 0.435 0.548 14.050 14.292 rs_pw_transfer 988 11.5 0.017 0.018 12.459 12.848 density_rs2pw 119 9.3 0.009 0.011 10.903 11.285 xc_rho_set_and_dset_create 119 12.1 0.467 0.539 10.562 10.943 qmmm_el_coupling 6 3.8 0.000 0.000 10.152 10.220 qmmm_elec_with_gaussian 6 4.8 0.384 0.515 10.149 10.216 potential_pw2rs 119 12.1 0.009 0.010 9.696 9.709 mp_alltoall_z22v 2059 16.8 5.745 7.491 5.745 7.491 grid_collocate_task_list 119 9.3 6.071 6.552 6.071 6.552 pw_restrict_s3 18 5.8 2.256 2.295 6.440 6.510 pw_integral_ab 2761 7.7 5.707 5.775 6.123 6.298 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 5.890 6.164 grid_integrate_task_list 119 12.1 5.361 5.592 5.361 5.592 rs_pw_transfer_PW2RS_150 125 13.9 2.763 2.867 5.339 5.387 qmmm_elec_with_gaussian:spline 6 5.8 0.000 0.000 5.142 5.207 pw_prolongate_s3 18 6.8 1.782 1.809 5.142 5.207 x_to_yz 1095 16.3 1.992 2.219 4.877 5.179 yz_to_x 964 15.3 1.232 1.411 4.092 5.172 qmmm_forces_gaussian_low_G 6 6.8 4.813 5.077 4.813 5.077 rs_pw_transfer_RS2PW_150 125 11.2 2.259 2.430 4.581 4.975 mp_waitany 4028 12.8 3.949 4.676 3.949 4.676 qs_scf_new_mos 113 7.2 0.001 0.001 3.289 3.298 qs_scf_loop_do_ot 113 8.2 0.001 0.001 3.288 3.297 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 3.215 3.272 qs_ks_ddapc 119 10.1 0.003 0.003 3.070 3.226 ot_scf_mini 113 9.2 0.002 0.002 3.144 3.149 dbcsr_multiply_generic 2588 12.3 0.077 0.086 2.947 2.987 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 2.623 2.623 mp_sum_dm3 33 5.7 2.489 2.576 2.489 2.576 init_scf_loop 6 6.8 0.000 0.000 2.502 2.502 pw_gather_p 964 14.3 2.284 2.482 2.284 2.482 mp_waitall_1 188862 16.2 2.176 2.358 2.176 2.358 pw_scatter_p 1095 15.3 2.217 2.348 2.217 2.348 qmmm_elec_gaussian_low_G 6 6.8 2.248 2.300 2.248 2.300 ot_mini 113 10.2 0.001 0.001 1.981 1.990 pw_derive 732 12.5 1.744 1.887 1.744 1.887 ------------------------------------------------------------------------------- Plot: name="MQAE_single_node_timings_32omp", title="Timings of MQAE_single_node with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32omp", name="rest", label="rest", y=34.745999999999995, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=68.478, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="force_nonbond", label="force_nonbond", y=11.619, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_scatter_s", label="pw_scatter_s", y=10.936, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_integral_ab", label="pw_integral_ab", y=9.917, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_s", label="fft3d_s", y=9.215, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=4.652, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=0.0, yerr=0.0 Plot: name="MQAE_single_node_timings_32mpi", title="Timings of MQAE_single_node with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32mpi", name="rest", label="rest", y=55.66499999999999, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=2.248, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="force_nonbond", label="force_nonbond", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_scatter_s", label="pw_scatter_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_integral_ab", label="pw_integral_ab", y=5.707, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=6.071, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=12.476, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=5.361, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=5.745, yerr=0.0 Summary: Performance test works fine. Status: OK Uploading artifacts... done EndDate: 2021-07-30 20:02:02+00:00