StartDate: 2021-05-12 11:08:04+00:00 CpuId: 64x Intel Xeon W 2000 / Scalable Bronze 3000 / Silver 4000 / Gold 5000 / 6000 / Platinum 8000 (Skylake), 14nm CommitSHA: 12b22cabcf12874a7a6af6a4048ae0b28b572820 CommitTime: 2021-05-12 11:31:54 +0200 CommitAuthor: Tiziano Müller CommitSubject: arch: add Eiger arch file Trying to pull image cp2k-toolchain-mpich... success :-) Trying to pull image cp2k-perf-openmp... success :-) #################### Running Image cp2k-perf-openmp #################### ========== Fetching Git Commit ========== CommitSHA: 12b22cabcf12874a7a6af6a4048ae0b28b572820 CommitTime: 2021-05-12 11:31:54 +0200 CommitAuthor: Tiziano Müller CommitSubject: arch: add Eiger arch file ========== Running Test ========== ========== Compiling CP2K ========== Compiling cp2k... done. ========== Running Performance Test ========== Running H2O-64.inp with 1 threads and 32 ranks... done. Running H2O-64.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.046 0.046 194.717 194.717 qs_mol_dyn_low 1 2.0 0.004 0.004 193.925 193.925 qs_forces 11 3.9 0.002 0.002 193.863 193.863 qs_energies 11 4.9 0.001 0.001 181.913 181.913 scf_env_do_scf 11 5.9 0.001 0.001 149.606 149.606 velocity_verlet 10 3.0 0.002 0.002 135.886 135.886 scf_env_do_scf_inner_loop 108 6.5 0.010 0.010 98.443 98.443 init_scf_loop 11 6.9 0.000 0.000 50.977 50.977 prepare_preconditioner 11 7.9 0.000 0.000 46.182 46.182 make_preconditioner 11 8.9 0.000 0.000 46.182 46.182 rebuild_ks_matrix 119 8.3 0.001 0.001 45.550 45.550 qs_ks_build_kohn_sham_matrix 119 9.3 0.019 0.019 45.549 45.549 make_full_inverse_cholesky 11 9.9 0.000 0.000 44.211 44.211 qs_rho_update_rho 119 7.7 0.001 0.001 42.991 42.991 calculate_rho_elec 119 8.7 1.557 1.557 42.990 42.990 qs_ks_update_qs_env 119 7.6 0.001 0.001 41.090 41.090 grid_collocate_task_list 119 9.7 35.639 35.639 35.639 35.639 sum_up_and_integrate 119 10.3 0.393 0.393 32.289 32.289 integrate_v_rspace 119 11.3 0.141 0.141 31.895 31.895 cp_fm_cholesky_invert 11 10.9 30.112 30.112 30.112 30.112 grid_integrate_task_list 119 12.3 28.131 28.131 28.131 28.131 qs_scf_new_mos 108 7.5 0.001 0.001 24.468 24.468 qs_scf_loop_do_ot 108 8.5 0.001 0.001 24.468 24.468 ot_scf_mini 108 9.5 0.003 0.003 23.100 23.100 dbcsr_multiply_generic 2286 12.5 0.246 0.246 22.381 22.381 init_scf_run 11 5.9 0.001 0.001 16.875 16.875 scf_env_initial_rho_setup 11 6.9 0.001 0.001 16.874 16.874 wfi_extrapolate 11 7.9 0.001 0.001 16.023 16.023 ot_mini 108 10.5 0.001 0.001 14.652 14.652 make_m2s 4572 13.5 0.062 0.062 13.289 13.289 cp_gemm 81 9.0 0.000 0.000 11.584 11.584 cp_gemm_fm_gemm 81 10.0 0.000 0.000 11.583 11.583 cp_fm_gemm 81 11.0 11.583 11.583 11.583 11.583 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 11.498 11.498 pw_transfer 1439 11.6 0.096 0.096 10.261 10.261 fft_wrap_pw1pw2 1201 12.6 0.010 0.010 9.975 9.975 fft_wrap_pw1pw2_140 487 13.2 0.611 0.611 8.502 8.502 cp_fm_cholesky_decompose 22 10.9 7.717 7.717 7.717 7.717 ot_diis_step 108 11.5 0.005 0.005 7.475 7.475 qs_ot_get_derivative 108 11.5 0.001 0.001 7.173 7.173 make_images 4572 14.5 2.417 2.417 6.780 6.780 dbcsr_make_dense_low 5837 15.5 0.102 0.102 6.774 6.774 make_dense_data 5837 16.5 5.910 5.910 6.655 6.655 qs_env_update_s_mstruct 11 6.9 0.000 0.000 6.494 6.494 dbcsr_complete_redistribute 329 12.2 3.095 3.095 6.444 6.444 dbcsr_copy 2102 12.0 0.622 0.622 6.390 6.390 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 6.340 6.340 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 6.165 6.165 apply_single 119 13.6 0.000 0.000 6.165 6.165 dbcsr_make_images_dense 3978 14.8 0.024 0.024 6.044 6.044 qs_create_task_list 11 7.9 0.000 0.000 5.816 5.816 generate_qs_task_list 11 8.9 3.959 3.959 5.816 5.816 density_rs2pw 119 9.7 0.006 0.006 5.794 5.794 dbcsr_copy_into_existing 22 7.9 5.719 5.719 5.719 5.719 fft3d_s 1202 14.6 5.667 5.667 5.673 5.673 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 5.607 5.607 copy_dbcsr_to_fm 153 11.3 0.004 0.004 5.268 5.268 pw_poisson_solve 119 10.3 2.042 2.042 4.766 4.766 multiply_cannon 2286 13.5 0.285 0.285 4.713 4.713 qs_ot_get_p 119 10.4 0.001 0.001 4.433 4.433 transfer_dbcsr_to_fm 11 10.9 0.000 0.000 4.375 4.375 build_core_hamiltonian_matrix 11 6.9 0.001 0.001 4.355 4.355 multiply_cannon_loop 2286 14.5 0.061 0.061 3.947 3.947 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.010 119.601 119.602 qs_mol_dyn_low 1 2.0 0.007 0.008 119.481 119.488 qs_forces 11 3.9 0.002 0.002 119.425 119.425 qs_energies 11 4.9 0.001 0.001 114.025 114.027 scf_env_do_scf 11 5.9 0.001 0.001 101.395 101.396 velocity_verlet 10 3.0 0.002 0.002 80.915 80.916 scf_env_do_scf_inner_loop 108 6.5 0.003 0.010 66.745 66.746 rebuild_ks_matrix 119 8.3 0.001 0.001 35.084 35.206 qs_ks_build_kohn_sham_matrix 119 9.3 0.019 0.020 35.083 35.205 init_scf_loop 11 6.9 0.000 0.001 34.631 34.631 qs_ks_update_qs_env 119 7.6 0.001 0.001 31.488 31.606 prepare_preconditioner 11 7.9 0.000 0.000 31.361 31.372 make_preconditioner 11 8.9 0.000 0.000 31.361 31.371 make_full_inverse_cholesky 11 9.9 0.000 0.000 30.847 30.876 cp_fm_cholesky_invert 11 10.9 29.968 29.988 29.968 29.988 sum_up_and_integrate 119 10.3 0.058 0.067 24.889 24.976 integrate_v_rspace 119 11.3 0.004 0.005 24.831 24.913 qs_rho_update_rho 119 7.7 0.001 0.001 24.474 24.487 calculate_rho_elec 119 8.7 0.049 0.051 24.473 24.486 dbcsr_multiply_generic 2286 12.5 0.127 0.132 20.646 20.706 grid_collocate_task_list 119 9.7 16.886 17.747 16.886 17.747 grid_integrate_task_list 119 12.3 16.857 17.450 16.857 17.450 qs_scf_new_mos 108 7.5 0.001 0.001 15.867 15.982 qs_scf_loop_do_ot 108 8.5 0.001 0.001 15.867 15.981 ot_scf_mini 108 9.5 0.003 0.003 14.896 15.014 multiply_cannon 2286 13.5 0.236 0.242 14.130 14.647 multiply_cannon_loop 2286 14.5 0.212 0.222 12.597 12.903 mp_waitall_1 169478 16.3 10.988 12.245 10.988 12.245 init_scf_run 11 5.9 0.000 0.001 9.146 9.146 scf_env_initial_rho_setup 11 6.9 0.000 0.001 9.146 9.146 ot_mini 108 10.5 0.001 0.001 8.811 8.929 rs_pw_transfer 974 11.9 0.016 0.017 8.100 8.689 wfi_extrapolate 11 7.9 0.001 0.001 8.643 8.643 density_rs2pw 119 9.7 0.008 0.009 6.847 7.488 multiply_cannon_metrocomm3 18288 15.5 0.069 0.072 6.907 7.460 pw_transfer 1439 11.6 0.125 0.130 7.265 7.361 fft_wrap_pw1pw2 1201 12.6 0.014 0.015 6.963 7.047 potential_pw2rs 119 12.3 0.009 0.010 6.498 6.535 cp_gemm 81 9.0 0.000 0.000 6.481 6.490 cp_gemm_fm_gemm 81 10.0 0.000 0.000 6.480 6.490 cp_fm_gemm 81 11.0 6.480 6.489 6.480 6.489 fft_wrap_pw1pw2_140 487 13.2 0.644 0.704 6.045 6.262 fft3d_ps 1201 14.6 2.679 2.922 5.370 5.538 make_m2s 4572 13.5 0.072 0.076 4.463 4.912 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 4.717 4.756 apply_single 119 13.6 0.001 0.001 4.717 4.755 ot_diis_step 108 11.5 0.004 0.005 4.693 4.694 multiply_cannon_multrec 18288 15.5 4.323 4.541 4.339 4.558 mp_sum_d 4127 12.0 3.600 4.262 3.600 4.262 make_images 4572 14.5 0.182 0.191 3.764 4.224 qs_ot_get_derivative 108 11.5 0.001 0.001 4.086 4.205 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 3.860 3.865 mp_waitany 9880 13.7 2.973 3.586 2.973 3.586 rs_pw_transfer_PW2RS_140 130 13.9 1.404 1.719 3.267 3.384 rs_pw_transfer_RS2PW_140 130 11.5 0.663 0.752 2.398 3.008 make_images_data 4572 15.5 0.056 0.061 2.036 2.614 mp_alltoall_d11v 2130 13.8 1.871 2.414 1.871 2.414 ------------------------------------------------------------------------------- Plot: name="H2O-64_timings_32omp", title="Timings of H2O-64 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32omp", name="rest", label="rest", y=81.53500000000001, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=35.639, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=30.112, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=28.131, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_fm_gemm", label="cp_fm_gemm", y=11.583, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=7.717, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="H2O-64_timings_32mpi", title="Timings of H2O-64 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32mpi", name="rest", label="rest", y=38.422, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=16.886, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=29.968, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=16.857, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_fm_gemm", label="cp_fm_gemm", y=6.48, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=10.988, yerr=0.0 Running H2O-64_nonortho.inp with 1 threads and 32 ranks... done. Running H2O-64_nonortho.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_nonortho_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.031 0.031 227.487 227.487 qs_mol_dyn_low 1 2.0 0.004 0.004 226.679 226.679 qs_forces 11 3.9 0.002 0.002 226.616 226.616 qs_energies 11 4.9 0.001 0.001 212.297 212.297 scf_env_do_scf 11 5.9 0.001 0.001 178.190 178.190 velocity_verlet 10 3.0 0.002 0.002 155.067 155.067 scf_env_do_scf_inner_loop 96 6.5 0.009 0.009 123.880 123.880 rebuild_ks_matrix 107 8.3 0.001 0.001 63.647 63.647 qs_ks_build_kohn_sham_matrix 107 9.3 0.018 0.018 63.646 63.646 qs_rho_update_rho 107 7.7 0.001 0.001 58.436 58.436 calculate_rho_elec 107 8.7 1.431 1.431 58.435 58.435 qs_ks_update_qs_env 107 7.6 0.001 0.001 57.018 57.018 init_scf_loop 11 6.9 0.000 0.000 53.966 53.966 grid_collocate_task_list 107 9.7 52.493 52.493 52.493 52.493 sum_up_and_integrate 107 10.3 0.374 0.374 52.190 52.190 integrate_v_rspace 107 11.3 0.133 0.133 51.816 51.816 grid_integrate_task_list 107 12.3 48.693 48.693 48.693 48.693 prepare_preconditioner 11 7.9 0.000 0.000 46.410 46.410 make_preconditioner 11 8.9 0.000 0.000 46.410 46.410 make_full_inverse_cholesky 11 9.9 0.001 0.001 44.424 44.424 cp_fm_cholesky_invert 11 10.9 30.104 30.104 30.104 30.104 qs_scf_new_mos 96 7.5 0.001 0.001 21.234 21.234 qs_scf_loop_do_ot 96 8.5 0.001 0.001 21.233 21.233 ot_scf_mini 96 9.5 0.003 0.003 20.029 20.029 dbcsr_multiply_generic 1966 12.4 0.154 0.154 19.672 19.672 init_scf_run 11 5.9 0.001 0.001 17.326 17.326 scf_env_initial_rho_setup 11 6.9 0.001 0.001 17.325 17.325 wfi_extrapolate 11 7.9 0.001 0.001 16.248 16.248 ot_mini 96 10.5 0.001 0.001 12.888 12.888 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 12.772 12.772 make_m2s 3932 13.4 0.055 0.055 11.752 11.752 cp_gemm 81 9.0 0.000 0.000 11.533 11.533 cp_gemm_fm_gemm 81 10.0 0.000 0.000 11.532 11.532 cp_fm_gemm 81 11.0 11.532 11.532 11.532 11.532 pw_transfer 1295 11.6 0.090 0.090 8.517 8.517 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 8.406 8.406 fft_wrap_pw1pw2 1081 12.6 0.010 0.010 7.920 7.920 cp_fm_cholesky_decompose 22 10.9 7.786 7.786 7.786 7.786 qs_env_update_s_mstruct 11 6.9 0.000 0.000 7.469 7.469 dbcsr_complete_redistribute 317 12.2 3.069 3.069 7.071 7.071 qs_create_task_list 11 7.9 0.000 0.000 6.921 6.921 generate_qs_task_list 11 8.9 5.120 5.120 6.921 6.921 qs_ot_get_derivative 96 11.5 0.001 0.001 6.529 6.529 fft_wrap_pw1pw2_140 439 13.2 1.140 1.140 6.494 6.494 ot_diis_step 96 11.5 0.005 0.005 6.355 6.355 make_images 3932 14.4 2.534 2.534 6.236 6.236 dbcsr_copy 1855 11.9 0.250 0.250 6.193 6.193 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 5.910 5.910 dbcsr_copy_into_existing 22 7.9 5.899 5.899 5.899 5.899 dbcsr_make_dense_low 4961 15.5 0.087 0.087 5.786 5.786 make_dense_data 4961 16.5 4.857 4.857 5.685 5.685 copy_dbcsr_to_fm 147 11.2 0.004 0.004 5.597 5.597 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 5.414 5.414 apply_single 107 13.6 0.000 0.000 5.413 5.413 dbcsr_make_images_dense 3386 14.7 0.021 0.021 5.111 5.111 pw_poisson_solve 107 10.3 2.010 2.010 4.844 4.844 build_core_hamiltonian_matrix 11 6.9 0.001 0.001 4.657 4.657 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_nonortho_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.006 0.010 165.829 165.830 qs_mol_dyn_low 1 2.0 0.004 0.005 165.728 165.734 qs_forces 11 3.9 0.002 0.002 165.675 165.675 qs_energies 11 4.9 0.001 0.001 156.818 156.821 scf_env_do_scf 11 5.9 0.001 0.001 141.770 141.771 velocity_verlet 10 3.0 0.002 0.002 109.273 109.275 scf_env_do_scf_inner_loop 96 6.5 0.003 0.009 105.211 105.211 rebuild_ks_matrix 107 8.3 0.001 0.001 60.172 60.290 qs_ks_build_kohn_sham_matrix 107 9.3 0.017 0.018 60.172 60.290 sum_up_and_integrate 107 10.3 0.049 0.057 53.883 53.948 integrate_v_rspace 107 11.3 0.004 0.005 53.833 53.902 qs_ks_update_qs_env 107 7.6 0.001 0.001 53.092 53.207 qs_rho_update_rho 107 7.7 0.001 0.001 50.426 50.434 calculate_rho_elec 107 8.7 0.044 0.046 50.425 50.433 grid_integrate_task_list 107 12.3 46.106 47.341 46.106 47.341 grid_collocate_task_list 107 9.7 43.111 44.040 43.111 44.040 init_scf_loop 11 6.9 0.000 0.000 36.541 36.542 prepare_preconditioner 11 7.9 0.000 0.000 30.039 30.047 make_preconditioner 11 8.9 0.000 0.000 30.039 30.047 make_full_inverse_cholesky 11 9.9 0.000 0.000 29.883 29.903 cp_fm_cholesky_invert 11 10.9 29.202 29.221 29.202 29.221 dbcsr_multiply_generic 1966 12.4 0.109 0.114 16.916 17.148 qs_scf_new_mos 96 7.5 0.001 0.001 12.965 13.016 qs_scf_loop_do_ot 96 8.5 0.001 0.001 12.964 13.016 ot_scf_mini 96 9.5 0.003 0.003 12.183 12.227 multiply_cannon 1966 13.4 0.203 0.209 11.793 12.087 init_scf_run 11 5.9 0.000 0.001 11.629 11.629 scf_env_initial_rho_setup 11 6.9 0.000 0.001 11.628 11.629 multiply_cannon_loop 1966 14.4 0.179 0.191 10.711 10.962 wfi_extrapolate 11 7.9 0.001 0.001 10.919 10.919 mp_waitall_1 146670 16.2 9.288 10.018 9.288 10.018 rs_pw_transfer 878 11.9 0.014 0.015 7.603 9.033 density_rs2pw 107 9.7 0.007 0.008 6.733 8.154 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 7.305 7.312 ot_mini 96 10.5 0.001 0.001 7.124 7.168 multiply_cannon_metrocomm3 15728 15.4 0.058 0.062 5.897 6.638 pw_transfer 1295 11.6 0.113 0.118 6.362 6.465 cp_gemm 81 9.0 0.000 0.000 6.384 6.396 cp_gemm_fm_gemm 81 10.0 0.000 0.000 6.384 6.395 cp_fm_gemm 81 11.0 6.383 6.395 6.383 6.395 fft_wrap_pw1pw2 1081 12.6 0.011 0.013 6.101 6.192 potential_pw2rs 107 12.3 0.008 0.008 5.572 5.608 fft_wrap_pw1pw2_140 439 13.2 0.526 0.573 5.000 5.430 fft3d_ps 1081 14.6 2.279 2.444 4.785 4.943 mp_waitany 8968 13.7 3.135 4.670 3.135 4.670 mp_alltoall_d11v 1998 13.7 2.474 4.095 2.474 4.095 rs_pw_transfer_RS2PW_140 118 11.5 0.464 0.527 2.572 4.000 multiply_cannon_multrec 15728 15.4 3.609 3.832 3.623 3.846 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 3.786 3.841 apply_single 107 13.6 0.000 0.000 3.785 3.841 ot_diis_step 96 11.5 0.004 0.004 3.749 3.749 rs_gather_matrices 107 12.3 0.129 0.149 2.100 3.664 make_m2s 3932 13.4 0.062 0.064 3.442 3.600 qs_ot_get_derivative 96 11.5 0.001 0.001 3.347 3.390 ------------------------------------------------------------------------------- Plot: name="H2O-64_nonortho_timings_32omp", title="Timings of H2O-64_nonortho with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="rest", label="rest", y=76.87899999999996, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=52.493, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=48.693, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=30.104, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_gemm", label="cp_fm_gemm", y=11.532, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=7.786, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="H2O-64_nonortho_timings_32mpi", title="Timings of H2O-64_nonortho with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="rest", label="rest", y=31.739000000000004, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=43.111, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=46.106, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=29.202, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_gemm", label="cp_fm_gemm", y=6.383, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=9.288, yerr=0.0 Running H2O-hyb.inp with 1 threads and 32 ranks... done. Running H2O-hyb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-hyb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.414 0.414 260.914 260.914 qs_energies 1 2.0 0.000 0.000 259.594 259.594 scf_env_do_scf 1 3.0 0.000 0.000 256.867 256.867 qs_ks_update_qs_env 8 5.0 0.000 0.000 244.801 244.801 rebuild_ks_matrix 7 6.0 0.000 0.000 244.687 244.687 qs_ks_build_kohn_sham_matrix 7 7.0 0.002 0.002 244.687 244.687 hfx_ks_matrix 7 8.0 0.000 0.000 176.561 176.561 integrate_four_center 7 9.0 9.546 9.546 176.532 176.532 integrate_four_center_main 7 10.0 1.496 1.496 157.670 157.670 integrate_four_center_bin 447 11.0 156.174 156.174 156.174 156.174 scf_env_do_scf_inner_loop 7 4.0 0.001 0.001 148.544 148.544 init_scf_loop 1 4.0 0.000 0.000 108.307 108.307 cp_gemm 129 10.3 0.001 0.001 51.940 51.940 cp_gemm_fm_gemm 129 11.3 0.000 0.000 51.939 51.939 cp_fm_gemm 129 12.3 51.939 51.939 51.939 51.939 admm_mo_calc_rho_aux 7 8.0 0.000 0.000 30.496 30.496 admm_fit_mo_coeffs 7 9.0 0.000 0.000 27.879 27.879 admm_mo_merge_derivs 7 8.0 0.000 0.000 26.269 26.269 merge_mo_derivs_diag 7 9.0 0.023 0.023 26.269 26.269 purify_mo_diag 7 10.0 0.001 0.001 14.906 14.906 fit_mo_coeffs 7 10.0 0.000 0.000 12.973 12.973 integrate_four_center_load 7 10.0 0.000 0.000 8.772 8.772 hfx_load_balance 1 11.0 0.002 0.002 8.772 8.772 calculate_rho_elec 15 7.4 0.193 0.193 6.343 6.343 prepare_preconditioner 1 5.0 0.000 0.000 6.162 6.162 make_preconditioner 1 6.0 0.000 0.000 6.162 6.162 grid_collocate_task_list 15 8.4 5.506 5.506 5.506 5.506 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-hyb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.024 0.055 210.799 210.800 qs_energies 1 2.0 0.000 0.000 210.632 210.632 scf_env_do_scf 1 3.0 0.000 0.000 209.982 209.982 qs_ks_update_qs_env 8 5.0 0.000 0.000 203.733 203.733 rebuild_ks_matrix 7 6.0 0.000 0.000 203.721 203.721 qs_ks_build_kohn_sham_matrix 7 7.0 0.001 0.002 203.720 203.721 hfx_ks_matrix 7 8.0 0.000 0.000 170.404 170.422 integrate_four_center 7 9.0 0.130 0.410 170.393 170.411 integrate_four_center_main 7 10.0 0.004 0.005 156.960 160.287 integrate_four_center_bin 448 11.0 156.956 160.282 156.956 160.282 scf_env_do_scf_inner_loop 7 4.0 0.000 0.001 124.712 124.712 init_scf_loop 1 4.0 0.000 0.000 85.268 85.268 cp_gemm 129 10.3 0.000 0.001 24.967 24.976 cp_gemm_fm_gemm 129 11.3 0.000 0.000 24.966 24.975 cp_fm_gemm 129 12.3 24.966 24.975 24.966 24.975 admm_mo_merge_derivs 7 8.0 0.000 0.000 14.873 14.875 merge_mo_derivs_diag 7 9.0 0.012 0.013 14.873 14.875 admm_mo_calc_rho_aux 7 8.0 0.000 0.000 11.881 11.888 admm_fit_mo_coeffs 7 9.0 0.000 0.000 10.828 10.829 integrate_four_center_load 7 10.0 0.000 0.001 8.902 8.906 hfx_load_balance 1 11.0 0.001 0.001 8.902 8.906 purify_mo_diag 7 10.0 0.000 0.000 6.555 6.557 mp_sync 70 11.3 3.608 5.970 3.608 5.970 hfx_load_balance_bin 1 12.0 4.291 4.487 4.291 4.487 hfx_load_balance_count 1 12.0 4.298 4.457 4.298 4.457 fit_mo_coeffs 7 10.0 0.000 0.000 4.273 4.276 ------------------------------------------------------------------------------- Plot: name="H2O-hyb_timings_32omp", title="Timings of H2O-hyb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32omp", name="rest", label="rest", y=36.252999999999986, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_bin", label="integrate_four_center_bin", y=156.174, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="cp_fm_gemm", label="cp_fm_gemm", y=51.939, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center", label="integrate_four_center", y=9.546, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=5.506, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_main", label="integrate_four_center_main", y=1.496, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_count", label="hfx_load_balance_count", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 Plot: name="H2O-hyb_timings_32mpi", title="Timings of H2O-hyb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32mpi", name="rest", label="rest", y=16.54600000000002, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_bin", label="integrate_four_center_bin", y=156.956, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="cp_fm_gemm", label="cp_fm_gemm", y=24.966, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center", label="integrate_four_center", y=0.13, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_main", label="integrate_four_center_main", y=0.004, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=4.291, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_count", label="hfx_load_balance_count", y=4.298, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="mp_sync", label="mp_sync", y=3.608, yerr=0.0 Running GW_PBE_4benzene.inp with 1 threads and 32 ranks... done. Running GW_PBE_4benzene.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/GW_PBE_4benzene_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.013 0.013 384.166 384.166 qs_energies 1 2.0 0.000 0.000 383.677 383.677 mp2_main 1 3.0 0.000 0.000 378.067 378.067 mp2_gpw_main 1 4.0 0.000 0.000 377.869 377.869 rpa_ri_compute_en 1 5.0 0.000 0.000 357.059 357.059 rpa_num_int 1 6.0 0.000 0.000 357.032 357.032 compute_mat_P_omega 1 7.0 0.002 0.002 202.019 202.019 compute_mat_P_omega_contract 10 8.0 13.042 13.042 200.720 200.720 dbcsr_t_total 2336 9.6 0.015 0.015 190.604 190.604 cp_gemm 105 8.4 0.001 0.001 125.078 125.078 cp_gemm_fm_gemm 105 9.4 0.000 0.000 125.078 125.078 cp_fm_gemm 105 10.4 125.077 125.077 125.077 125.077 dbcsr_t_contract 787 11.0 48.407 48.407 114.113 114.113 GW_matrix_operations 10 7.0 0.006 0.006 89.140 89.140 compute_mat_P_omega_calc_M_occ 250 9.0 13.101 13.101 76.406 76.406 dbcsr_t_copy 1103 10.7 21.287 21.287 75.001 75.001 dbcsr_tas_total 1149 12.2 0.050 0.050 58.320 58.320 dbcsr_tas_multiply 807 12.1 0.002 0.002 56.852 56.852 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 45.252 45.252 rpa_num_int_RPA_matrix_operati 10 7.0 0.000 0.000 42.741 42.741 dbcsr_multiply_generic 837 15.8 0.134 0.134 42.662 42.662 dbcsr_tas_dbcsr 807 14.1 0.002 0.002 42.410 42.410 contract_P_omega_with_mat_L 10 8.0 0.000 0.000 40.812 40.812 dbcsr_tas_reserve_blocks_index 3261 13.7 7.393 7.393 30.443 30.443 dbcsr_tas_mm_1N 524 15.1 0.002 0.002 30.037 30.037 multiply_cannon 837 16.8 0.914 0.914 26.333 26.333 dbcsr_tas_copy 574 11.4 17.755 17.755 25.721 25.721 dbcsr_t_reserve_blocks_index 2280 12.5 1.104 1.104 23.578 23.578 multiply_cannon_loop 837 17.8 0.651 0.651 22.906 22.906 dbcsr_reserve_blocks 3717 14.7 22.235 22.235 22.687 22.687 dbcsr_t_reserve_blocks_index_a 2222 11.6 0.011 0.011 22.233 22.233 compute_QP_energies 1 7.0 0.000 0.000 21.189 21.189 compute_self_energy_cubic_gw 1 8.0 0.104 0.104 21.189 21.189 multiply_cannon_multrec 837 18.8 20.413 20.413 20.985 20.985 mp2_ri_gpw_compute_in 1 5.0 0.000 0.000 20.793 20.793 compute_mat_P_omega_copy_M_occ 250 9.0 0.001 0.001 20.543 20.543 compute_mat_P_omega_copy_M_vir 250 9.0 0.002 0.002 15.069 15.069 dbcsr_t_copy_nocomm 251 12.0 11.764 11.764 14.253 14.253 make_m2s 1674 16.8 0.106 0.106 13.856 13.856 make_images 1674 17.8 5.923 5.923 13.292 13.292 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 11.878 11.878 dbcsr_tas_mm_2 251 15.0 0.001 0.001 10.220 10.220 contract_cubic_gw 21 9.0 0.000 0.000 9.111 9.111 dbcsr_finalize 9888 13.6 1.715 1.715 8.168 8.168 cp_fm_cholesky_invert 10 8.0 8.036 8.036 8.036 8.036 ------------------------------------------------------------------------------- From /workspace/artifacts/GW_PBE_4benzene_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.005 0.008 206.578 206.579 qs_energies 1 2.0 0.000 0.000 206.469 206.474 mp2_main 1 3.0 0.000 0.000 204.588 204.593 mp2_gpw_main 1 4.0 0.000 0.000 204.529 204.534 rpa_ri_compute_en 1 5.0 0.000 0.000 199.320 199.325 rpa_num_int 1 6.0 0.000 0.000 199.313 199.318 cp_gemm 105 8.4 0.000 0.000 143.076 143.483 cp_gemm_fm_gemm 105 9.4 0.000 0.000 143.076 143.483 cp_fm_gemm 105 10.4 143.076 143.483 143.076 143.483 GW_matrix_operations 10 7.0 0.001 0.001 100.029 100.271 dbcsr_t_total 2336 9.6 0.016 0.017 47.530 47.531 rpa_num_int_RPA_matrix_operati 10 7.0 0.000 0.000 46.883 46.911 compute_mat_P_omega 1 7.0 0.001 0.002 46.798 46.804 contract_P_omega_with_mat_L 10 8.0 0.000 0.000 46.622 46.754 compute_mat_P_omega_contract 10 8.0 0.767 0.812 45.970 45.977 dbcsr_t_contract 787 11.0 1.933 2.086 34.996 35.001 dbcsr_tas_total 1149 12.2 0.060 0.068 31.028 31.029 dbcsr_tas_multiply 807 12.1 0.003 0.003 30.887 30.889 dbcsr_tas_dbcsr 807 14.1 0.003 0.003 21.945 21.945 dbcsr_multiply_generic 837 15.8 0.070 0.075 18.124 19.008 compute_mat_P_omega_calc_M_occ 250 9.0 0.740 0.782 14.822 14.822 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 12.072 12.073 multiply_cannon 837 16.8 0.125 0.136 10.654 11.077 dbcsr_t_copy 1111 10.7 4.530 5.182 10.157 11.010 mp_sync 8696 11.6 8.467 10.179 8.467 10.179 multiply_cannon_loop 837 17.8 0.040 0.045 9.743 10.155 dbcsr_tas_mm_1N 524 15.1 0.003 0.003 9.257 10.106 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 9.423 9.424 dbcsr_tas_mm_2 251 15.0 0.002 0.002 8.393 8.394 cp_fm_cholesky_invert 10 8.0 8.229 8.246 8.229 8.246 multiply_cannon_multrec 1386 17.8 7.401 7.845 7.662 8.115 make_m2s 1674 16.8 0.042 0.047 6.122 6.764 make_images 1674 17.8 0.243 0.254 6.042 6.687 compute_QP_energies 1 7.0 0.000 0.000 5.229 5.229 compute_self_energy_cubic_gw 1 8.0 0.005 0.008 5.227 5.229 mp2_ri_gpw_compute_in 1 5.0 0.000 0.000 5.207 5.207 ------------------------------------------------------------------------------- Plot: name="GW_PBE_4benzene_timings_32omp", title="Timings of GW_PBE_4benzene with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="rest", label="rest", y=138.71099999999998, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="cp_fm_gemm", label="cp_fm_gemm", y=125.077, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbcsr_t_contract", label="dbcsr_t_contract", y=48.407, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbcsr_reserve_blocks", label="dbcsr_reserve_blocks", y=22.235, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbcsr_t_copy", label="dbcsr_t_copy", y=21.287, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=20.413, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=8.036, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 Plot: name="GW_PBE_4benzene_timings_32mpi", title="Timings of GW_PBE_4benzene with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="rest", label="rest", y=32.94200000000001, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="cp_fm_gemm", label="cp_fm_gemm", y=143.076, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbcsr_t_contract", label="dbcsr_t_contract", y=1.933, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbcsr_reserve_blocks", label="dbcsr_reserve_blocks", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbcsr_t_copy", label="dbcsr_t_copy", y=4.53, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=7.401, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=8.229, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_sync", label="mp_sync", y=8.467, yerr=0.0 Running bench_dftb.inp with 1 threads and 32 ranks... done. Running bench_dftb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/bench_dftb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.095 0.095 298.975 298.975 qs_energies 1 2.0 0.000 0.000 298.790 298.790 ls_scf 1 3.0 0.000 0.000 296.731 296.731 ls_scf_main 1 4.0 0.002 0.002 282.391 282.391 density_matrix_trs4 11 5.0 0.012 0.012 141.763 141.763 ls_scf_dm_to_ks 11 5.0 0.000 0.000 133.359 133.359 matrix_ls_to_qs 11 6.0 0.000 0.000 128.654 128.654 dbcsr_multiply_generic 185 6.1 0.494 0.494 92.243 92.243 dbcsr_copy_into_existing 11 7.0 71.630 71.630 71.630 71.630 dbcsr_complete_redistribute 23 7.5 42.601 42.601 62.193 62.193 matrix_decluster 11 7.0 0.000 0.000 57.023 57.023 multiply_cannon 185 7.1 0.433 0.433 52.993 52.993 multiply_cannon_loop 185 8.1 0.428 0.428 34.032 34.032 make_m2s 370 7.1 0.032 0.032 33.446 33.446 multiply_cannon_multrec 185 9.1 31.118 31.118 31.166 31.166 make_images 370 8.1 7.313 7.313 29.752 29.752 arnoldi_extremal 12 6.1 0.000 0.000 26.049 26.049 arnoldi_normal_ev 12 7.1 0.026 0.026 26.049 26.049 build_subspace 23 8.1 0.137 0.137 25.421 25.421 dbcsr_finalize 646 7.5 0.227 0.227 25.008 25.008 dbcsr_matrix_vector_mult 652 9.0 0.235 0.235 24.511 24.511 dbcsr_merge_all 597 8.5 4.113 4.113 23.322 23.322 dbcsr_matrix_vector_mult_local 652 10.0 23.210 23.210 23.229 23.229 setup_rec_index_2d 370 8.1 18.385 18.385 18.385 18.385 tree_to_linear_d 110 9.4 17.219 17.219 17.219 17.219 dbcsr_sort_indices 1103 9.9 15.514 15.514 15.514 15.514 quick_finalize 395 10.0 1.263 1.263 13.922 13.922 ls_scf_init_scf 1 4.0 0.000 0.000 13.523 13.523 ls_scf_init_matrix_S 1 5.0 0.000 0.000 13.063 13.063 dbcsr_special_finalize 370 9.1 0.003 0.003 12.868 12.868 matrix_sqrt_Newton_Schulz 1 6.0 0.001 0.001 12.161 12.161 dbcsr_dot_sd 144 6.3 10.871 10.871 10.873 10.873 make_images_data 370 9.1 0.011 0.011 9.456 9.456 dbcsr_frobenius_norm 142 6.1 8.807 8.807 8.809 8.809 hybrid_alltoall_any 393 9.9 7.183 7.183 7.907 7.907 dbcsr_new_transposed 2 7.0 0.156 0.156 7.835 7.835 dbcsr_redistribute 2 8.0 7.582 7.582 7.644 7.644 matrix_qs_to_ls 12 5.1 0.000 0.000 7.567 7.567 matrix_cluster 12 6.1 0.000 0.000 7.567 7.567 ------------------------------------------------------------------------------- From /workspace/artifacts/bench_dftb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.009 105.242 105.243 qs_energies 1 2.0 0.000 0.000 105.158 105.158 ls_scf 1 3.0 0.000 0.000 105.077 105.078 ls_scf_main 1 4.0 0.000 0.002 100.522 100.523 density_matrix_trs4 11 5.0 0.009 0.013 96.644 96.722 dbcsr_multiply_generic 185 6.1 0.071 0.081 91.063 91.441 multiply_cannon 185 7.1 0.041 0.043 74.891 76.139 multiply_cannon_loop 185 8.1 0.216 0.242 70.967 72.066 multiply_cannon_multrec 1480 9.1 44.351 48.216 44.904 48.797 mp_waitall_1 11936 10.3 24.106 28.950 24.106 28.950 multiply_cannon_metrocomm3 1480 9.1 0.016 0.019 14.932 19.690 make_m2s 370 7.1 0.033 0.036 11.202 11.359 make_images 370 8.1 0.732 0.774 11.081 11.239 multiply_cannon_metrocomm1 1480 9.1 0.008 0.009 5.409 9.591 calculate_norms 2960 9.1 5.425 6.559 5.425 6.559 mp_sum_l 1039 5.9 4.126 5.249 4.126 5.249 make_images_data 370 9.1 0.010 0.012 4.120 4.513 arnoldi_extremal 12 6.1 0.000 0.001 4.274 4.310 arnoldi_normal_ev 12 7.1 0.002 0.008 4.273 4.310 build_subspace 23 8.1 0.044 0.058 3.960 3.963 hybrid_alltoall_any 393 9.9 0.291 1.367 3.508 3.831 dbcsr_multiply_generic_mpsum_f 137 7.1 0.000 0.001 2.870 3.723 ls_scf_init_scf 1 4.0 0.000 0.000 3.645 3.645 ls_scf_init_matrix_S 1 5.0 0.000 0.000 3.576 3.616 dbcsr_matrix_vector_mult 652 9.0 0.018 0.078 3.408 3.535 ls_scf_dm_to_ks 11 5.0 0.000 0.000 3.363 3.482 dbcsr_complete_redistribute 23 7.5 1.865 1.967 2.961 3.040 matrix_ls_to_qs 11 6.0 0.000 0.000 2.933 3.014 make_images_sizes 370 9.1 0.001 0.001 2.262 2.774 mp_alltoall_i44 370 10.1 2.262 2.774 2.262 2.774 matrix_decluster 11 7.0 0.000 0.000 2.670 2.751 make_images_pack 370 9.1 2.451 2.734 2.455 2.738 dbcsr_matrix_vector_mult_local 652 10.0 2.603 2.706 2.606 2.709 matrix_sqrt_Newton_Schulz 1 6.0 0.000 0.001 2.647 2.649 buffer_matrices_ensure_size 370 8.1 2.186 2.388 2.186 2.388 dbcsr_add_d 280 6.0 0.001 0.001 2.172 2.297 dbcsr_add_anytype 280 7.0 1.216 1.304 2.171 2.296 ------------------------------------------------------------------------------- Plot: name="bench_dftb_timings_32omp", title="Timings of bench_dftb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32omp", name="rest", label="rest", y=112.03100000000003, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=71.63, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=42.601, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=31.118, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=23.21, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="setup_rec_index_2d", label="setup_rec_index_2d", y=18.385, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="calculate_norms", label="calculate_norms", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="bench_dftb_timings_32mpi", title="Timings of bench_dftb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32mpi", name="rest", label="rest", y=22.766000000000005, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=1.865, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=44.351, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=2.603, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="setup_rec_index_2d", label="setup_rec_index_2d", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="calculate_norms", label="calculate_norms", y=5.425, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=4.126, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=24.106, yerr=0.0 Running dbcsr.inp with 1 threads and 32 ranks... done. Running dbcsr.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/dbcsr_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.006 0.006 111.188 111.188 lib_test 1 2.0 0.000 0.000 111.180 111.180 dbcsr_run_tests 3 3.0 0.003 0.003 111.180 111.180 test_multiplies_multiproc 3 4.0 0.001 0.001 89.775 89.775 dbcsr_redistribute 9 5.0 60.145 60.145 64.045 64.045 dbcsr_multiply_generic 9 5.0 0.001 0.001 24.011 24.011 dbcsr_make_random_matrix 9 4.0 15.481 15.481 21.322 21.322 multiply_cannon 9 6.0 0.004 0.004 17.228 17.228 multiply_cannon_loop 9 7.0 0.007 0.007 16.674 16.674 multiply_cannon_multrec 9 8.0 16.666 16.666 16.667 16.667 dbcsr_finalize 27 5.7 0.004 0.004 9.955 9.955 dbcsr_merge_all 18 6.5 3.641 3.641 9.313 9.313 mp_alltoall_d11v 27 6.0 3.611 3.611 3.611 3.611 tree_to_linear_d 9 7.0 3.521 3.521 3.521 3.521 ------------------------------------------------------------------------------- From /workspace/artifacts/dbcsr_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.003 0.005 28.618 28.619 lib_test 1 2.0 0.000 0.000 28.592 28.609 dbcsr_run_tests 3 3.0 0.000 0.001 28.591 28.608 test_multiplies_multiproc 3 4.0 0.000 0.001 27.469 27.552 dbcsr_multiply_generic 9 5.0 0.001 0.001 25.597 25.672 multiply_cannon 9 6.0 0.002 0.002 23.116 23.640 multiply_cannon_loop 9 7.0 0.003 0.003 22.690 23.234 multiply_cannon_multrec 72 8.0 18.372 19.604 18.373 19.605 mp_waitall_1 576 9.2 4.799 6.459 4.799 6.459 multiply_cannon_metrocomm1 72 8.0 0.001 0.002 3.829 5.722 mp_sum_l 310 2.7 0.716 1.438 0.716 1.438 dbcsr_multiply_generic_mpsum_f 9 6.0 0.000 0.000 0.712 1.433 multiply_cannon_metrocomm3 72 8.0 0.000 0.000 0.477 1.346 dbcsr_make_random_matrix 9 4.0 0.883 0.908 1.091 1.154 make_m2s 18 6.0 0.001 0.001 1.043 1.093 make_images 18 7.0 0.027 0.029 1.040 1.090 dbcsr_finalize 27 5.7 0.000 0.001 0.799 0.928 dbcsr_merge_all 18 6.5 0.145 0.167 0.758 0.885 dbcsr_redistribute 9 5.0 0.390 0.461 0.709 0.754 make_images_data 18 8.0 0.001 0.001 0.542 0.638 ------------------------------------------------------------------------------- Plot: name="dbcsr_timings_32omp", title="Timings of dbcsr with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32omp", name="rest", label="rest", y=11.643999999999991, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_redistribute", label="dbcsr_redistribute", y=60.145, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=16.666, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=15.481, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_merge_all", label="dbcsr_merge_all", y=3.641, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_alltoall_d11v", label="mp_alltoall_d11v", y=3.611, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 Plot: name="dbcsr_timings_32mpi", title="Timings of dbcsr with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32mpi", name="rest", label="rest", y=3.312999999999999, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_redistribute", label="dbcsr_redistribute", y=0.39, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=18.372, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=0.883, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_merge_all", label="dbcsr_merge_all", y=0.145, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_alltoall_d11v", label="mp_alltoall_d11v", y=0.0, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=4.799, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=0.716, yerr=0.0 Summary: Performance test works fine. Status: OK Uploading artifacts... done EndDate: 2021-05-12 11:56:03+00:00