StartDate: 2021-11-26 19:08:12+00:00 CpuId: 64x Intel Xeon W 2000 / D-2100 (Skylake / Cascade Lake) {Skylake}, 14nm CommitSHA: e1f4b59b5ff107d9d72c0d95045de36150d94d3e CommitTime: 2021-11-26 17:09:15 +0100 CommitAuthor: Matthias Krack CommitSubject: Disable checks by default Trying to pull image cp2k-toolchain-mpich... success :-) Trying to pull image cp2k-perf-openmp... success :-) #################### Running Image cp2k-perf-openmp #################### ========== Fetching Git Commit ========== CommitSHA: e1f4b59b5ff107d9d72c0d95045de36150d94d3e CommitTime: 2021-11-26 17:09:15 +0100 CommitAuthor: Matthias Krack CommitSubject: Disable checks by default ========== Running Test ========== ========== Compiling CP2K ========== Compiling cp2k... done. ========== Running Performance Test ========== Running H2O-64.inp with 1 threads and 32 ranks... done. Running H2O-64.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.035 0.035 175.196 175.196 qs_mol_dyn_low 1 2.0 0.004 0.004 174.287 174.287 qs_forces 11 3.9 0.002 0.002 174.228 174.228 qs_energies 11 4.9 0.001 0.001 163.100 163.100 scf_env_do_scf 11 5.9 0.001 0.001 128.588 128.588 velocity_verlet 10 3.0 0.002 0.002 121.673 121.673 scf_env_do_scf_inner_loop 108 6.5 0.011 0.011 88.165 88.165 init_scf_loop 11 6.9 0.000 0.000 40.234 40.234 prepare_preconditioner 11 7.9 0.000 0.000 36.028 36.028 make_preconditioner 11 8.9 0.000 0.000 36.028 36.028 rebuild_ks_matrix 119 8.3 0.001 0.001 34.802 34.802 qs_ks_build_kohn_sham_matrix 119 9.3 0.021 0.021 34.801 34.801 make_full_inverse_cholesky 11 9.9 0.000 0.000 34.026 34.026 qs_ks_update_qs_env 119 7.6 0.001 0.001 32.545 32.545 qs_rho_update_rho 119 7.7 0.001 0.001 31.322 31.322 calculate_rho_elec 119 8.7 1.577 1.577 31.321 31.321 qs_scf_new_mos 108 7.5 0.001 0.001 30.822 30.822 qs_scf_loop_do_ot 108 8.5 0.001 0.001 30.821 30.821 ot_scf_mini 108 9.5 0.004 0.004 28.751 28.751 dbcsr_multiply_generic 2286 12.5 0.202 0.202 26.205 26.205 grid_collocate_task_list 119 9.7 24.582 24.582 24.582 24.582 sum_up_and_integrate 119 10.3 0.405 0.405 21.863 21.863 integrate_v_rspace 119 11.3 0.523 0.523 21.457 21.457 cp_fm_cholesky_invert 11 10.9 19.715 19.715 19.715 19.715 grid_integrate_task_list 119 12.3 18.130 18.130 18.130 18.130 init_scf_run 11 5.9 0.001 0.001 17.449 17.449 scf_env_initial_rho_setup 11 6.9 0.001 0.001 17.448 17.448 ot_mini 108 10.5 0.001 0.001 17.135 17.135 wfi_extrapolate 11 7.9 0.001 0.001 16.566 16.566 cp_gemm 81 9.0 0.000 0.000 15.881 15.881 cp_gemm_cosma 81 10.0 15.881 15.881 15.881 15.881 make_m2s 4572 13.5 0.073 0.073 14.861 14.861 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 11.846 11.846 qs_ot_get_derivative 108 11.5 0.002 0.002 8.938 8.938 pw_transfer 1439 11.6 0.106 0.106 8.815 8.815 fft_wrap_pw1pw2 1201 12.6 0.011 0.011 8.454 8.454 ot_diis_step 108 11.5 0.006 0.006 8.193 8.193 cp_fm_cholesky_decompose 22 10.9 8.029 8.029 8.029 8.029 make_images 4572 14.5 2.840 2.840 7.886 7.886 fft_wrap_pw1pw2_140 487 13.2 0.733 0.733 7.204 7.204 qs_ot_get_p 119 10.4 0.001 0.001 7.149 7.149 dbcsr_make_dense_low 5837 15.5 0.119 0.119 7.124 7.124 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 7.000 7.000 dbcsr_copy 2102 12.0 0.318 0.318 6.998 6.998 make_dense_data 5837 16.5 6.212 6.212 6.981 6.981 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 6.914 6.914 apply_single 119 13.6 0.001 0.001 6.914 6.914 dbcsr_copy_into_existing 22 7.9 6.620 6.620 6.620 6.620 dbcsr_complete_redistribute 329 12.2 3.039 3.039 6.469 6.469 dbcsr_make_images_dense 3978 14.8 0.030 0.030 6.419 6.419 qs_env_update_s_mstruct 11 6.9 0.000 0.000 6.214 6.214 multiply_cannon 2286 13.5 1.018 1.018 6.115 6.115 qs_create_task_list 11 7.9 0.000 0.000 5.637 5.637 generate_qs_task_list 11 8.9 3.815 3.815 5.637 5.637 qs_ot_p2m_diag 50 11.0 0.231 0.231 5.352 5.352 copy_dbcsr_to_fm 153 11.3 0.004 0.004 5.332 5.332 density_rs2pw 119 9.7 0.007 0.007 5.162 5.162 qs_energies_compute_matrix_w 11 5.9 0.000 0.000 5.044 5.044 calculate_w_matrix_ot 11 6.9 0.009 0.009 5.044 5.044 build_core_hamiltonian_matrix 11 6.9 0.001 0.001 4.999 4.999 pw_poisson_solve 119 10.3 2.114 2.114 4.939 4.939 cp_dbcsr_syevd 50 12.0 0.005 0.005 4.727 4.727 cp_fm_diag_elpa 50 13.0 0.001 0.001 4.539 4.539 cp_fm_diag_elpa_base 50 14.0 4.483 4.483 4.538 4.538 multiply_cannon_loop 2286 14.5 0.072 0.072 4.468 4.468 multiply_cannon_multrec 2286 15.5 4.307 4.307 4.395 4.395 transfer_dbcsr_to_fm 11 10.9 0.000 0.000 4.384 4.384 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 4.125 4.125 fft3d_s 1202 14.6 3.845 3.845 3.852 3.852 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.016 0.019 79.515 79.516 qs_mol_dyn_low 1 2.0 0.006 0.007 79.371 79.378 qs_forces 11 3.9 0.002 0.002 79.295 79.296 qs_energies 11 4.9 0.002 0.002 74.087 74.089 scf_env_do_scf 11 5.9 0.001 0.001 66.864 66.865 scf_env_do_scf_inner_loop 108 6.5 0.003 0.011 61.964 61.964 velocity_verlet 10 3.0 0.002 0.003 47.263 47.265 rebuild_ks_matrix 119 8.3 0.001 0.001 30.434 30.479 qs_ks_build_kohn_sham_matrix 119 9.3 0.024 0.026 30.434 30.478 qs_ks_update_qs_env 119 7.6 0.001 0.001 27.089 27.134 qs_rho_update_rho 119 7.7 0.001 0.001 23.676 23.704 calculate_rho_elec 119 8.7 0.048 0.051 23.675 23.703 sum_up_and_integrate 119 10.3 0.058 0.061 23.387 23.412 integrate_v_rspace 119 11.3 0.005 0.005 23.329 23.352 dbcsr_multiply_generic 2286 12.5 0.143 0.146 19.853 19.927 grid_collocate_task_list 119 9.7 16.256 16.875 16.256 16.875 grid_integrate_task_list 119 12.3 16.039 16.501 16.039 16.501 qs_scf_new_mos 108 7.5 0.001 0.001 16.109 16.146 qs_scf_loop_do_ot 108 8.5 0.001 0.001 16.108 16.145 ot_scf_mini 108 9.5 0.004 0.004 15.120 15.151 multiply_cannon 2286 13.5 0.237 0.243 13.314 13.519 multiply_cannon_loop 2286 14.5 0.249 0.263 12.069 12.490 mp_waitall_1 169478 16.3 10.455 10.863 10.455 10.863 ot_mini 108 10.5 0.001 0.002 8.949 8.980 rs_pw_transfer 974 11.9 0.019 0.020 7.899 8.853 density_rs2pw 119 9.7 0.010 0.010 6.718 7.695 multiply_cannon_metrocomm3 18288 15.5 0.088 0.094 6.834 7.211 pw_transfer 1439 11.6 0.143 0.150 6.775 6.853 fft_wrap_pw1pw2 1201 12.6 0.015 0.016 6.449 6.529 potential_pw2rs 119 12.3 0.011 0.011 6.066 6.073 fft_wrap_pw1pw2_140 487 13.2 0.634 0.652 5.571 5.774 init_scf_run 11 5.9 0.000 0.002 5.008 5.009 scf_env_initial_rho_setup 11 6.9 0.000 0.001 5.008 5.008 fft3d_ps 1201 14.6 2.643 2.800 4.833 4.915 init_scf_loop 11 6.9 0.001 0.001 4.880 4.881 wfi_extrapolate 11 7.9 0.001 0.001 4.580 4.580 ot_diis_step 108 11.5 0.005 0.006 4.563 4.564 apply_preconditioner_dbcsr 119 12.6 0.000 0.001 4.476 4.526 apply_single 119 13.6 0.001 0.001 4.475 4.526 make_m2s 4572 13.5 0.079 0.084 4.458 4.525 qs_ot_get_derivative 108 11.5 0.002 0.002 4.340 4.372 multiply_cannon_multrec 18288 15.5 3.855 4.000 3.875 4.020 make_images 4572 14.5 0.192 0.197 3.716 3.789 mp_waitany 9880 13.7 2.702 3.661 2.702 3.661 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 3.590 3.598 rs_pw_transfer_RS2PW_140 130 11.5 0.573 0.611 2.401 3.373 rs_pw_transfer_PW2RS_140 130 13.9 1.439 1.490 2.973 3.009 mp_alltoall_d11v 2130 13.8 1.573 2.192 1.573 2.192 qs_ot_get_p 119 10.4 0.001 0.002 2.019 2.069 make_images_data 4572 15.5 0.064 0.070 1.726 1.838 rs_gather_matrices 119 12.3 0.151 0.163 1.162 1.830 cp_gemm 81 9.0 0.000 0.001 1.720 1.723 cp_gemm_cosma 81 10.0 1.719 1.723 1.719 1.723 prepare_preconditioner 11 7.9 0.000 0.000 1.685 1.702 make_preconditioner 11 8.9 0.000 0.000 1.685 1.702 hybrid_alltoall_any 4725 16.4 0.134 0.479 1.520 1.638 mp_alltoall_z22v 1201 16.6 1.452 1.624 1.452 1.624 ------------------------------------------------------------------------------- Plot: name="H2O-64_timings_32omp", title="Timings of H2O-64 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32omp", name="rest", label="rest", y=84.552, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=24.582, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=19.715, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=18.13, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_gemm_cosma", label="cp_gemm_cosma", y=15.881, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=8.029, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=4.307, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="H2O-64_timings_32mpi", title="Timings of H2O-64 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32mpi", name="rest", label="rest", y=28.489000000000004, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=16.256, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=16.039, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_gemm_cosma", label="cp_gemm_cosma", y=1.719, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=3.855, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitany", label="mp_waitany", y=2.702, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=10.455, yerr=0.0 Running H2O-64_nonortho.inp with 1 threads and 32 ranks... done. Running H2O-64_nonortho.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_nonortho_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.036 0.036 227.634 227.634 qs_mol_dyn_low 1 2.0 0.004 0.004 226.777 226.777 qs_forces 11 3.9 0.002 0.002 226.716 226.716 qs_energies 11 4.9 0.001 0.001 211.768 211.768 scf_env_do_scf 11 5.9 0.001 0.001 172.727 172.727 velocity_verlet 10 3.0 0.002 0.002 154.391 154.391 scf_env_do_scf_inner_loop 96 6.5 0.010 0.010 128.034 128.034 rebuild_ks_matrix 107 8.3 0.001 0.001 63.024 63.024 qs_ks_build_kohn_sham_matrix 107 9.3 0.019 0.019 63.023 63.023 qs_rho_update_rho 107 7.7 0.001 0.001 57.344 57.344 calculate_rho_elec 107 8.7 1.421 1.421 57.343 57.343 qs_ks_update_qs_env 107 7.6 0.001 0.001 56.899 56.899 sum_up_and_integrate 107 10.3 0.365 0.365 51.657 51.657 grid_collocate_task_list 107 9.7 51.434 51.434 51.434 51.434 integrate_v_rspace 107 11.3 0.523 0.523 51.292 51.292 grid_integrate_task_list 107 12.3 48.306 48.306 48.306 48.306 init_scf_loop 11 6.9 0.000 0.000 44.465 44.465 prepare_preconditioner 11 7.9 0.000 0.000 36.969 36.969 make_preconditioner 11 8.9 0.000 0.000 36.969 36.969 make_full_inverse_cholesky 11 9.9 0.000 0.000 35.022 35.022 qs_scf_new_mos 96 7.5 0.001 0.001 26.633 26.633 qs_scf_loop_do_ot 96 8.5 0.001 0.001 26.632 26.632 ot_scf_mini 96 9.5 0.003 0.003 24.748 24.748 dbcsr_multiply_generic 1966 12.4 0.180 0.180 22.498 22.498 cp_fm_cholesky_invert 11 10.9 20.588 20.588 20.588 20.588 init_scf_run 11 5.9 0.001 0.001 20.538 20.538 scf_env_initial_rho_setup 11 6.9 0.001 0.001 20.537 20.537 wfi_extrapolate 11 7.9 0.001 0.001 19.381 19.381 cp_gemm 81 9.0 0.000 0.000 16.073 16.073 cp_gemm_cosma 81 10.0 16.073 16.073 16.073 16.073 ot_mini 96 10.5 0.001 0.001 14.646 14.646 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 13.045 13.045 make_m2s 3932 13.4 0.063 0.063 12.541 12.541 cp_fm_cholesky_decompose 22 10.9 8.124 8.124 8.124 8.124 qs_ot_get_derivative 96 11.5 0.002 0.002 7.889 7.889 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 7.785 7.785 pw_transfer 1295 11.6 0.096 0.096 7.678 7.678 qs_env_update_s_mstruct 11 6.9 0.000 0.000 7.406 7.406 fft_wrap_pw1pw2 1081 12.6 0.011 0.011 7.360 7.360 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 7.160 7.160 qs_create_task_list 11 7.9 0.000 0.000 6.813 6.813 generate_qs_task_list 11 8.9 4.993 4.993 6.813 6.813 dbcsr_copy 1855 11.9 0.281 0.281 6.768 6.768 ot_diis_step 96 11.5 0.005 0.005 6.753 6.753 make_images 3932 14.4 2.424 2.424 6.686 6.686 dbcsr_complete_redistribute 317 12.2 3.043 3.043 6.549 6.549 dbcsr_copy_into_existing 22 7.9 6.433 6.433 6.433 6.433 qs_ot_get_p 107 10.4 0.001 0.001 6.322 6.322 fft_wrap_pw1pw2_140 439 13.2 0.612 0.612 6.254 6.254 dbcsr_make_dense_low 4961 15.5 0.098 0.098 6.003 6.003 make_dense_data 4961 16.5 5.259 5.259 5.884 5.884 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 5.823 5.823 apply_single 107 13.6 0.001 0.001 5.823 5.823 multiply_cannon 1966 13.4 0.957 0.957 5.522 5.522 copy_dbcsr_to_fm 147 11.2 0.004 0.004 5.453 5.453 dbcsr_make_images_dense 3386 14.7 0.026 0.026 5.377 5.377 qs_energies_compute_matrix_w 11 5.9 0.000 0.000 5.275 5.275 calculate_w_matrix_ot 11 6.9 0.009 0.009 5.275 5.275 build_core_hamiltonian_matrix 11 6.9 0.001 0.001 4.959 4.959 qs_ot_p2m_diag 44 11.0 0.213 0.213 4.800 4.800 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_nonortho_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.008 0.012 135.771 135.771 qs_mol_dyn_low 1 2.0 0.005 0.007 135.646 135.652 qs_forces 11 3.9 0.002 0.002 135.588 135.589 qs_energies 11 4.9 0.001 0.002 126.455 126.460 scf_env_do_scf 11 5.9 0.001 0.001 116.112 116.113 scf_env_do_scf_inner_loop 96 6.5 0.003 0.010 107.825 107.826 velocity_verlet 10 3.0 0.002 0.003 81.172 81.174 rebuild_ks_matrix 107 8.3 0.001 0.001 61.067 61.101 qs_ks_build_kohn_sham_matrix 107 9.3 0.022 0.023 61.067 61.100 sum_up_and_integrate 107 10.3 0.052 0.055 54.702 54.729 integrate_v_rspace 107 11.3 0.005 0.005 54.650 54.679 qs_ks_update_qs_env 107 7.6 0.001 0.001 53.780 53.811 qs_rho_update_rho 107 7.7 0.001 0.001 51.290 51.313 calculate_rho_elec 107 8.7 0.044 0.046 51.289 51.312 grid_integrate_task_list 107 12.3 47.097 48.497 47.097 48.497 grid_collocate_task_list 107 9.7 43.724 45.069 43.724 45.069 dbcsr_multiply_generic 1966 12.4 0.126 0.130 17.699 17.780 qs_scf_new_mos 96 7.5 0.001 0.001 14.117 14.147 qs_scf_loop_do_ot 96 8.5 0.001 0.001 14.116 14.146 ot_scf_mini 96 9.5 0.003 0.004 13.230 13.260 multiply_cannon 1966 13.4 0.206 0.211 11.932 12.230 multiply_cannon_loop 1966 14.4 0.220 0.233 10.836 11.189 mp_waitall_1 146670 16.2 9.374 9.660 9.374 9.660 rs_pw_transfer 878 11.9 0.017 0.018 8.186 9.434 init_scf_loop 11 6.9 0.001 0.001 8.266 8.267 density_rs2pw 107 9.7 0.009 0.009 6.935 8.204 init_scf_run 11 5.9 0.001 0.002 8.087 8.087 scf_env_initial_rho_setup 11 6.9 0.000 0.001 8.086 8.087 ot_mini 96 10.5 0.001 0.002 7.840 7.869 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 7.511 7.518 wfi_extrapolate 11 7.9 0.001 0.001 7.439 7.440 multiply_cannon_metrocomm3 15728 15.4 0.076 0.080 6.143 6.434 pw_transfer 1295 11.6 0.128 0.139 6.153 6.224 fft_wrap_pw1pw2 1081 12.6 0.014 0.015 5.863 5.947 potential_pw2rs 107 12.3 0.010 0.010 5.606 5.620 fft_wrap_pw1pw2_140 439 13.2 0.570 0.600 5.061 5.254 mp_waitany 8968 13.7 3.452 4.642 3.452 4.642 fft3d_ps 1081 14.6 2.396 2.565 4.396 4.506 rs_pw_transfer_RS2PW_140 118 11.5 0.461 0.482 3.060 4.301 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 4.014 4.069 apply_single 107 13.6 0.001 0.001 4.014 4.069 make_m2s 3932 13.4 0.069 0.072 3.970 4.029 ot_diis_step 96 11.5 0.005 0.005 4.003 4.004 qs_ot_get_derivative 96 11.5 0.001 0.002 3.795 3.823 multiply_cannon_multrec 15728 15.4 3.483 3.619 3.500 3.636 mp_alltoall_d11v 1998 13.7 2.262 3.602 2.262 3.602 make_images 3932 14.4 0.169 0.174 3.318 3.379 rs_gather_matrices 107 12.3 0.147 0.162 1.884 3.185 rs_pw_transfer_PW2RS_140 118 13.9 1.376 1.441 2.859 2.898 ------------------------------------------------------------------------------- Plot: name="H2O-64_nonortho_timings_32omp", title="Timings of H2O-64_nonortho with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="rest", label="rest", y=83.10899999999998, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=51.434, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=48.306, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=20.588, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_gemm_cosma", label="cp_gemm_cosma", y=16.073, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=8.124, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 Plot: name="H2O-64_nonortho_timings_32mpi", title="Timings of H2O-64_nonortho with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="rest", label="rest", y=28.64099999999999, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=43.724, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=47.097, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_gemm_cosma", label="cp_gemm_cosma", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=9.374, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=3.483, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitany", label="mp_waitany", y=3.452, yerr=0.0 Running H2O-hyb.inp with 1 threads and 32 ranks... done. Running H2O-hyb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-hyb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.407 0.407 286.924 286.924 qs_energies 1 2.0 0.000 0.000 285.604 285.604 scf_env_do_scf 1 3.0 0.000 0.000 282.927 282.927 qs_ks_update_qs_env 8 5.0 0.000 0.000 264.515 264.515 rebuild_ks_matrix 7 6.0 0.000 0.000 264.407 264.407 qs_ks_build_kohn_sham_matrix 7 7.0 0.002 0.002 264.407 264.407 hfx_ks_matrix 7 8.0 0.000 0.000 173.076 173.076 integrate_four_center 7 9.0 2.174 2.174 173.043 173.043 scf_env_do_scf_inner_loop 7 4.0 0.001 0.001 166.175 166.175 integrate_four_center_main 7 10.0 0.889 0.889 161.419 161.419 integrate_four_center_bin 456 11.0 160.531 160.531 160.531 160.531 init_scf_loop 1 4.0 0.000 0.000 116.738 116.738 cp_gemm 129 10.3 0.001 0.001 75.353 75.353 cp_gemm_cosma 129 11.3 75.353 75.353 75.353 75.353 admm_mo_calc_rho_aux 7 8.0 0.000 0.000 43.673 43.673 admm_fit_mo_coeffs 7 9.0 0.000 0.000 41.836 41.836 admm_mo_merge_derivs 7 8.0 0.000 0.000 38.235 38.235 merge_mo_derivs_diag 7 9.0 0.023 0.023 38.235 38.235 purify_mo_diag 7 10.0 0.001 0.001 24.684 24.684 fit_mo_coeffs 7 10.0 0.000 0.000 17.152 17.152 prepare_preconditioner 1 5.0 0.000 0.000 14.202 14.202 make_preconditioner 1 6.0 0.000 0.000 14.202 14.202 integrate_four_center_load 7 10.0 0.001 0.001 9.034 9.034 hfx_load_balance 1 11.0 0.002 0.002 9.033 9.033 arnoldi_normal_ev 11 9.3 0.002 0.002 8.170 8.170 estimate_cond_num 1 7.0 0.000 0.000 8.081 8.081 build_subspace 28 9.5 0.017 0.017 8.032 8.032 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-hyb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.224 0.230 190.988 190.989 qs_energies 1 2.0 0.000 0.001 190.612 190.613 scf_env_do_scf 1 3.0 0.000 0.000 189.977 189.978 qs_ks_update_qs_env 8 5.0 0.000 0.000 186.619 186.619 rebuild_ks_matrix 7 6.0 0.000 0.000 186.604 186.604 qs_ks_build_kohn_sham_matrix 7 7.0 0.002 0.003 186.604 186.604 hfx_ks_matrix 7 8.0 0.000 0.001 173.667 173.669 integrate_four_center 7 9.0 0.108 0.429 173.650 173.651 integrate_four_center_main 7 10.0 0.005 0.005 158.207 162.364 integrate_four_center_bin 448 11.0 158.202 162.359 158.202 162.359 scf_env_do_scf_inner_loop 7 4.0 0.000 0.001 111.438 111.438 init_scf_loop 1 4.0 0.000 0.001 78.537 78.538 integrate_four_center_load 7 10.0 0.000 0.001 9.126 9.136 hfx_load_balance 1 11.0 0.002 0.003 9.126 9.136 mp_sync 70 11.3 5.419 8.146 5.419 8.146 cp_gemm 129 10.3 0.001 0.001 5.174 5.180 cp_gemm_cosma 129 11.3 5.173 5.179 5.173 5.179 hfx_load_balance_bin 1 12.0 4.466 4.563 4.466 4.563 hfx_load_balance_count 1 12.0 4.459 4.551 4.459 4.551 qs_vxc_create 14 8.0 0.000 0.001 3.876 3.876 xc_vxc_pw_create 14 9.0 0.021 0.024 3.876 3.876 ------------------------------------------------------------------------------- Plot: name="H2O-hyb_timings_32omp", title="Timings of H2O-hyb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32omp", name="rest", label="rest", y=47.569999999999936, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_bin", label="integrate_four_center_bin", y=160.531, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="cp_gemm_cosma", label="cp_gemm_cosma", y=75.353, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center", label="integrate_four_center", y=2.174, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_main", label="integrate_four_center_main", y=0.889, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="CP2K", label="CP2K", y=0.407, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_count", label="hfx_load_balance_count", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=0.0, yerr=0.0 Plot: name="H2O-hyb_timings_32mpi", title="Timings of H2O-hyb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32mpi", name="rest", label="rest", y=12.931999999999988, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_bin", label="integrate_four_center_bin", y=158.202, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="cp_gemm_cosma", label="cp_gemm_cosma", y=5.173, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center", label="integrate_four_center", y=0.108, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_main", label="integrate_four_center_main", y=0.005, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="CP2K", label="CP2K", y=0.224, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="mp_sync", label="mp_sync", y=5.419, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_count", label="hfx_load_balance_count", y=4.459, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=4.466, yerr=0.0 Running GW_PBE_4benzene.inp with 1 threads and 32 ranks... done. Running GW_PBE_4benzene.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/GW_PBE_4benzene_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.017 0.017 454.146 454.146 qs_energies 1 2.0 0.000 0.000 453.608 453.608 mp2_main 1 3.0 0.000 0.000 446.795 446.795 mp2_gpw_main 1 4.0 0.001 0.001 446.381 446.381 rpa_ri_compute_en 1 5.0 0.000 0.000 429.297 429.297 rpa_num_int 1 6.0 0.000 0.000 429.271 429.271 compute_mat_P_omega 1 7.0 0.002 0.002 219.697 219.697 compute_mat_P_omega_contract 10 8.0 13.497 13.497 218.075 218.075 dbcsr_t_total 2336 9.6 0.019 0.019 206.568 206.568 cp_gemm 105 8.4 0.000 0.000 179.998 179.998 cp_gemm_cosma 105 9.4 179.997 179.997 179.997 179.997 dbcsr_t_contract 787 11.0 49.373 49.373 129.445 129.445 GW_matrix_operations 10 7.0 0.006 0.006 126.129 126.129 compute_mat_P_omega_calc_M_occ 250 9.0 13.505 13.505 82.916 82.916 dbcsr_t_copy 1103 10.7 21.116 21.116 75.491 75.491 dbcsr_tas_total 1149 12.2 0.058 0.058 73.408 73.408 dbcsr_tas_multiply 807 12.1 0.004 0.004 71.804 71.804 rpa_num_int_RPA_matrix_operati 10 7.0 0.000 0.000 60.602 60.602 contract_P_omega_with_mat_L 10 8.0 0.000 0.000 58.614 58.614 dbcsr_multiply_generic 837 15.8 0.144 0.144 57.458 57.458 dbcsr_tas_dbcsr 807 14.1 0.004 0.004 56.991 56.991 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 53.732 53.732 dbcsr_tas_mm_1N 524 15.1 0.003 0.003 44.023 44.023 multiply_cannon 837 16.8 15.006 15.006 42.708 42.708 dbcsr_tas_reserve_blocks_index 3261 13.7 7.317 7.317 29.008 29.008 dbcsr_tas_copy 574 11.4 17.727 17.727 25.802 25.802 multiply_cannon_loop 837 17.8 0.133 0.133 24.780 24.780 multiply_cannon_multrec 837 18.8 22.911 22.911 23.573 23.573 dbcsr_t_reserve_blocks_index 2280 12.5 1.398 1.398 22.404 22.404 dbcsr_t_reserve_blocks_index_a 2222 11.6 0.015 0.015 22.098 22.098 dbcsr_reserve_blocks 3717 14.7 20.948 20.948 21.340 21.340 compute_mat_P_omega_copy_M_occ 250 9.0 0.002 0.002 20.716 20.716 compute_QP_energies 1 7.0 0.000 0.000 20.432 20.432 compute_self_energy_cubic_gw 1 8.0 0.109 0.109 20.432 20.432 mp2_ri_gpw_compute_in 1 5.0 0.001 0.001 17.068 17.068 compute_mat_P_omega_copy_M_vir 250 9.0 0.002 0.002 15.072 15.072 dbcsr_t_copy_nocomm 251 12.0 11.713 11.713 14.274 14.274 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 13.140 13.140 make_m2s 1674 16.8 0.112 0.112 11.967 11.967 make_images 1674 17.8 5.468 5.468 11.339 11.339 dbcsr_tas_mm_2 251 15.0 0.002 0.002 11.143 11.143 cp_fm_cholesky_invert 10 8.0 9.166 9.166 9.166 9.166 ------------------------------------------------------------------------------- From /workspace/artifacts/GW_PBE_4benzene_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.012 66.272 66.273 qs_energies 1 2.0 0.001 0.001 66.129 66.136 mp2_main 1 3.0 0.000 0.001 64.514 64.521 mp2_gpw_main 1 4.0 0.000 0.001 64.450 64.458 rpa_ri_compute_en 1 5.0 0.000 0.000 62.261 62.269 rpa_num_int 1 6.0 0.001 0.001 62.253 62.261 dbcsr_t_total 2336 9.6 0.019 0.020 48.984 48.987 compute_mat_P_omega 1 7.0 0.001 0.002 47.900 47.905 compute_mat_P_omega_contract 10 8.0 0.889 0.920 47.581 47.587 dbcsr_t_contract 787 11.0 2.107 2.246 35.917 35.921 dbcsr_tas_total 1149 12.2 0.075 0.081 31.661 31.661 dbcsr_tas_multiply 807 12.1 0.003 0.003 31.504 31.507 dbcsr_tas_dbcsr 807 14.1 0.004 0.004 22.954 22.957 dbcsr_multiply_generic 837 15.8 0.081 0.086 19.234 20.475 compute_mat_P_omega_calc_M_occ 250 9.0 0.858 0.887 15.908 15.909 multiply_cannon 837 16.8 0.150 0.168 11.280 11.750 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 11.711 11.712 dbcsr_t_copy 1111 10.7 4.928 5.205 11.192 11.673 dbcsr_tas_mm_1N 524 15.1 0.003 0.004 10.122 11.156 multiply_cannon_loop 837 17.8 0.050 0.054 10.282 10.700 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.002 10.073 10.073 cp_gemm 105 8.4 0.000 0.000 9.034 9.049 cp_gemm_cosma 105 9.4 9.034 9.048 9.034 9.048 dbcsr_tas_mm_2 251 15.0 0.002 0.002 8.834 8.835 mp_sync 8696 11.6 7.385 8.745 7.385 8.745 multiply_cannon_multrec 1386 17.8 7.876 8.390 8.173 8.662 make_m2s 1674 16.8 0.051 0.054 6.831 7.655 make_images 1674 17.8 0.257 0.276 6.737 7.562 GW_matrix_operations 10 7.0 0.002 0.002 5.783 5.791 compute_QP_energies 1 7.0 0.000 0.001 4.897 4.897 compute_self_energy_cubic_gw 1 8.0 0.005 0.006 4.892 4.897 dbcsr_t_communicate_buffer 1098 11.7 0.103 0.111 4.185 4.400 mp_waitall_2 3776 14.7 3.926 4.266 3.926 4.266 make_images_data 1674 18.8 0.042 0.044 3.672 3.822 hybrid_alltoall_any 1724 19.5 2.806 3.177 3.519 3.692 dbcsr_t_reserve_blocks_index_a 2791 11.4 0.023 0.026 3.193 3.675 dbcsr_t_reserve_blocks_index 2849 12.4 0.124 0.131 3.185 3.670 contract_cubic_gw 21 9.0 0.000 0.000 3.664 3.664 dbcsr_tas_reserve_blocks_index 3300 13.8 0.284 0.307 3.127 3.609 dbcsr_reserve_blocks 3785 14.7 2.839 3.316 2.883 3.363 make_images_pack 1674 18.8 2.562 3.340 2.579 3.358 rpa_num_int_RPA_matrix_operati 10 7.0 0.000 0.000 3.267 3.277 contract_P_omega_with_mat_L 10 8.0 0.000 0.000 3.136 3.144 mp_waitall_1 26582 19.0 1.959 2.418 1.959 2.418 convert_to_new_pgrid 2421 14.1 0.021 0.023 2.218 2.366 dbcsr_copy 3323 15.8 2.139 2.294 2.172 2.327 mp2_ri_gpw_compute_in 1 5.0 0.001 0.001 2.186 2.186 compute_mat_P_omega_copy_M_vir 250 9.0 0.002 0.002 2.044 2.049 dbcsr_add_anytype 909 13.7 1.186 1.246 1.848 1.910 compute_mat_P_omega_copy_M_occ 250 9.0 0.002 0.002 1.797 1.802 dbcsr_tas_replicate 396 14.1 0.856 0.944 1.495 1.565 scf_env_do_scf 1 3.0 0.000 0.000 1.553 1.553 scf_env_do_scf_inner_loop 17 4.0 0.001 0.002 1.553 1.553 mp_max_i 2058 9.6 1.112 1.372 1.112 1.372 ------------------------------------------------------------------------------- Plot: name="GW_PBE_4benzene_timings_32omp", title="Timings of GW_PBE_4benzene with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="rest", label="rest", y=159.80100000000004, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="cp_gemm_cosma", label="cp_gemm_cosma", y=179.997, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbcsr_t_contract", label="dbcsr_t_contract", y=49.373, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=22.911, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbcsr_t_copy", label="dbcsr_t_copy", y=21.116, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbcsr_reserve_blocks", label="dbcsr_reserve_blocks", y=20.948, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 Plot: name="GW_PBE_4benzene_timings_32mpi", title="Timings of GW_PBE_4benzene with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="rest", label="rest", y=28.177000000000007, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="cp_gemm_cosma", label="cp_gemm_cosma", y=9.034, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbcsr_t_contract", label="dbcsr_t_contract", y=2.107, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=7.876, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbcsr_t_copy", label="dbcsr_t_copy", y=4.928, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbcsr_reserve_blocks", label="dbcsr_reserve_blocks", y=2.839, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=3.926, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_sync", label="mp_sync", y=7.385, yerr=0.0 Running diag_cu144_broy.inp with 1 threads and 32 ranks... done. Running diag_cu144_broy.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/diag_cu144_broy_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.104 0.104 205.885 205.885 qs_energies 1 2.0 0.000 0.000 204.033 204.033 scf_env_do_scf 1 3.0 0.000 0.000 193.430 193.430 scf_env_do_scf_inner_loop 15 4.0 0.002 0.002 193.430 193.430 qs_scf_new_mos 15 5.0 0.001 0.001 85.951 85.951 qs_ks_update_qs_env 15 5.0 0.000 0.000 75.012 75.012 rebuild_ks_matrix 15 6.0 0.000 0.000 74.625 74.625 qs_ks_build_kohn_sham_matrix 15 7.0 0.003 0.003 74.625 74.625 eigensolver 15 6.0 0.002 0.002 71.706 71.706 cp_fm_diag_elpa 15 7.0 0.000 0.000 55.152 55.152 cp_fm_diag_elpa_base 15 8.0 50.418 50.418 55.151 55.151 qs_vxc_create 15 8.0 0.040 0.040 49.911 49.911 calculate_dispersion_nonloc 15 9.0 9.793 9.793 43.514 43.514 pw_transfer 1191 9.8 0.106 0.106 29.977 29.977 fft_wrap_pw1pw2 1086 10.9 0.014 0.014 29.640 29.640 qs_rho_update_rho 16 5.0 0.000 0.000 25.375 25.375 calculate_rho_elec 16 6.0 0.351 0.351 25.375 25.375 grid_collocate_task_list 16 7.0 23.607 23.607 23.607 23.607 sum_up_and_integrate 15 8.0 0.083 0.083 22.954 22.954 integrate_v_rspace 15 9.0 0.037 0.037 22.871 22.871 fft_wrap_pw1pw2_150 765 12.0 3.554 3.554 22.362 22.362 grid_integrate_task_list 15 10.0 22.146 22.146 22.146 22.146 fft3d_s 1087 12.8 12.513 12.513 12.525 12.525 cp_fm_cholesky_restore 45 7.0 12.095 12.095 12.095 12.095 pw_scatter_s 585 13.0 11.599 11.599 11.599 11.599 copy_dbcsr_to_fm 16 5.9 0.001 0.001 11.288 11.288 dbcsr_complete_redistribute 46 8.3 3.611 3.611 10.024 10.024 cp_fm_upper_to_full 30 8.0 9.190 9.190 9.190 9.190 vdW_energy 15 10.0 8.681 8.681 8.681 8.681 gspace_mixing 14 5.0 0.277 0.277 8.413 8.413 broyden_mixing 14 6.0 7.610 7.610 7.610 7.610 fft_wrap_pw1pw2_200 197 11.5 0.394 0.394 6.996 6.996 xc_vxc_pw_create 15 9.0 1.552 1.552 6.357 6.357 init_scf_run 1 3.0 0.000 0.000 4.884 4.884 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 4.825 4.825 dbcsr_finalize 159 9.9 0.023 0.023 4.410 4.410 dbcsr_merge_all 91 11.1 0.093 0.093 4.245 4.245 ------------------------------------------------------------------------------- From /workspace/artifacts/diag_cu144_broy_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.022 0.025 92.441 92.442 qs_energies 1 2.0 0.001 0.001 92.049 92.050 scf_env_do_scf 1 3.0 0.000 0.000 86.793 86.794 scf_env_do_scf_inner_loop 15 4.0 0.001 0.003 86.793 86.794 qs_ks_update_qs_env 15 5.0 0.000 0.000 42.461 42.489 rebuild_ks_matrix 15 6.0 0.000 0.000 42.407 42.435 qs_ks_build_kohn_sham_matrix 15 7.0 0.004 0.005 42.407 42.435 sum_up_and_integrate 15 8.0 0.015 0.017 24.084 24.123 integrate_v_rspace 15 9.0 0.001 0.001 24.069 24.107 qs_rho_update_rho 16 5.0 0.000 0.000 24.027 24.029 calculate_rho_elec 16 6.0 0.012 0.012 24.027 24.028 grid_integrate_task_list 15 10.0 21.902 22.564 21.902 22.564 grid_collocate_task_list 16 7.0 21.803 22.488 21.803 22.488 qs_scf_new_mos 15 5.0 0.001 0.001 20.625 20.739 eigensolver 15 6.0 0.002 0.003 18.899 18.914 qs_vxc_create 15 8.0 0.001 0.001 17.724 17.741 calculate_dispersion_nonloc 15 9.0 1.467 1.555 14.393 14.407 pw_transfer 1191 9.8 0.139 0.144 13.961 14.078 fft_wrap_pw1pw2 1086 10.9 0.022 0.024 13.651 13.771 cp_fm_diag_elpa 15 7.0 0.000 0.000 13.644 13.653 cp_fm_diag_elpa_base 15 8.0 13.357 13.398 13.638 13.643 fft3d_ps 1086 12.9 5.961 6.185 10.395 10.600 fft_wrap_pw1pw2_150 765 12.0 0.738 0.807 9.174 9.231 cp_fm_cholesky_restore 45 7.0 4.982 5.042 4.982 5.042 fft_wrap_pw1pw2_200 197 11.5 0.414 0.447 4.291 4.388 xc_vxc_pw_create 15 9.0 0.065 0.086 3.329 3.343 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 3.261 3.261 mp_alltoall_z22v 1086 14.9 2.749 3.161 2.749 3.161 build_core_hamiltonian_matrix 1 4.0 0.000 0.000 2.801 3.076 rs_pw_transfer 158 9.4 0.003 0.003 2.046 2.665 x_to_yz 585 14.0 1.063 1.117 2.481 2.592 vdW_energy 15 10.0 2.231 2.337 2.231 2.337 density_rs2pw 16 7.0 0.002 0.002 2.020 2.275 yz_to_x 501 13.7 0.586 0.636 1.916 2.129 mp_waitany 520 11.3 1.296 2.100 1.296 2.100 build_core_ppnl 1 5.0 1.872 2.051 1.872 2.051 rs_pw_transfer_RS2PW_200 18 8.8 0.081 0.089 1.015 1.856 ------------------------------------------------------------------------------- Plot: name="diag_cu144_broy_timings_32omp", title="Timings of diag_cu144_broy with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32omp", name="rest", label="rest", y=85.10599999999998, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=50.418, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=23.607, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=22.146, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_s", label="fft3d_s", y=12.513, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=12.095, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 Plot: name="diag_cu144_broy_timings_32mpi", title="Timings of diag_cu144_broy with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="rest", label="rest", y=24.436000000000007, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=13.357, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=21.803, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=21.902, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=4.982, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=5.961, yerr=0.0 Running bench_dftb.inp with 1 threads and 32 ranks... done. Running bench_dftb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/bench_dftb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.116 0.116 372.355 372.355 qs_energies 1 2.0 0.000 0.000 372.156 372.156 ls_scf 1 3.0 0.000 0.000 370.149 370.149 ls_scf_main 1 4.0 0.002 0.002 351.865 351.865 density_matrix_trs4 11 5.0 0.012 0.012 192.979 192.979 ls_scf_dm_to_ks 11 5.0 0.000 0.000 151.807 151.807 matrix_ls_to_qs 11 6.0 0.000 0.000 147.438 147.438 dbcsr_multiply_generic 185 6.1 0.537 0.537 124.846 124.846 dbcsr_copy_into_existing 11 7.0 92.978 92.978 92.978 92.978 multiply_cannon 185 7.1 3.663 3.663 85.130 85.130 multiply_cannon_loop 185 8.1 0.416 0.416 61.675 61.675 dbcsr_complete_redistribute 23 7.5 42.914 42.914 59.462 59.462 multiply_cannon_multrec 185 9.1 59.229 59.229 59.284 59.284 matrix_decluster 11 7.0 0.000 0.000 54.458 54.458 arnoldi_extremal 12 6.1 0.000 0.000 47.549 47.549 arnoldi_normal_ev 12 7.1 0.030 0.030 47.549 47.549 build_subspace 23 8.1 0.138 0.138 46.879 46.879 dbcsr_matrix_vector_mult 652 9.0 0.262 0.262 36.084 36.084 dbcsr_matrix_vector_mult_local 652 10.0 34.488 34.488 34.497 34.497 make_m2s 370 7.1 0.032 0.032 32.789 32.789 make_images 370 8.1 7.841 7.841 30.127 30.127 dbcsr_finalize 646 7.5 0.242 0.242 22.896 22.896 dbcsr_merge_all 597 8.5 4.068 4.068 20.506 20.506 setup_rec_index_2d 370 8.1 19.627 19.627 19.627 19.627 dbcsr_sort_indices 1103 9.9 17.909 17.909 17.909 17.909 ls_scf_init_scf 1 4.0 0.000 0.000 17.056 17.056 ls_scf_init_matrix_S 1 5.0 0.000 0.000 16.592 16.592 matrix_sqrt_Newton_Schulz 1 6.0 0.001 0.001 15.655 15.655 quick_finalize 395 10.0 0.558 0.558 15.327 15.327 dbcsr_special_finalize 370 9.1 0.004 0.004 14.131 14.131 tree_to_linear_d 110 9.4 13.968 13.968 13.968 13.968 dbcsr_new_transposed 2 7.0 0.144 0.144 9.731 9.731 dbcsr_redistribute 2 8.0 9.474 9.474 9.544 9.544 dbcsr_dot_sd 144 6.3 9.455 9.455 9.456 9.456 dbcsr_frobenius_norm 142 6.1 8.488 8.488 8.491 8.491 make_images_data 370 9.1 0.012 0.012 7.951 7.951 ------------------------------------------------------------------------------- From /workspace/artifacts/bench_dftb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.008 0.010 107.905 107.906 qs_energies 1 2.0 0.000 0.000 107.800 107.800 ls_scf 1 3.0 0.000 0.000 107.702 107.703 ls_scf_main 1 4.0 0.001 0.004 103.257 103.258 density_matrix_trs4 11 5.0 0.010 0.014 99.153 99.258 dbcsr_multiply_generic 185 6.1 0.083 0.102 93.265 93.512 multiply_cannon 185 7.1 0.050 0.052 78.415 79.463 multiply_cannon_loop 185 8.1 0.273 0.283 74.102 75.968 multiply_cannon_multrec 1480 9.1 48.826 50.877 49.401 51.430 mp_waitall_1 11936 10.3 22.011 24.503 22.011 24.503 multiply_cannon_metrocomm3 1480 9.1 0.022 0.024 12.966 16.600 make_m2s 370 7.1 0.038 0.042 10.474 10.550 make_images 370 8.1 0.719 0.753 10.343 10.425 multiply_cannon_metrocomm1 1480 9.1 0.013 0.015 5.175 6.951 calculate_norms 2960 9.1 6.196 6.449 6.196 6.449 make_images_data 370 9.1 0.015 0.016 4.335 4.694 arnoldi_extremal 12 6.1 0.001 0.001 4.387 4.402 arnoldi_normal_ev 12 7.1 0.002 0.008 4.386 4.401 build_subspace 23 8.1 0.047 0.060 4.245 4.249 hybrid_alltoall_any 393 9.9 0.376 1.927 3.639 3.931 mp_sum_l 1039 5.9 2.833 3.706 2.833 3.706 dbcsr_matrix_vector_mult 652 9.0 0.020 0.085 3.541 3.615 ls_scf_dm_to_ks 11 5.0 0.000 0.000 3.535 3.611 ls_scf_init_scf 1 4.0 0.000 0.000 3.453 3.455 ls_scf_init_matrix_S 1 5.0 0.000 0.000 3.415 3.422 dbcsr_complete_redistribute 23 7.5 1.942 2.109 3.137 3.289 matrix_ls_to_qs 11 6.0 0.000 0.000 3.078 3.239 make_images_pack 370 9.1 2.848 3.140 2.855 3.147 matrix_sqrt_Newton_Schulz 1 6.0 0.001 0.001 3.121 3.124 matrix_decluster 11 7.0 0.000 0.000 2.814 2.964 dbcsr_matrix_vector_mult_local 652 10.0 2.731 2.892 2.736 2.897 buffer_matrices_ensure_size 370 8.1 2.600 2.691 2.600 2.691 dbcsr_multiply_generic_mpsum_f 137 7.1 0.001 0.001 1.844 2.602 dbcsr_add_d 280 6.0 0.002 0.002 2.422 2.515 dbcsr_add_anytype 280 7.0 1.329 1.433 2.420 2.514 dbcsr_finalize 646 7.5 0.016 0.017 2.238 2.315 ------------------------------------------------------------------------------- Plot: name="bench_dftb_timings_32omp", title="Timings of bench_dftb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32omp", name="rest", label="rest", y=123.11900000000003, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=92.978, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=59.229, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=42.914, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=34.488, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="setup_rec_index_2d", label="setup_rec_index_2d", y=19.627, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="calculate_norms", label="calculate_norms", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="make_images_pack", label="make_images_pack", y=0.0, yerr=0.0 Plot: name="bench_dftb_timings_32mpi", title="Timings of bench_dftb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32mpi", name="rest", label="rest", y=20.518, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=48.826, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=1.942, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=2.731, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="setup_rec_index_2d", label="setup_rec_index_2d", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=22.011, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="calculate_norms", label="calculate_norms", y=6.196, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=2.833, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="make_images_pack", label="make_images_pack", y=2.848, yerr=0.0 Running dbcsr.inp with 1 threads and 32 ranks... done. Running dbcsr.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/dbcsr_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.006 0.006 125.787 125.787 lib_test 1 2.0 0.000 0.000 125.780 125.780 dbcsr_run_tests 3 3.0 0.003 0.003 125.779 125.779 test_multiplies_multiproc 3 4.0 0.001 0.001 104.942 104.942 dbcsr_redistribute 9 5.0 72.697 72.697 76.397 76.397 dbcsr_multiply_generic 9 5.0 0.001 0.001 26.331 26.331 dbcsr_make_random_matrix 9 4.0 15.327 15.327 20.741 20.741 multiply_cannon 9 6.0 0.002 0.002 19.253 19.253 multiply_cannon_loop 9 7.0 0.002 0.002 18.696 18.696 multiply_cannon_multrec 9 8.0 18.693 18.693 18.694 18.694 dbcsr_finalize 27 5.7 0.005 0.005 9.362 9.362 dbcsr_merge_all 18 6.5 3.339 3.339 8.585 8.585 mp_alltoall_d11v 27 6.0 3.359 3.359 3.359 3.359 tree_to_linear_d 9 7.0 3.292 3.292 3.292 3.292 dbcsr_data_release 975 7.6 2.568 2.568 2.568 2.568 ------------------------------------------------------------------------------- From /workspace/artifacts/dbcsr_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.004 0.006 30.501 30.502 lib_test 1 2.0 0.000 0.000 30.468 30.485 dbcsr_run_tests 3 3.0 0.000 0.001 30.467 30.484 test_multiplies_multiproc 3 4.0 0.001 0.001 29.261 29.352 dbcsr_multiply_generic 9 5.0 0.002 0.002 27.190 27.303 multiply_cannon 9 6.0 0.003 0.003 24.539 25.166 multiply_cannon_loop 9 7.0 0.004 0.005 24.027 24.624 multiply_cannon_multrec 72 8.0 20.304 21.337 20.306 21.338 mp_waitall_1 576 9.2 4.199 5.118 4.199 5.118 multiply_cannon_metrocomm1 72 8.0 0.002 0.002 3.308 4.356 mp_sum_l 310 2.7 0.568 1.254 0.568 1.254 dbcsr_multiply_generic_mpsum_f 9 6.0 0.000 0.000 0.563 1.250 dbcsr_make_random_matrix 9 4.0 0.900 0.914 1.163 1.211 make_m2s 18 6.0 0.001 0.001 1.101 1.179 make_images 18 7.0 0.026 0.027 1.097 1.175 dbcsr_finalize 27 5.7 0.001 0.001 1.063 1.174 dbcsr_merge_all 18 6.5 0.172 0.203 0.928 1.038 multiply_cannon_metrocomm3 72 8.0 0.000 0.001 0.401 0.928 dbcsr_data_release 444 7.6 0.735 0.821 0.735 0.821 dbcsr_redistribute 9 5.0 0.435 0.497 0.766 0.805 dbcsr_destroy 111 5.9 0.007 0.061 0.609 0.684 dbcsr_data_copy_aa2 18 7.5 0.553 0.651 0.553 0.651 make_images_data 18 8.0 0.001 0.001 0.549 0.625 ------------------------------------------------------------------------------- Plot: name="dbcsr_timings_32omp", title="Timings of dbcsr with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32omp", name="rest", label="rest", y=9.804000000000016, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_redistribute", label="dbcsr_redistribute", y=72.697, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=18.693, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=15.327, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_alltoall_d11v", label="mp_alltoall_d11v", y=3.359, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_merge_all", label="dbcsr_merge_all", y=3.339, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_data_release", label="dbcsr_data_release", y=2.568, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 Plot: name="dbcsr_timings_32mpi", title="Timings of dbcsr with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32mpi", name="rest", label="rest", y=3.1880000000000024, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_redistribute", label="dbcsr_redistribute", y=0.435, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=20.304, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=0.9, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_alltoall_d11v", label="mp_alltoall_d11v", y=0.0, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_merge_all", label="dbcsr_merge_all", y=0.172, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_data_release", label="dbcsr_data_release", y=0.735, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=4.199, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=0.568, yerr=0.0 Running MQAE_single_node.inp with 1 threads and 32 ranks... done. Running MQAE_single_node.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/MQAE_single_node_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.053 0.053 155.030 155.030 qs_mol_dyn_low 1 2.0 0.005 0.005 152.982 152.982 velocity_verlet 5 3.0 0.005 0.005 123.508 123.508 qmmm_el_coupling 6 3.8 0.000 0.000 76.046 76.046 qmmm_elec_with_gaussian 6 4.8 0.194 0.194 76.040 76.040 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 74.320 74.320 qmmm_elec_gaussian_low_G 6 6.8 72.801 72.801 72.801 72.801 qs_forces 6 3.8 0.001 0.001 62.045 62.045 qs_energies 6 4.8 0.001 0.001 55.120 55.120 scf_env_do_scf 6 5.8 0.001 0.001 50.976 50.976 scf_env_do_scf_inner_loop 39 6.8 0.004 0.004 43.194 43.194 rebuild_ks_matrix 45 8.4 0.000 0.000 42.439 42.439 qs_ks_build_kohn_sham_matrix 45 9.4 0.008 0.008 42.439 42.439 qs_ks_update_qs_env 45 7.8 0.000 0.000 36.288 36.288 pw_transfer 966 11.9 0.082 0.082 25.961 25.961 fft_wrap_pw1pw2 801 13.0 0.010 0.010 25.559 25.559 fft_wrap_pw1pw2_150 507 14.3 2.528 2.528 24.976 24.976 qs_vxc_create 45 10.4 0.001 0.001 23.072 23.072 xc_vxc_pw_create 45 11.4 4.536 4.536 23.071 23.071 pw_scatter_s 429 15.4 11.477 11.477 11.477 11.477 qs_rho_update_rho 45 7.9 0.000 0.000 11.449 11.449 calculate_rho_elec 45 8.9 0.914 0.914 11.449 11.449 xc_rho_set_and_dset_create 45 12.4 0.254 0.254 10.654 10.654 fft3d_s 802 15.0 9.912 9.912 9.923 9.923 qmmm_forces 6 3.8 0.002 0.002 9.373 9.373 pw_integral_ab 2539 7.4 9.336 9.336 9.336 9.336 qmmm_forces_with_gaussian 6 4.8 0.141 0.141 8.863 8.863 init_scf_loop 6 6.8 0.000 0.000 7.776 7.776 qs_ks_ddapc 45 10.4 0.001 0.001 7.281 7.281 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 6.800 6.800 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 6.164 6.164 pw_poisson_solve 51 9.9 2.625 2.625 5.932 5.932 qmmm_forces_gaussian_low_G 6 6.8 5.708 5.708 5.708 5.708 grid_collocate_task_list 45 9.9 5.345 5.345 5.345 5.345 density_rs2pw 45 9.9 0.003 0.003 5.190 5.190 fist_calc_energy_force 6 3.8 0.002 0.002 4.672 4.672 sum_up_and_integrate 45 10.4 0.253 0.253 4.619 4.619 cp_ddapc_apply_CD 45 11.4 0.007 0.007 4.474 4.474 integrate_v_rspace 45 11.4 0.013 0.013 4.365 4.365 force_nonbond 6 4.8 3.329 3.329 3.329 3.329 ------------------------------------------------------------------------------- From /workspace/artifacts/MQAE_single_node_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.037 0.041 101.047 101.048 qs_mol_dyn_low 1 2.0 0.005 0.008 99.368 99.464 qs_forces 6 3.8 0.001 0.001 74.828 74.828 qs_energies 6 4.8 0.001 0.001 71.417 71.417 scf_env_do_scf 6 5.8 0.000 0.001 69.638 69.638 scf_env_do_scf_inner_loop 113 6.2 0.003 0.010 66.865 66.867 rebuild_ks_matrix 119 8.1 0.000 0.001 49.685 49.702 qs_ks_build_kohn_sham_matrix 119 9.1 0.023 0.025 49.684 49.701 qs_ks_update_qs_env 119 7.3 0.001 0.001 46.747 46.763 velocity_verlet 5 3.0 0.003 0.004 40.536 40.541 pw_transfer 2446 11.8 0.296 0.309 33.048 33.223 fft_wrap_pw1pw2 2059 12.8 0.038 0.040 32.091 32.301 fft_wrap_pw1pw2_150 1321 14.0 2.772 2.972 31.077 31.296 qs_vxc_create 119 10.1 0.004 0.005 25.837 25.843 xc_vxc_pw_create 119 11.1 0.549 0.729 25.832 25.839 fft3d_ps 2059 14.8 14.313 15.509 24.219 24.528 qs_rho_update_rho 119 7.3 0.001 0.001 19.388 19.390 calculate_rho_elec 119 8.3 0.087 0.096 19.387 19.389 sum_up_and_integrate 119 10.1 0.104 0.113 17.280 17.315 integrate_v_rspace 119 11.1 0.005 0.006 17.175 17.211 rs_pw_transfer 988 11.5 0.019 0.021 13.997 14.690 qmmm_forces 6 3.8 0.003 0.003 13.532 13.532 qmmm_forces_with_gaussian 6 4.8 0.471 0.563 13.185 13.305 density_rs2pw 119 9.3 0.012 0.014 12.274 12.884 xc_rho_set_and_dset_create 119 12.1 0.553 0.642 11.974 12.430 potential_pw2rs 119 12.1 0.012 0.013 10.949 10.961 qmmm_el_coupling 6 3.8 0.000 0.000 9.794 9.828 qmmm_elec_with_gaussian 6 4.8 0.442 0.596 9.790 9.825 mp_alltoall_z22v 2059 16.8 6.091 7.773 6.091 7.773 grid_collocate_task_list 119 9.3 6.760 7.215 6.760 7.215 grid_integrate_task_list 119 12.1 5.786 6.066 5.786 6.066 rs_pw_transfer_PW2RS_150 125 13.9 3.100 3.201 6.003 6.029 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 5.907 6.025 x_to_yz 1095 16.3 2.243 2.456 5.327 5.726 rs_pw_transfer_RS2PW_150 125 11.2 2.458 2.634 5.016 5.691 yz_to_x 964 15.3 1.511 1.701 4.519 5.610 mp_waitany 4028 12.8 4.341 5.477 4.341 5.477 pw_restrict_s3 18 5.8 2.443 2.471 5.404 5.447 qmmm_forces_gaussian_low_G 6 6.8 4.823 4.935 4.823 4.935 qmmm_elec_with_gaussian:spline 6 5.8 0.000 0.000 4.348 4.386 pw_prolongate_s3 18 6.8 1.963 2.013 4.348 4.386 pw_integral_ab 2761 7.7 3.627 3.649 4.037 4.194 qs_scf_new_mos 113 7.2 0.001 0.001 4.171 4.180 qs_scf_loop_do_ot 113 8.2 0.001 0.001 4.170 4.180 ot_scf_mini 113 9.2 0.002 0.002 3.989 3.996 dbcsr_multiply_generic 2588 12.3 0.103 0.119 3.740 3.827 qs_ks_ddapc 119 10.1 0.003 0.003 3.469 3.617 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 3.425 3.496 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 2.951 2.952 mp_sum_dm3 33 5.7 2.765 2.938 2.765 2.938 pw_gather_p 964 14.3 2.551 2.817 2.551 2.817 init_scf_loop 6 6.8 0.000 0.000 2.768 2.768 mp_waitall_1 188862 16.2 2.588 2.760 2.588 2.760 pw_scatter_p 1095 15.3 2.445 2.577 2.445 2.577 ot_mini 113 10.2 0.001 0.001 2.517 2.529 qmmm_elec_gaussian_low_G 6 6.8 2.454 2.515 2.454 2.515 pw_derive 732 12.5 2.020 2.256 2.020 2.256 ------------------------------------------------------------------------------- Plot: name="MQAE_single_node_timings_32omp", title="Timings of MQAE_single_node with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32omp", name="rest", label="rest", y=40.45099999999999, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=72.801, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_scatter_s", label="pw_scatter_s", y=11.477, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_s", label="fft3d_s", y=9.912, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_integral_ab", label="pw_integral_ab", y=9.336, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=5.708, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=5.345, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 Plot: name="MQAE_single_node_timings_32mpi", title="Timings of MQAE_single_node with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32mpi", name="rest", label="rest", y=57.19299999999999, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=2.454, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_scatter_s", label="pw_scatter_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_integral_ab", label="pw_integral_ab", y=3.627, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=4.823, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=6.76, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=6.091, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=5.786, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=14.313, yerr=0.0 Summary: Performance test works fine. Status: OK Uploading artifacts... done EndDate: 2021-11-26 19:58:38+00:00