StartDate: 2021-11-09 19:26:06+00:00 CpuId: 64x Intel Xeon W 2000 / D-2100 (Skylake / Cascade Lake) {Skylake}, 14nm CommitSHA: c88c0527da0b63e69c0515c2f066fbd6c19bc8e0 CommitTime: 2021-11-09 19:17:59 +0100 CommitAuthor: Hans Pabst CommitSubject: FFTW 3.3.10 Trying to pull image cp2k-toolchain-mpich... success :-) Trying to pull image cp2k-perf-openmp... success :-) #################### Running Image cp2k-perf-openmp #################### ========== Fetching Git Commit ========== CommitSHA: c88c0527da0b63e69c0515c2f066fbd6c19bc8e0 CommitTime: 2021-11-09 19:17:59 +0100 CommitAuthor: Hans Pabst CommitSubject: FFTW 3.3.10 ========== Running Test ========== ========== Compiling CP2K ========== Compiling cp2k... done. ========== Running Performance Test ========== Running H2O-64.inp with 1 threads and 32 ranks... done. Running H2O-64.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.033 0.033 159.218 159.218 qs_mol_dyn_low 1 2.0 0.004 0.004 158.395 158.395 qs_forces 11 3.9 0.002 0.002 158.327 158.327 qs_energies 11 4.9 0.001 0.001 147.644 147.644 scf_env_do_scf 11 5.9 0.001 0.001 120.172 120.172 velocity_verlet 10 3.0 0.002 0.002 110.216 110.216 scf_env_do_scf_inner_loop 108 6.5 0.010 0.010 81.363 81.363 init_scf_loop 11 6.9 0.000 0.000 38.620 38.620 prepare_preconditioner 11 7.9 0.000 0.000 34.602 34.602 make_preconditioner 11 8.9 0.000 0.000 34.602 34.602 rebuild_ks_matrix 119 8.3 0.001 0.001 33.570 33.570 qs_ks_build_kohn_sham_matrix 119 9.3 0.019 0.019 33.569 33.569 make_full_inverse_cholesky 11 9.9 0.000 0.000 32.665 32.665 qs_ks_update_qs_env 119 7.6 0.001 0.001 31.317 31.317 qs_rho_update_rho 119 7.7 0.001 0.001 28.670 28.670 calculate_rho_elec 119 8.7 1.548 1.548 28.669 28.669 qs_scf_new_mos 108 7.5 0.001 0.001 27.521 27.521 qs_scf_loop_do_ot 108 8.5 0.001 0.001 27.520 27.520 ot_scf_mini 108 9.5 0.004 0.004 25.670 25.670 dbcsr_multiply_generic 2286 12.5 0.181 0.181 23.595 23.595 grid_collocate_task_list 119 9.7 22.389 22.389 22.389 22.389 sum_up_and_integrate 119 10.3 0.381 0.381 21.337 21.337 integrate_v_rspace 119 11.3 0.520 0.520 20.956 20.956 cp_fm_cholesky_invert 11 10.9 19.723 19.723 19.723 19.723 grid_integrate_task_list 119 12.3 17.928 17.928 17.928 17.928 ot_mini 108 10.5 0.001 0.001 15.224 15.224 make_m2s 4572 13.5 0.068 0.068 13.160 13.160 init_scf_run 11 5.9 0.001 0.001 12.834 12.834 scf_env_initial_rho_setup 11 6.9 0.001 0.001 12.833 12.833 wfi_extrapolate 11 7.9 0.001 0.001 12.065 12.065 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 10.968 10.968 cp_gemm 81 9.0 0.000 0.000 10.257 10.257 cp_gemm_cosma 81 10.0 10.256 10.256 10.256 10.256 pw_transfer 1439 11.6 0.095 0.095 8.014 8.014 qs_ot_get_derivative 108 11.5 0.002 0.002 7.782 7.782 fft_wrap_pw1pw2 1201 12.6 0.010 0.010 7.707 7.707 ot_diis_step 108 11.5 0.006 0.006 7.437 7.437 make_images 4572 14.5 2.569 2.569 6.964 6.964 cp_fm_cholesky_decompose 22 10.9 6.788 6.788 6.788 6.788 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 6.627 6.627 fft_wrap_pw1pw2_140 487 13.2 0.875 0.875 6.613 6.613 qs_ot_get_p 119 10.4 0.001 0.001 6.406 6.406 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 6.329 6.329 apply_single 119 13.6 0.001 0.001 6.329 6.329 dbcsr_make_dense_low 5837 15.5 0.107 0.107 6.299 6.299 make_dense_data 5837 16.5 5.561 5.561 6.168 6.168 dbcsr_complete_redistribute 329 12.2 2.887 2.887 6.097 6.097 dbcsr_copy 2102 12.0 0.285 0.285 6.009 6.009 qs_env_update_s_mstruct 11 6.9 0.000 0.000 5.987 5.987 multiply_cannon 2286 13.5 0.977 0.977 5.809 5.809 dbcsr_make_images_dense 3978 14.8 0.029 0.029 5.716 5.716 dbcsr_copy_into_existing 22 7.9 5.675 5.675 5.676 5.676 qs_create_task_list 11 7.9 0.000 0.000 5.452 5.452 generate_qs_task_list 11 8.9 3.732 3.732 5.452 5.452 copy_dbcsr_to_fm 153 11.3 0.004 0.004 5.049 5.049 qs_ot_p2m_diag 50 11.0 0.232 0.232 4.818 4.818 pw_poisson_solve 119 10.3 2.120 2.120 4.769 4.769 density_rs2pw 119 9.7 0.007 0.007 4.732 4.732 build_core_hamiltonian_matrix 11 6.9 0.001 0.001 4.394 4.394 multiply_cannon_loop 2286 14.5 0.052 0.052 4.268 4.268 cp_dbcsr_syevd 50 12.0 0.004 0.004 4.243 4.243 transfer_dbcsr_to_fm 11 10.9 0.000 0.000 4.217 4.217 multiply_cannon_multrec 2286 15.5 4.145 4.145 4.215 4.215 cp_fm_diag_elpa 50 13.0 0.000 0.000 4.079 4.079 cp_fm_diag_elpa_base 50 14.0 4.022 4.022 4.078 4.078 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 4.053 4.053 qs_energies_compute_matrix_w 11 5.9 0.000 0.000 3.501 3.501 calculate_w_matrix_ot 11 6.9 0.008 0.008 3.501 3.501 fft3d_s 1202 14.6 3.300 3.300 3.306 3.306 copy_fm_to_dbcsr 176 11.2 0.002 0.002 3.251 3.251 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.008 0.014 72.232 72.233 qs_mol_dyn_low 1 2.0 0.005 0.006 72.095 72.113 qs_forces 11 3.9 0.002 0.002 72.039 72.040 qs_energies 11 4.9 0.001 0.002 67.050 67.052 scf_env_do_scf 11 5.9 0.001 0.001 61.026 61.027 scf_env_do_scf_inner_loop 108 6.5 0.003 0.010 56.619 56.620 velocity_verlet 10 3.0 0.002 0.002 42.547 42.549 rebuild_ks_matrix 119 8.3 0.001 0.001 28.439 28.494 qs_ks_build_kohn_sham_matrix 119 9.3 0.022 0.022 28.439 28.493 qs_ks_update_qs_env 119 7.6 0.001 0.001 25.268 25.318 sum_up_and_integrate 119 10.3 0.047 0.050 22.454 22.474 integrate_v_rspace 119 11.3 0.005 0.005 22.407 22.430 qs_rho_update_rho 119 7.7 0.001 0.001 22.117 22.135 calculate_rho_elec 119 8.7 0.048 0.050 22.116 22.134 dbcsr_multiply_generic 2286 12.5 0.133 0.135 16.775 16.863 grid_integrate_task_list 119 12.3 16.170 16.592 16.170 16.592 grid_collocate_task_list 119 9.7 15.848 16.396 15.848 16.396 qs_scf_new_mos 108 7.5 0.001 0.001 13.831 13.872 qs_scf_loop_do_ot 108 8.5 0.001 0.001 13.831 13.871 ot_scf_mini 108 9.5 0.003 0.003 12.962 12.997 multiply_cannon 2286 13.5 0.225 0.228 11.059 11.218 multiply_cannon_loop 2286 14.5 0.230 0.238 9.960 10.153 mp_waitall_1 169478 16.3 8.172 8.667 8.172 8.667 ot_mini 108 10.5 0.001 0.001 7.653 7.691 rs_pw_transfer 974 11.9 0.016 0.017 6.590 7.581 density_rs2pw 119 9.7 0.009 0.009 5.692 6.698 pw_transfer 1439 11.6 0.141 0.149 5.745 5.806 multiply_cannon_metrocomm3 18288 15.5 0.084 0.086 5.257 5.677 fft_wrap_pw1pw2 1201 12.6 0.014 0.015 5.456 5.514 potential_pw2rs 119 12.3 0.010 0.011 5.057 5.064 fft_wrap_pw1pw2_140 487 13.2 0.552 0.579 4.724 4.877 init_scf_loop 11 6.9 0.000 0.001 4.390 4.391 init_scf_run 11 5.9 0.000 0.002 4.125 4.125 scf_env_initial_rho_setup 11 6.9 0.000 0.001 4.124 4.125 fft3d_ps 1201 14.6 2.236 2.385 4.053 4.122 make_m2s 4572 13.5 0.076 0.077 3.852 3.912 qs_ot_get_derivative 108 11.5 0.001 0.002 3.851 3.885 ot_diis_step 108 11.5 0.005 0.006 3.761 3.762 wfi_extrapolate 11 7.9 0.001 0.001 3.744 3.744 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 3.660 3.711 apply_single 119 13.6 0.001 0.001 3.659 3.711 multiply_cannon_multrec 18288 15.5 3.512 3.613 3.530 3.631 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 3.389 3.396 mp_waitany 9880 13.7 2.228 3.245 2.228 3.245 make_images 4572 14.5 0.186 0.189 3.160 3.225 rs_pw_transfer_RS2PW_140 130 11.5 0.535 0.568 2.046 3.048 rs_pw_transfer_PW2RS_140 130 13.9 1.201 1.258 2.495 2.530 mp_alltoall_d11v 2130 13.8 1.449 2.088 1.449 2.088 qs_ot_get_p 119 10.4 0.001 0.001 1.827 1.873 rs_gather_matrices 119 12.3 0.131 0.142 1.128 1.793 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 1.448 1.570 make_images_data 4572 15.5 0.063 0.068 1.414 1.555 prepare_preconditioner 11 7.9 0.000 0.000 1.446 1.459 make_preconditioner 11 8.9 0.000 0.000 1.446 1.459 qs_energies_init_hamiltonians 11 5.9 0.000 0.001 1.445 1.445 ------------------------------------------------------------------------------- Plot: name="H2O-64_timings_32omp", title="Timings of H2O-64 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32omp", name="rest", label="rest", y=77.989, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=22.389, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=19.723, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=17.928, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_gemm_cosma", label="cp_gemm_cosma", y=10.256, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=6.788, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=4.145, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="H2O-64_timings_32mpi", title="Timings of H2O-64 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32mpi", name="rest", label="rest", y=26.293999999999997, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=15.848, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=16.17, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_gemm_cosma", label="cp_gemm_cosma", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=3.512, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=2.236, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=8.172, yerr=0.0 Running H2O-64_nonortho.inp with 1 threads and 32 ranks... done. Running H2O-64_nonortho.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_nonortho_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.033 0.033 208.939 208.939 qs_mol_dyn_low 1 2.0 0.004 0.004 208.160 208.160 qs_forces 11 3.9 0.001 0.001 208.103 208.103 qs_energies 11 4.9 0.001 0.001 194.173 194.173 scf_env_do_scf 11 5.9 0.001 0.001 162.628 162.628 velocity_verlet 10 3.0 0.002 0.002 139.263 139.263 scf_env_do_scf_inner_loop 96 6.5 0.009 0.009 121.474 121.474 rebuild_ks_matrix 107 8.3 0.001 0.001 61.570 61.570 qs_ks_build_kohn_sham_matrix 107 9.3 0.017 0.017 61.569 61.569 qs_ks_update_qs_env 107 7.6 0.001 0.001 55.422 55.422 qs_rho_update_rho 107 7.7 0.001 0.001 54.772 54.772 calculate_rho_elec 107 8.7 1.385 1.385 54.771 54.771 sum_up_and_integrate 107 10.3 0.345 0.345 51.067 51.067 integrate_v_rspace 107 11.3 0.395 0.395 50.722 50.722 grid_collocate_task_list 107 9.7 49.449 49.449 49.449 49.449 grid_integrate_task_list 107 12.3 48.145 48.145 48.145 48.145 init_scf_loop 11 6.9 0.000 0.000 40.954 40.954 prepare_preconditioner 11 7.9 0.000 0.000 33.744 33.744 make_preconditioner 11 8.9 0.000 0.000 33.744 33.744 make_full_inverse_cholesky 11 9.9 0.000 0.000 31.802 31.802 qs_scf_new_mos 96 7.5 0.001 0.001 23.722 23.722 qs_scf_loop_do_ot 96 8.5 0.001 0.001 23.721 23.721 ot_scf_mini 96 9.5 0.003 0.003 22.169 22.169 dbcsr_multiply_generic 1966 12.4 0.159 0.159 20.206 20.206 cp_fm_cholesky_invert 11 10.9 18.788 18.788 18.788 18.788 init_scf_run 11 5.9 0.001 0.001 15.976 15.976 scf_env_initial_rho_setup 11 6.9 0.001 0.001 15.975 15.975 wfi_extrapolate 11 7.9 0.001 0.001 14.934 14.934 ot_mini 96 10.5 0.001 0.001 13.240 13.240 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 11.897 11.897 make_m2s 3932 13.4 0.058 0.058 11.378 11.378 cp_gemm 81 9.0 0.000 0.000 10.203 10.203 cp_gemm_cosma 81 10.0 10.203 10.203 10.203 10.203 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 7.756 7.756 qs_env_update_s_mstruct 11 6.9 0.000 0.000 7.099 7.099 qs_ot_get_derivative 96 11.5 0.001 0.001 6.955 6.955 cp_fm_cholesky_decompose 22 10.9 6.799 6.799 6.799 6.799 pw_transfer 1295 11.6 0.085 0.085 6.726 6.726 qs_create_task_list 11 7.9 0.000 0.000 6.589 6.589 generate_qs_task_list 11 8.9 4.885 4.885 6.589 6.589 fft_wrap_pw1pw2 1081 12.6 0.009 0.009 6.457 6.457 ot_diis_step 96 11.5 0.005 0.005 6.282 6.282 dbcsr_complete_redistribute 317 12.2 2.925 2.925 6.245 6.245 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 6.171 6.171 make_images 3932 14.4 2.243 2.243 6.061 6.061 dbcsr_copy 1855 11.9 0.257 0.257 5.805 5.805 qs_ot_get_p 107 10.4 0.001 0.001 5.564 5.564 dbcsr_copy_into_existing 22 7.9 5.502 5.502 5.503 5.503 fft_wrap_pw1pw2_140 439 13.2 0.554 0.554 5.471 5.471 dbcsr_make_dense_low 4961 15.5 0.092 0.092 5.438 5.438 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 5.429 5.429 apply_single 107 13.6 0.000 0.000 5.429 5.429 make_dense_data 4961 16.5 4.792 4.792 5.327 5.327 copy_dbcsr_to_fm 147 11.2 0.003 0.003 5.143 5.143 multiply_cannon 1966 13.4 0.732 0.732 4.976 4.976 dbcsr_make_images_dense 3386 14.7 0.025 0.025 4.896 4.896 transfer_dbcsr_to_fm 11 10.9 0.000 0.000 4.276 4.276 qs_ot_p2m_diag 44 11.0 0.197 0.197 4.231 4.231 build_core_hamiltonian_matrix 11 6.9 0.001 0.001 4.229 4.229 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_nonortho_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.006 0.010 125.644 125.646 qs_mol_dyn_low 1 2.0 0.005 0.006 125.523 125.529 qs_forces 11 3.9 0.002 0.002 125.470 125.470 qs_energies 11 4.9 0.001 0.002 116.715 116.717 scf_env_do_scf 11 5.9 0.001 0.001 107.836 107.837 scf_env_do_scf_inner_loop 96 6.5 0.003 0.009 100.221 100.221 velocity_verlet 10 3.0 0.002 0.002 74.476 74.477 rebuild_ks_matrix 107 8.3 0.001 0.001 57.878 57.909 qs_ks_build_kohn_sham_matrix 107 9.3 0.020 0.021 57.877 57.908 sum_up_and_integrate 107 10.3 0.040 0.043 52.608 52.644 integrate_v_rspace 107 11.3 0.004 0.005 52.568 52.604 qs_ks_update_qs_env 107 7.6 0.001 0.001 50.894 50.923 qs_rho_update_rho 107 7.7 0.001 0.001 48.406 48.420 calculate_rho_elec 107 8.7 0.043 0.044 48.405 48.420 grid_integrate_task_list 107 12.3 46.474 47.197 46.474 47.197 grid_collocate_task_list 107 9.7 42.399 43.124 42.399 43.124 dbcsr_multiply_generic 1966 12.4 0.115 0.117 14.435 14.508 qs_scf_new_mos 96 7.5 0.001 0.001 11.699 11.733 qs_scf_loop_do_ot 96 8.5 0.001 0.001 11.698 11.732 ot_scf_mini 96 9.5 0.003 0.003 10.949 10.987 multiply_cannon 1966 13.4 0.193 0.196 9.600 9.795 multiply_cannon_loop 1966 14.4 0.199 0.212 8.685 8.981 rs_pw_transfer 878 11.9 0.014 0.016 6.497 7.786 init_scf_loop 11 6.9 0.000 0.000 7.599 7.600 mp_waitall_1 146670 16.2 7.038 7.375 7.038 7.375 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 7.174 7.182 init_scf_run 11 5.9 0.001 0.002 6.954 6.954 scf_env_initial_rho_setup 11 6.9 0.000 0.001 6.953 6.954 density_rs2pw 107 9.7 0.008 0.008 5.498 6.816 ot_mini 96 10.5 0.001 0.001 6.457 6.495 wfi_extrapolate 11 7.9 0.001 0.001 6.343 6.343 pw_transfer 1295 11.6 0.127 0.133 4.985 5.045 fft_wrap_pw1pw2 1081 12.6 0.013 0.013 4.728 4.788 multiply_cannon_metrocomm3 15728 15.4 0.072 0.073 4.535 4.765 potential_pw2rs 107 12.3 0.009 0.009 4.532 4.541 fft_wrap_pw1pw2_140 439 13.2 0.489 0.517 4.117 4.250 mp_waitany 8968 13.7 2.618 3.905 2.618 3.905 rs_pw_transfer_RS2PW_140 118 11.5 0.413 0.441 2.347 3.638 fft3d_ps 1081 14.6 1.932 2.043 3.481 3.566 make_m2s 3932 13.4 0.065 0.068 3.283 3.329 multiply_cannon_multrec 15728 15.4 3.132 3.266 3.147 3.282 qs_ot_get_derivative 96 11.5 0.001 0.001 3.205 3.242 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 3.194 3.220 apply_single 107 13.6 0.001 0.001 3.194 3.220 ot_diis_step 96 11.5 0.004 0.005 3.219 3.219 mp_alltoall_d11v 1998 13.7 1.802 3.217 1.802 3.217 rs_gather_matrices 107 12.3 0.117 0.126 1.510 2.897 make_images 3932 14.4 0.162 0.165 2.688 2.734 ------------------------------------------------------------------------------- Plot: name="H2O-64_nonortho_timings_32omp", title="Timings of H2O-64_nonortho with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="rest", label="rest", y=75.555, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=49.449, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=48.145, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=18.788, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_gemm_cosma", label="cp_gemm_cosma", y=10.203, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=6.799, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 Plot: name="H2O-64_nonortho_timings_32mpi", title="Timings of H2O-64_nonortho with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="rest", label="rest", y=23.98300000000002, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=42.399, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=46.474, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_gemm_cosma", label="cp_gemm_cosma", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=7.038, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=3.132, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitany", label="mp_waitany", y=2.618, yerr=0.0 Running H2O-hyb.inp with 1 threads and 32 ranks... done. Running H2O-hyb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-hyb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.378 0.378 253.238 253.238 qs_energies 1 2.0 0.000 0.000 251.926 251.926 scf_env_do_scf 1 3.0 0.000 0.000 249.515 249.515 qs_ks_update_qs_env 8 5.0 0.000 0.000 231.716 231.716 rebuild_ks_matrix 7 6.0 0.000 0.000 231.608 231.608 qs_ks_build_kohn_sham_matrix 7 7.0 0.002 0.002 231.608 231.608 hfx_ks_matrix 7 8.0 0.000 0.000 168.870 168.870 integrate_four_center 7 9.0 2.161 2.161 168.840 168.840 integrate_four_center_main 7 10.0 1.507 1.507 157.300 157.300 integrate_four_center_bin 451 11.0 155.794 155.794 155.794 155.794 scf_env_do_scf_inner_loop 7 4.0 0.001 0.001 142.283 142.283 init_scf_loop 1 4.0 0.000 0.000 107.218 107.218 cp_gemm 129 10.3 0.001 0.001 47.971 47.971 cp_gemm_cosma 129 11.3 47.970 47.970 47.970 47.970 admm_mo_calc_rho_aux 7 8.0 0.000 0.000 29.775 29.775 admm_fit_mo_coeffs 7 9.0 0.000 0.000 28.084 28.084 admm_mo_merge_derivs 7 8.0 0.000 0.000 24.110 24.110 merge_mo_derivs_diag 7 9.0 0.022 0.022 24.110 24.110 purify_mo_diag 7 10.0 0.001 0.001 15.562 15.562 prepare_preconditioner 1 5.0 0.000 0.000 14.008 14.008 make_preconditioner 1 6.0 0.000 0.000 14.008 14.008 fit_mo_coeffs 7 10.0 0.000 0.000 12.522 12.522 integrate_four_center_load 7 10.0 0.001 0.001 9.002 9.002 hfx_load_balance 1 11.0 0.002 0.002 9.001 9.001 arnoldi_normal_ev 11 9.3 0.002 0.002 8.584 8.584 estimate_cond_num 1 7.0 0.000 0.000 8.508 8.508 build_subspace 28 9.5 0.015 0.015 8.479 8.479 qs_vxc_create 14 8.0 0.000 0.000 5.343 5.343 xc_vxc_pw_create 14 9.0 0.977 0.977 5.343 5.343 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-hyb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.199 0.205 182.973 182.974 qs_energies 1 2.0 0.000 0.000 182.632 182.633 scf_env_do_scf 1 3.0 0.000 0.000 182.099 182.100 qs_ks_update_qs_env 8 5.0 0.000 0.000 179.233 179.233 rebuild_ks_matrix 7 6.0 0.000 0.000 179.220 179.221 qs_ks_build_kohn_sham_matrix 7 7.0 0.002 0.003 179.220 179.221 hfx_ks_matrix 7 8.0 0.000 0.001 168.687 168.688 integrate_four_center 7 9.0 0.097 0.407 168.671 168.672 integrate_four_center_main 7 10.0 0.005 0.005 155.103 158.505 integrate_four_center_bin 448 11.0 155.098 158.500 155.098 158.500 scf_env_do_scf_inner_loop 7 4.0 0.000 0.001 106.983 106.983 init_scf_loop 1 4.0 0.000 0.000 75.115 75.115 integrate_four_center_load 7 10.0 0.000 0.000 8.985 8.990 hfx_load_balance 1 11.0 0.001 0.001 8.985 8.990 mp_sync 70 11.3 3.770 6.751 3.770 6.751 hfx_load_balance_bin 1 12.0 4.412 4.499 4.412 4.499 hfx_load_balance_count 1 12.0 4.406 4.497 4.406 4.497 ------------------------------------------------------------------------------- Plot: name="H2O-hyb_timings_32omp", title="Timings of H2O-hyb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32omp", name="rest", label="rest", y=44.45099999999999, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_bin", label="integrate_four_center_bin", y=155.794, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="cp_gemm_cosma", label="cp_gemm_cosma", y=47.97, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center", label="integrate_four_center", y=2.161, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_main", label="integrate_four_center_main", y=1.507, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="xc_vxc_pw_create", label="xc_vxc_pw_create", y=0.977, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="CP2K", label="CP2K", y=0.378, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_count", label="hfx_load_balance_count", y=0.0, yerr=0.0 Plot: name="H2O-hyb_timings_32mpi", title="Timings of H2O-hyb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32mpi", name="rest", label="rest", y=14.985999999999962, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_bin", label="integrate_four_center_bin", y=155.098, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="cp_gemm_cosma", label="cp_gemm_cosma", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center", label="integrate_four_center", y=0.097, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_main", label="integrate_four_center_main", y=0.005, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="xc_vxc_pw_create", label="xc_vxc_pw_create", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="CP2K", label="CP2K", y=0.199, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="mp_sync", label="mp_sync", y=3.77, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=4.412, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_count", label="hfx_load_balance_count", y=4.406, yerr=0.0 Running GW_PBE_4benzene.inp with 1 threads and 32 ranks... done. Running GW_PBE_4benzene.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/GW_PBE_4benzene_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.015 0.015 371.900 371.900 qs_energies 1 2.0 0.000 0.000 371.415 371.415 mp2_main 1 3.0 0.000 0.000 365.271 365.271 mp2_gpw_main 1 4.0 0.000 0.000 364.897 364.897 rpa_ri_compute_en 1 5.0 0.000 0.000 350.627 350.627 rpa_num_int 1 6.0 0.000 0.000 350.602 350.602 compute_mat_P_omega 1 7.0 0.002 0.002 220.566 220.566 compute_mat_P_omega_contract 10 8.0 12.758 12.758 219.381 219.381 dbcsr_t_total 2336 9.6 0.017 0.017 209.121 209.121 dbcsr_t_contract 787 11.0 47.067 47.067 136.254 136.254 cp_gemm 105 8.4 0.000 0.000 99.753 99.753 cp_gemm_cosma 105 9.4 99.753 99.753 99.753 99.753 compute_mat_P_omega_calc_M_occ 250 9.0 12.824 12.824 85.750 85.750 dbcsr_tas_total 1149 12.2 0.051 0.051 83.134 83.134 dbcsr_tas_multiply 807 12.1 0.004 0.004 81.665 81.665 GW_matrix_operations 10 7.0 0.005 0.005 73.500 73.500 dbcsr_t_copy 1103 10.7 20.146 20.146 71.373 71.373 dbcsr_multiply_generic 837 15.8 0.134 0.134 68.245 68.245 dbcsr_tas_dbcsr 807 14.1 0.003 0.003 67.830 67.830 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 57.266 57.266 dbcsr_tas_mm_1N 524 15.1 0.002 0.002 55.400 55.400 multiply_cannon 837 16.8 24.706 24.706 55.123 55.123 rpa_num_int_RPA_matrix_operati 10 7.0 0.000 0.000 34.827 34.827 contract_P_omega_with_mat_L 10 8.0 0.000 0.000 33.055 33.055 multiply_cannon_loop 837 17.8 0.151 0.151 27.742 27.742 dbcsr_tas_reserve_blocks_index 3261 13.7 7.294 7.294 26.970 26.970 multiply_cannon_multrec 837 18.8 26.163 26.163 26.700 26.700 dbcsr_tas_copy 574 11.4 16.917 16.917 24.472 24.472 dbcsr_t_reserve_blocks_index 2280 12.5 1.284 1.284 20.693 20.693 dbcsr_t_reserve_blocks_index_a 2222 11.6 0.011 0.011 20.397 20.397 dbcsr_reserve_blocks 3717 14.7 18.988 18.988 19.374 19.374 compute_mat_P_omega_copy_M_occ 250 9.0 0.001 0.001 19.362 19.362 compute_QP_energies 1 7.0 0.000 0.000 19.059 19.059 compute_self_energy_cubic_gw 1 8.0 0.110 0.110 19.059 19.059 compute_mat_P_omega_copy_M_vir 250 9.0 0.002 0.002 14.272 14.272 mp2_ri_gpw_compute_in 1 5.0 0.001 0.001 14.256 14.256 dbcsr_t_copy_nocomm 251 12.0 11.213 11.213 13.562 13.562 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 12.165 12.165 make_m2s 1674 16.8 0.105 0.105 10.583 10.583 dbcsr_tas_mm_2 251 15.0 0.002 0.002 10.403 10.403 make_images 1674 17.8 4.885 4.885 10.002 10.002 cp_fm_cholesky_invert 10 8.0 8.676 8.676 8.676 8.676 dbcsr_finalize 9888 13.6 1.517 1.517 8.037 8.037 contract_cubic_gw 21 9.0 0.000 0.000 7.837 7.837 ------------------------------------------------------------------------------- From /workspace/artifacts/GW_PBE_4benzene_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.010 57.304 57.306 qs_energies 1 2.0 0.001 0.001 57.149 57.156 mp2_main 1 3.0 0.000 0.001 55.640 55.647 mp2_gpw_main 1 4.0 0.000 0.001 55.577 55.584 rpa_ri_compute_en 1 5.0 0.000 0.000 53.519 53.526 rpa_num_int 1 6.0 0.001 0.001 53.511 53.519 dbcsr_t_total 2336 9.6 0.016 0.019 41.867 41.868 compute_mat_P_omega 1 7.0 0.001 0.002 40.815 40.827 compute_mat_P_omega_contract 10 8.0 0.761 0.793 40.597 40.602 dbcsr_t_contract 787 11.0 1.875 2.034 30.882 30.887 dbcsr_tas_total 1149 12.2 0.063 0.069 27.164 27.165 dbcsr_tas_multiply 807 12.1 0.003 0.003 27.017 27.019 dbcsr_tas_dbcsr 807 14.1 0.003 0.004 19.638 19.639 dbcsr_multiply_generic 837 15.8 0.070 0.073 16.334 17.334 compute_mat_P_omega_calc_M_occ 250 9.0 0.744 0.770 13.692 13.692 multiply_cannon 837 16.8 0.134 0.149 9.630 10.145 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 9.843 9.843 dbcsr_t_copy 1111 10.7 4.232 4.468 9.361 9.762 dbcsr_tas_mm_1N 524 15.1 0.003 0.003 8.704 9.593 multiply_cannon_loop 837 17.8 0.043 0.046 8.762 9.281 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 8.752 8.753 cp_gemm 105 8.4 0.000 0.000 7.956 7.971 cp_gemm_cosma 105 9.4 7.956 7.971 7.956 7.971 mp_sync 8696 11.6 6.478 7.488 6.478 7.488 multiply_cannon_multrec 1386 17.8 6.844 7.219 7.096 7.457 dbcsr_tas_mm_2 251 15.0 0.002 0.002 7.401 7.402 make_m2s 1674 16.8 0.044 0.047 5.766 6.305 make_images 1674 17.8 0.242 0.248 5.683 6.222 GW_matrix_operations 10 7.0 0.001 0.001 5.262 5.268 compute_QP_energies 1 7.0 0.000 0.000 4.205 4.205 compute_self_energy_cubic_gw 1 8.0 0.005 0.005 4.202 4.205 dbcsr_t_communicate_buffer 1098 11.7 0.093 0.100 3.372 3.588 mp_waitall_2 3776 14.7 3.173 3.456 3.173 3.456 contract_cubic_gw 21 9.0 0.000 0.000 3.188 3.188 make_images_data 1674 18.8 0.037 0.039 3.008 3.125 hybrid_alltoall_any 1724 19.5 2.353 2.667 2.896 3.019 dbcsr_t_reserve_blocks_index_a 2791 11.4 0.019 0.021 2.690 2.920 dbcsr_t_reserve_blocks_index 2849 12.4 0.106 0.113 2.684 2.918 rpa_num_int_RPA_matrix_operati 10 7.0 0.000 0.000 2.857 2.866 dbcsr_tas_reserve_blocks_index 3300 13.8 0.274 0.297 2.636 2.865 make_images_pack 1674 18.8 2.229 2.767 2.243 2.779 contract_P_omega_with_mat_L 10 8.0 0.000 0.000 2.740 2.748 dbcsr_reserve_blocks 3785 14.7 2.356 2.568 2.396 2.608 mp2_ri_gpw_compute_in 1 5.0 0.001 0.001 2.055 2.055 convert_to_new_pgrid 2421 14.1 0.017 0.019 1.876 1.964 dbcsr_copy 3323 15.8 1.810 1.906 1.840 1.936 mp_waitall_1 26582 19.0 1.490 1.865 1.490 1.865 compute_mat_P_omega_copy_M_vir 250 9.0 0.002 0.002 1.705 1.710 dbcsr_add_anytype 909 13.7 0.997 1.055 1.558 1.627 compute_mat_P_omega_copy_M_occ 250 9.0 0.001 0.002 1.517 1.522 scf_env_do_scf 1 3.0 0.000 0.000 1.453 1.453 scf_env_do_scf_inner_loop 17 4.0 0.001 0.002 1.453 1.453 dbcsr_tas_replicate 396 14.1 0.817 0.898 1.373 1.424 mp_max_i 2057 9.6 0.970 1.218 0.970 1.218 ------------------------------------------------------------------------------- Plot: name="GW_PBE_4benzene_timings_32omp", title="Timings of GW_PBE_4benzene with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="rest", label="rest", y=154.065, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="cp_gemm_cosma", label="cp_gemm_cosma", y=99.753, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbcsr_t_contract", label="dbcsr_t_contract", y=47.067, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=26.163, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="multiply_cannon", label="multiply_cannon", y=24.706, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbcsr_t_copy", label="dbcsr_t_copy", y=20.146, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 Plot: name="GW_PBE_4benzene_timings_32mpi", title="Timings of GW_PBE_4benzene with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="rest", label="rest", y=26.612000000000002, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="cp_gemm_cosma", label="cp_gemm_cosma", y=7.956, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbcsr_t_contract", label="dbcsr_t_contract", y=1.875, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=6.844, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="multiply_cannon", label="multiply_cannon", y=0.134, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbcsr_t_copy", label="dbcsr_t_copy", y=4.232, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=3.173, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_sync", label="mp_sync", y=6.478, yerr=0.0 Running diag_cu144_broy.inp with 1 threads and 32 ranks... done. Running diag_cu144_broy.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/diag_cu144_broy_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.140 0.140 190.312 190.312 qs_energies 1 2.0 0.000 0.000 188.522 188.522 scf_env_do_scf 1 3.0 0.000 0.000 178.122 178.122 scf_env_do_scf_inner_loop 15 4.0 0.002 0.002 178.122 178.122 qs_scf_new_mos 15 5.0 0.000 0.000 77.971 77.971 qs_ks_update_qs_env 15 5.0 0.000 0.000 68.950 68.950 rebuild_ks_matrix 15 6.0 0.000 0.000 68.583 68.583 qs_ks_build_kohn_sham_matrix 15 7.0 0.002 0.002 68.582 68.582 eigensolver 15 6.0 0.002 0.002 64.760 64.760 cp_fm_diag_elpa 15 7.0 0.000 0.000 50.651 50.651 cp_fm_diag_elpa_base 15 8.0 46.010 46.010 50.651 50.651 qs_vxc_create 15 8.0 0.000 0.000 44.875 44.875 calculate_dispersion_nonloc 15 9.0 8.919 8.919 39.024 39.024 pw_transfer 1191 9.8 0.093 0.093 26.728 26.728 fft_wrap_pw1pw2 1086 10.9 0.013 0.013 26.434 26.434 qs_rho_update_rho 16 5.0 0.000 0.000 24.813 24.813 calculate_rho_elec 16 6.0 0.342 0.342 24.813 24.813 grid_collocate_task_list 16 7.0 23.263 23.263 23.263 23.263 sum_up_and_integrate 15 8.0 0.079 0.079 22.103 22.103 integrate_v_rspace 15 9.0 0.032 0.032 22.024 22.024 grid_integrate_task_list 15 10.0 21.393 21.393 21.393 21.393 fft_wrap_pw1pw2_150 765 12.0 3.356 3.356 19.992 19.992 fft3d_s 1087 12.8 10.805 10.805 10.817 10.817 pw_scatter_s 585 13.0 10.606 10.606 10.606 10.606 copy_dbcsr_to_fm 16 5.9 0.001 0.001 10.517 10.517 cp_fm_cholesky_restore 45 7.0 9.645 9.645 9.645 9.645 dbcsr_complete_redistribute 46 8.3 3.322 3.322 9.242 9.242 cp_fm_upper_to_full 30 8.0 9.102 9.102 9.102 9.102 vdW_energy 15 10.0 7.768 7.768 7.768 7.768 gspace_mixing 14 5.0 0.276 0.276 7.719 7.719 broyden_mixing 14 6.0 6.949 6.949 6.949 6.949 fft_wrap_pw1pw2_200 197 11.5 0.338 0.338 6.187 6.187 xc_vxc_pw_create 15 9.0 1.552 1.552 5.850 5.850 init_scf_run 1 3.0 0.000 0.000 4.821 4.821 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 4.581 4.581 dbcsr_finalize 159 9.9 0.021 0.021 3.982 3.982 dbcsr_merge_all 91 11.1 0.066 0.066 3.831 3.831 ------------------------------------------------------------------------------- From /workspace/artifacts/diag_cu144_broy_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.013 0.016 85.606 85.607 qs_energies 1 2.0 0.000 0.001 85.223 85.224 scf_env_do_scf 1 3.0 0.000 0.000 80.074 80.074 scf_env_do_scf_inner_loop 15 4.0 0.002 0.003 80.074 80.074 qs_ks_update_qs_env 15 5.0 0.000 0.000 39.095 39.130 rebuild_ks_matrix 15 6.0 0.000 0.000 39.049 39.084 qs_ks_build_kohn_sham_matrix 15 7.0 0.004 0.005 39.049 39.084 qs_rho_update_rho 16 5.0 0.000 0.000 23.585 23.589 calculate_rho_elec 16 6.0 0.011 0.013 23.585 23.588 sum_up_and_integrate 15 8.0 0.013 0.014 23.059 23.109 integrate_v_rspace 15 9.0 0.001 0.001 23.046 23.095 grid_collocate_task_list 16 7.0 21.655 22.323 21.655 22.323 grid_integrate_task_list 15 10.0 21.186 22.024 21.186 22.024 qs_scf_new_mos 15 5.0 0.001 0.001 17.845 18.199 eigensolver 15 6.0 0.002 0.002 16.396 16.405 qs_vxc_create 15 8.0 0.001 0.001 15.472 15.478 calculate_dispersion_nonloc 15 9.0 1.410 1.440 12.581 12.591 cp_fm_diag_elpa 15 7.0 0.000 0.000 11.921 11.927 cp_fm_diag_elpa_base 15 8.0 11.668 11.705 11.916 11.920 pw_transfer 1191 9.8 0.135 0.147 11.679 11.804 fft_wrap_pw1pw2 1086 10.9 0.020 0.021 11.386 11.525 fft3d_ps 1086 12.9 5.017 5.111 8.577 8.767 fft_wrap_pw1pw2_150 765 12.0 0.665 0.710 7.639 7.671 cp_fm_cholesky_restore 45 7.0 4.236 4.295 4.236 4.295 fft_wrap_pw1pw2_200 197 11.5 0.359 0.385 3.597 3.695 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 3.198 3.198 build_core_hamiltonian_matrix 1 4.0 0.000 0.000 2.759 3.024 xc_vxc_pw_create 15 9.0 0.057 0.077 2.889 2.907 mp_alltoall_z22v 1086 14.9 2.114 2.484 2.114 2.484 rs_pw_transfer 158 9.4 0.002 0.003 1.849 2.379 vdW_energy 15 10.0 2.106 2.218 2.106 2.218 x_to_yz 585 14.0 0.896 0.920 2.015 2.100 density_rs2pw 16 7.0 0.002 0.002 1.766 2.092 build_core_ppnl 1 5.0 1.848 2.023 1.848 2.023 mp_waitany 520 11.3 1.180 1.852 1.180 1.852 rs_pw_transfer_RS2PW_200 18 8.8 0.072 0.078 0.924 1.747 yz_to_x 501 13.7 0.518 0.574 1.513 1.733 ------------------------------------------------------------------------------- Plot: name="diag_cu144_broy_timings_32omp", title="Timings of diag_cu144_broy with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32omp", name="rest", label="rest", y=68.59000000000002, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=46.01, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=23.263, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=21.393, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_s", label="fft3d_s", y=10.805, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="pw_scatter_s", label="pw_scatter_s", y=10.606, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=9.645, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 Plot: name="diag_cu144_broy_timings_32mpi", title="Timings of diag_cu144_broy with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="rest", label="rest", y=21.843999999999994, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=11.668, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=21.655, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=21.186, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="pw_scatter_s", label="pw_scatter_s", y=0.0, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=4.236, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=5.017, yerr=0.0 Running bench_dftb.inp with 1 threads and 32 ranks... done. Running bench_dftb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/bench_dftb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.087 0.087 328.670 328.670 qs_energies 1 2.0 0.000 0.000 328.510 328.510 ls_scf 1 3.0 0.000 0.000 326.693 326.693 ls_scf_main 1 4.0 0.002 0.002 311.944 311.944 density_matrix_trs4 11 5.0 0.011 0.011 178.383 178.383 ls_scf_dm_to_ks 11 5.0 0.000 0.000 127.001 127.001 matrix_ls_to_qs 11 6.0 0.000 0.000 122.770 122.770 dbcsr_multiply_generic 185 6.1 0.498 0.498 112.707 112.707 multiply_cannon 185 7.1 3.411 3.411 77.514 77.514 dbcsr_copy_into_existing 11 7.0 71.174 71.174 71.175 71.175 dbcsr_complete_redistribute 23 7.5 40.538 40.538 56.289 56.289 multiply_cannon_loop 185 8.1 0.391 0.391 55.566 55.566 multiply_cannon_multrec 185 9.1 53.398 53.398 53.461 53.461 matrix_decluster 11 7.0 0.000 0.000 51.594 51.594 arnoldi_extremal 12 6.1 0.000 0.000 46.684 46.684 arnoldi_normal_ev 12 7.1 0.029 0.029 46.684 46.684 build_subspace 23 8.1 0.129 0.129 46.041 46.041 dbcsr_matrix_vector_mult 652 9.0 0.277 0.277 35.667 35.667 dbcsr_matrix_vector_mult_local 652 10.0 34.127 34.127 34.136 34.136 make_m2s 370 7.1 0.030 0.030 28.902 28.902 make_images 370 8.1 7.418 7.418 26.444 26.444 dbcsr_finalize 646 7.5 0.212 0.212 21.382 21.382 dbcsr_merge_all 597 8.5 3.607 3.607 19.241 19.241 setup_rec_index_2d 370 8.1 18.325 18.325 18.325 18.325 dbcsr_sort_indices 1103 9.9 15.008 15.008 15.008 15.008 ls_scf_init_scf 1 4.0 0.000 0.000 13.798 13.798 tree_to_linear_d 110 9.4 13.376 13.376 13.376 13.376 ls_scf_init_matrix_S 1 5.0 0.000 0.000 13.359 13.359 quick_finalize 395 10.0 0.488 0.488 12.815 12.815 matrix_sqrt_Newton_Schulz 1 6.0 0.001 0.001 12.512 12.512 dbcsr_special_finalize 370 9.1 0.003 0.003 11.829 11.829 dbcsr_dot_sd 144 6.3 8.942 8.942 8.944 8.944 dbcsr_frobenius_norm 142 6.1 7.777 7.777 7.780 7.780 dbcsr_new_transposed 2 7.0 0.140 0.140 7.074 7.074 make_images_data 370 9.1 0.010 0.010 7.038 7.038 dbcsr_redistribute 2 8.0 6.831 6.831 6.900 6.900 matrix_qs_to_ls 12 5.1 0.000 0.000 6.825 6.825 matrix_cluster 12 6.1 0.000 0.000 6.825 6.825 ------------------------------------------------------------------------------- From /workspace/artifacts/bench_dftb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.008 0.010 93.128 93.129 qs_energies 1 2.0 0.000 0.000 93.033 93.033 ls_scf 1 3.0 0.000 0.000 92.953 92.954 ls_scf_main 1 4.0 0.001 0.003 89.228 89.228 density_matrix_trs4 11 5.0 0.008 0.013 85.520 85.628 dbcsr_multiply_generic 185 6.1 0.073 0.094 80.088 80.397 multiply_cannon 185 7.1 0.042 0.045 66.607 67.819 multiply_cannon_loop 185 8.1 0.212 0.222 62.786 64.629 multiply_cannon_multrec 1480 9.1 42.015 45.165 42.496 45.646 mp_waitall_1 11936 10.3 18.294 20.616 18.294 20.616 multiply_cannon_metrocomm3 1480 9.1 0.018 0.020 10.822 14.251 make_m2s 370 7.1 0.034 0.038 8.934 9.011 make_images 370 8.1 0.697 0.737 8.814 8.892 multiply_cannon_metrocomm1 1480 9.1 0.010 0.012 4.317 6.616 calculate_norms 2960 9.1 4.864 5.040 4.864 5.040 mp_sum_l 1039 5.9 3.243 4.148 3.243 4.148 arnoldi_extremal 12 6.1 0.000 0.001 3.962 3.971 arnoldi_normal_ev 12 7.1 0.002 0.008 3.962 3.971 build_subspace 23 8.1 0.039 0.052 3.835 3.839 make_images_data 370 9.1 0.012 0.013 3.538 3.833 ls_scf_dm_to_ks 11 5.0 0.000 0.000 3.203 3.295 dbcsr_matrix_vector_mult 652 9.0 0.020 0.091 3.176 3.253 hybrid_alltoall_any 393 9.9 0.311 1.563 2.857 3.227 dbcsr_multiply_generic_mpsum_f 137 7.1 0.001 0.001 2.312 3.134 dbcsr_complete_redistribute 23 7.5 1.793 1.874 2.852 2.964 matrix_ls_to_qs 11 6.0 0.000 0.000 2.823 2.926 ls_scf_init_scf 1 4.0 0.000 0.000 2.869 2.870 ls_scf_init_matrix_S 1 5.0 0.000 0.000 2.830 2.839 make_images_pack 370 9.1 2.451 2.700 2.456 2.706 matrix_decluster 11 7.0 0.000 0.000 2.566 2.674 dbcsr_matrix_vector_mult_local 652 10.0 2.516 2.633 2.521 2.637 matrix_sqrt_Newton_Schulz 1 6.0 0.001 0.001 2.593 2.596 buffer_matrices_ensure_size 370 8.1 2.209 2.324 2.209 2.324 dbcsr_add_d 280 6.0 0.002 0.002 2.085 2.191 dbcsr_add_anytype 280 7.0 1.136 1.229 2.083 2.190 dbcsr_finalize 646 7.5 0.014 0.014 1.923 1.994 ------------------------------------------------------------------------------- Plot: name="bench_dftb_timings_32omp", title="Timings of bench_dftb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32omp", name="rest", label="rest", y=111.108, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=71.174, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=53.398, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=40.538, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=34.127, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="setup_rec_index_2d", label="setup_rec_index_2d", y=18.325, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="calculate_norms", label="calculate_norms", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 Plot: name="bench_dftb_timings_32mpi", title="Timings of bench_dftb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32mpi", name="rest", label="rest", y=20.403000000000006, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=42.015, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=1.793, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=2.516, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="setup_rec_index_2d", label="setup_rec_index_2d", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=18.294, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="calculate_norms", label="calculate_norms", y=4.864, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=3.243, yerr=0.0 Running dbcsr.inp with 1 threads and 32 ranks... done. Running dbcsr.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/dbcsr_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.006 0.006 105.357 105.357 lib_test 1 2.0 0.000 0.000 105.350 105.350 dbcsr_run_tests 3 3.0 0.002 0.002 105.350 105.350 test_multiplies_multiproc 3 4.0 0.001 0.001 85.449 85.449 dbcsr_redistribute 9 5.0 57.058 57.058 60.721 60.721 dbcsr_multiply_generic 9 5.0 0.001 0.001 22.862 22.862 dbcsr_make_random_matrix 9 4.0 14.559 14.559 19.822 19.822 multiply_cannon 9 6.0 0.002 0.002 16.317 16.317 multiply_cannon_loop 9 7.0 0.002 0.002 15.782 15.782 multiply_cannon_multrec 9 8.0 15.778 15.778 15.779 15.779 dbcsr_finalize 27 5.7 0.004 0.004 8.967 8.967 dbcsr_merge_all 18 6.5 3.164 3.164 8.263 8.263 mp_alltoall_d11v 27 6.0 3.343 3.343 3.343 3.343 tree_to_linear_d 9 7.0 3.176 3.176 3.176 3.176 dbcsr_data_release 975 7.6 2.380 2.380 2.380 2.380 make_m2s 18 6.0 0.001 0.001 2.267 2.267 make_images 18 7.0 0.715 0.715 2.190 2.190 ------------------------------------------------------------------------------- From /workspace/artifacts/dbcsr_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.003 0.005 26.544 26.545 lib_test 1 2.0 0.000 0.000 26.514 26.535 dbcsr_run_tests 3 3.0 0.001 0.001 26.513 26.534 test_multiplies_multiproc 3 4.0 0.001 0.001 25.359 25.435 dbcsr_multiply_generic 9 5.0 0.002 0.002 23.449 23.550 multiply_cannon 9 6.0 0.002 0.003 21.206 21.589 multiply_cannon_loop 9 7.0 0.004 0.004 20.751 21.112 multiply_cannon_multrec 72 8.0 17.607 18.064 17.608 18.065 mp_waitall_1 576 9.2 3.546 4.337 3.546 4.337 multiply_cannon_metrocomm1 72 8.0 0.002 0.002 2.772 3.570 dbcsr_make_random_matrix 9 4.0 0.894 0.920 1.114 1.173 dbcsr_finalize 27 5.7 0.000 0.001 0.885 1.008 make_m2s 18 6.0 0.001 0.001 0.931 1.006 make_images 18 7.0 0.026 0.027 0.928 1.003 multiply_cannon_metrocomm3 72 8.0 0.000 0.001 0.361 0.971 mp_sum_l 310 2.7 0.476 0.967 0.476 0.967 dbcsr_multiply_generic_mpsum_f 9 6.0 0.000 0.000 0.471 0.962 dbcsr_merge_all 18 6.5 0.142 0.166 0.772 0.880 dbcsr_data_release 444 7.6 0.666 0.751 0.666 0.751 dbcsr_redistribute 9 5.0 0.392 0.448 0.688 0.715 dbcsr_destroy 111 5.9 0.006 0.054 0.561 0.637 make_images_data 18 8.0 0.001 0.001 0.462 0.547 ------------------------------------------------------------------------------- Plot: name="dbcsr_timings_32omp", title="Timings of dbcsr with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32omp", name="rest", label="rest", y=9.063000000000002, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_redistribute", label="dbcsr_redistribute", y=57.058, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=15.778, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=14.559, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_alltoall_d11v", label="mp_alltoall_d11v", y=3.343, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="tree_to_linear_d", label="tree_to_linear_d", y=3.176, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_data_release", label="dbcsr_data_release", y=2.38, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="dbcsr_timings_32mpi", title="Timings of dbcsr with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32mpi", name="rest", label="rest", y=2.9630000000000045, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_redistribute", label="dbcsr_redistribute", y=0.392, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=17.607, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=0.894, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_alltoall_d11v", label="mp_alltoall_d11v", y=0.0, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="tree_to_linear_d", label="tree_to_linear_d", y=0.0, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_data_release", label="dbcsr_data_release", y=0.666, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=0.476, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=3.546, yerr=0.0 Running MQAE_single_node.inp with 1 threads and 32 ranks... done. Running MQAE_single_node.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/MQAE_single_node_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.046 0.046 146.904 146.904 qs_mol_dyn_low 1 2.0 0.005 0.005 145.011 145.011 velocity_verlet 5 3.0 0.004 0.004 117.231 117.231 qmmm_el_coupling 6 3.8 0.000 0.000 69.582 69.582 qmmm_elec_with_gaussian 6 4.8 0.188 0.188 69.576 69.576 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 67.896 67.896 qmmm_elec_gaussian_low_G 6 6.8 66.374 66.374 66.374 66.374 qs_forces 6 3.8 0.001 0.001 56.687 56.687 qs_energies 6 4.8 0.001 0.001 50.399 50.399 scf_env_do_scf 6 5.8 0.001 0.001 46.420 46.420 scf_env_do_scf_inner_loop 39 6.8 0.003 0.003 38.986 38.986 rebuild_ks_matrix 45 8.4 0.000 0.000 38.653 38.653 qs_ks_build_kohn_sham_matrix 45 9.4 0.007 0.007 38.653 38.653 qs_ks_update_qs_env 45 7.8 0.000 0.000 33.121 33.121 pw_transfer 966 11.9 0.069 0.069 23.343 23.343 fft_wrap_pw1pw2 801 13.0 0.009 0.009 23.007 23.007 fft_wrap_pw1pw2_150 507 14.3 2.352 2.352 22.491 22.491 qs_vxc_create 45 10.4 0.001 0.001 20.988 20.988 xc_vxc_pw_create 45 11.4 4.282 4.282 20.988 20.988 pw_scatter_s 429 15.4 10.354 10.354 10.354 10.354 qs_rho_update_rho 45 7.9 0.000 0.000 10.301 10.301 calculate_rho_elec 45 8.9 0.899 0.899 10.301 10.301 xc_rho_set_and_dset_create 45 12.4 0.245 0.245 9.619 9.619 fist_calc_energy_force 6 3.8 0.002 0.002 9.059 9.059 fft3d_s 802 15.0 8.937 8.937 8.947 8.947 qmmm_forces 6 3.8 0.001 0.001 8.886 8.886 pw_integral_ab 2539 7.4 8.679 8.679 8.679 8.679 qmmm_forces_with_gaussian 6 4.8 0.126 0.126 8.405 8.405 force_nonbond 6 4.8 7.816 7.816 7.816 7.816 init_scf_loop 6 6.8 0.000 0.000 7.426 7.426 qs_ks_ddapc 45 10.4 0.001 0.001 6.601 6.601 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 6.468 6.468 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 5.544 5.544 qmmm_forces_gaussian_low_G 6 6.8 5.382 5.382 5.382 5.382 pw_poisson_solve 51 9.9 2.272 2.272 5.232 5.232 grid_collocate_task_list 45 9.9 4.768 4.768 4.768 4.768 density_rs2pw 45 9.9 0.003 0.003 4.634 4.634 sum_up_and_integrate 45 10.4 0.223 0.223 4.392 4.392 integrate_v_rspace 45 11.4 0.013 0.013 4.169 4.169 cp_ddapc_apply_CD 45 11.4 0.006 0.006 4.097 4.097 ------------------------------------------------------------------------------- From /workspace/artifacts/MQAE_single_node_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.032 0.037 87.062 87.064 qs_mol_dyn_low 1 2.0 0.005 0.006 85.497 85.592 qs_forces 6 3.8 0.001 0.001 62.877 62.877 qs_energies 6 4.8 0.001 0.001 59.926 59.927 scf_env_do_scf 6 5.8 0.000 0.001 58.435 58.435 scf_env_do_scf_inner_loop 113 6.2 0.003 0.009 56.108 56.109 rebuild_ks_matrix 119 8.1 0.000 0.001 41.444 41.461 qs_ks_build_kohn_sham_matrix 119 9.1 0.020 0.022 41.443 41.460 qs_ks_update_qs_env 119 7.3 0.001 0.001 38.948 38.964 velocity_verlet 5 3.0 0.002 0.003 35.887 35.892 pw_transfer 2446 11.8 0.281 0.296 26.333 26.530 fft_wrap_pw1pw2 2059 12.8 0.033 0.035 25.514 25.770 fft_wrap_pw1pw2_150 1321 14.0 2.243 2.402 24.724 24.901 qs_vxc_create 119 10.1 0.003 0.004 20.973 20.981 xc_vxc_pw_create 119 11.1 0.456 0.611 20.970 20.977 fft3d_ps 2059 14.8 11.480 12.417 19.202 19.655 qs_rho_update_rho 119 7.3 0.001 0.001 16.304 16.305 calculate_rho_elec 119 8.3 0.086 0.096 16.303 16.304 sum_up_and_integrate 119 10.1 0.086 0.093 15.037 15.067 integrate_v_rspace 119 11.1 0.004 0.005 14.950 14.981 qmmm_forces 6 3.8 0.002 0.003 12.601 12.602 qmmm_forces_with_gaussian 6 4.8 0.383 0.470 12.163 12.458 rs_pw_transfer 988 11.5 0.015 0.018 11.172 11.709 density_rs2pw 119 9.3 0.011 0.013 9.818 10.258 xc_rho_set_and_dset_create 119 12.1 0.507 0.593 9.832 10.161 qmmm_el_coupling 6 3.8 0.000 0.000 8.885 8.941 qmmm_elec_with_gaussian 6 4.8 0.368 0.491 8.882 8.939 potential_pw2rs 119 12.1 0.011 0.012 8.752 8.762 grid_collocate_task_list 119 9.3 6.183 6.612 6.183 6.612 mp_alltoall_z22v 2059 16.8 4.668 6.236 4.668 6.236 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 5.848 6.138 grid_integrate_task_list 119 12.1 5.798 6.064 5.798 6.064 qmmm_forces_gaussian_low_G 6 6.8 4.776 5.098 4.776 5.098 rs_pw_transfer_PW2RS_150 125 13.9 2.514 2.600 4.866 4.901 pw_restrict_s3 18 5.8 2.171 2.198 4.754 4.788 rs_pw_transfer_RS2PW_150 125 11.2 2.036 2.178 4.037 4.552 yz_to_x 964 15.3 1.166 1.321 3.500 4.547 x_to_yz 1095 16.3 1.839 2.032 4.173 4.374 mp_waitany 4028 12.8 3.420 4.177 3.420 4.177 qmmm_elec_with_gaussian:spline 6 5.8 0.000 0.000 3.826 3.879 pw_prolongate_s3 18 6.8 1.736 1.757 3.826 3.879 pw_integral_ab 2761 7.7 3.199 3.228 3.565 3.781 qs_scf_new_mos 113 7.2 0.001 0.001 3.732 3.741 qs_scf_loop_do_ot 113 8.2 0.001 0.001 3.731 3.740 ot_scf_mini 113 9.2 0.002 0.002 3.568 3.576 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 3.403 3.501 dbcsr_multiply_generic 2588 12.3 0.098 0.114 3.357 3.426 qs_ks_ddapc 119 10.1 0.003 0.003 2.852 2.972 qmmm_elec_gaussian_low_G 6 6.8 2.452 2.546 2.452 2.546 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 2.506 2.507 mp_sum_dm3 33 5.7 2.270 2.479 2.270 2.479 init_scf_loop 6 6.8 0.000 0.001 2.323 2.323 ot_mini 113 10.2 0.001 0.001 2.257 2.268 pw_gather_p 964 14.3 2.069 2.267 2.069 2.267 mp_waitall_1 188862 16.2 1.959 2.119 1.959 2.119 pw_scatter_p 1095 15.3 1.907 1.988 1.907 1.988 pw_derive 732 12.5 1.707 1.858 1.707 1.858 qs_ot_get_derivative 113 11.2 0.001 0.001 1.782 1.790 ------------------------------------------------------------------------------- Plot: name="MQAE_single_node_timings_32omp", title="Timings of MQAE_single_node with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32omp", name="rest", label="rest", y=34.593999999999994, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=66.374, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_scatter_s", label="pw_scatter_s", y=10.354, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_s", label="fft3d_s", y=8.937, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_integral_ab", label="pw_integral_ab", y=8.679, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="force_nonbond", label="force_nonbond", y=7.816, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=5.382, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=4.768, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=0.0, yerr=0.0 Plot: name="MQAE_single_node_timings_32mpi", title="Timings of MQAE_single_node with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32mpi", name="rest", label="rest", y=48.50599999999999, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=2.452, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_scatter_s", label="pw_scatter_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_integral_ab", label="pw_integral_ab", y=3.199, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="force_nonbond", label="force_nonbond", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=4.776, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=6.183, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=11.48, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=4.668, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=5.798, yerr=0.0 Summary: Performance test works fine. Status: OK Uploading artifacts... done EndDate: 2021-11-09 20:10:46+00:00