StartDate: 2021-10-04 11:59:02+00:00 CpuId: 64x Intel Xeon W 2000 / D-2100 (Skylake / Cascade Lake) {Skylake}, 14nm CommitSHA: 77c056f2f480dc6780967b3156423a823c86ddae CommitTime: 2021-10-04 09:12:39 +0200 CommitAuthor: Hans Pabst CommitSubject: ocl: support OpenCL devices in DBCSR (#1662) Trying to pull image cp2k-toolchain-mpich... success :-) Trying to pull image cp2k-perf-openmp... success :-) #################### Running Image cp2k-perf-openmp #################### ========== Fetching Git Commit ========== CommitSHA: 77c056f2f480dc6780967b3156423a823c86ddae CommitTime: 2021-10-04 09:12:39 +0200 CommitAuthor: Hans Pabst CommitSubject: ocl: support OpenCL devices in DBCSR (#1662) ========== Running Test ========== ========== Compiling CP2K ========== Compiling cp2k... done. ========== Running Performance Test ========== Running H2O-64.inp with 1 threads and 32 ranks... done. Running H2O-64.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.032 0.032 160.759 160.759 qs_mol_dyn_low 1 2.0 0.005 0.005 159.742 159.742 qs_forces 11 3.9 0.001 0.001 159.684 159.684 qs_energies 11 4.9 0.001 0.001 147.872 147.872 scf_env_do_scf 11 5.9 0.001 0.001 115.115 115.115 velocity_verlet 10 3.0 0.002 0.002 108.049 108.049 scf_env_do_scf_inner_loop 108 6.5 0.009 0.009 89.803 89.803 rebuild_ks_matrix 119 8.3 0.001 0.001 41.314 41.314 qs_ks_build_kohn_sham_matrix 119 9.3 0.018 0.018 41.313 41.313 qs_rho_update_rho 119 7.7 0.001 0.001 37.354 37.354 calculate_rho_elec 119 8.7 1.564 1.564 37.353 37.353 qs_ks_update_qs_env 119 7.6 0.001 0.001 36.821 36.821 grid_collocate_task_list 119 9.7 31.165 31.165 31.165 31.165 sum_up_and_integrate 119 10.3 0.368 0.368 29.579 29.579 integrate_v_rspace 119 11.3 0.163 0.163 29.210 29.210 grid_integrate_task_list 119 12.3 26.492 26.492 26.492 26.492 init_scf_loop 11 6.9 0.000 0.000 25.131 25.131 qs_scf_new_mos 108 7.5 0.001 0.001 23.141 23.141 qs_scf_loop_do_ot 108 8.5 0.001 0.001 23.141 23.141 dbcsr_multiply_generic 2286 12.5 0.183 0.183 22.260 22.260 ot_scf_mini 108 9.5 0.003 0.003 21.756 21.756 prepare_preconditioner 11 7.9 0.000 0.000 20.499 20.499 make_preconditioner 11 8.9 0.000 0.000 20.499 20.499 make_full_inverse_cholesky 11 9.9 0.000 0.000 18.472 18.472 init_scf_run 11 5.9 0.001 0.001 17.076 17.076 scf_env_initial_rho_setup 11 6.9 0.001 0.001 17.075 17.075 wfi_extrapolate 11 7.9 0.001 0.001 16.094 16.094 cp_gemm 81 9.0 0.000 0.000 15.101 15.101 cp_gemm_cosma 81 10.0 15.101 15.101 15.101 15.101 ot_mini 108 10.5 0.001 0.001 14.093 14.093 make_m2s 4572 13.5 0.065 0.065 13.284 13.284 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 10.738 10.738 pw_transfer 1439 11.6 0.090 0.090 7.908 7.908 fft_wrap_pw1pw2 1201 12.6 0.010 0.010 7.607 7.607 ot_diis_step 108 11.5 0.005 0.005 7.316 7.316 cp_fm_cholesky_decompose 22 10.9 7.267 7.267 7.267 7.267 make_images 4572 14.5 2.625 2.625 7.206 7.206 qs_ot_get_derivative 108 11.5 0.002 0.002 6.773 6.773 fft_wrap_pw1pw2_140 487 13.2 0.612 0.612 6.401 6.401 dbcsr_complete_redistribute 329 12.2 2.964 2.964 6.277 6.277 dbcsr_make_dense_low 5837 15.5 0.104 0.104 6.276 6.276 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 6.246 6.246 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 6.203 6.203 apply_single 119 13.6 0.001 0.001 6.202 6.202 make_dense_data 5837 16.5 5.533 5.533 6.150 6.150 qs_env_update_s_mstruct 11 6.9 0.002 0.002 5.983 5.983 dbcsr_copy 2102 12.0 0.302 0.302 5.907 5.907 dbcsr_make_images_dense 3978 14.8 0.028 0.028 5.626 5.626 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 5.563 5.563 dbcsr_copy_into_existing 22 7.9 5.547 5.547 5.547 5.547 qs_create_task_list 11 7.9 0.000 0.000 5.422 5.422 generate_qs_task_list 11 8.9 3.728 3.728 5.422 5.422 copy_dbcsr_to_fm 153 11.3 0.003 0.003 5.118 5.118 cp_fm_cholesky_invert 11 10.9 4.993 4.993 4.993 4.993 multiply_cannon 2286 13.5 0.291 0.291 4.850 4.850 qs_energies_compute_matrix_w 11 5.9 0.000 0.000 4.762 4.762 calculate_w_matrix_ot 11 6.9 0.008 0.008 4.762 4.762 density_rs2pw 119 9.7 0.006 0.006 4.624 4.624 pw_poisson_solve 119 10.3 1.789 1.789 4.387 4.387 transfer_dbcsr_to_fm 11 10.9 0.000 0.000 4.234 4.234 build_core_hamiltonian_matrix 11 6.9 0.001 0.001 4.173 4.173 multiply_cannon_loop 2286 14.5 0.060 0.060 4.097 4.097 multiply_cannon_multrec 2286 15.5 3.964 3.964 4.036 4.036 qs_ot_get_p 119 10.4 0.001 0.001 3.900 3.900 fft3d_s 1202 14.6 3.556 3.556 3.562 3.562 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.009 0.014 82.234 82.235 qs_mol_dyn_low 1 2.0 0.005 0.006 82.112 82.118 qs_forces 11 3.9 0.002 0.002 82.053 82.053 qs_energies 11 4.9 0.002 0.002 76.761 76.765 scf_env_do_scf 11 5.9 0.001 0.001 69.626 69.627 scf_env_do_scf_inner_loop 108 6.5 0.003 0.011 64.677 64.677 velocity_verlet 10 3.0 0.002 0.002 48.014 48.016 rebuild_ks_matrix 119 8.3 0.001 0.001 31.670 31.748 qs_ks_build_kohn_sham_matrix 119 9.3 0.023 0.028 31.669 31.748 qs_ks_update_qs_env 119 7.6 0.001 0.002 28.217 28.297 sum_up_and_integrate 119 10.3 0.056 0.063 24.514 24.550 qs_rho_update_rho 119 7.7 0.001 0.001 24.464 24.502 calculate_rho_elec 119 8.7 0.048 0.051 24.464 24.501 integrate_v_rspace 119 11.3 0.005 0.006 24.458 24.496 dbcsr_multiply_generic 2286 12.5 0.142 0.153 20.511 20.652 grid_integrate_task_list 119 12.3 16.665 17.433 16.665 17.433 grid_collocate_task_list 119 9.7 16.429 17.262 16.429 17.262 qs_scf_new_mos 108 7.5 0.001 0.001 16.994 17.063 qs_scf_loop_do_ot 108 8.5 0.001 0.001 16.993 17.062 ot_scf_mini 108 9.5 0.003 0.004 15.946 16.006 multiply_cannon 2286 13.5 0.231 0.240 13.585 13.910 multiply_cannon_loop 2286 14.5 0.237 0.256 12.200 12.692 mp_waitall_1 169478 16.3 10.172 10.528 10.172 10.528 ot_mini 108 10.5 0.001 0.001 9.572 9.634 rs_pw_transfer 974 11.9 0.017 0.019 8.145 9.344 density_rs2pw 119 9.7 0.009 0.010 7.299 8.484 pw_transfer 1439 11.6 0.148 0.160 6.898 7.002 multiply_cannon_metrocomm3 18288 15.5 0.085 0.090 6.464 6.784 fft_wrap_pw1pw2 1201 12.6 0.015 0.017 6.563 6.678 potential_pw2rs 119 12.3 0.010 0.012 5.849 5.858 fft_wrap_pw1pw2_140 487 13.2 0.595 0.636 5.589 5.818 fft3d_ps 1201 14.6 2.569 2.777 5.017 5.085 init_scf_run 11 5.9 0.000 0.002 4.975 4.975 scf_env_initial_rho_setup 11 6.9 0.000 0.001 4.974 4.975 init_scf_loop 11 6.9 0.001 0.001 4.931 4.932 ot_diis_step 108 11.5 0.005 0.005 4.788 4.789 qs_ot_get_derivative 108 11.5 0.002 0.002 4.732 4.788 apply_preconditioner_dbcsr 119 12.6 0.000 0.001 4.593 4.674 apply_single 119 13.6 0.001 0.001 4.592 4.674 wfi_extrapolate 11 7.9 0.001 0.001 4.576 4.576 mp_waitany 9880 13.7 3.327 4.555 3.327 4.555 multiply_cannon_multrec 18288 15.5 4.298 4.529 4.317 4.549 make_m2s 4572 13.5 0.079 0.083 4.413 4.493 rs_pw_transfer_RS2PW_140 130 11.5 0.485 0.520 2.929 4.143 make_images 4572 14.5 0.194 0.200 3.681 3.760 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 3.697 3.709 mp_alltoall_d11v 2130 13.8 2.301 3.203 2.301 3.203 rs_gather_matrices 119 12.3 0.137 0.148 1.886 2.824 rs_pw_transfer_PW2RS_140 130 13.9 1.305 1.364 2.785 2.823 qs_ot_get_p 119 10.4 0.001 0.001 2.254 2.348 mp_alltoall_z22v 1201 16.6 1.718 1.955 1.718 1.955 mp_sum_l 11138 13.3 1.558 1.778 1.558 1.778 make_images_data 4572 15.5 0.064 0.072 1.642 1.757 qs_ot_get_derivative_diag 49 12.0 0.002 0.002 1.707 1.743 mp_sum_d 4127 12.0 1.244 1.722 1.244 1.722 cp_gemm 81 9.0 0.000 0.000 1.688 1.694 cp_gemm_cosma 81 10.0 1.687 1.694 1.687 1.694 prepare_preconditioner 11 7.9 0.000 0.000 1.672 1.691 make_preconditioner 11 8.9 0.000 0.000 1.672 1.691 ------------------------------------------------------------------------------- Plot: name="H2O-64_timings_32omp", title="Timings of H2O-64 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32omp", name="rest", label="rest", y=71.223, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=31.165, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=26.492, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_gemm_cosma", label="cp_gemm_cosma", y=15.101, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=7.267, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=5.547, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=3.964, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 Plot: name="H2O-64_timings_32mpi", title="Timings of H2O-64 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32mpi", name="rest", label="rest", y=29.656000000000006, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=16.429, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=16.665, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_gemm_cosma", label="cp_gemm_cosma", y=1.687, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=4.298, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=10.172, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitany", label="mp_waitany", y=3.327, yerr=0.0 Running H2O-64_nonortho.inp with 1 threads and 32 ranks... done. Running H2O-64_nonortho.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_nonortho_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.042 0.042 216.200 216.200 qs_mol_dyn_low 1 2.0 0.005 0.005 215.187 215.187 qs_forces 11 3.9 0.002 0.002 215.123 215.123 qs_energies 11 4.9 0.001 0.001 200.909 200.909 scf_env_do_scf 11 5.9 0.001 0.001 159.816 159.816 velocity_verlet 10 3.0 0.002 0.002 144.314 144.314 scf_env_do_scf_inner_loop 96 6.5 0.011 0.011 125.987 125.987 rebuild_ks_matrix 107 8.3 0.001 0.001 65.206 65.206 qs_ks_build_kohn_sham_matrix 107 9.3 0.023 0.023 65.205 65.205 qs_ks_update_qs_env 107 7.6 0.001 0.001 58.606 58.606 qs_rho_update_rho 107 7.7 0.001 0.001 57.672 57.672 calculate_rho_elec 107 8.7 1.397 1.397 57.671 57.671 sum_up_and_integrate 107 10.3 0.443 0.443 52.935 52.935 integrate_v_rspace 107 11.3 0.153 0.153 52.491 52.491 grid_collocate_task_list 107 9.7 51.752 51.752 51.752 51.752 grid_integrate_task_list 107 12.3 49.823 49.823 49.823 49.823 init_scf_loop 11 6.9 0.001 0.001 33.554 33.554 prepare_preconditioner 11 7.9 0.000 0.000 25.844 25.844 make_preconditioner 11 8.9 0.000 0.000 25.844 25.844 make_full_inverse_cholesky 11 9.9 0.001 0.001 23.297 23.297 qs_scf_new_mos 96 7.5 0.001 0.001 22.561 22.561 qs_scf_loop_do_ot 96 8.5 0.001 0.001 22.561 22.561 init_scf_run 11 5.9 0.001 0.001 21.709 21.709 scf_env_initial_rho_setup 11 6.9 0.001 0.001 21.708 21.708 dbcsr_multiply_generic 1966 12.4 0.201 0.201 21.420 21.420 ot_scf_mini 96 9.5 0.004 0.004 21.215 21.215 wfi_extrapolate 11 7.9 0.001 0.001 20.467 20.467 cp_gemm 81 9.0 0.000 0.000 17.218 17.218 cp_gemm_cosma 81 10.0 17.217 17.217 17.217 17.217 ot_mini 96 10.5 0.001 0.001 13.731 13.731 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 13.614 13.614 make_m2s 3932 13.4 0.073 0.073 12.929 12.929 cp_fm_cholesky_decompose 22 10.9 9.390 9.390 9.390 9.390 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 8.537 8.537 qs_env_update_s_mstruct 11 6.9 0.000 0.000 8.364 8.364 dbcsr_complete_redistribute 317 12.2 3.597 3.597 8.296 8.296 pw_transfer 1295 11.6 0.115 0.115 7.802 7.802 qs_create_task_list 11 7.9 0.000 0.000 7.745 7.745 generate_qs_task_list 11 8.9 5.444 5.444 7.745 7.745 fft_wrap_pw1pw2 1081 12.6 0.013 0.013 7.451 7.451 qs_ot_get_derivative 96 11.5 0.002 0.002 6.893 6.893 ot_diis_step 96 11.5 0.007 0.007 6.834 6.834 copy_dbcsr_to_fm 147 11.2 0.005 0.005 6.793 6.793 make_images 3932 14.4 2.458 2.458 6.615 6.615 dbcsr_make_dense_low 4961 15.5 0.096 0.096 6.584 6.584 make_dense_data 4961 16.5 5.932 5.932 6.467 6.467 fft_wrap_pw1pw2_140 439 13.2 0.632 0.632 6.376 6.376 cp_fm_cholesky_invert 11 10.9 6.076 6.076 6.076 6.076 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 6.002 6.002 apply_single 107 13.6 0.001 0.001 6.002 6.002 dbcsr_copy 1855 11.9 0.314 0.314 5.956 5.956 dbcsr_make_images_dense 3386 14.7 0.029 0.029 5.871 5.871 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 5.674 5.674 dbcsr_copy_into_existing 22 7.9 5.583 5.583 5.583 5.583 qs_energies_compute_matrix_w 11 5.9 0.000 0.000 5.556 5.556 calculate_w_matrix_ot 11 6.9 0.009 0.009 5.555 5.555 transfer_dbcsr_to_fm 11 10.9 0.000 0.000 5.489 5.489 pw_poisson_solve 107 10.3 2.290 2.290 5.075 5.075 density_rs2pw 107 9.7 0.008 0.008 4.522 4.522 multiply_cannon 1966 13.4 0.327 0.327 4.386 4.386 build_core_hamiltonian_matrix 11 6.9 0.001 0.001 4.371 4.371 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_nonortho_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.011 135.707 135.708 qs_mol_dyn_low 1 2.0 0.005 0.006 135.591 135.597 qs_forces 11 3.9 0.002 0.002 135.539 135.540 qs_energies 11 4.9 0.001 0.002 126.402 126.406 scf_env_do_scf 11 5.9 0.001 0.001 116.101 116.102 scf_env_do_scf_inner_loop 96 6.5 0.003 0.009 107.807 107.808 velocity_verlet 10 3.0 0.002 0.002 80.803 80.805 rebuild_ks_matrix 107 8.3 0.001 0.001 61.397 61.623 qs_ks_build_kohn_sham_matrix 107 9.3 0.020 0.024 61.396 61.622 sum_up_and_integrate 107 10.3 0.046 0.057 55.295 55.363 integrate_v_rspace 107 11.3 0.004 0.006 55.248 55.315 qs_ks_update_qs_env 107 7.6 0.001 0.001 54.068 54.270 qs_rho_update_rho 107 7.7 0.001 0.001 51.690 51.713 calculate_rho_elec 107 8.7 0.043 0.045 51.689 51.712 grid_integrate_task_list 107 12.3 47.067 49.555 47.067 49.555 grid_collocate_task_list 107 9.7 43.807 46.261 43.807 46.261 dbcsr_multiply_generic 1966 12.4 0.120 0.140 17.148 17.485 qs_scf_new_mos 96 7.5 0.001 0.001 13.698 13.921 qs_scf_loop_do_ot 96 8.5 0.001 0.001 13.697 13.921 ot_scf_mini 96 9.5 0.003 0.003 12.879 13.097 multiply_cannon 1966 13.4 0.193 0.221 11.708 12.509 multiply_cannon_loop 1966 14.4 0.200 0.225 10.656 11.547 rs_pw_transfer 878 11.9 0.015 0.019 8.371 10.073 mp_waitall_1 146670 16.2 8.757 9.958 8.757 9.958 density_rs2pw 107 9.7 0.008 0.009 7.291 8.971 init_scf_loop 11 6.9 0.001 0.001 8.277 8.277 init_scf_run 11 5.9 0.000 0.002 8.139 8.139 scf_env_initial_rho_setup 11 6.9 0.000 0.001 8.139 8.139 ot_mini 96 10.5 0.001 0.002 7.728 7.967 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 7.524 7.549 wfi_extrapolate 11 7.9 0.001 0.002 7.473 7.473 multiply_cannon_metrocomm3 15728 15.4 0.072 0.079 5.648 7.315 pw_transfer 1295 11.6 0.127 0.135 5.698 5.888 mp_waitany 8968 13.7 4.144 5.820 4.144 5.820 fft_wrap_pw1pw2 1081 12.6 0.013 0.014 5.413 5.633 rs_pw_transfer_RS2PW_140 118 11.5 0.390 0.493 3.717 5.431 potential_pw2rs 107 12.3 0.009 0.011 5.102 5.115 fft_wrap_pw1pw2_140 439 13.2 0.507 0.541 4.689 5.068 mp_alltoall_d11v 1998 13.7 3.373 4.977 3.373 4.977 rs_gather_matrices 107 12.3 0.117 0.137 3.026 4.638 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 3.992 4.202 apply_single 107 13.6 0.001 0.002 3.992 4.202 fft3d_ps 1081 14.6 2.179 2.495 4.096 4.189 multiply_cannon_multrec 15728 15.4 3.762 4.045 3.778 4.063 ot_diis_step 96 11.5 0.004 0.005 4.030 4.030 qs_ot_get_derivative 96 11.5 0.001 0.002 3.663 3.882 make_m2s 3932 13.4 0.066 0.074 3.571 3.670 make_images 3932 14.4 0.168 0.180 2.942 3.018 multiply_cannon_metrocomm1 15728 15.4 0.038 0.043 0.757 2.733 ------------------------------------------------------------------------------- Plot: name="H2O-64_nonortho_timings_32omp", title="Timings of H2O-64_nonortho with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="rest", label="rest", y=81.94199999999998, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=51.752, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=49.823, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_gemm_cosma", label="cp_gemm_cosma", y=17.217, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=9.39, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=6.076, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 Plot: name="H2O-64_nonortho_timings_32mpi", title="Timings of H2O-64_nonortho with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="rest", label="rest", y=28.169999999999987, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=43.807, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=47.067, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_gemm_cosma", label="cp_gemm_cosma", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=3.762, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=8.757, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitany", label="mp_waitany", y=4.144, yerr=0.0 Running H2O-hyb.inp with 1 threads and 32 ranks... done. Running H2O-hyb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-hyb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.444 0.444 278.612 278.612 qs_energies 1 2.0 0.000 0.000 277.138 277.138 scf_env_do_scf 1 3.0 0.000 0.000 273.949 273.949 qs_ks_update_qs_env 8 5.0 0.000 0.000 264.524 264.524 rebuild_ks_matrix 7 6.0 0.000 0.000 264.397 264.397 qs_ks_build_kohn_sham_matrix 7 7.0 0.002 0.002 264.397 264.397 hfx_ks_matrix 7 8.0 0.000 0.000 168.216 168.216 integrate_four_center 7 9.0 2.975 2.975 168.180 168.180 scf_env_do_scf_inner_loop 7 4.0 0.001 0.001 167.749 167.749 integrate_four_center_main 7 10.0 0.781 0.781 155.906 155.906 integrate_four_center_bin 439 11.0 155.126 155.126 155.126 155.126 init_scf_loop 1 4.0 0.000 0.000 106.180 106.180 cp_gemm 129 10.3 0.001 0.001 80.011 80.011 cp_gemm_cosma 129 11.3 80.011 80.011 80.011 80.011 admm_mo_calc_rho_aux 7 8.0 0.000 0.000 44.790 44.790 admm_fit_mo_coeffs 7 9.0 0.000 0.000 42.001 42.001 admm_mo_merge_derivs 7 8.0 0.000 0.000 39.966 39.966 merge_mo_derivs_diag 7 9.0 0.025 0.025 39.966 39.966 purify_mo_diag 7 10.0 0.001 0.001 23.285 23.285 fit_mo_coeffs 7 10.0 0.000 0.000 18.716 18.716 integrate_four_center_load 7 10.0 0.000 0.000 8.869 8.869 hfx_load_balance 1 11.0 0.002 0.002 8.868 8.868 calculate_rho_elec 15 7.4 0.193 0.193 6.153 6.153 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-hyb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.202 0.208 184.934 184.935 qs_energies 1 2.0 0.001 0.001 184.590 184.591 scf_env_do_scf 1 3.0 0.000 0.000 184.020 184.020 qs_ks_update_qs_env 8 5.0 0.000 0.000 181.053 181.053 rebuild_ks_matrix 7 6.0 0.000 0.000 181.040 181.040 qs_ks_build_kohn_sham_matrix 7 7.0 0.002 0.002 181.039 181.040 hfx_ks_matrix 7 8.0 0.000 0.000 168.676 168.678 integrate_four_center 7 9.0 0.089 0.376 168.662 168.663 integrate_four_center_main 7 10.0 0.004 0.006 154.447 158.284 integrate_four_center_bin 448 11.0 154.442 158.279 154.442 158.279 scf_env_do_scf_inner_loop 7 4.0 0.000 0.001 108.091 108.091 init_scf_loop 1 4.0 0.000 0.000 75.927 75.928 integrate_four_center_load 7 10.0 0.000 0.000 8.768 8.773 hfx_load_balance 1 11.0 0.001 0.001 8.768 8.772 mp_sync 70 11.3 4.632 8.188 4.632 8.188 cp_gemm 129 10.3 0.000 0.001 5.079 5.084 cp_gemm_cosma 129 11.3 5.079 5.084 5.079 5.084 hfx_load_balance_bin 1 12.0 4.270 4.382 4.270 4.382 hfx_load_balance_count 1 12.0 4.285 4.373 4.285 4.373 ------------------------------------------------------------------------------- Plot: name="H2O-hyb_timings_32omp", title="Timings of H2O-hyb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32omp", name="rest", label="rest", y=39.275000000000034, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_bin", label="integrate_four_center_bin", y=155.126, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="cp_gemm_cosma", label="cp_gemm_cosma", y=80.011, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center", label="integrate_four_center", y=2.975, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_main", label="integrate_four_center_main", y=0.781, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="CP2K", label="CP2K", y=0.444, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_count", label="hfx_load_balance_count", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=0.0, yerr=0.0 Plot: name="H2O-hyb_timings_32mpi", title="Timings of H2O-hyb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32mpi", name="rest", label="rest", y=11.930999999999983, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_bin", label="integrate_four_center_bin", y=154.442, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="cp_gemm_cosma", label="cp_gemm_cosma", y=5.079, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center", label="integrate_four_center", y=0.089, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_main", label="integrate_four_center_main", y=0.004, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="CP2K", label="CP2K", y=0.202, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="mp_sync", label="mp_sync", y=4.632, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_count", label="hfx_load_balance_count", y=4.285, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=4.27, yerr=0.0 Running GW_PBE_4benzene.inp with 1 threads and 32 ranks... done. Running GW_PBE_4benzene.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/GW_PBE_4benzene_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.015 0.015 396.454 396.454 qs_energies 1 2.0 0.000 0.000 395.849 395.849 mp2_main 1 3.0 0.000 0.000 390.757 390.757 mp2_gpw_main 1 4.0 0.000 0.000 390.573 390.573 rpa_ri_compute_en 1 5.0 0.000 0.000 369.059 369.059 rpa_num_int 1 6.0 0.000 0.000 369.034 369.034 compute_mat_P_omega 1 7.0 0.002 0.002 195.918 195.918 compute_mat_P_omega_contract 10 8.0 12.652 12.652 194.378 194.378 dbcsr_t_total 2336 9.6 0.016 0.016 183.354 183.354 cp_gemm 105 8.4 0.000 0.000 153.105 153.105 cp_gemm_cosma 105 9.4 153.105 153.105 153.105 153.105 dbcsr_t_contract 787 11.0 47.492 47.492 110.376 110.376 GW_matrix_operations 10 7.0 0.005 0.005 101.078 101.078 compute_mat_P_omega_calc_M_occ 250 9.0 12.636 12.636 72.203 72.203 dbcsr_t_copy 1103 10.7 20.093 20.093 71.439 71.439 dbcsr_tas_total 1149 12.2 0.048 0.048 56.886 56.886 dbcsr_tas_multiply 807 12.1 0.003 0.003 55.370 55.370 rpa_num_int_RPA_matrix_operati 10 7.0 0.000 0.000 51.082 51.082 contract_P_omega_with_mat_L 10 8.0 0.000 0.000 49.344 49.344 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 44.833 44.833 dbcsr_multiply_generic 837 15.8 0.139 0.139 41.208 41.208 dbcsr_tas_dbcsr 807 14.1 0.003 0.003 40.919 40.919 dbcsr_tas_mm_1N 524 15.1 0.002 0.002 28.611 28.611 dbcsr_tas_reserve_blocks_index 3261 13.7 7.252 7.252 28.006 28.006 multiply_cannon 837 16.8 0.455 0.455 26.455 26.455 dbcsr_tas_copy 574 11.4 16.964 16.964 24.493 24.493 multiply_cannon_loop 837 17.8 0.199 0.199 22.956 22.956 dbcsr_t_reserve_blocks_index 2280 12.5 1.331 1.331 21.781 21.781 multiply_cannon_multrec 837 18.8 20.758 20.758 21.583 21.583 mp2_ri_gpw_compute_in 1 5.0 0.000 0.000 21.499 21.499 dbcsr_t_reserve_blocks_index_a 2222 11.6 0.012 0.012 20.514 20.514 dbcsr_reserve_blocks 3717 14.7 20.073 20.073 20.451 20.451 compute_mat_P_omega_copy_M_occ 250 9.0 0.001 0.001 19.514 19.514 compute_QP_energies 1 7.0 0.000 0.000 19.000 19.000 compute_self_energy_cubic_gw 1 8.0 0.095 0.095 19.000 19.000 compute_mat_P_omega_copy_M_vir 250 9.0 0.002 0.002 14.374 14.374 dbcsr_t_copy_nocomm 251 12.0 11.288 11.288 13.650 13.650 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 12.847 12.847 make_m2s 1674 16.8 0.107 0.107 11.971 11.971 make_images 1674 17.8 5.562 5.562 11.431 11.431 dbcsr_tas_mm_2 251 15.0 0.001 0.001 10.964 10.964 dbcsr_finalize 9888 13.6 1.615 1.615 8.452 8.452 ------------------------------------------------------------------------------- From /workspace/artifacts/GW_PBE_4benzene_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.006 0.012 59.278 59.279 qs_energies 1 2.0 0.001 0.001 59.152 59.159 mp2_main 1 3.0 0.000 0.000 57.741 57.748 mp2_gpw_main 1 4.0 0.000 0.001 57.686 57.693 rpa_ri_compute_en 1 5.0 0.000 0.000 55.672 55.679 rpa_num_int 1 6.0 0.001 0.001 55.664 55.672 dbcsr_t_total 2336 9.6 0.016 0.018 42.592 42.594 compute_mat_P_omega 1 7.0 0.001 0.002 41.429 41.437 compute_mat_P_omega_contract 10 8.0 0.763 0.816 41.140 41.147 dbcsr_t_contract 787 11.0 1.875 2.088 31.295 31.304 dbcsr_tas_total 1149 12.2 0.063 0.079 27.588 27.589 dbcsr_tas_multiply 807 12.1 0.003 0.004 27.441 27.445 dbcsr_tas_dbcsr 807 14.1 0.003 0.004 20.103 20.104 dbcsr_multiply_generic 837 15.8 0.071 0.086 16.651 17.967 compute_mat_P_omega_calc_M_occ 250 9.0 0.754 0.810 13.856 13.856 multiply_cannon 837 16.8 0.135 0.167 9.833 10.894 dbcsr_t_copy 1111 10.7 4.284 4.672 9.628 10.309 dbcsr_tas_mm_1N 524 15.1 0.003 0.004 8.839 10.011 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.002 10.009 10.009 multiply_cannon_loop 837 17.8 0.043 0.052 8.956 9.980 cp_gemm 105 8.4 0.000 0.000 9.442 9.464 cp_gemm_cosma 105 9.4 9.442 9.463 9.442 9.463 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.002 8.788 8.788 multiply_cannon_multrec 1386 17.8 6.949 7.718 7.204 7.964 mp_sync 8696 11.6 6.597 7.657 6.597 7.657 dbcsr_tas_mm_2 251 15.0 0.002 0.002 7.565 7.565 make_m2s 1674 16.8 0.045 0.053 5.870 6.388 make_images 1674 17.8 0.245 0.253 5.786 6.308 GW_matrix_operations 10 7.0 0.001 0.002 6.069 6.078 compute_QP_energies 1 7.0 0.000 0.000 4.470 4.470 compute_self_energy_cubic_gw 1 8.0 0.005 0.005 4.468 4.470 dbcsr_t_communicate_buffer 1098 11.7 0.093 0.100 3.637 3.767 mp_waitall_2 3776 14.7 3.432 3.687 3.432 3.687 make_images_data 1674 18.8 0.038 0.040 3.091 3.386 contract_cubic_gw 21 9.0 0.000 0.000 3.363 3.363 rpa_num_int_RPA_matrix_operati 10 7.0 0.000 0.000 3.342 3.361 contract_P_omega_with_mat_L 10 8.0 0.000 0.000 3.221 3.240 hybrid_alltoall_any 1724 19.5 2.383 2.768 2.974 3.201 dbcsr_t_reserve_blocks_index 2849 12.4 0.108 0.124 2.696 3.131 dbcsr_t_reserve_blocks_index_a 2791 11.4 0.019 0.021 2.655 3.082 dbcsr_tas_reserve_blocks_index 3300 13.8 0.267 0.313 2.642 3.070 dbcsr_reserve_blocks 3785 14.7 2.370 2.775 2.409 2.816 make_images_pack 1674 18.8 2.247 2.712 2.262 2.729 mp_waitall_1 26582 19.0 1.597 2.202 1.597 2.202 convert_to_new_pgrid 2421 14.1 0.018 0.022 1.886 2.023 mp2_ri_gpw_compute_in 1 5.0 0.001 0.001 2.012 2.012 dbcsr_copy 3323 15.8 1.819 1.960 1.848 1.989 compute_mat_P_omega_copy_M_vir 250 9.0 0.002 0.002 1.746 1.751 dbcsr_add_anytype 909 13.7 1.027 1.098 1.590 1.668 compute_mat_P_omega_copy_M_occ 250 9.0 0.001 0.002 1.561 1.566 dbcsr_tas_replicate 396 14.1 0.793 0.899 1.349 1.427 scf_env_do_scf 1 3.0 0.000 0.000 1.354 1.354 scf_env_do_scf_inner_loop 17 4.0 0.001 0.002 1.353 1.353 mp_max_i 2057 9.6 0.975 1.300 0.975 1.300 ------------------------------------------------------------------------------- Plot: name="GW_PBE_4benzene_timings_32omp", title="Timings of GW_PBE_4benzene with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="rest", label="rest", y=134.93300000000005, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="cp_gemm_cosma", label="cp_gemm_cosma", y=153.105, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbcsr_t_contract", label="dbcsr_t_contract", y=47.492, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=20.758, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbcsr_t_copy", label="dbcsr_t_copy", y=20.093, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbcsr_reserve_blocks", label="dbcsr_reserve_blocks", y=20.073, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 Plot: name="GW_PBE_4benzene_timings_32mpi", title="Timings of GW_PBE_4benzene with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="rest", label="rest", y=24.329, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="cp_gemm_cosma", label="cp_gemm_cosma", y=9.442, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbcsr_t_contract", label="dbcsr_t_contract", y=1.875, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=6.949, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbcsr_t_copy", label="dbcsr_t_copy", y=4.284, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbcsr_reserve_blocks", label="dbcsr_reserve_blocks", y=2.37, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_sync", label="mp_sync", y=6.597, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=3.432, yerr=0.0 Running diag_cu144_broy.inp with 1 threads and 32 ranks... done. Running diag_cu144_broy.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/diag_cu144_broy_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.204 0.204 201.068 201.068 qs_energies 1 2.0 0.000 0.000 198.865 198.865 scf_env_do_scf 1 3.0 0.000 0.000 186.687 186.687 scf_env_do_scf_inner_loop 15 4.0 0.003 0.003 186.687 186.687 qs_ks_update_qs_env 15 5.0 0.000 0.000 76.742 76.742 rebuild_ks_matrix 15 6.0 0.000 0.000 76.280 76.280 qs_ks_build_kohn_sham_matrix 15 7.0 0.003 0.003 76.280 76.280 qs_scf_new_mos 15 5.0 0.001 0.001 75.099 75.099 eigensolver 15 6.0 0.002 0.002 57.741 57.741 qs_vxc_create 15 8.0 0.001 0.001 51.171 51.171 calculate_dispersion_nonloc 15 9.0 9.415 9.415 44.925 44.925 cp_fm_diag_elpa 15 7.0 0.000 0.000 39.043 39.043 cp_fm_diag_elpa_base 15 8.0 33.376 33.376 39.043 39.043 pw_transfer 1191 9.8 0.122 0.122 31.076 31.076 fft_wrap_pw1pw2 1086 10.9 0.018 0.018 30.740 30.740 qs_rho_update_rho 16 5.0 0.000 0.000 25.846 25.846 calculate_rho_elec 16 6.0 0.346 0.346 25.846 25.846 grid_collocate_task_list 16 7.0 24.065 24.065 24.065 24.065 fft_wrap_pw1pw2_150 765 12.0 3.579 3.579 23.156 23.156 sum_up_and_integrate 15 8.0 0.104 0.104 23.114 23.114 integrate_v_rspace 15 9.0 0.039 0.039 23.011 23.011 grid_integrate_task_list 15 10.0 22.271 22.271 22.271 22.271 copy_dbcsr_to_fm 16 5.9 0.001 0.001 14.257 14.257 pw_scatter_s 585 13.0 13.655 13.655 13.655 13.655 cp_fm_cholesky_restore 45 7.0 13.226 13.226 13.226 13.226 dbcsr_complete_redistribute 46 8.3 4.384 4.384 12.871 12.871 fft3d_s 1087 12.8 11.667 11.667 11.679 11.679 cp_fm_upper_to_full 30 8.0 11.136 11.136 11.136 11.136 gspace_mixing 14 5.0 0.278 0.278 10.249 10.249 vdW_energy 15 10.0 9.902 9.902 9.902 9.902 broyden_mixing 14 6.0 9.417 9.417 9.417 9.417 fft_wrap_pw1pw2_200 197 11.5 0.387 0.387 7.281 7.281 xc_vxc_pw_create 15 9.0 1.418 1.418 6.245 6.245 dbcsr_finalize 159 9.9 0.027 0.027 5.691 5.691 init_scf_run 1 3.0 0.000 0.000 5.557 5.557 dbcsr_merge_all 91 11.1 0.093 0.093 5.521 5.521 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 5.226 5.226 mp_alltoall_d11v 186 9.2 4.831 4.831 4.831 4.831 ------------------------------------------------------------------------------- From /workspace/artifacts/diag_cu144_broy_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.015 0.018 87.008 87.009 qs_energies 1 2.0 0.001 0.001 86.642 86.642 scf_env_do_scf 1 3.0 0.000 0.000 81.475 81.476 scf_env_do_scf_inner_loop 15 4.0 0.001 0.002 81.475 81.476 qs_ks_update_qs_env 15 5.0 0.000 0.000 39.714 39.741 rebuild_ks_matrix 15 6.0 0.000 0.000 39.670 39.696 qs_ks_build_kohn_sham_matrix 15 7.0 0.004 0.005 39.670 39.696 qs_rho_update_rho 16 5.0 0.000 0.000 24.428 24.433 calculate_rho_elec 16 6.0 0.011 0.012 24.428 24.433 sum_up_and_integrate 15 8.0 0.013 0.015 23.323 23.354 integrate_v_rspace 15 9.0 0.001 0.001 23.310 23.342 grid_collocate_task_list 16 7.0 22.135 22.710 22.135 22.710 grid_integrate_task_list 15 10.0 21.504 22.220 21.504 22.220 qs_scf_new_mos 15 5.0 0.001 0.001 17.866 18.163 eigensolver 15 6.0 0.002 0.003 16.372 16.386 qs_vxc_create 15 8.0 0.001 0.001 15.808 15.819 calculate_dispersion_nonloc 15 9.0 1.395 1.421 12.830 12.851 pw_transfer 1191 9.8 0.137 0.153 12.171 12.375 fft_wrap_pw1pw2 1086 10.9 0.021 0.023 11.870 12.120 cp_fm_diag_elpa 15 7.0 0.000 0.000 11.998 12.006 cp_fm_diag_elpa_base 15 8.0 11.738 11.772 11.993 11.995 fft3d_ps 1086 12.9 5.220 5.607 9.010 9.239 fft_wrap_pw1pw2_150 765 12.0 0.698 0.742 7.928 7.955 cp_fm_cholesky_restore 45 7.0 4.128 4.199 4.128 4.199 fft_wrap_pw1pw2_200 197 11.5 0.373 0.401 3.790 4.039 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 3.149 3.149 xc_vxc_pw_create 15 9.0 0.061 0.093 2.977 2.993 build_core_hamiltonian_matrix 1 4.0 0.000 0.000 2.751 2.978 rs_pw_transfer 158 9.4 0.002 0.003 2.133 2.784 density_rs2pw 16 7.0 0.002 0.002 2.126 2.646 mp_alltoall_z22v 1086 14.9 2.246 2.614 2.246 2.614 x_to_yz 585 14.0 0.955 0.990 2.142 2.258 vdW_energy 15 10.0 2.035 2.198 2.035 2.198 mp_waitany 520 11.3 1.426 2.048 1.426 2.048 build_core_ppnl 1 5.0 1.836 2.012 1.836 2.012 yz_to_x 501 13.7 0.557 0.685 1.616 1.831 ------------------------------------------------------------------------------- Plot: name="diag_cu144_broy_timings_32omp", title="Timings of diag_cu144_broy with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32omp", name="rest", label="rest", y=94.47500000000001, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=33.376, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=24.065, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=22.271, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="pw_scatter_s", label="pw_scatter_s", y=13.655, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=13.226, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 Plot: name="diag_cu144_broy_timings_32mpi", title="Timings of diag_cu144_broy with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="rest", label="rest", y=22.282999999999987, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=11.738, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=22.135, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=21.504, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="pw_scatter_s", label="pw_scatter_s", y=0.0, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=4.128, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=5.22, yerr=0.0 Running bench_dftb.inp with 1 threads and 32 ranks... done. Running bench_dftb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/bench_dftb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.129 0.129 317.596 317.596 qs_energies 1 2.0 0.000 0.000 317.372 317.372 ls_scf 1 3.0 0.000 0.000 314.848 314.848 ls_scf_main 1 4.0 0.002 0.002 300.038 300.038 density_matrix_trs4 11 5.0 0.014 0.014 149.186 149.186 ls_scf_dm_to_ks 11 5.0 0.000 0.000 143.103 143.103 matrix_ls_to_qs 11 6.0 0.000 0.000 138.344 138.344 dbcsr_multiply_generic 185 6.1 0.527 0.527 98.572 98.572 dbcsr_copy_into_existing 11 7.0 78.281 78.281 78.281 78.281 dbcsr_complete_redistribute 23 7.5 44.675 44.675 65.623 65.623 matrix_decluster 11 7.0 0.000 0.000 60.061 60.061 multiply_cannon 185 7.1 0.361 0.361 58.822 58.822 multiply_cannon_loop 185 8.1 0.557 0.557 38.752 38.752 multiply_cannon_multrec 185 9.1 36.281 36.281 36.340 36.340 make_m2s 370 7.1 0.036 0.036 33.398 33.398 make_images 370 8.1 7.751 7.751 30.390 30.390 dbcsr_finalize 646 7.5 0.242 0.242 27.065 27.065 dbcsr_merge_all 597 8.5 4.349 4.349 25.240 25.240 arnoldi_extremal 12 6.1 0.000 0.000 24.542 24.542 arnoldi_normal_ev 12 7.1 0.027 0.027 24.541 24.541 build_subspace 23 8.1 0.151 0.151 23.908 23.908 dbcsr_matrix_vector_mult 652 9.0 0.225 0.225 22.867 22.867 dbcsr_matrix_vector_mult_local 652 10.0 21.518 21.518 21.533 21.533 setup_rec_index_2d 370 8.1 19.540 19.540 19.540 19.540 tree_to_linear_d 110 9.4 18.452 18.452 18.452 18.452 dbcsr_sort_indices 1103 9.9 17.391 17.391 17.391 17.391 quick_finalize 395 10.0 0.634 0.634 14.917 14.917 ls_scf_init_scf 1 4.0 0.000 0.000 13.973 13.973 dbcsr_special_finalize 370 9.1 0.004 0.004 13.746 13.746 ls_scf_init_matrix_S 1 5.0 0.000 0.000 13.459 13.459 matrix_sqrt_Newton_Schulz 1 6.0 0.001 0.001 12.440 12.440 dbcsr_dot_sd 144 6.3 11.643 11.643 11.645 11.645 dbcsr_frobenius_norm 142 6.1 9.596 9.596 9.600 9.600 make_images_data 370 9.1 0.016 0.016 8.744 8.744 matrix_qs_to_ls 12 5.1 0.000 0.000 8.142 8.142 matrix_cluster 12 6.1 0.000 0.000 8.142 8.142 dbcsr_new_transposed 2 7.0 0.191 0.191 8.039 8.039 dbcsr_redistribute 2 8.0 7.747 7.747 7.809 7.809 dbcsr_add_d 280 6.0 0.001 0.001 6.548 6.548 dbcsr_add_anytype 280 7.0 1.754 1.754 6.546 6.546 ------------------------------------------------------------------------------- From /workspace/artifacts/bench_dftb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.009 0.010 108.155 108.156 qs_energies 1 2.0 0.000 0.000 108.048 108.048 ls_scf 1 3.0 0.000 0.000 107.963 107.964 ls_scf_main 1 4.0 0.001 0.003 103.336 103.337 density_matrix_trs4 11 5.0 0.010 0.013 99.127 99.324 dbcsr_multiply_generic 185 6.1 0.079 0.089 93.085 93.596 multiply_cannon 185 7.1 0.049 0.059 77.721 79.903 multiply_cannon_loop 185 8.1 0.238 0.289 73.430 75.804 multiply_cannon_multrec 1480 9.1 46.644 53.687 47.167 54.206 mp_waitall_1 11936 10.3 24.250 29.210 24.250 29.210 multiply_cannon_metrocomm3 1480 9.1 0.021 0.025 14.356 23.926 multiply_cannon_metrocomm1 1480 9.1 0.012 0.016 6.081 14.830 make_m2s 370 7.1 0.036 0.041 10.280 10.418 make_images 370 8.1 0.727 0.756 10.151 10.288 calculate_norms 2960 9.1 5.492 6.424 5.492 6.424 mp_sum_l 1039 5.9 3.725 6.302 3.725 6.302 dbcsr_multiply_generic_mpsum_f 137 7.1 0.001 0.001 2.564 4.967 make_images_data 370 9.1 0.014 0.016 4.253 4.893 arnoldi_extremal 12 6.1 0.000 0.001 4.713 4.731 arnoldi_normal_ev 12 7.1 0.002 0.009 4.713 4.730 build_subspace 23 8.1 0.043 0.062 4.552 4.556 hybrid_alltoall_any 393 9.9 0.356 1.775 3.458 3.908 ls_scf_dm_to_ks 11 5.0 0.000 0.000 3.654 3.873 dbcsr_matrix_vector_mult 652 9.0 0.020 0.087 3.677 3.801 ls_scf_init_scf 1 4.0 0.000 0.000 3.644 3.646 ls_scf_init_matrix_S 1 5.0 0.000 0.000 3.599 3.614 dbcsr_complete_redistribute 23 7.5 1.927 2.275 3.164 3.406 matrix_ls_to_qs 11 6.0 0.000 0.000 3.091 3.339 matrix_sqrt_Newton_Schulz 1 6.0 0.001 0.002 3.301 3.308 make_images_pack 370 9.1 2.683 3.185 2.690 3.194 matrix_decluster 11 7.0 0.000 0.000 2.837 3.077 dbcsr_matrix_vector_mult_local 652 10.0 2.639 3.018 2.644 3.025 buffer_matrices_ensure_size 370 8.1 2.463 2.664 2.463 2.664 dbcsr_add_d 280 6.0 0.002 0.005 2.366 2.639 dbcsr_add_anytype 280 7.0 1.303 1.470 2.364 2.637 dbcsr_finalize 646 7.5 0.015 0.018 2.065 2.248 ------------------------------------------------------------------------------- Plot: name="bench_dftb_timings_32omp", title="Timings of bench_dftb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32omp", name="rest", label="rest", y=117.30100000000002, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=78.281, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=44.675, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=36.281, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=21.518, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="setup_rec_index_2d", label="setup_rec_index_2d", y=19.54, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="make_images_pack", label="make_images_pack", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="calculate_norms", label="calculate_norms", y=0.0, yerr=0.0 Plot: name="bench_dftb_timings_32mpi", title="Timings of bench_dftb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32mpi", name="rest", label="rest", y=20.795, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=1.927, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=46.644, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=2.639, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="setup_rec_index_2d", label="setup_rec_index_2d", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="make_images_pack", label="make_images_pack", y=2.683, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=3.725, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=24.25, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="calculate_norms", label="calculate_norms", y=5.492, yerr=0.0 Running dbcsr.inp with 1 threads and 32 ranks... done. Running dbcsr.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/dbcsr_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.005 0.005 106.976 106.976 lib_test 1 2.0 0.000 0.000 106.969 106.969 dbcsr_run_tests 3 3.0 0.003 0.003 106.969 106.969 test_multiplies_multiproc 3 4.0 0.001 0.001 84.506 84.506 dbcsr_redistribute 9 5.0 55.934 55.934 60.142 60.142 dbcsr_multiply_generic 9 5.0 0.001 0.001 22.493 22.493 dbcsr_make_random_matrix 9 4.0 15.454 15.454 22.373 22.373 multiply_cannon 9 6.0 0.002 0.002 15.266 15.266 multiply_cannon_loop 9 7.0 0.003 0.003 14.729 14.729 multiply_cannon_multrec 9 8.0 14.725 14.725 14.726 14.726 dbcsr_finalize 27 5.7 0.005 0.005 11.345 11.345 dbcsr_merge_all 18 6.5 3.818 3.818 10.574 10.574 tree_to_linear_d 9 7.0 4.232 4.232 4.232 4.232 mp_alltoall_d11v 27 6.0 3.881 3.881 3.881 3.881 dbcsr_data_release 975 7.6 2.479 2.479 2.479 2.479 dbcsr_data_copy_aa2 9 7.0 2.476 2.476 2.476 2.476 make_m2s 18 6.0 0.001 0.001 2.199 2.199 ------------------------------------------------------------------------------- From /workspace/artifacts/dbcsr_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.003 0.005 24.765 24.766 lib_test 1 2.0 0.000 0.001 24.735 24.755 dbcsr_run_tests 3 3.0 0.000 0.001 24.734 24.753 test_multiplies_multiproc 3 4.0 0.001 0.001 23.613 23.665 dbcsr_multiply_generic 9 5.0 0.001 0.002 21.727 21.817 multiply_cannon 9 6.0 0.002 0.002 19.608 20.110 multiply_cannon_loop 9 7.0 0.003 0.004 19.182 19.639 multiply_cannon_multrec 72 8.0 16.134 17.057 16.136 17.058 mp_waitall_1 576 9.2 3.440 3.964 3.440 3.964 multiply_cannon_metrocomm1 72 8.0 0.002 0.002 2.679 3.437 mp_sum_l 310 2.7 0.483 1.218 0.483 1.218 dbcsr_multiply_generic_mpsum_f 9 6.0 0.000 0.000 0.479 1.215 dbcsr_make_random_matrix 9 4.0 0.863 0.933 1.081 1.137 make_m2s 18 6.0 0.001 0.001 0.899 0.982 make_images 18 7.0 0.026 0.027 0.896 0.979 multiply_cannon_metrocomm3 72 8.0 0.000 0.000 0.356 0.936 dbcsr_finalize 27 5.7 0.000 0.001 0.795 0.935 dbcsr_merge_all 18 6.5 0.130 0.168 0.716 0.861 dbcsr_data_release 444 7.6 0.611 0.701 0.611 0.701 dbcsr_redistribute 9 5.0 0.369 0.443 0.636 0.689 dbcsr_destroy 111 5.9 0.008 0.095 0.543 0.633 make_images_data 18 8.0 0.001 0.001 0.451 0.575 dbcsr_data_copy_aa2 18 7.5 0.416 0.527 0.416 0.527 hybrid_alltoall_any 18 9.0 0.040 0.181 0.363 0.511 ------------------------------------------------------------------------------- Plot: name="dbcsr_timings_32omp", title="Timings of dbcsr with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32omp", name="rest", label="rest", y=10.271, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_redistribute", label="dbcsr_redistribute", y=55.934, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=15.454, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=14.725, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="tree_to_linear_d", label="tree_to_linear_d", y=4.232, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_alltoall_d11v", label="mp_alltoall_d11v", y=3.881, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_data_release", label="dbcsr_data_release", y=2.479, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 Plot: name="dbcsr_timings_32mpi", title="Timings of dbcsr with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32mpi", name="rest", label="rest", y=2.8649999999999984, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_redistribute", label="dbcsr_redistribute", y=0.369, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=0.863, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=16.134, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="tree_to_linear_d", label="tree_to_linear_d", y=0.0, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_alltoall_d11v", label="mp_alltoall_d11v", y=0.0, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_data_release", label="dbcsr_data_release", y=0.611, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=3.44, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=0.483, yerr=0.0 Running MQAE_single_node.inp with 1 threads and 32 ranks... done. Running MQAE_single_node.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/MQAE_single_node_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.051 0.051 149.821 149.821 qs_mol_dyn_low 1 2.0 0.005 0.005 147.754 147.754 velocity_verlet 5 3.0 0.005 0.005 120.349 120.349 qmmm_el_coupling 6 3.8 0.000 0.000 72.225 72.225 qmmm_elec_with_gaussian 6 4.8 0.234 0.234 72.218 72.218 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 70.515 70.515 qmmm_elec_gaussian_low_G 6 6.8 69.182 69.182 69.182 69.182 qs_forces 6 3.8 0.001 0.001 57.391 57.391 qs_energies 6 4.8 0.001 0.001 50.670 50.670 scf_env_do_scf 6 5.8 0.001 0.001 47.084 47.084 rebuild_ks_matrix 45 8.4 0.000 0.000 41.645 41.645 qs_ks_build_kohn_sham_matrix 45 9.4 0.009 0.009 41.645 41.645 scf_env_do_scf_inner_loop 39 6.8 0.004 0.004 40.788 40.788 qs_ks_update_qs_env 45 7.8 0.001 0.001 35.595 35.595 pw_transfer 966 11.9 0.080 0.080 24.989 24.989 fft_wrap_pw1pw2 801 13.0 0.010 0.010 24.646 24.646 fft_wrap_pw1pw2_150 507 14.3 2.418 2.418 24.115 24.115 qs_vxc_create 45 10.4 0.001 0.001 21.810 21.810 xc_vxc_pw_create 45 11.4 4.051 4.051 21.809 21.809 pw_scatter_s 429 15.4 12.323 12.323 12.323 12.323 xc_rho_set_and_dset_create 45 12.4 0.236 0.236 10.509 10.509 qs_rho_update_rho 45 7.9 0.000 0.000 10.100 10.100 calculate_rho_elec 45 8.9 0.862 0.862 10.099 10.099 pw_integral_ab 2539 7.4 9.565 9.565 9.565 9.565 qmmm_forces 6 3.8 0.002 0.002 8.696 8.696 fft3d_s 802 15.0 8.646 8.646 8.657 8.657 fist_calc_energy_force 6 3.8 0.002 0.002 8.619 8.619 qmmm_forces_with_gaussian 6 4.8 0.172 0.172 8.171 8.171 qs_ks_ddapc 45 10.4 0.001 0.001 7.554 7.554 force_nonbond 6 4.8 7.268 7.268 7.268 7.268 pw_poisson_solve 51 9.9 2.903 2.903 6.345 6.345 init_scf_loop 6 6.8 0.000 0.000 6.291 6.291 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 6.172 6.172 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 6.064 6.064 qmmm_forces_gaussian_low_G 6 6.8 5.129 5.129 5.129 5.129 density_rs2pw 45 9.9 0.003 0.003 4.887 4.887 cp_ddapc_apply_CD 45 11.4 0.007 0.007 4.681 4.681 sum_up_and_integrate 45 10.4 0.300 0.300 4.488 4.488 grid_collocate_task_list 45 9.9 4.350 4.350 4.350 4.350 integrate_v_rspace 45 11.4 0.010 0.010 4.188 4.188 ------------------------------------------------------------------------------- From /workspace/artifacts/MQAE_single_node_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.033 0.035 82.565 82.566 qs_mol_dyn_low 1 2.0 0.004 0.005 81.040 81.132 qs_forces 6 3.8 0.001 0.001 59.364 59.364 qs_energies 6 4.8 0.001 0.001 56.486 56.486 scf_env_do_scf 6 5.8 0.000 0.001 55.046 55.047 scf_env_do_scf_inner_loop 113 6.2 0.003 0.008 52.853 52.854 rebuild_ks_matrix 119 8.1 0.000 0.001 39.054 39.078 qs_ks_build_kohn_sham_matrix 119 9.1 0.020 0.025 39.054 39.078 qs_ks_update_qs_env 119 7.3 0.001 0.001 36.630 36.653 velocity_verlet 5 3.0 0.002 0.002 34.188 34.192 pw_transfer 2446 11.8 0.264 0.304 24.652 25.218 fft_wrap_pw1pw2 2059 12.8 0.032 0.037 23.902 24.608 fft_wrap_pw1pw2_150 1321 14.0 2.096 2.437 23.252 23.865 qs_vxc_create 119 10.1 0.003 0.004 19.643 19.648 xc_vxc_pw_create 119 11.1 0.402 0.532 19.640 19.645 fft3d_ps 2059 14.8 11.009 12.313 18.099 18.909 qs_rho_update_rho 119 7.3 0.001 0.001 15.391 15.392 calculate_rho_elec 119 8.3 0.085 0.094 15.391 15.392 sum_up_and_integrate 119 10.1 0.083 0.090 14.315 14.381 integrate_v_rspace 119 11.1 0.004 0.006 14.233 14.297 qmmm_forces 6 3.8 0.002 0.003 12.033 12.034 qmmm_forces_with_gaussian 6 4.8 0.346 0.435 11.734 11.875 rs_pw_transfer 988 11.5 0.014 0.017 10.484 11.025 density_rs2pw 119 9.3 0.011 0.012 9.200 9.807 xc_rho_set_and_dset_create 119 12.1 0.492 0.585 9.342 9.700 qmmm_el_coupling 6 3.8 0.000 0.000 8.538 8.604 qmmm_elec_with_gaussian 6 4.8 0.305 0.416 8.535 8.601 potential_pw2rs 119 12.1 0.010 0.012 8.167 8.184 grid_collocate_task_list 119 9.3 5.949 6.385 5.949 6.385 grid_integrate_task_list 119 12.1 5.638 5.912 5.638 5.912 mp_alltoall_z22v 2059 16.8 4.297 5.852 4.297 5.852 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 5.682 5.816 qmmm_forces_gaussian_low_G 6 6.8 4.661 4.785 4.661 4.785 rs_pw_transfer_PW2RS_150 125 13.9 2.314 2.443 4.559 4.676 pw_restrict_s3 18 5.8 2.069 2.141 4.502 4.616 mp_waitany 4028 12.8 3.288 4.383 3.288 4.383 rs_pw_transfer_RS2PW_150 125 11.2 1.888 2.062 3.789 4.307 yz_to_x 964 15.3 0.988 1.191 3.067 4.171 x_to_yz 1095 16.3 1.760 1.932 3.977 4.168 qmmm_elec_with_gaussian:spline 6 5.8 0.000 0.000 3.648 3.711 pw_prolongate_s3 18 6.8 1.663 1.701 3.648 3.711 pw_integral_ab 2761 7.7 3.039 3.074 3.367 3.622 qs_scf_new_mos 113 7.2 0.001 0.001 3.548 3.558 qs_scf_loop_do_ot 113 8.2 0.001 0.001 3.547 3.557 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 3.323 3.438 ot_scf_mini 113 9.2 0.002 0.002 3.392 3.403 dbcsr_multiply_generic 2588 12.3 0.095 0.114 3.205 3.310 qs_ks_ddapc 119 10.1 0.002 0.003 2.691 2.866 mp_sum_dm3 33 5.7 2.303 2.592 2.303 2.592 qmmm_elec_gaussian_low_G 6 6.8 2.390 2.491 2.390 2.491 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 2.433 2.435 pw_gather_p 964 14.3 1.871 2.345 1.871 2.345 init_scf_loop 6 6.8 0.000 0.000 2.190 2.190 ot_mini 113 10.2 0.001 0.001 2.142 2.155 mp_waitall_1 188862 16.2 1.830 2.145 1.830 2.145 pw_scatter_p 1095 15.3 1.741 1.837 1.741 1.837 pw_derive 732 12.5 1.600 1.809 1.600 1.809 qs_ot_get_derivative 113 11.2 0.001 0.001 1.692 1.703 ------------------------------------------------------------------------------- Plot: name="MQAE_single_node_timings_32omp", title="Timings of MQAE_single_node with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32omp", name="rest", label="rest", y=33.358000000000004, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=69.182, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_scatter_s", label="pw_scatter_s", y=12.323, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_integral_ab", label="pw_integral_ab", y=9.565, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_s", label="fft3d_s", y=8.646, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="force_nonbond", label="force_nonbond", y=7.268, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=5.129, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=4.35, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=0.0, yerr=0.0 Plot: name="MQAE_single_node_timings_32mpi", title="Timings of MQAE_single_node with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32mpi", name="rest", label="rest", y=45.581999999999994, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=2.39, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_scatter_s", label="pw_scatter_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_integral_ab", label="pw_integral_ab", y=3.039, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="force_nonbond", label="force_nonbond", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=4.661, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=5.949, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=4.297, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=11.009, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=5.638, yerr=0.0 Summary: Performance test works fine. Status: OK Uploading artifacts... done EndDate: 2021-10-04 12:45:15+00:00