StartDate: 2021-10-05 12:03:23+00:00 CpuId: 64x Intel Xeon W 2000 / D-2100 (Skylake / Cascade Lake) {Skylake}, 14nm CommitSHA: 62b873b2330fd5d0572eb06d6310c71ab2240b83 CommitTime: 2021-10-05 11:36:40 +0200 CommitAuthor: fbelle CommitSubject: Adding density matrix based conjugate-gradient linear response solver. (#1664) Trying to pull image cp2k-toolchain-mpich... success :-) Trying to pull image cp2k-perf-openmp... success :-) #################### Running Image cp2k-perf-openmp #################### ========== Fetching Git Commit ========== CommitSHA: 62b873b2330fd5d0572eb06d6310c71ab2240b83 CommitTime: 2021-10-05 11:36:40 +0200 CommitAuthor: fbelle CommitSubject: Adding density matrix based conjugate-gradient linear response solver. (#1664) ========== Running Test ========== ========== Compiling CP2K ========== Compiling cp2k... done. ========== Running Performance Test ========== Running H2O-64.inp with 1 threads and 32 ranks... done. Running H2O-64.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.037 0.037 155.554 155.554 qs_mol_dyn_low 1 2.0 0.004 0.004 154.741 154.741 qs_forces 11 3.9 0.001 0.001 154.683 154.683 qs_energies 11 4.9 0.001 0.001 143.204 143.204 scf_env_do_scf 11 5.9 0.001 0.001 115.831 115.831 velocity_verlet 10 3.0 0.002 0.002 104.061 104.061 scf_env_do_scf_inner_loop 108 6.5 0.010 0.010 91.119 91.119 rebuild_ks_matrix 119 8.3 0.001 0.001 42.125 42.125 qs_ks_build_kohn_sham_matrix 119 9.3 0.018 0.018 42.124 42.124 qs_ks_update_qs_env 119 7.6 0.001 0.001 37.669 37.669 qs_rho_update_rho 119 7.7 0.001 0.001 37.540 37.540 calculate_rho_elec 119 8.7 1.557 1.557 37.540 37.540 grid_collocate_task_list 119 9.7 31.435 31.435 31.435 31.435 sum_up_and_integrate 119 10.3 0.394 0.394 30.081 30.081 integrate_v_rspace 119 11.3 0.162 0.162 29.686 29.686 grid_integrate_task_list 119 12.3 27.028 27.028 27.028 27.028 init_scf_loop 11 6.9 0.000 0.000 24.512 24.512 qs_scf_new_mos 108 7.5 0.001 0.001 23.417 23.417 qs_scf_loop_do_ot 108 8.5 0.001 0.001 23.416 23.416 dbcsr_multiply_generic 2286 12.5 0.176 0.176 22.448 22.448 ot_scf_mini 108 9.5 0.003 0.003 22.016 22.016 prepare_preconditioner 11 7.9 0.000 0.000 19.884 19.884 make_preconditioner 11 8.9 0.000 0.000 19.884 19.884 make_full_inverse_cholesky 11 9.9 0.000 0.000 17.927 17.927 ot_mini 108 10.5 0.001 0.001 14.339 14.339 make_m2s 4572 13.5 0.064 0.064 13.556 13.556 init_scf_run 11 5.9 0.001 0.001 13.313 13.313 scf_env_initial_rho_setup 11 6.9 0.001 0.001 13.312 13.312 wfi_extrapolate 11 7.9 0.001 0.001 12.501 12.501 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 10.474 10.474 cp_gemm 81 9.0 0.000 0.000 9.987 9.987 cp_gemm_cosma 81 10.0 9.987 9.987 9.987 9.987 pw_transfer 1439 11.6 0.095 0.095 7.730 7.730 ot_diis_step 108 11.5 0.006 0.006 7.566 7.566 fft_wrap_pw1pw2 1201 12.6 0.011 0.011 7.413 7.413 make_images 4572 14.5 2.595 2.595 7.118 7.118 cp_fm_cholesky_decompose 22 10.9 6.965 6.965 6.965 6.965 qs_ot_get_derivative 108 11.5 0.002 0.002 6.769 6.769 dbcsr_make_dense_low 5837 15.5 0.098 0.098 6.695 6.695 make_dense_data 5837 16.5 5.993 5.993 6.575 6.575 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 6.515 6.515 apply_single 119 13.6 0.001 0.001 6.515 6.515 dbcsr_complete_redistribute 329 12.2 3.036 3.036 6.389 6.389 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 6.287 6.287 fft_wrap_pw1pw2_140 487 13.2 0.662 0.662 6.268 6.268 dbcsr_make_images_dense 3978 14.8 0.027 0.027 6.018 6.018 qs_env_update_s_mstruct 11 6.9 0.000 0.000 5.988 5.988 qs_create_task_list 11 7.9 0.000 0.000 5.443 5.443 generate_qs_task_list 11 8.9 3.723 3.723 5.443 5.443 dbcsr_copy 2102 12.0 0.303 0.303 5.274 5.274 copy_dbcsr_to_fm 153 11.3 0.004 0.004 5.264 5.264 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 5.189 5.189 dbcsr_copy_into_existing 22 7.9 4.924 4.924 4.925 4.925 cp_fm_cholesky_invert 11 10.9 4.759 4.759 4.759 4.759 multiply_cannon 2286 13.5 0.270 0.270 4.683 4.683 pw_poisson_solve 119 10.3 1.982 1.982 4.681 4.681 density_rs2pw 119 9.7 0.006 0.006 4.547 4.547 transfer_dbcsr_to_fm 11 10.9 0.000 0.000 4.376 4.376 multiply_cannon_loop 2286 14.5 0.047 0.047 3.982 3.982 multiply_cannon_multrec 2286 15.5 3.860 3.860 3.934 3.934 build_core_hamiltonian_matrix 11 6.9 0.001 0.001 3.899 3.899 qs_ot_get_p 119 10.4 0.001 0.001 3.877 3.877 qs_energies_compute_matrix_w 11 5.9 0.000 0.000 3.413 3.413 calculate_w_matrix_ot 11 6.9 0.008 0.008 3.413 3.413 fft3d_s 1202 14.6 3.282 3.282 3.288 3.288 copy_fm_to_dbcsr 176 11.2 0.002 0.002 3.124 3.124 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.011 0.017 72.816 72.818 qs_mol_dyn_low 1 2.0 0.005 0.005 72.687 72.700 qs_forces 11 3.9 0.002 0.002 72.634 72.634 qs_energies 11 4.9 0.001 0.002 67.566 67.567 scf_env_do_scf 11 5.9 0.001 0.001 61.486 61.487 scf_env_do_scf_inner_loop 108 6.5 0.003 0.010 57.031 57.031 velocity_verlet 10 3.0 0.002 0.002 43.140 43.141 rebuild_ks_matrix 119 8.3 0.001 0.001 28.701 28.767 qs_ks_build_kohn_sham_matrix 119 9.3 0.023 0.024 28.700 28.766 qs_ks_update_qs_env 119 7.6 0.001 0.001 25.408 25.467 sum_up_and_integrate 119 10.3 0.046 0.050 22.389 22.432 integrate_v_rspace 119 11.3 0.005 0.005 22.343 22.386 qs_rho_update_rho 119 7.7 0.001 0.001 22.300 22.311 calculate_rho_elec 119 8.7 0.048 0.050 22.299 22.310 dbcsr_multiply_generic 2286 12.5 0.134 0.136 17.317 17.455 grid_collocate_task_list 119 9.7 15.785 16.633 15.785 16.633 grid_integrate_task_list 119 12.3 15.946 16.259 15.946 16.259 qs_scf_new_mos 108 7.5 0.001 0.001 13.986 14.023 qs_scf_loop_do_ot 108 8.5 0.001 0.001 13.985 14.022 ot_scf_mini 108 9.5 0.003 0.004 13.147 13.181 multiply_cannon 2286 13.5 0.218 0.223 11.671 11.866 multiply_cannon_loop 2286 14.5 0.218 0.233 10.665 10.906 mp_waitall_1 169478 16.3 8.548 8.975 8.548 8.975 ot_mini 108 10.5 0.001 0.001 7.822 7.860 rs_pw_transfer 974 11.9 0.017 0.018 6.876 7.734 density_rs2pw 119 9.7 0.009 0.010 5.889 6.758 pw_transfer 1439 11.6 0.134 0.144 5.993 6.052 multiply_cannon_metrocomm3 18288 15.5 0.078 0.081 5.447 5.823 fft_wrap_pw1pw2 1201 12.6 0.015 0.016 5.701 5.768 potential_pw2rs 119 12.3 0.010 0.011 5.317 5.332 fft_wrap_pw1pw2_140 487 13.2 0.593 0.621 5.013 5.192 init_scf_loop 11 6.9 0.000 0.001 4.438 4.438 fft3d_ps 1201 14.6 2.354 2.480 4.220 4.297 init_scf_run 11 5.9 0.000 0.002 4.199 4.200 scf_env_initial_rho_setup 11 6.9 0.000 0.001 4.199 4.199 multiply_cannon_multrec 18288 15.5 4.016 4.086 4.033 4.103 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 4.046 4.095 apply_single 119 13.6 0.001 0.001 4.045 4.095 ot_diis_step 108 11.5 0.005 0.005 4.090 4.090 make_m2s 4572 13.5 0.075 0.078 3.915 3.978 wfi_extrapolate 11 7.9 0.001 0.001 3.801 3.801 qs_ot_get_derivative 108 11.5 0.001 0.002 3.705 3.740 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 3.521 3.528 make_images 4572 14.5 0.188 0.193 3.216 3.287 mp_waitany 9880 13.7 2.213 3.028 2.213 3.028 rs_pw_transfer_RS2PW_140 130 11.5 0.633 0.705 2.059 2.931 rs_pw_transfer_PW2RS_140 130 13.9 1.280 1.341 2.699 2.735 mp_alltoall_d11v 2130 13.8 1.408 2.053 1.408 2.053 qs_ot_get_p 119 10.4 0.001 0.001 1.753 1.803 rs_gather_matrices 119 12.3 0.135 0.148 1.023 1.710 make_images_data 4572 15.5 0.062 0.067 1.560 1.681 hybrid_alltoall_any 4725 16.4 0.131 0.480 1.408 1.523 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 1.415 1.516 prepare_preconditioner 11 7.9 0.000 0.000 1.451 1.461 make_preconditioner 11 8.9 0.000 0.000 1.451 1.461 ------------------------------------------------------------------------------- Plot: name="H2O-64_timings_32omp", title="Timings of H2O-64 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32omp", name="rest", label="rest", y=70.28600000000002, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=31.435, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=27.028, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_gemm_cosma", label="cp_gemm_cosma", y=9.987, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=6.965, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="make_dense_data", label="make_dense_data", y=5.993, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=3.86, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="H2O-64_timings_32mpi", title="Timings of H2O-64 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32mpi", name="rest", label="rest", y=26.167, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=15.785, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=15.946, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_gemm_cosma", label="cp_gemm_cosma", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="make_dense_data", label="make_dense_data", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=4.016, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=2.354, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=8.548, yerr=0.0 Running H2O-64_nonortho.inp with 1 threads and 32 ranks... done. Running H2O-64_nonortho.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_nonortho_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.040 0.040 195.429 195.429 qs_mol_dyn_low 1 2.0 0.004 0.004 194.636 194.636 qs_forces 11 3.9 0.002 0.002 194.578 194.578 qs_energies 11 4.9 0.001 0.001 180.888 180.888 scf_env_do_scf 11 5.9 0.001 0.001 149.646 149.646 velocity_verlet 10 3.0 0.002 0.002 128.411 128.411 scf_env_do_scf_inner_loop 96 6.5 0.009 0.009 121.177 121.177 rebuild_ks_matrix 107 8.3 0.001 0.001 63.113 63.113 qs_ks_build_kohn_sham_matrix 107 9.3 0.017 0.017 63.112 63.112 qs_rho_update_rho 107 7.7 0.001 0.001 57.137 57.137 calculate_rho_elec 107 8.7 1.402 1.402 57.136 57.136 qs_ks_update_qs_env 107 7.6 0.001 0.001 56.538 56.538 sum_up_and_integrate 107 10.3 0.360 0.360 52.149 52.149 integrate_v_rspace 107 11.3 0.150 0.150 51.788 51.788 grid_collocate_task_list 107 9.7 51.549 51.549 51.549 51.549 grid_integrate_task_list 107 12.3 49.325 49.325 49.325 49.325 init_scf_loop 11 6.9 0.000 0.000 28.244 28.244 prepare_preconditioner 11 7.9 0.000 0.000 20.880 20.880 make_preconditioner 11 8.9 0.000 0.000 20.880 20.880 qs_scf_new_mos 96 7.5 0.001 0.001 20.086 20.086 qs_scf_loop_do_ot 96 8.5 0.001 0.001 20.086 20.086 dbcsr_multiply_generic 1966 12.4 0.155 0.155 19.413 19.413 ot_scf_mini 96 9.5 0.003 0.003 18.829 18.829 make_full_inverse_cholesky 11 9.9 0.000 0.000 18.694 18.694 init_scf_run 11 5.9 0.001 0.001 15.831 15.831 scf_env_initial_rho_setup 11 6.9 0.001 0.001 15.830 15.830 wfi_extrapolate 11 7.9 0.001 0.001 14.798 14.798 ot_mini 96 10.5 0.001 0.001 12.089 12.089 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 11.668 11.668 make_m2s 3932 13.4 0.055 0.055 11.588 11.588 cp_gemm 81 9.0 0.000 0.000 10.049 10.049 cp_gemm_cosma 81 10.0 10.049 10.049 10.049 10.049 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 8.256 8.256 cp_fm_cholesky_decompose 22 10.9 7.313 7.313 7.313 7.313 pw_transfer 1295 11.6 0.086 0.086 7.158 7.158 qs_env_update_s_mstruct 11 6.9 0.000 0.000 7.035 7.035 fft_wrap_pw1pw2 1081 12.6 0.009 0.009 6.860 6.860 dbcsr_complete_redistribute 317 12.2 3.075 3.075 6.701 6.701 qs_create_task_list 11 7.9 0.000 0.000 6.471 6.471 generate_qs_task_list 11 8.9 4.796 4.796 6.471 6.471 make_images 3932 14.4 2.336 2.336 6.329 6.329 ot_diis_step 96 11.5 0.005 0.005 6.088 6.088 qs_ot_get_derivative 96 11.5 0.001 0.001 5.997 5.997 fft_wrap_pw1pw2_140 439 13.2 0.592 0.592 5.815 5.815 dbcsr_copy 1855 11.9 0.281 0.281 5.527 5.527 dbcsr_make_dense_low 4961 15.5 0.085 0.085 5.485 5.485 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 5.431 5.431 make_dense_data 4961 16.5 4.897 4.897 5.382 5.382 copy_dbcsr_to_fm 147 11.2 0.004 0.004 5.353 5.353 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 5.315 5.315 apply_single 107 13.6 0.000 0.000 5.315 5.315 dbcsr_copy_into_existing 22 7.9 5.204 5.204 5.204 5.204 cp_fm_cholesky_invert 11 10.9 5.050 5.050 5.050 5.050 dbcsr_make_images_dense 3386 14.7 0.023 0.023 4.883 4.883 transfer_dbcsr_to_fm 11 10.9 0.000 0.000 4.381 4.381 pw_poisson_solve 107 10.3 1.853 1.853 4.316 4.316 multiply_cannon 1966 13.4 0.235 0.235 4.212 4.212 density_rs2pw 107 9.7 0.006 0.006 4.186 4.186 build_core_hamiltonian_matrix 11 6.9 0.001 0.001 4.006 4.006 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_nonortho_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.012 128.061 128.062 qs_mol_dyn_low 1 2.0 0.004 0.005 127.946 127.952 qs_forces 11 3.9 0.002 0.003 127.893 127.893 qs_energies 11 4.9 0.001 0.002 119.063 119.065 scf_env_do_scf 11 5.9 0.001 0.001 109.993 109.993 scf_env_do_scf_inner_loop 96 6.5 0.003 0.009 102.279 102.279 velocity_verlet 10 3.0 0.002 0.002 76.183 76.185 rebuild_ks_matrix 107 8.3 0.001 0.001 58.583 58.610 qs_ks_build_kohn_sham_matrix 107 9.3 0.020 0.021 58.582 58.609 sum_up_and_integrate 107 10.3 0.041 0.044 52.942 52.979 integrate_v_rspace 107 11.3 0.004 0.005 52.901 52.939 qs_ks_update_qs_env 107 7.6 0.001 0.001 51.516 51.542 qs_rho_update_rho 107 7.7 0.001 0.001 49.618 49.627 calculate_rho_elec 107 8.7 0.043 0.044 49.617 49.626 grid_integrate_task_list 107 12.3 46.218 47.117 46.218 47.117 grid_collocate_task_list 107 9.7 43.182 44.218 43.182 44.218 dbcsr_multiply_generic 1966 12.4 0.115 0.118 15.235 15.404 qs_scf_new_mos 96 7.5 0.001 0.001 12.110 12.151 qs_scf_loop_do_ot 96 8.5 0.001 0.001 12.110 12.150 ot_scf_mini 96 9.5 0.003 0.003 11.375 11.411 multiply_cannon 1966 13.4 0.187 0.191 10.325 10.519 multiply_cannon_loop 1966 14.4 0.188 0.195 9.454 9.766 rs_pw_transfer 878 11.9 0.015 0.016 6.930 8.084 mp_waitall_1 146670 16.2 7.582 7.927 7.582 7.927 init_scf_loop 11 6.9 0.000 0.001 7.697 7.697 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 7.276 7.281 init_scf_run 11 5.9 0.000 0.002 7.172 7.172 scf_env_initial_rho_setup 11 6.9 0.000 0.001 7.172 7.172 density_rs2pw 107 9.7 0.008 0.009 5.883 7.050 ot_mini 96 10.5 0.001 0.001 6.774 6.813 wfi_extrapolate 11 7.9 0.001 0.001 6.539 6.539 pw_transfer 1295 11.6 0.120 0.131 5.307 5.371 fft_wrap_pw1pw2 1081 12.6 0.013 0.015 5.047 5.126 multiply_cannon_metrocomm3 15728 15.4 0.068 0.070 4.820 5.121 potential_pw2rs 107 12.3 0.009 0.010 4.826 4.832 fft_wrap_pw1pw2_140 439 13.2 0.523 0.545 4.430 4.590 mp_waitany 8968 13.7 2.821 3.957 2.821 3.957 fft3d_ps 1081 14.6 2.063 2.177 3.734 3.808 rs_pw_transfer_RS2PW_140 118 11.5 0.445 0.467 2.504 3.673 multiply_cannon_multrec 15728 15.4 3.572 3.656 3.587 3.671 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 3.570 3.605 apply_single 107 13.6 0.001 0.001 3.569 3.605 ot_diis_step 96 11.5 0.004 0.004 3.559 3.559 mp_alltoall_d11v 1998 13.7 2.149 3.552 2.149 3.552 make_m2s 3932 13.4 0.064 0.067 3.439 3.485 qs_ot_get_derivative 96 11.5 0.001 0.001 3.192 3.228 rs_gather_matrices 107 12.3 0.130 0.143 1.801 3.169 make_images 3932 14.4 0.164 0.167 2.838 2.883 rs_pw_transfer_PW2RS_140 118 13.9 1.202 1.251 2.550 2.585 ------------------------------------------------------------------------------- Plot: name="H2O-64_nonortho_timings_32omp", title="Timings of H2O-64_nonortho with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="rest", label="rest", y=71.989, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=51.549, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=49.325, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_gemm_cosma", label="cp_gemm_cosma", y=10.049, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=7.313, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=5.204, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="H2O-64_nonortho_timings_32mpi", title="Timings of H2O-64_nonortho with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="rest", label="rest", y=24.686000000000007, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=43.182, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=46.218, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_gemm_cosma", label="cp_gemm_cosma", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=3.572, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitany", label="mp_waitany", y=2.821, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=7.582, yerr=0.0 Running H2O-hyb.inp with 1 threads and 32 ranks... done. Running H2O-hyb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-hyb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.360 0.360 240.960 240.960 qs_energies 1 2.0 0.000 0.000 239.721 239.721 scf_env_do_scf 1 3.0 0.000 0.000 237.098 237.098 qs_ks_update_qs_env 8 5.0 0.000 0.000 228.436 228.436 rebuild_ks_matrix 7 6.0 0.000 0.000 228.327 228.327 qs_ks_build_kohn_sham_matrix 7 7.0 0.002 0.002 228.327 228.327 hfx_ks_matrix 7 8.0 0.000 0.000 165.995 165.995 integrate_four_center 7 9.0 2.142 2.142 165.965 165.965 integrate_four_center_main 7 10.0 0.772 0.772 154.665 154.665 integrate_four_center_bin 444 11.0 153.893 153.893 153.893 153.893 scf_env_do_scf_inner_loop 7 4.0 0.001 0.001 142.587 142.587 init_scf_loop 1 4.0 0.000 0.000 94.494 94.494 cp_gemm 129 10.3 0.001 0.001 46.755 46.755 cp_gemm_cosma 129 11.3 46.755 46.755 46.755 46.755 admm_mo_calc_rho_aux 7 8.0 0.000 0.000 27.295 27.295 admm_fit_mo_coeffs 7 9.0 0.000 0.000 24.627 24.627 admm_mo_merge_derivs 7 8.0 0.000 0.000 23.911 23.911 merge_mo_derivs_diag 7 9.0 0.024 0.024 23.911 23.911 purify_mo_diag 7 10.0 0.001 0.001 13.046 13.046 fit_mo_coeffs 7 10.0 0.000 0.000 11.581 11.581 integrate_four_center_load 7 10.0 0.000 0.000 8.767 8.767 hfx_load_balance 1 11.0 0.002 0.002 8.767 8.767 calculate_rho_elec 15 7.4 0.194 0.194 6.233 6.233 grid_collocate_task_list 15 8.4 5.459 5.459 5.459 5.459 qs_vxc_create 14 8.0 0.000 0.000 5.000 5.000 xc_vxc_pw_create 14 9.0 0.828 0.828 5.000 5.000 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-hyb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.201 0.208 183.663 183.664 qs_energies 1 2.0 0.001 0.001 183.322 183.323 scf_env_do_scf 1 3.0 0.000 0.000 182.699 182.699 qs_ks_update_qs_env 8 5.0 0.000 0.000 179.734 179.734 rebuild_ks_matrix 7 6.0 0.000 0.000 179.720 179.720 qs_ks_build_kohn_sham_matrix 7 7.0 0.002 0.003 179.720 179.720 hfx_ks_matrix 7 8.0 0.000 0.001 168.897 168.897 integrate_four_center 7 9.0 0.098 0.398 168.880 168.880 integrate_four_center_main 7 10.0 0.005 0.005 154.246 157.910 integrate_four_center_bin 448 11.0 154.241 157.905 154.241 157.905 scf_env_do_scf_inner_loop 7 4.0 0.000 0.001 106.456 106.456 init_scf_loop 1 4.0 0.000 0.000 76.241 76.241 integrate_four_center_load 7 10.0 0.000 0.000 8.759 8.761 hfx_load_balance 1 11.0 0.001 0.002 8.759 8.761 mp_sync 70 11.3 5.032 8.421 5.032 8.421 hfx_load_balance_bin 1 12.0 4.306 4.386 4.306 4.386 hfx_load_balance_count 1 12.0 4.299 4.385 4.299 4.385 ------------------------------------------------------------------------------- Plot: name="H2O-hyb_timings_32omp", title="Timings of H2O-hyb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32omp", name="rest", label="rest", y=31.522999999999996, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_bin", label="integrate_four_center_bin", y=153.893, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="cp_gemm_cosma", label="cp_gemm_cosma", y=46.755, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=5.459, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center", label="integrate_four_center", y=2.142, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="xc_vxc_pw_create", label="xc_vxc_pw_create", y=0.828, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="CP2K", label="CP2K", y=0.36, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_count", label="hfx_load_balance_count", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=0.0, yerr=0.0 Plot: name="H2O-hyb_timings_32mpi", title="Timings of H2O-hyb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32mpi", name="rest", label="rest", y=15.485999999999962, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_bin", label="integrate_four_center_bin", y=154.241, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="cp_gemm_cosma", label="cp_gemm_cosma", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center", label="integrate_four_center", y=0.098, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="xc_vxc_pw_create", label="xc_vxc_pw_create", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="CP2K", label="CP2K", y=0.201, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_count", label="hfx_load_balance_count", y=4.299, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="mp_sync", label="mp_sync", y=5.032, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=4.306, yerr=0.0 Running GW_PBE_4benzene.inp with 1 threads and 32 ranks... done. Running GW_PBE_4benzene.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/GW_PBE_4benzene_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.020 0.020 338.277 338.277 qs_energies 1 2.0 0.000 0.000 337.765 337.765 mp2_main 1 3.0 0.000 0.000 333.165 333.165 mp2_gpw_main 1 4.0 0.000 0.000 332.981 332.981 rpa_ri_compute_en 1 5.0 0.000 0.000 313.354 313.354 rpa_num_int 1 6.0 0.001 0.001 313.329 313.329 compute_mat_P_omega 1 7.0 0.002 0.002 192.893 192.893 compute_mat_P_omega_contract 10 8.0 12.539 12.539 191.704 191.704 dbcsr_t_total 2336 9.6 0.016 0.016 181.087 181.087 dbcsr_t_contract 787 11.0 46.801 46.801 108.118 108.118 cp_gemm 105 8.4 0.000 0.000 98.281 98.281 cp_gemm_cosma 105 9.4 98.280 98.280 98.280 98.280 dbcsr_t_copy 1103 10.7 19.997 19.997 71.468 71.468 compute_mat_P_omega_calc_M_occ 250 9.0 12.626 12.626 71.383 71.383 GW_matrix_operations 10 7.0 0.006 0.006 65.353 65.353 dbcsr_tas_total 1149 12.2 0.047 0.047 55.155 55.155 dbcsr_tas_multiply 807 12.1 0.003 0.003 53.677 53.677 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 44.016 44.016 dbcsr_multiply_generic 837 15.8 0.128 0.128 39.696 39.696 dbcsr_tas_dbcsr 807 14.1 0.003 0.003 39.428 39.428 rpa_num_int_RPA_matrix_operati 10 7.0 0.000 0.000 34.060 34.060 contract_P_omega_with_mat_L 10 8.0 0.000 0.000 32.336 32.336 dbcsr_tas_reserve_blocks_index 3261 13.7 7.137 7.137 28.257 28.257 dbcsr_tas_mm_1N 524 15.1 0.002 0.002 27.739 27.739 multiply_cannon 837 16.8 0.357 0.357 25.546 25.546 dbcsr_tas_copy 574 11.4 16.954 16.954 24.653 24.653 multiply_cannon_loop 837 17.8 0.163 0.163 22.164 22.164 dbcsr_t_reserve_blocks_index 2280 12.5 1.291 1.291 21.899 21.899 multiply_cannon_multrec 837 18.8 20.038 20.038 20.865 20.865 dbcsr_reserve_blocks 3717 14.7 20.399 20.399 20.791 20.791 dbcsr_t_reserve_blocks_index_a 2222 11.6 0.010 0.010 20.627 20.627 mp2_ri_gpw_compute_in 1 5.0 0.000 0.000 19.613 19.613 compute_mat_P_omega_copy_M_occ 250 9.0 0.002 0.002 19.416 19.416 compute_QP_energies 1 7.0 0.000 0.000 19.064 19.064 compute_self_energy_cubic_gw 1 8.0 0.106 0.106 19.064 19.064 compute_mat_P_omega_copy_M_vir 250 9.0 0.002 0.002 14.266 14.266 dbcsr_t_copy_nocomm 251 12.0 11.147 11.147 13.535 13.535 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 12.146 12.146 make_m2s 1674 16.8 0.104 0.104 11.586 11.586 make_images 1674 17.8 5.427 5.427 11.105 11.105 dbcsr_tas_mm_2 251 15.0 0.001 0.001 10.302 10.302 dbcsr_finalize 9888 13.6 1.552 1.552 8.257 8.257 contract_cubic_gw 21 9.0 0.000 0.000 7.944 7.944 mp2_ri_gpw_compute_in_copy_3c 6 6.0 0.667 0.667 7.314 7.314 build_3c_integrals 5 6.0 3.256 3.256 7.223 7.223 ------------------------------------------------------------------------------- From /workspace/artifacts/GW_PBE_4benzene_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.006 0.009 59.417 59.419 qs_energies 1 2.0 0.001 0.001 59.296 59.303 mp2_main 1 3.0 0.000 0.001 57.835 57.842 mp2_gpw_main 1 4.0 0.000 0.001 57.767 57.774 rpa_ri_compute_en 1 5.0 0.000 0.000 55.701 55.707 rpa_num_int 1 6.0 0.000 0.001 55.693 55.700 dbcsr_t_total 2336 9.6 0.016 0.018 44.493 44.494 compute_mat_P_omega 1 7.0 0.001 0.004 43.344 43.354 compute_mat_P_omega_contract 10 8.0 0.823 0.856 43.132 43.136 dbcsr_t_contract 787 11.0 1.942 2.107 32.679 32.683 dbcsr_tas_total 1149 12.2 0.065 0.070 28.818 28.819 dbcsr_tas_multiply 807 12.1 0.003 0.003 28.683 28.686 dbcsr_tas_dbcsr 807 14.1 0.003 0.004 21.011 21.011 dbcsr_multiply_generic 837 15.8 0.074 0.077 17.528 18.557 compute_mat_P_omega_calc_M_occ 250 9.0 0.797 0.826 14.474 14.474 multiply_cannon 837 16.8 0.140 0.155 10.235 10.774 dbcsr_t_copy 1111 10.7 4.492 4.757 10.190 10.622 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 10.493 10.494 dbcsr_tas_mm_1N 524 15.1 0.003 0.003 9.308 10.246 multiply_cannon_loop 837 17.8 0.045 0.049 9.325 9.809 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 9.206 9.206 dbcsr_tas_mm_2 251 15.0 0.002 0.002 7.944 7.944 multiply_cannon_multrec 1386 17.8 7.189 7.629 7.455 7.874 mp_sync 8696 11.6 6.631 7.763 6.631 7.763 cp_gemm 105 8.4 0.000 0.000 7.270 7.281 cp_gemm_cosma 105 9.4 7.270 7.281 7.270 7.281 make_m2s 1674 16.8 0.047 0.050 6.304 6.881 make_images 1674 17.8 0.249 0.260 6.219 6.795 GW_matrix_operations 10 7.0 0.001 0.002 4.814 4.820 compute_QP_energies 1 7.0 0.000 0.001 4.486 4.486 compute_self_energy_cubic_gw 1 8.0 0.005 0.005 4.483 4.486 dbcsr_t_communicate_buffer 1098 11.7 0.096 0.103 3.929 4.081 mp_waitall_2 3776 14.7 3.691 4.004 3.691 4.004 make_images_data 1674 18.8 0.038 0.040 3.351 3.542 contract_cubic_gw 21 9.0 0.000 0.000 3.449 3.449 hybrid_alltoall_any 1724 19.5 2.585 2.917 3.228 3.416 dbcsr_t_reserve_blocks_index 2849 12.4 0.110 0.117 2.898 3.231 dbcsr_t_reserve_blocks_index_a 2791 11.4 0.020 0.022 2.853 3.175 dbcsr_tas_reserve_blocks_index 3300 13.8 0.268 0.297 2.845 3.173 make_images_pack 1674 18.8 2.407 2.912 2.422 2.926 dbcsr_reserve_blocks 3785 14.7 2.572 2.877 2.613 2.919 rpa_num_int_RPA_matrix_operati 10 7.0 0.000 0.000 2.676 2.681 contract_P_omega_with_mat_L 10 8.0 0.000 0.000 2.556 2.562 mp_waitall_1 26582 19.0 1.731 2.212 1.731 2.212 convert_to_new_pgrid 2421 14.1 0.018 0.019 2.013 2.164 dbcsr_copy 3323 15.8 1.946 2.100 1.976 2.129 mp2_ri_gpw_compute_in 1 5.0 0.000 0.001 2.064 2.064 compute_mat_P_omega_copy_M_vir 250 9.0 0.002 0.002 1.845 1.850 dbcsr_add_anytype 909 13.7 1.091 1.142 1.691 1.765 compute_mat_P_omega_copy_M_occ 250 9.0 0.001 0.002 1.633 1.638 dbcsr_tas_replicate 396 14.1 0.817 0.904 1.413 1.489 scf_env_do_scf 1 3.0 0.000 0.000 1.401 1.401 scf_env_do_scf_inner_loop 17 4.0 0.001 0.002 1.400 1.401 mp_max_i 2057 9.6 0.980 1.245 0.980 1.245 ------------------------------------------------------------------------------- Plot: name="GW_PBE_4benzene_timings_32omp", title="Timings of GW_PBE_4benzene with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="rest", label="rest", y=132.76199999999994, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="cp_gemm_cosma", label="cp_gemm_cosma", y=98.28, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbcsr_t_contract", label="dbcsr_t_contract", y=46.801, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbcsr_reserve_blocks", label="dbcsr_reserve_blocks", y=20.399, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=20.038, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbcsr_t_copy", label="dbcsr_t_copy", y=19.997, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 Plot: name="GW_PBE_4benzene_timings_32mpi", title="Timings of GW_PBE_4benzene with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="rest", label="rest", y=25.630000000000003, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="cp_gemm_cosma", label="cp_gemm_cosma", y=7.27, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbcsr_t_contract", label="dbcsr_t_contract", y=1.942, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbcsr_reserve_blocks", label="dbcsr_reserve_blocks", y=2.572, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=7.189, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbcsr_t_copy", label="dbcsr_t_copy", y=4.492, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=3.691, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_sync", label="mp_sync", y=6.631, yerr=0.0 Running diag_cu144_broy.inp with 1 threads and 32 ranks... done. Running diag_cu144_broy.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/diag_cu144_broy_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.131 0.131 177.890 177.890 qs_energies 1 2.0 0.000 0.000 176.119 176.119 scf_env_do_scf 1 3.0 0.000 0.000 165.508 165.508 scf_env_do_scf_inner_loop 15 4.0 0.002 0.002 165.507 165.507 qs_ks_update_qs_env 15 5.0 0.000 0.000 70.748 70.748 rebuild_ks_matrix 15 6.0 0.000 0.000 70.361 70.361 qs_ks_build_kohn_sham_matrix 15 7.0 0.003 0.003 70.361 70.361 qs_scf_new_mos 15 5.0 0.000 0.000 62.824 62.824 eigensolver 15 6.0 0.002 0.002 49.074 49.074 qs_vxc_create 15 8.0 0.003 0.003 45.948 45.948 calculate_dispersion_nonloc 15 9.0 8.964 8.964 40.301 40.301 cp_fm_diag_elpa 15 7.0 0.000 0.000 33.788 33.788 cp_fm_diag_elpa_base 15 8.0 29.031 29.031 33.788 33.788 pw_transfer 1191 9.8 0.097 0.097 27.727 27.727 fft_wrap_pw1pw2 1086 10.9 0.014 0.014 27.412 27.412 qs_rho_update_rho 16 5.0 0.000 0.000 25.431 25.431 calculate_rho_elec 16 6.0 0.347 0.347 25.431 25.431 grid_collocate_task_list 16 7.0 23.764 23.764 23.764 23.764 sum_up_and_integrate 15 8.0 0.077 0.077 22.750 22.750 integrate_v_rspace 15 9.0 0.034 0.034 22.673 22.673 grid_integrate_task_list 15 10.0 22.010 22.010 22.010 22.010 fft_wrap_pw1pw2_150 765 12.0 3.373 3.373 20.805 20.805 fft3d_s 1087 12.8 11.427 11.427 11.438 11.438 copy_dbcsr_to_fm 16 5.9 0.001 0.001 11.313 11.313 cp_fm_cholesky_restore 45 7.0 10.824 10.824 10.824 10.824 pw_scatter_s 585 13.0 10.818 10.818 10.818 10.818 dbcsr_complete_redistribute 46 8.3 3.550 3.550 9.817 9.817 cp_fm_upper_to_full 30 8.0 9.217 9.217 9.217 9.217 vdW_energy 15 10.0 8.285 8.285 8.285 8.285 gspace_mixing 14 5.0 0.273 0.273 7.827 7.827 broyden_mixing 14 6.0 7.077 7.077 7.077 7.077 fft_wrap_pw1pw2_200 197 11.5 0.373 0.373 6.346 6.346 xc_vxc_pw_create 15 9.0 1.356 1.356 5.644 5.644 init_scf_run 1 3.0 0.000 0.000 5.183 5.183 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 4.627 4.627 dbcsr_finalize 159 9.9 0.021 0.021 4.141 4.141 dbcsr_merge_all 91 11.1 0.078 0.078 3.986 3.986 mp_alltoall_d11v 186 9.2 3.794 3.794 3.794 3.794 ------------------------------------------------------------------------------- From /workspace/artifacts/diag_cu144_broy_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.016 0.018 89.019 89.020 qs_energies 1 2.0 0.000 0.001 88.636 88.637 scf_env_do_scf 1 3.0 0.000 0.000 83.498 83.499 scf_env_do_scf_inner_loop 15 4.0 0.002 0.003 83.498 83.499 qs_ks_update_qs_env 15 5.0 0.000 0.000 41.342 41.362 rebuild_ks_matrix 15 6.0 0.000 0.000 41.289 41.310 qs_ks_build_kohn_sham_matrix 15 7.0 0.004 0.005 41.289 41.310 qs_rho_update_rho 16 5.0 0.000 0.000 23.879 23.882 calculate_rho_elec 16 6.0 0.012 0.013 23.879 23.882 sum_up_and_integrate 15 8.0 0.015 0.017 23.490 23.533 integrate_v_rspace 15 9.0 0.001 0.001 23.475 23.517 grid_collocate_task_list 16 7.0 22.075 22.650 22.075 22.650 grid_integrate_task_list 15 10.0 21.559 22.224 21.559 22.224 qs_scf_new_mos 15 5.0 0.001 0.001 18.662 18.770 qs_vxc_create 15 8.0 0.001 0.001 17.217 17.251 eigensolver 15 6.0 0.002 0.003 17.016 17.035 calculate_dispersion_nonloc 15 9.0 1.449 1.496 13.984 14.017 pw_transfer 1191 9.8 0.144 0.153 13.509 13.690 fft_wrap_pw1pw2 1086 10.9 0.023 0.025 13.184 13.375 cp_fm_diag_elpa 15 7.0 0.000 0.000 12.418 12.425 cp_fm_diag_elpa_base 15 8.0 12.150 12.191 12.412 12.416 fft3d_ps 1086 12.9 5.770 6.024 9.988 10.355 fft_wrap_pw1pw2_150 765 12.0 0.790 0.842 8.856 8.893 cp_fm_cholesky_restore 45 7.0 4.339 4.405 4.339 4.405 fft_wrap_pw1pw2_200 197 11.5 0.420 0.448 4.160 4.302 xc_vxc_pw_create 15 9.0 0.064 0.091 3.232 3.245 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 3.159 3.159 build_core_hamiltonian_matrix 1 4.0 0.000 0.000 2.742 2.982 mp_alltoall_z22v 1086 14.9 2.539 2.872 2.539 2.872 x_to_yz 585 14.0 1.040 1.093 2.415 2.525 vdW_energy 15 10.0 2.173 2.268 2.173 2.268 rs_pw_transfer 158 9.4 0.002 0.003 1.622 2.127 build_core_ppnl 1 5.0 1.831 2.005 1.831 2.005 yz_to_x 501 13.7 0.605 0.702 1.770 1.999 density_rs2pw 16 7.0 0.002 0.002 1.613 1.895 ------------------------------------------------------------------------------- Plot: name="diag_cu144_broy_timings_32omp", title="Timings of diag_cu144_broy with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32omp", name="rest", label="rest", y=80.83399999999999, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=29.031, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=23.764, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=22.01, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_s", label="fft3d_s", y=11.427, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=10.824, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 Plot: name="diag_cu144_broy_timings_32mpi", title="Timings of diag_cu144_broy with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="rest", label="rest", y=23.126000000000005, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=12.15, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=22.075, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=21.559, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=4.339, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=5.77, yerr=0.0 Running bench_dftb.inp with 1 threads and 32 ranks... done. Running bench_dftb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/bench_dftb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.094 0.094 283.010 283.010 qs_energies 1 2.0 0.000 0.000 282.843 282.843 ls_scf 1 3.0 0.000 0.000 281.039 281.039 ls_scf_main 1 4.0 0.002 0.002 268.248 268.248 density_matrix_trs4 11 5.0 0.011 0.011 137.761 137.761 ls_scf_dm_to_ks 11 5.0 0.000 0.000 123.858 123.858 matrix_ls_to_qs 11 6.0 0.000 0.000 119.563 119.563 dbcsr_multiply_generic 185 6.1 0.491 0.491 92.599 92.599 dbcsr_copy_into_existing 11 7.0 69.103 69.103 69.103 69.103 multiply_cannon 185 7.1 0.321 0.321 55.759 55.759 dbcsr_complete_redistribute 23 7.5 39.332 39.332 55.136 55.136 matrix_decluster 11 7.0 0.000 0.000 50.458 50.458 multiply_cannon_loop 185 8.1 0.392 0.392 38.378 38.378 multiply_cannon_multrec 185 9.1 36.062 36.062 36.111 36.111 make_m2s 370 7.1 0.030 0.030 30.687 30.687 make_images 370 8.1 7.598 7.598 28.288 28.288 arnoldi_extremal 12 6.1 0.000 0.000 24.181 24.181 arnoldi_normal_ev 12 7.1 0.027 0.027 24.181 24.181 build_subspace 23 8.1 0.136 0.136 23.574 23.574 dbcsr_matrix_vector_mult 652 9.0 0.237 0.237 22.679 22.679 dbcsr_matrix_vector_mult_local 652 10.0 21.378 21.378 21.399 21.399 dbcsr_finalize 646 7.5 0.205 0.205 20.796 20.796 dbcsr_merge_all 597 8.5 3.214 3.214 19.104 19.104 setup_rec_index_2d 370 8.1 16.911 16.911 16.911 16.911 dbcsr_sort_indices 1103 9.9 16.728 16.728 16.728 16.728 quick_finalize 395 10.0 0.501 0.501 14.244 14.244 tree_to_linear_d 110 9.4 13.535 13.535 13.535 13.535 dbcsr_special_finalize 370 9.1 0.003 0.003 13.136 13.136 ls_scf_init_scf 1 4.0 0.000 0.000 11.922 11.922 ls_scf_init_matrix_S 1 5.0 0.000 0.000 11.465 11.465 matrix_sqrt_Newton_Schulz 1 6.0 0.001 0.001 10.614 10.614 dbcsr_dot_sd 144 6.3 9.120 9.120 9.121 9.121 dbcsr_frobenius_norm 142 6.1 7.882 7.882 7.885 7.885 make_images_data 370 9.1 0.010 0.010 7.366 7.366 matrix_qs_to_ls 12 5.1 0.000 0.000 6.906 6.906 matrix_cluster 12 6.1 0.000 0.000 6.906 6.906 dbcsr_new_transposed 2 7.0 0.136 0.136 6.799 6.799 dbcsr_redistribute 2 8.0 6.551 6.551 6.624 6.624 hybrid_alltoall_any 393 9.9 5.404 5.404 6.206 6.206 ------------------------------------------------------------------------------- From /workspace/artifacts/bench_dftb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.009 99.683 99.684 qs_energies 1 2.0 0.000 0.000 99.593 99.596 ls_scf 1 3.0 0.000 0.000 99.518 99.521 ls_scf_main 1 4.0 0.001 0.003 95.584 95.584 density_matrix_trs4 11 5.0 0.009 0.013 91.702 91.807 dbcsr_multiply_generic 185 6.1 0.075 0.088 86.481 86.655 multiply_cannon 185 7.1 0.042 0.045 72.394 73.223 multiply_cannon_loop 185 8.1 0.235 0.246 68.417 69.977 multiply_cannon_multrec 1480 9.1 44.298 46.267 44.805 46.763 mp_waitall_1 11936 10.3 21.464 24.231 21.464 24.231 multiply_cannon_metrocomm3 1480 9.1 0.019 0.021 12.499 15.919 make_m2s 370 7.1 0.035 0.039 9.698 9.785 make_images 370 8.1 0.705 0.732 9.575 9.666 multiply_cannon_metrocomm1 1480 9.1 0.011 0.012 5.383 8.261 calculate_norms 2960 9.1 5.410 5.608 5.410 5.608 make_images_data 370 9.1 0.013 0.014 4.009 4.424 arnoldi_extremal 12 6.1 0.000 0.001 3.743 3.754 arnoldi_normal_ev 12 7.1 0.002 0.008 3.743 3.753 build_subspace 23 8.1 0.041 0.054 3.614 3.617 hybrid_alltoall_any 393 9.9 0.348 1.766 3.286 3.565 mp_sum_l 1039 5.9 3.052 3.501 3.052 3.501 ls_scf_dm_to_ks 11 5.0 0.000 0.000 3.367 3.428 dbcsr_matrix_vector_mult 652 9.0 0.018 0.079 3.069 3.141 dbcsr_complete_redistribute 23 7.5 1.811 1.902 2.966 3.084 matrix_ls_to_qs 11 6.0 0.000 0.000 2.928 3.060 ls_scf_init_scf 1 4.0 0.000 0.000 3.020 3.020 ls_scf_init_matrix_S 1 5.0 0.000 0.000 2.981 2.991 matrix_decluster 11 7.0 0.000 0.000 2.676 2.803 make_images_pack 370 9.1 2.584 2.771 2.589 2.776 matrix_sqrt_Newton_Schulz 1 6.0 0.001 0.001 2.729 2.731 dbcsr_matrix_vector_mult_local 652 10.0 2.492 2.579 2.496 2.583 buffer_matrices_ensure_size 370 8.1 2.367 2.496 2.367 2.496 dbcsr_multiply_generic_mpsum_f 137 7.1 0.000 0.001 2.079 2.480 dbcsr_add_d 280 6.0 0.002 0.002 2.220 2.283 dbcsr_add_anytype 280 7.0 1.208 1.271 2.218 2.282 dbcsr_finalize 646 7.5 0.014 0.015 2.049 2.128 ------------------------------------------------------------------------------- Plot: name="bench_dftb_timings_32omp", title="Timings of bench_dftb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32omp", name="rest", label="rest", y=100.22399999999999, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=69.103, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=39.332, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=36.062, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=21.378, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="setup_rec_index_2d", label="setup_rec_index_2d", y=16.911, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="calculate_norms", label="calculate_norms", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="make_images_pack", label="make_images_pack", y=0.0, yerr=0.0 Plot: name="bench_dftb_timings_32mpi", title="Timings of bench_dftb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32mpi", name="rest", label="rest", y=18.572000000000003, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=1.811, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=44.298, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=2.492, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="setup_rec_index_2d", label="setup_rec_index_2d", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=21.464, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=3.052, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="calculate_norms", label="calculate_norms", y=5.41, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="make_images_pack", label="make_images_pack", y=2.584, yerr=0.0 Running dbcsr.inp with 1 threads and 32 ranks... done. Running dbcsr.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/dbcsr_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.005 0.005 106.581 106.581 lib_test 1 2.0 0.000 0.000 106.575 106.575 dbcsr_run_tests 3 3.0 0.003 0.003 106.574 106.574 test_multiplies_multiproc 3 4.0 0.001 0.001 87.070 87.070 dbcsr_redistribute 9 5.0 57.672 57.672 61.333 61.333 dbcsr_multiply_generic 9 5.0 0.001 0.001 23.801 23.801 dbcsr_make_random_matrix 9 4.0 14.124 14.124 19.414 19.414 multiply_cannon 9 6.0 0.002 0.002 17.087 17.087 multiply_cannon_loop 9 7.0 0.003 0.003 16.525 16.525 multiply_cannon_multrec 9 8.0 16.521 16.521 16.522 16.522 dbcsr_finalize 27 5.7 0.005 0.005 9.122 9.122 dbcsr_merge_all 18 6.5 3.263 3.263 8.385 8.385 mp_alltoall_d11v 27 6.0 3.314 3.314 3.314 3.314 tree_to_linear_d 9 7.0 3.205 3.205 3.205 3.205 dbcsr_data_release 975 7.6 2.478 2.478 2.478 2.478 make_m2s 18 6.0 0.001 0.001 2.273 2.273 make_images 18 7.0 0.694 0.694 2.184 2.184 ------------------------------------------------------------------------------- From /workspace/artifacts/dbcsr_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.003 0.005 27.392 27.392 lib_test 1 2.0 0.000 0.000 27.363 27.382 dbcsr_run_tests 3 3.0 0.000 0.001 27.362 27.381 test_multiplies_multiproc 3 4.0 0.001 0.001 26.204 26.314 dbcsr_multiply_generic 9 5.0 0.002 0.002 24.199 24.282 multiply_cannon 9 6.0 0.002 0.003 21.803 22.268 multiply_cannon_loop 9 7.0 0.004 0.004 21.355 21.806 multiply_cannon_multrec 72 8.0 17.959 18.975 17.961 18.976 mp_waitall_1 576 9.2 3.839 4.670 3.839 4.670 multiply_cannon_metrocomm1 72 8.0 0.002 0.002 3.032 3.802 mp_sum_l 310 2.7 0.516 1.288 0.516 1.288 dbcsr_multiply_generic_mpsum_f 9 6.0 0.000 0.000 0.512 1.285 dbcsr_make_random_matrix 9 4.0 0.883 0.906 1.120 1.156 make_m2s 18 6.0 0.001 0.001 1.013 1.060 make_images 18 7.0 0.027 0.028 1.009 1.056 dbcsr_finalize 27 5.7 0.001 0.001 0.927 1.036 dbcsr_merge_all 18 6.5 0.148 0.178 0.811 0.911 dbcsr_data_release 444 7.6 0.670 0.780 0.670 0.780 dbcsr_redistribute 9 5.0 0.403 0.470 0.709 0.749 multiply_cannon_metrocomm3 72 8.0 0.000 0.001 0.352 0.688 dbcsr_destroy 111 5.9 0.007 0.054 0.564 0.656 make_images_data 18 8.0 0.001 0.001 0.508 0.583 ------------------------------------------------------------------------------- Plot: name="dbcsr_timings_32omp", title="Timings of dbcsr with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32omp", name="rest", label="rest", y=9.209000000000003, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_redistribute", label="dbcsr_redistribute", y=57.672, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=16.521, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=14.124, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_alltoall_d11v", label="mp_alltoall_d11v", y=3.314, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_merge_all", label="dbcsr_merge_all", y=3.263, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_data_release", label="dbcsr_data_release", y=2.478, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="dbcsr_timings_32mpi", title="Timings of dbcsr with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32mpi", name="rest", label="rest", y=2.974, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_redistribute", label="dbcsr_redistribute", y=0.403, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=17.959, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=0.883, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_alltoall_d11v", label="mp_alltoall_d11v", y=0.0, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_merge_all", label="dbcsr_merge_all", y=0.148, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_data_release", label="dbcsr_data_release", y=0.67, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=0.516, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=3.839, yerr=0.0 Running MQAE_single_node.inp with 1 threads and 32 ranks... done. Running MQAE_single_node.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/MQAE_single_node_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.047 0.047 144.615 144.615 qs_mol_dyn_low 1 2.0 0.005 0.005 142.747 142.747 velocity_verlet 5 3.0 0.004 0.004 115.462 115.462 qmmm_el_coupling 6 3.8 0.000 0.000 67.846 67.846 qmmm_elec_with_gaussian 6 4.8 0.186 0.186 67.840 67.840 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 66.190 66.190 qmmm_elec_gaussian_low_G 6 6.8 64.592 64.592 64.592 64.592 qs_forces 6 3.8 0.001 0.001 54.583 54.583 qs_energies 6 4.8 0.000 0.000 48.355 48.355 scf_env_do_scf 6 5.8 0.001 0.001 44.885 44.885 scf_env_do_scf_inner_loop 39 6.8 0.003 0.003 39.074 39.074 rebuild_ks_matrix 45 8.4 0.000 0.000 38.842 38.842 qs_ks_build_kohn_sham_matrix 45 9.4 0.007 0.007 38.842 38.842 qs_ks_update_qs_env 45 7.8 0.000 0.000 33.284 33.284 pw_transfer 966 11.9 0.072 0.072 23.824 23.824 fft_wrap_pw1pw2 801 13.0 0.009 0.009 23.464 23.464 fft_wrap_pw1pw2_150 507 14.3 2.501 2.501 22.928 22.928 qs_vxc_create 45 10.4 0.001 0.001 20.920 20.920 xc_vxc_pw_create 45 11.4 3.859 3.859 20.919 20.919 fist_calc_energy_force 6 3.8 0.002 0.002 10.825 10.825 pw_scatter_s 429 15.4 10.536 10.536 10.536 10.536 qs_rho_update_rho 45 7.9 0.000 0.000 10.334 10.334 calculate_rho_elec 45 8.9 0.888 0.888 10.334 10.334 xc_rho_set_and_dset_create 45 12.4 0.244 0.244 9.805 9.805 force_nonbond 6 4.8 9.419 9.419 9.419 9.419 fft3d_s 802 15.0 9.062 9.062 9.071 9.071 pw_integral_ab 2539 7.4 8.724 8.724 8.724 8.724 qmmm_forces 6 3.8 0.001 0.001 8.711 8.711 qmmm_forces_with_gaussian 6 4.8 0.145 0.145 8.232 8.232 qs_ks_ddapc 45 10.4 0.001 0.001 6.708 6.708 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 6.285 6.285 init_scf_loop 6 6.8 0.000 0.000 5.805 5.805 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 5.571 5.571 pw_poisson_solve 51 9.9 2.333 2.333 5.339 5.339 qmmm_forces_gaussian_low_G 6 6.8 5.214 5.214 5.214 5.214 grid_collocate_task_list 45 9.9 4.724 4.724 4.724 4.724 density_rs2pw 45 9.9 0.003 0.003 4.722 4.722 sum_up_and_integrate 45 10.4 0.234 0.234 4.454 4.454 integrate_v_rspace 45 11.4 0.009 0.009 4.220 4.220 cp_ddapc_apply_CD 45 11.4 0.006 0.006 4.158 4.158 ------------------------------------------------------------------------------- From /workspace/artifacts/MQAE_single_node_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.035 0.038 90.514 90.516 qs_mol_dyn_low 1 2.0 0.005 0.005 88.946 89.041 qs_forces 6 3.8 0.001 0.001 65.931 65.931 qs_energies 6 4.8 0.001 0.001 62.867 62.868 scf_env_do_scf 6 5.8 0.000 0.001 61.289 61.289 scf_env_do_scf_inner_loop 113 6.2 0.003 0.010 58.782 58.784 rebuild_ks_matrix 119 8.1 0.000 0.000 43.697 43.716 qs_ks_build_kohn_sham_matrix 119 9.1 0.021 0.023 43.697 43.715 qs_ks_update_qs_env 119 7.3 0.001 0.001 41.086 41.103 velocity_verlet 5 3.0 0.003 0.003 37.169 37.174 pw_transfer 2446 11.8 0.272 0.289 28.259 28.615 fft_wrap_pw1pw2 2059 12.8 0.035 0.038 27.433 27.819 fft_wrap_pw1pw2_150 1321 14.0 2.441 2.709 26.698 27.023 qs_vxc_create 119 10.1 0.003 0.004 22.360 22.364 xc_vxc_pw_create 119 11.1 0.459 0.623 22.356 22.361 fft3d_ps 2059 14.8 12.672 13.862 20.866 21.383 qs_rho_update_rho 119 7.3 0.001 0.001 17.220 17.220 calculate_rho_elec 119 8.3 0.086 0.095 17.219 17.220 sum_up_and_integrate 119 10.1 0.093 0.101 15.527 15.625 integrate_v_rspace 119 11.1 0.004 0.005 15.434 15.527 qmmm_forces 6 3.8 0.003 0.003 12.731 12.732 qmmm_forces_with_gaussian 6 4.8 0.418 0.503 12.346 12.524 rs_pw_transfer 988 11.5 0.016 0.018 11.916 12.458 density_rs2pw 119 9.3 0.012 0.013 10.507 10.945 xc_rho_set_and_dset_create 119 12.1 0.524 0.626 10.449 10.844 potential_pw2rs 119 12.1 0.011 0.012 9.330 9.341 qmmm_el_coupling 6 3.8 0.000 0.000 9.171 9.219 qmmm_elec_with_gaussian 6 4.8 0.370 0.513 9.167 9.216 grid_collocate_task_list 119 9.3 6.408 7.160 6.408 7.160 mp_alltoall_z22v 2059 16.8 5.006 6.698 5.006 6.698 grid_integrate_task_list 119 12.1 5.713 6.083 5.713 6.083 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 5.780 5.913 rs_pw_transfer_PW2RS_150 125 13.9 2.644 2.731 5.213 5.241 pw_restrict_s3 18 5.8 2.209 2.239 4.856 4.925 rs_pw_transfer_RS2PW_150 125 11.2 2.163 2.325 4.322 4.878 qmmm_forces_gaussian_low_G 6 6.8 4.731 4.855 4.731 4.855 x_to_yz 1095 16.3 1.993 2.155 4.579 4.830 yz_to_x 964 15.3 1.144 1.304 3.564 4.683 mp_waitany 4028 12.8 3.728 4.667 3.728 4.667 qmmm_elec_with_gaussian:spline 6 5.8 0.000 0.000 3.980 4.026 pw_prolongate_s3 18 6.8 1.784 1.816 3.979 4.025 pw_integral_ab 2761 7.7 3.236 3.266 3.632 3.817 qs_scf_new_mos 113 7.2 0.001 0.001 3.570 3.578 qs_scf_loop_do_ot 113 8.2 0.001 0.001 3.569 3.577 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 3.377 3.454 ot_scf_mini 113 9.2 0.002 0.002 3.413 3.421 dbcsr_multiply_generic 2588 12.3 0.097 0.113 3.306 3.383 qs_ks_ddapc 119 10.1 0.003 0.003 3.030 3.192 mp_sum_dm3 33 5.7 2.526 2.663 2.526 2.663 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 2.622 2.623 qmmm_elec_gaussian_low_G 6 6.8 2.433 2.504 2.433 2.504 init_scf_loop 6 6.8 0.000 0.000 2.503 2.503 pw_gather_p 964 14.3 2.054 2.250 2.054 2.250 mp_waitall_1 188862 16.2 2.018 2.217 2.018 2.217 ot_mini 113 10.2 0.001 0.001 2.157 2.169 pw_scatter_p 1095 15.3 1.966 2.038 1.966 2.038 pw_derive 732 12.5 1.809 1.952 1.809 1.952 ------------------------------------------------------------------------------- Plot: name="MQAE_single_node_timings_32omp", title="Timings of MQAE_single_node with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32omp", name="rest", label="rest", y=32.34400000000001, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=64.592, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_scatter_s", label="pw_scatter_s", y=10.536, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="force_nonbond", label="force_nonbond", y=9.419, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_s", label="fft3d_s", y=9.062, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_integral_ab", label="pw_integral_ab", y=8.724, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=5.214, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=4.724, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=0.0, yerr=0.0 Plot: name="MQAE_single_node_timings_32mpi", title="Timings of MQAE_single_node with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32mpi", name="rest", label="rest", y=50.315, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=2.433, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_scatter_s", label="pw_scatter_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="force_nonbond", label="force_nonbond", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_integral_ab", label="pw_integral_ab", y=3.236, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=4.731, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=6.408, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=12.672, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=5.713, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=5.006, yerr=0.0 Summary: Performance test works fine. Status: OK Uploading artifacts... done EndDate: 2021-10-05 12:51:53+00:00