StartDate: 2021-10-12 19:19:12+00:00 CpuId: 64x Intel Xeon W 2000 / D-2100 (Skylake / Cascade Lake) {Skylake}, 14nm CommitSHA: 81a1f6d95a49d8ecc94b31c7626c68ba1c65f1cd CommitTime: 2021-10-12 18:53:18 +0200 CommitAuthor: Juerg Hutter CommitSubject: Ajust regtests and coding conventions (#1673) Trying to pull image cp2k-toolchain-mpich... success :-) Trying to pull image cp2k-perf-openmp... success :-) #################### Running Image cp2k-perf-openmp #################### ========== Fetching Git Commit ========== CommitSHA: 81a1f6d95a49d8ecc94b31c7626c68ba1c65f1cd CommitTime: 2021-10-12 18:53:18 +0200 CommitAuthor: Juerg Hutter CommitSubject: Ajust regtests and coding conventions (#1673) ========== Cleaning Build Cache ========== Discovering programs ... rm -rf /workspace/cp2k/obj/precommit rm -rf /workspace/cp2k/obj/prettified rm -rf /workspace/cp2k/obj/doxified rm -rf /workspace/cp2k/regtesting/local/ssmp/TEST-* rm -rf /workspace/cp2k/regtesting/local/ssmp/LAST-* rm -rf /workspace/cp2k/exe /workspace/cp2k/obj /workspace/cp2k/lib /workspace/cp2k/regtesting ========== Running Test ========== ========== Compiling CP2K ========== Compiling cp2k... done. ========== Running Performance Test ========== Running H2O-64.inp with 1 threads and 32 ranks... done. Running H2O-64.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.053 0.053 172.204 172.204 qs_mol_dyn_low 1 2.0 0.004 0.004 171.310 171.310 qs_forces 11 3.9 0.001 0.001 171.231 171.231 qs_energies 11 4.9 0.001 0.001 158.664 158.664 scf_env_do_scf 11 5.9 0.001 0.001 127.309 127.309 velocity_verlet 10 3.0 0.002 0.002 116.067 116.067 scf_env_do_scf_inner_loop 108 6.5 0.011 0.011 99.001 99.001 rebuild_ks_matrix 119 8.3 0.001 0.001 44.834 44.834 qs_ks_build_kohn_sham_matrix 119 9.3 0.020 0.020 44.833 44.833 qs_rho_update_rho 119 7.7 0.001 0.001 41.197 41.197 calculate_rho_elec 119 8.7 1.583 1.583 41.196 41.196 qs_ks_update_qs_env 119 7.6 0.001 0.001 40.034 40.034 grid_collocate_task_list 119 9.7 34.427 34.427 34.427 34.427 sum_up_and_integrate 119 10.3 0.409 0.409 32.027 32.027 integrate_v_rspace 119 11.3 0.179 0.179 31.618 31.618 grid_integrate_task_list 119 12.3 28.618 28.618 28.618 28.618 init_scf_loop 11 6.9 0.000 0.000 28.114 28.114 qs_scf_new_mos 108 7.5 0.001 0.001 25.967 25.967 qs_scf_loop_do_ot 108 8.5 0.001 0.001 25.966 25.966 dbcsr_multiply_generic 2286 12.5 0.199 0.199 24.561 24.561 ot_scf_mini 108 9.5 0.004 0.004 24.407 24.407 prepare_preconditioner 11 7.9 0.000 0.000 23.162 23.162 make_preconditioner 11 8.9 0.000 0.000 23.162 23.162 make_full_inverse_cholesky 11 9.9 0.000 0.000 21.066 21.066 ot_mini 108 10.5 0.001 0.001 15.901 15.901 init_scf_run 11 5.9 0.001 0.001 15.406 15.406 scf_env_initial_rho_setup 11 6.9 0.001 0.001 15.405 15.405 make_m2s 4572 13.5 0.072 0.072 14.796 14.796 wfi_extrapolate 11 7.9 0.001 0.001 14.434 14.434 cp_gemm 81 9.0 0.000 0.000 11.960 11.960 cp_gemm_cosma 81 10.0 11.960 11.960 11.960 11.960 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 11.840 11.840 pw_transfer 1439 11.6 0.107 0.107 8.885 8.885 cp_fm_cholesky_decompose 22 10.9 8.529 8.529 8.529 8.529 fft_wrap_pw1pw2 1201 12.6 0.011 0.011 8.502 8.502 ot_diis_step 108 11.5 0.006 0.006 8.136 8.136 make_images 4572 14.5 2.853 2.853 7.848 7.848 qs_ot_get_derivative 108 11.5 0.002 0.002 7.760 7.760 fft_wrap_pw1pw2_140 487 13.2 0.728 0.728 7.249 7.249 dbcsr_make_dense_low 5837 15.5 0.115 0.115 7.175 7.175 make_dense_data 5837 16.5 6.316 6.316 7.037 7.037 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 6.902 6.902 apply_single 119 13.6 0.001 0.001 6.902 6.902 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 6.693 6.693 dbcsr_complete_redistribute 329 12.2 3.152 3.152 6.688 6.688 qs_env_update_s_mstruct 11 6.9 0.000 0.000 6.580 6.580 dbcsr_copy 2102 12.0 0.320 0.320 6.481 6.481 dbcsr_make_images_dense 3978 14.8 0.029 0.029 6.449 6.449 dbcsr_copy_into_existing 22 7.9 6.101 6.101 6.101 6.101 cp_fm_cholesky_invert 11 10.9 6.074 6.074 6.074 6.074 qs_create_task_list 11 7.9 0.000 0.000 5.958 5.958 generate_qs_task_list 11 8.9 3.979 3.979 5.958 5.958 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 5.871 5.871 copy_dbcsr_to_fm 153 11.3 0.004 0.004 5.459 5.459 density_rs2pw 119 9.7 0.007 0.007 5.187 5.187 multiply_cannon 2286 13.5 0.345 0.345 5.044 5.044 pw_poisson_solve 119 10.3 2.118 2.118 4.992 4.992 build_core_hamiltonian_matrix 11 6.9 0.001 0.001 4.566 4.566 transfer_dbcsr_to_fm 11 10.9 0.000 0.000 4.434 4.434 qs_ot_get_p 119 10.4 0.001 0.001 4.369 4.369 multiply_cannon_loop 2286 14.5 0.069 0.069 4.193 4.193 multiply_cannon_multrec 2286 15.5 4.037 4.037 4.123 4.123 qs_energies_compute_matrix_w 11 5.9 0.000 0.000 3.913 3.913 calculate_w_matrix_ot 11 6.9 0.009 0.009 3.913 3.913 fft3d_s 1202 14.6 3.886 3.886 3.893 3.893 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.026 0.031 82.545 82.546 qs_mol_dyn_low 1 2.0 0.006 0.009 82.384 82.391 qs_forces 11 3.9 0.002 0.003 82.319 82.320 qs_energies 11 4.9 0.002 0.002 76.878 76.881 scf_env_do_scf 11 5.9 0.001 0.001 69.934 69.936 scf_env_do_scf_inner_loop 108 6.5 0.004 0.011 64.772 64.772 velocity_verlet 10 3.0 0.002 0.002 49.164 49.166 rebuild_ks_matrix 119 8.3 0.001 0.001 31.916 31.972 qs_ks_build_kohn_sham_matrix 119 9.3 0.025 0.026 31.915 31.971 qs_ks_update_qs_env 119 7.6 0.001 0.002 28.335 28.387 qs_rho_update_rho 119 7.7 0.001 0.001 24.607 24.639 calculate_rho_elec 119 8.7 0.049 0.051 24.606 24.639 sum_up_and_integrate 119 10.3 0.062 0.066 24.542 24.587 integrate_v_rspace 119 11.3 0.005 0.006 24.480 24.524 dbcsr_multiply_generic 2286 12.5 0.146 0.149 20.981 21.091 grid_integrate_task_list 119 12.3 16.838 17.367 16.838 17.367 grid_collocate_task_list 119 9.7 16.789 17.356 16.789 17.356 qs_scf_new_mos 108 7.5 0.001 0.001 16.998 17.042 qs_scf_loop_do_ot 108 8.5 0.001 0.002 16.997 17.041 ot_scf_mini 108 9.5 0.004 0.004 15.974 16.018 multiply_cannon 2286 13.5 0.249 0.256 14.213 14.467 multiply_cannon_loop 2286 14.5 0.255 0.269 12.906 13.221 mp_waitall_1 169478 16.3 10.575 10.851 10.575 10.851 ot_mini 108 10.5 0.001 0.002 9.553 9.597 rs_pw_transfer 974 11.9 0.019 0.020 8.174 8.950 density_rs2pw 119 9.7 0.010 0.010 7.086 7.872 pw_transfer 1439 11.6 0.155 0.162 7.216 7.285 multiply_cannon_metrocomm3 18288 15.5 0.088 0.093 6.818 7.232 fft_wrap_pw1pw2 1201 12.6 0.016 0.016 6.864 6.934 potential_pw2rs 119 12.3 0.011 0.011 6.315 6.322 fft_wrap_pw1pw2_140 487 13.2 0.663 0.684 5.952 6.124 fft3d_ps 1201 14.6 2.861 3.017 5.165 5.239 init_scf_loop 11 6.9 0.001 0.001 5.142 5.143 apply_preconditioner_dbcsr 119 12.6 0.000 0.001 4.891 4.925 apply_single 119 13.6 0.001 0.001 4.891 4.924 ot_diis_step 108 11.5 0.005 0.006 4.904 4.904 init_scf_run 11 5.9 0.000 0.002 4.848 4.848 scf_env_initial_rho_setup 11 6.9 0.000 0.001 4.847 4.848 multiply_cannon_multrec 18288 15.5 4.628 4.816 4.648 4.837 make_m2s 4572 13.5 0.081 0.087 4.623 4.691 qs_ot_get_derivative 108 11.5 0.002 0.002 4.601 4.646 wfi_extrapolate 11 7.9 0.001 0.001 4.426 4.426 make_images 4572 14.5 0.196 0.201 3.852 3.923 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 3.829 3.837 mp_waitany 9880 13.7 2.815 3.644 2.815 3.644 rs_pw_transfer_RS2PW_140 130 11.5 0.615 0.650 2.511 3.302 rs_pw_transfer_PW2RS_140 130 13.9 1.498 1.547 3.109 3.143 mp_alltoall_d11v 2130 13.8 1.704 2.310 1.704 2.310 qs_ot_get_p 119 10.4 0.001 0.001 2.100 2.143 rs_gather_matrices 119 12.3 0.151 0.160 1.263 1.916 make_images_data 4572 15.5 0.065 0.069 1.792 1.907 prepare_preconditioner 11 7.9 0.000 0.000 1.774 1.789 make_preconditioner 11 8.9 0.000 0.000 1.774 1.789 mp_alltoall_z22v 1201 16.6 1.512 1.735 1.512 1.735 hybrid_alltoall_any 4725 16.4 0.137 0.500 1.575 1.681 ------------------------------------------------------------------------------- Plot: name="H2O-64_timings_32omp", title="Timings of H2O-64 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32omp", name="rest", label="rest", y=78.31700000000001, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=34.427, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=28.618, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_gemm_cosma", label="cp_gemm_cosma", y=11.96, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=8.529, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="make_dense_data", label="make_dense_data", y=6.316, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=4.037, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 Plot: name="H2O-64_timings_32mpi", title="Timings of H2O-64 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32mpi", name="rest", label="rest", y=30.854000000000006, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=16.789, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=16.838, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_gemm_cosma", label="cp_gemm_cosma", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="make_dense_data", label="make_dense_data", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=4.628, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=10.575, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=2.861, yerr=0.0 Running H2O-64_nonortho.inp with 1 threads and 32 ranks... done. Running H2O-64_nonortho.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_nonortho_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.042 0.042 207.855 207.855 qs_mol_dyn_low 1 2.0 0.004 0.004 206.972 206.972 qs_forces 11 3.9 0.002 0.002 206.913 206.913 qs_energies 11 4.9 0.001 0.001 192.487 192.487 scf_env_do_scf 11 5.9 0.001 0.001 157.960 157.960 velocity_verlet 10 3.0 0.002 0.002 137.252 137.252 scf_env_do_scf_inner_loop 96 6.5 0.010 0.010 126.931 126.931 rebuild_ks_matrix 107 8.3 0.001 0.001 65.136 65.136 qs_ks_build_kohn_sham_matrix 107 9.3 0.019 0.019 65.135 65.135 qs_rho_update_rho 107 7.7 0.001 0.001 60.026 60.026 calculate_rho_elec 107 8.7 1.419 1.419 60.025 60.025 qs_ks_update_qs_env 107 7.6 0.001 0.001 58.231 58.231 grid_collocate_task_list 107 9.7 53.903 53.903 53.903 53.903 sum_up_and_integrate 107 10.3 0.376 0.376 53.441 53.441 integrate_v_rspace 107 11.3 0.160 0.160 53.065 53.065 grid_integrate_task_list 107 12.3 50.316 50.316 50.316 50.316 init_scf_loop 11 6.9 0.000 0.000 30.808 30.808 prepare_preconditioner 11 7.9 0.000 0.000 23.089 23.089 make_preconditioner 11 8.9 0.000 0.000 23.088 23.088 qs_scf_new_mos 96 7.5 0.001 0.001 21.811 21.811 qs_scf_loop_do_ot 96 8.5 0.001 0.001 21.810 21.810 make_full_inverse_cholesky 11 9.9 0.000 0.000 20.790 20.790 dbcsr_multiply_generic 1966 12.4 0.174 0.174 20.720 20.720 ot_scf_mini 96 9.5 0.003 0.003 20.447 20.447 init_scf_run 11 5.9 0.001 0.001 17.583 17.583 scf_env_initial_rho_setup 11 6.9 0.001 0.001 17.582 17.582 wfi_extrapolate 11 7.9 0.001 0.001 16.423 16.423 ot_mini 96 10.5 0.001 0.001 13.032 13.032 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 12.760 12.760 make_m2s 3932 13.4 0.062 0.062 12.238 12.238 cp_gemm 81 9.0 0.000 0.000 11.689 11.689 cp_gemm_cosma 81 10.0 11.688 11.688 11.688 11.688 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 8.629 8.629 cp_fm_cholesky_decompose 22 10.9 8.256 8.256 8.256 8.256 pw_transfer 1295 11.6 0.096 0.096 8.096 8.096 fft_wrap_pw1pw2 1081 12.6 0.011 0.011 7.766 7.766 qs_env_update_s_mstruct 11 6.9 0.000 0.000 7.648 7.648 qs_create_task_list 11 7.9 0.000 0.000 7.055 7.055 generate_qs_task_list 11 8.9 5.161 5.161 7.055 7.055 dbcsr_complete_redistribute 317 12.2 3.203 3.203 7.038 7.038 qs_ot_get_derivative 96 11.5 0.002 0.002 6.724 6.724 make_images 3932 14.4 2.410 2.410 6.678 6.678 fft_wrap_pw1pw2_140 439 13.2 0.744 0.744 6.652 6.652 ot_diis_step 96 11.5 0.005 0.005 6.304 6.304 dbcsr_copy 1855 11.9 0.292 0.292 6.296 6.296 dbcsr_copy_into_existing 22 7.9 5.950 5.950 5.951 5.951 cp_fm_cholesky_invert 11 10.9 5.848 5.848 5.848 5.848 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 5.793 5.793 dbcsr_make_dense_low 4961 15.5 0.098 0.098 5.775 5.775 make_dense_data 4961 16.5 5.083 5.083 5.658 5.658 copy_dbcsr_to_fm 147 11.2 0.004 0.004 5.595 5.595 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 5.485 5.485 apply_single 107 13.6 0.001 0.001 5.485 5.485 dbcsr_make_images_dense 3386 14.7 0.024 0.024 5.133 5.133 density_rs2pw 107 9.7 0.007 0.007 4.703 4.703 pw_poisson_solve 107 10.3 2.106 2.106 4.690 4.690 transfer_dbcsr_to_fm 11 10.9 0.000 0.000 4.545 4.545 multiply_cannon 1966 13.4 0.291 0.291 4.486 4.486 build_core_hamiltonian_matrix 11 6.9 0.001 0.001 4.426 4.426 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_nonortho_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.013 138.891 138.892 qs_mol_dyn_low 1 2.0 0.005 0.006 138.767 138.773 qs_forces 11 3.9 0.003 0.003 138.708 138.708 qs_energies 11 4.9 0.002 0.002 129.308 129.311 scf_env_do_scf 11 5.9 0.001 0.001 119.230 119.231 scf_env_do_scf_inner_loop 96 6.5 0.003 0.010 110.726 110.726 velocity_verlet 10 3.0 0.002 0.003 82.727 82.729 rebuild_ks_matrix 107 8.3 0.001 0.001 62.636 62.674 qs_ks_build_kohn_sham_matrix 107 9.3 0.022 0.023 62.635 62.674 sum_up_and_integrate 107 10.3 0.056 0.059 56.065 56.085 integrate_v_rspace 107 11.3 0.005 0.005 56.009 56.029 qs_ks_update_qs_env 107 7.6 0.001 0.001 55.088 55.122 qs_rho_update_rho 107 7.7 0.001 0.001 52.532 52.551 calculate_rho_elec 107 8.7 0.044 0.045 52.531 52.550 grid_integrate_task_list 107 12.3 48.048 49.143 48.048 49.143 grid_collocate_task_list 107 9.7 44.968 45.803 44.968 45.803 dbcsr_multiply_generic 1966 12.4 0.127 0.129 18.516 18.573 qs_scf_new_mos 96 7.5 0.001 0.001 14.763 14.791 qs_scf_loop_do_ot 96 8.5 0.001 0.001 14.762 14.790 ot_scf_mini 96 9.5 0.003 0.003 13.881 13.906 multiply_cannon 1966 13.4 0.215 0.219 12.625 12.853 multiply_cannon_loop 1966 14.4 0.223 0.229 11.501 11.888 mp_waitall_1 146670 16.2 9.419 9.706 9.419 9.706 rs_pw_transfer 878 11.9 0.017 0.019 8.160 9.421 init_scf_loop 11 6.9 0.001 0.001 8.484 8.485 ot_mini 96 10.5 0.001 0.002 8.295 8.322 density_rs2pw 107 9.7 0.009 0.009 6.913 8.204 init_scf_run 11 5.9 0.000 0.002 7.937 7.937 scf_env_initial_rho_setup 11 6.9 0.000 0.001 7.936 7.937 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 7.772 7.780 wfi_extrapolate 11 7.9 0.001 0.001 7.245 7.245 pw_transfer 1295 11.6 0.138 0.146 6.398 6.466 multiply_cannon_metrocomm3 15728 15.4 0.076 0.079 6.095 6.434 fft_wrap_pw1pw2 1081 12.6 0.014 0.015 6.089 6.158 potential_pw2rs 107 12.3 0.010 0.011 5.790 5.805 fft_wrap_pw1pw2_140 439 13.2 0.595 0.614 5.277 5.463 fft3d_ps 1081 14.6 2.506 2.655 4.562 4.656 mp_waitany 8968 13.7 3.332 4.584 3.332 4.584 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 4.351 4.389 apply_single 107 13.6 0.001 0.001 4.350 4.388 ot_diis_step 96 11.5 0.005 0.005 4.296 4.296 multiply_cannon_multrec 15728 15.4 4.137 4.247 4.155 4.265 rs_pw_transfer_RS2PW_140 118 11.5 0.485 0.511 2.938 4.206 make_m2s 3932 13.4 0.070 0.076 4.086 4.156 qs_ot_get_derivative 96 11.5 0.001 0.002 3.959 3.988 mp_alltoall_d11v 1998 13.7 2.509 3.931 2.509 3.931 rs_gather_matrices 107 12.3 0.144 0.155 2.107 3.518 make_images 3932 14.4 0.171 0.176 3.413 3.477 rs_pw_transfer_PW2RS_140 118 13.9 1.437 1.499 2.969 3.017 ------------------------------------------------------------------------------- Plot: name="H2O-64_nonortho_timings_32omp", title="Timings of H2O-64_nonortho with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="rest", label="rest", y=77.74199999999999, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=53.903, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=50.316, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_gemm_cosma", label="cp_gemm_cosma", y=11.688, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=8.256, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=5.95, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="H2O-64_nonortho_timings_32mpi", title="Timings of H2O-64_nonortho with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="rest", label="rest", y=28.986999999999995, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=44.968, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=48.048, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_gemm_cosma", label="cp_gemm_cosma", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitany", label="mp_waitany", y=3.332, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=4.137, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=9.419, yerr=0.0 Running H2O-hyb.inp with 1 threads and 32 ranks... done. Running H2O-hyb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-hyb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.407 0.407 251.878 251.878 qs_energies 1 2.0 0.000 0.000 250.509 250.509 scf_env_do_scf 1 3.0 0.000 0.000 247.721 247.721 qs_ks_update_qs_env 8 5.0 0.000 0.000 238.382 238.382 rebuild_ks_matrix 7 6.0 0.000 0.000 238.274 238.274 qs_ks_build_kohn_sham_matrix 7 7.0 0.002 0.002 238.274 238.274 hfx_ks_matrix 7 8.0 0.000 0.000 169.309 169.309 integrate_four_center 7 9.0 2.102 2.102 169.275 169.275 integrate_four_center_main 7 10.0 1.290 1.290 157.742 157.742 integrate_four_center_bin 449 11.0 156.452 156.452 156.452 156.452 scf_env_do_scf_inner_loop 7 4.0 0.001 0.001 149.638 149.638 init_scf_loop 1 4.0 0.000 0.000 98.066 98.066 cp_gemm 129 10.3 0.001 0.001 53.170 53.170 cp_gemm_cosma 129 11.3 53.170 53.170 53.170 53.170 admm_mo_calc_rho_aux 7 8.0 0.000 0.000 30.947 30.947 admm_fit_mo_coeffs 7 9.0 0.000 0.000 28.169 28.169 admm_mo_merge_derivs 7 8.0 0.000 0.000 26.801 26.801 merge_mo_derivs_diag 7 9.0 0.023 0.023 26.801 26.801 purify_mo_diag 7 10.0 0.001 0.001 15.174 15.174 fit_mo_coeffs 7 10.0 0.000 0.000 12.995 12.995 integrate_four_center_load 7 10.0 0.001 0.001 9.021 9.021 hfx_load_balance 1 11.0 0.004 0.004 9.019 9.019 calculate_rho_elec 15 7.4 0.194 0.194 6.316 6.316 grid_collocate_task_list 15 8.4 5.486 5.486 5.486 5.486 qs_vxc_create 14 8.0 0.000 0.000 5.331 5.331 xc_vxc_pw_create 14 9.0 0.906 0.906 5.330 5.330 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-hyb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.234 0.241 191.033 191.034 qs_energies 1 2.0 0.000 0.001 190.650 190.651 scf_env_do_scf 1 3.0 0.000 0.000 190.030 190.030 qs_ks_update_qs_env 8 5.0 0.000 0.000 186.629 186.629 rebuild_ks_matrix 7 6.0 0.000 0.000 186.614 186.615 qs_ks_build_kohn_sham_matrix 7 7.0 0.002 0.003 186.614 186.615 hfx_ks_matrix 7 8.0 0.000 0.001 174.607 174.609 integrate_four_center 7 9.0 0.111 0.430 174.589 174.590 integrate_four_center_main 7 10.0 0.005 0.006 158.264 164.158 integrate_four_center_bin 448 11.0 158.259 164.153 158.259 164.153 scf_env_do_scf_inner_loop 7 4.0 0.000 0.001 112.695 112.695 init_scf_loop 1 4.0 0.000 0.000 77.334 77.334 mp_sync 70 11.3 6.382 9.274 6.382 9.274 integrate_four_center_load 7 10.0 0.000 0.000 9.024 9.033 hfx_load_balance 1 11.0 0.001 0.002 9.024 9.033 hfx_load_balance_bin 1 12.0 4.443 4.571 4.443 4.571 hfx_load_balance_count 1 12.0 4.423 4.514 4.423 4.514 cp_gemm 129 10.3 0.001 0.001 4.030 4.037 cp_gemm_cosma 129 11.3 4.030 4.036 4.030 4.036 qs_vxc_create 14 8.0 0.000 0.000 3.927 3.928 xc_vxc_pw_create 14 9.0 0.021 0.023 3.927 3.927 ------------------------------------------------------------------------------- Plot: name="H2O-hyb_timings_32omp", title="Timings of H2O-hyb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32omp", name="rest", label="rest", y=33.377999999999986, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_bin", label="integrate_four_center_bin", y=156.452, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="cp_gemm_cosma", label="cp_gemm_cosma", y=53.17, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=5.486, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center", label="integrate_four_center", y=2.102, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_main", label="integrate_four_center_main", y=1.29, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_count", label="hfx_load_balance_count", y=0.0, yerr=0.0 Plot: name="H2O-hyb_timings_32mpi", title="Timings of H2O-hyb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32mpi", name="rest", label="rest", y=13.379999999999995, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_bin", label="integrate_four_center_bin", y=158.259, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="cp_gemm_cosma", label="cp_gemm_cosma", y=4.03, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center", label="integrate_four_center", y=0.111, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_main", label="integrate_four_center_main", y=0.005, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=4.443, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="mp_sync", label="mp_sync", y=6.382, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_count", label="hfx_load_balance_count", y=4.423, yerr=0.0 Running GW_PBE_4benzene.inp with 1 threads and 32 ranks... done. Running GW_PBE_4benzene.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/GW_PBE_4benzene_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.017 0.017 399.000 399.000 qs_energies 1 2.0 0.000 0.000 398.442 398.442 mp2_main 1 3.0 0.000 0.000 393.223 393.223 mp2_gpw_main 1 4.0 0.001 0.001 393.033 393.033 rpa_ri_compute_en 1 5.0 0.000 0.000 370.679 370.679 rpa_num_int 1 6.0 0.001 0.001 370.651 370.651 compute_mat_P_omega 1 7.0 0.002 0.002 205.474 205.474 compute_mat_P_omega_contract 10 8.0 13.778 13.778 204.139 204.139 dbcsr_t_total 2336 9.6 0.019 0.019 192.082 192.082 cp_gemm 105 8.4 0.000 0.000 141.754 141.754 cp_gemm_cosma 105 9.4 141.754 141.754 141.754 141.754 dbcsr_t_contract 787 11.0 49.980 49.980 114.557 114.557 GW_matrix_operations 10 7.0 0.016 0.016 94.001 94.001 compute_mat_P_omega_calc_M_occ 250 9.0 13.799 13.799 76.083 76.083 dbcsr_t_copy 1103 10.7 21.257 21.257 75.916 75.916 dbcsr_tas_total 1149 12.2 0.052 0.052 58.246 58.246 dbcsr_tas_multiply 807 12.1 0.003 0.003 56.663 56.663 rpa_num_int_RPA_matrix_operati 10 7.0 0.000 0.000 48.775 48.775 contract_P_omega_with_mat_L 10 8.0 0.000 0.000 46.674 46.674 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 46.623 46.623 dbcsr_multiply_generic 837 15.8 0.139 0.139 41.841 41.841 dbcsr_tas_dbcsr 807 14.1 0.003 0.003 41.571 41.571 dbcsr_tas_reserve_blocks_index 3261 13.7 7.251 7.251 29.497 29.497 dbcsr_tas_mm_1N 524 15.1 0.002 0.002 29.259 29.259 multiply_cannon 837 16.8 0.460 0.460 26.814 26.814 dbcsr_tas_copy 574 11.4 17.782 17.782 25.784 25.784 multiply_cannon_loop 837 17.8 0.168 0.168 23.194 23.194 dbcsr_t_reserve_blocks_index 2280 12.5 1.346 1.346 22.903 22.903 mp2_ri_gpw_compute_in 1 5.0 0.000 0.000 22.336 22.336 dbcsr_reserve_blocks 3717 14.7 21.553 21.553 21.944 21.944 multiply_cannon_multrec 837 18.8 20.875 20.875 21.808 21.808 dbcsr_t_reserve_blocks_index_a 2222 11.6 0.015 0.015 21.540 21.540 compute_mat_P_omega_copy_M_occ 250 9.0 0.002 0.002 20.620 20.620 compute_QP_energies 1 7.0 0.000 0.000 20.318 20.318 compute_self_energy_cubic_gw 1 8.0 0.110 0.110 20.318 20.318 compute_mat_P_omega_copy_M_vir 250 9.0 0.002 0.002 15.318 15.318 dbcsr_t_copy_nocomm 251 12.0 12.001 12.001 14.489 14.489 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 12.853 12.853 make_m2s 1674 16.8 0.109 0.109 12.295 12.295 make_images 1674 17.8 5.734 5.734 11.775 11.775 dbcsr_tas_mm_2 251 15.0 0.002 0.002 10.879 10.879 dbcsr_finalize 9888 13.6 1.636 1.636 8.836 8.836 contract_cubic_gw 21 9.0 0.000 0.000 8.324 8.324 build_3c_integrals 5 6.0 3.806 3.806 8.210 8.210 ------------------------------------------------------------------------------- From /workspace/artifacts/GW_PBE_4benzene_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.009 66.497 66.498 qs_energies 1 2.0 0.001 0.003 66.368 66.374 mp2_main 1 3.0 0.000 0.001 64.682 64.689 mp2_gpw_main 1 4.0 0.000 0.001 64.617 64.624 rpa_ri_compute_en 1 5.0 0.000 0.000 62.493 62.500 rpa_num_int 1 6.0 0.000 0.001 62.485 62.492 dbcsr_t_total 2336 9.6 0.019 0.020 50.156 50.158 compute_mat_P_omega 1 7.0 0.001 0.002 49.063 49.071 compute_mat_P_omega_contract 10 8.0 0.927 0.954 48.798 48.803 dbcsr_t_contract 787 11.0 2.165 2.298 36.867 36.871 dbcsr_tas_total 1149 12.2 0.076 0.083 32.555 32.555 dbcsr_tas_multiply 807 12.1 0.003 0.003 32.412 32.415 dbcsr_tas_dbcsr 807 14.1 0.004 0.005 23.629 23.631 dbcsr_multiply_generic 837 15.8 0.082 0.087 19.874 21.138 compute_mat_P_omega_calc_M_occ 250 9.0 0.895 0.926 16.309 16.310 multiply_cannon 837 16.8 0.158 0.178 11.661 12.191 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 12.058 12.059 dbcsr_t_copy 1111 10.7 5.062 5.292 11.480 11.854 dbcsr_tas_mm_1N 524 15.1 0.003 0.004 10.470 11.628 multiply_cannon_loop 837 17.8 0.053 0.056 10.617 11.101 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.002 10.307 10.308 dbcsr_tas_mm_2 251 15.0 0.002 0.002 9.077 9.078 multiply_cannon_multrec 1386 17.8 8.105 8.550 8.408 8.821 mp_sync 8696 11.6 7.370 8.788 7.370 8.788 make_m2s 1674 16.8 0.053 0.056 7.083 7.956 make_images 1674 17.8 0.261 0.275 6.987 7.860 cp_gemm 105 8.4 0.000 0.000 7.816 7.832 cp_gemm_cosma 105 9.4 7.816 7.831 7.816 7.831 GW_matrix_operations 10 7.0 0.002 0.002 5.128 5.136 compute_QP_energies 1 7.0 0.000 0.000 4.954 4.954 compute_self_energy_cubic_gw 1 8.0 0.006 0.006 4.951 4.954 dbcsr_t_communicate_buffer 1098 11.7 0.104 0.110 4.339 4.496 mp_waitall_2 3776 14.7 4.069 4.371 4.069 4.371 make_images_data 1674 18.8 0.043 0.046 3.843 4.017 hybrid_alltoall_any 1724 19.5 2.948 3.298 3.685 3.876 contract_cubic_gw 21 9.0 0.000 0.000 3.786 3.786 dbcsr_t_reserve_blocks_index 2849 12.4 0.125 0.131 3.326 3.696 dbcsr_t_reserve_blocks_index_a 2791 11.4 0.024 0.027 3.278 3.648 dbcsr_tas_reserve_blocks_index 3300 13.8 0.289 0.361 3.265 3.627 make_images_pack 1674 18.8 2.632 3.453 2.649 3.470 dbcsr_reserve_blocks 3785 14.7 2.978 3.322 3.021 3.367 rpa_num_int_RPA_matrix_operati 10 7.0 0.000 0.000 2.927 2.935 contract_P_omega_with_mat_L 10 8.0 0.000 0.000 2.786 2.794 mp_waitall_1 26582 19.0 2.043 2.534 2.043 2.534 convert_to_new_pgrid 2421 14.1 0.021 0.024 2.306 2.462 dbcsr_copy 3323 15.8 2.224 2.390 2.257 2.422 mp2_ri_gpw_compute_in 1 5.0 0.001 0.001 2.122 2.122 compute_mat_P_omega_copy_M_vir 250 9.0 0.002 0.002 2.098 2.104 dbcsr_add_anytype 909 13.7 1.233 1.295 1.921 2.013 compute_mat_P_omega_copy_M_occ 250 9.0 0.002 0.002 1.847 1.851 scf_env_do_scf 1 3.0 0.000 0.000 1.618 1.619 scf_env_do_scf_inner_loop 17 4.0 0.001 0.002 1.618 1.619 dbcsr_tas_replicate 396 14.1 0.857 0.933 1.504 1.579 ------------------------------------------------------------------------------- Plot: name="GW_PBE_4benzene_timings_32omp", title="Timings of GW_PBE_4benzene with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="rest", label="rest", y=143.58100000000002, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="cp_gemm_cosma", label="cp_gemm_cosma", y=141.754, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbcsr_t_contract", label="dbcsr_t_contract", y=49.98, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbcsr_reserve_blocks", label="dbcsr_reserve_blocks", y=21.553, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbcsr_t_copy", label="dbcsr_t_copy", y=21.257, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=20.875, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 Plot: name="GW_PBE_4benzene_timings_32mpi", title="Timings of GW_PBE_4benzene with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="rest", label="rest", y=28.932000000000002, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="cp_gemm_cosma", label="cp_gemm_cosma", y=7.816, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbcsr_t_contract", label="dbcsr_t_contract", y=2.165, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbcsr_reserve_blocks", label="dbcsr_reserve_blocks", y=2.978, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbcsr_t_copy", label="dbcsr_t_copy", y=5.062, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=8.105, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=4.069, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_sync", label="mp_sync", y=7.37, yerr=0.0 Running diag_cu144_broy.inp with 1 threads and 32 ranks... done. Running diag_cu144_broy.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/diag_cu144_broy_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.119 0.119 190.895 190.895 qs_energies 1 2.0 0.000 0.000 188.954 188.954 scf_env_do_scf 1 3.0 0.000 0.000 178.099 178.099 scf_env_do_scf_inner_loop 15 4.0 0.002 0.002 178.098 178.098 qs_ks_update_qs_env 15 5.0 0.000 0.000 75.437 75.437 rebuild_ks_matrix 15 6.0 0.000 0.000 75.046 75.046 qs_ks_build_kohn_sham_matrix 15 7.0 0.003 0.003 75.046 75.046 qs_scf_new_mos 15 5.0 0.000 0.000 69.808 69.808 eigensolver 15 6.0 0.002 0.002 55.509 55.509 qs_vxc_create 15 8.0 0.041 0.041 49.688 49.688 calculate_dispersion_nonloc 15 9.0 9.556 9.556 43.611 43.611 cp_fm_diag_elpa 15 7.0 0.000 0.000 38.636 38.636 cp_fm_diag_elpa_base 15 8.0 33.630 33.630 38.636 38.636 pw_transfer 1191 9.8 0.105 0.105 30.324 30.324 fft_wrap_pw1pw2 1086 10.9 0.014 0.014 29.985 29.985 qs_rho_update_rho 16 5.0 0.000 0.000 25.782 25.782 calculate_rho_elec 16 6.0 0.349 0.349 25.782 25.782 grid_collocate_task_list 16 7.0 24.031 24.031 24.031 24.031 sum_up_and_integrate 15 8.0 0.080 0.080 23.608 23.608 integrate_v_rspace 15 9.0 0.036 0.036 23.528 23.528 fft_wrap_pw1pw2_150 765 12.0 3.593 3.593 22.856 22.856 grid_integrate_task_list 15 10.0 22.809 22.809 22.809 22.809 fft3d_s 1087 12.8 12.674 12.674 12.685 12.685 cp_fm_cholesky_restore 45 7.0 12.122 12.122 12.122 12.122 pw_scatter_s 585 13.0 11.690 11.690 11.690 11.690 copy_dbcsr_to_fm 16 5.9 0.001 0.001 11.651 11.651 dbcsr_complete_redistribute 46 8.3 3.807 3.807 10.443 10.443 cp_fm_upper_to_full 30 8.0 9.755 9.755 9.755 9.755 vdW_energy 15 10.0 8.752 8.752 8.752 8.752 gspace_mixing 14 5.0 0.278 0.278 8.403 8.403 broyden_mixing 14 6.0 7.652 7.652 7.652 7.652 fft_wrap_pw1pw2_200 197 11.5 0.401 0.401 6.848 6.848 xc_vxc_pw_create 15 9.0 1.379 1.379 6.036 6.036 init_scf_run 1 3.0 0.000 0.000 5.079 5.079 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 4.818 4.818 dbcsr_finalize 159 9.9 0.024 0.024 4.518 4.518 dbcsr_merge_all 91 11.1 0.099 0.099 4.355 4.355 ------------------------------------------------------------------------------- From /workspace/artifacts/diag_cu144_broy_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.030 0.032 95.636 95.637 qs_energies 1 2.0 0.001 0.002 95.230 95.230 scf_env_do_scf 1 3.0 0.000 0.000 89.907 89.907 scf_env_do_scf_inner_loop 15 4.0 0.001 0.002 89.907 89.907 qs_ks_update_qs_env 15 5.0 0.000 0.000 44.100 44.139 rebuild_ks_matrix 15 6.0 0.000 0.000 44.045 44.084 qs_ks_build_kohn_sham_matrix 15 7.0 0.004 0.005 44.045 44.084 sum_up_and_integrate 15 8.0 0.016 0.018 24.486 24.544 integrate_v_rspace 15 9.0 0.001 0.001 24.470 24.528 qs_rho_update_rho 16 5.0 0.000 0.000 24.460 24.462 calculate_rho_elec 16 6.0 0.011 0.012 24.459 24.462 grid_integrate_task_list 15 10.0 22.476 23.121 22.476 23.121 grid_collocate_task_list 16 7.0 22.300 22.732 22.300 22.732 qs_scf_new_mos 15 5.0 0.001 0.001 21.667 21.884 eigensolver 15 6.0 0.002 0.003 19.823 19.835 qs_vxc_create 15 8.0 0.001 0.001 18.913 18.932 calculate_dispersion_nonloc 15 9.0 1.487 1.568 15.415 15.449 pw_transfer 1191 9.8 0.144 0.150 15.088 15.281 fft_wrap_pw1pw2 1086 10.9 0.022 0.025 14.764 14.966 cp_fm_diag_elpa 15 7.0 0.000 0.000 14.341 14.351 cp_fm_diag_elpa_base 15 8.0 14.034 14.085 14.334 14.340 fft3d_ps 1086 12.9 6.504 6.803 11.278 11.604 fft_wrap_pw1pw2_150 765 12.0 0.784 0.840 9.982 10.032 cp_fm_cholesky_restore 45 7.0 5.200 5.268 5.200 5.268 fft_wrap_pw1pw2_200 197 11.5 0.433 0.461 4.590 4.744 xc_vxc_pw_create 15 9.0 0.071 0.090 3.498 3.516 mp_alltoall_z22v 1086 14.9 2.989 3.343 2.989 3.343 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 3.256 3.256 build_core_hamiltonian_matrix 1 4.0 0.000 0.000 2.821 3.069 x_to_yz 585 14.0 1.128 1.164 2.651 2.854 rs_pw_transfer 158 9.4 0.003 0.004 1.960 2.614 vdW_energy 15 10.0 2.297 2.400 2.297 2.400 density_rs2pw 16 7.0 0.002 0.002 1.947 2.364 yz_to_x 501 13.7 0.621 0.680 2.087 2.315 build_core_ppnl 1 5.0 1.887 2.085 1.887 2.085 ------------------------------------------------------------------------------- Plot: name="diag_cu144_broy_timings_32omp", title="Timings of diag_cu144_broy with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32omp", name="rest", label="rest", y=85.629, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=33.63, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=24.031, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=22.809, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_s", label="fft3d_s", y=12.674, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=12.122, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 Plot: name="diag_cu144_broy_timings_32mpi", title="Timings of diag_cu144_broy with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="rest", label="rest", y=25.121999999999986, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=14.034, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=22.3, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=22.476, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=5.2, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=6.504, yerr=0.0 Running bench_dftb.inp with 1 threads and 32 ranks... done. Running bench_dftb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/bench_dftb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.094 0.094 310.443 310.443 qs_energies 1 2.0 0.000 0.000 310.264 310.264 ls_scf 1 3.0 0.000 0.000 308.243 308.243 ls_scf_main 1 4.0 0.002 0.002 293.394 293.394 density_matrix_trs4 11 5.0 0.013 0.013 146.991 146.991 ls_scf_dm_to_ks 11 5.0 0.000 0.000 139.240 139.240 matrix_ls_to_qs 11 6.0 0.000 0.000 134.684 134.684 dbcsr_multiply_generic 185 6.1 0.535 0.535 99.119 99.119 dbcsr_copy_into_existing 11 7.0 80.475 80.475 80.475 80.475 multiply_cannon 185 7.1 0.353 0.353 61.189 61.189 dbcsr_complete_redistribute 23 7.5 42.715 42.715 59.315 59.315 matrix_decluster 11 7.0 0.000 0.000 54.199 54.199 multiply_cannon_loop 185 8.1 0.421 0.421 41.803 41.803 multiply_cannon_multrec 185 9.1 39.340 39.340 39.393 39.393 make_m2s 370 7.1 0.033 0.033 31.714 31.714 make_images 370 8.1 7.776 7.776 29.051 29.051 arnoldi_extremal 12 6.1 0.000 0.000 25.214 25.214 arnoldi_normal_ev 12 7.1 0.027 0.027 25.214 25.214 build_subspace 23 8.1 0.145 0.145 24.575 24.575 dbcsr_matrix_vector_mult 652 9.0 0.253 0.253 23.612 23.612 dbcsr_matrix_vector_mult_local 652 10.0 22.196 22.196 22.217 22.217 dbcsr_finalize 646 7.5 0.225 0.225 21.992 21.992 dbcsr_merge_all 597 8.5 3.653 3.653 20.264 20.264 setup_rec_index_2d 370 8.1 18.870 18.870 18.870 18.870 dbcsr_sort_indices 1103 9.9 16.123 16.123 16.123 16.123 tree_to_linear_d 110 9.4 14.159 14.159 14.159 14.159 quick_finalize 395 10.0 0.593 0.593 13.858 13.858 ls_scf_init_scf 1 4.0 0.000 0.000 13.854 13.854 ls_scf_init_matrix_S 1 5.0 0.000 0.000 13.385 13.385 dbcsr_special_finalize 370 9.1 0.004 0.004 12.777 12.777 matrix_sqrt_Newton_Schulz 1 6.0 0.001 0.001 12.471 12.471 dbcsr_dot_sd 144 6.3 9.715 9.715 9.716 9.716 dbcsr_frobenius_norm 142 6.1 8.581 8.581 8.584 8.584 dbcsr_new_transposed 2 7.0 0.166 0.166 8.339 8.339 make_images_data 370 9.1 0.013 0.013 8.292 8.292 dbcsr_redistribute 2 8.0 8.071 8.071 8.137 8.137 matrix_qs_to_ls 12 5.1 0.000 0.000 7.448 7.448 matrix_cluster 12 6.1 0.000 0.000 7.448 7.448 hybrid_alltoall_any 393 9.9 6.008 6.008 6.843 6.843 ------------------------------------------------------------------------------- From /workspace/artifacts/bench_dftb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.009 0.012 111.942 111.943 qs_energies 1 2.0 0.000 0.000 111.838 111.838 ls_scf 1 3.0 0.000 0.000 111.756 111.757 ls_scf_main 1 4.0 0.001 0.004 107.276 107.276 density_matrix_trs4 11 5.0 0.010 0.015 103.109 103.220 dbcsr_multiply_generic 185 6.1 0.085 0.110 96.937 97.192 multiply_cannon 185 7.1 0.055 0.058 81.942 82.607 multiply_cannon_loop 185 8.1 0.296 0.306 77.471 78.853 multiply_cannon_multrec 1480 9.1 50.828 53.160 51.449 53.765 mp_waitall_1 11936 10.3 23.034 26.673 23.034 26.673 multiply_cannon_metrocomm3 1480 9.1 0.024 0.027 13.748 19.702 make_m2s 370 7.1 0.040 0.044 10.805 10.919 make_images 370 8.1 0.731 0.768 10.670 10.787 multiply_cannon_metrocomm1 1480 9.1 0.014 0.017 5.253 8.936 calculate_norms 2960 9.1 6.626 7.089 6.626 7.089 make_images_data 370 9.1 0.015 0.016 4.520 4.869 arnoldi_extremal 12 6.1 0.001 0.001 4.498 4.510 arnoldi_normal_ev 12 7.1 0.002 0.008 4.497 4.510 build_subspace 23 8.1 0.048 0.063 4.355 4.359 hybrid_alltoall_any 393 9.9 0.392 2.032 3.814 4.143 dbcsr_matrix_vector_mult 652 9.0 0.021 0.089 3.618 3.705 ls_scf_dm_to_ks 11 5.0 0.000 0.000 3.584 3.660 mp_sum_l 1039 5.9 2.617 3.591 2.617 3.591 ls_scf_init_scf 1 4.0 0.000 0.000 3.436 3.437 ls_scf_init_matrix_S 1 5.0 0.000 0.000 3.385 3.405 dbcsr_complete_redistribute 23 7.5 1.987 2.062 3.221 3.343 matrix_ls_to_qs 11 6.0 0.000 0.000 3.154 3.287 make_images_pack 370 9.1 2.958 3.226 2.966 3.233 matrix_sqrt_Newton_Schulz 1 6.0 0.001 0.001 3.102 3.104 matrix_decluster 11 7.0 0.000 0.000 2.890 3.026 dbcsr_matrix_vector_mult_local 652 10.0 2.761 2.992 2.767 2.998 buffer_matrices_ensure_size 370 8.1 2.707 2.841 2.707 2.841 dbcsr_add_d 280 6.0 0.002 0.003 2.556 2.636 dbcsr_add_anytype 280 7.0 1.418 1.482 2.554 2.634 dbcsr_multiply_generic_mpsum_f 137 7.1 0.001 0.001 1.635 2.496 dbcsr_finalize 646 7.5 0.017 0.017 2.327 2.482 ------------------------------------------------------------------------------- Plot: name="bench_dftb_timings_32omp", title="Timings of bench_dftb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32omp", name="rest", label="rest", y=106.84699999999998, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=80.475, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=42.715, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=39.34, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=22.196, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="setup_rec_index_2d", label="setup_rec_index_2d", y=18.87, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="calculate_norms", label="calculate_norms", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="make_images_pack", label="make_images_pack", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="bench_dftb_timings_32mpi", title="Timings of bench_dftb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32mpi", name="rest", label="rest", y=23.747999999999976, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=1.987, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=50.828, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=2.761, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="setup_rec_index_2d", label="setup_rec_index_2d", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="calculate_norms", label="calculate_norms", y=6.626, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="make_images_pack", label="make_images_pack", y=2.958, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=23.034, yerr=0.0 Running dbcsr.inp with 1 threads and 32 ranks... done. Running dbcsr.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/dbcsr_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.006 0.006 119.297 119.297 lib_test 1 2.0 0.000 0.000 119.290 119.290 dbcsr_run_tests 3 3.0 0.003 0.003 119.290 119.290 test_multiplies_multiproc 3 4.0 0.001 0.001 97.816 97.816 dbcsr_redistribute 9 5.0 65.252 65.252 68.911 68.911 dbcsr_multiply_generic 9 5.0 0.001 0.001 26.823 26.823 dbcsr_make_random_matrix 9 4.0 15.753 15.753 21.372 21.372 multiply_cannon 9 6.0 0.002 0.002 19.665 19.665 multiply_cannon_loop 9 7.0 0.005 0.005 19.110 19.110 multiply_cannon_multrec 9 8.0 19.104 19.104 19.105 19.105 dbcsr_finalize 27 5.7 0.006 0.006 9.738 9.738 dbcsr_merge_all 18 6.5 3.496 3.496 8.926 8.926 tree_to_linear_d 9 7.0 3.400 3.400 3.400 3.400 mp_alltoall_d11v 27 6.0 3.301 3.301 3.301 3.301 dbcsr_data_release 975 7.6 2.660 2.660 2.660 2.660 make_m2s 18 6.0 0.001 0.001 2.407 2.407 ------------------------------------------------------------------------------- From /workspace/artifacts/dbcsr_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.003 0.005 31.446 31.446 lib_test 1 2.0 0.000 0.001 31.411 31.434 dbcsr_run_tests 3 3.0 0.001 0.001 31.409 31.433 test_multiplies_multiproc 3 4.0 0.001 0.002 30.202 30.274 dbcsr_multiply_generic 9 5.0 0.002 0.002 28.068 28.151 multiply_cannon 9 6.0 0.003 0.003 25.393 26.002 multiply_cannon_loop 9 7.0 0.004 0.005 24.869 25.473 multiply_cannon_multrec 72 8.0 21.009 21.923 21.011 21.924 mp_waitall_1 576 9.2 4.336 5.178 4.336 5.178 multiply_cannon_metrocomm1 72 8.0 0.002 0.002 3.467 4.274 mp_sum_l 310 2.7 0.555 1.274 0.555 1.274 dbcsr_multiply_generic_mpsum_f 9 6.0 0.000 0.000 0.548 1.267 dbcsr_make_random_matrix 9 4.0 0.904 0.917 1.161 1.205 make_m2s 18 6.0 0.001 0.001 1.121 1.198 make_images 18 7.0 0.027 0.030 1.117 1.195 dbcsr_finalize 27 5.7 0.001 0.001 1.077 1.154 multiply_cannon_metrocomm3 72 8.0 0.000 0.001 0.377 1.120 dbcsr_merge_all 18 6.5 0.178 0.200 0.938 1.030 dbcsr_data_release 444 7.6 0.757 0.901 0.757 0.901 dbcsr_redistribute 9 5.0 0.446 0.499 0.788 0.826 dbcsr_destroy 111 5.9 0.006 0.113 0.626 0.732 make_images_data 18 8.0 0.001 0.001 0.554 0.662 ------------------------------------------------------------------------------- Plot: name="dbcsr_timings_32omp", title="Timings of dbcsr with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32omp", name="rest", label="rest", y=9.632000000000005, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_redistribute", label="dbcsr_redistribute", y=65.252, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=19.104, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=15.753, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_merge_all", label="dbcsr_merge_all", y=3.496, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="tree_to_linear_d", label="tree_to_linear_d", y=3.4, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_data_release", label="dbcsr_data_release", y=2.66, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 Plot: name="dbcsr_timings_32mpi", title="Timings of dbcsr with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32mpi", name="rest", label="rest", y=3.2609999999999992, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_redistribute", label="dbcsr_redistribute", y=0.446, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=21.009, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=0.904, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_merge_all", label="dbcsr_merge_all", y=0.178, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="tree_to_linear_d", label="tree_to_linear_d", y=0.0, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_data_release", label="dbcsr_data_release", y=0.757, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=4.336, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=0.555, yerr=0.0 Running MQAE_single_node.inp with 1 threads and 32 ranks... done. Running MQAE_single_node.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/MQAE_single_node_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.053 0.053 162.531 162.531 qs_mol_dyn_low 1 2.0 0.006 0.006 160.436 160.436 velocity_verlet 5 3.0 0.005 0.005 130.129 130.129 qmmm_el_coupling 6 3.8 0.000 0.000 77.572 77.572 qmmm_elec_with_gaussian 6 4.8 0.199 0.199 77.566 77.566 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 75.769 75.769 qmmm_elec_gaussian_low_G 6 6.8 74.143 74.143 74.143 74.143 qs_forces 6 3.8 0.001 0.001 60.000 60.000 qs_energies 6 4.8 0.001 0.001 53.215 53.215 scf_env_do_scf 6 5.8 0.001 0.001 49.375 49.375 scf_env_do_scf_inner_loop 39 6.8 0.004 0.004 43.026 43.026 rebuild_ks_matrix 45 8.4 0.000 0.000 42.393 42.393 qs_ks_build_kohn_sham_matrix 45 9.4 0.008 0.008 42.393 42.393 qs_ks_update_qs_env 45 7.8 0.000 0.000 36.301 36.301 pw_transfer 966 11.9 0.080 0.080 26.469 26.469 fft_wrap_pw1pw2 801 13.0 0.010 0.010 26.057 26.057 fft_wrap_pw1pw2_150 507 14.3 2.677 2.677 25.458 25.458 qs_vxc_create 45 10.4 0.001 0.001 22.959 22.959 xc_vxc_pw_create 45 11.4 3.985 3.985 22.958 22.958 fist_calc_energy_force 6 3.8 0.002 0.002 12.895 12.895 force_nonbond 6 4.8 11.518 11.518 11.518 11.518 qs_rho_update_rho 45 7.9 0.000 0.000 11.471 11.471 calculate_rho_elec 45 8.9 0.918 0.918 11.471 11.471 pw_scatter_s 429 15.4 11.374 11.374 11.374 11.374 xc_rho_set_and_dset_create 45 12.4 0.259 0.259 10.822 10.822 fft3d_s 802 15.0 10.332 10.332 10.342 10.342 pw_integral_ab 2539 7.4 9.297 9.297 9.297 9.297 qmmm_forces 6 3.8 0.001 0.001 9.105 9.105 qmmm_forces_with_gaussian 6 4.8 0.160 0.160 8.576 8.576 qs_ks_ddapc 45 10.4 0.001 0.001 7.336 7.336 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 6.440 6.440 init_scf_loop 6 6.8 0.000 0.000 6.343 6.343 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 6.105 6.105 pw_poisson_solve 51 9.9 2.532 2.532 5.830 5.830 grid_collocate_task_list 45 9.9 5.335 5.335 5.335 5.335 qmmm_forces_gaussian_low_G 6 6.8 5.334 5.334 5.334 5.334 density_rs2pw 45 9.9 0.003 0.003 5.218 5.218 sum_up_and_integrate 45 10.4 0.246 0.246 4.765 4.765 integrate_v_rspace 45 11.4 0.011 0.011 4.519 4.519 cp_ddapc_apply_CD 45 11.4 0.007 0.007 4.509 4.509 ------------------------------------------------------------------------------- From /workspace/artifacts/MQAE_single_node_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.037 0.040 103.152 103.153 qs_mol_dyn_low 1 2.0 0.006 0.007 101.484 101.578 qs_forces 6 3.8 0.001 0.002 76.526 76.526 qs_energies 6 4.8 0.001 0.001 72.964 72.964 scf_env_do_scf 6 5.8 0.000 0.001 71.146 71.146 scf_env_do_scf_inner_loop 113 6.2 0.003 0.011 68.316 68.318 rebuild_ks_matrix 119 8.1 0.000 0.001 50.981 50.998 qs_ks_build_kohn_sham_matrix 119 9.1 0.025 0.026 50.980 50.998 qs_ks_update_qs_env 119 7.3 0.001 0.001 47.887 47.903 velocity_verlet 5 3.0 0.003 0.004 41.704 41.709 pw_transfer 2446 11.8 0.308 0.325 34.027 34.250 fft_wrap_pw1pw2 2059 12.8 0.038 0.041 33.045 33.298 fft_wrap_pw1pw2_150 1321 14.0 2.848 3.142 32.038 32.325 qs_vxc_create 119 10.1 0.004 0.005 26.501 26.511 xc_vxc_pw_create 119 11.1 0.544 0.707 26.497 26.506 fft3d_ps 2059 14.8 14.960 16.664 24.969 25.293 qs_rho_update_rho 119 7.3 0.001 0.001 19.779 19.781 calculate_rho_elec 119 8.3 0.087 0.096 19.778 19.780 sum_up_and_integrate 119 10.1 0.109 0.119 17.797 17.845 integrate_v_rspace 119 11.1 0.005 0.006 17.688 17.731 rs_pw_transfer 988 11.5 0.019 0.021 14.213 14.980 qmmm_forces 6 3.8 0.003 0.003 13.818 13.818 qmmm_forces_with_gaussian 6 4.8 0.496 0.602 13.380 13.515 density_rs2pw 119 9.3 0.012 0.014 12.535 13.201 xc_rho_set_and_dset_create 119 12.1 0.560 0.651 12.160 12.554 potential_pw2rs 119 12.1 0.012 0.013 11.203 11.217 qmmm_el_coupling 6 3.8 0.000 0.000 9.930 9.993 qmmm_elec_with_gaussian 6 4.8 0.456 0.622 9.926 9.989 mp_alltoall_z22v 2059 16.8 6.220 8.227 6.220 8.227 grid_collocate_task_list 119 9.3 6.881 7.419 6.881 7.419 grid_integrate_task_list 119 12.1 5.977 6.268 5.977 6.268 rs_pw_transfer_PW2RS_150 125 13.9 3.139 3.217 6.126 6.175 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 5.880 5.964 rs_pw_transfer_RS2PW_150 125 11.2 2.457 2.647 5.093 5.877 yz_to_x 964 15.3 1.440 1.614 4.519 5.874 x_to_yz 1095 16.3 2.290 2.523 5.432 5.785 mp_waitany 4028 12.8 4.488 5.708 4.488 5.708 pw_restrict_s3 18 5.8 2.500 2.548 5.515 5.594 qmmm_forces_gaussian_low_G 6 6.8 4.800 4.905 4.800 4.905 qmmm_elec_with_gaussian:spline 6 5.8 0.000 0.000 4.427 4.480 pw_prolongate_s3 18 6.8 1.996 2.052 4.427 4.480 pw_integral_ab 2761 7.7 3.675 3.713 4.081 4.275 qs_scf_new_mos 113 7.2 0.001 0.001 4.193 4.204 qs_scf_loop_do_ot 113 8.2 0.001 0.001 4.192 4.203 ot_scf_mini 113 9.2 0.002 0.002 4.006 4.014 dbcsr_multiply_generic 2588 12.3 0.103 0.118 3.755 3.835 qs_ks_ddapc 119 10.1 0.003 0.004 3.512 3.680 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 3.421 3.512 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 3.108 3.109 mp_sum_dm3 33 5.7 2.888 3.004 2.888 3.004 pw_gather_p 964 14.3 2.595 2.884 2.595 2.884 init_scf_loop 6 6.8 0.000 0.000 2.825 2.826 mp_waitall_1 188862 16.2 2.562 2.719 2.562 2.719 pw_scatter_p 1095 15.3 2.526 2.654 2.526 2.654 qmmm_elec_gaussian_low_G 6 6.8 2.455 2.537 2.455 2.537 ot_mini 113 10.2 0.001 0.001 2.521 2.533 pw_derive 732 12.5 2.088 2.285 2.088 2.285 ------------------------------------------------------------------------------- Plot: name="MQAE_single_node_timings_32omp", title="Timings of MQAE_single_node with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32omp", name="rest", label="rest", y=35.19800000000002, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=74.143, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="force_nonbond", label="force_nonbond", y=11.518, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_scatter_s", label="pw_scatter_s", y=11.374, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_s", label="fft3d_s", y=10.332, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_integral_ab", label="pw_integral_ab", y=9.297, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=5.335, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=5.334, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=0.0, yerr=0.0 Plot: name="MQAE_single_node_timings_32mpi", title="Timings of MQAE_single_node with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32mpi", name="rest", label="rest", y=58.184, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=2.455, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="force_nonbond", label="force_nonbond", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_scatter_s", label="pw_scatter_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_integral_ab", label="pw_integral_ab", y=3.675, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=6.881, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=4.8, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=14.96, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=6.22, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=5.977, yerr=0.0 Summary: Performance test works fine. Status: OK Uploading artifacts... done EndDate: 2021-10-12 20:15:03+00:00