StartDate: 2021-12-06 19:39:12+00:00 CpuId: 64x Intel Xeon W 2000 / D-2100 (Skylake / Cascade Lake) {Skylake}, 14nm CommitSHA: 8d484e4614370e12de6c0ad035cd33ab61c28c35 CommitTime: 2021-12-06 16:55:24 +0100 CommitAuthor: abussy CommitSubject: RI-HFX| memory management improvements Trying to pull image cp2k-toolchain-mpich... success :-) Trying to pull image cp2k-perf-openmp... success :-) #################### Running Image cp2k-perf-openmp #################### ========== Fetching Git Commit ========== CommitSHA: 8d484e4614370e12de6c0ad035cd33ab61c28c35 CommitTime: 2021-12-06 16:55:24 +0100 CommitAuthor: abussy CommitSubject: RI-HFX| memory management improvements ========== Running Test ========== ========== Compiling CP2K ========== Compiling cp2k... done. ========== Running Performance Test ========== Running H2O-64.inp with 1 threads and 32 ranks... done. Running H2O-64.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.031 0.031 157.515 157.515 qs_mol_dyn_low 1 2.0 0.004 0.004 156.589 156.589 qs_forces 11 3.9 0.001 0.001 156.533 156.533 qs_energies 11 4.9 0.001 0.001 146.918 146.918 scf_env_do_scf 11 5.9 0.001 0.001 115.854 115.854 velocity_verlet 10 3.0 0.002 0.002 109.202 109.202 scf_env_do_scf_inner_loop 108 6.5 0.009 0.009 78.632 78.632 init_scf_loop 11 6.9 0.000 0.000 37.039 37.039 prepare_preconditioner 11 7.9 0.000 0.000 33.082 33.082 make_preconditioner 11 8.9 0.000 0.000 33.082 33.082 rebuild_ks_matrix 119 8.3 0.001 0.001 32.577 32.577 qs_ks_build_kohn_sham_matrix 119 9.3 0.018 0.018 32.576 32.576 make_full_inverse_cholesky 11 9.9 0.000 0.000 31.150 31.150 qs_ks_update_qs_env 119 7.6 0.001 0.001 30.421 30.421 qs_rho_update_rho 119 7.7 0.001 0.001 27.608 27.608 calculate_rho_elec 119 8.7 1.537 1.537 27.607 27.607 qs_scf_new_mos 108 7.5 0.001 0.001 26.577 26.577 qs_scf_loop_do_ot 108 8.5 0.001 0.001 26.576 26.576 ot_scf_mini 108 9.5 0.003 0.003 24.731 24.731 dbcsr_multiply_generic 2286 12.5 0.168 0.168 22.502 22.502 grid_collocate_task_list 119 9.7 21.836 21.836 21.836 21.836 sum_up_and_integrate 119 10.3 0.372 0.372 21.048 21.048 integrate_v_rspace 119 11.3 0.534 0.534 20.675 20.675 cp_fm_cholesky_invert 11 10.9 18.754 18.754 18.754 18.754 grid_integrate_task_list 119 12.3 17.809 17.809 17.809 17.809 init_scf_run 11 5.9 0.001 0.001 16.074 16.074 scf_env_initial_rho_setup 11 6.9 0.001 0.001 16.073 16.073 wfi_extrapolate 11 7.9 0.001 0.001 15.254 15.254 cp_gemm 81 9.0 0.000 0.000 14.786 14.786 cp_gemm_cosma 81 10.0 14.786 14.786 14.786 14.786 ot_mini 108 10.5 0.001 0.001 14.482 14.482 make_m2s 4572 13.5 0.064 0.064 12.408 12.408 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 10.131 10.131 qs_ot_get_derivative 108 11.5 0.001 0.001 7.422 7.422 pw_transfer 1439 11.6 0.091 0.091 7.204 7.204 ot_diis_step 108 11.5 0.006 0.006 7.056 7.056 fft_wrap_pw1pw2 1201 12.6 0.010 0.010 6.917 6.917 make_images 4572 14.5 2.460 2.460 6.613 6.613 qs_ot_get_p 119 10.4 0.001 0.001 6.398 6.398 cp_fm_cholesky_decompose 22 10.9 6.371 6.371 6.371 6.371 dbcsr_complete_redistribute 329 12.2 2.895 2.895 6.146 6.146 dbcsr_make_dense_low 5837 15.5 0.093 0.093 5.960 5.960 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 5.921 5.921 apply_single 119 13.6 0.000 0.000 5.920 5.920 fft_wrap_pw1pw2_140 487 13.2 0.585 0.585 5.848 5.848 make_dense_data 5837 16.5 5.215 5.215 5.845 5.845 multiply_cannon 2286 13.5 0.988 0.988 5.762 5.762 qs_env_update_s_mstruct 11 6.9 0.000 0.000 5.752 5.752 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 5.718 5.718 dbcsr_make_images_dense 3978 14.8 0.026 0.026 5.350 5.350 qs_create_task_list 11 7.9 0.000 0.000 5.237 5.237 generate_qs_task_list 11 8.9 3.605 3.605 5.237 5.237 copy_dbcsr_to_fm 153 11.3 0.003 0.003 4.996 4.996 qs_ot_p2m_diag 50 11.0 0.209 0.209 4.883 4.883 dbcsr_copy 2102 12.0 0.273 0.273 4.813 4.813 qs_energies_compute_matrix_w 11 5.9 0.000 0.000 4.701 4.701 calculate_w_matrix_ot 11 6.9 0.008 0.008 4.701 4.701 dbcsr_copy_into_existing 22 7.9 4.496 4.496 4.496 4.496 pw_poisson_solve 119 10.3 1.804 1.804 4.365 4.365 cp_dbcsr_syevd 50 12.0 0.004 0.004 4.345 4.345 multiply_cannon_loop 2286 14.5 0.049 0.049 4.250 4.250 density_rs2pw 119 9.7 0.006 0.006 4.234 4.234 multiply_cannon_multrec 2286 15.5 4.135 4.135 4.200 4.200 cp_fm_diag_elpa 50 13.0 0.000 0.000 4.189 4.189 cp_fm_diag_elpa_base 50 14.0 4.128 4.128 4.188 4.188 transfer_dbcsr_to_fm 11 10.9 0.000 0.000 4.142 4.142 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 3.894 3.894 build_core_hamiltonian_matrix 11 6.9 0.001 0.001 3.832 3.832 copy_fm_to_dbcsr 176 11.2 0.002 0.002 3.206 3.206 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.028 0.032 71.712 71.713 qs_mol_dyn_low 1 2.0 0.006 0.007 71.566 71.572 qs_forces 11 3.9 0.002 0.002 71.510 71.510 qs_energies 11 4.9 0.001 0.002 66.613 66.616 scf_env_do_scf 11 5.9 0.001 0.001 60.013 60.014 scf_env_do_scf_inner_loop 108 6.5 0.003 0.011 55.669 55.670 velocity_verlet 10 3.0 0.002 0.002 42.897 42.898 rebuild_ks_matrix 119 8.3 0.001 0.001 27.922 28.005 qs_ks_build_kohn_sham_matrix 119 9.3 0.021 0.023 27.922 28.004 qs_ks_update_qs_env 119 7.6 0.001 0.001 24.839 24.921 sum_up_and_integrate 119 10.3 0.045 0.049 22.121 22.159 integrate_v_rspace 119 11.3 0.004 0.005 22.077 22.111 qs_rho_update_rho 119 7.7 0.001 0.001 21.914 21.922 calculate_rho_elec 119 8.7 0.047 0.049 21.913 21.922 grid_collocate_task_list 119 9.7 15.691 16.666 15.691 16.666 dbcsr_multiply_generic 2286 12.5 0.133 0.138 16.405 16.506 grid_integrate_task_list 119 12.3 15.936 16.492 15.936 16.492 qs_scf_new_mos 108 7.5 0.001 0.001 13.478 13.549 qs_scf_loop_do_ot 108 8.5 0.001 0.001 13.477 13.548 ot_scf_mini 108 9.5 0.003 0.003 12.656 12.733 multiply_cannon 2286 13.5 0.217 0.223 10.907 11.264 multiply_cannon_loop 2286 14.5 0.222 0.234 9.864 10.323 mp_waitall_1 169478 16.3 8.047 8.383 8.047 8.383 ot_mini 108 10.5 0.001 0.001 7.447 7.529 rs_pw_transfer 974 11.9 0.016 0.017 6.492 7.402 density_rs2pw 119 9.7 0.008 0.009 5.661 6.595 multiply_cannon_metrocomm3 18288 15.5 0.080 0.082 5.232 5.672 pw_transfer 1439 11.6 0.127 0.137 5.460 5.551 fft_wrap_pw1pw2 1201 12.6 0.014 0.015 5.185 5.288 potential_pw2rs 119 12.3 0.010 0.010 4.792 4.800 fft_wrap_pw1pw2_140 487 13.2 0.527 0.550 4.475 4.717 init_scf_run 11 5.9 0.000 0.002 4.547 4.547 scf_env_initial_rho_setup 11 6.9 0.000 0.001 4.547 4.547 init_scf_loop 11 6.9 0.000 0.001 4.328 4.329 wfi_extrapolate 11 7.9 0.001 0.001 4.166 4.167 fft3d_ps 1201 14.6 2.105 2.283 3.849 3.931 qs_ot_get_derivative 108 11.5 0.001 0.002 3.696 3.771 ot_diis_step 108 11.5 0.005 0.005 3.720 3.721 make_m2s 4572 13.5 0.076 0.079 3.653 3.699 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 3.601 3.668 apply_single 119 13.6 0.001 0.001 3.601 3.667 multiply_cannon_multrec 18288 15.5 3.439 3.561 3.456 3.578 mp_waitany 9880 13.7 2.460 3.371 2.460 3.371 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 3.285 3.295 rs_pw_transfer_RS2PW_140 130 11.5 0.441 0.486 2.213 3.139 make_images 4572 14.5 0.191 0.196 2.959 3.012 rs_pw_transfer_PW2RS_140 130 13.9 1.147 1.207 2.371 2.401 mp_alltoall_d11v 2130 13.8 1.613 2.181 1.613 2.181 rs_gather_matrices 119 12.3 0.118 0.131 1.299 1.900 qs_ot_get_p 119 10.4 0.001 0.001 1.815 1.892 cp_gemm 81 9.0 0.000 0.000 1.671 1.675 cp_gemm_cosma 81 10.0 1.671 1.675 1.671 1.675 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 1.438 1.577 ------------------------------------------------------------------------------- Plot: name="H2O-64_timings_32omp", title="Timings of H2O-64 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32omp", name="rest", label="rest", y=73.82399999999998, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=21.836, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=18.754, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=17.809, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_gemm_cosma", label="cp_gemm_cosma", y=14.786, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=6.371, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=4.135, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="H2O-64_timings_32mpi", title="Timings of H2O-64 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32mpi", name="rest", label="rest", y=24.468000000000004, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=15.691, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=15.936, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_gemm_cosma", label="cp_gemm_cosma", y=1.671, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=3.439, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitany", label="mp_waitany", y=2.46, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=8.047, yerr=0.0 Running H2O-64_nonortho.inp with 1 threads and 32 ranks... done. Running H2O-64_nonortho.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_nonortho_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.048 0.048 210.041 210.041 qs_mol_dyn_low 1 2.0 0.004 0.004 209.239 209.239 qs_forces 11 3.9 0.001 0.001 209.181 209.181 qs_energies 11 4.9 0.001 0.001 195.782 195.782 scf_env_do_scf 11 5.9 0.001 0.001 160.513 160.513 velocity_verlet 10 3.0 0.002 0.002 141.197 141.197 scf_env_do_scf_inner_loop 96 6.5 0.009 0.009 119.810 119.810 rebuild_ks_matrix 107 8.3 0.001 0.001 61.281 61.281 qs_ks_build_kohn_sham_matrix 107 9.3 0.016 0.016 61.280 61.280 qs_ks_update_qs_env 107 7.6 0.001 0.001 55.197 55.197 qs_rho_update_rho 107 7.7 0.001 0.001 54.057 54.057 calculate_rho_elec 107 8.7 1.377 1.377 54.056 54.056 sum_up_and_integrate 107 10.3 0.335 0.335 50.857 50.857 integrate_v_rspace 107 11.3 0.452 0.452 50.522 50.522 grid_collocate_task_list 107 9.7 48.789 48.789 48.789 48.789 grid_integrate_task_list 107 12.3 47.925 47.925 47.925 47.925 init_scf_loop 11 6.9 0.000 0.000 40.501 40.501 prepare_preconditioner 11 7.9 0.000 0.000 33.207 33.207 make_preconditioner 11 8.9 0.000 0.000 33.207 33.207 make_full_inverse_cholesky 11 9.9 0.000 0.000 31.238 31.238 qs_scf_new_mos 96 7.5 0.001 0.001 22.794 22.794 qs_scf_loop_do_ot 96 8.5 0.001 0.001 22.793 22.793 ot_scf_mini 96 9.5 0.003 0.003 21.240 21.240 init_scf_run 11 5.9 0.001 0.001 19.283 19.283 scf_env_initial_rho_setup 11 6.9 0.001 0.001 19.282 19.282 dbcsr_multiply_generic 1966 12.4 0.150 0.150 19.056 19.056 cp_fm_cholesky_invert 11 10.9 19.055 19.055 19.055 19.055 wfi_extrapolate 11 7.9 0.001 0.001 18.128 18.128 cp_gemm 81 9.0 0.000 0.000 14.863 14.863 cp_gemm_cosma 81 10.0 14.863 14.863 14.863 14.863 ot_mini 96 10.5 0.001 0.001 12.330 12.330 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 11.041 11.041 make_m2s 3932 13.4 0.056 0.056 10.449 10.449 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 7.664 7.664 qs_env_update_s_mstruct 11 6.9 0.000 0.000 6.860 6.860 qs_ot_get_derivative 96 11.5 0.001 0.001 6.717 6.717 pw_transfer 1295 11.6 0.084 0.084 6.601 6.601 qs_create_task_list 11 7.9 0.000 0.000 6.351 6.351 generate_qs_task_list 11 8.9 4.721 4.721 6.351 6.351 fft_wrap_pw1pw2 1081 12.6 0.010 0.010 6.340 6.340 dbcsr_complete_redistribute 317 12.2 2.891 2.891 6.268 6.268 cp_fm_cholesky_decompose 22 10.9 6.135 6.135 6.135 6.135 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 5.732 5.732 qs_ot_get_p 107 10.4 0.001 0.001 5.691 5.691 make_images 3932 14.4 2.109 2.109 5.690 5.690 ot_diis_step 96 11.5 0.005 0.005 5.610 5.610 fft_wrap_pw1pw2_140 439 13.2 0.575 0.575 5.372 5.372 copy_dbcsr_to_fm 147 11.2 0.003 0.003 5.056 5.056 multiply_cannon 1966 13.4 0.754 0.754 4.951 4.951 dbcsr_make_dense_low 4961 15.5 0.079 0.079 4.926 4.926 make_dense_data 4961 16.5 4.310 4.310 4.828 4.828 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 4.812 4.812 apply_single 107 13.6 0.000 0.000 4.812 4.812 qs_energies_compute_matrix_w 11 5.9 0.000 0.000 4.790 4.790 calculate_w_matrix_ot 11 6.9 0.008 0.008 4.790 4.790 dbcsr_copy 1855 11.9 0.250 0.250 4.579 4.579 qs_ot_p2m_diag 44 11.0 0.191 0.191 4.425 4.425 dbcsr_make_images_dense 3386 14.7 0.023 0.023 4.382 4.382 dbcsr_copy_into_existing 22 7.9 4.289 4.289 4.290 4.290 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_nonortho_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.012 126.805 126.806 qs_mol_dyn_low 1 2.0 0.005 0.005 126.689 126.695 qs_forces 11 3.9 0.002 0.002 126.634 126.634 qs_energies 11 4.9 0.001 0.001 117.898 117.901 scf_env_do_scf 11 5.9 0.001 0.001 108.370 108.371 scf_env_do_scf_inner_loop 96 6.5 0.003 0.009 100.755 100.755 velocity_verlet 10 3.0 0.002 0.002 75.590 75.591 rebuild_ks_matrix 107 8.3 0.001 0.001 58.114 58.195 qs_ks_build_kohn_sham_matrix 107 9.3 0.019 0.020 58.113 58.195 sum_up_and_integrate 107 10.3 0.039 0.042 52.926 52.965 integrate_v_rspace 107 11.3 0.004 0.005 52.886 52.926 qs_ks_update_qs_env 107 7.6 0.001 0.001 51.172 51.243 qs_rho_update_rho 107 7.7 0.001 0.001 48.903 48.910 calculate_rho_elec 107 8.7 0.042 0.044 48.903 48.909 grid_integrate_task_list 107 12.3 46.089 47.204 46.089 47.204 grid_collocate_task_list 107 9.7 42.207 43.428 42.207 43.428 dbcsr_multiply_generic 1966 12.4 0.114 0.118 14.347 14.512 qs_scf_new_mos 96 7.5 0.001 0.001 11.559 11.621 qs_scf_loop_do_ot 96 8.5 0.001 0.001 11.558 11.620 ot_scf_mini 96 9.5 0.003 0.003 10.846 10.907 multiply_cannon 1966 13.4 0.186 0.192 9.606 9.894 multiply_cannon_loop 1966 14.4 0.192 0.204 8.715 9.196 rs_pw_transfer 878 11.9 0.014 0.016 7.104 8.488 density_rs2pw 107 9.7 0.008 0.008 6.211 7.608 init_scf_loop 11 6.9 0.000 0.001 7.600 7.600 init_scf_run 11 5.9 0.000 0.002 7.473 7.474 scf_env_initial_rho_setup 11 6.9 0.000 0.001 7.473 7.473 mp_waitall_1 146670 16.2 7.080 7.432 7.080 7.432 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 7.126 7.136 wfi_extrapolate 11 7.9 0.001 0.001 6.864 6.864 ot_mini 96 10.5 0.001 0.001 6.363 6.431 multiply_cannon_metrocomm3 15728 15.4 0.069 0.073 4.611 5.043 pw_transfer 1295 11.6 0.114 0.128 4.765 4.850 mp_waitany 8968 13.7 3.459 4.829 3.459 4.829 fft_wrap_pw1pw2 1081 12.6 0.012 0.014 4.521 4.621 rs_pw_transfer_RS2PW_140 118 11.5 0.371 0.411 3.180 4.580 potential_pw2rs 107 12.3 0.009 0.009 4.299 4.312 fft_wrap_pw1pw2_140 439 13.2 0.472 0.488 3.950 4.134 mp_alltoall_d11v 1998 13.7 2.737 4.047 2.737 4.047 rs_gather_matrices 107 12.3 0.108 0.121 2.450 3.748 fft3d_ps 1081 14.6 1.851 2.025 3.316 3.387 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 3.172 3.254 apply_single 107 13.6 0.001 0.001 3.172 3.254 ot_diis_step 96 11.5 0.004 0.005 3.231 3.231 make_m2s 3932 13.4 0.065 0.067 3.173 3.228 multiply_cannon_multrec 15728 15.4 3.077 3.157 3.091 3.172 qs_ot_get_derivative 96 11.5 0.001 0.001 3.105 3.168 make_images 3932 14.4 0.166 0.171 2.575 2.633 ------------------------------------------------------------------------------- Plot: name="H2O-64_nonortho_timings_32omp", title="Timings of H2O-64_nonortho with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="rest", label="rest", y=73.274, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=48.789, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=47.925, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=19.055, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_gemm_cosma", label="cp_gemm_cosma", y=14.863, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=6.135, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 Plot: name="H2O-64_nonortho_timings_32mpi", title="Timings of H2O-64_nonortho with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="rest", label="rest", y=24.893000000000015, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=42.207, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=46.089, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_gemm_cosma", label="cp_gemm_cosma", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=3.077, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=7.08, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitany", label="mp_waitany", y=3.459, yerr=0.0 Running H2O-hyb.inp with 1 threads and 32 ranks... done. Running H2O-hyb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-hyb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.374 0.374 272.251 272.251 qs_energies 1 2.0 0.000 0.000 271.035 271.035 scf_env_do_scf 1 3.0 0.000 0.000 268.637 268.637 qs_ks_update_qs_env 8 5.0 0.000 0.000 251.433 251.433 rebuild_ks_matrix 7 6.0 0.000 0.000 251.331 251.331 qs_ks_build_kohn_sham_matrix 7 7.0 0.001 0.001 251.331 251.331 hfx_ks_matrix 7 8.0 0.000 0.000 168.385 168.385 integrate_four_center 7 9.0 2.195 2.195 168.356 168.356 scf_env_do_scf_inner_loop 7 4.0 0.001 0.001 157.326 157.326 integrate_four_center_main 7 10.0 0.842 0.842 156.686 156.686 integrate_four_center_bin 450 11.0 155.844 155.844 155.844 155.844 init_scf_loop 1 4.0 0.000 0.000 111.297 111.297 cp_gemm 129 10.3 0.000 0.000 68.519 68.519 cp_gemm_cosma 129 11.3 68.519 68.519 68.519 68.519 admm_mo_calc_rho_aux 7 8.0 0.000 0.000 39.138 39.138 admm_fit_mo_coeffs 7 9.0 0.000 0.000 37.476 37.476 admm_mo_merge_derivs 7 8.0 0.000 0.000 35.272 35.272 merge_mo_derivs_diag 7 9.0 0.022 0.022 35.272 35.272 purify_mo_diag 7 10.0 0.001 0.001 22.362 22.362 fit_mo_coeffs 7 10.0 0.000 0.000 15.115 15.115 prepare_preconditioner 1 5.0 0.000 0.000 13.529 13.529 make_preconditioner 1 6.0 0.000 0.000 13.529 13.529 integrate_four_center_load 7 10.0 0.001 0.001 9.102 9.102 hfx_load_balance 1 11.0 0.002 0.002 9.101 9.101 arnoldi_normal_ev 11 9.3 0.002 0.002 8.049 8.049 estimate_cond_num 1 7.0 0.000 0.000 7.979 7.979 build_subspace 28 9.5 0.014 0.014 7.951 7.951 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-hyb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.189 0.195 186.934 186.935 qs_energies 1 2.0 0.000 0.001 186.598 186.599 scf_env_do_scf 1 3.0 0.000 0.000 186.053 186.053 qs_ks_update_qs_env 8 5.0 0.000 0.000 183.161 183.161 rebuild_ks_matrix 7 6.0 0.000 0.000 183.149 183.149 qs_ks_build_kohn_sham_matrix 7 7.0 0.002 0.002 183.149 183.149 hfx_ks_matrix 7 8.0 0.000 0.000 171.131 171.131 integrate_four_center 7 9.0 0.089 0.398 171.116 171.116 integrate_four_center_main 7 10.0 0.004 0.005 154.939 160.293 integrate_four_center_bin 448 11.0 154.934 160.289 154.934 160.289 scf_env_do_scf_inner_loop 7 4.0 0.000 0.001 107.917 107.918 init_scf_loop 1 4.0 0.000 0.000 78.134 78.134 integrate_four_center_load 7 10.0 0.000 0.000 9.387 9.390 hfx_load_balance 1 11.0 0.001 0.002 9.387 9.390 mp_sync 70 11.3 5.975 8.983 5.975 8.983 cp_gemm 129 10.3 0.000 0.001 4.960 4.965 cp_gemm_cosma 129 11.3 4.960 4.964 4.960 4.964 hfx_load_balance_bin 1 12.0 4.385 4.698 4.385 4.698 hfx_load_balance_count 1 12.0 4.396 4.680 4.396 4.680 ------------------------------------------------------------------------------- Plot: name="H2O-hyb_timings_32omp", title="Timings of H2O-hyb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32omp", name="rest", label="rest", y=44.476999999999975, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_bin", label="integrate_four_center_bin", y=155.844, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="cp_gemm_cosma", label="cp_gemm_cosma", y=68.519, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center", label="integrate_four_center", y=2.195, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_main", label="integrate_four_center_main", y=0.842, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="CP2K", label="CP2K", y=0.374, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_count", label="hfx_load_balance_count", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 Plot: name="H2O-hyb_timings_32mpi", title="Timings of H2O-hyb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32mpi", name="rest", label="rest", y=12.002000000000038, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_bin", label="integrate_four_center_bin", y=154.934, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="cp_gemm_cosma", label="cp_gemm_cosma", y=4.96, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center", label="integrate_four_center", y=0.089, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_main", label="integrate_four_center_main", y=0.004, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="CP2K", label="CP2K", y=0.189, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_count", label="hfx_load_balance_count", y=4.396, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=4.385, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="mp_sync", label="mp_sync", y=5.975, yerr=0.0 Running GW_PBE_4benzene.inp with 1 threads and 32 ranks... done. Running GW_PBE_4benzene.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/GW_PBE_4benzene_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.015 0.015 402.914 402.914 qs_energies 1 2.0 0.000 0.000 402.420 402.420 mp2_main 1 3.0 0.000 0.000 396.040 396.040 mp2_gpw_main 1 4.0 0.001 0.001 395.615 395.615 rpa_ri_compute_en 1 5.0 0.000 0.000 380.640 380.640 rpa_num_int 1 6.0 0.000 0.000 380.616 380.616 compute_mat_P_omega 1 7.0 0.002 0.002 204.326 204.326 compute_mat_P_omega_contract 10 8.0 11.888 11.888 202.791 202.791 dbcsr_t_total 2336 9.6 0.016 0.016 193.464 193.464 cp_gemm 105 8.4 0.000 0.000 148.481 148.481 cp_gemm_cosma 105 9.4 148.481 148.481 148.481 148.481 dbcsr_t_contract 787 11.0 46.578 46.578 121.072 121.072 GW_matrix_operations 10 7.0 0.005 0.005 104.587 104.587 compute_mat_P_omega_calc_M_occ 250 9.0 11.944 11.944 77.043 77.043 dbcsr_t_copy 1103 10.7 20.298 20.298 70.996 70.996 dbcsr_tas_total 1149 12.2 0.052 0.052 68.178 68.178 dbcsr_tas_multiply 807 12.1 0.003 0.003 66.806 66.806 dbcsr_multiply_generic 837 15.8 0.132 0.132 53.418 53.418 dbcsr_tas_dbcsr 807 14.1 0.003 0.003 53.003 53.003 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 50.513 50.513 rpa_num_int_RPA_matrix_operati 10 7.0 0.000 0.000 50.341 50.341 contract_P_omega_with_mat_L 10 8.0 0.000 0.000 48.732 48.732 dbcsr_tas_mm_1N 524 15.1 0.002 0.002 41.252 41.252 multiply_cannon 837 16.8 15.334 15.334 40.235 40.235 dbcsr_tas_reserve_blocks_index 3261 13.7 7.227 7.227 27.416 27.416 dbcsr_tas_copy 574 11.4 16.436 16.436 24.116 24.116 multiply_cannon_loop 837 17.8 0.154 0.154 22.263 22.263 multiply_cannon_multrec 837 18.8 20.682 20.682 21.240 21.240 dbcsr_t_reserve_blocks_index 2280 12.5 1.257 1.257 21.021 21.021 dbcsr_t_reserve_blocks_index_a 2222 11.6 0.010 0.010 20.728 20.728 dbcsr_reserve_blocks 3717 14.7 19.510 19.510 19.894 19.894 compute_mat_P_omega_copy_M_occ 250 9.0 0.002 0.002 19.607 19.607 compute_QP_energies 1 7.0 0.000 0.000 19.148 19.148 compute_self_energy_cubic_gw 1 8.0 0.095 0.095 19.147 19.147 mp2_ri_gpw_compute_in 1 5.0 0.000 0.000 14.960 14.960 compute_mat_P_omega_copy_M_vir 250 9.0 0.002 0.002 14.290 14.290 dbcsr_t_copy_nocomm 251 12.0 11.191 11.191 13.584 13.584 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 11.850 11.850 make_m2s 1674 16.8 0.107 0.107 10.749 10.749 make_images 1674 17.8 5.050 5.050 10.219 10.219 dbcsr_tas_mm_2 251 15.0 0.001 0.001 10.136 10.136 cp_fm_cholesky_invert 10 8.0 8.731 8.731 8.731 8.731 dbcsr_finalize 9888 13.6 1.501 1.501 8.064 8.064 ------------------------------------------------------------------------------- From /workspace/artifacts/GW_PBE_4benzene_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.006 0.009 57.306 57.308 qs_energies 1 2.0 0.001 0.001 57.185 57.191 mp2_main 1 3.0 0.001 0.001 55.794 55.800 mp2_gpw_main 1 4.0 0.000 0.001 55.736 55.742 rpa_ri_compute_en 1 5.0 0.000 0.000 53.662 53.669 rpa_num_int 1 6.0 0.000 0.001 53.654 53.661 dbcsr_t_total 2336 9.6 0.016 0.017 40.909 40.910 compute_mat_P_omega 1 7.0 0.001 0.002 39.882 39.894 compute_mat_P_omega_contract 10 8.0 0.743 0.773 39.597 39.603 dbcsr_t_contract 787 11.0 1.870 2.052 30.216 30.221 dbcsr_tas_total 1149 12.2 0.064 0.069 26.513 26.513 dbcsr_tas_multiply 807 12.1 0.003 0.004 26.391 26.394 dbcsr_tas_dbcsr 807 14.1 0.003 0.004 19.269 19.270 dbcsr_multiply_generic 837 15.8 0.069 0.073 15.848 17.072 compute_mat_P_omega_calc_M_occ 250 9.0 0.716 0.744 13.308 13.308 multiply_cannon 837 16.8 0.130 0.142 9.323 10.003 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 9.644 9.644 dbcsr_t_copy 1111 10.7 4.095 4.456 9.007 9.488 dbcsr_tas_mm_1N 524 15.1 0.003 0.003 8.425 9.377 cp_gemm 105 8.4 0.000 0.000 9.306 9.322 cp_gemm_cosma 105 9.4 9.306 9.322 9.306 9.322 multiply_cannon_loop 837 17.8 0.041 0.044 8.481 9.138 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 8.533 8.534 mp_sync 8696 11.6 6.587 7.735 6.587 7.735 multiply_cannon_multrec 1386 17.8 6.617 7.235 6.858 7.468 dbcsr_tas_mm_2 251 15.0 0.002 0.002 7.318 7.318 make_m2s 1674 16.8 0.044 0.047 5.569 6.194 make_images 1674 17.8 0.248 0.257 5.487 6.111 GW_matrix_operations 10 7.0 0.001 0.001 5.960 5.967 compute_QP_energies 1 7.0 0.000 0.000 4.205 4.205 compute_self_energy_cubic_gw 1 8.0 0.005 0.006 4.202 4.205 dbcsr_t_communicate_buffer 1098 11.7 0.092 0.099 3.275 3.464 mp_waitall_2 3776 14.7 3.088 3.367 3.088 3.367 rpa_num_int_RPA_matrix_operati 10 7.0 0.000 0.000 3.248 3.257 contract_P_omega_with_mat_L 10 8.0 0.000 0.000 3.135 3.144 contract_cubic_gw 21 9.0 0.000 0.000 3.134 3.134 make_images_data 1674 18.8 0.037 0.039 2.939 3.039 hybrid_alltoall_any 1724 19.5 2.300 2.564 2.827 2.931 dbcsr_t_reserve_blocks_index_a 2791 11.4 0.019 0.021 2.492 2.852 dbcsr_t_reserve_blocks_index 2849 12.4 0.104 0.115 2.487 2.848 dbcsr_tas_reserve_blocks_index 3300 13.8 0.273 0.299 2.437 2.791 make_images_pack 1674 18.8 2.098 2.689 2.112 2.705 dbcsr_reserve_blocks 3785 14.7 2.151 2.472 2.191 2.515 mp2_ri_gpw_compute_in 1 5.0 0.001 0.001 2.072 2.072 convert_to_new_pgrid 2421 14.1 0.018 0.020 1.811 1.917 dbcsr_copy 3323 15.8 1.744 1.854 1.773 1.883 mp_waitall_1 26582 19.0 1.463 1.811 1.463 1.811 compute_mat_P_omega_copy_M_vir 250 9.0 0.002 0.002 1.671 1.676 dbcsr_add_anytype 909 13.7 0.972 1.017 1.515 1.574 compute_mat_P_omega_copy_M_occ 250 9.0 0.001 0.002 1.501 1.506 dbcsr_tas_replicate 396 14.1 0.809 0.887 1.345 1.422 scf_env_do_scf 1 3.0 0.000 0.000 1.340 1.340 scf_env_do_scf_inner_loop 17 4.0 0.000 0.002 1.340 1.340 mp_max_i 2058 9.6 0.994 1.246 0.994 1.246 ------------------------------------------------------------------------------- Plot: name="GW_PBE_4benzene_timings_32omp", title="Timings of GW_PBE_4benzene with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="rest", label="rest", y=147.365, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="cp_gemm_cosma", label="cp_gemm_cosma", y=148.481, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbcsr_t_contract", label="dbcsr_t_contract", y=46.578, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=20.682, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbcsr_t_copy", label="dbcsr_t_copy", y=20.298, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbcsr_reserve_blocks", label="dbcsr_reserve_blocks", y=19.51, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 Plot: name="GW_PBE_4benzene_timings_32mpi", title="Timings of GW_PBE_4benzene with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="rest", label="rest", y=23.592, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="cp_gemm_cosma", label="cp_gemm_cosma", y=9.306, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbcsr_t_contract", label="dbcsr_t_contract", y=1.87, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=6.617, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbcsr_t_copy", label="dbcsr_t_copy", y=4.095, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbcsr_reserve_blocks", label="dbcsr_reserve_blocks", y=2.151, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=3.088, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_sync", label="mp_sync", y=6.587, yerr=0.0 Running diag_cu144_broy.inp with 1 threads and 32 ranks... done. Running diag_cu144_broy.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/diag_cu144_broy_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.097 0.097 189.413 189.413 qs_energies 1 2.0 0.000 0.000 187.560 187.560 scf_env_do_scf 1 3.0 0.000 0.000 177.689 177.689 scf_env_do_scf_inner_loop 15 4.0 0.002 0.002 177.689 177.689 qs_scf_new_mos 15 5.0 0.000 0.000 78.583 78.583 qs_ks_update_qs_env 15 5.0 0.000 0.000 68.324 68.324 rebuild_ks_matrix 15 6.0 0.000 0.000 67.962 67.962 qs_ks_build_kohn_sham_matrix 15 7.0 0.002 0.002 67.962 67.962 eigensolver 15 6.0 0.002 0.002 65.288 65.288 cp_fm_diag_elpa 15 7.0 0.000 0.000 51.472 51.472 cp_fm_diag_elpa_base 15 8.0 46.721 46.721 51.472 51.472 qs_vxc_create 15 8.0 0.043 0.043 44.519 44.519 calculate_dispersion_nonloc 15 9.0 9.067 9.067 38.804 38.804 pw_transfer 1191 9.8 0.090 0.090 26.089 26.089 fft_wrap_pw1pw2 1086 10.9 0.012 0.012 25.804 25.804 qs_rho_update_rho 16 5.0 0.000 0.000 24.755 24.755 calculate_rho_elec 16 6.0 0.341 0.341 24.754 24.754 grid_collocate_task_list 16 7.0 23.207 23.207 23.207 23.207 sum_up_and_integrate 15 8.0 0.078 0.078 21.888 21.888 integrate_v_rspace 15 9.0 0.033 0.033 21.810 21.810 grid_integrate_task_list 15 10.0 21.209 21.209 21.209 21.209 fft_wrap_pw1pw2_150 765 12.0 3.255 3.255 19.537 19.537 fft3d_s 1087 12.8 10.634 10.634 10.646 10.646 copy_dbcsr_to_fm 16 5.9 0.001 0.001 10.483 10.483 pw_scatter_s 585 13.0 10.289 10.289 10.289 10.289 dbcsr_complete_redistribute 46 8.3 3.427 3.427 9.464 9.464 cp_fm_upper_to_full 30 8.0 9.428 9.428 9.428 9.428 cp_fm_cholesky_restore 45 7.0 9.137 9.137 9.137 9.137 vdW_energy 15 10.0 7.881 7.881 7.881 7.881 gspace_mixing 14 5.0 0.273 0.273 7.347 7.347 broyden_mixing 14 6.0 6.607 6.607 6.608 6.608 fft_wrap_pw1pw2_200 197 11.5 0.338 0.338 6.026 6.026 xc_vxc_pw_create 15 9.0 1.473 1.473 5.672 5.672 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 4.648 4.648 init_scf_run 1 3.0 0.000 0.000 4.465 4.465 dbcsr_finalize 159 9.9 0.025 0.025 4.154 4.154 dbcsr_merge_all 91 11.1 0.072 0.072 4.001 4.001 ------------------------------------------------------------------------------- From /workspace/artifacts/diag_cu144_broy_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.018 0.023 84.454 84.455 qs_energies 1 2.0 0.001 0.001 84.059 84.060 scf_env_do_scf 1 3.0 0.000 0.000 78.942 78.943 scf_env_do_scf_inner_loop 15 4.0 0.001 0.002 78.942 78.943 qs_ks_update_qs_env 15 5.0 0.000 0.000 38.649 38.672 rebuild_ks_matrix 15 6.0 0.000 0.000 38.604 38.626 qs_ks_build_kohn_sham_matrix 15 7.0 0.004 0.004 38.604 38.626 qs_rho_update_rho 16 5.0 0.000 0.000 23.400 23.401 calculate_rho_elec 16 6.0 0.011 0.012 23.399 23.401 sum_up_and_integrate 15 8.0 0.013 0.014 22.682 22.731 integrate_v_rspace 15 9.0 0.001 0.001 22.669 22.718 grid_collocate_task_list 16 7.0 21.597 22.062 21.597 22.062 grid_integrate_task_list 15 10.0 20.954 21.535 20.954 21.535 qs_scf_new_mos 15 5.0 0.001 0.001 17.387 17.496 eigensolver 15 6.0 0.002 0.002 15.965 15.978 qs_vxc_create 15 8.0 0.001 0.001 15.420 15.429 calculate_dispersion_nonloc 15 9.0 1.386 1.484 12.607 12.624 pw_transfer 1191 9.8 0.124 0.131 11.629 11.755 cp_fm_diag_elpa 15 7.0 0.000 0.000 11.645 11.651 cp_fm_diag_elpa_base 15 8.0 11.400 11.433 11.640 11.643 fft_wrap_pw1pw2 1086 10.9 0.019 0.021 11.350 11.480 fft3d_ps 1086 12.9 4.917 5.110 8.662 8.864 fft_wrap_pw1pw2_150 765 12.0 0.626 0.675 7.670 7.710 cp_fm_cholesky_restore 45 7.0 4.085 4.140 4.085 4.140 fft_wrap_pw1pw2_200 197 11.5 0.336 0.358 3.521 3.620 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 3.189 3.190 build_core_hamiltonian_matrix 1 4.0 0.000 0.000 2.740 3.014 xc_vxc_pw_create 15 9.0 0.055 0.072 2.812 2.834 mp_alltoall_z22v 1086 14.9 2.324 2.816 2.324 2.816 rs_pw_transfer 158 9.4 0.002 0.003 1.692 2.287 vdW_energy 15 10.0 2.077 2.192 2.077 2.192 x_to_yz 585 14.0 0.877 0.914 2.030 2.155 build_core_ppnl 1 5.0 1.840 2.047 1.840 2.047 density_rs2pw 16 7.0 0.001 0.002 1.645 2.025 yz_to_x 501 13.7 0.513 0.591 1.684 1.992 mp_waitany 520 11.3 1.144 1.789 1.144 1.789 ------------------------------------------------------------------------------- Plot: name="diag_cu144_broy_timings_32omp", title="Timings of diag_cu144_broy with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32omp", name="rest", label="rest", y=68.21600000000001, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=46.721, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=23.207, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=21.209, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_s", label="fft3d_s", y=10.634, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="pw_scatter_s", label="pw_scatter_s", y=10.289, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=9.137, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 Plot: name="diag_cu144_broy_timings_32mpi", title="Timings of diag_cu144_broy with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="rest", label="rest", y=21.50099999999999, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=11.4, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=21.597, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=20.954, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="pw_scatter_s", label="pw_scatter_s", y=0.0, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=4.085, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=4.917, yerr=0.0 Running bench_dftb.inp with 1 threads and 32 ranks... done. Running bench_dftb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/bench_dftb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.081 0.081 313.676 313.676 qs_energies 1 2.0 0.000 0.000 313.524 313.524 ls_scf 1 3.0 0.000 0.000 311.798 311.798 ls_scf_main 1 4.0 0.002 0.002 297.829 297.829 density_matrix_trs4 11 5.0 0.011 0.011 169.973 169.973 ls_scf_dm_to_ks 11 5.0 0.000 0.000 121.119 121.119 matrix_ls_to_qs 11 6.0 0.000 0.000 117.000 117.000 dbcsr_multiply_generic 185 6.1 0.467 0.467 105.048 105.048 multiply_cannon 185 7.1 2.713 2.713 71.029 71.029 dbcsr_copy_into_existing 11 7.0 65.965 65.965 65.965 65.965 dbcsr_complete_redistribute 23 7.5 40.258 40.258 55.851 55.851 matrix_decluster 11 7.0 0.000 0.000 51.034 51.034 multiply_cannon_loop 185 8.1 0.391 0.391 50.436 50.436 multiply_cannon_multrec 185 9.1 48.321 48.321 48.395 48.395 arnoldi_extremal 12 6.1 0.000 0.000 46.357 46.357 arnoldi_normal_ev 12 7.1 0.029 0.029 46.357 46.357 build_subspace 23 8.1 0.131 0.131 45.738 45.738 dbcsr_matrix_vector_mult 652 9.0 0.253 0.253 35.618 35.618 dbcsr_matrix_vector_mult_local 652 10.0 34.122 34.122 34.130 34.130 make_m2s 370 7.1 0.031 0.031 27.875 27.875 make_images 370 8.1 7.237 7.237 25.463 25.463 dbcsr_finalize 646 7.5 0.226 0.226 21.023 21.023 dbcsr_merge_all 597 8.5 3.468 3.468 18.914 18.914 setup_rec_index_2d 370 8.1 17.664 17.664 17.664 17.664 dbcsr_sort_indices 1103 9.9 14.416 14.416 14.416 14.416 tree_to_linear_d 110 9.4 13.269 13.269 13.269 13.269 ls_scf_init_scf 1 4.0 0.000 0.000 12.940 12.940 ls_scf_init_matrix_S 1 5.0 0.000 0.000 12.515 12.515 quick_finalize 395 10.0 0.493 0.493 12.316 12.316 matrix_sqrt_Newton_Schulz 1 6.0 0.001 0.001 11.677 11.677 dbcsr_special_finalize 370 9.1 0.003 0.003 11.354 11.354 dbcsr_dot_sd 144 6.3 8.778 8.778 8.779 8.779 dbcsr_frobenius_norm 142 6.1 7.676 7.676 7.679 7.679 matrix_qs_to_ls 12 5.1 0.000 0.000 7.014 7.014 matrix_cluster 12 6.1 0.000 0.000 7.014 7.014 make_images_data 370 9.1 0.010 0.010 6.707 6.707 dbcsr_new_transposed 2 7.0 0.128 0.128 6.453 6.453 dbcsr_redistribute 2 8.0 6.222 6.222 6.292 6.292 ------------------------------------------------------------------------------- From /workspace/artifacts/bench_dftb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.022 0.029 91.188 91.189 qs_energies 1 2.0 0.000 0.000 91.063 91.066 ls_scf 1 3.0 0.000 0.000 90.974 90.977 ls_scf_main 1 4.0 0.001 0.003 87.421 87.422 density_matrix_trs4 11 5.0 0.009 0.013 83.516 83.592 dbcsr_multiply_generic 185 6.1 0.072 0.080 77.853 78.158 multiply_cannon 185 7.1 0.040 0.045 64.292 65.686 multiply_cannon_loop 185 8.1 0.185 0.202 60.602 62.028 multiply_cannon_multrec 1480 9.1 39.225 42.108 39.667 42.571 mp_waitall_1 11936 10.3 19.457 23.291 19.457 23.291 multiply_cannon_metrocomm3 1480 9.1 0.016 0.019 11.616 17.637 make_m2s 370 7.1 0.033 0.036 8.664 8.773 make_images 370 8.1 0.700 0.751 8.546 8.655 multiply_cannon_metrocomm1 1480 9.1 0.009 0.011 4.743 8.161 mp_sum_l 1039 5.9 3.696 4.808 3.696 4.808 calculate_norms 2960 9.1 4.317 4.658 4.317 4.658 arnoldi_extremal 12 6.1 0.000 0.001 4.169 4.182 arnoldi_normal_ev 12 7.1 0.002 0.008 4.168 4.182 build_subspace 23 8.1 0.039 0.054 4.042 4.045 make_images_data 370 9.1 0.012 0.013 3.463 3.774 dbcsr_multiply_generic_mpsum_f 137 7.1 0.000 0.001 2.725 3.654 dbcsr_matrix_vector_mult 652 9.0 0.019 0.083 3.285 3.490 ls_scf_dm_to_ks 11 5.0 0.000 0.000 3.388 3.481 hybrid_alltoall_any 393 9.9 0.296 1.471 2.775 3.060 dbcsr_complete_redistribute 23 7.5 1.840 1.964 2.918 3.031 matrix_ls_to_qs 11 6.0 0.000 0.000 2.890 3.009 dbcsr_matrix_vector_mult_local 652 10.0 2.561 2.748 2.566 2.753 matrix_decluster 11 7.0 0.000 0.000 2.623 2.739 ls_scf_init_scf 1 4.0 0.000 0.000 2.727 2.728 ls_scf_init_matrix_S 1 5.0 0.000 0.000 2.689 2.699 make_images_pack 370 9.1 2.297 2.509 2.302 2.515 matrix_sqrt_Newton_Schulz 1 6.0 0.001 0.001 2.460 2.463 buffer_matrices_ensure_size 370 8.1 2.093 2.196 2.093 2.196 dbcsr_add_d 280 6.0 0.002 0.002 2.015 2.138 dbcsr_add_anytype 280 7.0 1.085 1.179 2.014 2.136 dbcsr_finalize 646 7.5 0.014 0.015 1.803 1.893 ------------------------------------------------------------------------------- Plot: name="bench_dftb_timings_32omp", title="Timings of bench_dftb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32omp", name="rest", label="rest", y=107.346, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=65.965, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=48.321, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=40.258, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=34.122, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="setup_rec_index_2d", label="setup_rec_index_2d", y=17.664, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="calculate_norms", label="calculate_norms", y=0.0, yerr=0.0 Plot: name="bench_dftb_timings_32mpi", title="Timings of bench_dftb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32mpi", name="rest", label="rest", y=20.092, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=39.225, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=1.84, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=2.561, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="setup_rec_index_2d", label="setup_rec_index_2d", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=19.457, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=3.696, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="calculate_norms", label="calculate_norms", y=4.317, yerr=0.0 Running dbcsr.inp with 1 threads and 32 ranks... done. Running dbcsr.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/dbcsr_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.005 0.005 101.150 101.150 lib_test 1 2.0 0.000 0.000 101.143 101.143 dbcsr_run_tests 3 3.0 0.002 0.002 101.143 101.143 test_multiplies_multiproc 3 4.0 0.001 0.001 81.581 81.581 dbcsr_redistribute 9 5.0 54.015 54.015 57.406 57.406 dbcsr_multiply_generic 9 5.0 0.001 0.001 22.354 22.354 dbcsr_make_random_matrix 9 4.0 14.242 14.242 19.477 19.477 multiply_cannon 9 6.0 0.002 0.002 15.883 15.883 multiply_cannon_loop 9 7.0 0.004 0.004 15.379 15.379 multiply_cannon_multrec 9 8.0 15.374 15.374 15.375 15.375 dbcsr_finalize 27 5.7 0.004 0.004 8.985 8.985 dbcsr_merge_all 18 6.5 3.200 3.200 8.259 8.259 tree_to_linear_d 9 7.0 3.171 3.171 3.171 3.171 mp_alltoall_d11v 27 6.0 3.065 3.065 3.065 3.065 dbcsr_data_release 975 7.6 2.408 2.408 2.408 2.408 make_m2s 18 6.0 0.001 0.001 2.153 2.153 make_images 18 7.0 0.698 0.698 2.084 2.084 ------------------------------------------------------------------------------- From /workspace/artifacts/dbcsr_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.003 0.005 25.176 25.177 lib_test 1 2.0 0.000 0.001 25.148 25.167 dbcsr_run_tests 3 3.0 0.001 0.001 25.147 25.165 test_multiplies_multiproc 3 4.0 0.001 0.002 24.011 24.099 dbcsr_multiply_generic 9 5.0 0.002 0.002 22.180 22.274 multiply_cannon 9 6.0 0.002 0.002 19.818 20.313 multiply_cannon_loop 9 7.0 0.003 0.004 19.391 19.874 multiply_cannon_multrec 72 8.0 16.131 17.224 16.132 17.225 mp_waitall_1 576 9.2 3.653 4.294 3.653 4.294 multiply_cannon_metrocomm1 72 8.0 0.002 0.002 2.885 3.767 mp_sum_l 310 2.7 0.705 1.618 0.705 1.618 dbcsr_multiply_generic_mpsum_f 9 6.0 0.000 0.000 0.701 1.614 multiply_cannon_metrocomm3 72 8.0 0.000 0.001 0.364 1.484 dbcsr_make_random_matrix 9 4.0 0.889 0.909 1.094 1.151 make_m2s 18 6.0 0.001 0.001 0.883 0.961 make_images 18 7.0 0.026 0.027 0.880 0.958 dbcsr_finalize 27 5.7 0.000 0.001 0.797 0.892 dbcsr_merge_all 18 6.5 0.128 0.147 0.707 0.800 dbcsr_data_release 444 7.6 0.628 0.721 0.628 0.721 dbcsr_redistribute 9 5.0 0.378 0.438 0.644 0.681 dbcsr_destroy 111 5.9 0.005 0.050 0.544 0.631 make_images_data 18 8.0 0.001 0.001 0.447 0.529 ------------------------------------------------------------------------------- Plot: name="dbcsr_timings_32omp", title="Timings of dbcsr with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32omp", name="rest", label="rest", y=8.739999999999995, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_redistribute", label="dbcsr_redistribute", y=54.015, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=15.374, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=14.242, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_merge_all", label="dbcsr_merge_all", y=3.2, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="tree_to_linear_d", label="tree_to_linear_d", y=3.171, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_data_release", label="dbcsr_data_release", y=2.408, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 Plot: name="dbcsr_timings_32mpi", title="Timings of dbcsr with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32mpi", name="rest", label="rest", y=2.6640000000000015, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_redistribute", label="dbcsr_redistribute", y=0.378, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=16.131, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=0.889, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_merge_all", label="dbcsr_merge_all", y=0.128, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="tree_to_linear_d", label="tree_to_linear_d", y=0.0, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_data_release", label="dbcsr_data_release", y=0.628, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=3.653, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=0.705, yerr=0.0 Running MQAE_single_node.inp with 1 threads and 32 ranks... done. Running MQAE_single_node.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/MQAE_single_node_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.042 0.042 140.126 140.126 qs_mol_dyn_low 1 2.0 0.005 0.005 138.132 138.132 velocity_verlet 5 3.0 0.004 0.004 111.584 111.584 qmmm_el_coupling 6 3.8 0.000 0.000 64.172 64.172 qmmm_elec_with_gaussian 6 4.8 0.185 0.185 64.166 64.166 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 62.519 62.519 qmmm_elec_gaussian_low_G 6 6.8 60.917 60.917 60.917 60.917 qs_forces 6 3.8 0.001 0.001 55.766 55.766 qs_energies 6 4.8 0.000 0.000 49.574 49.574 scf_env_do_scf 6 5.8 0.000 0.000 45.731 45.731 scf_env_do_scf_inner_loop 39 6.8 0.003 0.003 38.572 38.572 rebuild_ks_matrix 45 8.4 0.000 0.000 38.457 38.457 qs_ks_build_kohn_sham_matrix 45 9.4 0.007 0.007 38.457 38.457 qs_ks_update_qs_env 45 7.8 0.000 0.000 32.969 32.969 pw_transfer 966 11.9 0.069 0.069 23.412 23.412 fft_wrap_pw1pw2 801 13.0 0.009 0.009 23.077 23.077 fft_wrap_pw1pw2_150 507 14.3 2.352 2.352 22.575 22.575 qs_vxc_create 45 10.4 0.001 0.001 20.995 20.995 xc_vxc_pw_create 45 11.4 4.247 4.247 20.994 20.994 pw_scatter_s 429 15.4 10.378 10.378 10.378 10.378 qs_rho_update_rho 45 7.9 0.000 0.000 10.049 10.049 calculate_rho_elec 45 8.9 0.883 0.883 10.048 10.048 xc_rho_set_and_dset_create 45 12.4 0.245 0.245 9.697 9.697 qmmm_forces 6 3.8 0.001 0.001 9.099 9.099 fft3d_s 802 15.0 8.973 8.973 8.984 8.984 qmmm_forces_with_gaussian 6 4.8 0.126 0.126 8.624 8.624 pw_integral_ab 2539 7.4 8.573 8.573 8.573 8.573 fist_calc_energy_force 6 3.8 0.002 0.002 8.320 8.320 init_scf_loop 6 6.8 0.000 0.000 7.153 7.153 force_nonbond 6 4.8 7.099 7.099 7.099 7.099 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 6.700 6.700 qs_ks_ddapc 45 10.4 0.001 0.001 6.500 6.500 qmmm_forces_gaussian_low_G 6 6.8 5.624 5.624 5.624 5.624 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 5.500 5.500 pw_poisson_solve 51 9.9 2.307 2.307 5.232 5.232 density_rs2pw 45 9.9 0.003 0.003 4.646 4.646 grid_collocate_task_list 45 9.9 4.519 4.519 4.519 4.519 sum_up_and_integrate 45 10.4 0.231 0.231 4.321 4.321 integrate_v_rspace 45 11.4 0.012 0.012 4.090 4.090 cp_ddapc_apply_CD 45 11.4 0.006 0.006 3.997 3.997 ------------------------------------------------------------------------------- From /workspace/artifacts/MQAE_single_node_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.032 0.036 85.512 85.513 qs_mol_dyn_low 1 2.0 0.005 0.006 83.916 84.015 qs_forces 6 3.8 0.001 0.001 61.579 61.579 qs_energies 6 4.8 0.001 0.001 58.708 58.708 scf_env_do_scf 6 5.8 0.000 0.001 57.216 57.216 scf_env_do_scf_inner_loop 113 6.2 0.003 0.010 54.921 54.922 rebuild_ks_matrix 119 8.1 0.000 0.000 40.496 40.515 qs_ks_build_kohn_sham_matrix 119 9.1 0.020 0.022 40.495 40.515 qs_ks_update_qs_env 119 7.3 0.001 0.001 38.077 38.095 velocity_verlet 5 3.0 0.002 0.003 35.328 35.333 pw_transfer 2446 11.8 0.255 0.275 25.709 26.067 fft_wrap_pw1pw2 2059 12.8 0.033 0.035 24.907 25.317 fft_wrap_pw1pw2_150 1321 14.0 2.132 2.287 24.130 24.584 qs_vxc_create 119 10.1 0.003 0.003 20.416 20.420 xc_vxc_pw_create 119 11.1 0.429 0.591 20.413 20.417 fft3d_ps 2059 14.8 11.062 12.236 18.858 19.253 qs_rho_update_rho 119 7.3 0.001 0.001 16.059 16.060 calculate_rho_elec 119 8.3 0.086 0.095 16.058 16.059 sum_up_and_integrate 119 10.1 0.084 0.090 14.707 14.776 integrate_v_rspace 119 11.1 0.004 0.005 14.623 14.692 qmmm_forces 6 3.8 0.002 0.003 12.376 12.376 qmmm_forces_with_gaussian 6 4.8 0.352 0.433 11.878 12.166 rs_pw_transfer 988 11.5 0.016 0.018 10.933 11.484 density_rs2pw 119 9.3 0.011 0.012 9.640 10.081 xc_rho_set_and_dset_create 119 12.1 0.492 0.575 9.562 9.895 qmmm_el_coupling 6 3.8 0.000 0.000 8.758 8.869 qmmm_elec_with_gaussian 6 4.8 0.314 0.435 8.755 8.866 potential_pw2rs 119 12.1 0.011 0.012 8.463 8.475 grid_collocate_task_list 119 9.3 6.092 6.488 6.092 6.488 mp_alltoall_z22v 2059 16.8 4.819 6.471 4.819 6.471 grid_integrate_task_list 119 12.1 5.652 6.046 5.652 6.046 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 5.809 6.028 qmmm_forces_gaussian_low_G 6 6.8 4.744 4.950 4.744 4.950 rs_pw_transfer_PW2RS_150 125 13.9 2.437 2.487 4.681 4.727 pw_restrict_s3 18 5.8 2.118 2.174 4.570 4.689 x_to_yz 1095 16.3 1.753 1.911 4.250 4.582 rs_pw_transfer_RS2PW_150 125 11.2 1.931 2.062 3.996 4.551 yz_to_x 964 15.3 1.178 1.322 3.500 4.547 mp_waitany 4028 12.8 3.441 4.283 3.441 4.283 qmmm_elec_with_gaussian:spline 6 5.8 0.000 0.000 3.753 3.863 pw_prolongate_s3 18 6.8 1.713 1.778 3.753 3.863 pw_integral_ab 2761 7.7 3.166 3.192 3.548 3.693 qs_scf_new_mos 113 7.2 0.001 0.001 3.627 3.635 qs_scf_loop_do_ot 113 8.2 0.001 0.001 3.626 3.635 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 3.412 3.499 dbcsr_multiply_generic 2588 12.3 0.097 0.114 3.315 3.494 ot_scf_mini 113 9.2 0.002 0.002 3.467 3.476 qs_ks_ddapc 119 10.1 0.002 0.003 2.747 2.896 qmmm_elec_gaussian_low_G 6 6.8 2.479 2.568 2.479 2.568 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 2.428 2.429 mp_sum_dm3 33 5.7 2.256 2.374 2.256 2.374 init_scf_loop 6 6.8 0.000 0.000 2.291 2.292 ot_mini 113 10.2 0.001 0.001 2.185 2.198 pw_gather_p 964 14.3 1.974 2.184 1.974 2.184 mp_waitall_1 188862 16.2 1.920 2.113 1.920 2.113 pw_scatter_p 1095 15.3 1.853 1.976 1.853 1.976 qs_ot_get_derivative 113 11.2 0.001 0.001 1.717 1.726 ------------------------------------------------------------------------------- Plot: name="MQAE_single_node_timings_32omp", title="Timings of MQAE_single_node with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32omp", name="rest", label="rest", y=34.04299999999999, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=60.917, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_scatter_s", label="pw_scatter_s", y=10.378, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_s", label="fft3d_s", y=8.973, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_integral_ab", label="pw_integral_ab", y=8.573, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="force_nonbond", label="force_nonbond", y=7.099, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=5.624, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=4.519, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=0.0, yerr=0.0 Plot: name="MQAE_single_node_timings_32mpi", title="Timings of MQAE_single_node with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32mpi", name="rest", label="rest", y=47.498, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=2.479, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_scatter_s", label="pw_scatter_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_integral_ab", label="pw_integral_ab", y=3.166, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="force_nonbond", label="force_nonbond", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=4.744, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=6.092, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=11.062, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=5.652, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=4.819, yerr=0.0 Summary: Performance test works fine. Status: OK Uploading artifacts... done EndDate: 2021-12-06 20:28:25+00:00