StartDate: 2021-11-29 20:04:52+00:00 CpuId: 64x Intel Xeon W 2000 / D-2100 (Skylake / Cascade Lake) {Skylake}, 14nm CommitSHA: b48b44a57934fd7ef7d431b0efb1ce54fca2178f CommitTime: 2021-11-29 20:17:39 +0100 CommitAuthor: Ole Schütt CommitSubject: Fist: Fix angular derivative of GAL21 forcefield Trying to pull image cp2k-toolchain-mpich... success :-) Trying to pull image cp2k-perf-openmp... success :-) #################### Running Image cp2k-perf-openmp #################### ========== Fetching Git Commit ========== CommitSHA: b48b44a57934fd7ef7d431b0efb1ce54fca2178f CommitTime: 2021-11-29 20:17:39 +0100 CommitAuthor: Ole Schütt CommitSubject: Fist: Fix angular derivative of GAL21 forcefield ========== Running Test ========== ========== Compiling CP2K ========== Compiling cp2k... done. ========== Running Performance Test ========== Running H2O-64.inp with 1 threads and 32 ranks... done. Running H2O-64.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.030 0.030 155.471 155.471 qs_mol_dyn_low 1 2.0 0.004 0.004 154.678 154.678 qs_forces 11 3.9 0.001 0.001 154.622 154.622 qs_energies 11 4.9 0.001 0.001 144.414 144.414 scf_env_do_scf 11 5.9 0.001 0.001 116.536 116.536 velocity_verlet 10 3.0 0.002 0.002 107.339 107.339 scf_env_do_scf_inner_loop 108 6.5 0.009 0.009 79.460 79.460 init_scf_loop 11 6.9 0.000 0.000 36.898 36.898 prepare_preconditioner 11 7.9 0.000 0.000 33.004 33.004 make_preconditioner 11 8.9 0.000 0.000 33.004 33.004 rebuild_ks_matrix 119 8.3 0.001 0.001 32.353 32.353 qs_ks_build_kohn_sham_matrix 119 9.3 0.017 0.017 32.352 32.352 make_full_inverse_cholesky 11 9.9 0.000 0.000 31.080 31.080 qs_ks_update_qs_env 119 7.6 0.001 0.001 30.170 30.170 qs_rho_update_rho 119 7.7 0.001 0.001 28.024 28.024 calculate_rho_elec 119 8.7 1.536 1.536 28.023 28.023 qs_scf_new_mos 108 7.5 0.001 0.001 27.201 27.201 qs_scf_loop_do_ot 108 8.5 0.001 0.001 27.200 27.200 ot_scf_mini 108 9.5 0.003 0.003 25.432 25.432 dbcsr_multiply_generic 2286 12.5 0.172 0.172 22.712 22.712 grid_collocate_task_list 119 9.7 22.190 22.190 22.190 22.190 sum_up_and_integrate 119 10.3 0.364 0.364 20.844 20.844 integrate_v_rspace 119 11.3 0.496 0.496 20.480 20.480 cp_fm_cholesky_invert 11 10.9 18.645 18.645 18.645 18.645 grid_integrate_task_list 119 12.3 17.635 17.635 17.635 17.635 ot_mini 108 10.5 0.001 0.001 14.881 14.881 init_scf_run 11 5.9 0.001 0.001 13.335 13.335 scf_env_initial_rho_setup 11 6.9 0.001 0.001 13.334 13.334 make_m2s 4572 13.5 0.065 0.065 12.655 12.655 wfi_extrapolate 11 7.9 0.001 0.001 12.531 12.531 cp_gemm 81 9.0 0.000 0.000 10.943 10.943 cp_gemm_cosma 81 10.0 10.942 10.942 10.942 10.942 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 10.767 10.767 qs_ot_get_derivative 108 11.5 0.002 0.002 7.728 7.728 pw_transfer 1439 11.6 0.091 0.091 7.312 7.312 ot_diis_step 108 11.5 0.005 0.005 7.149 7.149 fft_wrap_pw1pw2 1201 12.6 0.010 0.010 7.024 7.024 make_images 4572 14.5 2.540 2.540 6.798 6.798 qs_ot_get_p 119 10.4 0.001 0.001 6.669 6.669 cp_fm_cholesky_decompose 22 10.9 6.448 6.448 6.448 6.448 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 6.285 6.285 dbcsr_complete_redistribute 329 12.2 2.903 2.903 6.074 6.074 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 6.035 6.035 apply_single 119 13.6 0.001 0.001 6.034 6.034 dbcsr_make_dense_low 5837 15.5 0.104 0.104 5.968 5.968 fft_wrap_pw1pw2_140 487 13.2 0.612 0.612 5.957 5.957 dbcsr_copy 2102 12.0 0.278 0.278 5.861 5.861 qs_env_update_s_mstruct 11 6.9 0.000 0.000 5.844 5.844 make_dense_data 5837 16.5 5.221 5.221 5.842 5.842 multiply_cannon 2286 13.5 0.913 0.913 5.688 5.688 dbcsr_copy_into_existing 22 7.9 5.536 5.536 5.536 5.536 dbcsr_make_images_dense 3978 14.8 0.029 0.029 5.391 5.391 qs_create_task_list 11 7.9 0.000 0.000 5.336 5.336 generate_qs_task_list 11 8.9 3.678 3.678 5.336 5.336 qs_ot_p2m_diag 50 11.0 0.216 0.216 5.130 5.130 copy_dbcsr_to_fm 153 11.3 0.004 0.004 4.946 4.946 cp_dbcsr_syevd 50 12.0 0.004 0.004 4.580 4.580 cp_fm_diag_elpa 50 13.0 0.000 0.000 4.424 4.424 cp_fm_diag_elpa_base 50 14.0 4.366 4.366 4.423 4.423 build_core_hamiltonian_matrix 11 6.9 0.001 0.001 4.379 4.379 pw_poisson_solve 119 10.3 1.778 1.778 4.331 4.331 density_rs2pw 119 9.7 0.006 0.006 4.297 4.297 multiply_cannon_loop 2286 14.5 0.051 0.051 4.241 4.241 multiply_cannon_multrec 2286 15.5 4.122 4.122 4.189 4.189 transfer_dbcsr_to_fm 11 10.9 0.000 0.000 4.098 4.098 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 3.920 3.920 qs_energies_compute_matrix_w 11 5.9 0.000 0.000 3.619 3.619 calculate_w_matrix_ot 11 6.9 0.008 0.008 3.619 3.619 copy_fm_to_dbcsr 176 11.2 0.002 0.002 3.187 3.187 fft3d_s 1202 14.6 3.178 3.178 3.184 3.184 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.015 0.021 71.775 71.777 qs_mol_dyn_low 1 2.0 0.006 0.007 71.642 71.648 qs_forces 11 3.9 0.002 0.002 71.586 71.587 qs_energies 11 4.9 0.001 0.002 66.711 66.713 scf_env_do_scf 11 5.9 0.001 0.001 60.569 60.569 scf_env_do_scf_inner_loop 108 6.5 0.003 0.011 56.201 56.202 velocity_verlet 10 3.0 0.002 0.002 42.463 42.464 rebuild_ks_matrix 119 8.3 0.001 0.001 28.123 28.180 qs_ks_build_kohn_sham_matrix 119 9.3 0.021 0.022 28.123 28.179 qs_ks_update_qs_env 119 7.6 0.001 0.001 25.021 25.072 sum_up_and_integrate 119 10.3 0.046 0.050 22.103 22.123 integrate_v_rspace 119 11.3 0.005 0.005 22.058 22.078 qs_rho_update_rho 119 7.7 0.001 0.001 21.979 21.998 calculate_rho_elec 119 8.7 0.048 0.049 21.978 21.997 dbcsr_multiply_generic 2286 12.5 0.132 0.137 16.781 16.871 grid_integrate_task_list 119 12.3 15.958 16.461 15.958 16.461 grid_collocate_task_list 119 9.7 15.787 16.460 15.787 16.460 qs_scf_new_mos 108 7.5 0.001 0.001 13.757 13.803 qs_scf_loop_do_ot 108 8.5 0.001 0.001 13.756 13.802 ot_scf_mini 108 9.5 0.003 0.003 12.899 12.935 multiply_cannon 2286 13.5 0.218 0.225 11.141 11.380 multiply_cannon_loop 2286 14.5 0.225 0.245 10.052 10.417 mp_waitall_1 169478 16.3 8.227 8.680 8.227 8.680 ot_mini 108 10.5 0.001 0.001 7.622 7.661 rs_pw_transfer 974 11.9 0.016 0.017 6.495 7.456 density_rs2pw 119 9.7 0.009 0.009 5.607 6.580 pw_transfer 1439 11.6 0.133 0.141 5.709 5.792 multiply_cannon_metrocomm3 18288 15.5 0.081 0.086 5.333 5.708 fft_wrap_pw1pw2 1201 12.6 0.014 0.015 5.426 5.502 potential_pw2rs 119 12.3 0.010 0.011 5.023 5.029 fft_wrap_pw1pw2_140 487 13.2 0.552 0.577 4.706 4.880 init_scf_loop 11 6.9 0.000 0.001 4.352 4.352 init_scf_run 11 5.9 0.000 0.002 4.210 4.211 scf_env_initial_rho_setup 11 6.9 0.000 0.001 4.210 4.210 fft3d_ps 1201 14.6 2.230 2.360 4.022 4.087 make_m2s 4572 13.5 0.075 0.078 3.796 3.850 wfi_extrapolate 11 7.9 0.001 0.001 3.820 3.820 qs_ot_get_derivative 108 11.5 0.001 0.002 3.784 3.820 ot_diis_step 108 11.5 0.005 0.005 3.804 3.804 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 3.713 3.769 apply_single 119 13.6 0.001 0.001 3.713 3.768 multiply_cannon_multrec 18288 15.5 3.532 3.670 3.550 3.689 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 3.316 3.322 make_images 4572 14.5 0.186 0.190 3.103 3.159 mp_waitany 9880 13.7 2.168 3.137 2.168 3.137 rs_pw_transfer_RS2PW_140 130 11.5 0.526 0.565 1.980 2.954 rs_pw_transfer_PW2RS_140 130 13.9 1.204 1.266 2.495 2.521 mp_alltoall_d11v 2130 13.8 1.354 1.951 1.354 1.951 qs_ot_get_p 119 10.4 0.001 0.001 1.797 1.849 rs_gather_matrices 119 12.3 0.126 0.136 1.025 1.665 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 1.420 1.525 make_images_data 4572 15.5 0.062 0.067 1.392 1.510 prepare_preconditioner 11 7.9 0.000 0.000 1.445 1.452 make_preconditioner 11 8.9 0.000 0.000 1.445 1.452 qs_energies_init_hamiltonians 11 5.9 0.000 0.001 1.436 1.437 ------------------------------------------------------------------------------- Plot: name="H2O-64_timings_32omp", title="Timings of H2O-64 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32omp", name="rest", label="rest", y=75.48899999999999, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=22.19, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=18.645, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=17.635, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_gemm_cosma", label="cp_gemm_cosma", y=10.942, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=6.448, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=4.122, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="H2O-64_timings_32mpi", title="Timings of H2O-64 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32mpi", name="rest", label="rest", y=26.04100000000001, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=15.787, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=15.958, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_gemm_cosma", label="cp_gemm_cosma", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=3.532, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=2.23, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=8.227, yerr=0.0 Running H2O-64_nonortho.inp with 1 threads and 32 ranks... done. Running H2O-64_nonortho.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_nonortho_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.035 0.035 208.667 208.667 qs_mol_dyn_low 1 2.0 0.004 0.004 207.863 207.863 qs_forces 11 3.9 0.001 0.001 207.807 207.807 qs_energies 11 4.9 0.001 0.001 193.765 193.765 scf_env_do_scf 11 5.9 0.001 0.001 161.433 161.433 velocity_verlet 10 3.0 0.002 0.002 140.542 140.542 scf_env_do_scf_inner_loop 96 6.5 0.008 0.008 119.885 119.885 rebuild_ks_matrix 107 8.3 0.001 0.001 61.367 61.367 qs_ks_build_kohn_sham_matrix 107 9.3 0.016 0.016 61.366 61.366 qs_ks_update_qs_env 107 7.6 0.001 0.001 55.216 55.216 qs_rho_update_rho 107 7.7 0.001 0.001 54.591 54.591 calculate_rho_elec 107 8.7 1.382 1.382 54.590 54.590 sum_up_and_integrate 107 10.3 0.336 0.336 50.840 50.840 integrate_v_rspace 107 11.3 0.421 0.421 50.504 50.504 grid_collocate_task_list 107 9.7 49.291 49.291 49.291 49.291 grid_integrate_task_list 107 12.3 47.926 47.926 47.926 47.926 init_scf_loop 11 6.9 0.000 0.000 41.354 41.354 prepare_preconditioner 11 7.9 0.000 0.000 33.983 33.983 make_preconditioner 11 8.9 0.000 0.000 33.983 33.983 make_full_inverse_cholesky 11 9.9 0.000 0.000 32.008 32.008 qs_scf_new_mos 96 7.5 0.001 0.001 22.563 22.563 qs_scf_loop_do_ot 96 8.5 0.001 0.001 22.562 22.562 ot_scf_mini 96 9.5 0.003 0.003 20.995 20.995 cp_fm_cholesky_invert 11 10.9 19.379 19.379 19.379 19.379 dbcsr_multiply_generic 1966 12.4 0.153 0.153 19.130 19.130 init_scf_run 11 5.9 0.001 0.001 16.605 16.605 scf_env_initial_rho_setup 11 6.9 0.001 0.001 16.604 16.604 wfi_extrapolate 11 7.9 0.001 0.001 15.542 15.542 ot_mini 96 10.5 0.001 0.001 12.401 12.401 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 11.869 11.869 cp_gemm 81 9.0 0.000 0.000 10.959 10.959 cp_gemm_cosma 81 10.0 10.959 10.959 10.959 10.959 make_m2s 3932 13.4 0.057 0.057 10.426 10.426 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 7.743 7.743 qs_env_update_s_mstruct 11 6.9 0.000 0.000 7.022 7.022 qs_ot_get_derivative 96 11.5 0.001 0.001 6.848 6.848 pw_transfer 1295 11.6 0.082 0.082 6.730 6.730 qs_create_task_list 11 7.9 0.000 0.000 6.487 6.487 generate_qs_task_list 11 8.9 4.838 4.838 6.487 6.487 fft_wrap_pw1pw2 1081 12.6 0.009 0.009 6.464 6.464 cp_fm_cholesky_decompose 22 10.9 6.398 6.398 6.398 6.398 dbcsr_complete_redistribute 317 12.2 2.954 2.954 6.382 6.382 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 6.296 6.296 dbcsr_copy 1855 11.9 0.254 0.254 5.733 5.733 make_images 3932 14.4 2.130 2.130 5.726 5.726 ot_diis_step 96 11.5 0.005 0.005 5.550 5.550 fft_wrap_pw1pw2_140 439 13.2 0.621 0.621 5.510 5.510 dbcsr_copy_into_existing 22 7.9 5.435 5.435 5.436 5.436 qs_ot_get_p 107 10.4 0.001 0.001 5.353 5.353 copy_dbcsr_to_fm 147 11.2 0.003 0.003 5.200 5.200 multiply_cannon 1966 13.4 0.786 0.786 5.012 5.012 dbcsr_make_dense_low 4961 15.5 0.088 0.088 4.806 4.806 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 4.753 4.753 apply_single 107 13.6 0.001 0.001 4.753 4.753 make_dense_data 4961 16.5 4.190 4.190 4.699 4.699 dbcsr_make_images_dense 3386 14.7 0.024 0.024 4.290 4.290 transfer_dbcsr_to_fm 11 10.9 0.000 0.000 4.283 4.283 build_core_hamiltonian_matrix 11 6.9 0.001 0.001 4.275 4.275 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_nonortho_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.011 124.856 124.857 qs_mol_dyn_low 1 2.0 0.005 0.006 124.740 124.745 qs_forces 11 3.9 0.002 0.002 124.686 124.687 qs_energies 11 4.9 0.001 0.001 116.106 116.107 scf_env_do_scf 11 5.9 0.001 0.001 107.190 107.191 scf_env_do_scf_inner_loop 96 6.5 0.003 0.009 99.574 99.575 velocity_verlet 10 3.0 0.002 0.002 74.247 74.248 rebuild_ks_matrix 107 8.3 0.001 0.001 57.551 57.606 qs_ks_build_kohn_sham_matrix 107 9.3 0.019 0.020 57.550 57.605 sum_up_and_integrate 107 10.3 0.039 0.042 52.382 52.408 integrate_v_rspace 107 11.3 0.004 0.004 52.342 52.368 qs_ks_update_qs_env 107 7.6 0.001 0.001 50.712 50.762 qs_rho_update_rho 107 7.7 0.001 0.001 48.321 48.333 calculate_rho_elec 107 8.7 0.043 0.044 48.321 48.332 grid_integrate_task_list 107 12.3 46.096 47.308 46.096 47.308 grid_collocate_task_list 107 9.7 42.300 43.413 42.300 43.413 dbcsr_multiply_generic 1966 12.4 0.114 0.117 14.071 14.173 qs_scf_new_mos 96 7.5 0.001 0.001 11.362 11.407 qs_scf_loop_do_ot 96 8.5 0.001 0.001 11.362 11.406 ot_scf_mini 96 9.5 0.003 0.003 10.634 10.676 multiply_cannon 1966 13.4 0.186 0.189 9.382 9.691 multiply_cannon_loop 1966 14.4 0.193 0.206 8.489 8.882 init_scf_loop 11 6.9 0.000 0.000 7.601 7.601 rs_pw_transfer 878 11.9 0.014 0.015 6.446 7.516 mp_waitall_1 146670 16.2 6.763 7.152 6.763 7.152 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 7.027 7.033 init_scf_run 11 5.9 0.000 0.002 7.005 7.005 scf_env_initial_rho_setup 11 6.9 0.000 0.001 7.005 7.005 density_rs2pw 107 9.7 0.008 0.008 5.537 6.632 wfi_extrapolate 11 7.9 0.001 0.001 6.410 6.410 ot_mini 96 10.5 0.001 0.001 6.268 6.315 pw_transfer 1295 11.6 0.117 0.127 4.852 4.930 multiply_cannon_metrocomm3 15728 15.4 0.069 0.073 4.374 4.796 fft_wrap_pw1pw2 1081 12.6 0.013 0.014 4.609 4.698 potential_pw2rs 107 12.3 0.009 0.010 4.363 4.375 fft_wrap_pw1pw2_140 439 13.2 0.478 0.498 4.035 4.185 mp_waitany 8968 13.7 2.733 3.845 2.733 3.845 rs_pw_transfer_RS2PW_140 118 11.5 0.394 0.420 2.472 3.552 fft3d_ps 1081 14.6 1.902 2.037 3.387 3.482 multiply_cannon_multrec 15728 15.4 3.130 3.241 3.145 3.255 make_m2s 3932 13.4 0.064 0.066 3.174 3.220 mp_alltoall_d11v 1998 13.7 2.110 3.193 2.110 3.193 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 3.120 3.165 apply_single 107 13.6 0.001 0.001 3.120 3.165 qs_ot_get_derivative 96 11.5 0.001 0.001 3.104 3.147 ot_diis_step 96 11.5 0.004 0.005 3.135 3.135 rs_gather_matrices 107 12.3 0.115 0.124 1.833 2.905 make_images 3932 14.4 0.162 0.165 2.577 2.625 ------------------------------------------------------------------------------- Plot: name="H2O-64_nonortho_timings_32omp", title="Timings of H2O-64_nonortho with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="rest", label="rest", y=74.714, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=49.291, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=47.926, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=19.379, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_gemm_cosma", label="cp_gemm_cosma", y=10.959, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=6.398, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=0.0, yerr=0.0 Plot: name="H2O-64_nonortho_timings_32mpi", title="Timings of H2O-64_nonortho with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="rest", label="rest", y=23.834000000000003, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=42.3, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=46.096, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_gemm_cosma", label="cp_gemm_cosma", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitany", label="mp_waitany", y=2.733, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=6.763, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=3.13, yerr=0.0 Running H2O-hyb.inp with 1 threads and 32 ranks... done. Running H2O-hyb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-hyb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.569 0.569 257.696 257.696 qs_energies 1 2.0 0.000 0.000 256.271 256.271 scf_env_do_scf 1 3.0 0.000 0.000 253.812 253.812 qs_ks_update_qs_env 8 5.0 0.000 0.000 235.424 235.424 rebuild_ks_matrix 7 6.0 0.000 0.000 235.313 235.313 qs_ks_build_kohn_sham_matrix 7 7.0 0.002 0.002 235.313 235.313 hfx_ks_matrix 7 8.0 0.000 0.000 169.038 169.038 integrate_four_center 7 9.0 1.993 1.993 169.007 169.007 integrate_four_center_main 7 10.0 0.730 0.730 157.788 157.788 integrate_four_center_bin 455 11.0 157.058 157.058 157.058 157.058 scf_env_do_scf_inner_loop 7 4.0 0.001 0.001 146.868 146.868 init_scf_loop 1 4.0 0.000 0.000 106.927 106.927 cp_gemm 129 10.3 0.001 0.001 51.048 51.048 cp_gemm_cosma 129 11.3 51.048 51.048 51.048 51.048 admm_mo_calc_rho_aux 7 8.0 0.000 0.000 31.378 31.378 admm_fit_mo_coeffs 7 9.0 0.000 0.000 29.630 29.630 admm_mo_merge_derivs 7 8.0 0.000 0.000 25.841 25.841 merge_mo_derivs_diag 7 9.0 0.023 0.023 25.841 25.841 purify_mo_diag 7 10.0 0.001 0.001 17.196 17.196 prepare_preconditioner 1 5.0 0.000 0.000 14.305 14.305 make_preconditioner 1 6.0 0.000 0.000 14.305 14.305 fit_mo_coeffs 7 10.0 0.000 0.000 12.434 12.434 arnoldi_normal_ev 11 9.3 0.002 0.002 8.867 8.867 integrate_four_center_load 7 10.0 0.001 0.001 8.830 8.830 hfx_load_balance 1 11.0 0.002 0.002 8.829 8.829 estimate_cond_num 1 7.0 0.000 0.000 8.789 8.789 build_subspace 28 9.5 0.014 0.014 8.732 8.732 qs_vxc_create 14 8.0 0.000 0.000 5.434 5.434 xc_vxc_pw_create 14 9.0 0.933 0.933 5.434 5.434 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-hyb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.224 0.230 183.216 183.217 qs_energies 1 2.0 0.001 0.001 182.851 182.852 scf_env_do_scf 1 3.0 0.000 0.000 182.297 182.298 qs_ks_update_qs_env 8 5.0 0.000 0.000 179.356 179.356 rebuild_ks_matrix 7 6.0 0.000 0.000 179.343 179.343 qs_ks_build_kohn_sham_matrix 7 7.0 0.002 0.003 179.343 179.343 hfx_ks_matrix 7 8.0 0.000 0.001 168.159 168.159 integrate_four_center 7 9.0 0.089 0.399 168.144 168.144 integrate_four_center_main 7 10.0 0.004 0.005 154.634 158.086 integrate_four_center_bin 448 11.0 154.629 158.082 154.629 158.082 scf_env_do_scf_inner_loop 7 4.0 0.000 0.001 106.184 106.184 init_scf_loop 1 4.0 0.000 0.000 76.112 76.112 integrate_four_center_load 7 10.0 0.000 0.000 8.890 8.893 hfx_load_balance 1 11.0 0.001 0.001 8.890 8.893 mp_sync 70 11.3 3.821 6.913 3.821 6.913 hfx_load_balance_bin 1 12.0 4.339 4.449 4.339 4.449 hfx_load_balance_count 1 12.0 4.328 4.447 4.328 4.447 cp_gemm 129 10.3 0.000 0.001 4.082 4.087 cp_gemm_cosma 129 11.3 4.081 4.087 4.081 4.087 ------------------------------------------------------------------------------- Plot: name="H2O-hyb_timings_32omp", title="Timings of H2O-hyb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32omp", name="rest", label="rest", y=45.934000000000054, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_bin", label="integrate_four_center_bin", y=157.058, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="cp_gemm_cosma", label="cp_gemm_cosma", y=51.048, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center", label="integrate_four_center", y=1.993, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="xc_vxc_pw_create", label="xc_vxc_pw_create", y=0.933, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_main", label="integrate_four_center_main", y=0.73, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_count", label="hfx_load_balance_count", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=0.0, yerr=0.0 Plot: name="H2O-hyb_timings_32mpi", title="Timings of H2O-hyb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32mpi", name="rest", label="rest", y=11.92500000000004, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_bin", label="integrate_four_center_bin", y=154.629, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="cp_gemm_cosma", label="cp_gemm_cosma", y=4.081, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center", label="integrate_four_center", y=0.089, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="xc_vxc_pw_create", label="xc_vxc_pw_create", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_main", label="integrate_four_center_main", y=0.004, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="mp_sync", label="mp_sync", y=3.821, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_count", label="hfx_load_balance_count", y=4.328, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=4.339, yerr=0.0 Running GW_PBE_4benzene.inp with 1 threads and 32 ranks... done. Running GW_PBE_4benzene.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/GW_PBE_4benzene_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.017 0.017 411.739 411.739 qs_energies 1 2.0 0.000 0.000 411.092 411.092 mp2_main 1 3.0 0.000 0.000 404.153 404.153 mp2_gpw_main 1 4.0 0.001 0.001 403.674 403.674 rpa_ri_compute_en 1 5.0 0.000 0.000 388.176 388.176 rpa_num_int 1 6.0 0.000 0.000 388.149 388.149 compute_mat_P_omega 1 7.0 0.002 0.002 221.963 221.963 compute_mat_P_omega_contract 10 8.0 13.217 13.217 220.650 220.650 dbcsr_t_total 2336 9.6 0.018 0.018 210.080 210.080 cp_gemm 105 8.4 0.000 0.000 134.351 134.351 cp_gemm_cosma 105 9.4 134.350 134.350 134.350 134.350 dbcsr_t_contract 787 11.0 49.241 49.241 133.917 133.917 GW_matrix_operations 10 7.0 0.006 0.006 96.769 96.769 compute_mat_P_omega_calc_M_occ 250 9.0 13.229 13.229 84.872 84.872 dbcsr_tas_total 1149 12.2 0.055 0.055 78.319 78.319 dbcsr_tas_multiply 807 12.1 0.004 0.004 76.748 76.748 dbcsr_t_copy 1103 10.7 20.939 20.939 74.565 74.565 dbcsr_multiply_generic 837 15.8 0.138 0.138 62.563 62.563 dbcsr_tas_dbcsr 807 14.1 0.003 0.003 62.117 62.117 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 55.744 55.744 dbcsr_tas_mm_1N 524 15.1 0.003 0.003 49.133 49.133 multiply_cannon 837 16.8 19.029 19.029 48.097 48.097 rpa_num_int_RPA_matrix_operati 10 7.0 0.000 0.000 46.786 46.786 contract_P_omega_with_mat_L 10 8.0 0.000 0.000 44.615 44.615 dbcsr_tas_reserve_blocks_index 3261 13.7 8.007 8.007 28.468 28.468 multiply_cannon_loop 837 17.8 0.158 0.158 26.183 26.183 dbcsr_tas_copy 574 11.4 17.574 17.574 25.580 25.580 multiply_cannon_multrec 837 18.8 24.298 24.298 24.981 24.981 dbcsr_t_reserve_blocks_index 2280 12.5 1.293 1.293 21.776 21.776 dbcsr_t_reserve_blocks_index_a 2222 11.6 0.014 0.014 21.465 21.465 compute_mat_P_omega_copy_M_occ 250 9.0 0.002 0.002 20.309 20.309 dbcsr_reserve_blocks 3717 14.7 19.771 19.771 20.162 20.162 compute_QP_energies 1 7.0 0.000 0.000 20.125 20.125 compute_self_energy_cubic_gw 1 8.0 0.106 0.106 20.125 20.125 mp2_ri_gpw_compute_in 1 5.0 0.001 0.001 15.483 15.483 compute_mat_P_omega_copy_M_vir 250 9.0 0.002 0.002 14.830 14.830 dbcsr_t_copy_nocomm 251 12.0 11.587 11.587 14.066 14.066 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 12.987 12.987 make_m2s 1674 16.8 0.109 0.109 11.745 11.745 make_images 1674 17.8 5.358 5.358 11.115 11.115 dbcsr_tas_mm_2 251 15.0 0.002 0.002 11.044 11.044 cp_fm_cholesky_invert 10 8.0 9.570 9.570 9.570 9.570 dbcsr_finalize 9888 13.6 1.564 1.564 8.439 8.439 contract_cubic_gw 21 9.0 0.000 0.000 8.334 8.334 ------------------------------------------------------------------------------- From /workspace/artifacts/GW_PBE_4benzene_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.010 64.896 64.897 qs_energies 1 2.0 0.001 0.001 64.759 64.766 mp2_main 1 3.0 0.001 0.001 63.156 63.163 mp2_gpw_main 1 4.0 0.000 0.000 63.098 63.104 rpa_ri_compute_en 1 5.0 0.000 0.000 61.042 61.049 rpa_num_int 1 6.0 0.000 0.001 61.034 61.041 dbcsr_t_total 2336 9.6 0.018 0.020 48.569 48.571 compute_mat_P_omega 1 7.0 0.001 0.002 47.496 47.507 compute_mat_P_omega_contract 10 8.0 0.886 0.915 47.241 47.247 dbcsr_t_contract 787 11.0 2.104 2.275 35.640 35.647 dbcsr_tas_total 1149 12.2 0.072 0.078 31.455 31.456 dbcsr_tas_multiply 807 12.1 0.003 0.003 31.301 31.304 dbcsr_tas_dbcsr 807 14.1 0.004 0.004 22.877 22.878 dbcsr_multiply_generic 837 15.8 0.081 0.089 19.265 20.052 compute_mat_P_omega_calc_M_occ 250 9.0 0.865 0.893 15.730 15.731 multiply_cannon 837 16.8 0.151 0.172 11.311 11.718 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 11.688 11.689 dbcsr_t_copy 1111 10.7 4.895 5.149 11.052 11.453 dbcsr_tas_mm_1N 524 15.1 0.003 0.004 10.160 10.946 multiply_cannon_loop 837 17.8 0.051 0.053 10.303 10.692 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 10.020 10.021 dbcsr_tas_mm_2 251 15.0 0.002 0.002 8.841 8.841 multiply_cannon_multrec 1386 17.8 7.919 8.311 8.222 8.597 mp_sync 8696 11.6 7.229 8.518 7.229 8.518 cp_gemm 105 8.4 0.000 0.000 8.216 8.231 cp_gemm_cosma 105 9.4 8.216 8.230 8.216 8.230 make_m2s 1674 16.8 0.052 0.055 6.835 7.500 make_images 1674 17.8 0.259 0.273 6.740 7.406 GW_matrix_operations 10 7.0 0.001 0.002 5.350 5.358 compute_QP_energies 1 7.0 0.000 0.001 4.751 4.751 compute_self_energy_cubic_gw 1 8.0 0.005 0.005 4.747 4.751 dbcsr_t_communicate_buffer 1098 11.7 0.102 0.108 4.153 4.300 mp_waitall_2 3776 14.7 3.901 4.141 3.901 4.141 make_images_data 1674 18.8 0.043 0.045 3.657 3.835 hybrid_alltoall_any 1724 19.5 2.823 3.225 3.512 3.690 contract_cubic_gw 21 9.0 0.000 0.000 3.608 3.608 dbcsr_t_reserve_blocks_index_a 2791 11.4 0.023 0.026 3.137 3.502 dbcsr_t_reserve_blocks_index 2849 12.4 0.119 0.129 3.129 3.498 dbcsr_tas_reserve_blocks_index 3300 13.8 0.280 0.303 3.073 3.442 dbcsr_reserve_blocks 3785 14.7 2.792 3.138 2.835 3.182 make_images_pack 1674 18.8 2.581 3.140 2.599 3.158 rpa_num_int_RPA_matrix_operati 10 7.0 0.000 0.000 3.021 3.031 contract_P_omega_with_mat_L 10 8.0 0.000 0.000 2.900 2.910 convert_to_new_pgrid 2421 14.1 0.021 0.022 2.215 2.355 mp_waitall_1 26582 19.0 1.885 2.340 1.885 2.340 dbcsr_copy 3323 15.8 2.137 2.284 2.169 2.315 mp2_ri_gpw_compute_in 1 5.0 0.001 0.001 2.054 2.054 compute_mat_P_omega_copy_M_vir 250 9.0 0.002 0.002 2.040 2.048 dbcsr_add_anytype 909 13.7 1.190 1.250 1.844 1.907 compute_mat_P_omega_copy_M_occ 250 9.0 0.002 0.002 1.785 1.789 scf_env_do_scf 1 3.0 0.000 0.000 1.544 1.544 scf_env_do_scf_inner_loop 17 4.0 0.001 0.002 1.544 1.544 dbcsr_tas_replicate 396 14.1 0.829 0.908 1.450 1.527 mp_max_i 2058 9.6 1.057 1.343 1.057 1.343 dbcsr_finalize 10566 13.5 0.051 0.055 1.246 1.301 ------------------------------------------------------------------------------- Plot: name="GW_PBE_4benzene_timings_32omp", title="Timings of GW_PBE_4benzene with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="rest", label="rest", y=163.14, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="cp_gemm_cosma", label="cp_gemm_cosma", y=134.35, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbcsr_t_contract", label="dbcsr_t_contract", y=49.241, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=24.298, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbcsr_t_copy", label="dbcsr_t_copy", y=20.939, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbcsr_reserve_blocks", label="dbcsr_reserve_blocks", y=19.771, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 Plot: name="GW_PBE_4benzene_timings_32mpi", title="Timings of GW_PBE_4benzene with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="rest", label="rest", y=27.839999999999996, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="cp_gemm_cosma", label="cp_gemm_cosma", y=8.216, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbcsr_t_contract", label="dbcsr_t_contract", y=2.104, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=7.919, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbcsr_t_copy", label="dbcsr_t_copy", y=4.895, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbcsr_reserve_blocks", label="dbcsr_reserve_blocks", y=2.792, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=3.901, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_sync", label="mp_sync", y=7.229, yerr=0.0 Running diag_cu144_broy.inp with 1 threads and 32 ranks... done. Running diag_cu144_broy.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/diag_cu144_broy_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.099 0.099 189.985 189.985 qs_energies 1 2.0 0.000 0.000 188.212 188.212 scf_env_do_scf 1 3.0 0.000 0.000 178.065 178.065 scf_env_do_scf_inner_loop 15 4.0 0.002 0.002 178.065 178.065 qs_scf_new_mos 15 5.0 0.000 0.000 77.570 77.570 qs_ks_update_qs_env 15 5.0 0.000 0.000 68.971 68.971 rebuild_ks_matrix 15 6.0 0.000 0.000 68.610 68.610 qs_ks_build_kohn_sham_matrix 15 7.0 0.002 0.002 68.610 68.610 eigensolver 15 6.0 0.002 0.002 63.617 63.617 cp_fm_diag_elpa 15 7.0 0.000 0.000 49.745 49.745 cp_fm_diag_elpa_base 15 8.0 45.023 45.023 49.744 49.744 qs_vxc_create 15 8.0 0.032 0.032 44.982 44.982 calculate_dispersion_nonloc 15 9.0 8.904 8.904 39.164 39.164 pw_transfer 1191 9.8 0.095 0.095 26.632 26.632 fft_wrap_pw1pw2 1086 10.9 0.013 0.013 26.337 26.337 qs_rho_update_rho 16 5.0 0.000 0.000 24.920 24.920 calculate_rho_elec 16 6.0 0.344 0.344 24.920 24.920 grid_collocate_task_list 16 7.0 23.329 23.329 23.329 23.329 sum_up_and_integrate 15 8.0 0.076 0.076 22.053 22.053 integrate_v_rspace 15 9.0 0.033 0.033 21.977 21.977 grid_integrate_task_list 15 10.0 21.347 21.347 21.347 21.347 fft_wrap_pw1pw2_150 765 12.0 3.319 3.319 19.917 19.917 copy_dbcsr_to_fm 16 5.9 0.001 0.001 11.156 11.156 fft3d_s 1087 12.8 10.870 10.870 10.881 10.881 pw_scatter_s 585 13.0 10.474 10.474 10.474 10.474 dbcsr_complete_redistribute 46 8.3 3.615 3.615 9.826 9.826 cp_fm_cholesky_restore 45 7.0 9.335 9.335 9.335 9.335 cp_fm_upper_to_full 30 8.0 9.256 9.256 9.256 9.256 vdW_energy 15 10.0 7.989 7.989 7.989 7.989 gspace_mixing 14 5.0 0.273 0.273 7.968 7.968 broyden_mixing 14 6.0 7.223 7.223 7.223 7.223 fft_wrap_pw1pw2_200 197 11.5 0.335 0.335 6.172 6.172 xc_vxc_pw_create 15 9.0 1.498 1.498 5.787 5.787 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 4.725 4.725 init_scf_run 1 3.0 0.000 0.000 4.622 4.622 dbcsr_finalize 159 9.9 0.023 0.023 4.183 4.183 dbcsr_merge_all 91 11.1 0.077 0.077 4.028 4.028 ------------------------------------------------------------------------------- From /workspace/artifacts/diag_cu144_broy_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.021 0.026 90.034 90.035 qs_energies 1 2.0 0.001 0.001 89.616 89.616 scf_env_do_scf 1 3.0 0.000 0.000 84.363 84.364 scf_env_do_scf_inner_loop 15 4.0 0.001 0.002 84.363 84.364 qs_ks_update_qs_env 15 5.0 0.000 0.000 41.215 41.232 rebuild_ks_matrix 15 6.0 0.000 0.000 41.163 41.180 qs_ks_build_kohn_sham_matrix 15 7.0 0.004 0.005 41.163 41.180 qs_rho_update_rho 16 5.0 0.000 0.000 24.041 24.043 calculate_rho_elec 16 6.0 0.012 0.013 24.040 24.043 sum_up_and_integrate 15 8.0 0.015 0.017 23.172 23.198 integrate_v_rspace 15 9.0 0.001 0.001 23.158 23.184 grid_collocate_task_list 16 7.0 21.944 22.696 21.944 22.696 grid_integrate_task_list 15 10.0 21.235 21.970 21.235 21.970 qs_scf_new_mos 15 5.0 0.001 0.001 19.494 19.557 eigensolver 15 6.0 0.002 0.003 17.870 17.883 qs_vxc_create 15 8.0 0.001 0.001 17.397 17.407 calculate_dispersion_nonloc 15 9.0 1.439 1.517 14.158 14.178 pw_transfer 1191 9.8 0.143 0.150 13.652 13.767 fft_wrap_pw1pw2 1086 10.9 0.022 0.026 13.334 13.445 cp_fm_diag_elpa 15 7.0 0.000 0.000 12.876 12.885 cp_fm_diag_elpa_base 15 8.0 12.592 12.632 12.869 12.874 fft3d_ps 1086 12.9 5.875 6.162 10.139 10.413 fft_wrap_pw1pw2_150 765 12.0 0.741 0.823 9.031 9.082 cp_fm_cholesky_restore 45 7.0 4.730 4.789 4.730 4.789 fft_wrap_pw1pw2_200 197 11.5 0.403 0.433 4.132 4.217 xc_vxc_pw_create 15 9.0 0.061 0.087 3.238 3.250 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 3.248 3.249 mp_alltoall_z22v 1086 14.9 2.610 3.082 2.610 3.082 build_core_hamiltonian_matrix 1 4.0 0.000 0.000 2.796 3.066 x_to_yz 585 14.0 1.012 1.056 2.353 2.499 rs_pw_transfer 158 9.4 0.002 0.003 1.953 2.381 vdW_energy 15 10.0 2.160 2.266 2.160 2.266 density_rs2pw 16 7.0 0.002 0.002 1.907 2.183 yz_to_x 501 13.7 0.606 0.699 1.874 2.114 build_core_ppnl 1 5.0 1.867 2.052 1.867 2.052 mp_waitany 520 11.3 1.271 1.851 1.271 1.851 ------------------------------------------------------------------------------- Plot: name="diag_cu144_broy_timings_32omp", title="Timings of diag_cu144_broy with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32omp", name="rest", label="rest", y=69.607, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=45.023, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=23.329, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=21.347, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_s", label="fft3d_s", y=10.87, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="pw_scatter_s", label="pw_scatter_s", y=10.474, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=9.335, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 Plot: name="diag_cu144_broy_timings_32mpi", title="Timings of diag_cu144_broy with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="rest", label="rest", y=23.658, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=12.592, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=21.944, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=21.235, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="pw_scatter_s", label="pw_scatter_s", y=0.0, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=4.73, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=5.875, yerr=0.0 Running bench_dftb.inp with 1 threads and 32 ranks... done. Running bench_dftb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/bench_dftb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.087 0.087 344.383 344.383 qs_energies 1 2.0 0.000 0.000 344.223 344.223 ls_scf 1 3.0 0.000 0.000 342.430 342.430 ls_scf_main 1 4.0 0.002 0.002 326.557 326.557 density_matrix_trs4 11 5.0 0.011 0.011 179.104 179.104 ls_scf_dm_to_ks 11 5.0 0.000 0.000 140.776 140.776 matrix_ls_to_qs 11 6.0 0.000 0.000 136.444 136.444 dbcsr_multiply_generic 185 6.1 0.470 0.470 112.429 112.429 dbcsr_copy_into_existing 11 7.0 84.818 84.818 84.819 84.819 multiply_cannon 185 7.1 3.444 3.444 77.762 77.762 dbcsr_complete_redistribute 23 7.5 40.444 40.444 56.425 56.425 multiply_cannon_loop 185 8.1 0.382 0.382 55.858 55.858 multiply_cannon_multrec 185 9.1 53.705 53.705 53.777 53.777 matrix_decluster 11 7.0 0.000 0.000 51.624 51.624 arnoldi_extremal 12 6.1 0.000 0.000 47.664 47.664 arnoldi_normal_ev 12 7.1 0.028 0.028 47.664 47.664 build_subspace 23 8.1 0.130 0.130 47.040 47.040 dbcsr_matrix_vector_mult 652 9.0 0.256 0.256 36.694 36.694 dbcsr_matrix_vector_mult_local 652 10.0 35.190 35.190 35.199 35.199 make_m2s 370 7.1 0.030 0.030 28.490 28.490 make_images 370 8.1 7.400 7.400 26.104 26.104 dbcsr_finalize 646 7.5 0.209 0.209 21.609 21.609 dbcsr_merge_all 597 8.5 3.691 3.691 19.473 19.473 setup_rec_index_2d 370 8.1 18.213 18.213 18.213 18.213 ls_scf_init_scf 1 4.0 0.000 0.000 14.946 14.946 dbcsr_sort_indices 1103 9.9 14.753 14.753 14.753 14.753 ls_scf_init_matrix_S 1 5.0 0.000 0.000 14.513 14.513 matrix_sqrt_Newton_Schulz 1 6.0 0.001 0.001 13.657 13.657 tree_to_linear_d 110 9.4 13.546 13.546 13.546 13.546 quick_finalize 395 10.0 0.472 0.472 12.590 12.590 dbcsr_special_finalize 370 9.1 0.003 0.003 11.611 11.611 dbcsr_dot_sd 144 6.3 8.874 8.874 8.876 8.876 dbcsr_new_transposed 2 7.0 0.135 0.135 8.312 8.312 dbcsr_redistribute 2 8.0 8.075 8.075 8.143 8.143 dbcsr_frobenius_norm 142 6.1 7.775 7.775 7.778 7.778 matrix_qs_to_ls 12 5.1 0.000 0.000 6.958 6.958 matrix_cluster 12 6.1 0.000 0.000 6.958 6.958 make_images_data 370 9.1 0.010 0.010 6.913 6.913 ------------------------------------------------------------------------------- From /workspace/artifacts/bench_dftb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.008 0.010 92.742 92.743 qs_energies 1 2.0 0.000 0.000 92.645 92.645 ls_scf 1 3.0 0.000 0.000 92.565 92.565 ls_scf_main 1 4.0 0.001 0.002 88.793 88.797 density_matrix_trs4 11 5.0 0.009 0.013 85.048 85.113 dbcsr_multiply_generic 185 6.1 0.071 0.081 79.770 79.967 multiply_cannon 185 7.1 0.041 0.046 66.433 67.512 multiply_cannon_loop 185 8.1 0.209 0.223 62.649 64.582 multiply_cannon_multrec 1480 9.1 41.476 44.291 41.955 44.772 mp_waitall_1 11936 10.3 18.696 21.736 18.696 21.736 multiply_cannon_metrocomm3 1480 9.1 0.017 0.019 11.177 15.286 make_m2s 370 7.1 0.034 0.036 8.811 8.911 make_images 370 8.1 0.696 0.719 8.691 8.798 multiply_cannon_metrocomm1 1480 9.1 0.010 0.011 4.431 8.071 calculate_norms 2960 9.1 4.802 4.990 4.802 4.990 mp_sum_l 1039 5.9 3.262 4.430 3.262 4.430 arnoldi_extremal 12 6.1 0.000 0.001 3.933 3.943 arnoldi_normal_ev 12 7.1 0.002 0.008 3.932 3.942 make_images_data 370 9.1 0.012 0.013 3.470 3.880 build_subspace 23 8.1 0.038 0.052 3.804 3.806 dbcsr_multiply_generic_mpsum_f 137 7.1 0.001 0.001 2.354 3.347 ls_scf_dm_to_ks 11 5.0 0.000 0.000 3.244 3.330 dbcsr_matrix_vector_mult 652 9.0 0.018 0.080 3.161 3.247 hybrid_alltoall_any 393 9.9 0.312 1.564 2.808 3.185 dbcsr_complete_redistribute 23 7.5 1.779 1.942 2.834 2.938 matrix_ls_to_qs 11 6.0 0.000 0.000 2.795 2.908 ls_scf_init_scf 1 4.0 0.000 0.000 2.890 2.891 ls_scf_init_matrix_S 1 5.0 0.000 0.000 2.851 2.863 dbcsr_matrix_vector_mult_local 652 10.0 2.505 2.689 2.510 2.694 make_images_pack 370 9.1 2.426 2.658 2.431 2.664 matrix_decluster 11 7.0 0.000 0.000 2.545 2.654 matrix_sqrt_Newton_Schulz 1 6.0 0.001 0.001 2.607 2.609 buffer_matrices_ensure_size 370 8.1 2.180 2.304 2.180 2.304 dbcsr_add_d 280 6.0 0.002 0.002 2.073 2.186 dbcsr_add_anytype 280 7.0 1.123 1.214 2.072 2.185 dbcsr_finalize 646 7.5 0.014 0.014 1.911 2.002 ------------------------------------------------------------------------------- Plot: name="bench_dftb_timings_32omp", title="Timings of bench_dftb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32omp", name="rest", label="rest", y=112.013, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=84.818, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=53.705, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=40.444, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=35.19, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="setup_rec_index_2d", label="setup_rec_index_2d", y=18.213, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="calculate_norms", label="calculate_norms", y=0.0, yerr=0.0 Plot: name="bench_dftb_timings_32mpi", title="Timings of bench_dftb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32mpi", name="rest", label="rest", y=20.221999999999994, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=41.476, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=1.779, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=2.505, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="setup_rec_index_2d", label="setup_rec_index_2d", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=3.262, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=18.696, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="calculate_norms", label="calculate_norms", y=4.802, yerr=0.0 Running dbcsr.inp with 1 threads and 32 ranks... done. Running dbcsr.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/dbcsr_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.006 0.006 112.938 112.938 lib_test 1 2.0 0.000 0.000 112.931 112.931 dbcsr_run_tests 3 3.0 0.003 0.003 112.931 112.931 test_multiplies_multiproc 3 4.0 0.001 0.001 92.918 92.918 dbcsr_redistribute 9 5.0 64.703 64.703 68.158 68.158 dbcsr_multiply_generic 9 5.0 0.001 0.001 22.996 22.996 dbcsr_make_random_matrix 9 4.0 14.622 14.622 19.929 19.929 multiply_cannon 9 6.0 0.002 0.002 16.397 16.397 multiply_cannon_loop 9 7.0 0.003 0.003 15.883 15.883 multiply_cannon_multrec 9 8.0 15.880 15.880 15.881 15.881 dbcsr_finalize 27 5.7 0.004 0.004 9.164 9.164 dbcsr_merge_all 18 6.5 3.287 3.287 8.425 8.425 tree_to_linear_d 9 7.0 3.211 3.211 3.211 3.211 mp_alltoall_d11v 27 6.0 3.123 3.123 3.123 3.123 dbcsr_data_release 975 7.6 2.442 2.442 2.442 2.442 ------------------------------------------------------------------------------- From /workspace/artifacts/dbcsr_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.003 0.005 26.376 26.377 lib_test 1 2.0 0.000 0.000 26.346 26.366 dbcsr_run_tests 3 3.0 0.000 0.001 26.344 26.365 test_multiplies_multiproc 3 4.0 0.000 0.001 25.212 25.264 dbcsr_multiply_generic 9 5.0 0.001 0.002 23.323 23.418 multiply_cannon 9 6.0 0.002 0.003 21.089 21.479 multiply_cannon_loop 9 7.0 0.004 0.004 20.660 21.023 multiply_cannon_multrec 72 8.0 17.467 17.998 17.469 18.000 mp_waitall_1 576 9.2 3.576 4.206 3.576 4.206 multiply_cannon_metrocomm1 72 8.0 0.002 0.002 2.830 3.554 mp_sum_l 310 2.7 0.547 1.302 0.547 1.302 dbcsr_multiply_generic_mpsum_f 9 6.0 0.000 0.000 0.543 1.297 dbcsr_make_random_matrix 9 4.0 0.886 0.939 1.094 1.147 multiply_cannon_metrocomm3 72 8.0 0.000 0.001 0.350 1.014 make_m2s 18 6.0 0.001 0.001 0.910 0.940 make_images 18 7.0 0.026 0.028 0.907 0.937 dbcsr_finalize 27 5.7 0.000 0.001 0.825 0.917 dbcsr_merge_all 18 6.5 0.133 0.158 0.727 0.827 dbcsr_data_release 444 7.6 0.620 0.701 0.620 0.701 dbcsr_redistribute 9 5.0 0.380 0.424 0.651 0.680 dbcsr_destroy 111 5.9 0.008 0.102 0.533 0.605 ------------------------------------------------------------------------------- Plot: name="dbcsr_timings_32omp", title="Timings of dbcsr with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32omp", name="rest", label="rest", y=8.792999999999992, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_redistribute", label="dbcsr_redistribute", y=64.703, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=15.88, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=14.622, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_merge_all", label="dbcsr_merge_all", y=3.287, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="tree_to_linear_d", label="tree_to_linear_d", y=3.211, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_data_release", label="dbcsr_data_release", y=2.442, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="dbcsr_timings_32mpi", title="Timings of dbcsr with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32mpi", name="rest", label="rest", y=2.767000000000003, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_redistribute", label="dbcsr_redistribute", y=0.38, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=17.467, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=0.886, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_merge_all", label="dbcsr_merge_all", y=0.133, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="tree_to_linear_d", label="tree_to_linear_d", y=0.0, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_data_release", label="dbcsr_data_release", y=0.62, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=0.547, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=3.576, yerr=0.0 Running MQAE_single_node.inp with 1 threads and 32 ranks... done. Running MQAE_single_node.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/MQAE_single_node_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.044 0.044 155.022 155.022 qs_mol_dyn_low 1 2.0 0.005 0.005 153.072 153.072 velocity_verlet 5 3.0 0.005 0.005 126.179 126.179 qmmm_el_coupling 6 3.8 0.000 0.000 74.614 74.614 qmmm_elec_with_gaussian 6 4.8 0.191 0.191 74.609 74.609 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 72.930 72.930 qmmm_elec_gaussian_low_G 6 6.8 71.368 71.368 71.368 71.368 qs_forces 6 3.8 0.001 0.001 57.455 57.455 qs_energies 6 4.8 0.001 0.001 51.164 51.164 scf_env_do_scf 6 5.8 0.000 0.000 47.223 47.223 scf_env_do_scf_inner_loop 39 6.8 0.003 0.003 39.742 39.742 rebuild_ks_matrix 45 8.4 0.000 0.000 39.264 39.264 qs_ks_build_kohn_sham_matrix 45 9.4 0.007 0.007 39.264 39.264 qs_ks_update_qs_env 45 7.8 0.000 0.000 33.724 33.724 pw_transfer 966 11.9 0.073 0.073 23.916 23.916 fft_wrap_pw1pw2 801 13.0 0.009 0.009 23.568 23.568 fft_wrap_pw1pw2_150 507 14.3 2.433 2.433 23.054 23.054 qs_vxc_create 45 10.4 0.001 0.001 21.275 21.275 xc_vxc_pw_create 45 11.4 4.143 4.143 21.275 21.275 fist_calc_energy_force 6 3.8 0.002 0.002 11.207 11.207 pw_scatter_s 429 15.4 10.560 10.560 10.560 10.560 qs_rho_update_rho 45 7.9 0.000 0.000 10.337 10.337 calculate_rho_elec 45 8.9 0.901 0.901 10.337 10.337 force_nonbond 6 4.8 9.920 9.920 9.920 9.920 xc_rho_set_and_dset_create 45 12.4 0.251 0.251 9.869 9.869 fft3d_s 802 15.0 9.191 9.191 9.201 9.201 qmmm_forces 6 3.8 0.001 0.001 8.995 8.995 pw_integral_ab 2539 7.4 8.840 8.840 8.840 8.840 qmmm_forces_with_gaussian 6 4.8 0.129 0.129 8.508 8.508 init_scf_loop 6 6.8 0.000 0.000 7.475 7.475 qs_ks_ddapc 45 10.4 0.001 0.001 6.754 6.754 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 6.567 6.567 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 5.552 5.552 qmmm_forces_gaussian_low_G 6 6.8 5.507 5.507 5.507 5.507 pw_poisson_solve 51 9.9 2.364 2.364 5.410 5.410 density_rs2pw 45 9.9 0.003 0.003 4.737 4.737 grid_collocate_task_list 45 9.9 4.699 4.699 4.699 4.699 sum_up_and_integrate 45 10.4 0.240 0.240 4.381 4.381 cp_ddapc_apply_CD 45 11.4 0.006 0.006 4.172 4.172 integrate_v_rspace 45 11.4 0.014 0.014 4.141 4.141 ------------------------------------------------------------------------------- From /workspace/artifacts/MQAE_single_node_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.033 0.035 86.282 86.283 qs_mol_dyn_low 1 2.0 0.005 0.006 84.719 84.811 qs_forces 6 3.8 0.001 0.001 62.286 62.286 qs_energies 6 4.8 0.001 0.001 59.370 59.370 scf_env_do_scf 6 5.8 0.000 0.001 57.871 57.872 scf_env_do_scf_inner_loop 113 6.2 0.003 0.009 55.558 55.559 rebuild_ks_matrix 119 8.1 0.000 0.000 41.112 41.129 qs_ks_build_kohn_sham_matrix 119 9.1 0.020 0.021 41.112 41.129 qs_ks_update_qs_env 119 7.3 0.001 0.001 38.647 38.664 velocity_verlet 5 3.0 0.002 0.003 35.575 35.580 pw_transfer 2446 11.8 0.268 0.293 26.227 26.537 fft_wrap_pw1pw2 2059 12.8 0.033 0.036 25.409 25.806 fft_wrap_pw1pw2_150 1321 14.0 2.227 2.409 24.659 25.090 qs_vxc_create 119 10.1 0.003 0.004 20.918 20.923 xc_vxc_pw_create 119 11.1 0.447 0.636 20.915 20.919 fft3d_ps 2059 14.8 11.451 12.524 19.060 19.486 qs_rho_update_rho 119 7.3 0.001 0.001 16.135 16.136 calculate_rho_elec 119 8.3 0.086 0.096 16.134 16.135 sum_up_and_integrate 119 10.1 0.085 0.091 14.796 14.843 integrate_v_rspace 119 11.1 0.004 0.005 14.711 14.763 qmmm_forces 6 3.8 0.002 0.003 12.504 12.505 qmmm_forces_with_gaussian 6 4.8 0.381 0.471 12.066 12.250 rs_pw_transfer 988 11.5 0.015 0.017 10.967 11.480 xc_rho_set_and_dset_create 119 12.1 0.500 0.591 9.816 10.211 density_rs2pw 119 9.3 0.011 0.013 9.693 10.140 qmmm_el_coupling 6 3.8 0.000 0.000 8.796 8.844 qmmm_elec_with_gaussian 6 4.8 0.335 0.467 8.793 8.841 potential_pw2rs 119 12.1 0.011 0.012 8.602 8.614 grid_collocate_task_list 119 9.3 6.141 6.531 6.141 6.531 grid_integrate_task_list 119 12.1 5.609 6.064 5.609 6.064 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 5.802 5.953 mp_alltoall_z22v 2059 16.8 4.549 5.889 4.549 5.889 qmmm_forces_gaussian_low_G 6 6.8 4.751 4.888 4.751 4.888 rs_pw_transfer_PW2RS_150 125 13.9 2.509 2.581 4.769 4.828 pw_restrict_s3 18 5.8 2.158 2.186 4.722 4.788 rs_pw_transfer_RS2PW_150 125 11.2 2.026 2.201 3.995 4.505 x_to_yz 1095 16.3 1.819 2.009 4.128 4.394 yz_to_x 964 15.3 1.191 1.353 3.432 4.369 mp_waitany 4028 12.8 3.345 4.166 3.345 4.166 qmmm_elec_with_gaussian:spline 6 5.8 0.000 0.000 3.797 3.841 pw_prolongate_s3 18 6.8 1.733 1.768 3.797 3.841 pw_integral_ab 2761 7.7 3.183 3.210 3.549 3.767 qs_scf_new_mos 113 7.2 0.001 0.001 3.643 3.652 qs_scf_loop_do_ot 113 8.2 0.001 0.001 3.642 3.651 ot_scf_mini 113 9.2 0.002 0.002 3.484 3.491 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 3.386 3.447 dbcsr_multiply_generic 2588 12.3 0.097 0.112 3.265 3.428 qs_ks_ddapc 119 10.1 0.002 0.003 2.839 2.992 qmmm_elec_gaussian_low_G 6 6.8 2.440 2.497 2.440 2.497 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 2.475 2.476 pw_gather_p 964 14.3 2.070 2.452 2.070 2.452 mp_sum_dm3 33 5.7 2.247 2.372 2.247 2.372 init_scf_loop 6 6.8 0.000 0.000 2.310 2.310 ot_mini 113 10.2 0.001 0.001 2.203 2.215 mp_waitall_1 188862 16.2 1.871 2.064 1.871 2.064 pw_scatter_p 1095 15.3 1.955 2.045 1.955 2.045 pw_derive 732 12.5 1.721 1.902 1.721 1.902 qs_ot_get_derivative 113 11.2 0.001 0.001 1.736 1.744 ------------------------------------------------------------------------------- Plot: name="MQAE_single_node_timings_32omp", title="Timings of MQAE_single_node with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32omp", name="rest", label="rest", y=34.93699999999998, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=71.368, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_scatter_s", label="pw_scatter_s", y=10.56, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="force_nonbond", label="force_nonbond", y=9.92, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_s", label="fft3d_s", y=9.191, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_integral_ab", label="pw_integral_ab", y=8.84, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=5.507, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=4.699, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=0.0, yerr=0.0 Plot: name="MQAE_single_node_timings_32mpi", title="Timings of MQAE_single_node with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32mpi", name="rest", label="rest", y=48.157999999999994, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=2.44, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_scatter_s", label="pw_scatter_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="force_nonbond", label="force_nonbond", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_integral_ab", label="pw_integral_ab", y=3.183, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=4.751, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=6.141, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=11.451, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=4.549, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=5.609, yerr=0.0 Summary: Performance test works fine. Status: OK Uploading artifacts... done EndDate: 2021-11-29 20:51:17+00:00