StartDate: 2022-05-17 19:05:44+00:00 CpuId: 32x AMD (unknown model) [Zen 3], 7nm (SMT disabled) CommitSHA: d19b00a1d01be382a0191c099be3bbf0cad9d01e CommitTime: 2022-05-17 16:21:06 +0200 CommitAuthor: chrisahart CommitSubject: CDFT forces based on Hirshfeld partitioning of the electron density (#2111) Populating docker build cache... done. #################### Building Image cp2k-perf-openmp #################### Dockerfile: /tools/docker/Dockerfile.test_performance Build-Path: / Build-Args: GIT_COMMIT_SHA=d19b00a1d01be382a0191c099be3bbf0cad9d01e Sending build context to Docker daemon 362.9MB Step 1/41 : FROM ubuntu:22.04 22.04: Pulling from library/ubuntu 125a6e411906: Already exists Digest: sha256:26c68657ccce2cb0a31b330cb0be2b5e108d467f641c62e13ab40cbec258c68d Status: Downloaded newer image for ubuntu:22.04 ---> d2e4e1f51132 Step 2/41 : WORKDIR /opt/cp2k-toolchain ---> Using cache ---> 09af03b14c5c Step 3/41 : COPY ./tools/toolchain/install_requirements*.sh ./ ---> Using cache ---> b3a7014288dd Step 4/41 : RUN ./install_requirements.sh ubuntu:22.04 ---> Using cache ---> 12e7491dc4b1 Step 5/41 : RUN mkdir scripts ---> Using cache ---> 2c9c880d562d Step 6/41 : COPY ./tools/toolchain/scripts/VERSION ./tools/toolchain/scripts/parse_if.py ./tools/toolchain/scripts/tool_kit.sh ./tools/toolchain/scripts/common_vars.sh ./tools/toolchain/scripts/signal_trap.sh ./tools/toolchain/scripts/get_openblas_arch.sh ./scripts/ ---> Using cache ---> 8739f82ecc2b Step 7/41 : COPY ./tools/toolchain/install_cp2k_toolchain.sh . ---> Using cache ---> 8a5f1da84c82 Step 8/41 : RUN ./install_cp2k_toolchain.sh --install-all --mpi-mode=mpich --with-gcc=system --dry-run ---> Using cache ---> 1bafae4924c2 Step 9/41 : COPY ./tools/toolchain/scripts/stage0/ ./scripts/stage0/ ---> Using cache ---> 95ce0fc26df7 Step 10/41 : RUN ./scripts/stage0/install_stage0.sh && rm -rf ./build ---> Using cache ---> ab2a4b6ffe9c Step 11/41 : COPY ./tools/toolchain/scripts/stage1/ ./scripts/stage1/ ---> Using cache ---> 037422b9e22d Step 12/41 : RUN ./scripts/stage1/install_stage1.sh && rm -rf ./build ---> Using cache ---> c54e57d6c9a0 Step 13/41 : COPY ./tools/toolchain/scripts/stage2/ ./scripts/stage2/ ---> Using cache ---> 9a41d4949b7b Step 14/41 : RUN ./scripts/stage2/install_stage2.sh && rm -rf ./build ---> Using cache ---> bdc5378ac471 Step 15/41 : COPY ./tools/toolchain/scripts/stage3/ ./scripts/stage3/ ---> Using cache ---> ea7c16093348 Step 16/41 : RUN ./scripts/stage3/install_stage3.sh && rm -rf ./build ---> Using cache ---> 8bc9a2b2fb03 Step 17/41 : COPY ./tools/toolchain/scripts/stage4/ ./scripts/stage4/ ---> Using cache ---> 8d4dbb4e705c Step 18/41 : RUN ./scripts/stage4/install_stage4.sh && rm -rf ./build ---> Using cache ---> bb2930cbf916 Step 19/41 : COPY ./tools/toolchain/scripts/stage5/ ./scripts/stage5/ ---> Using cache ---> fa953cfcf112 Step 20/41 : RUN ./scripts/stage5/install_stage5.sh && rm -rf ./build ---> Using cache ---> 36a31c14fe4f Step 21/41 : COPY ./tools/toolchain/scripts/stage6/ ./scripts/stage6/ ---> Using cache ---> 3a10de1fd50b Step 22/41 : RUN ./scripts/stage6/install_stage6.sh && rm -rf ./build ---> Using cache ---> 1e55235d2787 Step 23/41 : COPY ./tools/toolchain/scripts/stage7/ ./scripts/stage7/ ---> Using cache ---> 9d549464a016 Step 24/41 : RUN ./scripts/stage7/install_stage7.sh && rm -rf ./build ---> Using cache ---> ac9844530fbf Step 25/41 : COPY ./tools/toolchain/scripts/stage8/ ./scripts/stage8/ ---> Using cache ---> e3aa2fe4e392 Step 26/41 : RUN ./scripts/stage8/install_stage8.sh && rm -rf ./build ---> Using cache ---> 3dab89bb4c81 Step 27/41 : COPY ./tools/toolchain/scripts/arch_base.tmpl ./tools/toolchain/scripts/generate_arch_files.sh ./scripts/ ---> Using cache ---> 198fa20fdcf5 Step 28/41 : RUN ./scripts/generate_arch_files.sh && rm -rf ./build ---> Using cache ---> 22e10dd952cd Step 29/41 : WORKDIR /opt/cp2k ---> Using cache ---> d09c2fc952d7 Step 30/41 : COPY ./Makefile . ---> Using cache ---> d1c5aa05f9c6 Step 31/41 : COPY ./src ./src ---> 64c28288dfa2 Step 32/41 : COPY ./exts ./exts ---> 2cf410243fdb Step 33/41 : COPY ./tools/build_utils ./tools/build_utils ---> 7bf08869104f Step 34/41 : RUN /bin/bash -c " mkdir -p arch && ln -vs /opt/cp2k-toolchain/install/arch/local.psmp ./arch/ && echo 'Compiling cp2k...' && source /opt/cp2k-toolchain/install/setup && ( make -j ARCH=local VERSION=psmp &> /dev/null || true ) && ( [ ! -f ./exe/local/cp2k.psmp ] || ldd ./exe/local/cp2k.psmp | grep -q libmpi )" ---> Running in 8e0455ac594d './arch/local.psmp' -> '/opt/cp2k-toolchain/install/arch/local.psmp' Compiling cp2k... Removing intermediate container 8e0455ac594d ---> bfe24d9029ee Step 35/41 : COPY ./data ./data ---> 5c3172ec8203 Step 36/41 : COPY ./tests ./tests ---> 13f608a74f4b Step 37/41 : COPY ./tools/regtesting ./tools/regtesting ---> 627fb439d425 Step 38/41 : COPY ./benchmarks ./benchmarks ---> 68c5698137cd Step 39/41 : COPY ./tools/docker/scripts/test_performance.sh ./tools/docker/scripts/plot_performance.py ./ ---> 5c9d8e2f7ab1 Step 40/41 : RUN ./test_performance.sh "local" 2>&1 | tee report.log ---> Running in 62de601253d9 ========== Compiling CP2K ========== Compiling cp2k... done. Checking benchmark inputs... Found 60 input files and 0 errors. ========== Running Performance Test ========== Running H2O-64.inp with 1 threads and 32 ranks... done. Running H2O-64.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.026 0.026 195.268 195.268 qs_mol_dyn_low 1 2.0 0.002 0.002 194.651 194.651 qs_forces 11 3.9 0.001 0.001 194.613 194.613 qs_energies 11 4.9 0.001 0.001 188.411 188.411 scf_env_do_scf 11 5.9 0.001 0.001 176.212 176.212 velocity_verlet 10 3.0 0.002 0.002 137.108 137.108 init_scf_loop 11 6.9 0.000 0.000 99.935 99.935 prepare_preconditioner 11 7.9 0.000 0.000 97.470 97.470 make_preconditioner 11 8.9 0.000 0.000 97.470 97.470 make_full_inverse_cholesky 11 9.9 0.000 0.000 96.350 96.350 scf_env_do_scf_inner_loop 108 6.5 0.011 0.011 76.157 76.157 cp_fm_cholesky_invert 11 10.9 68.726 68.726 68.726 68.726 qs_scf_new_mos 108 7.5 0.001 0.001 41.847 41.847 qs_scf_loop_do_ot 108 8.5 0.001 0.001 41.846 41.846 ot_scf_mini 108 9.5 0.002 0.002 40.490 40.490 cp_fm_cholesky_decompose 22 10.9 23.901 23.901 23.901 23.901 qs_ot_get_p 119 10.4 0.001 0.001 22.727 22.727 qs_ot_p2m_diag 50 11.0 0.155 0.155 21.225 21.225 cp_dbcsr_syevd 50 12.0 0.002 0.002 20.808 20.808 rebuild_ks_matrix 119 8.3 0.001 0.001 20.534 20.534 qs_ks_build_kohn_sham_matrix 119 9.3 0.012 0.012 20.534 20.534 dbcsr_multiply_generic 2286 12.5 0.165 0.165 20.150 20.150 cp_fm_diag_elpa 50 13.0 0.000 0.000 19.616 19.616 cp_fm_diag_elpa_base 50 14.0 19.554 19.554 19.615 19.615 qs_rho_update_rho 119 7.7 0.001 0.001 19.172 19.172 calculate_rho_elec 119 8.7 0.964 0.964 19.171 19.171 qs_ks_update_qs_env 119 7.6 0.001 0.001 18.914 18.914 grid_collocate_task_list 119 9.7 14.920 14.920 14.920 14.920 ot_mini 108 10.5 0.001 0.001 14.317 14.317 sum_up_and_integrate 119 10.3 0.194 0.194 12.688 12.688 integrate_v_rspace 119 11.3 0.103 0.103 12.494 12.494 make_m2s 4572 13.5 0.046 0.046 11.116 11.116 grid_integrate_task_list 119 12.3 10.520 10.520 10.520 10.520 qs_ot_get_derivative 108 11.5 0.001 0.001 8.747 8.747 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 6.279 6.279 dbcsr_make_dense_low 5837 15.5 0.080 0.080 5.983 5.983 pw_transfer 1439 11.6 0.056 0.056 5.939 5.939 make_dense_data 5837 16.5 5.247 5.247 5.890 5.890 fft_wrap_pw1pw2 1201 12.6 0.006 0.006 5.721 5.721 make_images 4572 14.5 2.118 2.118 5.596 5.596 ot_diis_step 108 11.5 0.004 0.004 5.568 5.568 dbcsr_make_images_dense 3978 14.8 0.018 0.018 5.155 5.155 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 5.035 5.035 apply_single 119 13.6 0.000 0.000 5.035 5.035 init_scf_run 11 5.9 0.002 0.002 5.025 5.025 scf_env_initial_rho_setup 11 6.9 0.001 0.001 5.023 5.023 multiply_cannon 2286 13.5 0.183 0.183 4.948 4.948 fft_wrap_pw1pw2_140 487 13.2 0.414 0.414 4.895 4.895 multiply_cannon_loop 2286 14.5 0.081 0.081 4.457 4.457 wfi_extrapolate 11 7.9 0.001 0.001 4.397 4.397 multiply_cannon_multrec 2286 15.5 4.328 4.328 4.375 4.375 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.006 0.023 49.208 49.219 qs_mol_dyn_low 1 2.0 0.003 0.005 49.106 49.110 qs_forces 11 3.9 0.001 0.001 49.068 49.068 qs_energies 11 4.9 0.001 0.001 45.711 45.712 scf_env_do_scf 11 5.9 0.001 0.002 41.995 41.996 scf_env_do_scf_inner_loop 108 6.5 0.002 0.020 38.750 38.751 velocity_verlet 10 3.0 0.001 0.003 29.196 29.197 rebuild_ks_matrix 119 8.3 0.000 0.001 18.735 18.800 qs_ks_build_kohn_sham_matrix 119 9.3 0.014 0.020 18.735 18.800 qs_ks_update_qs_env 119 7.6 0.001 0.001 16.701 16.761 dbcsr_multiply_generic 2286 12.5 0.069 0.085 13.731 15.497 qs_rho_update_rho 119 7.7 0.001 0.001 14.297 14.312 calculate_rho_elec 119 8.7 0.029 0.031 14.297 14.311 sum_up_and_integrate 119 10.3 0.017 0.020 13.979 14.284 integrate_v_rspace 119 11.3 0.004 0.005 13.962 14.269 grid_collocate_task_list 119 9.7 9.025 10.923 9.025 10.923 qs_scf_new_mos 108 7.5 0.001 0.001 10.758 10.862 qs_scf_loop_do_ot 108 8.5 0.001 0.001 10.758 10.861 grid_integrate_task_list 119 12.3 8.165 10.642 8.165 10.642 ot_scf_mini 108 9.5 0.002 0.002 10.082 10.181 multiply_cannon 2286 13.5 0.129 0.146 9.811 10.154 multiply_cannon_loop 2286 14.5 0.086 0.115 9.220 9.469 mp_waitall_1 169478 16.3 8.401 9.162 8.401 9.162 rs_pw_transfer 974 11.9 0.010 0.013 6.043 6.455 multiply_cannon_metrocomm3 18288 15.5 0.036 0.049 5.453 6.191 ot_mini 108 10.5 0.001 0.001 5.809 5.915 density_rs2pw 119 9.7 0.005 0.006 4.904 5.290 potential_pw2rs 119 12.3 0.006 0.008 3.440 3.465 pw_transfer 1439 11.6 0.081 0.095 3.305 3.381 mp_waitany 9880 13.7 2.823 3.377 2.823 3.377 multiply_cannon_multrec 18288 15.5 2.910 3.259 2.920 3.272 init_scf_loop 11 6.9 0.000 0.000 3.232 3.232 fft_wrap_pw1pw2 1201 12.6 0.008 0.010 3.162 3.226 qs_ot_get_derivative 108 11.5 0.001 0.001 3.059 3.158 mp_alltoall_d11v 2130 13.8 2.550 3.131 2.550 3.131 mp_sum_l 11218 13.2 1.115 3.045 1.115 3.045 rs_pw_transfer_RS2PW_140 130 11.5 0.266 0.348 2.508 2.918 rs_gather_matrices 119 12.3 0.085 0.103 2.318 2.886 fft_wrap_pw1pw2_140 487 13.2 0.244 0.307 2.663 2.796 ot_diis_step 108 11.5 0.003 0.004 2.722 2.723 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 2.626 2.685 apply_single 119 13.6 0.000 0.000 2.626 2.685 init_scf_run 11 5.9 0.000 0.004 2.541 2.542 scf_env_initial_rho_setup 11 6.9 0.000 0.003 2.541 2.542 make_m2s 4572 13.5 0.045 0.056 2.377 2.458 fft3d_ps 1201 14.6 1.143 1.309 2.311 2.389 wfi_extrapolate 11 7.9 0.001 0.001 2.295 2.295 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 2.188 2.197 make_images 4572 14.5 0.116 0.140 2.059 2.142 qs_ot_get_p 119 10.4 0.001 0.001 1.443 1.560 rs_pw_transfer_PW2RS_140 130 13.9 0.523 0.667 1.411 1.470 mp_sum_d 4129 12.0 1.078 1.431 1.078 1.431 make_images_data 4572 15.5 0.035 0.042 1.125 1.309 prepare_preconditioner 11 7.9 0.000 0.000 1.201 1.214 make_preconditioner 11 8.9 0.000 0.000 1.201 1.214 build_core_hamiltonian_matrix_ 11 4.9 0.000 0.000 0.870 1.142 mp_alltoall_z22v 1201 16.6 0.935 1.141 0.935 1.141 make_full_inverse_cholesky 11 9.9 0.000 0.000 1.098 1.115 hybrid_alltoall_any 4725 16.4 0.067 0.217 0.969 1.088 multiply_cannon_metrocomm1 18288 15.5 0.018 0.026 0.568 1.081 qs_ot_get_derivative_diag 49 12.0 0.001 0.001 1.001 1.050 qs_ot_get_derivative_taylor 59 13.0 0.001 0.001 0.972 1.016 ------------------------------------------------------------------------------- Plot: name="H2O-64_timings_32omp", title="Timings of H2O-64 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32omp", name="rest", label="rest", y=53.31899999999999, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=68.726, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=23.901, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=19.554, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=14.92, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=10.52, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=4.328, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 Plot: name="H2O-64_timings_32mpi", title="Timings of H2O-64 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32mpi", name="rest", label="rest", y=17.884, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=9.025, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=8.165, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=2.91, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=8.401, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitany", label="mp_waitany", y=2.823, yerr=0.0 Running H2O-64_nonortho.inp with 1 threads and 32 ranks... done. Running H2O-64_nonortho.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_nonortho_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.027 0.027 211.202 211.202 qs_mol_dyn_low 1 2.0 0.002 0.002 210.553 210.553 qs_forces 11 3.9 0.001 0.001 210.515 210.515 qs_energies 11 4.9 0.001 0.001 202.807 202.807 scf_env_do_scf 11 5.9 0.001 0.001 188.447 188.447 velocity_verlet 10 3.0 0.002 0.002 147.495 147.495 init_scf_loop 11 6.9 0.000 0.000 97.364 97.364 prepare_preconditioner 11 7.9 0.000 0.000 93.370 93.370 make_preconditioner 11 8.9 0.000 0.000 93.370 93.370 make_full_inverse_cholesky 11 9.9 0.000 0.000 92.292 92.292 scf_env_do_scf_inner_loop 96 6.5 0.010 0.010 90.943 90.943 cp_fm_cholesky_invert 11 10.9 63.705 63.705 63.705 63.705 qs_scf_new_mos 96 7.5 0.001 0.001 35.547 35.547 qs_scf_loop_do_ot 96 8.5 0.000 0.000 35.547 35.547 ot_scf_mini 96 9.5 0.002 0.002 34.301 34.301 rebuild_ks_matrix 107 8.3 0.001 0.001 33.621 33.621 qs_ks_build_kohn_sham_matrix 107 9.3 0.011 0.011 33.620 33.620 qs_rho_update_rho 107 7.7 0.001 0.001 31.885 31.885 calculate_rho_elec 107 8.7 0.853 0.853 31.884 31.884 qs_ks_update_qs_env 107 7.6 0.001 0.001 30.271 30.271 grid_collocate_task_list 107 9.7 27.800 27.800 27.800 27.800 sum_up_and_integrate 107 10.3 0.173 0.173 26.668 26.668 integrate_v_rspace 107 11.3 0.086 0.086 26.495 26.495 cp_fm_cholesky_decompose 22 10.9 24.841 24.841 24.841 24.841 grid_integrate_task_list 107 12.3 24.632 24.632 24.632 24.632 qs_ot_get_p 107 10.4 0.001 0.001 18.697 18.697 dbcsr_multiply_generic 1966 12.4 0.139 0.139 17.682 17.682 qs_ot_p2m_diag 44 11.0 0.136 0.136 17.578 17.578 cp_dbcsr_syevd 44 12.0 0.002 0.002 17.218 17.218 cp_fm_diag_elpa 44 13.0 0.000 0.000 16.034 16.034 cp_fm_diag_elpa_base 44 14.0 15.985 15.985 16.034 16.034 ot_mini 96 10.5 0.001 0.001 12.521 12.521 make_m2s 3932 13.4 0.039 0.039 9.718 9.718 qs_ot_get_derivative 96 11.5 0.001 0.001 7.670 7.670 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 7.278 7.278 init_scf_run 11 5.9 0.002 0.002 6.204 6.204 scf_env_initial_rho_setup 11 6.9 0.001 0.001 6.202 6.202 pw_transfer 1295 11.6 0.051 0.051 5.620 5.620 wfi_extrapolate 11 7.9 0.001 0.001 5.462 5.462 fft_wrap_pw1pw2 1081 12.6 0.006 0.006 5.437 5.437 dbcsr_make_dense_low 4961 15.5 0.061 0.061 5.198 5.198 make_dense_data 4961 16.5 4.640 4.640 5.125 5.125 make_images 3932 14.4 1.910 1.910 4.931 4.931 ot_diis_step 96 11.5 0.003 0.003 4.848 4.848 fft_wrap_pw1pw2_140 439 13.2 0.433 0.433 4.659 4.659 multiply_cannon 1966 13.4 0.150 0.150 4.544 4.544 dbcsr_make_images_dense 3386 14.7 0.015 0.015 4.498 4.498 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 4.461 4.461 apply_single 107 13.6 0.000 0.000 4.461 4.461 qs_env_update_s_mstruct 11 6.9 0.000 0.000 4.271 4.271 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_nonortho_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.006 0.020 82.881 82.892 qs_mol_dyn_low 1 2.0 0.003 0.004 82.783 82.789 qs_forces 11 3.9 0.001 0.001 82.729 82.729 qs_energies 11 4.9 0.001 0.001 77.217 77.219 scf_env_do_scf 11 5.9 0.000 0.002 71.575 71.576 scf_env_do_scf_inner_loop 96 6.5 0.002 0.016 66.347 66.347 velocity_verlet 10 3.0 0.001 0.003 49.361 49.362 rebuild_ks_matrix 107 8.3 0.000 0.001 36.658 36.798 qs_ks_build_kohn_sham_matrix 107 9.3 0.013 0.020 36.657 36.798 sum_up_and_integrate 107 10.3 0.016 0.019 32.306 32.682 integrate_v_rspace 107 11.3 0.004 0.005 32.289 32.667 qs_ks_update_qs_env 107 7.6 0.001 0.001 32.429 32.554 qs_rho_update_rho 107 7.7 0.001 0.001 31.496 31.513 calculate_rho_elec 107 8.7 0.027 0.038 31.495 31.512 grid_integrate_task_list 107 12.3 22.993 29.293 22.993 29.293 grid_collocate_task_list 107 9.7 22.072 28.391 22.072 28.391 dbcsr_multiply_generic 1966 12.4 0.062 0.075 12.159 17.259 rs_pw_transfer 878 11.9 0.009 0.012 10.122 10.991 density_rs2pw 107 9.7 0.004 0.006 9.063 9.972 qs_scf_new_mos 96 7.5 0.001 0.001 9.359 9.451 qs_scf_loop_do_ot 96 8.5 0.001 0.001 9.358 9.450 multiply_cannon 1966 13.4 0.116 0.131 8.479 8.963 ot_scf_mini 96 9.5 0.002 0.002 8.805 8.898 multiply_cannon_loop 1966 14.4 0.081 0.113 7.962 8.468 mp_waitall_1 146670 16.2 7.184 8.022 7.184 8.022 mp_waitany 8968 13.7 7.170 8.020 7.170 8.020 rs_pw_transfer_RS2PW_140 118 11.5 0.248 0.270 6.875 7.752 mp_alltoall_d11v 1998 13.7 6.295 7.278 6.295 7.278 rs_gather_matrices 107 12.3 0.074 0.085 6.065 7.051 mp_sum_l 9666 13.1 1.158 6.138 1.158 6.138 multiply_cannon_metrocomm3 15728 15.4 0.033 0.047 4.543 5.328 ot_mini 96 10.5 0.001 0.001 5.191 5.291 init_scf_loop 11 6.9 0.000 0.000 5.216 5.216 init_scf_run 11 5.9 0.000 0.004 4.451 4.451 scf_env_initial_rho_setup 11 6.9 0.000 0.003 4.450 4.451 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 4.370 4.385 wfi_extrapolate 11 7.9 0.001 0.001 4.052 4.053 potential_pw2rs 107 12.3 0.005 0.007 3.194 3.219 pw_transfer 1295 11.6 0.075 0.085 3.106 3.197 multiply_cannon_multrec 15728 15.4 2.671 3.084 2.679 3.096 fft_wrap_pw1pw2 1081 12.6 0.008 0.011 2.974 3.049 qs_ot_get_derivative 96 11.5 0.001 0.001 2.736 2.827 fft_wrap_pw1pw2_140 439 13.2 0.220 0.280 2.493 2.628 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 2.312 2.452 apply_single 107 13.6 0.000 0.000 2.311 2.452 ot_diis_step 96 11.5 0.003 0.004 2.420 2.420 fft3d_ps 1081 14.6 1.038 1.158 2.191 2.258 make_m2s 3932 13.4 0.039 0.050 2.139 2.220 make_images 3932 14.4 0.104 0.124 1.853 1.920 ------------------------------------------------------------------------------- Plot: name="H2O-64_nonortho_timings_32omp", title="Timings of H2O-64_nonortho with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="rest", label="rest", y=54.238999999999976, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=63.705, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=27.8, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=24.841, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=24.632, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=15.985, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_alltoall_d11v", label="mp_alltoall_d11v", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="H2O-64_nonortho_timings_32mpi", title="Timings of H2O-64_nonortho with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="rest", label="rest", y=17.167, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=22.072, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=22.993, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitany", label="mp_waitany", y=7.17, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_alltoall_d11v", label="mp_alltoall_d11v", y=6.295, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=7.184, yerr=0.0 Running H2O-hyb.inp with 1 threads and 32 ranks... done. Running H2O-hyb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-hyb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.187 0.187 151.666 151.666 qs_energies 1 2.0 0.000 0.000 150.836 150.836 scf_env_do_scf 1 3.0 0.000 0.000 149.270 149.270 qs_ks_update_qs_env 8 5.0 0.000 0.000 112.073 112.073 rebuild_ks_matrix 7 6.0 0.000 0.000 112.019 112.019 qs_ks_build_kohn_sham_matrix 7 7.0 0.001 0.001 112.019 112.019 hfx_ks_matrix 7 8.0 0.000 0.000 95.254 95.254 integrate_four_center 7 9.0 1.304 1.304 95.228 95.228 init_scf_loop 1 4.0 0.000 0.000 84.286 84.286 integrate_four_center_main 7 10.0 1.000 1.000 82.431 82.431 integrate_four_center_bin 449 11.0 81.430 81.430 81.430 81.430 scf_env_do_scf_inner_loop 7 4.0 0.001 0.001 64.973 64.973 prepare_preconditioner 1 5.0 0.000 0.000 31.393 31.393 make_preconditioner 1 6.0 0.000 0.000 31.393 31.393 arnoldi_normal_ev 11 9.3 0.001 0.001 17.612 17.612 estimate_cond_num 1 7.0 0.000 0.000 17.568 17.568 build_subspace 28 9.5 0.010 0.010 17.105 17.105 integrate_four_center_load 7 10.0 0.001 0.001 11.245 11.245 hfx_load_balance 1 11.0 0.001 0.001 11.244 11.244 make_full_inverse_cholesky 1 7.0 0.000 0.000 9.360 9.360 admm_mo_calc_rho_aux 7 8.0 0.000 0.000 9.135 9.135 cp_fm_cholesky_invert 2 9.5 8.741 8.741 8.741 8.741 dbcsr_sym_m_v_mult 562 10.0 0.015 0.015 8.360 8.360 admm_fit_mo_coeffs 7 9.0 0.000 0.000 7.934 7.934 DGKS_ortho_d 673 10.6 7.168 7.168 7.170 7.170 Gram_Schmidt_ortho_d 673 10.6 5.657 5.657 5.657 5.657 hfx_load_balance_bin 1 12.0 5.614 5.614 5.614 5.614 hfx_load_balance_count 1 12.0 5.613 5.613 5.613 5.613 purify_mo_diag 7 10.0 0.000 0.000 4.557 4.557 qs_scf_new_mos 7 5.0 0.000 0.000 4.454 4.454 qs_scf_loop_do_ot 7 6.0 0.000 0.000 4.454 4.454 dbcsr_copy 1318 10.8 1.044 1.044 4.426 4.426 ot_scf_mini 7 7.0 0.000 0.000 4.367 4.367 make_full_single_inverse 1 7.0 0.000 0.000 4.301 4.301 arnoldi_generalized_ev 1 8.0 0.000 0.000 4.217 4.217 gev_build_subspace 4 9.0 0.005 0.005 4.071 4.071 cp_fm_syevd 7 11.0 0.000 0.000 3.974 3.974 cp_fm_syevd_base 7 12.0 3.974 3.974 3.974 3.974 dbcsr_create_new 3176 12.1 2.487 2.487 3.866 3.866 qs_vxc_create 14 8.0 0.000 0.000 3.745 3.745 xc_vxc_pw_create 14 9.0 0.131 0.131 3.745 3.745 fit_mo_coeffs 7 10.0 0.000 0.000 3.376 3.376 qs_ot_get_p 8 7.8 0.000 0.000 3.285 3.285 qs_ot_p2m_diag 7 9.0 0.022 0.022 3.203 3.203 cp_fm_cholesky_decompose 3 8.7 3.148 3.148 3.148 3.148 cp_dbcsr_syevd 7 10.0 0.000 0.000 3.141 3.141 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-hyb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.200 0.221 135.988 135.999 qs_energies 1 2.0 0.000 0.000 135.673 135.680 scf_env_do_scf 1 3.0 0.000 0.001 135.266 135.266 qs_ks_update_qs_env 8 5.0 0.000 0.000 133.121 133.121 rebuild_ks_matrix 7 6.0 0.000 0.000 133.112 133.113 qs_ks_build_kohn_sham_matrix 7 7.0 0.001 0.001 133.112 133.113 hfx_ks_matrix 7 8.0 0.000 0.000 126.643 126.645 integrate_four_center 7 9.0 0.052 0.343 126.634 126.635 integrate_four_center_main 7 10.0 0.003 0.004 80.349 114.773 integrate_four_center_bin 448 11.0 80.346 114.769 80.346 114.769 scf_env_do_scf_inner_loop 7 4.0 0.000 0.001 77.410 77.410 init_scf_loop 1 4.0 0.000 0.000 57.854 57.855 mp_sync 70 11.3 34.440 36.581 34.440 36.581 integrate_four_center_load 7 10.0 0.000 0.000 11.158 11.162 hfx_load_balance 1 11.0 0.001 0.001 11.158 11.162 mp_sum_l 1135 8.3 5.492 5.759 5.492 5.759 hfx_load_balance_dist 1 12.0 0.000 0.000 5.355 5.627 hfx_load_balance_bin 1 12.0 2.861 5.544 2.861 5.544 hfx_load_balance_count 1 12.0 2.856 5.528 2.856 5.528 qs_vxc_create 14 8.0 0.001 0.001 3.084 3.084 xc_vxc_pw_create 14 9.0 0.008 0.010 3.083 3.084 ------------------------------------------------------------------------------- Plot: name="H2O-hyb_timings_32omp", title="Timings of H2O-hyb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32omp", name="rest", label="rest", y=37.442999999999984, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_bin", label="integrate_four_center_bin", y=81.43, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=8.741, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="DGKS_ortho_d", label="DGKS_ortho_d", y=7.168, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="Gram_Schmidt_ortho_d", label="Gram_Schmidt_ortho_d", y=5.657, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=5.614, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_count", label="hfx_load_balance_count", y=5.613, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 Plot: name="H2O-hyb_timings_32mpi", title="Timings of H2O-hyb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32mpi", name="rest", label="rest", y=9.992999999999995, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_bin", label="integrate_four_center_bin", y=80.346, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="DGKS_ortho_d", label="DGKS_ortho_d", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="Gram_Schmidt_ortho_d", label="Gram_Schmidt_ortho_d", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=2.861, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_count", label="hfx_load_balance_count", y=2.856, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="mp_sync", label="mp_sync", y=34.44, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=5.492, yerr=0.0 Running GW_PBE_4benzene.inp with 1 threads and 32 ranks... done. Running GW_PBE_4benzene.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/GW_PBE_4benzene_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.011 0.011 208.588 208.588 qs_energies 1 2.0 0.000 0.000 208.179 208.179 mp2_main 1 3.0 0.000 0.000 184.670 184.670 mp2_gpw_main 1 4.0 0.000 0.000 183.651 183.651 rpa_ri_compute_en 1 5.0 0.000 0.000 173.859 173.859 rpa_num_int 1 6.0 0.000 0.000 173.846 173.846 compute_mat_P_omega 1 7.0 0.003 0.003 119.487 119.487 compute_mat_P_omega_contract 10 8.0 7.890 7.890 119.108 119.108 dbt_total 2336 9.6 0.012 0.012 113.571 113.571 dbt_contract 787 11.0 27.880 27.880 75.053 75.053 dbt_tas_total 1149 12.2 0.175 0.175 44.729 44.729 dbt_tas_multiply 807 12.1 0.002 0.002 43.995 43.995 dbt_copy 1103 10.7 13.463 13.463 37.764 37.764 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 37.586 37.586 dbt_tas_dbm 807 14.1 0.003 0.003 37.293 37.293 dbm_multiply 807 16.1 37.285 37.285 37.285 37.285 compute_mat_P_omega_calc_M_occ 250 9.0 7.873 7.873 37.002 37.002 GW_matrix_operations 10 7.0 0.008 0.008 30.079 30.079 cp_fm_cholesky_invert 10 8.0 29.301 29.301 29.301 29.301 dbt_tas_mm_1N 524 15.1 0.001 0.001 27.213 27.213 scf_env_do_scf 1 3.0 0.000 0.000 23.308 23.308 scf_env_do_scf_inner_loop 17 4.0 0.002 0.002 23.308 23.308 qs_scf_new_mos 17 5.0 0.000 0.000 21.723 21.723 eigensolver 18 5.9 0.001 0.001 20.082 20.082 cp_fm_diag_elpa 18 6.9 0.000 0.000 13.025 13.025 cp_fm_diag_elpa_base 18 7.9 12.982 12.982 13.024 13.024 cp_fm_cholesky_decompose 14 8.1 12.546 12.546 12.546 12.546 compute_QP_energies 1 7.0 0.000 0.000 11.979 11.979 compute_self_energy_cubic_gw 1 8.0 0.053 0.053 11.979 11.979 dbt_tas_copy 574 11.4 9.516 9.516 11.843 11.843 compute_mat_P_omega_copy_M_occ 250 9.0 0.001 0.001 11.587 11.587 rpa_num_int_RPA_matrix_operati 10 7.0 0.000 0.000 11.100 11.100 RPA_postprocessing_nokp 10 8.0 0.001 0.001 10.110 10.110 mp2_ri_gpw_compute_in 1 5.0 0.000 0.000 9.785 9.785 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 9.064 9.064 dbt_tas_reserve_blocks_index 3261 13.7 5.352 5.352 8.919 8.919 dbt_tas_mm_2 251 15.0 0.001 0.001 8.162 8.162 dbt_reserve_blocks_index 2280 12.5 1.165 1.165 7.538 7.538 dbt_reserve_blocks_index_array 2222 11.6 0.007 0.007 7.512 7.512 compute_mat_P_omega_copy_M_vir 250 9.0 0.001 0.001 7.453 7.453 dbt_copy_nocomm 251 12.0 6.127 6.127 7.172 7.172 cp_fm_cholesky_restore 51 7.0 6.945 6.945 6.945 6.945 get_2c_integrals 1 6.0 0.000 0.000 5.994 5.994 contract_cubic_gw 21 9.0 0.000 0.000 5.840 5.840 ------------------------------------------------------------------------------- From /workspace/artifacts/GW_PBE_4benzene_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.004 0.023 33.588 33.599 qs_energies 1 2.0 0.000 0.004 33.498 33.499 mp2_main 1 3.0 0.000 0.001 32.474 32.475 mp2_gpw_main 1 4.0 0.000 0.000 32.418 32.419 rpa_ri_compute_en 1 5.0 0.000 0.000 31.196 31.197 rpa_num_int 1 6.0 0.001 0.012 31.191 31.192 dbt_total 2336 9.6 0.012 0.013 27.684 27.697 compute_mat_P_omega 1 7.0 0.001 0.005 25.801 25.828 compute_mat_P_omega_contract 10 8.0 0.377 0.436 25.623 25.628 dbt_contract 787 11.0 0.935 1.322 20.879 20.888 dbt_tas_total 1149 12.2 0.049 0.064 18.797 18.798 dbt_tas_multiply 807 12.1 0.002 0.002 18.735 18.738 dbt_tas_dbm 807 14.1 0.003 0.003 13.494 13.503 dbm_multiply 807 16.1 10.224 10.971 10.224 10.971 compute_mat_P_omega_calc_M_occ 250 9.0 0.361 0.424 7.724 7.724 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 7.508 7.508 mp_sync 8707 11.6 5.588 7.429 5.588 7.429 dbt_copy 1111 10.7 2.143 2.951 5.728 6.272 dbt_tas_mm_2 251 15.0 0.001 0.002 6.090 6.094 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 5.469 5.472 dbt_tas_mm_1N 524 15.1 0.001 0.002 4.774 5.450 compute_QP_energies 1 7.0 0.000 0.000 3.691 3.693 compute_self_energy_cubic_gw 1 8.0 0.003 0.005 3.688 3.692 mp_waitall_2 3776 14.7 2.745 2.960 2.745 2.960 contract_cubic_gw 21 9.0 0.000 0.000 2.825 2.825 dbt_communicate_buffer 1098 11.7 0.053 0.072 2.638 2.765 dbt_tas_replicate 396 14.1 0.667 1.101 1.606 1.858 dbt_reserve_blocks_index_array 2791 11.4 0.008 0.010 1.547 1.819 dbt_reserve_blocks_index 2849 12.4 0.064 0.082 1.547 1.817 dbt_tas_reserve_blocks_index 3300 13.8 0.184 0.267 1.520 1.781 dbm_reserve_blocks 3696 14.8 1.380 1.625 1.380 1.625 mp2_ri_gpw_compute_in 1 5.0 0.000 0.000 1.217 1.219 compute_mat_P_omega_copy_M_vir 250 9.0 0.001 0.001 1.035 1.045 cp_gemm 105 8.4 0.000 0.000 0.991 1.003 cp_gemm_cosma 105 9.4 0.990 1.002 0.990 1.002 scf_env_do_scf 1 3.0 0.000 0.000 0.982 0.982 scf_env_do_scf_inner_loop 17 4.0 0.000 0.003 0.982 0.982 compute_mat_P_omega_copy_M_occ 250 9.0 0.001 0.001 0.929 0.933 convert_to_new_pgrid 2421 14.1 0.022 0.032 0.778 0.929 dbm_copy 1608 15.1 0.750 0.900 0.750 0.900 mp_max_i 1992 9.8 0.628 0.834 0.628 0.834 GW_matrix_operations 10 7.0 0.001 0.001 0.781 0.790 dbm_add 807 14.1 0.619 0.712 0.619 0.712 dbt_tas_mm_3N 22 15.1 0.000 0.000 0.502 0.672 ------------------------------------------------------------------------------- Plot: name="GW_PBE_4benzene_timings_32omp", title="Timings of GW_PBE_4benzene with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="rest", label="rest", y=87.677, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=37.285, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=29.301, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbt_contract", label="dbt_contract", y=27.88, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbt_copy", label="dbt_copy", y=13.463, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=12.982, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 Plot: name="GW_PBE_4benzene_timings_32mpi", title="Timings of GW_PBE_4benzene with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="rest", label="rest", y=10.573, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=10.224, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbt_contract", label="dbt_contract", y=0.935, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbt_copy", label="dbt_copy", y=2.143, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=2.745, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=1.38, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_sync", label="mp_sync", y=5.588, yerr=0.0 Running RI-HFX_H2O-32.inp with 1 threads and 32 ranks... done. Running RI-HFX_H2O-32.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/RI-HFX_H2O-32_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.017 0.017 502.583 502.583 qs_forces 1 2.0 0.000 0.000 501.993 501.993 rebuild_ks_matrix 7 6.6 0.000 0.000 481.134 481.134 qs_ks_build_kohn_sham_matrix 7 7.6 0.001 0.001 481.134 481.134 hfx_ks_matrix 7 8.6 0.000 0.000 479.171 479.171 dbt_total 4939 11.6 0.030 0.030 421.347 421.347 hfx_ri_update_ks 7 9.6 0.000 0.000 372.293 372.293 hfx_ri_update_ks_Pmat 7 10.6 30.979 30.979 372.290 372.290 qs_energies 1 3.0 0.000 0.000 348.212 348.212 scf_env_do_scf 1 4.0 0.000 0.000 347.822 347.822 qs_ks_update_qs_env 8 6.0 0.000 0.000 327.404 327.404 dbt_contract 1473 13.0 105.147 105.147 301.869 301.869 dbt_tas_total 2391 14.1 0.645 0.645 203.142 203.142 scf_env_do_scf_inner_loop 6 5.0 0.001 0.001 195.768 195.768 hfx_ri_update_ks_Pmat_KS 567 11.6 0.005 0.005 188.545 188.545 dbt_tas_multiply 1482 14.0 0.005 0.005 186.625 186.625 dbt_tas_dbm 1482 16.0 0.005 0.005 166.938 166.938 dbm_multiply 1482 18.0 166.922 166.922 166.922 166.922 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 153.733 153.733 init_scf_loop 2 5.0 0.000 0.000 152.045 152.045 dbt_tas_mm_2 649 17.1 0.004 0.004 139.059 139.059 hfx_ri_update_forces 1 7.0 0.000 0.000 106.876 106.876 dbt_copy 2411 12.3 54.220 54.220 102.914 102.914 hfx_ri_update_ks_Pmat_Px3C 567 11.6 0.002 0.002 78.051 78.051 hfx_ri_forces_Pmat_3c 1 8.0 0.002 0.002 53.290 53.290 precalc_derivatives 1 8.0 0.006 0.006 41.967 41.967 hfx_ri_pre_scf_Pmat 1 12.0 0.001 0.001 35.261 35.261 dbt_tas_reserve_blocks_index 7477 15.3 13.626 13.626 24.108 24.108 hfx_ri_update_ks_Pmat_copy_2 567 11.6 0.002 0.002 21.376 21.376 dbt_tas_mm_3T 659 17.1 0.002 0.002 21.324 21.324 dbt_reserve_blocks_index 4998 14.5 2.830 2.830 20.688 20.688 dbt_reserve_blocks_index_array 4963 13.5 0.016 0.016 20.509 20.509 dbt_tas_copy 1555 13.0 15.209 15.209 20.253 20.253 prepare_preconditioner 2 6.0 0.000 0.000 17.748 17.748 make_preconditioner 2 7.0 0.000 0.000 17.314 17.314 cp_fm_syevd 12 10.7 0.000 0.000 16.968 16.968 cp_fm_syevd_base 12 11.7 16.968 16.968 16.968 16.968 make_full_all 2 8.0 0.000 0.000 16.894 16.894 build_3c_derivatives 9 9.0 2.252 2.252 16.886 16.886 dbt_tas_reshape 906 14.4 0.026 0.026 15.112 15.112 hfx_ri_pre_scf_Pmat_RIx3C 81 13.0 0.000 0.000 12.655 12.655 dbt_tas_reshape_buffer_obtain 906 15.4 7.237 7.237 10.765 10.765 ------------------------------------------------------------------------------- From /workspace/artifacts/RI-HFX_H2O-32_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.004 0.020 58.730 58.740 qs_forces 1 2.0 0.000 0.000 58.550 58.550 rebuild_ks_matrix 7 6.6 0.000 0.000 57.856 57.860 qs_ks_build_kohn_sham_matrix 7 7.6 0.002 0.014 57.855 57.860 hfx_ks_matrix 7 8.6 0.000 0.001 56.822 56.831 dbt_total 4939 11.6 0.025 0.030 51.313 51.331 dbt_contract 1473 13.0 3.367 4.993 38.617 38.628 hfx_ri_update_ks 7 9.6 0.000 0.000 38.505 38.505 hfx_ri_update_ks_Pmat 7 10.6 1.338 1.881 38.504 38.505 dbt_tas_total 2391 14.1 0.108 0.139 36.020 36.022 qs_energies 1 3.0 0.000 0.000 35.324 35.324 scf_env_do_scf 1 4.0 0.000 0.001 35.182 35.182 qs_ks_update_qs_env 8 6.0 0.000 0.000 34.642 34.647 dbt_tas_multiply 1482 14.0 0.004 0.005 31.257 31.263 dbt_tas_dbm 1482 16.0 0.005 0.006 23.470 23.489 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 23.215 23.215 scf_env_do_scf_inner_loop 6 5.0 0.000 0.001 20.460 20.461 hfx_ri_update_ks_Pmat_KS 567 11.6 0.003 0.004 20.404 20.404 dbm_multiply 1482 18.0 16.082 20.084 16.082 20.084 hfx_ri_update_forces 1 7.0 0.000 0.001 18.316 18.324 mp_sync 17669 13.5 12.004 15.006 12.004 15.006 init_scf_loop 2 5.0 0.000 0.000 14.721 14.727 dbt_tas_mm_2 649 17.1 0.003 0.004 13.192 13.207 hfx_ri_forces_Pmat_3c 1 8.0 0.002 0.002 13.009 13.024 hfx_ri_update_ks_Pmat_Px3C 567 11.6 0.002 0.002 8.462 8.462 dbt_copy 2429 12.3 2.239 2.942 6.554 7.193 dbt_tas_mm_3T 659 17.1 0.002 0.002 4.133 4.899 dbt_tas_mm_3N 163 16.5 0.000 0.001 4.016 4.123 precalc_derivatives 1 8.0 0.001 0.002 4.096 4.097 hfx_ri_pre_scf_Pmat 1 12.0 0.000 0.000 3.914 3.916 dbt_tas_merge 649 14.1 1.664 2.813 2.809 3.748 mp_waitall_2 5988 16.1 2.934 3.291 2.934 3.291 mp_max_i 3372 12.5 2.519 2.905 2.519 2.905 dbt_tas_reserve_blocks_index 7508 15.4 0.447 0.617 2.244 2.733 dbt_tas_replicate 909 15.6 0.691 0.932 2.335 2.533 dbt_tas_reshape 916 14.4 0.009 0.011 2.196 2.345 dbt_reserve_blocks_index 5399 14.4 0.116 0.147 1.898 2.295 dbt_reserve_blocks_index_array 5364 13.5 0.012 0.015 1.895 2.292 hfx_ri_pre_scf_Pmat_RIx3C 81 13.0 0.000 0.000 2.237 2.248 dbm_reserve_blocks 8417 16.2 1.914 2.247 1.914 2.247 dbt_tas_communicate_buffer 1825 16.3 0.058 0.078 2.036 2.235 mp_alltoall_i 3084 15.2 2.021 2.230 2.021 2.230 build_3c_derivatives 9 9.0 0.228 0.331 1.882 1.885 dbt_communicate_buffer 1257 13.5 0.042 0.063 1.566 1.706 hfx_ri_update_ks_Pmat_copy_2 567 11.6 0.001 0.002 1.470 1.478 convert_to_new_pgrid 4446 16.0 0.033 0.045 1.206 1.398 mp_sum_l 38201 15.3 1.171 1.384 1.171 1.384 dbm_copy 3043 16.9 1.173 1.367 1.173 1.367 dbt_tas_reshape_alltoall 916 15.4 0.001 0.001 1.066 1.225 ------------------------------------------------------------------------------- Plot: name="RI-HFX_H2O-32_timings_32omp", title="Timings of RI-HFX_H2O-32 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="rest", label="rest", y=128.34700000000004, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=166.922, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_contract", label="dbt_contract", y=105.147, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_copy", label="dbt_copy", y=54.22, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=30.979, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="cp_fm_syevd_base", label="cp_fm_syevd_base", y=16.968, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_max_i", label="mp_max_i", y=0.0, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 Plot: name="RI-HFX_H2O-32_timings_32mpi", title="Timings of RI-HFX_H2O-32 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="rest", label="rest", y=18.246999999999993, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=16.082, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_contract", label="dbt_contract", y=3.367, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_copy", label="dbt_copy", y=2.239, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=1.338, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="cp_fm_syevd_base", label="cp_fm_syevd_base", y=0.0, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=2.934, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_max_i", label="mp_max_i", y=2.519, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_sync", label="mp_sync", y=12.004, yerr=0.0 Running RI-MP2_ammonia.inp with 1 threads and 32 ranks... done. Running RI-MP2_ammonia.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/RI-MP2_ammonia_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.015 0.015 242.292 242.292 qs_energies 1 2.0 0.000 0.000 242.004 242.004 mp2_main 1 3.0 0.000 0.000 206.542 206.542 mp2_gpw_main 1 4.0 0.001 0.001 202.019 202.019 mp2_ri_gpw_compute_in 1 5.0 0.371 0.371 144.502 144.502 mp2_ri_gpw_compute_in_loop 1 6.0 0.011 0.011 119.213 119.213 mp2_eri_3c_integrate_gpw 2656 7.0 0.015 0.015 90.137 90.137 integrate_v_rspace 2666 8.0 0.718 0.718 75.594 75.594 grid_integrate_task_list 2666 9.0 72.806 72.806 72.806 72.806 mp2_ri_gpw_compute_en 1 5.0 0.060 0.060 57.492 57.492 mp2_ri_gpw_compute_en_RI_loop 1 6.0 11.251 11.251 55.723 55.723 scf_env_do_scf 1 3.0 0.000 0.000 34.440 34.440 scf_env_do_scf_inner_loop 10 4.0 0.001 0.001 34.439 34.439 qs_scf_new_mos 10 5.0 0.000 0.000 33.110 33.110 mp2_ri_gpw_compute_en_expansio 2080 7.0 2.294 2.294 30.505 30.505 offload_gemm 2080 8.0 28.211 28.211 28.211 28.211 eigensolver 11 5.8 0.001 0.001 25.472 25.472 get_2c_integrals 1 6.0 0.000 0.000 24.744 24.744 calculate_wavefunction 5312 9.0 16.719 16.719 24.555 24.555 cp_fm_diag_elpa 11 6.8 0.000 0.000 23.118 23.118 cp_fm_diag_elpa_base 11 7.8 22.966 22.966 23.118 23.118 dbcsr_multiply_generic 5322 8.0 0.185 0.185 22.498 22.498 ao_to_mo_and_store_B_mult_1 2656 7.0 0.010 0.010 22.478 22.478 compute_2c_integrals 1 7.0 0.006 0.006 17.964 17.964 compute_2c_integrals_loop_lm 1 8.0 0.009 0.009 17.944 17.944 mp2_eri_2c_integrate_gpw 1 9.0 3.069 3.069 17.936 17.936 pw_transfer 63872 10.6 0.949 0.949 11.970 11.970 multiply_cannon 5322 9.0 0.454 0.454 11.542 11.542 fft_wrap_pw1pw2 53228 11.4 0.114 0.114 10.740 10.740 multiply_cannon_loop 5322 10.0 0.334 0.334 10.114 10.114 qs_diis_b_step 9 6.0 0.000 0.000 9.517 9.517 make_m2s 10644 9.0 0.068 0.068 8.761 8.761 fill_local_i_aL 4160 7.0 8.749 8.749 8.749 8.749 multiply_cannon_multrec 5322 11.0 8.579 8.579 8.621 8.621 make_images 10644 10.0 3.239 3.239 8.414 8.414 cp_fm_symm 18 7.0 8.318 8.318 8.318 8.318 fft_wrap_pw1pw2_20 21271 12.4 0.567 0.567 7.380 7.380 fft3d_s 53229 13.4 6.907 6.907 7.000 7.000 cp_fm_triangular_invert 2 6.0 6.494 6.494 6.494 6.494 ao_to_mo_and_store_B_E_Ex_1 2656 7.0 2.489 2.489 6.489 6.489 mp2_ri_gpw_compute_en_ener 2080 7.0 5.215 5.215 5.215 5.215 ------------------------------------------------------------------------------- From /workspace/artifacts/RI-MP2_ammonia_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.004 0.021 47.337 47.348 qs_energies 1 2.0 0.000 0.000 47.269 47.270 mp2_main 1 3.0 0.000 0.001 45.039 45.040 mp2_gpw_main 1 4.0 0.001 0.002 44.891 44.891 mp2_ri_gpw_compute_en 1 5.0 0.283 0.287 25.887 26.330 mp2_ri_gpw_compute_in 1 5.0 0.043 0.044 18.925 24.984 mp2_ri_gpw_compute_in_loop 1 6.0 0.001 0.001 17.215 23.276 mp2_eri_3c_integrate_gpw 83 7.0 0.001 0.001 15.019 21.051 integrate_v_rspace 93 8.1 0.112 0.122 14.894 20.797 grid_integrate_task_list 93 9.1 14.517 20.470 14.517 20.470 mp2_ri_gpw_compute_en_RI_loop 1 6.0 0.628 0.766 16.917 17.403 mp2_ri_gpw_compute_en_expansio 65 7.0 0.127 0.169 11.558 12.206 offload_gemm 65 8.0 11.431 12.064 11.431 12.064 mp2_ri_get_sizes 1 6.0 0.000 0.000 6.061 6.503 mp_min_d 1 7.0 6.061 6.503 6.061 6.503 mp2_ri_gpw_compute_en_comm 65 7.0 0.249 0.362 3.996 5.099 mp_sendrecv_dm3 390 8.0 2.862 4.113 2.862 4.113 mp2_ri_create_group 1 6.0 0.000 0.000 2.096 2.138 replicate_iaK_2intgroup 1 7.0 1.331 1.499 1.953 2.138 scf_env_do_scf 1 3.0 0.000 0.000 2.098 2.099 scf_env_do_scf_inner_loop 10 4.0 0.000 0.002 2.098 2.099 dbcsr_multiply_generic 176 8.0 0.008 0.009 1.896 2.067 ao_to_mo_and_store_B_mult_1 83 7.0 0.001 0.001 1.880 2.048 mp_sum_d 498 2.3 0.541 1.896 0.541 1.896 fill_local_i_aL 520 7.8 1.253 1.709 1.253 1.709 get_2c_integrals 1 6.0 0.000 0.000 1.578 1.599 compute_2c_integrals 1 7.0 0.004 0.006 1.364 1.380 compute_2c_integrals_loop_lm 1 8.0 0.001 0.003 0.986 1.296 mp2_eri_2c_integrate_gpw 1 9.0 0.204 0.320 0.985 1.294 multiply_cannon 176 9.0 0.016 0.017 1.117 1.197 calculate_wavefunction 166 9.0 0.505 0.718 0.880 1.139 multiply_cannon_loop 176 10.0 0.002 0.002 1.057 1.135 qs_scf_new_mos 10 5.0 0.000 0.000 1.027 1.086 eigensolver 11 5.8 0.001 0.001 1.034 1.035 multiply_cannon_multrec 246 11.0 0.915 0.962 0.921 0.968 ------------------------------------------------------------------------------- Plot: name="RI-MP2_ammonia_timings_32omp", title="Timings of RI-MP2_ammonia with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="rest", label="rest", y=90.339, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=72.806, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="offload_gemm", label="offload_gemm", y=28.211, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=22.966, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="calculate_wavefunction", label="calculate_wavefunction", y=16.719, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp2_ri_gpw_compute_en_RI_loop", label="mp2_ri_gpw_compute_en_RI_loop", y=11.251, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp_sendrecv_dm3", label="mp_sendrecv_dm3", y=0.0, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp_min_d", label="mp_min_d", y=0.0, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="replicate_iaK_2intgroup", label="replicate_iaK_2intgroup", y=0.0, yerr=0.0 Plot: name="RI-MP2_ammonia_timings_32mpi", title="Timings of RI-MP2_ammonia with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="rest", label="rest", y=10.002000000000002, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=14.517, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="offload_gemm", label="offload_gemm", y=11.431, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=0.0, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="calculate_wavefunction", label="calculate_wavefunction", y=0.505, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp2_ri_gpw_compute_en_RI_loop", label="mp2_ri_gpw_compute_en_RI_loop", y=0.628, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp_sendrecv_dm3", label="mp_sendrecv_dm3", y=2.862, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp_min_d", label="mp_min_d", y=6.061, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="replicate_iaK_2intgroup", label="replicate_iaK_2intgroup", y=1.331, yerr=0.0 Running diag_cu144_broy.inp with 1 threads and 32 ranks... done. Running diag_cu144_broy.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/diag_cu144_broy_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.080 0.080 234.676 234.676 qs_energies 1 2.0 0.000 0.000 233.252 233.252 scf_env_do_scf 1 3.0 0.000 0.000 223.383 223.383 scf_env_do_scf_inner_loop 15 4.0 0.002 0.002 223.383 223.383 qs_scf_new_mos 15 5.0 0.000 0.000 142.746 142.746 eigensolver 15 6.0 0.001 0.001 134.906 134.906 cp_fm_diag_elpa 15 7.0 0.000 0.000 120.497 120.497 cp_fm_diag_elpa_base 15 8.0 118.070 118.070 120.496 120.496 qs_ks_update_qs_env 15 5.0 0.000 0.000 53.328 53.328 rebuild_ks_matrix 15 6.0 0.000 0.000 53.113 53.113 qs_ks_build_kohn_sham_matrix 15 7.0 0.002 0.002 53.113 53.113 qs_vxc_create 15 8.0 0.013 0.013 36.011 36.011 calculate_dispersion_nonloc 15 9.0 7.292 7.292 31.290 31.290 pw_transfer 1191 10.0 0.060 0.060 24.780 24.780 fft_wrap_pw1pw2 1086 11.0 0.010 0.010 24.585 24.585 qs_rho_update_rho 16 5.0 0.000 0.000 23.906 23.906 calculate_rho_elec 16 6.0 0.218 0.218 23.906 23.906 grid_collocate_task_list 16 7.0 22.477 22.477 22.477 22.477 fft_wrap_pw1pw2_150 765 12.0 3.387 3.387 17.607 17.607 sum_up_and_integrate 15 8.0 0.043 0.043 15.526 15.526 integrate_v_rspace 15 9.0 0.021 0.021 15.483 15.483 grid_integrate_task_list 15 10.0 14.925 14.925 14.925 14.925 cp_fm_cholesky_restore 45 7.0 12.133 12.133 12.133 12.133 fft3d_s 1087 13.0 11.383 11.383 11.418 11.418 pw_scatter_s 585 13.1 7.662 7.662 7.662 7.662 fft_wrap_pw1pw2_200 197 12.3 0.834 0.834 6.790 6.790 init_scf_run 1 3.0 0.000 0.000 6.094 6.094 copy_dbcsr_to_fm 16 5.9 0.001 0.001 5.537 5.537 dbcsr_complete_redistribute 46 8.3 2.239 2.239 5.532 5.532 gspace_mixing 14 5.0 0.171 0.171 4.726 4.726 xc_vxc_pw_create 15 9.0 0.221 0.221 4.709 4.709 cp_fm_upper_to_full 30 8.0 4.701 4.701 4.701 4.701 ------------------------------------------------------------------------------- From /workspace/artifacts/diag_cu144_broy_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.013 0.028 70.610 70.621 qs_energies 1 2.0 0.000 0.000 70.349 70.350 scf_env_do_scf 1 3.0 0.000 0.001 66.016 66.016 scf_env_do_scf_inner_loop 15 4.0 0.001 0.003 66.015 66.016 qs_ks_update_qs_env 15 5.0 0.000 0.000 28.023 28.093 rebuild_ks_matrix 15 6.0 0.000 0.000 27.987 28.057 qs_ks_build_kohn_sham_matrix 15 7.0 0.002 0.004 27.987 28.057 qs_rho_update_rho 16 5.0 0.000 0.000 23.449 23.454 calculate_rho_elec 16 6.0 0.007 0.007 23.449 23.454 grid_collocate_task_list 16 7.0 20.244 21.030 20.244 21.030 sum_up_and_integrate 15 8.0 0.007 0.010 16.799 16.900 integrate_v_rspace 15 9.0 0.001 0.001 16.792 16.896 qs_scf_new_mos 15 5.0 0.000 0.001 15.300 15.685 grid_integrate_task_list 15 10.0 13.799 14.507 13.799 14.507 eigensolver 15 6.0 0.001 0.002 14.227 14.253 qs_vxc_create 15 8.0 0.001 0.001 10.867 10.881 cp_fm_diag_elpa 15 7.0 0.000 0.000 10.445 10.452 cp_fm_diag_elpa_base 15 8.0 10.280 10.330 10.434 10.439 calculate_dispersion_nonloc 15 9.0 0.992 1.646 8.919 8.947 pw_transfer 1191 10.0 0.075 0.088 8.326 8.457 fft_wrap_pw1pw2 1086 11.0 0.011 0.014 8.159 8.314 fft3d_ps 1086 13.0 2.526 3.005 6.487 6.763 fft_wrap_pw1pw2_150 765 12.0 0.261 0.311 5.597 5.679 mp_alltoall_z22v 1086 15.0 3.355 4.010 3.355 4.010 cp_fm_cholesky_restore 45 7.0 3.622 3.703 3.622 3.703 rs_pw_transfer 158 9.4 0.001 0.002 3.131 3.606 density_rs2pw 16 7.0 0.001 0.001 3.074 3.432 mp_waitany 520 11.3 2.244 3.133 2.244 3.133 mp_alltoall_d11v 217 9.2 2.630 2.925 2.630 2.925 yz_to_x 501 13.9 0.245 0.343 2.484 2.848 rs_gather_matrices 15 10.0 0.022 0.026 2.428 2.724 fft_wrap_pw1pw2_200 197 12.3 0.185 0.223 2.442 2.526 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 2.521 2.522 rs_pw_transfer_RS2PW_200 18 8.8 0.040 0.051 1.617 2.495 build_core_hamiltonian_matrix 1 4.0 0.000 0.000 2.217 2.403 xc_vxc_pw_create 15 9.0 0.015 0.019 1.948 1.980 x_to_yz 585 14.1 0.337 0.370 1.453 1.731 init_scf_run 1 3.0 0.000 0.001 1.565 1.566 vdW_energy 15 10.0 1.397 1.491 1.397 1.491 scf_env_initial_rho_setup 1 4.0 0.000 0.000 1.459 1.459 build_core_ppnl 1 5.0 1.306 1.441 1.306 1.441 ------------------------------------------------------------------------------- Plot: name="diag_cu144_broy_timings_32omp", title="Timings of diag_cu144_broy with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32omp", name="rest", label="rest", y=55.68799999999996, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=118.07, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=22.477, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=14.925, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=12.133, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_s", label="fft3d_s", y=11.383, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=0.0, yerr=0.0 Plot: name="diag_cu144_broy_timings_32mpi", title="Timings of diag_cu144_broy with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="rest", label="rest", y=19.310000000000002, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=10.28, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=20.244, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=13.799, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=3.622, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=3.355, yerr=0.0 Running bench_dftb.inp with 1 threads and 32 ranks... done. Running bench_dftb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/bench_dftb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.075 0.075 245.329 245.329 qs_energies 1 2.0 0.000 0.000 245.194 245.194 ls_scf 1 3.0 0.000 0.000 243.954 243.954 ls_scf_main 1 4.0 0.002 0.002 231.366 231.366 density_matrix_trs4 11 5.0 0.012 0.012 134.767 134.767 ls_scf_dm_to_ks 11 5.0 0.000 0.000 91.077 91.077 matrix_ls_to_qs 11 6.0 0.000 0.000 87.746 87.746 dbcsr_multiply_generic 185 6.1 0.819 0.819 69.905 69.905 arnoldi_extremal 12 6.1 0.000 0.000 56.684 56.684 arnoldi_normal_ev 12 7.1 0.017 0.017 56.684 56.684 build_subspace 23 8.1 0.083 0.083 55.654 55.654 dbcsr_copy_into_existing 11 7.0 47.999 47.999 47.999 47.999 dbcsr_complete_redistribute 23 7.5 31.787 31.787 43.634 43.634 matrix_decluster 11 7.0 0.000 0.000 39.745 39.745 multiply_cannon 185 7.1 0.319 0.319 39.217 39.217 dbcsr_matrix_vector_mult 652 9.0 0.168 0.168 30.158 30.158 multiply_cannon_loop 185 8.1 0.228 0.228 26.617 26.617 make_m2s 370 7.1 0.038 0.038 26.067 26.067 multiply_cannon_multrec 185 9.1 24.752 24.752 24.782 24.782 make_images 370 8.1 10.696 10.696 24.329 24.329 dbcsr_matrix_vector_mult_local 652 10.0 18.961 18.961 18.966 18.966 dbcsr_finalize 646 7.5 0.165 0.165 15.401 15.401 dbcsr_merge_all 597 8.5 2.105 2.105 14.220 14.220 DGKS_ortho_d 702 9.1 13.005 13.005 13.009 13.009 setup_rec_index_2d 370 8.1 12.168 12.168 12.168 12.168 ls_scf_init_scf 1 4.0 0.000 0.000 11.958 11.958 ls_scf_init_matrix_S 1 5.0 0.000 0.000 11.630 11.630 dbcsr_sort_indices 1103 9.9 11.334 11.334 11.334 11.334 matrix_sqrt_Newton_Schulz 1 6.0 0.001 0.001 10.992 10.992 tree_to_linear_d 110 9.4 10.749 10.749 10.749 10.749 Gram_Schmidt_ortho_d 702 9.1 10.705 10.705 10.707 10.707 quick_finalize 395 10.0 0.410 0.410 9.834 9.834 dbcsr_special_finalize 370 9.1 0.002 0.002 9.079 9.079 matrix_qs_to_ls 12 5.1 0.000 0.000 5.717 5.717 matrix_cluster 12 6.1 0.000 0.000 5.717 5.717 dbcsr_dot_sd 144 6.3 5.070 5.070 5.071 5.071 ------------------------------------------------------------------------------- From /workspace/artifacts/bench_dftb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.022 72.024 72.035 qs_energies 1 2.0 0.000 0.000 71.920 71.920 ls_scf 1 3.0 0.000 0.000 71.850 71.852 ls_scf_main 1 4.0 0.001 0.008 69.129 69.129 density_matrix_trs4 11 5.0 0.006 0.020 66.281 66.363 dbcsr_multiply_generic 185 6.1 0.061 0.080 62.662 62.929 multiply_cannon 185 7.1 0.033 0.044 52.222 53.788 multiply_cannon_loop 185 8.1 0.120 0.152 49.601 50.889 multiply_cannon_multrec 1480 9.1 30.407 37.572 30.691 37.911 mp_waitall_1 11936 10.3 16.869 20.250 16.869 20.250 multiply_cannon_metrocomm3 1480 9.1 0.013 0.016 9.689 14.260 multiply_cannon_metrocomm1 1480 9.1 0.008 0.011 4.153 8.497 make_m2s 370 7.1 0.035 0.039 7.274 7.405 make_images 370 8.1 0.651 0.741 7.143 7.266 calculate_norms 2960 9.1 4.885 6.447 4.885 6.447 make_images_data 370 9.1 0.009 0.011 3.266 3.629 hybrid_alltoall_any 393 9.9 0.184 0.822 2.842 3.122 mp_sum_l 1119 5.6 2.190 2.920 2.190 2.920 arnoldi_extremal 12 6.1 0.000 0.000 2.765 2.795 arnoldi_normal_ev 12 7.1 0.001 0.004 2.765 2.795 build_subspace 23 8.1 0.021 0.028 2.650 2.652 ls_scf_dm_to_ks 11 5.0 0.000 0.000 2.487 2.582 dbcsr_matrix_vector_mult 652 9.0 0.010 0.047 2.189 2.394 dbcsr_complete_redistribute 23 7.5 1.257 1.682 2.038 2.254 matrix_ls_to_qs 11 6.0 0.000 0.000 1.978 2.210 ls_scf_init_scf 1 4.0 0.000 0.000 2.113 2.115 dbcsr_matrix_vector_mult_local 652 10.0 1.752 2.084 1.754 2.087 ls_scf_init_matrix_S 1 5.0 0.000 0.000 2.080 2.086 matrix_decluster 11 7.0 0.000 0.000 1.831 2.028 make_images_pack 370 9.1 1.727 2.005 1.731 2.011 matrix_sqrt_Newton_Schulz 1 6.0 0.001 0.001 1.901 1.904 dbcsr_multiply_generic_mpsum_f 137 7.1 0.000 0.000 1.279 1.888 buffer_matrices_ensure_size 370 8.1 1.450 1.734 1.450 1.734 dbcsr_finalize 646 7.5 0.008 0.016 1.324 1.475 ------------------------------------------------------------------------------- Plot: name="bench_dftb_timings_32omp", title="Timings of bench_dftb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32omp", name="rest", label="rest", y=108.82500000000002, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=47.999, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=31.787, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=24.752, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=18.961, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="DGKS_ortho_d", label="DGKS_ortho_d", y=13.005, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="calculate_norms", label="calculate_norms", y=0.0, yerr=0.0 Plot: name="bench_dftb_timings_32mpi", title="Timings of bench_dftb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32mpi", name="rest", label="rest", y=14.664000000000001, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=1.257, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=30.407, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=1.752, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="DGKS_ortho_d", label="DGKS_ortho_d", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=2.19, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=16.869, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="calculate_norms", label="calculate_norms", y=4.885, yerr=0.0 Running dbcsr.inp with 1 threads and 32 ranks... done. Running dbcsr.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/dbcsr_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.007 71.345 71.345 lib_test 1 2.0 0.000 0.000 71.337 71.337 dbcsr_run_tests 3 3.0 0.002 0.002 71.337 71.337 test_multiplies_multiproc 3 4.0 0.001 0.001 55.677 55.677 dbcsr_redistribute 9 5.0 35.701 35.701 37.225 37.225 dbcsr_multiply_generic 9 5.0 0.001 0.001 16.865 16.865 dbcsr_make_random_matrix 9 4.0 12.739 12.739 15.562 15.562 multiply_cannon 9 6.0 0.001 0.001 12.213 12.213 multiply_cannon_loop 9 7.0 0.032 0.032 11.834 11.834 multiply_cannon_multrec 9 8.0 11.802 11.802 11.803 11.803 dbcsr_finalize 27 5.7 0.030 0.030 5.406 5.406 dbcsr_merge_all 18 6.5 2.072 2.072 4.739 4.739 dbcsr_data_release 975 7.6 2.480 2.480 2.480 2.480 tree_to_linear_d 9 7.0 1.840 1.840 1.840 1.840 make_m2s 18 6.0 0.001 0.001 1.523 1.523 make_images 18 7.0 0.531 0.531 1.471 1.471 ------------------------------------------------------------------------------- From /workspace/artifacts/dbcsr_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.003 0.014 18.300 18.305 lib_test 1 2.0 0.001 0.013 18.275 18.291 dbcsr_run_tests 3 3.0 0.000 0.001 18.262 18.278 test_multiplies_multiproc 3 4.0 0.000 0.002 17.419 17.472 dbcsr_multiply_generic 9 5.0 0.001 0.001 15.618 15.707 multiply_cannon 9 6.0 0.001 0.002 13.783 14.114 multiply_cannon_loop 9 7.0 0.002 0.003 13.497 13.827 multiply_cannon_multrec 72 8.0 11.155 12.275 11.156 12.276 mp_waitall_1 576 9.2 2.657 3.389 2.657 3.389 multiply_cannon_metrocomm1 72 8.0 0.001 0.002 2.033 2.764 mp_sum_l 390 2.5 0.579 1.063 0.579 1.063 dbcsr_multiply_generic_mpsum_f 9 6.0 0.000 0.000 0.574 1.057 dbcsr_make_random_matrix 9 4.0 0.663 0.915 0.791 1.002 multiply_cannon_metrocomm3 72 8.0 0.000 0.000 0.301 0.891 dbcsr_data_release 444 7.6 0.616 0.750 0.616 0.750 make_m2s 18 6.0 0.001 0.001 0.675 0.722 make_images 18 7.0 0.021 0.026 0.671 0.718 dbcsr_finalize 27 5.7 0.000 0.000 0.598 0.682 dbcsr_destroy 111 5.9 0.000 0.000 0.509 0.617 dbcsr_merge_all 18 6.5 0.090 0.109 0.490 0.579 dbcsr_checksum 6 5.0 0.157 0.526 0.529 0.529 dbcsr_redistribute 9 5.0 0.227 0.268 0.402 0.443 make_images_data 18 8.0 0.000 0.001 0.357 0.431 hybrid_alltoall_any 18 9.0 0.030 0.176 0.314 0.397 mp_sum_d 191 1.2 0.373 0.392 0.373 0.392 dbcsr_data_copy_aa2 18 7.5 0.290 0.369 0.290 0.369 ------------------------------------------------------------------------------- Plot: name="dbcsr_timings_32omp", title="Timings of dbcsr with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32omp", name="rest", label="rest", y=6.551000000000002, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_redistribute", label="dbcsr_redistribute", y=35.701, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=12.739, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=11.802, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_data_release", label="dbcsr_data_release", y=2.48, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_merge_all", label="dbcsr_merge_all", y=2.072, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 Plot: name="dbcsr_timings_32mpi", title="Timings of dbcsr with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32mpi", name="rest", label="rest", y=2.3130000000000006, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_redistribute", label="dbcsr_redistribute", y=0.227, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=0.663, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=11.155, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_data_release", label="dbcsr_data_release", y=0.616, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_merge_all", label="dbcsr_merge_all", y=0.09, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=2.657, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=0.579, yerr=0.0 Running MQAE_single_node.inp with 1 threads and 32 ranks... done. Running MQAE_single_node.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/MQAE_single_node_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.048 0.048 143.363 143.363 qs_mol_dyn_low 1 2.0 0.003 0.003 141.846 141.846 velocity_verlet 5 3.0 0.003 0.003 116.065 116.065 qmmm_el_coupling 6 3.8 0.000 0.000 88.564 88.564 qmmm_elec_with_gaussian 6 4.8 0.092 0.092 88.555 88.555 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 87.818 87.818 qmmm_elec_gaussian_low_G 6 6.8 86.899 86.899 86.899 86.899 qs_forces 6 3.8 0.000 0.000 44.506 44.506 qs_energies 6 4.8 0.000 0.000 40.325 40.325 scf_env_do_scf 6 5.8 0.001 0.001 37.153 37.153 scf_env_do_scf_inner_loop 39 6.8 0.004 0.004 26.187 26.187 rebuild_ks_matrix 45 8.4 0.000 0.000 25.137 25.137 qs_ks_build_kohn_sham_matrix 45 9.4 0.005 0.005 25.137 25.137 qs_ks_update_qs_env 45 7.8 0.000 0.000 21.395 21.395 pw_transfer 966 12.3 0.049 0.049 17.315 17.315 fft_wrap_pw1pw2 801 13.6 0.006 0.006 17.046 17.046 fft_wrap_pw1pw2_150 507 15.2 2.102 2.102 16.623 16.623 qs_vxc_create 45 10.4 0.001 0.001 13.557 13.557 xc_vxc_pw_create 45 11.4 0.637 0.637 13.557 13.557 init_scf_loop 6 6.8 0.000 0.000 10.960 10.960 xc_pw_derive 270 13.4 0.002 0.002 9.371 9.371 fft3d_s 802 15.6 7.925 7.925 7.933 7.933 prepare_preconditioner 6 7.8 0.000 0.000 7.597 7.597 qs_rho_update_rho 45 7.9 0.000 0.000 7.496 7.496 calculate_rho_elec 45 8.9 0.563 0.563 7.495 7.495 make_preconditioner 6 8.8 0.000 0.000 7.297 7.297 xc_rho_set_and_dset_create 45 12.4 0.708 0.708 7.046 7.046 make_full_all 6 9.8 0.001 0.001 6.916 6.916 xc_pw_divergence 45 12.4 0.001 0.001 5.822 5.822 pw_scatter_s 429 15.8 5.639 5.639 5.639 5.639 qmmm_forces 6 3.8 0.001 0.001 5.602 5.602 qmmm_forces_with_gaussian 6 4.8 0.101 0.101 5.283 5.283 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 4.444 4.444 pw_integral_ab 2539 7.4 4.395 4.395 4.395 4.395 qs_ks_ddapc 45 10.4 0.001 0.001 4.322 4.322 cp_fm_diag_elpa 18 11.2 0.000 0.000 3.852 3.852 cp_fm_diag_elpa_base 18 12.2 3.843 3.843 3.852 3.852 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 3.749 3.749 qmmm_forces_gaussian_low_G 6 6.8 3.699 3.699 3.699 3.699 grid_collocate_task_list 45 9.9 3.618 3.618 3.618 3.618 density_rs2pw 45 9.9 0.002 0.002 3.315 3.315 sum_up_and_integrate 45 10.4 0.128 0.128 3.127 3.127 pw_poisson_solve 51 9.9 1.284 1.284 3.057 3.057 integrate_v_rspace 45 11.4 0.007 0.007 3.000 3.000 ------------------------------------------------------------------------------- From /workspace/artifacts/MQAE_single_node_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.031 0.054 59.176 59.190 qs_mol_dyn_low 1 2.0 0.003 0.004 57.921 57.979 qs_forces 6 3.8 0.001 0.001 41.606 41.607 qs_energies 6 4.8 0.001 0.006 39.669 39.670 scf_env_do_scf 6 5.8 0.000 0.001 38.664 38.664 scf_env_do_scf_inner_loop 113 6.2 0.002 0.019 37.096 37.096 rebuild_ks_matrix 119 8.1 0.000 0.000 27.328 27.339 qs_ks_build_kohn_sham_matrix 119 9.1 0.014 0.020 27.327 27.339 qs_ks_update_qs_env 119 7.3 0.001 0.001 25.679 25.689 velocity_verlet 5 3.0 0.002 0.004 24.956 24.959 pw_transfer 2446 12.3 0.161 0.186 17.265 17.810 fft_wrap_pw1pw2 2059 13.4 0.021 0.026 16.897 17.500 fft_wrap_pw1pw2_150 1321 14.9 1.201 1.416 16.202 16.784 fft3d_ps 2059 15.4 6.576 7.687 12.825 13.963 qs_vxc_create 119 10.1 0.002 0.003 13.665 13.667 xc_vxc_pw_create 119 11.1 0.145 0.211 13.663 13.665 qs_rho_update_rho 119 7.3 0.001 0.001 11.281 11.282 calculate_rho_elec 119 8.3 0.049 0.055 11.281 11.281 xc_pw_derive 714 13.1 0.008 0.011 10.273 10.595 sum_up_and_integrate 119 10.1 0.054 0.069 9.934 10.142 integrate_v_rspace 119 11.1 0.003 0.004 9.881 10.095 qmmm_forces 6 3.8 0.002 0.002 8.188 8.189 rs_pw_transfer 988 11.5 0.011 0.017 7.858 8.140 qmmm_forces_with_gaussian 6 4.8 0.313 0.374 7.509 8.102 xc_rho_set_and_dset_create 119 12.1 0.356 0.595 6.620 7.321 qmmm_el_coupling 6 3.8 0.000 0.000 7.092 7.228 qmmm_elec_with_gaussian 6 4.8 0.316 0.378 7.090 7.226 xc_pw_divergence 119 12.1 0.004 0.006 6.662 6.928 density_rs2pw 119 9.3 0.005 0.008 6.590 6.874 mp_alltoall_z22v 2059 17.4 4.891 6.449 4.891 6.449 potential_pw2rs 119 12.1 0.005 0.007 5.945 5.969 grid_collocate_task_list 119 9.3 4.507 4.967 4.507 4.967 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 3.943 4.169 x_to_yz 1095 16.8 0.759 0.901 3.379 4.049 grid_integrate_task_list 119 12.1 3.468 4.023 3.468 4.023 mp_waitany 4028 12.8 2.938 3.872 2.938 3.872 yz_to_x 964 16.0 0.549 0.672 2.821 3.746 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 3.438 3.668 qmmm_forces_gaussian_low_G 6 6.8 3.245 3.483 3.245 3.483 rs_pw_transfer_PW2RS_150 125 13.9 1.152 1.452 2.988 3.087 qmmm_elec_gaussian_low_G 6 6.8 2.841 3.074 2.841 3.074 pw_restrict_s3 18 5.8 1.446 1.687 2.564 2.881 rs_pw_transfer_RS2PW_150 125 11.2 0.900 1.219 2.551 2.855 mp_waitall_1 188862 16.2 2.070 2.484 2.070 2.484 dbcsr_multiply_generic 2588 12.3 0.057 0.070 2.213 2.374 qmmm_elec_with_gaussian:spline 6 5.8 0.000 0.000 2.111 2.278 pw_prolongate_s3 18 6.8 1.188 1.349 2.111 2.278 qs_scf_new_mos 113 7.2 0.000 0.001 2.081 2.091 qs_scf_loop_do_ot 113 8.2 0.000 0.001 2.081 2.091 ot_scf_mini 113 9.2 0.001 0.001 1.990 1.997 mp_sum_dm3 33 5.7 1.819 1.985 1.819 1.985 qs_ks_ddapc 119 10.1 0.002 0.002 1.861 1.968 pw_gather_p 964 15.0 1.259 1.870 1.259 1.870 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 1.659 1.659 pw_scatter_p 1095 15.8 1.563 1.601 1.563 1.601 pw_integral_ab 2761 7.7 1.056 1.200 1.457 1.595 mp_sum_d 5820 12.2 1.023 1.573 1.023 1.573 init_scf_loop 6 6.8 0.000 0.000 1.566 1.566 rs_pw_transfer_PW2RS_40 119 14.1 0.232 0.288 1.141 1.292 ot_mini 113 10.2 0.000 0.001 1.252 1.263 ------------------------------------------------------------------------------- Plot: name="MQAE_single_node_timings_32omp", title="Timings of MQAE_single_node with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32omp", name="rest", label="rest", y=27.345000000000013, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=86.899, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_s", label="fft3d_s", y=7.925, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_scatter_s", label="pw_scatter_s", y=5.639, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_integral_ab", label="pw_integral_ab", y=4.395, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=3.843, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=3.699, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=3.618, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=0.0, yerr=0.0 Plot: name="MQAE_single_node_timings_32mpi", title="Timings of MQAE_single_node with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32mpi", name="rest", label="rest", y=32.592, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=2.841, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_scatter_s", label="pw_scatter_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_integral_ab", label="pw_integral_ab", y=1.056, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=3.245, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=4.507, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=4.891, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=6.576, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=3.468, yerr=0.0 Summary: Performance test took 47 minutes. Status: OK Removing intermediate container 62de601253d9 ---> d5c0c5d79ea5 Step 41/41 : CMD cat $(find ./report.log -mmin +10) | sed '/^Summary:/ s/$/ (cached)/' ---> Running in e65d911cb351 Removing intermediate container e65d911cb351 ---> 706d258d91d8 [Warning] One or more build-args [GIT_COMMIT_SHA] were not consumed Successfully built 706d258d91d8 Successfully tagged gcr.io/cp2k-org-project/img_cp2k-perf-openmp-arch-14b:master Pushing new image... done. #################### Running Image cp2k-perf-openmp #################### Uploading artifacts... done EndDate: 2022-05-17 20:03:57+00:00