StartDate: 2022-09-29 19:05:47+00:00 CpuId: 32x AMD (unknown model) [Zen 3], 7nm (SMT disabled) CommitSHA: e2bd27a0fbe7d28ec5009d2fd2a3e163d28b4594 CommitTime: 2022-09-29 13:14:42 +0200 CommitAuthor: Matthias Krack CommitSubject: Update arch file for leak check Populating docker build cache... done. #################### Building Image cp2k-perf-openmp #################### Dockerfile: /tools/docker/Dockerfile.test_performance Build-Path: / Build-Args: GIT_COMMIT_SHA=e2bd27a0fbe7d28ec5009d2fd2a3e163d28b4594 Sending build context to Docker daemon 364.4MB Step 1/42 : FROM ubuntu:22.04 22.04: Pulling from library/ubuntu 2b55860d4c66: Already exists Digest: sha256:20fa2d7bb4de7723f542be5923b06c4d704370f0390e4ae9e1c833c8785644c1 Status: Downloaded newer image for ubuntu:22.04 ---> 2dc39ba059dc Step 2/42 : WORKDIR /opt/cp2k-toolchain ---> Using cache ---> bf0c853ea628 Step 3/42 : COPY ./tools/toolchain/install_requirements*.sh ./ ---> Using cache ---> 3f014c28f167 Step 4/42 : RUN ./install_requirements.sh ubuntu:22.04 ---> Using cache ---> 9e0fed4f46ea Step 5/42 : RUN mkdir scripts ---> Using cache ---> 51c38e746560 Step 6/42 : COPY ./tools/toolchain/scripts/VERSION ./tools/toolchain/scripts/parse_if.py ./tools/toolchain/scripts/tool_kit.sh ./tools/toolchain/scripts/common_vars.sh ./tools/toolchain/scripts/signal_trap.sh ./tools/toolchain/scripts/get_openblas_arch.sh ./scripts/ ---> Using cache ---> acd0a7419027 Step 7/42 : COPY ./tools/toolchain/install_cp2k_toolchain.sh . ---> Using cache ---> abc26b960d8f Step 8/42 : RUN ./install_cp2k_toolchain.sh --install-all --mpi-mode=mpich --with-gcc=system --dry-run ---> Using cache ---> c171efa88a98 Step 9/42 : COPY ./tools/toolchain/scripts/stage0/ ./scripts/stage0/ ---> Using cache ---> f0a05a324a8a Step 10/42 : RUN ./scripts/stage0/install_stage0.sh && rm -rf ./build ---> Using cache ---> fcd40d0746c6 Step 11/42 : COPY ./tools/toolchain/scripts/stage1/ ./scripts/stage1/ ---> Using cache ---> 88869032a8c8 Step 12/42 : RUN ./scripts/stage1/install_stage1.sh && rm -rf ./build ---> Using cache ---> bc905fda18b5 Step 13/42 : COPY ./tools/toolchain/scripts/stage2/ ./scripts/stage2/ ---> Using cache ---> 3eab8bd7fc21 Step 14/42 : RUN ./scripts/stage2/install_stage2.sh && rm -rf ./build ---> Using cache ---> 738d55b11847 Step 15/42 : COPY ./tools/toolchain/scripts/stage3/ ./scripts/stage3/ ---> Using cache ---> 1081570b9b80 Step 16/42 : RUN ./scripts/stage3/install_stage3.sh && rm -rf ./build ---> Using cache ---> 6de6060b35dd Step 17/42 : COPY ./tools/toolchain/scripts/stage4/ ./scripts/stage4/ ---> Using cache ---> a807adc56965 Step 18/42 : RUN ./scripts/stage4/install_stage4.sh && rm -rf ./build ---> Using cache ---> 06b3bbffb6f7 Step 19/42 : COPY ./tools/toolchain/scripts/stage5/ ./scripts/stage5/ ---> Using cache ---> 5bd3967d9dcc Step 20/42 : RUN ./scripts/stage5/install_stage5.sh && rm -rf ./build ---> Using cache ---> 0990d5b935c0 Step 21/42 : COPY ./tools/toolchain/scripts/stage6/ ./scripts/stage6/ ---> Using cache ---> 84ca63bd1976 Step 22/42 : RUN ./scripts/stage6/install_stage6.sh && rm -rf ./build ---> Using cache ---> 6340ea26f483 Step 23/42 : COPY ./tools/toolchain/scripts/stage7/ ./scripts/stage7/ ---> Using cache ---> 4286138d3f3f Step 24/42 : RUN ./scripts/stage7/install_stage7.sh && rm -rf ./build ---> Using cache ---> c508a4cc368e Step 25/42 : COPY ./tools/toolchain/scripts/stage8/ ./scripts/stage8/ ---> Using cache ---> c9aea88ab162 Step 26/42 : RUN ./scripts/stage8/install_stage8.sh && rm -rf ./build ---> Using cache ---> bcef9dcbb78f Step 27/42 : COPY ./tools/toolchain/scripts/arch_base.tmpl ./tools/toolchain/scripts/generate_arch_files.sh ./scripts/ ---> Using cache ---> 9572216c5cc3 Step 28/42 : RUN ./scripts/generate_arch_files.sh && rm -rf ./build ---> Using cache ---> 421ea4b7e23a Step 29/42 : WORKDIR /opt/cp2k ---> Using cache ---> 10d4e0da71d6 Step 30/42 : COPY ./Makefile . ---> Using cache ---> dc8fb9cb50b6 Step 31/42 : COPY ./src ./src ---> Using cache ---> bc2e0c6bf776 Step 32/42 : COPY ./exts ./exts ---> Using cache ---> e1117655fb9a Step 33/42 : COPY ./tools/build_utils ./tools/build_utils ---> Using cache ---> 7960e052be92 Step 34/42 : RUN /bin/bash -c " mkdir -p arch && ln -vs /opt/cp2k-toolchain/install/arch/local.psmp ./arch/ && echo 'Compiling cp2k...' && source /opt/cp2k-toolchain/install/setup && ( make -j ARCH=local VERSION=psmp &> /dev/null || true ) && ( [ ! -f ./exe/local/cp2k.psmp ] || ldd ./exe/local/cp2k.psmp | grep -q libmpi )" ---> Using cache ---> 3851a3df4a24 Step 35/42 : COPY ./data ./data ---> Using cache ---> 1c852f2b4019 Step 36/42 : COPY ./tests ./tests ---> Using cache ---> 21c431d8743a Step 37/42 : COPY ./tools/regtesting ./tools/regtesting ---> Using cache ---> 7cf7f600f462 Step 38/42 : COPY ./benchmarks ./benchmarks ---> Using cache ---> 565f1804764a Step 39/42 : COPY ./tools/docker/scripts/test_performance.sh ./tools/docker/scripts/plot_performance.py ./ ---> Using cache ---> e8803833a2ad Step 40/42 : RUN ./test_performance.sh "local" 2>&1 | tee report.log ---> Using cache ---> 81786c78475e Step 41/42 : CMD cat $(find ./report.log -mmin +10) | sed '/^Summary:/ s/$/ (cached)/' ---> Using cache ---> 5447999d24a7 Step 42/42 : ENTRYPOINT [] ---> Using cache ---> 42cfc2b7e9c0 [Warning] One or more build-args [GIT_COMMIT_SHA] were not consumed Successfully built 42cfc2b7e9c0 Successfully tagged gcr.io/cp2k-org-project/img_cp2k-perf-openmp-arch-14b:master Pushing new image... done. #################### Running Image cp2k-perf-openmp #################### ========== Compiling CP2K ========== Compiling cp2k... done. Checking benchmark inputs... Found 60 input files and 0 errors. ========== Running Performance Test ========== Running H2O-64.inp with 1 threads and 32 ranks... done. Running H2O-64.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.036 0.036 100.942 100.942 qs_mol_dyn_low 1 2.0 0.003 0.003 100.283 100.283 qs_forces 11 3.9 0.001 0.001 100.241 100.241 qs_energies 11 4.9 0.001 0.001 93.563 93.563 scf_env_do_scf 11 5.9 0.002 0.002 81.620 81.620 velocity_verlet 10 3.0 0.002 0.002 64.856 64.856 scf_env_do_scf_inner_loop 108 6.5 0.013 0.013 62.768 62.768 qs_rho_update_rho_low 119 7.7 0.001 0.001 23.507 23.507 calculate_rho_elec 119 8.7 0.978 0.978 23.506 23.506 rebuild_ks_matrix 119 8.3 0.001 0.001 23.452 23.452 qs_ks_build_kohn_sham_matrix 119 9.3 0.016 0.016 23.451 23.451 dbcsr_multiply_generic 2286 12.5 0.173 0.173 22.535 22.535 qs_scf_new_mos 108 7.5 0.001 0.001 22.164 22.164 qs_scf_loop_do_ot 108 8.5 0.001 0.001 22.163 22.163 qs_ks_update_qs_env 119 7.6 0.001 0.001 21.538 21.538 ot_scf_mini 108 9.5 0.003 0.003 20.534 20.534 init_scf_loop 11 6.9 0.000 0.000 18.682 18.682 grid_collocate_task_list 119 9.7 18.060 18.060 18.060 18.060 prepare_preconditioner 11 7.9 0.000 0.000 15.869 15.869 make_preconditioner 11 8.9 0.000 0.000 15.869 15.869 make_full_inverse_cholesky 11 9.9 0.000 0.000 14.638 14.638 sum_up_and_integrate 119 10.3 0.248 0.248 14.466 14.466 integrate_v_rspace 119 11.3 0.115 0.115 14.218 14.218 ot_mini 108 10.5 0.001 0.001 13.272 13.272 make_m2s 4572 13.5 0.052 0.052 12.141 12.141 grid_integrate_task_list 119 12.3 11.786 11.786 11.786 11.786 pw_transfer 1439 11.6 0.081 0.081 7.766 7.766 fft_wrap_pw1pw2 1201 12.6 0.008 0.008 7.406 7.406 qs_ot_get_derivative 108 11.5 0.001 0.001 7.108 7.108 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 6.844 6.844 fft_wrap_pw1pw2_140 487 13.2 0.498 0.498 6.447 6.447 make_images 4572 14.5 2.415 2.415 6.348 6.348 dbcsr_make_dense_low 5837 15.5 0.082 0.082 6.287 6.287 make_dense_data 5837 16.5 5.467 5.467 6.190 6.190 ot_diis_step 108 11.5 0.004 0.004 6.161 6.161 multiply_cannon 2286 13.5 0.238 0.238 6.000 6.000 cp_fm_cholesky_decompose 22 10.9 5.638 5.638 5.638 5.638 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 5.611 5.611 apply_single 119 13.6 0.000 0.000 5.611 5.611 dbcsr_make_images_dense 3978 14.8 0.018 0.018 5.429 5.429 multiply_cannon_loop 2286 14.5 0.049 0.049 5.422 5.422 multiply_cannon_multrec 2286 15.5 5.310 5.310 5.372 5.372 cp_fm_cholesky_invert 11 10.9 4.676 4.676 4.676 4.676 density_rs2pw 119 9.7 0.004 0.004 4.468 4.468 init_scf_run 11 5.9 0.002 0.002 4.431 4.431 scf_env_initial_rho_setup 11 6.9 0.001 0.001 4.429 4.429 dbcsr_copy 2102 12.0 0.267 0.267 4.028 4.028 dbcsr_complete_redistribute 329 12.2 2.041 2.041 3.908 3.908 wfi_extrapolate 11 7.9 0.001 0.001 3.839 3.839 dbcsr_copy_into_existing 22 7.9 3.723 3.723 3.723 3.723 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 3.720 3.720 fft3d_s 1202 14.6 3.626 3.626 3.632 3.632 qs_env_update_s_mstruct 11 6.9 0.000 0.000 3.581 3.581 copy_dbcsr_to_fm 153 11.3 0.003 0.003 3.269 3.269 qs_ot_get_p 119 10.4 0.001 0.001 3.237 3.237 qs_create_task_list 11 7.9 0.000 0.000 3.135 3.135 generate_qs_task_list 11 8.9 2.147 2.147 3.135 3.135 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 2.956 2.956 pw_poisson_solve 119 10.3 1.125 1.125 2.860 2.860 build_core_hamiltonian_matrix 11 6.9 0.001 0.001 2.837 2.837 transfer_dbcsr_to_fm 11 10.9 0.000 0.000 2.504 2.504 potential_pw2rs 119 12.3 0.052 0.052 2.317 2.317 qs_ot_get_derivative_diag 49 12.0 0.001 0.001 2.262 2.262 hybrid_alltoall_any 4725 16.4 1.896 1.896 2.244 2.244 qs_ot_get_derivative_taylor 59 13.0 0.002 0.002 2.197 2.197 pw_scatter_s 595 15.2 2.067 2.067 2.067 2.067 make_images_data 4572 15.5 0.034 0.034 2.031 2.031 dbcsr_data_release 279534 16.0 2.030 2.030 2.030 2.030 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.009 0.026 59.142 59.153 qs_mol_dyn_low 1 2.0 0.003 0.005 59.025 59.029 qs_forces 11 3.9 0.001 0.002 58.979 58.979 qs_energies 11 4.9 0.001 0.001 55.015 55.021 scf_env_do_scf 11 5.9 0.001 0.002 50.554 50.554 scf_env_do_scf_inner_loop 108 6.5 0.003 0.023 46.682 46.683 velocity_verlet 10 3.0 0.002 0.003 35.235 35.236 rebuild_ks_matrix 119 8.3 0.001 0.001 22.429 22.571 qs_ks_build_kohn_sham_matrix 119 9.3 0.019 0.024 22.428 22.570 qs_ks_update_qs_env 119 7.6 0.001 0.001 20.057 20.186 qs_rho_update_rho_low 119 7.7 0.001 0.001 17.467 17.483 calculate_rho_elec 119 8.7 0.031 0.032 17.467 17.482 sum_up_and_integrate 119 10.3 0.027 0.031 17.276 17.332 integrate_v_rspace 119 11.3 0.005 0.006 17.249 17.307 dbcsr_multiply_generic 2286 12.5 0.086 0.105 15.724 15.994 grid_collocate_task_list 119 9.7 10.599 13.125 10.599 13.125 qs_scf_new_mos 108 7.5 0.001 0.001 12.766 12.951 qs_scf_loop_do_ot 108 8.5 0.001 0.001 12.765 12.950 grid_integrate_task_list 119 12.3 9.508 12.482 9.508 12.482 ot_scf_mini 108 9.5 0.003 0.006 11.968 12.159 multiply_cannon 2286 13.5 0.157 0.192 10.910 11.751 multiply_cannon_loop 2286 14.5 0.110 0.142 10.147 10.835 mp_waitall_1 169478 16.3 9.138 9.678 9.138 9.678 rs_pw_transfer 974 11.9 0.014 0.017 7.791 8.531 ot_mini 108 10.5 0.001 0.001 7.030 7.234 density_rs2pw 119 9.7 0.006 0.007 6.430 7.152 multiply_cannon_metrocomm3 18288 15.5 0.043 0.056 5.817 6.656 mp_waitany 9880 13.7 3.904 4.596 3.904 4.596 potential_pw2rs 119 12.3 0.008 0.009 4.471 4.511 pw_transfer 1439 11.6 0.105 0.114 4.342 4.417 fft_wrap_pw1pw2 1201 12.6 0.011 0.013 4.144 4.215 rs_pw_transfer_RS2PW_140 130 11.5 0.281 0.332 3.474 4.213 mp_alltoall_d11v 2130 13.8 3.520 4.090 3.520 4.090 qs_ot_get_derivative 108 11.5 0.001 0.001 3.756 3.942 init_scf_loop 11 6.9 0.000 0.001 3.851 3.852 rs_gather_matrices 119 12.3 0.101 0.110 3.222 3.775 multiply_cannon_multrec 18288 15.5 3.351 3.753 3.364 3.766 fft_wrap_pw1pw2_140 487 13.2 0.385 0.438 3.402 3.523 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 3.084 3.230 apply_single 119 13.6 0.000 0.001 3.084 3.230 ot_diis_step 108 11.5 0.004 0.005 3.220 3.220 make_m2s 4572 13.5 0.053 0.067 3.113 3.184 fft3d_ps 1201 14.6 1.542 1.702 3.045 3.145 init_scf_run 11 5.9 0.000 0.005 3.064 3.064 scf_env_initial_rho_setup 11 6.9 0.000 0.005 3.063 3.064 make_images 4572 14.5 0.131 0.158 2.735 2.817 wfi_extrapolate 11 7.9 0.001 0.001 2.781 2.781 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 2.551 2.563 rs_pw_transfer_PW2RS_140 130 13.9 0.778 0.886 1.901 1.991 multiply_cannon_metrocomm1 18288 15.5 0.021 0.032 0.639 1.964 qs_ot_get_p 119 10.4 0.001 0.001 1.645 1.878 mp_sum_l 11218 13.2 1.163 1.805 1.163 1.805 mp_sum_d 4129 12.0 1.217 1.642 1.217 1.642 make_images_data 4572 15.5 0.040 0.051 1.328 1.531 prepare_preconditioner 11 7.9 0.000 0.000 1.434 1.456 make_preconditioner 11 8.9 0.000 0.000 1.434 1.455 build_core_hamiltonian_matrix_ 11 4.9 0.000 0.001 0.986 1.380 mp_alltoall_z22v 1201 16.6 1.168 1.375 1.168 1.375 hybrid_alltoall_any 4725 16.4 0.076 0.238 1.176 1.335 qs_ot_get_derivative_diag 49 12.0 0.001 0.001 1.233 1.330 make_full_inverse_cholesky 11 9.9 0.000 0.000 1.284 1.310 qs_ot_get_derivative_taylor 59 13.0 0.001 0.002 1.173 1.269 rs_pw_transfer_PW2RS_50 119 14.3 0.361 0.416 1.061 1.206 ------------------------------------------------------------------------------- Plot: name="H2O-64_timings_32omp", title="Timings of H2O-64 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32omp", name="rest", label="rest", y=54.681, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=18.06, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=11.786, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=5.638, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="make_dense_data", label="make_dense_data", y=5.467, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=5.31, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_alltoall_d11v", label="mp_alltoall_d11v", y=0.0, yerr=0.0 Plot: name="H2O-64_timings_32mpi", title="Timings of H2O-64 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32mpi", name="rest", label="rest", y=19.122, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=10.599, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=9.508, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="make_dense_data", label="make_dense_data", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=3.351, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitany", label="mp_waitany", y=3.904, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=9.138, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_alltoall_d11v", label="mp_alltoall_d11v", y=3.52, yerr=0.0 Running H2O-64_nonortho.inp with 1 threads and 32 ranks... done. Running H2O-64_nonortho.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_nonortho_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.030 0.030 128.027 128.027 qs_mol_dyn_low 1 2.0 0.003 0.003 127.335 127.335 qs_forces 11 3.9 0.001 0.001 127.294 127.294 qs_energies 11 4.9 0.001 0.001 118.401 118.401 scf_env_do_scf 11 5.9 0.001 0.001 104.282 104.282 scf_env_do_scf_inner_loop 96 6.5 0.011 0.011 83.574 83.574 velocity_verlet 10 3.0 0.002 0.002 82.205 82.205 rebuild_ks_matrix 107 8.3 0.001 0.001 38.275 38.275 qs_ks_build_kohn_sham_matrix 107 9.3 0.014 0.014 38.274 38.274 qs_rho_update_rho_low 107 7.7 0.001 0.001 37.173 37.173 calculate_rho_elec 107 8.7 0.883 0.883 37.173 37.173 qs_ks_update_qs_env 107 7.6 0.001 0.001 34.321 34.321 grid_collocate_task_list 107 9.7 32.207 32.207 32.207 32.207 sum_up_and_integrate 107 10.3 0.224 0.224 30.234 30.234 integrate_v_rspace 107 11.3 0.106 0.106 30.010 30.010 grid_integrate_task_list 107 12.3 27.731 27.731 27.731 27.731 init_scf_loop 11 6.9 0.000 0.000 20.512 20.512 dbcsr_multiply_generic 1966 12.4 0.179 0.179 20.399 20.399 qs_scf_new_mos 96 7.5 0.001 0.001 19.915 19.915 qs_scf_loop_do_ot 96 8.5 0.001 0.001 19.915 19.915 ot_scf_mini 96 9.5 0.003 0.003 18.457 18.457 prepare_preconditioner 11 7.9 0.000 0.000 15.924 15.924 make_preconditioner 11 8.9 0.000 0.000 15.924 15.924 make_full_inverse_cholesky 11 9.9 0.000 0.000 14.685 14.685 ot_mini 96 10.5 0.001 0.001 11.628 11.628 make_m2s 3932 13.4 0.045 0.045 10.681 10.681 pw_transfer 1295 11.6 0.072 0.072 7.096 7.096 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 7.029 7.029 fft_wrap_pw1pw2 1081 12.6 0.007 0.007 6.812 6.812 qs_ot_get_derivative 96 11.5 0.001 0.001 6.337 6.337 init_scf_run 11 5.9 0.002 0.002 6.182 6.182 scf_env_initial_rho_setup 11 6.9 0.001 0.001 6.180 6.180 fft_wrap_pw1pw2_140 439 13.2 0.490 0.490 5.912 5.912 dbcsr_make_dense_low 4961 15.5 0.089 0.089 5.558 5.558 multiply_cannon 1966 13.4 0.204 0.204 5.527 5.527 make_images 3932 14.4 2.193 2.193 5.523 5.523 wfi_extrapolate 11 7.9 0.001 0.001 5.498 5.498 make_dense_data 4961 16.5 4.782 4.782 5.456 5.456 cp_fm_cholesky_decompose 22 10.9 5.398 5.398 5.398 5.398 ot_diis_step 96 11.5 0.004 0.004 5.288 5.288 multiply_cannon_loop 1966 14.4 0.082 0.082 5.021 5.021 multiply_cannon_multrec 1966 15.4 4.885 4.885 4.938 4.938 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 4.892 4.892 cp_fm_cholesky_invert 11 10.9 4.879 4.879 4.879 4.879 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 4.852 4.852 apply_single 107 13.6 0.000 0.000 4.852 4.852 dbcsr_make_images_dense 3386 14.7 0.016 0.016 4.755 4.755 dbcsr_complete_redistribute 317 12.2 2.023 2.023 4.201 4.201 dbcsr_copy 1855 11.9 0.248 0.248 4.124 4.124 density_rs2pw 107 9.7 0.004 0.004 4.083 4.083 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 3.998 3.998 dbcsr_copy_into_existing 22 7.9 3.839 3.839 3.839 3.839 qs_env_update_s_mstruct 11 6.9 0.000 0.000 3.822 3.822 qs_create_task_list 11 7.9 0.000 0.000 3.411 3.411 generate_qs_task_list 11 8.9 2.417 2.417 3.411 3.411 copy_dbcsr_to_fm 147 11.2 0.003 0.003 3.319 3.319 fft3d_s 1082 14.6 3.232 3.232 3.239 3.239 qs_ot_get_p 107 10.4 0.001 0.001 2.998 2.998 build_core_hamiltonian_matrix 11 6.9 0.001 0.001 2.788 2.788 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_nonortho_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.011 0.028 101.649 101.659 qs_mol_dyn_low 1 2.0 0.003 0.005 101.529 101.534 qs_forces 11 3.9 0.001 0.002 101.484 101.485 qs_energies 11 4.9 0.001 0.001 94.773 94.787 scf_env_do_scf 11 5.9 0.001 0.002 87.979 87.989 scf_env_do_scf_inner_loop 96 6.5 0.003 0.020 81.597 81.598 velocity_verlet 10 3.0 0.002 0.003 59.708 59.710 rebuild_ks_matrix 107 8.3 0.001 0.001 44.985 45.151 qs_ks_build_kohn_sham_matrix 107 9.3 0.017 0.022 44.984 45.150 sum_up_and_integrate 107 10.3 0.026 0.031 40.136 40.280 integrate_v_rspace 107 11.3 0.005 0.006 40.110 40.257 qs_ks_update_qs_env 107 7.6 0.001 0.001 39.840 39.986 qs_rho_update_rho_low 107 7.7 0.001 0.001 38.554 38.585 calculate_rho_elec 107 8.7 0.028 0.030 38.554 38.584 grid_integrate_task_list 107 12.3 26.344 33.103 26.344 33.103 grid_collocate_task_list 107 9.7 25.686 31.615 25.686 31.615 dbcsr_multiply_generic 1966 12.4 0.077 0.094 14.513 16.030 rs_pw_transfer 878 11.9 0.013 0.018 13.766 15.725 density_rs2pw 107 9.7 0.005 0.007 12.439 14.401 mp_waitany 8968 13.7 10.175 12.043 10.175 12.043 qs_scf_new_mos 96 7.5 0.001 0.001 11.576 11.769 qs_scf_loop_do_ot 96 8.5 0.001 0.001 11.576 11.768 mp_alltoall_d11v 1998 13.7 9.807 11.759 9.807 11.759 rs_pw_transfer_RS2PW_140 118 11.5 0.257 0.280 9.716 11.667 rs_gather_matrices 107 12.3 0.098 0.109 9.518 11.427 ot_scf_mini 96 9.5 0.002 0.003 10.846 11.034 multiply_cannon 1966 13.4 0.143 0.185 9.921 10.585 multiply_cannon_loop 1966 14.4 0.105 0.131 9.183 9.860 mp_waitall_1 146670 16.2 8.220 8.998 8.220 8.998 ot_mini 96 10.5 0.001 0.001 6.447 6.662 init_scf_loop 11 6.9 0.000 0.001 6.360 6.361 multiply_cannon_metrocomm3 15728 15.4 0.040 0.053 5.172 5.860 init_scf_run 11 5.9 0.000 0.005 5.378 5.378 scf_env_initial_rho_setup 11 6.9 0.000 0.004 5.377 5.378 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 5.308 5.327 wfi_extrapolate 11 7.9 0.001 0.001 4.856 4.856 potential_pw2rs 107 12.3 0.007 0.008 4.198 4.246 pw_transfer 1295 11.6 0.096 0.105 4.040 4.108 fft_wrap_pw1pw2 1081 12.6 0.010 0.012 3.859 3.921 qs_ot_get_derivative 96 11.5 0.001 0.001 3.479 3.665 multiply_cannon_multrec 15728 15.4 3.139 3.402 3.151 3.416 fft_wrap_pw1pw2_140 439 13.2 0.357 0.410 3.156 3.268 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 2.816 2.974 apply_single 107 13.6 0.000 0.001 2.816 2.974 make_m2s 3932 13.4 0.047 0.058 2.879 2.963 fft3d_ps 1081 14.6 1.410 1.525 2.829 2.951 ot_diis_step 96 11.5 0.004 0.005 2.915 2.916 mp_sum_l 9666 13.1 1.227 2.692 1.227 2.692 make_images 3932 14.4 0.117 0.136 2.539 2.629 ------------------------------------------------------------------------------- Plot: name="H2O-64_nonortho_timings_32omp", title="Timings of H2O-64_nonortho with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="rest", label="rest", y=52.92699999999998, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=32.207, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=27.731, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=5.398, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=4.885, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=4.879, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_alltoall_d11v", label="mp_alltoall_d11v", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="H2O-64_nonortho_timings_32mpi", title="Timings of H2O-64_nonortho with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="rest", label="rest", y=18.27799999999999, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=25.686, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=26.344, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=3.139, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitany", label="mp_waitany", y=10.175, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_alltoall_d11v", label="mp_alltoall_d11v", y=9.807, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=8.22, yerr=0.0 Running H2O-hyb.inp with 1 threads and 32 ranks... done. Running H2O-hyb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-hyb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.214 0.214 124.975 124.975 qs_energies 1 2.0 0.000 0.000 124.093 124.093 scf_env_do_scf 1 3.0 0.000 0.000 122.816 122.816 qs_ks_update_qs_env 8 5.0 0.000 0.000 116.991 116.991 rebuild_ks_matrix 7 6.0 0.000 0.000 116.935 116.935 qs_ks_build_kohn_sham_matrix 7 7.0 0.001 0.001 116.935 116.935 hfx_ks_matrix 7 8.0 0.000 0.000 106.653 106.653 integrate_four_center 7 9.0 1.771 1.771 106.622 106.622 integrate_four_center_main 7 10.0 0.520 0.520 92.770 92.770 integrate_four_center_bin 466 11.0 92.250 92.250 92.250 92.250 scf_env_do_scf_inner_loop 7 4.0 0.001 0.001 64.372 64.372 init_scf_loop 1 4.0 0.000 0.000 58.433 58.433 integrate_four_center_load 7 10.0 0.000 0.000 11.805 11.805 hfx_load_balance 1 11.0 0.002 0.002 11.805 11.805 hfx_load_balance_bin 1 12.0 5.902 5.902 5.902 5.902 hfx_load_balance_count 1 12.0 5.884 5.884 5.884 5.884 qs_vxc_create 14 8.0 0.000 0.000 4.155 4.155 xc_vxc_pw_create 14 9.0 0.123 0.123 4.155 4.155 xc_rho_set_and_dset_create 14 10.0 0.116 0.116 3.185 3.185 prepare_preconditioner 1 5.0 0.000 0.000 2.874 2.874 make_preconditioner 1 6.0 0.000 0.000 2.874 2.874 calculate_rho_elec 15 7.4 0.120 0.120 2.814 2.814 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-hyb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.236 0.257 153.561 153.572 qs_energies 1 2.0 0.000 0.000 153.168 153.175 scf_env_do_scf 1 3.0 0.000 0.000 152.731 152.731 qs_ks_update_qs_env 8 5.0 0.000 0.000 150.168 150.169 rebuild_ks_matrix 7 6.0 0.000 0.000 150.156 150.157 qs_ks_build_kohn_sham_matrix 7 7.0 0.002 0.002 150.156 150.157 hfx_ks_matrix 7 8.0 0.000 0.000 142.875 142.877 integrate_four_center 7 9.0 0.062 0.383 142.864 142.865 integrate_four_center_main 7 10.0 0.004 0.006 92.032 129.619 integrate_four_center_bin 448 11.0 92.028 129.613 92.028 129.613 scf_env_do_scf_inner_loop 7 4.0 0.000 0.001 86.957 86.957 init_scf_loop 1 4.0 0.000 0.000 65.772 65.773 mp_sync 70 11.3 37.604 43.491 37.604 43.491 integrate_four_center_load 7 10.0 0.000 0.000 12.446 12.455 hfx_load_balance 1 11.0 0.001 0.001 12.446 12.455 mp_sum_l 1135 8.3 5.840 6.565 5.840 6.565 hfx_load_balance_dist 1 12.0 0.000 0.000 5.689 6.379 hfx_load_balance_bin 1 12.0 3.333 6.218 3.333 6.218 hfx_load_balance_count 1 12.0 3.337 6.140 3.337 6.140 qs_vxc_create 14 8.0 0.000 0.000 3.325 3.326 xc_vxc_pw_create 14 9.0 0.008 0.010 3.325 3.326 ------------------------------------------------------------------------------- Plot: name="H2O-hyb_timings_32omp", title="Timings of H2O-hyb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32omp", name="rest", label="rest", y=18.647999999999996, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_bin", label="integrate_four_center_bin", y=92.25, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=5.902, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_count", label="hfx_load_balance_count", y=5.884, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center", label="integrate_four_center", y=1.771, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_main", label="integrate_four_center_main", y=0.52, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 Plot: name="H2O-hyb_timings_32mpi", title="Timings of H2O-hyb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32mpi", name="rest", label="rest", y=11.35299999999998, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_bin", label="integrate_four_center_bin", y=92.028, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=3.333, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_count", label="hfx_load_balance_count", y=3.337, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center", label="integrate_four_center", y=0.062, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_main", label="integrate_four_center_main", y=0.004, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=5.84, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="mp_sync", label="mp_sync", y=37.604, yerr=0.0 Running GW_PBE_4benzene.inp with 1 threads and 32 ranks... done. Running GW_PBE_4benzene.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/GW_PBE_4benzene_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.014 0.014 89.053 89.053 qs_energies 1 2.0 0.000 0.000 88.644 88.644 mp2_main 1 3.0 0.000 0.000 85.324 85.324 mp2_gpw_main 1 4.0 0.000 0.000 85.192 85.192 rpa_ri_compute_en 1 5.0 0.000 0.000 81.665 81.665 rpa_num_int 1 6.0 0.001 0.001 81.659 81.659 compute_mat_P_omega 1 7.0 0.003 0.003 70.242 70.242 compute_mat_P_omega_contract 10 8.0 9.701 9.701 69.986 69.986 dbt_total 2336 9.6 0.015 0.015 55.044 55.044 dbt_contract 787 11.0 0.050 0.050 46.877 46.877 dbt_tas_total 1149 12.2 0.303 0.303 45.262 45.262 dbt_tas_multiply 807 12.1 0.003 0.003 43.854 43.854 dbt_tas_dbm 807 14.1 0.004 0.004 36.679 36.679 dbm_multiply 807 16.1 36.667 36.667 36.667 36.667 dbt_tas_mm_1N 524 15.1 0.002 0.002 26.248 26.248 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 24.386 24.386 compute_mat_P_omega_calc_M_occ 250 9.0 9.736 9.736 19.561 19.561 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 9.735 9.735 dbt_tas_mm_2 251 15.0 0.002 0.002 8.682 8.682 dbt_copy 1103 10.7 0.082 0.082 6.736 6.736 compute_QP_energies 1 7.0 0.000 0.000 5.766 5.766 compute_self_energy_cubic_gw 1 8.0 0.055 0.055 5.765 5.765 contract_cubic_gw 21 9.0 0.000 0.000 4.590 4.590 dbt_tas_reserve_blocks_index 3261 14.3 0.168 0.168 4.011 4.011 dbm_reserve_blocks 3628 15.3 3.935 3.935 3.935 3.935 mp2_ri_gpw_compute_in 1 5.0 0.001 0.001 3.520 3.520 scf_env_do_scf 1 3.0 0.000 0.000 3.200 3.200 scf_env_do_scf_inner_loop 17 4.0 0.002 0.002 3.200 3.200 dbt_reserve_blocks_index 2280 13.1 0.066 0.066 3.062 3.062 dbt_reserve_blocks_index_array 2222 12.2 0.013 0.013 3.051 3.051 dbt_crop 1042 12.0 1.900 1.900 2.879 2.879 dbt_tas_copy 574 11.4 1.705 1.705 2.699 2.699 convert_to_new_pgrid 2421 14.1 0.174 0.174 2.462 2.462 rpa_num_int_RPA_matrix_operati 10 7.0 0.000 0.000 2.389 2.389 compute_W_cubic_GW 10 7.0 0.020 0.020 2.340 2.340 dbm_copy 1614 15.1 2.288 2.288 2.288 2.288 dbt_reshape 278 11.9 1.216 1.216 2.205 2.205 dbt_tas_reshape 367 15.0 0.008 0.008 2.154 2.154 get_2c_integrals 1 6.0 0.000 0.000 1.864 1.864 compute_mat_P_omega_copy_M_occ 250 9.0 0.001 0.001 1.793 1.793 ------------------------------------------------------------------------------- From /workspace/artifacts/GW_PBE_4benzene_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.028 41.088 41.098 qs_energies 1 2.0 0.000 0.000 40.975 40.976 mp2_main 1 3.0 0.000 0.000 39.753 39.755 mp2_gpw_main 1 4.0 0.000 0.000 39.708 39.709 rpa_ri_compute_en 1 5.0 0.000 0.000 38.229 38.231 rpa_num_int 1 6.0 0.001 0.002 38.228 38.229 dbt_total 2336 9.6 0.015 0.019 33.966 33.978 compute_mat_P_omega 1 7.0 0.001 0.006 32.592 32.623 compute_mat_P_omega_contract 10 8.0 0.509 0.561 32.422 32.428 dbt_contract 787 11.0 0.033 0.040 25.181 25.191 dbt_tas_total 1149 12.2 0.068 0.092 22.329 22.330 dbt_tas_multiply 807 12.1 0.002 0.003 22.281 22.286 dbt_tas_dbm 807 14.1 0.004 0.006 16.171 16.182 dbm_multiply 807 16.1 12.489 13.244 12.489 13.244 compute_mat_P_omega_calc_M_occ 250 9.0 0.481 0.529 9.762 9.764 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 9.268 9.271 dbt_copy 1111 10.7 0.015 0.019 7.536 7.968 mp_sync 8706 11.6 6.216 7.655 6.216 7.655 dbt_reshape 1098 11.7 2.749 3.364 7.185 7.583 dbt_tas_mm_2 251 15.0 0.002 0.003 7.535 7.537 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 6.884 6.888 dbt_tas_mm_1N 524 15.1 0.002 0.002 5.772 6.462 mp_waitall_2 3776 15.3 3.489 3.802 3.489 3.802 compute_QP_energies 1 7.0 0.000 0.000 3.642 3.643 compute_self_energy_cubic_gw 1 8.0 0.003 0.011 3.639 3.642 dbt_communicate_buffer 1098 12.7 0.060 0.079 3.431 3.622 contract_cubic_gw 21 9.0 0.000 0.000 2.842 2.842 dbt_reserve_blocks_index 2849 13.1 0.075 0.096 2.166 2.507 dbt_reserve_blocks_index_array 2791 12.2 0.010 0.014 2.165 2.504 dbt_tas_reserve_blocks_index 3300 14.5 0.125 0.165 2.136 2.474 dbm_reserve_blocks 3696 15.4 2.130 2.472 2.130 2.472 dbt_crop 1042 12.0 1.109 1.584 1.786 2.347 dbt_tas_replicate 396 14.1 0.614 1.018 1.550 1.724 mp2_ri_gpw_compute_in 1 5.0 0.000 0.001 1.474 1.477 compute_mat_P_omega_copy_M_vir 250 9.0 0.001 0.001 1.297 1.301 convert_to_new_pgrid 2421 14.1 0.029 0.036 1.061 1.298 dbm_copy 1608 15.1 1.025 1.260 1.025 1.260 compute_mat_P_omega_copy_M_occ 250 9.0 0.001 0.001 1.198 1.210 parallel_gemm_fm 105 8.4 0.000 0.000 1.188 1.201 parallel_gemm_fm_cosma 105 9.4 1.188 1.201 1.188 1.201 mp_max_i 1992 9.8 0.942 1.194 0.942 1.194 scf_env_do_scf 1 3.0 0.000 0.000 1.171 1.171 scf_env_do_scf_inner_loop 17 4.0 0.000 0.002 1.171 1.171 dbm_add 807 14.1 0.901 1.021 0.901 1.021 compute_W_cubic_GW 10 7.0 0.001 0.001 0.893 0.904 ------------------------------------------------------------------------------- Plot: name="GW_PBE_4benzene_timings_32omp", title="Timings of GW_PBE_4benzene with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="rest", label="rest", y=25.50999999999999, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=36.667, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="compute_mat_P_omega_calc_M_occ", label="compute_mat_P_omega_calc_M_occ", y=9.736, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="compute_mat_P_omega_contract", label="compute_mat_P_omega_contract", y=9.701, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=3.935, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_copy", label="dbm_copy", y=2.288, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbt_reshape", label="dbt_reshape", y=1.216, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 Plot: name="GW_PBE_4benzene_timings_32mpi", title="Timings of GW_PBE_4benzene with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="rest", label="rest", y=12.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=12.489, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="compute_mat_P_omega_calc_M_occ", label="compute_mat_P_omega_calc_M_occ", y=0.481, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="compute_mat_P_omega_contract", label="compute_mat_P_omega_contract", y=0.509, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=2.13, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_copy", label="dbm_copy", y=1.025, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbt_reshape", label="dbt_reshape", y=2.749, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_sync", label="mp_sync", y=6.216, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=3.489, yerr=0.0 Running RI-HFX_H2O-32.inp with 1 threads and 32 ranks... done. Running RI-HFX_H2O-32.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/RI-HFX_H2O-32_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.020 0.020 326.246 326.246 qs_forces 1 2.0 0.000 0.000 325.643 325.643 rebuild_ks_matrix 7 6.6 0.000 0.000 323.795 323.795 qs_ks_build_kohn_sham_matrix 7 7.6 0.001 0.001 323.795 323.795 hfx_ks_matrix 7 8.6 0.000 0.000 321.225 321.225 dbt_total 4959 11.6 0.041 0.041 259.169 259.169 hfx_ri_update_ks 7 9.6 0.000 0.000 252.646 252.646 hfx_ri_update_ks_Pmat 7 10.6 36.747 36.747 252.644 252.644 dbt_tas_total 2445 14.1 1.832 1.832 227.864 227.864 qs_energies 1 3.0 0.000 0.000 223.753 223.753 scf_env_do_scf 1 4.0 0.001 0.001 223.371 223.371 qs_ks_update_qs_env 8 6.0 0.000 0.000 221.972 221.972 dbt_contract 1473 13.0 0.196 0.196 207.996 207.996 dbt_tas_multiply 1482 14.0 0.005 0.005 195.728 195.728 hfx_ri_update_ks_Pmat_KS 567 11.6 0.006 0.006 176.020 176.020 dbt_tas_dbm 1482 16.0 0.009 0.009 165.339 165.339 dbm_multiply 1482 18.0 165.313 165.313 165.313 165.313 dbt_tas_mm_2 649 17.1 0.006 0.006 145.666 145.666 scf_env_do_scf_inner_loop 6 5.0 0.001 0.001 138.394 138.394 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 101.827 101.827 init_scf_loop 2 5.0 0.000 0.000 84.974 84.974 hfx_ri_update_forces 1 7.0 1.763 1.763 68.576 68.576 hfx_ri_forces_Pmat_3c 1 8.0 1.842 1.842 48.266 48.266 dbt_tas_reshape 978 14.8 0.018 0.018 26.137 26.137 dbt_copy 2373 12.3 0.154 0.154 18.973 18.973 hfx_ri_pre_scf_Pmat 1 12.0 0.001 0.001 15.595 15.595 precalc_derivatives 1 8.0 2.431 2.431 15.216 15.216 dbt_tas_merge 649 14.1 13.489 13.489 15.192 15.192 dbt_tas_reshape_buffer_fill 978 15.8 14.277 14.277 14.277 14.277 hfx_ri_update_ks_Pmat_Px3C 567 11.6 0.003 0.003 14.098 14.098 dbt_tas_mm_3T 659 17.1 0.003 0.003 13.156 13.156 dbm_reserve_blocks 8570 16.8 12.195 12.195 12.195 12.195 dbt_crop 2763 14.2 8.606 8.606 11.965 11.965 dbt_tas_reserve_blocks_index 7592 16.0 0.372 0.372 11.303 11.303 dbt_reshape 850 13.9 6.385 6.385 10.688 10.688 dbt_tas_replicate 978 15.8 7.107 7.107 9.723 9.723 reshape_mm_small 978 15.8 0.171 0.171 9.607 9.607 build_3c_derivatives 9 9.0 3.386 3.386 8.637 8.637 dbt_tas_reshape_buffer_obtain 978 15.8 7.415 7.415 8.465 8.465 dbt_reserve_blocks_index 5073 15.1 0.156 0.156 8.171 8.171 dbt_reserve_blocks_index_array 5038 14.1 0.027 0.027 8.108 8.108 ------------------------------------------------------------------------------- From /workspace/artifacts/RI-HFX_H2O-32_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.025 72.172 72.183 qs_forces 1 2.0 0.000 0.000 71.943 71.944 rebuild_ks_matrix 7 6.6 0.000 0.000 71.073 71.074 qs_ks_build_kohn_sham_matrix 7 7.6 0.002 0.003 71.073 71.074 hfx_ks_matrix 7 8.6 0.000 0.000 69.755 69.764 dbt_total 4959 11.6 0.034 0.043 62.765 62.777 dbt_contract 1473 13.0 0.119 0.144 48.986 49.014 hfx_ri_update_ks 7 9.6 0.000 0.000 47.875 47.875 hfx_ri_update_ks_Pmat 7 10.6 1.724 2.243 47.873 47.874 dbt_tas_total 2445 14.1 0.152 0.209 46.006 46.008 qs_energies 1 3.0 0.000 0.000 43.912 43.913 scf_env_do_scf 1 4.0 0.000 0.001 43.735 43.735 qs_ks_update_qs_env 8 6.0 0.000 0.000 43.058 43.059 dbt_tas_multiply 1482 14.0 0.006 0.007 41.277 41.284 dbt_tas_dbm 1482 16.0 0.007 0.009 31.222 31.236 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 28.017 28.017 hfx_ri_update_ks_Pmat_KS 567 11.6 0.005 0.006 26.084 26.085 dbm_multiply 1482 18.0 20.905 25.847 20.905 25.847 scf_env_do_scf_inner_loop 6 5.0 0.000 0.002 25.117 25.117 hfx_ri_update_forces 1 7.0 0.076 0.094 21.879 21.889 mp_sync 17817 13.6 15.801 19.179 15.801 19.179 dbt_tas_mm_2 649 17.1 0.004 0.006 18.666 18.681 init_scf_loop 2 5.0 0.000 0.000 18.616 18.616 hfx_ri_forces_Pmat_3c 1 8.0 0.064 0.077 15.692 15.734 hfx_ri_update_ks_Pmat_Px3C 567 11.6 0.002 0.003 9.755 9.756 dbt_copy 2391 12.4 0.041 0.049 7.346 7.874 dbt_tas_mm_3T 659 17.1 0.002 0.003 5.189 5.711 dbt_reshape 1252 13.5 2.420 2.957 5.316 5.625 dbt_crop 2763 14.2 3.513 4.479 4.411 5.433 dbt_tas_mm_3N 163 16.5 0.001 0.001 5.067 5.272 hfx_ri_pre_scf_Pmat 1 12.0 0.000 0.001 5.188 5.188 precalc_derivatives 1 8.0 0.118 0.152 4.547 4.547 mp_waitall_2 6239 16.7 3.706 4.104 3.706 4.104 dbt_tas_merge 649 14.1 1.793 2.360 3.113 3.682 dbm_reserve_blocks 8600 16.9 2.720 3.230 2.720 3.230 dbt_tas_replicate 979 15.8 0.816 1.030 3.030 3.181 hfx_ri_pre_scf_Pmat_RIx3C 81 13.0 0.001 0.001 3.167 3.180 dbt_tas_reserve_blocks_index 7621 16.0 0.292 0.367 2.548 3.067 dbt_tas_communicate_buffer 1965 16.6 0.070 0.091 2.627 2.966 mp_max_i 3372 12.5 2.258 2.739 2.258 2.739 build_3c_derivatives 9 9.0 0.266 0.415 2.610 2.614 dbt_reserve_blocks_index 5475 15.1 0.132 0.159 2.084 2.457 dbt_reserve_blocks_index_array 5440 14.1 0.016 0.022 2.081 2.449 dbt_tas_reshape 986 14.8 0.011 0.015 2.063 2.192 dbt_communicate_buffer 1252 14.5 0.050 0.067 1.986 2.189 mp_alltoall_i 4471 15.4 1.851 2.087 1.851 2.087 convert_to_new_pgrid 4446 16.0 0.048 0.058 1.656 1.859 mp_sum_l 38255 15.3 1.367 1.826 1.367 1.826 dbm_copy 3043 16.9 1.608 1.815 1.608 1.815 hfx_ri_update_ks_Pmat_copy_2 567 11.6 0.002 0.002 1.800 1.804 dbm_add 1482 16.0 1.185 1.488 1.185 1.488 ------------------------------------------------------------------------------- Plot: name="RI-HFX_H2O-32_timings_32omp", title="Timings of RI-HFX_H2O-32 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="rest", label="rest", y=75.619, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=165.313, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=36.747, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_tas_reshape_buffer_fill", label="dbt_tas_reshape_buffer_fill", y=14.277, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_tas_merge", label="dbt_tas_merge", y=13.489, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=12.195, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_crop", label="dbt_crop", y=8.606, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 Plot: name="RI-HFX_H2O-32_timings_32mpi", title="Timings of RI-HFX_H2O-32 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="rest", label="rest", y=22.00999999999999, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=20.905, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=1.724, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_tas_reshape_buffer_fill", label="dbt_tas_reshape_buffer_fill", y=0.0, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_tas_merge", label="dbt_tas_merge", y=1.793, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=2.72, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_crop", label="dbt_crop", y=3.513, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_sync", label="mp_sync", y=15.801, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=3.706, yerr=0.0 Running RI-MP2_ammonia.inp with 1 threads and 32 ranks... done. Running RI-MP2_ammonia.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/RI-MP2_ammonia_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.018 0.018 207.583 207.583 qs_energies 1 2.0 0.000 0.000 207.391 207.391 mp2_main 1 3.0 0.000 0.000 201.649 201.649 mp2_gpw_main 1 4.0 0.001 0.001 201.072 201.072 mp2_ri_gpw_compute_in 1 5.0 0.414 0.414 141.994 141.994 mp2_ri_gpw_compute_in_loop 1 6.0 0.025 0.025 128.350 128.350 mp2_eri_3c_integrate_gpw 2656 7.0 0.024 0.024 89.352 89.352 integrate_v_rspace 2666 8.0 0.820 0.820 74.751 74.751 grid_integrate_task_list 2666 9.0 71.405 71.405 71.405 71.405 mp2_ri_gpw_compute_en 1 5.0 0.099 0.099 59.048 59.048 mp2_ri_gpw_compute_en_RI_loop 1 6.0 11.562 11.562 56.904 56.904 mp2_ri_gpw_compute_en_expansio 2080 7.0 3.118 3.118 35.042 35.042 local_gemm 2080 8.0 31.924 31.924 31.924 31.924 dbcsr_multiply_generic 5322 8.0 0.323 0.323 27.326 27.326 ao_to_mo_and_store_B_mult_1 2656 7.0 0.022 0.022 27.304 27.304 pw_transfer 63872 10.6 1.226 1.226 13.819 13.819 get_2c_integrals 1 6.0 0.000 0.000 13.227 13.227 calculate_wavefunction 2656 8.0 8.826 8.826 13.085 13.085 multiply_cannon 5322 9.0 0.757 0.757 12.607 12.607 fft_wrap_pw1pw2 53228 11.4 0.137 0.137 12.302 12.302 compute_2c_integrals 1 7.0 0.007 0.007 12.122 12.122 compute_2c_integrals_loop_lm 1 8.0 0.010 0.010 12.103 12.103 mp2_eri_2c_integrate_gpw 1 9.0 3.623 3.623 12.092 12.092 make_m2s 10644 9.0 0.093 0.093 11.602 11.602 ao_to_mo_and_store_B_E_Ex_1 2656 7.0 3.515 3.515 11.566 11.566 make_images 10644 10.0 4.313 4.313 11.147 11.147 multiply_cannon_loop 5322 10.0 0.177 0.177 10.690 10.690 multiply_cannon_multrec 5322 11.0 8.779 8.779 8.836 8.836 copy_dbcsr_to_fm 2679 8.0 0.048 0.048 8.711 8.711 fft_wrap_pw1pw2_20 21271 12.4 0.606 0.606 8.561 8.561 fft3d_s 53229 13.4 7.559 7.559 7.603 7.603 mp2_ri_gpw_compute_en_ener 2080 7.0 7.475 7.475 7.475 7.475 dbcsr_complete_redistribute 2689 9.0 1.437 1.437 6.728 6.728 dbcsr_finalize 10708 9.5 0.220 0.220 5.937 5.937 scf_env_do_scf 1 3.0 0.000 0.000 5.301 5.301 scf_env_do_scf_inner_loop 10 4.0 0.001 0.001 5.301 5.301 dbcsr_merge_all 8011 10.3 3.861 3.861 5.077 5.077 potential_pw2rs 5322 10.0 0.167 0.167 4.981 4.981 hybrid_alltoall_any 13323 11.6 4.436 4.436 4.746 4.746 make_images_data 10644 11.0 0.098 0.098 4.724 4.724 mp2_eri_2c_integrate_gpw_pot_l 2656 10.0 0.005 0.005 4.200 4.200 collocate_single_gaussian 2656 10.0 0.158 0.158 4.167 4.167 ------------------------------------------------------------------------------- From /workspace/artifacts/RI-MP2_ammonia_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.024 47.504 47.514 qs_energies 1 2.0 0.000 0.000 47.418 47.419 mp2_main 1 3.0 0.000 0.001 44.597 44.598 mp2_gpw_main 1 4.0 0.002 0.003 44.437 44.437 mp2_ri_gpw_compute_in 1 5.0 0.053 0.060 20.711 25.832 mp2_ri_gpw_compute_en 1 5.0 0.153 0.161 23.618 24.825 mp2_ri_gpw_compute_in_loop 1 6.0 0.001 0.002 18.966 24.090 mp2_eri_3c_integrate_gpw 83 7.0 0.001 0.002 16.209 21.556 integrate_v_rspace 93 8.1 0.135 0.155 16.144 21.386 grid_integrate_task_list 93 9.1 15.656 20.988 15.656 20.988 mp2_ri_gpw_compute_en_RI_loop 1 6.0 1.262 1.483 17.786 17.803 mp2_ri_gpw_compute_en_expansio 65 7.0 0.161 0.206 12.386 13.277 local_gemm 65 8.0 12.225 13.168 12.225 13.168 mp_min_d 2 7.0 5.152 6.343 5.152 6.343 mp2_ri_get_integ_group_size 1 6.0 0.000 0.000 5.124 6.330 mp2_ri_gpw_compute_en_comm 17 7.0 0.085 0.107 3.627 4.162 mp_sendrecv_dm3 1054 8.0 2.905 3.506 2.905 3.506 dbcsr_multiply_generic 176 8.0 0.010 0.013 2.368 2.743 ao_to_mo_and_store_B_mult_1 83 7.0 0.001 0.001 2.346 2.714 scf_env_do_scf 1 3.0 0.000 0.000 2.661 2.662 scf_env_do_scf_inner_loop 10 4.0 0.000 0.001 2.661 2.662 get_2c_integrals 1 6.0 0.000 0.001 1.667 1.709 multiply_cannon 176 9.0 0.021 0.029 1.287 1.508 qs_scf_new_mos 10 5.0 0.000 0.000 1.365 1.435 multiply_cannon_loop 176 10.0 0.003 0.004 1.214 1.423 eigensolver 11 5.8 0.001 0.001 1.330 1.333 compute_2c_integrals 1 7.0 0.003 0.004 1.309 1.331 make_m2s 352 9.0 0.004 0.005 1.030 1.235 compute_2c_integrals_loop_lm 1 8.0 0.002 0.003 0.922 1.223 mp2_eri_2c_integrate_gpw 1 9.0 0.240 0.384 0.920 1.222 make_images 352 10.0 0.059 0.067 1.016 1.220 multiply_cannon_multrec 246 11.0 0.993 1.145 1.000 1.156 cp_fm_diag_elpa 11 6.8 0.000 0.000 1.116 1.117 cp_fm_redistribute_end 11 7.8 0.429 1.096 0.445 1.104 cp_fm_diag_elpa_base 11 7.8 0.642 1.031 0.655 1.052 pw_transfer 2120 10.5 0.054 0.073 0.915 1.030 ------------------------------------------------------------------------------- Plot: name="RI-MP2_ammonia_timings_32omp", title="Timings of RI-MP2_ammonia with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="rest", label="rest", y=75.08699999999999, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=71.405, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="local_gemm", label="local_gemm", y=31.924, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp2_ri_gpw_compute_en_RI_loop", label="mp2_ri_gpw_compute_en_RI_loop", y=11.562, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="calculate_wavefunction", label="calculate_wavefunction", y=8.826, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=8.779, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp_min_d", label="mp_min_d", y=0.0, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp_sendrecv_dm3", label="mp_sendrecv_dm3", y=0.0, yerr=0.0 Plot: name="RI-MP2_ammonia_timings_32mpi", title="Timings of RI-MP2_ammonia with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="rest", label="rest", y=9.311, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=15.656, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="local_gemm", label="local_gemm", y=12.225, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp2_ri_gpw_compute_en_RI_loop", label="mp2_ri_gpw_compute_en_RI_loop", y=1.262, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="calculate_wavefunction", label="calculate_wavefunction", y=0.0, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=0.993, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp_min_d", label="mp_min_d", y=5.152, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp_sendrecv_dm3", label="mp_sendrecv_dm3", y=2.905, yerr=0.0 Running diag_cu144_broy.inp with 1 threads and 32 ranks... done. Running diag_cu144_broy.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/diag_cu144_broy_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.128 0.128 150.480 150.480 qs_energies 1 2.0 0.000 0.000 148.951 148.951 scf_env_do_scf 1 3.0 0.000 0.000 141.126 141.126 scf_env_do_scf_inner_loop 15 4.0 0.002 0.002 141.125 141.125 qs_ks_update_qs_env 15 5.0 0.000 0.000 59.037 59.037 rebuild_ks_matrix 15 6.0 0.000 0.000 58.805 58.805 qs_ks_build_kohn_sham_matrix 15 7.0 0.002 0.002 58.805 58.805 qs_scf_new_mos 15 5.0 0.000 0.000 53.487 53.487 eigensolver 15 6.0 0.002 0.002 44.684 44.684 qs_vxc_create 15 8.0 0.046 0.046 41.217 41.217 calculate_dispersion_nonloc 15 9.0 7.768 7.768 35.725 35.725 pw_transfer 1191 10.0 0.085 0.085 28.641 28.641 cp_fm_diag_elpa 15 7.0 0.000 0.000 28.458 28.458 cp_fm_diag_elpa_base 15 8.0 25.782 25.782 28.457 28.457 fft_wrap_pw1pw2 1086 11.0 0.013 0.013 28.371 28.371 qs_rho_update_rho_low 16 5.0 0.000 0.000 25.361 25.361 calculate_rho_elec 16 6.0 0.247 0.247 25.361 25.361 grid_collocate_task_list 16 7.0 23.701 23.701 23.701 23.701 fft_wrap_pw1pw2_150 765 12.0 4.238 4.238 20.751 20.751 sum_up_and_integrate 15 8.0 0.053 0.053 15.976 15.976 integrate_v_rspace 15 9.0 0.026 0.026 15.923 15.923 grid_integrate_task_list 15 10.0 15.250 15.250 15.250 15.250 cp_fm_cholesky_restore 45 7.0 13.797 13.797 13.797 13.797 fft3d_s 1087 13.0 13.055 13.055 13.078 13.078 pw_scatter_s 585 13.1 8.072 8.072 8.072 8.072 fft_wrap_pw1pw2_200 197 12.3 0.896 0.896 7.404 7.404 copy_dbcsr_to_fm 16 5.9 0.001 0.001 6.338 6.338 dbcsr_complete_redistribute 46 8.3 2.553 2.553 6.190 6.190 xc_vxc_pw_create 15 9.0 0.222 0.222 5.445 5.445 vdW_energy 15 10.0 5.164 5.164 5.164 5.164 cp_fm_upper_to_full 30 8.0 5.104 5.104 5.104 5.104 gspace_mixing 14 5.0 0.171 0.171 4.619 4.619 broyden_mixing 14 6.0 3.937 3.937 3.937 3.937 init_scf_run 1 3.0 0.000 0.000 3.750 3.750 xc_pw_derive 90 11.0 0.001 0.001 3.598 3.598 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 3.434 3.434 ------------------------------------------------------------------------------- From /workspace/artifacts/diag_cu144_broy_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.015 0.029 77.622 77.633 qs_energies 1 2.0 0.000 0.000 77.320 77.327 scf_env_do_scf 1 3.0 0.000 0.001 72.453 72.454 scf_env_do_scf_inner_loop 15 4.0 0.001 0.004 72.453 72.454 qs_ks_update_qs_env 15 5.0 0.000 0.001 30.860 30.871 rebuild_ks_matrix 15 6.0 0.000 0.000 30.812 30.823 qs_ks_build_kohn_sham_matrix 15 7.0 0.003 0.004 30.812 30.823 qs_rho_update_rho_low 16 5.0 0.000 0.000 24.800 24.808 calculate_rho_elec 16 6.0 0.007 0.007 24.800 24.807 grid_collocate_task_list 16 7.0 22.020 23.705 22.020 23.705 qs_scf_new_mos 15 5.0 0.000 0.001 17.420 17.463 sum_up_and_integrate 15 8.0 0.012 0.016 17.064 17.118 integrate_v_rspace 15 9.0 0.001 0.001 17.052 17.109 eigensolver 15 6.0 0.002 0.007 16.164 16.224 grid_integrate_task_list 15 10.0 14.808 16.120 14.808 16.120 qs_vxc_create 15 8.0 0.001 0.001 13.324 13.332 cp_fm_diag_elpa 15 7.0 0.000 0.000 11.623 11.631 cp_fm_diag_elpa_base 15 8.0 11.419 11.450 11.608 11.617 calculate_dispersion_nonloc 15 9.0 1.128 1.976 10.902 10.938 pw_transfer 1191 10.0 0.107 0.134 10.497 10.742 fft_wrap_pw1pw2 1086 11.0 0.016 0.018 10.248 10.508 fft3d_ps 1086 13.0 3.322 3.685 8.003 8.452 fft_wrap_pw1pw2_150 765 12.0 0.472 0.593 7.003 7.166 mp_alltoall_z22v 1086 15.0 3.915 4.868 3.915 4.868 cp_fm_cholesky_restore 45 7.0 4.337 4.441 4.337 4.441 yz_to_x 501 13.9 0.311 0.398 2.857 3.346 rs_pw_transfer 158 9.4 0.002 0.003 2.749 3.298 fft_wrap_pw1pw2_200 197 12.3 0.297 0.360 3.067 3.182 density_rs2pw 16 7.0 0.001 0.001 2.611 3.056 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 2.913 2.913 mp_waitany 520 11.3 2.162 2.882 2.162 2.882 build_core_hamiltonian_matrix 1 4.0 0.000 0.000 2.437 2.763 rs_pw_transfer_RS2PW_200 18 8.8 0.044 0.055 1.645 2.704 xc_vxc_pw_create 15 9.0 0.019 0.028 2.421 2.454 x_to_yz 585 14.1 0.423 0.463 1.791 2.175 mp_alltoall_d11v 217 9.2 1.729 2.001 1.729 2.001 xc_pw_derive 90 11.0 0.001 0.001 1.674 1.786 vdW_energy 15 10.0 1.661 1.747 1.661 1.747 rs_gather_matrices 15 10.0 0.025 0.030 1.468 1.745 init_scf_run 1 3.0 0.000 0.001 1.676 1.677 build_core_ppnl 1 5.0 1.439 1.671 1.439 1.671 scf_env_initial_rho_setup 1 4.0 0.000 0.000 1.563 1.563 ------------------------------------------------------------------------------- Plot: name="diag_cu144_broy_timings_32omp", title="Timings of diag_cu144_broy with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32omp", name="rest", label="rest", y=58.89499999999998, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=25.782, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=23.701, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=15.25, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=13.797, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_s", label="fft3d_s", y=13.055, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=0.0, yerr=0.0 Plot: name="diag_cu144_broy_timings_32mpi", title="Timings of diag_cu144_broy with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="rest", label="rest", y=21.122999999999998, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=11.419, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=22.02, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=14.808, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=4.337, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=3.915, yerr=0.0 Running bench_dftb.inp with 1 threads and 32 ranks... done. Running bench_dftb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/bench_dftb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.116 0.116 289.979 289.979 qs_energies 1 2.0 0.000 0.000 289.797 289.797 ls_scf 1 3.0 0.000 0.000 288.502 288.502 ls_scf_main 1 4.0 0.002 0.002 278.184 278.184 density_matrix_trs4 11 5.0 0.016 0.016 175.571 175.571 ls_scf_dm_to_ks 11 5.0 0.000 0.000 96.957 96.957 matrix_ls_to_qs 11 6.0 0.000 0.000 93.465 93.465 dbcsr_multiply_generic 185 6.1 0.810 0.810 86.421 86.421 arnoldi_extremal 12 6.1 0.000 0.000 76.222 76.222 arnoldi_normal_ev 12 7.1 0.016 0.016 76.222 76.222 build_subspace 23 8.1 0.090 0.090 74.956 74.956 dbcsr_matrix_vector_mult 652 9.0 0.182 0.182 74.010 74.010 dbcsr_matrix_vector_mult_local 652 10.0 72.123 72.123 72.132 72.132 multiply_cannon 185 7.1 0.339 0.339 54.140 54.140 dbcsr_copy_into_existing 11 7.0 51.762 51.762 51.762 51.762 dbcsr_complete_redistribute 23 7.5 32.837 32.837 45.771 45.771 matrix_decluster 11 7.0 0.000 0.000 41.702 41.702 multiply_cannon_loop 185 8.1 0.351 0.351 41.394 41.394 multiply_cannon_multrec 185 9.1 30.564 30.564 30.767 30.767 make_m2s 370 7.1 0.040 0.040 27.151 27.151 make_images 370 8.1 11.077 11.077 25.171 25.171 dbcsr_finalize 646 7.5 0.228 0.228 17.544 17.544 dbcsr_merge_all 597 8.5 3.027 3.027 16.222 16.222 setup_rec_index_2d 370 8.1 12.288 12.288 12.288 12.288 tree_to_linear_d 110 9.4 11.636 11.636 11.636 11.636 dbcsr_sort_indices 1103 9.9 10.522 10.522 10.522 10.522 calculate_norms 370 9.1 10.275 10.275 10.275 10.275 ls_scf_init_scf 1 4.0 0.000 0.000 9.503 9.503 quick_finalize 395 10.0 0.508 0.508 9.237 9.237 ls_scf_init_matrix_S 1 5.0 0.000 0.000 9.156 9.156 dbcsr_special_finalize 370 9.1 0.003 0.003 8.548 8.548 matrix_sqrt_Newton_Schulz 1 6.0 0.001 0.001 8.462 8.462 matrix_qs_to_ls 12 5.1 0.000 0.000 5.872 5.872 matrix_cluster 12 6.1 0.000 0.000 5.872 5.872 ------------------------------------------------------------------------------- From /workspace/artifacts/bench_dftb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.011 0.027 88.904 88.916 qs_energies 1 2.0 0.000 0.000 88.805 88.805 ls_scf 1 3.0 0.000 0.000 88.731 88.732 ls_scf_main 1 4.0 0.001 0.011 85.224 85.224 density_matrix_trs4 11 5.0 0.007 0.024 81.859 81.921 dbcsr_multiply_generic 185 6.1 0.071 0.091 76.534 76.847 multiply_cannon 185 7.1 0.041 0.051 64.194 65.590 multiply_cannon_loop 185 8.1 0.169 0.216 61.088 62.433 multiply_cannon_multrec 1480 9.1 37.015 41.901 37.387 42.272 mp_waitall_1 11936 10.3 19.724 24.041 19.724 24.041 multiply_cannon_metrocomm3 1480 9.1 0.018 0.022 11.655 17.030 calculate_norms 2960 9.1 7.253 10.454 7.253 10.454 multiply_cannon_metrocomm1 1480 9.1 0.010 0.013 4.543 9.801 make_m2s 370 7.1 0.039 0.045 8.540 8.629 make_images 370 8.1 0.677 0.772 8.398 8.491 arnoldi_extremal 12 6.1 0.000 0.000 4.444 4.468 arnoldi_normal_ev 12 7.1 0.001 0.004 4.443 4.467 build_subspace 23 8.1 0.026 0.038 4.311 4.315 make_images_data 370 9.1 0.011 0.014 3.857 4.115 dbcsr_matrix_vector_mult 652 9.0 0.012 0.061 3.440 3.799 hybrid_alltoall_any 393 9.9 0.262 1.523 3.402 3.783 mp_sum_l 1119 5.6 2.624 3.562 2.624 3.562 dbcsr_matrix_vector_mult_local 652 10.0 2.112 3.315 2.114 3.320 ls_scf_dm_to_ks 11 5.0 0.000 0.000 2.953 3.042 dbcsr_complete_redistribute 23 7.5 1.440 1.962 2.386 2.775 matrix_ls_to_qs 11 6.0 0.000 0.000 2.298 2.699 ls_scf_init_scf 1 4.0 0.000 0.000 2.695 2.697 ls_scf_init_matrix_S 1 5.0 0.000 0.000 2.666 2.672 matrix_decluster 11 7.0 0.000 0.000 2.141 2.503 dbcsr_multiply_generic_mpsum_f 137 7.1 0.000 0.001 1.610 2.438 matrix_sqrt_Newton_Schulz 1 6.0 0.000 0.002 2.432 2.434 make_images_pack 370 9.1 2.071 2.291 2.075 2.294 buffer_matrices_ensure_size 370 8.1 1.910 2.263 1.910 2.263 mp_sum_dv 2907 10.4 1.655 1.945 1.655 1.945 dbcsr_finalize 646 7.5 0.009 0.011 1.626 1.842 ------------------------------------------------------------------------------- Plot: name="bench_dftb_timings_32omp", title="Timings of bench_dftb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32omp", name="rest", label="rest", y=80.12999999999997, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=72.123, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=51.762, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=32.837, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=30.564, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="setup_rec_index_2d", label="setup_rec_index_2d", y=12.288, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="calculate_norms", label="calculate_norms", y=10.275, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="bench_dftb_timings_32mpi", title="Timings of bench_dftb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32mpi", name="rest", label="rest", y=18.73599999999999, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=2.112, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=1.44, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=37.015, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="setup_rec_index_2d", label="setup_rec_index_2d", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="calculate_norms", label="calculate_norms", y=7.253, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=2.624, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=19.724, yerr=0.0 Running dbcsr.inp with 1 threads and 32 ranks... done. Running dbcsr.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/dbcsr_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.007 80.999 80.999 lib_test 1 2.0 0.000 0.000 80.954 80.954 dbcsr_run_tests 3 3.0 0.002 0.002 80.954 80.954 test_multiplies_multiproc 3 4.0 0.001 0.001 64.328 64.328 dbcsr_redistribute 9 5.0 39.978 39.978 41.756 41.756 dbcsr_multiply_generic 9 5.0 0.001 0.001 20.762 20.762 dbcsr_make_random_matrix 9 4.0 13.173 13.173 16.509 16.509 multiply_cannon 9 6.0 0.049 0.049 15.333 15.333 multiply_cannon_loop 9 7.0 0.040 0.040 14.907 14.907 multiply_cannon_multrec 9 8.0 14.867 14.867 14.868 14.868 dbcsr_finalize 27 5.7 0.018 0.018 6.296 6.296 dbcsr_merge_all 18 6.5 2.383 2.383 5.506 5.506 dbcsr_data_release 975 7.6 2.948 2.948 2.948 2.948 tree_to_linear_d 9 7.0 2.148 2.148 2.148 2.148 make_m2s 18 6.0 0.001 0.001 1.880 1.880 make_images 18 7.0 0.617 0.617 1.768 1.768 ------------------------------------------------------------------------------- From /workspace/artifacts/dbcsr_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.004 0.012 23.994 23.999 lib_test 1 2.0 0.000 0.000 23.960 23.981 dbcsr_run_tests 3 3.0 0.001 0.001 23.959 23.981 test_multiplies_multiproc 3 4.0 0.000 0.003 23.005 23.077 dbcsr_multiply_generic 9 5.0 0.001 0.002 20.829 20.946 multiply_cannon 9 6.0 0.002 0.003 18.436 18.865 multiply_cannon_loop 9 7.0 0.003 0.003 18.056 18.472 multiply_cannon_multrec 72 8.0 14.993 15.875 14.994 15.877 mp_waitall_1 576 9.2 3.490 4.384 3.490 4.384 multiply_cannon_metrocomm1 72 8.0 0.001 0.002 2.617 3.440 mp_sum_l 390 2.5 0.675 1.377 0.675 1.377 dbcsr_multiply_generic_mpsum_f 9 6.0 0.000 0.000 0.668 1.370 multiply_cannon_metrocomm3 72 8.0 0.000 0.000 0.435 1.256 dbcsr_make_random_matrix 9 4.0 0.747 1.010 0.916 1.123 make_m2s 18 6.0 0.001 0.001 0.914 0.978 make_images 18 7.0 0.023 0.027 0.910 0.974 dbcsr_finalize 27 5.7 0.000 0.001 0.849 0.949 dbcsr_data_release 444 7.6 0.788 0.903 0.788 0.903 dbcsr_merge_all 18 6.5 0.131 0.158 0.691 0.784 dbcsr_destroy 111 5.9 0.002 0.035 0.634 0.759 dbcsr_checksum 6 5.0 0.211 0.604 0.608 0.608 make_images_data 18 8.0 0.001 0.001 0.489 0.582 dbcsr_redistribute 9 5.0 0.299 0.346 0.530 0.572 hybrid_alltoall_any 18 9.0 0.046 0.228 0.448 0.517 dbcsr_data_copy_aa2 18 7.5 0.421 0.513 0.421 0.513 ------------------------------------------------------------------------------- Plot: name="dbcsr_timings_32omp", title="Timings of dbcsr with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32omp", name="rest", label="rest", y=7.650000000000006, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_redistribute", label="dbcsr_redistribute", y=39.978, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=14.867, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=13.173, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_data_release", label="dbcsr_data_release", y=2.948, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_merge_all", label="dbcsr_merge_all", y=2.383, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 Plot: name="dbcsr_timings_32mpi", title="Timings of dbcsr with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32mpi", name="rest", label="rest", y=2.8709999999999987, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_redistribute", label="dbcsr_redistribute", y=0.299, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=14.993, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=0.747, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_data_release", label="dbcsr_data_release", y=0.788, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_merge_all", label="dbcsr_merge_all", y=0.131, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=3.49, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=0.675, yerr=0.0 Running MQAE_single_node.inp with 1 threads and 32 ranks... done. Running MQAE_single_node.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/MQAE_single_node_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.047 0.047 137.404 137.404 qs_mol_dyn_low 1 2.0 0.004 0.004 135.974 135.974 velocity_verlet 5 3.0 0.004 0.004 110.959 110.959 qmmm_el_coupling 6 3.8 0.000 0.000 88.630 88.630 qmmm_elec_with_gaussian 6 4.8 0.100 0.100 88.626 88.626 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 87.834 87.834 qmmm_elec_gaussian_low_G 6 6.8 86.905 86.905 86.905 86.905 qs_forces 6 3.8 0.001 0.001 38.249 38.249 qs_energies 6 4.8 0.000 0.000 33.903 33.903 scf_env_do_scf 6 5.8 0.001 0.001 31.454 31.454 scf_env_do_scf_inner_loop 39 6.8 0.005 0.005 27.467 27.467 rebuild_ks_matrix 45 8.4 0.000 0.000 26.439 26.439 qs_ks_build_kohn_sham_matrix 45 9.4 0.006 0.006 26.439 26.439 qs_ks_update_qs_env 45 7.8 0.000 0.000 22.612 22.612 pw_transfer 966 12.3 0.060 0.060 18.347 18.347 fft_wrap_pw1pw2 801 13.6 0.007 0.007 18.103 18.103 fft_wrap_pw1pw2_150 507 15.2 2.409 2.409 17.678 17.678 qs_vxc_create 45 10.4 0.001 0.001 14.455 14.455 xc_vxc_pw_create 45 11.4 0.646 0.646 14.454 14.454 xc_pw_derive 270 13.4 0.002 0.002 10.104 10.104 fft3d_s 802 15.6 8.488 8.488 8.497 8.497 qs_rho_update_rho_low 45 7.9 0.000 0.000 8.001 8.001 calculate_rho_elec 45 8.9 0.565 0.565 8.001 8.001 xc_rho_set_and_dset_create 45 12.4 0.734 0.734 7.507 7.507 xc_pw_divergence 45 12.4 0.001 0.001 6.242 6.242 qmmm_forces 6 3.8 0.002 0.002 5.885 5.885 pw_scatter_s 429 15.8 5.655 5.655 5.655 5.655 qmmm_forces_with_gaussian 6 4.8 0.108 0.108 5.546 5.546 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 4.711 4.711 pw_integral_ab 2539 7.4 4.577 4.577 4.577 4.577 qs_ks_ddapc 45 10.4 0.001 0.001 4.370 4.370 init_scf_loop 6 6.8 0.000 0.000 3.982 3.982 qmmm_forces_gaussian_low_G 6 6.8 3.957 3.957 3.957 3.957 grid_collocate_task_list 45 9.9 3.862 3.862 3.862 3.862 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 3.835 3.835 density_rs2pw 45 9.9 0.002 0.002 3.575 3.575 sum_up_and_integrate 45 10.4 0.134 0.134 3.329 3.329 integrate_v_rspace 45 11.4 0.007 0.007 3.195 3.195 pw_poisson_solve 51 9.9 1.339 1.339 3.168 3.168 ------------------------------------------------------------------------------- From /workspace/artifacts/MQAE_single_node_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.035 0.057 68.739 68.754 qs_mol_dyn_low 1 2.0 0.003 0.005 67.366 67.474 qs_forces 6 3.8 0.001 0.001 48.957 48.957 qs_energies 6 4.8 0.000 0.001 46.815 46.815 scf_env_do_scf 6 5.8 0.000 0.001 45.621 45.621 scf_env_do_scf_inner_loop 113 6.2 0.003 0.020 43.823 43.824 rebuild_ks_matrix 119 8.1 0.000 0.000 32.263 32.279 qs_ks_build_kohn_sham_matrix 119 9.1 0.018 0.024 32.263 32.278 qs_ks_update_qs_env 119 7.3 0.001 0.001 30.440 30.454 velocity_verlet 5 3.0 0.002 0.004 27.559 27.564 pw_transfer 2446 12.3 0.208 0.236 20.766 21.541 fft_wrap_pw1pw2 2059 13.4 0.025 0.029 20.252 21.070 fft_wrap_pw1pw2_150 1321 14.9 1.752 2.051 19.372 20.224 qs_vxc_create 119 10.1 0.003 0.004 16.342 16.352 xc_vxc_pw_create 119 11.1 0.173 0.245 16.339 16.349 fft3d_ps 2059 15.4 8.409 9.192 15.153 16.314 qs_rho_update_rho_low 119 7.3 0.001 0.001 12.981 12.986 calculate_rho_elec 119 8.3 0.049 0.056 12.980 12.986 xc_pw_derive 714 13.1 0.008 0.012 12.396 12.870 sum_up_and_integrate 119 10.1 0.069 0.088 11.585 11.760 integrate_v_rspace 119 11.1 0.004 0.005 11.516 11.703 rs_pw_transfer 988 11.5 0.014 0.016 9.109 9.497 qmmm_forces 6 3.8 0.002 0.003 9.267 9.267 qmmm_forces_with_gaussian 6 4.8 0.409 0.468 8.571 9.054 xc_pw_divergence 119 12.1 0.004 0.005 8.155 8.521 xc_rho_set_and_dset_create 119 12.1 0.426 0.691 7.791 8.491 qmmm_el_coupling 6 3.8 0.000 0.000 8.020 8.245 qmmm_elec_with_gaussian 6 4.8 0.388 0.457 8.019 8.242 density_rs2pw 119 9.3 0.006 0.008 7.755 8.135 potential_pw2rs 119 12.1 0.007 0.010 7.189 7.217 mp_alltoall_z22v 2059 17.4 5.058 6.706 5.058 6.706 grid_collocate_task_list 119 9.3 5.057 5.538 5.057 5.538 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 4.273 4.527 grid_integrate_task_list 119 12.1 3.889 4.430 3.889 4.430 x_to_yz 1095 16.8 0.952 1.100 3.611 4.388 yz_to_x 964 16.0 0.676 0.814 3.075 4.174 mp_waitany 4028 12.8 3.205 3.998 3.205 3.998 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 3.687 3.936 rs_pw_transfer_PW2RS_150 125 13.9 1.660 1.818 3.742 3.903 qmmm_forces_gaussian_low_G 6 6.8 3.520 3.779 3.520 3.779 pw_restrict_s3 18 5.8 1.599 1.866 3.137 3.396 rs_pw_transfer_RS2PW_150 125 11.2 1.109 1.444 2.907 3.336 qmmm_elec_gaussian_low_G 6 6.8 3.064 3.300 3.064 3.300 qmmm_elec_with_gaussian:spline 6 5.8 0.000 0.000 2.575 2.761 pw_prolongate_s3 18 6.8 1.308 1.489 2.575 2.761 qs_scf_new_mos 113 7.2 0.000 0.001 2.593 2.600 qs_scf_loop_do_ot 113 8.2 0.001 0.001 2.593 2.599 dbcsr_multiply_generic 2588 12.3 0.062 0.078 2.423 2.557 mp_waitall_1 188862 16.2 2.191 2.532 2.191 2.532 ot_scf_mini 113 9.2 0.001 0.002 2.481 2.486 qs_ks_ddapc 119 10.1 0.002 0.003 2.297 2.392 mp_sum_dm3 33 5.7 1.991 2.176 1.991 2.176 pw_gather_p 964 15.0 1.581 1.997 1.581 1.997 pw_integral_ab 2761 7.7 1.463 1.591 1.791 1.902 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 1.840 1.842 init_scf_loop 6 6.8 0.000 0.000 1.795 1.795 pw_scatter_p 1095 15.8 1.706 1.735 1.706 1.735 ot_mini 113 10.2 0.001 0.001 1.514 1.520 mp_sum_d 5820 12.2 0.925 1.484 0.925 1.484 pw_copy 1670 12.6 1.275 1.430 1.275 1.430 ------------------------------------------------------------------------------- Plot: name="MQAE_single_node_timings_32omp", title="Timings of MQAE_single_node with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32omp", name="rest", label="rest", y=23.960000000000008, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=86.905, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_s", label="fft3d_s", y=8.488, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_scatter_s", label="pw_scatter_s", y=5.655, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_integral_ab", label="pw_integral_ab", y=4.577, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=3.957, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=3.862, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=0.0, yerr=0.0 Plot: name="MQAE_single_node_timings_32mpi", title="Timings of MQAE_single_node with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32mpi", name="rest", label="rest", y=38.27900000000001, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=3.064, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_scatter_s", label="pw_scatter_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_integral_ab", label="pw_integral_ab", y=1.463, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=3.52, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=5.057, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=5.058, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=8.409, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=3.889, yerr=0.0 Summary: Performance test took 39 minutes. (cached) Status: OK Uploading artifacts... done EndDate: 2022-09-29 19:07:54+00:00