StartDate: 2022-07-21 11:05:50+00:00 CpuId: 32x AMD (unknown model) [Zen 3], 7nm (SMT disabled) CommitSHA: dbdf36f91bcd85f65f38b982c79a8dd0827d5be2 CommitTime: 2022-07-21 11:18:25 +0200 CommitAuthor: Frederick Stein CommitSubject: Adjust threshold (#2214) Populating docker build cache... done. #################### Building Image cp2k-perf-openmp #################### Dockerfile: /tools/docker/Dockerfile.test_performance Build-Path: / Build-Args: GIT_COMMIT_SHA=dbdf36f91bcd85f65f38b982c79a8dd0827d5be2 Sending build context to Docker daemon 364.2MB Step 1/42 : FROM ubuntu:22.04 22.04: Pulling from library/ubuntu 405f018f9d1d: Already exists Digest: sha256:b6b83d3c331794420340093eb706a6f152d9c1fa51b262d9bf34594887c2c7ac Status: Downloaded newer image for ubuntu:22.04 ---> 27941809078c Step 2/42 : WORKDIR /opt/cp2k-toolchain ---> Using cache ---> 92ee757f28a3 Step 3/42 : COPY ./tools/toolchain/install_requirements*.sh ./ ---> Using cache ---> a398cc4ae5b3 Step 4/42 : RUN ./install_requirements.sh ubuntu:22.04 ---> Using cache ---> e20712e9c254 Step 5/42 : RUN mkdir scripts ---> Using cache ---> c7b9413ca6be Step 6/42 : COPY ./tools/toolchain/scripts/VERSION ./tools/toolchain/scripts/parse_if.py ./tools/toolchain/scripts/tool_kit.sh ./tools/toolchain/scripts/common_vars.sh ./tools/toolchain/scripts/signal_trap.sh ./tools/toolchain/scripts/get_openblas_arch.sh ./scripts/ ---> Using cache ---> 5f4bcd2de9f5 Step 7/42 : COPY ./tools/toolchain/install_cp2k_toolchain.sh . ---> Using cache ---> c39d97839810 Step 8/42 : RUN ./install_cp2k_toolchain.sh --install-all --mpi-mode=mpich --with-gcc=system --dry-run ---> Using cache ---> d0b21d05b338 Step 9/42 : COPY ./tools/toolchain/scripts/stage0/ ./scripts/stage0/ ---> Using cache ---> 6e616c7670ff Step 10/42 : RUN ./scripts/stage0/install_stage0.sh && rm -rf ./build ---> Using cache ---> 5ba44cd61a38 Step 11/42 : COPY ./tools/toolchain/scripts/stage1/ ./scripts/stage1/ ---> Using cache ---> 1aa896c19a24 Step 12/42 : RUN ./scripts/stage1/install_stage1.sh && rm -rf ./build ---> Using cache ---> c7ccbf5e1b85 Step 13/42 : COPY ./tools/toolchain/scripts/stage2/ ./scripts/stage2/ ---> Using cache ---> 9cb6a1bd2cd3 Step 14/42 : RUN ./scripts/stage2/install_stage2.sh && rm -rf ./build ---> Using cache ---> 1eed70bdd06a Step 15/42 : COPY ./tools/toolchain/scripts/stage3/ ./scripts/stage3/ ---> Using cache ---> ac04ff4ae473 Step 16/42 : RUN ./scripts/stage3/install_stage3.sh && rm -rf ./build ---> Using cache ---> cd0e2369620a Step 17/42 : COPY ./tools/toolchain/scripts/stage4/ ./scripts/stage4/ ---> Using cache ---> fc75688b2cb5 Step 18/42 : RUN ./scripts/stage4/install_stage4.sh && rm -rf ./build ---> Using cache ---> bba0dcb0b93f Step 19/42 : COPY ./tools/toolchain/scripts/stage5/ ./scripts/stage5/ ---> Using cache ---> 303741b69f4c Step 20/42 : RUN ./scripts/stage5/install_stage5.sh && rm -rf ./build ---> Using cache ---> 5c3df3a2c686 Step 21/42 : COPY ./tools/toolchain/scripts/stage6/ ./scripts/stage6/ ---> Using cache ---> 7290ede69f62 Step 22/42 : RUN ./scripts/stage6/install_stage6.sh && rm -rf ./build ---> Using cache ---> 805ee354e427 Step 23/42 : COPY ./tools/toolchain/scripts/stage7/ ./scripts/stage7/ ---> Using cache ---> c71f3cda37eb Step 24/42 : RUN ./scripts/stage7/install_stage7.sh && rm -rf ./build ---> Using cache ---> a18c9cfd8730 Step 25/42 : COPY ./tools/toolchain/scripts/stage8/ ./scripts/stage8/ ---> Using cache ---> 320efbba4fd1 Step 26/42 : RUN ./scripts/stage8/install_stage8.sh && rm -rf ./build ---> Using cache ---> 68bee71ec24d Step 27/42 : COPY ./tools/toolchain/scripts/arch_base.tmpl ./tools/toolchain/scripts/generate_arch_files.sh ./scripts/ ---> Using cache ---> 6b6e4f3e24cc Step 28/42 : RUN ./scripts/generate_arch_files.sh && rm -rf ./build ---> Using cache ---> d59b20ddd43b Step 29/42 : WORKDIR /opt/cp2k ---> Using cache ---> 775ae464a90b Step 30/42 : COPY ./Makefile . ---> Using cache ---> 98767db1bf2a Step 31/42 : COPY ./src ./src ---> Using cache ---> 93cecf8c28fa Step 32/42 : COPY ./exts ./exts ---> Using cache ---> 764aa139fef1 Step 33/42 : COPY ./tools/build_utils ./tools/build_utils ---> Using cache ---> ad54e65c57d2 Step 34/42 : RUN /bin/bash -c " mkdir -p arch && ln -vs /opt/cp2k-toolchain/install/arch/local.psmp ./arch/ && echo 'Compiling cp2k...' && source /opt/cp2k-toolchain/install/setup && ( make -j ARCH=local VERSION=psmp &> /dev/null || true ) && ( [ ! -f ./exe/local/cp2k.psmp ] || ldd ./exe/local/cp2k.psmp | grep -q libmpi )" ---> Using cache ---> 50d54d82bc2d Step 35/42 : COPY ./data ./data ---> Using cache ---> 0d5f1456b6f3 Step 36/42 : COPY ./tests ./tests ---> e735e4aa8f8a Step 37/42 : COPY ./tools/regtesting ./tools/regtesting ---> ec24746f5dbe Step 38/42 : COPY ./benchmarks ./benchmarks ---> 108ff29bca40 Step 39/42 : COPY ./tools/docker/scripts/test_performance.sh ./tools/docker/scripts/plot_performance.py ./ ---> 57a021295c83 Step 40/42 : RUN ./test_performance.sh "local" 2>&1 | tee report.log ---> Running in 04eaa43ab37f ========== Compiling CP2K ========== Compiling cp2k... done. Checking benchmark inputs... Found 60 input files and 0 errors. ========== Running Performance Test ========== Running H2O-64.inp with 1 threads and 32 ranks... done. Running H2O-64.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.025 0.025 186.901 186.901 qs_mol_dyn_low 1 2.0 0.002 0.002 186.283 186.283 qs_forces 11 3.9 0.001 0.001 186.244 186.244 qs_energies 11 4.9 0.001 0.001 179.927 179.927 scf_env_do_scf 11 5.9 0.001 0.001 167.363 167.363 velocity_verlet 10 3.0 0.002 0.002 128.970 128.970 init_scf_loop 11 6.9 0.000 0.000 90.369 90.369 prepare_preconditioner 11 7.9 0.000 0.000 87.770 87.770 make_preconditioner 11 8.9 0.000 0.000 87.770 87.770 make_full_inverse_cholesky 11 9.9 0.000 0.000 86.636 86.636 scf_env_do_scf_inner_loop 108 6.5 0.011 0.011 76.862 76.862 cp_fm_cholesky_invert 11 10.9 60.188 60.188 60.188 60.188 qs_scf_new_mos 108 7.5 0.001 0.001 40.750 40.750 qs_scf_loop_do_ot 108 8.5 0.001 0.001 40.750 40.750 ot_scf_mini 108 9.5 0.003 0.003 39.165 39.165 cp_fm_cholesky_decompose 22 10.9 22.585 22.585 22.585 22.585 rebuild_ks_matrix 119 8.3 0.001 0.001 21.360 21.360 qs_ks_build_kohn_sham_matrix 119 9.3 0.012 0.012 21.360 21.360 dbcsr_multiply_generic 2286 12.5 0.188 0.188 21.132 21.132 qs_ot_get_p 119 10.4 0.001 0.001 21.118 21.118 qs_rho_update_rho_low 119 7.7 0.001 0.001 20.446 20.446 calculate_rho_elec 119 8.7 0.980 0.980 20.445 20.445 qs_ot_p2m_diag 50 11.0 0.171 0.171 19.769 19.769 qs_ks_update_qs_env 119 7.6 0.001 0.001 19.625 19.625 cp_dbcsr_syevd 50 12.0 0.002 0.002 19.317 19.317 cp_fm_diag_elpa 50 13.0 0.000 0.000 18.242 18.242 cp_fm_diag_elpa_base 50 14.0 18.181 18.181 18.242 18.242 grid_collocate_task_list 119 9.7 15.974 15.974 15.974 15.974 ot_mini 108 10.5 0.001 0.001 14.355 14.355 sum_up_and_integrate 119 10.3 0.205 0.205 13.296 13.296 integrate_v_rspace 119 11.3 0.118 0.118 13.091 13.091 grid_integrate_task_list 119 12.3 11.062 11.062 11.062 11.062 make_m2s 4572 13.5 0.046 0.046 10.944 10.944 qs_ot_get_derivative 108 11.5 0.001 0.001 8.579 8.579 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 6.485 6.485 pw_transfer 1439 11.6 0.059 0.059 6.045 6.045 multiply_cannon 2286 13.5 0.171 0.171 5.912 5.912 dbcsr_make_dense_low 5837 15.5 0.068 0.068 5.859 5.859 fft_wrap_pw1pw2 1201 12.6 0.007 0.007 5.818 5.818 make_dense_data 5837 16.5 5.118 5.118 5.777 5.777 ot_diis_step 108 11.5 0.004 0.004 5.773 5.773 make_images 4572 14.5 2.111 2.111 5.501 5.501 multiply_cannon_loop 2286 14.5 0.186 0.186 5.431 5.431 multiply_cannon_multrec 2286 15.5 5.186 5.186 5.243 5.243 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 5.184 5.184 apply_single 119 13.6 0.000 0.000 5.184 5.184 dbcsr_make_images_dense 3978 14.8 0.018 0.018 5.116 5.116 init_scf_run 11 5.9 0.002 0.002 5.024 5.024 scf_env_initial_rho_setup 11 6.9 0.001 0.001 5.022 5.022 fft_wrap_pw1pw2_140 487 13.2 0.530 0.530 4.980 4.980 wfi_extrapolate 11 7.9 0.001 0.001 4.459 4.459 dbcsr_copy 2102 12.0 0.209 0.209 3.804 3.804 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.008 0.023 53.864 53.874 qs_mol_dyn_low 1 2.0 0.004 0.005 53.755 53.759 qs_forces 11 3.9 0.001 0.001 53.715 53.715 qs_energies 11 4.9 0.001 0.001 50.005 50.008 scf_env_do_scf 11 5.9 0.001 0.002 46.054 46.054 scf_env_do_scf_inner_loop 108 6.5 0.003 0.017 42.654 42.654 velocity_verlet 10 3.0 0.001 0.003 31.987 31.988 rebuild_ks_matrix 119 8.3 0.000 0.001 20.416 20.484 qs_ks_build_kohn_sham_matrix 119 9.3 0.015 0.021 20.416 20.483 qs_ks_update_qs_env 119 7.6 0.001 0.001 18.163 18.226 qs_rho_update_rho_low 119 7.7 0.001 0.001 16.184 16.221 calculate_rho_elec 119 8.7 0.030 0.031 16.183 16.221 dbcsr_multiply_generic 2286 12.5 0.072 0.086 14.655 15.806 sum_up_and_integrate 119 10.3 0.019 0.021 15.588 15.769 integrate_v_rspace 119 11.3 0.004 0.006 15.569 15.749 qs_scf_new_mos 108 7.5 0.001 0.001 11.437 11.516 qs_scf_loop_do_ot 108 8.5 0.001 0.001 11.437 11.516 grid_collocate_task_list 119 9.7 9.623 11.128 9.623 11.128 grid_integrate_task_list 119 12.3 8.558 11.033 8.558 11.033 multiply_cannon 2286 13.5 0.120 0.143 10.535 10.973 ot_scf_mini 108 9.5 0.002 0.002 10.671 10.748 multiply_cannon_loop 2286 14.5 0.096 0.118 9.934 10.270 mp_waitall_1 169478 16.3 8.677 9.216 8.677 9.216 rs_pw_transfer 974 11.9 0.011 0.014 7.313 7.737 density_rs2pw 119 9.7 0.005 0.007 6.193 6.627 ot_mini 108 10.5 0.001 0.001 6.191 6.275 multiply_cannon_metrocomm3 18288 15.5 0.039 0.051 5.716 6.033 mp_waitany 9880 13.7 3.964 4.448 3.964 4.448 mp_alltoall_d11v 2130 13.8 3.657 4.132 3.657 4.132 rs_pw_transfer_RS2PW_140 130 11.5 0.280 0.318 3.680 4.097 rs_gather_matrices 119 12.3 0.079 0.089 3.396 3.869 multiply_cannon_multrec 18288 15.5 3.357 3.590 3.367 3.603 potential_pw2rs 119 12.3 0.006 0.008 3.574 3.591 pw_transfer 1439 11.6 0.080 0.087 3.489 3.524 init_scf_loop 11 6.9 0.000 0.000 3.386 3.386 fft_wrap_pw1pw2 1201 12.6 0.010 0.012 3.337 3.381 qs_ot_get_derivative 108 11.5 0.001 0.001 3.207 3.285 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 2.887 2.967 apply_single 119 13.6 0.000 0.000 2.886 2.967 ot_diis_step 108 11.5 0.004 0.004 2.961 2.962 fft_wrap_pw1pw2_140 487 13.2 0.294 0.339 2.827 2.889 init_scf_run 11 5.9 0.000 0.004 2.744 2.744 scf_env_initial_rho_setup 11 6.9 0.000 0.003 2.744 2.744 make_m2s 4572 13.5 0.046 0.056 2.601 2.647 wfi_extrapolate 11 7.9 0.001 0.001 2.472 2.472 fft3d_ps 1201 14.6 1.260 1.321 2.401 2.445 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 2.404 2.413 make_images 4572 14.5 0.119 0.141 2.264 2.313 mp_sum_l 11218 13.2 1.059 2.229 1.059 2.229 rs_pw_transfer_PW2RS_140 130 13.9 0.628 0.684 1.522 1.557 qs_ot_get_p 119 10.4 0.001 0.001 1.375 1.458 mp_sum_d 4129 12.0 1.029 1.403 1.029 1.403 make_images_data 4572 15.5 0.037 0.045 1.175 1.295 prepare_preconditioner 11 7.9 0.000 0.000 1.249 1.266 make_preconditioner 11 8.9 0.000 0.000 1.249 1.266 build_core_hamiltonian_matrix_ 11 4.9 0.000 0.000 0.917 1.163 make_full_inverse_cholesky 11 9.9 0.000 0.000 1.135 1.149 hybrid_alltoall_any 4725 16.4 0.066 0.190 1.033 1.120 qs_ot_get_derivative_diag 49 12.0 0.001 0.001 1.076 1.115 ------------------------------------------------------------------------------- Plot: name="H2O-64_timings_32omp", title="Timings of H2O-64 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32omp", name="rest", label="rest", y=58.911000000000016, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=60.188, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=22.585, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=18.181, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=15.974, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=11.062, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_alltoall_d11v", label="mp_alltoall_d11v", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 Plot: name="H2O-64_timings_32mpi", title="Timings of H2O-64 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32mpi", name="rest", label="rest", y=19.384999999999998, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=9.623, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=8.558, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=8.677, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_alltoall_d11v", label="mp_alltoall_d11v", y=3.657, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitany", label="mp_waitany", y=3.964, yerr=0.0 Running H2O-64_nonortho.inp with 1 threads and 32 ranks... done. Running H2O-64_nonortho.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_nonortho_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.034 0.034 210.465 210.465 qs_mol_dyn_low 1 2.0 0.002 0.002 209.793 209.793 qs_forces 11 3.9 0.001 0.001 209.753 209.753 qs_energies 11 4.9 0.001 0.001 201.539 201.539 scf_env_do_scf 11 5.9 0.001 0.001 186.454 186.454 velocity_verlet 10 3.0 0.002 0.002 144.993 144.993 scf_env_do_scf_inner_loop 96 6.5 0.010 0.010 95.283 95.283 init_scf_loop 11 6.9 0.000 0.000 91.018 91.018 prepare_preconditioner 11 7.9 0.000 0.000 86.593 86.593 make_preconditioner 11 8.9 0.000 0.000 86.593 86.593 make_full_inverse_cholesky 11 9.9 0.000 0.000 85.389 85.389 cp_fm_cholesky_invert 11 10.9 58.769 58.769 58.769 58.769 qs_scf_new_mos 96 7.5 0.001 0.001 36.536 36.536 qs_scf_loop_do_ot 96 8.5 0.001 0.001 36.536 36.536 rebuild_ks_matrix 107 8.3 0.001 0.001 35.762 35.762 qs_ks_build_kohn_sham_matrix 107 9.3 0.011 0.011 35.762 35.762 ot_scf_mini 96 9.5 0.002 0.002 35.173 35.173 qs_rho_update_rho_low 107 7.7 0.000 0.000 33.909 33.909 calculate_rho_elec 107 8.7 0.865 0.865 33.908 33.908 qs_ks_update_qs_env 107 7.6 0.001 0.001 32.154 32.154 grid_collocate_task_list 107 9.7 29.756 29.756 29.756 29.756 sum_up_and_integrate 107 10.3 0.198 0.198 28.244 28.244 integrate_v_rspace 107 11.3 0.109 0.109 28.046 28.046 grid_integrate_task_list 107 12.3 26.068 26.068 26.068 26.068 cp_fm_cholesky_decompose 22 10.9 22.529 22.529 22.529 22.529 qs_ot_get_p 107 10.4 0.001 0.001 19.032 19.032 dbcsr_multiply_generic 1966 12.4 0.152 0.152 18.959 18.959 qs_ot_p2m_diag 44 11.0 0.137 0.137 17.848 17.848 cp_dbcsr_syevd 44 12.0 0.002 0.002 17.472 17.472 cp_fm_diag_elpa 44 13.0 0.000 0.000 16.299 16.299 cp_fm_diag_elpa_base 44 14.0 16.245 16.245 16.298 16.298 ot_mini 96 10.5 0.001 0.001 12.751 12.751 make_m2s 3932 13.4 0.040 0.040 9.847 9.847 qs_ot_get_derivative 96 11.5 0.001 0.001 7.971 7.971 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 6.959 6.959 init_scf_run 11 5.9 0.002 0.002 6.894 6.894 scf_env_initial_rho_setup 11 6.9 0.001 0.001 6.893 6.893 wfi_extrapolate 11 7.9 0.001 0.001 6.260 6.260 pw_transfer 1295 11.6 0.056 0.056 5.798 5.798 fft_wrap_pw1pw2 1081 12.6 0.006 0.006 5.572 5.572 multiply_cannon 1966 13.4 0.178 0.178 5.503 5.503 dbcsr_make_dense_low 4961 15.5 0.062 0.062 5.212 5.212 make_dense_data 4961 16.5 4.531 4.531 5.139 5.139 multiply_cannon_loop 1966 14.4 0.061 0.061 5.039 5.039 make_images 3932 14.4 1.941 1.941 4.995 4.995 multiply_cannon_multrec 1966 15.4 4.923 4.923 4.977 4.977 ot_diis_step 96 11.5 0.003 0.003 4.776 4.776 fft_wrap_pw1pw2_140 439 13.2 0.465 0.465 4.744 4.744 dbcsr_make_images_dense 3386 14.7 0.016 0.016 4.530 4.530 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 4.481 4.481 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 4.428 4.428 apply_single 107 13.6 0.000 0.000 4.428 4.428 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_nonortho_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.021 87.214 87.225 qs_mol_dyn_low 1 2.0 0.003 0.004 87.102 87.107 qs_forces 11 3.9 0.001 0.002 87.060 87.061 qs_energies 11 4.9 0.001 0.001 81.120 81.122 scf_env_do_scf 11 5.9 0.001 0.002 75.191 75.193 scf_env_do_scf_inner_loop 96 6.5 0.003 0.022 69.712 69.717 velocity_verlet 10 3.0 0.001 0.003 51.853 51.854 rebuild_ks_matrix 107 8.3 0.000 0.001 38.934 39.007 qs_ks_build_kohn_sham_matrix 107 9.3 0.014 0.020 38.934 39.007 sum_up_and_integrate 107 10.3 0.018 0.020 34.380 34.785 integrate_v_rspace 107 11.3 0.004 0.005 34.362 34.767 qs_ks_update_qs_env 107 7.6 0.001 0.001 34.342 34.412 qs_rho_update_rho_low 107 7.7 0.000 0.001 33.012 33.021 calculate_rho_elec 107 8.7 0.027 0.030 33.011 33.020 grid_integrate_task_list 107 12.3 23.962 31.272 23.962 31.272 grid_collocate_task_list 107 9.7 23.293 29.839 23.293 29.839 dbcsr_multiply_generic 1966 12.4 0.065 0.078 12.691 18.297 rs_pw_transfer 878 11.9 0.011 0.013 10.464 11.352 density_rs2pw 107 9.7 0.005 0.006 9.371 10.258 qs_scf_new_mos 96 7.5 0.001 0.001 9.681 9.770 qs_scf_loop_do_ot 96 8.5 0.001 0.001 9.681 9.769 multiply_cannon 1966 13.4 0.109 0.129 8.932 9.328 ot_scf_mini 96 9.5 0.002 0.002 9.073 9.169 multiply_cannon_loop 1966 14.4 0.090 0.112 8.429 8.858 mp_alltoall_d11v 1998 13.7 7.309 8.660 7.309 8.660 rs_gather_matrices 107 12.3 0.071 0.084 7.053 8.389 mp_waitany 8968 13.7 7.367 8.321 7.367 8.321 rs_pw_transfer_RS2PW_140 118 11.5 0.268 0.316 7.092 7.968 mp_waitall_1 146670 16.2 7.290 7.614 7.290 7.614 mp_sum_l 9666 13.1 1.104 6.778 1.104 6.778 init_scf_loop 11 6.9 0.000 0.001 5.460 5.460 ot_mini 96 10.5 0.001 0.001 5.326 5.425 multiply_cannon_metrocomm3 15728 15.4 0.036 0.048 4.667 5.175 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 4.738 4.745 init_scf_run 11 5.9 0.000 0.005 4.705 4.706 scf_env_initial_rho_setup 11 6.9 0.000 0.004 4.705 4.705 wfi_extrapolate 11 7.9 0.001 0.001 4.287 4.287 multiply_cannon_multrec 15728 15.4 2.998 3.434 3.007 3.447 potential_pw2rs 107 12.3 0.006 0.007 3.307 3.329 pw_transfer 1295 11.6 0.073 0.081 3.200 3.250 fft_wrap_pw1pw2 1081 12.6 0.009 0.016 3.062 3.114 qs_ot_get_derivative 96 11.5 0.001 0.001 2.764 2.853 fft_wrap_pw1pw2_140 439 13.2 0.267 0.296 2.607 2.702 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 2.469 2.555 apply_single 107 13.6 0.000 0.000 2.469 2.555 ot_diis_step 96 11.5 0.003 0.004 2.539 2.539 make_m2s 3932 13.4 0.041 0.051 2.243 2.300 fft3d_ps 1081 14.6 1.150 1.229 2.199 2.254 make_images 3932 14.4 0.107 0.129 1.942 2.004 ------------------------------------------------------------------------------- Plot: name="H2O-64_nonortho_timings_32omp", title="Timings of H2O-64_nonortho with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="rest", label="rest", y=57.097999999999985, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=58.769, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=29.756, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=26.068, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=22.529, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=16.245, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_alltoall_d11v", label="mp_alltoall_d11v", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 Plot: name="H2O-64_nonortho_timings_32mpi", title="Timings of H2O-64_nonortho with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="rest", label="rest", y=17.99300000000001, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=23.293, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=23.962, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=7.29, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_alltoall_d11v", label="mp_alltoall_d11v", y=7.309, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitany", label="mp_waitany", y=7.367, yerr=0.0 Running H2O-hyb.inp with 1 threads and 32 ranks... done. Running H2O-hyb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-hyb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.181 0.181 161.935 161.935 qs_energies 1 2.0 0.000 0.000 161.105 161.105 scf_env_do_scf 1 3.0 0.000 0.000 159.882 159.882 qs_ks_update_qs_env 8 5.0 0.000 0.000 123.732 123.732 rebuild_ks_matrix 7 6.0 0.000 0.000 123.675 123.675 qs_ks_build_kohn_sham_matrix 7 7.0 0.001 0.001 123.675 123.675 hfx_ks_matrix 7 8.0 0.000 0.000 105.020 105.020 integrate_four_center 7 9.0 1.391 1.391 104.986 104.986 integrate_four_center_main 7 10.0 0.523 0.523 91.680 91.680 integrate_four_center_bin 455 11.0 91.158 91.158 91.158 91.158 init_scf_loop 1 4.0 0.000 0.000 89.157 89.157 scf_env_do_scf_inner_loop 7 4.0 0.001 0.001 70.714 70.714 prepare_preconditioner 1 5.0 0.000 0.000 30.678 30.678 make_preconditioner 1 6.0 0.000 0.000 30.678 30.678 arnoldi_normal_ev 11 9.3 0.002 0.002 18.268 18.268 estimate_cond_num 1 7.0 0.000 0.000 18.223 18.223 build_subspace 28 9.5 0.010 0.010 17.961 17.961 integrate_four_center_load 7 10.0 0.000 0.000 11.642 11.642 hfx_load_balance 1 11.0 0.001 0.001 11.642 11.642 admm_mo_calc_rho_aux 7 8.0 0.000 0.000 10.382 10.382 dbcsr_sym_m_v_mult 562 10.0 0.018 0.018 9.615 9.615 admm_fit_mo_coeffs 7 9.0 0.000 0.000 9.105 9.105 cp_fm_cholesky_invert 2 9.5 8.012 8.012 8.012 8.012 make_full_inverse_cholesky 1 7.0 0.000 0.000 7.576 7.576 DGKS_ortho_d 673 10.6 7.168 7.168 7.170 7.170 hfx_load_balance_bin 1 12.0 5.822 5.822 5.822 5.822 hfx_load_balance_count 1 12.0 5.803 5.803 5.803 5.803 Gram_Schmidt_ortho_d 673 10.6 5.715 5.715 5.716 5.716 purify_mo_diag 7 10.0 0.001 0.001 5.080 5.080 dbcsr_copy 1321 10.8 1.498 1.498 4.992 4.992 make_full_single_inverse 1 7.0 0.000 0.000 4.700 4.700 arnoldi_generalized_ev 1 8.0 0.000 0.000 4.596 4.596 gev_build_subspace 4 9.0 0.005 0.005 4.485 4.485 cp_fm_syevd 7 11.0 0.000 0.000 4.464 4.464 cp_fm_syevd_base 7 12.0 4.464 4.464 4.464 4.464 qs_scf_new_mos 7 5.0 0.000 0.000 4.027 4.027 qs_scf_loop_do_ot 7 6.0 0.000 0.000 4.027 4.027 fit_mo_coeffs 7 10.0 0.000 0.000 4.025 4.025 qs_vxc_create 14 8.0 0.000 0.000 3.974 3.974 xc_vxc_pw_create 14 9.0 0.122 0.122 3.973 3.973 ot_scf_mini 7 7.0 0.000 0.000 3.929 3.929 dbcsr_create_new 3179 12.1 2.467 2.467 3.828 3.828 dbcsr_set 2825 11.8 0.003 0.003 3.345 3.345 dbcsr_zero 2837 12.8 3.342 3.342 3.342 3.342 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-hyb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.196 0.216 143.808 143.819 qs_energies 1 2.0 0.000 0.001 143.492 143.493 scf_env_do_scf 1 3.0 0.000 0.000 143.097 143.097 qs_ks_update_qs_env 8 5.0 0.000 0.000 140.842 140.842 rebuild_ks_matrix 7 6.0 0.000 0.000 140.833 140.833 qs_ks_build_kohn_sham_matrix 7 7.0 0.002 0.002 140.833 140.833 hfx_ks_matrix 7 8.0 0.000 0.000 134.112 134.114 integrate_four_center 7 9.0 0.059 0.356 134.102 134.103 integrate_four_center_main 7 10.0 0.003 0.004 86.361 121.087 integrate_four_center_bin 448 11.0 86.358 121.083 86.358 121.083 scf_env_do_scf_inner_loop 7 4.0 0.000 0.001 80.388 80.388 init_scf_loop 1 4.0 0.000 0.000 62.707 62.708 mp_sync 70 11.3 34.742 36.978 34.742 36.978 integrate_four_center_load 7 10.0 0.000 0.000 12.280 12.284 hfx_load_balance 1 11.0 0.001 0.001 12.280 12.284 mp_sum_l 1135 8.3 6.302 6.592 6.302 6.592 hfx_load_balance_dist 1 12.0 0.000 0.000 6.163 6.450 hfx_load_balance_bin 1 12.0 3.009 6.212 3.009 6.212 hfx_load_balance_count 1 12.0 3.023 5.984 3.023 5.984 qs_vxc_create 14 8.0 0.000 0.000 3.212 3.212 xc_vxc_pw_create 14 9.0 0.008 0.010 3.212 3.212 ------------------------------------------------------------------------------- Plot: name="H2O-hyb_timings_32omp", title="Timings of H2O-hyb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32omp", name="rest", label="rest", y=43.971999999999994, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_bin", label="integrate_four_center_bin", y=91.158, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=8.012, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="DGKS_ortho_d", label="DGKS_ortho_d", y=7.168, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=5.822, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_count", label="hfx_load_balance_count", y=5.803, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 Plot: name="H2O-hyb_timings_32mpi", title="Timings of H2O-hyb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32mpi", name="rest", label="rest", y=10.373999999999995, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_bin", label="integrate_four_center_bin", y=86.358, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="DGKS_ortho_d", label="DGKS_ortho_d", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=3.009, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_count", label="hfx_load_balance_count", y=3.023, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=6.302, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="mp_sync", label="mp_sync", y=34.742, yerr=0.0 Running GW_PBE_4benzene.inp with 1 threads and 32 ranks... done. Running GW_PBE_4benzene.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/GW_PBE_4benzene_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.013 0.013 149.656 149.656 qs_energies 1 2.0 0.000 0.000 149.252 149.252 mp2_main 1 3.0 0.000 0.000 125.278 125.278 mp2_gpw_main 1 4.0 0.000 0.000 123.846 123.846 rpa_ri_compute_en 1 5.0 0.000 0.000 116.249 116.249 rpa_num_int 1 6.0 0.001 0.001 116.243 116.243 compute_mat_P_omega 1 7.0 0.003 0.003 70.099 70.099 compute_mat_P_omega_contract 10 8.0 8.537 8.537 69.889 69.889 dbt_total 2336 9.6 0.011 0.011 57.687 57.687 dbt_contract 787 11.0 0.035 0.035 50.424 50.424 dbt_tas_total 1149 12.2 0.201 0.201 48.980 48.980 dbt_tas_multiply 807 12.1 0.002 0.002 47.626 47.626 dbt_tas_dbm 807 14.1 0.003 0.003 40.621 40.621 dbm_multiply 807 16.1 40.612 40.612 40.612 40.612 dbt_tas_mm_1N 524 15.1 0.002 0.002 29.027 29.027 GW_matrix_operations 10 7.0 0.012 0.012 28.430 28.430 cp_fm_cholesky_invert 10 8.0 27.524 27.524 27.524 27.524 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 25.239 25.239 scf_env_do_scf 1 3.0 0.000 0.000 23.723 23.723 scf_env_do_scf_inner_loop 17 4.0 0.002 0.002 23.723 23.723 qs_scf_new_mos 17 5.0 0.000 0.000 22.039 22.039 eigensolver 18 5.9 0.001 0.001 20.353 20.353 compute_mat_P_omega_calc_M_occ 250 9.0 8.550 8.550 19.495 19.495 cp_fm_diag_elpa 18 6.9 0.000 0.000 13.183 13.183 cp_fm_diag_elpa_base 18 7.9 13.116 13.116 13.183 13.183 cp_fm_cholesky_decompose 14 8.1 12.287 12.287 12.287 12.287 rpa_num_int_RPA_matrix_operati 10 7.0 0.000 0.000 10.860 10.860 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 10.803 10.803 RPA_postprocessing_nokp 10 8.0 0.001 0.001 9.920 9.920 dbt_tas_mm_2 251 15.0 0.001 0.001 9.602 9.602 mp2_ri_gpw_compute_in 1 5.0 0.000 0.000 7.591 7.591 cp_fm_cholesky_restore 51 7.0 7.018 7.018 7.018 7.018 dbt_copy 1103 10.7 0.140 0.140 5.892 5.892 get_2c_integrals 1 6.0 0.000 0.000 5.856 5.856 compute_QP_energies 1 7.0 0.000 0.000 5.851 5.851 compute_self_energy_cubic_gw 1 8.0 0.055 0.055 5.850 5.850 contract_cubic_gw 21 9.0 0.000 0.000 4.734 4.734 dbt_tas_reserve_blocks_index 3261 14.3 0.154 0.154 3.332 3.332 dbm_reserve_blocks 3628 15.3 3.250 3.250 3.250 3.250 ------------------------------------------------------------------------------- From /workspace/artifacts/GW_PBE_4benzene_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.022 36.768 36.778 qs_energies 1 2.0 0.000 0.001 36.673 36.674 mp2_main 1 3.0 0.000 0.001 35.592 35.593 mp2_gpw_main 1 4.0 0.000 0.000 35.552 35.554 rpa_ri_compute_en 1 5.0 0.000 0.000 34.157 34.158 rpa_num_int 1 6.0 0.000 0.002 34.156 34.157 dbt_total 2336 9.6 0.012 0.013 30.383 30.400 compute_mat_P_omega 1 7.0 0.001 0.005 29.173 29.196 compute_mat_P_omega_contract 10 8.0 0.444 0.478 29.001 29.005 dbt_contract 787 11.0 0.027 0.030 22.830 22.837 dbt_tas_total 1149 12.2 0.058 0.071 20.376 20.377 dbt_tas_multiply 807 12.1 0.002 0.003 20.318 20.321 dbt_tas_dbm 807 14.1 0.003 0.004 15.041 15.056 dbm_multiply 807 16.1 11.423 12.580 11.423 12.580 compute_mat_P_omega_calc_M_occ 250 9.0 0.427 0.465 8.726 8.729 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 8.507 8.509 mp_sync 8706 11.6 5.986 7.245 5.986 7.245 dbt_tas_mm_2 251 15.0 0.001 0.002 7.014 7.019 dbt_copy 1111 10.7 0.012 0.014 6.458 6.715 dbt_reshape 1098 11.7 2.360 2.899 6.150 6.368 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 6.206 6.207 dbt_tas_mm_1N 524 15.1 0.002 0.002 5.275 5.889 compute_QP_energies 1 7.0 0.000 0.000 3.181 3.182 compute_self_energy_cubic_gw 1 8.0 0.003 0.003 3.178 3.181 mp_waitall_2 3776 15.3 3.006 3.176 3.006 3.176 dbt_communicate_buffer 1098 12.7 0.059 0.077 3.024 3.117 contract_cubic_gw 21 9.0 0.000 0.000 2.489 2.489 dbt_reserve_blocks_index_array 2791 12.2 0.009 0.011 1.759 1.979 dbt_reserve_blocks_index 2849 13.1 0.071 0.086 1.760 1.979 dbt_tas_reserve_blocks_index 3300 14.5 0.123 0.155 1.727 1.941 dbm_reserve_blocks 3696 15.4 1.709 1.926 1.709 1.926 dbt_crop 1042 12.0 0.996 1.312 1.555 1.902 dbt_tas_replicate 396 14.1 0.592 0.773 1.259 1.428 mp2_ri_gpw_compute_in 1 5.0 0.000 0.001 1.392 1.394 compute_mat_P_omega_copy_M_vir 250 9.0 0.001 0.001 1.148 1.152 compute_mat_P_omega_copy_M_occ 250 9.0 0.001 0.001 1.081 1.082 cp_gemm 105 8.4 0.000 0.000 1.069 1.079 cp_gemm_cosma 105 9.4 1.069 1.079 1.069 1.079 mp_max_i 1992 9.8 0.824 1.056 0.824 1.056 convert_to_new_pgrid 2421 14.1 0.027 0.034 0.908 1.041 scf_env_do_scf 1 3.0 0.000 0.000 1.033 1.033 scf_env_do_scf_inner_loop 17 4.0 0.000 0.003 1.033 1.033 dbm_copy 1608 15.1 0.875 1.006 0.875 1.006 GW_matrix_operations 10 7.0 0.001 0.001 0.813 0.821 dbm_add 807 14.1 0.720 0.773 0.720 0.773 ------------------------------------------------------------------------------- Plot: name="GW_PBE_4benzene_timings_32omp", title="Timings of GW_PBE_4benzene with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="rest", label="rest", y=44.31700000000001, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=40.612, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=27.524, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=13.116, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=12.287, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="compute_mat_P_omega_calc_M_occ", label="compute_mat_P_omega_calc_M_occ", y=8.55, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=3.25, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbt_reshape", label="dbt_reshape", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 Plot: name="GW_PBE_4benzene_timings_32mpi", title="Timings of GW_PBE_4benzene with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="rest", label="rest", y=11.857000000000003, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=11.423, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="compute_mat_P_omega_calc_M_occ", label="compute_mat_P_omega_calc_M_occ", y=0.427, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=1.709, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbt_reshape", label="dbt_reshape", y=2.36, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=3.006, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_sync", label="mp_sync", y=5.986, yerr=0.0 Running RI-HFX_H2O-32.inp with 1 threads and 32 ranks... done. Running RI-HFX_H2O-32.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/RI-HFX_H2O-32_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.016 0.016 371.773 371.773 qs_forces 1 2.0 0.000 0.000 371.190 371.190 rebuild_ks_matrix 7 6.6 0.000 0.000 350.105 350.105 qs_ks_build_kohn_sham_matrix 7 7.6 0.001 0.001 350.105 350.105 hfx_ks_matrix 7 8.6 0.000 0.000 348.016 348.016 hfx_ri_update_ks 7 9.6 0.000 0.000 298.508 298.508 hfx_ri_update_ks_Pmat 7 10.6 32.502 32.502 298.505 298.505 dbt_total 4861 11.6 0.029 0.029 292.488 292.488 qs_energies 1 3.0 0.000 0.000 281.771 281.771 scf_env_do_scf 1 4.0 0.000 0.000 281.391 281.391 dbt_tas_total 2391 14.1 0.807 0.807 264.101 264.101 qs_ks_update_qs_env 8 6.0 0.000 0.000 260.742 260.742 dbt_contract 1473 13.0 0.158 0.158 245.581 245.581 dbt_tas_multiply 1482 14.0 0.004 0.004 234.515 234.515 hfx_ri_update_ks_Pmat_KS 567 11.6 0.004 0.004 216.311 216.311 dbt_tas_dbm 1482 16.0 0.006 0.006 214.829 214.829 dbm_multiply 1482 18.0 214.811 214.811 214.811 214.811 dbt_tas_mm_2 649 17.1 0.004 0.004 185.807 185.807 scf_env_do_scf_inner_loop 6 5.0 0.001 0.001 161.863 161.863 init_scf_loop 2 5.0 0.000 0.000 119.526 119.526 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 89.367 89.367 hfx_ri_update_forces 1 7.0 0.000 0.000 49.505 49.505 hfx_ri_forces_Pmat_3c 1 8.0 0.002 0.002 33.727 33.727 dbt_tas_mm_3T 659 17.1 0.002 0.002 22.194 22.194 dbt_tas_reshape 906 14.4 0.011 0.011 21.162 21.162 hfx_ri_pre_scf_Pmat 1 12.0 0.001 0.001 20.461 20.461 hfx_ri_update_ks_Pmat_Px3C 567 11.6 0.002 0.002 20.238 20.238 prepare_preconditioner 2 6.0 0.000 0.000 18.204 18.204 make_preconditioner 2 7.0 0.000 0.000 17.623 17.623 dbt_copy 2331 12.4 0.213 0.213 17.280 17.280 cp_fm_syevd 12 10.7 0.000 0.000 17.103 17.103 cp_fm_syevd_base 12 11.7 17.103 17.103 17.103 17.103 make_full_all 2 8.0 0.000 0.000 17.025 17.025 dbt_tas_merge 649 14.1 12.888 12.888 13.792 13.792 precalc_derivatives 1 8.0 0.005 0.005 12.801 12.801 dbt_tas_reshape_buffer_fill 906 15.4 12.784 12.784 12.784 12.784 dbm_reserve_blocks 8303 16.8 11.379 11.379 11.379 11.379 dbt_tas_reserve_blocks_index 7397 16.0 0.365 0.365 10.953 10.953 dbt_crop 2763 14.2 7.260 7.260 10.827 10.827 dbt_reshape 856 13.9 5.076 5.076 9.228 9.228 hfx_ri_pre_scf_Pmat_2c 1 13.0 0.000 0.000 8.506 8.506 dbt_reserve_blocks_index 4998 15.2 0.142 0.142 8.505 8.505 dbt_reserve_blocks_index_array 4963 14.3 0.021 0.021 8.453 8.453 ------------------------------------------------------------------------------- From /workspace/artifacts/RI-HFX_H2O-32_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.006 0.026 65.191 65.203 qs_forces 1 2.0 0.000 0.000 65.002 65.002 rebuild_ks_matrix 7 6.6 0.000 0.000 64.247 64.248 qs_ks_build_kohn_sham_matrix 7 7.6 0.002 0.002 64.247 64.248 hfx_ks_matrix 7 8.6 0.000 0.001 63.118 63.127 dbt_total 4861 11.6 0.029 0.032 57.184 57.210 dbt_contract 1473 13.0 0.103 0.112 44.030 44.040 hfx_ri_update_ks 7 9.6 0.000 0.001 42.995 42.995 hfx_ri_update_ks_Pmat 7 10.6 1.566 2.106 42.994 42.994 dbt_tas_total 2391 14.1 0.129 0.152 41.616 41.618 qs_energies 1 3.0 0.000 0.000 39.339 39.340 scf_env_do_scf 1 4.0 0.000 0.001 39.177 39.177 qs_ks_update_qs_env 8 6.0 0.000 0.000 38.599 38.600 dbt_tas_multiply 1482 14.0 0.005 0.006 37.141 37.144 dbt_tas_dbm 1482 16.0 0.005 0.006 28.477 28.503 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 25.649 25.649 dbm_multiply 1482 18.0 19.542 24.381 19.542 24.381 hfx_ri_update_ks_Pmat_KS 567 11.6 0.004 0.005 23.652 23.653 scf_env_do_scf_inner_loop 6 5.0 0.000 0.001 23.097 23.097 hfx_ri_update_forces 1 7.0 0.000 0.001 20.122 20.130 dbt_tas_mm_2 649 17.1 0.004 0.004 16.769 16.781 mp_sync 17513 13.6 13.535 16.415 13.535 16.415 init_scf_loop 2 5.0 0.000 0.000 16.079 16.080 hfx_ri_forces_Pmat_3c 1 8.0 0.002 0.003 14.112 14.132 hfx_ri_update_ks_Pmat_Px3C 567 11.6 0.002 0.003 8.929 8.930 dbt_copy 2349 12.4 0.034 0.038 7.295 7.759 dbt_tas_mm_3T 659 17.1 0.002 0.002 4.891 5.357 dbt_reshape 1256 13.5 2.366 2.721 5.011 5.252 dbt_crop 2763 14.2 3.285 3.864 4.083 4.740 dbt_tas_mm_3N 163 16.5 0.000 0.001 4.566 4.678 precalc_derivatives 1 8.0 0.002 0.002 4.596 4.596 hfx_ri_pre_scf_Pmat 1 12.0 0.000 0.000 4.276 4.276 mp_waitall_2 5988 16.5 3.155 3.401 3.155 3.401 dbt_tas_merge 649 14.1 1.741 2.105 2.944 3.348 dbm_reserve_blocks 8337 16.9 2.495 2.852 2.495 2.852 dbt_tas_reserve_blocks_index 7428 16.1 0.274 0.325 2.385 2.789 dbt_tas_replicate 909 15.6 0.653 0.828 2.452 2.545 hfx_ri_pre_scf_Pmat_RIx3C 81 13.0 0.000 0.000 2.460 2.470 mp_max_i 3372 12.5 1.995 2.391 1.995 2.391 dbt_tas_communicate_buffer 1825 16.3 0.064 0.078 2.142 2.378 dbt_reserve_blocks_index 5398 15.2 0.131 0.153 2.004 2.324 dbt_reserve_blocks_index_array 5363 14.2 0.014 0.020 2.002 2.322 build_3c_derivatives 9 9.0 0.253 0.412 2.155 2.158 mp_alltoall_i 4339 15.3 1.846 2.023 1.846 2.023 dbt_tas_reshape 916 14.4 0.009 0.010 1.890 1.975 dbt_communicate_buffer 1256 14.5 0.047 0.056 1.756 1.884 convert_to_new_pgrid 4446 16.0 0.044 0.053 1.470 1.630 hfx_ri_update_ks_Pmat_copy_2 567 11.6 0.002 0.002 1.616 1.625 dbm_copy 3043 16.9 1.425 1.586 1.425 1.586 mp_sum_l 38201 15.3 1.227 1.473 1.227 1.473 ------------------------------------------------------------------------------- Plot: name="RI-HFX_H2O-32_timings_32omp", title="Timings of RI-HFX_H2O-32 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="rest", label="rest", y=63.04600000000005, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=214.811, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=32.502, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="cp_fm_syevd_base", label="cp_fm_syevd_base", y=17.103, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_tas_merge", label="dbt_tas_merge", y=12.888, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_tas_reshape_buffer_fill", label="dbt_tas_reshape_buffer_fill", y=12.784, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=11.379, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_crop", label="dbt_crop", y=7.26, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 Plot: name="RI-HFX_H2O-32_timings_32mpi", title="Timings of RI-HFX_H2O-32 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="rest", label="rest", y=19.872, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=19.542, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=1.566, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="cp_fm_syevd_base", label="cp_fm_syevd_base", y=0.0, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_tas_merge", label="dbt_tas_merge", y=1.741, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_tas_reshape_buffer_fill", label="dbt_tas_reshape_buffer_fill", y=0.0, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=2.495, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_crop", label="dbt_crop", y=3.285, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=3.155, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_sync", label="mp_sync", y=13.535, yerr=0.0 Running RI-MP2_ammonia.inp with 1 threads and 32 ranks... done. Running RI-MP2_ammonia.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/RI-MP2_ammonia_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.018 0.018 235.476 235.476 qs_energies 1 2.0 0.000 0.000 235.268 235.268 mp2_main 1 3.0 0.000 0.000 200.451 200.451 mp2_gpw_main 1 4.0 0.001 0.001 196.513 196.513 mp2_ri_gpw_compute_in 1 5.0 0.378 0.378 149.128 149.128 mp2_ri_gpw_compute_in_loop 1 6.0 0.011 0.011 123.922 123.922 mp2_eri_3c_integrate_gpw 2656 7.0 0.014 0.014 94.235 94.235 integrate_v_rspace 2666 8.0 0.669 0.669 80.580 80.580 grid_integrate_task_list 2666 9.0 77.843 77.843 77.843 77.843 mp2_ri_gpw_compute_en 1 5.0 0.074 0.074 47.365 47.365 mp2_ri_gpw_compute_en_RI_loop 1 6.0 9.756 9.756 45.558 45.558 scf_env_do_scf 1 3.0 0.000 0.000 33.702 33.702 scf_env_do_scf_inner_loop 10 4.0 0.001 0.001 33.701 33.701 qs_scf_new_mos 10 5.0 0.000 0.000 32.373 32.373 mp2_ri_gpw_compute_en_expansio 2080 7.0 2.198 2.198 28.703 28.703 offload_gemm 2080 8.0 26.505 26.505 26.505 26.505 eigensolver 11 5.8 0.001 0.001 24.815 24.815 get_2c_integrals 1 6.0 0.000 0.000 24.690 24.690 calculate_wavefunction 5312 9.0 16.143 16.143 23.966 23.966 dbcsr_multiply_generic 5322 8.0 0.186 0.186 23.154 23.154 ao_to_mo_and_store_B_mult_1 2656 7.0 0.010 0.010 23.134 23.134 cp_fm_diag_elpa 11 6.8 0.000 0.000 22.467 22.467 cp_fm_diag_elpa_base 11 7.8 22.305 22.305 22.466 22.466 compute_2c_integrals 1 7.0 0.006 0.006 18.600 18.600 compute_2c_integrals_loop_lm 1 8.0 0.013 0.013 18.581 18.581 mp2_eri_2c_integrate_gpw 1 9.0 3.462 3.462 18.568 18.568 multiply_cannon 5322 9.0 0.484 0.484 11.868 11.868 pw_transfer 63872 10.6 0.929 0.929 11.842 11.842 fft_wrap_pw1pw2 53228 11.4 0.109 0.109 10.665 10.665 multiply_cannon_loop 5322 10.0 0.481 0.481 10.415 10.415 qs_diis_b_step 9 6.0 0.000 0.000 9.353 9.353 make_m2s 10644 9.0 0.063 0.063 8.800 8.800 multiply_cannon_multrec 5322 11.0 8.440 8.440 8.482 8.482 make_images 10644 10.0 3.345 3.345 8.420 8.420 cp_fm_symm 18 7.0 8.322 8.322 8.322 8.322 fft_wrap_pw1pw2_20 21271 12.4 0.600 0.600 7.512 7.512 fft3d_s 53229 13.4 6.760 6.760 6.795 6.795 ao_to_mo_and_store_B_E_Ex_1 2656 7.0 2.441 2.441 6.444 6.444 mp2_ri_gpw_compute_en_ener 2080 7.0 5.229 5.229 5.229 5.229 cp_fm_triangular_invert 2 6.0 5.031 5.031 5.031 5.031 copy_dbcsr_to_fm 2679 8.0 0.027 0.027 4.863 4.863 ------------------------------------------------------------------------------- From /workspace/artifacts/RI-MP2_ammonia_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.006 0.022 43.596 43.607 qs_energies 1 2.0 0.000 0.001 43.522 43.523 mp2_main 1 3.0 0.000 0.001 41.164 41.165 mp2_gpw_main 1 4.0 0.001 0.002 41.036 41.036 mp2_ri_gpw_compute_in 1 5.0 0.042 0.042 19.115 24.981 mp2_ri_gpw_compute_in_loop 1 6.0 0.001 0.001 17.296 23.164 mp2_ri_gpw_compute_en 1 5.0 0.086 0.089 21.838 22.272 mp2_eri_3c_integrate_gpw 83 7.0 0.001 0.001 15.073 21.037 integrate_v_rspace 93 8.1 0.111 0.120 14.982 20.754 grid_integrate_task_list 93 9.1 14.589 20.417 14.589 20.417 mp2_ri_gpw_compute_en_RI_loop 1 6.0 0.693 0.779 14.920 14.995 mp2_ri_gpw_compute_en_expansio 65 7.0 0.087 0.106 11.596 11.880 offload_gemm 65 8.0 11.509 11.789 11.509 11.789 mp_min_d 2 7.0 5.922 6.334 5.922 6.334 mp2_ri_get_integ_group_size 1 6.0 0.000 0.000 5.868 6.302 mp2_ri_gpw_compute_en_comm 17 7.0 0.105 0.146 2.265 2.573 scf_env_do_scf 1 3.0 0.000 0.000 2.226 2.227 scf_env_do_scf_inner_loop 10 4.0 0.000 0.002 2.226 2.227 dbcsr_multiply_generic 176 8.0 0.008 0.009 1.907 2.100 ao_to_mo_and_store_B_mult_1 83 7.0 0.001 0.001 1.889 2.083 mp_sendrecv_dm3 510 8.0 1.659 1.992 1.659 1.992 get_2c_integrals 1 6.0 0.000 0.000 1.756 1.791 compute_2c_integrals 1 7.0 0.002 0.003 1.456 1.473 compute_2c_integrals_loop_lm 1 8.0 0.002 0.005 1.064 1.388 mp2_eri_2c_integrate_gpw 1 9.0 0.217 0.345 1.062 1.387 multiply_cannon 176 9.0 0.015 0.017 1.123 1.182 calculate_wavefunction 166 9.0 0.522 0.740 0.911 1.170 qs_scf_new_mos 10 5.0 0.000 0.000 1.076 1.154 multiply_cannon_loop 176 10.0 0.002 0.002 1.062 1.120 eigensolver 11 5.8 0.001 0.001 1.035 1.037 multiply_cannon_multrec 246 11.0 0.912 0.945 0.917 0.951 pw_transfer 2120 10.5 0.040 0.050 0.816 0.895 make_m2s 352 9.0 0.003 0.003 0.745 0.879 ------------------------------------------------------------------------------- Plot: name="RI-MP2_ammonia_timings_32omp", title="Timings of RI-MP2_ammonia with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="rest", label="rest", y=74.48400000000001, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=77.843, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="offload_gemm", label="offload_gemm", y=26.505, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=22.305, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="calculate_wavefunction", label="calculate_wavefunction", y=16.143, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp2_ri_gpw_compute_en_RI_loop", label="mp2_ri_gpw_compute_en_RI_loop", y=9.756, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=8.44, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp_min_d", label="mp_min_d", y=0.0, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp_sendrecv_dm3", label="mp_sendrecv_dm3", y=0.0, yerr=0.0 Plot: name="RI-MP2_ammonia_timings_32mpi", title="Timings of RI-MP2_ammonia with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="rest", label="rest", y=7.789999999999999, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=14.589, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="offload_gemm", label="offload_gemm", y=11.509, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=0.0, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="calculate_wavefunction", label="calculate_wavefunction", y=0.522, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp2_ri_gpw_compute_en_RI_loop", label="mp2_ri_gpw_compute_en_RI_loop", y=0.693, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=0.912, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp_min_d", label="mp_min_d", y=5.922, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp_sendrecv_dm3", label="mp_sendrecv_dm3", y=1.659, yerr=0.0 Running diag_cu144_broy.inp with 1 threads and 32 ranks... done. Running diag_cu144_broy.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/diag_cu144_broy_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.071 0.071 225.670 225.670 qs_energies 1 2.0 0.000 0.000 224.190 224.190 scf_env_do_scf 1 3.0 0.000 0.000 214.803 214.803 scf_env_do_scf_inner_loop 15 4.0 0.002 0.002 214.803 214.803 qs_scf_new_mos 15 5.0 0.000 0.000 134.516 134.516 eigensolver 15 6.0 0.001 0.001 126.604 126.604 cp_fm_diag_elpa 15 7.0 0.000 0.000 113.084 113.084 cp_fm_diag_elpa_base 15 8.0 110.636 110.636 113.084 113.084 qs_ks_update_qs_env 15 5.0 0.000 0.000 52.797 52.797 rebuild_ks_matrix 15 6.0 0.000 0.000 52.582 52.582 qs_ks_build_kohn_sham_matrix 15 7.0 0.002 0.002 52.582 52.582 qs_vxc_create 15 8.0 0.020 0.020 35.643 35.643 calculate_dispersion_nonloc 15 9.0 7.208 7.208 30.986 30.986 pw_transfer 1191 10.0 0.061 0.061 24.445 24.445 fft_wrap_pw1pw2 1086 11.0 0.009 0.009 24.218 24.218 qs_rho_update_rho_low 16 5.0 0.000 0.000 23.874 23.874 calculate_rho_elec 16 6.0 0.216 0.216 23.874 23.874 grid_collocate_task_list 16 7.0 22.502 22.502 22.502 22.502 fft_wrap_pw1pw2_150 765 12.0 3.375 3.375 17.206 17.206 sum_up_and_integrate 15 8.0 0.043 0.043 15.327 15.327 integrate_v_rspace 15 9.0 0.020 0.020 15.284 15.284 grid_integrate_task_list 15 10.0 14.761 14.761 14.761 14.761 cp_fm_cholesky_restore 45 7.0 11.337 11.337 11.337 11.337 fft3d_s 1087 13.0 11.120 11.120 11.186 11.186 pw_scatter_s 585 13.1 7.717 7.717 7.717 7.717 fft_wrap_pw1pw2_200 197 12.3 0.802 0.802 6.828 6.828 init_scf_run 1 3.0 0.000 0.000 5.771 5.771 copy_dbcsr_to_fm 16 5.9 0.001 0.001 5.657 5.657 dbcsr_complete_redistribute 46 8.3 2.265 2.265 5.580 5.580 gspace_mixing 14 5.0 0.170 0.170 4.933 4.933 vdW_energy 15 10.0 4.644 4.644 4.644 4.644 xc_vxc_pw_create 15 9.0 0.221 0.221 4.637 4.637 cp_fm_upper_to_full 30 8.0 4.629 4.629 4.629 4.629 ------------------------------------------------------------------------------- From /workspace/artifacts/diag_cu144_broy_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.033 0.038 67.291 67.302 qs_energies 1 2.0 0.000 0.004 66.927 66.927 scf_env_do_scf 1 3.0 0.000 0.000 62.528 62.528 scf_env_do_scf_inner_loop 15 4.0 0.002 0.012 62.528 62.528 qs_ks_update_qs_env 15 5.0 0.000 0.000 27.062 27.080 rebuild_ks_matrix 15 6.0 0.000 0.000 27.024 27.043 qs_ks_build_kohn_sham_matrix 15 7.0 0.003 0.003 27.024 27.042 qs_rho_update_rho_low 16 5.0 0.000 0.000 22.357 22.362 calculate_rho_elec 16 6.0 0.007 0.007 22.357 22.362 grid_collocate_task_list 16 7.0 20.850 21.307 20.850 21.307 sum_up_and_integrate 15 8.0 0.009 0.019 15.412 15.465 integrate_v_rspace 15 9.0 0.001 0.001 15.403 15.459 grid_integrate_task_list 15 10.0 14.046 14.623 14.046 14.623 qs_scf_new_mos 15 5.0 0.000 0.000 13.799 13.922 eigensolver 15 6.0 0.001 0.002 12.719 12.742 qs_vxc_create 15 8.0 0.001 0.001 11.270 11.282 calculate_dispersion_nonloc 15 9.0 1.018 1.875 9.197 9.223 cp_fm_diag_elpa 15 7.0 0.000 0.000 9.201 9.210 cp_fm_diag_elpa_base 15 8.0 9.040 9.073 9.198 9.207 pw_transfer 1191 10.0 0.081 0.088 8.728 8.858 fft_wrap_pw1pw2 1086 11.0 0.013 0.023 8.533 8.693 fft3d_ps 1086 13.0 2.689 2.983 6.685 7.010 fft_wrap_pw1pw2_150 765 12.0 0.327 0.370 5.780 5.860 mp_alltoall_z22v 1086 15.0 3.318 3.895 3.318 3.895 cp_fm_cholesky_restore 45 7.0 3.349 3.387 3.349 3.387 yz_to_x 501 13.9 0.275 0.335 2.472 2.753 fft_wrap_pw1pw2_200 197 12.3 0.222 0.253 2.627 2.701 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 2.643 2.643 build_core_hamiltonian_matrix 1 4.0 0.000 0.000 2.268 2.511 xc_vxc_pw_create 15 9.0 0.016 0.020 2.073 2.100 x_to_yz 585 14.1 0.376 0.399 1.497 1.721 rs_pw_transfer 158 9.4 0.002 0.002 1.435 1.708 density_rs2pw 16 7.0 0.001 0.001 1.349 1.558 vdW_energy 15 10.0 1.458 1.519 1.458 1.519 init_scf_run 1 3.0 0.000 0.000 1.517 1.518 xc_pw_derive 90 11.0 0.001 0.001 1.415 1.499 build_core_ppnl 1 5.0 1.335 1.479 1.335 1.479 scf_env_initial_rho_setup 1 4.0 0.000 0.001 1.425 1.425 ------------------------------------------------------------------------------- Plot: name="diag_cu144_broy_timings_32omp", title="Timings of diag_cu144_broy with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32omp", name="rest", label="rest", y=55.31399999999999, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=110.636, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=22.502, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=14.761, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=11.337, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_s", label="fft3d_s", y=11.12, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=0.0, yerr=0.0 Plot: name="diag_cu144_broy_timings_32mpi", title="Timings of diag_cu144_broy with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="rest", label="rest", y=16.688000000000002, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=9.04, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=20.85, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=14.046, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=3.349, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=3.318, yerr=0.0 Running bench_dftb.inp with 1 threads and 32 ranks... done. Running bench_dftb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/bench_dftb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.078 0.078 372.522 372.522 qs_energies 1 2.0 0.000 0.000 372.383 372.383 ls_scf 1 3.0 0.000 0.000 371.093 371.093 ls_scf_main 1 4.0 0.002 0.002 358.211 358.211 density_matrix_trs4 11 5.0 0.012 0.012 261.449 261.449 arnoldi_extremal 12 6.1 0.000 0.000 174.769 174.769 arnoldi_normal_ev 12 7.1 0.017 0.017 174.769 174.769 build_subspace 23 8.1 0.085 0.085 172.109 172.109 dbcsr_matrix_vector_mult 652 9.0 0.203 0.203 149.289 149.289 dbcsr_matrix_vector_mult_local 652 10.0 138.733 138.733 138.738 138.738 ls_scf_dm_to_ks 11 5.0 0.000 0.000 91.408 91.408 matrix_ls_to_qs 11 6.0 0.000 0.000 88.081 88.081 dbcsr_multiply_generic 185 6.1 0.819 0.819 78.622 78.622 dbcsr_copy_into_existing 11 7.0 48.045 48.045 48.045 48.045 multiply_cannon 185 7.1 0.307 0.307 47.626 47.626 dbcsr_complete_redistribute 23 7.5 31.805 31.805 43.834 43.834 matrix_decluster 11 7.0 0.000 0.000 40.035 40.035 multiply_cannon_loop 185 8.1 0.261 0.261 35.027 35.027 make_m2s 370 7.1 0.038 0.038 26.343 26.343 multiply_cannon_multrec 185 9.1 25.858 25.858 25.977 25.977 make_images 370 8.1 10.967 10.967 24.448 24.448 dbcsr_finalize 646 7.5 0.186 0.186 15.800 15.800 dbcsr_merge_all 597 8.5 2.294 2.294 14.595 14.595 DGKS_ortho_d 702 9.1 12.736 12.736 12.739 12.739 setup_rec_index_2d 370 8.1 12.183 12.183 12.183 12.183 ls_scf_init_scf 1 4.0 0.000 0.000 12.167 12.167 ls_scf_init_matrix_S 1 5.0 0.000 0.000 11.823 11.823 matrix_sqrt_Newton_Schulz 1 6.0 0.001 0.001 11.156 11.156 tree_to_linear_d 110 9.4 10.906 10.906 10.906 10.906 dbcsr_sort_indices 1103 9.9 10.690 10.690 10.690 10.690 Gram_Schmidt_ortho_d 702 9.1 10.538 10.538 10.540 10.540 quick_finalize 395 10.0 0.425 0.425 9.297 9.297 calculate_norms 370 9.1 8.789 8.789 8.789 8.789 dbcsr_special_finalize 370 9.1 0.002 0.002 8.599 8.599 ------------------------------------------------------------------------------- From /workspace/artifacts/bench_dftb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.009 0.022 75.564 75.575 qs_energies 1 2.0 0.000 0.000 75.466 75.466 ls_scf 1 3.0 0.000 0.000 75.402 75.403 ls_scf_main 1 4.0 0.000 0.007 72.475 72.477 density_matrix_trs4 11 5.0 0.007 0.019 69.570 69.632 dbcsr_multiply_generic 185 6.1 0.062 0.072 65.265 65.545 multiply_cannon 185 7.1 0.034 0.046 54.401 55.977 multiply_cannon_loop 185 8.1 0.130 0.144 51.665 53.126 multiply_cannon_multrec 1480 9.1 31.899 36.639 32.203 36.989 mp_waitall_1 11936 10.3 16.446 18.141 16.446 18.141 multiply_cannon_metrocomm3 1480 9.1 0.015 0.017 9.821 12.890 calculate_norms 2960 9.1 5.841 7.787 5.841 7.787 make_m2s 370 7.1 0.036 0.039 7.499 7.597 make_images 370 8.1 0.653 0.737 7.363 7.458 multiply_cannon_metrocomm1 1480 9.1 0.008 0.010 3.606 7.380 make_images_data 370 9.1 0.010 0.013 3.289 3.545 arnoldi_extremal 12 6.1 0.000 0.001 3.505 3.526 arnoldi_normal_ev 12 7.1 0.001 0.004 3.505 3.525 build_subspace 23 8.1 0.022 0.028 3.383 3.385 hybrid_alltoall_any 393 9.9 0.210 1.263 2.865 3.128 dbcsr_matrix_vector_mult 652 9.0 0.010 0.047 2.425 3.116 mp_sum_l 1119 5.6 2.339 3.048 2.339 3.048 dbcsr_matrix_vector_mult_local 652 10.0 1.808 2.763 1.810 2.765 ls_scf_dm_to_ks 11 5.0 0.000 0.000 2.521 2.589 dbcsr_complete_redistribute 23 7.5 1.267 1.709 2.113 2.290 ls_scf_init_scf 1 4.0 0.000 0.000 2.242 2.243 matrix_ls_to_qs 11 6.0 0.000 0.000 2.032 2.224 ls_scf_init_matrix_S 1 5.0 0.000 0.000 2.214 2.221 make_images_pack 370 9.1 1.827 2.079 1.831 2.083 dbcsr_multiply_generic_mpsum_f 137 7.1 0.000 0.000 1.482 2.047 matrix_decluster 11 7.0 0.000 0.000 1.884 2.035 matrix_sqrt_Newton_Schulz 1 6.0 0.001 0.001 2.027 2.029 buffer_matrices_ensure_size 370 8.1 1.573 1.689 1.573 1.689 dbcsr_finalize 646 7.5 0.008 0.009 1.433 1.530 ------------------------------------------------------------------------------- Plot: name="bench_dftb_timings_32omp", title="Timings of bench_dftb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32omp", name="rest", label="rest", y=106.55599999999998, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=138.733, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=48.045, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=31.805, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=25.858, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="DGKS_ortho_d", label="DGKS_ortho_d", y=12.736, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="calculate_norms", label="calculate_norms", y=8.789, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="make_images_pack", label="make_images_pack", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 Plot: name="bench_dftb_timings_32mpi", title="Timings of bench_dftb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32mpi", name="rest", label="rest", y=14.136999999999986, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=1.808, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=1.267, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=31.899, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="DGKS_ortho_d", label="DGKS_ortho_d", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="calculate_norms", label="calculate_norms", y=5.841, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="make_images_pack", label="make_images_pack", y=1.827, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=16.446, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=2.339, yerr=0.0 Running dbcsr.inp with 1 threads and 32 ranks... done. Running dbcsr.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/dbcsr_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.007 72.002 72.002 lib_test 1 2.0 0.000 0.000 71.995 71.995 dbcsr_run_tests 3 3.0 0.002 0.002 71.995 71.995 test_multiplies_multiproc 3 4.0 0.001 0.001 56.365 56.365 dbcsr_redistribute 9 5.0 36.441 36.441 37.968 37.968 dbcsr_multiply_generic 9 5.0 0.001 0.001 16.803 16.803 dbcsr_make_random_matrix 9 4.0 12.551 12.551 15.539 15.539 multiply_cannon 9 6.0 0.001 0.001 12.252 12.252 multiply_cannon_loop 9 7.0 0.017 0.017 11.886 11.886 multiply_cannon_multrec 9 8.0 11.869 11.869 11.869 11.869 dbcsr_finalize 27 5.7 0.015 0.015 5.480 5.480 dbcsr_merge_all 18 6.5 2.003 2.003 4.822 4.822 dbcsr_data_release 975 7.6 2.447 2.447 2.447 2.447 tree_to_linear_d 9 7.0 1.882 1.882 1.882 1.882 make_m2s 18 6.0 0.001 0.001 1.508 1.508 make_images 18 7.0 0.520 0.520 1.456 1.456 ------------------------------------------------------------------------------- From /workspace/artifacts/dbcsr_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.004 0.010 19.487 19.492 lib_test 1 2.0 0.000 0.000 19.462 19.477 dbcsr_run_tests 3 3.0 0.000 0.001 19.461 19.477 test_multiplies_multiproc 3 4.0 0.000 0.002 18.604 18.664 dbcsr_multiply_generic 9 5.0 0.001 0.001 16.518 16.597 multiply_cannon 9 6.0 0.002 0.002 14.702 14.983 multiply_cannon_loop 9 7.0 0.002 0.002 14.399 14.722 multiply_cannon_multrec 72 8.0 12.057 12.447 12.058 12.448 mp_waitall_1 576 9.2 2.674 3.263 2.674 3.263 multiply_cannon_metrocomm1 72 8.0 0.001 0.001 2.057 2.542 dbcsr_make_random_matrix 9 4.0 0.686 1.194 0.826 1.249 mp_sum_l 390 2.5 0.483 0.989 0.483 0.989 dbcsr_multiply_generic_mpsum_f 9 6.0 0.000 0.000 0.475 0.981 make_m2s 18 6.0 0.001 0.001 0.729 0.764 make_images 18 7.0 0.021 0.026 0.726 0.760 multiply_cannon_metrocomm3 72 8.0 0.000 0.000 0.276 0.750 dbcsr_finalize 27 5.7 0.000 0.000 0.648 0.734 dbcsr_data_release 444 7.6 0.619 0.712 0.619 0.712 dbcsr_merge_all 18 6.5 0.099 0.117 0.537 0.619 dbcsr_destroy 111 5.9 0.000 0.001 0.510 0.599 mp_cart_create 9 5.7 0.460 0.563 0.460 0.563 dbcsr_checksum 6 5.0 0.165 0.548 0.550 0.550 dbcsr_mp_make_env 6 4.5 0.000 0.000 0.451 0.547 dbcsr_redistribute 9 5.0 0.252 0.294 0.441 0.480 make_images_data 18 8.0 0.001 0.001 0.378 0.442 mp_sum_d 191 1.2 0.386 0.403 0.386 0.403 dbcsr_data_copy_aa2 18 7.5 0.320 0.403 0.320 0.403 ------------------------------------------------------------------------------- Plot: name="dbcsr_timings_32omp", title="Timings of dbcsr with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32omp", name="rest", label="rest", y=6.690999999999988, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_redistribute", label="dbcsr_redistribute", y=36.441, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=12.551, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=11.869, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_data_release", label="dbcsr_data_release", y=2.447, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_merge_all", label="dbcsr_merge_all", y=2.003, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 Plot: name="dbcsr_timings_32mpi", title="Timings of dbcsr with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32mpi", name="rest", label="rest", y=2.6169999999999973, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_redistribute", label="dbcsr_redistribute", y=0.252, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=0.686, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=12.057, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_data_release", label="dbcsr_data_release", y=0.619, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_merge_all", label="dbcsr_merge_all", y=0.099, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=2.674, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=0.483, yerr=0.0 Running MQAE_single_node.inp with 1 threads and 32 ranks... done. Running MQAE_single_node.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/MQAE_single_node_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.047 0.047 145.463 145.463 qs_mol_dyn_low 1 2.0 0.003 0.003 144.025 144.025 velocity_verlet 5 3.0 0.003 0.003 117.826 117.826 qmmm_el_coupling 6 3.8 0.000 0.000 89.863 89.863 qmmm_elec_with_gaussian 6 4.8 0.084 0.084 89.859 89.859 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 89.112 89.112 qmmm_elec_gaussian_low_G 6 6.8 88.199 88.199 88.199 88.199 qs_forces 6 3.8 0.000 0.000 45.290 45.290 qs_energies 6 4.8 0.000 0.000 40.913 40.913 scf_env_do_scf 6 5.8 0.001 0.001 37.271 37.271 scf_env_do_scf_inner_loop 39 6.8 0.004 0.004 26.591 26.591 rebuild_ks_matrix 45 8.4 0.000 0.000 25.486 25.486 qs_ks_build_kohn_sham_matrix 45 9.4 0.005 0.005 25.486 25.486 qs_ks_update_qs_env 45 7.8 0.000 0.000 21.550 21.550 pw_transfer 966 12.3 0.050 0.050 17.490 17.490 fft_wrap_pw1pw2 801 13.6 0.006 0.006 17.268 17.268 fft_wrap_pw1pw2_150 507 15.2 2.153 2.153 16.826 16.826 qs_vxc_create 45 10.4 0.001 0.001 13.658 13.658 xc_vxc_pw_create 45 11.4 0.649 0.649 13.657 13.657 init_scf_loop 6 6.8 0.000 0.000 10.675 10.675 xc_pw_derive 270 13.4 0.002 0.002 9.332 9.332 fft3d_s 802 15.6 7.968 7.968 7.977 7.977 qs_rho_update_rho_low 45 7.9 0.000 0.000 7.595 7.595 calculate_rho_elec 45 8.9 0.563 0.563 7.595 7.595 prepare_preconditioner 6 7.8 0.000 0.000 7.240 7.240 xc_rho_set_and_dset_create 45 12.4 0.693 0.693 7.173 7.173 make_preconditioner 6 8.8 0.000 0.000 6.838 6.838 make_full_all 6 9.8 0.001 0.001 6.442 6.442 pw_scatter_s 429 15.8 5.783 5.783 5.783 5.783 xc_pw_divergence 45 12.4 0.001 0.001 5.770 5.770 qmmm_forces 6 3.8 0.002 0.002 5.661 5.661 qmmm_forces_with_gaussian 6 4.8 0.109 0.109 5.322 5.322 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 4.467 4.467 pw_integral_ab 2539 7.4 4.418 4.418 4.418 4.418 qs_ks_ddapc 45 10.4 0.001 0.001 4.402 4.402 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 3.944 3.944 cp_fm_diag_elpa 18 11.2 0.000 0.000 3.740 3.740 cp_fm_diag_elpa_base 18 12.2 3.729 3.729 3.740 3.740 qmmm_forces_gaussian_low_G 6 6.8 3.702 3.702 3.702 3.702 grid_collocate_task_list 45 9.9 3.579 3.579 3.579 3.579 density_rs2pw 45 9.9 0.002 0.002 3.454 3.454 sum_up_and_integrate 45 10.4 0.130 0.130 3.284 3.284 integrate_v_rspace 45 11.4 0.006 0.006 3.154 3.154 pw_poisson_solve 51 9.9 1.294 1.294 3.062 3.062 ------------------------------------------------------------------------------- From /workspace/artifacts/MQAE_single_node_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.032 0.054 63.357 63.371 qs_mol_dyn_low 1 2.0 0.003 0.004 62.116 62.178 qs_forces 6 3.8 0.001 0.001 44.687 44.687 qs_energies 6 4.8 0.000 0.000 42.683 42.683 scf_env_do_scf 6 5.8 0.000 0.001 41.620 41.620 scf_env_do_scf_inner_loop 113 6.2 0.002 0.018 39.933 39.934 rebuild_ks_matrix 119 8.1 0.000 0.000 29.599 29.610 qs_ks_build_kohn_sham_matrix 119 9.1 0.015 0.026 29.599 29.610 qs_ks_update_qs_env 119 7.3 0.001 0.001 27.900 27.911 velocity_verlet 5 3.0 0.002 0.004 26.187 26.190 pw_transfer 2446 12.3 0.165 0.184 18.710 19.185 fft_wrap_pw1pw2 2059 13.4 0.022 0.027 18.285 18.762 fft_wrap_pw1pw2_150 1321 14.9 1.447 1.690 17.539 18.015 qs_vxc_create 119 10.1 0.002 0.003 14.830 14.833 xc_vxc_pw_create 119 11.1 0.157 0.214 14.828 14.831 fft3d_ps 2059 15.4 7.378 8.086 13.811 14.695 qs_rho_update_rho_low 119 7.3 0.000 0.001 11.996 11.997 calculate_rho_elec 119 8.3 0.049 0.055 11.996 11.997 xc_pw_derive 714 13.1 0.008 0.010 11.234 11.554 sum_up_and_integrate 119 10.1 0.059 0.079 10.819 10.960 integrate_v_rspace 119 11.1 0.004 0.007 10.759 10.905 qmmm_forces 6 3.8 0.002 0.002 8.765 8.765 rs_pw_transfer 988 11.5 0.011 0.017 8.268 8.564 qmmm_forces_with_gaussian 6 4.8 0.355 0.399 8.009 8.558 xc_rho_set_and_dset_create 119 12.1 0.382 0.739 7.042 8.077 qmmm_el_coupling 6 3.8 0.000 0.000 7.620 7.779 qmmm_elec_with_gaussian 6 4.8 0.354 0.401 7.618 7.776 xc_pw_divergence 119 12.1 0.004 0.006 7.383 7.637 density_rs2pw 119 9.3 0.005 0.008 7.046 7.332 potential_pw2rs 119 12.1 0.006 0.007 6.333 6.352 mp_alltoall_z22v 2059 17.4 4.888 5.942 4.888 5.942 grid_collocate_task_list 119 9.3 4.744 5.163 4.744 5.163 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 4.142 4.391 grid_integrate_task_list 119 12.1 3.663 4.379 3.663 4.379 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 3.642 3.938 x_to_yz 1095 16.8 0.876 0.961 3.390 3.809 qmmm_forces_gaussian_low_G 6 6.8 3.410 3.668 3.410 3.668 yz_to_x 964 16.0 0.615 0.761 2.989 3.651 mp_waitany 4028 12.8 2.902 3.373 2.902 3.373 rs_pw_transfer_PW2RS_150 125 13.9 1.400 1.563 3.245 3.353 qmmm_elec_gaussian_low_G 6 6.8 3.040 3.306 3.040 3.306 pw_restrict_s3 18 5.8 1.542 1.850 2.823 3.119 rs_pw_transfer_RS2PW_150 125 11.2 1.044 1.195 2.693 3.007 dbcsr_multiply_generic 2588 12.3 0.059 0.072 2.147 2.587 qmmm_elec_with_gaussian:spline 6 5.8 0.000 0.000 2.312 2.531 pw_prolongate_s3 18 6.8 1.259 1.477 2.312 2.531 mp_waitall_1 188862 16.2 2.079 2.355 2.079 2.355 qs_scf_new_mos 113 7.2 0.000 0.000 2.124 2.129 qs_scf_loop_do_ot 113 8.2 0.000 0.000 2.123 2.129 qs_ks_ddapc 119 10.1 0.002 0.002 2.027 2.102 ot_scf_mini 113 9.2 0.001 0.001 2.041 2.046 mp_sum_dm3 33 5.7 1.888 2.009 1.888 2.009 pw_gather_p 964 15.0 1.361 1.806 1.361 1.806 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 1.714 1.715 init_scf_loop 6 6.8 0.000 0.000 1.684 1.684 pw_scatter_p 1095 15.8 1.612 1.657 1.612 1.657 pw_integral_ab 2761 7.7 1.240 1.327 1.580 1.653 xc_functional_eval 238 13.1 0.003 0.004 0.751 1.409 rs_pw_transfer_PW2RS_40 119 14.1 0.236 0.286 1.082 1.273 ------------------------------------------------------------------------------- Plot: name="MQAE_single_node_timings_32omp", title="Timings of MQAE_single_node with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32omp", name="rest", label="rest", y=28.084999999999994, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=88.199, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_s", label="fft3d_s", y=7.968, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_scatter_s", label="pw_scatter_s", y=5.783, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_integral_ab", label="pw_integral_ab", y=4.418, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=3.729, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=3.702, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=3.579, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=0.0, yerr=0.0 Plot: name="MQAE_single_node_timings_32mpi", title="Timings of MQAE_single_node with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32mpi", name="rest", label="rest", y=34.994, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=3.04, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_scatter_s", label="pw_scatter_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_integral_ab", label="pw_integral_ab", y=1.24, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=3.41, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=4.744, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=4.888, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=7.378, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=3.663, yerr=0.0 Summary: Performance test took 46 minutes. Status: OK Removing intermediate container 04eaa43ab37f ---> c6e672b77b68 Step 41/42 : CMD cat $(find ./report.log -mmin +10) | sed '/^Summary:/ s/$/ (cached)/' ---> Running in 33fc41120a1d Removing intermediate container 33fc41120a1d ---> dfef42902447 Step 42/42 : ENTRYPOINT [] ---> Running in a826508d0260 Removing intermediate container a826508d0260 ---> bcea4d64e235 [Warning] One or more build-args [GIT_COMMIT_SHA] were not consumed Successfully built bcea4d64e235 Successfully tagged gcr.io/cp2k-org-project/img_cp2k-perf-openmp-arch-14b:master Pushing new image... done. #################### Running Image cp2k-perf-openmp #################### Uploading artifacts... done EndDate: 2022-07-21 11:55:13+00:00