StartDate: 2024-04-10 08:54:57+00:00 CpuId: 32x AMD EPYC (3rd Gen) (Milan) [Zen 3], 7nm (SMT disabled) CommitSHA: 10d5862cfdc6bbd0948853da3ffa8e3af272c7f9 CommitTime: 2024-04-09 23:50:49 +0200 CommitAuthor: Ole Schütt CommitSubject: Manual: Add support for youtube videos #################### Building Image cp2k-perf-openmp #################### Dockerfile: /tools/docker/Dockerfile.test_performance Build-Path: / Build-Args: GIT_COMMIT_SHA=10d5862cfdc6bbd0948853da3ffa8e3af272c7f9 Build-Cache: Yes Populating docker build cache... done. DEPRECATED: The legacy builder is deprecated and will be removed in a future release. BuildKit is currently disabled; enable it by removing the DOCKER_BUILDKIT=0 environment-variable. Sending build context to Docker daemon 394.2MB Step 1/42 : FROM ubuntu:22.04 22.04: Pulling from library/ubuntu bccd10f490ab: Pulling fs layer bccd10f490ab: Verifying Checksum bccd10f490ab: Download complete bccd10f490ab: Pull complete Digest: sha256:77906da86b60585ce12215807090eb327e7386c8fafb5402369e421f44eff17e Status: Downloaded newer image for ubuntu:22.04 ---> ca2b0f26964c Step 2/42 : WORKDIR /opt/cp2k-toolchain ---> Using cache ---> 03d16d9e2c6a Step 3/42 : COPY ./tools/toolchain/install_requirements*.sh ./ ---> Using cache ---> 72223eb0b8d0 Step 4/42 : RUN ./install_requirements.sh ubuntu:22.04 ---> Using cache ---> 432bb79a897a Step 5/42 : RUN mkdir scripts ---> Using cache ---> 1bac2e842a38 Step 6/42 : COPY ./tools/toolchain/scripts/VERSION ./tools/toolchain/scripts/parse_if.py ./tools/toolchain/scripts/tool_kit.sh ./tools/toolchain/scripts/common_vars.sh ./tools/toolchain/scripts/signal_trap.sh ./tools/toolchain/scripts/get_openblas_arch.sh ./scripts/ ---> Using cache ---> 94fa08efe2f0 Step 7/42 : COPY ./tools/toolchain/install_cp2k_toolchain.sh . ---> Using cache ---> a9437683062a Step 8/42 : RUN ./install_cp2k_toolchain.sh --install-all --with-gcc=system --dry-run ---> Using cache ---> 12ff9cf83aed Step 9/42 : COPY ./tools/toolchain/scripts/stage0/ ./scripts/stage0/ ---> Using cache ---> bb48ad285a7b Step 10/42 : RUN ./scripts/stage0/install_stage0.sh && rm -rf ./build ---> Using cache ---> c92c8d7dd493 Step 11/42 : COPY ./tools/toolchain/scripts/stage1/ ./scripts/stage1/ ---> Using cache ---> 033cc1ebd13c Step 12/42 : RUN ./scripts/stage1/install_stage1.sh && rm -rf ./build ---> Using cache ---> 843b85097872 Step 13/42 : COPY ./tools/toolchain/scripts/stage2/ ./scripts/stage2/ ---> Using cache ---> fa81e1dda2cd Step 14/42 : RUN ./scripts/stage2/install_stage2.sh && rm -rf ./build ---> Using cache ---> 0e1fba43601b Step 15/42 : COPY ./tools/toolchain/scripts/stage3/ ./scripts/stage3/ ---> Using cache ---> 8c902fb666ef Step 16/42 : RUN ./scripts/stage3/install_stage3.sh && rm -rf ./build ---> Using cache ---> a12e3572c9d1 Step 17/42 : COPY ./tools/toolchain/scripts/stage4/ ./scripts/stage4/ ---> Using cache ---> ff7670df5622 Step 18/42 : RUN ./scripts/stage4/install_stage4.sh && rm -rf ./build ---> Using cache ---> 486165f28bb7 Step 19/42 : COPY ./tools/toolchain/scripts/stage5/ ./scripts/stage5/ ---> Using cache ---> ebac98ece855 Step 20/42 : RUN ./scripts/stage5/install_stage5.sh && rm -rf ./build ---> Using cache ---> 47d5dc5407e4 Step 21/42 : COPY ./tools/toolchain/scripts/stage6/ ./scripts/stage6/ ---> Using cache ---> c47f20cb16d9 Step 22/42 : RUN ./scripts/stage6/install_stage6.sh && rm -rf ./build ---> Using cache ---> 8d55cefeb393 Step 23/42 : COPY ./tools/toolchain/scripts/stage7/ ./scripts/stage7/ ---> Using cache ---> 4722b5a27941 Step 24/42 : RUN ./scripts/stage7/install_stage7.sh && rm -rf ./build ---> Using cache ---> 5aa9a2bab2d8 Step 25/42 : COPY ./tools/toolchain/scripts/stage8/ ./scripts/stage8/ ---> Using cache ---> 7fe588a9aa32 Step 26/42 : RUN ./scripts/stage8/install_stage8.sh && rm -rf ./build ---> Using cache ---> 9442d919667a Step 27/42 : COPY ./tools/toolchain/scripts/arch_base.tmpl ./tools/toolchain/scripts/generate_arch_files.sh ./scripts/ ---> Using cache ---> f20fbac72655 Step 28/42 : RUN ./scripts/generate_arch_files.sh && rm -rf ./build ---> Using cache ---> e46326340917 Step 29/42 : WORKDIR /opt/cp2k ---> Using cache ---> 36c52d8b2c2f Step 30/42 : COPY ./Makefile . ---> Using cache ---> 333093fadb36 Step 31/42 : COPY ./src ./src ---> Using cache ---> 187b6751a929 Step 32/42 : COPY ./exts ./exts ---> Using cache ---> a76cbc7f480b Step 33/42 : COPY ./tools/build_utils ./tools/build_utils ---> Using cache ---> e5f5fa821b98 Step 34/42 : RUN /bin/bash -c " mkdir -p arch && ln -vs /opt/cp2k-toolchain/install/arch/local.psmp ./arch/" ---> Running in eadb864c3a9b './arch/local.psmp' -> '/opt/cp2k-toolchain/install/arch/local.psmp' Removing intermediate container eadb864c3a9b ---> 2f12a88c73e4 Step 35/42 : COPY ./data ./data ---> 93b3439e46ff Step 36/42 : COPY ./tests ./tests ---> 692100a0403b Step 37/42 : COPY ./tools/regtesting ./tools/regtesting ---> e07159598eab Step 38/42 : COPY ./benchmarks ./benchmarks ---> e36ed4eb5648 Step 39/42 : COPY ./tools/docker/scripts/test_performance.sh ./tools/docker/scripts/plot_performance.py ./ ---> 4382090ede48 Step 40/42 : RUN ./test_performance.sh "local" 2>&1 | tee report.log ---> Running in 54f478a44d97 ========== Compiling CP2K ========== Compiling cp2k... done. Checking benchmark inputs... Found 75 input files and 0 errors. ========== Running Performance Test ========== Running H2O-64.inp with 1 threads and 32 ranks... done. Running H2O-64.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.040 0.040 91.719 91.719 qs_mol_dyn_low 1 2.0 0.003 0.003 91.146 91.146 qs_forces 11 3.9 0.001 0.001 91.104 91.104 qs_energies 11 4.9 0.001 0.001 84.743 84.743 scf_env_do_scf 11 5.9 0.002 0.002 73.015 73.015 velocity_verlet 10 3.0 0.002 0.002 58.666 58.666 scf_env_do_scf_inner_loop 108 6.5 0.014 0.014 58.651 58.651 dbcsr_multiply_generic 2286 12.5 0.176 0.176 22.328 22.328 qs_scf_new_mos 108 7.5 0.001 0.001 22.248 22.248 qs_scf_loop_do_ot 108 8.5 0.001 0.001 22.248 22.248 rebuild_ks_matrix 119 8.3 0.001 0.001 21.769 21.769 qs_ks_build_kohn_sham_matrix 119 9.3 0.014 0.014 21.768 21.768 ot_scf_mini 108 9.5 0.003 0.003 20.778 20.778 qs_rho_update_rho_low 119 7.7 0.001 0.001 20.526 20.526 calculate_rho_elec 119 8.7 0.970 0.970 20.525 20.525 qs_ks_update_qs_env 119 7.6 0.001 0.001 19.881 19.881 grid_collocate_task_list 119 9.7 15.649 15.649 15.649 15.649 init_scf_loop 11 6.9 0.000 0.000 14.241 14.241 sum_up_and_integrate 119 10.3 0.002 0.002 13.859 13.859 integrate_v_rspace 119 11.3 0.125 0.125 13.775 13.775 ot_mini 108 10.5 0.001 0.001 13.466 13.466 make_m2s 4572 13.5 0.049 0.049 11.706 11.706 prepare_preconditioner 11 7.9 0.000 0.000 11.674 11.674 make_preconditioner 11 8.9 0.000 0.000 11.674 11.674 grid_integrate_task_list 119 12.3 11.205 11.205 11.205 11.205 make_full_inverse_cholesky 11 9.9 0.017 0.017 10.495 10.495 qs_ot_get_derivative 108 11.5 0.001 0.001 7.375 7.375 fft_wrap_pw1pw2 1201 11.6 0.010 0.010 7.040 7.040 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 6.971 6.971 multiply_cannon 2286 13.5 0.253 0.253 6.382 6.382 dbcsr_make_dense_low 5837 15.5 0.096 0.096 6.362 6.362 make_dense_data 5837 16.5 5.666 5.666 6.252 6.252 ot_diis_step 108 11.5 0.003 0.003 6.088 6.088 fft_wrap_pw1pw2_140 487 12.2 1.009 1.009 6.027 6.027 make_images 4572 14.5 2.426 2.426 5.849 5.849 multiply_cannon_loop 2286 14.5 0.070 0.070 5.767 5.767 multiply_cannon_multrec 2286 15.5 5.637 5.637 5.696 5.696 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 5.622 5.622 apply_single 119 13.6 0.000 0.000 5.622 5.622 dbcsr_make_images_dense 3978 14.8 0.018 0.018 5.462 5.462 init_scf_run 11 5.9 0.003 0.003 4.141 4.141 scf_env_initial_rho_setup 11 6.9 0.002 0.002 4.138 4.138 density_rs2pw 119 9.7 0.004 0.004 3.906 3.906 dbcsr_copy 2102 12.0 0.292 0.292 3.875 3.875 qs_env_update_s_mstruct 11 6.9 0.000 0.000 3.766 3.766 dbcsr_complete_redistribute 329 12.2 1.851 1.851 3.723 3.723 wfi_extrapolate 11 7.9 0.001 0.001 3.512 3.512 dbcsr_copy_into_existing 22 7.9 3.492 3.492 3.492 3.492 qs_ot_get_p 119 10.4 0.001 0.001 3.479 3.479 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 3.433 3.433 qs_create_task_list 11 7.9 0.000 0.000 3.310 3.310 generate_qs_task_list 11 8.9 1.963 1.963 3.310 3.310 cp_fm_cholesky_invert 11 10.9 3.181 3.181 3.181 3.181 cp_fm_cholesky_decompose 22 10.9 3.089 3.089 3.089 3.089 copy_dbcsr_to_fm 153 11.3 0.003 0.003 3.085 3.085 fft3d_s 1202 13.6 3.009 3.009 3.014 3.014 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 2.926 2.926 build_core_hamiltonian_matrix 11 6.9 0.001 0.001 2.779 2.779 potential_pw2rs 119 12.3 0.050 0.050 2.446 2.446 transfer_dbcsr_to_fm 11 10.9 0.000 0.000 2.435 2.435 qs_ot_get_derivative_diag 49 12.0 0.001 0.001 2.321 2.321 qs_ot_get_derivative_taylor 59 13.0 0.002 0.002 2.311 2.311 pw_poisson_solve 119 10.3 0.002 0.002 2.243 2.243 qs_ot_p2m_diag 50 11.0 0.154 0.154 2.030 2.030 dbcsr_data_release 279532 16.0 1.970 1.970 1.970 1.970 copy_fm_to_dbcsr 176 11.2 0.001 0.001 1.881 1.881 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.009 0.026 48.066 48.079 qs_mol_dyn_low 1 2.0 0.003 0.003 47.935 47.940 qs_forces 11 3.9 0.001 0.002 47.893 47.894 qs_energies 11 4.9 0.001 0.001 44.825 44.826 scf_env_do_scf 11 5.9 0.000 0.002 41.335 41.336 scf_env_do_scf_inner_loop 108 6.5 0.003 0.021 38.071 38.071 velocity_verlet 10 3.0 0.001 0.003 27.530 27.531 rebuild_ks_matrix 119 8.3 0.000 0.001 17.561 17.600 qs_ks_build_kohn_sham_matrix 119 9.3 0.016 0.017 17.560 17.599 qs_ks_update_qs_env 119 7.6 0.001 0.001 15.637 15.678 dbcsr_multiply_generic 2286 12.5 0.075 0.078 14.511 14.633 qs_rho_update_rho_low 119 7.7 0.001 0.001 13.218 13.224 calculate_rho_elec 119 8.7 0.030 0.031 13.218 13.224 sum_up_and_integrate 119 10.3 0.002 0.002 12.992 13.026 integrate_v_rspace 119 11.3 0.004 0.005 12.970 13.006 qs_scf_new_mos 108 7.5 0.001 0.001 11.894 11.929 qs_scf_loop_do_ot 108 8.5 0.001 0.001 11.893 11.929 ot_scf_mini 108 9.5 0.002 0.002 11.234 11.272 multiply_cannon 2286 13.5 0.140 0.147 10.418 10.813 multiply_cannon_loop 2286 14.5 0.096 0.110 9.808 10.058 grid_collocate_task_list 119 9.7 8.959 9.286 8.959 9.286 grid_integrate_task_list 119 12.3 8.600 8.869 8.600 8.869 mp_waitall_1 158411 16.6 7.936 8.314 7.936 8.314 ot_mini 108 10.5 0.001 0.001 6.640 6.677 multiply_cannon_metrocomm3 18288 15.5 0.043 0.048 5.990 6.352 density_rs2pw 119 9.7 0.005 0.006 3.804 4.235 multiply_cannon_multrec 18288 15.5 3.400 3.730 3.411 3.742 potential_pw2rs 119 12.3 0.006 0.007 3.571 3.588 fft_wrap_pw1pw2 1201 11.6 0.014 0.016 3.346 3.417 qs_ot_get_derivative 108 11.5 0.001 0.001 3.369 3.406 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 3.224 3.301 apply_single 119 13.6 0.000 0.000 3.224 3.301 ot_diis_step 108 11.5 0.003 0.003 3.248 3.248 init_scf_loop 11 6.9 0.000 0.000 3.245 3.245 fft_wrap_pw1pw2_140 487 12.2 0.130 0.148 2.749 2.833 make_m2s 4572 13.5 0.046 0.050 2.656 2.737 transfer_rs2pw 487 10.6 0.007 0.007 2.256 2.690 transfer_pw2rs 487 13.2 0.005 0.006 2.600 2.611 fft3d_ps 1201 13.6 1.160 1.246 2.433 2.553 init_scf_run 11 5.9 0.000 0.004 2.446 2.446 scf_env_initial_rho_setup 11 6.9 0.000 0.003 2.445 2.446 make_images 4572 14.5 0.116 0.121 2.318 2.394 mp_waitany 9880 13.7 1.865 2.294 1.865 2.294 wfi_extrapolate 11 7.9 0.001 0.001 2.215 2.215 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 2.080 2.088 transfer_rs2pw_140 130 11.5 0.190 0.214 1.334 1.772 qs_ot_get_p 119 10.4 0.001 0.001 1.633 1.698 mp_alltoall_d11v 2130 13.8 1.094 1.415 1.094 1.415 transfer_pw2rs_140 130 13.9 0.374 0.448 1.354 1.392 prepare_preconditioner 11 7.9 0.000 0.000 1.357 1.379 make_preconditioner 11 8.9 0.000 0.000 1.357 1.379 make_images_data 4572 15.5 0.036 0.043 1.259 1.378 make_full_inverse_cholesky 11 9.9 0.000 0.000 1.220 1.244 mp_alltoall_z22v 1201 15.6 1.008 1.213 1.008 1.213 hybrid_alltoall_any 4725 16.4 0.066 0.198 1.076 1.208 rs_gather_matrices 119 12.3 0.076 0.094 0.758 1.082 qs_ot_get_derivative_diag 49 12.0 0.001 0.001 1.049 1.079 mp_sum_l 11298 13.2 0.949 1.066 0.949 1.066 transfer_pw2rs_50 119 14.3 0.338 0.366 0.951 0.980 mp_sum_d 4139 12.0 0.696 0.965 0.696 0.965 cp_dbcsr_sm_fm_multiply 37 9.5 0.001 0.001 0.961 0.963 ------------------------------------------------------------------------------- Plot: name="H2O-64_timings_32omp", title="Timings of H2O-64 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32omp", name="rest", label="rest", y=50.07, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=15.649, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=11.205, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="make_dense_data", label="make_dense_data", y=5.666, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=5.637, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=3.492, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 Plot: name="H2O-64_timings_32mpi", title="Timings of H2O-64 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32mpi", name="rest", label="rest", y=17.306000000000008, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=8.959, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=8.6, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="make_dense_data", label="make_dense_data", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=3.4, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=7.936, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitany", label="mp_waitany", y=1.865, yerr=0.0 Running H2O-64_nonortho.inp with 1 threads and 32 ranks... done. Running H2O-64_nonortho.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_nonortho_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.043 0.043 117.701 117.701 qs_mol_dyn_low 1 2.0 0.003 0.003 117.041 117.041 qs_forces 11 3.9 0.002 0.002 116.997 116.997 qs_energies 11 4.9 0.001 0.001 108.429 108.429 scf_env_do_scf 11 5.9 0.002 0.002 93.836 93.836 scf_env_do_scf_inner_loop 96 6.5 0.012 0.012 77.394 77.394 velocity_verlet 10 3.0 0.002 0.002 75.797 75.797 rebuild_ks_matrix 107 8.3 0.001 0.001 35.276 35.276 qs_ks_build_kohn_sham_matrix 107 9.3 0.013 0.013 35.275 35.275 qs_rho_update_rho_low 107 7.7 0.001 0.001 33.654 33.654 calculate_rho_elec 107 8.7 0.876 0.876 33.653 33.653 qs_ks_update_qs_env 107 7.6 0.001 0.001 31.398 31.398 grid_collocate_task_list 107 9.7 28.995 28.995 28.995 28.995 sum_up_and_integrate 107 10.3 0.001 0.001 27.892 27.892 integrate_v_rspace 107 11.3 0.098 0.098 27.749 27.749 grid_integrate_task_list 107 12.3 25.147 25.147 25.147 25.147 dbcsr_multiply_generic 1966 12.4 0.164 0.164 20.436 20.436 qs_scf_new_mos 96 7.5 0.001 0.001 19.863 19.863 qs_scf_loop_do_ot 96 8.5 0.001 0.001 19.862 19.862 ot_scf_mini 96 9.5 0.002 0.002 18.481 18.481 init_scf_loop 11 6.9 0.000 0.000 16.282 16.282 ot_mini 96 10.5 0.001 0.001 12.323 12.323 prepare_preconditioner 11 7.9 0.000 0.000 11.835 11.835 make_preconditioner 11 8.9 0.000 0.000 11.835 11.835 make_full_inverse_cholesky 11 9.9 0.030 0.030 10.600 10.600 make_m2s 3932 13.4 0.042 0.042 10.561 10.561 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 7.687 7.687 fft_wrap_pw1pw2 1081 11.6 0.009 0.009 7.091 7.091 qs_ot_get_derivative 96 11.5 0.001 0.001 6.969 6.969 init_scf_run 11 5.9 0.003 0.003 6.182 6.182 scf_env_initial_rho_setup 11 6.9 0.001 0.001 6.179 6.179 fft_wrap_pw1pw2_140 439 12.2 1.103 1.103 6.179 6.179 multiply_cannon 1966 13.4 0.231 0.231 6.011 6.011 wfi_extrapolate 11 7.9 0.001 0.001 5.542 5.542 make_images 3932 14.4 2.185 2.185 5.506 5.506 dbcsr_make_dense_low 4961 15.5 0.099 0.099 5.474 5.474 multiply_cannon_loop 1966 14.4 0.075 0.075 5.446 5.446 multiply_cannon_multrec 1966 15.4 5.316 5.316 5.370 5.370 make_dense_data 4961 16.5 4.799 4.799 5.363 5.363 ot_diis_step 96 11.5 0.003 0.003 5.351 5.351 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 5.061 5.061 apply_single 107 13.6 0.000 0.000 5.060 5.060 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 4.817 4.817 dbcsr_make_images_dense 3386 14.7 0.015 0.015 4.696 4.696 qs_env_update_s_mstruct 11 6.9 0.000 0.000 4.313 4.313 dbcsr_copy 1855 11.9 0.247 0.247 3.952 3.952 dbcsr_complete_redistribute 317 12.2 1.837 1.837 3.945 3.945 density_rs2pw 107 9.7 0.004 0.004 3.783 3.783 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 3.749 3.749 qs_create_task_list 11 7.9 0.000 0.000 3.737 3.737 generate_qs_task_list 11 8.9 2.400 2.400 3.736 3.736 dbcsr_copy_into_existing 22 7.9 3.645 3.645 3.645 3.645 cp_fm_cholesky_decompose 22 10.9 3.144 3.144 3.144 3.144 cp_fm_cholesky_invert 11 10.9 3.127 3.127 3.127 3.127 copy_dbcsr_to_fm 147 11.2 0.003 0.003 3.117 3.117 fft3d_s 1082 13.6 3.056 3.056 3.062 3.062 build_core_hamiltonian_matrix 11 6.9 0.001 0.001 2.891 2.891 qs_ot_get_p 107 10.4 0.001 0.001 2.860 2.860 potential_pw2rs 107 12.3 0.045 0.045 2.504 2.504 transfer_dbcsr_to_fm 11 10.9 0.000 0.000 2.412 2.412 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_nonortho_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.009 0.032 76.100 76.112 qs_mol_dyn_low 1 2.0 0.003 0.004 75.962 75.967 qs_forces 11 3.9 0.002 0.002 75.916 75.917 qs_energies 11 4.9 0.001 0.001 70.857 70.867 scf_env_do_scf 11 5.9 0.001 0.002 65.682 65.682 scf_env_do_scf_inner_loop 96 6.5 0.003 0.020 60.531 60.532 velocity_verlet 10 3.0 0.001 0.003 45.482 45.484 rebuild_ks_matrix 107 8.3 0.001 0.001 32.428 32.475 qs_ks_build_kohn_sham_matrix 107 9.3 0.016 0.017 32.428 32.474 qs_ks_update_qs_env 107 7.6 0.001 0.001 28.571 28.617 sum_up_and_integrate 107 10.3 0.002 0.002 27.957 28.002 integrate_v_rspace 107 11.3 0.004 0.004 27.934 27.979 qs_rho_update_rho_low 107 7.7 0.001 0.001 26.534 26.545 calculate_rho_elec 107 8.7 0.027 0.029 26.533 26.544 grid_integrate_task_list 107 12.3 22.812 23.488 22.812 23.488 grid_collocate_task_list 107 9.7 21.794 22.398 21.794 22.398 dbcsr_multiply_generic 1966 12.4 0.069 0.073 14.032 14.289 qs_scf_new_mos 96 7.5 0.001 0.001 11.384 11.464 qs_scf_loop_do_ot 96 8.5 0.001 0.001 11.383 11.464 ot_scf_mini 96 9.5 0.002 0.002 10.776 10.857 multiply_cannon 1966 13.4 0.129 0.144 10.140 10.550 multiply_cannon_loop 1966 14.4 0.092 0.104 9.514 9.771 mp_waitall_1 136719 16.5 7.714 8.215 7.714 8.215 ot_mini 96 10.5 0.001 0.001 6.465 6.553 multiply_cannon_metrocomm3 15728 15.4 0.041 0.047 5.872 6.219 init_scf_loop 11 6.9 0.000 0.000 5.132 5.133 density_rs2pw 107 9.7 0.005 0.005 4.301 4.894 init_scf_run 11 5.9 0.000 0.006 4.047 4.047 scf_env_initial_rho_setup 11 6.9 0.000 0.005 4.046 4.047 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 4.011 4.020 wfi_extrapolate 11 7.9 0.001 0.001 3.690 3.691 multiply_cannon_multrec 15728 15.4 3.242 3.575 3.253 3.587 potential_pw2rs 107 12.3 0.006 0.006 3.485 3.499 fft_wrap_pw1pw2 1081 11.6 0.014 0.016 3.319 3.391 transfer_rs2pw 439 10.6 0.006 0.007 2.814 3.385 qs_ot_get_derivative 96 11.5 0.001 0.001 3.259 3.337 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 3.199 3.297 apply_single 107 13.6 0.000 0.000 3.199 3.297 ot_diis_step 96 11.5 0.003 0.003 3.178 3.178 mp_waitany 8968 13.7 2.501 3.048 2.501 3.048 fft_wrap_pw1pw2_140 439 12.2 0.133 0.151 2.732 2.833 make_m2s 3932 13.4 0.042 0.048 2.568 2.668 mp_alltoall_d11v 1998 13.7 1.923 2.643 1.923 2.643 transfer_pw2rs 439 13.2 0.005 0.006 2.525 2.537 transfer_rs2pw_140 118 11.5 0.187 0.213 1.959 2.534 fft3d_ps 1081 13.6 1.136 1.234 2.430 2.533 make_images 3932 14.4 0.106 0.110 2.256 2.329 rs_gather_matrices 107 12.3 0.079 0.094 1.593 2.295 qs_ot_get_p 107 10.4 0.001 0.001 1.502 1.643 ------------------------------------------------------------------------------- Plot: name="H2O-64_nonortho_timings_32omp", title="Timings of H2O-64_nonortho with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="rest", label="rest", y=49.79899999999999, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=28.995, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=25.147, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=5.316, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="make_dense_data", label="make_dense_data", y=4.799, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=3.645, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="H2O-64_nonortho_timings_32mpi", title="Timings of H2O-64_nonortho with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="rest", label="rest", y=18.037, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=21.794, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=22.812, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=3.242, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="make_dense_data", label="make_dense_data", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitany", label="mp_waitany", y=2.501, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=7.714, yerr=0.0 Running H2O-hyb.inp with 1 threads and 32 ranks... done. Running H2O-hyb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-hyb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.243 0.243 145.988 145.988 qs_energies 1 2.0 0.000 0.000 145.098 145.098 scf_env_do_scf 1 3.0 0.000 0.000 143.764 143.764 qs_ks_update_qs_env 8 5.0 0.000 0.000 138.096 138.096 rebuild_ks_matrix 7 6.0 0.000 0.000 138.027 138.027 qs_ks_build_kohn_sham_matrix 7 7.0 0.001 0.001 138.027 138.027 hfx_ks_matrix 7 8.0 0.000 0.000 128.250 128.250 integrate_four_center 7 9.0 1.807 1.807 128.223 128.223 integrate_four_center_main 7 10.0 0.773 0.773 112.946 112.946 integrate_four_center_bin 452 11.0 112.173 112.173 112.173 112.173 scf_env_do_scf_inner_loop 7 4.0 0.001 0.001 81.117 81.117 init_scf_loop 1 4.0 0.000 0.000 62.637 62.637 integrate_four_center_load 7 10.0 0.001 0.001 13.159 13.159 hfx_load_balance 1 11.0 0.001 0.001 13.158 13.158 hfx_load_balance_count 1 12.0 6.577 6.577 6.577 6.577 hfx_load_balance_bin 1 12.0 6.563 6.563 6.563 6.563 qs_vxc_create 14 8.0 0.000 0.000 3.819 3.819 xc_vxc_pw_create 14 9.0 0.123 0.123 3.819 3.819 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-hyb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.227 0.259 146.184 146.195 qs_energies 1 2.0 0.000 0.000 145.712 145.721 scf_env_do_scf 1 3.0 0.000 0.000 145.325 145.327 qs_ks_update_qs_env 8 5.0 0.000 0.000 142.655 142.657 rebuild_ks_matrix 7 6.0 0.000 0.000 142.644 142.646 qs_ks_build_kohn_sham_matrix 7 7.0 0.002 0.003 142.644 142.646 hfx_ks_matrix 7 8.0 0.000 0.001 136.201 136.204 integrate_four_center 7 9.0 0.058 0.358 136.189 136.190 integrate_four_center_main 7 10.0 0.005 0.005 114.607 121.701 integrate_four_center_bin 448 11.0 114.602 121.697 114.602 121.697 scf_env_do_scf_inner_loop 7 4.0 0.001 0.002 85.900 85.900 init_scf_loop 1 4.0 0.000 0.000 59.423 59.424 mp_sync 56 11.2 7.170 14.130 7.170 14.130 integrate_four_center_load 7 10.0 0.000 0.000 13.660 13.661 hfx_load_balance 1 11.0 0.001 0.001 13.660 13.661 hfx_load_balance_bin 1 12.0 6.577 6.832 6.577 6.832 hfx_load_balance_count 1 12.0 6.523 6.816 6.523 6.816 ------------------------------------------------------------------------------- Plot: name="H2O-hyb_timings_32omp", title="Timings of H2O-hyb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32omp", name="rest", label="rest", y=17.852000000000004, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_bin", label="integrate_four_center_bin", y=112.173, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_count", label="hfx_load_balance_count", y=6.577, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=6.563, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center", label="integrate_four_center", y=1.807, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_main", label="integrate_four_center_main", y=0.773, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="CP2K", label="CP2K", y=0.243, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 Plot: name="H2O-hyb_timings_32mpi", title="Timings of H2O-hyb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32mpi", name="rest", label="rest", y=11.021999999999991, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_bin", label="integrate_four_center_bin", y=114.602, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_count", label="hfx_load_balance_count", y=6.523, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=6.577, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center", label="integrate_four_center", y=0.058, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_main", label="integrate_four_center_main", y=0.005, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="CP2K", label="CP2K", y=0.227, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="mp_sync", label="mp_sync", y=7.17, yerr=0.0 Running GW_PBE_4benzene.inp with 1 threads and 32 ranks... done. Running GW_PBE_4benzene.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/GW_PBE_4benzene_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.016 0.016 85.262 85.262 qs_energies 1 2.0 0.000 0.000 84.862 84.862 mp2_main 1 3.0 0.000 0.000 81.725 81.725 mp2_gpw_main 1 4.0 0.000 0.000 81.603 81.603 rpa_ri_compute_en 1 5.0 0.000 0.000 77.742 77.742 rpa_num_int 1 6.0 0.001 0.001 77.736 77.736 compute_mat_P_omega 1 7.0 0.004 0.004 68.070 68.070 compute_mat_P_omega_contract 10 8.0 9.613 9.613 67.807 67.807 dbt_total 2336 9.6 0.016 0.016 53.048 53.048 dbt_contract 787 11.0 0.056 0.056 44.773 44.773 dbt_tas_total 1149 12.2 0.295 0.295 42.977 42.977 dbt_tas_multiply 807 12.1 0.002 0.002 41.681 41.681 dbt_tas_dbm 807 14.1 0.004 0.004 34.588 34.588 dbm_multiply 807 16.1 34.579 34.579 34.579 34.579 dbt_tas_mm_1N 524 15.1 0.001 0.001 26.126 26.126 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 23.914 23.914 compute_mat_P_omega_calc_M_occ 250 9.0 9.514 9.514 19.842 19.842 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 7.902 7.902 dbt_copy 1103 10.7 0.137 0.137 6.959 6.959 dbt_tas_mm_2 251 15.0 0.002 0.002 6.844 6.844 compute_QP_energies 1 7.0 0.000 0.000 5.457 5.457 compute_self_energy_cubic_gw 1 8.0 0.060 0.060 5.455 5.455 dbt_tas_reserve_blocks_index 3261 14.3 0.490 0.490 4.508 4.508 contract_cubic_gw 21 9.0 0.000 0.000 4.320 4.320 dbm_reserve_blocks 3628 15.3 4.100 4.100 4.100 4.100 mp2_ri_gpw_compute_in 1 5.0 0.002 0.002 3.854 3.854 dbt_reserve_blocks_index 2280 13.1 0.062 0.062 3.383 3.383 dbt_reserve_blocks_index_array 2222 12.2 0.011 0.011 3.323 3.323 scf_env_do_scf 1 3.0 0.000 0.000 3.008 3.008 scf_env_do_scf_inner_loop 17 4.0 0.002 0.002 3.008 3.008 dbt_crop 1042 12.0 1.771 1.771 2.989 2.989 dbt_tas_copy 574 11.4 1.579 1.579 2.773 2.773 convert_to_new_pgrid 2421 14.1 0.172 0.172 2.390 2.390 dbm_copy 1614 15.1 2.217 2.217 2.217 2.217 dbt_reshape 278 11.9 1.202 1.202 2.202 2.202 dbt_tas_reshape 367 15.0 0.034 0.034 2.101 2.101 compute_mat_P_omega_copy_M_occ 250 9.0 0.001 0.001 1.799 1.799 compute_W_cubic_GW 10 7.0 0.009 0.009 1.740 1.740 ------------------------------------------------------------------------------- From /workspace/artifacts/GW_PBE_4benzene_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.032 39.042 39.055 qs_energies 1 2.0 0.000 0.000 38.899 38.901 mp2_main 1 3.0 0.000 0.001 37.767 37.768 mp2_gpw_main 1 4.0 0.000 0.002 37.619 37.621 rpa_ri_compute_en 1 5.0 0.000 0.000 36.205 36.207 rpa_num_int 1 6.0 0.000 0.003 36.204 36.206 dbt_total 2336 9.6 0.017 0.020 31.858 31.900 compute_mat_P_omega 1 7.0 0.001 0.006 30.463 30.493 compute_mat_P_omega_contract 10 8.0 0.436 0.461 30.292 30.298 dbt_contract 787 11.0 0.045 0.047 23.193 23.204 dbt_tas_total 1149 12.2 0.086 0.093 20.650 20.662 dbt_tas_multiply 807 12.1 0.002 0.003 20.420 20.432 dbt_tas_dbm 807 14.1 0.003 0.004 14.938 14.954 dbm_multiply 807 16.1 11.712 12.596 11.712 12.596 compute_mat_P_omega_calc_M_occ 250 9.0 0.422 0.447 8.911 8.912 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 8.794 8.796 mp_sync 8688 11.6 6.429 7.712 6.429 7.712 dbt_copy 1149 10.8 0.015 0.015 7.047 7.536 dbt_reshape 1136 11.8 2.846 3.033 6.706 7.184 dbt_tas_mm_2 251 15.0 0.002 0.002 6.962 6.965 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 6.240 6.241 dbt_tas_mm_1N 524 15.1 0.002 0.002 5.307 6.069 compute_QP_energies 1 7.0 0.000 0.000 3.776 3.777 compute_self_energy_cubic_gw 1 8.0 0.003 0.003 3.775 3.776 dbt_communicate_buffer 1136 12.8 0.052 0.056 2.877 3.111 mp_waitall_2 3812 15.3 2.905 3.107 2.905 3.107 contract_cubic_gw 21 9.0 0.000 0.000 2.666 2.669 dbt_reserve_blocks_index 2887 13.1 0.075 0.086 1.968 2.267 dbt_reserve_blocks_index_array 2829 12.2 0.008 0.009 1.958 2.257 dbt_tas_reserve_blocks_index 3347 14.5 0.433 0.466 1.948 2.246 dbm_reserve_blocks 3752 15.4 1.615 1.896 1.615 1.896 dbt_crop 1042 12.0 0.941 1.065 1.566 1.796 mp2_ri_gpw_compute_in 1 5.0 0.003 0.005 1.412 1.412 dbt_tas_replicate 405 14.1 0.561 0.746 1.198 1.288 compute_mat_P_omega_copy_M_vir 250 9.0 0.001 0.001 1.272 1.282 compute_mat_P_omega_copy_M_occ 250 9.0 0.001 0.001 1.196 1.199 parallel_gemm_fm 105 8.4 0.000 0.000 1.150 1.169 parallel_gemm_fm_cosma 105 9.4 1.150 1.169 1.150 1.169 scf_env_do_scf 1 3.0 0.000 0.000 1.080 1.080 scf_env_do_scf_inner_loop 17 4.0 0.001 0.003 1.080 1.080 convert_to_new_pgrid 2421 14.1 0.027 0.032 0.869 1.042 mp_max_i 2009 9.8 0.814 1.017 0.814 1.017 dbm_copy 1608 15.1 0.836 1.006 0.836 1.006 compute_W_cubic_GW 10 7.0 0.001 0.001 0.895 0.910 mp_sum_l 6165 12.9 0.705 0.802 0.705 0.802 ------------------------------------------------------------------------------- Plot: name="GW_PBE_4benzene_timings_32omp", title="Timings of GW_PBE_4benzene with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="rest", label="rest", y=24.037, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=34.579, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="compute_mat_P_omega_contract", label="compute_mat_P_omega_contract", y=9.613, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="compute_mat_P_omega_calc_M_occ", label="compute_mat_P_omega_calc_M_occ", y=9.514, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=4.1, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_copy", label="dbm_copy", y=2.217, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbt_reshape", label="dbt_reshape", y=1.202, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 Plot: name="GW_PBE_4benzene_timings_32mpi", title="Timings of GW_PBE_4benzene with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="rest", label="rest", y=11.841000000000001, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=11.712, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="compute_mat_P_omega_contract", label="compute_mat_P_omega_contract", y=0.436, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="compute_mat_P_omega_calc_M_occ", label="compute_mat_P_omega_calc_M_occ", y=0.422, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=1.615, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_copy", label="dbm_copy", y=0.836, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbt_reshape", label="dbt_reshape", y=2.846, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=2.905, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_sync", label="mp_sync", y=6.429, yerr=0.0 Running RI-HFX_H2O-32.inp with 1 threads and 32 ranks... done. Running RI-HFX_H2O-32.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/RI-HFX_H2O-32_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.024 0.024 220.894 220.894 qs_forces 1 2.0 0.000 0.000 220.357 220.357 rebuild_ks_matrix 7 6.6 0.000 0.000 218.801 218.801 qs_ks_build_kohn_sham_matrix 7 7.6 0.001 0.001 218.801 218.801 hfx_ks_matrix 7 8.6 0.000 0.000 216.749 216.749 hfx_ri_update_ks 7 9.6 0.000 0.000 172.708 172.708 hfx_ri_update_ks_Pmat 7 10.6 33.577 33.577 172.704 172.704 dbt_total 849 11.0 0.006 0.006 159.377 159.377 qs_energies 1 3.0 0.000 0.000 153.147 153.147 scf_env_do_scf 1 4.0 0.001 0.001 152.803 152.803 qs_ks_update_qs_env 8 6.0 0.000 0.000 151.650 151.650 dbt_contract 207 12.4 0.053 0.053 138.831 138.831 dbt_tas_total 369 13.4 1.484 1.484 137.602 137.602 dbt_tas_multiply 216 13.5 0.001 0.001 133.422 133.422 dbt_tas_dbm 216 15.5 0.001 0.001 121.086 121.086 dbm_multiply 216 17.5 121.083 121.083 121.083 121.083 hfx_ri_update_ks_Pmat_KS 63 11.6 0.001 0.001 113.951 113.951 dbt_tas_mm_2 91 16.5 0.001 0.001 105.732 105.732 scf_env_do_scf_inner_loop 6 5.0 0.001 0.001 96.254 96.254 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 67.155 67.155 init_scf_loop 2 5.0 0.000 0.000 56.546 56.546 hfx_ri_update_forces 1 7.0 1.634 1.634 44.038 44.038 hfx_ri_forces_Pmat_3c 1 8.0 5.095 5.095 25.903 25.903 dbt_copy 423 11.8 0.094 0.094 16.356 16.356 precalc_derivatives 1 8.0 2.289 2.289 13.835 13.835 dbt_reshape 132 13.2 6.913 6.913 11.095 11.095 dbt_tas_mm_3T 77 17.1 0.000 0.000 10.646 10.646 hfx_ri_pre_scf_Pmat 1 12.0 0.000 0.000 9.458 9.458 dbt_tas_reserve_blocks_index 1323 15.4 1.064 1.064 8.230 8.230 build_3c_derivatives 3 9.0 2.425 2.425 7.790 7.790 dbm_reserve_blocks 1491 16.2 7.449 7.449 7.449 7.449 hfx_ri_update_ks_Pmat_Px3C 63 11.6 0.000 0.000 7.221 7.221 dbt_reserve_blocks_index 846 14.4 0.098 0.098 6.372 6.372 dbt_reserve_blocks_index_array 816 13.5 0.008 0.008 6.247 6.247 dbt_crop 372 13.7 3.022 3.022 5.114 5.114 dbt_tas_reshape 168 14.5 0.003 0.003 4.742 4.742 hfx_ri_update_ks_Pmat_copy_2 63 11.6 0.000 0.000 4.698 4.698 ------------------------------------------------------------------------------- From /workspace/artifacts/RI-HFX_H2O-32_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.008 0.036 50.907 50.919 qs_forces 1 2.0 0.000 0.000 50.712 50.713 rebuild_ks_matrix 7 6.6 0.000 0.000 49.847 49.848 qs_ks_build_kohn_sham_matrix 7 7.6 0.001 0.002 49.847 49.848 hfx_ks_matrix 7 8.6 0.000 0.000 48.716 48.721 dbt_total 849 11.0 0.007 0.007 43.208 43.214 dbt_contract 207 12.4 0.027 0.028 32.765 32.776 hfx_ri_update_ks 7 9.6 0.000 0.000 29.542 29.542 hfx_ri_update_ks_Pmat 7 10.6 1.331 1.418 29.538 29.538 dbt_tas_total 369 13.4 0.062 0.159 28.914 28.915 dbt_tas_multiply 216 13.5 0.001 0.001 28.418 28.419 qs_energies 1 3.0 0.000 0.000 28.072 28.073 scf_env_do_scf 1 4.0 0.000 0.001 27.896 27.896 qs_ks_update_qs_env 8 6.0 0.000 0.000 27.218 27.219 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 22.630 22.631 dbt_tas_dbm 216 15.5 0.001 0.001 22.410 22.417 dbm_multiply 216 17.5 19.910 21.092 19.910 21.092 hfx_ri_update_forces 1 7.0 0.056 0.061 19.173 19.179 scf_env_do_scf_inner_loop 6 5.0 0.000 0.001 15.674 15.674 hfx_ri_forces_Pmat_3c 1 8.0 0.158 0.171 14.124 14.143 hfx_ri_update_ks_Pmat_KS 63 11.6 0.001 0.005 12.877 12.878 init_scf_loop 2 5.0 0.000 0.000 12.221 12.221 dbt_tas_mm_2 91 16.5 0.001 0.001 10.358 10.363 dbt_copy 539 12.5 0.012 0.014 9.127 9.704 mp_sync 2901 12.8 5.510 7.767 5.510 7.767 dbt_reshape 393 13.9 3.679 3.767 7.008 7.407 dbt_tas_mm_3T 77 17.1 0.000 0.000 5.557 6.311 hfx_ri_update_ks_Pmat_Px3C 63 11.6 0.000 0.000 5.378 5.378 dbt_tas_mm_3N 37 15.4 0.000 0.000 4.491 4.641 precalc_derivatives 1 8.0 0.091 0.100 3.854 3.854 dbt_tas_reserve_blocks_index 1471 15.8 0.890 0.914 3.283 3.791 hfx_ri_pre_scf_Pmat 1 12.0 0.000 0.000 3.768 3.769 mp_waitall_2 1318 16.2 3.128 3.416 3.128 3.416 dbm_reserve_blocks 1641 16.6 2.589 3.102 2.589 3.102 dbt_reserve_blocks_index 1107 14.8 0.113 0.121 2.704 3.072 dbt_reserve_blocks_index_array 1077 13.9 0.005 0.006 2.676 3.045 dbt_crop 372 13.7 1.886 1.938 2.725 2.901 dbt_communicate_buffer 393 14.9 0.012 0.015 2.238 2.486 build_3c_derivatives 3 9.0 0.226 0.244 2.120 2.125 hfx_ri_pre_scf_Pmat_RIx3C 9 13.0 0.000 0.000 1.972 1.978 hfx_ri_update_ks_Pmat_copy_2 63 11.6 0.000 0.000 1.923 1.923 dbt_tas_replicate 170 15.1 0.634 0.677 1.651 1.743 convert_to_new_pgrid 648 15.5 0.039 0.075 1.331 1.650 dbm_copy 452 16.3 1.175 1.514 1.175 1.514 dbt_tas_copy 146 12.6 0.664 0.721 1.304 1.479 mp_sum_l 6385 13.7 0.810 1.102 0.810 1.102 dbt_tas_communicate_buffer 370 16.3 0.012 0.012 1.019 1.083 ------------------------------------------------------------------------------- Plot: name="RI-HFX_H2O-32_timings_32omp", title="Timings of RI-HFX_H2O-32 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="rest", label="rest", y=46.77699999999999, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=121.083, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=33.577, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=7.449, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_reshape", label="dbt_reshape", y=6.913, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="hfx_ri_forces_Pmat_3c", label="hfx_ri_forces_Pmat_3c", y=5.095, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 Plot: name="RI-HFX_H2O-32_timings_32mpi", title="Timings of RI-HFX_H2O-32 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="rest", label="rest", y=14.601999999999997, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=19.91, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=1.331, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=2.589, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_reshape", label="dbt_reshape", y=3.679, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="hfx_ri_forces_Pmat_3c", label="hfx_ri_forces_Pmat_3c", y=0.158, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_sync", label="mp_sync", y=5.51, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=3.128, yerr=0.0 Running RI-MP2_ammonia.inp with 1 threads and 32 ranks... done. Running RI-MP2_ammonia.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/RI-MP2_ammonia_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.022 0.022 184.754 184.754 qs_energies 1 2.0 0.000 0.000 184.560 184.560 mp2_main 1 3.0 0.000 0.000 179.464 179.464 mp2_gpw_main 1 4.0 0.001 0.001 179.058 179.058 mp2_ri_gpw_compute_in 1 5.0 0.397 0.397 123.721 123.721 mp2_ri_gpw_compute_in_loop 1 6.0 0.010 0.010 113.850 113.850 mp2_eri_3c_integrate_gpw 2656 7.0 0.014 0.014 84.379 84.379 integrate_v_rspace 2666 8.0 0.725 0.725 71.075 71.075 grid_integrate_task_list 2666 9.0 67.978 67.978 67.978 67.978 mp2_ri_gpw_compute_en 1 5.0 0.089 0.089 55.314 55.314 mp2_ri_gpw_compute_en_RI_loop 1 6.0 11.963 11.963 53.362 53.362 mp2_ri_gpw_compute_en_expansio 2080 7.0 2.196 2.196 32.801 32.801 local_gemm 2080 8.0 30.606 30.606 30.606 30.606 dbcsr_multiply_generic 5322 8.0 0.168 0.168 22.801 22.801 ao_to_mo_and_store_B_mult_1 2656 7.0 0.010 0.010 22.778 22.778 fft_wrap_pw1pw2 53228 10.4 0.132 0.132 12.785 12.785 calculate_wavefunction 2656 8.0 7.955 7.955 11.831 11.831 multiply_cannon 5322 9.0 0.483 0.483 11.598 11.598 multiply_cannon_loop 5322 10.0 0.204 0.204 10.185 10.185 get_2c_integrals 1 6.0 0.000 0.000 9.473 9.473 fft_wrap_pw1pw2_20 21271 11.4 1.079 1.079 9.329 9.329 make_m2s 10644 9.0 0.064 0.064 8.959 8.959 compute_2c_integrals 1 7.0 0.006 0.006 8.608 8.608 make_images 10644 10.0 3.494 3.494 8.607 8.607 compute_2c_integrals_loop_lm 1 8.0 0.012 0.012 8.594 8.594 mp2_eri_2c_integrate_gpw 1 9.0 0.855 0.855 8.582 8.582 multiply_cannon_multrec 5322 11.0 8.372 8.372 8.413 8.413 fft3d_s 53229 12.4 7.024 7.024 7.062 7.062 ao_to_mo_and_store_B_E_Ex_1 2656 7.0 2.467 2.467 6.574 6.574 mp2_ri_gpw_compute_en_ener 2080 7.0 6.097 6.097 6.097 6.097 copy_dbcsr_to_fm 2679 8.0 0.026 0.026 4.697 4.697 scf_env_do_scf 1 3.0 0.000 0.000 4.673 4.673 scf_env_do_scf_inner_loop 10 4.0 0.001 0.001 4.673 4.673 potential_pw2rs 5322 10.0 0.150 0.150 4.639 4.639 collocate_single_gaussian 2656 10.0 0.132 0.132 3.827 3.827 mp2_eri_2c_integrate_gpw_pot_l 2656 10.0 0.005 0.005 3.780 3.780 ------------------------------------------------------------------------------- From /workspace/artifacts/RI-MP2_ammonia_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.008 0.037 40.470 40.484 qs_energies 1 2.0 0.000 0.000 39.906 39.907 mp2_main 1 3.0 0.000 0.001 37.723 37.724 mp2_gpw_main 1 4.0 0.001 0.002 37.614 37.614 mp2_ri_gpw_compute_in 1 5.0 0.043 0.044 19.150 19.474 mp2_ri_gpw_compute_en 1 5.0 0.277 0.289 18.397 18.769 mp2_ri_gpw_compute_in_loop 1 6.0 0.001 0.001 17.806 18.131 mp2_ri_gpw_compute_en_RI_loop 1 6.0 2.523 3.008 17.169 17.176 mp2_eri_3c_integrate_gpw 83 7.0 0.001 0.001 15.479 15.770 integrate_v_rspace 93 8.1 0.120 0.133 15.357 15.643 grid_integrate_task_list 93 9.1 15.033 15.306 15.033 15.306 mp2_ri_gpw_compute_en_expansio 65 7.0 0.109 0.129 11.313 11.583 local_gemm 65 8.0 11.205 11.484 11.205 11.484 mp2_ri_gpw_compute_en_comm 17 7.0 0.078 0.103 2.957 4.208 mp_sendrecv_dm3 1054 8.0 2.391 3.777 2.391 3.777 dbcsr_multiply_generic 176 8.0 0.008 0.009 1.993 2.344 ao_to_mo_and_store_B_mult_1 83 7.0 0.001 0.001 1.972 2.327 scf_env_do_scf 1 3.0 0.000 0.000 2.044 2.045 scf_env_do_scf_inner_loop 10 4.0 0.000 0.002 2.044 2.045 get_2c_integrals 1 6.0 0.003 0.011 1.276 1.307 multiply_cannon 176 9.0 0.019 0.020 1.113 1.219 multiply_cannon_loop 176 10.0 0.002 0.003 1.051 1.156 make_m2s 352 9.0 0.003 0.004 0.840 1.083 make_images 352 10.0 0.053 0.064 0.827 1.069 qs_scf_new_mos 10 5.0 0.000 0.000 0.974 0.982 compute_2c_integrals 1 7.0 0.003 0.004 0.959 0.967 multiply_cannon_multrec 246 11.0 0.893 0.949 0.899 0.955 eigensolver 11 5.8 0.001 0.001 0.910 0.912 compute_2c_integrals_loop_lm 1 8.0 0.001 0.003 0.853 0.888 mp2_eri_2c_integrate_gpw 1 9.0 0.224 0.240 0.852 0.887 ------------------------------------------------------------------------------- Plot: name="RI-MP2_ammonia_timings_32omp", title="Timings of RI-MP2_ammonia with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="rest", label="rest", y=57.879999999999995, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=67.978, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="local_gemm", label="local_gemm", y=30.606, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp2_ri_gpw_compute_en_RI_loop", label="mp2_ri_gpw_compute_en_RI_loop", y=11.963, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=8.372, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="calculate_wavefunction", label="calculate_wavefunction", y=7.955, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp_sendrecv_dm3", label="mp_sendrecv_dm3", y=0.0, yerr=0.0 Plot: name="RI-MP2_ammonia_timings_32mpi", title="Timings of RI-MP2_ammonia with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="rest", label="rest", y=8.424999999999997, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=15.033, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="local_gemm", label="local_gemm", y=11.205, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp2_ri_gpw_compute_en_RI_loop", label="mp2_ri_gpw_compute_en_RI_loop", y=2.523, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=0.893, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="calculate_wavefunction", label="calculate_wavefunction", y=0.0, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp_sendrecv_dm3", label="mp_sendrecv_dm3", y=2.391, yerr=0.0 Running diag_cu144_broy.inp with 1 threads and 32 ranks... done. Running diag_cu144_broy.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/diag_cu144_broy_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.133 0.133 137.590 137.590 qs_energies 1 2.0 0.000 0.000 136.164 136.164 scf_env_do_scf 1 3.0 0.000 0.000 128.666 128.666 scf_env_do_scf_inner_loop 15 4.0 0.002 0.002 128.666 128.666 qs_ks_update_qs_env 15 5.0 0.000 0.000 58.327 58.327 rebuild_ks_matrix 15 6.0 0.000 0.000 58.098 58.098 qs_ks_build_kohn_sham_matrix 15 7.0 0.002 0.002 58.098 58.098 qs_scf_new_mos 15 5.0 0.000 0.000 45.308 45.308 qs_vxc_create 15 8.0 0.006 0.006 41.339 41.339 eigensolver 15 6.0 0.002 0.002 37.137 37.137 calculate_dispersion_nonloc 15 9.0 7.182 7.182 35.869 35.869 fft_wrap_pw1pw2 1086 10.0 0.016 0.016 30.971 30.971 cp_fm_diag_elpa 15 7.0 0.000 0.000 23.696 23.696 cp_fm_diag_elpa_base 15 8.0 21.021 21.021 23.696 23.696 fft_wrap_pw1pw2_150 765 11.0 7.018 7.018 22.760 22.760 qs_rho_update_rho_low 16 5.0 0.000 0.000 21.235 21.235 calculate_rho_elec 16 6.0 0.220 0.220 21.235 21.235 grid_collocate_task_list 16 7.0 19.375 19.375 19.375 19.375 sum_up_and_integrate 15 8.0 0.000 0.000 15.645 15.645 integrate_v_rspace 15 9.0 0.021 0.021 15.632 15.632 grid_integrate_task_list 15 10.0 14.882 14.882 14.882 14.882 fft3d_s 1087 12.0 12.139 12.139 12.148 12.148 cp_fm_cholesky_restore 45 7.0 11.025 11.025 11.025 11.025 fft_wrap_pw1pw2_200 197 11.3 1.772 1.772 7.907 7.907 pw_scatter_s 585 12.1 7.649 7.649 7.649 7.649 copy_dbcsr_to_fm 16 5.9 0.001 0.001 5.651 5.651 dbcsr_complete_redistribute 46 8.3 2.097 2.097 5.538 5.538 xc_vxc_pw_create 15 9.0 0.211 0.211 5.464 5.464 cp_fm_upper_to_full 30 8.0 5.088 5.088 5.088 5.088 gspace_mixing 14 5.0 0.172 0.172 4.926 4.926 vdW_energy 15 10.0 4.520 4.520 4.520 4.520 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 4.301 4.301 broyden_mixing 14 6.0 4.217 4.217 4.217 4.217 xc_pw_derive 90 11.0 0.001 0.001 3.904 3.904 build_core_hamiltonian_matrix 1 4.0 0.000 0.000 3.051 3.051 calculate_dm_sparse 15 6.0 0.019 0.019 2.878 2.878 cp_dbcsr_plus_fm_fm_t_native 15 7.0 0.000 0.000 2.791 2.791 ------------------------------------------------------------------------------- From /workspace/artifacts/diag_cu144_broy_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.016 0.034 66.570 66.582 qs_energies 1 2.0 0.000 0.000 66.231 66.238 scf_env_do_scf 1 3.0 0.000 0.000 61.909 61.909 scf_env_do_scf_inner_loop 15 4.0 0.001 0.003 61.909 61.909 qs_ks_update_qs_env 15 5.0 0.000 0.000 26.582 26.596 rebuild_ks_matrix 15 6.0 0.000 0.000 26.539 26.552 qs_ks_build_kohn_sham_matrix 15 7.0 0.003 0.003 26.539 26.552 qs_rho_update_rho_low 16 5.0 0.000 0.000 20.328 20.334 calculate_rho_elec 16 6.0 0.007 0.007 20.328 20.334 grid_collocate_task_list 16 7.0 18.235 18.712 18.235 18.712 sum_up_and_integrate 15 8.0 0.000 0.000 16.023 16.073 integrate_v_rspace 15 9.0 0.001 0.001 16.014 16.067 qs_scf_new_mos 15 5.0 0.000 0.000 15.479 15.532 grid_integrate_task_list 15 10.0 14.601 14.890 14.601 14.890 eigensolver 15 6.0 0.002 0.002 14.332 14.375 qs_vxc_create 15 8.0 0.001 0.001 10.074 10.089 cp_fm_diag_elpa 15 7.0 0.000 0.000 10.071 10.079 cp_fm_diag_elpa_base 15 8.0 9.922 9.952 10.065 10.067 calculate_dispersion_nonloc 15 9.0 0.938 1.058 8.065 8.091 fft_wrap_pw1pw2 1086 10.0 0.018 0.020 7.554 7.672 fft3d_ps 1086 12.0 2.625 3.008 5.706 6.038 fft_wrap_pw1pw2_150 765 11.0 0.123 0.155 4.855 4.896 cp_fm_cholesky_restore 45 7.0 4.086 4.154 4.086 4.154 mp_alltoall_z22v 1086 14.0 2.470 3.219 2.470 3.219 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 2.754 2.754 build_core_hamiltonian_matrix 1 4.0 0.000 0.000 2.410 2.656 fft_wrap_pw1pw2_200 197 11.3 0.095 0.118 2.558 2.634 density_rs2pw 16 7.0 0.001 0.001 1.927 2.199 transfer_rs2pw 82 8.0 0.001 0.001 1.656 2.085 xc_vxc_pw_create 15 9.0 0.017 0.024 2.008 2.034 x_to_yz 585 13.1 0.343 0.375 1.592 1.953 mp_waitany 520 11.3 1.447 1.881 1.447 1.881 yz_to_x 501 12.9 0.247 0.323 1.467 1.840 transfer_rs2pw_200 18 8.8 0.034 0.042 1.062 1.679 build_core_ppnl 1 5.0 1.506 1.660 1.506 1.660 vdW_energy 15 10.0 1.421 1.484 1.421 1.484 xc_pw_derive 90 11.0 0.001 0.001 1.350 1.429 ------------------------------------------------------------------------------- Plot: name="diag_cu144_broy_timings_32omp", title="Timings of diag_cu144_broy with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32omp", name="rest", label="rest", y=59.147999999999996, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=21.021, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=19.375, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=14.882, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_s", label="fft3d_s", y=12.139, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=11.025, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 Plot: name="diag_cu144_broy_timings_32mpi", title="Timings of diag_cu144_broy with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="rest", label="rest", y=17.100999999999992, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=9.922, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=18.235, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=14.601, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=4.086, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=2.625, yerr=0.0 Running bench_dftb.inp with 1 threads and 32 ranks... done. Running bench_dftb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/bench_dftb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.084 0.084 277.715 277.715 qs_energies 1 2.0 0.000 0.000 277.564 277.564 ls_scf 1 3.0 0.000 0.000 275.996 275.996 ls_scf_main 1 4.0 0.002 0.002 264.549 264.549 density_matrix_trs4 11 5.0 0.013 0.013 164.278 164.278 ls_scf_dm_to_ks 11 5.0 0.000 0.000 94.201 94.201 matrix_ls_to_qs 11 6.0 0.000 0.000 90.780 90.780 dbcsr_multiply_generic 185 6.1 0.967 0.967 84.412 84.412 arnoldi_extremal 12 6.1 0.000 0.000 65.439 65.439 arnoldi_normal_ev 12 7.1 0.056 0.056 65.439 65.439 build_subspace 23 8.1 0.093 0.093 64.171 64.171 dbcsr_matrix_vector_mult 652 9.0 0.238 0.238 63.467 63.467 dbcsr_matrix_vector_mult_local 652 10.0 61.792 61.792 61.802 61.802 multiply_cannon 185 7.1 0.489 0.489 49.213 49.213 dbcsr_copy_into_existing 11 7.0 48.178 48.178 48.178 48.178 dbcsr_complete_redistribute 23 7.5 33.848 33.848 47.012 47.012 matrix_decluster 11 7.0 0.000 0.000 42.601 42.601 multiply_cannon_loop 185 8.1 0.272 0.272 36.272 36.272 make_m2s 370 7.1 0.040 0.040 29.722 29.722 make_images 370 8.1 12.551 12.551 27.225 27.225 multiply_cannon_multrec 185 9.1 26.020 26.020 26.162 26.162 dbcsr_finalize 646 7.5 0.292 0.292 17.588 17.588 dbcsr_merge_all 597 8.5 2.740 2.740 16.138 16.138 setup_rec_index_2d 370 8.1 12.214 12.214 12.214 12.214 dbcsr_sort_indices 804 10.1 12.118 12.118 12.118 12.118 tree_to_linear_d 110 9.4 11.848 11.848 11.848 11.848 ls_scf_init_scf 1 4.0 0.000 0.000 10.681 10.681 quick_finalize 395 10.0 0.436 0.436 10.411 10.411 ls_scf_init_matrix_S 1 5.0 0.000 0.000 10.197 10.197 calculate_norms 370 9.1 9.838 9.838 9.838 9.838 dbcsr_special_finalize 370 9.1 0.002 0.002 9.576 9.576 matrix_sqrt_Newton_Schulz 1 6.0 0.001 0.001 9.202 9.202 dbcsr_dot_sd 144 6.3 6.877 6.877 6.877 6.877 matrix_qs_to_ls 12 5.1 0.000 0.000 6.499 6.499 matrix_cluster 12 6.1 0.000 0.000 6.499 6.499 ------------------------------------------------------------------------------- From /workspace/artifacts/bench_dftb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.016 0.033 81.459 81.472 qs_energies 1 2.0 0.000 0.000 81.331 81.332 ls_scf 1 3.0 0.000 0.000 81.282 81.283 ls_scf_main 1 4.0 0.001 0.008 78.120 78.121 density_matrix_trs4 11 5.0 0.007 0.020 75.025 75.089 dbcsr_multiply_generic 185 6.1 0.069 0.079 71.260 71.463 multiply_cannon 185 7.1 0.037 0.040 58.786 59.939 multiply_cannon_loop 185 8.1 0.127 0.138 56.066 57.728 multiply_cannon_multrec 1480 9.1 33.055 36.490 33.445 36.877 mp_waitall_1 11936 10.3 19.947 23.643 19.947 23.643 multiply_cannon_metrocomm3 1480 9.1 0.014 0.016 15.277 19.019 make_m2s 370 7.1 0.039 0.041 8.202 8.278 make_images 370 8.1 0.678 0.713 8.055 8.124 calculate_norms 2960 9.1 5.914 6.232 5.914 6.232 mp_sum_l 1199 5.3 3.305 4.169 3.305 4.169 make_images_data 370 9.1 0.010 0.022 3.731 3.924 hybrid_alltoall_any 393 9.9 0.279 1.854 3.209 3.484 dbcsr_multiply_generic_mpsum_f 137 7.1 0.000 0.000 2.405 3.294 arnoldi_extremal 12 6.1 0.000 0.000 2.940 2.961 arnoldi_normal_ev 12 7.1 0.001 0.008 2.939 2.960 build_subspace 23 8.1 0.021 0.027 2.825 2.827 ls_scf_dm_to_ks 11 5.0 0.000 0.000 2.647 2.754 dbcsr_complete_redistribute 23 7.5 1.375 1.450 2.407 2.501 matrix_ls_to_qs 11 6.0 0.000 0.000 2.313 2.410 dbcsr_matrix_vector_mult 652 9.0 0.014 0.055 2.279 2.378 ls_scf_init_scf 1 4.0 0.000 0.000 2.331 2.332 ls_scf_init_matrix_S 1 5.0 0.000 0.000 2.296 2.302 matrix_decluster 11 7.0 0.000 0.000 2.167 2.263 make_images_pack 370 9.1 2.012 2.240 2.015 2.243 matrix_sqrt_Newton_Schulz 1 6.0 0.000 0.002 2.096 2.097 multiply_cannon_metrocomm1 1480 9.1 0.006 0.007 1.266 2.028 dbcsr_matrix_vector_mult_local 652 10.0 1.746 1.854 1.747 1.856 buffer_matrices_ensure_size 370 8.1 1.503 1.733 1.503 1.733 ------------------------------------------------------------------------------- Plot: name="bench_dftb_timings_32omp", title="Timings of bench_dftb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32omp", name="rest", label="rest", y=85.488, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=61.792, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=48.178, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=33.848, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=26.02, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="make_images", label="make_images", y=12.551, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="calculate_norms", label="calculate_norms", y=9.838, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="make_images_pack", label="make_images_pack", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="bench_dftb_timings_32mpi", title="Timings of bench_dftb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32mpi", name="rest", label="rest", y=13.427000000000007, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=1.746, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=1.375, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=33.055, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="make_images", label="make_images", y=0.678, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="calculate_norms", label="calculate_norms", y=5.914, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=3.305, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="make_images_pack", label="make_images_pack", y=2.012, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=19.947, yerr=0.0 Running dbcsr.inp with 1 threads and 32 ranks... done. Running dbcsr.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/dbcsr_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.015 0.015 77.817 77.817 lib_test 1 2.0 0.000 0.000 77.800 77.800 dbcsr_run_tests 3 3.0 0.002 0.002 77.800 77.800 test_multiplies_multiproc 3 4.0 0.001 0.001 61.104 61.104 dbcsr_redistribute 9 5.0 39.866 39.866 41.618 41.618 dbcsr_multiply_generic 9 5.0 0.001 0.001 17.970 17.970 dbcsr_make_random_matrix 9 4.0 13.217 13.217 16.583 16.583 multiply_cannon 9 6.0 0.034 0.034 12.762 12.762 multiply_cannon_loop 9 7.0 0.015 0.015 12.286 12.286 multiply_cannon_multrec 9 8.0 12.270 12.270 12.270 12.270 dbcsr_finalize 27 5.7 0.021 0.021 6.134 6.134 dbcsr_merge_all 18 6.5 2.202 2.202 5.382 5.382 dbcsr_data_release 975 7.6 2.825 2.825 2.825 2.825 tree_to_linear_d 9 7.0 2.098 2.098 2.098 2.098 make_m2s 18 6.0 0.001 0.001 1.857 1.857 make_images 18 7.0 0.647 0.647 1.778 1.778 ------------------------------------------------------------------------------- From /workspace/artifacts/dbcsr_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.004 0.018 21.316 21.322 lib_test 1 2.0 0.000 0.000 21.279 21.299 dbcsr_run_tests 3 3.0 0.000 0.001 21.274 21.294 test_multiplies_multiproc 3 4.0 0.000 0.003 20.388 20.466 dbcsr_multiply_generic 9 5.0 0.001 0.002 18.774 18.869 multiply_cannon 9 6.0 0.002 0.002 16.389 16.806 multiply_cannon_loop 9 7.0 0.002 0.002 16.089 16.490 multiply_cannon_multrec 72 8.0 12.014 12.634 12.015 12.635 mp_waitall_1 576 9.2 4.465 5.111 4.465 5.111 multiply_cannon_metrocomm1 72 8.0 0.001 0.001 3.972 4.619 mp_sum_l 470 2.5 1.010 1.400 1.010 1.400 dbcsr_multiply_generic_mpsum_f 9 6.0 0.000 0.000 0.950 1.339 dbcsr_make_random_matrix 9 4.0 0.673 0.700 0.847 0.883 make_m2s 18 6.0 0.001 0.001 0.824 0.866 make_images 18 7.0 0.021 0.025 0.821 0.863 dbcsr_finalize 27 5.7 0.000 0.000 0.605 0.728 dbcsr_data_release 444 7.6 0.593 0.694 0.593 0.694 dbcsr_merge_all 18 6.5 0.093 0.127 0.541 0.674 dbcsr_redistribute 9 5.0 0.265 0.309 0.621 0.659 dbcsr_destroy 111 5.9 0.000 0.001 0.531 0.632 make_images_data 18 8.0 0.001 0.001 0.438 0.515 hybrid_alltoall_any 18 9.0 0.039 0.171 0.372 0.441 ------------------------------------------------------------------------------- Plot: name="dbcsr_timings_32omp", title="Timings of dbcsr with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32omp", name="rest", label="rest", y=7.436999999999998, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_redistribute", label="dbcsr_redistribute", y=39.866, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=13.217, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=12.27, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_data_release", label="dbcsr_data_release", y=2.825, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_merge_all", label="dbcsr_merge_all", y=2.202, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 Plot: name="dbcsr_timings_32mpi", title="Timings of dbcsr with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32mpi", name="rest", label="rest", y=2.202999999999996, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_redistribute", label="dbcsr_redistribute", y=0.265, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=0.673, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=12.014, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_data_release", label="dbcsr_data_release", y=0.593, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_merge_all", label="dbcsr_merge_all", y=0.093, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=4.465, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=1.01, yerr=0.0 Running MQAE_single_node.inp with 1 threads and 32 ranks... done. Running MQAE_single_node.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/MQAE_single_node_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.062 0.062 139.763 139.763 qs_mol_dyn_low 1 2.0 0.004 0.004 138.319 138.319 velocity_verlet 5 3.0 0.003 0.003 113.160 113.160 qmmm_el_coupling 6 3.8 0.000 0.000 90.856 90.856 qmmm_elec_with_gaussian 6 4.8 0.013 0.013 90.852 90.852 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 90.134 90.134 qmmm_elec_gaussian_low_G 6 6.8 89.235 89.235 89.235 89.235 qs_forces 6 3.8 0.001 0.001 38.844 38.844 qs_energies 6 4.8 0.000 0.000 34.527 34.527 scf_env_do_scf 6 5.8 0.001 0.001 31.713 31.713 scf_env_do_scf_inner_loop 39 6.8 0.004 0.004 27.326 27.326 rebuild_ks_matrix 45 8.4 0.000 0.000 26.298 26.298 qs_ks_build_kohn_sham_matrix 45 9.4 0.005 0.005 26.298 26.298 qs_ks_update_qs_env 45 7.8 0.000 0.000 22.406 22.406 fft_wrap_pw1pw2 801 12.6 0.011 0.011 21.863 21.863 fft_wrap_pw1pw2_150 507 14.2 5.520 5.520 21.386 21.386 qs_vxc_create 45 10.4 0.001 0.001 16.308 16.308 xc_vxc_pw_create 45 11.4 0.680 0.680 16.308 16.308 xc_pw_derive 270 13.4 0.002 0.002 12.175 12.175 fft3d_s 802 14.6 8.558 8.558 8.566 8.566 xc_pw_divergence 45 12.4 0.001 0.001 8.263 8.263 qs_rho_update_rho_low 45 7.9 0.000 0.000 8.077 8.077 calculate_rho_elec 45 8.9 0.563 0.563 8.077 8.077 xc_rho_set_and_dset_create 45 12.4 0.647 0.647 7.294 7.294 pw_scatter_s 429 14.8 5.915 5.915 5.915 5.915 qmmm_forces 6 3.8 0.002 0.002 5.241 5.241 qmmm_forces_with_gaussian 6 4.8 0.021 0.021 4.910 4.910 init_scf_loop 6 6.8 0.000 0.000 4.382 4.382 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 4.165 4.165 density_rs2pw 45 9.9 0.002 0.002 4.139 4.139 sum_up_and_integrate 45 10.4 0.001 0.001 3.998 3.998 integrate_v_rspace 45 11.4 0.012 0.012 3.950 3.950 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 3.899 3.899 qs_ks_ddapc 45 10.4 0.001 0.001 3.663 3.663 qmmm_forces_gaussian_low_G 6 6.8 3.459 3.459 3.459 3.459 grid_collocate_task_list 45 9.9 3.375 3.375 3.375 3.375 pw_integral_ab 2539 7.4 2.879 2.879 2.879 2.879 ------------------------------------------------------------------------------- From /workspace/artifacts/MQAE_single_node_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.034 0.051 59.615 59.628 qs_mol_dyn_low 1 2.0 0.003 0.005 58.507 58.567 qs_forces 6 3.8 0.001 0.001 44.073 44.073 qs_energies 6 4.8 0.000 0.000 42.201 42.202 scf_env_do_scf 6 5.8 0.000 0.001 41.244 41.245 scf_env_do_scf_inner_loop 113 6.2 0.003 0.017 39.434 39.435 rebuild_ks_matrix 119 8.1 0.000 0.000 29.237 29.247 qs_ks_build_kohn_sham_matrix 119 9.1 0.016 0.017 29.237 29.246 qs_ks_update_qs_env 119 7.3 0.001 0.001 27.664 27.673 velocity_verlet 5 3.0 0.002 0.003 22.990 22.993 fft_wrap_pw1pw2 2059 12.4 0.033 0.038 18.300 18.821 fft_wrap_pw1pw2_150 1321 13.9 0.618 0.735 17.446 17.903 fft3d_ps 2059 14.4 7.027 8.090 13.840 14.793 qs_vxc_create 119 10.1 0.002 0.002 14.728 14.732 xc_vxc_pw_create 119 11.1 0.155 0.231 14.726 14.731 xc_pw_derive 714 13.1 0.009 0.010 11.111 11.477 qs_rho_update_rho_low 119 7.3 0.001 0.001 11.186 11.194 calculate_rho_elec 119 8.3 0.049 0.058 11.186 11.194 sum_up_and_integrate 119 10.1 0.002 0.002 10.209 10.242 integrate_v_rspace 119 11.1 0.004 0.004 10.147 10.189 xc_pw_divergence 119 12.1 0.004 0.004 7.312 7.596 qmmm_forces 6 3.8 0.002 0.002 7.278 7.278 xc_rho_set_and_dset_create 119 12.1 0.372 0.479 7.002 7.232 qmmm_forces_with_gaussian 6 4.8 0.006 0.007 6.900 7.125 density_rs2pw 119 9.3 0.005 0.006 6.456 6.759 mp_alltoall_z22v 2059 16.4 5.330 6.750 5.330 6.750 qmmm_el_coupling 6 3.8 0.000 0.000 6.205 6.380 qmmm_elec_with_gaussian 6 4.8 0.003 0.004 6.204 6.378 potential_pw2rs 119 12.1 0.006 0.007 5.933 5.959 grid_collocate_task_list 119 9.3 4.502 4.853 4.502 4.853 yz_to_x 964 15.0 0.654 0.772 3.182 4.184 x_to_yz 1095 15.8 0.784 0.904 3.586 4.180 transfer_pw2rs 500 12.8 0.005 0.007 4.088 4.101 grid_integrate_task_list 119 12.1 3.846 3.992 3.846 3.992 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 3.867 3.903 mp_waitany 4028 12.8 3.161 3.849 3.161 3.849 transfer_rs2pw 488 10.2 0.007 0.009 3.197 3.554 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 3.447 3.503 qmmm_forces_gaussian_low_G 6 6.8 3.182 3.219 3.182 3.219 qmmm_elec_gaussian_low_G 6 6.8 2.858 2.900 2.858 2.900 transfer_pw2rs_150 125 13.9 0.806 1.058 2.774 2.870 pw_restrict_s3 18 5.8 1.338 1.405 2.445 2.689 transfer_rs2pw_150 125 11.2 0.637 0.862 2.253 2.623 qs_scf_new_mos 113 7.2 0.000 0.001 2.466 2.471 qs_scf_loop_do_ot 113 8.2 0.000 0.001 2.465 2.471 ot_scf_mini 113 9.2 0.001 0.002 2.379 2.383 qs_ks_ddapc 119 10.1 0.002 0.003 2.147 2.291 qmmm_elec_with_gaussian:spline 6 5.8 0.000 0.000 2.034 2.206 pw_prolongate_s3 18 6.8 1.118 1.174 2.034 2.206 dbcsr_multiply_generic 2588 12.3 0.059 0.061 1.997 2.049 mp_sum_d 5826 12.2 1.230 1.850 1.230 1.850 init_scf_loop 6 6.8 0.000 0.000 1.807 1.807 pw_scatter_p 1095 14.8 1.676 1.707 1.676 1.707 pw_gather_p 964 14.0 1.397 1.644 1.397 1.644 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 1.593 1.593 pw_copy 2027 12.4 1.293 1.535 1.293 1.535 pw_derive 1089 13.4 1.236 1.523 1.236 1.523 pw_integral_ab 2761 7.7 1.038 1.217 1.401 1.473 pw_poisson_solve 125 9.9 0.003 0.003 1.282 1.450 mp_waitall_1 177795 16.4 1.113 1.292 1.113 1.292 ot_mini 113 10.2 0.001 0.001 1.273 1.279 mp_sum_dm3 33 5.7 1.210 1.269 1.210 1.269 transfer_pw2rs_40 119 14.1 0.276 0.336 1.086 1.236 ------------------------------------------------------------------------------- Plot: name="MQAE_single_node_timings_32omp", title="Timings of MQAE_single_node with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32omp", name="rest", label="rest", y=23.700999999999993, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=89.235, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_s", label="fft3d_s", y=8.558, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_scatter_s", label="pw_scatter_s", y=5.915, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft_wrap_pw1pw2_150", label="fft_wrap_pw1pw2_150", y=5.52, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=3.459, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=3.375, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 Plot: name="MQAE_single_node_timings_32mpi", title="Timings of MQAE_single_node with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32mpi", name="rest", label="rest", y=32.251999999999995, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=2.858, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_scatter_s", label="pw_scatter_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft_wrap_pw1pw2_150", label="fft_wrap_pw1pw2_150", y=0.618, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=3.182, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=4.502, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=5.33, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=3.846, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=7.027, yerr=0.0 Summary: Performance test took 35 minutes. Status: OK Removing intermediate container 54f478a44d97 ---> 93a158c31614 Step 41/42 : CMD cat $(find ./report.log -mmin +10) | sed '/^Summary:/ s/$/ (cached)/' ---> Running in e66d32f55bf9 Removing intermediate container e66d32f55bf9 ---> 04e2654bf32a Step 42/42 : ENTRYPOINT [] ---> Running in 24416867dc87 Removing intermediate container 24416867dc87 ---> 4bc5f709f75a [Warning] One or more build-args [GIT_COMMIT_SHA] were not consumed Successfully built 4bc5f709f75a Successfully tagged us-central1-docker.pkg.dev/cp2k-org-project/cp2kci/img_cp2k-perf-openmp:master Pushing new image... done. #################### Running Image cp2k-perf-openmp #################### Uploading artifacts... done EndDate: 2024-04-10 09:43:15+00:00