StartDate: 2022-07-22 11:06:01+00:00 CpuId: 32x AMD (unknown model) [Zen 3], 7nm (SMT disabled) CommitSHA: a713c85bc9528546950c735172cfe5d38cd17865 CommitTime: 2022-07-22 10:07:26 +0200 CommitAuthor: abussy CommitSubject: RI-RPA| Allow for XC correction in ADMM RI-RPA Populating docker build cache... done. #################### Building Image cp2k-perf-openmp #################### Dockerfile: /tools/docker/Dockerfile.test_performance Build-Path: / Build-Args: GIT_COMMIT_SHA=a713c85bc9528546950c735172cfe5d38cd17865 Sending build context to Docker daemon 364.2MB Step 1/42 : FROM ubuntu:22.04 22.04: Pulling from library/ubuntu 405f018f9d1d: Already exists Digest: sha256:b6b83d3c331794420340093eb706a6f152d9c1fa51b262d9bf34594887c2c7ac Status: Downloaded newer image for ubuntu:22.04 ---> 27941809078c Step 2/42 : WORKDIR /opt/cp2k-toolchain ---> Using cache ---> 92ee757f28a3 Step 3/42 : COPY ./tools/toolchain/install_requirements*.sh ./ ---> Using cache ---> a398cc4ae5b3 Step 4/42 : RUN ./install_requirements.sh ubuntu:22.04 ---> Using cache ---> e20712e9c254 Step 5/42 : RUN mkdir scripts ---> Using cache ---> c7b9413ca6be Step 6/42 : COPY ./tools/toolchain/scripts/VERSION ./tools/toolchain/scripts/parse_if.py ./tools/toolchain/scripts/tool_kit.sh ./tools/toolchain/scripts/common_vars.sh ./tools/toolchain/scripts/signal_trap.sh ./tools/toolchain/scripts/get_openblas_arch.sh ./scripts/ ---> Using cache ---> 5f4bcd2de9f5 Step 7/42 : COPY ./tools/toolchain/install_cp2k_toolchain.sh . ---> Using cache ---> c39d97839810 Step 8/42 : RUN ./install_cp2k_toolchain.sh --install-all --mpi-mode=mpich --with-gcc=system --dry-run ---> Using cache ---> d0b21d05b338 Step 9/42 : COPY ./tools/toolchain/scripts/stage0/ ./scripts/stage0/ ---> Using cache ---> 6e616c7670ff Step 10/42 : RUN ./scripts/stage0/install_stage0.sh && rm -rf ./build ---> Using cache ---> 5ba44cd61a38 Step 11/42 : COPY ./tools/toolchain/scripts/stage1/ ./scripts/stage1/ ---> Using cache ---> 1aa896c19a24 Step 12/42 : RUN ./scripts/stage1/install_stage1.sh && rm -rf ./build ---> Using cache ---> c7ccbf5e1b85 Step 13/42 : COPY ./tools/toolchain/scripts/stage2/ ./scripts/stage2/ ---> Using cache ---> 9cb6a1bd2cd3 Step 14/42 : RUN ./scripts/stage2/install_stage2.sh && rm -rf ./build ---> Using cache ---> 1eed70bdd06a Step 15/42 : COPY ./tools/toolchain/scripts/stage3/ ./scripts/stage3/ ---> Using cache ---> ac04ff4ae473 Step 16/42 : RUN ./scripts/stage3/install_stage3.sh && rm -rf ./build ---> Using cache ---> cd0e2369620a Step 17/42 : COPY ./tools/toolchain/scripts/stage4/ ./scripts/stage4/ ---> Using cache ---> fc75688b2cb5 Step 18/42 : RUN ./scripts/stage4/install_stage4.sh && rm -rf ./build ---> Using cache ---> bba0dcb0b93f Step 19/42 : COPY ./tools/toolchain/scripts/stage5/ ./scripts/stage5/ ---> Using cache ---> 303741b69f4c Step 20/42 : RUN ./scripts/stage5/install_stage5.sh && rm -rf ./build ---> Using cache ---> 5c3df3a2c686 Step 21/42 : COPY ./tools/toolchain/scripts/stage6/ ./scripts/stage6/ ---> Using cache ---> 7290ede69f62 Step 22/42 : RUN ./scripts/stage6/install_stage6.sh && rm -rf ./build ---> Using cache ---> 805ee354e427 Step 23/42 : COPY ./tools/toolchain/scripts/stage7/ ./scripts/stage7/ ---> Using cache ---> c71f3cda37eb Step 24/42 : RUN ./scripts/stage7/install_stage7.sh && rm -rf ./build ---> Using cache ---> a18c9cfd8730 Step 25/42 : COPY ./tools/toolchain/scripts/stage8/ ./scripts/stage8/ ---> Using cache ---> 320efbba4fd1 Step 26/42 : RUN ./scripts/stage8/install_stage8.sh && rm -rf ./build ---> Using cache ---> 68bee71ec24d Step 27/42 : COPY ./tools/toolchain/scripts/arch_base.tmpl ./tools/toolchain/scripts/generate_arch_files.sh ./scripts/ ---> Using cache ---> 6b6e4f3e24cc Step 28/42 : RUN ./scripts/generate_arch_files.sh && rm -rf ./build ---> Using cache ---> d59b20ddd43b Step 29/42 : WORKDIR /opt/cp2k ---> Using cache ---> 775ae464a90b Step 30/42 : COPY ./Makefile . ---> Using cache ---> 98767db1bf2a Step 31/42 : COPY ./src ./src ---> f3113f5b830d Step 32/42 : COPY ./exts ./exts ---> 3198fefa2b31 Step 33/42 : COPY ./tools/build_utils ./tools/build_utils ---> 2430f88fbfb3 Step 34/42 : RUN /bin/bash -c " mkdir -p arch && ln -vs /opt/cp2k-toolchain/install/arch/local.psmp ./arch/ && echo 'Compiling cp2k...' && source /opt/cp2k-toolchain/install/setup && ( make -j ARCH=local VERSION=psmp &> /dev/null || true ) && ( [ ! -f ./exe/local/cp2k.psmp ] || ldd ./exe/local/cp2k.psmp | grep -q libmpi )" ---> Running in c7a0abfca6c0 './arch/local.psmp' -> '/opt/cp2k-toolchain/install/arch/local.psmp' Compiling cp2k... Removing intermediate container c7a0abfca6c0 ---> b8fac8f36f42 Step 35/42 : COPY ./data ./data ---> 5125edc20b06 Step 36/42 : COPY ./tests ./tests ---> 9036223444d5 Step 37/42 : COPY ./tools/regtesting ./tools/regtesting ---> 777cf73753b6 Step 38/42 : COPY ./benchmarks ./benchmarks ---> dfd43eefc0ea Step 39/42 : COPY ./tools/docker/scripts/test_performance.sh ./tools/docker/scripts/plot_performance.py ./ ---> 7948536235c8 Step 40/42 : RUN ./test_performance.sh "local" 2>&1 | tee report.log ---> Running in 5dabddd93a1f ========== Compiling CP2K ========== Compiling cp2k... done. Checking benchmark inputs... Found 60 input files and 0 errors. ========== Running Performance Test ========== Running H2O-64.inp with 1 threads and 32 ranks... done. Running H2O-64.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.029 0.029 193.163 193.163 qs_mol_dyn_low 1 2.0 0.003 0.003 192.520 192.520 qs_forces 11 3.9 0.001 0.001 192.466 192.466 qs_energies 11 4.9 0.001 0.001 185.970 185.970 scf_env_do_scf 11 5.9 0.001 0.001 173.375 173.375 velocity_verlet 10 3.0 0.002 0.002 133.242 133.242 init_scf_loop 11 6.9 0.000 0.000 92.770 92.770 prepare_preconditioner 11 7.9 0.000 0.000 90.103 90.103 make_preconditioner 11 8.9 0.000 0.000 90.103 90.103 make_full_inverse_cholesky 11 9.9 0.000 0.000 88.931 88.931 scf_env_do_scf_inner_loop 108 6.5 0.012 0.012 80.472 80.472 cp_fm_cholesky_invert 11 10.9 62.581 62.581 62.581 62.581 qs_scf_new_mos 108 7.5 0.001 0.001 41.996 41.996 qs_scf_loop_do_ot 108 8.5 0.001 0.001 41.995 41.995 ot_scf_mini 108 9.5 0.003 0.003 40.406 40.406 rebuild_ks_matrix 119 8.3 0.001 0.001 22.522 22.522 qs_ks_build_kohn_sham_matrix 119 9.3 0.014 0.014 22.522 22.522 cp_fm_cholesky_decompose 22 10.9 22.275 22.275 22.275 22.275 dbcsr_multiply_generic 2286 12.5 0.166 0.166 22.235 22.235 qs_rho_update_rho_low 119 7.7 0.001 0.001 21.970 21.970 calculate_rho_elec 119 8.7 0.986 0.986 21.970 21.970 qs_ot_get_p 119 10.4 0.001 0.001 21.349 21.349 qs_ks_update_qs_env 119 7.6 0.001 0.001 20.723 20.723 qs_ot_p2m_diag 50 11.0 0.160 0.160 19.916 19.916 cp_dbcsr_syevd 50 12.0 0.002 0.002 19.433 19.433 cp_fm_diag_elpa 50 13.0 0.000 0.000 18.103 18.103 cp_fm_diag_elpa_base 50 14.0 18.048 18.048 18.103 18.103 grid_collocate_task_list 119 9.7 17.431 17.431 17.431 17.431 ot_mini 108 10.5 0.001 0.001 15.157 15.157 sum_up_and_integrate 119 10.3 0.229 0.229 13.946 13.946 integrate_v_rspace 119 11.3 0.115 0.115 13.716 13.716 make_m2s 4572 13.5 0.049 0.049 11.879 11.879 grid_integrate_task_list 119 12.3 11.513 11.513 11.513 11.513 qs_ot_get_derivative 108 11.5 0.001 0.001 9.142 9.142 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 6.452 6.452 pw_transfer 1439 11.6 0.066 0.066 6.276 6.276 dbcsr_make_dense_low 5837 15.5 0.093 0.093 6.236 6.236 make_dense_data 5837 16.5 5.330 5.330 6.129 6.129 multiply_cannon 2286 13.5 0.200 0.200 6.041 6.041 make_images 4572 14.5 2.296 2.296 6.041 6.041 ot_diis_step 108 11.5 0.004 0.004 6.012 6.012 fft_wrap_pw1pw2 1201 12.6 0.006 0.006 6.007 6.007 multiply_cannon_loop 2286 14.5 0.108 0.108 5.511 5.511 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 5.458 5.458 apply_single 119 13.6 0.000 0.000 5.458 5.458 dbcsr_make_images_dense 3978 14.8 0.019 0.019 5.418 5.418 multiply_cannon_multrec 2286 15.5 5.341 5.341 5.402 5.402 init_scf_run 11 5.9 0.002 0.002 5.108 5.108 scf_env_initial_rho_setup 11 6.9 0.001 0.001 5.106 5.106 fft_wrap_pw1pw2_140 487 13.2 0.440 0.440 5.012 5.012 wfi_extrapolate 11 7.9 0.001 0.001 4.480 4.480 dbcsr_complete_redistribute 329 12.2 1.984 1.984 4.032 4.032 copy_fm_to_dbcsr 176 11.2 0.001 0.001 3.991 3.991 dbcsr_copy 2102 12.0 0.267 0.267 3.921 3.921 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.008 0.029 56.671 56.681 qs_mol_dyn_low 1 2.0 0.004 0.007 56.526 56.533 qs_forces 11 3.9 0.001 0.002 56.471 56.472 qs_energies 11 4.9 0.001 0.001 52.583 52.588 scf_env_do_scf 11 5.9 0.001 0.002 48.294 48.295 scf_env_do_scf_inner_loop 108 6.5 0.003 0.020 44.669 44.669 velocity_verlet 10 3.0 0.001 0.003 33.420 33.422 rebuild_ks_matrix 119 8.3 0.001 0.001 21.723 21.813 qs_ks_build_kohn_sham_matrix 119 9.3 0.017 0.021 21.723 21.813 qs_ks_update_qs_env 119 7.6 0.001 0.001 19.364 19.450 dbcsr_multiply_generic 2286 12.5 0.081 0.093 15.632 16.676 sum_up_and_integrate 119 10.3 0.024 0.028 16.342 16.513 integrate_v_rspace 119 11.3 0.005 0.006 16.318 16.490 qs_rho_update_rho_low 119 7.7 0.001 0.001 16.254 16.276 calculate_rho_elec 119 8.7 0.030 0.035 16.254 16.276 qs_scf_new_mos 108 7.5 0.001 0.001 12.453 12.546 qs_scf_loop_do_ot 108 8.5 0.001 0.001 12.452 12.545 grid_collocate_task_list 119 9.7 10.170 12.107 10.170 12.107 multiply_cannon 2286 13.5 0.147 0.160 11.171 11.837 grid_integrate_task_list 119 12.3 9.161 11.818 9.161 11.818 ot_scf_mini 108 9.5 0.002 0.003 11.718 11.796 multiply_cannon_loop 2286 14.5 0.106 0.132 10.456 10.790 mp_waitall_1 169478 16.3 9.397 10.083 9.397 10.083 rs_pw_transfer 974 11.9 0.013 0.015 7.005 7.573 ot_mini 108 10.5 0.001 0.001 6.788 6.869 multiply_cannon_metrocomm3 18288 15.5 0.042 0.055 6.032 6.843 density_rs2pw 119 9.7 0.006 0.007 5.613 6.173 potential_pw2rs 119 12.3 0.007 0.008 4.398 4.424 pw_transfer 1439 11.6 0.096 0.106 4.201 4.300 fft_wrap_pw1pw2 1201 12.6 0.011 0.013 4.011 4.105 multiply_cannon_multrec 18288 15.5 3.485 3.870 3.496 3.884 mp_waitany 9880 13.7 3.053 3.775 3.053 3.775 init_scf_loop 11 6.9 0.000 0.001 3.607 3.608 fft_wrap_pw1pw2_140 487 13.2 0.391 0.448 3.379 3.515 mp_alltoall_d11v 2130 13.8 3.021 3.481 3.021 3.481 qs_ot_get_derivative 108 11.5 0.001 0.001 3.397 3.468 ot_diis_step 108 11.5 0.004 0.005 3.349 3.349 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 3.181 3.315 apply_single 119 13.6 0.000 0.001 3.181 3.314 rs_pw_transfer_RS2PW_140 130 11.5 0.358 0.449 2.656 3.239 rs_gather_matrices 119 12.3 0.101 0.126 2.709 3.147 fft3d_ps 1201 14.6 1.497 1.612 2.907 3.034 make_m2s 4572 13.5 0.050 0.068 2.912 3.000 init_scf_run 11 5.9 0.000 0.004 2.960 2.962 scf_env_initial_rho_setup 11 6.9 0.000 0.004 2.960 2.962 wfi_extrapolate 11 7.9 0.001 0.001 2.676 2.676 make_images 4572 14.5 0.128 0.156 2.553 2.644 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 2.569 2.579 mp_sum_l 11218 13.2 1.032 2.222 1.032 2.222 rs_pw_transfer_PW2RS_140 130 13.9 0.772 0.923 1.955 2.037 qs_ot_get_p 119 10.4 0.001 0.001 1.617 1.754 mp_sum_d 4129 12.0 1.184 1.510 1.184 1.510 make_images_data 4572 15.5 0.038 0.054 1.350 1.506 hybrid_alltoall_any 4725 16.4 0.072 0.219 1.198 1.366 prepare_preconditioner 11 7.9 0.000 0.000 1.284 1.310 make_preconditioner 11 8.9 0.000 0.000 1.284 1.310 build_core_hamiltonian_matrix_ 11 4.9 0.000 0.000 0.956 1.282 mp_alltoall_z22v 1201 16.6 1.094 1.272 1.094 1.272 multiply_cannon_metrocomm1 18288 15.5 0.021 0.029 0.605 1.195 make_full_inverse_cholesky 11 9.9 0.000 0.000 1.156 1.180 qs_ot_get_derivative_diag 49 12.0 0.001 0.001 1.111 1.171 ------------------------------------------------------------------------------- Plot: name="H2O-64_timings_32omp", title="Timings of H2O-64 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32omp", name="rest", label="rest", y=55.97400000000002, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=62.581, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=22.275, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=18.048, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=17.431, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=11.513, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=5.341, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 Plot: name="H2O-64_timings_32mpi", title="Timings of H2O-64 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32mpi", name="rest", label="rest", y=21.405, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=10.17, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=9.161, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=3.485, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=9.397, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitany", label="mp_waitany", y=3.053, yerr=0.0 Running H2O-64_nonortho.inp with 1 threads and 32 ranks... done. Running H2O-64_nonortho.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_nonortho_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.029 0.029 215.882 215.882 qs_mol_dyn_low 1 2.0 0.003 0.003 215.215 215.215 qs_forces 11 3.9 0.001 0.001 215.176 215.176 qs_energies 11 4.9 0.001 0.001 207.038 207.038 scf_env_do_scf 11 5.9 0.001 0.001 192.203 192.203 velocity_verlet 10 3.0 0.002 0.002 150.296 150.296 scf_env_do_scf_inner_loop 96 6.5 0.010 0.010 96.033 96.033 init_scf_loop 11 6.9 0.000 0.000 96.002 96.002 prepare_preconditioner 11 7.9 0.000 0.000 91.574 91.574 make_preconditioner 11 8.9 0.000 0.000 91.574 91.574 make_full_inverse_cholesky 11 9.9 0.000 0.000 90.381 90.381 cp_fm_cholesky_invert 11 10.9 61.820 61.820 61.820 61.820 qs_scf_new_mos 96 7.5 0.001 0.001 36.301 36.301 qs_scf_loop_do_ot 96 8.5 0.001 0.001 36.300 36.300 rebuild_ks_matrix 107 8.3 0.001 0.001 36.055 36.055 qs_ks_build_kohn_sham_matrix 107 9.3 0.012 0.012 36.054 36.054 ot_scf_mini 96 9.5 0.002 0.002 34.850 34.850 qs_rho_update_rho_low 107 7.7 0.000 0.000 34.720 34.720 calculate_rho_elec 107 8.7 0.887 0.887 34.720 34.720 qs_ks_update_qs_env 107 7.6 0.001 0.001 32.379 32.379 grid_collocate_task_list 107 9.7 30.407 30.407 30.407 30.407 sum_up_and_integrate 107 10.3 0.214 0.214 28.479 28.479 integrate_v_rspace 107 11.3 0.097 0.097 28.265 28.265 grid_integrate_task_list 107 12.3 26.302 26.302 26.302 26.302 cp_fm_cholesky_decompose 22 10.9 24.591 24.591 24.591 24.591 dbcsr_multiply_generic 1966 12.4 0.149 0.149 18.961 18.961 qs_ot_get_p 107 10.4 0.001 0.001 18.495 18.495 qs_ot_p2m_diag 44 11.0 0.136 0.136 17.274 17.274 cp_dbcsr_syevd 44 12.0 0.002 0.002 16.888 16.888 cp_fm_diag_elpa 44 13.0 0.000 0.000 16.195 16.195 cp_fm_diag_elpa_base 44 14.0 16.142 16.142 16.194 16.194 ot_mini 96 10.5 0.001 0.001 13.036 13.036 make_m2s 3932 13.4 0.041 0.041 9.946 9.946 qs_ot_get_derivative 96 11.5 0.001 0.001 8.184 8.184 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 7.075 7.075 init_scf_run 11 5.9 0.002 0.002 6.809 6.809 scf_env_initial_rho_setup 11 6.9 0.001 0.001 6.807 6.807 wfi_extrapolate 11 7.9 0.001 0.001 6.162 6.162 pw_transfer 1295 11.6 0.060 0.060 5.966 5.966 fft_wrap_pw1pw2 1081 12.6 0.006 0.006 5.726 5.726 multiply_cannon 1966 13.4 0.169 0.169 5.424 5.424 dbcsr_make_dense_low 4961 15.5 0.060 0.060 5.160 5.160 make_images 3932 14.4 1.985 1.985 5.126 5.126 make_dense_data 4961 16.5 4.485 4.485 5.087 5.087 multiply_cannon_loop 1966 14.4 0.097 0.097 4.983 4.983 fft_wrap_pw1pw2_140 439 13.2 0.552 0.552 4.900 4.900 multiply_cannon_multrec 1966 15.4 4.829 4.829 4.885 4.885 ot_diis_step 96 11.5 0.003 0.003 4.849 4.849 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 4.556 4.556 dbcsr_make_images_dense 3386 14.7 0.015 0.015 4.476 4.476 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 4.394 4.394 apply_single 107 13.6 0.000 0.000 4.394 4.394 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_nonortho_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.008 0.021 91.906 91.917 qs_mol_dyn_low 1 2.0 0.003 0.005 91.802 91.807 qs_forces 11 3.9 0.001 0.002 91.760 91.760 qs_energies 11 4.9 0.001 0.001 85.561 85.563 scf_env_do_scf 11 5.9 0.001 0.002 79.206 79.207 scf_env_do_scf_inner_loop 96 6.5 0.003 0.017 73.366 73.367 velocity_verlet 10 3.0 0.002 0.003 54.700 54.701 rebuild_ks_matrix 107 8.3 0.000 0.001 40.768 40.889 qs_ks_build_kohn_sham_matrix 107 9.3 0.015 0.021 40.767 40.889 qs_ks_update_qs_env 107 7.6 0.001 0.001 36.027 36.142 sum_up_and_integrate 107 10.3 0.021 0.024 36.087 36.128 integrate_v_rspace 107 11.3 0.004 0.005 36.066 36.106 qs_rho_update_rho_low 107 7.7 0.001 0.001 34.073 34.085 calculate_rho_elec 107 8.7 0.027 0.028 34.072 34.085 grid_integrate_task_list 107 12.3 24.688 31.998 24.688 31.998 grid_collocate_task_list 107 9.7 24.094 30.500 24.094 30.500 dbcsr_multiply_generic 1966 12.4 0.071 0.083 13.950 14.148 rs_pw_transfer 878 11.9 0.011 0.013 10.834 11.853 qs_scf_new_mos 96 7.5 0.001 0.001 10.896 11.003 qs_scf_loop_do_ot 96 8.5 0.001 0.001 10.896 11.002 density_rs2pw 107 9.7 0.005 0.006 9.571 10.549 multiply_cannon 1966 13.4 0.128 0.147 9.924 10.385 ot_scf_mini 96 9.5 0.002 0.002 10.230 10.349 multiply_cannon_loop 1966 14.4 0.097 0.114 9.265 9.803 mp_waitall_1 146670 16.2 8.208 9.050 8.208 9.050 mp_alltoall_d11v 1998 13.7 7.689 8.643 7.689 8.643 mp_waitany 8968 13.7 7.346 8.424 7.346 8.424 rs_gather_matrices 107 12.3 0.095 0.110 7.413 8.330 rs_pw_transfer_RS2PW_140 118 11.5 0.274 0.321 6.957 7.972 ot_mini 96 10.5 0.001 0.001 6.008 6.135 multiply_cannon_metrocomm3 15728 15.4 0.038 0.047 5.223 5.915 init_scf_loop 11 6.9 0.000 0.001 5.823 5.834 init_scf_run 11 5.9 0.000 0.005 5.012 5.021 scf_env_initial_rho_setup 11 6.9 0.000 0.004 5.011 5.021 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 4.912 4.939 wfi_extrapolate 11 7.9 0.001 0.001 4.564 4.567 potential_pw2rs 107 12.3 0.006 0.012 3.921 3.940 pw_transfer 1295 11.6 0.088 0.096 3.790 3.843 fft_wrap_pw1pw2 1081 12.6 0.009 0.012 3.621 3.681 multiply_cannon_multrec 15728 15.4 3.169 3.399 3.179 3.412 qs_ot_get_derivative 96 11.5 0.001 0.001 3.158 3.278 fft_wrap_pw1pw2_140 439 13.2 0.342 0.380 3.026 3.134 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 2.778 2.897 apply_single 107 13.6 0.000 0.001 2.778 2.897 ot_diis_step 96 11.5 0.004 0.004 2.828 2.828 fft3d_ps 1081 14.6 1.325 1.412 2.639 2.752 make_m2s 3932 13.4 0.044 0.054 2.640 2.719 make_images 3932 14.4 0.112 0.130 2.323 2.408 rs_pw_transfer_PW2RS_140 118 13.9 0.725 0.817 1.776 1.871 ------------------------------------------------------------------------------- Plot: name="H2O-64_nonortho_timings_32omp", title="Timings of H2O-64_nonortho with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="rest", label="rest", y=56.620000000000005, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=61.82, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=30.407, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=26.302, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=24.591, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=16.142, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_alltoall_d11v", label="mp_alltoall_d11v", y=0.0, yerr=0.0 Plot: name="H2O-64_nonortho_timings_32mpi", title="Timings of H2O-64_nonortho with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="rest", label="rest", y=19.881, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=24.094, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=24.688, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=8.208, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitany", label="mp_waitany", y=7.346, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_alltoall_d11v", label="mp_alltoall_d11v", y=7.689, yerr=0.0 Running H2O-hyb.inp with 1 threads and 32 ranks... done. Running H2O-hyb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-hyb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.196 0.196 153.744 153.744 qs_energies 1 2.0 0.000 0.000 152.877 152.877 scf_env_do_scf 1 3.0 0.000 0.000 151.676 151.676 qs_ks_update_qs_env 8 5.0 0.000 0.000 116.550 116.550 rebuild_ks_matrix 7 6.0 0.000 0.000 116.493 116.493 qs_ks_build_kohn_sham_matrix 7 7.0 0.001 0.001 116.493 116.493 hfx_ks_matrix 7 8.0 0.000 0.000 98.817 98.817 integrate_four_center 7 9.0 1.301 1.301 98.796 98.796 integrate_four_center_main 7 10.0 0.354 0.354 85.705 85.705 init_scf_loop 1 4.0 0.000 0.000 85.637 85.637 integrate_four_center_bin 456 11.0 85.351 85.351 85.351 85.351 scf_env_do_scf_inner_loop 7 4.0 0.001 0.001 66.030 66.030 prepare_preconditioner 1 5.0 0.000 0.000 29.639 29.639 make_preconditioner 1 6.0 0.000 0.000 29.639 29.639 arnoldi_normal_ev 11 9.3 0.002 0.002 17.818 17.818 estimate_cond_num 1 7.0 0.000 0.000 17.774 17.774 build_subspace 28 9.5 0.009 0.009 17.341 17.341 integrate_four_center_load 7 10.0 0.001 0.001 11.528 11.528 hfx_load_balance 1 11.0 0.001 0.001 11.527 11.527 admm_mo_calc_rho_aux 7 8.0 0.000 0.000 9.705 9.705 dbcsr_sym_m_v_mult 562 10.0 0.016 0.016 8.637 8.637 admm_fit_mo_coeffs 7 9.0 0.000 0.000 8.434 8.434 cp_fm_cholesky_invert 2 9.5 8.138 8.138 8.138 8.138 make_full_inverse_cholesky 1 7.0 0.000 0.000 7.744 7.744 DGKS_ortho_d 673 10.6 6.936 6.936 6.937 6.937 hfx_load_balance_count 1 12.0 5.770 5.770 5.770 5.770 hfx_load_balance_bin 1 12.0 5.740 5.740 5.740 5.740 Gram_Schmidt_ortho_d 673 10.6 5.566 5.566 5.567 5.567 purify_mo_diag 7 10.0 0.000 0.000 4.441 4.441 dbcsr_copy 1321 10.8 1.184 1.184 4.370 4.370 qs_scf_new_mos 7 5.0 0.000 0.000 4.107 4.107 qs_scf_loop_do_ot 7 6.0 0.000 0.000 4.107 4.107 make_full_single_inverse 1 7.0 0.000 0.000 4.007 4.007 ot_scf_mini 7 7.0 0.000 0.000 3.995 3.995 fit_mo_coeffs 7 10.0 0.000 0.000 3.992 3.992 arnoldi_generalized_ev 1 8.0 0.000 0.000 3.882 3.882 cp_fm_syevd 7 11.0 0.000 0.000 3.854 3.854 cp_fm_syevd_base 7 12.0 3.853 3.853 3.853 3.853 qs_vxc_create 14 8.0 0.000 0.000 3.806 3.806 xc_vxc_pw_create 14 9.0 0.128 0.128 3.805 3.805 gev_build_subspace 4 9.0 0.005 0.005 3.775 3.775 dbcsr_create_new 3179 12.1 2.238 2.238 3.578 3.578 dbcsr_set 2825 11.8 0.003 0.003 3.142 3.142 dbcsr_zero 2837 12.8 3.139 3.139 3.139 3.139 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-hyb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.195 0.216 144.089 144.099 qs_energies 1 2.0 0.000 0.000 143.760 143.762 scf_env_do_scf 1 3.0 0.000 0.000 143.355 143.355 qs_ks_update_qs_env 8 5.0 0.000 0.000 141.033 141.033 rebuild_ks_matrix 7 6.0 0.000 0.000 141.023 141.023 qs_ks_build_kohn_sham_matrix 7 7.0 0.002 0.002 141.023 141.023 hfx_ks_matrix 7 8.0 0.000 0.000 134.213 134.224 integrate_four_center 7 9.0 0.063 0.378 134.201 134.213 integrate_four_center_main 7 10.0 0.003 0.004 86.333 121.087 integrate_four_center_bin 448 11.0 86.330 121.083 86.330 121.083 scf_env_do_scf_inner_loop 7 4.0 0.000 0.001 80.647 80.647 init_scf_loop 1 4.0 0.000 0.000 62.706 62.706 mp_sync 70 11.3 34.779 37.283 34.779 37.283 integrate_four_center_load 7 10.0 0.000 0.000 12.323 12.327 hfx_load_balance 1 11.0 0.001 0.001 12.323 12.327 mp_sum_l 1135 8.3 6.229 6.527 6.229 6.527 hfx_load_balance_dist 1 12.0 0.000 0.000 6.085 6.386 hfx_load_balance_bin 1 12.0 3.072 6.171 3.072 6.171 hfx_load_balance_count 1 12.0 3.080 6.066 3.080 6.066 qs_vxc_create 14 8.0 0.000 0.000 3.089 3.089 xc_vxc_pw_create 14 9.0 0.008 0.010 3.089 3.089 ------------------------------------------------------------------------------- Plot: name="H2O-hyb_timings_32omp", title="Timings of H2O-hyb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32omp", name="rest", label="rest", y=41.809, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_bin", label="integrate_four_center_bin", y=85.351, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=8.138, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="DGKS_ortho_d", label="DGKS_ortho_d", y=6.936, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_count", label="hfx_load_balance_count", y=5.77, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=5.74, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 Plot: name="H2O-hyb_timings_32mpi", title="Timings of H2O-hyb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32mpi", name="rest", label="rest", y=10.59899999999999, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_bin", label="integrate_four_center_bin", y=86.33, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="DGKS_ortho_d", label="DGKS_ortho_d", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_count", label="hfx_load_balance_count", y=3.08, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=3.072, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=6.229, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="mp_sync", label="mp_sync", y=34.779, yerr=0.0 Running GW_PBE_4benzene.inp with 1 threads and 32 ranks... done. Running GW_PBE_4benzene.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/GW_PBE_4benzene_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.013 0.013 158.015 158.015 qs_energies 1 2.0 0.000 0.000 157.624 157.624 mp2_main 1 3.0 0.000 0.000 133.721 133.721 mp2_gpw_main 1 4.0 0.000 0.000 132.579 132.579 rpa_ri_compute_en 1 5.0 0.000 0.000 125.421 125.421 rpa_num_int 1 6.0 0.001 0.001 125.415 125.415 compute_mat_P_omega 1 7.0 0.003 0.003 75.637 75.637 compute_mat_P_omega_contract 10 8.0 8.539 8.539 75.421 75.421 dbt_total 2336 9.6 0.011 0.011 63.149 63.149 dbt_contract 787 11.0 0.035 0.035 55.902 55.902 dbt_tas_total 1149 12.2 0.179 0.179 54.490 54.490 dbt_tas_multiply 807 12.1 0.002 0.002 53.085 53.085 dbt_tas_dbm 807 14.1 0.003 0.003 46.044 46.044 dbm_multiply 807 16.1 46.035 46.035 46.035 46.035 dbt_tas_mm_1N 524 15.1 0.002 0.002 34.000 34.000 GW_matrix_operations 10 7.0 0.007 0.007 32.053 32.053 cp_fm_cholesky_invert 10 8.0 31.232 31.232 31.232 31.232 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 28.928 28.928 scf_env_do_scf 1 3.0 0.000 0.000 23.579 23.579 scf_env_do_scf_inner_loop 17 4.0 0.002 0.002 23.579 23.579 qs_scf_new_mos 17 5.0 0.000 0.000 21.905 21.905 compute_mat_P_omega_calc_M_occ 250 9.0 8.532 8.532 20.788 20.788 eigensolver 18 5.9 0.001 0.001 19.622 19.622 cp_fm_diag_elpa 18 6.9 0.000 0.000 12.725 12.725 cp_fm_diag_elpa_base 18 7.9 12.665 12.665 12.725 12.725 cp_fm_cholesky_decompose 14 8.1 12.402 12.402 12.402 12.402 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 11.306 11.306 rpa_num_int_RPA_matrix_operati 10 7.0 0.000 0.000 10.984 10.984 dbt_tas_mm_2 251 15.0 0.001 0.001 10.091 10.091 RPA_postprocessing_nokp 10 8.0 0.001 0.001 10.011 10.011 mp2_ri_gpw_compute_in 1 5.0 0.001 0.001 7.151 7.151 cp_fm_cholesky_restore 51 7.0 6.789 6.789 6.789 6.789 dbt_copy 1103 10.7 0.105 0.105 5.824 5.824 compute_QP_energies 1 7.0 0.000 0.000 5.724 5.724 compute_self_energy_cubic_gw 1 8.0 0.048 0.048 5.723 5.723 get_2c_integrals 1 6.0 0.000 0.000 5.502 5.502 contract_cubic_gw 21 9.0 0.000 0.000 4.663 4.663 dbt_tas_reserve_blocks_index 3261 14.3 0.148 0.148 3.523 3.523 dbm_reserve_blocks 3628 15.3 3.446 3.446 3.446 3.446 ------------------------------------------------------------------------------- From /workspace/artifacts/GW_PBE_4benzene_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.006 0.020 36.448 36.458 qs_energies 1 2.0 0.000 0.000 36.372 36.373 mp2_main 1 3.0 0.000 0.001 35.289 35.290 mp2_gpw_main 1 4.0 0.000 0.000 35.251 35.252 rpa_ri_compute_en 1 5.0 0.000 0.000 33.936 33.937 rpa_num_int 1 6.0 0.000 0.002 33.936 33.936 dbt_total 2336 9.6 0.012 0.019 30.072 30.085 compute_mat_P_omega 1 7.0 0.001 0.005 28.848 28.911 compute_mat_P_omega_contract 10 8.0 0.453 0.487 28.701 28.706 dbt_contract 787 11.0 0.027 0.030 22.448 22.452 dbt_tas_total 1149 12.2 0.057 0.070 20.099 20.100 dbt_tas_multiply 807 12.1 0.002 0.003 20.027 20.029 dbt_tas_dbm 807 14.1 0.003 0.004 14.760 14.781 dbm_multiply 807 16.1 11.409 12.128 11.409 12.128 compute_mat_P_omega_calc_M_occ 250 9.0 0.436 0.469 8.615 8.615 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 8.276 8.276 dbt_copy 1111 10.7 0.012 0.014 6.527 6.831 mp_sync 8706 11.6 5.642 6.758 5.642 6.758 dbt_tas_mm_2 251 15.0 0.001 0.002 6.725 6.739 dbt_reshape 1098 11.7 2.373 3.012 6.237 6.501 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 6.189 6.193 dbt_tas_mm_1N 524 15.1 0.002 0.002 5.432 5.819 compute_QP_energies 1 7.0 0.000 0.000 3.251 3.252 compute_self_energy_cubic_gw 1 8.0 0.003 0.003 3.248 3.251 mp_waitall_2 3776 15.3 3.005 3.219 3.005 3.219 dbt_communicate_buffer 1098 12.7 0.056 0.079 3.075 3.185 contract_cubic_gw 21 9.0 0.000 0.000 2.547 2.548 dbt_reserve_blocks_index 2849 13.1 0.072 0.082 1.866 2.060 dbt_reserve_blocks_index_array 2791 12.2 0.009 0.011 1.865 2.060 dbt_tas_reserve_blocks_index 3300 14.5 0.117 0.157 1.832 2.027 dbm_reserve_blocks 3696 15.4 1.818 2.013 1.818 2.013 dbt_crop 1042 12.0 0.990 1.303 1.592 1.922 mp2_ri_gpw_compute_in 1 5.0 0.000 0.001 1.311 1.313 dbt_tas_replicate 396 14.1 0.559 0.729 1.201 1.312 compute_mat_P_omega_copy_M_vir 250 9.0 0.001 0.001 1.142 1.145 cp_gemm 105 8.4 0.000 0.000 1.088 1.098 cp_gemm_cosma 105 9.4 1.088 1.097 1.088 1.097 convert_to_new_pgrid 2421 14.1 0.032 0.040 0.966 1.071 compute_mat_P_omega_copy_M_occ 250 9.0 0.001 0.001 1.041 1.043 scf_env_do_scf 1 3.0 0.000 0.000 1.036 1.036 scf_env_do_scf_inner_loop 17 4.0 0.000 0.003 1.036 1.036 dbm_copy 1608 15.1 0.926 1.031 0.926 1.031 mp_max_i 1992 9.8 0.695 0.864 0.695 0.864 GW_matrix_operations 10 7.0 0.001 0.001 0.823 0.828 dbm_add 807 14.1 0.769 0.822 0.769 0.822 ------------------------------------------------------------------------------- Plot: name="GW_PBE_4benzene_timings_32omp", title="Timings of GW_PBE_4benzene with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="rest", label="rest", y=43.696, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=46.035, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=31.232, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=12.665, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=12.402, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="compute_mat_P_omega_contract", label="compute_mat_P_omega_contract", y=8.539, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=3.446, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbt_reshape", label="dbt_reshape", y=0.0, yerr=0.0 Plot: name="GW_PBE_4benzene_timings_32mpi", title="Timings of GW_PBE_4benzene with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="rest", label="rest", y=11.748000000000001, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=11.409, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="compute_mat_P_omega_contract", label="compute_mat_P_omega_contract", y=0.453, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=1.818, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_sync", label="mp_sync", y=5.642, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=3.005, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbt_reshape", label="dbt_reshape", y=2.373, yerr=0.0 Running RI-HFX_H2O-32.inp with 1 threads and 32 ranks... done. Running RI-HFX_H2O-32.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/RI-HFX_H2O-32_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.018 0.018 379.151 379.151 qs_forces 1 2.0 0.000 0.000 378.564 378.564 rebuild_ks_matrix 7 6.6 0.000 0.000 357.054 357.054 qs_ks_build_kohn_sham_matrix 7 7.6 0.001 0.001 357.054 357.054 hfx_ks_matrix 7 8.6 0.000 0.000 354.982 354.982 hfx_ri_update_ks 7 9.6 0.000 0.000 301.304 301.304 hfx_ri_update_ks_Pmat 7 10.6 33.555 33.555 301.302 301.302 dbt_total 4861 11.6 0.029 0.029 298.392 298.392 qs_energies 1 3.0 0.000 0.000 286.629 286.629 scf_env_do_scf 1 4.0 0.000 0.000 286.212 286.212 dbt_tas_total 2391 14.1 0.972 0.972 270.137 270.137 qs_ks_update_qs_env 8 6.0 0.000 0.000 265.175 265.175 dbt_contract 1473 13.0 0.135 0.135 249.478 249.478 dbt_tas_multiply 1482 14.0 0.004 0.004 238.949 238.949 hfx_ri_update_ks_Pmat_KS 567 11.6 0.004 0.004 218.559 218.559 dbt_tas_dbm 1482 16.0 0.006 0.006 218.161 218.161 dbm_multiply 1482 18.0 218.143 218.143 218.143 218.143 dbt_tas_mm_2 649 17.1 0.004 0.004 188.733 188.733 scf_env_do_scf_inner_loop 6 5.0 0.001 0.001 166.900 166.900 init_scf_loop 2 5.0 0.000 0.000 119.310 119.310 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 91.883 91.883 hfx_ri_update_forces 1 7.0 0.000 0.000 53.671 53.671 hfx_ri_forces_Pmat_3c 1 8.0 0.002 0.002 38.206 38.206 dbt_tas_reshape 906 14.4 0.010 0.010 22.663 22.663 dbt_tas_mm_3T 659 17.1 0.002 0.002 22.109 22.109 hfx_ri_pre_scf_Pmat 1 12.0 0.001 0.001 20.647 20.647 hfx_ri_update_ks_Pmat_Px3C 567 11.6 0.002 0.002 19.463 19.463 prepare_preconditioner 2 6.0 0.000 0.000 18.697 18.697 make_preconditioner 2 7.0 0.000 0.000 18.225 18.225 cp_fm_syevd 12 10.7 0.000 0.000 17.708 17.708 cp_fm_syevd_base 12 11.7 17.708 17.708 17.708 17.708 make_full_all 2 8.0 0.000 0.000 17.702 17.702 dbt_copy 2331 12.4 0.139 0.139 17.685 17.685 dbt_tas_merge 649 14.1 13.437 13.437 14.585 14.585 dbt_tas_reshape_buffer_fill 906 15.4 13.558 13.558 13.558 13.558 precalc_derivatives 1 8.0 0.005 0.005 12.492 12.492 dbm_reserve_blocks 8303 16.8 11.101 11.101 11.101 11.101 dbt_tas_reserve_blocks_index 7397 16.0 0.357 0.357 10.671 10.671 dbt_crop 2763 14.2 7.081 7.081 10.330 10.330 dbt_reshape 856 13.9 5.470 5.470 9.748 9.748 hfx_ri_pre_scf_Pmat_2c 1 13.0 0.000 0.000 8.631 8.631 dbt_reserve_blocks_index 4998 15.2 0.137 0.137 8.225 8.225 dbt_reserve_blocks_index_array 4963 14.3 0.021 0.021 8.172 8.172 ------------------------------------------------------------------------------- From /workspace/artifacts/RI-HFX_H2O-32_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.006 0.023 65.465 65.476 qs_forces 1 2.0 0.000 0.000 65.277 65.278 rebuild_ks_matrix 7 6.6 0.000 0.000 64.505 64.522 qs_ks_build_kohn_sham_matrix 7 7.6 0.002 0.007 64.505 64.522 hfx_ks_matrix 7 8.6 0.001 0.012 63.161 63.181 dbt_total 4861 11.6 0.028 0.032 57.169 57.184 dbt_contract 1473 13.0 0.103 0.115 44.197 44.213 hfx_ri_update_ks 7 9.6 0.000 0.000 43.311 43.312 hfx_ri_update_ks_Pmat 7 10.6 1.554 2.074 43.311 43.311 dbt_tas_total 2391 14.1 0.126 0.151 41.519 41.520 qs_energies 1 3.0 0.000 0.000 39.981 39.981 scf_env_do_scf 1 4.0 0.000 0.001 39.816 39.817 qs_ks_update_qs_env 8 6.0 0.000 0.000 39.223 39.240 dbt_tas_multiply 1482 14.0 0.005 0.006 37.189 37.192 dbt_tas_dbm 1482 16.0 0.005 0.006 28.565 28.607 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 25.283 25.283 dbm_multiply 1482 18.0 19.508 24.005 19.508 24.005 hfx_ri_update_ks_Pmat_KS 567 11.6 0.004 0.005 23.717 23.719 scf_env_do_scf_inner_loop 6 5.0 0.000 0.001 22.952 22.952 hfx_ri_update_forces 1 7.0 0.000 0.001 19.848 19.860 dbt_tas_mm_2 649 17.1 0.003 0.004 16.870 16.897 init_scf_loop 2 5.0 0.000 0.000 16.863 16.864 mp_sync 17513 13.6 13.750 16.549 13.750 16.549 hfx_ri_forces_Pmat_3c 1 8.0 0.002 0.002 14.000 14.025 hfx_ri_update_ks_Pmat_Px3C 567 11.6 0.002 0.003 8.880 8.880 dbt_copy 2349 12.4 0.034 0.037 7.278 7.711 dbt_tas_mm_3T 659 17.1 0.002 0.002 4.907 5.585 dbt_reshape 1256 13.5 2.287 2.673 5.011 5.217 dbt_crop 2763 14.2 3.177 4.151 4.007 5.000 dbt_tas_mm_3N 163 16.5 0.001 0.001 4.521 4.586 precalc_derivatives 1 8.0 0.001 0.002 4.425 4.425 hfx_ri_pre_scf_Pmat 1 12.0 0.000 0.001 4.416 4.416 mp_waitall_2 5988 16.5 3.266 3.510 3.266 3.510 dbt_tas_merge 649 14.1 1.656 2.082 2.905 3.244 dbm_reserve_blocks 8337 16.9 2.542 2.775 2.542 2.775 dbt_tas_reserve_blocks_index 7428 16.1 0.259 0.325 2.422 2.707 mp_max_i 3372 12.5 2.242 2.670 2.242 2.670 hfx_ri_pre_scf_Pmat_RIx3C 81 13.0 0.000 0.000 2.565 2.570 dbt_tas_replicate 909 15.6 0.651 0.844 2.399 2.462 dbt_tas_communicate_buffer 1825 16.3 0.061 0.077 2.238 2.437 dbt_reserve_blocks_index 5398 15.2 0.132 0.153 2.036 2.271 dbt_reserve_blocks_index_array 5363 14.2 0.014 0.016 2.032 2.268 build_3c_derivatives 9 9.0 0.234 0.351 2.032 2.035 dbt_communicate_buffer 1256 14.5 0.045 0.061 1.810 1.920 dbt_tas_reshape 916 14.4 0.008 0.010 1.769 1.855 mp_alltoall_i 4339 15.3 1.672 1.820 1.672 1.820 convert_to_new_pgrid 4446 16.0 0.043 0.048 1.541 1.722 dbm_copy 3043 16.9 1.498 1.681 1.498 1.681 hfx_ri_update_ks_Pmat_copy_2 567 11.6 0.002 0.002 1.612 1.619 mp_sum_l 38201 15.3 1.248 1.513 1.248 1.513 ------------------------------------------------------------------------------- Plot: name="RI-HFX_H2O-32_timings_32omp", title="Timings of RI-HFX_H2O-32 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="rest", label="rest", y=64.56799999999998, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=218.143, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=33.555, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="cp_fm_syevd_base", label="cp_fm_syevd_base", y=17.708, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_tas_reshape_buffer_fill", label="dbt_tas_reshape_buffer_fill", y=13.558, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_tas_merge", label="dbt_tas_merge", y=13.437, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=11.101, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_crop", label="dbt_crop", y=7.081, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 Plot: name="RI-HFX_H2O-32_timings_32mpi", title="Timings of RI-HFX_H2O-32 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="rest", label="rest", y=20.012000000000008, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=19.508, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=1.554, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="cp_fm_syevd_base", label="cp_fm_syevd_base", y=0.0, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_tas_reshape_buffer_fill", label="dbt_tas_reshape_buffer_fill", y=0.0, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_tas_merge", label="dbt_tas_merge", y=1.656, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=2.542, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_crop", label="dbt_crop", y=3.177, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_sync", label="mp_sync", y=13.75, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=3.266, yerr=0.0 Running RI-MP2_ammonia.inp with 1 threads and 32 ranks... done. Running RI-MP2_ammonia.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/RI-MP2_ammonia_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.014 0.014 254.669 254.669 qs_energies 1 2.0 0.000 0.000 254.484 254.484 mp2_main 1 3.0 0.000 0.000 219.228 219.228 mp2_gpw_main 1 4.0 0.001 0.001 215.261 215.261 mp2_ri_gpw_compute_in 1 5.0 0.385 0.385 166.488 166.488 mp2_ri_gpw_compute_in_loop 1 6.0 0.011 0.011 141.307 141.307 mp2_eri_3c_integrate_gpw 2656 7.0 0.014 0.014 110.345 110.345 integrate_v_rspace 2666 8.0 0.707 0.707 95.979 95.979 grid_integrate_task_list 2666 9.0 93.044 93.044 93.044 93.044 mp2_ri_gpw_compute_en 1 5.0 0.076 0.076 48.747 48.747 mp2_ri_gpw_compute_en_RI_loop 1 6.0 9.873 9.873 46.855 46.855 scf_env_do_scf 1 3.0 0.000 0.000 34.317 34.317 scf_env_do_scf_inner_loop 10 4.0 0.001 0.001 34.316 34.316 qs_scf_new_mos 10 5.0 0.000 0.000 32.944 32.944 mp2_ri_gpw_compute_en_expansio 2080 7.0 2.098 2.098 29.872 29.872 offload_gemm 2080 8.0 27.774 27.774 27.774 27.774 eigensolver 11 5.8 0.001 0.001 25.235 25.235 calculate_wavefunction 5312 9.0 16.393 16.393 24.893 24.893 get_2c_integrals 1 6.0 0.000 0.000 24.764 24.764 dbcsr_multiply_generic 5322 8.0 0.181 0.181 24.279 24.279 ao_to_mo_and_store_B_mult_1 2656 7.0 0.011 0.011 24.258 24.258 cp_fm_diag_elpa 11 6.8 0.000 0.000 22.566 22.566 cp_fm_diag_elpa_base 11 7.8 22.402 22.402 22.565 22.565 compute_2c_integrals 1 7.0 0.007 0.007 19.206 19.206 compute_2c_integrals_loop_lm 1 8.0 0.011 0.011 19.184 19.184 mp2_eri_2c_integrate_gpw 1 9.0 3.740 3.740 19.173 19.173 multiply_cannon 5322 9.0 0.462 0.462 12.846 12.846 pw_transfer 63872 10.6 0.939 0.939 12.275 12.275 multiply_cannon_loop 5322 10.0 0.466 0.466 11.413 11.413 fft_wrap_pw1pw2 53228 11.4 0.113 0.113 11.049 11.049 qs_diis_b_step 9 6.0 0.000 0.000 9.587 9.587 multiply_cannon_multrec 5322 11.0 9.311 9.311 9.352 9.352 make_m2s 10644 9.0 0.062 0.062 8.945 8.945 make_images 10644 10.0 3.443 3.443 8.604 8.604 cp_fm_symm 18 7.0 8.532 8.532 8.532 8.532 fft_wrap_pw1pw2_20 21271 12.4 0.661 0.661 7.796 7.796 fft3d_s 53229 13.4 6.902 6.902 6.937 6.937 ao_to_mo_and_store_B_E_Ex_1 2656 7.0 2.700 2.700 6.594 6.594 mp2_ri_gpw_compute_en_ener 2080 7.0 5.276 5.276 5.276 5.276 ------------------------------------------------------------------------------- From /workspace/artifacts/RI-MP2_ammonia_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.006 0.022 44.519 44.530 qs_energies 1 2.0 0.000 0.000 44.446 44.447 mp2_main 1 3.0 0.000 0.001 42.043 42.043 mp2_gpw_main 1 4.0 0.001 0.004 41.913 41.914 mp2_ri_gpw_compute_in 1 5.0 0.041 0.042 19.366 24.966 mp2_ri_gpw_compute_in_loop 1 6.0 0.001 0.001 17.527 23.124 mp2_ri_gpw_compute_en 1 5.0 0.083 0.103 22.464 23.018 mp2_eri_3c_integrate_gpw 83 7.0 0.001 0.001 15.101 20.922 integrate_v_rspace 93 8.1 0.121 0.150 15.003 20.707 grid_integrate_task_list 93 9.1 14.592 20.366 14.592 20.366 mp2_ri_gpw_compute_en_RI_loop 1 6.0 0.895 1.060 15.725 15.883 mp2_ri_gpw_compute_en_expansio 65 7.0 0.100 0.121 11.692 11.970 offload_gemm 65 8.0 11.592 11.862 11.592 11.862 mp_min_d 2 7.0 5.642 6.269 5.642 6.269 mp2_ri_get_integ_group_size 1 6.0 0.000 0.000 5.597 6.150 mp2_ri_gpw_compute_en_comm 17 7.0 0.114 0.160 2.694 3.068 dbcsr_multiply_generic 176 8.0 0.009 0.010 2.093 2.556 ao_to_mo_and_store_B_mult_1 83 7.0 0.001 0.001 2.075 2.538 mp_sendrecv_dm3 510 8.0 2.011 2.418 2.011 2.418 scf_env_do_scf 1 3.0 0.000 0.000 2.269 2.270 scf_env_do_scf_inner_loop 10 4.0 0.000 0.002 2.269 2.270 get_2c_integrals 1 6.0 0.000 0.000 1.771 1.812 compute_2c_integrals 1 7.0 0.003 0.003 1.449 1.464 compute_2c_integrals_loop_lm 1 8.0 0.001 0.003 1.076 1.386 mp2_eri_2c_integrate_gpw 1 9.0 0.214 0.335 1.075 1.383 multiply_cannon 176 9.0 0.016 0.018 1.186 1.344 multiply_cannon_loop 176 10.0 0.002 0.003 1.124 1.276 qs_scf_new_mos 10 5.0 0.000 0.000 1.111 1.184 calculate_wavefunction 166 9.0 0.521 0.733 0.911 1.167 make_m2s 352 9.0 0.003 0.004 0.864 1.164 make_images 352 10.0 0.055 0.064 0.851 1.150 eigensolver 11 5.8 0.001 0.001 1.078 1.080 multiply_cannon_multrec 246 11.0 0.949 1.043 0.955 1.050 pw_transfer 2120 10.5 0.043 0.055 0.831 0.917 cp_fm_diag_elpa 11 6.8 0.000 0.000 0.894 0.895 ------------------------------------------------------------------------------- Plot: name="RI-MP2_ammonia_timings_32omp", title="Timings of RI-MP2_ammonia with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="rest", label="rest", y=75.87200000000001, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=93.044, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="offload_gemm", label="offload_gemm", y=27.774, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=22.402, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="calculate_wavefunction", label="calculate_wavefunction", y=16.393, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp2_ri_gpw_compute_en_RI_loop", label="mp2_ri_gpw_compute_en_RI_loop", y=9.873, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=9.311, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp_sendrecv_dm3", label="mp_sendrecv_dm3", y=0.0, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp_min_d", label="mp_min_d", y=0.0, yerr=0.0 Plot: name="RI-MP2_ammonia_timings_32mpi", title="Timings of RI-MP2_ammonia with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="rest", label="rest", y=8.316999999999993, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=14.592, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="offload_gemm", label="offload_gemm", y=11.592, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=0.0, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="calculate_wavefunction", label="calculate_wavefunction", y=0.521, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp2_ri_gpw_compute_en_RI_loop", label="mp2_ri_gpw_compute_en_RI_loop", y=0.895, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=0.949, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp_sendrecv_dm3", label="mp_sendrecv_dm3", y=2.011, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp_min_d", label="mp_min_d", y=5.642, yerr=0.0 Running diag_cu144_broy.inp with 1 threads and 32 ranks... done. Running diag_cu144_broy.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/diag_cu144_broy_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.085 0.085 212.833 212.833 qs_energies 1 2.0 0.000 0.000 211.528 211.528 scf_env_do_scf 1 3.0 0.000 0.000 202.572 202.572 scf_env_do_scf_inner_loop 15 4.0 0.002 0.002 202.572 202.572 qs_scf_new_mos 15 5.0 0.000 0.000 128.711 128.711 eigensolver 15 6.0 0.001 0.001 121.195 121.195 cp_fm_diag_elpa 15 7.0 0.000 0.000 108.776 108.776 cp_fm_diag_elpa_base 15 8.0 106.202 106.202 108.776 108.776 qs_ks_update_qs_env 15 5.0 0.000 0.000 48.857 48.857 rebuild_ks_matrix 15 6.0 0.000 0.000 48.651 48.651 qs_ks_build_kohn_sham_matrix 15 7.0 0.002 0.002 48.651 48.651 qs_vxc_create 15 8.0 0.022 0.022 33.456 33.456 calculate_dispersion_nonloc 15 9.0 6.919 6.919 29.182 29.182 pw_transfer 1191 10.0 0.055 0.055 22.639 22.639 fft_wrap_pw1pw2 1086 11.0 0.009 0.009 22.457 22.457 qs_rho_update_rho_low 16 5.0 0.000 0.000 21.772 21.772 calculate_rho_elec 16 6.0 0.219 0.219 21.772 21.772 grid_collocate_task_list 16 7.0 20.464 20.464 20.464 20.464 fft_wrap_pw1pw2_150 765 12.0 3.192 3.192 16.097 16.097 sum_up_and_integrate 15 8.0 0.039 0.039 13.833 13.833 integrate_v_rspace 15 9.0 0.027 0.027 13.794 13.794 grid_integrate_task_list 15 10.0 13.291 13.291 13.291 13.291 fft3d_s 1087 13.0 10.334 10.334 10.341 10.341 cp_fm_cholesky_restore 45 7.0 10.209 10.209 10.209 10.209 pw_scatter_s 585 13.1 7.043 7.043 7.043 7.043 fft_wrap_pw1pw2_200 197 12.3 0.717 0.717 6.171 6.171 init_scf_run 1 3.0 0.000 0.000 5.393 5.393 copy_dbcsr_to_fm 16 5.9 0.000 0.000 5.366 5.366 dbcsr_complete_redistribute 46 8.3 2.157 2.157 5.363 5.363 cp_fm_upper_to_full 30 8.0 4.782 4.782 4.782 4.782 gspace_mixing 14 5.0 0.170 0.170 4.443 4.443 vdW_energy 15 10.0 4.425 4.425 4.425 4.425 ------------------------------------------------------------------------------- From /workspace/artifacts/diag_cu144_broy_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.014 0.026 61.643 61.654 qs_energies 1 2.0 0.000 0.002 61.384 61.384 scf_env_do_scf 1 3.0 0.000 0.001 57.242 57.243 scf_env_do_scf_inner_loop 15 4.0 0.001 0.008 57.242 57.243 qs_ks_update_qs_env 15 5.0 0.000 0.000 24.809 24.830 rebuild_ks_matrix 15 6.0 0.000 0.000 24.771 24.793 qs_ks_build_kohn_sham_matrix 15 7.0 0.003 0.015 24.771 24.793 qs_rho_update_rho_low 16 5.0 0.000 0.000 20.697 20.704 calculate_rho_elec 16 6.0 0.007 0.007 20.696 20.704 grid_collocate_task_list 16 7.0 19.174 19.755 19.174 19.755 sum_up_and_integrate 15 8.0 0.006 0.012 14.144 14.194 integrate_v_rspace 15 9.0 0.001 0.001 14.138 14.191 grid_integrate_task_list 15 10.0 12.746 13.331 12.746 13.331 qs_scf_new_mos 15 5.0 0.000 0.000 12.470 12.650 eigensolver 15 6.0 0.001 0.002 11.510 11.525 qs_vxc_create 15 8.0 0.001 0.001 10.319 10.330 calculate_dispersion_nonloc 15 9.0 0.966 1.795 8.499 8.517 cp_fm_diag_elpa 15 7.0 0.000 0.000 8.225 8.228 cp_fm_diag_elpa_base 15 8.0 8.077 8.108 8.223 8.226 pw_transfer 1191 10.0 0.084 0.091 7.900 7.981 fft_wrap_pw1pw2 1086 11.0 0.012 0.013 7.729 7.811 fft3d_ps 1086 13.0 2.463 2.603 6.089 6.235 fft_wrap_pw1pw2_150 765 12.0 0.271 0.296 5.333 5.388 mp_alltoall_z22v 1086 15.0 3.043 3.534 3.043 3.534 cp_fm_cholesky_restore 45 7.0 3.144 3.172 3.144 3.172 yz_to_x 501 13.9 0.216 0.304 2.349 2.642 qs_energies_init_hamiltonians 1 3.0 0.000 0.006 2.473 2.478 build_core_hamiltonian_matrix 1 4.0 0.000 0.000 2.148 2.346 fft_wrap_pw1pw2_200 197 12.3 0.174 0.193 2.268 2.323 xc_vxc_pw_create 15 9.0 0.014 0.017 1.820 1.841 rs_pw_transfer 158 9.4 0.001 0.002 1.461 1.780 density_rs2pw 16 7.0 0.001 0.001 1.398 1.592 init_scf_run 1 3.0 0.000 0.000 1.430 1.431 x_to_yz 585 14.1 0.340 0.374 1.250 1.391 build_core_ppnl 1 5.0 1.251 1.379 1.251 1.379 mp_waitany 520 11.3 1.034 1.372 1.034 1.372 vdW_energy 15 10.0 1.311 1.370 1.311 1.370 scf_env_initial_rho_setup 1 4.0 0.000 0.000 1.344 1.344 xc_pw_derive 90 11.0 0.001 0.001 1.220 1.296 ------------------------------------------------------------------------------- Plot: name="diag_cu144_broy_timings_32omp", title="Timings of diag_cu144_broy with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32omp", name="rest", label="rest", y=52.333, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=106.202, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=20.464, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=13.291, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_s", label="fft3d_s", y=10.334, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=10.209, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=0.0, yerr=0.0 Plot: name="diag_cu144_broy_timings_32mpi", title="Timings of diag_cu144_broy with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="rest", label="rest", y=15.459000000000003, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=8.077, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=19.174, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=12.746, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=3.144, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=3.043, yerr=0.0 Running bench_dftb.inp with 1 threads and 32 ranks... done. Running bench_dftb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/bench_dftb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.072 0.072 385.545 385.545 qs_energies 1 2.0 0.000 0.000 385.413 385.413 ls_scf 1 3.0 0.000 0.000 384.202 384.202 ls_scf_main 1 4.0 0.002 0.002 372.019 372.019 density_matrix_trs4 11 5.0 0.012 0.012 279.975 279.975 arnoldi_extremal 12 6.1 0.000 0.000 199.397 199.397 arnoldi_normal_ev 12 7.1 0.014 0.014 199.397 199.397 build_subspace 23 8.1 0.088 0.088 196.143 196.143 dbcsr_matrix_vector_mult 652 9.0 0.194 0.194 173.973 173.973 dbcsr_matrix_vector_mult_local 652 10.0 163.224 163.224 163.228 163.228 ls_scf_dm_to_ks 11 5.0 0.000 0.000 86.873 86.873 matrix_ls_to_qs 11 6.0 0.000 0.000 83.675 83.675 dbcsr_multiply_generic 185 6.1 0.824 0.824 72.644 72.644 dbcsr_copy_into_existing 11 7.0 46.680 46.680 46.680 46.680 multiply_cannon 185 7.1 0.334 0.334 43.901 43.901 dbcsr_complete_redistribute 23 7.5 29.732 29.732 40.629 40.629 matrix_decluster 11 7.0 0.000 0.000 36.994 36.994 multiply_cannon_loop 185 8.1 0.237 0.237 31.671 31.671 make_m2s 370 7.1 0.037 0.037 24.388 24.388 multiply_cannon_multrec 185 9.1 22.878 22.878 22.904 22.904 make_images 370 8.1 10.479 10.479 22.797 22.797 dbcsr_finalize 646 7.5 0.196 0.196 14.465 14.465 dbcsr_merge_all 597 8.5 2.286 2.286 13.252 13.252 DGKS_ortho_d 702 9.1 12.521 12.521 12.523 12.523 setup_rec_index_2d 370 8.1 11.807 11.807 11.807 11.807 ls_scf_init_scf 1 4.0 0.000 0.000 11.484 11.484 ls_scf_init_matrix_S 1 5.0 0.000 0.000 11.151 11.151 Gram_Schmidt_ortho_d 702 9.1 10.788 10.788 10.790 10.790 matrix_sqrt_Newton_Schulz 1 6.0 0.001 0.001 10.502 10.502 dbcsr_sort_indices 1103 9.9 10.102 10.102 10.102 10.102 tree_to_linear_d 110 9.4 9.724 9.724 9.724 9.724 quick_finalize 395 10.0 0.418 0.418 8.805 8.805 calculate_norms 370 9.1 8.530 8.530 8.530 8.530 dbcsr_special_finalize 370 9.1 0.002 0.002 8.151 8.151 ------------------------------------------------------------------------------- From /workspace/artifacts/bench_dftb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.010 0.024 66.276 66.286 qs_energies 1 2.0 0.000 0.000 66.180 66.180 ls_scf 1 3.0 0.000 0.000 66.122 66.123 ls_scf_main 1 4.0 0.000 0.007 63.601 63.601 density_matrix_trs4 11 5.0 0.006 0.017 60.949 61.008 dbcsr_multiply_generic 185 6.1 0.056 0.068 56.886 57.035 multiply_cannon 185 7.1 0.033 0.035 47.331 48.667 multiply_cannon_loop 185 8.1 0.109 0.123 44.914 46.045 multiply_cannon_multrec 1480 9.1 27.574 32.891 27.827 33.206 mp_waitall_1 11936 10.3 14.915 18.293 14.915 18.293 multiply_cannon_metrocomm3 1480 9.1 0.014 0.017 8.955 14.414 multiply_cannon_metrocomm1 1480 9.1 0.007 0.009 3.353 7.452 make_m2s 370 7.1 0.034 0.039 6.473 6.548 make_images 370 8.1 0.618 0.707 6.347 6.425 calculate_norms 2960 9.1 4.611 5.840 4.611 5.840 arnoldi_extremal 12 6.1 0.000 0.000 3.318 3.335 arnoldi_normal_ev 12 7.1 0.001 0.004 3.318 3.335 build_subspace 23 8.1 0.019 0.023 3.202 3.209 make_images_data 370 9.1 0.009 0.013 2.841 3.056 dbcsr_matrix_vector_mult 652 9.0 0.009 0.044 2.258 2.890 mp_sum_l 1119 5.6 2.125 2.840 2.125 2.840 hybrid_alltoall_any 393 9.9 0.180 1.034 2.473 2.689 dbcsr_matrix_vector_mult_local 652 10.0 1.702 2.585 1.703 2.587 ls_scf_dm_to_ks 11 5.0 0.000 0.000 2.308 2.383 dbcsr_complete_redistribute 23 7.5 1.185 1.558 1.904 2.092 matrix_ls_to_qs 11 6.0 0.000 0.000 1.846 2.048 ls_scf_init_scf 1 4.0 0.000 0.000 1.945 1.946 ls_scf_init_matrix_S 1 5.0 0.000 0.000 1.920 1.925 matrix_decluster 11 7.0 0.000 0.000 1.705 1.876 dbcsr_multiply_generic_mpsum_f 137 7.1 0.000 0.000 1.299 1.864 matrix_sqrt_Newton_Schulz 1 6.0 0.000 0.001 1.748 1.750 make_images_pack 370 9.1 1.559 1.728 1.562 1.735 buffer_matrices_ensure_size 370 8.1 1.297 1.399 1.297 1.399 dbcsr_finalize 646 7.5 0.008 0.009 1.289 1.382 ------------------------------------------------------------------------------- Plot: name="bench_dftb_timings_32omp", title="Timings of bench_dftb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32omp", name="rest", label="rest", y=101.98000000000002, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=163.224, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=46.68, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=29.732, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=22.878, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="DGKS_ortho_d", label="DGKS_ortho_d", y=12.521, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="calculate_norms", label="calculate_norms", y=8.53, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 Plot: name="bench_dftb_timings_32mpi", title="Timings of bench_dftb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32mpi", name="rest", label="rest", y=14.163999999999994, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=1.702, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=1.185, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=27.574, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="DGKS_ortho_d", label="DGKS_ortho_d", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="calculate_norms", label="calculate_norms", y=4.611, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=14.915, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=2.125, yerr=0.0 Running dbcsr.inp with 1 threads and 32 ranks... done. Running dbcsr.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/dbcsr_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.008 0.008 69.255 69.255 lib_test 1 2.0 0.000 0.000 69.245 69.245 dbcsr_run_tests 3 3.0 0.002 0.002 69.245 69.245 test_multiplies_multiproc 3 4.0 0.001 0.001 54.296 54.296 dbcsr_redistribute 9 5.0 35.395 35.395 36.894 36.894 dbcsr_multiply_generic 9 5.0 0.001 0.001 15.805 15.805 dbcsr_make_random_matrix 9 4.0 12.104 12.104 14.852 14.852 multiply_cannon 9 6.0 0.001 0.001 11.431 11.431 multiply_cannon_loop 9 7.0 0.002 0.002 11.077 11.077 multiply_cannon_multrec 9 8.0 11.074 11.074 11.075 11.075 dbcsr_finalize 27 5.7 0.010 0.010 5.145 5.145 dbcsr_merge_all 18 6.5 1.901 1.901 4.475 4.475 dbcsr_data_release 975 7.6 2.509 2.509 2.509 2.509 tree_to_linear_d 9 7.0 1.771 1.771 1.771 1.771 make_m2s 18 6.0 0.001 0.001 1.460 1.460 make_images 18 7.0 0.519 0.519 1.408 1.408 ------------------------------------------------------------------------------- From /workspace/artifacts/dbcsr_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.004 0.011 17.580 17.584 lib_test 1 2.0 0.000 0.000 17.543 17.560 dbcsr_run_tests 3 3.0 0.000 0.001 17.542 17.558 test_multiplies_multiproc 3 4.0 0.000 0.002 16.719 16.765 dbcsr_multiply_generic 9 5.0 0.001 0.001 14.804 14.884 multiply_cannon 9 6.0 0.001 0.002 13.182 13.479 multiply_cannon_loop 9 7.0 0.002 0.002 12.913 13.204 multiply_cannon_multrec 72 8.0 10.813 11.604 10.813 11.604 mp_waitall_1 576 9.2 2.397 3.076 2.397 3.076 multiply_cannon_metrocomm1 72 8.0 0.001 0.001 1.861 2.647 dbcsr_make_random_matrix 9 4.0 0.669 0.976 0.795 1.026 mp_sum_l 390 2.5 0.407 0.870 0.407 0.870 dbcsr_multiply_generic_mpsum_f 9 6.0 0.000 0.000 0.391 0.854 multiply_cannon_metrocomm3 72 8.0 0.000 0.000 0.231 0.849 dbcsr_data_release 444 7.6 0.614 0.698 0.614 0.698 make_m2s 18 6.0 0.001 0.001 0.645 0.676 dbcsr_finalize 27 5.7 0.000 0.000 0.607 0.674 make_images 18 7.0 0.021 0.025 0.642 0.673 dbcsr_destroy 111 5.9 0.000 0.000 0.494 0.561 dbcsr_merge_all 18 6.5 0.083 0.095 0.485 0.535 dbcsr_checksum 6 5.0 0.154 0.522 0.523 0.523 mp_cart_create 9 5.7 0.407 0.485 0.407 0.485 dbcsr_mp_make_env 6 4.5 0.000 0.000 0.397 0.470 dbcsr_redistribute 9 5.0 0.222 0.262 0.383 0.409 make_images_data 18 8.0 0.000 0.001 0.336 0.402 mp_sum_d 191 1.2 0.370 0.386 0.370 0.386 dbcsr_data_copy_aa2 18 7.5 0.297 0.378 0.297 0.378 hybrid_alltoall_any 18 9.0 0.027 0.120 0.296 0.356 ------------------------------------------------------------------------------- Plot: name="dbcsr_timings_32omp", title="Timings of dbcsr with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32omp", name="rest", label="rest", y=6.271999999999991, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_redistribute", label="dbcsr_redistribute", y=35.395, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=12.104, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=11.074, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_data_release", label="dbcsr_data_release", y=2.509, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_merge_all", label="dbcsr_merge_all", y=1.901, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 Plot: name="dbcsr_timings_32mpi", title="Timings of dbcsr with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32mpi", name="rest", label="rest", y=2.3749999999999964, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_redistribute", label="dbcsr_redistribute", y=0.222, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=0.669, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=10.813, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_data_release", label="dbcsr_data_release", y=0.614, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_merge_all", label="dbcsr_merge_all", y=0.083, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=2.397, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=0.407, yerr=0.0 Running MQAE_single_node.inp with 1 threads and 32 ranks... done. Running MQAE_single_node.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/MQAE_single_node_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.047 0.047 135.679 135.679 qs_mol_dyn_low 1 2.0 0.003 0.003 134.231 134.231 velocity_verlet 5 3.0 0.003 0.003 109.614 109.614 qmmm_el_coupling 6 3.8 0.000 0.000 83.017 83.017 qmmm_elec_with_gaussian 6 4.8 0.085 0.085 83.011 83.011 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 82.292 82.292 qmmm_elec_gaussian_low_G 6 6.8 81.404 81.404 81.404 81.404 qs_forces 6 3.8 0.000 0.000 42.721 42.721 qs_energies 6 4.8 0.000 0.000 38.688 38.688 scf_env_do_scf 6 5.8 0.001 0.001 35.283 35.283 scf_env_do_scf_inner_loop 39 6.8 0.004 0.004 25.149 25.149 rebuild_ks_matrix 45 8.4 0.000 0.000 24.174 24.174 qs_ks_build_kohn_sham_matrix 45 9.4 0.005 0.005 24.174 24.174 qs_ks_update_qs_env 45 7.8 0.000 0.000 20.566 20.566 pw_transfer 966 12.3 0.047 0.047 16.513 16.513 fft_wrap_pw1pw2 801 13.6 0.006 0.006 16.314 16.314 fft_wrap_pw1pw2_150 507 15.2 2.035 2.035 15.910 15.910 qs_vxc_create 45 10.4 0.001 0.001 13.062 13.062 xc_vxc_pw_create 45 11.4 0.667 0.667 13.061 13.061 init_scf_loop 6 6.8 0.000 0.000 10.129 10.129 xc_pw_derive 270 13.4 0.002 0.002 8.965 8.965 fft3d_s 802 15.6 7.477 7.477 7.490 7.490 qs_rho_update_rho_low 45 7.9 0.000 0.000 7.018 7.018 calculate_rho_elec 45 8.9 0.569 0.569 7.018 7.018 prepare_preconditioner 6 7.8 0.000 0.000 6.886 6.886 xc_rho_set_and_dset_create 45 12.4 0.664 0.664 6.883 6.883 make_preconditioner 6 8.8 0.000 0.000 6.669 6.669 make_full_all 6 9.8 0.001 0.001 6.387 6.387 pw_scatter_s 429 15.8 5.529 5.529 5.529 5.529 xc_pw_divergence 45 12.4 0.001 0.001 5.464 5.464 qmmm_forces 6 3.8 0.002 0.002 5.389 5.389 qmmm_forces_with_gaussian 6 4.8 0.088 0.088 5.069 5.069 pw_integral_ab 2539 7.4 4.270 4.270 4.270 4.270 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 4.257 4.257 qs_ks_ddapc 45 10.4 0.001 0.001 4.145 4.145 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 3.616 3.616 qmmm_forces_gaussian_low_G 6 6.8 3.548 3.548 3.548 3.548 cp_fm_diag_elpa 18 11.2 0.000 0.000 3.481 3.481 cp_fm_diag_elpa_base 18 12.2 3.474 3.474 3.481 3.481 grid_collocate_task_list 45 9.9 3.256 3.256 3.256 3.256 density_rs2pw 45 9.9 0.001 0.001 3.192 3.192 sum_up_and_integrate 45 10.4 0.114 0.114 3.011 3.011 pw_poisson_solve 51 9.9 1.254 1.254 2.939 2.939 integrate_v_rspace 45 11.4 0.006 0.006 2.897 2.897 ------------------------------------------------------------------------------- From /workspace/artifacts/MQAE_single_node_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.031 0.051 57.004 57.014 qs_mol_dyn_low 1 2.0 0.003 0.010 55.855 55.912 qs_forces 6 3.8 0.000 0.001 40.222 40.222 qs_energies 6 4.8 0.000 0.001 38.396 38.396 scf_env_do_scf 6 5.8 0.000 0.001 37.419 37.419 scf_env_do_scf_inner_loop 113 6.2 0.002 0.016 35.936 35.937 rebuild_ks_matrix 119 8.1 0.000 0.000 26.363 26.376 qs_ks_build_kohn_sham_matrix 119 9.1 0.014 0.018 26.363 26.375 qs_ks_update_qs_env 119 7.3 0.001 0.001 24.825 24.837 velocity_verlet 5 3.0 0.002 0.004 23.502 23.505 pw_transfer 2446 12.3 0.187 0.210 16.742 17.222 fft_wrap_pw1pw2 2059 13.4 0.023 0.026 16.314 16.794 fft_wrap_pw1pw2_150 1321 14.9 1.233 1.380 15.636 16.208 qs_vxc_create 119 10.1 0.002 0.002 13.188 13.203 xc_vxc_pw_create 119 11.1 0.148 0.213 13.186 13.201 fft3d_ps 2059 15.4 6.777 7.769 12.312 13.041 qs_rho_update_rho_low 119 7.3 0.000 0.000 10.846 10.854 calculate_rho_elec 119 8.3 0.049 0.055 10.845 10.854 xc_pw_derive 714 13.1 0.011 0.013 9.907 10.196 sum_up_and_integrate 119 10.1 0.052 0.066 9.496 9.721 integrate_v_rspace 119 11.1 0.003 0.004 9.443 9.659 qmmm_forces 6 3.8 0.002 0.002 7.824 7.825 qmmm_forces_with_gaussian 6 4.8 0.353 0.390 7.286 7.738 rs_pw_transfer 988 11.5 0.011 0.018 7.377 7.626 qmmm_el_coupling 6 3.8 0.000 0.000 6.867 7.020 qmmm_elec_with_gaussian 6 4.8 0.352 0.389 6.865 7.018 xc_rho_set_and_dset_create 119 12.1 0.350 0.517 6.362 6.838 xc_pw_divergence 119 12.1 0.005 0.007 6.433 6.678 density_rs2pw 119 9.3 0.005 0.007 6.353 6.548 potential_pw2rs 119 12.1 0.005 0.008 5.665 5.686 mp_alltoall_z22v 2059 17.4 4.089 5.293 4.089 5.293 grid_collocate_task_list 119 9.3 4.303 4.538 4.303 4.538 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 3.840 4.065 grid_integrate_task_list 119 12.1 3.369 3.770 3.369 3.770 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 3.304 3.517 qmmm_forces_gaussian_low_G 6 6.8 3.159 3.389 3.159 3.389 x_to_yz 1095 16.8 0.842 0.999 2.930 3.282 yz_to_x 964 16.0 0.544 0.720 2.545 3.239 mp_waitany 4028 12.8 2.468 3.020 2.468 3.020 qmmm_elec_gaussian_low_G 6 6.8 2.714 2.922 2.714 2.922 rs_pw_transfer_PW2RS_150 125 13.9 1.250 1.355 2.835 2.901 pw_restrict_s3 18 5.8 1.332 1.627 2.486 2.713 rs_pw_transfer_RS2PW_150 125 11.2 1.008 1.171 2.411 2.687 dbcsr_multiply_generic 2588 12.3 0.057 0.067 2.103 2.295 qmmm_elec_with_gaussian:spline 6 5.8 0.000 0.000 2.053 2.191 pw_prolongate_s3 18 6.8 1.107 1.282 2.053 2.191 mp_waitall_1 188862 16.2 1.910 2.166 1.910 2.166 qs_scf_new_mos 113 7.2 0.000 0.001 2.091 2.099 qs_scf_loop_do_ot 113 8.2 0.000 0.000 2.090 2.099 ot_scf_mini 113 9.2 0.001 0.001 2.006 2.014 qs_ks_ddapc 119 10.1 0.002 0.002 1.882 1.984 mp_sum_dm3 33 5.7 1.670 1.751 1.670 1.751 pw_integral_ab 2761 7.7 1.194 1.324 1.507 1.607 pw_scatter_p 1095 15.8 1.542 1.589 1.542 1.589 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 1.556 1.557 init_scf_loop 6 6.8 0.000 0.000 1.480 1.480 pw_gather_p 964 15.0 1.175 1.453 1.175 1.453 mp_sum_d 5820 12.2 0.948 1.297 0.948 1.297 ot_mini 113 10.2 0.001 0.001 1.234 1.242 ------------------------------------------------------------------------------- Plot: name="MQAE_single_node_timings_32omp", title="Timings of MQAE_single_node with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32omp", name="rest", label="rest", y=30.195000000000007, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=81.404, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_s", label="fft3d_s", y=7.477, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_scatter_s", label="pw_scatter_s", y=5.529, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_integral_ab", label="pw_integral_ab", y=4.27, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=3.548, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=3.256, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 Plot: name="MQAE_single_node_timings_32mpi", title="Timings of MQAE_single_node with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32mpi", name="rest", label="rest", y=31.398999999999994, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=2.714, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_scatter_s", label="pw_scatter_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_integral_ab", label="pw_integral_ab", y=1.194, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=3.159, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=4.303, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=4.089, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=3.369, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=6.777, yerr=0.0 Summary: Performance test took 46 minutes. Status: OK Removing intermediate container 5dabddd93a1f ---> bb68c0bdbbd8 Step 41/42 : CMD cat $(find ./report.log -mmin +10) | sed '/^Summary:/ s/$/ (cached)/' ---> Running in 58640b8936cb Removing intermediate container 58640b8936cb ---> 36c9c38ab447 Step 42/42 : ENTRYPOINT [] ---> Running in b998142b6447 Removing intermediate container b998142b6447 ---> 192e33ce258d [Warning] One or more build-args [GIT_COMMIT_SHA] were not consumed Successfully built 192e33ce258d Successfully tagged gcr.io/cp2k-org-project/img_cp2k-perf-openmp-arch-14b:master Pushing new image... done. #################### Running Image cp2k-perf-openmp #################### Uploading artifacts... done EndDate: 2022-07-22 12:03:57+00:00