StartDate: 2022-06-06 11:06:12+00:00 CpuId: 32x AMD (unknown model) [Zen 3], 7nm (SMT disabled) CommitSHA: 024676661e34077435b59312aca1eae59daaf8ed CommitTime: 2022-06-06 11:03:09 +0200 CommitAuthor: Jan Wilhelm CommitSubject: 4-center Hartree-Fock and ADMM for exchange self-energy in GW+kpoints Populating docker build cache... done. #################### Building Image cp2k-perf-openmp #################### Dockerfile: /tools/docker/Dockerfile.test_performance Build-Path: / Build-Args: GIT_COMMIT_SHA=024676661e34077435b59312aca1eae59daaf8ed Sending build context to Docker daemon 363.1MB Step 1/42 : FROM ubuntu:22.04 22.04: Pulling from library/ubuntu 125a6e411906: Already exists Digest: sha256:26c68657ccce2cb0a31b330cb0be2b5e108d467f641c62e13ab40cbec258c68d Status: Downloaded newer image for ubuntu:22.04 ---> d2e4e1f51132 Step 2/42 : WORKDIR /opt/cp2k-toolchain ---> Using cache ---> 09af03b14c5c Step 3/42 : COPY ./tools/toolchain/install_requirements*.sh ./ ---> Using cache ---> b3a7014288dd Step 4/42 : RUN ./install_requirements.sh ubuntu:22.04 ---> Using cache ---> 12e7491dc4b1 Step 5/42 : RUN mkdir scripts ---> Using cache ---> 2c9c880d562d Step 6/42 : COPY ./tools/toolchain/scripts/VERSION ./tools/toolchain/scripts/parse_if.py ./tools/toolchain/scripts/tool_kit.sh ./tools/toolchain/scripts/common_vars.sh ./tools/toolchain/scripts/signal_trap.sh ./tools/toolchain/scripts/get_openblas_arch.sh ./scripts/ ---> Using cache ---> 8739f82ecc2b Step 7/42 : COPY ./tools/toolchain/install_cp2k_toolchain.sh . ---> Using cache ---> 38fcbedf3264 Step 8/42 : RUN ./install_cp2k_toolchain.sh --install-all --mpi-mode=mpich --with-gcc=system --dry-run ---> Using cache ---> 73b7ed7cdfe0 Step 9/42 : COPY ./tools/toolchain/scripts/stage0/ ./scripts/stage0/ ---> Using cache ---> 2a422465fce3 Step 10/42 : RUN ./scripts/stage0/install_stage0.sh && rm -rf ./build ---> Using cache ---> a816974020fb Step 11/42 : COPY ./tools/toolchain/scripts/stage1/ ./scripts/stage1/ ---> Using cache ---> 100ec55352b3 Step 12/42 : RUN ./scripts/stage1/install_stage1.sh && rm -rf ./build ---> Using cache ---> eaff4ff473f1 Step 13/42 : COPY ./tools/toolchain/scripts/stage2/ ./scripts/stage2/ ---> Using cache ---> a0bb51bcd5ea Step 14/42 : RUN ./scripts/stage2/install_stage2.sh && rm -rf ./build ---> Using cache ---> 029068dbbf2e Step 15/42 : COPY ./tools/toolchain/scripts/stage3/ ./scripts/stage3/ ---> Using cache ---> 2a92e5adaf21 Step 16/42 : RUN ./scripts/stage3/install_stage3.sh && rm -rf ./build ---> Using cache ---> 615850df2842 Step 17/42 : COPY ./tools/toolchain/scripts/stage4/ ./scripts/stage4/ ---> Using cache ---> 9a6fc063f840 Step 18/42 : RUN ./scripts/stage4/install_stage4.sh && rm -rf ./build ---> Using cache ---> 7f827fb9200a Step 19/42 : COPY ./tools/toolchain/scripts/stage5/ ./scripts/stage5/ ---> Using cache ---> a9bc2482b3e7 Step 20/42 : RUN ./scripts/stage5/install_stage5.sh && rm -rf ./build ---> Using cache ---> ce3deb406895 Step 21/42 : COPY ./tools/toolchain/scripts/stage6/ ./scripts/stage6/ ---> Using cache ---> 76c6ee84755c Step 22/42 : RUN ./scripts/stage6/install_stage6.sh && rm -rf ./build ---> Using cache ---> c65dbd48c176 Step 23/42 : COPY ./tools/toolchain/scripts/stage7/ ./scripts/stage7/ ---> Using cache ---> 0650752e0577 Step 24/42 : RUN ./scripts/stage7/install_stage7.sh && rm -rf ./build ---> Using cache ---> 9edcb65d2668 Step 25/42 : COPY ./tools/toolchain/scripts/stage8/ ./scripts/stage8/ ---> Using cache ---> 2e583be605b5 Step 26/42 : RUN ./scripts/stage8/install_stage8.sh && rm -rf ./build ---> Using cache ---> 28e1a9378a8a Step 27/42 : COPY ./tools/toolchain/scripts/arch_base.tmpl ./tools/toolchain/scripts/generate_arch_files.sh ./scripts/ ---> Using cache ---> ccb6d6b2c3e8 Step 28/42 : RUN ./scripts/generate_arch_files.sh && rm -rf ./build ---> Using cache ---> 9f1b1fa65dba Step 29/42 : WORKDIR /opt/cp2k ---> Using cache ---> 4dba1a0481e1 Step 30/42 : COPY ./Makefile . ---> Using cache ---> 35d480c2a71d Step 31/42 : COPY ./src ./src ---> 54bdced77420 Step 32/42 : COPY ./exts ./exts ---> ea7ed530e519 Step 33/42 : COPY ./tools/build_utils ./tools/build_utils ---> 314c0c1f707d Step 34/42 : RUN /bin/bash -c " mkdir -p arch && ln -vs /opt/cp2k-toolchain/install/arch/local.psmp ./arch/ && echo 'Compiling cp2k...' && source /opt/cp2k-toolchain/install/setup && ( make -j ARCH=local VERSION=psmp &> /dev/null || true ) && ( [ ! -f ./exe/local/cp2k.psmp ] || ldd ./exe/local/cp2k.psmp | grep -q libmpi )" ---> Running in ff53e5c074da './arch/local.psmp' -> '/opt/cp2k-toolchain/install/arch/local.psmp' Compiling cp2k... Removing intermediate container ff53e5c074da ---> 58c3d7dc0fd2 Step 35/42 : COPY ./data ./data ---> 5068ff6779bb Step 36/42 : COPY ./tests ./tests ---> 08c781fd4d33 Step 37/42 : COPY ./tools/regtesting ./tools/regtesting ---> 5488dfd5684a Step 38/42 : COPY ./benchmarks ./benchmarks ---> d12c03d2e12c Step 39/42 : COPY ./tools/docker/scripts/test_performance.sh ./tools/docker/scripts/plot_performance.py ./ ---> 017fba1ba44c Step 40/42 : RUN ./test_performance.sh "local" 2>&1 | tee report.log ---> Running in d657d44b9633 ========== Compiling CP2K ========== Compiling cp2k... done. Checking benchmark inputs... Found 60 input files and 0 errors. ========== Running Performance Test ========== Running H2O-64.inp with 1 threads and 32 ranks... done. Running H2O-64.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.030 0.030 177.922 177.922 qs_mol_dyn_low 1 2.0 0.002 0.002 177.326 177.326 qs_forces 11 3.9 0.001 0.001 177.288 177.288 qs_energies 11 4.9 0.001 0.001 171.008 171.008 scf_env_do_scf 11 5.9 0.001 0.001 158.959 158.959 velocity_verlet 10 3.0 0.002 0.002 122.797 122.797 init_scf_loop 11 6.9 0.000 0.000 87.109 87.109 prepare_preconditioner 11 7.9 0.000 0.000 84.506 84.506 make_preconditioner 11 8.9 0.000 0.000 84.506 84.506 make_full_inverse_cholesky 11 9.9 0.000 0.000 83.394 83.394 scf_env_do_scf_inner_loop 108 6.5 0.010 0.010 71.723 71.723 cp_fm_cholesky_invert 11 10.9 58.312 58.312 58.312 58.312 qs_scf_new_mos 108 7.5 0.001 0.001 37.786 37.786 qs_scf_loop_do_ot 108 8.5 0.001 0.001 37.785 37.785 ot_scf_mini 108 9.5 0.002 0.002 36.479 36.479 cp_fm_cholesky_decompose 22 10.9 21.109 21.109 21.109 21.109 rebuild_ks_matrix 119 8.3 0.001 0.001 20.170 20.170 qs_ks_build_kohn_sham_matrix 119 9.3 0.012 0.012 20.169 20.169 qs_ot_get_p 119 10.4 0.001 0.001 19.797 19.797 dbcsr_multiply_generic 2286 12.5 0.165 0.165 19.320 19.320 qs_rho_update_rho 119 7.7 0.001 0.001 19.094 19.094 calculate_rho_elec 119 8.7 0.953 0.953 19.093 19.093 qs_ks_update_qs_env 119 7.6 0.001 0.001 18.541 18.541 qs_ot_p2m_diag 50 11.0 0.157 0.157 18.462 18.462 cp_dbcsr_syevd 50 12.0 0.002 0.002 18.064 18.064 cp_fm_diag_elpa 50 13.0 0.000 0.000 16.750 16.750 cp_fm_diag_elpa_base 50 14.0 16.692 16.692 16.749 16.749 grid_collocate_task_list 119 9.7 14.745 14.745 14.745 14.745 ot_mini 108 10.5 0.001 0.001 13.427 13.427 sum_up_and_integrate 119 10.3 0.179 0.179 12.583 12.583 integrate_v_rspace 119 11.3 0.116 0.116 12.404 12.404 make_m2s 4572 13.5 0.046 0.046 10.873 10.873 grid_integrate_task_list 119 12.3 10.438 10.438 10.438 10.438 qs_ot_get_derivative 108 11.5 0.001 0.001 7.984 7.984 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 6.349 6.349 pw_transfer 1439 11.6 0.058 0.058 6.000 6.000 fft_wrap_pw1pw2 1201 12.6 0.006 0.006 5.762 5.762 make_images 4572 14.5 2.133 2.133 5.637 5.637 dbcsr_make_dense_low 5837 15.5 0.073 0.073 5.629 5.629 make_dense_data 5837 16.5 5.007 5.007 5.542 5.542 ot_diis_step 108 11.5 0.004 0.004 5.441 5.441 dbcsr_make_images_dense 3978 14.8 0.018 0.018 4.932 4.932 fft_wrap_pw1pw2_140 487 13.2 0.438 0.438 4.911 4.911 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 4.910 4.910 apply_single 119 13.6 0.000 0.000 4.909 4.909 init_scf_run 11 5.9 0.002 0.002 4.766 4.766 scf_env_initial_rho_setup 11 6.9 0.001 0.001 4.765 4.765 multiply_cannon 2286 13.5 0.175 0.175 4.605 4.605 wfi_extrapolate 11 7.9 0.001 0.001 4.228 4.228 multiply_cannon_loop 2286 14.5 0.082 0.082 4.137 4.137 multiply_cannon_multrec 2286 15.5 4.003 4.003 4.054 4.054 dbcsr_copy 2102 12.0 0.212 0.212 3.767 3.767 build_core_hamiltonian_matrix_ 11 4.9 0.000 0.000 3.705 3.705 dbcsr_complete_redistribute 329 12.2 1.896 1.896 3.575 3.575 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.005 0.021 47.096 47.106 qs_mol_dyn_low 1 2.0 0.004 0.005 46.979 46.990 qs_forces 11 3.9 0.001 0.001 46.940 46.941 qs_energies 11 4.9 0.001 0.001 43.702 43.705 scf_env_do_scf 11 5.9 0.000 0.002 40.071 40.072 scf_env_do_scf_inner_loop 108 6.5 0.003 0.022 36.981 36.992 velocity_verlet 10 3.0 0.001 0.003 27.759 27.760 rebuild_ks_matrix 119 8.3 0.000 0.001 18.098 18.240 qs_ks_build_kohn_sham_matrix 119 9.3 0.015 0.023 18.098 18.240 qs_ks_update_qs_env 119 7.6 0.001 0.001 16.171 16.302 sum_up_and_integrate 119 10.3 0.018 0.021 14.000 14.035 integrate_v_rspace 119 11.3 0.004 0.005 13.982 14.018 qs_rho_update_rho 119 7.7 0.001 0.001 13.874 13.898 calculate_rho_elec 119 8.7 0.030 0.031 13.873 13.897 dbcsr_multiply_generic 2286 12.5 0.070 0.087 12.249 12.756 grid_collocate_task_list 119 9.7 8.914 10.702 8.914 10.702 grid_integrate_task_list 119 12.3 8.026 10.687 8.026 10.687 qs_scf_new_mos 108 7.5 0.001 0.001 9.845 10.070 qs_scf_loop_do_ot 108 8.5 0.001 0.001 9.845 10.069 ot_scf_mini 108 9.5 0.002 0.002 9.224 9.446 multiply_cannon 2286 13.5 0.112 0.147 8.532 9.172 multiply_cannon_loop 2286 14.5 0.090 0.124 7.976 8.609 mp_waitall_1 169478 16.3 6.952 7.675 6.952 7.675 rs_pw_transfer 974 11.9 0.010 0.013 5.495 5.943 ot_mini 108 10.5 0.001 0.007 5.485 5.731 multiply_cannon_metrocomm3 18288 15.5 0.037 0.052 4.392 5.221 density_rs2pw 119 9.7 0.005 0.006 4.612 5.057 mp_alltoall_d11v 2130 13.8 2.994 3.380 2.994 3.380 pw_transfer 1439 11.6 0.082 0.097 3.273 3.351 qs_ot_get_derivative 108 11.5 0.001 0.001 2.993 3.217 fft_wrap_pw1pw2 1201 12.6 0.010 0.011 3.125 3.200 potential_pw2rs 119 12.3 0.006 0.007 3.166 3.178 rs_gather_matrices 119 12.3 0.075 0.086 2.754 3.161 multiply_cannon_multrec 18288 15.5 2.790 3.099 2.799 3.113 init_scf_loop 11 6.9 0.000 0.000 3.077 3.104 mp_waitany 9880 13.7 2.523 3.033 2.523 3.033 fft_wrap_pw1pw2_140 487 13.2 0.276 0.320 2.669 2.813 rs_pw_transfer_RS2PW_140 130 11.5 0.257 0.304 2.337 2.779 apply_preconditioner_dbcsr 119 12.6 0.000 0.007 2.338 2.505 apply_single 119 13.6 0.000 0.000 2.338 2.505 init_scf_run 11 5.9 0.000 0.005 2.484 2.484 scf_env_initial_rho_setup 11 6.9 0.000 0.004 2.484 2.484 ot_diis_step 108 11.5 0.004 0.006 2.467 2.467 make_m2s 4572 13.5 0.045 0.055 2.257 2.331 fft3d_ps 1201 14.6 1.220 1.291 2.234 2.286 wfi_extrapolate 11 7.9 0.001 0.001 2.254 2.254 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 2.070 2.082 make_images 4572 14.5 0.115 0.138 1.941 2.018 multiply_cannon_metrocomm1 18288 15.5 0.018 0.027 0.514 1.742 mp_sum_l 11218 13.2 1.001 1.635 1.001 1.635 qs_ot_get_p 119 10.4 0.001 0.001 1.246 1.489 rs_pw_transfer_PW2RS_140 130 13.9 0.597 0.629 1.340 1.382 mp_sum_d 4129 12.0 0.987 1.283 0.987 1.283 qs_ot_get_derivative_taylor 59 13.0 0.001 0.001 1.051 1.152 prepare_preconditioner 11 7.9 0.000 0.000 1.121 1.151 make_preconditioner 11 8.9 0.000 0.000 1.121 1.151 build_core_hamiltonian_matrix_ 11 4.9 0.000 0.000 0.862 1.140 make_images_data 4572 15.5 0.035 0.042 1.000 1.105 qs_ot_get_derivative_diag 49 12.0 0.001 0.001 0.973 1.095 hybrid_alltoall_any 4725 16.4 0.061 0.192 0.884 1.028 make_full_inverse_cholesky 11 9.9 0.000 0.000 0.999 1.021 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 0.978 0.978 ------------------------------------------------------------------------------- Plot: name="H2O-64_timings_32omp", title="Timings of H2O-64 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32omp", name="rest", label="rest", y=52.62299999999999, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=58.312, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=21.109, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=16.692, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=14.745, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=10.438, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=4.003, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_alltoall_d11v", label="mp_alltoall_d11v", y=0.0, yerr=0.0 Plot: name="H2O-64_timings_32mpi", title="Timings of H2O-64 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32mpi", name="rest", label="rest", y=17.42, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=8.914, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=8.026, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=2.79, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=6.952, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_alltoall_d11v", label="mp_alltoall_d11v", y=2.994, yerr=0.0 Running H2O-64_nonortho.inp with 1 threads and 32 ranks... done. Running H2O-64_nonortho.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_nonortho_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.035 0.035 204.013 204.013 qs_mol_dyn_low 1 2.0 0.002 0.002 203.385 203.385 qs_forces 11 3.9 0.001 0.001 203.347 203.347 qs_energies 11 4.9 0.001 0.001 195.657 195.657 scf_env_do_scf 11 5.9 0.001 0.001 181.100 181.100 velocity_verlet 10 3.0 0.002 0.002 141.543 141.543 init_scf_loop 11 6.9 0.000 0.000 91.431 91.431 scf_env_do_scf_inner_loop 96 6.5 0.009 0.009 89.517 89.517 prepare_preconditioner 11 7.9 0.000 0.000 87.383 87.383 make_preconditioner 11 8.9 0.000 0.000 87.383 87.383 make_full_inverse_cholesky 11 9.9 0.000 0.000 86.228 86.228 cp_fm_cholesky_invert 11 10.9 60.343 60.343 60.343 60.343 qs_scf_new_mos 96 7.5 0.001 0.001 34.938 34.938 qs_scf_loop_do_ot 96 8.5 0.000 0.000 34.937 34.937 ot_scf_mini 96 9.5 0.002 0.002 33.754 33.754 rebuild_ks_matrix 107 8.3 0.001 0.001 33.474 33.474 qs_ks_build_kohn_sham_matrix 107 9.3 0.011 0.011 33.474 33.474 qs_rho_update_rho 107 7.7 0.001 0.001 31.172 31.172 calculate_rho_elec 107 8.7 0.855 0.855 31.172 31.172 qs_ks_update_qs_env 107 7.6 0.001 0.001 30.131 30.131 grid_collocate_task_list 107 9.7 27.279 27.279 27.279 27.279 sum_up_and_integrate 107 10.3 0.161 0.161 26.511 26.511 integrate_v_rspace 107 11.3 0.076 0.076 26.350 26.350 grid_integrate_task_list 107 12.3 24.561 24.561 24.561 24.561 cp_fm_cholesky_decompose 22 10.9 21.997 21.997 21.997 21.997 qs_ot_get_p 107 10.4 0.001 0.001 18.742 18.742 qs_ot_p2m_diag 44 11.0 0.145 0.145 17.674 17.674 cp_dbcsr_syevd 44 12.0 0.002 0.002 17.328 17.328 dbcsr_multiply_generic 1966 12.4 0.139 0.139 17.112 17.112 cp_fm_diag_elpa 44 13.0 0.000 0.000 16.163 16.163 cp_fm_diag_elpa_base 44 14.0 16.112 16.112 16.163 16.163 ot_mini 96 10.5 0.001 0.001 12.048 12.048 make_m2s 3932 13.4 0.041 0.041 9.496 9.496 qs_ot_get_derivative 96 11.5 0.001 0.001 7.327 7.327 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 7.287 7.287 init_scf_run 11 5.9 0.002 0.002 6.256 6.256 scf_env_initial_rho_setup 11 6.9 0.001 0.001 6.254 6.254 wfi_extrapolate 11 7.9 0.001 0.001 5.621 5.621 pw_transfer 1295 11.6 0.050 0.050 5.494 5.494 fft_wrap_pw1pw2 1081 12.6 0.005 0.005 5.299 5.299 dbcsr_make_dense_low 4961 15.5 0.077 0.077 4.999 4.999 make_dense_data 4961 16.5 4.376 4.376 4.909 4.909 make_images 3932 14.4 1.882 1.882 4.814 4.814 ot_diis_step 96 11.5 0.003 0.003 4.718 4.718 fft_wrap_pw1pw2_140 439 13.2 0.454 0.454 4.556 4.556 qs_env_update_s_mstruct 11 6.9 0.000 0.000 4.405 4.405 dbcsr_make_images_dense 3386 14.7 0.015 0.015 4.356 4.356 multiply_cannon 1966 13.4 0.154 0.154 4.319 4.319 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 4.309 4.309 apply_single 107 13.6 0.000 0.000 4.309 4.309 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 4.194 4.194 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_nonortho_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.005 0.022 82.381 82.390 qs_mol_dyn_low 1 2.0 0.003 0.004 82.283 82.287 qs_forces 11 3.9 0.001 0.001 82.233 82.233 qs_energies 11 4.9 0.001 0.001 76.674 76.675 scf_env_do_scf 11 5.9 0.000 0.002 71.117 71.117 scf_env_do_scf_inner_loop 96 6.5 0.002 0.017 65.961 65.961 velocity_verlet 10 3.0 0.001 0.003 48.813 48.814 rebuild_ks_matrix 107 8.3 0.000 0.001 36.789 36.882 qs_ks_build_kohn_sham_matrix 107 9.3 0.013 0.017 36.788 36.882 sum_up_and_integrate 107 10.3 0.018 0.020 32.862 32.881 integrate_v_rspace 107 11.3 0.004 0.005 32.844 32.865 qs_ks_update_qs_env 107 7.6 0.001 0.001 32.506 32.590 qs_rho_update_rho 107 7.7 0.001 0.001 31.408 31.422 calculate_rho_elec 107 8.7 0.027 0.028 31.408 31.422 grid_integrate_task_list 107 12.3 22.881 28.769 22.881 28.769 grid_collocate_task_list 107 9.7 21.964 27.602 21.964 27.602 dbcsr_multiply_generic 1966 12.4 0.063 0.071 11.420 11.588 rs_pw_transfer 878 11.9 0.010 0.012 10.130 10.971 density_rs2pw 107 9.7 0.005 0.006 9.129 9.963 qs_scf_new_mos 96 7.5 0.001 0.001 8.961 9.047 qs_scf_loop_do_ot 96 8.5 0.001 0.001 8.960 9.046 multiply_cannon 1966 13.4 0.105 0.122 8.202 8.591 ot_scf_mini 96 9.5 0.002 0.002 8.410 8.500 multiply_cannon_loop 1966 14.4 0.090 0.111 7.690 8.130 mp_waitany 8968 13.7 7.224 8.124 7.224 8.124 mp_alltoall_d11v 1998 13.7 7.068 8.046 7.068 8.046 rs_pw_transfer_RS2PW_140 118 11.5 0.228 0.273 7.015 7.858 rs_gather_matrices 107 12.3 0.069 0.083 6.851 7.789 mp_waitall_1 146670 16.2 6.726 7.332 6.726 7.332 init_scf_loop 11 6.9 0.000 0.000 5.143 5.144 ot_mini 96 10.5 0.001 0.001 4.959 5.061 multiply_cannon_metrocomm3 15728 15.4 0.036 0.044 4.316 5.020 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 4.412 4.423 init_scf_run 11 5.9 0.000 0.004 4.347 4.348 scf_env_initial_rho_setup 11 6.9 0.000 0.003 4.347 4.347 wfi_extrapolate 11 7.9 0.001 0.001 3.945 3.946 pw_transfer 1295 11.6 0.077 0.087 3.041 3.102 potential_pw2rs 107 12.3 0.006 0.007 3.077 3.087 fft_wrap_pw1pw2 1081 12.6 0.009 0.010 2.904 2.972 multiply_cannon_multrec 15728 15.4 2.659 2.823 2.669 2.833 qs_ot_get_derivative 96 11.5 0.001 0.001 2.643 2.733 fft_wrap_pw1pw2_140 439 13.2 0.251 0.288 2.478 2.559 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 2.234 2.340 apply_single 107 13.6 0.000 0.000 2.234 2.340 ot_diis_step 96 11.5 0.003 0.004 2.298 2.298 fft3d_ps 1081 14.6 1.113 1.202 2.079 2.146 make_m2s 3932 13.4 0.040 0.048 2.067 2.125 make_images 3932 14.4 0.104 0.123 1.780 1.833 ------------------------------------------------------------------------------- Plot: name="H2O-64_nonortho_timings_32omp", title="Timings of H2O-64_nonortho with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="rest", label="rest", y=53.721000000000004, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=60.343, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=27.279, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=24.561, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=21.997, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=16.112, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_alltoall_d11v", label="mp_alltoall_d11v", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="H2O-64_nonortho_timings_32mpi", title="Timings of H2O-64_nonortho with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="rest", label="rest", y=16.518, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=21.964, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=22.881, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitany", label="mp_waitany", y=7.224, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_alltoall_d11v", label="mp_alltoall_d11v", y=7.068, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=6.726, yerr=0.0 Running H2O-hyb.inp with 1 threads and 32 ranks... done. Running H2O-hyb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-hyb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.170 0.170 147.601 147.601 qs_energies 1 2.0 0.000 0.000 146.806 146.806 scf_env_do_scf 1 3.0 0.000 0.000 145.240 145.240 qs_ks_update_qs_env 8 5.0 0.000 0.000 110.023 110.023 rebuild_ks_matrix 7 6.0 0.000 0.000 109.965 109.965 qs_ks_build_kohn_sham_matrix 7 7.0 0.001 0.001 109.965 109.965 hfx_ks_matrix 7 8.0 0.000 0.000 93.512 93.512 integrate_four_center 7 9.0 1.279 1.279 93.495 93.495 init_scf_loop 1 4.0 0.000 0.000 81.499 81.499 integrate_four_center_main 7 10.0 0.548 0.548 81.068 81.068 integrate_four_center_bin 454 11.0 80.520 80.520 80.520 80.520 scf_env_do_scf_inner_loop 7 4.0 0.001 0.001 63.729 63.729 prepare_preconditioner 1 5.0 0.000 0.000 29.842 29.842 make_preconditioner 1 6.0 0.000 0.000 29.842 29.842 arnoldi_normal_ev 11 9.3 0.001 0.001 18.834 18.834 estimate_cond_num 1 7.0 0.000 0.000 18.793 18.793 build_subspace 28 9.5 0.008 0.008 18.378 18.378 integrate_four_center_load 7 10.0 0.001 0.001 10.910 10.910 hfx_load_balance 1 11.0 0.001 0.001 10.909 10.909 admm_mo_calc_rho_aux 7 8.0 0.000 0.000 9.180 9.180 dbcsr_sym_m_v_mult 562 10.0 0.014 0.014 9.062 9.062 admm_fit_mo_coeffs 7 9.0 0.000 0.000 8.018 8.018 cp_fm_cholesky_invert 2 9.5 7.377 7.377 7.377 7.377 DGKS_ortho_d 673 10.6 7.253 7.253 7.254 7.254 make_full_inverse_cholesky 1 7.0 0.000 0.000 7.241 7.241 Gram_Schmidt_ortho_d 673 10.6 5.636 5.636 5.636 5.636 hfx_load_balance_bin 1 12.0 5.449 5.449 5.449 5.449 hfx_load_balance_count 1 12.0 5.443 5.443 5.443 5.443 dbcsr_copy 1318 10.8 1.338 1.338 4.902 4.902 purify_mo_diag 7 10.0 0.000 0.000 4.582 4.582 qs_scf_new_mos 7 5.0 0.000 0.000 4.086 4.086 qs_scf_loop_do_ot 7 6.0 0.000 0.000 4.086 4.086 cp_fm_syevd 7 11.0 0.000 0.000 4.082 4.082 cp_fm_syevd_base 7 12.0 4.082 4.082 4.082 4.082 ot_scf_mini 7 7.0 0.000 0.000 4.003 4.003 dbcsr_create_new 3176 12.1 2.495 2.495 3.897 3.897 make_full_single_inverse 1 7.0 0.000 0.000 3.704 3.704 qs_vxc_create 14 8.0 0.000 0.000 3.693 3.693 xc_vxc_pw_create 14 9.0 0.120 0.120 3.693 3.693 arnoldi_generalized_ev 1 8.0 0.000 0.000 3.628 3.628 gev_build_subspace 4 9.0 0.004 0.004 3.557 3.557 fit_mo_coeffs 7 10.0 0.000 0.000 3.437 3.437 dbcsr_set 2825 11.8 0.003 0.003 3.342 3.342 dbcsr_zero 2837 12.8 3.339 3.339 3.339 3.339 qs_ot_get_p 8 7.8 0.000 0.000 3.042 3.042 qs_ot_p2m_diag 7 9.0 0.022 0.022 2.965 2.965 xc_rho_set_and_dset_create 14 10.0 0.096 0.096 2.963 2.963 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-hyb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.191 0.213 132.509 132.520 qs_energies 1 2.0 0.000 0.001 132.210 132.210 scf_env_do_scf 1 3.0 0.000 0.001 131.826 131.827 qs_ks_update_qs_env 8 5.0 0.000 0.000 129.805 129.805 rebuild_ks_matrix 7 6.0 0.000 0.000 129.791 129.791 qs_ks_build_kohn_sham_matrix 7 7.0 0.001 0.002 129.791 129.791 hfx_ks_matrix 7 8.0 0.000 0.000 123.642 123.642 integrate_four_center 7 9.0 0.053 0.333 123.633 123.634 integrate_four_center_main 7 10.0 0.003 0.003 79.687 111.505 integrate_four_center_bin 448 11.0 79.685 111.502 79.685 111.502 scf_env_do_scf_inner_loop 7 4.0 0.000 0.001 74.681 74.681 init_scf_loop 1 4.0 0.000 0.000 57.144 57.144 mp_sync 70 11.3 31.836 34.484 31.836 34.484 integrate_four_center_load 7 10.0 0.000 0.000 11.455 11.458 hfx_load_balance 1 11.0 0.001 0.001 11.455 11.458 mp_sum_l 1135 8.3 5.783 6.070 5.783 6.070 hfx_load_balance_dist 1 12.0 0.000 0.000 5.667 5.935 hfx_load_balance_bin 1 12.0 2.856 5.696 2.856 5.696 hfx_load_balance_count 1 12.0 2.855 5.681 2.855 5.681 qs_vxc_create 14 8.0 0.000 0.001 3.011 3.011 xc_vxc_pw_create 14 9.0 0.008 0.008 3.011 3.011 xc_rho_set_and_dset_create 14 10.0 0.010 0.011 1.923 2.672 ------------------------------------------------------------------------------- Plot: name="H2O-hyb_timings_32omp", title="Timings of H2O-hyb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32omp", name="rest", label="rest", y=35.923000000000016, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_bin", label="integrate_four_center_bin", y=80.52, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=7.377, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="DGKS_ortho_d", label="DGKS_ortho_d", y=7.253, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="Gram_Schmidt_ortho_d", label="Gram_Schmidt_ortho_d", y=5.636, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=5.449, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_count", label="hfx_load_balance_count", y=5.443, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 Plot: name="H2O-hyb_timings_32mpi", title="Timings of H2O-hyb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32mpi", name="rest", label="rest", y=9.493999999999986, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_bin", label="integrate_four_center_bin", y=79.685, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="DGKS_ortho_d", label="DGKS_ortho_d", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="Gram_Schmidt_ortho_d", label="Gram_Schmidt_ortho_d", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=2.856, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_count", label="hfx_load_balance_count", y=2.855, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=5.783, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="mp_sync", label="mp_sync", y=31.836, yerr=0.0 Running GW_PBE_4benzene.inp with 1 threads and 32 ranks... done. Running GW_PBE_4benzene.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/GW_PBE_4benzene_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.012 0.012 153.520 153.520 qs_energies 1 2.0 0.000 0.000 153.100 153.100 mp2_main 1 3.0 0.000 0.000 130.022 130.022 mp2_gpw_main 1 4.0 0.000 0.000 128.782 128.782 rpa_ri_compute_en 1 5.0 0.000 0.000 122.071 122.071 rpa_num_int 1 6.0 0.001 0.001 122.058 122.058 compute_mat_P_omega 1 7.0 0.003 0.003 72.745 72.745 compute_mat_P_omega_contract 10 8.0 8.789 8.789 72.527 72.527 dbt_total 2336 9.6 0.012 0.012 59.351 59.351 dbt_contract 787 11.0 0.039 0.039 52.840 52.840 dbt_tas_total 1149 12.2 0.223 0.223 51.777 51.777 dbt_tas_multiply 807 12.1 0.002 0.002 50.314 50.314 dbt_tas_dbm 807 14.1 0.003 0.003 43.781 43.781 dbm_multiply 807 16.1 43.772 43.772 43.772 43.772 dbt_tas_mm_1N 524 15.1 0.002 0.002 32.413 32.413 GW_matrix_operations 10 7.0 0.004 0.004 31.864 31.864 cp_fm_cholesky_invert 10 8.0 31.066 31.066 31.066 31.066 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 27.852 27.852 scf_env_do_scf 1 3.0 0.000 0.000 22.796 22.796 scf_env_do_scf_inner_loop 17 4.0 0.002 0.002 22.796 22.796 qs_scf_new_mos 17 5.0 0.000 0.000 21.304 21.304 compute_mat_P_omega_calc_M_occ 250 9.0 8.794 8.794 19.928 19.928 eigensolver 18 5.9 0.001 0.001 19.631 19.631 cp_fm_diag_elpa 18 6.9 0.000 0.000 13.189 13.189 cp_fm_diag_elpa_base 18 7.9 13.152 13.152 13.189 13.189 cp_fm_cholesky_decompose 14 8.1 12.332 12.332 12.332 12.332 rpa_num_int_RPA_matrix_operati 10 7.0 0.000 0.000 11.199 11.199 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 10.646 10.646 RPA_postprocessing_nokp 10 8.0 0.001 0.001 10.394 10.394 dbt_tas_mm_2 251 15.0 0.001 0.001 9.593 9.593 mp2_ri_gpw_compute_in 1 5.0 0.001 0.001 6.704 6.704 cp_fm_cholesky_restore 51 7.0 6.386 6.386 6.386 6.386 compute_QP_energies 1 7.0 0.000 0.000 5.272 5.272 compute_self_energy_cubic_gw 1 8.0 0.048 0.048 5.271 5.271 get_2c_integrals 1 6.0 0.000 0.000 5.101 5.101 dbt_copy 1103 10.7 0.099 0.099 5.029 5.029 contract_cubic_gw 21 9.0 0.000 0.000 4.250 4.250 ------------------------------------------------------------------------------- From /workspace/artifacts/GW_PBE_4benzene_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.004 0.020 31.593 31.603 qs_energies 1 2.0 0.000 0.001 31.511 31.512 mp2_main 1 3.0 0.000 0.000 30.492 30.493 mp2_gpw_main 1 4.0 0.000 0.000 30.449 30.450 rpa_ri_compute_en 1 5.0 0.000 0.000 29.201 29.202 rpa_num_int 1 6.0 0.000 0.002 29.195 29.197 dbt_total 2336 9.6 0.011 0.012 25.954 25.957 compute_mat_P_omega 1 7.0 0.001 0.005 24.880 24.905 compute_mat_P_omega_contract 10 8.0 0.399 0.423 24.724 24.727 dbt_contract 787 11.0 0.024 0.027 19.484 19.499 dbt_tas_total 1149 12.2 0.050 0.066 17.355 17.355 dbt_tas_multiply 807 12.1 0.002 0.002 17.288 17.290 dbt_tas_dbm 807 14.1 0.003 0.003 12.762 12.773 dbm_multiply 807 16.1 9.805 10.821 9.805 10.821 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 7.315 7.315 compute_mat_P_omega_calc_M_occ 250 9.0 0.386 0.412 7.309 7.309 mp_sync 8706 11.6 4.886 6.567 4.886 6.567 dbt_tas_mm_2 251 15.0 0.001 0.001 6.032 6.036 dbt_copy 1111 10.7 0.011 0.013 5.583 5.953 dbt_reshape 1098 11.7 2.088 2.589 5.321 5.645 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 5.324 5.325 dbt_tas_mm_1N 524 15.1 0.001 0.002 4.503 5.094 mp_waitall_2 3776 15.3 2.532 2.766 2.532 2.766 compute_QP_energies 1 7.0 0.000 0.000 2.734 2.735 compute_self_energy_cubic_gw 1 8.0 0.003 0.015 2.732 2.734 dbt_communicate_buffer 1098 12.7 0.054 0.075 2.582 2.717 contract_cubic_gw 21 9.0 0.000 0.000 2.131 2.131 dbt_crop 1042 12.0 0.907 1.241 1.400 1.804 dbt_reserve_blocks_index_array 2791 12.2 0.010 0.012 1.525 1.664 dbt_reserve_blocks_index 2849 13.1 0.061 0.075 1.523 1.662 dbt_tas_reserve_blocks_index 3300 14.5 0.114 0.146 1.495 1.628 dbm_reserve_blocks 3696 15.4 1.473 1.596 1.473 1.596 mp2_ri_gpw_compute_in 1 5.0 0.000 0.000 1.245 1.248 dbt_tas_replicate 396 14.1 0.539 0.699 1.101 1.232 compute_mat_P_omega_copy_M_vir 250 9.0 0.001 0.001 0.984 0.987 scf_env_do_scf 1 3.0 0.000 0.000 0.961 0.961 scf_env_do_scf_inner_loop 17 4.0 0.000 0.002 0.961 0.961 convert_to_new_pgrid 2421 14.1 0.022 0.028 0.806 0.959 compute_mat_P_omega_copy_M_occ 250 9.0 0.001 0.001 0.953 0.955 dbm_copy 1608 15.1 0.777 0.931 0.777 0.931 mp_max_i 1992 9.8 0.668 0.912 0.668 0.912 cp_gemm 105 8.4 0.000 0.000 0.904 0.911 cp_gemm_cosma 105 9.4 0.903 0.911 0.903 0.911 GW_matrix_operations 10 7.0 0.001 0.001 0.701 0.706 dbm_add 807 14.1 0.623 0.665 0.623 0.665 ------------------------------------------------------------------------------- Plot: name="GW_PBE_4benzene_timings_32omp", title="Timings of GW_PBE_4benzene with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="rest", label="rest", y=44.40400000000001, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=43.772, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=31.066, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=13.152, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=12.332, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="compute_mat_P_omega_calc_M_occ", label="compute_mat_P_omega_calc_M_occ", y=8.794, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbt_reshape", label="dbt_reshape", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 Plot: name="GW_PBE_4benzene_timings_32mpi", title="Timings of GW_PBE_4benzene with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="rest", label="rest", y=10.423000000000002, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=9.805, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="compute_mat_P_omega_calc_M_occ", label="compute_mat_P_omega_calc_M_occ", y=0.386, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_sync", label="mp_sync", y=4.886, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=1.473, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbt_reshape", label="dbt_reshape", y=2.088, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=2.532, yerr=0.0 Running RI-HFX_H2O-32.inp with 1 threads and 32 ranks... done. Running RI-HFX_H2O-32.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/RI-HFX_H2O-32_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.017 0.017 357.297 357.297 qs_forces 1 2.0 0.000 0.000 356.740 356.740 rebuild_ks_matrix 7 6.6 0.000 0.000 335.368 335.368 qs_ks_build_kohn_sham_matrix 7 7.6 0.001 0.001 335.368 335.368 hfx_ks_matrix 7 8.6 0.000 0.000 333.362 333.362 hfx_ri_update_ks 7 9.6 0.000 0.000 284.200 284.200 hfx_ri_update_ks_Pmat 7 10.6 34.877 34.877 284.198 284.198 dbt_total 4939 11.6 0.027 0.027 276.573 276.573 qs_energies 1 3.0 0.000 0.000 272.062 272.062 scf_env_do_scf 1 4.0 0.000 0.000 271.522 271.522 qs_ks_update_qs_env 8 6.0 0.000 0.000 250.739 250.739 dbt_tas_total 2391 14.1 0.855 0.855 248.680 248.680 dbt_contract 1473 13.0 0.124 0.124 229.221 229.221 dbt_tas_multiply 1482 14.0 0.004 0.004 218.590 218.590 hfx_ri_update_ks_Pmat_KS 567 11.6 0.004 0.004 202.515 202.515 dbt_tas_dbm 1482 16.0 0.005 0.005 198.998 198.998 dbm_multiply 1482 18.0 198.981 198.981 198.981 198.981 dbt_tas_mm_2 649 17.1 0.004 0.004 171.867 171.867 scf_env_do_scf_inner_loop 6 5.0 0.001 0.001 153.021 153.021 init_scf_loop 2 5.0 0.000 0.000 118.499 118.499 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 84.632 84.632 hfx_ri_update_forces 1 7.0 0.000 0.000 49.159 49.159 hfx_ri_forces_Pmat_3c 1 8.0 0.002 0.002 34.438 34.438 dbt_tas_reshape 906 14.4 0.025 0.025 21.611 21.611 dbt_tas_mm_3T 659 17.1 0.002 0.002 20.614 20.614 hfx_ri_pre_scf_Pmat 1 12.0 0.001 0.001 19.059 19.059 prepare_preconditioner 2 6.0 0.000 0.000 18.318 18.318 hfx_ri_update_ks_Pmat_Px3C 567 11.6 0.002 0.002 18.152 18.152 make_preconditioner 2 7.0 0.000 0.000 17.855 17.855 make_full_all 2 8.0 0.000 0.000 17.364 17.364 dbt_copy 2411 12.3 0.204 0.204 17.224 17.224 cp_fm_syevd 12 10.7 0.000 0.000 17.163 17.163 cp_fm_syevd_base 12 11.7 17.163 17.163 17.163 17.163 dbt_tas_merge 649 14.1 13.171 13.171 14.205 14.205 dbt_tas_reshape_buffer_fill 906 15.4 13.049 13.049 13.049 13.049 precalc_derivatives 1 8.0 0.007 0.007 11.903 11.903 dbm_reserve_blocks 8383 16.8 11.454 11.454 11.454 11.454 dbt_tas_reserve_blocks_index 7477 16.0 0.347 0.347 11.076 11.076 dbt_crop 2763 14.2 6.913 6.913 10.356 10.356 dbt_reshape 856 13.9 5.196 5.196 9.367 9.367 dbt_reserve_blocks_index 4998 15.2 0.116 0.116 8.461 8.461 dbt_reserve_blocks_index_array 4963 14.3 0.018 0.018 8.419 8.419 hfx_ri_pre_scf_Pmat_2c 1 13.0 0.000 0.000 7.474 7.474 ------------------------------------------------------------------------------- From /workspace/artifacts/RI-HFX_H2O-32_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.004 0.019 56.696 56.705 qs_forces 1 2.0 0.000 0.000 56.478 56.478 rebuild_ks_matrix 7 6.6 0.000 0.000 55.827 55.827 qs_ks_build_kohn_sham_matrix 7 7.6 0.002 0.002 55.827 55.827 hfx_ks_matrix 7 8.6 0.000 0.000 54.836 54.844 dbt_total 4939 11.6 0.026 0.029 49.371 49.388 hfx_ri_update_ks 7 9.6 0.000 0.000 37.391 37.391 hfx_ri_update_ks_Pmat 7 10.6 1.400 1.871 37.390 37.390 dbt_contract 1473 13.0 0.088 0.098 37.366 37.380 dbt_tas_total 2391 14.1 0.110 0.134 35.324 35.324 qs_energies 1 3.0 0.000 0.001 34.297 34.297 scf_env_do_scf 1 4.0 0.000 0.001 34.157 34.157 qs_ks_update_qs_env 8 6.0 0.000 0.000 33.658 33.659 dbt_tas_multiply 1482 14.0 0.005 0.005 30.934 30.937 dbt_tas_dbm 1482 16.0 0.005 0.006 23.459 23.475 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 22.170 22.170 hfx_ri_update_ks_Pmat_KS 567 11.6 0.004 0.005 20.058 20.059 dbm_multiply 1482 18.0 16.292 19.939 16.292 19.939 scf_env_do_scf_inner_loop 6 5.0 0.000 0.001 19.713 19.713 hfx_ri_update_forces 1 7.0 0.000 0.000 17.445 17.452 init_scf_loop 2 5.0 0.000 0.000 14.442 14.443 mp_sync 17669 13.5 11.499 13.780 11.499 13.780 dbt_tas_mm_2 649 17.1 0.003 0.003 13.355 13.380 hfx_ri_forces_Pmat_3c 1 8.0 0.002 0.002 12.260 12.278 hfx_ri_update_ks_Pmat_Px3C 567 11.6 0.002 0.002 7.699 7.700 dbt_copy 2429 12.3 0.030 0.032 6.323 6.714 dbt_crop 2763 14.2 2.969 3.986 3.723 4.796 dbt_tas_mm_3T 659 17.1 0.002 0.002 4.260 4.734 dbt_reshape 1257 13.5 2.081 2.608 4.354 4.613 hfx_ri_pre_scf_Pmat 1 12.0 0.000 0.000 4.045 4.045 precalc_derivatives 1 8.0 0.001 0.002 3.998 3.998 dbt_tas_mm_3N 163 16.5 0.000 0.001 3.941 3.987 dbt_tas_merge 649 14.1 1.650 2.171 2.859 3.490 mp_waitall_2 5988 16.5 2.853 3.079 2.853 3.079 dbm_reserve_blocks 8417 16.9 2.303 2.594 2.303 2.594 dbt_tas_reserve_blocks_index 7508 16.1 0.253 0.337 2.135 2.496 mp_max_i 3372 12.5 2.009 2.390 2.009 2.390 hfx_ri_pre_scf_Pmat_RIx3C 81 13.0 0.000 0.000 2.377 2.384 dbt_tas_replicate 909 15.6 0.603 0.765 2.165 2.229 dbt_tas_communicate_buffer 1825 16.3 0.059 0.076 1.971 2.155 dbt_reserve_blocks_index 5399 15.2 0.109 0.130 1.806 2.094 dbt_reserve_blocks_index_array 5364 14.2 0.014 0.020 1.805 2.093 dbt_tas_reshape 916 14.4 0.008 0.009 1.857 2.012 mp_alltoall_i 4341 15.3 1.757 1.978 1.757 1.978 build_3c_derivatives 9 9.0 0.227 0.343 1.895 1.896 dbt_communicate_buffer 1257 14.5 0.043 0.057 1.525 1.633 hfx_ri_update_ks_Pmat_copy_2 567 11.6 0.001 0.002 1.424 1.426 convert_to_new_pgrid 4446 16.0 0.033 0.039 1.224 1.320 dbm_copy 3043 16.9 1.190 1.289 1.190 1.289 ------------------------------------------------------------------------------- Plot: name="RI-HFX_H2O-32_timings_32omp", title="Timings of RI-HFX_H2O-32 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="rest", label="rest", y=61.68900000000002, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=198.981, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=34.877, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="cp_fm_syevd_base", label="cp_fm_syevd_base", y=17.163, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_tas_merge", label="dbt_tas_merge", y=13.171, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_tas_reshape_buffer_fill", label="dbt_tas_reshape_buffer_fill", y=13.049, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=11.454, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_crop", label="dbt_crop", y=6.913, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 Plot: name="RI-HFX_H2O-32_timings_32mpi", title="Timings of RI-HFX_H2O-32 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="rest", label="rest", y=17.729999999999997, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=16.292, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=1.4, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="cp_fm_syevd_base", label="cp_fm_syevd_base", y=0.0, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_tas_merge", label="dbt_tas_merge", y=1.65, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_tas_reshape_buffer_fill", label="dbt_tas_reshape_buffer_fill", y=0.0, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=2.303, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_crop", label="dbt_crop", y=2.969, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_sync", label="mp_sync", y=11.499, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=2.853, yerr=0.0 Running RI-MP2_ammonia.inp with 1 threads and 32 ranks... done. Running RI-MP2_ammonia.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/RI-MP2_ammonia_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.014 0.014 287.491 287.491 qs_energies 1 2.0 0.000 0.000 287.205 287.205 mp2_main 1 3.0 0.000 0.000 251.643 251.643 mp2_gpw_main 1 4.0 0.001 0.001 247.056 247.056 mp2_ri_gpw_compute_in 1 5.0 0.363 0.363 204.130 204.130 mp2_ri_gpw_compute_in_loop 1 6.0 0.009 0.009 179.625 179.625 mp2_eri_3c_integrate_gpw 2656 7.0 0.011 0.011 153.321 153.321 integrate_v_rspace 2666 8.0 0.615 0.615 139.911 139.911 grid_integrate_task_list 2666 9.0 137.180 137.180 137.180 137.180 mp2_ri_gpw_compute_en 1 5.0 0.077 0.077 42.903 42.903 mp2_ri_gpw_compute_en_RI_loop 1 6.0 9.885 9.885 41.178 41.178 scf_env_do_scf 1 3.0 0.000 0.000 34.368 34.368 scf_env_do_scf_inner_loop 10 4.0 0.001 0.001 34.368 34.368 qs_scf_new_mos 10 5.0 0.000 0.000 32.975 32.975 eigensolver 11 5.8 0.001 0.001 26.128 26.128 mp2_ri_gpw_compute_en_expansio 2080 7.0 2.075 2.075 24.135 24.135 get_2c_integrals 1 6.0 0.000 0.000 24.038 24.038 cp_fm_diag_elpa 11 6.8 0.000 0.000 23.694 23.694 cp_fm_diag_elpa_base 11 7.8 23.549 23.549 23.694 23.694 calculate_wavefunction 5312 9.0 15.992 15.992 23.367 23.367 offload_gemm 2080 8.0 22.060 22.060 22.060 22.060 dbcsr_multiply_generic 5322 8.0 0.174 0.174 20.265 20.265 ao_to_mo_and_store_B_mult_1 2656 7.0 0.009 0.009 20.246 20.246 compute_2c_integrals 1 7.0 0.005 0.005 17.915 17.915 compute_2c_integrals_loop_lm 1 8.0 0.013 0.013 17.896 17.896 mp2_eri_2c_integrate_gpw 1 9.0 3.370 3.370 17.883 17.883 pw_transfer 63872 10.6 0.892 0.892 11.503 11.503 fft_wrap_pw1pw2 53228 11.4 0.114 0.114 10.322 10.322 multiply_cannon 5322 9.0 0.431 0.431 9.866 9.866 qs_diis_b_step 9 6.0 0.000 0.000 9.181 9.181 multiply_cannon_loop 5322 10.0 0.276 0.276 8.532 8.532 cp_fm_symm 18 7.0 8.078 8.078 8.078 8.078 make_m2s 10644 9.0 0.061 0.061 8.076 8.076 make_images 10644 10.0 3.107 3.107 7.781 7.781 fft_wrap_pw1pw2_20 21271 12.4 0.565 0.565 7.267 7.267 multiply_cannon_multrec 5322 11.0 7.191 7.191 7.230 7.230 fft3d_s 53229 13.4 6.618 6.618 6.709 6.709 ao_to_mo_and_store_B_E_Ex_1 2656 7.0 2.119 2.119 5.957 5.957 ------------------------------------------------------------------------------- From /workspace/artifacts/RI-MP2_ammonia_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.005 0.024 40.638 40.648 qs_energies 1 2.0 0.000 0.001 40.526 40.526 mp2_main 1 3.0 0.000 0.001 38.334 38.334 mp2_gpw_main 1 4.0 0.001 0.001 38.222 38.222 mp2_ri_gpw_compute_in 1 5.0 0.043 0.043 17.619 22.821 mp2_ri_gpw_compute_in_loop 1 6.0 0.001 0.001 15.929 21.132 mp2_ri_gpw_compute_en 1 5.0 0.071 0.079 20.522 21.013 mp2_eri_3c_integrate_gpw 83 7.0 0.001 0.001 13.868 19.216 integrate_v_rspace 93 8.1 0.100 0.120 13.795 19.043 grid_integrate_task_list 93 9.1 13.431 18.736 13.431 18.736 mp2_ri_gpw_compute_en_RI_loop 1 6.0 0.537 0.623 13.713 14.428 mp2_ri_gpw_compute_en_expansio 65 7.0 0.072 0.081 10.323 11.609 offload_gemm 65 8.0 10.251 11.550 10.251 11.550 mp_min_d 2 7.0 5.254 5.779 5.254 5.779 mp2_ri_get_integ_group_size 1 6.0 0.000 0.000 5.204 5.695 mp2_ri_gpw_compute_en_comm 17 7.0 0.100 0.154 2.532 3.113 mp_sendrecv_dm3 510 8.0 1.968 2.608 1.968 2.608 dbcsr_multiply_generic 176 8.0 0.008 0.009 1.768 2.098 ao_to_mo_and_store_B_mult_1 83 7.0 0.001 0.001 1.752 2.081 scf_env_do_scf 1 3.0 0.000 0.000 2.061 2.061 scf_env_do_scf_inner_loop 10 4.0 0.000 0.002 2.061 2.061 get_2c_integrals 1 6.0 0.000 0.000 1.555 1.577 compute_2c_integrals 1 7.0 0.004 0.006 1.343 1.353 compute_2c_integrals_loop_lm 1 8.0 0.001 0.003 0.981 1.278 mp2_eri_2c_integrate_gpw 1 9.0 0.203 0.319 0.980 1.277 mp_sum_d 498 2.3 0.666 1.121 0.666 1.121 multiply_cannon 176 9.0 0.014 0.015 1.019 1.105 calculate_wavefunction 166 9.0 0.484 0.680 0.840 1.078 qs_scf_new_mos 10 5.0 0.000 0.000 1.005 1.060 multiply_cannon_loop 176 10.0 0.002 0.002 0.965 1.050 eigensolver 11 5.8 0.001 0.001 0.998 0.998 make_m2s 352 9.0 0.003 0.003 0.713 0.963 make_images 352 10.0 0.050 0.059 0.701 0.951 multiply_cannon_multrec 246 11.0 0.830 0.873 0.835 0.880 cp_fm_diag_elpa 11 6.8 0.000 0.000 0.846 0.847 cp_fm_redistribute_end 11 7.8 0.319 0.837 0.329 0.840 pw_transfer 2120 10.5 0.036 0.047 0.756 0.834 cp_fm_diag_elpa_base 11 7.8 0.496 0.799 0.507 0.814 ------------------------------------------------------------------------------- Plot: name="RI-MP2_ammonia_timings_32omp", title="Timings of RI-MP2_ammonia with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="rest", label="rest", y=71.63399999999999, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=137.18, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=23.549, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="offload_gemm", label="offload_gemm", y=22.06, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="calculate_wavefunction", label="calculate_wavefunction", y=15.992, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp2_ri_gpw_compute_en_RI_loop", label="mp2_ri_gpw_compute_en_RI_loop", y=9.885, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=7.191, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp_sendrecv_dm3", label="mp_sendrecv_dm3", y=0.0, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp_min_d", label="mp_min_d", y=0.0, yerr=0.0 Plot: name="RI-MP2_ammonia_timings_32mpi", title="Timings of RI-MP2_ammonia with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="rest", label="rest", y=7.3870000000000005, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=13.431, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=0.496, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="offload_gemm", label="offload_gemm", y=10.251, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="calculate_wavefunction", label="calculate_wavefunction", y=0.484, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp2_ri_gpw_compute_en_RI_loop", label="mp2_ri_gpw_compute_en_RI_loop", y=0.537, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=0.83, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp_sendrecv_dm3", label="mp_sendrecv_dm3", y=1.968, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp_min_d", label="mp_min_d", y=5.254, yerr=0.0 Running diag_cu144_broy.inp with 1 threads and 32 ranks... done. Running diag_cu144_broy.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/diag_cu144_broy_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.081 0.081 211.430 211.430 qs_energies 1 2.0 0.000 0.000 210.112 210.112 scf_env_do_scf 1 3.0 0.000 0.000 201.357 201.357 scf_env_do_scf_inner_loop 15 4.0 0.002 0.002 201.357 201.357 qs_scf_new_mos 15 5.0 0.000 0.000 126.630 126.630 eigensolver 15 6.0 0.001 0.001 118.972 118.972 cp_fm_diag_elpa 15 7.0 0.000 0.000 105.873 105.873 cp_fm_diag_elpa_base 15 8.0 103.339 103.339 105.873 105.873 qs_ks_update_qs_env 15 5.0 0.000 0.000 49.403 49.403 rebuild_ks_matrix 15 6.0 0.000 0.000 49.198 49.198 qs_ks_build_kohn_sham_matrix 15 7.0 0.002 0.002 49.198 49.198 qs_vxc_create 15 8.0 0.026 0.026 33.801 33.801 calculate_dispersion_nonloc 15 9.0 6.942 6.942 29.327 29.327 pw_transfer 1191 10.0 0.054 0.054 23.244 23.244 fft_wrap_pw1pw2 1086 11.0 0.008 0.008 23.065 23.065 qs_rho_update_rho 16 5.0 0.000 0.000 21.874 21.874 calculate_rho_elec 16 6.0 0.219 0.219 21.874 21.874 grid_collocate_task_list 16 7.0 20.486 20.486 20.486 20.486 fft_wrap_pw1pw2_150 765 12.0 3.205 3.205 16.216 16.216 sum_up_and_integrate 15 8.0 0.039 0.039 13.855 13.855 integrate_v_rspace 15 9.0 0.021 0.021 13.815 13.815 grid_integrate_task_list 15 10.0 13.296 13.296 13.296 13.296 cp_fm_cholesky_restore 45 7.0 10.849 10.849 10.849 10.849 fft3d_s 1087 13.0 10.579 10.579 10.585 10.585 pw_scatter_s 585 13.1 7.319 7.319 7.319 7.319 fft_wrap_pw1pw2_200 197 12.3 0.806 0.806 6.662 6.662 dbcsr_complete_redistribute 46 8.3 2.202 2.202 5.531 5.531 copy_dbcsr_to_fm 16 5.9 0.001 0.001 5.372 5.372 init_scf_run 1 3.0 0.000 0.000 5.292 5.292 cp_fm_upper_to_full 30 8.0 4.783 4.783 4.783 4.783 gspace_mixing 14 5.0 0.170 0.170 4.649 4.649 vdW_energy 15 10.0 4.454 4.454 4.454 4.454 xc_vxc_pw_create 15 9.0 0.213 0.213 4.448 4.448 ------------------------------------------------------------------------------- From /workspace/artifacts/diag_cu144_broy_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.012 0.028 61.436 61.448 qs_energies 1 2.0 0.001 0.009 61.183 61.188 scf_env_do_scf 1 3.0 0.000 0.000 57.074 57.074 scf_env_do_scf_inner_loop 15 4.0 0.001 0.004 57.074 57.074 qs_ks_update_qs_env 15 5.0 0.000 0.000 24.884 24.894 rebuild_ks_matrix 15 6.0 0.000 0.000 24.837 24.847 qs_ks_build_kohn_sham_matrix 15 7.0 0.002 0.003 24.837 24.847 qs_rho_update_rho 16 5.0 0.000 0.000 20.629 20.631 calculate_rho_elec 16 6.0 0.007 0.007 20.629 20.631 grid_collocate_task_list 16 7.0 19.105 19.814 19.105 19.814 sum_up_and_integrate 15 8.0 0.006 0.011 14.010 14.056 integrate_v_rspace 15 9.0 0.001 0.001 14.003 14.053 grid_integrate_task_list 15 10.0 12.806 13.410 12.806 13.410 qs_scf_new_mos 15 5.0 0.000 0.000 12.284 12.311 eigensolver 15 6.0 0.001 0.002 11.335 11.344 qs_vxc_create 15 8.0 0.001 0.001 10.527 10.541 calculate_dispersion_nonloc 15 9.0 0.969 1.820 8.740 8.757 pw_transfer 1191 10.0 0.083 0.102 8.102 8.202 cp_fm_diag_elpa 15 7.0 0.000 0.000 8.084 8.088 cp_fm_diag_elpa_base 15 8.0 7.945 7.970 8.081 8.083 fft_wrap_pw1pw2 1086 11.0 0.013 0.016 7.931 8.042 fft3d_ps 1086 13.0 2.537 2.676 6.236 6.413 fft_wrap_pw1pw2_150 765 12.0 0.303 0.325 5.581 5.651 mp_alltoall_z22v 1086 15.0 3.075 3.568 3.075 3.568 cp_fm_cholesky_restore 45 7.0 3.109 3.144 3.109 3.144 yz_to_x 501 13.9 0.243 0.380 2.380 2.652 qs_energies_init_hamiltonians 1 3.0 0.000 0.001 2.470 2.470 build_core_hamiltonian_matrix 1 4.0 0.000 0.000 2.152 2.358 fft_wrap_pw1pw2_200 197 12.3 0.183 0.212 2.226 2.281 xc_vxc_pw_create 15 9.0 0.015 0.020 1.787 1.812 rs_pw_transfer 158 9.4 0.001 0.002 1.474 1.780 density_rs2pw 16 7.0 0.001 0.001 1.412 1.650 x_to_yz 585 14.1 0.355 0.375 1.292 1.452 init_scf_run 1 3.0 0.000 0.000 1.411 1.412 vdW_energy 15 10.0 1.319 1.395 1.319 1.395 build_core_ppnl 1 5.0 1.259 1.389 1.259 1.389 mp_waitany 520 11.3 1.013 1.369 1.013 1.369 scf_env_initial_rho_setup 1 4.0 0.000 0.001 1.329 1.330 xc_pw_derive 90 11.0 0.001 0.002 1.194 1.274 rs_pw_transfer_RS2PW_200 18 8.8 0.036 0.043 0.772 1.250 ------------------------------------------------------------------------------- Plot: name="diag_cu144_broy_timings_32omp", title="Timings of diag_cu144_broy with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32omp", name="rest", label="rest", y=52.881, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=103.339, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=20.486, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=13.296, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=10.849, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_s", label="fft3d_s", y=10.579, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=0.0, yerr=0.0 Plot: name="diag_cu144_broy_timings_32mpi", title="Timings of diag_cu144_broy with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="rest", label="rest", y=15.395999999999994, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=7.945, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=19.105, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=12.806, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=3.109, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=3.075, yerr=0.0 Running bench_dftb.inp with 1 threads and 32 ranks... done. Running bench_dftb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/bench_dftb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.071 0.071 229.151 229.151 qs_energies 1 2.0 0.000 0.000 229.020 229.020 ls_scf 1 3.0 0.000 0.000 227.819 227.819 ls_scf_main 1 4.0 0.002 0.002 215.857 215.857 density_matrix_trs4 11 5.0 0.012 0.012 125.775 125.775 ls_scf_dm_to_ks 11 5.0 0.000 0.000 84.682 84.682 matrix_ls_to_qs 11 6.0 0.000 0.000 81.486 81.486 dbcsr_multiply_generic 185 6.1 0.814 0.814 64.934 64.934 arnoldi_extremal 12 6.1 0.000 0.000 53.138 53.138 arnoldi_normal_ev 12 7.1 0.014 0.014 53.138 53.138 build_subspace 23 8.1 0.081 0.081 52.443 52.443 dbcsr_copy_into_existing 11 7.0 44.977 44.977 44.978 44.978 dbcsr_complete_redistribute 23 7.5 29.624 29.624 40.384 40.384 multiply_cannon 185 7.1 0.270 0.270 36.523 36.523 matrix_decluster 11 7.0 0.000 0.000 36.508 36.508 dbcsr_matrix_vector_mult 652 9.0 0.166 0.166 27.824 27.824 multiply_cannon_loop 185 8.1 0.237 0.237 24.350 24.350 make_m2s 370 7.1 0.037 0.037 24.256 24.256 make_images 370 8.1 10.521 10.521 22.711 22.711 multiply_cannon_multrec 185 9.1 22.675 22.675 22.705 22.705 dbcsr_matrix_vector_mult_local 652 10.0 17.200 17.200 17.204 17.204 dbcsr_finalize 646 7.5 0.164 0.164 14.108 14.108 dbcsr_merge_all 597 8.5 2.000 2.000 13.060 13.060 DGKS_ortho_d 702 9.1 12.500 12.500 12.503 12.503 setup_rec_index_2d 370 8.1 11.809 11.809 11.809 11.809 ls_scf_init_scf 1 4.0 0.000 0.000 11.342 11.342 ls_scf_init_matrix_S 1 5.0 0.000 0.000 11.015 11.015 Gram_Schmidt_ortho_d 702 9.1 10.523 10.523 10.525 10.525 matrix_sqrt_Newton_Schulz 1 6.0 0.001 0.001 10.384 10.384 dbcsr_sort_indices 1103 9.9 10.084 10.084 10.084 10.084 tree_to_linear_d 110 9.4 9.808 9.808 9.808 9.808 quick_finalize 395 10.0 0.384 0.384 8.751 8.751 dbcsr_special_finalize 370 9.1 0.002 0.002 8.089 8.089 matrix_qs_to_ls 12 5.1 0.000 0.000 5.610 5.610 matrix_cluster 12 6.1 0.000 0.000 5.610 5.610 create_fast_row_vec_access 776 10.2 0.001 0.001 4.969 4.969 create_fast_row_vec_access_d 776 11.2 4.965 4.965 4.968 4.968 ------------------------------------------------------------------------------- From /workspace/artifacts/bench_dftb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.020 65.206 65.216 qs_energies 1 2.0 0.000 0.000 65.119 65.119 ls_scf 1 3.0 0.000 0.000 65.060 65.060 ls_scf_main 1 4.0 0.000 0.007 62.602 62.602 density_matrix_trs4 11 5.0 0.006 0.017 59.948 60.004 dbcsr_multiply_generic 185 6.1 0.057 0.069 56.440 56.641 multiply_cannon 185 7.1 0.033 0.039 47.071 48.010 multiply_cannon_loop 185 8.1 0.112 0.127 44.656 45.720 multiply_cannon_multrec 1480 9.1 27.558 30.489 27.821 30.812 mp_waitall_1 11936 10.3 15.038 17.711 15.038 17.711 multiply_cannon_metrocomm3 1480 9.1 0.014 0.018 8.979 13.783 make_m2s 370 7.1 0.034 0.037 6.552 6.624 multiply_cannon_metrocomm1 1480 9.1 0.008 0.012 3.409 6.521 make_images 370 8.1 0.621 0.709 6.426 6.496 calculate_norms 2960 9.1 4.278 5.485 4.278 5.485 make_images_data 370 9.1 0.009 0.011 2.889 3.091 arnoldi_extremal 12 6.1 0.000 0.001 2.729 2.747 arnoldi_normal_ev 12 7.1 0.001 0.003 2.729 2.746 hybrid_alltoall_any 393 9.9 0.183 0.987 2.520 2.650 build_subspace 23 8.1 0.020 0.024 2.634 2.637 ls_scf_dm_to_ks 11 5.0 0.000 0.000 2.313 2.373 dbcsr_matrix_vector_mult 652 9.0 0.009 0.044 2.109 2.323 mp_sum_l 1119 5.6 1.870 2.278 1.870 2.278 dbcsr_complete_redistribute 23 7.5 1.191 1.561 1.909 2.093 matrix_ls_to_qs 11 6.0 0.000 0.000 1.849 2.055 dbcsr_matrix_vector_mult_local 652 10.0 1.673 1.987 1.675 1.989 matrix_decluster 11 7.0 0.000 0.003 1.714 1.878 ls_scf_init_scf 1 4.0 0.000 0.000 1.855 1.855 ls_scf_init_matrix_S 1 5.0 0.000 0.000 1.829 1.835 matrix_sqrt_Newton_Schulz 1 6.0 0.000 0.001 1.670 1.673 make_images_pack 370 9.1 1.564 1.652 1.568 1.655 buffer_matrices_ensure_size 370 8.1 1.313 1.439 1.313 1.439 dbcsr_finalize 646 7.5 0.008 0.009 1.341 1.438 dbcsr_multiply_generic_mpsum_f 137 7.1 0.000 0.000 1.051 1.398 ------------------------------------------------------------------------------- Plot: name="bench_dftb_timings_32omp", title="Timings of bench_dftb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32omp", name="rest", label="rest", y=102.17500000000001, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=44.977, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=29.624, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=22.675, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=17.2, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="DGKS_ortho_d", label="DGKS_ortho_d", y=12.5, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="calculate_norms", label="calculate_norms", y=0.0, yerr=0.0 Plot: name="bench_dftb_timings_32mpi", title="Timings of bench_dftb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32mpi", name="rest", label="rest", y=13.598000000000006, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=1.191, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=27.558, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=1.673, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="DGKS_ortho_d", label="DGKS_ortho_d", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=1.87, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=15.038, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="calculate_norms", label="calculate_norms", y=4.278, yerr=0.0 Running dbcsr.inp with 1 threads and 32 ranks... done. Running dbcsr.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/dbcsr_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.007 68.149 68.149 lib_test 1 2.0 0.000 0.000 68.141 68.141 dbcsr_run_tests 3 3.0 0.002 0.002 68.141 68.141 test_multiplies_multiproc 3 4.0 0.001 0.001 53.152 53.152 dbcsr_redistribute 9 5.0 34.259 34.259 35.786 35.786 dbcsr_multiply_generic 9 5.0 0.001 0.001 15.761 15.761 dbcsr_make_random_matrix 9 4.0 12.107 12.107 14.892 14.892 multiply_cannon 9 6.0 0.001 0.001 11.378 11.378 multiply_cannon_loop 9 7.0 0.024 0.024 11.009 11.009 multiply_cannon_multrec 9 8.0 10.984 10.984 10.985 10.985 dbcsr_finalize 27 5.7 0.019 0.019 5.165 5.165 dbcsr_merge_all 18 6.5 1.939 1.939 4.526 4.526 dbcsr_data_release 975 7.6 2.452 2.452 2.452 2.452 tree_to_linear_d 9 7.0 1.772 1.772 1.772 1.772 make_m2s 18 6.0 0.001 0.001 1.483 1.483 make_images 18 7.0 0.523 0.523 1.433 1.433 ------------------------------------------------------------------------------- From /workspace/artifacts/dbcsr_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.002 0.010 17.444 17.449 lib_test 1 2.0 0.000 0.000 17.419 17.435 dbcsr_run_tests 3 3.0 0.000 0.001 17.418 17.435 test_multiplies_multiproc 3 4.0 0.001 0.002 16.605 16.657 dbcsr_multiply_generic 9 5.0 0.001 0.002 14.836 14.936 multiply_cannon 9 6.0 0.001 0.002 13.152 13.522 multiply_cannon_loop 9 7.0 0.002 0.002 12.885 13.213 multiply_cannon_multrec 72 8.0 10.826 11.511 10.827 11.512 mp_waitall_1 576 9.2 2.361 2.821 2.361 2.821 multiply_cannon_metrocomm1 72 8.0 0.001 0.001 1.817 2.215 dbcsr_make_random_matrix 9 4.0 0.658 0.908 0.786 0.989 mp_sum_l 390 2.5 0.444 0.758 0.444 0.758 dbcsr_multiply_generic_mpsum_f 9 6.0 0.000 0.000 0.435 0.750 dbcsr_data_release 444 7.6 0.641 0.717 0.641 0.717 dbcsr_finalize 27 5.7 0.000 0.000 0.610 0.672 make_m2s 18 6.0 0.001 0.001 0.643 0.669 make_images 18 7.0 0.021 0.026 0.639 0.666 multiply_cannon_metrocomm3 72 8.0 0.000 0.000 0.234 0.643 dbcsr_destroy 111 5.9 0.000 0.000 0.523 0.608 dbcsr_merge_all 18 6.5 0.088 0.106 0.491 0.582 dbcsr_checksum 6 5.0 0.154 0.518 0.523 0.523 dbcsr_redistribute 9 5.0 0.224 0.252 0.380 0.404 make_images_data 18 8.0 0.000 0.001 0.339 0.402 mp_sum_d 191 1.2 0.370 0.386 0.370 0.386 dbcsr_data_copy_aa2 18 7.5 0.297 0.362 0.297 0.362 ------------------------------------------------------------------------------- Plot: name="dbcsr_timings_32omp", title="Timings of dbcsr with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32omp", name="rest", label="rest", y=6.408000000000001, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_redistribute", label="dbcsr_redistribute", y=34.259, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=12.107, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=10.984, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_data_release", label="dbcsr_data_release", y=2.452, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_merge_all", label="dbcsr_merge_all", y=1.939, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 Plot: name="dbcsr_timings_32mpi", title="Timings of dbcsr with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32mpi", name="rest", label="rest", y=2.201999999999998, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_redistribute", label="dbcsr_redistribute", y=0.224, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=0.658, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=10.826, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_data_release", label="dbcsr_data_release", y=0.641, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_merge_all", label="dbcsr_merge_all", y=0.088, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=2.361, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=0.444, yerr=0.0 Running MQAE_single_node.inp with 1 threads and 32 ranks... done. Running MQAE_single_node.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/MQAE_single_node_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.046 0.046 136.878 136.878 qs_mol_dyn_low 1 2.0 0.003 0.003 135.303 135.303 velocity_verlet 5 3.0 0.003 0.003 110.210 110.210 qmmm_el_coupling 6 3.8 0.000 0.000 83.044 83.044 qmmm_elec_with_gaussian 6 4.8 0.063 0.063 83.033 83.033 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 82.346 82.346 qmmm_elec_gaussian_low_G 6 6.8 81.444 81.444 81.444 81.444 qs_forces 6 3.8 0.000 0.000 43.627 43.627 qs_energies 6 4.8 0.000 0.000 39.467 39.467 scf_env_do_scf 6 5.8 0.001 0.001 36.215 36.215 scf_env_do_scf_inner_loop 39 6.8 0.004 0.004 24.841 24.841 rebuild_ks_matrix 45 8.4 0.000 0.000 24.076 24.076 qs_ks_build_kohn_sham_matrix 45 9.4 0.005 0.005 24.076 24.076 qs_ks_update_qs_env 45 7.8 0.000 0.000 20.352 20.352 pw_transfer 966 12.3 0.045 0.045 16.425 16.425 fft_wrap_pw1pw2 801 13.6 0.005 0.005 16.220 16.220 fft_wrap_pw1pw2_150 507 15.2 2.025 2.025 15.833 15.833 qs_vxc_create 45 10.4 0.001 0.001 12.899 12.899 xc_vxc_pw_create 45 11.4 0.633 0.633 12.898 12.898 init_scf_loop 6 6.8 0.000 0.000 11.370 11.370 xc_pw_derive 270 13.4 0.002 0.002 8.857 8.857 prepare_preconditioner 6 7.8 0.000 0.000 8.126 8.126 make_preconditioner 6 8.8 0.000 0.000 7.823 7.823 make_full_all 6 9.8 0.001 0.001 7.387 7.387 fft3d_s 802 15.6 7.375 7.375 7.382 7.382 qs_rho_update_rho 45 7.9 0.000 0.000 6.991 6.991 calculate_rho_elec 45 8.9 0.562 0.562 6.990 6.990 xc_rho_set_and_dset_create 45 12.4 0.662 0.662 6.855 6.855 qmmm_forces 6 3.8 0.002 0.002 5.573 5.573 pw_scatter_s 429 15.8 5.543 5.543 5.543 5.543 xc_pw_divergence 45 12.4 0.001 0.001 5.364 5.364 qmmm_forces_with_gaussian 6 4.8 0.074 0.074 5.240 5.240 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 4.455 4.455 pw_integral_ab 2539 7.4 4.250 4.250 4.250 4.250 qs_ks_ddapc 45 10.4 0.001 0.001 4.232 4.232 cp_fm_diag_elpa 18 11.2 0.000 0.000 3.964 3.964 cp_fm_diag_elpa_base 18 12.2 3.957 3.957 3.964 3.964 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 3.731 3.731 qmmm_forces_gaussian_low_G 6 6.8 3.690 3.690 3.690 3.690 grid_collocate_task_list 45 9.9 3.295 3.295 3.295 3.295 density_rs2pw 45 9.9 0.001 0.001 3.133 3.133 sum_up_and_integrate 45 10.4 0.114 0.114 3.035 3.035 integrate_v_rspace 45 11.4 0.007 0.007 2.921 2.921 pw_poisson_solve 51 9.9 1.223 1.223 2.915 2.915 ------------------------------------------------------------------------------- From /workspace/artifacts/MQAE_single_node_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.030 0.048 56.679 56.690 qs_mol_dyn_low 1 2.0 0.003 0.003 55.529 55.584 qs_forces 6 3.8 0.000 0.001 39.841 39.842 qs_energies 6 4.8 0.000 0.001 38.016 38.016 scf_env_do_scf 6 5.8 0.000 0.001 37.040 37.040 scf_env_do_scf_inner_loop 113 6.2 0.002 0.018 35.589 35.590 rebuild_ks_matrix 119 8.1 0.000 0.000 26.141 26.150 qs_ks_build_kohn_sham_matrix 119 9.1 0.015 0.018 26.141 26.149 qs_ks_update_qs_env 119 7.3 0.001 0.001 24.599 24.608 velocity_verlet 5 3.0 0.002 0.005 23.423 23.427 pw_transfer 2446 12.3 0.170 0.192 16.651 17.118 fft_wrap_pw1pw2 2059 13.4 0.024 0.026 16.230 16.669 fft_wrap_pw1pw2_150 1321 14.9 1.282 1.497 15.574 15.963 qs_vxc_create 119 10.1 0.002 0.004 13.085 13.090 xc_vxc_pw_create 119 11.1 0.153 0.216 13.083 13.088 fft3d_ps 2059 15.4 6.950 7.636 12.084 12.799 qs_rho_update_rho 119 7.3 0.000 0.001 10.785 10.789 calculate_rho_elec 119 8.3 0.049 0.055 10.784 10.788 xc_pw_derive 714 13.1 0.012 0.013 9.827 10.131 sum_up_and_integrate 119 10.1 0.053 0.061 9.571 9.662 integrate_v_rspace 119 11.1 0.003 0.004 9.518 9.615 qmmm_forces 6 3.8 0.002 0.002 7.861 7.861 qmmm_forces_with_gaussian 6 4.8 0.390 0.417 7.333 7.756 rs_pw_transfer 988 11.5 0.011 0.016 7.170 7.466 xc_rho_set_and_dset_create 119 12.1 0.362 0.583 6.401 7.006 qmmm_el_coupling 6 3.8 0.000 0.000 6.906 6.986 qmmm_elec_with_gaussian 6 4.8 0.353 0.391 6.905 6.984 xc_pw_divergence 119 12.1 0.005 0.006 6.347 6.631 density_rs2pw 119 9.3 0.005 0.008 6.283 6.572 potential_pw2rs 119 12.1 0.005 0.008 5.543 5.559 grid_collocate_task_list 119 9.3 4.338 4.893 4.338 4.893 mp_alltoall_z22v 2059 17.4 3.628 4.098 3.628 4.098 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 3.849 4.091 grid_integrate_task_list 119 12.1 3.374 4.003 3.374 4.003 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 3.301 3.566 qmmm_forces_gaussian_low_G 6 6.8 3.160 3.402 3.160 3.402 x_to_yz 1095 16.8 0.864 0.965 2.810 3.092 qmmm_elec_gaussian_low_G 6 6.8 2.711 2.973 2.711 2.973 rs_pw_transfer_PW2RS_150 125 13.9 1.307 1.380 2.805 2.888 mp_waitany 4028 12.8 2.348 2.741 2.348 2.741 pw_restrict_s3 18 5.8 1.352 1.603 2.512 2.713 rs_pw_transfer_RS2PW_150 125 11.2 1.010 1.136 2.376 2.672 yz_to_x 964 16.0 0.580 0.750 2.261 2.591 dbcsr_multiply_generic 2588 12.3 0.057 0.070 1.881 2.315 qmmm_elec_with_gaussian:spline 6 5.8 0.000 0.000 2.068 2.212 pw_prolongate_s3 18 6.8 1.121 1.324 2.068 2.212 qs_scf_new_mos 113 7.2 0.000 0.001 2.033 2.038 qs_scf_loop_do_ot 113 8.2 0.000 0.000 2.032 2.038 qs_ks_ddapc 119 10.1 0.002 0.002 1.921 2.002 ot_scf_mini 113 9.2 0.001 0.001 1.952 1.957 mp_waitall_1 188862 16.2 1.768 1.920 1.768 1.920 mp_sum_dm3 33 5.7 1.669 1.763 1.669 1.763 pw_gather_p 964 15.0 1.256 1.663 1.256 1.663 pw_scatter_p 1095 15.8 1.557 1.593 1.557 1.593 pw_integral_ab 2761 7.7 1.226 1.306 1.476 1.576 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 1.555 1.555 init_scf_loop 6 6.8 0.000 0.000 1.449 1.449 ot_mini 113 10.2 0.000 0.000 1.185 1.190 ------------------------------------------------------------------------------- Plot: name="MQAE_single_node_timings_32omp", title="Timings of MQAE_single_node with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32omp", name="rest", label="rest", y=27.323999999999984, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=81.444, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_s", label="fft3d_s", y=7.375, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_scatter_s", label="pw_scatter_s", y=5.543, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_integral_ab", label="pw_integral_ab", y=4.25, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=3.957, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=3.69, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=3.295, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 Plot: name="MQAE_single_node_timings_32mpi", title="Timings of MQAE_single_node with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32mpi", name="rest", label="rest", y=31.292000000000005, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=2.711, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_scatter_s", label="pw_scatter_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_integral_ab", label="pw_integral_ab", y=1.226, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=3.16, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=4.338, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=3.628, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=3.374, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=6.95, yerr=0.0 Summary: Performance test took 43 minutes. Status: OK Removing intermediate container d657d44b9633 ---> 1a4791fdbbc0 Step 41/42 : CMD cat $(find ./report.log -mmin +10) | sed '/^Summary:/ s/$/ (cached)/' ---> Running in 8d24d27aeae5 Removing intermediate container 8d24d27aeae5 ---> a0e26f9af3d1 Step 42/42 : ENTRYPOINT [] ---> Running in 6bb311dddeb3 Removing intermediate container 6bb311dddeb3 ---> 63dcfe5490ac [Warning] One or more build-args [GIT_COMMIT_SHA] were not consumed Successfully built 63dcfe5490ac Successfully tagged gcr.io/cp2k-org-project/img_cp2k-perf-openmp-arch-14b:master Pushing new image... done. #################### Running Image cp2k-perf-openmp #################### Uploading artifacts... done EndDate: 2022-06-06 11:59:17+00:00