StartDate: 2022-08-11 19:05:55+00:00 CpuId: 32x AMD (unknown model) [Zen 3], 7nm (SMT disabled) CommitSHA: ab6471db167618e4da138b62e253e19681a02d9c CommitTime: 2022-08-11 18:08:38 +0200 CommitAuthor: Matthias Krack CommitSubject: Revise and tag MOLSYM printout Populating docker build cache... done. #################### Building Image cp2k-perf-openmp #################### Dockerfile: /tools/docker/Dockerfile.test_performance Build-Path: / Build-Args: GIT_COMMIT_SHA=ab6471db167618e4da138b62e253e19681a02d9c Sending build context to Docker daemon 364.2MB Step 1/42 : FROM ubuntu:22.04 22.04: Pulling from library/ubuntu d19f32bd9e41: Already exists Digest: sha256:34fea4f31bf187bc915536831fd0afc9d214755bf700b5cdb1336c82516d154e Status: Downloaded newer image for ubuntu:22.04 ---> df5de72bdb3b Step 2/42 : WORKDIR /opt/cp2k-toolchain ---> Using cache ---> 026e35f2a85c Step 3/42 : COPY ./tools/toolchain/install_requirements*.sh ./ ---> Using cache ---> 61e4cd54df66 Step 4/42 : RUN ./install_requirements.sh ubuntu:22.04 ---> Using cache ---> 9302e3cfee49 Step 5/42 : RUN mkdir scripts ---> Using cache ---> ba1db08844ca Step 6/42 : COPY ./tools/toolchain/scripts/VERSION ./tools/toolchain/scripts/parse_if.py ./tools/toolchain/scripts/tool_kit.sh ./tools/toolchain/scripts/common_vars.sh ./tools/toolchain/scripts/signal_trap.sh ./tools/toolchain/scripts/get_openblas_arch.sh ./scripts/ ---> Using cache ---> 4fa59dbbe3c5 Step 7/42 : COPY ./tools/toolchain/install_cp2k_toolchain.sh . ---> Using cache ---> 92a7ebae54e4 Step 8/42 : RUN ./install_cp2k_toolchain.sh --install-all --mpi-mode=mpich --with-gcc=system --dry-run ---> Using cache ---> fdcb02913a6b Step 9/42 : COPY ./tools/toolchain/scripts/stage0/ ./scripts/stage0/ ---> Using cache ---> bbf5a70e57e9 Step 10/42 : RUN ./scripts/stage0/install_stage0.sh && rm -rf ./build ---> Using cache ---> 57713487903f Step 11/42 : COPY ./tools/toolchain/scripts/stage1/ ./scripts/stage1/ ---> Using cache ---> a7273f099530 Step 12/42 : RUN ./scripts/stage1/install_stage1.sh && rm -rf ./build ---> Using cache ---> 5f5e186a6792 Step 13/42 : COPY ./tools/toolchain/scripts/stage2/ ./scripts/stage2/ ---> Using cache ---> daaf389ae447 Step 14/42 : RUN ./scripts/stage2/install_stage2.sh && rm -rf ./build ---> Using cache ---> 6767e014fd7d Step 15/42 : COPY ./tools/toolchain/scripts/stage3/ ./scripts/stage3/ ---> Using cache ---> 8bd37651db46 Step 16/42 : RUN ./scripts/stage3/install_stage3.sh && rm -rf ./build ---> Using cache ---> 7036e597e5e8 Step 17/42 : COPY ./tools/toolchain/scripts/stage4/ ./scripts/stage4/ ---> Using cache ---> ff2b1e1cb16d Step 18/42 : RUN ./scripts/stage4/install_stage4.sh && rm -rf ./build ---> Using cache ---> da0abcd2ccad Step 19/42 : COPY ./tools/toolchain/scripts/stage5/ ./scripts/stage5/ ---> Using cache ---> ea1516b197ec Step 20/42 : RUN ./scripts/stage5/install_stage5.sh && rm -rf ./build ---> Using cache ---> c5c4ef116433 Step 21/42 : COPY ./tools/toolchain/scripts/stage6/ ./scripts/stage6/ ---> Using cache ---> 0e2954296d9e Step 22/42 : RUN ./scripts/stage6/install_stage6.sh && rm -rf ./build ---> Using cache ---> d0b0ced73c5c Step 23/42 : COPY ./tools/toolchain/scripts/stage7/ ./scripts/stage7/ ---> Using cache ---> cc25e8fd9377 Step 24/42 : RUN ./scripts/stage7/install_stage7.sh && rm -rf ./build ---> Using cache ---> 915029cf8ed8 Step 25/42 : COPY ./tools/toolchain/scripts/stage8/ ./scripts/stage8/ ---> Using cache ---> 20510d2c9104 Step 26/42 : RUN ./scripts/stage8/install_stage8.sh && rm -rf ./build ---> Using cache ---> 55a50ff28104 Step 27/42 : COPY ./tools/toolchain/scripts/arch_base.tmpl ./tools/toolchain/scripts/generate_arch_files.sh ./scripts/ ---> Using cache ---> 02b1fc9204dc Step 28/42 : RUN ./scripts/generate_arch_files.sh && rm -rf ./build ---> Using cache ---> 67cf34b67198 Step 29/42 : WORKDIR /opt/cp2k ---> Using cache ---> 74e513362a2c Step 30/42 : COPY ./Makefile . ---> Using cache ---> a24b1f25fab5 Step 31/42 : COPY ./src ./src ---> bff8f50dccc1 Step 32/42 : COPY ./exts ./exts ---> 67a1e422ac6c Step 33/42 : COPY ./tools/build_utils ./tools/build_utils ---> 0706bb50c15a Step 34/42 : RUN /bin/bash -c " mkdir -p arch && ln -vs /opt/cp2k-toolchain/install/arch/local.psmp ./arch/ && echo 'Compiling cp2k...' && source /opt/cp2k-toolchain/install/setup && ( make -j ARCH=local VERSION=psmp &> /dev/null || true ) && ( [ ! -f ./exe/local/cp2k.psmp ] || ldd ./exe/local/cp2k.psmp | grep -q libmpi )" ---> Running in 93d0c8f77a95 './arch/local.psmp' -> '/opt/cp2k-toolchain/install/arch/local.psmp' Compiling cp2k... Removing intermediate container 93d0c8f77a95 ---> 52dd4503cc43 Step 35/42 : COPY ./data ./data ---> de72a42cf031 Step 36/42 : COPY ./tests ./tests ---> 9cc7cdd9a82a Step 37/42 : COPY ./tools/regtesting ./tools/regtesting ---> c10546b27aca Step 38/42 : COPY ./benchmarks ./benchmarks ---> 4032a8300eed Step 39/42 : COPY ./tools/docker/scripts/test_performance.sh ./tools/docker/scripts/plot_performance.py ./ ---> 83f58fcb5668 Step 40/42 : RUN ./test_performance.sh "local" 2>&1 | tee report.log ---> Running in 5ed253a96363 ========== Compiling CP2K ========== Compiling cp2k... done. Checking benchmark inputs... Found 60 input files and 0 errors. ========== Running Performance Test ========== Running H2O-64.inp with 1 threads and 32 ranks... done. Running H2O-64.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.027 0.027 91.064 91.064 qs_mol_dyn_low 1 2.0 0.002 0.002 90.436 90.436 qs_forces 11 3.9 0.001 0.001 90.397 90.397 qs_energies 11 4.9 0.001 0.001 84.263 84.263 scf_env_do_scf 11 5.9 0.001 0.001 73.473 73.473 velocity_verlet 10 3.0 0.002 0.002 57.796 57.796 scf_env_do_scf_inner_loop 108 6.5 0.011 0.011 56.524 56.524 qs_scf_new_mos 108 7.5 0.001 0.001 22.244 22.244 qs_scf_loop_do_ot 108 8.5 0.001 0.001 22.243 22.243 ot_scf_mini 108 9.5 0.002 0.002 20.821 20.821 rebuild_ks_matrix 119 8.3 0.001 0.001 20.608 20.608 qs_ks_build_kohn_sham_matrix 119 9.3 0.011 0.011 20.607 20.607 dbcsr_multiply_generic 2286 12.5 0.164 0.164 20.289 20.289 qs_rho_update_rho_low 119 7.7 0.001 0.001 19.153 19.153 calculate_rho_elec 119 8.7 0.946 0.946 19.152 19.152 qs_ks_update_qs_env 119 7.6 0.001 0.001 18.916 18.916 init_scf_loop 11 6.9 0.000 0.000 16.788 16.788 grid_collocate_task_list 119 9.7 14.840 14.840 14.840 14.840 prepare_preconditioner 11 7.9 0.000 0.000 14.315 14.315 make_preconditioner 11 8.9 0.000 0.000 14.314 14.314 make_full_inverse_cholesky 11 9.9 0.000 0.000 13.187 13.187 sum_up_and_integrate 119 10.3 0.189 0.189 12.798 12.798 integrate_v_rspace 119 11.3 0.090 0.090 12.608 12.608 ot_mini 108 10.5 0.001 0.001 11.929 11.929 make_m2s 4572 13.5 0.045 0.045 11.143 11.143 grid_integrate_task_list 119 12.3 10.593 10.593 10.593 10.593 qs_ot_get_derivative 108 11.5 0.001 0.001 6.291 6.291 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 6.236 6.236 pw_transfer 1439 11.6 0.055 0.055 5.999 5.999 dbcsr_make_dense_low 5837 15.5 0.070 0.070 5.978 5.978 make_dense_data 5837 16.5 5.307 5.307 5.894 5.894 fft_wrap_pw1pw2 1201 12.6 0.006 0.006 5.799 5.799 ot_diis_step 108 11.5 0.004 0.004 5.636 5.636 make_images 4572 14.5 2.147 2.147 5.564 5.564 qs_ot_get_p 119 10.4 0.001 0.001 5.299 5.299 dbcsr_make_images_dense 3978 14.8 0.018 0.018 5.251 5.251 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 5.134 5.134 apply_single 119 13.6 0.000 0.000 5.133 5.133 multiply_cannon 2286 13.5 0.168 0.168 5.097 5.097 fft_wrap_pw1pw2_140 487 13.2 0.450 0.450 4.965 4.965 multiply_cannon_loop 2286 14.5 0.106 0.106 4.639 4.639 cp_fm_cholesky_decompose 22 10.9 4.603 4.603 4.603 4.603 multiply_cannon_multrec 2286 15.5 4.479 4.479 4.532 4.532 cp_fm_cholesky_invert 11 10.9 4.267 4.267 4.267 4.267 qs_ot_p2m_diag 50 11.0 0.154 0.154 3.942 3.942 init_scf_run 11 5.9 0.002 0.002 3.883 3.883 scf_env_initial_rho_setup 11 6.9 0.001 0.001 3.881 3.881 dbcsr_complete_redistribute 329 12.2 1.883 1.883 3.762 3.762 dbcsr_copy 2102 12.0 0.216 0.216 3.672 3.672 cp_dbcsr_syevd 50 12.0 0.002 0.002 3.550 3.550 build_core_hamiltonian_matrix_ 11 4.9 0.000 0.000 3.480 3.480 cp_fm_diag_elpa 50 13.0 0.000 0.000 3.445 3.445 cp_fm_diag_elpa_base 50 14.0 3.384 3.384 3.444 3.444 dbcsr_copy_into_existing 22 7.9 3.416 3.416 3.417 3.417 density_rs2pw 119 9.7 0.003 0.003 3.366 3.366 wfi_extrapolate 11 7.9 0.001 0.001 3.364 3.364 qs_env_update_s_mstruct 11 6.9 0.000 0.000 3.296 3.296 copy_dbcsr_to_fm 153 11.3 0.002 0.002 3.086 3.086 qs_create_task_list 11 7.9 0.000 0.000 2.888 2.888 generate_qs_task_list 11 8.9 1.968 1.968 2.888 2.888 fft3d_s 1202 14.6 2.863 2.863 2.868 2.868 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 2.652 2.652 build_core_hamiltonian_matrix 11 6.9 0.000 0.000 2.534 2.534 transfer_dbcsr_to_fm 11 10.9 0.000 0.000 2.424 2.424 pw_poisson_solve 119 10.3 0.895 0.895 2.359 2.359 dbcsr_data_release 279534 16.0 2.149 2.149 2.149 2.149 qs_ot_get_derivative_diag 49 12.0 0.001 0.001 2.002 2.002 potential_pw2rs 119 12.3 0.046 0.046 1.925 1.925 qs_ot_get_derivative_taylor 59 13.0 0.001 0.001 1.916 1.916 copy_fm_to_dbcsr 176 11.2 0.001 0.001 1.881 1.881 dbcsr_finalize 5186 13.8 0.113 0.113 1.837 1.837 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.008 0.022 49.153 49.163 qs_mol_dyn_low 1 2.0 0.003 0.004 49.041 49.045 qs_forces 11 3.9 0.001 0.001 49.001 49.002 qs_energies 11 4.9 0.001 0.001 45.688 45.690 scf_env_do_scf 11 5.9 0.000 0.002 41.967 41.968 scf_env_do_scf_inner_loop 108 6.5 0.003 0.019 38.756 38.758 velocity_verlet 10 3.0 0.001 0.003 29.083 29.084 rebuild_ks_matrix 119 8.3 0.000 0.001 18.741 18.794 qs_ks_build_kohn_sham_matrix 119 9.3 0.014 0.020 18.741 18.793 qs_ks_update_qs_env 119 7.6 0.001 0.001 16.707 16.758 dbcsr_multiply_generic 2286 12.5 0.069 0.083 13.676 15.618 sum_up_and_integrate 119 10.3 0.017 0.021 14.215 14.363 integrate_v_rspace 119 11.3 0.004 0.005 14.198 14.346 qs_rho_update_rho_low 119 7.7 0.001 0.001 14.267 14.276 calculate_rho_elec 119 8.7 0.029 0.031 14.266 14.275 grid_collocate_task_list 119 9.7 9.158 10.905 9.158 10.905 qs_scf_new_mos 108 7.5 0.001 0.001 10.808 10.862 qs_scf_loop_do_ot 108 8.5 0.001 0.001 10.807 10.862 grid_integrate_task_list 119 12.3 8.196 10.754 8.196 10.754 ot_scf_mini 108 9.5 0.002 0.003 10.172 10.230 multiply_cannon 2286 13.5 0.128 0.137 9.886 10.116 multiply_cannon_loop 2286 14.5 0.089 0.115 9.326 9.582 mp_waitall_1 169478 16.3 8.463 8.993 8.463 8.993 rs_pw_transfer 974 11.9 0.010 0.013 5.840 6.221 ot_mini 108 10.5 0.001 0.002 5.888 5.952 multiply_cannon_metrocomm3 18288 15.5 0.037 0.050 5.469 5.899 density_rs2pw 119 9.7 0.005 0.012 4.750 5.130 multiply_cannon_multrec 18288 15.5 2.981 3.419 2.990 3.431 potential_pw2rs 119 12.3 0.006 0.007 3.382 3.396 pw_transfer 1439 11.6 0.082 0.093 3.262 3.344 mp_alltoall_d11v 2130 13.8 2.819 3.202 2.819 3.202 init_scf_loop 11 6.9 0.000 0.000 3.197 3.198 fft_wrap_pw1pw2 1201 12.6 0.008 0.009 3.119 3.191 mp_waitany 9880 13.7 2.685 3.170 2.685 3.170 qs_ot_get_derivative 108 11.5 0.001 0.001 3.075 3.131 rs_gather_matrices 119 12.3 0.075 0.089 2.581 2.976 mp_sum_l 11218 13.2 0.976 2.834 0.976 2.834 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 2.714 2.813 apply_single 119 13.6 0.000 0.000 2.714 2.813 ot_diis_step 108 11.5 0.003 0.004 2.791 2.791 fft_wrap_pw1pw2_140 487 13.2 0.232 0.285 2.623 2.767 rs_pw_transfer_RS2PW_140 130 11.5 0.249 0.294 2.330 2.735 init_scf_run 11 5.9 0.000 0.004 2.576 2.576 scf_env_initial_rho_setup 11 6.9 0.000 0.003 2.576 2.576 make_m2s 4572 13.5 0.045 0.056 2.387 2.453 fft3d_ps 1201 14.6 1.104 1.265 2.282 2.375 wfi_extrapolate 11 7.9 0.001 0.001 2.333 2.333 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 2.165 2.175 make_images 4572 14.5 0.117 0.139 2.063 2.127 qs_ot_get_p 119 10.4 0.001 0.001 1.412 1.484 rs_pw_transfer_PW2RS_140 130 13.9 0.503 0.634 1.402 1.442 mp_sum_d 4129 12.0 1.001 1.417 1.001 1.417 make_images_data 4572 15.5 0.034 0.043 1.135 1.290 prepare_preconditioner 11 7.9 0.000 0.000 1.154 1.170 make_preconditioner 11 8.9 0.000 0.000 1.154 1.170 mp_alltoall_z22v 1201 16.6 0.947 1.132 0.947 1.132 build_core_hamiltonian_matrix_ 11 4.9 0.000 0.000 0.877 1.121 hybrid_alltoall_any 4725 16.4 0.062 0.228 0.977 1.094 make_full_inverse_cholesky 11 9.9 0.000 0.000 1.053 1.080 qs_ot_get_derivative_taylor 59 13.0 0.001 0.001 0.979 1.034 qs_ot_get_derivative_diag 49 12.0 0.001 0.001 0.999 1.027 ------------------------------------------------------------------------------- Plot: name="H2O-64_timings_32omp", title="Timings of H2O-64 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32omp", name="rest", label="rest", y=51.24199999999999, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=14.84, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=10.593, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="make_dense_data", label="make_dense_data", y=5.307, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=4.603, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=4.479, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_alltoall_d11v", label="mp_alltoall_d11v", y=0.0, yerr=0.0 Plot: name="H2O-64_timings_32mpi", title="Timings of H2O-64 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32mpi", name="rest", label="rest", y=17.535999999999998, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=9.158, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=8.196, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="make_dense_data", label="make_dense_data", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=2.981, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=8.463, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_alltoall_d11v", label="mp_alltoall_d11v", y=2.819, yerr=0.0 Running H2O-64_nonortho.inp with 1 threads and 32 ranks... done. Running H2O-64_nonortho.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_nonortho_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.027 0.027 113.855 113.855 qs_mol_dyn_low 1 2.0 0.003 0.003 113.226 113.226 qs_forces 11 3.9 0.001 0.001 113.187 113.187 qs_energies 11 4.9 0.001 0.001 105.355 105.355 scf_env_do_scf 11 5.9 0.001 0.001 92.703 92.703 scf_env_do_scf_inner_loop 96 6.5 0.010 0.010 74.486 74.486 velocity_verlet 10 3.0 0.002 0.002 71.819 71.819 rebuild_ks_matrix 107 8.3 0.001 0.001 33.653 33.653 qs_ks_build_kohn_sham_matrix 107 9.3 0.010 0.010 33.652 33.652 qs_rho_update_rho_low 107 7.7 0.000 0.000 31.299 31.299 calculate_rho_elec 107 8.7 0.852 0.852 31.299 31.299 qs_ks_update_qs_env 107 7.6 0.001 0.001 30.282 30.282 grid_collocate_task_list 107 9.7 27.217 27.217 27.217 27.217 sum_up_and_integrate 107 10.3 0.167 0.167 26.671 26.671 integrate_v_rspace 107 11.3 0.082 0.082 26.505 26.505 grid_integrate_task_list 107 12.3 24.699 24.699 24.699 24.699 qs_scf_new_mos 96 7.5 0.000 0.000 19.659 19.659 qs_scf_loop_do_ot 96 8.5 0.001 0.001 19.659 19.659 ot_scf_mini 96 9.5 0.002 0.002 18.462 18.462 init_scf_loop 11 6.9 0.000 0.000 18.076 18.076 dbcsr_multiply_generic 1966 12.4 0.146 0.146 17.741 17.741 prepare_preconditioner 11 7.9 0.000 0.000 14.000 14.000 make_preconditioner 11 8.9 0.000 0.000 14.000 14.000 make_full_inverse_cholesky 11 9.9 0.000 0.000 12.869 12.869 ot_mini 96 10.5 0.001 0.001 10.417 10.417 make_m2s 3932 13.4 0.040 0.040 9.738 9.738 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 6.673 6.673 pw_transfer 1295 11.6 0.050 0.050 5.608 5.608 qs_ot_get_derivative 96 11.5 0.001 0.001 5.585 5.585 fft_wrap_pw1pw2 1081 12.6 0.005 0.005 5.416 5.416 init_scf_run 11 5.9 0.002 0.002 5.301 5.301 scf_env_initial_rho_setup 11 6.9 0.001 0.001 5.299 5.299 dbcsr_make_dense_low 4961 15.5 0.060 0.060 5.255 5.255 make_dense_data 4961 16.5 4.568 4.568 5.183 5.183 qs_ot_get_p 107 10.4 0.001 0.001 4.843 4.843 ot_diis_step 96 11.5 0.003 0.003 4.829 4.829 make_images 3932 14.4 1.850 1.850 4.777 4.777 wfi_extrapolate 11 7.9 0.001 0.001 4.708 4.708 fft_wrap_pw1pw2_140 439 13.2 0.411 0.411 4.653 4.653 dbcsr_make_images_dense 3386 14.7 0.016 0.016 4.613 4.613 cp_fm_cholesky_decompose 22 10.9 4.519 4.519 4.519 4.519 multiply_cannon 1966 13.4 0.176 0.176 4.474 4.474 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 4.424 4.424 apply_single 107 13.6 0.000 0.000 4.423 4.423 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 4.215 4.215 cp_fm_cholesky_invert 11 10.9 4.200 4.200 4.200 4.200 multiply_cannon_loop 1966 14.4 0.112 0.112 4.027 4.027 multiply_cannon_multrec 1966 15.4 3.870 3.870 3.914 3.914 dbcsr_copy 1855 11.9 0.188 0.188 3.837 3.837 dbcsr_complete_redistribute 317 12.2 1.880 1.880 3.748 3.748 qs_ot_p2m_diag 44 11.0 0.137 0.137 3.737 3.737 qs_env_update_s_mstruct 11 6.9 0.000 0.000 3.674 3.674 dbcsr_copy_into_existing 22 7.9 3.621 3.621 3.621 3.621 build_core_hamiltonian_matrix_ 11 4.9 0.000 0.000 3.615 3.615 cp_dbcsr_syevd 44 12.0 0.002 0.002 3.406 3.406 cp_fm_diag_elpa 44 13.0 0.000 0.000 3.313 3.313 cp_fm_diag_elpa_base 44 14.0 3.262 3.262 3.313 3.313 qs_create_task_list 11 7.9 0.000 0.000 3.312 3.312 generate_qs_task_list 11 8.9 2.402 2.402 3.312 3.312 density_rs2pw 107 9.7 0.003 0.003 3.230 3.230 copy_dbcsr_to_fm 147 11.2 0.002 0.002 3.046 3.046 fft3d_s 1082 14.6 2.680 2.680 2.684 2.684 build_core_hamiltonian_matrix 11 6.9 0.000 0.000 2.592 2.592 transfer_dbcsr_to_fm 11 10.9 0.000 0.000 2.319 2.319 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_nonortho_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.008 0.022 82.972 82.982 qs_mol_dyn_low 1 2.0 0.003 0.004 82.872 82.876 qs_forces 11 3.9 0.001 0.001 82.833 82.833 qs_energies 11 4.9 0.001 0.001 77.331 77.332 scf_env_do_scf 11 5.9 0.000 0.002 71.780 71.780 scf_env_do_scf_inner_loop 96 6.5 0.002 0.016 66.592 66.593 velocity_verlet 10 3.0 0.001 0.003 48.912 48.913 rebuild_ks_matrix 107 8.3 0.000 0.000 36.920 36.969 qs_ks_build_kohn_sham_matrix 107 9.3 0.012 0.017 36.920 36.969 sum_up_and_integrate 107 10.3 0.016 0.018 32.940 32.968 integrate_v_rspace 107 11.3 0.004 0.005 32.924 32.954 qs_ks_update_qs_env 107 7.6 0.001 0.001 32.633 32.677 qs_rho_update_rho_low 107 7.7 0.000 0.001 31.347 31.355 calculate_rho_elec 107 8.7 0.026 0.027 31.347 31.355 grid_integrate_task_list 107 12.3 22.882 29.719 22.882 29.719 grid_collocate_task_list 107 9.7 21.984 28.409 21.984 28.409 dbcsr_multiply_generic 1966 12.4 0.061 0.071 12.125 12.211 rs_pw_transfer 878 11.9 0.009 0.015 10.084 11.049 density_rs2pw 107 9.7 0.004 0.005 9.040 9.972 qs_scf_new_mos 96 7.5 0.000 0.001 9.508 9.563 qs_scf_loop_do_ot 96 8.5 0.001 0.001 9.508 9.562 multiply_cannon 1966 13.4 0.114 0.125 8.853 9.153 ot_scf_mini 96 9.5 0.002 0.002 8.962 9.017 multiply_cannon_loop 1966 14.4 0.083 0.102 8.336 8.507 mp_waitany 8968 13.7 7.265 8.382 7.265 8.382 mp_alltoall_d11v 1998 13.7 7.159 8.122 7.159 8.122 mp_waitall_1 146670 16.2 7.560 8.006 7.560 8.006 rs_gather_matrices 107 12.3 0.073 0.083 6.939 7.884 rs_pw_transfer_RS2PW_140 118 11.5 0.216 0.240 6.884 7.851 multiply_cannon_metrocomm3 15728 15.4 0.034 0.044 4.906 5.373 ot_mini 96 10.5 0.001 0.001 5.293 5.350 init_scf_loop 11 6.9 0.000 0.000 5.175 5.176 init_scf_run 11 5.9 0.000 0.004 4.435 4.435 scf_env_initial_rho_setup 11 6.9 0.000 0.003 4.435 4.435 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 4.411 4.420 wfi_extrapolate 11 7.9 0.001 0.001 4.025 4.025 potential_pw2rs 107 12.3 0.005 0.006 3.062 3.081 pw_transfer 1295 11.6 0.075 0.093 2.904 2.952 multiply_cannon_multrec 15728 15.4 2.643 2.887 2.652 2.897 qs_ot_get_derivative 96 11.5 0.001 0.001 2.798 2.856 fft_wrap_pw1pw2 1081 12.6 0.007 0.008 2.774 2.813 ot_diis_step 96 11.5 0.003 0.003 2.469 2.470 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 2.426 2.466 apply_single 107 13.6 0.000 0.000 2.426 2.466 fft_wrap_pw1pw2_140 439 13.2 0.211 0.255 2.353 2.441 make_m2s 3932 13.4 0.039 0.049 2.137 2.206 fft3d_ps 1081 14.6 0.990 1.103 2.005 2.051 make_images 3932 14.4 0.104 0.122 1.850 1.910 ------------------------------------------------------------------------------- Plot: name="H2O-64_nonortho_timings_32omp", title="Timings of H2O-64_nonortho with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="rest", label="rest", y=48.652000000000015, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=27.217, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=24.699, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="make_dense_data", label="make_dense_data", y=4.568, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=4.519, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=4.2, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_alltoall_d11v", label="mp_alltoall_d11v", y=0.0, yerr=0.0 Plot: name="H2O-64_nonortho_timings_32mpi", title="Timings of H2O-64_nonortho with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="rest", label="rest", y=16.121999999999986, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=21.984, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=22.882, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="make_dense_data", label="make_dense_data", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=7.56, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitany", label="mp_waitany", y=7.265, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_alltoall_d11v", label="mp_alltoall_d11v", y=7.159, yerr=0.0 Running H2O-hyb.inp with 1 threads and 32 ranks... done. Running H2O-hyb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-hyb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.174 0.174 111.685 111.685 qs_energies 1 2.0 0.000 0.000 110.898 110.898 scf_env_do_scf 1 3.0 0.000 0.000 109.758 109.758 qs_ks_update_qs_env 8 5.0 0.000 0.000 104.116 104.116 rebuild_ks_matrix 7 6.0 0.000 0.000 104.059 104.059 qs_ks_build_kohn_sham_matrix 7 7.0 0.001 0.001 104.059 104.059 hfx_ks_matrix 7 8.0 0.000 0.000 94.704 94.704 integrate_four_center 7 9.0 1.302 1.302 94.660 94.660 integrate_four_center_main 7 10.0 0.894 0.894 82.115 82.115 integrate_four_center_bin 446 11.0 81.221 81.221 81.221 81.221 scf_env_do_scf_inner_loop 7 4.0 0.001 0.001 57.919 57.919 init_scf_loop 1 4.0 0.000 0.000 51.829 51.829 integrate_four_center_load 7 10.0 0.000 0.000 10.983 10.983 hfx_load_balance 1 11.0 0.015 0.015 10.983 10.983 hfx_load_balance_count 1 12.0 5.482 5.482 5.482 5.482 hfx_load_balance_bin 1 12.0 5.469 5.469 5.469 5.469 qs_vxc_create 14 8.0 0.000 0.000 3.737 3.737 xc_vxc_pw_create 14 9.0 0.136 0.136 3.737 3.737 xc_rho_set_and_dset_create 14 10.0 0.108 0.108 2.974 2.974 prepare_preconditioner 1 5.0 0.000 0.000 2.498 2.498 make_preconditioner 1 6.0 0.000 0.000 2.498 2.498 calculate_rho_elec 15 7.4 0.117 0.117 2.467 2.467 xc_functional_eval 35 11.0 0.000 0.000 2.320 2.320 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-hyb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.198 0.220 133.712 133.722 qs_energies 1 2.0 0.000 0.000 133.352 133.360 scf_env_do_scf 1 3.0 0.000 0.000 132.976 132.976 qs_ks_update_qs_env 8 5.0 0.000 0.000 130.861 130.861 rebuild_ks_matrix 7 6.0 0.000 0.000 130.838 130.838 qs_ks_build_kohn_sham_matrix 7 7.0 0.001 0.002 130.838 130.838 hfx_ks_matrix 7 8.0 0.000 0.000 124.657 124.658 integrate_four_center 7 9.0 0.051 0.334 124.648 124.650 integrate_four_center_main 7 10.0 0.003 0.004 80.351 112.568 integrate_four_center_bin 448 11.0 80.348 112.565 80.348 112.565 scf_env_do_scf_inner_loop 7 4.0 0.000 0.001 75.099 75.103 init_scf_loop 1 4.0 0.000 0.000 57.872 57.872 mp_sync 70 11.3 32.234 34.194 32.234 34.194 integrate_four_center_load 7 10.0 0.000 0.000 11.374 11.379 hfx_load_balance 1 11.0 0.001 0.001 11.374 11.379 mp_sum_l 1135 8.3 5.685 5.970 5.685 5.970 hfx_load_balance_dist 1 12.0 0.000 0.000 5.563 5.849 hfx_load_balance_bin 1 12.0 2.864 5.683 2.864 5.683 hfx_load_balance_count 1 12.0 2.869 5.612 2.869 5.612 qs_vxc_create 14 8.0 0.000 0.000 2.943 2.944 xc_vxc_pw_create 14 9.0 0.008 0.010 2.943 2.943 ------------------------------------------------------------------------------- Plot: name="H2O-hyb_timings_32omp", title="Timings of H2O-hyb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32omp", name="rest", label="rest", y=17.316999999999993, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_bin", label="integrate_four_center_bin", y=81.221, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_count", label="hfx_load_balance_count", y=5.482, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=5.469, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center", label="integrate_four_center", y=1.302, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_main", label="integrate_four_center_main", y=0.894, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 Plot: name="H2O-hyb_timings_32mpi", title="Timings of H2O-hyb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32mpi", name="rest", label="rest", y=9.657999999999987, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_bin", label="integrate_four_center_bin", y=80.348, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_count", label="hfx_load_balance_count", y=2.869, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=2.864, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center", label="integrate_four_center", y=0.051, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_main", label="integrate_four_center_main", y=0.003, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="mp_sync", label="mp_sync", y=32.234, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=5.685, yerr=0.0 Running GW_PBE_4benzene.inp with 1 threads and 32 ranks... done. Running GW_PBE_4benzene.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/GW_PBE_4benzene_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.017 0.017 82.077 82.077 qs_energies 1 2.0 0.000 0.000 81.669 81.669 mp2_main 1 3.0 0.000 0.000 78.106 78.106 mp2_gpw_main 1 4.0 0.000 0.000 77.976 77.976 rpa_ri_compute_en 1 5.0 0.000 0.000 74.733 74.733 rpa_num_int 1 6.0 0.001 0.001 74.728 74.728 compute_mat_P_omega 1 7.0 0.003 0.003 64.051 64.051 compute_mat_P_omega_contract 10 8.0 8.428 8.428 63.841 63.841 dbt_total 2336 9.6 0.011 0.011 51.732 51.732 dbt_contract 787 11.0 0.063 0.063 45.397 45.397 dbt_tas_total 1149 12.2 0.250 0.250 44.092 44.092 dbt_tas_multiply 807 12.1 0.002 0.002 42.870 42.870 dbt_tas_dbm 807 14.1 0.003 0.003 36.198 36.198 dbm_multiply 807 16.1 36.190 36.190 36.190 36.190 dbt_tas_mm_1N 524 15.1 0.001 0.001 25.053 25.053 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 22.401 22.401 compute_mat_P_omega_calc_M_occ 250 9.0 8.435 8.435 17.573 17.573 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 10.388 10.388 dbt_tas_mm_2 251 15.0 0.001 0.001 9.236 9.236 compute_QP_energies 1 7.0 0.000 0.000 5.758 5.758 compute_self_energy_cubic_gw 1 8.0 0.047 0.047 5.757 5.757 dbt_copy 1103 10.7 0.096 0.096 5.097 5.097 contract_cubic_gw 21 9.0 0.000 0.000 4.633 4.633 scf_env_do_scf 1 3.0 0.000 0.000 3.447 3.447 scf_env_do_scf_inner_loop 17 4.0 0.002 0.002 3.446 3.446 mp2_ri_gpw_compute_in 1 5.0 0.000 0.000 3.236 3.236 dbt_tas_reserve_blocks_index 3261 14.3 0.149 0.149 2.920 2.920 dbm_reserve_blocks 3628 15.3 2.834 2.834 2.834 2.834 convert_to_new_pgrid 2421 14.1 0.163 0.163 2.690 2.690 dbm_copy 1614 15.1 2.526 2.526 2.526 2.526 dbt_crop 1042 12.0 1.546 1.546 2.401 2.401 dbt_reserve_blocks_index 2280 13.1 0.052 0.052 2.149 2.149 dbt_reserve_blocks_index_array 2222 12.2 0.010 0.010 2.144 2.144 dbt_tas_copy 574 11.4 1.345 1.345 2.142 2.142 compute_W_cubic_GW 10 7.0 0.004 0.004 2.027 2.027 rpa_num_int_RPA_matrix_operati 10 7.0 0.000 0.000 1.948 1.948 qs_scf_new_mos 17 5.0 0.000 0.000 1.887 1.887 dbt_tas_reshape 367 15.0 0.006 0.006 1.792 1.792 get_2c_integrals 1 6.0 0.000 0.000 1.735 1.735 dbt_tas_mm_3N 22 15.1 0.000 0.000 1.725 1.725 dbt_reshape 278 11.9 0.991 0.991 1.718 1.718 ------------------------------------------------------------------------------- From /workspace/artifacts/GW_PBE_4benzene_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.006 0.021 32.351 32.361 qs_energies 1 2.0 0.000 0.000 32.251 32.252 mp2_main 1 3.0 0.000 0.001 31.275 31.276 mp2_gpw_main 1 4.0 0.000 0.000 31.239 31.241 rpa_ri_compute_en 1 5.0 0.000 0.000 30.032 30.033 rpa_num_int 1 6.0 0.001 0.002 30.031 30.033 dbt_total 2336 9.6 0.011 0.019 26.753 26.770 compute_mat_P_omega 1 7.0 0.001 0.005 25.039 25.063 compute_mat_P_omega_contract 10 8.0 0.376 0.403 24.888 24.893 dbt_contract 787 11.0 0.024 0.028 20.348 20.391 dbt_tas_total 1149 12.2 0.049 0.064 18.298 18.298 dbt_tas_multiply 807 12.1 0.002 0.002 18.245 18.247 dbt_tas_dbm 807 14.1 0.003 0.003 13.604 13.650 dbm_multiply 807 16.1 10.060 10.906 10.060 10.906 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 7.469 7.470 compute_mat_P_omega_calc_M_occ 250 9.0 0.365 0.391 7.265 7.265 mp_sync 8706 11.6 5.743 6.811 5.743 6.811 dbt_tas_mm_2 251 15.0 0.001 0.001 6.125 6.136 dbt_copy 1111 10.7 0.010 0.011 5.385 5.723 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 5.439 5.442 dbt_tas_mm_1N 524 15.1 0.001 0.002 4.649 5.422 dbt_reshape 1098 11.7 1.999 2.545 5.122 5.414 compute_QP_energies 1 7.0 0.000 0.000 3.388 3.390 compute_self_energy_cubic_gw 1 8.0 0.002 0.003 3.386 3.389 contract_cubic_gw 21 9.0 0.000 0.000 2.755 2.755 mp_waitall_2 3776 15.3 2.484 2.754 2.484 2.754 dbt_communicate_buffer 1098 12.7 0.053 0.071 2.531 2.673 dbt_crop 1042 12.0 0.854 1.221 1.303 1.699 dbt_reserve_blocks_index_array 2791 12.2 0.008 0.011 1.400 1.661 dbt_reserve_blocks_index 2849 13.1 0.063 0.078 1.399 1.658 dbt_tas_reserve_blocks_index 3300 14.5 0.114 0.149 1.367 1.626 dbm_reserve_blocks 3696 15.4 1.338 1.588 1.338 1.588 dbt_tas_replicate 396 14.1 0.530 0.682 1.108 1.230 mp2_ri_gpw_compute_in 1 5.0 0.000 0.000 1.204 1.206 convert_to_new_pgrid 2421 14.1 0.023 0.028 0.726 0.996 compute_mat_P_omega_copy_M_vir 250 9.0 0.001 0.001 0.967 0.972 dbm_copy 1608 15.1 0.697 0.969 0.697 0.969 cp_gemm 105 8.4 0.000 0.000 0.959 0.966 cp_gemm_cosma 105 9.4 0.959 0.966 0.959 0.966 scf_env_do_scf 1 3.0 0.000 0.000 0.940 0.940 scf_env_do_scf_inner_loop 17 4.0 0.000 0.002 0.940 0.940 compute_mat_P_omega_copy_M_occ 250 9.0 0.001 0.001 0.907 0.917 mp_max_i 1992 9.8 0.680 0.857 0.680 0.857 compute_W_cubic_GW 10 7.0 0.001 0.001 0.740 0.747 dbm_add 807 14.1 0.590 0.691 0.590 0.691 dbt_tas_mm_3N 22 15.1 0.000 0.000 0.495 0.672 ------------------------------------------------------------------------------- Plot: name="GW_PBE_4benzene_timings_32omp", title="Timings of GW_PBE_4benzene with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="rest", label="rest", y=22.673000000000002, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=36.19, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="compute_mat_P_omega_calc_M_occ", label="compute_mat_P_omega_calc_M_occ", y=8.435, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="compute_mat_P_omega_contract", label="compute_mat_P_omega_contract", y=8.428, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=2.834, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_copy", label="dbm_copy", y=2.526, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbt_reshape", label="dbt_reshape", y=0.991, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 Plot: name="GW_PBE_4benzene_timings_32mpi", title="Timings of GW_PBE_4benzene with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="rest", label="rest", y=9.289000000000001, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=10.06, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="compute_mat_P_omega_calc_M_occ", label="compute_mat_P_omega_calc_M_occ", y=0.365, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="compute_mat_P_omega_contract", label="compute_mat_P_omega_contract", y=0.376, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=1.338, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_copy", label="dbm_copy", y=0.697, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbt_reshape", label="dbt_reshape", y=1.999, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_sync", label="mp_sync", y=5.743, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=2.484, yerr=0.0 Running RI-HFX_H2O-32.inp with 1 threads and 32 ranks... done. Running RI-HFX_H2O-32.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/RI-HFX_H2O-32_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.018 0.018 316.340 316.340 qs_forces 1 2.0 0.000 0.000 315.788 315.788 rebuild_ks_matrix 7 6.6 0.000 0.000 314.282 314.282 qs_ks_build_kohn_sham_matrix 7 7.6 0.001 0.001 314.282 314.282 hfx_ks_matrix 7 8.6 0.000 0.000 312.406 312.406 dbt_total 4861 11.6 0.028 0.028 265.866 265.866 hfx_ri_update_ks 7 9.6 0.000 0.000 265.084 265.084 hfx_ri_update_ks_Pmat 7 10.6 31.530 31.530 265.077 265.077 dbt_tas_total 2391 14.1 0.905 0.905 239.477 239.477 qs_energies 1 3.0 0.000 0.000 231.321 231.321 scf_env_do_scf 1 4.0 0.000 0.000 231.016 231.016 qs_ks_update_qs_env 8 6.0 0.000 0.000 229.888 229.888 dbt_contract 1473 13.0 0.175 0.175 223.187 223.187 dbt_tas_multiply 1482 14.0 0.004 0.004 213.343 213.343 hfx_ri_update_ks_Pmat_KS 567 11.6 0.004 0.004 196.382 196.382 dbt_tas_dbm 1482 16.0 0.006 0.006 194.391 194.391 dbm_multiply 1482 18.0 194.373 194.373 194.373 194.373 dbt_tas_mm_2 649 17.1 0.004 0.004 169.479 169.479 scf_env_do_scf_inner_loop 6 5.0 0.001 0.001 146.409 146.409 init_scf_loop 2 5.0 0.000 0.000 84.605 84.605 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 84.397 84.397 hfx_ri_update_forces 1 7.0 0.000 0.000 47.319 47.319 hfx_ri_forces_Pmat_3c 1 8.0 0.002 0.002 32.628 32.628 dbt_tas_reshape 906 14.4 0.010 0.010 19.026 19.026 dbt_tas_mm_3T 659 17.1 0.002 0.002 18.419 18.419 dbt_copy 2331 12.4 0.195 0.195 16.504 16.504 hfx_ri_update_ks_Pmat_Px3C 567 11.6 0.002 0.002 16.361 16.361 dbt_tas_merge 649 14.1 11.415 11.415 12.313 12.313 hfx_ri_pre_scf_Pmat 1 12.0 0.001 0.001 11.992 11.992 precalc_derivatives 1 8.0 0.005 0.005 11.875 11.875 dbt_tas_reshape_buffer_fill 906 15.4 11.247 11.247 11.247 11.247 dbm_reserve_blocks 8303 16.8 10.208 10.208 10.208 10.208 dbt_tas_reserve_blocks_index 7397 16.0 0.348 0.348 9.843 9.843 dbt_crop 2763 14.2 6.688 6.688 9.480 9.480 dbt_reshape 856 13.9 5.171 5.171 9.068 9.068 dbt_reserve_blocks_index 4998 15.2 0.121 0.121 7.402 7.402 dbt_reserve_blocks_index_array 4963 14.3 0.017 0.017 7.358 7.358 build_3c_derivatives 9 9.0 2.414 2.414 6.773 6.773 dbt_tas_reshape_buffer_obtain 906 15.4 5.685 5.685 6.352 6.352 ------------------------------------------------------------------------------- From /workspace/artifacts/RI-HFX_H2O-32_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.006 0.021 57.199 57.210 qs_forces 1 2.0 0.000 0.000 57.017 57.017 rebuild_ks_matrix 7 6.6 0.000 0.000 56.346 56.346 qs_ks_build_kohn_sham_matrix 7 7.6 0.001 0.002 56.346 56.346 hfx_ks_matrix 7 8.6 0.000 0.001 55.374 55.384 dbt_total 4861 11.6 0.024 0.027 49.904 49.916 hfx_ri_update_ks 7 9.6 0.000 0.000 38.784 38.784 hfx_ri_update_ks_Pmat 7 10.6 1.336 2.425 38.783 38.783 dbt_contract 1473 13.0 0.084 0.093 38.577 38.589 dbt_tas_total 2391 14.1 0.106 0.127 36.334 36.335 qs_energies 1 3.0 0.000 0.001 35.460 35.460 scf_env_do_scf 1 4.0 0.000 0.001 35.327 35.327 qs_ks_update_qs_env 8 6.0 0.000 0.000 34.804 34.804 dbt_tas_multiply 1482 14.0 0.004 0.005 32.272 32.276 dbt_tas_dbm 1482 16.0 0.004 0.005 24.880 24.899 dbm_multiply 1482 18.0 16.187 22.081 16.187 22.081 hfx_ri_update_ks_Pmat_KS 567 11.6 0.004 0.004 21.968 21.969 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 21.543 21.544 scf_env_do_scf_inner_loop 6 5.0 0.000 0.001 20.866 20.866 hfx_ri_update_forces 1 7.0 0.000 0.000 16.590 16.599 mp_sync 17513 13.6 13.133 16.171 13.133 16.171 dbt_tas_mm_2 649 17.1 0.003 0.003 15.499 15.516 init_scf_loop 2 5.0 0.000 0.000 14.460 14.460 hfx_ri_forces_Pmat_3c 1 8.0 0.002 0.002 11.578 11.587 hfx_ri_update_ks_Pmat_Px3C 567 11.6 0.002 0.002 7.585 7.585 dbt_copy 2349 12.4 0.027 0.031 6.018 6.520 dbt_crop 2763 14.2 2.862 4.501 3.487 5.259 dbt_reshape 1256 13.5 1.988 2.471 4.151 4.452 dbt_tas_mm_3T 659 17.1 0.002 0.008 3.656 4.402 precalc_derivatives 1 8.0 0.002 0.008 3.857 3.857 dbt_tas_mm_3N 163 16.5 0.000 0.000 3.754 3.840 hfx_ri_pre_scf_Pmat 1 12.0 0.000 0.000 3.647 3.647 dbt_tas_merge 649 14.1 1.540 2.525 2.744 3.271 mp_waitall_2 5988 16.5 2.855 3.150 2.855 3.150 mp_max_i 3372 12.5 2.136 2.476 2.136 2.476 dbm_reserve_blocks 8337 16.9 1.938 2.318 1.938 2.318 dbt_tas_reserve_blocks_index 7428 16.1 0.250 0.393 1.869 2.297 dbt_tas_communicate_buffer 1825 16.3 0.058 0.076 2.008 2.276 dbt_tas_replicate 909 15.6 0.569 0.760 2.090 2.186 hfx_ri_pre_scf_Pmat_RIx3C 81 13.0 0.000 0.000 2.074 2.086 dbt_reserve_blocks_index 5398 15.2 0.108 0.152 1.572 1.913 dbt_reserve_blocks_index_array 5363 14.2 0.012 0.014 1.570 1.912 build_3c_derivatives 9 9.0 0.227 0.354 1.786 1.789 dbt_tas_reshape 916 14.4 0.008 0.010 1.594 1.706 mp_alltoall_i 4339 15.3 1.531 1.663 1.531 1.663 dbt_communicate_buffer 1256 14.5 0.042 0.058 1.472 1.585 mp_sum_l 38201 15.3 1.138 1.435 1.138 1.435 hfx_ri_update_ks_Pmat_copy_2 567 11.6 0.001 0.002 1.392 1.408 convert_to_new_pgrid 4446 16.0 0.035 0.042 1.115 1.305 dbm_copy 3043 16.9 1.080 1.266 1.080 1.266 ------------------------------------------------------------------------------- Plot: name="RI-HFX_H2O-32_timings_32omp", title="Timings of RI-HFX_H2O-32 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="rest", label="rest", y=50.87899999999996, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=194.373, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=31.53, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_tas_merge", label="dbt_tas_merge", y=11.415, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_tas_reshape_buffer_fill", label="dbt_tas_reshape_buffer_fill", y=11.247, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=10.208, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_crop", label="dbt_crop", y=6.688, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_max_i", label="mp_max_i", y=0.0, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 Plot: name="RI-HFX_H2O-32_timings_32mpi", title="Timings of RI-HFX_H2O-32 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="rest", label="rest", y=15.212000000000003, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=16.187, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=1.336, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_tas_merge", label="dbt_tas_merge", y=1.54, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_tas_reshape_buffer_fill", label="dbt_tas_reshape_buffer_fill", y=0.0, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=1.938, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_crop", label="dbt_crop", y=2.862, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_max_i", label="mp_max_i", y=2.136, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_sync", label="mp_sync", y=13.133, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=2.855, yerr=0.0 Running RI-MP2_ammonia.inp with 1 threads and 32 ranks... done. Running RI-MP2_ammonia.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/RI-MP2_ammonia_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.015 0.015 189.983 189.983 qs_energies 1 2.0 0.000 0.000 189.796 189.796 mp2_main 1 3.0 0.000 0.000 184.660 184.660 mp2_gpw_main 1 4.0 0.001 0.001 184.247 184.247 mp2_ri_gpw_compute_in 1 5.0 0.363 0.363 130.213 130.213 mp2_ri_gpw_compute_in_loop 1 6.0 0.009 0.009 110.846 110.846 mp2_eri_3c_integrate_gpw 2656 7.0 0.013 0.013 83.379 83.379 integrate_v_rspace 2666 8.0 0.694 0.694 70.166 70.166 grid_integrate_task_list 2666 9.0 67.448 67.448 67.448 67.448 mp2_ri_gpw_compute_en 1 5.0 0.075 0.075 54.013 54.013 mp2_ri_gpw_compute_en_RI_loop 1 6.0 9.600 9.600 52.291 52.291 mp2_ri_gpw_compute_en_expansio 2080 7.0 2.142 2.142 35.739 35.739 offload_gemm 2080 8.0 33.596 33.596 33.596 33.596 calculate_wavefunction 5312 9.0 15.883 15.883 23.300 23.300 dbcsr_multiply_generic 5322 8.0 0.177 0.177 21.549 21.549 ao_to_mo_and_store_B_mult_1 2656 7.0 0.009 0.009 21.528 21.528 get_2c_integrals 1 6.0 0.000 0.000 19.004 19.004 compute_2c_integrals 1 7.0 0.005 0.005 18.137 18.137 compute_2c_integrals_loop_lm 1 8.0 0.012 0.012 18.118 18.118 mp2_eri_2c_integrate_gpw 1 9.0 3.230 3.230 18.106 18.106 pw_transfer 63872 10.6 0.915 0.915 11.616 11.616 multiply_cannon 5322 9.0 0.434 0.434 10.568 10.568 fft_wrap_pw1pw2 53228 11.4 0.111 0.111 10.476 10.476 multiply_cannon_loop 5322 10.0 0.313 0.313 9.216 9.216 make_m2s 10644 9.0 0.062 0.062 8.680 8.680 make_images 10644 10.0 3.222 3.222 8.350 8.350 multiply_cannon_multrec 5322 11.0 7.416 7.416 7.453 7.453 fft_wrap_pw1pw2_20 21271 12.4 0.627 0.627 7.430 7.430 fft3d_s 53229 13.4 6.591 6.591 6.623 6.623 ao_to_mo_and_store_B_E_Ex_1 2656 7.0 2.149 2.149 5.835 5.835 mp2_ri_gpw_compute_en_ener 2080 7.0 5.036 5.036 5.036 5.036 scf_env_do_scf 1 3.0 0.000 0.000 4.741 4.741 scf_env_do_scf_inner_loop 10 4.0 0.001 0.001 4.740 4.740 copy_dbcsr_to_fm 2679 8.0 0.026 0.026 4.221 4.221 potential_pw2rs 5322 10.0 0.143 0.143 4.030 4.030 ------------------------------------------------------------------------------- From /workspace/artifacts/RI-MP2_ammonia_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.006 0.021 41.039 41.049 qs_energies 1 2.0 0.000 0.000 40.967 40.967 mp2_main 1 3.0 0.000 0.001 38.692 38.693 mp2_gpw_main 1 4.0 0.001 0.001 38.577 38.577 mp2_ri_gpw_compute_in 1 5.0 0.043 0.046 17.874 23.309 mp2_ri_gpw_compute_in_loop 1 6.0 0.001 0.001 16.157 21.593 mp2_ri_gpw_compute_en 1 5.0 0.153 0.163 20.626 21.053 mp2_eri_3c_integrate_gpw 83 7.0 0.001 0.001 14.191 19.696 integrate_v_rspace 93 8.1 0.099 0.111 14.102 19.510 grid_integrate_task_list 93 9.1 13.752 19.216 13.752 19.216 mp2_ri_gpw_compute_en_RI_loop 1 6.0 0.635 0.773 14.160 14.187 mp2_ri_gpw_compute_en_expansio 65 7.0 0.067 0.082 10.775 10.983 offload_gemm 65 8.0 10.709 10.921 10.709 10.921 mp_min_d 2 7.0 5.489 5.959 5.489 5.959 mp2_ri_get_integ_group_size 1 6.0 0.000 0.000 5.437 5.864 mp2_ri_gpw_compute_en_comm 17 7.0 0.099 0.155 2.416 2.829 mp_sendrecv_dm3 510 8.0 1.846 2.436 1.846 2.436 dbcsr_multiply_generic 176 8.0 0.007 0.009 1.690 2.202 ao_to_mo_and_store_B_mult_1 83 7.0 0.001 0.005 1.673 2.185 scf_env_do_scf 1 3.0 0.000 0.000 2.146 2.147 scf_env_do_scf_inner_loop 10 4.0 0.000 0.001 2.146 2.147 get_2c_integrals 1 6.0 0.000 0.000 1.659 1.685 compute_2c_integrals 1 7.0 0.002 0.004 1.373 1.388 compute_2c_integrals_loop_lm 1 8.0 0.001 0.003 0.988 1.308 mp2_eri_2c_integrate_gpw 1 9.0 0.208 0.328 0.987 1.306 multiply_cannon 176 9.0 0.014 0.016 1.024 1.207 qs_scf_new_mos 10 5.0 0.000 0.000 1.091 1.160 multiply_cannon_loop 176 10.0 0.002 0.002 0.967 1.146 calculate_wavefunction 166 9.0 0.494 0.696 0.856 1.103 eigensolver 11 5.8 0.001 0.001 1.034 1.035 make_m2s 352 9.0 0.003 0.003 0.631 0.954 multiply_cannon_multrec 246 11.0 0.845 0.939 0.850 0.946 make_images 352 10.0 0.051 0.060 0.619 0.941 cp_fm_diag_elpa 11 6.8 0.000 0.000 0.878 0.879 cp_fm_redistribute_end 11 7.8 0.331 0.872 0.343 0.873 pw_transfer 2120 10.5 0.039 0.050 0.767 0.852 cp_fm_diag_elpa_base 11 7.8 0.515 0.830 0.526 0.844 ------------------------------------------------------------------------------- Plot: name="RI-MP2_ammonia_timings_32omp", title="Timings of RI-MP2_ammonia with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="rest", label="rest", y=56.04000000000002, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=67.448, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="offload_gemm", label="offload_gemm", y=33.596, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="calculate_wavefunction", label="calculate_wavefunction", y=15.883, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp2_ri_gpw_compute_en_RI_loop", label="mp2_ri_gpw_compute_en_RI_loop", y=9.6, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=7.416, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp_sendrecv_dm3", label="mp_sendrecv_dm3", y=0.0, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp_min_d", label="mp_min_d", y=0.0, yerr=0.0 Plot: name="RI-MP2_ammonia_timings_32mpi", title="Timings of RI-MP2_ammonia with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="rest", label="rest", y=7.2690000000000055, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=13.752, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="offload_gemm", label="offload_gemm", y=10.709, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="calculate_wavefunction", label="calculate_wavefunction", y=0.494, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp2_ri_gpw_compute_en_RI_loop", label="mp2_ri_gpw_compute_en_RI_loop", y=0.635, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=0.845, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp_sendrecv_dm3", label="mp_sendrecv_dm3", y=1.846, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp_min_d", label="mp_min_d", y=5.489, yerr=0.0 Running diag_cu144_broy.inp with 1 threads and 32 ranks... done. Running diag_cu144_broy.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/diag_cu144_broy_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.073 0.073 125.313 125.313 qs_energies 1 2.0 0.000 0.000 124.013 124.013 scf_env_do_scf 1 3.0 0.000 0.000 117.393 117.393 scf_env_do_scf_inner_loop 15 4.0 0.002 0.002 117.393 117.393 qs_ks_update_qs_env 15 5.0 0.000 0.000 49.432 49.432 rebuild_ks_matrix 15 6.0 0.000 0.000 49.223 49.223 qs_ks_build_kohn_sham_matrix 15 7.0 0.002 0.002 49.223 49.223 qs_scf_new_mos 15 5.0 0.000 0.000 43.244 43.244 eigensolver 15 6.0 0.001 0.001 35.679 35.679 qs_vxc_create 15 8.0 0.046 0.046 33.875 33.875 calculate_dispersion_nonloc 15 9.0 7.196 7.196 29.535 29.535 cp_fm_diag_elpa 15 7.0 0.000 0.000 22.903 22.903 cp_fm_diag_elpa_base 15 8.0 20.299 20.299 22.903 22.903 pw_transfer 1191 10.0 0.051 0.051 22.475 22.475 fft_wrap_pw1pw2 1086 11.0 0.008 0.008 22.294 22.294 qs_rho_update_rho_low 16 5.0 0.000 0.000 21.877 21.877 calculate_rho_elec 16 6.0 0.215 0.215 21.877 21.877 grid_collocate_task_list 16 7.0 20.448 20.448 20.448 20.448 fft_wrap_pw1pw2_150 765 12.0 3.199 3.199 16.206 16.206 sum_up_and_integrate 15 8.0 0.039 0.039 13.920 13.920 integrate_v_rspace 15 9.0 0.019 0.019 13.880 13.880 grid_integrate_task_list 15 10.0 13.378 13.378 13.378 13.378 cp_fm_cholesky_restore 45 7.0 10.580 10.580 10.580 10.580 fft3d_s 1087 13.0 10.069 10.069 10.076 10.076 pw_scatter_s 585 13.1 7.145 7.145 7.145 7.145 fft_wrap_pw1pw2_200 197 12.3 0.684 0.684 5.872 5.872 copy_dbcsr_to_fm 16 5.9 0.000 0.000 5.447 5.447 dbcsr_complete_redistribute 46 8.3 2.315 2.315 5.413 5.413 cp_fm_upper_to_full 30 8.0 4.798 4.798 4.798 4.798 vdW_energy 15 10.0 4.391 4.391 4.391 4.391 xc_vxc_pw_create 15 9.0 0.210 0.210 4.294 4.294 gspace_mixing 14 5.0 0.171 0.171 4.035 4.035 broyden_mixing 14 6.0 3.437 3.437 3.437 3.437 init_scf_run 1 3.0 0.000 0.000 3.166 3.166 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 3.014 3.014 xc_pw_derive 90 11.0 0.001 0.001 2.690 2.690 ------------------------------------------------------------------------------- From /workspace/artifacts/diag_cu144_broy_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.020 0.036 63.287 63.297 qs_energies 1 2.0 0.000 0.000 62.977 62.977 scf_env_do_scf 1 3.0 0.000 0.001 58.865 58.866 scf_env_do_scf_inner_loop 15 4.0 0.001 0.003 58.865 58.866 qs_ks_update_qs_env 15 5.0 0.000 0.000 25.319 25.328 rebuild_ks_matrix 15 6.0 0.000 0.000 25.285 25.295 qs_ks_build_kohn_sham_matrix 15 7.0 0.002 0.003 25.285 25.295 qs_rho_update_rho_low 16 5.0 0.000 0.000 20.934 20.937 calculate_rho_elec 16 6.0 0.007 0.007 20.933 20.937 grid_collocate_task_list 16 7.0 19.502 19.970 19.502 19.970 sum_up_and_integrate 15 8.0 0.006 0.008 14.449 14.491 integrate_v_rspace 15 9.0 0.001 0.001 14.443 14.487 grid_integrate_task_list 15 10.0 13.302 13.835 13.302 13.835 qs_scf_new_mos 15 5.0 0.000 0.000 13.272 13.306 eigensolver 15 6.0 0.001 0.002 12.292 12.322 qs_vxc_create 15 8.0 0.001 0.001 10.529 10.541 cp_fm_diag_elpa 15 7.0 0.000 0.000 8.992 8.996 cp_fm_diag_elpa_base 15 8.0 8.843 8.879 8.989 8.993 calculate_dispersion_nonloc 15 9.0 0.964 1.784 8.683 8.708 pw_transfer 1191 10.0 0.071 0.088 8.108 8.215 fft_wrap_pw1pw2 1086 11.0 0.010 0.013 7.958 8.084 fft3d_ps 1086 13.0 2.344 2.637 6.398 6.592 fft_wrap_pw1pw2_150 765 12.0 0.231 0.283 5.535 5.603 mp_alltoall_z22v 1086 15.0 3.522 4.025 3.522 4.025 cp_fm_cholesky_restore 45 7.0 3.140 3.180 3.140 3.180 yz_to_x 501 13.9 0.202 0.322 2.490 2.773 qs_energies_init_hamiltonians 1 3.0 0.000 0.001 2.472 2.473 fft_wrap_pw1pw2_200 197 12.3 0.161 0.200 2.312 2.370 build_core_hamiltonian_matrix 1 4.0 0.000 0.000 2.161 2.347 xc_vxc_pw_create 15 9.0 0.014 0.018 1.845 1.873 x_to_yz 585 14.1 0.306 0.353 1.541 1.728 rs_pw_transfer 158 9.4 0.001 0.002 1.362 1.635 density_rs2pw 16 7.0 0.001 0.001 1.310 1.502 init_scf_run 1 3.0 0.000 0.001 1.413 1.414 build_core_ppnl 1 5.0 1.267 1.383 1.267 1.383 vdW_energy 15 10.0 1.313 1.376 1.313 1.376 xc_pw_derive 90 11.0 0.001 0.002 1.255 1.327 scf_env_initial_rho_setup 1 4.0 0.000 0.000 1.323 1.324 mp_waitany 520 11.3 0.975 1.268 0.975 1.268 ------------------------------------------------------------------------------- Plot: name="diag_cu144_broy_timings_32omp", title="Timings of diag_cu144_broy with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32omp", name="rest", label="rest", y=50.539, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=20.448, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=20.299, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=13.378, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=10.58, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_s", label="fft3d_s", y=10.069, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=0.0, yerr=0.0 Plot: name="diag_cu144_broy_timings_32mpi", title="Timings of diag_cu144_broy with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="rest", label="rest", y=14.978000000000002, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=19.502, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=8.843, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=13.302, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=3.14, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=3.522, yerr=0.0 Running bench_dftb.inp with 1 threads and 32 ranks... done. Running bench_dftb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/bench_dftb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.081 0.081 303.738 303.738 qs_energies 1 2.0 0.000 0.000 303.595 303.595 ls_scf 1 3.0 0.000 0.000 302.358 302.358 ls_scf_main 1 4.0 0.002 0.002 293.321 293.321 density_matrix_trs4 11 5.0 0.012 0.012 200.478 200.478 arnoldi_extremal 12 6.1 0.000 0.000 114.641 114.641 arnoldi_normal_ev 12 7.1 0.014 0.014 114.641 114.641 dbcsr_matrix_vector_mult 652 9.0 0.176 0.176 112.783 112.783 build_subspace 23 8.1 0.075 0.075 112.711 112.711 dbcsr_matrix_vector_mult_local 652 10.0 111.377 111.377 111.386 111.386 ls_scf_dm_to_ks 11 5.0 0.000 0.000 87.620 87.620 matrix_ls_to_qs 11 6.0 0.000 0.000 84.414 84.414 dbcsr_multiply_generic 185 6.1 0.875 0.875 74.829 74.829 dbcsr_copy_into_existing 11 7.0 45.907 45.907 45.907 45.907 multiply_cannon 185 7.1 0.289 0.289 45.410 45.410 dbcsr_complete_redistribute 23 7.5 30.890 30.890 42.145 42.145 matrix_decluster 11 7.0 0.000 0.000 38.506 38.506 multiply_cannon_loop 185 8.1 0.266 0.266 33.025 33.025 make_m2s 370 7.1 0.038 0.038 24.790 24.790 make_images 370 8.1 10.479 10.479 23.201 23.201 multiply_cannon_multrec 185 9.1 22.896 22.896 23.097 23.097 dbcsr_finalize 646 7.5 0.190 0.190 14.685 14.685 dbcsr_merge_all 597 8.5 2.225 2.225 13.567 13.567 setup_rec_index_2d 370 8.1 11.998 11.998 11.998 11.998 dbcsr_sort_indices 1103 9.9 10.762 10.762 10.762 10.762 tree_to_linear_d 110 9.4 10.069 10.069 10.069 10.069 calculate_norms 370 9.1 9.662 9.662 9.662 9.662 quick_finalize 395 10.0 0.346 0.346 9.258 9.258 dbcsr_special_finalize 370 9.1 0.002 0.002 8.562 8.562 ls_scf_init_scf 1 4.0 0.000 0.000 8.349 8.349 ls_scf_init_matrix_S 1 5.0 0.000 0.000 8.018 8.018 matrix_sqrt_Newton_Schulz 1 6.0 0.001 0.001 7.372 7.372 ------------------------------------------------------------------------------- From /workspace/artifacts/bench_dftb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.009 0.025 66.626 66.637 qs_energies 1 2.0 0.000 0.000 66.536 66.536 ls_scf 1 3.0 0.000 0.000 66.477 66.477 ls_scf_main 1 4.0 0.000 0.007 64.017 64.017 density_matrix_trs4 11 5.0 0.006 0.019 61.187 61.279 dbcsr_multiply_generic 185 6.1 0.057 0.085 57.058 57.412 multiply_cannon 185 7.1 0.031 0.034 47.281 48.370 multiply_cannon_loop 185 8.1 0.108 0.128 44.894 45.749 multiply_cannon_multrec 1480 9.1 27.413 30.898 27.673 31.233 mp_waitall_1 11936 10.3 15.047 18.016 15.047 18.016 multiply_cannon_metrocomm3 1480 9.1 0.012 0.017 8.830 13.577 multiply_cannon_metrocomm1 1480 9.1 0.007 0.010 3.537 7.645 make_m2s 370 7.1 0.034 0.037 6.665 6.751 make_images 370 8.1 0.625 0.731 6.539 6.623 calculate_norms 2960 9.1 4.686 6.336 4.686 6.336 arnoldi_extremal 12 6.1 0.000 0.000 3.255 3.278 arnoldi_normal_ev 12 7.1 0.001 0.003 3.255 3.278 make_images_data 370 9.1 0.009 0.013 2.899 3.225 build_subspace 23 8.1 0.019 0.024 3.145 3.149 mp_sum_l 1119 5.6 2.149 3.135 2.149 3.135 dbcsr_matrix_vector_mult 652 9.0 0.009 0.043 2.219 2.869 hybrid_alltoall_any 393 9.9 0.168 0.704 2.491 2.798 dbcsr_matrix_vector_mult_local 652 10.0 1.670 2.542 1.672 2.545 ls_scf_dm_to_ks 11 5.0 0.000 0.000 2.459 2.538 dbcsr_complete_redistribute 23 7.5 1.218 1.595 2.027 2.168 matrix_ls_to_qs 11 6.0 0.000 0.000 1.946 2.104 dbcsr_multiply_generic_mpsum_f 137 7.1 0.000 0.000 1.217 2.058 matrix_decluster 11 7.0 0.000 0.000 1.804 1.926 ls_scf_init_scf 1 4.0 0.000 0.000 1.867 1.868 ls_scf_init_matrix_S 1 5.0 0.000 0.000 1.840 1.848 make_images_pack 370 9.1 1.579 1.831 1.582 1.838 matrix_sqrt_Newton_Schulz 1 6.0 0.000 0.001 1.683 1.686 buffer_matrices_ensure_size 370 8.1 1.271 1.487 1.271 1.487 mp_sum_d 1403 6.7 1.086 1.440 1.086 1.440 dbcsr_finalize 646 7.5 0.007 0.009 1.246 1.406 ------------------------------------------------------------------------------- Plot: name="bench_dftb_timings_32omp", title="Timings of bench_dftb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32omp", name="rest", label="rest", y=71.00800000000001, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=111.377, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=45.907, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=30.89, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=22.896, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="setup_rec_index_2d", label="setup_rec_index_2d", y=11.998, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="calculate_norms", label="calculate_norms", y=9.662, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 Plot: name="bench_dftb_timings_32mpi", title="Timings of bench_dftb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32mpi", name="rest", label="rest", y=14.442999999999998, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=1.67, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=1.218, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=27.413, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="setup_rec_index_2d", label="setup_rec_index_2d", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="calculate_norms", label="calculate_norms", y=4.686, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=15.047, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=2.149, yerr=0.0 Running dbcsr.inp with 1 threads and 32 ranks... done. Running dbcsr.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/dbcsr_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.007 69.928 69.928 lib_test 1 2.0 0.000 0.000 69.920 69.920 dbcsr_run_tests 3 3.0 0.002 0.002 69.920 69.920 test_multiplies_multiproc 3 4.0 0.001 0.001 54.696 54.696 dbcsr_redistribute 9 5.0 35.742 35.742 37.245 37.245 dbcsr_multiply_generic 9 5.0 0.001 0.001 15.875 15.875 dbcsr_make_random_matrix 9 4.0 12.395 12.395 15.127 15.127 multiply_cannon 9 6.0 0.006 0.006 11.405 11.405 multiply_cannon_loop 9 7.0 0.027 0.027 11.034 11.034 multiply_cannon_multrec 9 8.0 11.006 11.006 11.007 11.007 dbcsr_finalize 27 5.7 0.019 0.019 5.129 5.129 dbcsr_merge_all 18 6.5 1.886 1.886 4.461 4.461 dbcsr_data_release 975 7.6 2.480 2.480 2.480 2.480 tree_to_linear_d 9 7.0 1.769 1.769 1.769 1.769 make_m2s 18 6.0 0.001 0.001 1.538 1.538 make_images 18 7.0 0.522 0.522 1.454 1.454 ------------------------------------------------------------------------------- From /workspace/artifacts/dbcsr_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.003 0.010 17.312 17.316 lib_test 1 2.0 0.000 0.000 17.278 17.300 dbcsr_run_tests 3 3.0 0.000 0.001 17.277 17.299 test_multiplies_multiproc 3 4.0 0.000 0.002 16.455 16.513 dbcsr_multiply_generic 9 5.0 0.001 0.001 14.673 14.763 multiply_cannon 9 6.0 0.001 0.002 12.961 13.354 multiply_cannon_loop 9 7.0 0.002 0.002 12.690 13.059 multiply_cannon_multrec 72 8.0 10.535 11.434 10.536 11.435 mp_waitall_1 576 9.2 2.455 3.179 2.455 3.179 multiply_cannon_metrocomm1 72 8.0 0.001 0.001 1.920 2.770 dbcsr_make_random_matrix 9 4.0 0.666 0.927 0.794 1.018 mp_sum_l 390 2.5 0.517 0.998 0.517 0.998 dbcsr_multiply_generic_mpsum_f 9 6.0 0.000 0.000 0.513 0.994 dbcsr_data_release 444 7.6 0.612 0.696 0.612 0.696 make_m2s 18 6.0 0.001 0.001 0.650 0.694 make_images 18 7.0 0.021 0.030 0.647 0.690 multiply_cannon_metrocomm3 72 8.0 0.000 0.000 0.228 0.675 dbcsr_finalize 27 5.7 0.000 0.000 0.576 0.661 dbcsr_destroy 111 5.9 0.000 0.000 0.507 0.612 dbcsr_merge_all 18 6.5 0.086 0.107 0.469 0.568 dbcsr_checksum 6 5.0 0.153 0.523 0.523 0.523 make_images_data 18 8.0 0.000 0.001 0.341 0.403 dbcsr_redistribute 9 5.0 0.221 0.266 0.377 0.402 mp_sum_d 191 1.2 0.371 0.390 0.371 0.390 hybrid_alltoall_any 18 9.0 0.030 0.129 0.303 0.356 ------------------------------------------------------------------------------- Plot: name="dbcsr_timings_32omp", title="Timings of dbcsr with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32omp", name="rest", label="rest", y=6.418999999999997, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_redistribute", label="dbcsr_redistribute", y=35.742, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=12.395, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=11.006, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_data_release", label="dbcsr_data_release", y=2.48, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_merge_all", label="dbcsr_merge_all", y=1.886, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="dbcsr_timings_32mpi", title="Timings of dbcsr with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32mpi", name="rest", label="rest", y=2.2200000000000006, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_redistribute", label="dbcsr_redistribute", y=0.221, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=0.666, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=10.535, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_data_release", label="dbcsr_data_release", y=0.612, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_merge_all", label="dbcsr_merge_all", y=0.086, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=0.517, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=2.455, yerr=0.0 Running MQAE_single_node.inp with 1 threads and 32 ranks... done. Running MQAE_single_node.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/MQAE_single_node_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.047 0.047 131.145 131.145 qs_mol_dyn_low 1 2.0 0.003 0.003 129.812 129.812 velocity_verlet 5 3.0 0.003 0.003 106.095 106.095 qmmm_el_coupling 6 3.8 0.000 0.000 85.857 85.857 qmmm_elec_with_gaussian 6 4.8 0.088 0.088 85.849 85.849 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 85.137 85.137 qmmm_elec_gaussian_low_G 6 6.8 84.237 84.237 84.237 84.237 qs_forces 6 3.8 0.000 0.000 35.362 35.362 qs_energies 6 4.8 0.000 0.000 31.370 31.370 scf_env_do_scf 6 5.8 0.001 0.001 29.177 29.177 scf_env_do_scf_inner_loop 39 6.8 0.004 0.004 25.240 25.240 rebuild_ks_matrix 45 8.4 0.000 0.000 24.543 24.543 qs_ks_build_kohn_sham_matrix 45 9.4 0.005 0.005 24.542 24.542 qs_ks_update_qs_env 45 7.8 0.000 0.000 20.976 20.976 pw_transfer 966 12.3 0.045 0.045 16.717 16.717 fft_wrap_pw1pw2 801 13.6 0.006 0.006 16.490 16.490 fft_wrap_pw1pw2_150 507 15.2 2.044 2.044 16.048 16.048 qs_vxc_create 45 10.4 0.001 0.001 13.339 13.339 xc_vxc_pw_create 45 11.4 0.645 0.645 13.338 13.338 xc_pw_derive 270 13.4 0.002 0.002 9.162 9.162 fft3d_s 802 15.6 7.598 7.598 7.606 7.606 qs_rho_update_rho_low 45 7.9 0.000 0.000 7.130 7.130 calculate_rho_elec 45 8.9 0.563 0.563 7.130 7.130 xc_rho_set_and_dset_create 45 12.4 0.711 0.711 7.099 7.099 xc_pw_divergence 45 12.4 0.001 0.001 5.545 5.545 pw_scatter_s 429 15.8 5.503 5.503 5.503 5.503 qmmm_forces 6 3.8 0.001 0.001 5.405 5.405 qmmm_forces_with_gaussian 6 4.8 0.071 0.071 5.070 5.070 pw_integral_ab 2539 7.4 4.269 4.269 4.269 4.269 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 4.259 4.259 qs_ks_ddapc 45 10.4 0.001 0.001 4.070 4.070 init_scf_loop 6 6.8 0.000 0.000 3.932 3.932 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 3.574 3.574 qmmm_forces_gaussian_low_G 6 6.8 3.555 3.555 3.555 3.555 grid_collocate_task_list 45 9.9 3.352 3.352 3.352 3.352 density_rs2pw 45 9.9 0.001 0.001 3.215 3.215 sum_up_and_integrate 45 10.4 0.120 0.120 3.121 3.121 integrate_v_rspace 45 11.4 0.006 0.006 3.001 3.001 pw_poisson_solve 51 9.9 1.248 1.248 2.965 2.965 ------------------------------------------------------------------------------- From /workspace/artifacts/MQAE_single_node_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.035 0.052 57.844 57.855 qs_mol_dyn_low 1 2.0 0.003 0.004 56.669 56.724 qs_forces 6 3.8 0.001 0.001 40.556 40.556 qs_energies 6 4.8 0.000 0.000 38.656 38.656 scf_env_do_scf 6 5.8 0.000 0.001 37.684 37.684 scf_env_do_scf_inner_loop 113 6.2 0.002 0.015 36.166 36.167 rebuild_ks_matrix 119 8.1 0.000 0.003 26.553 26.562 qs_ks_build_kohn_sham_matrix 119 9.1 0.014 0.020 26.552 26.562 qs_ks_update_qs_env 119 7.3 0.001 0.001 24.979 24.991 velocity_verlet 5 3.0 0.002 0.003 24.210 24.213 pw_transfer 2446 12.3 0.167 0.196 16.980 17.454 fft_wrap_pw1pw2 2059 13.4 0.021 0.024 16.598 17.122 fft_wrap_pw1pw2_150 1321 14.9 1.173 1.396 15.851 16.311 fft3d_ps 2059 15.4 6.505 7.872 12.572 13.544 qs_vxc_create 119 10.1 0.002 0.002 13.338 13.341 xc_vxc_pw_create 119 11.1 0.146 0.210 13.336 13.340 qs_rho_update_rho_low 119 7.3 0.000 0.001 10.905 10.909 calculate_rho_elec 119 8.3 0.049 0.055 10.905 10.908 xc_pw_derive 714 13.1 0.009 0.012 10.013 10.293 sum_up_and_integrate 119 10.1 0.052 0.067 9.500 9.759 integrate_v_rspace 119 11.1 0.003 0.004 9.447 9.706 qmmm_forces 6 3.8 0.002 0.002 8.119 8.120 qmmm_forces_with_gaussian 6 4.8 0.344 0.405 7.410 7.978 rs_pw_transfer 988 11.5 0.010 0.019 7.364 7.660 xc_rho_set_and_dset_create 119 12.1 0.353 0.595 6.489 7.197 qmmm_el_coupling 6 3.8 0.000 0.000 6.979 7.164 qmmm_elec_with_gaussian 6 4.8 0.329 0.373 6.978 7.163 xc_pw_divergence 119 12.1 0.004 0.006 6.459 6.722 density_rs2pw 119 9.3 0.005 0.008 6.339 6.635 mp_alltoall_z22v 2059 17.4 4.756 6.453 4.756 6.453 potential_pw2rs 119 12.1 0.005 0.008 5.688 5.715 grid_collocate_task_list 119 9.3 4.365 5.061 4.365 5.061 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 3.938 4.139 x_to_yz 1095 16.8 0.747 0.850 3.283 4.072 grid_integrate_task_list 119 12.1 3.429 3.904 3.429 3.904 yz_to_x 964 16.0 0.512 0.721 2.732 3.665 mp_waitany 4028 12.8 2.750 3.653 2.750 3.653 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 3.395 3.648 qmmm_forces_gaussian_low_G 6 6.8 3.243 3.454 3.243 3.454 qmmm_elec_gaussian_low_G 6 6.8 2.805 3.048 2.805 3.048 rs_pw_transfer_PW2RS_150 125 13.9 1.147 1.411 2.845 2.977 pw_restrict_s3 18 5.8 1.370 1.686 2.503 2.821 rs_pw_transfer_RS2PW_150 125 11.2 0.855 1.118 2.436 2.744 qmmm_elec_with_gaussian:spline 6 5.8 0.000 0.000 2.062 2.320 pw_prolongate_s3 18 6.8 1.132 1.387 2.062 2.320 mp_waitall_1 188862 16.2 1.874 2.288 1.874 2.288 dbcsr_multiply_generic 2588 12.3 0.058 0.070 2.203 2.258 qs_scf_new_mos 113 7.2 0.000 0.000 2.143 2.148 qs_scf_loop_do_ot 113 8.2 0.000 0.000 2.142 2.148 ot_scf_mini 113 9.2 0.001 0.001 2.052 2.057 qs_ks_ddapc 119 10.1 0.002 0.002 1.888 2.014 mp_sum_dm3 33 5.7 1.717 1.861 1.717 1.861 pw_gather_p 964 15.0 1.246 1.692 1.246 1.692 pw_integral_ab 2761 7.7 1.091 1.243 1.483 1.670 mp_sum_d 5820 12.2 1.078 1.638 1.078 1.638 pw_scatter_p 1095 15.8 1.558 1.610 1.558 1.610 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 1.595 1.596 init_scf_loop 6 6.8 0.000 0.000 1.515 1.515 mp_sum_dm 514 5.2 0.730 1.303 0.730 1.303 ot_mini 113 10.2 0.001 0.002 1.267 1.274 rs_pw_transfer_PW2RS_40 119 14.1 0.214 0.272 0.994 1.257 ------------------------------------------------------------------------------- Plot: name="MQAE_single_node_timings_32omp", title="Timings of MQAE_single_node with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32omp", name="rest", label="rest", y=22.631, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=84.237, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_s", label="fft3d_s", y=7.598, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_scatter_s", label="pw_scatter_s", y=5.503, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_integral_ab", label="pw_integral_ab", y=4.269, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=3.555, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=3.352, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=0.0, yerr=0.0 Plot: name="MQAE_single_node_timings_32mpi", title="Timings of MQAE_single_node with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32mpi", name="rest", label="rest", y=31.650000000000002, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=2.805, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_scatter_s", label="pw_scatter_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_integral_ab", label="pw_integral_ab", y=1.091, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=3.243, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=4.365, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=6.505, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=3.429, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=4.756, yerr=0.0 Summary: Performance test took 35 minutes. Status: OK Removing intermediate container 5ed253a96363 ---> 47eba324c833 Step 41/42 : CMD cat $(find ./report.log -mmin +10) | sed '/^Summary:/ s/$/ (cached)/' ---> Running in 599d00773a71 Removing intermediate container 599d00773a71 ---> 566baf5607ce Step 42/42 : ENTRYPOINT [] ---> Running in 38a4f516df50 Removing intermediate container 38a4f516df50 ---> 9c64fc294dfc [Warning] One or more build-args [GIT_COMMIT_SHA] were not consumed Successfully built 9c64fc294dfc Successfully tagged gcr.io/cp2k-org-project/img_cp2k-perf-openmp-arch-14b:master Pushing new image... done. #################### Running Image cp2k-perf-openmp #################### Uploading artifacts... done EndDate: 2022-08-11 19:52:49+00:00