StartDate: 2023-04-19 12:38:57+00:00 CpuId: 32x AMD EPYC (3rd Gen) (Milan) [Zen 3], 7nm (SMT disabled) CommitSHA: 9d075a87ba6edda76ea5a496699dead09fac0ad2 CommitTime: 2023-04-19 09:57:41 +0200 CommitAuthor: Matthias Krack CommitSubject: Print setup info in arch files Populating docker build cache... done. #################### Building Image cp2k-perf-openmp #################### Dockerfile: /tools/docker/Dockerfile.test_performance Build-Path: / Build-Args: GIT_COMMIT_SHA=9d075a87ba6edda76ea5a496699dead09fac0ad2 DEPRECATED: The legacy builder is deprecated and will be removed in a future release. BuildKit is currently disabled; enable it by removing the DOCKER_BUILDKIT=0 environment-variable. Sending build context to Docker daemon 368.2MB Step 1/42 : FROM ubuntu:22.04 22.04: Pulling from library/ubuntu 2ab09b027e7f: Pulling fs layer 2ab09b027e7f: Verifying Checksum 2ab09b027e7f: Download complete 2ab09b027e7f: Pull complete Digest: sha256:67211c14fa74f070d27cc59d69a7fa9aeff8e28ea118ef3babc295a0428a6d21 Status: Downloaded newer image for ubuntu:22.04 ---> 08d22c0ceb15 Step 2/42 : WORKDIR /opt/cp2k-toolchain ---> Using cache ---> 47a3542ef7e0 Step 3/42 : COPY ./tools/toolchain/install_requirements*.sh ./ ---> Using cache ---> e86e12710088 Step 4/42 : RUN ./install_requirements.sh ubuntu:22.04 ---> Using cache ---> 5ad7a46db6e4 Step 5/42 : RUN mkdir scripts ---> Using cache ---> 2ed50184e1de Step 6/42 : COPY ./tools/toolchain/scripts/VERSION ./tools/toolchain/scripts/parse_if.py ./tools/toolchain/scripts/tool_kit.sh ./tools/toolchain/scripts/common_vars.sh ./tools/toolchain/scripts/signal_trap.sh ./tools/toolchain/scripts/get_openblas_arch.sh ./scripts/ ---> Using cache ---> 2cfaec0caa31 Step 7/42 : COPY ./tools/toolchain/install_cp2k_toolchain.sh . ---> Using cache ---> b4ad501fe7db Step 8/42 : RUN ./install_cp2k_toolchain.sh --install-all --with-gcc=system --dry-run ---> Using cache ---> ebe95e20e6a9 Step 9/42 : COPY ./tools/toolchain/scripts/stage0/ ./scripts/stage0/ ---> Using cache ---> c73a203e9c3f Step 10/42 : RUN ./scripts/stage0/install_stage0.sh && rm -rf ./build ---> Using cache ---> 9470f4aa3375 Step 11/42 : COPY ./tools/toolchain/scripts/stage1/ ./scripts/stage1/ ---> Using cache ---> bd71605db2e3 Step 12/42 : RUN ./scripts/stage1/install_stage1.sh && rm -rf ./build ---> Using cache ---> 4930d74e7769 Step 13/42 : COPY ./tools/toolchain/scripts/stage2/ ./scripts/stage2/ ---> Using cache ---> 76572f27044a Step 14/42 : RUN ./scripts/stage2/install_stage2.sh && rm -rf ./build ---> Using cache ---> 1f1fc62f9211 Step 15/42 : COPY ./tools/toolchain/scripts/stage3/ ./scripts/stage3/ ---> Using cache ---> 4f0fd9438964 Step 16/42 : RUN ./scripts/stage3/install_stage3.sh && rm -rf ./build ---> Using cache ---> bf5a77332a40 Step 17/42 : COPY ./tools/toolchain/scripts/stage4/ ./scripts/stage4/ ---> Using cache ---> 8a50fec887a3 Step 18/42 : RUN ./scripts/stage4/install_stage4.sh && rm -rf ./build ---> Using cache ---> 2687fd173970 Step 19/42 : COPY ./tools/toolchain/scripts/stage5/ ./scripts/stage5/ ---> Using cache ---> 13e107c430b0 Step 20/42 : RUN ./scripts/stage5/install_stage5.sh && rm -rf ./build ---> Using cache ---> 79b7a7abbba9 Step 21/42 : COPY ./tools/toolchain/scripts/stage6/ ./scripts/stage6/ ---> Using cache ---> 73d1a8ea4279 Step 22/42 : RUN ./scripts/stage6/install_stage6.sh && rm -rf ./build ---> Using cache ---> 9fe0578b6a0e Step 23/42 : COPY ./tools/toolchain/scripts/stage7/ ./scripts/stage7/ ---> Using cache ---> 52844398c378 Step 24/42 : RUN ./scripts/stage7/install_stage7.sh && rm -rf ./build ---> Using cache ---> 8c6814d60368 Step 25/42 : COPY ./tools/toolchain/scripts/stage8/ ./scripts/stage8/ ---> Using cache ---> 6590ce21dbaa Step 26/42 : RUN ./scripts/stage8/install_stage8.sh && rm -rf ./build ---> Using cache ---> 613547bde965 Step 27/42 : COPY ./tools/toolchain/scripts/arch_base.tmpl ./tools/toolchain/scripts/generate_arch_files.sh ./scripts/ ---> Using cache ---> b58f193db45b Step 28/42 : RUN ./scripts/generate_arch_files.sh && rm -rf ./build ---> Using cache ---> ff5781ea064a Step 29/42 : WORKDIR /opt/cp2k ---> Using cache ---> c062a2816ebf Step 30/42 : COPY ./Makefile . ---> Using cache ---> c6fde49c4000 Step 31/42 : COPY ./src ./src ---> Using cache ---> e19cc1737126 Step 32/42 : COPY ./exts ./exts ---> Using cache ---> 693fa66de63a Step 33/42 : COPY ./tools/build_utils ./tools/build_utils ---> Using cache ---> d1f01be1c628 Step 34/42 : RUN /bin/bash -c " mkdir -p arch && ln -vs /opt/cp2k-toolchain/install/arch/local.psmp ./arch/ && echo 'Compiling cp2k...' && source /opt/cp2k-toolchain/install/setup && ( make -j ARCH=local VERSION=psmp &> /dev/null || true ) && rm -rf lib obj && ( [ ! -f ./exe/local/cp2k.psmp ] || ldd ./exe/local/cp2k.psmp | grep -q libmpi )" ---> Running in 8897ac525c68 './arch/local.psmp' -> '/opt/cp2k-toolchain/install/arch/local.psmp' Compiling cp2k... Removing intermediate container 8897ac525c68 ---> d5f9cb546b70 Step 35/42 : COPY ./data ./data ---> dbe8b8ea6c44 Step 36/42 : COPY ./tests ./tests ---> 7bed93349096 Step 37/42 : COPY ./tools/regtesting ./tools/regtesting ---> 62ffe8a79170 Step 38/42 : COPY ./benchmarks ./benchmarks ---> 750491cc3c5d Step 39/42 : COPY ./tools/docker/scripts/test_performance.sh ./tools/docker/scripts/plot_performance.py ./ ---> 5dfab5fa732c Step 40/42 : RUN ./test_performance.sh "local" 2>&1 | tee report.log ---> Running in 3c68bce4ccaf ========== Compiling CP2K ========== Compiling cp2k... done. Checking benchmark inputs... Found 70 input files and 0 errors. ========== Running Performance Test ========== Running H2O-64.inp with 1 threads and 32 ranks... done. Running H2O-64.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.050 0.050 104.387 104.387 qs_mol_dyn_low 1 2.0 0.003 0.003 103.663 103.663 qs_forces 11 3.9 0.002 0.002 103.618 103.618 qs_energies 11 4.9 0.001 0.001 96.558 96.558 scf_env_do_scf 11 5.9 0.001 0.001 83.718 83.718 velocity_verlet 10 3.0 0.002 0.002 68.774 68.774 scf_env_do_scf_inner_loop 108 6.5 0.014 0.014 63.702 63.702 rebuild_ks_matrix 119 8.3 0.001 0.001 25.350 25.350 qs_ks_build_kohn_sham_matrix 119 9.3 0.016 0.016 25.349 25.349 qs_ks_update_qs_env 119 7.6 0.001 0.001 23.987 23.987 qs_rho_update_rho_low 119 7.7 0.001 0.001 23.011 23.011 calculate_rho_elec 119 8.7 1.056 1.056 23.010 23.010 dbcsr_multiply_generic 2286 12.5 0.166 0.166 22.886 22.886 qs_scf_new_mos 108 7.5 0.001 0.001 21.921 21.921 qs_scf_loop_do_ot 108 8.5 0.001 0.001 21.920 21.920 ot_scf_mini 108 9.5 0.003 0.003 20.280 20.280 init_scf_loop 11 6.9 0.000 0.000 19.833 19.833 grid_collocate_task_list 119 9.7 18.321 18.321 18.321 18.321 prepare_preconditioner 11 7.9 0.000 0.000 16.062 16.062 make_preconditioner 11 8.9 0.000 0.000 16.062 16.062 sum_up_and_integrate 119 10.3 1.337 1.337 15.507 15.507 make_full_inverse_cholesky 11 9.9 0.000 0.000 14.432 14.432 integrate_v_rspace 119 11.3 0.148 0.148 14.170 14.170 ot_mini 108 10.5 0.001 0.001 13.188 13.188 make_m2s 4572 13.5 0.047 0.047 11.906 11.906 grid_integrate_task_list 119 12.3 11.760 11.760 11.760 11.760 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 7.197 7.197 qs_ot_get_derivative 108 11.5 0.001 0.001 6.828 6.828 multiply_cannon 2286 13.5 0.236 0.236 6.535 6.535 pw_transfer 1439 11.6 0.085 0.085 6.461 6.461 ot_diis_step 108 11.5 0.004 0.004 6.355 6.355 make_images 4572 14.5 2.529 2.529 6.337 6.337 fft_wrap_pw1pw2 1201 12.6 0.008 0.008 6.158 6.158 multiply_cannon_loop 2286 14.5 0.052 0.052 5.916 5.916 dbcsr_make_dense_low 5837 15.5 0.068 0.068 5.902 5.902 multiply_cannon_multrec 2286 15.5 5.804 5.804 5.863 5.863 make_dense_data 5837 16.5 5.195 5.195 5.819 5.819 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 5.682 5.682 apply_single 119 13.6 0.000 0.000 5.682 5.682 fft_wrap_pw1pw2_140 487 13.2 0.501 0.501 5.244 5.244 dbcsr_make_images_dense 3978 14.8 0.017 0.017 5.182 5.182 dbcsr_complete_redistribute 329 12.2 2.866 2.866 5.177 5.177 cp_fm_cholesky_decompose 22 10.9 5.058 5.058 5.058 5.058 init_scf_run 11 5.9 0.002 0.002 4.827 4.827 scf_env_initial_rho_setup 11 6.9 0.001 0.001 4.825 4.825 cp_fm_cholesky_invert 11 10.9 4.293 4.293 4.293 4.293 wfi_extrapolate 11 7.9 0.001 0.001 4.289 4.289 copy_dbcsr_to_fm 153 11.3 0.003 0.003 4.121 4.121 qs_env_update_s_mstruct 11 6.9 0.000 0.000 4.054 4.054 dbcsr_copy 2102 12.0 0.324 0.324 3.733 3.733 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 3.670 3.670 density_rs2pw 119 9.7 0.006 0.006 3.633 3.633 qs_create_task_list 11 7.9 0.000 0.000 3.608 3.608 generate_qs_task_list 11 8.9 2.189 2.189 3.608 3.608 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 3.387 3.387 dbcsr_copy_into_existing 22 7.9 3.352 3.352 3.352 3.352 qs_ot_get_p 119 10.4 0.001 0.001 3.147 3.147 transfer_dbcsr_to_fm 11 10.9 0.000 0.000 3.128 3.128 pw_poisson_solve 119 10.3 0.765 0.765 3.126 3.126 fft3d_s 1202 14.6 3.007 3.007 3.013 3.013 dbcsr_dot_sd 1205 11.9 2.836 2.836 2.839 2.839 build_core_hamiltonian_matrix 11 6.9 0.001 0.001 2.680 2.680 copy_fm_to_dbcsr 176 11.2 0.001 0.001 2.627 2.627 dbcsr_data_release 279534 16.0 2.517 2.517 2.517 2.517 potential_pw2rs 119 12.3 0.078 0.078 2.261 2.261 dbcsr_finalize 5186 13.8 0.120 0.120 2.122 2.122 pw_integral_ab 119 11.3 2.117 2.117 2.117 2.117 qs_ot_get_derivative_diag 49 12.0 0.001 0.001 2.089 2.089 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.008 0.024 54.896 54.907 qs_mol_dyn_low 1 2.0 0.003 0.004 54.756 54.761 qs_forces 11 3.9 0.002 0.002 54.708 54.709 qs_energies 11 4.9 0.001 0.001 51.139 51.142 scf_env_do_scf 11 5.9 0.001 0.002 46.906 46.907 scf_env_do_scf_inner_loop 108 6.5 0.004 0.027 43.297 43.298 velocity_verlet 10 3.0 0.002 0.004 32.716 32.718 rebuild_ks_matrix 119 8.3 0.001 0.001 20.349 20.544 qs_ks_build_kohn_sham_matrix 119 9.3 0.020 0.024 20.348 20.543 qs_ks_update_qs_env 119 7.6 0.001 0.002 18.114 18.290 qs_rho_update_rho_low 119 7.7 0.001 0.001 16.824 16.837 calculate_rho_elec 119 8.7 0.032 0.033 16.823 16.836 dbcsr_multiply_generic 2286 12.5 0.091 0.100 14.890 15.222 sum_up_and_integrate 119 10.3 0.051 0.056 14.979 15.006 integrate_v_rspace 119 11.3 0.006 0.007 14.928 14.961 qs_scf_new_mos 108 7.5 0.001 0.001 11.764 12.082 qs_scf_loop_do_ot 108 8.5 0.001 0.001 11.763 12.081 ot_scf_mini 108 9.5 0.003 0.003 10.999 11.297 multiply_cannon 2286 13.5 0.154 0.180 10.482 11.254 grid_collocate_task_list 119 9.7 10.654 11.013 10.654 11.013 multiply_cannon_loop 2286 14.5 0.108 0.119 9.780 10.597 grid_integrate_task_list 119 12.3 9.518 9.738 9.518 9.738 mp_waitall_1 158411 16.6 7.970 8.848 7.970 8.848 ot_mini 108 10.5 0.001 0.001 6.412 6.746 multiply_cannon_metrocomm3 18288 15.5 0.041 0.044 5.353 6.454 rs_pw_transfer 974 11.9 0.015 0.017 5.561 6.061 pw_transfer 1439 11.6 0.094 0.103 5.951 6.049 fft_wrap_pw1pw2 1201 12.6 0.011 0.012 5.740 5.832 density_rs2pw 119 9.7 0.007 0.007 5.303 5.763 fft_wrap_pw1pw2_140 487 13.2 1.274 1.342 5.032 5.189 potential_pw2rs 119 12.3 0.008 0.009 4.658 4.685 multiply_cannon_multrec 18288 15.5 3.499 3.729 3.511 3.742 init_scf_loop 11 6.9 0.000 0.000 3.589 3.590 fft3d_ps 1201 14.6 1.954 2.091 3.416 3.571 qs_ot_get_derivative 108 11.5 0.001 0.001 3.226 3.528 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 3.032 3.224 apply_single 119 13.6 0.000 0.001 3.031 3.223 ot_diis_step 108 11.5 0.005 0.005 3.157 3.157 make_m2s 4572 13.5 0.053 0.058 2.961 3.085 init_scf_run 11 5.9 0.000 0.005 2.984 2.984 scf_env_initial_rho_setup 11 6.9 0.000 0.005 2.984 2.984 wfi_extrapolate 11 7.9 0.001 0.001 2.715 2.716 make_images 4572 14.5 0.132 0.136 2.576 2.701 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 2.444 2.466 mp_waitany 9880 13.7 1.753 2.394 1.753 2.394 multiply_cannon_metrocomm1 18288 15.5 0.021 0.025 0.597 1.916 mp_sum_l 11218 13.2 0.885 1.870 0.885 1.870 rs_pw_transfer_RS2PW_140 130 11.5 0.303 0.362 1.356 1.856 rs_pw_transfer_PW2RS_140 130 13.9 0.566 0.636 1.767 1.853 qs_ot_get_p 119 10.4 0.001 0.001 1.419 1.823 make_images_data 4572 15.5 0.038 0.042 1.329 1.485 hybrid_alltoall_any 4725 16.4 0.079 0.261 1.195 1.382 prepare_preconditioner 11 7.9 0.000 0.000 1.325 1.364 make_preconditioner 11 8.9 0.000 0.000 1.325 1.364 mp_alltoall_z22v 1201 16.6 1.121 1.298 1.121 1.298 mp_alltoall_d11v 2130 13.8 0.934 1.231 0.934 1.231 make_full_inverse_cholesky 11 9.9 0.000 0.000 1.173 1.206 qs_ot_get_derivative_diag 49 12.0 0.001 0.001 1.056 1.198 rs_pw_transfer_PW2RS_50 119 14.3 0.369 0.409 1.028 1.183 qs_ot_get_derivative_taylor 59 13.0 0.001 0.001 0.999 1.161 mp_sum_d 4135 12.0 0.668 1.131 0.668 1.131 ------------------------------------------------------------------------------- Plot: name="H2O-64_timings_32omp", title="Timings of H2O-64 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32omp", name="rest", label="rest", y=58.248999999999995, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=18.321, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=11.76, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=5.804, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="make_dense_data", label="make_dense_data", y=5.195, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=5.058, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="H2O-64_timings_32mpi", title="Timings of H2O-64 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32mpi", name="rest", label="rest", y=21.301000000000002, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=10.654, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=9.518, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=3.499, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="make_dense_data", label="make_dense_data", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=1.954, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=7.97, yerr=0.0 Running H2O-64_nonortho.inp with 1 threads and 32 ranks... done. Running H2O-64_nonortho.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_nonortho_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.040 0.040 125.611 125.611 qs_mol_dyn_low 1 2.0 0.003 0.003 124.877 124.877 qs_forces 11 3.9 0.001 0.001 124.835 124.835 qs_energies 11 4.9 0.001 0.001 116.338 116.338 scf_env_do_scf 11 5.9 0.001 0.001 102.241 102.241 scf_env_do_scf_inner_loop 96 6.5 0.012 0.012 82.756 82.756 velocity_verlet 10 3.0 0.002 0.002 80.131 80.131 rebuild_ks_matrix 107 8.3 0.001 0.001 38.597 38.597 qs_ks_build_kohn_sham_matrix 107 9.3 0.015 0.015 38.596 38.596 qs_rho_update_rho_low 107 7.7 0.001 0.001 37.199 37.199 calculate_rho_elec 107 8.7 0.896 0.896 37.198 37.198 qs_ks_update_qs_env 107 7.6 0.001 0.001 34.784 34.784 grid_collocate_task_list 107 9.7 32.881 32.881 32.881 32.881 sum_up_and_integrate 107 10.3 0.994 0.994 30.918 30.918 integrate_v_rspace 107 11.3 0.114 0.114 29.924 29.924 grid_integrate_task_list 107 12.3 27.881 27.881 27.881 27.881 dbcsr_multiply_generic 1966 12.4 0.140 0.140 19.554 19.554 init_scf_loop 11 6.9 0.000 0.000 19.319 19.319 qs_scf_new_mos 96 7.5 0.001 0.001 18.650 18.650 qs_scf_loop_do_ot 96 8.5 0.001 0.001 18.649 18.649 ot_scf_mini 96 9.5 0.003 0.003 17.249 17.249 prepare_preconditioner 11 7.9 0.000 0.000 14.715 14.715 make_preconditioner 11 8.9 0.000 0.000 14.715 14.715 make_full_inverse_cholesky 11 9.9 0.000 0.000 13.447 13.447 ot_mini 96 10.5 0.001 0.001 11.116 11.116 make_m2s 3932 13.4 0.041 0.041 10.357 10.357 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 7.095 7.095 init_scf_run 11 5.9 0.002 0.002 6.248 6.248 scf_env_initial_rho_setup 11 6.9 0.001 0.001 6.246 6.246 pw_transfer 1295 11.6 0.076 0.076 5.950 5.950 qs_ot_get_derivative 96 11.5 0.001 0.001 5.771 5.771 fft_wrap_pw1pw2 1081 12.6 0.007 0.007 5.684 5.684 wfi_extrapolate 11 7.9 0.001 0.001 5.549 5.549 multiply_cannon 1966 13.4 0.169 0.169 5.464 5.464 make_images 3932 14.4 2.080 2.080 5.402 5.402 ot_diis_step 96 11.5 0.004 0.004 5.342 5.342 dbcsr_make_dense_low 4961 15.5 0.084 0.084 5.268 5.268 make_dense_data 4961 16.5 4.641 4.641 5.171 5.171 multiply_cannon_loop 1966 14.4 0.054 0.054 5.020 5.020 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 4.983 4.983 apply_single 107 13.6 0.000 0.000 4.983 4.983 multiply_cannon_multrec 1966 15.4 4.915 4.915 4.965 4.965 fft_wrap_pw1pw2_140 439 13.2 0.494 0.494 4.863 4.863 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 4.819 4.819 cp_fm_cholesky_decompose 22 10.9 4.668 4.668 4.668 4.668 dbcsr_make_images_dense 3386 14.7 0.015 0.015 4.667 4.667 dbcsr_complete_redistribute 317 12.2 2.128 2.128 4.352 4.352 cp_fm_cholesky_invert 11 10.9 4.176 4.176 4.176 4.176 qs_env_update_s_mstruct 11 6.9 0.000 0.000 3.906 3.906 dbcsr_copy 1855 11.9 0.304 0.304 3.886 3.886 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 3.676 3.676 copy_dbcsr_to_fm 147 11.2 0.003 0.003 3.626 3.626 dbcsr_copy_into_existing 22 7.9 3.546 3.546 3.547 3.547 qs_create_task_list 11 7.9 0.000 0.000 3.463 3.463 generate_qs_task_list 11 8.9 2.422 2.422 3.463 3.463 density_rs2pw 107 9.7 0.006 0.006 3.420 3.420 fft3d_s 1082 14.6 2.876 2.876 2.881 2.881 transfer_dbcsr_to_fm 11 10.9 0.000 0.000 2.807 2.807 build_core_hamiltonian_matrix 11 6.9 0.001 0.001 2.758 2.758 qs_ot_get_p 107 10.4 0.001 0.001 2.668 2.668 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_nonortho_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.009 0.024 84.500 84.511 qs_mol_dyn_low 1 2.0 0.003 0.005 84.367 84.372 qs_forces 11 3.9 0.002 0.002 84.318 84.318 qs_energies 11 4.9 0.001 0.001 78.612 78.615 scf_env_do_scf 11 5.9 0.001 0.002 72.672 72.673 scf_env_do_scf_inner_loop 96 6.5 0.003 0.025 67.262 67.262 velocity_verlet 10 3.0 0.002 0.004 50.540 50.541 rebuild_ks_matrix 107 8.3 0.001 0.001 37.126 37.217 qs_ks_build_kohn_sham_matrix 107 9.3 0.016 0.018 37.125 37.217 qs_ks_update_qs_env 107 7.6 0.001 0.001 32.757 32.852 sum_up_and_integrate 107 10.3 0.060 0.071 32.142 32.167 integrate_v_rspace 107 11.3 0.005 0.006 32.081 32.116 qs_rho_update_rho_low 107 7.7 0.001 0.001 31.325 31.330 calculate_rho_elec 107 8.7 0.030 0.033 31.324 31.330 grid_integrate_task_list 107 12.3 26.262 26.724 26.262 26.724 grid_collocate_task_list 107 9.7 25.220 25.742 25.220 25.742 dbcsr_multiply_generic 1966 12.4 0.078 0.083 13.078 13.264 qs_scf_new_mos 96 7.5 0.001 0.001 10.071 10.184 qs_scf_loop_do_ot 96 8.5 0.001 0.001 10.070 10.183 ot_scf_mini 96 9.5 0.002 0.003 9.360 9.461 multiply_cannon 1966 13.4 0.137 0.148 8.969 9.200 multiply_cannon_loop 1966 14.4 0.094 0.097 8.360 8.556 mp_waitall_1 136719 16.5 6.630 6.929 6.630 6.929 rs_pw_transfer 878 11.9 0.012 0.013 5.520 6.282 density_rs2pw 107 9.7 0.005 0.006 5.354 6.097 pw_transfer 1295 11.6 0.080 0.088 5.664 5.733 fft_wrap_pw1pw2 1081 12.6 0.009 0.010 5.485 5.551 ot_mini 96 10.5 0.001 0.001 5.378 5.479 init_scf_loop 11 6.9 0.000 0.000 5.391 5.392 fft_wrap_pw1pw2_140 439 13.2 1.161 1.225 4.827 4.932 multiply_cannon_metrocomm3 15728 15.4 0.035 0.036 4.273 4.725 init_scf_run 11 5.9 0.001 0.006 4.649 4.650 scf_env_initial_rho_setup 11 6.9 0.000 0.005 4.649 4.649 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 4.570 4.585 potential_pw2rs 107 12.3 0.007 0.007 4.321 4.340 wfi_extrapolate 11 7.9 0.001 0.001 4.246 4.246 multiply_cannon_multrec 15728 15.4 3.317 3.428 3.328 3.439 fft3d_ps 1081 14.6 1.705 1.863 3.005 3.120 qs_ot_get_derivative 96 11.5 0.001 0.001 2.763 2.870 mp_waitany 8968 13.7 2.002 2.759 2.002 2.759 make_m2s 3932 13.4 0.045 0.050 2.659 2.729 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 2.553 2.612 apply_single 107 13.6 0.000 0.001 2.552 2.612 ot_diis_step 96 11.5 0.004 0.004 2.592 2.592 mp_alltoall_d11v 1998 13.7 1.618 2.507 1.618 2.507 rs_pw_transfer_RS2PW_140 118 11.5 0.360 0.433 1.734 2.506 make_images 3932 14.4 0.121 0.134 2.308 2.381 rs_gather_matrices 107 12.3 0.180 0.198 1.410 2.286 rs_pw_transfer_PW2RS_140 118 13.9 0.591 0.631 1.669 1.742 ------------------------------------------------------------------------------- Plot: name="H2O-64_nonortho_timings_32omp", title="Timings of H2O-64_nonortho with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="rest", label="rest", y=50.624999999999986, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=32.881, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=27.881, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=4.915, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=4.668, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="make_dense_data", label="make_dense_data", y=4.641, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="H2O-64_nonortho_timings_32mpi", title="Timings of H2O-64_nonortho with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="rest", label="rest", y=21.068999999999996, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=25.22, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=26.262, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=3.317, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="make_dense_data", label="make_dense_data", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitany", label="mp_waitany", y=2.002, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=6.63, yerr=0.0 Running H2O-hyb.inp with 1 threads and 32 ranks... done. Running H2O-hyb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-hyb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.197 0.197 121.598 121.598 qs_energies 1 2.0 0.000 0.000 120.644 120.644 scf_env_do_scf 1 3.0 0.000 0.000 119.419 119.419 qs_ks_update_qs_env 8 5.0 0.000 0.000 113.428 113.428 rebuild_ks_matrix 7 6.0 0.000 0.000 113.359 113.359 qs_ks_build_kohn_sham_matrix 7 7.0 0.001 0.001 113.359 113.359 hfx_ks_matrix 7 8.0 0.000 0.000 103.222 103.222 integrate_four_center 7 9.0 1.780 1.780 103.183 103.183 integrate_four_center_main 7 10.0 0.869 0.869 94.063 94.063 integrate_four_center_bin 450 11.0 93.194 93.194 93.194 93.194 scf_env_do_scf_inner_loop 7 4.0 0.001 0.001 65.081 65.081 init_scf_loop 1 4.0 0.000 0.000 54.320 54.320 integrate_four_center_load 7 10.0 0.000 0.000 7.051 7.051 hfx_load_balance 1 11.0 0.020 0.020 7.051 7.051 hfx_load_balance_bin 1 12.0 3.509 3.509 3.509 3.509 hfx_load_balance_count 1 12.0 3.504 3.504 3.504 3.504 qs_vxc_create 14 8.0 0.000 0.000 3.432 3.432 xc_vxc_pw_create 14 9.0 0.184 0.184 3.431 3.431 calculate_rho_elec 15 7.4 0.123 0.123 2.869 2.869 prepare_preconditioner 1 5.0 0.000 0.000 2.844 2.844 make_preconditioner 1 6.0 0.000 0.000 2.844 2.844 admm_mo_calc_rho_aux 7 8.0 0.000 0.000 2.741 2.741 xc_rho_set_and_dset_create 14 10.0 0.112 0.112 2.562 2.562 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-hyb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.216 0.237 111.549 111.561 qs_energies 1 2.0 0.000 0.000 111.150 111.157 scf_env_do_scf 1 3.0 0.000 0.000 110.731 110.731 qs_ks_update_qs_env 8 5.0 0.000 0.000 108.372 108.373 rebuild_ks_matrix 7 6.0 0.000 0.000 108.360 108.361 qs_ks_build_kohn_sham_matrix 7 7.0 0.002 0.002 108.360 108.361 hfx_ks_matrix 7 8.0 0.000 0.000 101.508 101.510 integrate_four_center 7 9.0 0.068 0.397 101.495 101.497 integrate_four_center_main 7 10.0 0.004 0.004 92.063 93.863 integrate_four_center_bin 448 11.0 92.059 93.859 92.059 93.859 scf_env_do_scf_inner_loop 7 4.0 0.000 0.002 62.342 62.343 init_scf_loop 1 4.0 0.000 0.000 48.387 48.387 integrate_four_center_load 7 10.0 0.000 0.000 6.502 6.503 hfx_load_balance 1 11.0 0.001 0.001 6.502 6.503 mp_sync 56 11.2 2.129 3.680 2.129 3.680 hfx_load_balance_bin 1 12.0 3.185 3.250 3.185 3.250 hfx_load_balance_count 1 12.0 3.173 3.247 3.173 3.247 qs_vxc_create 14 8.0 0.000 0.000 3.039 3.039 xc_vxc_pw_create 14 9.0 0.009 0.010 3.039 3.039 xc_rho_set_and_dset_create 14 10.0 0.019 0.022 2.293 2.435 ------------------------------------------------------------------------------- Plot: name="H2O-hyb_timings_32omp", title="Timings of H2O-hyb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32omp", name="rest", label="rest", y=18.544999999999987, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_bin", label="integrate_four_center_bin", y=93.194, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=3.509, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_count", label="hfx_load_balance_count", y=3.504, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center", label="integrate_four_center", y=1.78, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_main", label="integrate_four_center_main", y=0.869, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="CP2K", label="CP2K", y=0.197, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 Plot: name="H2O-hyb_timings_32mpi", title="Timings of H2O-hyb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32mpi", name="rest", label="rest", y=10.715000000000003, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_bin", label="integrate_four_center_bin", y=92.059, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=3.185, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_count", label="hfx_load_balance_count", y=3.173, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center", label="integrate_four_center", y=0.068, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_main", label="integrate_four_center_main", y=0.004, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="CP2K", label="CP2K", y=0.216, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="mp_sync", label="mp_sync", y=2.129, yerr=0.0 Running GW_PBE_4benzene.inp with 1 threads and 32 ranks... done. Running GW_PBE_4benzene.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/GW_PBE_4benzene_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.016 0.016 103.992 103.992 qs_energies 1 2.0 0.000 0.000 103.548 103.548 mp2_main 1 3.0 0.000 0.000 100.476 100.476 mp2_gpw_main 1 4.0 0.000 0.000 100.347 100.347 rpa_ri_compute_en 1 5.0 0.000 0.000 96.477 96.477 rpa_num_int 1 6.0 0.001 0.001 96.470 96.470 compute_mat_P_omega 1 7.0 0.003 0.003 84.137 84.137 compute_mat_P_omega_contract 10 8.0 13.793 13.793 83.866 83.866 dbt_total 2336 9.6 0.019 0.019 61.412 61.412 dbt_contract 787 11.0 0.052 0.052 52.998 52.998 dbt_tas_total 1149 12.2 0.366 0.366 51.331 51.331 dbt_tas_multiply 807 12.1 0.002 0.002 49.820 49.820 dbt_tas_dbm 807 14.1 0.004 0.004 42.018 42.018 dbm_multiply 807 16.1 42.006 42.006 42.006 42.006 dbt_tas_mm_1N 524 15.1 0.002 0.002 32.181 32.181 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 28.319 28.319 compute_mat_P_omega_calc_M_occ 250 9.0 13.855 13.855 25.591 25.591 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 9.321 9.321 dbt_tas_mm_2 251 15.0 0.002 0.002 8.104 8.104 dbt_copy 1103 10.7 0.070 0.070 6.879 6.879 compute_QP_energies 1 7.0 0.000 0.000 6.763 6.763 compute_self_energy_cubic_gw 1 8.0 0.099 0.099 6.762 6.762 contract_cubic_gw 21 9.0 0.000 0.000 5.249 5.249 dbt_tas_reserve_blocks_index 3261 14.3 0.513 0.513 4.539 4.539 dbm_reserve_blocks 3628 15.3 4.092 4.092 4.092 4.092 mp2_ri_gpw_compute_in 1 5.0 0.001 0.001 3.862 3.862 dbt_reserve_blocks_index 2280 13.1 0.082 0.082 3.476 3.476 dbt_reserve_blocks_index_array 2222 12.2 0.010 0.010 3.418 3.418 dbt_crop 1042 12.0 1.854 1.854 3.029 3.029 scf_env_do_scf 1 3.0 0.000 0.000 2.961 2.961 scf_env_do_scf_inner_loop 17 4.0 0.002 0.002 2.961 2.961 dbt_tas_copy 574 11.4 1.626 1.626 2.744 2.744 rpa_num_int_RPA_matrix_operati 10 7.0 0.000 0.000 2.524 2.524 convert_to_new_pgrid 2421 14.1 0.128 0.128 2.511 2.511 dbt_reshape 278 11.9 1.406 1.406 2.420 2.420 dbm_copy 1614 15.1 2.383 2.383 2.383 2.383 dbt_tas_reshape 367 15.0 0.007 0.007 2.359 2.359 ------------------------------------------------------------------------------- From /workspace/artifacts/GW_PBE_4benzene_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.022 42.595 42.606 qs_energies 1 2.0 0.000 0.000 42.473 42.475 mp2_main 1 3.0 0.000 0.000 41.406 41.408 mp2_gpw_main 1 4.0 0.000 0.001 41.370 41.372 rpa_ri_compute_en 1 5.0 0.000 0.000 39.832 39.834 rpa_num_int 1 6.0 0.000 0.002 39.831 39.833 dbt_total 2336 9.6 0.021 0.022 35.253 35.261 compute_mat_P_omega 1 7.0 0.001 0.006 34.155 34.156 compute_mat_P_omega_contract 10 8.0 0.586 0.608 34.023 34.028 dbt_contract 787 11.0 0.051 0.052 25.254 25.259 dbt_tas_total 1149 12.2 0.094 0.101 22.119 22.121 dbt_tas_multiply 807 12.1 0.003 0.003 22.040 22.042 dbt_tas_dbm 807 14.1 0.003 0.004 16.398 16.415 dbm_multiply 807 16.1 13.379 14.279 13.379 14.279 compute_mat_P_omega_calc_M_occ 250 9.0 0.544 0.565 10.485 10.485 dbt_copy 1149 10.8 0.018 0.019 8.908 9.214 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 9.211 9.212 dbt_reshape 1136 11.8 3.550 3.763 8.544 8.840 dbt_tas_mm_2 251 15.0 0.002 0.002 7.484 7.488 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 6.945 6.946 dbt_tas_mm_1N 524 15.1 0.002 0.002 6.356 6.783 mp_sync 8688 11.6 5.156 6.007 5.156 6.007 dbt_communicate_buffer 1136 12.8 0.059 0.064 3.694 3.835 mp_waitall_2 3812 15.3 3.611 3.833 3.611 3.833 compute_QP_energies 1 7.0 0.000 0.000 3.583 3.584 compute_self_energy_cubic_gw 1 8.0 0.003 0.003 3.583 3.583 dbt_reserve_blocks_index 2887 13.1 0.083 0.090 2.862 3.268 dbt_reserve_blocks_index_array 2829 12.2 0.010 0.011 2.847 3.251 dbt_tas_reserve_blocks_index 3347 14.5 0.474 0.509 2.848 3.248 dbm_reserve_blocks 3752 15.4 2.508 2.876 2.508 2.876 contract_cubic_gw 21 9.0 0.000 0.000 2.773 2.773 dbt_crop 1042 12.0 1.208 1.315 2.065 2.299 mp2_ri_gpw_compute_in 1 5.0 0.000 0.001 1.535 1.536 compute_mat_P_omega_copy_M_vir 250 9.0 0.001 0.001 1.491 1.494 dbt_tas_replicate 405 14.1 0.644 0.846 1.341 1.486 convert_to_new_pgrid 2421 14.1 0.031 0.035 1.293 1.409 dbm_copy 1608 15.1 1.254 1.373 1.254 1.373 compute_mat_P_omega_copy_M_occ 250 9.0 0.001 0.001 1.354 1.358 parallel_gemm_fm 105 8.4 0.000 0.000 1.327 1.336 parallel_gemm_fm_cosma 105 9.4 1.327 1.336 1.327 1.336 scf_env_do_scf 1 3.0 0.000 0.000 1.024 1.024 scf_env_do_scf_inner_loop 17 4.0 0.001 0.003 1.024 1.024 compute_W_cubic_GW 10 7.0 0.001 0.001 1.017 1.023 mp_max_i 2002 9.8 0.750 0.995 0.750 0.995 dbm_add 807 14.1 0.898 0.944 0.898 0.944 ------------------------------------------------------------------------------- Plot: name="GW_PBE_4benzene_timings_32omp", title="Timings of GW_PBE_4benzene with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="rest", label="rest", y=26.457000000000008, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=42.006, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="compute_mat_P_omega_calc_M_occ", label="compute_mat_P_omega_calc_M_occ", y=13.855, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="compute_mat_P_omega_contract", label="compute_mat_P_omega_contract", y=13.793, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=4.092, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_copy", label="dbm_copy", y=2.383, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbt_reshape", label="dbt_reshape", y=1.406, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 Plot: name="GW_PBE_4benzene_timings_32mpi", title="Timings of GW_PBE_4benzene with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="rest", label="rest", y=12.006999999999998, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=13.379, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="compute_mat_P_omega_calc_M_occ", label="compute_mat_P_omega_calc_M_occ", y=0.544, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="compute_mat_P_omega_contract", label="compute_mat_P_omega_contract", y=0.586, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=2.508, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_copy", label="dbm_copy", y=1.254, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbt_reshape", label="dbt_reshape", y=3.55, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_sync", label="mp_sync", y=5.156, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=3.611, yerr=0.0 Running RI-HFX_H2O-32.inp with 1 threads and 32 ranks... done. Running RI-HFX_H2O-32.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/RI-HFX_H2O-32_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.022 0.022 326.052 326.052 qs_forces 1 2.0 0.000 0.000 325.426 325.426 rebuild_ks_matrix 7 6.6 0.000 0.000 323.471 323.471 qs_ks_build_kohn_sham_matrix 7 7.6 0.001 0.001 323.471 323.471 hfx_ks_matrix 7 8.6 0.000 0.000 321.166 321.166 hfx_ri_update_ks 7 9.6 0.000 0.000 278.215 278.215 hfx_ri_update_ks_Pmat 7 10.6 39.515 39.515 278.209 278.209 dbt_total 783 11.1 0.008 0.008 256.116 256.116 qs_energies 1 3.0 0.000 0.000 246.972 246.972 scf_env_do_scf 1 4.0 0.000 0.000 246.583 246.583 qs_ks_update_qs_env 8 6.0 0.000 0.000 245.316 245.316 dbt_contract 207 12.4 0.085 0.085 233.842 233.842 dbt_tas_total 317 14.0 1.892 1.892 230.424 230.424 dbt_tas_multiply 216 13.5 0.001 0.001 226.840 226.840 dbt_tas_dbm 216 15.5 0.002 0.002 216.315 216.315 dbm_multiply 216 17.5 216.311 216.311 216.311 216.311 hfx_ri_update_ks_Pmat_KS 63 11.6 0.001 0.001 208.816 208.816 dbt_tas_mm_2 91 16.5 0.001 0.001 198.292 198.292 scf_env_do_scf_inner_loop 6 5.0 0.001 0.001 155.836 155.836 init_scf_loop 2 5.0 0.000 0.000 90.745 90.745 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 78.161 78.161 hfx_ri_update_forces 1 7.0 1.810 1.810 42.925 42.925 hfx_ri_forces_Pmat_3c 1 8.0 5.381 5.381 23.733 23.733 dbt_copy 409 11.7 0.075 0.075 18.681 18.681 precalc_derivatives 1 8.0 2.368 2.368 14.911 14.911 dbt_tas_mm_3T 77 17.1 0.000 0.000 12.631 12.631 dbt_reshape 132 13.2 7.777 7.777 12.584 12.584 hfx_ri_pre_scf_Pmat 1 12.0 0.000 0.000 10.811 10.811 dbt_tas_reserve_blocks_index 1229 15.4 1.060 1.060 9.384 9.384 hfx_ri_update_ks_Pmat_Px3C 63 11.6 0.000 0.000 9.062 9.062 build_3c_derivatives 3 9.0 2.673 2.673 8.540 8.540 dbm_reserve_blocks 1345 16.4 8.432 8.432 8.432 8.432 dbt_reserve_blocks_index 818 14.4 0.141 0.141 7.420 7.420 dbt_reserve_blocks_index_array 795 13.4 0.009 0.009 7.304 7.304 ------------------------------------------------------------------------------- From /workspace/artifacts/RI-HFX_H2O-32_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.008 0.029 62.739 62.749 qs_forces 1 2.0 0.000 0.000 62.563 62.563 rebuild_ks_matrix 7 6.6 0.000 0.000 61.720 61.720 qs_ks_build_kohn_sham_matrix 7 7.6 0.002 0.002 61.720 61.720 hfx_ks_matrix 7 8.6 0.000 0.000 60.265 60.273 dbt_total 783 11.1 0.008 0.008 53.714 53.716 dbt_contract 207 12.4 0.032 0.033 41.134 41.148 hfx_ri_update_ks 7 9.6 0.000 0.000 36.414 36.414 hfx_ri_update_ks_Pmat 7 10.6 1.772 1.863 36.407 36.413 dbt_tas_total 317 14.0 0.052 0.064 36.052 36.054 dbt_tas_multiply 216 13.5 0.001 0.001 35.933 35.933 qs_energies 1 3.0 0.000 0.000 34.233 34.234 scf_env_do_scf 1 4.0 0.000 0.001 34.058 34.059 qs_ks_update_qs_env 8 6.0 0.000 0.000 33.404 33.405 dbt_tas_dbm 216 15.5 0.001 0.002 30.161 30.165 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 28.317 28.318 dbm_multiply 216 17.5 27.324 28.277 27.324 28.277 hfx_ri_update_forces 1 7.0 0.080 0.085 23.850 23.859 scf_env_do_scf_inner_loop 6 5.0 0.000 0.001 19.036 19.036 hfx_ri_forces_Pmat_3c 1 8.0 0.239 0.262 17.523 17.523 hfx_ri_update_ks_Pmat_KS 63 11.6 0.001 0.001 15.697 15.697 init_scf_loop 2 5.0 0.000 0.000 15.022 15.022 dbt_tas_mm_2 91 16.5 0.001 0.001 13.247 13.248 dbt_copy 497 12.3 0.016 0.018 12.098 12.376 dbt_tas_mm_3T 77 17.1 0.000 0.000 8.998 9.610 dbt_reshape 365 13.6 5.063 5.190 9.315 9.513 hfx_ri_update_ks_Pmat_Px3C 63 11.6 0.000 0.000 6.998 6.998 mp_sync 2665 13.0 5.087 6.304 5.087 6.304 dbt_tas_mm_3N 37 15.4 0.000 0.000 5.753 6.016 dbt_tas_reserve_blocks_index 1356 15.8 0.978 1.001 4.527 4.814 precalc_derivatives 1 8.0 0.100 0.106 4.803 4.803 hfx_ri_pre_scf_Pmat 1 12.0 0.000 0.000 4.565 4.565 dbt_reserve_blocks_index 1051 14.7 0.151 0.159 3.793 4.023 dbt_reserve_blocks_index_array 1028 13.8 0.006 0.017 3.755 3.978 dbm_reserve_blocks 1481 16.7 3.642 3.913 3.642 3.913 dbt_crop 372 13.7 2.526 2.592 3.637 3.797 mp_waitall_2 1138 16.4 2.995 3.104 2.995 3.104 dbt_communicate_buffer 365 14.6 0.013 0.014 2.684 2.763 build_3c_derivatives 3 9.0 0.260 0.280 2.707 2.716 hfx_ri_pre_scf_Pmat_RIx3C 9 13.0 0.000 0.000 2.497 2.502 hfx_ri_update_ks_Pmat_copy_2 63 11.6 0.000 0.000 2.371 2.372 convert_to_new_pgrid 648 15.5 0.037 0.066 2.034 2.129 dbm_copy 452 16.3 1.807 1.898 1.807 1.898 dbt_tas_copy 132 12.5 0.981 1.006 1.809 1.898 ------------------------------------------------------------------------------- Plot: name="RI-HFX_H2O-32_timings_32omp", title="Timings of RI-HFX_H2O-32 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="rest", label="rest", y=48.63599999999997, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=216.311, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=39.515, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=8.432, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_reshape", label="dbt_reshape", y=7.777, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="hfx_ri_forces_Pmat_3c", label="hfx_ri_forces_Pmat_3c", y=5.381, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 Plot: name="RI-HFX_H2O-32_timings_32mpi", title="Timings of RI-HFX_H2O-32 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="rest", label="rest", y=16.617000000000004, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=27.324, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=1.772, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=3.642, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_reshape", label="dbt_reshape", y=5.063, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="hfx_ri_forces_Pmat_3c", label="hfx_ri_forces_Pmat_3c", y=0.239, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_sync", label="mp_sync", y=5.087, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=2.995, yerr=0.0 Running RI-MP2_ammonia.inp with 1 threads and 32 ranks... done. Running RI-MP2_ammonia.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/RI-MP2_ammonia_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.016 0.016 230.958 230.958 qs_energies 1 2.0 0.000 0.000 230.749 230.749 mp2_main 1 3.0 0.000 0.000 225.617 225.617 mp2_gpw_main 1 4.0 0.001 0.001 225.204 225.204 mp2_ri_gpw_compute_in 1 5.0 0.427 0.427 175.083 175.083 mp2_ri_gpw_compute_in_loop 1 6.0 0.012 0.012 165.592 165.592 mp2_eri_3c_integrate_gpw 2656 7.0 0.015 0.015 132.079 132.079 integrate_v_rspace 2666 8.0 0.693 0.693 118.332 118.332 grid_integrate_task_list 2666 9.0 115.666 115.666 115.666 115.666 mp2_ri_gpw_compute_en 1 5.0 0.091 0.091 50.093 50.093 mp2_ri_gpw_compute_en_RI_loop 1 6.0 11.964 11.964 47.938 47.938 mp2_ri_gpw_compute_en_expansio 2080 7.0 2.361 2.361 26.555 26.555 local_gemm 2080 8.0 24.194 24.194 24.194 24.194 dbcsr_multiply_generic 5322 8.0 0.192 0.192 22.332 22.332 ao_to_mo_and_store_B_mult_1 2656 7.0 0.010 0.010 22.309 22.309 calculate_wavefunction 2656 8.0 8.553 8.553 12.466 12.466 pw_transfer 63872 10.6 1.065 1.065 11.984 11.984 multiply_cannon 5322 9.0 0.492 0.492 11.967 11.967 ao_to_mo_and_store_B_E_Ex_1 2656 7.0 2.894 2.894 11.090 11.090 fft_wrap_pw1pw2 53228 11.4 0.110 0.110 10.679 10.679 multiply_cannon_loop 5322 10.0 0.137 0.137 10.523 10.523 multiply_cannon_multrec 5322 11.0 9.030 9.030 9.070 9.070 get_2c_integrals 1 6.0 0.000 0.000 9.063 9.063 copy_dbcsr_to_fm 2679 8.0 0.029 0.029 8.685 8.685 compute_2c_integrals 1 7.0 0.008 0.008 8.125 8.125 compute_2c_integrals_loop_lm 1 8.0 0.009 0.009 8.109 8.109 mp2_eri_2c_integrate_gpw 1 9.0 0.921 0.921 8.100 8.100 make_m2s 10644 9.0 0.064 0.064 7.940 7.940 make_images 10644 10.0 3.071 3.071 7.573 7.573 fft_wrap_pw1pw2_20 21271 12.4 0.515 0.515 7.569 7.569 mp2_ri_gpw_compute_en_ener 2080 7.0 6.698 6.698 6.698 6.698 dbcsr_complete_redistribute 2689 9.0 1.480 1.480 6.638 6.638 fft3d_s 53229 13.4 6.578 6.578 6.620 6.620 dbcsr_finalize 10708 9.5 0.255 0.255 5.901 5.901 dbcsr_merge_all 8011 10.3 4.122 4.122 5.169 5.169 scf_env_do_scf 1 3.0 0.000 0.000 4.674 4.674 scf_env_do_scf_inner_loop 10 4.0 0.001 0.001 4.674 4.674 ------------------------------------------------------------------------------- From /workspace/artifacts/RI-MP2_ammonia_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.027 43.052 43.062 qs_energies 1 2.0 0.000 0.000 42.921 42.921 mp2_main 1 3.0 0.000 0.001 40.833 40.834 mp2_gpw_main 1 4.0 0.002 0.003 40.737 40.737 mp2_ri_gpw_compute_in 1 5.0 0.058 0.063 20.571 21.031 mp2_ri_gpw_compute_en 1 5.0 0.226 0.233 20.060 20.378 mp2_ri_gpw_compute_in_loop 1 6.0 0.001 0.001 19.077 19.533 mp2_ri_gpw_compute_en_RI_loop 1 6.0 2.515 2.700 18.750 18.765 mp2_eri_3c_integrate_gpw 83 7.0 0.001 0.001 16.106 16.290 integrate_v_rspace 93 8.1 0.146 0.157 15.926 16.117 grid_integrate_task_list 93 9.1 15.544 15.742 15.544 15.742 mp2_ri_gpw_compute_en_expansio 65 7.0 0.186 0.207 12.649 12.807 local_gemm 65 8.0 12.462 12.609 12.462 12.609 mp2_ri_gpw_compute_en_comm 20 7.0 0.082 0.096 3.015 3.466 dbcsr_multiply_generic 176 8.0 0.011 0.012 2.543 2.931 ao_to_mo_and_store_B_mult_1 83 7.0 0.001 0.001 2.524 2.912 mp_sendrecv_dm3 1240 8.0 2.278 2.800 2.278 2.800 scf_env_do_scf 1 3.0 0.000 0.000 1.949 1.950 scf_env_do_scf_inner_loop 10 4.0 0.000 0.002 1.949 1.950 multiply_cannon 176 9.0 0.021 0.022 1.345 1.485 get_2c_integrals 1 6.0 0.000 0.000 1.415 1.439 multiply_cannon_loop 176 10.0 0.003 0.003 1.273 1.410 make_m2s 352 9.0 0.004 0.004 1.147 1.390 make_images 352 10.0 0.060 0.061 1.132 1.375 multiply_cannon_multrec 246 11.0 1.039 1.115 1.046 1.124 compute_2c_integrals 1 7.0 0.003 0.004 1.031 1.046 compute_2c_integrals_loop_lm 1 8.0 0.002 0.004 0.912 0.941 mp2_eri_2c_integrate_gpw 1 9.0 0.228 0.235 0.910 0.940 pw_transfer 2120 10.5 0.051 0.053 0.881 0.895 ------------------------------------------------------------------------------- Plot: name="RI-MP2_ammonia_timings_32omp", title="Timings of RI-MP2_ammonia with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="rest", label="rest", y=61.551000000000016, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=115.666, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="local_gemm", label="local_gemm", y=24.194, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp2_ri_gpw_compute_en_RI_loop", label="mp2_ri_gpw_compute_en_RI_loop", y=11.964, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=9.03, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="calculate_wavefunction", label="calculate_wavefunction", y=8.553, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp_sendrecv_dm3", label="mp_sendrecv_dm3", y=0.0, yerr=0.0 Plot: name="RI-MP2_ammonia_timings_32mpi", title="Timings of RI-MP2_ammonia with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="rest", label="rest", y=9.213999999999999, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=15.544, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="local_gemm", label="local_gemm", y=12.462, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp2_ri_gpw_compute_en_RI_loop", label="mp2_ri_gpw_compute_en_RI_loop", y=2.515, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=1.039, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="calculate_wavefunction", label="calculate_wavefunction", y=0.0, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp_sendrecv_dm3", label="mp_sendrecv_dm3", y=2.278, yerr=0.0 Running diag_cu144_broy.inp with 1 threads and 32 ranks... done. Running diag_cu144_broy.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/diag_cu144_broy_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.101 0.101 136.006 136.006 qs_energies 1 2.0 0.000 0.000 134.413 134.413 scf_env_do_scf 1 3.0 0.000 0.000 127.143 127.143 scf_env_do_scf_inner_loop 15 4.0 0.002 0.002 127.143 127.143 qs_ks_update_qs_env 15 5.0 0.000 0.000 52.498 52.498 rebuild_ks_matrix 15 6.0 0.000 0.000 52.263 52.263 qs_ks_build_kohn_sham_matrix 15 7.0 0.002 0.002 52.263 52.263 qs_scf_new_mos 15 5.0 0.000 0.000 46.267 46.267 eigensolver 15 6.0 0.002 0.002 37.536 37.536 qs_vxc_create 15 8.0 0.047 0.047 34.896 34.896 calculate_dispersion_nonloc 15 9.0 6.641 6.641 29.932 29.932 qs_rho_update_rho_low 16 5.0 0.000 0.000 25.010 25.010 calculate_rho_elec 16 6.0 0.223 0.223 25.010 25.010 pw_transfer 1191 10.0 0.077 0.077 23.798 23.798 cp_fm_diag_elpa 15 7.0 0.000 0.000 23.695 23.695 cp_fm_diag_elpa_base 15 8.0 20.957 20.957 23.694 23.694 grid_collocate_task_list 16 7.0 23.582 23.582 23.582 23.582 fft_wrap_pw1pw2 1086 11.0 0.010 0.010 23.551 23.551 fft_wrap_pw1pw2_150 765 12.0 3.700 3.700 16.916 16.916 sum_up_and_integrate 15 8.0 0.170 0.170 16.070 16.070 integrate_v_rspace 15 9.0 0.026 0.026 15.900 15.900 grid_integrate_task_list 15 10.0 15.326 15.326 15.326 15.326 cp_fm_cholesky_restore 45 7.0 11.357 11.357 11.357 11.357 fft3d_s 1087 13.0 10.298 10.298 10.307 10.307 pw_scatter_s 585 13.1 7.424 7.424 7.424 7.424 fft_wrap_pw1pw2_200 197 12.3 0.815 0.815 6.443 6.443 dbcsr_complete_redistribute 46 8.3 2.529 2.529 6.214 6.214 copy_dbcsr_to_fm 16 5.9 0.001 0.001 6.172 6.172 cp_fm_upper_to_full 30 8.0 5.220 5.220 5.220 5.220 xc_vxc_pw_create 15 9.0 0.265 0.265 4.916 4.916 vdW_energy 15 10.0 4.852 4.852 4.852 4.852 gspace_mixing 14 5.0 0.171 0.171 4.748 4.748 broyden_mixing 14 6.0 4.097 4.097 4.097 4.097 init_scf_run 1 3.0 0.000 0.000 3.457 3.457 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 3.311 3.311 xc_pw_derive 90 11.0 0.001 0.001 3.260 3.260 calculate_dm_sparse 15 6.0 0.023 0.023 2.958 2.958 cp_dbcsr_plus_fm_fm_t_native 15 7.0 0.000 0.000 2.849 2.849 xc_rho_set_and_dset_create 15 10.0 0.196 0.196 2.775 2.775 ------------------------------------------------------------------------------- From /workspace/artifacts/diag_cu144_broy_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.017 0.030 72.615 72.626 qs_energies 1 2.0 0.000 0.001 72.240 72.245 scf_env_do_scf 1 3.0 0.000 0.001 67.591 67.592 scf_env_do_scf_inner_loop 15 4.0 0.001 0.004 67.591 67.591 qs_ks_update_qs_env 15 5.0 0.000 0.000 31.092 31.111 rebuild_ks_matrix 15 6.0 0.000 0.000 31.045 31.064 qs_ks_build_kohn_sham_matrix 15 7.0 0.003 0.003 31.045 31.063 qs_rho_update_rho_low 16 5.0 0.000 0.000 23.554 23.565 calculate_rho_elec 16 6.0 0.007 0.007 23.554 23.565 grid_collocate_task_list 16 7.0 21.989 22.326 21.989 22.326 sum_up_and_integrate 15 8.0 0.021 0.024 16.030 16.081 integrate_v_rspace 15 9.0 0.001 0.001 16.009 16.062 grid_integrate_task_list 15 10.0 14.908 15.173 14.908 15.173 qs_vxc_create 15 8.0 0.001 0.001 14.545 14.561 qs_scf_new_mos 15 5.0 0.000 0.001 13.450 13.550 eigensolver 15 6.0 0.002 0.002 12.164 12.207 pw_transfer 1191 10.0 0.099 0.108 12.117 12.194 fft_wrap_pw1pw2 1086 11.0 0.015 0.017 11.876 11.950 calculate_dispersion_nonloc 15 9.0 1.033 1.062 11.666 11.690 cp_fm_diag_elpa 15 7.0 0.000 0.000 8.521 8.523 cp_fm_diag_elpa_base 15 8.0 8.335 8.360 8.519 8.519 fft3d_ps 1086 13.0 4.006 4.382 7.705 8.060 fft_wrap_pw1pw2_150 765 12.0 1.319 1.445 7.850 7.884 fft_wrap_pw1pw2_200 197 12.3 0.839 0.917 3.883 3.972 mp_alltoall_z22v 1086 15.0 2.858 3.524 2.858 3.524 cp_fm_cholesky_restore 45 7.0 3.430 3.492 3.430 3.492 xc_vxc_pw_create 15 9.0 0.023 0.029 2.879 2.927 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 2.798 2.798 build_core_hamiltonian_matrix 1 4.0 0.000 0.000 2.456 2.683 x_to_yz 585 14.1 0.456 0.476 1.948 2.216 yz_to_x 501 13.9 0.353 0.388 1.720 2.162 xc_pw_derive 90 11.0 0.001 0.001 2.050 2.154 vdW_energy 15 10.0 1.665 1.730 1.665 1.730 init_scf_run 1 3.0 0.000 0.000 1.604 1.605 build_core_ppnl 1 5.0 1.436 1.567 1.436 1.567 scf_env_initial_rho_setup 1 4.0 0.000 0.000 1.499 1.499 xc_rho_set_and_dset_create 15 10.0 0.033 0.046 1.449 1.485 density_rs2pw 16 7.0 0.001 0.001 1.276 1.472 ------------------------------------------------------------------------------- Plot: name="diag_cu144_broy_timings_32omp", title="Timings of diag_cu144_broy with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32omp", name="rest", label="rest", y=54.48599999999999, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=23.582, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=20.957, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=15.326, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=11.357, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_s", label="fft3d_s", y=10.298, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 Plot: name="diag_cu144_broy_timings_32mpi", title="Timings of diag_cu144_broy with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="rest", label="rest", y=19.946999999999996, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=21.989, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=8.335, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=14.908, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=3.43, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=4.006, yerr=0.0 Running bench_dftb.inp with 1 threads and 32 ranks... done. Running bench_dftb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/bench_dftb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.095 0.095 342.196 342.196 qs_energies 1 2.0 0.000 0.000 342.038 342.038 ls_scf 1 3.0 0.000 0.000 340.642 340.642 ls_scf_main 1 4.0 0.002 0.002 330.713 330.713 density_matrix_trs4 11 5.0 0.015 0.015 221.917 221.917 ls_scf_dm_to_ks 11 5.0 0.000 0.000 101.643 101.643 arnoldi_extremal 12 6.1 0.000 0.000 100.203 100.203 arnoldi_normal_ev 12 7.1 0.066 0.066 100.202 100.202 build_subspace 23 8.1 0.101 0.101 98.439 98.439 dbcsr_matrix_vector_mult 652 9.0 0.342 0.342 98.108 98.108 dbcsr_multiply_generic 185 6.1 1.044 1.044 98.042 98.042 matrix_ls_to_qs 11 6.0 0.000 0.000 97.600 97.600 dbcsr_matrix_vector_mult_local 652 10.0 96.344 96.344 96.355 96.355 dbcsr_complete_redistribute 23 7.5 37.583 37.583 55.243 55.243 multiply_cannon 185 7.1 0.522 0.522 53.317 53.317 matrix_decluster 11 7.0 0.000 0.000 50.298 50.298 dbcsr_copy_into_existing 11 7.0 47.299 47.299 47.299 47.299 multiply_cannon_loop 185 8.1 0.330 0.330 39.424 39.424 make_m2s 370 7.1 0.043 0.043 38.268 38.268 make_images 370 8.1 16.351 16.351 34.516 34.516 multiply_cannon_multrec 185 9.1 29.662 29.662 29.922 29.922 dbcsr_finalize 646 7.5 0.347 0.347 23.029 23.029 dbcsr_merge_all 597 8.5 3.277 3.277 21.369 21.369 tree_to_linear_d 110 9.4 15.950 15.950 15.950 15.950 dbcsr_sort_indices 1103 9.9 15.335 15.335 15.335 15.335 quick_finalize 395 10.0 0.528 0.528 13.222 13.222 setup_rec_index_2d 370 8.1 13.213 13.213 13.213 13.213 dbcsr_special_finalize 370 9.1 0.002 0.002 12.287 12.287 dbcsr_dot_sd 144 6.3 10.903 10.903 10.904 10.904 calculate_norms 370 9.1 9.172 9.172 9.172 9.172 ls_scf_init_scf 1 4.0 0.000 0.000 9.058 9.058 dbcsr_frobenius_norm 142 6.1 8.643 8.643 8.645 8.645 ls_scf_init_matrix_S 1 5.0 0.000 0.000 8.607 8.607 matrix_sqrt_Newton_Schulz 1 6.0 0.001 0.001 7.893 7.893 matrix_qs_to_ls 12 5.1 0.000 0.000 7.284 7.284 matrix_cluster 12 6.1 0.000 0.000 7.284 7.284 ------------------------------------------------------------------------------- From /workspace/artifacts/bench_dftb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.015 0.030 89.984 89.996 qs_energies 1 2.0 0.000 0.000 89.870 89.871 ls_scf 1 3.0 0.000 0.000 89.815 89.816 ls_scf_main 1 4.0 0.001 0.010 86.278 86.279 density_matrix_trs4 11 5.0 0.007 0.025 83.139 83.219 dbcsr_multiply_generic 185 6.1 0.072 0.087 79.360 79.681 multiply_cannon 185 7.1 0.042 0.046 67.226 68.100 multiply_cannon_loop 185 8.1 0.181 0.215 63.945 64.817 multiply_cannon_multrec 1480 9.1 39.482 43.275 39.892 43.696 mp_waitall_1 11936 10.3 19.801 23.157 19.801 23.157 multiply_cannon_metrocomm3 1480 9.1 0.017 0.021 11.583 17.841 make_m2s 370 7.1 0.040 0.046 8.850 8.928 make_images 370 8.1 0.688 0.728 8.698 8.778 calculate_norms 2960 9.1 7.479 8.723 7.479 8.723 multiply_cannon_metrocomm1 1480 9.1 0.010 0.013 4.738 8.508 make_images_data 370 9.1 0.010 0.011 3.856 4.085 hybrid_alltoall_any 393 9.9 0.307 1.743 3.444 3.707 mp_sum_l 1119 5.6 2.004 2.952 2.004 2.952 ls_scf_dm_to_ks 11 5.0 0.000 0.000 2.736 2.825 arnoldi_extremal 12 6.1 0.000 0.000 2.784 2.814 arnoldi_normal_ev 12 7.1 0.002 0.009 2.784 2.814 ls_scf_init_scf 1 4.0 0.000 0.000 2.680 2.681 build_subspace 23 8.1 0.028 0.033 2.669 2.671 ls_scf_init_matrix_S 1 5.0 0.000 0.000 2.650 2.657 dbcsr_complete_redistribute 23 7.5 1.458 1.557 2.491 2.587 matrix_ls_to_qs 11 6.0 0.000 0.000 2.422 2.526 make_images_pack 370 9.1 2.289 2.487 2.293 2.490 matrix_sqrt_Newton_Schulz 1 6.0 0.001 0.002 2.422 2.424 dbcsr_matrix_vector_mult 652 9.0 0.016 0.056 2.299 2.366 matrix_decluster 11 7.0 0.000 0.000 2.264 2.365 buffer_matrices_ensure_size 370 8.1 2.121 2.297 2.121 2.297 dbcsr_matrix_vector_mult_local 652 10.0 1.908 1.990 1.910 1.993 dbcsr_finalize 646 7.5 0.009 0.010 1.843 1.954 dbcsr_multiply_generic_mpsum_f 137 7.1 0.000 0.001 1.108 1.876 ------------------------------------------------------------------------------- Plot: name="bench_dftb_timings_32omp", title="Timings of bench_dftb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32omp", name="rest", label="rest", y=105.78500000000003, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=96.344, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=47.299, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=37.583, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=29.662, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="make_images", label="make_images", y=16.351, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="calculate_norms", label="calculate_norms", y=9.172, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="buffer_matrices_ensure_size", label="buffer_matrices_ensure_size", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="make_images_pack", label="make_images_pack", y=0.0, yerr=0.0 Plot: name="bench_dftb_timings_32mpi", title="Timings of bench_dftb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32mpi", name="rest", label="rest", y=14.757999999999996, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=1.908, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=1.458, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=39.482, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="make_images", label="make_images", y=0.688, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="calculate_norms", label="calculate_norms", y=7.479, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=19.801, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="buffer_matrices_ensure_size", label="buffer_matrices_ensure_size", y=2.121, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="make_images_pack", label="make_images_pack", y=2.289, yerr=0.0 Running dbcsr.inp with 1 threads and 32 ranks... done. Running dbcsr.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/dbcsr_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.008 0.008 81.215 81.215 lib_test 1 2.0 0.000 0.000 81.206 81.206 dbcsr_run_tests 3 3.0 0.002 0.002 81.206 81.206 test_multiplies_multiproc 3 4.0 0.002 0.002 64.562 64.562 dbcsr_redistribute 9 5.0 37.626 37.626 39.538 39.538 dbcsr_multiply_generic 9 5.0 0.002 0.002 23.277 23.277 multiply_cannon 9 6.0 0.017 0.017 16.666 16.666 dbcsr_make_random_matrix 9 4.0 13.126 13.126 16.528 16.528 multiply_cannon_loop 9 7.0 0.034 0.034 16.226 16.226 multiply_cannon_multrec 9 8.0 16.191 16.191 16.192 16.192 dbcsr_finalize 27 5.7 0.009 0.009 6.738 6.738 dbcsr_merge_all 18 6.5 2.606 2.606 5.806 5.806 dbcsr_data_release 975 7.6 3.503 3.503 3.503 3.503 make_m2s 18 6.0 0.001 0.001 2.427 2.427 make_images 18 7.0 0.861 0.861 2.252 2.252 tree_to_linear_d 9 7.0 2.229 2.229 2.229 2.229 dbcsr_destroy 93 5.8 0.001 0.001 1.713 1.713 ------------------------------------------------------------------------------- From /workspace/artifacts/dbcsr_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.004 0.012 23.869 23.874 lib_test 1 2.0 0.000 0.001 23.839 23.858 dbcsr_run_tests 3 3.0 0.000 0.001 23.839 23.857 test_multiplies_multiproc 3 4.0 0.000 0.003 22.864 22.953 dbcsr_multiply_generic 9 5.0 0.001 0.001 21.294 21.360 multiply_cannon 9 6.0 0.002 0.002 19.007 19.499 multiply_cannon_loop 9 7.0 0.003 0.005 18.640 19.172 multiply_cannon_multrec 72 8.0 15.804 16.598 15.805 16.600 mp_waitall_1 576 9.2 3.245 4.050 3.245 4.050 multiply_cannon_metrocomm1 72 8.0 0.001 0.002 2.492 3.148 mp_sum_l 390 2.5 0.527 1.205 0.527 1.205 dbcsr_multiply_generic_mpsum_f 9 6.0 0.000 0.000 0.521 1.199 dbcsr_finalize 27 5.7 0.000 0.000 0.910 1.034 multiply_cannon_metrocomm3 72 8.0 0.000 0.000 0.333 1.002 dbcsr_make_random_matrix 9 4.0 0.734 0.741 0.946 0.980 make_m2s 18 6.0 0.001 0.001 0.942 0.974 make_images 18 7.0 0.022 0.023 0.938 0.970 dbcsr_merge_all 18 6.5 0.142 0.164 0.768 0.844 dbcsr_data_release 444 7.6 0.725 0.839 0.725 0.839 dbcsr_destroy 111 5.9 0.001 0.010 0.585 0.689 dbcsr_redistribute 9 5.0 0.322 0.364 0.558 0.589 make_images_data 18 8.0 0.001 0.001 0.475 0.531 dbcsr_data_copy_aa2 18 7.5 0.455 0.505 0.455 0.505 hybrid_alltoall_any 18 9.0 0.050 0.217 0.436 0.489 ------------------------------------------------------------------------------- Plot: name="dbcsr_timings_32omp", title="Timings of dbcsr with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32omp", name="rest", label="rest", y=8.163000000000011, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_redistribute", label="dbcsr_redistribute", y=37.626, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=16.191, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=13.126, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_data_release", label="dbcsr_data_release", y=3.503, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_merge_all", label="dbcsr_merge_all", y=2.606, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 Plot: name="dbcsr_timings_32mpi", title="Timings of dbcsr with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32mpi", name="rest", label="rest", y=2.3699999999999974, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_redistribute", label="dbcsr_redistribute", y=0.322, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=15.804, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=0.734, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_data_release", label="dbcsr_data_release", y=0.725, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_merge_all", label="dbcsr_merge_all", y=0.142, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=3.245, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=0.527, yerr=0.0 Running MQAE_single_node.inp with 1 threads and 32 ranks... done. Running MQAE_single_node.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/MQAE_single_node_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.061 0.061 142.260 142.260 qs_mol_dyn_low 1 2.0 0.003 0.003 140.734 140.734 velocity_verlet 5 3.0 0.004 0.004 115.309 115.309 qmmm_el_coupling 6 3.8 0.000 0.000 87.716 87.716 qmmm_elec_with_gaussian 6 4.8 0.035 0.035 87.712 87.712 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 86.784 86.784 qmmm_elec_gaussian_low_G 6 6.8 85.862 85.862 85.862 85.862 qs_forces 6 3.8 0.001 0.001 43.193 43.193 qs_energies 6 4.8 0.000 0.000 38.014 38.014 scf_env_do_scf 6 5.8 0.001 0.001 35.437 35.437 rebuild_ks_matrix 45 8.4 0.000 0.000 30.881 30.881 qs_ks_build_kohn_sham_matrix 45 9.4 0.006 0.006 30.881 30.881 scf_env_do_scf_inner_loop 39 6.8 0.004 0.004 30.743 30.743 qs_ks_update_qs_env 45 7.8 0.000 0.000 26.145 26.145 pw_transfer 966 12.3 0.065 0.065 18.877 18.877 fft_wrap_pw1pw2 801 13.6 0.007 0.007 18.596 18.596 fft_wrap_pw1pw2_150 507 15.2 2.692 2.692 18.181 18.181 qs_vxc_create 45 10.4 0.001 0.001 15.790 15.790 xc_vxc_pw_create 45 11.4 0.775 0.775 15.789 15.789 xc_pw_derive 270 13.4 0.002 0.002 10.416 10.416 xc_rho_set_and_dset_create 45 12.4 1.153 1.153 8.544 8.544 qs_rho_update_rho_low 45 7.9 0.000 0.000 8.437 8.437 calculate_rho_elec 45 8.9 0.636 0.636 8.437 8.437 fft3d_s 802 15.6 8.017 8.017 8.027 8.027 pw_integral_ab 2539 7.4 7.133 7.133 7.133 7.133 qmmm_forces 6 3.8 0.003 0.003 6.656 6.656 xc_pw_divergence 45 12.4 0.001 0.001 6.390 6.390 pw_scatter_s 429 15.8 6.337 6.337 6.337 6.337 qmmm_forces_with_gaussian 6 4.8 0.047 0.047 6.215 6.215 qs_ks_ddapc 45 10.4 0.001 0.001 5.305 5.305 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 4.751 4.751 init_scf_loop 6 6.8 0.000 0.000 4.689 4.689 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 4.624 4.624 sum_up_and_integrate 45 10.4 0.958 0.958 4.506 4.506 grid_collocate_task_list 45 9.9 4.060 4.060 4.060 4.060 qmmm_forces_gaussian_low_G 6 6.8 3.854 3.854 3.854 3.854 density_rs2pw 45 9.9 0.002 0.002 3.740 3.740 integrate_v_rspace 45 11.4 0.032 0.032 3.548 3.548 cp_ddapc_apply_CD 45 11.4 0.006 0.006 3.512 3.512 pw_poisson_solve 51 9.9 0.759 0.759 3.441 3.441 ------------------------------------------------------------------------------- From /workspace/artifacts/MQAE_single_node_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.039 0.064 76.841 76.856 qs_mol_dyn_low 1 2.0 0.003 0.005 75.544 75.610 qs_forces 6 3.8 0.001 0.001 57.223 57.224 qs_energies 6 4.8 0.000 0.001 54.725 54.726 scf_env_do_scf 6 5.8 0.000 0.001 53.488 53.488 scf_env_do_scf_inner_loop 113 6.2 0.003 0.025 51.334 51.335 rebuild_ks_matrix 119 8.1 0.000 0.000 39.761 39.775 qs_ks_build_kohn_sham_matrix 119 9.1 0.016 0.017 39.760 39.775 qs_ks_update_qs_env 119 7.3 0.001 0.001 37.602 37.615 pw_transfer 2446 12.3 0.166 0.182 29.200 30.006 velocity_verlet 5 3.0 0.002 0.005 29.763 29.767 fft_wrap_pw1pw2 2059 13.4 0.021 0.023 28.699 29.520 fft_wrap_pw1pw2_150 1321 14.9 5.795 6.020 27.923 28.692 qs_vxc_create 119 10.1 0.002 0.002 23.400 23.411 xc_vxc_pw_create 119 11.1 0.363 0.439 23.397 23.409 xc_pw_derive 714 13.1 0.007 0.008 17.231 17.890 fft3d_ps 2059 15.4 9.541 9.899 16.475 17.669 qs_rho_update_rho_low 119 7.3 0.001 0.001 14.276 14.284 calculate_rho_elec 119 8.3 0.051 0.059 14.275 14.283 xc_pw_divergence 119 12.1 0.004 0.004 11.527 12.147 xc_rho_set_and_dset_create 119 12.1 0.706 0.865 11.198 11.736 sum_up_and_integrate 119 10.1 0.153 0.174 11.368 11.384 integrate_v_rspace 119 11.1 0.004 0.008 11.214 11.241 density_rs2pw 119 9.3 0.006 0.006 9.065 9.559 qmmm_forces 6 3.8 0.003 0.003 9.358 9.358 qmmm_forces_with_gaussian 6 4.8 0.012 0.014 8.945 9.102 rs_pw_transfer 988 11.5 0.011 0.013 7.864 8.351 qmmm_el_coupling 6 3.8 0.000 0.000 7.990 8.095 qmmm_elec_with_gaussian 6 4.8 0.004 0.005 7.988 8.093 potential_pw2rs 119 12.1 0.007 0.007 7.142 7.163 mp_alltoall_z22v 2059 17.4 4.923 6.287 4.923 6.287 grid_collocate_task_list 119 9.3 5.033 5.407 5.033 5.407 yz_to_x 964 16.0 0.866 1.068 3.397 4.442 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 4.316 4.357 grid_integrate_task_list 119 12.1 3.790 3.998 3.790 3.998 pw_restrict_s3 18 5.8 1.849 1.908 3.785 3.934 x_to_yz 1095 16.8 1.090 1.195 3.482 3.810 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 3.661 3.699 mp_waitany 4028 12.8 2.805 3.620 2.805 3.620 qmmm_forces_gaussian_low_G 6 6.8 3.546 3.586 3.546 3.586 qmmm_elec_with_gaussian:spline 6 5.8 0.000 0.000 3.453 3.557 pw_prolongate_s3 18 6.8 1.708 1.744 3.453 3.557 pw_gather_p 964 15.0 3.163 3.437 3.163 3.437 rs_pw_transfer_PW2RS_150 125 13.9 1.109 1.184 2.995 3.237 qs_ks_ddapc 119 10.1 0.002 0.002 3.165 3.228 pw_scatter_p 1095 15.8 3.140 3.217 3.140 3.217 qmmm_elec_gaussian_low_G 6 6.8 3.031 3.070 3.031 3.070 rs_pw_transfer_RS2PW_150 125 11.2 0.893 1.010 2.475 3.029 pw_integral_ab 2761 7.7 2.103 2.180 2.379 2.502 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 2.196 2.197 init_scf_loop 6 6.8 0.000 0.000 2.151 2.151 qs_scf_new_mos 113 7.2 0.000 0.001 2.118 2.125 qs_scf_loop_do_ot 113 8.2 0.001 0.001 2.117 2.124 ot_scf_mini 113 9.2 0.001 0.002 2.031 2.038 xc_functional_eval 238 13.1 0.002 0.002 1.619 2.010 dbcsr_multiply_generic 2588 12.3 0.064 0.065 1.889 1.939 mp_sum_dm3 33 5.7 1.542 1.604 1.542 1.604 pw_axpy 2291 9.2 1.541 1.597 1.541 1.597 ------------------------------------------------------------------------------- Plot: name="MQAE_single_node_timings_32omp", title="Timings of MQAE_single_node with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32omp", name="rest", label="rest", y=28.158999999999992, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=85.862, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_s", label="fft3d_s", y=8.017, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_integral_ab", label="pw_integral_ab", y=7.133, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_scatter_s", label="pw_scatter_s", y=6.337, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=4.06, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft_wrap_pw1pw2_150", label="fft_wrap_pw1pw2_150", y=2.692, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 Plot: name="MQAE_single_node_timings_32mpi", title="Timings of MQAE_single_node with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32mpi", name="rest", label="rest", y=42.624999999999986, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=3.031, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_integral_ab", label="pw_integral_ab", y=2.103, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_scatter_s", label="pw_scatter_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=5.033, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft_wrap_pw1pw2_150", label="fft_wrap_pw1pw2_150", y=5.795, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=3.79, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=4.923, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=9.541, yerr=0.0 Summary: Performance test took 40 minutes. Status: OK Removing intermediate container 3c68bce4ccaf ---> 44c5a891f8b3 Step 41/42 : CMD cat $(find ./report.log -mmin +10) | sed '/^Summary:/ s/$/ (cached)/' ---> Running in bd4e90862157 Removing intermediate container bd4e90862157 ---> 9010d208a1b0 Step 42/42 : ENTRYPOINT [] ---> Running in 2f614c4f4050 Removing intermediate container 2f614c4f4050 ---> 06595f2d72f5 [Warning] One or more build-args [GIT_COMMIT_SHA] were not consumed Successfully built 06595f2d72f5 Successfully tagged us-central1-docker.pkg.dev/cp2k-org-project/cp2kci/img_cp2k-perf-openmp:master Pushing new image... done. #################### Running Image cp2k-perf-openmp #################### Uploading artifacts... done EndDate: 2023-04-19 13:37:21+00:00